frequency_analyser 1.1.0 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +23 -4
- data/lib/frequency_analyser.rb +2 -0
- data/lib/frequency_analyser/aggregator.rb +26 -0
- data/lib/frequency_analyser/base.rb +14 -12
- data/lib/frequency_analyser/modifier.rb +37 -0
- metadata +6 -4
data/README.md
CHANGED
@@ -36,15 +36,34 @@ FrequencyAnalyser.analyse('foo', File.new('bar'), StringIO.new('baz'), ['q', 'u'
|
|
36
36
|
#=> { 'a'=>2, 'b'=>2, 'f'=>1, 'o'=>2, 'q'=>1, 'r'=>1, 'u'=>1, 'x'=>1, 'z'=>1 }
|
37
37
|
```
|
38
38
|
|
39
|
+
## Probabilities and Percentages
|
40
|
+
|
41
|
+
If you'd like to calculate the frequency probabilities instead, you can pass in an optional symbol:
|
42
|
+
|
43
|
+
```ruby
|
44
|
+
FrequencyAnalyser.analyse('Hello, world!', :probability)
|
45
|
+
#=> { 'd'=>0.1, 'e'=>0.1, 'h'=>0.1, 'l'=>0.3, 'o'=>0.2, 'r'=>0.1, 'w'=>0.1 }
|
46
|
+
```
|
47
|
+
|
48
|
+
The same goes for percentages:
|
49
|
+
|
50
|
+
```ruby
|
51
|
+
FrequencyAnalyser.analyse('Hello, world!', :percentage)
|
52
|
+
#=> { 'd'=>10.0, 'e'=>10.0, 'h'=>10.0, 'l'=>30.0, 'o'=>20.0, 'r'=>10.0, 'w'=>10.0 }
|
53
|
+
```
|
54
|
+
|
39
55
|
## Counting other things
|
40
56
|
|
41
|
-
By
|
42
|
-
change this by instantiating your own
|
57
|
+
By default, Frequency Analyser counts alphabetic characters. You can
|
58
|
+
change this by instantiating your own support classes:
|
43
59
|
|
44
60
|
```ruby
|
45
|
-
counter
|
46
|
-
|
61
|
+
counter = FrequencyAnalyser::Counter.new(%w(1 3 5 !))
|
62
|
+
aggregator = FrequencyAnalyser::Aggregator.new(counter)
|
63
|
+
analyser = FrequencyAnalyser.new(aggregator)
|
47
64
|
|
48
65
|
analyser.analyse('!12321!')
|
49
66
|
#=> { '!'=>2, '1'=>2, '3'=>1 }
|
50
67
|
```
|
68
|
+
|
69
|
+
Most of the gem is architected in this way, so it should be straightforward to add new modes, for example.
|
data/lib/frequency_analyser.rb
CHANGED
data/lib/frequency_analyser/aggregator.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# Runs every line of every given source through +counter+ and appends each
# per-line result to +aggregation+.
#
# Both collaborators are Struct members, so they can be swapped out at
# construction time. Note that an instance keeps appending to the same
# +aggregation+ object on every call to #aggregate.
#
# NOTE(review): the default counter is the FrequencyAnalyser::Counter class
# itself (not an instance), so it presumably exposes a class-level +count+ --
# confirm against counter.rb.
class FrequencyAnalyser::Aggregator < Struct.new(:counter, :aggregation)

  # counter     - object responding to +count(line)+.
  # aggregation - object responding to +<<+; receives each count result.
  def initialize(counter = FrequencyAnalyser::Counter,
                 aggregation = FrequencyAnalyser::Aggregation.new)
    super
  end

  # Shortcut: aggregate +files+ with a freshly built default instance.
  def self.aggregate(*files)
    new.aggregate(*files)
  end

  # files - any mix of objects responding to +each_line+; nested arrays are
  #         flattened first.
  #
  # Returns the +aggregation+ member after all lines have been counted.
  def aggregate(*files)
    files.flatten.each do |source|
      source.each_line { |line| aggregation << counter.count(line) }
    end

    aggregation
  end

end
|
data/lib/frequency_analyser/base.rb
CHANGED
@@ -1,23 +1,25 @@
|
|
1
|
-
# Entry point of the gem: hands the given sources to +aggregator+ and passes
# the resulting aggregation, together with an optional mode, to +modifier+.
class FrequencyAnalyser < Struct.new(:aggregator, :modifier)

  # aggregator - object responding to +aggregate(files)+.
  # modifier   - object responding to +modify(aggregation, mode)+.
  def initialize(aggregator = Aggregator, modifier = Modifier)
    super
  end

  # Shortcut: analyse +args+ with a default-configured instance.
  def self.analyse(*args)
    new.analyse(*args)
  end

  # args - sources to analyse (nested arrays are flattened), optionally
  #        followed by a trailing Symbol that selects the mode.
  #
  # Returns whatever +modifier.modify+ returns.
  def analyse(*args)
    sources, mode = coerce(args)
    modifier.modify(aggregator.aggregate(sources), mode)
  end

  private

  # Splits the flattened argument list into [sources, mode]. +mode+ is nil
  # unless the last argument is a Symbol.
  def coerce(args)
    sources = [args].flatten
    mode = sources.last.is_a?(Symbol) ? sources.pop : nil

    [sources, mode]
  end

end
|
data/lib/frequency_analyser/modifier.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# Post-processes an aggregation of counts according to a mode symbol:
#
#   :probability - each count divided by the sum of all counts
#   :percentage  - the same share multiplied by 100
#   anything else (including nil) - the aggregation is returned as given
#
# The rescaled result is written into a copy, so the aggregation passed in
# keeps its raw counts.
#
# NOTE(review): the aggregation is assumed to be Hash-like (it must respond
# to +values+, +inject+, +dup+ and +merge!+) -- confirm against
# FrequencyAnalyser::Aggregation.
class FrequencyAnalyser::Modifier

  # Shortcut: apply +mode+ to +aggregation+ using a throwaway instance.
  def self.modify(aggregation, mode)
    new.modify(aggregation, mode)
  end

  # aggregation - Hash-like mapping of item => count.
  # mode        - :probability, :percentage, or anything else for "as is".
  #
  # Returns a rescaled copy for the two known modes, otherwise the very
  # object that was passed in.
  def modify(aggregation, mode)
    case mode
    when :probability
      probability(aggregation)
    when :percentage
      percentage(aggregation)
    else
      aggregation
    end
  end

  private

  # Share of the total for every key, as Floats.
  def probability(aggregation)
    total = sum(aggregation)
    rescale(aggregation) { |count| count.to_f / total }
  end

  # Share of the total for every key, times 100, as Floats.
  def percentage(aggregation)
    total = sum(aggregation)
    rescale(aggregation) { |count| count.to_f / total * 100 }
  end

  # Yields every count and stores the block's result under the same key in a
  # duplicate of +aggregation+. Working on a duplicate keeps the caller's
  # object (which may be the long-lived state of an aggregator) intact.
  def rescale(aggregation)
    aggregation.inject(aggregation.dup) do |result, (key, count)|
      result.merge!(key => yield(count))
    end
  end

  # Total of all counts; 0 for an empty aggregation.
  def sum(aggregation)
    aggregation.values.inject(0, :+)
  end

end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: frequency_analyser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 31
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
|
-
-
|
8
|
+
- 2
|
9
9
|
- 0
|
10
|
-
version: 1.
|
10
|
+
version: 1.2.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Christopher Patuzzo
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-09-
|
18
|
+
date: 2012-09-16 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: rspec
|
@@ -42,8 +42,10 @@ extra_rdoc_files: []
|
|
42
42
|
files:
|
43
43
|
- README.md
|
44
44
|
- lib/frequency_analyser/aggregation.rb
|
45
|
+
- lib/frequency_analyser/aggregator.rb
|
45
46
|
- lib/frequency_analyser/base.rb
|
46
47
|
- lib/frequency_analyser/counter.rb
|
48
|
+
- lib/frequency_analyser/modifier.rb
|
47
49
|
- lib/frequency_analyser.rb
|
48
50
|
homepage: https://github.com/cpatuzzo/frequency_analyser
|
49
51
|
licenses: []
|