frequency_analyser 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +23 -4
- data/lib/frequency_analyser.rb +2 -0
- data/lib/frequency_analyser/aggregator.rb +26 -0
- data/lib/frequency_analyser/base.rb +14 -12
- data/lib/frequency_analyser/modifier.rb +37 -0
- metadata +6 -4
data/README.md
CHANGED
@@ -36,15 +36,34 @@ FrequencyAnalyser.analyse('foo', File.new('bar'), StringIO.new('baz'), ['q', 'u'
|
|
36
36
|
#=> { 'a'=>2, 'b'=>2, 'f'=>1, 'o'=>2, 'q'=>1, 'r'=>1, 'u'=>1, 'x'=>1, 'z'=>1 }
|
37
37
|
```
|
38
38
|
|
39
|
+
## Probabilities and Percentages
|
40
|
+
|
41
|
+
If you'd like to calculate the frequency probabilities instead, you can pass in an optional symbol:
|
42
|
+
|
43
|
+
```ruby
|
44
|
+
FrequencyAnalyser.analyse('Hello, world!', :probability)
|
45
|
+
#=> { 'd'=>0.1, 'e'=>0.1, 'h'=>0.1, 'l'=>0.3, 'o'=>0.2, 'r'=>0.1, 'w'=>0.1 }
|
46
|
+
```
|
47
|
+
|
48
|
+
The same goes for percentages:
|
49
|
+
|
50
|
+
```ruby
|
51
|
+
FrequencyAnalyser.analyse('Hello, world!', :percentage)
|
52
|
+
#=> { 'd'=>10.0, 'e'=>10.0, 'h'=>10.0, 'l'=>30.0, 'o'=>20.0, 'r'=>10.0, 'w'=>10.0 }
|
53
|
+
```
|
54
|
+
|
39
55
|
## Counting other things
|
40
56
|
|
41
|
-
By
|
42
|
-
change this by instantiating your own
|
57
|
+
By default, Frequency Analyser counts alphabetic characters. You can
|
58
|
+
change this by instantiating your own support classes:
|
43
59
|
|
44
60
|
```ruby
|
45
|
-
counter
|
46
|
-
|
61
|
+
counter = FrequencyAnalyser::Counter.new(%w(1 3 5 !))
|
62
|
+
aggregator = FrequencyAnalyser::Aggregator.new(counter)
|
63
|
+
analyser = FrequencyAnalyser.new(aggregator)
|
47
64
|
|
48
65
|
analyser.analyse('!12321!')
|
49
66
|
#=> { '!'=>2, '1'=>2, '3'=>1 }
|
50
67
|
```
|
68
|
+
|
69
|
+
Most of the gem is built from small, swappable classes like these, so extending it (for example, by adding a new mode) should be straightforward.
|
data/lib/frequency_analyser.rb
CHANGED
data/lib/frequency_analyser/aggregator.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# Runs every line of every given source through a counter and collects the
# per-line results into a single aggregation.
#
# counter     - object responding to `count(line)`; defaults to the
#               FrequencyAnalyser::Counter class itself.
# aggregation - seed collection responding to `<<`; defaults to a new
#               FrequencyAnalyser::Aggregation.
class FrequencyAnalyser::Aggregator < Struct.new(:counter, :aggregation)

  def initialize(counter = fa::Counter, aggregation = fa::Aggregation.new)
    super
  end

  # Convenience wrapper: aggregates using a default-configured instance.
  def self.aggregate(*files)
    new.aggregate(*files)
  end

  # Counts each line of each source and returns the combined aggregation.
  #
  # files - any mix of objects responding to `each_line`, optionally nested
  #         inside arrays (nesting is flattened away).
  #
  # Returns a copy of the seed aggregation with every line's counts appended.
  def aggregate(*files)
    # Accumulate into a copy of the seed rather than the stored member, so a
    # reused aggregator instance does not carry counts over from one call to
    # the next.
    # NOTE(review): assumes `dup` yields an independent copy, as it does for
    # Hash and its subclasses; confirm against FrequencyAnalyser::Aggregation.
    result = aggregation.dup

    files.flatten.each do |file|
      file.each_line do |line|
        result << counter.count(line)
      end
    end

    result
  end

  private

  # Shorthand for the top-level namespace. The compact
  # `class FrequencyAnalyser::Aggregator` form does not put FrequencyAnalyser
  # in the lexical constant scope, so sibling constants are reached via this.
  def fa
    FrequencyAnalyser
  end

end
|
data/lib/frequency_analyser/base.rb
CHANGED
@@ -1,23 +1,25 @@
|
|
1
|
-
class FrequencyAnalyser < Struct.new(:
|
1
|
+
# Public entry point of the gem. Splits the caller's arguments into input
# sources and an optional trailing mode symbol, hands the sources to the
# aggregator, then passes the aggregation and mode to the modifier.
#
# aggregator - object responding to `aggregate(files)`; defaults to Aggregator.
# modifier   - object responding to `modify(aggregation, mode)`; defaults to
#              Modifier.
class FrequencyAnalyser < Struct.new(:aggregator, :modifier)

  def initialize(aggregator = Aggregator, modifier = Modifier)
    super
  end

  # Analyses with the default aggregator and modifier.
  def self.analyse(*args)
    new.analyse(*args)
  end

  # args - input sources, optionally nested in arrays, optionally followed by
  #        a mode Symbol as the last argument.
  #
  # Returns whatever the modifier returns for the aggregated sources.
  def analyse(*args)
    sources, mode = coerce(args)
    modifier.modify(aggregator.aggregate(sources), mode)
  end

  private

  # Flattens the arguments and peels a trailing Symbol off as the mode.
  #
  # Returns a two-element array [sources, mode]; mode is nil when the last
  # argument is not a Symbol.
  def coerce(args)
    sources = [args].flatten
    return [sources, nil] unless sources.last.is_a?(Symbol)

    mode = sources.pop
    [sources, mode]
  end

end
|
data/lib/frequency_analyser/modifier.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# Converts an aggregation of raw counts into the representation selected by
# a mode symbol.
class FrequencyAnalyser::Modifier

  # Convenience wrapper around a fresh instance's #modify.
  def self.modify(aggregation, mode)
    new.modify(aggregation, mode)
  end

  # aggregation - Hash-like mapping of key => count (must respond to
  #               `values`, `inject`, `dup` and `merge!`).
  # mode        - :probability, :percentage, or anything else (including nil).
  #
  # Returns a new collection of Float probabilities (:probability) or Float
  # percentages (:percentage); for any other mode the aggregation itself is
  # returned untouched.
  def modify(aggregation, mode)
    case mode
    when :probability
      probability(aggregation)
    when :percentage
      percentage(aggregation)
    else
      aggregation
    end
  end

  private

  # Each count divided by the total of all counts.
  def probability(aggregation)
    scale(aggregation, 1.0)
  end

  # Each count as a share of the total, multiplied by 100.
  def percentage(aggregation)
    scale(aggregation, 100.0)
  end

  # Builds a copy of the aggregation whose values are `count * factor / total`.
  #
  # The results are written into a duplicate so the caller's counts are not
  # overwritten, and the multiplication happens before the division so the
  # value is rounded once (7 of 100 yields 7.0, not 7.000000000000001).
  # NOTE(review): assumes `dup` yields an independent copy, as it does for
  # Hash and its subclasses; confirm against FrequencyAnalyser::Aggregation.
  def scale(aggregation, factor)
    total = sum(aggregation)
    aggregation.inject(aggregation.dup) do |scaled, (key, count)|
      scaled.merge!(key => count * factor / total)
    end
  end

  # Total of all counts; 0 for an empty aggregation.
  def sum(aggregation)
    aggregation.values.inject(0, :+)
  end

end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: frequency_analyser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 31
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
|
-
-
|
8
|
+
- 2
|
9
9
|
- 0
|
10
|
-
version: 1.
|
10
|
+
version: 1.2.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Christopher Patuzzo
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-09-
|
18
|
+
date: 2012-09-16 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: rspec
|
@@ -42,8 +42,10 @@ extra_rdoc_files: []
|
|
42
42
|
files:
|
43
43
|
- README.md
|
44
44
|
- lib/frequency_analyser/aggregation.rb
|
45
|
+
- lib/frequency_analyser/aggregator.rb
|
45
46
|
- lib/frequency_analyser/base.rb
|
46
47
|
- lib/frequency_analyser/counter.rb
|
48
|
+
- lib/frequency_analyser/modifier.rb
|
47
49
|
- lib/frequency_analyser.rb
|
48
50
|
homepage: https://github.com/cpatuzzo/frequency_analyser
|
49
51
|
licenses: []
|