words_counted 0.1.1 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +16 -16
- data/lib/words_counted/counter.rb +22 -18
- data/lib/words_counted/version.rb +1 -1
- data/lib/words_counted.rb +1 -4
- data/spec/words_counted/counter_spec.rb +19 -9
- data/words_counted.gemspec +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 07c1e76ee27525e7aa28de6a61dedde8ba6eae39
|
4
|
+
data.tar.gz: e8062169aaf99c19947a246ff33385e1fca928a7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 680cc6c8048a809941f4e23c53c99a2ee7fed5e3d0fe7943d9842b29967c7d19f8430143d56ed510042a493e166c99c4ae70c9b82e4b21fb4aeb3dbe9280f52e
|
7
|
+
data.tar.gz: ec317b1d90f3f14ba399996c8061835393d4aac6dd55f8ebc342b95a315c710bd62d3c9981e37cff6397863b8b5e10744748b1cede99e610dbf5cdd59aafcb90
|
data/README.md
CHANGED
@@ -50,7 +50,7 @@ counter = WordsCounted.count(
|
|
50
50
|
)
|
51
51
|
|
52
52
|
# Using a file
|
53
|
-
counter = WordsCounted.from_file("path/to/my/file.txt")
|
53
|
+
counter = WordsCounted.from_file("path/or/url/to/my/file.txt")
|
54
54
|
```
|
55
55
|
|
56
56
|
### API
|
@@ -172,9 +172,9 @@ counter.words
|
|
172
172
|
#=> ["We", "are", "all", "in", "the", "gutter", "but", "some", "of", "us", "are", "looking", "at", "the", "stars"]
|
173
173
|
```
|
174
174
|
|
175
|
-
#### `.word_density`
|
175
|
+
#### `.word_density([ precision = 2 ])`
|
176
176
|
|
177
|
-
Returns a two-dimentional array of words and their density.
|
177
|
+
Returns a two-dimensional array of words and their density. Accepts a precision argument which defaults to two.
|
178
178
|
|
179
179
|
```ruby
|
180
180
|
counter.word_density
|
@@ -182,17 +182,17 @@ counter.word_density
|
|
182
182
|
[
|
183
183
|
["are", 13.33],
|
184
184
|
["the", 13.33],
|
185
|
-
["but", 6.67],
|
186
|
-
["us", 6.67],
|
187
|
-
["of", 6.67],
|
188
|
-
["some", 6.67],
|
189
|
-
["looking", 6.67],
|
190
|
-
["gutter", 6.67],
|
191
|
-
["at", 6.67],
|
192
|
-
["in", 6.67],
|
193
|
-
["all", 6.67],
|
194
|
-
["stars", 6.67],
|
195
|
-
["we", 6.67]
|
185
|
+
["but", 6.67 ],
|
186
|
+
["us", 6.67 ],
|
187
|
+
["of", 6.67 ],
|
188
|
+
["some", 6.67 ],
|
189
|
+
["looking", 6.67 ],
|
190
|
+
["gutter", 6.67 ],
|
191
|
+
["at", 6.67 ],
|
192
|
+
["in", 6.67 ],
|
193
|
+
["all", 6.67 ],
|
194
|
+
["stars", 6.67 ],
|
195
|
+
["we", 6.67 ]
|
196
196
|
]
|
197
197
|
```
|
198
198
|
|
@@ -204,9 +204,9 @@ Returns the string's character count.
|
|
204
204
|
counter.char_count #=> 76
|
205
205
|
```
|
206
206
|
|
207
|
-
#### `.average_chars_per_word`
|
207
|
+
#### `.average_chars_per_word([ precision = 2 ])`
|
208
208
|
|
209
|
-
Returns the average character count per word.
|
209
|
+
Returns the average character count per word. Accepts a precision argument which defaults to two.
|
210
210
|
|
211
211
|
```ruby
|
212
212
|
counter.average_chars_per_word #=> 4
|
data/lib/words_counted/counter.rb
CHANGED
@@ -4,14 +4,18 @@ module WordsCounted
|
|
4
4
|
|
5
5
|
WORD_REGEXP = /[\p{Alpha}\-']+/
|
6
6
|
|
7
|
+
def self.from_file(path, options = {})
|
8
|
+
File.open(path) do |file|
|
9
|
+
new file.read, options
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
7
13
|
def initialize(string, options = {})
|
8
14
|
@options = options
|
9
15
|
exclude = filter_proc(options[:exclude])
|
10
16
|
@words = string.scan(regexp).reject { |word| exclude.call(word) }
|
11
17
|
@char_count = @words.join.size
|
12
|
-
@word_occurrences = words.each_with_object(Hash.new(0))
|
13
|
-
hash[word.downcase] += 1
|
14
|
-
end
|
18
|
+
@word_occurrences = words.each_with_object(Hash.new(0)) { |word, hash| hash[word.downcase] += 1 }
|
15
19
|
@word_lengths = words.each_with_object({}) { |word, hash| hash[word] ||= word.length }
|
16
20
|
end
|
17
21
|
|
@@ -23,8 +27,8 @@ module WordsCounted
|
|
23
27
|
words.uniq.size
|
24
28
|
end
|
25
29
|
|
26
|
-
def average_chars_per_word
|
27
|
-
(char_count / word_count).round(
|
30
|
+
def average_chars_per_word(precision = 2)
|
31
|
+
(char_count.to_f / word_count.to_f).round(precision)
|
28
32
|
end
|
29
33
|
|
30
34
|
def most_occurring_words
|
@@ -35,28 +39,29 @@ module WordsCounted
|
|
35
39
|
highest_ranking word_lengths
|
36
40
|
end
|
37
41
|
|
38
|
-
def word_density
|
39
|
-
word_occurrences.each_with_object({}) do |(word, occ), hash|
|
40
|
-
hash[word] =
|
41
|
-
end
|
42
|
+
def word_density(precision = 2)
|
43
|
+
word_densities = word_occurrences.each_with_object({}) do |(word, occ), hash|
|
44
|
+
hash[word] = (occ.to_f / word_count.to_f * 100).round(precision)
|
45
|
+
end
|
46
|
+
sort_by_descending_value word_densities
|
42
47
|
end
|
43
48
|
|
44
49
|
def sorted_word_occurrences
|
45
|
-
word_occurrences
|
50
|
+
sort_by_descending_value word_occurrences
|
46
51
|
end
|
47
52
|
|
48
53
|
def sorted_word_lengths
|
49
|
-
word_lengths
|
54
|
+
sort_by_descending_value word_lengths
|
50
55
|
end
|
51
56
|
|
52
57
|
private
|
53
58
|
|
54
59
|
def highest_ranking(entries)
|
55
|
-
entries.group_by { |
|
60
|
+
entries.group_by { |_, value| value }.sort.last.last
|
56
61
|
end
|
57
62
|
|
58
|
-
def
|
59
|
-
|
63
|
+
def sort_by_descending_value(entries)
|
64
|
+
entries.sort_by { |_, value| value }.reverse
|
60
65
|
end
|
61
66
|
|
62
67
|
def regexp
|
@@ -74,13 +79,12 @@ module WordsCounted
|
|
74
79
|
->(word) {
|
75
80
|
exclusion_list.include?(word.downcase)
|
76
81
|
}
|
77
|
-
elsif Regexp.try_convert(filter)
|
78
|
-
|
79
|
-
Proc.new { |word| word =~ filter }
|
82
|
+
elsif regexp_filter = Regexp.try_convert(filter)
|
83
|
+
Proc.new { |word| word =~ regexp_filter }
|
80
84
|
elsif filter.respond_to?(:to_proc)
|
81
85
|
filter.to_proc
|
82
86
|
else
|
83
|
-
raise ArgumentError, "Filter must String, Array,
|
87
|
+
raise ArgumentError, "Filter must String, Array, Lambda, or Regexp"
|
84
88
|
end
|
85
89
|
end
|
86
90
|
end
|
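data/lib/words_counted.rb
CHANGED
[hunks for this file and for data/lib/words_counted/version.rb were not captured]
data/spec/words_counted/counter_spec.rb
CHANGED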
@@ -175,32 +175,42 @@ module WordsCounted
|
|
175
175
|
end
|
176
176
|
|
177
177
|
it "returns words and their density in percent" do
|
178
|
-
counter = Counter.new("His name was
|
179
|
-
expect(counter.word_density).to eq([["major",
|
178
|
+
counter = Counter.new("His name was Major, major Major Major.")
|
179
|
+
expect(counter.word_density).to eq([["major", 57.14], ["was", 14.29], ["name", 14.29], ["his", 14.29]])
|
180
|
+
end
|
181
|
+
|
182
|
+
it "accepts a precision" do
|
183
|
+
counter = Counter.new("His name was Major, major Major Major.")
|
184
|
+
expect(counter.word_density(4)).to eq([["major", 57.1429], ["was", 14.2857], ["name", 14.2857], ["his", 14.2857]])
|
180
185
|
end
|
181
186
|
end
|
182
187
|
|
183
188
|
describe "char_count" do
|
184
189
|
it "returns the number of chars in the passed in string" do
|
185
|
-
counter = Counter.new("His name was
|
186
|
-
expect(counter.char_count).to eq(
|
190
|
+
counter = Counter.new("His name was Major, major Major Major.")
|
191
|
+
expect(counter.char_count).to eq(30)
|
187
192
|
end
|
188
193
|
|
189
194
|
it "returns the number of chars in the passed in string after the filter is applied" do
|
190
|
-
counter = Counter.new("His name was
|
195
|
+
counter = Counter.new("His name was Major, major Major Major.", exclude: "Major")
|
191
196
|
expect(counter.char_count).to eq(10)
|
192
197
|
end
|
193
198
|
end
|
194
199
|
|
195
200
|
describe "average_chars_per_word" do
|
196
201
|
it "returns the average number of chars per word" do
|
197
|
-
counter = Counter.new("His name was major, Major Major Major
|
198
|
-
expect(counter.average_chars_per_word).to eq(4)
|
202
|
+
counter = Counter.new("His name was major, Major Major Major.")
|
203
|
+
expect(counter.average_chars_per_word).to eq(4.29)
|
199
204
|
end
|
200
205
|
|
201
206
|
it "returns the average number of chars per word after the filter is applied" do
|
202
|
-
counter = Counter.new("His name was
|
203
|
-
expect(counter.average_chars_per_word).to eq(3)
|
207
|
+
counter = Counter.new("His name was Major, Major Major Major.", exclude: "Major")
|
208
|
+
expect(counter.average_chars_per_word).to eq(3.33)
|
209
|
+
end
|
210
|
+
|
211
|
+
it "accepts precision" do
|
212
|
+
counter = Counter.new("This line should have 39 characters minus spaces.")
|
213
|
+
expect(counter.average_chars_per_word(4)).to eq(5.5714)
|
204
214
|
end
|
205
215
|
end
|
206
216
|
|
data/words_counted.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.version = WordsCounted::VERSION
|
10
10
|
spec.authors = ["Mohamad El-Husseini"]
|
11
11
|
spec.email = ["husseini.mel@gmail.com"]
|
12
|
-
spec.description = %q{A Ruby word counter with helpful utility methods.}
|
12
|
+
spec.description = %q{A Ruby word counter and string analyser with helpful utility methods.}
|
13
13
|
spec.summary = %q{See README.}
|
14
14
|
spec.homepage = "https://github.com/abitdodgy/words_counted"
|
15
15
|
spec.license = "MIT"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: words_counted
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mohamad El-Husseini
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-10-
|
11
|
+
date: 2014-10-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -66,7 +66,7 @@ dependencies:
|
|
66
66
|
- - '>='
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
|
-
description: A Ruby word counter with helpful utility methods.
|
69
|
+
description: A Ruby word counter and string analyser with helpful utility methods.
|
70
70
|
email:
|
71
71
|
- husseini.mel@gmail.com
|
72
72
|
executables: []
|