words_counted 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +16 -16
- data/lib/words_counted/counter.rb +22 -18
- data/lib/words_counted/version.rb +1 -1
- data/lib/words_counted.rb +1 -4
- data/spec/words_counted/counter_spec.rb +19 -9
- data/words_counted.gemspec +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 07c1e76ee27525e7aa28de6a61dedde8ba6eae39
|
4
|
+
data.tar.gz: e8062169aaf99c19947a246ff33385e1fca928a7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 680cc6c8048a809941f4e23c53c99a2ee7fed5e3d0fe7943d9842b29967c7d19f8430143d56ed510042a493e166c99c4ae70c9b82e4b21fb4aeb3dbe9280f52e
|
7
|
+
data.tar.gz: ec317b1d90f3f14ba399996c8061835393d4aac6dd55f8ebc342b95a315c710bd62d3c9981e37cff6397863b8b5e10744748b1cede99e610dbf5cdd59aafcb90
|
data/README.md
CHANGED
@@ -50,7 +50,7 @@ counter = WordsCounted.count(
|
|
50
50
|
)
|
51
51
|
|
52
52
|
# Using a file
|
53
|
-
counter = WordsCounted.from_file("path/to/my/file.txt")
|
53
|
+
counter = WordsCounted.from_file("path/or/url/to/my/file.txt")
|
54
54
|
```
|
55
55
|
|
56
56
|
### API
|
@@ -172,9 +172,9 @@ counter.words
|
|
172
172
|
#=> ["We", "are", "all", "in", "the", "gutter", "but", "some", "of", "us", "are", "looking", "at", "the", "stars"]
|
173
173
|
```
|
174
174
|
|
175
|
-
#### `.word_density`
|
175
|
+
#### `.word_density([ precision = 2 ])`
|
176
176
|
|
177
|
-
Returns a two-dimensional array of words and their density.
|
177
|
+
Returns a two-dimensional array of words and their density, rounded to the given precision. It accepts a precision argument which defaults to two.
|
178
178
|
|
179
179
|
```ruby
|
180
180
|
counter.word_density
|
@@ -182,17 +182,17 @@ counter.word_density
|
|
182
182
|
[
|
183
183
|
["are", 13.33],
|
184
184
|
["the", 13.33],
|
185
|
-
["but", 6.67],
|
186
|
-
["us", 6.67],
|
187
|
-
["of", 6.67],
|
188
|
-
["some", 6.67],
|
189
|
-
["looking", 6.67],
|
190
|
-
["gutter", 6.67],
|
191
|
-
["at", 6.67],
|
192
|
-
["in", 6.67],
|
193
|
-
["all", 6.67],
|
194
|
-
["stars", 6.67],
|
195
|
-
["we", 6.67]
|
185
|
+
["but", 6.67 ],
|
186
|
+
["us", 6.67 ],
|
187
|
+
["of", 6.67 ],
|
188
|
+
["some", 6.67 ],
|
189
|
+
["looking", 6.67 ],
|
190
|
+
["gutter", 6.67 ],
|
191
|
+
["at", 6.67 ],
|
192
|
+
["in", 6.67 ],
|
193
|
+
["all", 6.67 ],
|
194
|
+
["stars", 6.67 ],
|
195
|
+
["we", 6.67 ]
|
196
196
|
]
|
197
197
|
```
|
198
198
|
|
@@ -204,9 +204,9 @@ Returns the string's character count.
|
|
204
204
|
counter.char_count #=> 76
|
205
205
|
```
|
206
206
|
|
207
|
-
#### `.average_chars_per_word`
|
207
|
+
#### `.average_chars_per_word([ precision = 2 ])`
|
208
208
|
|
209
|
-
Returns the average character count per word.
|
209
|
+
Returns the average character count per word. Accepts a precision argument which defaults to two.
|
210
210
|
|
211
211
|
```ruby
|
212
212
|
counter.average_chars_per_word #=> 4
|
@@ -4,14 +4,18 @@ module WordsCounted
|
|
4
4
|
|
5
5
|
WORD_REGEXP = /[\p{Alpha}\-']+/
|
6
6
|
|
7
|
+
def self.from_file(path, options = {})
|
8
|
+
File.open(path) do |file|
|
9
|
+
new file.read, options
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
7
13
|
def initialize(string, options = {})
|
8
14
|
@options = options
|
9
15
|
exclude = filter_proc(options[:exclude])
|
10
16
|
@words = string.scan(regexp).reject { |word| exclude.call(word) }
|
11
17
|
@char_count = @words.join.size
|
12
|
-
@word_occurrences = words.each_with_object(Hash.new(0))
|
13
|
-
hash[word.downcase] += 1
|
14
|
-
end
|
18
|
+
@word_occurrences = words.each_with_object(Hash.new(0)) { |word, hash| hash[word.downcase] += 1 }
|
15
19
|
@word_lengths = words.each_with_object({}) { |word, hash| hash[word] ||= word.length }
|
16
20
|
end
|
17
21
|
|
@@ -23,8 +27,8 @@ module WordsCounted
|
|
23
27
|
words.uniq.size
|
24
28
|
end
|
25
29
|
|
26
|
-
def average_chars_per_word
|
27
|
-
(char_count / word_count).round(
|
30
|
+
def average_chars_per_word(precision = 2)
|
31
|
+
(char_count.to_f / word_count.to_f).round(precision)
|
28
32
|
end
|
29
33
|
|
30
34
|
def most_occurring_words
|
@@ -35,28 +39,29 @@ module WordsCounted
|
|
35
39
|
highest_ranking word_lengths
|
36
40
|
end
|
37
41
|
|
38
|
-
def word_density
|
39
|
-
word_occurrences.each_with_object({}) do |(word, occ), hash|
|
40
|
-
hash[word] =
|
41
|
-
end
|
42
|
+
def word_density(precision = 2)
|
43
|
+
word_densities = word_occurrences.each_with_object({}) do |(word, occ), hash|
|
44
|
+
hash[word] = (occ.to_f / word_count.to_f * 100).round(precision)
|
45
|
+
end
|
46
|
+
sort_by_descending_value word_densities
|
42
47
|
end
|
43
48
|
|
44
49
|
def sorted_word_occurrences
|
45
|
-
word_occurrences
|
50
|
+
sort_by_descending_value word_occurrences
|
46
51
|
end
|
47
52
|
|
48
53
|
def sorted_word_lengths
|
49
|
-
word_lengths
|
54
|
+
sort_by_descending_value word_lengths
|
50
55
|
end
|
51
56
|
|
52
57
|
private
|
53
58
|
|
54
59
|
def highest_ranking(entries)
|
55
|
-
entries.group_by { |
|
60
|
+
entries.group_by { |_, value| value }.sort.last.last
|
56
61
|
end
|
57
62
|
|
58
|
-
def
|
59
|
-
|
63
|
+
def sort_by_descending_value(entries)
|
64
|
+
entries.sort_by { |_, value| value }.reverse
|
60
65
|
end
|
61
66
|
|
62
67
|
def regexp
|
@@ -74,13 +79,12 @@ module WordsCounted
|
|
74
79
|
->(word) {
|
75
80
|
exclusion_list.include?(word.downcase)
|
76
81
|
}
|
77
|
-
elsif Regexp.try_convert(filter)
|
78
|
-
|
79
|
-
Proc.new { |word| word =~ filter }
|
82
|
+
elsif regexp_filter = Regexp.try_convert(filter)
|
83
|
+
Proc.new { |word| word =~ regexp_filter }
|
80
84
|
elsif filter.respond_to?(:to_proc)
|
81
85
|
filter.to_proc
|
82
86
|
else
|
83
|
-
raise ArgumentError, "Filter must String, Array,
|
87
|
+
raise ArgumentError, "Filter must String, Array, Lambda, or Regexp"
|
84
88
|
end
|
85
89
|
end
|
86
90
|
end
|
data/lib/words_counted.rb
CHANGED
@@ -175,32 +175,42 @@ module WordsCounted
|
|
175
175
|
end
|
176
176
|
|
177
177
|
it "returns words and their density in percent" do
|
178
|
-
counter = Counter.new("His name was
|
179
|
-
expect(counter.word_density).to eq([["major",
|
178
|
+
counter = Counter.new("His name was Major, major Major Major.")
|
179
|
+
expect(counter.word_density).to eq([["major", 57.14], ["was", 14.29], ["name", 14.29], ["his", 14.29]])
|
180
|
+
end
|
181
|
+
|
182
|
+
it "accepts a precision" do
|
183
|
+
counter = Counter.new("His name was Major, major Major Major.")
|
184
|
+
expect(counter.word_density(4)).to eq([["major", 57.1429], ["was", 14.2857], ["name", 14.2857], ["his", 14.2857]])
|
180
185
|
end
|
181
186
|
end
|
182
187
|
|
183
188
|
describe "char_count" do
|
184
189
|
it "returns the number of chars in the passed in string" do
|
185
|
-
counter = Counter.new("His name was
|
186
|
-
expect(counter.char_count).to eq(
|
190
|
+
counter = Counter.new("His name was Major, major Major Major.")
|
191
|
+
expect(counter.char_count).to eq(30)
|
187
192
|
end
|
188
193
|
|
189
194
|
it "returns the number of chars in the passed in string after the filter is applied" do
|
190
|
-
counter = Counter.new("His name was
|
195
|
+
counter = Counter.new("His name was Major, major Major Major.", exclude: "Major")
|
191
196
|
expect(counter.char_count).to eq(10)
|
192
197
|
end
|
193
198
|
end
|
194
199
|
|
195
200
|
describe "average_chars_per_word" do
|
196
201
|
it "returns the average number of chars per word" do
|
197
|
-
counter = Counter.new("His name was major, Major Major Major
|
198
|
-
expect(counter.average_chars_per_word).to eq(4)
|
202
|
+
counter = Counter.new("His name was major, Major Major Major.")
|
203
|
+
expect(counter.average_chars_per_word).to eq(4.29)
|
199
204
|
end
|
200
205
|
|
201
206
|
it "returns the average number of chars per word after the filter is applied" do
|
202
|
-
counter = Counter.new("His name was
|
203
|
-
expect(counter.average_chars_per_word).to eq(3)
|
207
|
+
counter = Counter.new("His name was Major, Major Major Major.", exclude: "Major")
|
208
|
+
expect(counter.average_chars_per_word).to eq(3.33)
|
209
|
+
end
|
210
|
+
|
211
|
+
it "accepts precision" do
|
212
|
+
counter = Counter.new("This line should have 39 characters minus spaces.")
|
213
|
+
expect(counter.average_chars_per_word(4)).to eq(5.5714)
|
204
214
|
end
|
205
215
|
end
|
206
216
|
|
data/words_counted.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.version = WordsCounted::VERSION
|
10
10
|
spec.authors = ["Mohamad El-Husseini"]
|
11
11
|
spec.email = ["husseini.mel@gmail.com"]
|
12
|
-
spec.description = %q{A Ruby word counter with helpful utility methods.}
|
12
|
+
spec.description = %q{A Ruby word counter and string analyser with helpful utility methods.}
|
13
13
|
spec.summary = %q{See README.}
|
14
14
|
spec.homepage = "https://github.com/abitdodgy/words_counted"
|
15
15
|
spec.license = "MIT"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: words_counted
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mohamad El-Husseini
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-10-
|
11
|
+
date: 2014-10-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -66,7 +66,7 @@ dependencies:
|
|
66
66
|
- - '>='
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
|
-
description: A Ruby word counter with helpful utility methods.
|
69
|
+
description: A Ruby word counter and string analyser with helpful utility methods.
|
70
70
|
email:
|
71
71
|
- husseini.mel@gmail.com
|
72
72
|
executables: []
|