words_counted 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/lib/words_counted/counter.rb +5 -3
- data/lib/words_counted/version.rb +1 -1
- data/spec/words_counted/counter_spec.rb +5 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b9b07e20d2f4adda71cca50cab03b13b8ed7655e
|
4
|
+
data.tar.gz: f960b67f29488004565aaea9ca2fae20d01b17fe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3a5b7d91f4d8d90956d82b9de7f0c96fd27b1db3fbc284e4de3506c8b9a250c91c34f5156ea6bcfde21a58cb044795d560ffc9f10d80f8ad96a0df5487bc1696
|
7
|
+
data.tar.gz: 99403fb14b085a7b2fa4b3db80e3510d4d8ee9a6e63a8b398af1b1c0243f6c04f1ba27000297b7d3c10d9904736569240b9b935c04385177bc93f072ddb336eb
|
data/README.md
CHANGED
@@ -176,13 +176,13 @@ counter.words
|
|
176
176
|
Defining words is tricky business. Out of the box, the default regexp accounts for letters, hyphenated words, and apostrophes. This means `twenty-one` is treated as one word. So is `Mohamad's`.
|
177
177
|
|
178
178
|
```ruby
|
179
|
-
/[
|
179
|
+
/[\p{Alpha}\-']+/
|
180
180
|
```
|
181
181
|
|
182
182
|
If you prefer, you can pass in your own criteria in the form of a Ruby regexp to split your string as desired. For example, if you wanted to count numbers as words, you could pass the following regex instead of the default one.
|
183
183
|
|
184
184
|
```ruby
|
185
|
-
counter = WordsCounted::Counter.new("I am 007.", regex: /[
|
185
|
+
counter = WordsCounted::Counter.new("I am 007.", regex: /[\p{Alnum}\-']+/)
|
186
186
|
counter.words
|
187
187
|
#=> ["I", "am", "007"]
|
188
188
|
```
|
@@ -244,7 +244,7 @@ end
|
|
244
244
|
|
245
245
|
#### Isn't it better to write this in JavaScript?
|
246
246
|
|
247
|
-
![Picard face
|
247
|
+
![Picard face-palm](http://stream1.gifsoup.com/view3/1290449/picard-facepalm-o.gif "Picard face-palm")
|
248
248
|
|
249
249
|
## About
|
250
250
|
|
@@ -12,7 +12,7 @@ module WordsCounted
|
|
12
12
|
#
|
13
13
|
# Words are alpha characters and can include hyphens and apostrophes.
|
14
14
|
#
|
15
|
-
WORD_REGEX = /[
|
15
|
+
WORD_REGEX = /[\p{Alpha}\-']+/
|
16
16
|
|
17
17
|
# Initializes an instance of Counter and splits a given string into an array of words.
|
18
18
|
#
|
@@ -56,7 +56,7 @@ module WordsCounted
|
|
56
56
|
def initialize(string, options = {})
|
57
57
|
@options = options
|
58
58
|
|
59
|
-
@words = string.
|
59
|
+
@words = string.scan(regex).reject { |word| filter.split.include? word.downcase }
|
60
60
|
|
61
61
|
@word_occurrences = words.each_with_object(Hash.new(0)) do |word, result|
|
62
62
|
result[word.downcase] += 1
|
@@ -100,7 +100,9 @@ module WordsCounted
|
|
100
100
|
# @return [Array<Array>] pairs of word and density (in percent), sorted from highest density to lowest.
|
101
101
|
#
|
102
102
|
def word_density
|
103
|
-
word_occurrences.each_with_object({})
|
103
|
+
word_occurrences.each_with_object({}) do |(word, occ), hash|
|
104
|
+
hash[word] = percent_of_n(occ)
|
105
|
+
end.sort_by { |_, v| v }.reverse
|
104
106
|
end
|
105
107
|
|
106
108
|
private
|
@@ -5,6 +5,10 @@ module WordsCounted
|
|
5
5
|
let(:counter) { Counter.new("We are all in the gutter, but some of us are looking at the stars.") }
|
6
6
|
|
7
7
|
describe "#initialize" do
|
8
|
+
it "sets @options" do
|
9
|
+
expect(counter.instance_variables).to include(:@options)
|
10
|
+
end
|
11
|
+
|
8
12
|
it "sets @words" do
|
9
13
|
expect(counter.instance_variables).to include(:@words)
|
10
14
|
end
|
@@ -48,7 +52,7 @@ module WordsCounted
|
|
48
52
|
end
|
49
53
|
|
50
54
|
it "splits words based on regex" do
|
51
|
-
counter = Counter.new("I am 007.", regex: /[
|
55
|
+
counter = Counter.new("I am 007.", regex: /[\p{Alnum}\-']+/)
|
52
56
|
expect(counter.words).to eq(["I", "am", "007"])
|
53
57
|
end
|
54
58
|
end
|