words_counted 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/lib/words_counted/counter.rb +5 -3
- data/lib/words_counted/version.rb +1 -1
- data/spec/words_counted/counter_spec.rb +5 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b9b07e20d2f4adda71cca50cab03b13b8ed7655e
|
4
|
+
data.tar.gz: f960b67f29488004565aaea9ca2fae20d01b17fe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3a5b7d91f4d8d90956d82b9de7f0c96fd27b1db3fbc284e4de3506c8b9a250c91c34f5156ea6bcfde21a58cb044795d560ffc9f10d80f8ad96a0df5487bc1696
|
7
|
+
data.tar.gz: 99403fb14b085a7b2fa4b3db80e3510d4d8ee9a6e63a8b398af1b1c0243f6c04f1ba27000297b7d3c10d9904736569240b9b935c04385177bc93f072ddb336eb
|
data/README.md
CHANGED
@@ -176,13 +176,13 @@ counter.words
|
|
176
176
|
Defining words is tricky business. Out of the box, the default regexp accounts for letters, hyphenated words, and apostrophes. This means `twenty-one` is treated as one word. So is `Mohamad's`.
|
177
177
|
|
178
178
|
```ruby
|
179
|
-
/[
|
179
|
+
/[\p{Alpha}\-']+/
|
180
180
|
```
|
181
181
|
|
182
182
|
If you prefer, you can pass in your own criteria in the form of a Ruby regexp to split your string as desired. For example, if you wanted to count numbers as words, you could pass the following regex instead of the default one.
|
183
183
|
|
184
184
|
```ruby
|
185
|
-
counter = WordsCounted::Counter.new("I am 007.", regex: /[
|
185
|
+
counter = WordsCounted::Counter.new("I am 007.", regex: /[\p{Alnum}\-']+/)
|
186
186
|
counter.words
|
187
187
|
#=> ["I", "am", "007"]
|
188
188
|
```
|
@@ -244,7 +244,7 @@ end
|
|
244
244
|
|
245
245
|
#### Isn't it better to write this in JavaScript?
|
246
246
|
|
247
|
-

|
248
248
|
|
249
249
|
## About
|
250
250
|
|
data/lib/words_counted/counter.rb
CHANGED
@@ -12,7 +12,7 @@ module WordsCounted
|
|
12
12
|
#
|
13
13
|
# Words are alpha characters and can include hyphens and apostrophes.
|
14
14
|
#
|
15
|
-
WORD_REGEX = /[
|
15
|
+
WORD_REGEX = /[\p{Alpha}\-']+/
|
16
16
|
|
17
17
|
# Initializes an instance of Counter and splits a given string into an array of words.
|
18
18
|
#
|
@@ -56,7 +56,7 @@ module WordsCounted
|
|
56
56
|
def initialize(string, options = {})
|
57
57
|
@options = options
|
58
58
|
|
59
|
-
@words = string.
|
59
|
+
@words = string.scan(regex).reject { |word| filter.split.include? word.downcase }
|
60
60
|
|
61
61
|
@word_occurrences = words.each_with_object(Hash.new(0)) do |word, result|
|
62
62
|
result[word.downcase] += 1
|
@@ -100,7 +100,9 @@ module WordsCounted
|
|
100
100
|
# @returns [Hash] a hash map of words as keys and their density as values in percent.
|
101
101
|
#
|
102
102
|
def word_density
|
103
|
-
word_occurrences.each_with_object({})
|
103
|
+
word_occurrences.each_with_object({}) do |(word, occ), hash|
|
104
|
+
hash[word] = percent_of_n(occ)
|
105
|
+
end.sort_by { |_, v| v }.reverse
|
104
106
|
end
|
105
107
|
|
106
108
|
private
|
data/spec/words_counted/counter_spec.rb
CHANGED
@@ -5,6 +5,10 @@ module WordsCounted
|
|
5
5
|
let(:counter) { Counter.new("We are all in the gutter, but some of us are looking at the stars.") }
|
6
6
|
|
7
7
|
describe "#initialize" do
|
8
|
+
it "sets @options" do
|
9
|
+
expect(counter.instance_variables).to include(:@options)
|
10
|
+
end
|
11
|
+
|
8
12
|
it "sets @words" do
|
9
13
|
expect(counter.instance_variables).to include(:@words)
|
10
14
|
end
|
@@ -48,7 +52,7 @@ module WordsCounted
|
|
48
52
|
end
|
49
53
|
|
50
54
|
it "splits words based on regex" do
|
51
|
-
counter = Counter.new("I am 007.", regex: /[
|
55
|
+
counter = Counter.new("I am 007.", regex: /[\p{Alnum}\-']+/)
|
52
56
|
expect(counter.words).to eq(["I", "am", "007"])
|
53
57
|
end
|
54
58
|
end
|