words_counted 0.1.5 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/.hound.yml +2 -0
- data/.ruby-style.yml +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +9 -0
- data/.yardopts +3 -2
- data/CHANGELOG.md +29 -0
- data/README.md +146 -189
- data/lib/refinements/hash_refinements.rb +14 -0
- data/lib/words_counted/counter.rb +113 -72
- data/lib/words_counted/deprecated.rb +78 -0
- data/lib/words_counted/tokeniser.rb +163 -0
- data/lib/words_counted/version.rb +1 -1
- data/lib/words_counted.rb +31 -4
- data/spec/words_counted/counter_spec.rb +49 -204
- data/spec/words_counted/deprecated_spec.rb +99 -0
- data/spec/words_counted/tokeniser_spec.rb +133 -0
- data/spec/words_counted_spec.rb +34 -0
- data/words_counted.gemspec +2 -2
- metadata +25 -12
@@ -3,240 +3,85 @@ require_relative "../spec_helper"
|
|
3
3
|
|
4
4
|
module WordsCounted
|
5
5
|
describe Counter do
|
6
|
-
let(:counter)
|
7
|
-
|
8
|
-
|
9
|
-
it "sets @options" do
|
10
|
-
expect(counter.instance_variables).to include(:@options)
|
11
|
-
end
|
12
|
-
|
13
|
-
it "sets @char_count" do
|
14
|
-
expect(counter.instance_variables).to include(:@char_count)
|
15
|
-
end
|
16
|
-
|
17
|
-
it "sets @words" do
|
18
|
-
expect(counter.instance_variables).to include(:@words)
|
19
|
-
end
|
20
|
-
|
21
|
-
it "sets @word_occurrences" do
|
22
|
-
expect(counter.instance_variables).to include(:@word_occurrences)
|
23
|
-
end
|
24
|
-
|
25
|
-
it "sets @word_lengths" do
|
26
|
-
expect(counter.instance_variables).to include(:@word_lengths)
|
27
|
-
end
|
6
|
+
let(:counter) do
|
7
|
+
tokens = WordsCounted::Tokeniser.new("one three three three woot woot").tokenise
|
8
|
+
Counter.new(tokens)
|
28
9
|
end
|
29
10
|
|
30
|
-
describe "
|
31
|
-
it "
|
32
|
-
expect(counter.
|
33
|
-
end
|
34
|
-
|
35
|
-
it "splits words" do
|
36
|
-
expect(counter.words).to eq(%w[we are all in the gutter but some of us are looking at the stars])
|
37
|
-
end
|
38
|
-
|
39
|
-
it "removes special characters" do
|
40
|
-
counter = Counter.new("Hello! # $ % 12345 * & % How do you do?")
|
41
|
-
expect(counter.words).to eq(%w[hello how do you do])
|
42
|
-
end
|
43
|
-
|
44
|
-
it "counts hyphenated words as one" do
|
45
|
-
counter = Counter.new("I am twenty-two.")
|
46
|
-
expect(counter.words).to eq(%w[i am twenty-two])
|
47
|
-
end
|
48
|
-
|
49
|
-
it "does not split words on apostrophe" do
|
50
|
-
counter = Counter.new("Bust 'em! Them be Jim's bastards'.")
|
51
|
-
expect(counter.words).to eq(%w[bust 'em them be jim's bastards'])
|
52
|
-
end
|
53
|
-
|
54
|
-
it "does not split on unicode chars" do
|
55
|
-
counter = Counter.new("São Paulo")
|
56
|
-
expect(counter.words).to eq(%w[são paulo])
|
57
|
-
end
|
58
|
-
|
59
|
-
it "it accepts a string filter" do
|
60
|
-
counter = Counter.new("That was magnificent, Trevor.", exclude: "magnificent")
|
61
|
-
expect(counter.words).to eq(%w[that was trevor])
|
62
|
-
end
|
63
|
-
|
64
|
-
it "it accepts a string filter with multiple words" do
|
65
|
-
counter = Counter.new("That was magnificent, Trevor.", exclude: "was magnificent")
|
66
|
-
expect(counter.words).to eq(%w[that trevor])
|
67
|
-
end
|
68
|
-
|
69
|
-
it "filters words in uppercase when using a string filter" do
|
70
|
-
counter = Counter.new("That was magnificent, Trevor.", exclude: "Magnificent")
|
71
|
-
expect(counter.words).to eq(%w[that was trevor])
|
72
|
-
end
|
73
|
-
|
74
|
-
it "accepts a regexp filter" do
|
75
|
-
counter = Counter.new("That was magnificent, Trevor.", exclude: /magnificent/i)
|
76
|
-
expect(counter.words).to eq(%w[that was trevor])
|
77
|
-
end
|
78
|
-
|
79
|
-
it "accepts an array filter" do
|
80
|
-
counter = Counter.new("That was magnificent, Trevor.", exclude: ['That', 'was'])
|
81
|
-
expect(counter.words).to eq(%w[magnificent trevor])
|
82
|
-
end
|
83
|
-
|
84
|
-
it "accepts a lambda filter" do
|
85
|
-
counter = Counter.new("That was magnificent, Trevor.", exclude: ->(w) { w == 'that' })
|
86
|
-
expect(counter.words).to eq(%w[was magnificent trevor])
|
87
|
-
end
|
88
|
-
|
89
|
-
it "accepts a custom regexp" do
|
90
|
-
counter = Counter.new("I am 007.", regexp: /[\p{Alnum}\-']+/)
|
91
|
-
expect(counter.words).to eq(["i", "am", "007"])
|
92
|
-
end
|
93
|
-
|
94
|
-
it "char_count should be calculated after the filter is applied" do
|
95
|
-
counter = Counter.new("I am Legend.", exclude: "I am")
|
96
|
-
expect(counter.char_count).to eq(6)
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
100
|
-
describe "word_count" do
|
101
|
-
it "returns the correct word count" do
|
102
|
-
expect(counter.word_count).to eq(15)
|
11
|
+
describe "initialize" do
|
12
|
+
it "sets @tokens" do
|
13
|
+
expect(counter.instance_variables).to include(:@tokens)
|
103
14
|
end
|
104
15
|
end
|
105
16
|
|
106
|
-
describe "
|
107
|
-
it "returns
|
108
|
-
expect(counter.
|
109
|
-
end
|
110
|
-
|
111
|
-
it "treats capitalized words as the same word" do
|
112
|
-
counter = Counter.new("Bad, bad, piggy!")
|
113
|
-
expect(counter.word_occurrences).to eq({ "bad" => 2, "piggy" => 1 })
|
17
|
+
describe "#token_count" do
|
18
|
+
it "returns the correct number of tokens" do
|
19
|
+
expect(counter.token_count).to eq(6)
|
114
20
|
end
|
115
21
|
end
|
116
22
|
|
117
|
-
describe "
|
118
|
-
it "returns
|
119
|
-
expect(counter.
|
120
|
-
end
|
121
|
-
|
122
|
-
it "returns a two dimensional array sorted by descending word occurrence" do
|
123
|
-
counter = Counter.new("Blue, green, green, green, orange, green, orange, red, orange, red")
|
124
|
-
expect(counter.sorted_word_occurrences).to eq([ ["green", 4], ["orange", 3], ["red", 2], ["blue", 1] ])
|
23
|
+
describe "#uniq_token_count" do
|
24
|
+
it "returns the number of unique tokens" do
|
25
|
+
expect(counter.uniq_token_count).to eq(3)
|
125
26
|
end
|
126
27
|
end
|
127
28
|
|
128
|
-
describe "
|
129
|
-
it "returns
|
130
|
-
expect(counter.
|
131
|
-
end
|
132
|
-
|
133
|
-
it "returns highest occuring words" do
|
134
|
-
counter = Counter.new("Orange orange Apple apple banana")
|
135
|
-
expect(counter.most_occurring_words).to eq([["orange", 2],["apple", 2]])
|
29
|
+
describe "#char_count" do
|
30
|
+
it "returns the correct number of chars" do
|
31
|
+
expect(counter.char_count).to eq(26)
|
136
32
|
end
|
137
33
|
end
|
138
34
|
|
139
|
-
describe
|
140
|
-
it "returns a
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
counter = Counter.new("One two three.")
|
146
|
-
expect(counter.word_lengths).to eq({ "one" => 3, "two" => 3, "three" => 5 })
|
35
|
+
describe "#token_frequency" do
|
36
|
+
it "returns a two-dimensional array where each member array is a token and its frequency in descending order" do
|
37
|
+
expected = [
|
38
|
+
['three', 3], ['woot', 2], ['one', 1]
|
39
|
+
]
|
40
|
+
expect(counter.token_frequency).to eq(expected)
|
147
41
|
end
|
148
42
|
end
|
149
43
|
|
150
|
-
describe "
|
151
|
-
it "returns
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
counter = Counter.new("I am not certain of that")
|
157
|
-
expect(counter.sorted_word_lengths).to eq([ ["certain", 7], ["that", 4], ["not", 3], ["of", 2], ["am", 2], ["i", 1] ])
|
44
|
+
describe "#token_lengths" do
|
45
|
+
it "returns a two-dimensional array where each member array is a token and its length in descending order" do
|
46
|
+
expected = [
|
47
|
+
['three', 5], ['woot', 4], ['one', 3]
|
48
|
+
]
|
49
|
+
expect(counter.token_lengths).to eq(expected)
|
158
50
|
end
|
159
51
|
end
|
160
52
|
|
161
|
-
describe "
|
162
|
-
it "returns
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
counter = Counter.new("Those whom the gods love grow young.")
|
168
|
-
expect(counter.longest_words).to eq([["those", 5],["young", 5]])
|
169
|
-
end
|
170
|
-
end
|
171
|
-
|
172
|
-
describe "word_density" do
|
173
|
-
it "returns an array" do
|
174
|
-
expect(counter.word_density).to be_a(Array)
|
175
|
-
end
|
176
|
-
|
177
|
-
it "returns words and their density in percent" do
|
178
|
-
counter = Counter.new("His name was Major, major Major Major.")
|
179
|
-
expect(counter.word_density).to eq([["major", 57.14], ["was", 14.29], ["name", 14.29], ["his", 14.29]])
|
53
|
+
describe "#token_density" do
|
54
|
+
it "returns a two-dimensional array where each member array is a token and its density in descending order" do
|
55
|
+
expected = [
|
56
|
+
['three', 0.5], ['woot', 0.33], ['one', 0.17]
|
57
|
+
]
|
58
|
+
expect(counter.token_density).to eq(expected)
|
180
59
|
end
|
181
60
|
|
182
61
|
it "accepts a precision" do
|
183
|
-
|
184
|
-
|
62
|
+
expected = [
|
63
|
+
['three', 0.5], ['woot', 0.3333], ['one', 0.1667]
|
64
|
+
]
|
65
|
+
expect(counter.token_density(precision: 4)).to eq(expected)
|
185
66
|
end
|
186
67
|
end
|
187
68
|
|
188
|
-
describe "
|
189
|
-
it "returns
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
it "returns the number of chars in the passed in string after the filter is applied" do
|
195
|
-
counter = Counter.new("His name was Major, major Major Major.", exclude: "Major")
|
196
|
-
expect(counter.char_count).to eq(10)
|
197
|
-
end
|
198
|
-
end
|
199
|
-
|
200
|
-
describe "average_chars_per_word" do
|
201
|
-
it "returns the average number of chars per word" do
|
202
|
-
counter = Counter.new("His name was major, Major Major Major.")
|
203
|
-
expect(counter.average_chars_per_word).to eq(4.29)
|
204
|
-
end
|
205
|
-
|
206
|
-
it "returns the average number of chars per word after the filter is applied" do
|
207
|
-
counter = Counter.new("His name was Major, Major Major Major.", exclude: "Major")
|
208
|
-
expect(counter.average_chars_per_word).to eq(3.33)
|
209
|
-
end
|
210
|
-
|
211
|
-
it "accepts precision" do
|
212
|
-
counter = Counter.new("This line should have 39 characters minus spaces.")
|
213
|
-
expect(counter.average_chars_per_word(4)).to eq(5.5714)
|
69
|
+
describe "#most_frequent_tokens" do
|
70
|
+
it "returns a hash of the tokens with the highest frequency, where each key is a token and each value is its frequency" do
|
71
|
+
expected = {
|
72
|
+
'three' => 3
|
73
|
+
}
|
74
|
+
expect(counter.most_frequent_tokens).to eq(expected)
|
214
75
|
end
|
215
76
|
end
|
216
77
|
|
217
|
-
describe "
|
218
|
-
it "returns
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
counter = Counter.new("Up down. Down up.")
|
224
|
-
expect(counter.unique_word_count).to eq(2)
|
78
|
+
describe "#longest_tokens" do
|
79
|
+
it "returns a hash of the tokens with the highest length, where each key is a token and each value is its length" do
|
80
|
+
expected = {
|
81
|
+
'three' => 5
|
82
|
+
}
|
83
|
+
expect(counter.longest_tokens).to eq(expected)
|
225
84
|
end
|
226
85
|
end
|
227
86
|
end
|
228
|
-
|
229
|
-
describe "count" do
|
230
|
-
it "returns count for a single word" do
|
231
|
-
counter = Counter.new("I am so clever that sometimes I don't understand a single word of what I am saying.")
|
232
|
-
expect(counter.count("i")).to eq(3)
|
233
|
-
end
|
234
|
-
end
|
235
|
-
|
236
|
-
describe "from_file" do
|
237
|
-
it "opens and reads a text file" do
|
238
|
-
counter = WordsCounted.from_file('spec/support/the_hart_and_the_hunter.txt')
|
239
|
-
expect(counter.word_count).to eq(139)
|
240
|
-
end
|
241
|
-
end
|
242
87
|
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require_relative "../spec_helper"
|
3
|
+
|
4
|
+
module WordsCounted
|
5
|
+
warn "Methods being tested are deprecated"
|
6
|
+
|
7
|
+
describe Counter do
|
8
|
+
let(:counter) do
|
9
|
+
tokens = WordsCounted::Tokeniser.new("one three three three woot woot").tokenise
|
10
|
+
Counter.new(tokens)
|
11
|
+
end
|
12
|
+
|
13
|
+
describe "#word_density" do
|
14
|
+
it "returns words and their density in percent" do
|
15
|
+
expected = [
|
16
|
+
['three', 50.0], ['woot', 33.33], ['one', 16.67]
|
17
|
+
]
|
18
|
+
expect(counter.word_density).to eq(expected)
|
19
|
+
end
|
20
|
+
|
21
|
+
it "accepts a precision" do
|
22
|
+
expected = [
|
23
|
+
['three', 50.0], ['woot', 33.3333], ['one', 16.6667]
|
24
|
+
]
|
25
|
+
expect(counter.word_density(4)).to eq(expected)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
describe "#word_occurrences" do
|
30
|
+
it "returns a hash of words and their occurrences" do
|
31
|
+
expected = {
|
32
|
+
'three' => 3, 'woot' => 2, 'one' => 1
|
33
|
+
}
|
34
|
+
expect(counter.word_occurrences).to eq(expected)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
describe "#sorted_word_occurrences" do
|
39
|
+
it "returns a two dimensional array sorted by descending word occurrence" do
|
40
|
+
expected = [
|
41
|
+
['three', 3], ['woot', 2], ['one', 1]
|
42
|
+
]
|
43
|
+
expect(counter.sorted_word_occurrences).to eq(expected)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
describe "#word_lengths" do
|
48
|
+
it "returns a hash of words and their length sorted descending by length" do
|
49
|
+
expected = {
|
50
|
+
'three' => 5, 'woot' => 4, 'one' => 3
|
51
|
+
}
|
52
|
+
expect(counter.word_lengths).to eq(expected)
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
describe "#sorted_word_lengths" do
|
57
|
+
it "returns a two dimensional array sorted by descending word length" do
|
58
|
+
expected = [
|
59
|
+
['three', 5], ['woot', 4], ['one', 3]
|
60
|
+
]
|
61
|
+
expect(counter.sorted_word_lengths).to eq(expected)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
describe "#longest_words" do
|
66
|
+
it "returns a two-dimensional array of the longest words and their lengths" do
|
67
|
+
expected = [
|
68
|
+
['three', 5]
|
69
|
+
]
|
70
|
+
expect(counter.longest_words).to eq(expected)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
describe "#most_occurring_words" do
|
75
|
+
it "returns a two-dimensional array of words with the highest frequency and their frequencies" do
|
76
|
+
expected = [
|
77
|
+
['three', 3]
|
78
|
+
]
|
79
|
+
expect(counter.most_occurring_words).to eq(expected)
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
describe "#average_chars_per_word" do
|
84
|
+
it "returns the average number of chars per word" do
|
85
|
+
expect(counter.average_chars_per_word).to eq(4.33)
|
86
|
+
end
|
87
|
+
|
88
|
+
it "accepts precision" do
|
89
|
+
expect(counter.average_chars_per_word(4)).to eq(4.3333)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
describe "#count" do
|
94
|
+
it "returns count for a single word" do
|
95
|
+
expect(counter.count('one')).to eq(1)
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
@@ -0,0 +1,133 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require_relative "../spec_helper"
|
3
|
+
|
4
|
+
module WordsCounted
|
5
|
+
describe Tokeniser do
|
6
|
+
describe "initialize" do
|
7
|
+
it "sets @input" do
|
8
|
+
tokeniser = Tokeniser.new("Hello World!")
|
9
|
+
expect(tokeniser.instance_variables).to include(:@input)
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
describe "#tokenise" do
|
14
|
+
it "normalises tokens and returns an array" do
|
15
|
+
tokens = Tokeniser.new("Hello HELLO").tokenise
|
16
|
+
expect(tokens).to eq(%w[hello hello])
|
17
|
+
end
|
18
|
+
|
19
|
+
context "without arguments" do
|
20
|
+
it "removes non-alphabetic chars" do
|
21
|
+
tokens = Tokeniser.new("Hello world! # $ % 12345 * & % ?").tokenise
|
22
|
+
expect(tokens).to eq(%w[hello world])
|
23
|
+
end
|
24
|
+
|
25
|
+
it "does not split on hyphens" do
|
26
|
+
tokens = Tokeniser.new("I am twenty-two.").tokenise
|
27
|
+
expect(tokens).to eq(%w[i am twenty-two])
|
28
|
+
end
|
29
|
+
|
30
|
+
it "does not split on apostrophe" do
|
31
|
+
tokens = Tokeniser.new("Bust 'em! It's Jim's gang.").tokenise
|
32
|
+
expect(tokens).to eq(%w[bust 'em it's jim's gang])
|
33
|
+
end
|
34
|
+
|
35
|
+
it "does not split on unicode chars" do
|
36
|
+
tokens = Tokeniser.new("Bayrūt").tokenise
|
37
|
+
expect(tokens).to eq(%w[bayrūt])
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
context "with `pattern` options" do
|
42
|
+
it "accepts a custom pattern" do
|
43
|
+
tokens = Tokeniser.new("We-Are-ALL").tokenise(pattern: /[^-]+/)
|
44
|
+
expect(tokens).to eq(%w[we are all])
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
context "with `exclude` option" do
|
49
|
+
context "as a string" do
|
50
|
+
let(:tokeniser) { Tokeniser.new("That was magnificent, Trevor.") }
|
51
|
+
|
52
|
+
it "accepts a string filter" do
|
53
|
+
tokens = tokeniser.tokenise(exclude: "magnificent")
|
54
|
+
expect(tokens).to eq(%w[that was trevor])
|
55
|
+
end
|
56
|
+
|
57
|
+
it "accepts a string filter with multiple space-delimited tokens" do
|
58
|
+
tokens = tokeniser.tokenise(exclude: "was magnificent")
|
59
|
+
expect(tokens).to eq(%w[that trevor])
|
60
|
+
end
|
61
|
+
|
62
|
+
it "normalises string filter" do
|
63
|
+
tokens = tokeniser.tokenise(exclude: "MAGNIFICENT")
|
64
|
+
expect(tokens).to eq(%w[that was trevor])
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
context "as a regular expression" do
|
69
|
+
it "filters on match" do
|
70
|
+
tokeniser = Tokeniser.new("That was magnificent, Trevor.")
|
71
|
+
tokens = tokeniser.tokenise(exclude: /magnificent/i)
|
72
|
+
expect(tokens).to eq(%w[that was trevor])
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
context "as a lambda" do
|
77
|
+
it "calls lambda" do
|
78
|
+
tokeniser = Tokeniser.new("That was magnificent, Trevor.")
|
79
|
+
tokens = tokeniser.tokenise(exclude: ->(token) { token.length < 5 })
|
80
|
+
expect(tokens).to eq(%w[magnificent trevor])
|
81
|
+
end
|
82
|
+
|
83
|
+
it "accepts a symbol for shorthand notation" do
|
84
|
+
tokeniser = Tokeniser.new("That was magnificent, محمد.}")
|
85
|
+
tokens = tokeniser.tokenise(exclude: :ascii_only?)
|
86
|
+
expect(tokens).to eq(%w[محمد])
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
context "as an array" do
|
91
|
+
let(:tokeniser) { Tokeniser.new("That was magnificent, Trevor.") }
|
92
|
+
|
93
|
+
it "accepts an array of strings" do
|
94
|
+
tokens = tokeniser.tokenise(exclude: ["magnificent"])
|
95
|
+
expect(tokens).to eq(%w[that was trevor])
|
96
|
+
end
|
97
|
+
|
98
|
+
it "accepts an array of regular expressions" do
|
99
|
+
tokens = tokeniser.tokenise(exclude: [/that/, /was/])
|
100
|
+
expect(tokens).to eq(%w[magnificent trevor])
|
101
|
+
end
|
102
|
+
|
103
|
+
it "accepts an array of lambdas" do
|
104
|
+
filters = [
|
105
|
+
->(token) { token.length < 4 },
|
106
|
+
->(token) { token.length > 6 }
|
107
|
+
]
|
108
|
+
tokens = tokeniser.tokenise(exclude: filters)
|
109
|
+
expect(tokens).to eq(%w[that trevor])
|
110
|
+
end
|
111
|
+
|
112
|
+
it "accepts a mixed array" do
|
113
|
+
filters = [
|
114
|
+
"that",
|
115
|
+
->(token) { token.length < 4 },
|
116
|
+
/magnificent/
|
117
|
+
]
|
118
|
+
tokens = tokeniser.tokenise(exclude: filters)
|
119
|
+
expect(tokens).to eq(["trevor"])
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
context "with an invalid filter" do
|
124
|
+
it "raises an `ArgumentError`" do
|
125
|
+
expect {
|
126
|
+
Tokeniser.new("Hello world!").tokenise(exclude: 1)
|
127
|
+
}.to raise_error(ArgumentError)
|
128
|
+
end
|
129
|
+
end
|
130
|
+
end
|
131
|
+
end
|
132
|
+
end
|
133
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require_relative "spec_helper"
|
3
|
+
|
4
|
+
describe WordsCounted do
|
5
|
+
describe ".from_file" do
|
6
|
+
let(:file_path) { "spec/support/the_hart_and_the_hunter.txt" }
|
7
|
+
|
8
|
+
it "opens and reads a text file" do
|
9
|
+
counter = WordsCounted.from_file(file_path)
|
10
|
+
expect(counter.token_count).to eq(139)
|
11
|
+
end
|
12
|
+
|
13
|
+
it "opens and reads a text file with options" do
|
14
|
+
counter = WordsCounted.from_file(file_path, exclude: "hunter")
|
15
|
+
expect(counter.token_count).to eq(135)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
describe ".count" do
|
20
|
+
let(:string) do
|
21
|
+
"We are all in the gutter, but some of us are looking at the stars."
|
22
|
+
end
|
23
|
+
|
24
|
+
it "returns a counter instance with given input as tokens" do
|
25
|
+
counter = WordsCounted.count(string)
|
26
|
+
expect(counter.token_count).to eq(15)
|
27
|
+
end
|
28
|
+
|
29
|
+
it "returns a counter instance with given input and options" do
|
30
|
+
counter = WordsCounted.count(string, exclude: "the gutter")
|
31
|
+
expect(counter.token_count).to eq(12)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
data/words_counted.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.version = WordsCounted::VERSION
|
10
10
|
spec.authors = ["Mohamad El-Husseini"]
|
11
11
|
spec.email = ["husseini.mel@gmail.com"]
|
12
|
-
spec.description = %q{A Ruby
|
12
|
+
spec.description = %q{A Ruby natural language processor to extract stats from text, such as word count and more.}
|
13
13
|
spec.summary = %q{See README.}
|
14
14
|
spec.homepage = "https://github.com/abitdodgy/words_counted"
|
15
15
|
spec.license = "MIT"
|
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
20
20
|
spec.require_paths = ["lib"]
|
21
21
|
|
22
|
-
spec.add_development_dependency "bundler"
|
22
|
+
spec.add_development_dependency "bundler"
|
23
23
|
spec.add_development_dependency "rake"
|
24
24
|
spec.add_development_dependency "rspec"
|
25
25
|
spec.add_development_dependency "pry"
|
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: words_counted
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mohamad El-Husseini
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-10-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -66,7 +66,8 @@ dependencies:
|
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
|
-
description: A Ruby
|
69
|
+
description: A Ruby natural language processor to extract stats from text, such as
|
70
|
+
word count and more.
|
70
71
|
email:
|
71
72
|
- husseini.mel@gmail.com
|
72
73
|
executables: []
|
@@ -74,25 +75,35 @@ extensions: []
|
|
74
75
|
extra_rdoc_files: []
|
75
76
|
files:
|
76
77
|
- ".gitignore"
|
78
|
+
- ".hound.yml"
|
77
79
|
- ".rspec"
|
80
|
+
- ".ruby-style.yml"
|
81
|
+
- ".ruby-version"
|
82
|
+
- ".travis.yml"
|
78
83
|
- ".yardopts"
|
79
84
|
- CHANGELOG.md
|
80
85
|
- Gemfile
|
81
86
|
- LICENSE.txt
|
82
87
|
- README.md
|
83
88
|
- Rakefile
|
89
|
+
- lib/refinements/hash_refinements.rb
|
84
90
|
- lib/words_counted.rb
|
85
91
|
- lib/words_counted/counter.rb
|
92
|
+
- lib/words_counted/deprecated.rb
|
93
|
+
- lib/words_counted/tokeniser.rb
|
86
94
|
- lib/words_counted/version.rb
|
87
95
|
- spec/spec_helper.rb
|
88
96
|
- spec/support/the_hart_and_the_hunter.txt
|
89
97
|
- spec/words_counted/counter_spec.rb
|
98
|
+
- spec/words_counted/deprecated_spec.rb
|
99
|
+
- spec/words_counted/tokeniser_spec.rb
|
100
|
+
- spec/words_counted_spec.rb
|
90
101
|
- words_counted.gemspec
|
91
102
|
homepage: https://github.com/abitdodgy/words_counted
|
92
103
|
licenses:
|
93
104
|
- MIT
|
94
105
|
metadata: {}
|
95
|
-
post_install_message:
|
106
|
+
post_install_message:
|
96
107
|
rdoc_options: []
|
97
108
|
require_paths:
|
98
109
|
- lib
|
@@ -107,12 +118,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
107
118
|
- !ruby/object:Gem::Version
|
108
119
|
version: '0'
|
109
120
|
requirements: []
|
110
|
-
|
111
|
-
|
112
|
-
signing_key:
|
121
|
+
rubygems_version: 3.2.15
|
122
|
+
signing_key:
|
113
123
|
specification_version: 4
|
114
124
|
summary: See README.
|
115
125
|
test_files:
|
116
126
|
- spec/spec_helper.rb
|
117
127
|
- spec/support/the_hart_and_the_hunter.txt
|
118
128
|
- spec/words_counted/counter_spec.rb
|
129
|
+
- spec/words_counted/deprecated_spec.rb
|
130
|
+
- spec/words_counted/tokeniser_spec.rb
|
131
|
+
- spec/words_counted_spec.rb
|