words_counted 0.1.5 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/.hound.yml +2 -0
- data/.ruby-style.yml +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +9 -0
- data/.yardopts +3 -2
- data/CHANGELOG.md +29 -0
- data/README.md +146 -189
- data/lib/refinements/hash_refinements.rb +14 -0
- data/lib/words_counted/counter.rb +113 -72
- data/lib/words_counted/deprecated.rb +78 -0
- data/lib/words_counted/tokeniser.rb +163 -0
- data/lib/words_counted/version.rb +1 -1
- data/lib/words_counted.rb +31 -4
- data/spec/words_counted/counter_spec.rb +49 -204
- data/spec/words_counted/deprecated_spec.rb +99 -0
- data/spec/words_counted/tokeniser_spec.rb +133 -0
- data/spec/words_counted_spec.rb +34 -0
- data/words_counted.gemspec +2 -2
- metadata +25 -12
@@ -3,240 +3,85 @@ require_relative "../spec_helper"
|
|
3
3
|
|
4
4
|
module WordsCounted
|
5
5
|
describe Counter do
|
6
|
-
let(:counter)
|
7
|
-
|
8
|
-
|
9
|
-
it "sets @options" do
|
10
|
-
expect(counter.instance_variables).to include(:@options)
|
11
|
-
end
|
12
|
-
|
13
|
-
it "sets @char_count" do
|
14
|
-
expect(counter.instance_variables).to include(:@char_count)
|
15
|
-
end
|
16
|
-
|
17
|
-
it "sets @words" do
|
18
|
-
expect(counter.instance_variables).to include(:@words)
|
19
|
-
end
|
20
|
-
|
21
|
-
it "sets @word_occurrences" do
|
22
|
-
expect(counter.instance_variables).to include(:@word_occurrences)
|
23
|
-
end
|
24
|
-
|
25
|
-
it "sets @word_lengths" do
|
26
|
-
expect(counter.instance_variables).to include(:@word_lengths)
|
27
|
-
end
|
6
|
+
let(:counter) do
|
7
|
+
tokens = WordsCounted::Tokeniser.new("one three three three woot woot").tokenise
|
8
|
+
Counter.new(tokens)
|
28
9
|
end
|
29
10
|
|
30
|
-
describe "
|
31
|
-
it "
|
32
|
-
expect(counter.
|
33
|
-
end
|
34
|
-
|
35
|
-
it "splits words" do
|
36
|
-
expect(counter.words).to eq(%w[we are all in the gutter but some of us are looking at the stars])
|
37
|
-
end
|
38
|
-
|
39
|
-
it "removes special characters" do
|
40
|
-
counter = Counter.new("Hello! # $ % 12345 * & % How do you do?")
|
41
|
-
expect(counter.words).to eq(%w[hello how do you do])
|
42
|
-
end
|
43
|
-
|
44
|
-
it "counts hyphenated words as one" do
|
45
|
-
counter = Counter.new("I am twenty-two.")
|
46
|
-
expect(counter.words).to eq(%w[i am twenty-two])
|
47
|
-
end
|
48
|
-
|
49
|
-
it "does not split words on apostrophe" do
|
50
|
-
counter = Counter.new("Bust 'em! Them be Jim's bastards'.")
|
51
|
-
expect(counter.words).to eq(%w[bust 'em them be jim's bastards'])
|
52
|
-
end
|
53
|
-
|
54
|
-
it "does not split on unicode chars" do
|
55
|
-
counter = Counter.new("São Paulo")
|
56
|
-
expect(counter.words).to eq(%w[são paulo])
|
57
|
-
end
|
58
|
-
|
59
|
-
it "it accepts a string filter" do
|
60
|
-
counter = Counter.new("That was magnificent, Trevor.", exclude: "magnificent")
|
61
|
-
expect(counter.words).to eq(%w[that was trevor])
|
62
|
-
end
|
63
|
-
|
64
|
-
it "it accepts a string filter with multiple words" do
|
65
|
-
counter = Counter.new("That was magnificent, Trevor.", exclude: "was magnificent")
|
66
|
-
expect(counter.words).to eq(%w[that trevor])
|
67
|
-
end
|
68
|
-
|
69
|
-
it "filters words in uppercase when using a string filter" do
|
70
|
-
counter = Counter.new("That was magnificent, Trevor.", exclude: "Magnificent")
|
71
|
-
expect(counter.words).to eq(%w[that was trevor])
|
72
|
-
end
|
73
|
-
|
74
|
-
it "accepts a regexp filter" do
|
75
|
-
counter = Counter.new("That was magnificent, Trevor.", exclude: /magnificent/i)
|
76
|
-
expect(counter.words).to eq(%w[that was trevor])
|
77
|
-
end
|
78
|
-
|
79
|
-
it "accepts an array filter" do
|
80
|
-
counter = Counter.new("That was magnificent, Trevor.", exclude: ['That', 'was'])
|
81
|
-
expect(counter.words).to eq(%w[magnificent trevor])
|
82
|
-
end
|
83
|
-
|
84
|
-
it "accepts a lambda filter" do
|
85
|
-
counter = Counter.new("That was magnificent, Trevor.", exclude: ->(w) { w == 'that' })
|
86
|
-
expect(counter.words).to eq(%w[was magnificent trevor])
|
87
|
-
end
|
88
|
-
|
89
|
-
it "accepts a custom regexp" do
|
90
|
-
counter = Counter.new("I am 007.", regexp: /[\p{Alnum}\-']+/)
|
91
|
-
expect(counter.words).to eq(["i", "am", "007"])
|
92
|
-
end
|
93
|
-
|
94
|
-
it "char_count should be calculated after the filter is applied" do
|
95
|
-
counter = Counter.new("I am Legend.", exclude: "I am")
|
96
|
-
expect(counter.char_count).to eq(6)
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
100
|
-
describe "word_count" do
|
101
|
-
it "returns the correct word count" do
|
102
|
-
expect(counter.word_count).to eq(15)
|
11
|
+
describe "initialize" do
|
12
|
+
it "sets @tokens" do
|
13
|
+
expect(counter.instance_variables).to include(:@tokens)
|
103
14
|
end
|
104
15
|
end
|
105
16
|
|
106
|
-
describe "
|
107
|
-
it "returns
|
108
|
-
expect(counter.
|
109
|
-
end
|
110
|
-
|
111
|
-
it "treats capitalized words as the same word" do
|
112
|
-
counter = Counter.new("Bad, bad, piggy!")
|
113
|
-
expect(counter.word_occurrences).to eq({ "bad" => 2, "piggy" => 1 })
|
17
|
+
describe "#token_count" do
|
18
|
+
it "returns the correct number of tokens" do
|
19
|
+
expect(counter.token_count).to eq(6)
|
114
20
|
end
|
115
21
|
end
|
116
22
|
|
117
|
-
describe "
|
118
|
-
it "returns
|
119
|
-
expect(counter.
|
120
|
-
end
|
121
|
-
|
122
|
-
it "returns a two dimensional array sorted by descending word occurrence" do
|
123
|
-
counter = Counter.new("Blue, green, green, green, orange, green, orange, red, orange, red")
|
124
|
-
expect(counter.sorted_word_occurrences).to eq([ ["green", 4], ["orange", 3], ["red", 2], ["blue", 1] ])
|
23
|
+
describe "#uniq_token_count" do
|
24
|
+
it "returns the number of unique token" do
|
25
|
+
expect(counter.uniq_token_count).to eq(3)
|
125
26
|
end
|
126
27
|
end
|
127
28
|
|
128
|
-
describe "
|
129
|
-
it "returns
|
130
|
-
expect(counter.
|
131
|
-
end
|
132
|
-
|
133
|
-
it "returns highest occuring words" do
|
134
|
-
counter = Counter.new("Orange orange Apple apple banana")
|
135
|
-
expect(counter.most_occurring_words).to eq([["orange", 2],["apple", 2]])
|
29
|
+
describe "#char_count" do
|
30
|
+
it "returns the correct number of chars" do
|
31
|
+
expect(counter.char_count).to eq(26)
|
136
32
|
end
|
137
33
|
end
|
138
34
|
|
139
|
-
describe
|
140
|
-
it "returns a
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
counter = Counter.new("One two three.")
|
146
|
-
expect(counter.word_lengths).to eq({ "one" => 3, "two" => 3, "three" => 5 })
|
35
|
+
describe "#token_frequency" do
|
36
|
+
it "returns a two-dimensional array where each member array is a token and its frequency in descending order" do
|
37
|
+
expected = [
|
38
|
+
['three', 3], ['woot', 2], ['one', 1]
|
39
|
+
]
|
40
|
+
expect(counter.token_frequency).to eq(expected)
|
147
41
|
end
|
148
42
|
end
|
149
43
|
|
150
|
-
describe "
|
151
|
-
it "returns
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
counter = Counter.new("I am not certain of that")
|
157
|
-
expect(counter.sorted_word_lengths).to eq([ ["certain", 7], ["that", 4], ["not", 3], ["of", 2], ["am", 2], ["i", 1] ])
|
44
|
+
describe "#token_lengths" do
|
45
|
+
it "returns a two-dimensional array where each member array is a token and its length in descending order" do
|
46
|
+
expected = [
|
47
|
+
['three', 5], ['woot', 4], ['one', 3]
|
48
|
+
]
|
49
|
+
expect(counter.token_lengths).to eq(expected)
|
158
50
|
end
|
159
51
|
end
|
160
52
|
|
161
|
-
describe "
|
162
|
-
it "returns
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
counter = Counter.new("Those whom the gods love grow young.")
|
168
|
-
expect(counter.longest_words).to eq([["those", 5],["young", 5]])
|
169
|
-
end
|
170
|
-
end
|
171
|
-
|
172
|
-
describe "word_density" do
|
173
|
-
it "returns an array" do
|
174
|
-
expect(counter.word_density).to be_a(Array)
|
175
|
-
end
|
176
|
-
|
177
|
-
it "returns words and their density in percent" do
|
178
|
-
counter = Counter.new("His name was Major, major Major Major.")
|
179
|
-
expect(counter.word_density).to eq([["major", 57.14], ["was", 14.29], ["name", 14.29], ["his", 14.29]])
|
53
|
+
describe "#token_density" do
|
54
|
+
it "returns a two-dimensional array where each member array is a token and its density in descending order" do
|
55
|
+
expected = [
|
56
|
+
['three', 0.5], ['woot', 0.33], ['one', 0.17]
|
57
|
+
]
|
58
|
+
expect(counter.token_density).to eq(expected)
|
180
59
|
end
|
181
60
|
|
182
61
|
it "accepts a precision" do
|
183
|
-
|
184
|
-
|
62
|
+
expected = [
|
63
|
+
['three', 0.5], ['woot', 0.3333], ['one', 0.1667]
|
64
|
+
]
|
65
|
+
expect(counter.token_density(precision: 4)).to eq(expected)
|
185
66
|
end
|
186
67
|
end
|
187
68
|
|
188
|
-
describe "
|
189
|
-
it "returns
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
it "returns the number of chars in the passed in string after the filter is applied" do
|
195
|
-
counter = Counter.new("His name was Major, major Major Major.", exclude: "Major")
|
196
|
-
expect(counter.char_count).to eq(10)
|
197
|
-
end
|
198
|
-
end
|
199
|
-
|
200
|
-
describe "average_chars_per_word" do
|
201
|
-
it "returns the average number of chars per word" do
|
202
|
-
counter = Counter.new("His name was major, Major Major Major.")
|
203
|
-
expect(counter.average_chars_per_word).to eq(4.29)
|
204
|
-
end
|
205
|
-
|
206
|
-
it "returns the average number of chars per word after the filter is applied" do
|
207
|
-
counter = Counter.new("His name was Major, Major Major Major.", exclude: "Major")
|
208
|
-
expect(counter.average_chars_per_word).to eq(3.33)
|
209
|
-
end
|
210
|
-
|
211
|
-
it "accepts precision" do
|
212
|
-
counter = Counter.new("This line should have 39 characters minus spaces.")
|
213
|
-
expect(counter.average_chars_per_word(4)).to eq(5.5714)
|
69
|
+
describe "#most_frequent_tokens" do
|
70
|
+
it "returns a hash of the tokens with the highest frequency, where each key a token, and each value is its frequency" do
|
71
|
+
expected = {
|
72
|
+
'three' => 3
|
73
|
+
}
|
74
|
+
expect(counter.most_frequent_tokens).to eq(expected)
|
214
75
|
end
|
215
76
|
end
|
216
77
|
|
217
|
-
describe "
|
218
|
-
it "returns
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
counter = Counter.new("Up down. Down up.")
|
224
|
-
expect(counter.unique_word_count).to eq(2)
|
78
|
+
describe "#longest_tokens" do
|
79
|
+
it "returns a hash of the tokens with the highest length, where each key a token, and each value is its length" do
|
80
|
+
expected = {
|
81
|
+
'three' => 5
|
82
|
+
}
|
83
|
+
expect(counter.longest_tokens).to eq(expected)
|
225
84
|
end
|
226
85
|
end
|
227
86
|
end
|
228
|
-
|
229
|
-
describe "count" do
|
230
|
-
it "returns count for a single word" do
|
231
|
-
counter = Counter.new("I am so clever that sometimes I don't understand a single word of what I am saying.")
|
232
|
-
expect(counter.count("i")).to eq(3)
|
233
|
-
end
|
234
|
-
end
|
235
|
-
|
236
|
-
describe "from_file" do
|
237
|
-
it "opens and reads a text file" do
|
238
|
-
counter = WordsCounted.from_file('spec/support/the_hart_and_the_hunter.txt')
|
239
|
-
expect(counter.word_count).to eq(139)
|
240
|
-
end
|
241
|
-
end
|
242
87
|
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require_relative "../spec_helper"
|
3
|
+
|
4
|
+
module WordsCounted
|
5
|
+
warn "Methods being tested are deprecated"
|
6
|
+
|
7
|
+
describe Counter do
|
8
|
+
let(:counter) do
|
9
|
+
tokens = WordsCounted::Tokeniser.new("one three three three woot woot").tokenise
|
10
|
+
Counter.new(tokens)
|
11
|
+
end
|
12
|
+
|
13
|
+
describe "#word_density" do
|
14
|
+
it "returns words and their density in percent" do
|
15
|
+
expected = [
|
16
|
+
['three', 50.0], ['woot', 33.33], ['one', 16.67]
|
17
|
+
]
|
18
|
+
expect(counter.word_density).to eq(expected)
|
19
|
+
end
|
20
|
+
|
21
|
+
it "accepts a precision" do
|
22
|
+
expected = [
|
23
|
+
['three', 50.0], ['woot', 33.3333], ['one', 16.6667]
|
24
|
+
]
|
25
|
+
expect(counter.word_density(4)).to eq(expected)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
describe "#word_occurrences" do
|
30
|
+
it "returns a two dimensional array sorted by descending word occurrence" do
|
31
|
+
expected = {
|
32
|
+
'three' => 3, 'woot' => 2, 'one' => 1
|
33
|
+
}
|
34
|
+
expect(counter.word_occurrences).to eq(expected)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
describe "#sorted_word_occurrences" do
|
39
|
+
it "returns a two dimensional array sorted by descending word occurrence" do
|
40
|
+
expected = [
|
41
|
+
['three', 3], ['woot', 2], ['one', 1]
|
42
|
+
]
|
43
|
+
expect(counter.sorted_word_occurrences).to eq(expected)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
describe "#word_lengths" do
|
48
|
+
it "returns a hash of of words and their length sorted descending by length" do
|
49
|
+
expected = {
|
50
|
+
'three' => 5, 'woot' => 4, 'one' => 3
|
51
|
+
}
|
52
|
+
expect(counter.word_lengths).to eq(expected)
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
describe "#sorted_word_lengths" do
|
57
|
+
it "returns a two dimensional array sorted by descending word length" do
|
58
|
+
expected = [
|
59
|
+
['three', 5], ['woot', 4], ['one', 3]
|
60
|
+
]
|
61
|
+
expect(counter.sorted_word_lengths).to eq(expected)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
describe "#longest_words" do
|
66
|
+
it "returns a two-dimentional array of the longest words and their lengths" do
|
67
|
+
expected = [
|
68
|
+
['three', 5]
|
69
|
+
]
|
70
|
+
expect(counter.longest_words).to eq(expected)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
describe "#most_occurring_words" do
|
75
|
+
it "returns a two-dimentional array of words with the highest frequency and their frequencies" do
|
76
|
+
expected = [
|
77
|
+
['three', 3]
|
78
|
+
]
|
79
|
+
expect(counter.most_occurring_words).to eq(expected)
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
describe "#average_chars_per_word" do
|
84
|
+
it "returns the average number of chars per word" do
|
85
|
+
expect(counter.average_chars_per_word).to eq(4.33)
|
86
|
+
end
|
87
|
+
|
88
|
+
it "accepts precision" do
|
89
|
+
expect(counter.average_chars_per_word(4)).to eq(4.3333)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
describe "#count" do
|
94
|
+
it "returns count for a single word" do
|
95
|
+
expect(counter.count('one')).to eq(1)
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
@@ -0,0 +1,133 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require_relative "../spec_helper"
|
3
|
+
|
4
|
+
module WordsCounted
|
5
|
+
describe Tokeniser do
|
6
|
+
describe "initialize" do
|
7
|
+
it "sets @input" do
|
8
|
+
tokeniser = Tokeniser.new("Hello World!")
|
9
|
+
expect(tokeniser.instance_variables).to include(:@input)
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
describe "#tokenise" do
|
14
|
+
it "normalises tokens and returns an array" do
|
15
|
+
tokens = Tokeniser.new("Hello HELLO").tokenise
|
16
|
+
expect(tokens).to eq(%w[hello hello])
|
17
|
+
end
|
18
|
+
|
19
|
+
context "without arguments" do
|
20
|
+
it "removes none alpha-numeric chars" do
|
21
|
+
tokens = Tokeniser.new("Hello world! # $ % 12345 * & % ?").tokenise
|
22
|
+
expect(tokens).to eq(%w[hello world])
|
23
|
+
end
|
24
|
+
|
25
|
+
it "does not split on hyphens" do
|
26
|
+
tokens = Tokeniser.new("I am twenty-two.").tokenise
|
27
|
+
expect(tokens).to eq(%w[i am twenty-two])
|
28
|
+
end
|
29
|
+
|
30
|
+
it "does not split on apostrophe" do
|
31
|
+
tokens = Tokeniser.new("Bust 'em! It's Jim's gang.").tokenise
|
32
|
+
expect(tokens).to eq(%w[bust 'em it's jim's gang])
|
33
|
+
end
|
34
|
+
|
35
|
+
it "does not split on unicode chars" do
|
36
|
+
tokens = Tokeniser.new("Bayrūt").tokenise
|
37
|
+
expect(tokens).to eq(%w[bayrūt])
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
context "with `pattern` options" do
|
42
|
+
it "splits on accepts a custom pattern" do
|
43
|
+
tokens = Tokeniser.new("We-Are-ALL").tokenise(pattern: /[^-]+/)
|
44
|
+
expect(tokens).to eq(%w[we are all])
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
context "with `exclude` option" do
|
49
|
+
context "as a string" do
|
50
|
+
let(:tokeniser) { Tokeniser.new("That was magnificent, Trevor.") }
|
51
|
+
|
52
|
+
it "it accepts a string filter" do
|
53
|
+
tokens = tokeniser.tokenise(exclude: "magnificent")
|
54
|
+
expect(tokens).to eq(%w[that was trevor])
|
55
|
+
end
|
56
|
+
|
57
|
+
it "accepts a string filter with multiple space-delimited tokens" do
|
58
|
+
tokens = tokeniser.tokenise(exclude: "was magnificent")
|
59
|
+
expect(tokens).to eq(%w[that trevor])
|
60
|
+
end
|
61
|
+
|
62
|
+
it "normalises string filter" do
|
63
|
+
tokens = tokeniser.tokenise(exclude: "MAGNIFICENT")
|
64
|
+
expect(tokens).to eq(%w[that was trevor])
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
context "as a regular expression" do
|
69
|
+
it "filters on match" do
|
70
|
+
tokeniser = Tokeniser.new("That was magnificent, Trevor.")
|
71
|
+
tokens = tokeniser.tokenise(exclude: /magnificent/i)
|
72
|
+
expect(tokens).to eq(%w[that was trevor])
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
context "as a lambda" do
|
77
|
+
it "calls lambda" do
|
78
|
+
tokeniser = Tokeniser.new("That was magnificent, Trevor.")
|
79
|
+
tokens = tokeniser.tokenise(exclude: ->(token) { token.length < 5 })
|
80
|
+
expect(tokens).to eq(%w[magnificent trevor])
|
81
|
+
end
|
82
|
+
|
83
|
+
it "accepts a symbol for shorthand notation" do
|
84
|
+
tokeniser = Tokeniser.new("That was magnificent, محمد.}")
|
85
|
+
tokens = tokeniser.tokenise(exclude: :ascii_only?)
|
86
|
+
expect(tokens).to eq(%w[محمد])
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
context "as an array" do
|
91
|
+
let(:tokeniser) { Tokeniser.new("That was magnificent, Trevor.") }
|
92
|
+
|
93
|
+
it "accepts an array of strings" do
|
94
|
+
tokens = tokeniser.tokenise(exclude: ["magnificent"])
|
95
|
+
expect(tokens).to eq(%w[that was trevor])
|
96
|
+
end
|
97
|
+
|
98
|
+
it "accepts an array regular expressions" do
|
99
|
+
tokens = tokeniser.tokenise(exclude: [/that/, /was/])
|
100
|
+
expect(tokens).to eq(%w[magnificent trevor])
|
101
|
+
end
|
102
|
+
|
103
|
+
it "accepts an array of lambdas" do
|
104
|
+
filters = [
|
105
|
+
->(token) { token.length < 4 },
|
106
|
+
->(token) { token.length > 6 }
|
107
|
+
]
|
108
|
+
tokens = tokeniser.tokenise(exclude: filters)
|
109
|
+
expect(tokens).to eq(%w[that trevor])
|
110
|
+
end
|
111
|
+
|
112
|
+
it "accepts a mixed array" do
|
113
|
+
filters = [
|
114
|
+
"that",
|
115
|
+
->(token) { token.length < 4 },
|
116
|
+
/magnificent/
|
117
|
+
]
|
118
|
+
tokens = tokeniser.tokenise(exclude: filters)
|
119
|
+
expect(tokens).to eq(["trevor"])
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
context "with an invalid filter" do
|
124
|
+
it "raises an `ArgumentError`" do
|
125
|
+
expect {
|
126
|
+
Tokeniser.new("Hello world!").tokenise(exclude: 1)
|
127
|
+
}.to raise_error(ArgumentError)
|
128
|
+
end
|
129
|
+
end
|
130
|
+
end
|
131
|
+
end
|
132
|
+
end
|
133
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require_relative "spec_helper"
|
3
|
+
|
4
|
+
describe WordsCounted do
|
5
|
+
describe ".from_file" do
|
6
|
+
let(:file_path) { "spec/support/the_hart_and_the_hunter.txt" }
|
7
|
+
|
8
|
+
it "opens and reads a text file" do
|
9
|
+
counter = WordsCounted.from_file(file_path)
|
10
|
+
expect(counter.token_count).to eq(139)
|
11
|
+
end
|
12
|
+
|
13
|
+
it "opens and reads a text file with options" do
|
14
|
+
counter = WordsCounted.from_file(file_path, exclude: "hunter")
|
15
|
+
expect(counter.token_count).to eq(135)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
describe ".count" do
|
20
|
+
let(:string) do
|
21
|
+
"We are all in the gutter, but some of us are looking at the stars."
|
22
|
+
end
|
23
|
+
|
24
|
+
it "returns a counter instance with given input as tokens" do
|
25
|
+
counter = WordsCounted.count(string)
|
26
|
+
expect(counter.token_count).to eq(15)
|
27
|
+
end
|
28
|
+
|
29
|
+
it "returns a counter instance with given input and options" do
|
30
|
+
counter = WordsCounted.count(string, exclude: "the gutter")
|
31
|
+
expect(counter.token_count).to eq(12)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
data/words_counted.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.version = WordsCounted::VERSION
|
10
10
|
spec.authors = ["Mohamad El-Husseini"]
|
11
11
|
spec.email = ["husseini.mel@gmail.com"]
|
12
|
-
spec.description = %q{A Ruby
|
12
|
+
spec.description = %q{A Ruby natural language processor to extract stats from text, such was word count and more.}
|
13
13
|
spec.summary = %q{See README.}
|
14
14
|
spec.homepage = "https://github.com/abitdodgy/words_counted"
|
15
15
|
spec.license = "MIT"
|
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
20
20
|
spec.require_paths = ["lib"]
|
21
21
|
|
22
|
-
spec.add_development_dependency "bundler"
|
22
|
+
spec.add_development_dependency "bundler"
|
23
23
|
spec.add_development_dependency "rake"
|
24
24
|
spec.add_development_dependency "rspec"
|
25
25
|
spec.add_development_dependency "pry"
|
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: words_counted
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mohamad El-Husseini
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-10-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -66,7 +66,8 @@ dependencies:
|
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
|
-
description: A Ruby
|
69
|
+
description: A Ruby natural language processor to extract stats from text, such was
|
70
|
+
word count and more.
|
70
71
|
email:
|
71
72
|
- husseini.mel@gmail.com
|
72
73
|
executables: []
|
@@ -74,25 +75,35 @@ extensions: []
|
|
74
75
|
extra_rdoc_files: []
|
75
76
|
files:
|
76
77
|
- ".gitignore"
|
78
|
+
- ".hound.yml"
|
77
79
|
- ".rspec"
|
80
|
+
- ".ruby-style.yml"
|
81
|
+
- ".ruby-version"
|
82
|
+
- ".travis.yml"
|
78
83
|
- ".yardopts"
|
79
84
|
- CHANGELOG.md
|
80
85
|
- Gemfile
|
81
86
|
- LICENSE.txt
|
82
87
|
- README.md
|
83
88
|
- Rakefile
|
89
|
+
- lib/refinements/hash_refinements.rb
|
84
90
|
- lib/words_counted.rb
|
85
91
|
- lib/words_counted/counter.rb
|
92
|
+
- lib/words_counted/deprecated.rb
|
93
|
+
- lib/words_counted/tokeniser.rb
|
86
94
|
- lib/words_counted/version.rb
|
87
95
|
- spec/spec_helper.rb
|
88
96
|
- spec/support/the_hart_and_the_hunter.txt
|
89
97
|
- spec/words_counted/counter_spec.rb
|
98
|
+
- spec/words_counted/deprecated_spec.rb
|
99
|
+
- spec/words_counted/tokeniser_spec.rb
|
100
|
+
- spec/words_counted_spec.rb
|
90
101
|
- words_counted.gemspec
|
91
102
|
homepage: https://github.com/abitdodgy/words_counted
|
92
103
|
licenses:
|
93
104
|
- MIT
|
94
105
|
metadata: {}
|
95
|
-
post_install_message:
|
106
|
+
post_install_message:
|
96
107
|
rdoc_options: []
|
97
108
|
require_paths:
|
98
109
|
- lib
|
@@ -107,12 +118,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
107
118
|
- !ruby/object:Gem::Version
|
108
119
|
version: '0'
|
109
120
|
requirements: []
|
110
|
-
|
111
|
-
|
112
|
-
signing_key:
|
121
|
+
rubygems_version: 3.2.15
|
122
|
+
signing_key:
|
113
123
|
specification_version: 4
|
114
124
|
summary: See README.
|
115
125
|
test_files:
|
116
126
|
- spec/spec_helper.rb
|
117
127
|
- spec/support/the_hart_and_the_hunter.txt
|
118
128
|
- spec/words_counted/counter_spec.rb
|
129
|
+
- spec/words_counted/deprecated_spec.rb
|
130
|
+
- spec/words_counted/tokeniser_spec.rb
|
131
|
+
- spec/words_counted_spec.rb
|