word_count_analyzer 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37)
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/.rspec +1 -0
  4. data/.travis.yml +5 -0
  5. data/Gemfile +4 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +554 -0
  8. data/Rakefile +2 -0
  9. data/lib/word_count_analyzer.rb +14 -0
  10. data/lib/word_count_analyzer/analyzer.rb +34 -0
  11. data/lib/word_count_analyzer/contraction.rb +176 -0
  12. data/lib/word_count_analyzer/counter.rb +230 -0
  13. data/lib/word_count_analyzer/date.rb +149 -0
  14. data/lib/word_count_analyzer/ellipsis.rb +48 -0
  15. data/lib/word_count_analyzer/hyperlink.rb +53 -0
  16. data/lib/word_count_analyzer/hyphenated_word.rb +23 -0
  17. data/lib/word_count_analyzer/number.rb +23 -0
  18. data/lib/word_count_analyzer/numbered_list.rb +61 -0
  19. data/lib/word_count_analyzer/punctuation.rb +52 -0
  20. data/lib/word_count_analyzer/slash.rb +84 -0
  21. data/lib/word_count_analyzer/version.rb +3 -0
  22. data/lib/word_count_analyzer/xhtml.rb +26 -0
  23. data/spec/spec_helper.rb +1 -0
  24. data/spec/word_count_analyzer/analyzer_spec.rb +11 -0
  25. data/spec/word_count_analyzer/contraction_spec.rb +124 -0
  26. data/spec/word_count_analyzer/counter_spec.rb +647 -0
  27. data/spec/word_count_analyzer/date_spec.rb +257 -0
  28. data/spec/word_count_analyzer/ellipsis_spec.rb +69 -0
  29. data/spec/word_count_analyzer/hyperlink_spec.rb +77 -0
  30. data/spec/word_count_analyzer/hyphenated_word_spec.rb +81 -0
  31. data/spec/word_count_analyzer/number_spec.rb +63 -0
  32. data/spec/word_count_analyzer/numbered_list_spec.rb +69 -0
  33. data/spec/word_count_analyzer/punctuation_spec.rb +91 -0
  34. data/spec/word_count_analyzer/slash_spec.rb +105 -0
  35. data/spec/word_count_analyzer/xhtml_spec.rb +65 -0
  36. data/word_count_analyzer.gemspec +26 -0
  37. metadata +153 -0
@@ -0,0 +1,257 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe WordCountAnalyzer::Date do
4
+ context '#includes_date?' do
5
+ it 'returns true if the string includes a date #001' do
6
+ string = 'Today is Monday, April 4th, 2011, aka 04/04/2011.'
7
+ ws = WordCountAnalyzer::Date.new(string: string)
8
+ expect(ws.includes_date?).to eq(true)
9
+ end
10
+
11
+ it 'returns true if the string includes a date #002' do
12
+ string = 'Today is Monday April 4th 2011.'
13
+ ws = WordCountAnalyzer::Date.new(string: string)
14
+ expect(ws.includes_date?).to eq(true)
15
+ end
16
+
17
+ it 'returns true if the string includes a date #003' do
18
+ string = 'Today is April 4th, 2011.'
19
+ ws = WordCountAnalyzer::Date.new(string: string)
20
+ expect(ws.includes_date?).to eq(true)
21
+ end
22
+
23
+ it 'returns true if the string includes a date #004' do
24
+ string = 'Today is Mon., Apr. 4, 2011.'
25
+ ws = WordCountAnalyzer::Date.new(string: string)
26
+ expect(ws.includes_date?).to eq(true)
27
+ end
28
+
29
+ it 'returns true if the string includes a date #005' do
30
+ string = 'Today is 04/04/2011.'
31
+ ws = WordCountAnalyzer::Date.new(string: string)
32
+ expect(ws.includes_date?).to eq(true)
33
+ end
34
+
35
+ it 'returns true if the string includes a date #006' do
36
+ string = 'Today is 04.04.2011.'
37
+ ws = WordCountAnalyzer::Date.new(string: string)
38
+ expect(ws.includes_date?).to eq(true)
39
+ end
40
+
41
+ it 'returns true if the string includes a date #007' do
42
+ string = 'Today is 2011.04.04.'
43
+ ws = WordCountAnalyzer::Date.new(string: string)
44
+ expect(ws.includes_date?).to eq(true)
45
+ end
46
+
47
+ it 'returns true if the string includes a date #008' do
48
+ string = 'Today is 2011/04/04.'
49
+ ws = WordCountAnalyzer::Date.new(string: string)
50
+ expect(ws.includes_date?).to eq(true)
51
+ end
52
+
53
+ it 'returns true if the string includes a date #009' do
54
+ string = 'Today is 2011-04-04.'
55
+ ws = WordCountAnalyzer::Date.new(string: string)
56
+ expect(ws.includes_date?).to eq(true)
57
+ end
58
+
59
+ it 'returns true if the string includes a date #010' do
60
+ string = 'Today is 04-04-2011.'
61
+ ws = WordCountAnalyzer::Date.new(string: string)
62
+ expect(ws.includes_date?).to eq(true)
63
+ end
64
+
65
+ it 'returns true if the string includes a date #011' do
66
+ string = 'Today is 2003 November 9.'
67
+ ws = WordCountAnalyzer::Date.new(string: string)
68
+ expect(ws.includes_date?).to eq(true)
69
+ end
70
+
71
+ it 'returns true if the string includes a date #012' do
72
+ string = 'Today is 2003Nov9.'
73
+ ws = WordCountAnalyzer::Date.new(string: string)
74
+ expect(ws.includes_date?).to eq(true)
75
+ end
76
+
77
+ it 'returns true if the string includes a date #013' do
78
+ string = 'Today is 2003Nov09.'
79
+ ws = WordCountAnalyzer::Date.new(string: string)
80
+ expect(ws.includes_date?).to eq(true)
81
+ end
82
+
83
+ it 'returns true if the string includes a date #014' do
84
+ string = 'Today is 2003-Nov-9.'
85
+ ws = WordCountAnalyzer::Date.new(string: string)
86
+ expect(ws.includes_date?).to eq(true)
87
+ end
88
+
89
+ it 'returns true if the string includes a date #015' do
90
+ string = 'Today is 2003-Nov-09.'
91
+ ws = WordCountAnalyzer::Date.new(string: string)
92
+ expect(ws.includes_date?).to eq(true)
93
+ end
94
+
95
+ it 'returns true if the string includes a date #016' do
96
+ string = 'Today is 2003-Nov-9, Sunday.'
97
+ ws = WordCountAnalyzer::Date.new(string: string)
98
+ expect(ws.includes_date?).to eq(true)
99
+ end
100
+
101
+ it 'returns true if the string includes a date #017' do
102
+ string = 'Today is 2003. november 9.'
103
+ ws = WordCountAnalyzer::Date.new(string: string)
104
+ expect(ws.includes_date?).to eq(true)
105
+ end
106
+
107
+ it 'returns true if the string includes a date #018' do
108
+ string = 'Today is 2003.11.9.'
109
+ ws = WordCountAnalyzer::Date.new(string: string)
110
+ expect(ws.includes_date?).to eq(true)
111
+ end
112
+
113
+ it 'returns true if the string includes a date #019' do
114
+ string = 'Today is Monday, Apr. 4, 2011.'
115
+ ws = WordCountAnalyzer::Date.new(string: string)
116
+ expect(ws.includes_date?).to eq(true)
117
+ end
118
+
119
+ it 'returns true if the string includes a date #020' do
120
+ string = 'Today is 2003/11/09.'
121
+ ws = WordCountAnalyzer::Date.new(string: string)
122
+ expect(ws.includes_date?).to eq(true)
123
+ end
124
+
125
+ it 'returns true if the string includes a date #021' do
126
+ string = 'Today is 20030109.'
127
+ ws = WordCountAnalyzer::Date.new(string: string)
128
+ expect(ws.includes_date?).to eq(true)
129
+ end
130
+
131
+ it 'returns true if the string includes a date #022' do
132
+ string = 'Today is 01092003.'
133
+ ws = WordCountAnalyzer::Date.new(string: string)
134
+ expect(ws.includes_date?).to eq(true)
135
+ end
136
+
137
+ it 'returns true if the string includes a date #023' do
138
+ string = 'Today is Sunday, November 9, 2014.'
139
+ ws = WordCountAnalyzer::Date.new(string: string)
140
+ expect(ws.includes_date?).to eq(true)
141
+ end
142
+
143
+ it 'returns true if the string includes a date #024' do
144
+ string = 'Today is November 9, 2014.'
145
+ ws = WordCountAnalyzer::Date.new(string: string)
146
+ expect(ws.includes_date?).to eq(true)
147
+ end
148
+
149
+ it 'returns true if the string includes a date #025' do
150
+ string = 'Today is Nov. 9, 2014.'
151
+ ws = WordCountAnalyzer::Date.new(string: string)
152
+ expect(ws.includes_date?).to eq(true)
153
+ end
154
+
155
+ it 'returns true if the string includes a date #026' do
156
+ string = 'Today is july 1st.'
157
+ ws = WordCountAnalyzer::Date.new(string: string)
158
+ expect(ws.includes_date?).to eq(true)
159
+ end
160
+
161
+ it 'returns true if the string includes a date #027' do
162
+ string = 'Today is jul. 1st.'
163
+ ws = WordCountAnalyzer::Date.new(string: string)
164
+ expect(ws.includes_date?).to eq(true)
165
+ end
166
+
167
+ it 'returns true if the string includes a date #028' do
168
+ string = 'Today is 8 November 2014.'
169
+ ws = WordCountAnalyzer::Date.new(string: string)
170
+ expect(ws.includes_date?).to eq(true)
171
+ end
172
+
173
+ it 'returns true if the string includes a date #029' do
174
+ string = 'Today is 8. November 2014.'
175
+ ws = WordCountAnalyzer::Date.new(string: string)
176
+ expect(ws.includes_date?).to eq(true)
177
+ end
178
+
179
+ it 'returns true if the string includes a date #030' do
180
+ string = 'Today is 08-Nov-2014.'
181
+ ws = WordCountAnalyzer::Date.new(string: string)
182
+ expect(ws.includes_date?).to eq(true)
183
+ end
184
+
185
+ it 'returns true if the string includes a date #031' do
186
+ string = 'Today is 08Nov14.'
187
+ ws = WordCountAnalyzer::Date.new(string: string)
188
+ expect(ws.includes_date?).to eq(true)
189
+ end
190
+
191
+ it 'returns true if the string includes a date #032' do
192
+ string = 'Today is 8th November 2014.'
193
+ ws = WordCountAnalyzer::Date.new(string: string)
194
+ expect(ws.includes_date?).to eq(true)
195
+ end
196
+
197
+ it 'returns true if the string includes a date #033' do
198
+ string = 'Today is the 8th of November 2014.'
199
+ ws = WordCountAnalyzer::Date.new(string: string)
200
+ expect(ws.includes_date?).to eq(true)
201
+ end
202
+
203
+ it 'returns true if the string includes a date #034' do
204
+ string = 'Today is 08/Nov/2014.'
205
+ ws = WordCountAnalyzer::Date.new(string: string)
206
+ expect(ws.includes_date?).to eq(true)
207
+ end
208
+
209
+ it 'returns true if the string includes a date #035' do
210
+ string = 'Today is Sunday, 8 November 2014.'
211
+ ws = WordCountAnalyzer::Date.new(string: string)
212
+ expect(ws.includes_date?).to eq(true)
213
+ end
214
+
215
+ it 'returns true if the string includes a date #036' do
216
+ string = 'Today is 8 November 2014.'
217
+ ws = WordCountAnalyzer::Date.new(string: string)
218
+ expect(ws.includes_date?).to eq(true)
219
+ end
220
+
221
+ it 'returns false if the string does not include a date #037' do
222
+ string = 'Hello world. There is no date here - $50,000. The sun is hot.'
223
+ ws = WordCountAnalyzer::Date.new(string: string)
224
+ expect(ws.includes_date?).to eq(false)
225
+ end
226
+ end
227
+
228
+ context '#occurences' do
229
+ it 'counts the date occurrences in a string #001' do
230
+ string = 'Today is Sunday, 8 November 2014.'
231
+ ws = WordCountAnalyzer::Date.new(string: string)
232
+ expect(ws.occurences).to eq(1)
233
+ end
234
+
235
+ it 'counts the date occurrences in a string #002' do
236
+ string = 'Today is Sunday, 8 November 2014. Yesterday was 07/Nov/2014.'
237
+ ws = WordCountAnalyzer::Date.new(string: string)
238
+ expect(ws.occurences).to eq(2)
239
+ end
240
+ end
241
+
242
+ context '#replace' do
243
+ it 'replaces the date occurrences in a string #001' do
244
+ string = 'Today is Tues. March 3rd, 2011.'
245
+ ws = WordCountAnalyzer::Date.new(string: string)
246
+ expect(ws.replace).to eq('Today is wsdateword ')
247
+ end
248
+ end
249
+
250
+ context '#replace_number_only_date' do
251
+ it 'replaces only the number date occurrences in a string' do
252
+ string = 'Today is Tues. March 3rd, 2011. 4/28/2013'
253
+ ws = WordCountAnalyzer::Date.new(string: string)
254
+ expect(ws.replace_number_only_date).to eq("Today is Tues. March 3rd, 2011. wsdateword ")
255
+ end
256
+ end
257
+ end
@@ -0,0 +1,69 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe WordCountAnalyzer::Ellipsis do
4
+ context '#includes_ellipsis?' do
5
+ it 'returns true if the string includes an ellipsis #001' do
6
+ string = 'Using an ellipsis … causes different counts.'
7
+ ws = WordCountAnalyzer::Ellipsis.new(string: string)
8
+ expect(ws.includes_ellipsis?).to eq(true)
9
+ end
10
+
11
+ it 'returns true if the string includes an ellipsis #002' do
12
+ string = 'Using an ellipsis causes different counts…depending on the style that you use.'
13
+ ws = WordCountAnalyzer::Ellipsis.new(string: string)
14
+ expect(ws.includes_ellipsis?).to eq(true)
15
+ end
16
+
17
+ it 'returns true if the string includes an ellipsis #003' do
18
+ string = 'Using an ellipsis causes different counts depending on the style . . . that you use.'
19
+ ws = WordCountAnalyzer::Ellipsis.new(string: string)
20
+ expect(ws.includes_ellipsis?).to eq(true)
21
+ end
22
+
23
+ it 'returns true if the string includes an ellipsis #004' do
24
+ string = 'Using an ellipsis causes different counts depending on the style . . . . that you use.'
25
+ ws = WordCountAnalyzer::Ellipsis.new(string: string)
26
+ expect(ws.includes_ellipsis?).to eq(true)
27
+ end
28
+
29
+ it 'returns true if the string includes an ellipsis #005' do
30
+ string = 'Using an ellipsis causes different counts depending on the style.... that you use.'
31
+ ws = WordCountAnalyzer::Ellipsis.new(string: string)
32
+ expect(ws.includes_ellipsis?).to eq(true)
33
+ end
34
+
35
+ it "returns false if the string doesn't include an ellipsis #006" do
36
+ string = 'Hello world.'
37
+ ws = WordCountAnalyzer::Ellipsis.new(string: string)
38
+ expect(ws.includes_ellipsis?).to eq(false)
39
+ end
40
+
41
+ it "returns false if the string includes a dotted_line #007" do
42
+ string = '.....'
43
+ ws = WordCountAnalyzer::Ellipsis.new(string: string)
44
+ expect(ws.includes_ellipsis?).to eq(false)
45
+ end
46
+
47
+ it "returns false if the string includes a dotted_line #008" do
48
+ string = "Here is one …………………………………………………………………… and another ......"
49
+ ws = WordCountAnalyzer::Ellipsis.new(string: string)
50
+ expect(ws.includes_ellipsis?).to eq(false)
51
+ end
52
+ end
53
+
54
+ context '#replace' do
55
+ it 'returns a string with the ellipsis replaced #001' do
56
+ string = 'Using an ellipsis … causes different counts…depending on the style . . . that you use. I never meant that.... She left the store. The practice was not abandoned. . . .'
57
+ ws = WordCountAnalyzer::Ellipsis.new(string: string)
58
+ expect(ws.replace).to eq("Using an ellipsis wseword causes different counts wseword depending on the style wseword that you use. I never meant that. wseword She left the store. The practice was not abandoned wseword ")
59
+ end
60
+ end
61
+
62
+ context '#occurences' do
63
+ it 'counts the ellipsis occurrences in a string #001' do
64
+ string = 'Using an ellipsis … causes different counts…depending on the style . . . that you use. I never meant that.... She left the store. The practice was not abandoned. . . .'
65
+ ws = WordCountAnalyzer::Ellipsis.new(string: string)
66
+ expect(ws.occurences).to eq(5)
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,77 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe WordCountAnalyzer::Hyperlink do
4
+ context '#hyperlink?' do
5
+ it 'returns true if the string is a hyperlink #001' do
6
+ string = "http://www.example.com/this-IS-a_test/hello.html"
7
+ ws = WordCountAnalyzer::Hyperlink.new(string: string)
8
+ expect(ws.hyperlink?).to eq(true)
9
+ end
10
+
11
+ it 'returns true if the string is a hyperlink #002' do
12
+ string = "http://www.google.co.uk"
13
+ ws = WordCountAnalyzer::Hyperlink.new(string: string)
14
+ expect(ws.hyperlink?).to eq(true)
15
+ end
16
+
17
+ it 'returns true if the string is a hyperlink #003' do
18
+ string = "https://google.co.uk"
19
+ ws = WordCountAnalyzer::Hyperlink.new(string: string)
20
+ expect(ws.hyperlink?).to eq(true)
21
+ end
22
+
23
+ it 'returns false if the string is not a hyperlink #004' do
24
+ string = "hello"
25
+ ws = WordCountAnalyzer::Hyperlink.new(string: string)
26
+ expect(ws.hyperlink?).to eq(false)
27
+ end
28
+
29
+ it 'returns false if the string is not a hyperlink #005' do
30
+ string = "john@gmail.com"
31
+ ws = WordCountAnalyzer::Hyperlink.new(string: string)
32
+ expect(ws.hyperlink?).to eq(false)
33
+ end
34
+
35
+ it 'returns false if the string is not a hyperlink #006' do
36
+ string = "date:"
37
+ ws = WordCountAnalyzer::Hyperlink.new(string: string)
38
+ expect(ws.hyperlink?).to eq(false)
39
+ end
40
+
41
+ it 'returns false if the string is not a hyperlink #007' do
42
+ string = 'The file location is c:\Users\johndoe.'
43
+ ws = WordCountAnalyzer::Hyperlink.new(string: string)
44
+ expect(ws.hyperlink?).to eq(false)
45
+ end
46
+ end
47
+
48
+ context '#occurences' do
49
+ it 'returns the occurrences of hyperlink tokens in a string #001' do
50
+ string = "Today the date is: Jan 1. Visit https://www.example.com/hello or http://www.google.co.uk"
51
+ ws = WordCountAnalyzer::Hyperlink.new(string: string)
52
+ expect(ws.occurences).to eq(2)
53
+ end
54
+ end
55
+
56
+ context '#replace' do
57
+ it 'replaces the hyperlinks in a string with regular tokens #001' do
58
+ string = "Today the date is: Jan 1. Visit https://www.example.com/hello or http://www.google.co.uk"
59
+ ws = WordCountAnalyzer::Hyperlink.new(string: string)
60
+ expect(ws.replace).to eq("Today the date is: Jan 1. Visit wslinkword or wslinkword ")
61
+ end
62
+
63
+ it 'replaces the hyperlinks in a string with regular tokens #002' do
64
+ string = 'The file location is c:\Users\johndoe or d:\Users\john\www'
65
+ ws = WordCountAnalyzer::Hyperlink.new(string: string)
66
+ expect(ws.replace).to eq('The file location is c:\Users\johndoe or d:\Users\john\www')
67
+ end
68
+ end
69
+
70
+ context '#replace_split_at_period' do
71
+ it 'replaces the hyperlinks in a string with regular tokens, split at periods #001' do
72
+ string = "http://www.google.co.uk"
73
+ ws = WordCountAnalyzer::Hyperlink.new(string: string)
74
+ expect(ws.replace_split_at_period).to eq("http://www google co uk")
75
+ end
76
+ end
77
+ end
@@ -0,0 +1,81 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe WordCountAnalyzer::HyphenatedWord do
4
+ context '#hyphenated_word?' do
5
+ it 'returns true if the token is a hyphenated word #001' do
6
+ token = 'devil-may-care'
7
+ ws = WordCountAnalyzer::HyphenatedWord.new(token: token)
8
+ expect(ws.hyphenated_word?).to eq(true)
9
+ end
10
+
11
+ it 'returns true if the token is a hyphenated word #002' do
12
+ token = '(2R)-2-methylsulfanyl-3-hydroxybutanedioate'
13
+ ws = WordCountAnalyzer::HyphenatedWord.new(token: token)
14
+ expect(ws.hyphenated_word?).to eq(true)
15
+ end
16
+
17
+ it 'returns false if the token is not a hyphenated word' do
18
+ token = 'hello'
19
+ ws = WordCountAnalyzer::HyphenatedWord.new(token: token)
20
+ expect(ws.hyphenated_word?).to eq(false)
21
+ end
22
+
23
+ it 'returns false if the token is a hyperlink' do
24
+ token = 'https://www.example-one.com'
25
+ ws = WordCountAnalyzer::HyphenatedWord.new(token: token)
26
+ expect(ws.hyphenated_word?).to eq(false)
27
+ end
28
+
29
+ it 'returns false if the token is a long string of dashes' do
30
+ token = '------------'
31
+ ws = WordCountAnalyzer::HyphenatedWord.new(token: token)
32
+ expect(ws.hyphenated_word?).to eq(false)
33
+ end
34
+
35
+ it 'returns true if the token is a hyphenated word (small em dashes)' do
36
+ token = 'devil﹘may﹘care'
37
+ ws = WordCountAnalyzer::HyphenatedWord.new(token: token)
38
+ expect(ws.hyphenated_word?).to eq(true)
39
+ end
40
+ end
41
+
42
+ context '#count_as_multiple' do
43
+ it 'returns the count of the individual words that are separated by the hyphen' do
44
+ token = 'devil-may-care'
45
+ ws = WordCountAnalyzer::HyphenatedWord.new(token: token)
46
+ expect(ws.count_as_multiple).to eq(3)
47
+ end
48
+
49
+ it 'handles small em dashes' do
50
+ token = 'devil﹘may﹘care'
51
+ ws = WordCountAnalyzer::HyphenatedWord.new(token: token)
52
+ expect(ws.count_as_multiple).to eq(3)
53
+ end
54
+
55
+ it 'returns the count of the individual words that are separated by the hyphen #002' do
56
+ token = '(2R)-2-methylsulfanyl-3-hydroxybutanedioate'
57
+ ws = WordCountAnalyzer::HyphenatedWord.new(token: token)
58
+ expect(ws.count_as_multiple).to eq(5)
59
+ end
60
+ end
61
+
62
+ context '#replace' do
63
+ it 'splits hyphenated words #001' do
64
+ token = 'devil-may-care'
65
+ ws = WordCountAnalyzer::HyphenatedWord.new(token: token)
66
+ expect(ws.replace).to eq('devil may care')
67
+ end
68
+
69
+ it 'splits hyphenated words #002' do
70
+ token = 'devil﹘may﹘care'
71
+ ws = WordCountAnalyzer::HyphenatedWord.new(token: token)
72
+ expect(ws.replace).to eq('devil may care')
73
+ end
74
+
75
+ it 'splits hyphenated words #003' do
76
+ token = '(2R)-2-methylsulfanyl-3-hydroxybutanedioate'
77
+ ws = WordCountAnalyzer::HyphenatedWord.new(token: token)
78
+ expect(ws.replace).to eq('(2R) 2 methylsulfanyl 3 hydroxybutanedioate')
79
+ end
80
+ end
81
+ end