word_count_analyzer 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/.rspec +1 -0
  4. data/.travis.yml +5 -0
  5. data/Gemfile +4 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +554 -0
  8. data/Rakefile +2 -0
  9. data/lib/word_count_analyzer.rb +14 -0
  10. data/lib/word_count_analyzer/analyzer.rb +34 -0
  11. data/lib/word_count_analyzer/contraction.rb +176 -0
  12. data/lib/word_count_analyzer/counter.rb +230 -0
  13. data/lib/word_count_analyzer/date.rb +149 -0
  14. data/lib/word_count_analyzer/ellipsis.rb +48 -0
  15. data/lib/word_count_analyzer/hyperlink.rb +53 -0
  16. data/lib/word_count_analyzer/hyphenated_word.rb +23 -0
  17. data/lib/word_count_analyzer/number.rb +23 -0
  18. data/lib/word_count_analyzer/numbered_list.rb +61 -0
  19. data/lib/word_count_analyzer/punctuation.rb +52 -0
  20. data/lib/word_count_analyzer/slash.rb +84 -0
  21. data/lib/word_count_analyzer/version.rb +3 -0
  22. data/lib/word_count_analyzer/xhtml.rb +26 -0
  23. data/spec/spec_helper.rb +1 -0
  24. data/spec/word_count_analyzer/analyzer_spec.rb +11 -0
  25. data/spec/word_count_analyzer/contraction_spec.rb +124 -0
  26. data/spec/word_count_analyzer/counter_spec.rb +647 -0
  27. data/spec/word_count_analyzer/date_spec.rb +257 -0
  28. data/spec/word_count_analyzer/ellipsis_spec.rb +69 -0
  29. data/spec/word_count_analyzer/hyperlink_spec.rb +77 -0
  30. data/spec/word_count_analyzer/hyphenated_word_spec.rb +81 -0
  31. data/spec/word_count_analyzer/number_spec.rb +63 -0
  32. data/spec/word_count_analyzer/numbered_list_spec.rb +69 -0
  33. data/spec/word_count_analyzer/punctuation_spec.rb +91 -0
  34. data/spec/word_count_analyzer/slash_spec.rb +105 -0
  35. data/spec/word_count_analyzer/xhtml_spec.rb +65 -0
  36. data/word_count_analyzer.gemspec +26 -0
  37. metadata +153 -0
@@ -0,0 +1,257 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe WordCountAnalyzer::Date do
4
+ context '#includes_date?' do
5
+ it 'returns true if the string includes a date #001' do
6
+ string = 'Today is Monday, April 4th, 2011, aka 04/04/2011.'
7
+ ws = WordCountAnalyzer::Date.new(string: string)
8
+ expect(ws.includes_date?).to eq(true)
9
+ end
10
+
11
+ it 'returns true if the string includes a date #002' do
12
+ string = 'Today is Monday April 4th 2011.'
13
+ ws = WordCountAnalyzer::Date.new(string: string)
14
+ expect(ws.includes_date?).to eq(true)
15
+ end
16
+
17
+ it 'returns true if the string includes a date #003' do
18
+ string = 'Today is April 4th, 2011.'
19
+ ws = WordCountAnalyzer::Date.new(string: string)
20
+ expect(ws.includes_date?).to eq(true)
21
+ end
22
+
23
+ it 'returns true if the string includes a date #004' do
24
+ string = 'Today is Mon., Apr. 4, 2011.'
25
+ ws = WordCountAnalyzer::Date.new(string: string)
26
+ expect(ws.includes_date?).to eq(true)
27
+ end
28
+
29
+ it 'returns true if the string includes a date #005' do
30
+ string = 'Today is 04/04/2011.'
31
+ ws = WordCountAnalyzer::Date.new(string: string)
32
+ expect(ws.includes_date?).to eq(true)
33
+ end
34
+
35
+ it 'returns true if the string includes a date #006' do
36
+ string = 'Today is 04.04.2011.'
37
+ ws = WordCountAnalyzer::Date.new(string: string)
38
+ expect(ws.includes_date?).to eq(true)
39
+ end
40
+
41
+ it 'returns true if the string includes a date #007' do
42
+ string = 'Today is 2011.04.04.'
43
+ ws = WordCountAnalyzer::Date.new(string: string)
44
+ expect(ws.includes_date?).to eq(true)
45
+ end
46
+
47
+ it 'returns true if the string includes a date #008' do
48
+ string = 'Today is 2011/04/04.'
49
+ ws = WordCountAnalyzer::Date.new(string: string)
50
+ expect(ws.includes_date?).to eq(true)
51
+ end
52
+
53
+ it 'returns true if the string includes a date #009' do
54
+ string = 'Today is 2011-04-04.'
55
+ ws = WordCountAnalyzer::Date.new(string: string)
56
+ expect(ws.includes_date?).to eq(true)
57
+ end
58
+
59
+ it 'returns true if the string includes a date #010' do
60
+ string = 'Today is 04-04-2011.'
61
+ ws = WordCountAnalyzer::Date.new(string: string)
62
+ expect(ws.includes_date?).to eq(true)
63
+ end
64
+
65
+ it 'returns true if the string includes a date #011' do
66
+ string = 'Today is 2003 November 9.'
67
+ ws = WordCountAnalyzer::Date.new(string: string)
68
+ expect(ws.includes_date?).to eq(true)
69
+ end
70
+
71
+ it 'returns true if the string includes a date #012' do
72
+ string = 'Today is 2003Nov9.'
73
+ ws = WordCountAnalyzer::Date.new(string: string)
74
+ expect(ws.includes_date?).to eq(true)
75
+ end
76
+
77
+ it 'returns true if the string includes a date #013' do
78
+ string = 'Today is 2003Nov09.'
79
+ ws = WordCountAnalyzer::Date.new(string: string)
80
+ expect(ws.includes_date?).to eq(true)
81
+ end
82
+
83
+ it 'returns true if the string includes a date #014' do
84
+ string = 'Today is 2003-Nov-9.'
85
+ ws = WordCountAnalyzer::Date.new(string: string)
86
+ expect(ws.includes_date?).to eq(true)
87
+ end
88
+
89
+ it 'returns true if the string includes a date #015' do
90
+ string = 'Today is 2003-Nov-09.'
91
+ ws = WordCountAnalyzer::Date.new(string: string)
92
+ expect(ws.includes_date?).to eq(true)
93
+ end
94
+
95
+ it 'returns true if the string includes a date #016' do
96
+ string = 'Today is 2003-Nov-9, Sunday.'
97
+ ws = WordCountAnalyzer::Date.new(string: string)
98
+ expect(ws.includes_date?).to eq(true)
99
+ end
100
+
101
+ it 'returns true if the string includes a date #017' do
102
+ string = 'Today is 2003. november 9.'
103
+ ws = WordCountAnalyzer::Date.new(string: string)
104
+ expect(ws.includes_date?).to eq(true)
105
+ end
106
+
107
+ it 'returns true if the string includes a date #018' do
108
+ string = 'Today is 2003.11.9.'
109
+ ws = WordCountAnalyzer::Date.new(string: string)
110
+ expect(ws.includes_date?).to eq(true)
111
+ end
112
+
113
+ it 'returns true if the string includes a date #019' do
114
+ string = 'Today is Monday, Apr. 4, 2011.'
115
+ ws = WordCountAnalyzer::Date.new(string: string)
116
+ expect(ws.includes_date?).to eq(true)
117
+ end
118
+
119
+ it 'returns true if the string includes a date #020' do
120
+ string = 'Today is 2003/11/09.'
121
+ ws = WordCountAnalyzer::Date.new(string: string)
122
+ expect(ws.includes_date?).to eq(true)
123
+ end
124
+
125
+ it 'returns true if the string includes a date #021' do
126
+ string = 'Today is 20030109.'
127
+ ws = WordCountAnalyzer::Date.new(string: string)
128
+ expect(ws.includes_date?).to eq(true)
129
+ end
130
+
131
+ it 'returns true if the string includes a date #022' do
132
+ string = 'Today is 01092003.'
133
+ ws = WordCountAnalyzer::Date.new(string: string)
134
+ expect(ws.includes_date?).to eq(true)
135
+ end
136
+
137
+ it 'returns true if the string includes a date #023' do
138
+ string = 'Today is Sunday, November 9, 2014.'
139
+ ws = WordCountAnalyzer::Date.new(string: string)
140
+ expect(ws.includes_date?).to eq(true)
141
+ end
142
+
143
+ it 'returns true if the string includes a date #024' do
144
+ string = 'Today is November 9, 2014.'
145
+ ws = WordCountAnalyzer::Date.new(string: string)
146
+ expect(ws.includes_date?).to eq(true)
147
+ end
148
+
149
+ it 'returns true if the string includes a date #025' do
150
+ string = 'Today is Nov. 9, 2014.'
151
+ ws = WordCountAnalyzer::Date.new(string: string)
152
+ expect(ws.includes_date?).to eq(true)
153
+ end
154
+
155
+ it 'returns true if the string includes a date #026' do
156
+ string = 'Today is july 1st.'
157
+ ws = WordCountAnalyzer::Date.new(string: string)
158
+ expect(ws.includes_date?).to eq(true)
159
+ end
160
+
161
+ it 'returns true if the string includes a date #027' do
162
+ string = 'Today is jul. 1st.'
163
+ ws = WordCountAnalyzer::Date.new(string: string)
164
+ expect(ws.includes_date?).to eq(true)
165
+ end
166
+
167
+ it 'returns true if the string includes a date #028' do
168
+ string = 'Today is 8 November 2014.'
169
+ ws = WordCountAnalyzer::Date.new(string: string)
170
+ expect(ws.includes_date?).to eq(true)
171
+ end
172
+
173
+ it 'returns true if the string includes a date #029' do
174
+ string = 'Today is 8. November 2014.'
175
+ ws = WordCountAnalyzer::Date.new(string: string)
176
+ expect(ws.includes_date?).to eq(true)
177
+ end
178
+
179
+ it 'returns true if the string includes a date #030' do
180
+ string = 'Today is 08-Nov-2014.'
181
+ ws = WordCountAnalyzer::Date.new(string: string)
182
+ expect(ws.includes_date?).to eq(true)
183
+ end
184
+
185
+ it 'returns true if the string includes a date #031' do
186
+ string = 'Today is 08Nov14.'
187
+ ws = WordCountAnalyzer::Date.new(string: string)
188
+ expect(ws.includes_date?).to eq(true)
189
+ end
190
+
191
+ it 'returns true if the string includes a date #032' do
192
+ string = 'Today is 8th November 2014.'
193
+ ws = WordCountAnalyzer::Date.new(string: string)
194
+ expect(ws.includes_date?).to eq(true)
195
+ end
196
+
197
+ it 'returns true if the string includes a date #033' do
198
+ string = 'Today is the 8th of November 2014.'
199
+ ws = WordCountAnalyzer::Date.new(string: string)
200
+ expect(ws.includes_date?).to eq(true)
201
+ end
202
+
203
+ it 'returns true if the string includes a date #034' do
204
+ string = 'Today is 08/Nov/2014.'
205
+ ws = WordCountAnalyzer::Date.new(string: string)
206
+ expect(ws.includes_date?).to eq(true)
207
+ end
208
+
209
+ it 'returns true if the string includes a date #035' do
210
+ string = 'Today is Sunday, 8 November 2014.'
211
+ ws = WordCountAnalyzer::Date.new(string: string)
212
+ expect(ws.includes_date?).to eq(true)
213
+ end
214
+
215
+ it 'returns true if the string includes a date #036' do
216
+ string = 'Today is 8 November 2014.'
217
+ ws = WordCountAnalyzer::Date.new(string: string)
218
+ expect(ws.includes_date?).to eq(true)
219
+ end
220
+
221
+ it 'returns false if the string does not include a date #037' do
222
+ string = 'Hello world. There is no date here - $50,000. The sun is hot.'
223
+ ws = WordCountAnalyzer::Date.new(string: string)
224
+ expect(ws.includes_date?).to eq(false)
225
+ end
226
+ end
227
+
228
+ context '#occurences' do
229
+ it 'counts the date occurences in a string #001' do
230
+ string = 'Today is Sunday, 8 November 2014.'
231
+ ws = WordCountAnalyzer::Date.new(string: string)
232
+ expect(ws.occurences).to eq(1)
233
+ end
234
+
235
+ it 'counts the date occurences in a string #002' do
236
+ string = 'Today is Sunday, 8 November 2014. Yesterday was 07/Nov/2014.'
237
+ ws = WordCountAnalyzer::Date.new(string: string)
238
+ expect(ws.occurences).to eq(2)
239
+ end
240
+ end
241
+
242
+ context '#replace' do
243
+ it 'replaces the date occurences in a string #001' do
244
+ string = 'Today is Tues. March 3rd, 2011.'
245
+ ws = WordCountAnalyzer::Date.new(string: string)
246
+ expect(ws.replace).to eq('Today is wsdateword ')
247
+ end
248
+ end
249
+
250
+ context '#replace_number_only_date' do
251
+ it 'replaces only the number date occurences in a string' do
252
+ string = 'Today is Tues. March 3rd, 2011. 4/28/2013'
253
+ ws = WordCountAnalyzer::Date.new(string: string)
254
+ expect(ws.replace_number_only_date).to eq("Today is Tues. March 3rd, 2011. wsdateword ")
255
+ end
256
+ end
257
+ end
@@ -0,0 +1,69 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe WordCountAnalyzer::Ellipsis do
4
+ context '#includes_ellipsis?' do
5
+ it 'returns true if the string includes an ellipsis #001' do
6
+ string = 'Using an ellipsis … causes different counts.'
7
+ ws = WordCountAnalyzer::Ellipsis.new(string: string)
8
+ expect(ws.includes_ellipsis?).to eq(true)
9
+ end
10
+
11
+ it 'returns true if the string includes an ellipsis #002' do
12
+ string = 'Using an ellipsis causes different counts…depending on the style that you use.'
13
+ ws = WordCountAnalyzer::Ellipsis.new(string: string)
14
+ expect(ws.includes_ellipsis?).to eq(true)
15
+ end
16
+
17
+ it 'returns true if the string includes an ellipsis #003' do
18
+ string = 'Using an ellipsis causes different counts depending on the style . . . that you use.'
19
+ ws = WordCountAnalyzer::Ellipsis.new(string: string)
20
+ expect(ws.includes_ellipsis?).to eq(true)
21
+ end
22
+
23
+ it 'returns true if the string includes an ellipsis #004' do
24
+ string = 'Using an ellipsis causes different counts depending on the style . . . . that you use.'
25
+ ws = WordCountAnalyzer::Ellipsis.new(string: string)
26
+ expect(ws.includes_ellipsis?).to eq(true)
27
+ end
28
+
29
+ it 'returns true if the string includes an ellipsis #005' do
30
+ string = 'Using an ellipsis causes different counts depending on the style.... that you use.'
31
+ ws = WordCountAnalyzer::Ellipsis.new(string: string)
32
+ expect(ws.includes_ellipsis?).to eq(true)
33
+ end
34
+
35
+ it "returns false if the string doesn't include an ellipsis #006" do
36
+ string = 'Hello world.'
37
+ ws = WordCountAnalyzer::Ellipsis.new(string: string)
38
+ expect(ws.includes_ellipsis?).to eq(false)
39
+ end
40
+
41
+ it "returns false if the string includes a dotted_line #007" do
42
+ string = '.....'
43
+ ws = WordCountAnalyzer::Ellipsis.new(string: string)
44
+ expect(ws.includes_ellipsis?).to eq(false)
45
+ end
46
+
47
+ it "returns false if the string includes a dotted_line #007" do
48
+ string = "Here is one …………………………………………………………………… and another ......"
49
+ ws = WordCountAnalyzer::Ellipsis.new(string: string)
50
+ expect(ws.includes_ellipsis?).to eq(false)
51
+ end
52
+ end
53
+
54
+ context '#replace' do
55
+ it 'returns a string with the ellipsis replaced #001' do
56
+ string = 'Using an ellipsis … causes different counts…depending on the style . . . that you use. I never meant that.... She left the store. The practice was not abandoned. . . .'
57
+ ws = WordCountAnalyzer::Ellipsis.new(string: string)
58
+ expect(ws.replace).to eq("Using an ellipsis wseword causes different counts wseword depending on the style wseword that you use. I never meant that. wseword She left the store. The practice was not abandoned wseword ")
59
+ end
60
+ end
61
+
62
+ context '#occurences' do
63
+ it 'returns a string with the ellipsis replaced #001' do
64
+ string = 'Using an ellipsis … causes different counts…depending on the style . . . that you use. I never meant that.... She left the store. The practice was not abandoned. . . .'
65
+ ws = WordCountAnalyzer::Ellipsis.new(string: string)
66
+ expect(ws.occurences).to eq(5)
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,77 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe WordCountAnalyzer::Hyperlink do
4
+ context '#hyperlink?' do
5
+ it 'returns true if the string is a hyperlink #001' do
6
+ string = "http://www.example.com/this-IS-a_test/hello.html"
7
+ ws = WordCountAnalyzer::Hyperlink.new(string: string)
8
+ expect(ws.hyperlink?).to eq(true)
9
+ end
10
+
11
+ it 'returns true if the string is a hyperlink #002' do
12
+ string = "http://www.google.co.uk"
13
+ ws = WordCountAnalyzer::Hyperlink.new(string: string)
14
+ expect(ws.hyperlink?).to eq(true)
15
+ end
16
+
17
+ it 'returns true if the string is a hyperlink #003' do
18
+ string = "https://google.co.uk"
19
+ ws = WordCountAnalyzer::Hyperlink.new(string: string)
20
+ expect(ws.hyperlink?).to eq(true)
21
+ end
22
+
23
+ it 'returns false if the string is not a hyperlink #004' do
24
+ string = "hello"
25
+ ws = WordCountAnalyzer::Hyperlink.new(string: string)
26
+ expect(ws.hyperlink?).to eq(false)
27
+ end
28
+
29
+ it 'returns false if the string is not a hyperlink #005' do
30
+ string = "john@gmail.com"
31
+ ws = WordCountAnalyzer::Hyperlink.new(string: string)
32
+ expect(ws.hyperlink?).to eq(false)
33
+ end
34
+
35
+ it 'returns false if the string is not a hyperlink #006' do
36
+ string = "date:"
37
+ ws = WordCountAnalyzer::Hyperlink.new(string: string)
38
+ expect(ws.hyperlink?).to eq(false)
39
+ end
40
+
41
+ it 'returns false if the string is not a hyperlink #007' do
42
+ string = 'The file location is c:\Users\johndoe.'
43
+ ws = WordCountAnalyzer::Hyperlink.new(string: string)
44
+ expect(ws.hyperlink?).to eq(false)
45
+ end
46
+ end
47
+
48
+ context '#occurences' do
49
+ it 'returns the occurences of hyperlink tokens in a string #001' do
50
+ string = "Today the date is: Jan 1. Visit https://www.example.com/hello or http://www.google.co.uk"
51
+ ws = WordCountAnalyzer::Hyperlink.new(string: string)
52
+ expect(ws.occurences).to eq(2)
53
+ end
54
+ end
55
+
56
+ context '#replace' do
57
+ it 'replaces the hyperlinks in a string with regular tokens #001' do
58
+ string = "Today the date is: Jan 1. Visit https://www.example.com/hello or http://www.google.co.uk"
59
+ ws = WordCountAnalyzer::Hyperlink.new(string: string)
60
+ expect(ws.replace).to eq("Today the date is: Jan 1. Visit wslinkword or wslinkword ")
61
+ end
62
+
63
+ it 'replaces the hyperlinks in a string with regular tokens #002' do
64
+ string = 'The file location is c:\Users\johndoe or d:\Users\john\www'
65
+ ws = WordCountAnalyzer::Hyperlink.new(string: string)
66
+ expect(ws.replace).to eq('The file location is c:\Users\johndoe or d:\Users\john\www')
67
+ end
68
+ end
69
+
70
+ context '#replace_split_at_period' do
71
+ it 'replaces the hyperlinks in a string with regular tokens, split at periods #001' do
72
+ string = "http://www.google.co.uk"
73
+ ws = WordCountAnalyzer::Hyperlink.new(string: string)
74
+ expect(ws.replace_split_at_period).to eq("http://www google co uk")
75
+ end
76
+ end
77
+ end
@@ -0,0 +1,81 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe WordCountAnalyzer::HyphenatedWord do
4
+ context '#hyphenated_word?' do
5
+ it 'returns true if the token is a hyphenated word #001' do
6
+ token = 'devil-may-care'
7
+ ws = WordCountAnalyzer::HyphenatedWord.new(token: token)
8
+ expect(ws.hyphenated_word?).to eq(true)
9
+ end
10
+
11
+ it 'returns true if the token is a hyphenated word #002' do
12
+ token = '(2R)-2-methylsulfanyl-3-hydroxybutanedioate'
13
+ ws = WordCountAnalyzer::HyphenatedWord.new(token: token)
14
+ expect(ws.hyphenated_word?).to eq(true)
15
+ end
16
+
17
+ it 'returns false if the token is not a hyphenated word' do
18
+ token = 'hello'
19
+ ws = WordCountAnalyzer::HyphenatedWord.new(token: token)
20
+ expect(ws.hyphenated_word?).to eq(false)
21
+ end
22
+
23
+ it 'returns false if the token is a hyperlink' do
24
+ token = 'https://www.example-one.com'
25
+ ws = WordCountAnalyzer::HyphenatedWord.new(token: token)
26
+ expect(ws.hyphenated_word?).to eq(false)
27
+ end
28
+
29
+ it 'returns false if the token is long string of dashes' do
30
+ token = '------------'
31
+ ws = WordCountAnalyzer::HyphenatedWord.new(token: token)
32
+ expect(ws.hyphenated_word?).to eq(false)
33
+ end
34
+
35
+ it 'returns true if the token is a hyphenated word (small em dashes)' do
36
+ token = 'devil﹘may﹘care'
37
+ ws = WordCountAnalyzer::HyphenatedWord.new(token: token)
38
+ expect(ws.hyphenated_word?).to eq(true)
39
+ end
40
+ end
41
+
42
+ context '#count_as_multiple' do
43
+ it 'returns the count of the individual words that are separated by the hyphen' do
44
+ token = 'devil-may-care'
45
+ ws = WordCountAnalyzer::HyphenatedWord.new(token: token)
46
+ expect(ws.count_as_multiple).to eq(3)
47
+ end
48
+
49
+ it 'handles small em dashes' do
50
+ token = 'devil﹘may﹘care'
51
+ ws = WordCountAnalyzer::HyphenatedWord.new(token: token)
52
+ expect(ws.count_as_multiple).to eq(3)
53
+ end
54
+
55
+ it 'returns the count of the individual words that are separated by the hyphen #002' do
56
+ token = '(2R)-2-methylsulfanyl-3-hydroxybutanedioate'
57
+ ws = WordCountAnalyzer::HyphenatedWord.new(token: token)
58
+ expect(ws.count_as_multiple).to eq(5)
59
+ end
60
+ end
61
+
62
+ context '#replace' do
63
+ it 'splits hyphenated words #001' do
64
+ token = 'devil-may-care'
65
+ ws = WordCountAnalyzer::HyphenatedWord.new(token: token)
66
+ expect(ws.replace).to eq('devil may care')
67
+ end
68
+
69
+ it 'splits hyphenated words #002' do
70
+ token = 'devil﹘may﹘care'
71
+ ws = WordCountAnalyzer::HyphenatedWord.new(token: token)
72
+ expect(ws.replace).to eq('devil may care')
73
+ end
74
+
75
+ it 'splits hyphenated words #003' do
76
+ token = '(2R)-2-methylsulfanyl-3-hydroxybutanedioate'
77
+ ws = WordCountAnalyzer::HyphenatedWord.new(token: token)
78
+ expect(ws.replace).to eq('(2R) 2 methylsulfanyl 3 hydroxybutanedioate')
79
+ end
80
+ end
81
+ end