word_count_analyzer 0.0.5 → 0.0.6

This diff shows the differences between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7e0495c86a5d6731ba74d17ee091a8d0b9b7f225
4
- data.tar.gz: 400039a69d59b118fa4e42c96fc59e4d796bf0f3
3
+ metadata.gz: b715b4c2304ae956644121693d584b0b37638845
4
+ data.tar.gz: 3b6d753132d5b9e511378ec06f990efa21fcc86b
5
5
  SHA512:
6
- metadata.gz: 80ef57f5085d9ceb8b6de3f516313109e872a6f0cbaf3749417d5108ba51d0711bc9bfa8575593db46968d2f9d35ff0aef7be0f1613a546968d851d122a840f2
7
- data.tar.gz: 4c6b1843507d4774183345b59ea7c44b3aca8e967965f79c96ae187ca069c48986bb7ef892b20959feb8ce2566ff62dbefda6232c396fb754fbd85bd989657cb
6
+ metadata.gz: 504f3e98b336ebcce4129b137e5293f3fbe38593fd935d55b56d9807f77c17a7e5ae955c4fba41fde8fe8a9e725232253038e5b912ff892eed8ece89f1819b34
7
+ data.tar.gz: 2ae63d08ea641d00f60f4d83f07e1a376cd51cdc632bafcf96a1c3e6b7b173c5e970625fa51749b879893e0386a202bebf799448703373cccd6c855bf8c56040
data/README.md CHANGED
@@ -151,6 +151,8 @@ WordCountAnalyzer::Counter.new(
151
151
 
152
152
  ##### `date`
153
153
  **default** = `'no_special_treatment'`
154
+ - `'no_special_treatment'`
155
+ Dates will not be searched for in the string. Therefore, how a date is handled in the word count will depend on other settings.
154
156
  - `'count_as_one'`
155
157
  Counts a date as one word. This is more commonly seen in translation CAT tools where a date is thought of as a *placeable* that can usually be automatically translated. Examples:
156
158
  - Monday, April 4th, 2011 (1 word)
@@ -163,8 +165,6 @@ WordCountAnalyzer::Counter.new(
163
165
  - 2003 November 9 (1 word)
164
166
  - 2003-Nov-9 (1 word)
165
167
  - and others...
166
- - `'no_special_treatment'`
167
- Dates will not be searched for in the string. Therefore, how a date is handled in the word count will depend on other settings.
168
168
 
169
169
  <hr>
170
170
 
@@ -197,15 +197,15 @@ WordCountAnalyzer::Counter.new(
197
197
 
198
198
  ##### `forward_slash`
199
199
  **default** = `'count_as_multiple_except_dates'`
200
- - `'count_as_one'`
201
- Counts any tokens that include a forward slash as one word. Example:
202
- - she/he/it (1 word)
203
- - `'count_as_multiple'`
204
- Separates any tokens that include a forward slash at the slash(s) and counts each token individually. Whether dates, hyperlinks and xhtml are included depends on what is set for those options. Example:
205
- - she/he/it (3 words)
206
200
  - `'count_as_multiple_except_dates'`
207
201
  Separates any tokens that include a forward slash (except dates) at the slash(s) and counts each token individually. Example:
208
202
  - she/he/it 4/25/2014 (4 words)
203
+ - `'count_as_multiple'`
204
+ Separates any tokens that include a forward slash at the slash(s) and counts each token individually. Whether dates, hyperlinks and xhtml are included depends on what is set for those options. Example:
205
+ - she/he/it (3 words)
206
+ - `'count_as_one'`
207
+ Counts any tokens that include a forward slash as one word. Example:
208
+ - she/he/it (1 word)
209
209
 
210
210
  <hr>
211
211
 
@@ -222,37 +222,37 @@ WordCountAnalyzer::Counter.new(
222
222
 
223
223
  ##### `dotted_line`
224
224
  **default** = `'ignore'`
225
- - `'count'`
226
- Counts a dotted line as one word.
227
225
  - `'ignore'`
228
226
  Ignores any dotted lines in the string and does not count them towards the word count.
227
+ - `'count'`
228
+ Counts a dotted line as one word.
229
229
 
230
230
  <hr>
231
231
 
232
232
  ##### `dashed_line`
233
233
  **default** = `'ignore'`
234
+ - `'ignore'`
235
+ Ignores any dashed lines in the string and does not count them towards the word count.
234
236
  - `'count'`
235
237
  Counts a dashed line as one word.
236
- - `'ignore'`
237
- Ignores any dashed lines in the string and does not count them towards the word count.
238
238
 
239
239
  <hr>
240
240
 
241
241
  ##### `underscore`
242
242
  **default** = `'ignore'`
243
+ - `'ignore'`
244
+ Ignores any series of underscores in the string and does not count them towards the word count.
243
245
  - `'count'`
244
246
  Counts a series of underscores as one word.
245
- - `'ignore'`
246
- Ignores any series of underscores in the string and does not count them towards the word count.
247
247
 
248
248
  <hr>
249
249
 
250
250
  ##### `stray_punctuation`
251
251
  **default** = `'ignore'`
252
+ - `'ignore'`
253
+ Ignores any punctuation marks surrounded on both sides by a whitespace in the string and does not count them towards the word count.
252
254
  - `'count'`
253
255
  Counts a punctuation mark surrounded on both sides by a whitespace as one word.
254
- - `'ignore'`
255
- Ignores any punctuation marks surrounded on both sides by a whitespace in the string and does not count them towards the word count.
256
256
 
257
257
  ### Gray Area Details
258
258
 
@@ -9,7 +9,7 @@ module WordCountAnalyzer
9
9
  # Rubular: http://rubular.com/r/2VvZ8wRbd8
10
10
  FOUR_SPACE_REGEX = /(?<=[a-z])(\.\s){3}\.(\z|$|\n)/
11
11
 
12
- OTHER_THREE_PERIOD_REGEX = /[^\.]\.{3}([^\.]|$)/
12
+ OTHER_THREE_PERIOD_REGEX = /(?<=[^\.])\.{3}(?=([^\.]|$))/
13
13
 
14
14
  UNICODE_ELLIPSIS = /(?<=[^…])…{1}(?=[^…])/
15
15
 
@@ -60,6 +60,7 @@ module WordCountAnalyzer
60
60
  processed_string.gsub!(FORWARD_SLASH_REGEX).each do |match|
61
61
  match.split(/\/+/).join(' ')
62
62
  end
63
+ processed_string
63
64
  end
64
65
 
65
66
  def replace_forward_slashes_except_dates
@@ -68,6 +69,7 @@ module WordCountAnalyzer
68
69
  except_date_string.gsub!(FORWARD_SLASH_REGEX).each do |match|
69
70
  match.split(/\/+/).join(' ')
70
71
  end
72
+ except_date_string
71
73
  end
72
74
 
73
75
  def backslash_occurences
@@ -79,6 +81,7 @@ module WordCountAnalyzer
79
81
  processed_string.gsub!(BACKSLASH_REGEX).each do |match|
80
82
  ' word ' * match.split(/\\+/).length
81
83
  end
84
+ processed_string
82
85
  end
83
86
  end
84
87
  end
@@ -1,3 +1,3 @@
1
1
  module WordCountAnalyzer
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  end
@@ -553,6 +553,12 @@ RSpec.describe WordCountAnalyzer::Counter do
553
553
  expect(ws.count).to eq(66)
554
554
  end
555
555
 
556
+ it 'counts the words in a string #005' do
557
+ text = "Hello world... 11/22/2013"
558
+ ws = WordCountAnalyzer::Counter.new(text: text)
559
+ expect(ws.count).to eq(3)
560
+ end
561
+
556
562
  context 'Pages Word Count' do
557
563
  it 'reverse engineers Pages word count #001' do
558
564
  text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 2. <html> Some HTML and a hyphenated-word</html>. Don't count punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list 1. item a 2. item b 3. item c. Now let's add he/she/it or a c:\\Users\\john. 2/15/2012 is the date! { HYPERLINK 'http://www.hello.com' }"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: word_count_analyzer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias