word_count_analyzer 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7e0495c86a5d6731ba74d17ee091a8d0b9b7f225
4
- data.tar.gz: 400039a69d59b118fa4e42c96fc59e4d796bf0f3
3
+ metadata.gz: b715b4c2304ae956644121693d584b0b37638845
4
+ data.tar.gz: 3b6d753132d5b9e511378ec06f990efa21fcc86b
5
5
  SHA512:
6
- metadata.gz: 80ef57f5085d9ceb8b6de3f516313109e872a6f0cbaf3749417d5108ba51d0711bc9bfa8575593db46968d2f9d35ff0aef7be0f1613a546968d851d122a840f2
7
- data.tar.gz: 4c6b1843507d4774183345b59ea7c44b3aca8e967965f79c96ae187ca069c48986bb7ef892b20959feb8ce2566ff62dbefda6232c396fb754fbd85bd989657cb
6
+ metadata.gz: 504f3e98b336ebcce4129b137e5293f3fbe38593fd935d55b56d9807f77c17a7e5ae955c4fba41fde8fe8a9e725232253038e5b912ff892eed8ece89f1819b34
7
+ data.tar.gz: 2ae63d08ea641d00f60f4d83f07e1a376cd51cdc632bafcf96a1c3e6b7b173c5e970625fa51749b879893e0386a202bebf799448703373cccd6c855bf8c56040
data/README.md CHANGED
@@ -151,6 +151,8 @@ WordCountAnalyzer::Counter.new(
151
151
 
152
152
  ##### `date`
153
153
  **default** = `'no_special_treatment'`
154
+ - `'no_special_treatment'`
155
+ Dates will not be searched for in the string. Therefore, how a date is handled in the word count will depend on other settings.
154
156
  - `'count_as_one'`
155
157
  Counts a date as one word. This is more commonly seen in translation CAT tools where a date is thought of as a *placeable* that can usually be automatically translated. Examples:
156
158
  - Monday, April 4th, 2011 (1 word)
@@ -163,8 +165,6 @@ WordCountAnalyzer::Counter.new(
163
165
  - 2003 November 9 (1 word)
164
166
  - 2003-Nov-9 (1 word)
165
167
  - and others...
166
- - `'no_special_treatment'`
167
- Dates will not be searched for in the string. Therefore, how a date is handled in the word count will depend on other settings.
168
168
 
169
169
  <hr>
170
170
 
@@ -197,15 +197,15 @@ WordCountAnalyzer::Counter.new(
197
197
 
198
198
  ##### `forward_slash`
199
199
  **default** = `'count_as_multiple_except_dates'`
200
- - `'count_as_one'`
201
- Counts any tokens that include a forward slash as one word. Example:
202
- - she/he/it (1 word)
203
- - `'count_as_multiple'`
204
- Separates any tokens that include a forward slash at the slash(s) and counts each token individually. Whether dates, hyperlinks and xhtml are included depends on what is set for those options. Example:
205
- - she/he/it (3 words)
206
200
  - `'count_as_multiple_except_dates'`
207
201
  Separates any tokens that include a forward slash (except dates) at the slash(s) and counts each token individually. Example:
208
202
  - she/he/it 4/25/2014 (4 words)
203
+ - `'count_as_multiple'`
204
+ Separates any tokens that include a forward slash at the slash(s) and counts each token individually. Whether dates, hyperlinks and xhtml are included depends on what is set for those options. Example:
205
+ - she/he/it (3 words)
206
+ - `'count_as_one'`
207
+ Counts any tokens that include a forward slash as one word. Example:
208
+ - she/he/it (1 word)
209
209
 
210
210
  <hr>
211
211
 
@@ -222,37 +222,37 @@ WordCountAnalyzer::Counter.new(
222
222
 
223
223
  ##### `dotted_line`
224
224
  **default** = `'ignore'`
225
- - `'count'`
226
- Counts a dotted line as one word.
227
225
  - `'ignore'`
228
226
  Ignores any dotted lines in the string and does not count them towards the word count.
227
+ - `'count'`
228
+ Counts a dotted line as one word.
229
229
 
230
230
  <hr>
231
231
 
232
232
  ##### `dashed_line`
233
233
  **default** = `'ignore'`
234
+ - `'ignore'`
235
+ Ignores any dashed lines in the string and does not count them towards the word count.
234
236
  - `'count'`
235
237
  Counts a dashed line as one word.
236
- - `'ignore'`
237
- Ignores any dashed lines in the string and does not count them towards the word count.
238
238
 
239
239
  <hr>
240
240
 
241
241
  ##### `underscore`
242
242
  **default** = `'ignore'`
243
+ - `'ignore'`
244
+ Ignores any series of underscores in the string and does not count them towards the word count.
243
245
  - `'count'`
244
246
  Counts a series of underscores as one word.
245
- - `'ignore'`
246
- Ignores any series of underscores in the string and does not count them towards the word count.
247
247
 
248
248
  <hr>
249
249
 
250
250
  ##### `stray_punctuation`
251
251
  **default** = `'ignore'`
252
+ - `'ignore'`
253
+ Ignores any punctuation marks surrounded on both sides by a whitespace in the string and does not count them towards the word count.
252
254
  - `'count'`
253
255
  Counts a punctuation mark surrounded on both sides by a whitespace as one word.
254
- - `'ignore'`
255
- Ignores any punctuation marks surrounded on both sides by a whitespace in the string and does not count them towards the word count.
256
256
 
257
257
  ### Gray Area Details
258
258
 
@@ -9,7 +9,7 @@ module WordCountAnalyzer
9
9
  # Rubular: http://rubular.com/r/2VvZ8wRbd8
10
10
  FOUR_SPACE_REGEX = /(?<=[a-z])(\.\s){3}\.(\z|$|\n)/
11
11
 
12
- OTHER_THREE_PERIOD_REGEX = /[^\.]\.{3}([^\.]|$)/
12
+ OTHER_THREE_PERIOD_REGEX = /(?<=[^\.])\.{3}(?=([^\.]|$))/
13
13
 
14
14
  UNICODE_ELLIPSIS = /(?<=[^…])…{1}(?=[^…])/
15
15
 
@@ -60,6 +60,7 @@ module WordCountAnalyzer
60
60
  processed_string.gsub!(FORWARD_SLASH_REGEX).each do |match|
61
61
  match.split(/\/+/).join(' ')
62
62
  end
63
+ processed_string
63
64
  end
64
65
 
65
66
  def replace_forward_slashes_except_dates
@@ -68,6 +69,7 @@ module WordCountAnalyzer
68
69
  except_date_string.gsub!(FORWARD_SLASH_REGEX).each do |match|
69
70
  match.split(/\/+/).join(' ')
70
71
  end
72
+ except_date_string
71
73
  end
72
74
 
73
75
  def backslash_occurences
@@ -79,6 +81,7 @@ module WordCountAnalyzer
79
81
  processed_string.gsub!(BACKSLASH_REGEX).each do |match|
80
82
  ' word ' * match.split(/\\+/).length
81
83
  end
84
+ processed_string
82
85
  end
83
86
  end
84
87
  end
@@ -1,3 +1,3 @@
1
1
  module WordCountAnalyzer
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  end
@@ -553,6 +553,12 @@ RSpec.describe WordCountAnalyzer::Counter do
553
553
  expect(ws.count).to eq(66)
554
554
  end
555
555
 
556
+ it 'counts the words in a string #005' do
557
+ text = "Hello world... 11/22/2013"
558
+ ws = WordCountAnalyzer::Counter.new(text: text)
559
+ expect(ws.count).to eq(3)
560
+ end
561
+
556
562
  context 'Pages Word Count' do
557
563
  it 'reverse engineers Pages word count #001' do
558
564
  text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 2. <html> Some HTML and a hyphenated-word</html>. Don't count punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list 1. item a 2. item b 3. item c. Now let's add he/she/it or a c:\\Users\\john. 2/15/2012 is the date! { HYPERLINK 'http://www.hello.com' }"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: word_count_analyzer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias