word_count_analyzer 0.0.4 → 0.0.5

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ddbcaa034fe05d88898c67d91a9b2ce66b877914
4
- data.tar.gz: 37f0fbc804c6dac6e0a807a4c3b4c09c7d24e28b
3
+ metadata.gz: 7e0495c86a5d6731ba74d17ee091a8d0b9b7f225
4
+ data.tar.gz: 400039a69d59b118fa4e42c96fc59e4d796bf0f3
5
5
  SHA512:
6
- metadata.gz: 96f1d43573edc85d35704d829d5fb1468909f68f54d90e7f1adcbfbf64c4a6f56f6329650802ae3613af24d4a17967e16fdd29e178f6e51fd7b77159d66f6ba8
7
- data.tar.gz: 9c33f5446a70329c4236f19d01edbe66404402839f102ea985217dfc90efe7f8d74e3fc6fc2e51ae8a43f4a06ea0cb4acf681540660af67a191d8b59a86920b4
6
+ metadata.gz: 80ef57f5085d9ceb8b6de3f516313109e872a6f0cbaf3749417d5108ba51d0711bc9bfa8575593db46968d2f9d35ff0aef7be0f1613a546968d851d122a840f2
7
+ data.tar.gz: 4c6b1843507d4774183345b59ea7c44b3aca8e967965f79c96ae187ca069c48986bb7ef892b20959feb8ce2566ff62dbefda6232c396fb754fbd85bd989657cb
@@ -1,8 +1,5 @@
1
1
  module WordCountAnalyzer
2
2
  class Ellipsis
3
- # Rubular: http://rubular.com/r/i60hCK81fz
4
- THREE_CONSECUTIVE_REGEX = /\.{3}(?=\s+[A-Z])/
5
-
6
3
  # Rubular: http://rubular.com/r/mfdtSeuIf2
7
4
  FOUR_CONSECUTIVE_REGEX = /(?<=[^\.])\.{3}\.(?=[^\.])/
8
5
 
@@ -12,7 +9,7 @@ module WordCountAnalyzer
12
9
  # Rubular: http://rubular.com/r/2VvZ8wRbd8
13
10
  FOUR_SPACE_REGEX = /(?<=[a-z])(\.\s){3}\.(\z|$|\n)/
14
11
 
15
- OTHER_THREE_PERIOD_REGEX = /[^\.]\.{3}[^\.]/
12
+ OTHER_THREE_PERIOD_REGEX = /[^\.]\.{3}([^\.]|$)/
16
13
 
17
14
  UNICODE_ELLIPSIS = /(?<=[^…])…{1}(?=[^…])/
18
15
 
@@ -22,7 +19,6 @@ module WordCountAnalyzer
22
19
  end
23
20
 
24
21
  def includes_ellipsis?
25
- !(string !~ THREE_CONSECUTIVE_REGEX) ||
26
22
  !(string !~ FOUR_CONSECUTIVE_REGEX) ||
27
23
  !(string !~ THREE_SPACE_REGEX) ||
28
24
  !(string !~ FOUR_SPACE_REGEX) ||
@@ -31,8 +27,7 @@ module WordCountAnalyzer
31
27
  end
32
28
 
33
29
  def replace
34
- string.gsub(THREE_CONSECUTIVE_REGEX, ' wseword ')
35
- .gsub(FOUR_CONSECUTIVE_REGEX, ' wseword ')
30
+ string.gsub(FOUR_CONSECUTIVE_REGEX, ' wseword ')
36
31
  .gsub(THREE_SPACE_REGEX, ' wseword ')
37
32
  .gsub(FOUR_SPACE_REGEX, ' wseword ')
38
33
  .gsub(OTHER_THREE_PERIOD_REGEX, ' wseword ')
@@ -1,3 +1,3 @@
1
1
  module WordCountAnalyzer
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
@@ -2,10 +2,16 @@ require 'spec_helper'
2
2
 
3
3
  RSpec.describe WordCountAnalyzer::Analyzer do
4
4
  context '#analysis' do
5
- it 'should analyze the gray areas' do
5
+ it 'should analyze the gray areas #001' do
6
6
  text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 2. <html> Some HTML and a hyphenated-word</html>. Don't count punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list 1. item a 2. item b 3. item c. Now let's add he/she/it or a c:\\Users\\john. 2/15/2012 is the date! { HYPERLINK 'http://www.hello.com' }"
7
7
  ws = WordCountAnalyzer::Analyzer.new(text: text)
8
8
  expect(ws.analyze).to eq({"ellipsis"=>1, "hyperlink"=>2, "contraction"=>4, "hyphenated_word"=>2, "date"=>2, "number"=>1, "numbered_list"=>3, "xhtml"=>1, "forward_slash"=>1, "backslash"=>1, "dotted_line"=>1, "dashed_line"=>1, "underscore"=>1, "stray_punctuation"=>5})
9
9
  end
10
+
11
+ it 'should analyze the gray areas #002' do
12
+ text = "hello world ..."
13
+ ws = WordCountAnalyzer::Analyzer.new(text: text)
14
+ expect(ws.analyze).to eq({"ellipsis"=>1, "hyperlink"=>0, "contraction"=>0, "hyphenated_word"=>0, "date"=>0, "number"=>0, "numbered_list"=>0, "xhtml"=>0, "forward_slash"=>0, "backslash"=>0, "dotted_line"=>0, "dashed_line"=>0, "underscore"=>0, "stray_punctuation"=>0})
15
+ end
10
16
  end
11
17
  end
@@ -629,6 +629,11 @@ RSpec.describe WordCountAnalyzer::Counter do
629
629
  expect(ws.count).to eq(6)
630
630
  end
631
631
 
632
+ it 'String #004' do
633
+ ws = WordCountAnalyzer::Counter.new(text: 'hello world ...')
634
+ expect(ws.count).to eq(2)
635
+ end
636
+
632
637
  it 'does not split on unicode chars' do
633
638
  ws = WordCountAnalyzer::Counter.new(text: 'São Paulo')
634
639
  expect(ws.count).to eq(2)
@@ -32,19 +32,25 @@ RSpec.describe WordCountAnalyzer::Ellipsis do
32
32
  expect(ws.includes_ellipsis?).to eq(true)
33
33
  end
34
34
 
35
- it "returns false if the string doesn't include an ellipsis #006" do
35
+ it 'returns true if the string includes an ellipsis #006' do
36
+ string = 'hello world ...'
37
+ ws = WordCountAnalyzer::Ellipsis.new(string: string)
38
+ expect(ws.includes_ellipsis?).to eq(true)
39
+ end
40
+
41
+ it "returns false if the string doesn't include an ellipsis #007" do
36
42
  string = 'Hello world.'
37
43
  ws = WordCountAnalyzer::Ellipsis.new(string: string)
38
44
  expect(ws.includes_ellipsis?).to eq(false)
39
45
  end
40
46
 
41
- it "returns false if the string includes a dotted_line #007" do
47
+ it "returns false if the string includes a dotted_line #008" do
42
48
  string = '.....'
43
49
  ws = WordCountAnalyzer::Ellipsis.new(string: string)
44
50
  expect(ws.includes_ellipsis?).to eq(false)
45
51
  end
46
52
 
47
- it "returns false if the string includes a dotted_line #007" do
53
+ it "returns false if the string includes a dotted_line #009" do
48
54
  string = "Here is one …………………………………………………………………… and another ......"
49
55
  ws = WordCountAnalyzer::Ellipsis.new(string: string)
50
56
  expect(ws.includes_ellipsis?).to eq(false)
@@ -55,7 +61,7 @@ RSpec.describe WordCountAnalyzer::Ellipsis do
55
61
  it 'returns a string with the ellipsis replaced #001' do
56
62
  string = 'Using an ellipsis … causes different counts…depending on the style . . . that you use. I never meant that.... She left the store. The practice was not abandoned. . . .'
57
63
  ws = WordCountAnalyzer::Ellipsis.new(string: string)
58
- expect(ws.replace).to eq("Using an ellipsis wseword causes different counts wseword depending on the style wseword that you use. I never meant that. wseword She left the store. The practice was not abandoned wseword ")
64
+ expect(ws.replace).to eq("Using an ellipsis wseword causes different counts wseword depending on the style wseword that you use. I never meant that wseword She left the store. The practice was not abandoned wseword ")
59
65
  end
60
66
  end
61
67
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: word_count_analyzer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias