word_count_analyzer 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ddbcaa034fe05d88898c67d91a9b2ce66b877914
4
- data.tar.gz: 37f0fbc804c6dac6e0a807a4c3b4c09c7d24e28b
3
+ metadata.gz: 7e0495c86a5d6731ba74d17ee091a8d0b9b7f225
4
+ data.tar.gz: 400039a69d59b118fa4e42c96fc59e4d796bf0f3
5
5
  SHA512:
6
- metadata.gz: 96f1d43573edc85d35704d829d5fb1468909f68f54d90e7f1adcbfbf64c4a6f56f6329650802ae3613af24d4a17967e16fdd29e178f6e51fd7b77159d66f6ba8
7
- data.tar.gz: 9c33f5446a70329c4236f19d01edbe66404402839f102ea985217dfc90efe7f8d74e3fc6fc2e51ae8a43f4a06ea0cb4acf681540660af67a191d8b59a86920b4
6
+ metadata.gz: 80ef57f5085d9ceb8b6de3f516313109e872a6f0cbaf3749417d5108ba51d0711bc9bfa8575593db46968d2f9d35ff0aef7be0f1613a546968d851d122a840f2
7
+ data.tar.gz: 4c6b1843507d4774183345b59ea7c44b3aca8e967965f79c96ae187ca069c48986bb7ef892b20959feb8ce2566ff62dbefda6232c396fb754fbd85bd989657cb
@@ -1,8 +1,5 @@
1
1
  module WordCountAnalyzer
2
2
  class Ellipsis
3
- # Rubular: http://rubular.com/r/i60hCK81fz
4
- THREE_CONSECUTIVE_REGEX = /\.{3}(?=\s+[A-Z])/
5
-
6
3
  # Rubular: http://rubular.com/r/mfdtSeuIf2
7
4
  FOUR_CONSECUTIVE_REGEX = /(?<=[^\.])\.{3}\.(?=[^\.])/
8
5
 
@@ -12,7 +9,7 @@ module WordCountAnalyzer
12
9
  # Rubular: http://rubular.com/r/2VvZ8wRbd8
13
10
  FOUR_SPACE_REGEX = /(?<=[a-z])(\.\s){3}\.(\z|$|\n)/
14
11
 
15
- OTHER_THREE_PERIOD_REGEX = /[^\.]\.{3}[^\.]/
12
+ OTHER_THREE_PERIOD_REGEX = /[^\.]\.{3}([^\.]|$)/
16
13
 
17
14
  UNICODE_ELLIPSIS = /(?<=[^…])…{1}(?=[^…])/
18
15
 
@@ -22,7 +19,6 @@ module WordCountAnalyzer
22
19
  end
23
20
 
24
21
  def includes_ellipsis?
25
- !(string !~ THREE_CONSECUTIVE_REGEX) ||
26
22
  !(string !~ FOUR_CONSECUTIVE_REGEX) ||
27
23
  !(string !~ THREE_SPACE_REGEX) ||
28
24
  !(string !~ FOUR_SPACE_REGEX) ||
@@ -31,8 +27,7 @@ module WordCountAnalyzer
31
27
  end
32
28
 
33
29
  def replace
34
- string.gsub(THREE_CONSECUTIVE_REGEX, ' wseword ')
35
- .gsub(FOUR_CONSECUTIVE_REGEX, ' wseword ')
30
+ string.gsub(FOUR_CONSECUTIVE_REGEX, ' wseword ')
36
31
  .gsub(THREE_SPACE_REGEX, ' wseword ')
37
32
  .gsub(FOUR_SPACE_REGEX, ' wseword ')
38
33
  .gsub(OTHER_THREE_PERIOD_REGEX, ' wseword ')
@@ -1,3 +1,3 @@
1
1
  module WordCountAnalyzer
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
@@ -2,10 +2,16 @@ require 'spec_helper'
2
2
 
3
3
  RSpec.describe WordCountAnalyzer::Analyzer do
4
4
  context '#analysis' do
5
- it 'should analyze the gray areas' do
5
+ it 'should analyze the gray areas #001' do
6
6
  text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 2. <html> Some HTML and a hyphenated-word</html>. Don't count punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list 1. item a 2. item b 3. item c. Now let's add he/she/it or a c:\\Users\\john. 2/15/2012 is the date! { HYPERLINK 'http://www.hello.com' }"
7
7
  ws = WordCountAnalyzer::Analyzer.new(text: text)
8
8
  expect(ws.analyze).to eq({"ellipsis"=>1, "hyperlink"=>2, "contraction"=>4, "hyphenated_word"=>2, "date"=>2, "number"=>1, "numbered_list"=>3, "xhtml"=>1, "forward_slash"=>1, "backslash"=>1, "dotted_line"=>1, "dashed_line"=>1, "underscore"=>1, "stray_punctuation"=>5})
9
9
  end
10
+
11
+ it 'should analyze the gray areas #002' do
12
+ text = "hello world ..."
13
+ ws = WordCountAnalyzer::Analyzer.new(text: text)
14
+ expect(ws.analyze).to eq({"ellipsis"=>1, "hyperlink"=>0, "contraction"=>0, "hyphenated_word"=>0, "date"=>0, "number"=>0, "numbered_list"=>0, "xhtml"=>0, "forward_slash"=>0, "backslash"=>0, "dotted_line"=>0, "dashed_line"=>0, "underscore"=>0, "stray_punctuation"=>0})
15
+ end
10
16
  end
11
17
  end
@@ -629,6 +629,11 @@ RSpec.describe WordCountAnalyzer::Counter do
629
629
  expect(ws.count).to eq(6)
630
630
  end
631
631
 
632
+ it 'String #004' do
633
+ ws = WordCountAnalyzer::Counter.new(text: 'hello world ...')
634
+ expect(ws.count).to eq(2)
635
+ end
636
+
632
637
  it 'does not split on unicode chars' do
633
638
  ws = WordCountAnalyzer::Counter.new(text: 'São Paulo')
634
639
  expect(ws.count).to eq(2)
@@ -32,19 +32,25 @@ RSpec.describe WordCountAnalyzer::Ellipsis do
32
32
  expect(ws.includes_ellipsis?).to eq(true)
33
33
  end
34
34
 
35
- it "returns false if the string doesn't include an ellipsis #006" do
35
+ it 'returns true if the string includes an ellipsis #006' do
36
+ string = 'hello world ...'
37
+ ws = WordCountAnalyzer::Ellipsis.new(string: string)
38
+ expect(ws.includes_ellipsis?).to eq(true)
39
+ end
40
+
41
+ it "returns false if the string doesn't include an ellipsis #007" do
36
42
  string = 'Hello world.'
37
43
  ws = WordCountAnalyzer::Ellipsis.new(string: string)
38
44
  expect(ws.includes_ellipsis?).to eq(false)
39
45
  end
40
46
 
41
- it "returns false if the string includes a dotted_line #007" do
47
+ it "returns false if the string includes a dotted_line #008" do
42
48
  string = '.....'
43
49
  ws = WordCountAnalyzer::Ellipsis.new(string: string)
44
50
  expect(ws.includes_ellipsis?).to eq(false)
45
51
  end
46
52
 
47
- it "returns false if the string includes a dotted_line #007" do
53
+ it "returns false if the string includes a dotted_line #009" do
48
54
  string = "Here is one …………………………………………………………………… and another ......"
49
55
  ws = WordCountAnalyzer::Ellipsis.new(string: string)
50
56
  expect(ws.includes_ellipsis?).to eq(false)
@@ -55,7 +61,7 @@ RSpec.describe WordCountAnalyzer::Ellipsis do
55
61
  it 'returns a string with the ellipsis replaced #001' do
56
62
  string = 'Using an ellipsis … causes different counts…depending on the style . . . that you use. I never meant that.... She left the store. The practice was not abandoned. . . .'
57
63
  ws = WordCountAnalyzer::Ellipsis.new(string: string)
58
- expect(ws.replace).to eq("Using an ellipsis wseword causes different counts wseword depending on the style wseword that you use. I never meant that. wseword She left the store. The practice was not abandoned wseword ")
64
+ expect(ws.replace).to eq("Using an ellipsis wseword causes different counts wseword depending on the style wseword that you use. I never meant that wseword She left the store. The practice was not abandoned wseword ")
59
65
  end
60
66
  end
61
67
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: word_count_analyzer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias