word_count_analyzer 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7e0495c86a5d6731ba74d17ee091a8d0b9b7f225
|
4
|
+
data.tar.gz: 400039a69d59b118fa4e42c96fc59e4d796bf0f3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 80ef57f5085d9ceb8b6de3f516313109e872a6f0cbaf3749417d5108ba51d0711bc9bfa8575593db46968d2f9d35ff0aef7be0f1613a546968d851d122a840f2
|
7
|
+
data.tar.gz: 4c6b1843507d4774183345b59ea7c44b3aca8e967965f79c96ae187ca069c48986bb7ef892b20959feb8ce2566ff62dbefda6232c396fb754fbd85bd989657cb
|
@@ -1,8 +1,5 @@
|
|
1
1
|
module WordCountAnalyzer
|
2
2
|
class Ellipsis
|
3
|
-
# Rubular: http://rubular.com/r/i60hCK81fz
|
4
|
-
THREE_CONSECUTIVE_REGEX = /\.{3}(?=\s+[A-Z])/
|
5
|
-
|
6
3
|
# Rubular: http://rubular.com/r/mfdtSeuIf2
|
7
4
|
FOUR_CONSECUTIVE_REGEX = /(?<=[^\.])\.{3}\.(?=[^\.])/
|
8
5
|
|
@@ -12,7 +9,7 @@ module WordCountAnalyzer
|
|
12
9
|
# Rubular: http://rubular.com/r/2VvZ8wRbd8
|
13
10
|
FOUR_SPACE_REGEX = /(?<=[a-z])(\.\s){3}\.(\z|$|\n)/
|
14
11
|
|
15
|
-
OTHER_THREE_PERIOD_REGEX = /[^\.]\.{3}[^\.]/
|
12
|
+
OTHER_THREE_PERIOD_REGEX = /[^\.]\.{3}([^\.]|$)/
|
16
13
|
|
17
14
|
UNICODE_ELLIPSIS = /(?<=[^…])…{1}(?=[^…])/
|
18
15
|
|
@@ -22,7 +19,6 @@ module WordCountAnalyzer
|
|
22
19
|
end
|
23
20
|
|
24
21
|
def includes_ellipsis?
|
25
|
-
!(string !~ THREE_CONSECUTIVE_REGEX) ||
|
26
22
|
!(string !~ FOUR_CONSECUTIVE_REGEX) ||
|
27
23
|
!(string !~ THREE_SPACE_REGEX) ||
|
28
24
|
!(string !~ FOUR_SPACE_REGEX) ||
|
@@ -31,8 +27,7 @@ module WordCountAnalyzer
|
|
31
27
|
end
|
32
28
|
|
33
29
|
def replace
|
34
|
-
string.gsub(THREE_CONSECUTIVE_REGEX, ' wseword ')
|
35
|
-
.gsub(FOUR_CONSECUTIVE_REGEX, ' wseword ')
|
30
|
+
string.gsub(FOUR_CONSECUTIVE_REGEX, ' wseword ')
|
36
31
|
.gsub(THREE_SPACE_REGEX, ' wseword ')
|
37
32
|
.gsub(FOUR_SPACE_REGEX, ' wseword ')
|
38
33
|
.gsub(OTHER_THREE_PERIOD_REGEX, ' wseword ')
|
@@ -2,10 +2,16 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
RSpec.describe WordCountAnalyzer::Analyzer do
|
4
4
|
context '#analysis' do
|
5
|
-
it 'should analyze the gray areas' do
|
5
|
+
it 'should analyze the gray areas #001' do
|
6
6
|
text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 2. <html> Some HTML and a hyphenated-word</html>. Don't count punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list 1. item a 2. item b 3. item c. Now let's add he/she/it or a c:\\Users\\john. 2/15/2012 is the date! { HYPERLINK 'http://www.hello.com' }"
|
7
7
|
ws = WordCountAnalyzer::Analyzer.new(text: text)
|
8
8
|
expect(ws.analyze).to eq({"ellipsis"=>1, "hyperlink"=>2, "contraction"=>4, "hyphenated_word"=>2, "date"=>2, "number"=>1, "numbered_list"=>3, "xhtml"=>1, "forward_slash"=>1, "backslash"=>1, "dotted_line"=>1, "dashed_line"=>1, "underscore"=>1, "stray_punctuation"=>5})
|
9
9
|
end
|
10
|
+
|
11
|
+
it 'should analyze the gray areas #002' do
|
12
|
+
text = "hello world ..."
|
13
|
+
ws = WordCountAnalyzer::Analyzer.new(text: text)
|
14
|
+
expect(ws.analyze).to eq({"ellipsis"=>1, "hyperlink"=>0, "contraction"=>0, "hyphenated_word"=>0, "date"=>0, "number"=>0, "numbered_list"=>0, "xhtml"=>0, "forward_slash"=>0, "backslash"=>0, "dotted_line"=>0, "dashed_line"=>0, "underscore"=>0, "stray_punctuation"=>0})
|
15
|
+
end
|
10
16
|
end
|
11
17
|
end
|
@@ -629,6 +629,11 @@ RSpec.describe WordCountAnalyzer::Counter do
|
|
629
629
|
expect(ws.count).to eq(6)
|
630
630
|
end
|
631
631
|
|
632
|
+
it 'String #004' do
|
633
|
+
ws = WordCountAnalyzer::Counter.new(text: 'hello world ...')
|
634
|
+
expect(ws.count).to eq(2)
|
635
|
+
end
|
636
|
+
|
632
637
|
it 'does not split on unicode chars' do
|
633
638
|
ws = WordCountAnalyzer::Counter.new(text: 'São Paulo')
|
634
639
|
expect(ws.count).to eq(2)
|
@@ -32,19 +32,25 @@ RSpec.describe WordCountAnalyzer::Ellipsis do
|
|
32
32
|
expect(ws.includes_ellipsis?).to eq(true)
|
33
33
|
end
|
34
34
|
|
35
|
-
it "returns false if the string doesn't include an ellipsis #006" do
|
35
|
+
it 'returns true if the string includes an ellipsis #006' do
|
36
|
+
string = 'hello world ...'
|
37
|
+
ws = WordCountAnalyzer::Ellipsis.new(string: string)
|
38
|
+
expect(ws.includes_ellipsis?).to eq(true)
|
39
|
+
end
|
40
|
+
|
41
|
+
it "returns false if the string doesn't include an ellipsis #007" do
|
36
42
|
string = 'Hello world.'
|
37
43
|
ws = WordCountAnalyzer::Ellipsis.new(string: string)
|
38
44
|
expect(ws.includes_ellipsis?).to eq(false)
|
39
45
|
end
|
40
46
|
|
41
|
-
it "returns false if the string includes a dotted_line #007" do
|
47
|
+
it "returns false if the string includes a dotted_line #008" do
|
42
48
|
string = '.....'
|
43
49
|
ws = WordCountAnalyzer::Ellipsis.new(string: string)
|
44
50
|
expect(ws.includes_ellipsis?).to eq(false)
|
45
51
|
end
|
46
52
|
|
47
|
-
it "returns false if the string includes a dotted_line #008" do
|
53
|
+
it "returns false if the string includes a dotted_line #009" do
|
48
54
|
string = "Here is one …………………………………………………………………… and another ......"
|
49
55
|
ws = WordCountAnalyzer::Ellipsis.new(string: string)
|
50
56
|
expect(ws.includes_ellipsis?).to eq(false)
|
@@ -55,7 +61,7 @@ RSpec.describe WordCountAnalyzer::Ellipsis do
|
|
55
61
|
it 'returns a string with the ellipsis replaced #001' do
|
56
62
|
string = 'Using an ellipsis … causes different counts…depending on the style . . . that you use. I never meant that.... She left the store. The practice was not abandoned. . . .'
|
57
63
|
ws = WordCountAnalyzer::Ellipsis.new(string: string)
|
58
|
-
expect(ws.replace).to eq("Using an ellipsis wseword causes different counts wseword depending on the style wseword that you use. I never meant that
|
64
|
+
expect(ws.replace).to eq("Using an ellipsis wseword causes different counts wseword depending on the style wseword that you use. I never meant that wseword She left the store. The practice was not abandoned wseword ")
|
59
65
|
end
|
60
66
|
end
|
61
67
|
|