word_count_analyzer 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7e0495c86a5d6731ba74d17ee091a8d0b9b7f225
|
4
|
+
data.tar.gz: 400039a69d59b118fa4e42c96fc59e4d796bf0f3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 80ef57f5085d9ceb8b6de3f516313109e872a6f0cbaf3749417d5108ba51d0711bc9bfa8575593db46968d2f9d35ff0aef7be0f1613a546968d851d122a840f2
|
7
|
+
data.tar.gz: 4c6b1843507d4774183345b59ea7c44b3aca8e967965f79c96ae187ca069c48986bb7ef892b20959feb8ce2566ff62dbefda6232c396fb754fbd85bd989657cb
|
@@ -1,8 +1,5 @@
|
|
1
1
|
module WordCountAnalyzer
|
2
2
|
class Ellipsis
|
3
|
-
# Rubular: http://rubular.com/r/i60hCK81fz
|
4
|
-
THREE_CONSECUTIVE_REGEX = /\.{3}(?=\s+[A-Z])/
|
5
|
-
|
6
3
|
# Rubular: http://rubular.com/r/mfdtSeuIf2
|
7
4
|
FOUR_CONSECUTIVE_REGEX = /(?<=[^\.])\.{3}\.(?=[^\.])/
|
8
5
|
|
@@ -12,7 +9,7 @@ module WordCountAnalyzer
|
|
12
9
|
# Rubular: http://rubular.com/r/2VvZ8wRbd8
|
13
10
|
FOUR_SPACE_REGEX = /(?<=[a-z])(\.\s){3}\.(\z|$|\n)/
|
14
11
|
|
15
|
-
OTHER_THREE_PERIOD_REGEX = /[^\.]\.{3}[^\.]/
|
12
|
+
OTHER_THREE_PERIOD_REGEX = /[^\.]\.{3}([^\.]|$)/
|
16
13
|
|
17
14
|
UNICODE_ELLIPSIS = /(?<=[^…])…{1}(?=[^…])/
|
18
15
|
|
@@ -22,7 +19,6 @@ module WordCountAnalyzer
|
|
22
19
|
end
|
23
20
|
|
24
21
|
def includes_ellipsis?
|
25
|
-
!(string !~ THREE_CONSECUTIVE_REGEX) ||
|
26
22
|
!(string !~ FOUR_CONSECUTIVE_REGEX) ||
|
27
23
|
!(string !~ THREE_SPACE_REGEX) ||
|
28
24
|
!(string !~ FOUR_SPACE_REGEX) ||
|
@@ -31,8 +27,7 @@ module WordCountAnalyzer
|
|
31
27
|
end
|
32
28
|
|
33
29
|
def replace
|
34
|
-
string.gsub(THREE_CONSECUTIVE_REGEX, ' wseword ')
|
35
|
-
.gsub(FOUR_CONSECUTIVE_REGEX, ' wseword ')
|
30
|
+
string.gsub(FOUR_CONSECUTIVE_REGEX, ' wseword ')
|
36
31
|
.gsub(THREE_SPACE_REGEX, ' wseword ')
|
37
32
|
.gsub(FOUR_SPACE_REGEX, ' wseword ')
|
38
33
|
.gsub(OTHER_THREE_PERIOD_REGEX, ' wseword ')
|
@@ -2,10 +2,16 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
RSpec.describe WordCountAnalyzer::Analyzer do
|
4
4
|
context '#analysis' do
|
5
|
-
it 'should analyze the gray areas' do
|
5
|
+
it 'should analyze the gray areas #001' do
|
6
6
|
text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 2. <html> Some HTML and a hyphenated-word</html>. Don't count punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list 1. item a 2. item b 3. item c. Now let's add he/she/it or a c:\\Users\\john. 2/15/2012 is the date! { HYPERLINK 'http://www.hello.com' }"
|
7
7
|
ws = WordCountAnalyzer::Analyzer.new(text: text)
|
8
8
|
expect(ws.analyze).to eq({"ellipsis"=>1, "hyperlink"=>2, "contraction"=>4, "hyphenated_word"=>2, "date"=>2, "number"=>1, "numbered_list"=>3, "xhtml"=>1, "forward_slash"=>1, "backslash"=>1, "dotted_line"=>1, "dashed_line"=>1, "underscore"=>1, "stray_punctuation"=>5})
|
9
9
|
end
|
10
|
+
|
11
|
+
it 'should analyze the gray areas #002' do
|
12
|
+
text = "hello world ..."
|
13
|
+
ws = WordCountAnalyzer::Analyzer.new(text: text)
|
14
|
+
expect(ws.analyze).to eq({"ellipsis"=>1, "hyperlink"=>0, "contraction"=>0, "hyphenated_word"=>0, "date"=>0, "number"=>0, "numbered_list"=>0, "xhtml"=>0, "forward_slash"=>0, "backslash"=>0, "dotted_line"=>0, "dashed_line"=>0, "underscore"=>0, "stray_punctuation"=>0})
|
15
|
+
end
|
10
16
|
end
|
11
17
|
end
|
@@ -629,6 +629,11 @@ RSpec.describe WordCountAnalyzer::Counter do
|
|
629
629
|
expect(ws.count).to eq(6)
|
630
630
|
end
|
631
631
|
|
632
|
+
it 'String #004' do
|
633
|
+
ws = WordCountAnalyzer::Counter.new(text: 'hello world ...')
|
634
|
+
expect(ws.count).to eq(2)
|
635
|
+
end
|
636
|
+
|
632
637
|
it 'does not split on unicode chars' do
|
633
638
|
ws = WordCountAnalyzer::Counter.new(text: 'São Paulo')
|
634
639
|
expect(ws.count).to eq(2)
|
@@ -32,19 +32,25 @@ RSpec.describe WordCountAnalyzer::Ellipsis do
|
|
32
32
|
expect(ws.includes_ellipsis?).to eq(true)
|
33
33
|
end
|
34
34
|
|
35
|
-
it
|
35
|
+
it 'returns true if the string includes an ellipsis #006' do
|
36
|
+
string = 'hello world ...'
|
37
|
+
ws = WordCountAnalyzer::Ellipsis.new(string: string)
|
38
|
+
expect(ws.includes_ellipsis?).to eq(true)
|
39
|
+
end
|
40
|
+
|
41
|
+
it "returns false if the string doesn't include an ellipsis #007" do
|
36
42
|
string = 'Hello world.'
|
37
43
|
ws = WordCountAnalyzer::Ellipsis.new(string: string)
|
38
44
|
expect(ws.includes_ellipsis?).to eq(false)
|
39
45
|
end
|
40
46
|
|
41
|
-
it "returns false if the string includes a dotted_line #
|
47
|
+
it "returns false if the string includes a dotted_line #008" do
|
42
48
|
string = '.....'
|
43
49
|
ws = WordCountAnalyzer::Ellipsis.new(string: string)
|
44
50
|
expect(ws.includes_ellipsis?).to eq(false)
|
45
51
|
end
|
46
52
|
|
47
|
-
it "returns false if the string includes a dotted_line #
|
53
|
+
it "returns false if the string includes a dotted_line #009" do
|
48
54
|
string = "Here is one …………………………………………………………………… and another ......"
|
49
55
|
ws = WordCountAnalyzer::Ellipsis.new(string: string)
|
50
56
|
expect(ws.includes_ellipsis?).to eq(false)
|
@@ -55,7 +61,7 @@ RSpec.describe WordCountAnalyzer::Ellipsis do
|
|
55
61
|
it 'returns a string with the ellipsis replaced #001' do
|
56
62
|
string = 'Using an ellipsis … causes different counts…depending on the style . . . that you use. I never meant that.... She left the store. The practice was not abandoned. . . .'
|
57
63
|
ws = WordCountAnalyzer::Ellipsis.new(string: string)
|
58
|
-
expect(ws.replace).to eq("Using an ellipsis wseword causes different counts wseword depending on the style wseword that you use. I never meant that
|
64
|
+
expect(ws.replace).to eq("Using an ellipsis wseword causes different counts wseword depending on the style wseword that you use. I never meant that wseword She left the store. The practice was not abandoned wseword ")
|
59
65
|
end
|
60
66
|
end
|
61
67
|
|