word_count_analyzer 0.0.14 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 625971163e5252e84551ad9f2cbdf0a33767a077
4
- data.tar.gz: 898713d69d40a65120d0856a9ac4c3f48eaed58f
3
+ metadata.gz: e5a101dde1b0e3db7728e7c17716ee5e4a3201e7
4
+ data.tar.gz: e16de9a391248d423b88d24c5e2a835a480f8623
5
5
  SHA512:
6
- metadata.gz: 9781cebd86bde81d142db0260dddad85a1b091624a2387866ceacc631c36c602af7af983a7d11d15ca4ee4dba07ba3975ad9026423534c5121736a7ded508371
7
- data.tar.gz: e41d5d0f159b2110e922701d9beff39095130582f59873b795d82f86150e0aac79c1e99979ad51e90b22c648eff0cdfda1a8d43ae09ee140e6e24e10c859c28f
6
+ metadata.gz: cda1823794f39e1b086e93ae62e10cb8db43f6aab8659988a5f391eb4b9af64c898f8224b7e7a4a0689838885c35078f9be4b639eaf237d0175dcd6183783f19
7
+ data.tar.gz: a0900c47c0a29afd32169bb35fddd1dbb96685c420def00d0b26f8440a0a8b71ddc9e5badfb8683096d7e6ddf475c9f66f3c023959a926a4e1696593c0398f73
data/README.md CHANGED
@@ -49,7 +49,7 @@ Other gray areas not covered by this gem:
49
49
 
50
50
  ```ruby
51
51
  text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 4. <html> Some HTML and a hyphenated-word</html>. Don't count stray punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list 1. item a 2. item b 3. item c. Now let's add he/she/it or a c:\\Users\\john. 2/15/2012 is the date! { HYPERLINK 'http://www.hello.com' }"
52
- WordCountAnalyzer::Analyzer.new(text: text).analyze
52
+ WordCountAnalyzer::Analyzer.new.analyze(text)
53
53
 
54
54
  # => {
55
55
  # "ellipsis": 1,
@@ -74,22 +74,21 @@ WordCountAnalyzer::Analyzer.new(text: text).analyze
74
74
  ```ruby
75
75
  text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 2. <html> Some HTML and a hyphenated-word</html>. Don't count punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list \n\n1. item a \n\n2. item b \n\n3. item c. Now let's add he/she/it or a c:\\Users\\john. 2/15/2012 is the date! { HYPERLINK 'http://www.hello.com' }"
76
76
 
77
- WordCountAnalyzer::Counter.new(text: text).count
77
+ WordCountAnalyzer::Counter.new.count(text)
78
78
  # => 64
79
79
 
80
80
  # Overrides all settings to match the way Pages handles word count.
81
81
  # N.B. The developers of Pages may change the algorithm at any time, so this should be treated only as an approximation.
82
- WordCountAnalyzer::Counter.new(text: text).pages_count
82
+ WordCountAnalyzer::Counter.new.pages_count(text)
83
83
  # => 76 (or 79 if the list items are not formatted as a list)
84
84
 
85
85
  # Overrides all settings to match the way Microsoft Word and wc (Unix) handle word count.
86
86
  # N.B. The developers of these tools may change the algorithm at any time, so this should be treated only as an approximation.
87
- WordCountAnalyzer::Counter.new(text: text).mword_count
87
+ WordCountAnalyzer::Counter.new.mword_count(text)
88
88
  # => 71
89
89
 
90
90
  # Highly configurable (see all options below)
91
91
  WordCountAnalyzer::Counter.new(
92
- text: text,
93
92
  ellipsis: 'no_special_treatment',
94
93
  hyperlink: 'no_special_treatment',
95
94
  contraction: 'count_as_multiple',
@@ -104,7 +103,7 @@ WordCountAnalyzer::Counter.new(
104
103
  dashed_line: 'count',
105
104
  underscore: 'count',
106
105
  stray_punctuation: 'count'
107
- ).count
106
+ ).count(text)
108
107
 
109
108
  # => 77
110
109
  ```
@@ -1,23 +1,24 @@
1
1
  module WordCountAnalyzer
2
2
  class Analyzer
3
- attr_reader :text
3
+ attr_reader :text, :tagger
4
4
  def initialize(text:)
5
5
  @text = text
6
+ @tagger = EngTagger.new
6
7
  end
7
8
 
8
9
  def analyze
9
10
  analysis = {}
10
- analysis['ellipsis'] = WordCountAnalyzer::Ellipsis.new(string: text).occurences
11
+ analysis['ellipsis'] = WordCountAnalyzer::Ellipsis.new.occurences(text)
11
12
  contraction_count = 0
12
13
  hyphenated_word_count = 0
13
14
  WordCountAnalyzer::Xhtml.new(string: text).replace.split(/\s+/).each_with_index do |token, index|
14
- contraction_count += 1 if WordCountAnalyzer::Contraction.new(token: token, following_token: text.split(/\s+/)[index + 1], tgr: EngTagger.new, hyphen: 'single').contraction?
15
+ contraction_count += 1 if WordCountAnalyzer::Contraction.new(token: token, following_token: text.split(/\s+/)[index + 1], tgr: tagger, hyphen: 'single').contraction?
15
16
  hyphenated_word_count += 1 if WordCountAnalyzer::HyphenatedWord.new(token: token).hyphenated_word?
16
17
  end
17
- analysis['hyperlink'] = WordCountAnalyzer::Hyperlink.new(string: text).occurences
18
+ analysis['hyperlink'] = WordCountAnalyzer::Hyperlink.new.occurences(text)
18
19
  analysis['contraction'] = contraction_count
19
20
  analysis['hyphenated_word'] = hyphenated_word_count
20
- analysis['date'] = WordCountAnalyzer::Date.new(string: text).occurences
21
+ analysis['date'] = WordCountAnalyzer::Date.new.occurences(text)
21
22
  analysis['number'] = WordCountAnalyzer::Number.new(string: text).occurences
22
23
  analysis['numbered_list'] = WordCountAnalyzer::NumberedList.new(string: text).occurences
23
24
  analysis['xhtml'] = WordCountAnalyzer::Xhtml.new(string: text).occurences
@@ -81,7 +81,7 @@ module WordCountAnalyzer
81
81
  "jack-o'-lantern" => "jack-of-the-lantern",
82
82
  "will-o'-the-wisp" => "will-of-the-wisp",
83
83
  "'twas" => "it was"
84
- }
84
+ }.freeze
85
85
 
86
86
  attr_reader :token, :following_token, :tgr, :hyphen
87
87
  def initialize(token:, following_token:, tgr:, **args)
@@ -1,8 +1,7 @@
1
1
  module WordCountAnalyzer
2
2
  class Counter
3
- attr_reader :text, :ellipsis, :hyperlink, :contraction, :hyphenated_word, :date, :number, :numbered_list, :xhtml, :forward_slash, :backslash, :dotted_line, :dashed_line, :underscore, :stray_punctuation, :equal_sign
4
- def initialize(text:, **args)
5
- @text = text
3
+ attr_reader :ellipsis, :hyperlink, :contraction, :hyphenated_word, :date, :number, :numbered_list, :xhtml, :forward_slash, :backslash, :dotted_line, :dashed_line, :underscore, :stray_punctuation, :equal_sign
4
+ def initialize(**args)
6
5
  @ellipsis = args[:ellipsis] || 'ignore'
7
6
  @hyperlink = args[:hyperlink] || 'count_as_one'
8
7
  @contraction = args[:contraction] || 'count_as_one'
@@ -18,13 +17,14 @@ module WordCountAnalyzer
18
17
  @underscore = args[:underscore] || 'ignore'
19
18
  @stray_punctuation = args[:stray_punctuation] || 'ignore'
20
19
  @equal_sign = 'ignore'
20
+ @tgr = EngTagger.new
21
21
  end
22
22
 
23
- def count
24
- word_count
23
+ def count(text)
24
+ word_count(text)
25
25
  end
26
26
 
27
- def pages_count
27
+ def pages_count(text)
28
28
  @ellipsis = 'ignore'
29
29
  @hyperlink = 'split_at_period'
30
30
  @contraction = 'count_as_one'
@@ -40,10 +40,10 @@ module WordCountAnalyzer
40
40
  @underscore = 'ignore'
41
41
  @stray_punctuation = 'ignore'
42
42
  @equal_sign = 'break'
43
- word_count
43
+ word_count(text)
44
44
  end
45
45
 
46
- def mword_count
46
+ def mword_count(text)
47
47
  @ellipsis = 'no_special_treatment'
48
48
  @hyperlink = 'count_as_one'
49
49
  @contraction = 'count_as_one'
@@ -58,16 +58,15 @@ module WordCountAnalyzer
58
58
  @dashed_line = 'count'
59
59
  @underscore = 'count'
60
60
  @stray_punctuation = 'count'
61
- word_count
61
+ word_count(text)
62
62
  end
63
63
 
64
64
  private
65
65
 
66
- def word_count
67
- tgr = EngTagger.new
66
+ def word_count(text)
68
67
  processed_text = process_ellipsis(text)
69
68
  processed_text = process_hyperlink(processed_text)
70
- processed_text = process_contraction(processed_text, tgr)
69
+ processed_text = process_contraction(processed_text, @tgr)
71
70
  processed_text = process_date(processed_text)
72
71
  processed_text = process_number(processed_text)
73
72
  processed_text = process_number_list(processed_text)
@@ -85,7 +84,7 @@ module WordCountAnalyzer
85
84
 
86
85
  def process_ellipsis(txt)
87
86
  if ellipsis.eql?('ignore')
88
- WordCountAnalyzer::Ellipsis.new(string: txt).replace.gsub(/wseword/, '')
87
+ WordCountAnalyzer::Ellipsis.new.replace(txt).gsub(/wseword/, '')
89
88
  elsif ellipsis.eql?('no_special_treatment')
90
89
  txt
91
90
  else
@@ -96,9 +95,9 @@ module WordCountAnalyzer
96
95
  def process_hyperlink(txt)
97
96
  case
98
97
  when hyperlink.eql?('count_as_one')
99
- WordCountAnalyzer::Hyperlink.new(string: txt).replace
98
+ WordCountAnalyzer::Hyperlink.new.replace(txt)
100
99
  when hyperlink.eql?('split_at_period')
101
- WordCountAnalyzer::Hyperlink.new(string: txt).replace_split_at_period
100
+ WordCountAnalyzer::Hyperlink.new.replace_split_at_period(txt)
102
101
  when hyperlink.eql?('no_special_treatment')
103
102
  txt
104
103
  else
@@ -131,7 +130,7 @@ module WordCountAnalyzer
131
130
  if date.eql?('no_special_treatment')
132
131
  txt
133
132
  elsif date.eql?('count_as_one')
134
- WordCountAnalyzer::Date.new(string: txt).replace
133
+ WordCountAnalyzer::Date.new.replace(txt)
135
134
  else
136
135
  raise 'The value you specified for date is not a valid option. Please use either `count_as_one` or `no_special_treatment`. The default option is `no_special_treatment`'
137
136
  end
@@ -16,134 +16,107 @@ module WordCountAnalyzer
16
16
  # Rubular: http://rubular.com/r/mpVSeaKwdY
17
17
  DIGIT_ONLY_YEAR_LAST_REGEX = /\d{4}[12]\d{3}\D\.?/
18
18
 
19
- attr_reader :string
20
- def initialize(string:)
21
- @string = string
19
+ def includes_date?(text)
20
+ includes_long_date?(text) || includes_number_only_date?(text)
22
21
  end
23
22
 
24
- def includes_date?
25
- long_date || number_only_date
23
+ def replace(text)
24
+ counter = 0
25
+ DOW_ABBR.map { |day| counter +=1 if text.include?('day') }
26
+ text = redact_dates(counter, text)
27
+ redact_regex(text)
26
28
  end
27
29
 
28
- def replace
29
- new_string = string.dup
30
- counter = 0
31
- DOW_ABBR.each do |day|
32
- counter +=1 if string.include?('day')
33
- end
30
+ def occurences(text)
31
+ replace(text).scan(/wsdateword/).size
32
+ end
33
+
34
+ def replace_number_only_date(text)
35
+ text.gsub(DMY_MDY_REGEX, ' wsdateword ')
36
+ .gsub(YMD_YDM_REGEX, ' wsdateword ')
37
+ .gsub(DIGIT_ONLY_YEAR_FIRST_REGEX, ' wsdateword ')
38
+ .gsub(DIGIT_ONLY_YEAR_LAST_REGEX, ' wsdateword ')
39
+ end
40
+
41
+ private
42
+
43
+ def redact_dates(counter, text)
34
44
  if counter > 0
35
- DOW_ABBR.each do |day|
36
- MONTHS.each do |month|
37
- new_string = new_string.gsub(/#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
38
- end
39
- MONTH_ABBR.each do |month|
40
- new_string = new_string.gsub(/#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
41
- end
42
- end
43
- DOW.each do |day|
44
- MONTHS.each do |month|
45
- new_string = new_string.gsub(/#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
46
- .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
47
- .gsub(/\d{4}\.*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
48
- .gsub(/\d{4}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*\d+\.?/i, ' wsdateword ')
49
- .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
50
- .gsub(/\d{2}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*(\d{4}|\d{2})\.?/i, ' wsdateword ')
51
- end
52
- MONTH_ABBR.each do |month|
53
- new_string = new_string.gsub(/#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
54
- .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
55
- .gsub(/\d{4}\.*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
56
- .gsub(/\d{4}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*\d+\.?/i, ' wsdateword ')
57
- .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
58
- .gsub(/\d{2}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*(\d{4}|\d{2})\.?/i, ' wsdateword ')
59
- end
60
- end
45
+ text = redact_dow_abbr(text)
46
+ text = redact_dow(text)
61
47
  else
62
- DOW.each do |day|
63
- MONTHS.each do |month|
64
- new_string = new_string.gsub(/#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
65
- .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
66
- .gsub(/\d{4}\.*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
67
- .gsub(/\d{4}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*\d+\.?/i, ' wsdateword ')
68
- .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
69
- .gsub(/\d{2}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*(\d{4}|\d{2})\.?/i, ' wsdateword ')
70
- end
71
- MONTH_ABBR.each do |month|
72
- new_string = new_string.gsub(/#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
48
+ text = redact_dow(text)
49
+ text = redact_dow_abbr(text)
50
+ end
51
+ text
52
+ end
53
+
54
+ def redact_regex(text)
55
+ text.gsub(DMY_MDY_REGEX, ' wsdateword ')
56
+ .gsub(YMD_YDM_REGEX, ' wsdateword ')
57
+ .gsub(DIGIT_ONLY_YEAR_FIRST_REGEX, ' wsdateword ')
58
+ .gsub(DIGIT_ONLY_YEAR_LAST_REGEX, ' wsdateword ')
59
+ end
60
+
61
+ def redact_dow(text)
62
+ DOW.each do |day|
63
+ MONTHS.map { |month| text = redact_date(text, day, month) }
64
+ MONTH_ABBR.map { |month| text = redact_date(text, day, month) }
65
+ end
66
+ text
67
+ end
68
+
69
+ def redact_dow_abbr(text)
70
+ DOW_ABBR.each do |day|
71
+ MONTHS.map { |month| text = text.gsub(/#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ') }
72
+ MONTH_ABBR.map { |month| text = text.gsub(/#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ') }
73
+ end
74
+ text
75
+ end
76
+
77
+ def redact_date(text, day, month)
78
+ text.gsub(/#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
73
79
  .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
74
80
  .gsub(/\d{4}\.*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
75
81
  .gsub(/\d{4}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*\d+\.?/i, ' wsdateword ')
76
82
  .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
77
83
  .gsub(/\d{2}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*(\d{4}|\d{2})\.?/i, ' wsdateword ')
78
- end
79
- end
80
- DOW_ABBR.each do |day|
81
- MONTHS.each do |month|
82
- new_string = new_string.gsub(/#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
83
- end
84
- MONTH_ABBR.each do |month|
85
- new_string = new_string.gsub(/#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
86
- end
87
- end
88
- end
89
- new_string = new_string.gsub(DMY_MDY_REGEX, ' wsdateword ')
90
- .gsub(YMD_YDM_REGEX, ' wsdateword ')
91
- .gsub(DIGIT_ONLY_YEAR_FIRST_REGEX, ' wsdateword ')
92
- .gsub(DIGIT_ONLY_YEAR_LAST_REGEX, ' wsdateword ')
93
- end
94
-
95
- def occurences
96
- replace.scan(/wsdateword/).size
97
84
  end
98
85
 
99
- def replace_number_only_date
100
- string.gsub(DMY_MDY_REGEX, ' wsdateword ')
101
- .gsub(YMD_YDM_REGEX, ' wsdateword ')
102
- .gsub(DIGIT_ONLY_YEAR_FIRST_REGEX, ' wsdateword ')
103
- .gsub(DIGIT_ONLY_YEAR_LAST_REGEX, ' wsdateword ')
86
+ def includes_long_date?(text)
87
+ includes_long_date_1?(text) || includes_long_date_2?(text)
104
88
  end
105
89
 
106
- private
107
-
108
- def long_date
109
- match_found = false
90
+ def includes_long_date_1?(text)
110
91
  DOW.each do |day|
111
- MONTHS.each do |month|
112
- break if match_found
113
- match_found = check_for_matches(day, month)
114
- end
115
- MONTH_ABBR.each do |month|
116
- break if match_found
117
- match_found = check_for_matches(day, month)
118
- end
92
+ MONTHS.map { |month| return true if check_for_matches(day, month, text) }
93
+ MONTH_ABBR.map { |month| return true if check_for_matches(day, month, text) }
119
94
  end
95
+ false
96
+ end
97
+
98
+ def includes_long_date_2?(text)
120
99
  DOW_ABBR.each do |day|
121
- MONTHS.each do |month|
122
- break if match_found
123
- match_found = !(string !~ /#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*(,)*\s\d{4}/i)
124
- end
125
- MONTH_ABBR.each do |month|
126
- break if match_found
127
- match_found = !(string !~ /#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}/i)
128
- end
100
+ MONTHS.map { |month| return true if !(text !~ /#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*(,)*\s\d{4}/i) }
101
+ MONTH_ABBR.map { |month| return true if !(text !~ /#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}/i) }
129
102
  end
130
- match_found
103
+ false
131
104
  end
132
105
 
133
- def number_only_date
134
- !(string !~ DMY_MDY_REGEX) ||
135
- !(string !~ YMD_YDM_REGEX) ||
136
- !(string !~ DIGIT_ONLY_YEAR_FIRST_REGEX) ||
137
- !(string !~ DIGIT_ONLY_YEAR_LAST_REGEX)
106
+ def includes_number_only_date?(text)
107
+ !(text !~ DMY_MDY_REGEX) ||
108
+ !(text !~ YMD_YDM_REGEX) ||
109
+ !(text !~ DIGIT_ONLY_YEAR_FIRST_REGEX) ||
110
+ !(text !~ DIGIT_ONLY_YEAR_LAST_REGEX)
138
111
  end
139
112
 
140
- def check_for_matches(day, month)
141
- !(string !~ /#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}/i) ||
142
- !(string !~ /#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}/i) ||
143
- !(string !~ /\d{4}\.*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*/i) ||
144
- !(string !~ /\d{4}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*\d+/i) ||
145
- !(string !~ /#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*/i) ||
146
- !(string !~ /\d{2}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*(\d{4}|\d{2})/i)
113
+ def check_for_matches(day, month, text)
114
+ !(text !~ /#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}/i) ||
115
+ !(text !~ /#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}/i) ||
116
+ !(text !~ /\d{4}\.*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*/i) ||
117
+ !(text !~ /\d{4}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*\d+/i) ||
118
+ !(text !~ /#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*/i) ||
119
+ !(text !~ /\d{2}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*(\d{4}|\d{2})/i)
147
120
  end
148
121
  end
149
- end
122
+ end
@@ -13,30 +13,25 @@ module WordCountAnalyzer
13
13
 
14
14
  UNICODE_ELLIPSIS = /(?<=[^…]|\A)…{1}(?=[^…]|$)/
15
15
 
16
- attr_reader :string
17
- def initialize(string:)
18
- @string = string
16
+ def includes_ellipsis?(text)
17
+ !(text !~ FOUR_CONSECUTIVE_REGEX) ||
18
+ !(text !~ THREE_SPACE_REGEX) ||
19
+ !(text !~ FOUR_SPACE_REGEX) ||
20
+ !(text !~ OTHER_THREE_PERIOD_REGEX) ||
21
+ !(text !~ UNICODE_ELLIPSIS)
19
22
  end
20
23
 
21
- def includes_ellipsis?
22
- !(string !~ FOUR_CONSECUTIVE_REGEX) ||
23
- !(string !~ THREE_SPACE_REGEX) ||
24
- !(string !~ FOUR_SPACE_REGEX) ||
25
- !(string !~ OTHER_THREE_PERIOD_REGEX) ||
26
- !(string !~ UNICODE_ELLIPSIS)
27
- end
28
-
29
- def replace
30
- string.gsub(FOUR_CONSECUTIVE_REGEX, ' wseword ')
24
+ def replace(text)
25
+ text.gsub(FOUR_CONSECUTIVE_REGEX, ' wseword ')
31
26
  .gsub(THREE_SPACE_REGEX, ' wseword ')
32
27
  .gsub(FOUR_SPACE_REGEX, ' wseword ')
33
28
  .gsub(OTHER_THREE_PERIOD_REGEX, ' wseword ')
34
29
  .gsub(UNICODE_ELLIPSIS, ' wseword ')
35
30
  end
36
31
 
37
- def occurences
32
+ def occurences(text)
38
33
  count = 0
39
- replace.split(' ').map { |token| count += 1 if token.strip.eql?('wseword') }
34
+ replace(text).split(' ').map { |token| count += 1 if token.strip.eql?('wseword') }
40
35
  count
41
36
  end
42
37
  end
@@ -7,49 +7,38 @@ module WordCountAnalyzer
7
7
  # Rubular: http://rubular.com/r/fXa4lp0gfS
8
8
  HYPERLINK_REGEX = /(http|https|www)(\.|:)/
9
9
 
10
- attr_reader :string
11
- def initialize(string:)
12
- @string = string
10
+ def hyperlink?(text)
11
+ !(text !~ URI.regexp) && text !~ NON_HYPERLINK_REGEX && !(text !~ HYPERLINK_REGEX)
13
12
  end
14
13
 
15
- def hyperlink?
16
- !(string !~ URI.regexp) && string !~ NON_HYPERLINK_REGEX && !(string !~ HYPERLINK_REGEX)
14
+ def occurences(text)
15
+ text.scan(URI.regexp).map { |link| link.compact.size > 1 ? 1 : 0 }.inject(0) { |sum, x| sum + x }
17
16
  end
18
17
 
19
- def occurences
20
- counter = 0
21
- string.scan(URI.regexp).each do |link|
22
- counter += 1 if link.compact.size > 1
23
- end
24
- counter
25
- end
26
-
27
- def replace
28
- new_string = string.dup
29
- string.split(/\s+/).each do |token|
18
+ def replace(text)
19
+ text.split(/\s+/).each do |token|
30
20
  if !(token !~ URI.regexp) && token !~ NON_HYPERLINK_REGEX && !(token !~ HYPERLINK_REGEX) && token.include?('">')
31
- new_string = new_string.gsub(/#{Regexp.escape(token.split('">')[0])}/, ' wslinkword ')
21
+ text = text.gsub(/#{Regexp.escape(token.split('">')[0])}/, ' wslinkword ')
32
22
  elsif !(token !~ URI.regexp) && token !~ NON_HYPERLINK_REGEX && !(token !~ HYPERLINK_REGEX)
33
- new_string = new_string.gsub(/#{Regexp.escape(token)}/, ' wslinkword ')
23
+ text = text.gsub(/#{Regexp.escape(token)}/, ' wslinkword ')
34
24
  end
35
25
  end
36
- new_string
26
+ text
37
27
  end
38
28
 
39
- def replace_split_at_period
40
- new_string = string.dup
41
- string.split(/\s+/).each do |token|
29
+ def replace_split_at_period(text)
30
+ text.split(/\s+/).each do |token|
42
31
  if !(token !~ URI.regexp) && token !~ NON_HYPERLINK_REGEX && !(token !~ HYPERLINK_REGEX) && token.include?('">')
43
- new_string.gsub!(/#{Regexp.escape(token.split('">')[0])}/) do |match|
32
+ text.gsub!(/#{Regexp.escape(token.split('">')[0])}/) do |match|
44
33
  match.split('.').join(' ')
45
34
  end
46
35
  elsif !(token !~ URI.regexp) && token !~ NON_HYPERLINK_REGEX && !(token !~ HYPERLINK_REGEX)
47
- new_string.gsub!(/#{Regexp.escape(token)}/) do |match|
36
+ text.gsub!(/#{Regexp.escape(token)}/) do |match|
48
37
  match.split('.').join(' ')
49
38
  end
50
39
  end
51
40
  end
52
- new_string
41
+ text
53
42
  end
54
43
  end
55
44
  end