word_count_analyzer 0.0.14 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 625971163e5252e84551ad9f2cbdf0a33767a077
4
- data.tar.gz: 898713d69d40a65120d0856a9ac4c3f48eaed58f
3
+ metadata.gz: e5a101dde1b0e3db7728e7c17716ee5e4a3201e7
4
+ data.tar.gz: e16de9a391248d423b88d24c5e2a835a480f8623
5
5
  SHA512:
6
- metadata.gz: 9781cebd86bde81d142db0260dddad85a1b091624a2387866ceacc631c36c602af7af983a7d11d15ca4ee4dba07ba3975ad9026423534c5121736a7ded508371
7
- data.tar.gz: e41d5d0f159b2110e922701d9beff39095130582f59873b795d82f86150e0aac79c1e99979ad51e90b22c648eff0cdfda1a8d43ae09ee140e6e24e10c859c28f
6
+ metadata.gz: cda1823794f39e1b086e93ae62e10cb8db43f6aab8659988a5f391eb4b9af64c898f8224b7e7a4a0689838885c35078f9be4b639eaf237d0175dcd6183783f19
7
+ data.tar.gz: a0900c47c0a29afd32169bb35fddd1dbb96685c420def00d0b26f8440a0a8b71ddc9e5badfb8683096d7e6ddf475c9f66f3c023959a926a4e1696593c0398f73
data/README.md CHANGED
@@ -49,7 +49,7 @@ Other gray areas not covered by this gem:
49
49
 
50
50
  ```ruby
51
51
  text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 4. <html> Some HTML and a hyphenated-word</html>. Don't count stray punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list 1. item a 2. item b 3. item c. Now let's add he/she/it or a c:\\Users\\john. 2/15/2012 is the date! { HYPERLINK 'http://www.hello.com' }"
52
- WordCountAnalyzer::Analyzer.new(text: text).analyze
52
+ WordCountAnalyzer::Analyzer.new.analyze(text)
53
53
 
54
54
  # => {
55
55
  # "ellipsis": 1,
@@ -74,22 +74,21 @@ WordCountAnalyzer::Analyzer.new(text: text).analyze
74
74
  ```ruby
75
75
  text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 2. <html> Some HTML and a hyphenated-word</html>. Don't count punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list \n\n1. item a \n\n2. item b \n\n3. item c. Now let's add he/she/it or a c:\\Users\\john. 2/15/2012 is the date! { HYPERLINK 'http://www.hello.com' }"
76
76
 
77
- WordCountAnalyzer::Counter.new(text: text).count
77
+ WordCountAnalyzer::Counter.new.count(text)
78
78
  # => 64
79
79
 
80
80
  # Overrides all settings to match the way Pages handles word count.
81
81
  # N.B. The developers of Pages may change the algorithm at any time, so this should be treated only as an approximation.
82
- WordCountAnalyzer::Counter.new(text: text).pages_count
82
+ WordCountAnalyzer::Counter.new.pages_count(text)
83
83
  # => 76 (or 79 if the list items are not formatted as a list)
84
84
 
85
85
  # Overrides all settings to match the way Microsoft Word and wc (Unix) handle word count.
86
86
  # N.B. The developers of these tools may change the algorithm at any time, so this should be treated only as an approximation.
87
- WordCountAnalyzer::Counter.new(text: text).mword_count
87
+ WordCountAnalyzer::Counter.new.mword_count(text)
88
88
  # => 71
89
89
 
90
90
  # Highly configurable (see all options below)
91
91
  WordCountAnalyzer::Counter.new(
92
- text: text,
93
92
  ellipsis: 'no_special_treatment',
94
93
  hyperlink: 'no_special_treatment',
95
94
  contraction: 'count_as_multiple',
@@ -104,7 +103,7 @@ WordCountAnalyzer::Counter.new(
104
103
  dashed_line: 'count',
105
104
  underscore: 'count',
106
105
  stray_punctuation: 'count'
107
- ).count
106
+ ).count(text)
108
107
 
109
108
  # => 77
110
109
  ```
@@ -1,23 +1,24 @@
1
1
  module WordCountAnalyzer
2
2
  class Analyzer
3
- attr_reader :text
3
+ attr_reader :text, :tagger
4
4
  def initialize(text:)
5
5
  @text = text
6
+ @tagger = EngTagger.new
6
7
  end
7
8
 
8
9
  def analyze
9
10
  analysis = {}
10
- analysis['ellipsis'] = WordCountAnalyzer::Ellipsis.new(string: text).occurences
11
+ analysis['ellipsis'] = WordCountAnalyzer::Ellipsis.new.occurences(text)
11
12
  contraction_count = 0
12
13
  hyphenated_word_count = 0
13
14
  WordCountAnalyzer::Xhtml.new(string: text).replace.split(/\s+/).each_with_index do |token, index|
14
- contraction_count += 1 if WordCountAnalyzer::Contraction.new(token: token, following_token: text.split(/\s+/)[index + 1], tgr: EngTagger.new, hyphen: 'single').contraction?
15
+ contraction_count += 1 if WordCountAnalyzer::Contraction.new(token: token, following_token: text.split(/\s+/)[index + 1], tgr: tagger, hyphen: 'single').contraction?
15
16
  hyphenated_word_count += 1 if WordCountAnalyzer::HyphenatedWord.new(token: token).hyphenated_word?
16
17
  end
17
- analysis['hyperlink'] = WordCountAnalyzer::Hyperlink.new(string: text).occurences
18
+ analysis['hyperlink'] = WordCountAnalyzer::Hyperlink.new.occurences(text)
18
19
  analysis['contraction'] = contraction_count
19
20
  analysis['hyphenated_word'] = hyphenated_word_count
20
- analysis['date'] = WordCountAnalyzer::Date.new(string: text).occurences
21
+ analysis['date'] = WordCountAnalyzer::Date.new.occurences(text)
21
22
  analysis['number'] = WordCountAnalyzer::Number.new(string: text).occurences
22
23
  analysis['numbered_list'] = WordCountAnalyzer::NumberedList.new(string: text).occurences
23
24
  analysis['xhtml'] = WordCountAnalyzer::Xhtml.new(string: text).occurences
@@ -81,7 +81,7 @@ module WordCountAnalyzer
81
81
  "jack-o'-lantern" => "jack-of-the-lantern",
82
82
  "will-o'-the-wisp" => "will-of-the-wisp",
83
83
  "'twas" => "it was"
84
- }
84
+ }.freeze
85
85
 
86
86
  attr_reader :token, :following_token, :tgr, :hyphen
87
87
  def initialize(token:, following_token:, tgr:, **args)
@@ -1,8 +1,7 @@
1
1
  module WordCountAnalyzer
2
2
  class Counter
3
- attr_reader :text, :ellipsis, :hyperlink, :contraction, :hyphenated_word, :date, :number, :numbered_list, :xhtml, :forward_slash, :backslash, :dotted_line, :dashed_line, :underscore, :stray_punctuation, :equal_sign
4
- def initialize(text:, **args)
5
- @text = text
3
+ attr_reader :ellipsis, :hyperlink, :contraction, :hyphenated_word, :date, :number, :numbered_list, :xhtml, :forward_slash, :backslash, :dotted_line, :dashed_line, :underscore, :stray_punctuation, :equal_sign
4
+ def initialize(**args)
6
5
  @ellipsis = args[:ellipsis] || 'ignore'
7
6
  @hyperlink = args[:hyperlink] || 'count_as_one'
8
7
  @contraction = args[:contraction] || 'count_as_one'
@@ -18,13 +17,14 @@ module WordCountAnalyzer
18
17
  @underscore = args[:underscore] || 'ignore'
19
18
  @stray_punctuation = args[:stray_punctuation] || 'ignore'
20
19
  @equal_sign = 'ignore'
20
+ @tgr = EngTagger.new
21
21
  end
22
22
 
23
- def count
24
- word_count
23
+ def count(text)
24
+ word_count(text)
25
25
  end
26
26
 
27
- def pages_count
27
+ def pages_count(text)
28
28
  @ellipsis = 'ignore'
29
29
  @hyperlink = 'split_at_period'
30
30
  @contraction = 'count_as_one'
@@ -40,10 +40,10 @@ module WordCountAnalyzer
40
40
  @underscore = 'ignore'
41
41
  @stray_punctuation = 'ignore'
42
42
  @equal_sign = 'break'
43
- word_count
43
+ word_count(text)
44
44
  end
45
45
 
46
- def mword_count
46
+ def mword_count(text)
47
47
  @ellipsis = 'no_special_treatment'
48
48
  @hyperlink = 'count_as_one'
49
49
  @contraction = 'count_as_one'
@@ -58,16 +58,15 @@ module WordCountAnalyzer
58
58
  @dashed_line = 'count'
59
59
  @underscore = 'count'
60
60
  @stray_punctuation = 'count'
61
- word_count
61
+ word_count(text)
62
62
  end
63
63
 
64
64
  private
65
65
 
66
- def word_count
67
- tgr = EngTagger.new
66
+ def word_count(text)
68
67
  processed_text = process_ellipsis(text)
69
68
  processed_text = process_hyperlink(processed_text)
70
- processed_text = process_contraction(processed_text, tgr)
69
+ processed_text = process_contraction(processed_text, @tgr)
71
70
  processed_text = process_date(processed_text)
72
71
  processed_text = process_number(processed_text)
73
72
  processed_text = process_number_list(processed_text)
@@ -85,7 +84,7 @@ module WordCountAnalyzer
85
84
 
86
85
  def process_ellipsis(txt)
87
86
  if ellipsis.eql?('ignore')
88
- WordCountAnalyzer::Ellipsis.new(string: txt).replace.gsub(/wseword/, '')
87
+ WordCountAnalyzer::Ellipsis.new.replace(txt).gsub(/wseword/, '')
89
88
  elsif ellipsis.eql?('no_special_treatment')
90
89
  txt
91
90
  else
@@ -96,9 +95,9 @@ module WordCountAnalyzer
96
95
  def process_hyperlink(txt)
97
96
  case
98
97
  when hyperlink.eql?('count_as_one')
99
- WordCountAnalyzer::Hyperlink.new(string: txt).replace
98
+ WordCountAnalyzer::Hyperlink.new.replace(txt)
100
99
  when hyperlink.eql?('split_at_period')
101
- WordCountAnalyzer::Hyperlink.new(string: txt).replace_split_at_period
100
+ WordCountAnalyzer::Hyperlink.new.replace_split_at_period(txt)
102
101
  when hyperlink.eql?('no_special_treatment')
103
102
  txt
104
103
  else
@@ -131,7 +130,7 @@ module WordCountAnalyzer
131
130
  if date.eql?('no_special_treatment')
132
131
  txt
133
132
  elsif date.eql?('count_as_one')
134
- WordCountAnalyzer::Date.new(string: txt).replace
133
+ WordCountAnalyzer::Date.new.replace(txt)
135
134
  else
136
135
  raise 'The value you specified for date is not a valid option. Please use either `count_as_one` or `no_special_treatment`. The default option is `no_special_treatment`'
137
136
  end
@@ -16,134 +16,107 @@ module WordCountAnalyzer
16
16
  # Rubular: http://rubular.com/r/mpVSeaKwdY
17
17
  DIGIT_ONLY_YEAR_LAST_REGEX = /\d{4}[12]\d{3}\D\.?/
18
18
 
19
- attr_reader :string
20
- def initialize(string:)
21
- @string = string
19
+ def includes_date?(text)
20
+ includes_long_date?(text) || includes_number_only_date?(text)
22
21
  end
23
22
 
24
- def includes_date?
25
- long_date || number_only_date
23
+ def replace(text)
24
+ counter = 0
25
+ DOW_ABBR.map { |day| counter +=1 if text.include?('day') }
26
+ text = redact_dates(counter, text)
27
+ redact_regex(text)
26
28
  end
27
29
 
28
- def replace
29
- new_string = string.dup
30
- counter = 0
31
- DOW_ABBR.each do |day|
32
- counter +=1 if string.include?('day')
33
- end
30
+ def occurences(text)
31
+ replace(text).scan(/wsdateword/).size
32
+ end
33
+
34
+ def replace_number_only_date(text)
35
+ text.gsub(DMY_MDY_REGEX, ' wsdateword ')
36
+ .gsub(YMD_YDM_REGEX, ' wsdateword ')
37
+ .gsub(DIGIT_ONLY_YEAR_FIRST_REGEX, ' wsdateword ')
38
+ .gsub(DIGIT_ONLY_YEAR_LAST_REGEX, ' wsdateword ')
39
+ end
40
+
41
+ private
42
+
43
+ def redact_dates(counter, text)
34
44
  if counter > 0
35
- DOW_ABBR.each do |day|
36
- MONTHS.each do |month|
37
- new_string = new_string.gsub(/#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
38
- end
39
- MONTH_ABBR.each do |month|
40
- new_string = new_string.gsub(/#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
41
- end
42
- end
43
- DOW.each do |day|
44
- MONTHS.each do |month|
45
- new_string = new_string.gsub(/#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
46
- .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
47
- .gsub(/\d{4}\.*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
48
- .gsub(/\d{4}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*\d+\.?/i, ' wsdateword ')
49
- .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
50
- .gsub(/\d{2}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*(\d{4}|\d{2})\.?/i, ' wsdateword ')
51
- end
52
- MONTH_ABBR.each do |month|
53
- new_string = new_string.gsub(/#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
54
- .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
55
- .gsub(/\d{4}\.*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
56
- .gsub(/\d{4}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*\d+\.?/i, ' wsdateword ')
57
- .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
58
- .gsub(/\d{2}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*(\d{4}|\d{2})\.?/i, ' wsdateword ')
59
- end
60
- end
45
+ text = redact_dow_abbr(text)
46
+ text = redact_dow(text)
61
47
  else
62
- DOW.each do |day|
63
- MONTHS.each do |month|
64
- new_string = new_string.gsub(/#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
65
- .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
66
- .gsub(/\d{4}\.*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
67
- .gsub(/\d{4}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*\d+\.?/i, ' wsdateword ')
68
- .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
69
- .gsub(/\d{2}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*(\d{4}|\d{2})\.?/i, ' wsdateword ')
70
- end
71
- MONTH_ABBR.each do |month|
72
- new_string = new_string.gsub(/#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
48
+ text = redact_dow(text)
49
+ text = redact_dow_abbr(text)
50
+ end
51
+ text
52
+ end
53
+
54
+ def redact_regex(text)
55
+ text.gsub(DMY_MDY_REGEX, ' wsdateword ')
56
+ .gsub(YMD_YDM_REGEX, ' wsdateword ')
57
+ .gsub(DIGIT_ONLY_YEAR_FIRST_REGEX, ' wsdateword ')
58
+ .gsub(DIGIT_ONLY_YEAR_LAST_REGEX, ' wsdateword ')
59
+ end
60
+
61
+ def redact_dow(text)
62
+ DOW.each do |day|
63
+ MONTHS.map { |month| text = redact_date(text, day, month) }
64
+ MONTH_ABBR.map { |month| text = redact_date(text, day, month) }
65
+ end
66
+ text
67
+ end
68
+
69
+ def redact_dow_abbr(text)
70
+ DOW_ABBR.each do |day|
71
+ MONTHS.map { |month| text = text.gsub(/#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ') }
72
+ MONTH_ABBR.map { |month| text = text.gsub(/#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ') }
73
+ end
74
+ text
75
+ end
76
+
77
+ def redact_date(text, day, month)
78
+ text.gsub(/#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
73
79
  .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
74
80
  .gsub(/\d{4}\.*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
75
81
  .gsub(/\d{4}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*\d+\.?/i, ' wsdateword ')
76
82
  .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
77
83
  .gsub(/\d{2}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*(\d{4}|\d{2})\.?/i, ' wsdateword ')
78
- end
79
- end
80
- DOW_ABBR.each do |day|
81
- MONTHS.each do |month|
82
- new_string = new_string.gsub(/#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
83
- end
84
- MONTH_ABBR.each do |month|
85
- new_string = new_string.gsub(/#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
86
- end
87
- end
88
- end
89
- new_string = new_string.gsub(DMY_MDY_REGEX, ' wsdateword ')
90
- .gsub(YMD_YDM_REGEX, ' wsdateword ')
91
- .gsub(DIGIT_ONLY_YEAR_FIRST_REGEX, ' wsdateword ')
92
- .gsub(DIGIT_ONLY_YEAR_LAST_REGEX, ' wsdateword ')
93
- end
94
-
95
- def occurences
96
- replace.scan(/wsdateword/).size
97
84
  end
98
85
 
99
- def replace_number_only_date
100
- string.gsub(DMY_MDY_REGEX, ' wsdateword ')
101
- .gsub(YMD_YDM_REGEX, ' wsdateword ')
102
- .gsub(DIGIT_ONLY_YEAR_FIRST_REGEX, ' wsdateword ')
103
- .gsub(DIGIT_ONLY_YEAR_LAST_REGEX, ' wsdateword ')
86
+ def includes_long_date?(text)
87
+ includes_long_date_1?(text) || includes_long_date_2?(text)
104
88
  end
105
89
 
106
- private
107
-
108
- def long_date
109
- match_found = false
90
+ def includes_long_date_1?(text)
110
91
  DOW.each do |day|
111
- MONTHS.each do |month|
112
- break if match_found
113
- match_found = check_for_matches(day, month)
114
- end
115
- MONTH_ABBR.each do |month|
116
- break if match_found
117
- match_found = check_for_matches(day, month)
118
- end
92
+ MONTHS.map { |month| return true if check_for_matches(day, month, text) }
93
+ MONTH_ABBR.map { |month| return true if check_for_matches(day, month, text) }
119
94
  end
95
+ false
96
+ end
97
+
98
+ def includes_long_date_2?(text)
120
99
  DOW_ABBR.each do |day|
121
- MONTHS.each do |month|
122
- break if match_found
123
- match_found = !(string !~ /#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*(,)*\s\d{4}/i)
124
- end
125
- MONTH_ABBR.each do |month|
126
- break if match_found
127
- match_found = !(string !~ /#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}/i)
128
- end
100
+ MONTHS.map { |month| return true if !(text !~ /#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*(,)*\s\d{4}/i) }
101
+ MONTH_ABBR.map { |month| return true if !(text !~ /#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}/i) }
129
102
  end
130
- match_found
103
+ false
131
104
  end
132
105
 
133
- def number_only_date
134
- !(string !~ DMY_MDY_REGEX) ||
135
- !(string !~ YMD_YDM_REGEX) ||
136
- !(string !~ DIGIT_ONLY_YEAR_FIRST_REGEX) ||
137
- !(string !~ DIGIT_ONLY_YEAR_LAST_REGEX)
106
+ def includes_number_only_date?(text)
107
+ !(text !~ DMY_MDY_REGEX) ||
108
+ !(text !~ YMD_YDM_REGEX) ||
109
+ !(text !~ DIGIT_ONLY_YEAR_FIRST_REGEX) ||
110
+ !(text !~ DIGIT_ONLY_YEAR_LAST_REGEX)
138
111
  end
139
112
 
140
- def check_for_matches(day, month)
141
- !(string !~ /#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}/i) ||
142
- !(string !~ /#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}/i) ||
143
- !(string !~ /\d{4}\.*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*/i) ||
144
- !(string !~ /\d{4}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*\d+/i) ||
145
- !(string !~ /#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*/i) ||
146
- !(string !~ /\d{2}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*(\d{4}|\d{2})/i)
113
+ def check_for_matches(day, month, text)
114
+ !(text !~ /#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}/i) ||
115
+ !(text !~ /#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}/i) ||
116
+ !(text !~ /\d{4}\.*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*/i) ||
117
+ !(text !~ /\d{4}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*\d+/i) ||
118
+ !(text !~ /#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*/i) ||
119
+ !(text !~ /\d{2}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*(\d{4}|\d{2})/i)
147
120
  end
148
121
  end
149
- end
122
+ end
@@ -13,30 +13,25 @@ module WordCountAnalyzer
13
13
 
14
14
  UNICODE_ELLIPSIS = /(?<=[^…]|\A)…{1}(?=[^…]|$)/
15
15
 
16
- attr_reader :string
17
- def initialize(string:)
18
- @string = string
16
+ def includes_ellipsis?(text)
17
+ !(text !~ FOUR_CONSECUTIVE_REGEX) ||
18
+ !(text !~ THREE_SPACE_REGEX) ||
19
+ !(text !~ FOUR_SPACE_REGEX) ||
20
+ !(text !~ OTHER_THREE_PERIOD_REGEX) ||
21
+ !(text !~ UNICODE_ELLIPSIS)
19
22
  end
20
23
 
21
- def includes_ellipsis?
22
- !(string !~ FOUR_CONSECUTIVE_REGEX) ||
23
- !(string !~ THREE_SPACE_REGEX) ||
24
- !(string !~ FOUR_SPACE_REGEX) ||
25
- !(string !~ OTHER_THREE_PERIOD_REGEX) ||
26
- !(string !~ UNICODE_ELLIPSIS)
27
- end
28
-
29
- def replace
30
- string.gsub(FOUR_CONSECUTIVE_REGEX, ' wseword ')
24
+ def replace(text)
25
+ text.gsub(FOUR_CONSECUTIVE_REGEX, ' wseword ')
31
26
  .gsub(THREE_SPACE_REGEX, ' wseword ')
32
27
  .gsub(FOUR_SPACE_REGEX, ' wseword ')
33
28
  .gsub(OTHER_THREE_PERIOD_REGEX, ' wseword ')
34
29
  .gsub(UNICODE_ELLIPSIS, ' wseword ')
35
30
  end
36
31
 
37
- def occurences
32
+ def occurences(text)
38
33
  count = 0
39
- replace.split(' ').map { |token| count += 1 if token.strip.eql?('wseword') }
34
+ replace(text).split(' ').map { |token| count += 1 if token.strip.eql?('wseword') }
40
35
  count
41
36
  end
42
37
  end
@@ -7,49 +7,38 @@ module WordCountAnalyzer
7
7
  # Rubular: http://rubular.com/r/fXa4lp0gfS
8
8
  HYPERLINK_REGEX = /(http|https|www)(\.|:)/
9
9
 
10
- attr_reader :string
11
- def initialize(string:)
12
- @string = string
10
+ def hyperlink?(text)
11
+ !(text !~ URI.regexp) && text !~ NON_HYPERLINK_REGEX && !(text !~ HYPERLINK_REGEX)
13
12
  end
14
13
 
15
- def hyperlink?
16
- !(string !~ URI.regexp) && string !~ NON_HYPERLINK_REGEX && !(string !~ HYPERLINK_REGEX)
14
+ def occurences(text)
15
+ text.scan(URI.regexp).map { |link| link.compact.size > 1 ? 1 : 0 }.inject(0) { |sum, x| sum + x }
17
16
  end
18
17
 
19
- def occurences
20
- counter = 0
21
- string.scan(URI.regexp).each do |link|
22
- counter += 1 if link.compact.size > 1
23
- end
24
- counter
25
- end
26
-
27
- def replace
28
- new_string = string.dup
29
- string.split(/\s+/).each do |token|
18
+ def replace(text)
19
+ text.split(/\s+/).each do |token|
30
20
  if !(token !~ URI.regexp) && token !~ NON_HYPERLINK_REGEX && !(token !~ HYPERLINK_REGEX) && token.include?('">')
31
- new_string = new_string.gsub(/#{Regexp.escape(token.split('">')[0])}/, ' wslinkword ')
21
+ text = text.gsub(/#{Regexp.escape(token.split('">')[0])}/, ' wslinkword ')
32
22
  elsif !(token !~ URI.regexp) && token !~ NON_HYPERLINK_REGEX && !(token !~ HYPERLINK_REGEX)
33
- new_string = new_string.gsub(/#{Regexp.escape(token)}/, ' wslinkword ')
23
+ text = text.gsub(/#{Regexp.escape(token)}/, ' wslinkword ')
34
24
  end
35
25
  end
36
- new_string
26
+ text
37
27
  end
38
28
 
39
- def replace_split_at_period
40
- new_string = string.dup
41
- string.split(/\s+/).each do |token|
29
+ def replace_split_at_period(text)
30
+ text.split(/\s+/).each do |token|
42
31
  if !(token !~ URI.regexp) && token !~ NON_HYPERLINK_REGEX && !(token !~ HYPERLINK_REGEX) && token.include?('">')
43
- new_string.gsub!(/#{Regexp.escape(token.split('">')[0])}/) do |match|
32
+ text.gsub!(/#{Regexp.escape(token.split('">')[0])}/) do |match|
44
33
  match.split('.').join(' ')
45
34
  end
46
35
  elsif !(token !~ URI.regexp) && token !~ NON_HYPERLINK_REGEX && !(token !~ HYPERLINK_REGEX)
47
- new_string.gsub!(/#{Regexp.escape(token)}/) do |match|
36
+ text.gsub!(/#{Regexp.escape(token)}/) do |match|
48
37
  match.split('.').join(' ')
49
38
  end
50
39
  end
51
40
  end
52
- new_string
41
+ text
53
42
  end
54
43
  end
55
44
  end