RubyGems - word_count_analyzer - Versions diffs - 0.0.14 → 1.0.0 - Mend

word_count_analyzer 0.0.14 → 1.0.0

Files changed (19)

checksums.yaml +4 -4
data/README.md +5 -6
data/lib/word_count_analyzer/analyzer.rb +6 -5
data/lib/word_count_analyzer/contraction.rb +1 -1
data/lib/word_count_analyzer/counter.rb +15 -16
data/lib/word_count_analyzer/date.rb +79 -106
data/lib/word_count_analyzer/ellipsis.rb +10 -15
data/lib/word_count_analyzer/hyperlink.rb +14 -25
data/lib/word_count_analyzer/hyphenated_word.rb +1 -1
data/lib/word_count_analyzer/number.rb +1 -1
data/lib/word_count_analyzer/slash.rb +8 -7
data/lib/word_count_analyzer/version.rb +1 -1
data/spec/word_count_analyzer/counter_spec.rb +123 -160
data/spec/word_count_analyzer/date_spec.rb +85 -85
data/spec/word_count_analyzer/ellipsis_spec.rb +33 -33
data/spec/word_count_analyzer/hyperlink_spec.rb +23 -23
data/spec/word_count_analyzer/performance_spec.rb +46 -0
data/word_count_analyzer.gemspec +1 -0
metadata +18 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 625971163e5252e84551ad9f2cbdf0a33767a077
-  data.tar.gz: 898713d69d40a65120d0856a9ac4c3f48eaed58f
+  metadata.gz: e5a101dde1b0e3db7728e7c17716ee5e4a3201e7
+  data.tar.gz: e16de9a391248d423b88d24c5e2a835a480f8623
 SHA512:
-  metadata.gz: 9781cebd86bde81d142db0260dddad85a1b091624a2387866ceacc631c36c602af7af983a7d11d15ca4ee4dba07ba3975ad9026423534c5121736a7ded508371
-  data.tar.gz: e41d5d0f159b2110e922701d9beff39095130582f59873b795d82f86150e0aac79c1e99979ad51e90b22c648eff0cdfda1a8d43ae09ee140e6e24e10c859c28f
+  metadata.gz: cda1823794f39e1b086e93ae62e10cb8db43f6aab8659988a5f391eb4b9af64c898f8224b7e7a4a0689838885c35078f9be4b639eaf237d0175dcd6183783f19
+  data.tar.gz: a0900c47c0a29afd32169bb35fddd1dbb96685c420def00d0b26f8440a0a8b71ddc9e5badfb8683096d7e6ddf475c9f66f3c023959a926a4e1696593c0398f73

data/README.md CHANGED Viewed

@@ -49,7 +49,7 @@ Other gray areas not covered by this gem:
 ```ruby
 text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 4. <html> Some HTML and a hyphenated-word</html>. Don't count stray punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list 1. item a 2. item b 3. item c. Now let's add he/she/it or a c:\\Users\\john. 2/15/2012 is the date! { HYPERLINK 'http://www.hello.com' }"
-WordCountAnalyzer::Analyzer.new(text: text).analyze
+WordCountAnalyzer::Analyzer.new.analyze(text)
 # =>   {
 #        "ellipsis": 1,
@@ -74,22 +74,21 @@ WordCountAnalyzer::Analyzer.new(text: text).analyze
 ```ruby
 text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 2. <html> Some HTML and a hyphenated-word</html>. Don't count punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list \n\n1. item a \n\n2. item b \n\n3. item c. Now let's add he/she/it or a c:\\Users\\john. 2/15/2012 is the date! { HYPERLINK 'http://www.hello.com' }"
-WordCountAnalyzer::Counter.new(text: text).count
+WordCountAnalyzer::Counter.new.count(text)
 # => 64
 # Overrides all settings to match the way Pages handles word count.
 # N.B. The developers of Pages may change the algorithm at any time so this should just be used as an approximation.
-WordCountAnalyzer::Counter.new(text: text).pages_count
+WordCountAnalyzer::Counter.new.pages_count(text)
 # => 76 (or 79 if the list items are not formatted as a list)
 # Overrides all settings to match the way Microsoft Word and wc (Unix) handle word count.
 # N.B. The developers of these tools may change the algorithm at any time so this should just be used as an approximation.
-WordCountAnalyzer::Counter.new(text: text).mword_count
+WordCountAnalyzer::Counter.new.mword_count(text)
 # => 71
 # Highly configurable (see all options below)
 WordCountAnalyzer::Counter.new(
-  text: text,
   ellipsis: 'no_special_treatment',
   hyperlink: 'no_special_treatment',
   contraction: 'count_as_multiple',
@@ -104,7 +103,7 @@ WordCountAnalyzer::Counter.new(
   dashed_line: 'count',
   underscore: 'count',
   stray_punctuation: 'count'
-).count
+).count(text)
 # => 77
 ```

data/lib/word_count_analyzer/analyzer.rb CHANGED Viewed

@@ -1,23 +1,24 @@
 module WordCountAnalyzer
   class Analyzer
-    attr_reader :text
+    attr_reader :text, :tagger
     def initialize(text:)
       @text = text
+      @tagger = EngTagger.new
     end
     def analyze
       analysis = {}
-      analysis['ellipsis'] = WordCountAnalyzer::Ellipsis.new(string: text).occurences
+      analysis['ellipsis'] = WordCountAnalyzer::Ellipsis.new.occurences(text)
       contraction_count = 0
       hyphenated_word_count = 0
       WordCountAnalyzer::Xhtml.new(string: text).replace.split(/\s+/).each_with_index do |token, index|
-        contraction_count += 1 if WordCountAnalyzer::Contraction.new(token: token, following_token: text.split(/\s+/)[index + 1], tgr: EngTagger.new, hyphen: 'single').contraction?
+        contraction_count += 1 if WordCountAnalyzer::Contraction.new(token: token, following_token: text.split(/\s+/)[index + 1], tgr: tagger, hyphen: 'single').contraction?
         hyphenated_word_count += 1 if WordCountAnalyzer::HyphenatedWord.new(token: token).hyphenated_word?
       end
-      analysis['hyperlink'] = WordCountAnalyzer::Hyperlink.new(string: text).occurences
+      analysis['hyperlink'] = WordCountAnalyzer::Hyperlink.new.occurences(text)
       analysis['contraction'] = contraction_count
       analysis['hyphenated_word'] = hyphenated_word_count
-      analysis['date'] = WordCountAnalyzer::Date.new(string: text).occurences
+      analysis['date'] = WordCountAnalyzer::Date.new.occurences(text)
       analysis['number'] = WordCountAnalyzer::Number.new(string: text).occurences
       analysis['numbered_list'] = WordCountAnalyzer::NumberedList.new(string: text).occurences
       analysis['xhtml'] = WordCountAnalyzer::Xhtml.new(string: text).occurences

data/lib/word_count_analyzer/contraction.rb CHANGED Viewed

@@ -81,7 +81,7 @@ module WordCountAnalyzer
       "jack-o'-lantern"   => "jack-of-the-lantern",
       "will-o'-the-wisp"  => "will-of-the-wisp",
       "'twas"             => "it was"
-    }
+    }.freeze
     attr_reader :token, :following_token, :tgr, :hyphen
     def initialize(token:, following_token:, tgr:, **args)

data/lib/word_count_analyzer/counter.rb CHANGED Viewed

@@ -1,8 +1,7 @@
 module WordCountAnalyzer
   class Counter
-    attr_reader :text, :ellipsis, :hyperlink, :contraction, :hyphenated_word, :date, :number, :numbered_list, :xhtml, :forward_slash, :backslash, :dotted_line, :dashed_line, :underscore, :stray_punctuation, :equal_sign
-    def initialize(text:, **args)
-      @text = text
+    attr_reader :ellipsis, :hyperlink, :contraction, :hyphenated_word, :date, :number, :numbered_list, :xhtml, :forward_slash, :backslash, :dotted_line, :dashed_line, :underscore, :stray_punctuation, :equal_sign
+    def initialize(**args)
       @ellipsis = args[:ellipsis] || 'ignore'
       @hyperlink = args[:hyperlink] || 'count_as_one'
       @contraction = args[:contraction] || 'count_as_one'
@@ -18,13 +17,14 @@ module WordCountAnalyzer
       @underscore = args[:underscore] || 'ignore'
       @stray_punctuation = args[:stray_punctuation] || 'ignore'
       @equal_sign = 'ignore'
+      @tgr = EngTagger.new
     end
-    def count
-      word_count
+    def count(text)
+      word_count(text)
     end
-    def pages_count
+    def pages_count(text)
       @ellipsis = 'ignore'
       @hyperlink = 'split_at_period'
       @contraction = 'count_as_one'
@@ -40,10 +40,10 @@ module WordCountAnalyzer
       @underscore = 'ignore'
       @stray_punctuation = 'ignore'
       @equal_sign = 'break'
-      word_count
+      word_count(text)
     end
-    def mword_count
+    def mword_count(text)
       @ellipsis = 'no_special_treatment'
       @hyperlink = 'count_as_one'
       @contraction = 'count_as_one'
@@ -58,16 +58,15 @@ module WordCountAnalyzer
       @dashed_line = 'count'
       @underscore = 'count'
       @stray_punctuation = 'count'
-      word_count
+      word_count(text)
     end
     private
-    def word_count
-      tgr = EngTagger.new
+    def word_count(text)
       processed_text = process_ellipsis(text)
       processed_text = process_hyperlink(processed_text)
-      processed_text = process_contraction(processed_text, tgr)
+      processed_text = process_contraction(processed_text, @tgr)
       processed_text = process_date(processed_text)
       processed_text = process_number(processed_text)
       processed_text = process_number_list(processed_text)
@@ -85,7 +84,7 @@ module WordCountAnalyzer
     def process_ellipsis(txt)
       if ellipsis.eql?('ignore')
-        WordCountAnalyzer::Ellipsis.new(string: txt).replace.gsub(/wseword/, '')
+        WordCountAnalyzer::Ellipsis.new.replace(txt).gsub(/wseword/, '')
       elsif ellipsis.eql?('no_special_treatment')
         txt
       else
@@ -96,9 +95,9 @@ module WordCountAnalyzer
     def process_hyperlink(txt)
       case
       when hyperlink.eql?('count_as_one')
-        WordCountAnalyzer::Hyperlink.new(string: txt).replace
+        WordCountAnalyzer::Hyperlink.new.replace(txt)
       when hyperlink.eql?('split_at_period')
-        WordCountAnalyzer::Hyperlink.new(string: txt).replace_split_at_period
+        WordCountAnalyzer::Hyperlink.new.replace_split_at_period(txt)
       when hyperlink.eql?('no_special_treatment')
         txt
       else
@@ -131,7 +130,7 @@ module WordCountAnalyzer
       if date.eql?('no_special_treatment')
         txt
       elsif date.eql?('count_as_one')
-        WordCountAnalyzer::Date.new(string: txt).replace
+        WordCountAnalyzer::Date.new.replace(txt)
       else
         raise 'The value you specified for date is not a valid option. Please use either `count_as_one` or `no_special_treatment`. The default option is `no_special_treatment`'
       end

data/lib/word_count_analyzer/date.rb CHANGED Viewed

@@ -16,134 +16,107 @@ module WordCountAnalyzer
     # Rubular: http://rubular.com/r/mpVSeaKwdY
     DIGIT_ONLY_YEAR_LAST_REGEX = /\d{4}[12]\d{3}\D\.?/
-    attr_reader :string
-    def initialize(string:)
-      @string = string
+    def includes_date?(text)
+      includes_long_date?(text) || includes_number_only_date?(text)
     end
-    def includes_date?
-      long_date || number_only_date
+    def replace(text)
+      counter = 0
+      DOW_ABBR.map { |day| counter +=1 if text.include?('day') }
+      text = redact_dates(counter, text)
+      redact_regex(text)
     end
-    def replace
-      new_string = string.dup
-      counter = 0
-      DOW_ABBR.each do |day|
-        counter +=1 if string.include?('day')
-      end
+    def occurences(text)
+      replace(text).scan(/wsdateword/).size
+    end
+    def replace_number_only_date(text)
+      text.gsub(DMY_MDY_REGEX, ' wsdateword ')
+          .gsub(YMD_YDM_REGEX, ' wsdateword ')
+          .gsub(DIGIT_ONLY_YEAR_FIRST_REGEX, ' wsdateword ')
+          .gsub(DIGIT_ONLY_YEAR_LAST_REGEX, ' wsdateword ')
+    end
+    private
+    def redact_dates(counter, text)
       if counter > 0
-        DOW_ABBR.each do |day|
-          MONTHS.each do |month|
-            new_string = new_string.gsub(/#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
-          end
-          MONTH_ABBR.each do |month|
-            new_string = new_string.gsub(/#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
-          end
-        end
-        DOW.each do |day|
-          MONTHS.each do |month|
-            new_string = new_string.gsub(/#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
-                                   .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
-                                   .gsub(/\d{4}\.*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
-                                   .gsub(/\d{4}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*\d+\.?/i, ' wsdateword ')
-                                   .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
-                                   .gsub(/\d{2}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*(\d{4}|\d{2})\.?/i, ' wsdateword ')
-          end
-          MONTH_ABBR.each do |month|
-            new_string = new_string.gsub(/#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
-                           .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
-                           .gsub(/\d{4}\.*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
-                           .gsub(/\d{4}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*\d+\.?/i, ' wsdateword ')
-                           .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
-                           .gsub(/\d{2}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*(\d{4}|\d{2})\.?/i, ' wsdateword ')
-          end
-        end
+        text = redact_dow_abbr(text)
+        text = redact_dow(text)
       else
-        DOW.each do |day|
-          MONTHS.each do |month|
-            new_string = new_string.gsub(/#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
-                                   .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
-                                   .gsub(/\d{4}\.*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
-                                   .gsub(/\d{4}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*\d+\.?/i, ' wsdateword ')
-                                   .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
-                                   .gsub(/\d{2}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*(\d{4}|\d{2})\.?/i, ' wsdateword ')
-          end
-          MONTH_ABBR.each do |month|
-            new_string = new_string.gsub(/#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
+        text = redact_dow(text)
+        text = redact_dow_abbr(text)
+      end
+      text
+    end
+    def redact_regex(text)
+      text.gsub(DMY_MDY_REGEX, ' wsdateword ')
+          .gsub(YMD_YDM_REGEX, ' wsdateword ')
+          .gsub(DIGIT_ONLY_YEAR_FIRST_REGEX, ' wsdateword ')
+          .gsub(DIGIT_ONLY_YEAR_LAST_REGEX, ' wsdateword ')
+    end
+    def redact_dow(text)
+      DOW.each do |day|
+        MONTHS.map { |month| text = redact_date(text, day, month) }
+        MONTH_ABBR.map { |month| text = redact_date(text, day, month) }
+      end
+      text
+    end
+    def redact_dow_abbr(text)
+      DOW_ABBR.each do |day|
+        MONTHS.map { |month| text = text.gsub(/#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ') }
+        MONTH_ABBR.map { |month| text = text.gsub(/#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ') }
+      end
+      text
+    end
+    def redact_date(text, day, month)
+      text.gsub(/#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
                            .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
                            .gsub(/\d{4}\.*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
                            .gsub(/\d{4}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*\d+\.?/i, ' wsdateword ')
                            .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
                            .gsub(/\d{2}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*(\d{4}|\d{2})\.?/i, ' wsdateword ')
-          end
-        end
-        DOW_ABBR.each do |day|
-          MONTHS.each do |month|
-            new_string = new_string.gsub(/#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
-          end
-          MONTH_ABBR.each do |month|
-            new_string = new_string.gsub(/#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
-          end
-        end
-      end
-      new_string = new_string.gsub(DMY_MDY_REGEX, ' wsdateword ')
-                     .gsub(YMD_YDM_REGEX, ' wsdateword ')
-                     .gsub(DIGIT_ONLY_YEAR_FIRST_REGEX, ' wsdateword ')
-                     .gsub(DIGIT_ONLY_YEAR_LAST_REGEX, ' wsdateword ')
-    end
-    def occurences
-      replace.scan(/wsdateword/).size
     end
-    def replace_number_only_date
-      string.gsub(DMY_MDY_REGEX, ' wsdateword ')
-            .gsub(YMD_YDM_REGEX, ' wsdateword ')
-            .gsub(DIGIT_ONLY_YEAR_FIRST_REGEX, ' wsdateword ')
-            .gsub(DIGIT_ONLY_YEAR_LAST_REGEX, ' wsdateword ')
+    def includes_long_date?(text)
+      includes_long_date_1?(text) || includes_long_date_2?(text)
     end
-    private
-    def long_date
-      match_found = false
+    def includes_long_date_1?(text)
       DOW.each do |day|
-        MONTHS.each do |month|
-          break if match_found
-          match_found = check_for_matches(day, month)
-        end
-        MONTH_ABBR.each do |month|
-          break if match_found
-          match_found = check_for_matches(day, month)
-        end
+        MONTHS.map { |month| return true if check_for_matches(day, month, text) }
+        MONTH_ABBR.map { |month| return true if check_for_matches(day, month, text) }
       end
+      false
+    end
+    def includes_long_date_2?(text)
       DOW_ABBR.each do |day|
-        MONTHS.each do |month|
-          break if match_found
-          match_found = !(string !~ /#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*(,)*\s\d{4}/i)
-        end
-        MONTH_ABBR.each do |month|
-          break if match_found
-          match_found = !(string !~ /#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}/i)
-        end
+        MONTHS.map { |month| return true if !(text !~ /#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*(,)*\s\d{4}/i) }
+        MONTH_ABBR.map { |month| return true if !(text !~ /#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}/i) }
       end
-      match_found
+      false
     end
-    def number_only_date
-      !(string !~ DMY_MDY_REGEX) ||
-      !(string !~ YMD_YDM_REGEX) ||
-      !(string !~ DIGIT_ONLY_YEAR_FIRST_REGEX) ||
-      !(string !~ DIGIT_ONLY_YEAR_LAST_REGEX)
+    def includes_number_only_date?(text)
+      !(text !~ DMY_MDY_REGEX) ||
+      !(text !~ YMD_YDM_REGEX) ||
+      !(text !~ DIGIT_ONLY_YEAR_FIRST_REGEX) ||
+      !(text !~ DIGIT_ONLY_YEAR_LAST_REGEX)
     end
-    def check_for_matches(day, month)
-      !(string !~ /#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}/i) ||
-      !(string !~ /#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}/i) ||
-      !(string !~ /\d{4}\.*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*/i) ||
-      !(string !~ /\d{4}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*\d+/i) ||
-      !(string !~ /#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*/i) ||
-      !(string !~ /\d{2}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*(\d{4}|\d{2})/i)
+    def check_for_matches(day, month, text)
+      !(text !~ /#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}/i) ||
+      !(text !~ /#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}/i) ||
+      !(text !~ /\d{4}\.*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*/i) ||
+      !(text !~ /\d{4}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*\d+/i) ||
+      !(text !~ /#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*/i) ||
+      !(text !~ /\d{2}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*(\d{4}|\d{2})/i)
     end
   end
-end
+end

data/lib/word_count_analyzer/ellipsis.rb CHANGED Viewed

@@ -13,30 +13,25 @@ module WordCountAnalyzer
     UNICODE_ELLIPSIS = /(?<=[^…]|\A)…{1}(?=[^…]|$)/
-    attr_reader :string
-    def initialize(string:)
-      @string = string
+    def includes_ellipsis?(text)
+      !(text !~ FOUR_CONSECUTIVE_REGEX) ||
+      !(text !~ THREE_SPACE_REGEX) ||
+      !(text !~ FOUR_SPACE_REGEX) ||
+      !(text !~ OTHER_THREE_PERIOD_REGEX) ||
+      !(text !~ UNICODE_ELLIPSIS)
     end
-    def includes_ellipsis?
-      !(string !~ FOUR_CONSECUTIVE_REGEX) ||
-      !(string !~ THREE_SPACE_REGEX) ||
-      !(string !~ FOUR_SPACE_REGEX) ||
-      !(string !~ OTHER_THREE_PERIOD_REGEX) ||
-      !(string !~ UNICODE_ELLIPSIS)
-    end
-    def replace
-      string.gsub(FOUR_CONSECUTIVE_REGEX, ' wseword ')
+    def replace(text)
+      text.gsub(FOUR_CONSECUTIVE_REGEX, ' wseword ')
             .gsub(THREE_SPACE_REGEX, ' wseword ')
             .gsub(FOUR_SPACE_REGEX, ' wseword ')
             .gsub(OTHER_THREE_PERIOD_REGEX, ' wseword ')
             .gsub(UNICODE_ELLIPSIS, ' wseword ')
     end
-    def occurences
+    def occurences(text)
       count = 0
-      replace.split(' ').map { |token| count += 1 if token.strip.eql?('wseword') }
+      replace(text).split(' ').map { |token| count += 1 if token.strip.eql?('wseword') }
       count
     end
   end

data/lib/word_count_analyzer/hyperlink.rb CHANGED Viewed

@@ -7,49 +7,38 @@ module WordCountAnalyzer
     # Rubular: http://rubular.com/r/fXa4lp0gfS
     HYPERLINK_REGEX = /(http|https|www)(\.|:)/
-    attr_reader :string
-    def initialize(string:)
-      @string = string
+    def hyperlink?(text)
+      !(text !~ URI.regexp) && text !~ NON_HYPERLINK_REGEX && !(text !~ HYPERLINK_REGEX)
     end
-    def hyperlink?
-      !(string !~ URI.regexp) && string !~ NON_HYPERLINK_REGEX && !(string !~ HYPERLINK_REGEX)
+    def occurences(text)
+      text.scan(URI.regexp).map { |link| link.compact.size > 1 ? 1 : 0 }.inject(0) { |sum, x| sum + x }
     end
-    def occurences
-      counter = 0
-      string.scan(URI.regexp).each do |link|
-        counter += 1 if link.compact.size > 1
-      end
-      counter
-    end
-    def replace
-      new_string = string.dup
-      string.split(/\s+/).each do |token|
+    def replace(text)
+      text.split(/\s+/).each do |token|
         if !(token !~ URI.regexp) && token !~ NON_HYPERLINK_REGEX && !(token !~ HYPERLINK_REGEX) && token.include?('">')
-          new_string = new_string.gsub(/#{Regexp.escape(token.split('">')[0])}/, ' wslinkword ')
+          text = text.gsub(/#{Regexp.escape(token.split('">')[0])}/, ' wslinkword ')
         elsif !(token !~ URI.regexp) && token !~ NON_HYPERLINK_REGEX && !(token !~ HYPERLINK_REGEX)
-          new_string = new_string.gsub(/#{Regexp.escape(token)}/, ' wslinkword ')
+          text = text.gsub(/#{Regexp.escape(token)}/, ' wslinkword ')
         end
       end
-      new_string
+      text
     end
-    def replace_split_at_period
-      new_string = string.dup
-      string.split(/\s+/).each do |token|
+    def replace_split_at_period(text)
+      text.split(/\s+/).each do |token|
         if !(token !~ URI.regexp) && token !~ NON_HYPERLINK_REGEX && !(token !~ HYPERLINK_REGEX) && token.include?('">')
-          new_string.gsub!(/#{Regexp.escape(token.split('">')[0])}/) do |match|
+          text.gsub!(/#{Regexp.escape(token.split('">')[0])}/) do |match|
             match.split('.').join(' ')
           end
         elsif !(token !~ URI.regexp) && token !~ NON_HYPERLINK_REGEX && !(token !~ HYPERLINK_REGEX)
-          new_string.gsub!(/#{Regexp.escape(token)}/) do |match|
+          text.gsub!(/#{Regexp.escape(token)}/) do |match|
             match.split('.').join(' ')
           end
         end
       end
-      new_string
+      text
     end
   end
 end