RubyGems - word_count_analyzer - Versions diffs - 0.0.14 → 1.0.0 - Mend

word_count_analyzer 0.0.14 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

checksums.yaml +4 -4
data/README.md +5 -6
data/lib/word_count_analyzer/analyzer.rb +6 -5
data/lib/word_count_analyzer/contraction.rb +1 -1
data/lib/word_count_analyzer/counter.rb +15 -16
data/lib/word_count_analyzer/date.rb +79 -106
data/lib/word_count_analyzer/ellipsis.rb +10 -15
data/lib/word_count_analyzer/hyperlink.rb +14 -25
data/lib/word_count_analyzer/hyphenated_word.rb +1 -1
data/lib/word_count_analyzer/number.rb +1 -1
data/lib/word_count_analyzer/slash.rb +8 -7
data/lib/word_count_analyzer/version.rb +1 -1
data/spec/word_count_analyzer/counter_spec.rb +123 -160
data/spec/word_count_analyzer/date_spec.rb +85 -85
data/spec/word_count_analyzer/ellipsis_spec.rb +33 -33
data/spec/word_count_analyzer/hyperlink_spec.rb +23 -23
data/spec/word_count_analyzer/performance_spec.rb +46 -0
data/word_count_analyzer.gemspec +1 -0
metadata +18 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 625971163e5252e84551ad9f2cbdf0a33767a077
-  data.tar.gz: 898713d69d40a65120d0856a9ac4c3f48eaed58f
+  metadata.gz: e5a101dde1b0e3db7728e7c17716ee5e4a3201e7
+  data.tar.gz: e16de9a391248d423b88d24c5e2a835a480f8623
 SHA512:
-  metadata.gz: 9781cebd86bde81d142db0260dddad85a1b091624a2387866ceacc631c36c602af7af983a7d11d15ca4ee4dba07ba3975ad9026423534c5121736a7ded508371
-  data.tar.gz: e41d5d0f159b2110e922701d9beff39095130582f59873b795d82f86150e0aac79c1e99979ad51e90b22c648eff0cdfda1a8d43ae09ee140e6e24e10c859c28f
+  metadata.gz: cda1823794f39e1b086e93ae62e10cb8db43f6aab8659988a5f391eb4b9af64c898f8224b7e7a4a0689838885c35078f9be4b639eaf237d0175dcd6183783f19
+  data.tar.gz: a0900c47c0a29afd32169bb35fddd1dbb96685c420def00d0b26f8440a0a8b71ddc9e5badfb8683096d7e6ddf475c9f66f3c023959a926a4e1696593c0398f73

data/README.md CHANGED Viewed

@@ -49,7 +49,7 @@ Other gray areas not covered by this gem:
 ```ruby
 text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 4. <html> Some HTML and a hyphenated-word</html>. Don't count stray punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list 1. item a 2. item b 3. item c. Now let's add he/she/it or a c:\\Users\\john. 2/15/2012 is the date! { HYPERLINK 'http://www.hello.com' }"
-WordCountAnalyzer::Analyzer.new(text: text).analyze
+WordCountAnalyzer::Analyzer.new.analyze(text)
 # =>   {
 #        "ellipsis": 1,
@@ -74,22 +74,21 @@ WordCountAnalyzer::Analyzer.new(text: text).analyze
 ```ruby
 text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 2. <html> Some HTML and a hyphenated-word</html>. Don't count punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list \n\n1. item a \n\n2. item b \n\n3. item c. Now let's add he/she/it or a c:\\Users\\john. 2/15/2012 is the date! { HYPERLINK 'http://www.hello.com' }"
-WordCountAnalyzer::Counter.new(text: text).count
+WordCountAnalyzer::Counter.new.count(text)
 # => 64
 # Overrides all settings to match the way Pages handles word count.
 # N.B. The developers of Pages may change the algorithm at any time, so this should just be used as an approximation.
-WordCountAnalyzer::Counter.new(text: text).pages_count
+WordCountAnalyzer::Counter.new.pages_count(text)
 # => 76 (or 79 if the list items are not formatted as a list)
 # Overrides all settings to match the way Microsoft Word and wc (Unix) handle word count.
 # N.B. The developers of these tools may change the algorithm at any time, so this should just be used as an approximation.
-WordCountAnalyzer::Counter.new(text: text).mword_count
+WordCountAnalyzer::Counter.new.mword_count(text)
 # => 71
 # Highly configurable (see all options below)
 WordCountAnalyzer::Counter.new(
-  text: text,
   ellipsis: 'no_special_treatment',
   hyperlink: 'no_special_treatment',
   contraction: 'count_as_multiple',
@@ -104,7 +103,7 @@ WordCountAnalyzer::Counter.new(
   dashed_line: 'count',
   underscore: 'count',
   stray_punctuation: 'count'
-).count
+).count(text)
 # => 77
 ```

data/lib/word_count_analyzer/analyzer.rb CHANGED Viewed

@@ -1,23 +1,24 @@
 module WordCountAnalyzer
   class Analyzer
-    attr_reader :text
+    attr_reader :text, :tagger
     def initialize(text:)
       @text = text
+      @tagger = EngTagger.new
     end
     def analyze
       analysis = {}
-      analysis['ellipsis'] = WordCountAnalyzer::Ellipsis.new(string: text).occurences
+      analysis['ellipsis'] = WordCountAnalyzer::Ellipsis.new.occurences(text)
       contraction_count = 0
       hyphenated_word_count = 0
       WordCountAnalyzer::Xhtml.new(string: text).replace.split(/\s+/).each_with_index do |token, index|
-        contraction_count += 1 if WordCountAnalyzer::Contraction.new(token: token, following_token: text.split(/\s+/)[index + 1], tgr: EngTagger.new, hyphen: 'single').contraction?
+        contraction_count += 1 if WordCountAnalyzer::Contraction.new(token: token, following_token: text.split(/\s+/)[index + 1], tgr: tagger, hyphen: 'single').contraction?
         hyphenated_word_count += 1 if WordCountAnalyzer::HyphenatedWord.new(token: token).hyphenated_word?
       end
-      analysis['hyperlink'] = WordCountAnalyzer::Hyperlink.new(string: text).occurences
+      analysis['hyperlink'] = WordCountAnalyzer::Hyperlink.new.occurences(text)
       analysis['contraction'] = contraction_count
       analysis['hyphenated_word'] = hyphenated_word_count
-      analysis['date'] = WordCountAnalyzer::Date.new(string: text).occurences
+      analysis['date'] = WordCountAnalyzer::Date.new.occurences(text)
       analysis['number'] = WordCountAnalyzer::Number.new(string: text).occurences
       analysis['numbered_list'] = WordCountAnalyzer::NumberedList.new(string: text).occurences
       analysis['xhtml'] = WordCountAnalyzer::Xhtml.new(string: text).occurences

data/lib/word_count_analyzer/contraction.rb CHANGED Viewed

@@ -81,7 +81,7 @@ module WordCountAnalyzer
       "jack-o'-lantern"   => "jack-of-the-lantern",
       "will-o'-the-wisp"  => "will-of-the-wisp",
       "'twas"             => "it was"
-    }
+    }.freeze
     attr_reader :token, :following_token, :tgr, :hyphen
     def initialize(token:, following_token:, tgr:, **args)

data/lib/word_count_analyzer/counter.rb CHANGED Viewed

@@ -1,8 +1,7 @@
 module WordCountAnalyzer
   class Counter
-    attr_reader :text, :ellipsis, :hyperlink, :contraction, :hyphenated_word, :date, :number, :numbered_list, :xhtml, :forward_slash, :backslash, :dotted_line, :dashed_line, :underscore, :stray_punctuation, :equal_sign
-    def initialize(text:, **args)
-      @text = text
+    attr_reader :ellipsis, :hyperlink, :contraction, :hyphenated_word, :date, :number, :numbered_list, :xhtml, :forward_slash, :backslash, :dotted_line, :dashed_line, :underscore, :stray_punctuation, :equal_sign
+    def initialize(**args)
       @ellipsis = args[:ellipsis] || 'ignore'
       @hyperlink = args[:hyperlink] || 'count_as_one'
       @contraction = args[:contraction] || 'count_as_one'
@@ -18,13 +17,14 @@ module WordCountAnalyzer
       @underscore = args[:underscore] || 'ignore'
       @stray_punctuation = args[:stray_punctuation] || 'ignore'
       @equal_sign = 'ignore'
+      @tgr = EngTagger.new
     end
-    def count
-      word_count
+    def count(text)
+      word_count(text)
     end
-    def pages_count
+    def pages_count(text)
       @ellipsis = 'ignore'
       @hyperlink = 'split_at_period'
       @contraction = 'count_as_one'
@@ -40,10 +40,10 @@ module WordCountAnalyzer
       @underscore = 'ignore'
       @stray_punctuation = 'ignore'
       @equal_sign = 'break'
-      word_count
+      word_count(text)
     end
-    def mword_count
+    def mword_count(text)
       @ellipsis = 'no_special_treatment'
       @hyperlink = 'count_as_one'
       @contraction = 'count_as_one'
@@ -58,16 +58,15 @@ module WordCountAnalyzer
       @dashed_line = 'count'
       @underscore = 'count'
       @stray_punctuation = 'count'
-      word_count
+      word_count(text)
     end
     private
-    def word_count
-      tgr = EngTagger.new
+    def word_count(text)
       processed_text = process_ellipsis(text)
       processed_text = process_hyperlink(processed_text)
-      processed_text = process_contraction(processed_text, tgr)
+      processed_text = process_contraction(processed_text, @tgr)
       processed_text = process_date(processed_text)
       processed_text = process_number(processed_text)
       processed_text = process_number_list(processed_text)
@@ -85,7 +84,7 @@ module WordCountAnalyzer
     def process_ellipsis(txt)
       if ellipsis.eql?('ignore')
-        WordCountAnalyzer::Ellipsis.new(string: txt).replace.gsub(/wseword/, '')
+        WordCountAnalyzer::Ellipsis.new.replace(txt).gsub(/wseword/, '')
       elsif ellipsis.eql?('no_special_treatment')
         txt
       else
@@ -96,9 +95,9 @@ module WordCountAnalyzer
     def process_hyperlink(txt)
       case
       when hyperlink.eql?('count_as_one')
-        WordCountAnalyzer::Hyperlink.new(string: txt).replace
+        WordCountAnalyzer::Hyperlink.new.replace(txt)
       when hyperlink.eql?('split_at_period')
-        WordCountAnalyzer::Hyperlink.new(string: txt).replace_split_at_period
+        WordCountAnalyzer::Hyperlink.new.replace_split_at_period(txt)
       when hyperlink.eql?('no_special_treatment')
         txt
       else
@@ -131,7 +130,7 @@ module WordCountAnalyzer
       if date.eql?('no_special_treatment')
         txt
       elsif date.eql?('count_as_one')
-        WordCountAnalyzer::Date.new(string: txt).replace
+        WordCountAnalyzer::Date.new.replace(txt)
       else
         raise 'The value you specified for date is not a valid option. Please use either `count_as_one` or `no_special_treatment`. The default option is `no_special_treatment`'
       end

data/lib/word_count_analyzer/date.rb CHANGED Viewed

@@ -16,134 +16,107 @@ module WordCountAnalyzer
     # Rubular: http://rubular.com/r/mpVSeaKwdY
     DIGIT_ONLY_YEAR_LAST_REGEX = /\d{4}[12]\d{3}\D\.?/
-    attr_reader :string
-    def initialize(string:)
-      @string = string
+    def includes_date?(text)
+      includes_long_date?(text) || includes_number_only_date?(text)
     end
-    def includes_date?
-      long_date || number_only_date
+    def replace(text)
+      counter = 0
+      DOW_ABBR.map { |day| counter +=1 if text.include?('day') }
+      text = redact_dates(counter, text)
+      redact_regex(text)
     end
-    def replace
-      new_string = string.dup
-      counter = 0
-      DOW_ABBR.each do |day|
-        counter +=1 if string.include?('day')
-      end
+    def occurences(text)
+      replace(text).scan(/wsdateword/).size
+    end
+    def replace_number_only_date(text)
+      text.gsub(DMY_MDY_REGEX, ' wsdateword ')
+          .gsub(YMD_YDM_REGEX, ' wsdateword ')
+          .gsub(DIGIT_ONLY_YEAR_FIRST_REGEX, ' wsdateword ')
+          .gsub(DIGIT_ONLY_YEAR_LAST_REGEX, ' wsdateword ')
+    end
+    private
+    def redact_dates(counter, text)
       if counter > 0
-        DOW_ABBR.each do |day|
-          MONTHS.each do |month|
-            new_string = new_string.gsub(/#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
-          end
-          MONTH_ABBR.each do |month|
-            new_string = new_string.gsub(/#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
-          end
-        end
-        DOW.each do |day|
-          MONTHS.each do |month|
-            new_string = new_string.gsub(/#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
-                                   .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
-                                   .gsub(/\d{4}\.*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
-                                   .gsub(/\d{4}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*\d+\.?/i, ' wsdateword ')
-                                   .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
-                                   .gsub(/\d{2}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*(\d{4}|\d{2})\.?/i, ' wsdateword ')
-          end
-          MONTH_ABBR.each do |month|
-            new_string = new_string.gsub(/#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
-                           .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
-                           .gsub(/\d{4}\.*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
-                           .gsub(/\d{4}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*\d+\.?/i, ' wsdateword ')
-                           .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
-                           .gsub(/\d{2}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*(\d{4}|\d{2})\.?/i, ' wsdateword ')
-          end
-        end
+        text = redact_dow_abbr(text)
+        text = redact_dow(text)
       else
-        DOW.each do |day|
-          MONTHS.each do |month|
-            new_string = new_string.gsub(/#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
-                                   .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
-                                   .gsub(/\d{4}\.*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
-                                   .gsub(/\d{4}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*\d+\.?/i, ' wsdateword ')
-                                   .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
-                                   .gsub(/\d{2}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*(\d{4}|\d{2})\.?/i, ' wsdateword ')
-          end
-          MONTH_ABBR.each do |month|
-            new_string = new_string.gsub(/#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
+        text = redact_dow(text)
+        text = redact_dow_abbr(text)
+      end
+      text
+    end
+    def redact_regex(text)
+      text.gsub(DMY_MDY_REGEX, ' wsdateword ')
+          .gsub(YMD_YDM_REGEX, ' wsdateword ')
+          .gsub(DIGIT_ONLY_YEAR_FIRST_REGEX, ' wsdateword ')
+          .gsub(DIGIT_ONLY_YEAR_LAST_REGEX, ' wsdateword ')
+    end
+    def redact_dow(text)
+      DOW.each do |day|
+        MONTHS.map { |month| text = redact_date(text, day, month) }
+        MONTH_ABBR.map { |month| text = redact_date(text, day, month) }
+      end
+      text
+    end
+    def redact_dow_abbr(text)
+      DOW_ABBR.each do |day|
+        MONTHS.map { |month| text = text.gsub(/#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ') }
+        MONTH_ABBR.map { |month| text = text.gsub(/#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ') }
+      end
+      text
+    end
+    def redact_date(text, day, month)
+      text.gsub(/#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
                            .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
                            .gsub(/\d{4}\.*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
                            .gsub(/\d{4}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*\d+\.?/i, ' wsdateword ')
                            .gsub(/#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*\.?/i, ' wsdateword ')
                            .gsub(/\d{2}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*(\d{4}|\d{2})\.?/i, ' wsdateword ')
-          end
-        end
-        DOW_ABBR.each do |day|
-          MONTHS.each do |month|
-            new_string = new_string.gsub(/#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
-          end
-          MONTH_ABBR.each do |month|
-            new_string = new_string.gsub(/#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}\.?/i, ' wsdateword ')
-          end
-        end
-      end
-      new_string = new_string.gsub(DMY_MDY_REGEX, ' wsdateword ')
-                     .gsub(YMD_YDM_REGEX, ' wsdateword ')
-                     .gsub(DIGIT_ONLY_YEAR_FIRST_REGEX, ' wsdateword ')
-                     .gsub(DIGIT_ONLY_YEAR_LAST_REGEX, ' wsdateword ')
-    end
-    def occurences
-      replace.scan(/wsdateword/).size
     end
-    def replace_number_only_date
-      string.gsub(DMY_MDY_REGEX, ' wsdateword ')
-            .gsub(YMD_YDM_REGEX, ' wsdateword ')
-            .gsub(DIGIT_ONLY_YEAR_FIRST_REGEX, ' wsdateword ')
-            .gsub(DIGIT_ONLY_YEAR_LAST_REGEX, ' wsdateword ')
+    def includes_long_date?(text)
+      includes_long_date_1?(text) || includes_long_date_2?(text)
     end
-    private
-    def long_date
-      match_found = false
+    def includes_long_date_1?(text)
       DOW.each do |day|
-        MONTHS.each do |month|
-          break if match_found
-          match_found = check_for_matches(day, month)
-        end
-        MONTH_ABBR.each do |month|
-          break if match_found
-          match_found = check_for_matches(day, month)
-        end
+        MONTHS.map { |month| return true if check_for_matches(day, month, text) }
+        MONTH_ABBR.map { |month| return true if check_for_matches(day, month, text) }
       end
+      false
+    end
+    def includes_long_date_2?(text)
       DOW_ABBR.each do |day|
-        MONTHS.each do |month|
-          break if match_found
-          match_found = !(string !~ /#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*(,)*\s\d{4}/i)
-        end
-        MONTH_ABBR.each do |month|
-          break if match_found
-          match_found = !(string !~ /#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}/i)
-        end
+        MONTHS.map { |month| return true if !(text !~ /#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*(,)*\s\d{4}/i) }
+        MONTH_ABBR.map { |month| return true if !(text !~ /#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}/i) }
       end
-      match_found
+      false
     end
-    def number_only_date
-      !(string !~ DMY_MDY_REGEX) ||
-      !(string !~ YMD_YDM_REGEX) ||
-      !(string !~ DIGIT_ONLY_YEAR_FIRST_REGEX) ||
-      !(string !~ DIGIT_ONLY_YEAR_LAST_REGEX)
+    def includes_number_only_date?(text)
+      !(text !~ DMY_MDY_REGEX) ||
+      !(text !~ YMD_YDM_REGEX) ||
+      !(text !~ DIGIT_ONLY_YEAR_FIRST_REGEX) ||
+      !(text !~ DIGIT_ONLY_YEAR_LAST_REGEX)
     end
-    def check_for_matches(day, month)
-      !(string !~ /#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}/i) ||
-      !(string !~ /#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}/i) ||
-      !(string !~ /\d{4}\.*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*/i) ||
-      !(string !~ /\d{4}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*\d+/i) ||
-      !(string !~ /#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*/i) ||
-      !(string !~ /\d{2}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*(\d{4}|\d{2})/i)
+    def check_for_matches(day, month, text)
+      !(text !~ /#{Regexp.escape(day)}(,)*\s#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}/i) ||
+      !(text !~ /#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*(,)*\s\d{4}/i) ||
+      !(text !~ /\d{4}\.*\s#{Regexp.escape(month)}\s\d+(rd|th|st)*/i) ||
+      !(text !~ /\d{4}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*\d+/i) ||
+      !(text !~ /#{Regexp.escape(month)}(\.)*\s\d+(rd|th|st)*/i) ||
+      !(text !~ /\d{2}(\.|-|\/)*#{Regexp.escape(month)}(\.|-|\/)*(\d{4}|\d{2})/i)
     end
   end
-end
+end

data/lib/word_count_analyzer/ellipsis.rb CHANGED Viewed

@@ -13,30 +13,25 @@ module WordCountAnalyzer
     UNICODE_ELLIPSIS = /(?<=[^…]|\A)…{1}(?=[^…]|$)/
-    attr_reader :string
-    def initialize(string:)
-      @string = string
+    def includes_ellipsis?(text)
+      !(text !~ FOUR_CONSECUTIVE_REGEX) ||
+      !(text !~ THREE_SPACE_REGEX) ||
+      !(text !~ FOUR_SPACE_REGEX) ||
+      !(text !~ OTHER_THREE_PERIOD_REGEX) ||
+      !(text !~ UNICODE_ELLIPSIS)
     end
-    def includes_ellipsis?
-      !(string !~ FOUR_CONSECUTIVE_REGEX) ||
-      !(string !~ THREE_SPACE_REGEX) ||
-      !(string !~ FOUR_SPACE_REGEX) ||
-      !(string !~ OTHER_THREE_PERIOD_REGEX) ||
-      !(string !~ UNICODE_ELLIPSIS)
-    end
-    def replace
-      string.gsub(FOUR_CONSECUTIVE_REGEX, ' wseword ')
+    def replace(text)
+      text.gsub(FOUR_CONSECUTIVE_REGEX, ' wseword ')
             .gsub(THREE_SPACE_REGEX, ' wseword ')
             .gsub(FOUR_SPACE_REGEX, ' wseword ')
             .gsub(OTHER_THREE_PERIOD_REGEX, ' wseword ')
             .gsub(UNICODE_ELLIPSIS, ' wseword ')
     end
-    def occurences
+    def occurences(text)
       count = 0
-      replace.split(' ').map { |token| count += 1 if token.strip.eql?('wseword') }
+      replace(text).split(' ').map { |token| count += 1 if token.strip.eql?('wseword') }
       count
     end
   end

data/lib/word_count_analyzer/hyperlink.rb CHANGED Viewed

@@ -7,49 +7,38 @@ module WordCountAnalyzer
     # Rubular: http://rubular.com/r/fXa4lp0gfS
     HYPERLINK_REGEX = /(http|https|www)(\.|:)/
-    attr_reader :string
-    def initialize(string:)
-      @string = string
+    def hyperlink?(text)
+      !(text !~ URI.regexp) && text !~ NON_HYPERLINK_REGEX && !(text !~ HYPERLINK_REGEX)
     end
-    def hyperlink?
-      !(string !~ URI.regexp) && string !~ NON_HYPERLINK_REGEX && !(string !~ HYPERLINK_REGEX)
+    def occurences(text)
+      text.scan(URI.regexp).map { |link| link.compact.size > 1 ? 1 : 0 }.inject(0) { |sum, x| sum + x }
     end
-    def occurences
-      counter = 0
-      string.scan(URI.regexp).each do |link|
-        counter += 1 if link.compact.size > 1
-      end
-      counter
-    end
-    def replace
-      new_string = string.dup
-      string.split(/\s+/).each do |token|
+    def replace(text)
+      text.split(/\s+/).each do |token|
         if !(token !~ URI.regexp) && token !~ NON_HYPERLINK_REGEX && !(token !~ HYPERLINK_REGEX) && token.include?('">')
-          new_string = new_string.gsub(/#{Regexp.escape(token.split('">')[0])}/, ' wslinkword ')
+          text = text.gsub(/#{Regexp.escape(token.split('">')[0])}/, ' wslinkword ')
         elsif !(token !~ URI.regexp) && token !~ NON_HYPERLINK_REGEX && !(token !~ HYPERLINK_REGEX)
-          new_string = new_string.gsub(/#{Regexp.escape(token)}/, ' wslinkword ')
+          text = text.gsub(/#{Regexp.escape(token)}/, ' wslinkword ')
         end
       end
-      new_string
+      text
     end
-    def replace_split_at_period
-      new_string = string.dup
-      string.split(/\s+/).each do |token|
+    def replace_split_at_period(text)
+      text.split(/\s+/).each do |token|
         if !(token !~ URI.regexp) && token !~ NON_HYPERLINK_REGEX && !(token !~ HYPERLINK_REGEX) && token.include?('">')
-          new_string.gsub!(/#{Regexp.escape(token.split('">')[0])}/) do |match|
+          text.gsub!(/#{Regexp.escape(token.split('">')[0])}/) do |match|
             match.split('.').join(' ')
           end
         elsif !(token !~ URI.regexp) && token !~ NON_HYPERLINK_REGEX && !(token !~ HYPERLINK_REGEX)
-          new_string.gsub!(/#{Regexp.escape(token)}/) do |match|
+          text.gsub!(/#{Regexp.escape(token)}/) do |match|
             match.split('.').join(' ')
           end
         end
       end
-      new_string
+      text
     end
   end
 end