RubyGems - word_count_analyzer - Versions diffs - 0.0.1 - Mend

word_count_analyzer 0.0.1

Files changed (37) hide show

checksums.yaml +7 -0
data/.gitignore +14 -0
data/.rspec +1 -0
data/.travis.yml +5 -0
data/Gemfile +4 -0
data/LICENSE.txt +22 -0
data/README.md +554 -0
data/Rakefile +2 -0
data/lib/word_count_analyzer.rb +14 -0
data/lib/word_count_analyzer/analyzer.rb +34 -0
data/lib/word_count_analyzer/contraction.rb +176 -0
data/lib/word_count_analyzer/counter.rb +230 -0
data/lib/word_count_analyzer/date.rb +149 -0
data/lib/word_count_analyzer/ellipsis.rb +48 -0
data/lib/word_count_analyzer/hyperlink.rb +53 -0
data/lib/word_count_analyzer/hyphenated_word.rb +23 -0
data/lib/word_count_analyzer/number.rb +23 -0
data/lib/word_count_analyzer/numbered_list.rb +61 -0
data/lib/word_count_analyzer/punctuation.rb +52 -0
data/lib/word_count_analyzer/slash.rb +84 -0
data/lib/word_count_analyzer/version.rb +3 -0
data/lib/word_count_analyzer/xhtml.rb +26 -0
data/spec/spec_helper.rb +1 -0
data/spec/word_count_analyzer/analyzer_spec.rb +11 -0
data/spec/word_count_analyzer/contraction_spec.rb +124 -0
data/spec/word_count_analyzer/counter_spec.rb +647 -0
data/spec/word_count_analyzer/date_spec.rb +257 -0
data/spec/word_count_analyzer/ellipsis_spec.rb +69 -0
data/spec/word_count_analyzer/hyperlink_spec.rb +77 -0
data/spec/word_count_analyzer/hyphenated_word_spec.rb +81 -0
data/spec/word_count_analyzer/number_spec.rb +63 -0
data/spec/word_count_analyzer/numbered_list_spec.rb +69 -0
data/spec/word_count_analyzer/punctuation_spec.rb +91 -0
data/spec/word_count_analyzer/slash_spec.rb +105 -0
data/spec/word_count_analyzer/xhtml_spec.rb +65 -0
data/word_count_analyzer.gemspec +26 -0
metadata +153 -0

data/Rakefile ADDED Viewed

@@ -0,0 +1,2 @@
+require "bundler/gem_tasks"
+

data/lib/word_count_analyzer.rb ADDED Viewed

@@ -0,0 +1,14 @@
+require "word_count_analyzer/version"
+require "word_count_analyzer/analyzer"
+require "word_count_analyzer/counter"
+require "word_count_analyzer/contraction"
+require "word_count_analyzer/hyperlink"
+require "word_count_analyzer/hyphenated_word"
+require "word_count_analyzer/date"
+require "word_count_analyzer/ellipsis"
+require "word_count_analyzer/numbered_list"
+require "word_count_analyzer/xhtml"
+require "word_count_analyzer/number"
+require "word_count_analyzer/slash"
+require "word_count_analyzer/punctuation"
+require "engtagger"

data/lib/word_count_analyzer/analyzer.rb ADDED Viewed

@@ -0,0 +1,34 @@
+module WordCountAnalyzer
+  # Reports how often each word-count "gray area" (ellipses, hyperlinks,
+  # contractions, dates, markup, stray punctuation, ...) occurs in a text.
+  class Analyzer
+    attr_reader :text, :tgr
+    # text - the String to analyze.
+    # An EngTagger instance is created here and handed to the contraction check.
+    def initialize(text:)
+      @text = text
+      @tgr = EngTagger.new
+    end
+    # Returns a Hash mapping each gray-area name (String) to the value
+    # reported by the matching detector class for the text.
+    def analyze
+      contraction_count, hyphenated_word_count = count_contractions_and_hyphenated_words
+      {
+        'ellipsis'          => WordCountAnalyzer::Ellipsis.new(string: text).occurences,
+        'hyperlink'         => WordCountAnalyzer::Hyperlink.new(string: text).occurences,
+        'contraction'       => contraction_count,
+        'hyphenated_word'   => hyphenated_word_count,
+        'date'              => WordCountAnalyzer::Date.new(string: text).occurences,
+        'number'            => WordCountAnalyzer::Number.new(string: text).occurences,
+        'numbered_list'     => WordCountAnalyzer::NumberedList.new(string: text).occurences,
+        'xhtml'             => WordCountAnalyzer::Xhtml.new(string: text).occurences,
+        'forward_slash'     => WordCountAnalyzer::Slash.new(string: text).forward_slash_occurences,
+        'backslash'         => WordCountAnalyzer::Slash.new(string: text).backslash_occurences,
+        'dotted_line'       => WordCountAnalyzer::Punctuation.new(string: text).dotted_line_ocurrances,
+        'dashed_line'       => WordCountAnalyzer::Punctuation.new(string: text).dashed_line_ocurrances,
+        'underscore'        => WordCountAnalyzer::Punctuation.new(string: text).underscore_ocurrances,
+        'stray_punctuation' => WordCountAnalyzer::Punctuation.new(string: text).stray_punctuation_occurences
+      }
+    end
+    private
+    # Returns [contraction_count, hyphenated_word_count] for the text after
+    # Xhtml#replace has been applied to it.
+    def count_contractions_and_hyphenated_words
+      # Split once. The look-ahead token must come from this same list:
+      # indexing into the raw text's tokens would misalign as soon as
+      # Xhtml#replace changes the number of tokens.
+      tokens = WordCountAnalyzer::Xhtml.new(string: text).replace.split(/\s+/)
+      contractions = tokens.each_with_index.count do |token, index|
+        WordCountAnalyzer::Contraction.new(token: token, following_token: tokens[index + 1], tgr: tgr, hyphen: 'single').contraction?
+      end
+      hyphenated_words = tokens.count do |token|
+        WordCountAnalyzer::HyphenatedWord.new(token: token).hyphenated_word?
+      end
+      [contractions, hyphenated_words]
+    end
+  end
+end

data/lib/word_count_analyzer/contraction.rb ADDED Viewed

@@ -0,0 +1,176 @@
+module WordCountAnalyzer
+  # Decides whether a single token is a contraction and, if so, how many
+  # words it expands to.
+  class Contraction
+    # Lower-cased contraction => expansion.
+    # Several contractions have two readings ("it's" => "it is" / "it has").
+    # A Hash keeps one value per key, so only one reading is listed; the
+    # alternative is noted beside it. Both readings have the same word count.
+    CONTRACTIONS = {
+      "i'm"               => "I am",
+      "i'll"              => "I will",
+      "i've"              => "I have",
+      "i'd"               => "I had",       # or "I would"
+      "you're"            => "you are",
+      "you'll"            => "you will",
+      "you've"            => "you have",
+      "you'd"             => "you had",     # or "you would"
+      "he's"              => "he has",      # or "he is"
+      "he'll"             => "he will",
+      "he'd"              => "he had",      # or "he would"
+      "she's"             => "she has",     # or "she is"
+      "she'll"            => "she will",
+      "she'd"             => "she had",     # or "she would"
+      "it's"              => "it has",      # or "it is"
+      "'tis"              => "it is",
+      "it'll"             => "it will",
+      "it'd"              => "it had",      # or "it would"
+      "we're"             => "we are",
+      "we'll"             => "we will",
+      "we've"             => "we have",
+      "we'd"              => "we had",      # or "we would"
+      "they're"           => "they are",
+      "they'll"           => "they will",
+      "they've"           => "they have",
+      "they'd"            => "they had",    # or "they would"
+      "that's"            => "that has",    # or "that is"
+      "that'll"           => "that will",
+      "that'd"            => "that had",    # or "that would"
+      "who's"             => "who has",     # or "who is"
+      "who'll"            => "who will",
+      "who'd"             => "who had",     # or "who would"
+      "what's"            => "what has",    # or "what is"
+      "what're"           => "what are",
+      "what'll"           => "what will",
+      "what'd"            => "what had",    # or "what would"
+      "where's"           => "where has",   # or "where is"
+      "where'll"          => "where will",
+      "where'd"           => "where had",   # or "where would"
+      "when's"            => "when has",    # or "when is"
+      "when'll"           => "when will",
+      "when'd"            => "when had",    # or "when would"
+      "why's"             => "why has",     # or "why is"
+      "why'll"            => "why will",
+      "why'd"             => "why had",     # or "why would"
+      "how's"             => "how has",     # or "how is"
+      "how'll"            => "how will",
+      "how'd"             => "how had",     # or "how would"
+      "she'd've"          => "she would have",
+      "'tisn't"           => "it is not",
+      "isn't"             => "is not",
+      "aren't"            => "are not",
+      "wasn't"            => "was not",
+      "weren't"           => "were not",
+      "haven't"           => "have not",
+      "hasn't"            => "has not",
+      "hadn't"            => "had not",
+      "won't"             => "will not",
+      "wouldn't"          => "would not",
+      "don't"             => "do not",
+      "doesn't"           => "does not",
+      "didn't"            => "did not",
+      "can't"             => "cannot",
+      "couldn't"          => "could not",
+      "shouldn't"         => "should not",
+      "mightn't"          => "might not",
+      "mustn't"           => "must not",
+      "would've"          => "would have",
+      "should've"         => "should have",
+      "could've"          => "could have",
+      "might've"          => "might have",
+      "must've"           => "must have",
+      "o'"                => "of",
+      "o'clock"           => "of the clock",
+      "ma'am"             => "madam",
+      "ne'er-do-well"     => "never-do-well",
+      "cat-o'-nine-tails" => "cat-of-nine-tails",
+      "jack-o'-lantern"   => "jack-of-the-lantern",
+      "will-o'-the-wisp"  => "will-of-the-wisp",
+      "'twas"             => "it was"
+    }.freeze
+    attr_reader :token, :following_token, :tgr, :hyphen
+    # token           - the String being examined.
+    # following_token - the next String in the text, or nil at the end.
+    # tgr             - a tagger responding to #add_tags (an EngTagger).
+    # hyphen:         - 'count_as_one' (default) counts a hyphenated
+    #                   expansion as one word; any other value counts each
+    #                   hyphen-separated part.
+    def initialize(token:, following_token:, tgr:, **args)
+      @token = token
+      @following_token = following_token
+      @tgr = tgr
+      @hyphen = args[:hyphen] || 'count_as_one'
+    end
+    # True for a listed contraction, or for a token containing "'s" whose
+    # following token is not tagged as a noun.
+    def contraction?
+      common_contraction? || irregular_contraction?
+    end
+    # Number of words the token stands for: the expansion's length for a
+    # listed contraction, 2 for any other contraction, 1 otherwise.
+    def expanded_count
+      return calculate_contraction_length if common_contraction?
+      irregular_contraction? ? 2 : 1
+    end
+    # Returns the listed expansion, the placeholder ' word word ' for any
+    # other contraction, or the token unchanged.
+    def replace
+      if common_contraction?
+        CONTRACTIONS[token.downcase]
+      elsif irregular_contraction?
+        ' word word '
+      else
+        token
+      end
+    end
+    private
+    def irregular_contraction?
+      apostrophe_s_token? && following_is_not_a_noun?
+    end
+    def calculate_contraction_length
+      hyphen.eql?('count_as_one') ? contraction_length : contraction_length_hyphen
+    end
+    def contraction_length
+      CONTRACTIONS[token.downcase].split(' ').length
+    end
+    def contraction_length_hyphen
+      CONTRACTIONS[token.downcase].split(' ').flat_map { |word| word.split('-') }.length
+    end
+    def common_contraction?
+      CONTRACTIONS.has_key?(token.downcase)
+    end
+    def following_is_not_a_noun?
+      # With no following token (end of the text) there is nothing to tag.
+      # Indexing the result used to raise NoMethodError on nil. A trailing
+      # "'s" is treated as a possessive, i.e. not a contraction.
+      return false if following_token.nil?
+      tagged = tgr.add_tags(following_token)
+      # NOTE(review): the tagger appears to return nil for blank input;
+      # guarded here so a blank token cannot crash the check - confirm.
+      return false if tagged.nil?
+      # The tagged string starts with "<tag>"; index 1 is the tag's first
+      # letter, compared against 'n' for noun tags.
+      !tagged[1].downcase.eql?('n')
+    end
+    def apostrophe_s_token?
+      token.include?("'s")
+    end
+  end
+end

data/lib/word_count_analyzer/counter.rb ADDED Viewed

@@ -0,0 +1,230 @@
+module WordCountAnalyzer
+  # Counts the words in a text. Each "gray area" (ellipsis, hyperlink,
+  # contraction, ...) has its own option controlling how it is counted.
+  # An unrecognised option value raises a RuntimeError naming the valid
+  # values when the count is run.
+  class Counter
+    # Option values used for the duration of #pages_count.
+    PAGES_COUNT_SETTINGS = {
+      ellipsis: 'no_special_treatment',
+      hyperlink: 'split_at_period',
+      contraction: 'count_as_one',
+      hyphenated_word: 'count_as_multiple',
+      date: 'no_special_treatment',
+      number: 'count',
+      numbered_list: 'count',
+      xhtml: 'keep',
+      forward_slash: 'count_as_multiple',
+      backslash: 'count_as_multiple',
+      dotted_line: 'ignore',
+      dashed_line: 'ignore',
+      underscore: 'ignore',
+      stray_punctuation: 'ignore'
+    }.freeze
+    # Option values used for the duration of #mword_count.
+    MWORD_COUNT_SETTINGS = {
+      ellipsis: 'no_special_treatment',
+      hyperlink: 'count_as_one',
+      contraction: 'count_as_one',
+      hyphenated_word: 'count_as_one',
+      date: 'no_special_treatment',
+      number: 'count',
+      numbered_list: 'count',
+      xhtml: 'keep',
+      forward_slash: 'count_as_one',
+      backslash: 'count_as_one',
+      dotted_line: 'count',
+      dashed_line: 'count',
+      underscore: 'count',
+      stray_punctuation: 'count'
+    }.freeze
+    # Text transformations, in the order they are applied. Each step
+    # receives the previous step's output.
+    PROCESSING_STEPS = [
+      :process_ellipsis,
+      :process_hyperlink,
+      :process_contraction,
+      :process_date,
+      :process_number,
+      :process_number_list,
+      :process_xhtml,
+      :process_forward_slash,
+      :process_backslash,
+      :process_hyphenated_word,
+      :process_dotted_line,
+      :process_dashed_line,
+      :process_underscore,
+      :process_stray_punctuation
+    ].freeze
+    attr_reader :text, :ellipsis, :hyperlink, :contraction, :hyphenated_word, :date, :number, :numbered_list, :xhtml, :forward_slash, :backslash, :dotted_line, :dashed_line, :underscore, :stray_punctuation
+    # text - the String whose words are counted.
+    # args - optional per-gray-area settings; each falls back to the
+    #        default shown below when omitted.
+    def initialize(text:, **args)
+      @text = text
+      @ellipsis = args[:ellipsis] || 'ignore'
+      @hyperlink = args[:hyperlink] || 'count_as_one'
+      @contraction = args[:contraction] || 'count_as_one'
+      @hyphenated_word = args[:hyphenated_word] || 'count_as_one'
+      @date = args[:date] || 'no_special_treatment'
+      @number = args[:number] || 'count'
+      @numbered_list = args[:numbered_list] || 'count'
+      @xhtml = args[:xhtml] || 'remove'
+      @forward_slash = args[:forward_slash] || 'count_as_multiple_except_dates'
+      @backslash = args[:backslash] || 'count_as_one'
+      @dotted_line = args[:dotted_line] || 'ignore'
+      @dashed_line = args[:dashed_line] || 'ignore'
+      @underscore = args[:underscore] || 'ignore'
+      @stray_punctuation = args[:stray_punctuation] || 'ignore'
+    end
+    # The tagger is only needed when contractions are expanded, so it is
+    # built on first use rather than for every Counter.
+    def tgr
+      @tgr ||= EngTagger.new
+    end
+    # Word count using the settings given to the constructor.
+    def count
+      word_count
+    end
+    # Word count using PAGES_COUNT_SETTINGS. The instance's own settings
+    # are restored afterwards, so a later #count is unaffected.
+    def pages_count
+      with_settings(PAGES_COUNT_SETTINGS) { word_count }
+    end
+    # Word count using MWORD_COUNT_SETTINGS. The instance's own settings
+    # are restored afterwards, so a later #count is unaffected.
+    def mword_count
+      with_settings(MWORD_COUNT_SETTINGS) { word_count }
+    end
+    private
+    # Applies the given settings, yields, then restores the previous
+    # values even if the block raises.
+    def with_settings(settings)
+      previous = settings.keys.each_with_object({}) do |name, saved|
+        saved[name] = instance_variable_get("@#{name}")
+      end
+      apply_settings(settings)
+      yield
+    ensure
+      apply_settings(previous) if previous
+    end
+    def apply_settings(settings)
+      settings.each { |name, value| instance_variable_set("@#{name}", value) }
+    end
+    # Runs every processing step over the text, then counts the
+    # whitespace-separated tokens that remain.
+    def word_count
+      processed_text = PROCESSING_STEPS.inject(text) { |txt, step| send(step, txt) }
+      processed_text.split(/\s+/).reject(&:empty?).size
+    end
+    def process_ellipsis(txt)
+      case ellipsis
+      when 'ignore'
+        # NOTE(review): 'wseword' looks like a placeholder left by
+        # Ellipsis#replace; it is stripped so it is not counted - confirm.
+        WordCountAnalyzer::Ellipsis.new(string: txt).replace.gsub(/wseword/, '')
+      when 'no_special_treatment'
+        txt
+      else
+        raise 'The value you specified for ellipsis is not a valid option. Please use either `ignore` or `no_special_treatment`. The default option is `ignore`'
+      end
+    end
+    def process_hyperlink(txt)
+      case hyperlink
+      when 'count_as_one'
+        WordCountAnalyzer::Hyperlink.new(string: txt).replace
+      when 'split_at_period'
+        WordCountAnalyzer::Hyperlink.new(string: txt).replace_split_at_period
+      when 'no_special_treatment'
+        txt
+      else
+        raise 'The value you specified for hyperlink is not a valid option. Please use either `count_as_one`, `split_at_period`, or `no_special_treatment`. The default option is `count_as_one`'
+      end
+    end
+    def process_contraction(txt)
+      case contraction
+      when 'count_as_one'
+        txt
+      when 'count_as_multiple'
+        tokens = txt.split(/\s+/)
+        tokens.each_with_index.map { |token, i| WordCountAnalyzer::Contraction.new(token: token, following_token: tokens[i + 1], tgr: tgr).replace }.join(' ')
+      else
+        raise 'The value you specified for contraction is not a valid option. Please use either `count_as_one` or `count_as_multiple`. The default option is `count_as_one`'
+      end
+    end
+    def process_hyphenated_word(txt)
+      case hyphenated_word
+      when 'count_as_one'
+        txt
+      when 'count_as_multiple'
+        txt.split(/\s+/).map { |token| WordCountAnalyzer::HyphenatedWord.new(token: token).replace }.join(' ')
+      else
+        raise 'The value you specified for hyphenated_word is not a valid option. Please use either `count_as_one` or `count_as_multiple`. The default option is `count_as_one`'
+      end
+    end
+    def process_date(txt)
+      case date
+      when 'no_special_treatment'
+        txt
+      when 'count_as_one'
+        WordCountAnalyzer::Date.new(string: txt).replace
+      else
+        raise 'The value you specified for date is not a valid option. Please use either `count_as_one` or `no_special_treatment`. The default option is `no_special_treatment`'
+      end
+    end
+    def process_number(txt)
+      case number
+      when 'ignore'
+        # NOTE(review): 'wsnumword' looks like a placeholder left by
+        # Number#replace; it is stripped so it is not counted - confirm.
+        WordCountAnalyzer::Number.new(string: txt).replace.gsub(/wsnumword/, '')
+      when 'count'
+        txt
+      else
+        raise 'The value you specified for number is not a valid option. Please use either `ignore` or `count`. The default option is `count`'
+      end
+    end
+    def process_number_list(txt)
+      case numbered_list
+      when 'ignore'
+        WordCountAnalyzer::NumberedList.new(string: txt).replace
+      when 'count'
+        txt
+      else
+        raise 'The value you specified for numbered_list is not a valid option. Please use either `ignore` or `count`. The default option is `count`'
+      end
+    end
+    def process_xhtml(txt)
+      case xhtml
+      when 'remove'
+        WordCountAnalyzer::Xhtml.new(string: txt).replace
+      when 'keep'
+        txt
+      else
+        raise 'The value you specified for xhtml is not a valid option. Please use either `remove` or `keep`. The default option is `remove`'
+      end
+    end
+    def process_forward_slash(txt)
+      case forward_slash
+      when 'count_as_multiple'
+        WordCountAnalyzer::Slash.new(string: txt, date: date, xhtml: xhtml, hyperlink: hyperlink).replace_forward_slashes
+      when 'count_as_multiple_except_dates'
+        WordCountAnalyzer::Slash.new(string: txt, date: date, xhtml: xhtml, hyperlink: hyperlink).replace_forward_slashes_except_dates
+      when 'count_as_one'
+        txt
+      else
+        raise 'The value you specified for forward_slash is not a valid option. Please use either `count_as_one`, `count_as_multiple` or `count_as_multiple_except_dates`. The default option is `count_as_multiple_except_dates`'
+      end
+    end
+    def process_backslash(txt)
+      case backslash
+      when 'count_as_multiple'
+        WordCountAnalyzer::Slash.new(string: txt, date: date, xhtml: xhtml, hyperlink: hyperlink).replace_backslashes
+      when 'count_as_one'
+        txt
+      else
+        raise 'The value you specified for backslash is not a valid option. Please use either `count_as_one` or `count_as_multiple`. The default option is `count_as_one`'
+      end
+    end
+    def process_dotted_line(txt)
+      case dotted_line
+      when 'ignore'
+        WordCountAnalyzer::Punctuation.new(string: txt).replace_dotted_line
+      when 'count'
+        txt
+      else
+        raise 'The value you specified for dotted_line is not a valid option. Please use either `ignore` or `count`. The default option is `ignore`'
+      end
+    end
+    def process_dashed_line(txt)
+      case dashed_line
+      when 'ignore'
+        WordCountAnalyzer::Punctuation.new(string: txt).replace_dashed_line
+      when 'count'
+        txt
+      else
+        raise 'The value you specified for dashed_line is not a valid option. Please use either `ignore` or `count`. The default option is `ignore`'
+      end
+    end
+    def process_underscore(txt)
+      case underscore
+      when 'ignore'
+        WordCountAnalyzer::Punctuation.new(string: txt).replace_underscore
+      when 'count'
+        txt
+      else
+        raise 'The value you specified for underscore is not a valid option. Please use either `ignore` or `count`. The default option is `ignore`'
+      end
+    end
+    def process_stray_punctuation(txt)
+      case stray_punctuation
+      when 'ignore'
+        WordCountAnalyzer::Punctuation.new(string: txt).replace_stray_punctuation
+      when 'count'
+        txt
+      else
+        raise 'The value you specified for stray_punctuation is not a valid option. Please use either `ignore` or `count`. The default option is `ignore`'
+      end
+    end
+  end
+end