RubyGems - textstat - Versions diffs - 0.1.1 → 0.1.7 - Mend

textstat 0.1.1 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

checksums.yaml +4 -4
data/lib/counter.rb +37 -37
data/lib/dictionaries/ca.txt +2487 -2487
data/lib/dictionaries/cs.txt +2499 -2499
data/lib/dictionaries/en_us.txt +2944 -2944
data/lib/dictionaries/nl.txt +2549 -2549
data/lib/textstat.rb +309 -293
data/lib/textstat/version.rb +3 -3
data/spec/textstat_spec.rb +191 -161
metadata +5 -6

data/lib/textstat.rb CHANGED Viewed

@@ -1,293 +1,309 @@
-require 'text-hyphen'
-class TextStat
-  def self.char_count(text, ignore_spaces = true)
-    text = text.delete(' ') if ignore_spaces
-    text.length
-  end
-  def self.lexicon_count(text, remove_punctuation = true)
-    text  = text.gsub(/[^a-zA-Z\s]/, '').squeeze(' ') if remove_punctuation
-    count = text.split(' ').count
-    count
-  end
-  def self.syllable_count(text, language = 'en_us')
-    return 0 if text.empty?
-    text = text.downcase
-    text.gsub(/[^a-zA-Z\s]/, '').squeeze(' ')
-    dictionary = Text::Hyphen.new(language: language, left: 0, right: 0)
-    count = 0
-    text.split(' ').each do |word|
-      word_hyphenated = dictionary.visualise(word)
-      count += [1, word_hyphenated.count('-') + 1].max
-    end
-    count
-  end
-  def self.sentence_count(text)
-    text.scan(/[\.\?!][\'\\)\]]*[ |\n][A-Z]/).map(&:strip).count + 1
-  end
-  def self.avg_sentence_length(text)
-    asl = lexicon_count(text).to_f / sentence_count(text).to_f
-    asl.round(1)
-  rescue ZeroDivisionError
-    0.0
-  end
-  def self.avg_syllables_per_word(text)
-    syllable = syllable_count(text)
-    words    = lexicon_count(text)
-    begin
-      syllables_per_word = syllable.to_f / words.to_f
-      return syllables_per_word.round(1)
-    rescue ZeroDivisionError
-      return 0.0
-    end
-  end
-  def self.avg_letter_per_word(text)
-    letters_per_word = char_count(text).to_f / lexicon_count(text).to_f
-    letters_per_word.round(2)
-  rescue ZeroDivisionError
-    0.0
-  end
-  def self.avg_sentence_per_word(text)
-    sentence_per_word = sentence_count(text).to_f / lexicon_count(text).to_f
-    sentence_per_word.round(2)
-  rescue ZeroDivisionError
-    0.0
-  end
-  def self.flesch_reading_ease(text)
-    sentence_length    = avg_sentence_length(text)
-    syllables_per_word = avg_syllables_per_word(text)
-    flesch = (
-    206.835 - (1.015 * sentence_length).to_f - (84.6 * syllables_per_word).to_f
-    )
-    flesch.round(2)
-  end
-  def self.flesch_kincaid_grade(text)
-    sentence_length = avg_sentence_length(text)
-    syllables_per_word = avg_syllables_per_word(text)
-    flesch = (0.39 * sentence_length.to_f) + (11.8 * syllables_per_word.to_f) - 15.59
-    flesch.round(1)
-  end
-  def self.polysyllab_count(text)
-    count = 0
-    text.split(' ').each do |word|
-      w = syllable_count(word)
-      count += 1 if w >= 3
-    end
-    count
-  end
-  def self.smog_index(text)
-    sentences = sentence_count(text)
-    if sentences >= 3
-      begin
-        polysyllab = polysyllab_count(text)
-        smog = (
-        (1.043 * (30 * (polysyllab / sentences))**0.5) + 3.1291)
-        return smog.round(1)
-      rescue ZeroDivisionError
-        return 0.0
-      end
-    else
-      return 0.0
-    end
-  end
-  def self.coleman_liau_index(text)
-    letters   = (avg_letter_per_word(text) * 100).round(2)
-    sentences = (avg_sentence_per_word(text) * 100).round(2)
-    coleman   = ((0.058 * letters) - (0.296 * sentences) - 15.8).to_f
-    coleman.round(2)
-  end
-  def self.automated_readability_index(text)
-    chars     = char_count(text)
-    words     = lexicon_count(text)
-    sentences = sentence_count(text)
-    begin
-      a = chars.to_f / words.to_f
-      b = words.to_f / sentences.to_f
-      readability = (
-      (4.71 * a.round(2) + (0.5 * b.round(2))) - 21.43)
-      return readability.round(1)
-    rescue ZeroDivisionError
-      return 0.0
-    end
-  end
-  def self.linsear_write_formula(text)
-    easy_word = 0
-    difficult_word = 0
-    text_list = text.split(' ')[0..100]
-    text_list.each do |word|
-      if syllable_count(word) < 3
-        easy_word += 1
-      else
-        difficult_word += 1
-      end
-    end
-    text = text_list.join(' ')
-    number = ((easy_word * 1 + difficult_word * 3) / sentence_count(text)).to_f
-    if number <= 20
-      number -= 2
-    end
-    return number / 2
-  end
-  def self.difficult_words(text, language = 'en_us')
-    require 'set'
-    easy_words = Set.new
-    File.read("lib/dictionaries/#{language}.txt").each_line do |line|
-      easy_words << line.chop
-    end
-    text_list = text.downcase.gsub(/[^0-9a-z ]/i, '').split(' ')
-    diff_words_set = Set.new
-    text_list.each do |value|
-      unless easy_words.include? value
-        if syllable_count(value) > 1
-          diff_words_set.add(value)
-        end
-      end
-    end
-    return diff_words_set.length
-  end
-  def self.dale_chall_readability_score(text)
-    word_count = lexicon_count(text)
-    count = word_count - difficult_words(text)
-    begin
-      per = count.to_f / word_count.to_f * 100
-    rescue ZeroDivisionError
-      return 0.0
-    end
-    difficult_words = 100 - per
-    score = (
-    (0.1579 * difficult_words)
-    + (0.0496 * avg_sentence_length(text)))
-    if difficult_words > 5
-      score += 3.6365
-    end
-    return score.round(2)
-  end
-  def self.gunning_fog(text)
-    begin
-      per_diff_words = (
-      (difficult_words(text) / lexicon_count(text) * 100) + 5)
-      grade = 0.4 * (avg_sentence_length(text) + per_diff_words)
-      return grade.round(2)
-    rescue ZeroDivisionError
-      return 0.0
-    end
-  end
-  def self.lix(text)
-    words = text.split(' ')
-    words_length = words.length
-    long_words = words.select { |word| word.length > 6 }.count
-    per_long_words = (long_words * 100).to_f / words_length
-    asl = avg_sentence_length(text)
-    lix = asl + per_long_words
-    return lix.round(2)
-  end
-  def self.text_standard(text, float_output=nil)
-    grade = []
-    lower = flesch_kincaid_grade(text).round
-    upper = flesch_kincaid_grade(text).ceil
-    grade.append(lower.to_i)
-    grade.append(upper.to_i)
-    # Appending Flesch Reading Easy
-    score = flesch_reading_ease(text)
-    if score < 100 && score >= 90
-      grade.append(5)
-    elsif score < 90 && score >= 80
-      grade.append(6)
-    elsif score < 80 && score >= 70
-      grade.append(7)
-    elsif score < 70 && score >= 60
-      grade.append(8)
-      grade.append(9)
-    elsif score < 60 && score >= 50
-      grade.append(10)
-    elsif score < 50 && score >= 40
-      grade.append(11)
-    elsif score < 40 && score >= 30
-      grade.append(12)
-    else
-      grade.append(13)
-    end
-    # Appending SMOG Index
-    lower = smog_index(text).round
-    upper = smog_index(text).ceil
-    grade.append(lower.to_i)
-    grade.append(upper.to_i)
-    # Appending Coleman_Liau_Index
-    lower = coleman_liau_index(text).round
-    upper = coleman_liau_index(text).ceil
-    grade.append(lower.to_i)
-    grade.append(upper.to_i)
-    # Appending Automated_Readability_Index
-    lower = automated_readability_index(text).round
-    upper = automated_readability_index(text).ceil
-    grade.append(lower.to_i)
-    grade.append(upper.to_i)
-    # Appending Dale_Chall_Readability_Score
-    lower = dale_chall_readability_score(text).round
-    upper = dale_chall_readability_score(text).ceil
-    grade.append(lower.to_i)
-    grade.append(upper.to_i)
-    # Appending Linsear_Write_Formula
-    lower = linsear_write_formula(text).round
-    upper = linsear_write_formula(text).ceil
-    grade.append(lower.to_i)
-    grade.append(upper.to_i)
-    # Appending Gunning Fog Index
-    lower = gunning_fog(text).round
-    upper = gunning_fog(text).ceil
-    grade.append(lower.to_i)
-    grade.append(upper.to_i)
-    # Finding the Readability Consensus based upon all the above tests
-    require 'counter'
-    d = Counter.new(grade)
-    final_grade = d.most_common(1)
-    score = final_grade[0][0]
-    if float_output
-      return score.to_f
-    else
-      return "#{score.to_i - 1}th and #{score.to_i}th grade"
-    end
-  end
-end
+require 'text-hyphen'
+class TextStat
+  GEM_PATH = File.dirname(File.dirname(__FILE__))
+  def self.char_count(text, ignore_spaces = true)
+    text = text.delete(' ') if ignore_spaces
+    text.length
+  end
+  def self.lexicon_count(text, remove_punctuation = true)
+    text  = text.gsub(/[^a-zA-Z\s]/, '').squeeze(' ') if remove_punctuation
+    count = text.split(' ').count
+    count
+  end
+  def self.syllable_count(text, language = 'en_us')
+    return 0 if text.empty?
+    text = text.downcase
+    text.gsub(/[^a-zA-Z\s]/, '').squeeze(' ')
+    dictionary = Text::Hyphen.new(language: language, left: 0, right: 0)
+    count = 0
+    text.split(' ').each do |word|
+      word_hyphenated = dictionary.visualise(word)
+      count += word_hyphenated.count('-') + 1
+    end
+    count
+  end
+  def self.sentence_count(text)
+    text.scan(/[\.\?!][\'\\)\]]*[ |\n][A-Z]/).map(&:strip).count + 1
+  end
+  def self.avg_sentence_length(text)
+    asl = lexicon_count(text).to_f / sentence_count(text)
+    asl.round(1)
+  rescue ZeroDivisionError
+    0.0
+  end
+  def self.avg_syllables_per_word(text)
+    syllable = syllable_count(text)
+    words    = lexicon_count(text)
+    begin
+      syllables_per_word = syllable.to_f / words
+      syllables_per_word.round(1)
+    rescue ZeroDivisionError
+      0.0
+    end
+  end
+  def self.avg_letter_per_word(text)
+    letters_per_word = char_count(text).to_f / lexicon_count(text)
+    letters_per_word.round(2)
+  rescue ZeroDivisionError
+    0.0
+  end
+  def self.avg_sentence_per_word(text)
+    sentence_per_word = sentence_count(text).to_f / lexicon_count(text)
+    sentence_per_word.round(2)
+  rescue ZeroDivisionError
+    0.0
+  end
+  def self.flesch_reading_ease(text)
+    sentence_length    = avg_sentence_length(text)
+    syllables_per_word = avg_syllables_per_word(text)
+    flesch = 206.835 - 1.015 * sentence_length - 84.6 * syllables_per_word
+    flesch.round(2)
+  end
+  def self.flesch_kincaid_grade(text)
+    sentence_length = avg_sentence_length(text)
+    syllables_per_word = avg_syllables_per_word(text)
+    flesch = 0.39 * sentence_length + 11.8 * syllables_per_word - 15.59
+    flesch.round(1)
+  end
+  def self.polysyllab_count(text)
+    count = 0
+    text.split(' ').each do |word|
+      w = syllable_count(word)
+      count += 1 if w >= 3
+    end
+    count
+  end
+  def self.smog_index(text)
+    sentences = sentence_count(text)
+    if sentences >= 3
+      begin
+        polysyllab = polysyllab_count(text)
+        smog = 1.043 * Math.sqrt(30.0 * polysyllab / sentences) + 3.1291
+        smog.round(1)
+      rescue ZeroDivisionError
+        0.0
+      end
+    else
+      0.0
+    end
+  end
+  def self.coleman_liau_index(text)
+    letters = (avg_letter_per_word(text) * 100).round(2)
+    sentences = (avg_sentence_per_word(text) * 100).round(2)
+    coleman = 0.0588 * letters - 0.296 * sentences - 15.8
+    coleman.round(2)
+  end
+  def self.automated_readability_index(text)
+    chars = char_count(text)
+    words = lexicon_count(text)
+    sentences = sentence_count(text)
+    begin
+      a = chars.to_f / words
+      b = words.to_f / sentences
+      readability = 4.71 * a + 0.5 * b - 21.43
+      readability.round(1)
+    rescue ZeroDivisionError
+      0.0
+    end
+  end
+  def self.linsear_write_formula(text)
+    easy_word = 0
+    difficult_word = 0
+    text_list = text.split(' ')[0..100]
+    text_list.each do |word|
+      if syllable_count(word) < 3
+        easy_word += 1
+      else
+        difficult_word += 1
+      end
+    end
+    text = text_list.join(' ')
+    number = (easy_word * 1 + difficult_word * 3).to_f / sentence_count(text)
+    number -= 2 if number <= 20
+    number / 2
+  end
+  def self.difficult_words(text, language = 'en_us')
+    require 'set'
+    easy_words = Set.new
+    File.read(File.join(dictionary_path, "#{language}.txt")).each_line do |line|
+      easy_words << line.chop
+    end
+    text_list = text.downcase.gsub(/[^0-9a-z ]/i, '').split(' ')
+    diff_words_set = Set.new
+    text_list.each do |value|
+      next if easy_words.include? value
+      diff_words_set.add(value) if syllable_count(value) > 1
+    end
+    diff_words_set.length
+  end
+  def self.dale_chall_readability_score(text)
+    word_count = lexicon_count(text)
+    count = word_count - difficult_words(text)
+    begin
+      per = 100.0 * count / word_count
+    rescue ZeroDivisionError
+      return 0.0
+    end
+    difficult_words = 100 - per
+    score = 0.1579 * difficult_words + 0.0496 * avg_sentence_length(text)
+    score += 3.6365 if difficult_words > 5
+    score.round(2)
+  end
+  def self.gunning_fog(text)
+    per_diff_words = 100.0 * difficult_words(text) / lexicon_count(text) + 5
+    grade = 0.4 * (avg_sentence_length(text) + per_diff_words)
+    grade.round(2)
+  rescue ZeroDivisionError
+    0.0
+  end
+  def self.lix(text)
+    words = text.split(' ')
+    words_length = words.length
+    long_words = words.count { |word| word.length > 6 }
+    per_long_words = 100.0 * long_words / words_length
+    asl = avg_sentence_length(text)
+    lix = asl + per_long_words
+    lix.round(2)
+  end
+  def self.forcast(text, language = 'en_us')
+    words = text.split(' ')[0..149]
+    words_with_one_syllabe = words.count {
+      |word| syllable_count(word, language) == 1
+    }
+    forcast = 20 - (words_with_one_syllabe / 10)
+    forcast
+  end
+  def self.powers_sumner_kearl(text)
+    grade = 0.0778 * avg_sentence_length(text) + 0.0455 * syllable_count(text) - 2.2029
+    grade.round(2)
+  end
+  def self.spache(text, language = 'en_us')
+    words = text.split(' ').count
+    unfamiliar_words = difficult_words(text, language) / words
+    grade = (0.141 * avg_sentence_length(text)) + (0.086 * unfamiliar_words) + 0.839
+    grade.round(2)
+  end
+  def self.text_standard(text, float_output=nil)
+    grade = []
+    lower = flesch_kincaid_grade(text).round
+    upper = flesch_kincaid_grade(text).ceil
+    grade.append(lower.to_i)
+    grade.append(upper.to_i)
+    # Appending Flesch Reading Easy
+    score = flesch_reading_ease(text)
+    if score < 100 && score >= 90
+      grade.append(5)
+    elsif score < 90 && score >= 80
+      grade.append(6)
+    elsif score < 80 && score >= 70
+      grade.append(7)
+    elsif score < 70 && score >= 60
+      grade.append(8)
+      grade.append(9)
+    elsif score < 60 && score >= 50
+      grade.append(10)
+    elsif score < 50 && score >= 40
+      grade.append(11)
+    elsif score < 40 && score >= 30
+      grade.append(12)
+    else
+      grade.append(13)
+    end
+    # Appending SMOG Index
+    lower = smog_index(text).round
+    upper = smog_index(text).ceil
+    grade.append(lower.to_i)
+    grade.append(upper.to_i)
+    # Appending Coleman_Liau_Index
+    lower = coleman_liau_index(text).round
+    upper = coleman_liau_index(text).ceil
+    grade.append(lower.to_i)
+    grade.append(upper.to_i)
+    # Appending Automated_Readability_Index
+    lower = automated_readability_index(text).round
+    upper = automated_readability_index(text).ceil
+    grade.append(lower.to_i)
+    grade.append(upper.to_i)
+    # Appending Dale_Chall_Readability_Score
+    lower = dale_chall_readability_score(text).round
+    upper = dale_chall_readability_score(text).ceil
+    grade.append(lower.to_i)
+    grade.append(upper.to_i)
+    # Appending Linsear_Write_Formula
+    lower = linsear_write_formula(text).round
+    upper = linsear_write_formula(text).ceil
+    grade.append(lower.to_i)
+    grade.append(upper.to_i)
+    # Appending Gunning Fog Index
+    lower = gunning_fog(text).round
+    upper = gunning_fog(text).ceil
+    grade.append(lower.to_i)
+    grade.append(upper.to_i)
+    # Finding the Readability Consensus based upon all the above tests
+    require 'counter'
+    d = Counter.new(grade)
+    final_grade = d.most_common(1)
+    score = final_grade[0][0]
+    if float_output
+      score.to_f
+    else
+      "#{score.to_i - 1}th and #{score.to_i}th grade"
+    end
+  end
+  def self.dictionary_path=(path)
+    @dictionary_path = path
+  end
+  def self.dictionary_path
+    @dictionary_path ||= File.join(TextStat::GEM_PATH, 'lib', 'dictionaries')
+  end
+end