RubyGems - textstat - Versions diffs - 0.1.0 - Mend

textstat 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

data/lib/textstat/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
+class TextStat
+  VERSION = "0.1.0"
+end

data/lib/textstat.rb ADDED Viewed

@@ -0,0 +1,293 @@
+require 'text-hyphen'
+class TextStat
+  def self.char_count(text, ignore_spaces = true)
+    text = text.delete(' ') if ignore_spaces
+    text.length
+  end
+  def self.lexicon_count(text, remove_punctuation = true)
+    text  = text.gsub(/[^a-zA-Z\s]/, '').squeeze(' ') if remove_punctuation
+    count = text.split(' ').count
+    count
+  end
+  def self.syllable_count(text, language = 'en_us')
+    return 0 if text.empty?
+    text = text.downcase
+    text.gsub(/[^a-zA-Z\s]/, '').squeeze(' ')
+    dictionary = Text::Hyphen.new(language: language, left: 0, right: 0)
+    count = 0
+    text.split(' ').each do |word|
+      word_hyphenated = dictionary.visualise(word)
+      count += [1, word_hyphenated.count('-') + 1].max
+    end
+    count
+  end
+  def self.sentence_count(text)
+    text.scan(/[\.\?!][\'\\)\]]*[ |\n][A-Z]/).map(&:strip).count + 1
+  end
+  def self.avg_sentence_length(text)
+    asl = lexicon_count(text).to_f / sentence_count(text).to_f
+    asl.round(1)
+  rescue ZeroDivisionError
+    0.0
+  end
+  def self.avg_syllables_per_word(text)
+    syllable = syllable_count(text)
+    words    = lexicon_count(text)
+    begin
+      syllables_per_word = syllable.to_f / words.to_f
+      return syllables_per_word.round(1)
+    rescue ZeroDivisionError
+      return 0.0
+    end
+  end
+  def self.avg_letter_per_word(text)
+    letters_per_word = char_count(text).to_f / lexicon_count(text).to_f
+    letters_per_word.round(2)
+  rescue ZeroDivisionError
+    0.0
+  end
+  def self.avg_sentence_per_word(text)
+    sentence_per_word = sentence_count(text).to_f / lexicon_count(text).to_f
+    sentence_per_word.round(2)
+  rescue ZeroDivisionError
+    0.0
+  end
+  def self.flesch_reading_ease(text)
+    sentence_length    = avg_sentence_length(text)
+    syllables_per_word = avg_syllables_per_word(text)
+    flesch = (
+    206.835 - (1.015 * sentence_length).to_f - (84.6 * syllables_per_word).to_f
+    )
+    flesch.round(2)
+  end
+  def self.flesch_kincaid_grade(text)
+    sentence_length = avg_sentence_length(text)
+    syllables_per_word = avg_syllables_per_word(text)
+    flesch = (0.39 * sentence_length.to_f) + (11.8 * syllables_per_word.to_f) - 15.59
+    flesch.round(1)
+  end
+  def self.polysyllab_count(text)
+    count = 0
+    text.split(' ').each do |word|
+      w = syllable_count(word)
+      count += 1 if w >= 3
+    end
+    count
+  end
+  def self.smog_index(text)
+    sentences = sentence_count(text)
+    if sentences >= 3
+      begin
+        polysyllab = polysyllab_count(text)
+        smog = (
+        (1.043 * (30 * (polysyllab / sentences))**0.5) + 3.1291)
+        return smog.round(1)
+      rescue ZeroDivisionError
+        return 0.0
+      end
+    else
+      return 0.0
+    end
+  end
+  def self.coleman_liau_index(text)
+    letters   = (avg_letter_per_word(text) * 100).round(2)
+    sentences = (avg_sentence_per_word(text) * 100).round(2)
+    coleman   = ((0.058 * letters) - (0.296 * sentences) - 15.8).to_f
+    coleman.round(2)
+  end
+  def self.automated_readability_index(text)
+    chars     = char_count(text)
+    words     = lexicon_count(text)
+    sentences = sentence_count(text)
+    begin
+      a = chars.to_f / words.to_f
+      b = words.to_f / sentences.to_f
+      readability = (
+      (4.71 * a.round(2) + (0.5 * b.round(2))) - 21.43)
+      return readability.round(1)
+    rescue ZeroDivisionError
+      return 0.0
+    end
+  end
+  def self.linsear_write_formula(text)
+    easy_word = 0
+    difficult_word = 0
+    text_list = text.split(' ')[0..100]
+    text_list.each do |word|
+      if syllable_count(word) < 3
+        easy_word += 1
+      else
+        difficult_word += 1
+      end
+    end
+    text = text_list.join(' ')
+    number = ((easy_word * 1 + difficult_word * 3) / sentence_count(text)).to_f
+    if number <= 20
+      number -= 2
+    end
+    return number / 2
+  end
+  def self.difficult_words(text)
+    require 'set'
+    easy_words = Set.new
+    File.read('lib/easy_words.txt').each_line do |line|
+      easy_words << line.chop
+    end
+    text_list = text.downcase.gsub(/[^0-9a-z ]/i, '').split(' ')
+    diff_words_set = Set.new
+    text_list.each do |value|
+      unless easy_words.include? value
+        if syllable_count(value) > 1
+          diff_words_set.add(value)
+        end
+      end
+    end
+    return diff_words_set.length
+  end
+  def self.dale_chall_readability_score(text)
+    word_count = lexicon_count(text)
+    count = word_count - difficult_words(text)
+    begin
+      per = count.to_f / word_count.to_f * 100
+    rescue ZeroDivisionError
+      return 0.0
+    end
+    difficult_words = 100 - per
+    score = (
+    (0.1579 * difficult_words)
+    + (0.0496 * avg_sentence_length(text)))
+    if difficult_words > 5
+      score += 3.6365
+    end
+    return score.round(2)
+  end
+  def self.gunning_fog(text)
+    begin
+      per_diff_words = (
+      (difficult_words(text) / lexicon_count(text) * 100) + 5)
+      grade = 0.4 * (avg_sentence_length(text) + per_diff_words)
+      return grade.round(2)
+    rescue ZeroDivisionError
+      return 0.0
+    end
+  end
+  def self.lix(text)
+    words = text.split(' ')
+    words_length = words.length
+    long_words = words.select { |word| word.length > 6 }.count
+    per_long_words = (long_words * 100).to_f / words_length
+    asl = avg_sentence_length(text)
+    lix = asl + per_long_words
+    return lix.round(2)
+  end
+  def self.text_standard(text, float_output=nil)
+    grade = []
+    lower = flesch_kincaid_grade(text).round
+    upper = flesch_kincaid_grade(text).ceil
+    grade.append(lower.to_i)
+    grade.append(upper.to_i)
+    # Appending Flesch Reading Easy
+    score = flesch_reading_ease(text)
+    if score < 100 && score >= 90
+      grade.append(5)
+    elsif score < 90 && score >= 80
+      grade.append(6)
+    elsif score < 80 && score >= 70
+      grade.append(7)
+    elsif score < 70 && score >= 60
+      grade.append(8)
+      grade.append(9)
+    elsif score < 60 && score >= 50
+      grade.append(10)
+    elsif score < 50 && score >= 40
+      grade.append(11)
+    elsif score < 40 && score >= 30
+      grade.append(12)
+    else
+      grade.append(13)
+    end
+    # Appending SMOG Index
+    lower = smog_index(text).round
+    upper = smog_index(text).ceil
+    grade.append(lower.to_i)
+    grade.append(upper.to_i)
+    # Appending Coleman_Liau_Index
+    lower = coleman_liau_index(text).round
+    upper = coleman_liau_index(text).ceil
+    grade.append(lower.to_i)
+    grade.append(upper.to_i)
+    # Appending Automated_Readability_Index
+    lower = automated_readability_index(text).round
+    upper = automated_readability_index(text).ceil
+    grade.append(lower.to_i)
+    grade.append(upper.to_i)
+    # Appending Dale_Chall_Readability_Score
+    lower = dale_chall_readability_score(text).round
+    upper = dale_chall_readability_score(text).ceil
+    grade.append(lower.to_i)
+    grade.append(upper.to_i)
+    # Appending Linsear_Write_Formula
+    lower = linsear_write_formula(text).round
+    upper = linsear_write_formula(text).ceil
+    grade.append(lower.to_i)
+    grade.append(upper.to_i)
+    # Appending Gunning Fog Index
+    lower = gunning_fog(text).round
+    upper = gunning_fog(text).ceil
+    grade.append(lower.to_i)
+    grade.append(upper.to_i)
+    # Finding the Readability Consensus based upon all the above tests
+    require 'counter'
+    d = Counter.new(grade)
+    final_grade = d.most_common(1)
+    score = final_grade[0][0]
+    if float_output
+      return score.to_f
+    else
+      return "#{score.to_i - 1}th and #{score.to_i}th grade"
+    end
+  end
+end

data/spec/textstat_spec.rb ADDED Viewed

@@ -0,0 +1,162 @@
+require 'rspec'
+require_relative '../lib/textstat.rb'
+# Specs for the TextStat class methods. Every example runs against the same
+# multi-paragraph sample text and compares the result with an exact value.
+describe TextStat do
+  # Rebuilds the shared sample text before each example. The string is
+  # assembled from adjacent literals; "\n" marks the paragraph breaks.
+  before do
+    @long_test = 'Playing ... games has always been thought to be ' \
+       'important to the development of well-balanced and ' \
+       'creative children; however, what part, if any, ' \
+       'they should play in the lives of adults has never ' \
+       'been researched that deeply. I believe that ' \
+       'playing games is every bit as important for adults ' \
+       'as for children. Not only is taking time out to ' \
+       'play games with our children and other adults ' \
+       'valuable to building interpersonal relationships ' \
+       'but is also a wonderful way to release built up ' \
+       "tension.\n" \
+       "There's nothing my husband enjoys more after a " \
+       'hard day of work than to come home and play a game ' \
+       'of Chess with someone. This enables him to unwind ' \
+       "from the day's activities and to discuss the highs " \
+       'and lows of the day in a non-threatening, kick back ' \
+       'environment. One of my most memorable wedding ' \
+       'gifts, a Backgammon set, was received by a close ' \
+       'friend. I asked him why in the world he had given ' \
+       'us such a gift. He replied that he felt that an ' \
+       'important aspect of marriage was for a couple to ' \
+       'never quit playing games together. Over the years, ' \
+       'as I have come to purchase and play, with other ' \
+       'couples & coworkers, many games like: Monopoly, ' \
+       'Chutes & Ladders, Mastermind, Dweebs, Geeks, & ' \
+       'Weirdos, etc. I can reflect on the integral part ' \
+       'they have played in our weekends and our ' \
+       '"shut-off the T.V. and do something more ' \
+       'stimulating" weeks. They have enriched my life and ' \
+       'made it more interesting. Sadly, many adults ' \
+       'forget that games even exist and have put them ' \
+       'away in the cupboards, forgotten until the ' \
+       "grandchildren come over.\n" \
+       'All too often, adults get so caught up in working ' \
+       'to pay the bills and keeping up with the ' \
+       "\"Joneses'\" that they neglect to harness the fun " \
+       'in life; the fun that can be the reward of ' \
+       'enjoying a relaxing game with another person. It ' \
+       'has been said that "man is that he might have ' \
+       'joy" but all too often we skate through life ' \
+       'without much of it. Playing games allows us to: ' \
+       'relax, learn something new and stimulating, ' \
+       'interact with people on a different more ' \
+       'comfortable level, and to enjoy non-threatening ' \
+       'competition. For these reasons, adults should ' \
+       'place a higher priority on playing games in their ' \
+       'lives'
+  end
+  # One example per public method. The eql matcher is used throughout, so
+  # Integer and Float results must match the expected literal exactly.
+  context 'When testing the TextStat class' do
+    # Checked both with spaces ignored (default) and with spaces counted.
+    it 'should return the correct number of chars' do
+      count = TextStat.char_count(@long_test)
+      count_spaces = TextStat.char_count(@long_test, false)
+      expect(count).to eql 1750
+      expect(count_spaces).to eql 2123
+    end
+    # Checked both with punctuation removed (default) and left in place.
+    it 'should return the correct number of lexicons' do
+      count = TextStat.lexicon_count(@long_test)
+      count_punctuation = TextStat.lexicon_count(@long_test, false)
+      expect(count).to eql 372
+      expect(count_punctuation).to eql 376
+    end
+    it 'should return the correct number of syllables' do
+      count = TextStat.syllable_count(@long_test)
+      expect(count).to eql 559
+    end
+    it 'should return the correct number of sentences' do
+      count = TextStat.sentence_count(@long_test)
+      expect(count).to eql 16
+    end
+    it 'should return the correct average sentence length' do
+      avg = TextStat.avg_sentence_length(@long_test)
+      expect(avg).to eql 23.3
+    end
+    it 'should return the correct average syllables per word' do
+      avg = TextStat.avg_syllables_per_word(@long_test)
+      expect(avg).to eql 1.5
+    end
+    it 'should return the correct average letters per word' do
+      avg = TextStat.avg_letter_per_word(@long_test)
+      expect(avg).to eql 4.7
+    end
+    it 'should return the correct average sentence per word' do
+      avg = TextStat.avg_sentence_per_word(@long_test)
+      expect(avg).to eql 0.04
+    end
+    it 'should return the correct Flesch reading-ease test score' do
+      score = TextStat.flesch_reading_ease(@long_test)
+      expect(score).to eql 56.29
+    end
+    it 'should return the correct Flesch–Kincaid grade' do
+      score = TextStat.flesch_kincaid_grade(@long_test)
+      expect(score).to eql 11.2
+    end
+    it 'should return the correct number of polysyllab' do
+      count = TextStat.polysyllab_count(@long_test)
+      expect(count).to eql 43
+    end
+    it 'should return the correct smog index' do
+      index = TextStat.smog_index(@long_test)
+      expect(index).to eql 11.2
+    end
+    it 'should return the correct Coleman–Liau index' do
+      index = TextStat.coleman_liau_index(@long_test)
+      expect(index).to eql 10.28
+    end
+    it 'should return the correct automated readability index' do
+      index = TextStat.automated_readability_index(@long_test)
+      expect(index).to eql 12.3
+    end
+    it 'should return the correct linsear write formula result' do
+      result = TextStat.linsear_write_formula(@long_test)
+      expect(result).to eql 14.5
+    end
+    it 'should return the correct difficult words result' do
+      result = TextStat.difficult_words(@long_test)
+      expect(result).to eql 58
+    end
+    it 'should return the correct Dale–Chall readability score' do
+      score = TextStat.dale_chall_readability_score(@long_test)
+      expect(score).to eql 4.79
+    end
+    it 'should return the correct Gunning fog score' do
+      score = TextStat.gunning_fog(@long_test)
+      expect(score).to eql 11.32
+    end
+    it 'should return the correct Lix readability test score' do
+      score = TextStat.lix(@long_test)
+      expect(score).to eql 45.11
+    end
+    # Called without float_output, so the String form is expected.
+    it 'should return the readability consensus score' do
+      standard = TextStat.text_standard(@long_test)
+      expect(standard).to eql '10th and 11th grade'
+    end
+  end
+end

metadata ADDED Viewed

@@ -0,0 +1,116 @@
+--- !ruby/object:Gem::Specification
+name: textstat
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+platform: ruby
+authors:
+- Jakub Polak
+autorequire:
+bindir: exe
+cert_chain: []
+date: 2018-11-12 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: text-hyphen
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.4'
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 1.4.1
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.4'
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 1.4.1
+- !ruby/object:Gem::Dependency
+  name: bundler
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 2.0.a
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 2.0.a
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '10.0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '10.0'
+- !ruby/object:Gem::Dependency
+  name: rspec
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '3.0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '3.0'
+description:
+email:
+- jakub.polak.vz@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- lib/counter.rb
+- lib/easy_words.txt
+- lib/textstat.rb
+- lib/textstat/version.rb
+- spec/textstat_spec.rb
+homepage: https://github.com/kupolak/textstat
+licenses:
+- MIT
+metadata:
+  homepage_uri: https://github.com/kupolak/textstat
+  source_code_uri: https://github.com/kupolak/textstat
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.7.8
+signing_key:
+specification_version: 4
+summary: Ruby gem to calculate readability statistics of a text object - paragraphs,
+  sentences, articles
+test_files:
+- spec/textstat_spec.rb
+- lib/easy_words.txt