markovfun 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/Gemfile +1 -0
- data/README.md +35 -8
- data/lib/markovfun/trigram.rb +104 -0
- data/lib/markovfun/util.rb +27 -0
- data/lib/markovfun/version.rb +1 -1
- data/lib/markovfun.rb +2 -123
- data/markovfun.gemspec +1 -0
- metadata +3 -1
data/.gitignore
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -2,11 +2,7 @@
 
 This gem generates sentences from textfiles using trigrams.
 It is based on Alex Rudnick's Python Markov chain generator,
-<<<<<<< HEAD
 the code for which is [here](https://github.com/alexrudnick/hackerschool-demos/tree/master/ngrams).
-=======
-the code for which is (here)[https://github.com/alexrudnick/hackerschool-demos/tree/master/ngrams].
->>>>>>> f8c8a08... Updated README
 
 ## Installation
 
@@ -24,11 +20,42 @@ Or install it yourself as:
 
 ## Usage
 
+### File Processing
+
+Get sentences from a text file:
+
+`sentences = Markovfun::Util.get_sentences("bible.txt")`
+
+### Trigrams
+
+Create hash storing counts of words that follow two previous words.
+
+`counts = Markovfun::Trigram.get_counts(sentences)`
+
+Convert hash of counts to hash of probabilities.
+
+`probs = Markovfun::Trigram.counts_to_probs(counts)`
+
+Generate a sentence with a specified min length (in this case, 4) from the probability hash.
+
+`sentence = Markovfun::Trigram.sentence_from_probs_hash(probs, 4)`
+
+Score the sentence by "surprisal value" given a probability has.
+
+`Markovfun::Trigram.score_sentence(sentence, probs)`
+
+### Sample Program
+
 Here's how you can generate a sentence from a text file.
 
 ```
-sentences = Markovfun.get_sentences("bible.txt")
-counts = Markovfun.get_counts(sentences)
-probs = Markovfun.counts_to_probs(counts)
-Markovfun.sentence_from_probs_hash(probs)
+sentences = Markovfun::Util.get_sentences("bible.txt")
+counts = Markovfun::Trigram.get_counts(sentences)
+probs = Markovfun::Trigram.counts_to_probs(counts)
+Markovfun::Trigram.sentence_from_probs_hash(probs, 4)
 ```
+
+### Sample Sentence!
+
+From "The Beautiful and the Damned":
+"I liked him tremendously--ah, she had enjoyed a rather romantic figure, a scholar, a recluse, a tower of erudition."
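A note on the "surprisal value" mentioned in the new Usage section: score_sentence (in the new trigram.rb below) sums -log2 of each trigram probability across the sentence, so a lower score means a more probable sentence, and sentence_from_probs_hash keeps resampling until the score is at most 30 and the sentence reaches the minimum length. A minimal illustration of one step, with a made-up probability rather than one taken from the gem:

```
# Hypothetical probability that a word follows its two predecessors.
prob = 0.25
# Surprisal of that single trigram, in bits; score_sentence sums these.
surprisal = -Math.log(prob, 2)  # => 2.0
```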
data/lib/markovfun/trigram.rb
ADDED
@@ -0,0 +1,104 @@
+require "markovfun/version"
+require "markovfun/util"
+
+module Markovfun
+  module Trigram
+    include Markovfun::Util
+
+    # Generates a sentence, given a file.
+    def self.sentence_from_file(filename, min_length)
+      sentences = get_sentences(filename)
+      counts = get_counts(sentences)
+      probs = counts_to_probs(counts)
+      sentence_from_probs_hash(probs, min_length)
+    end
+
+    # Returns a counts hash, given a list of sentences.
+    # The keys to the hash are all observed combinations of [prev2, prev1],
+    # where prev2 and prev1 are the two previous words.
+    # The values are hashes, in which the keys are words (cur) that have followed
+    # prev2 and prev1, and the values are the number of occurrences.
+    def self.get_counts(sentences)
+      counts_hash = {}
+      sentences.each do |sent|
+        # nil denotes the beginnings and ends of sentences
+        sent = [nil, nil] + sent + [nil]
+        sent.zip(sent[1..-1], sent[2..-1]).each do |prev2, prev1, cur|
+          counts_hash[[prev2, prev1]] ||= {}
+          if !(counts_hash[[prev2, prev1]][cur])
+            counts_hash[[prev2, prev1]][cur] = 1
+          else
+            counts_hash[[prev2, prev1]][cur] += 1
+          end
+        end
+      end
+      counts_hash
+    end
+
+    # Generates a probability hash, given a counts hash.
+    # Similar to counts_hash, except containing the probability that a word
+    # follows two preceding words (as opposed to number of occurrences).
+    def self.counts_to_probs(counts_hash)
+      probs_hash = {}
+      counts_hash.each do |prev, cur_freq|
+        probs_hash[prev] ||= {}
+        cur_freq.each do |cur, freq|
+          prob = freq.to_f / cur_freq.values.reduce(:+)
+          probs_hash[prev][cur] = prob
+        end
+      end
+      probs_hash
+    end
+
+    # Generates a sample word, given a probability hash.
+    def self.sample_word(probs_hash)
+      score = rand
+      probs_hash.each do |word, prob|
+        return word if score < prob
+        score -= prob
+      end
+    end
+
+    # Generates a sample sentence, given a probability hash.
+    def self.sample_sentence(probs_hash)
+      prev2 = nil
+      prev1 = nil
+      out = []
+
+      while true
+        cur = sample_word(probs_hash[[prev2, prev1]])
+        if cur.nil?
+          return out
+        else
+          out << cur
+          prev2 = prev1
+          prev1 = cur
+        end
+      end
+    end
+
+    # Generates a sentence from a probability hash.
+    def self.sentence_from_probs_hash(probs, min_length)
+      sent = []
+      while score_sentence(sent, probs) > 30 || sent.length < min_length
+        sent = sample_sentence(probs)
+      end
+      sent = sent[0..-2].join(" ") + "."
+      sent
+    end
+
+    # Scores a sentence, depending on the likelihood that it occurs
+    # within a corpus.
+    def self.score_sentence(sent, probs)
+      total_surprise = 0
+
+      sent = sent[0..-2].split(" ").push(".")
+      sent = [nil, nil] + sent + [nil]
+
+      sent.zip(sent[1..-1], sent[2..-1]).each do |prev2, prev1, cur|
+        total_surprise += -Math.log(probs[[prev2, prev1]][cur], 2)
+      end
+      total_surprise
+    end
+  end
+end
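For readers skimming the diff, a small sketch of the hash shapes get_counts and counts_to_probs build, using a hypothetical two-sentence corpus (assumes markovfun 0.0.3 is installed; the expected values are written out by hand from the code above):

```
require "markovfun"

# Two tokenized sentences, in the shape Util.get_sentences produces.
sentences = [["the", "cat", "sat", "."], ["the", "cat", "ran", "."]]

counts = Markovfun::Trigram.get_counts(sentences)
counts[["the", "cat"]]   # => {"sat"=>1, "ran"=>1}
counts[[nil, "the"]]     # => {"cat"=>2}   (nil marks sentence boundaries)

probs = Markovfun::Trigram.counts_to_probs(counts)
probs[["the", "cat"]]    # => {"sat"=>0.5, "ran"=>0.5}
```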
data/lib/markovfun/util.rb
ADDED
@@ -0,0 +1,27 @@
+require "markovfun/version"
+
+module Markovfun
+  module Util
+
+    # Gets lines from a file.
+    def self.get_lines(filename)
+      file = File.open(filename, "r")
+      data = file.read
+      file.close
+      lines = data.split("\n")
+      lines.map! { |l| l.strip.split(" ") }
+    end
+
+    # Gets sentences from a file.
+    def self.get_sentences(filename)
+      file = File.open(filename, "r")
+      data = file.read
+      file.close
+      data.gsub!(/\n/, " ")
+      data.gsub!(/"/,"")
+      sentences = data.split(".")
+      sentences.map! { |s| s.strip.split(" ").push(".") }
+      sentences.select! { |s| s[0].capitalize == s[0] }
+    end
+  end
+end
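And a quick sketch of what Util.get_sentences returns, using hypothetical file contents (assumes markovfun 0.0.3; the temp-file setup is only for illustration):

```
require "markovfun"
require "tempfile"

# Hypothetical three-sentence corpus; the lowercase sentence gets dropped.
corpus = Tempfile.new("corpus")
corpus.write(%(The cat sat.\nthe dog barked.\nThe "dog" ran.))
corpus.close

# Newlines become spaces, quotes are stripped, sentences that don't start
# with a capitalized word are filtered out, and each remaining sentence
# comes back as an array of words ending in "."
Markovfun::Util.get_sentences(corpus.path)
# => [["The", "cat", "sat", "."], ["The", "dog", "ran", "."]]
```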
data/lib/markovfun/version.rb
CHANGED
data/lib/markovfun.rb
CHANGED
@@ -1,123 +1,2 @@
-require
-require '
-
-module Markovfun
-
-  # Generates a sentence, given a file.
-  def self.sentence_from_file(filename)
-    sentences = get_sentences(filename)
-    counts = buildcounts(sentences)
-    probs = counts_to_probs(counts)
-    sentence_from_probs_hash(probs)
-  end
-
-  # Gets lines from a file.
-  def self.get_lines(filename)
-    file = File.open(filename, "r")
-    data = file.read
-    file.close
-    lines = data.split("\n")
-    lines.map! { |l| l.strip.split(" ") }
-  end
-
-  # Gets sentences from a file.
-  def self.get_sentences(filename)
-    file = File.open(filename, "r")
-    data = file.read
-    file.close
-    data.gsub!(/\n/, "")
-    data.gsub!(/"/,"")
-    sentences = data.split(".")
-    sentences.map! { |s| s.strip.split(" ").push(".") }
-    sentences.select! { |s| s[0].capitalize == s[0] }
-  end
-
-  # Returns a counts hash, given a list of sentences.
-  # The keys to the hash are all observed combinations of [prev2, prev1],
-  # where prev2 and prev1 are the two previous words.
-  # The values are hashes, in which the keys are words (cur) that have followed
-  # prev2 and prev1, and the values are the number of occurrences.
-  def self.get_counts(sentences)
-    counts_hash = {}
-    sentences.each do |sent|
-      # nil denotes the beginnings and ends of sentences
-      sent = [nil, nil] + sent + [nil]
-      sent.zip(sent[1..-1], sent[2..-1]).each do |prev2, prev1, cur|
-        counts_hash[[prev2, prev1]] ||= {}
-        if !(counts_hash[[prev2, prev1]][cur])
-          counts_hash[[prev2, prev1]][cur] = 1
-        else
-          counts_hash[[prev2, prev1]][cur] += 1
-        end
-      end
-    end
-    counts_hash
-  end
-
-  # Generates a probability hash, given a counts hash.
-  # Similar to counts_hash, except containing the probability that a word
-  # follows two preceding words (as opposed to number of occurrences).
-  def self.counts_to_probs(counts_hash)
-    probs_hash = {}
-    counts_hash.each do |prev, cur_freq|
-      probs_hash[prev] ||= {}
-      cur_freq.each do |cur, freq|
-        prob = freq.to_f / cur_freq.values.reduce(:+)
-        probs_hash[prev][cur] = prob
-      end
-    end
-    probs_hash
-  end
-
-  # Generates a sample word, given a probability hash.
-  def self.sample_word(probs_hash)
-    score = rand
-    probs_hash.each do |word, prob|
-      return word if score < prob
-      score -= prob
-    end
-  end
-
-  # Generates a sample sentence, given a probability hash.
-  def self.sample_sentence(probs_hash)
-    prev2 = nil
-    prev1 = nil
-    out = []
-
-    while true
-      cur = sample_word(probs_hash[[prev2, prev1]])
-      if cur.nil?
-        return out
-      else
-        out << cur
-        prev2 = prev1
-        prev1 = cur
-      end
-    end
-  end
-
-  # Scores a sentence, depending on the likelihood that it occurs
-  # within a corpus.
-  def self.score_sentence(sent, probs)
-    total_surprise = 0
-    sent = [nil, nil] + sent + [nil]
-
-    sent.zip(sent[1..-1], sent[2..-1]).each do |prev2, prev1, cur|
-      total_surprise += -Math.log(probs[[prev2, prev1]][cur], 2)
-    end
-    total_surprise
-  end
-
-  # Generates a sentence from a probability hash.
-  def self.sentence_from_probs_hash(probs)
-    sent = []
-    while score_sentence(sent, probs) > 30 || sent.length < 4
-      sent = sample_sentence(probs)
-    end
-    puts "score: #{score_sentence(sent, probs)}"
-    sent = sent[0..-2].join(" ") + "."
-    puts sent
-    sent
-  end
-
-end
+require 'markovfun/trigram'
+require 'markovfun/util'
data/markovfun.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: markovfun
 version: !ruby/object:Gem::Version
-  version: 0.0.2
+  version: 0.0.3
 prerelease:
 platform: ruby
 authors:
@@ -56,6 +56,8 @@ files:
 - README.md
 - Rakefile
 - lib/markovfun.rb
+- lib/markovfun/trigram.rb
+- lib/markovfun/util.rb
 - lib/markovfun/version.rb
 - markovfun.gemspec
 homepage: https://github.com/mariapacana/markovfun