RubyGems - markovfun - Versions diffs - 0.0.2 → 0.0.3 - Mend

markovfun 0.0.2 → 0.0.3

Files changed (9) hide show

data/.gitignore CHANGED Viewed

@@ -8,6 +8,7 @@ InstalledFiles
 _yardoc
 coverage
 doc/
+texts/
 lib/bundler/man
 pkg
 rdoc

data/Gemfile CHANGED Viewed

@@ -2,3 +2,4 @@ source 'https://rubygems.org'
 # Specify your gem's dependencies in markovfun.gemspec
 gemspec
+pry

data/README.md CHANGED Viewed

@@ -2,11 +2,7 @@
 This gem generates sentences from text files using trigrams.
 It is based on Alex Rudnick's Python Markov chain generator,
-<<<<<<< HEAD
 the code for which is [here](https://github.com/alexrudnick/hackerschool-demos/tree/master/ngrams).
-=======
-the code for which is (here)[https://github.com/alexrudnick/hackerschool-demos/tree/master/ngrams].
->>>>>>> f8c8a08... Updated README
 ## Installation
@@ -24,11 +20,42 @@ Or install it yourself as:
 ## Usage
+### File Processing
+Get sentences from a text file:
+`sentences = Markovfun::Util.get_sentences("bible.txt")`
+### Trigrams
+Create a hash storing counts of the words that follow each pair of preceding words.
+`counts = Markovfun::Trigram.get_counts(sentences)`
+Convert hash of counts to hash of probabilities.
+`probs = Markovfun::Trigram.counts_to_probs(counts)`
+Generate a sentence with a specified min length (in this case, 4) from the probability hash.
+`sentence = Markovfun::Trigram.sentence_from_probs_hash(probs, 4)`
+Score the sentence by "surprisal value" given a probability hash.
+`Markovfun::Trigram.score_sentence(sentence, probs)`
+### Sample Program
 Here's how you can generate a sentence from a text file.
 ```
-sentences = Markovfun.get_sentences("bible.txt")
-counts = Markovfun.get_counts(sentences)
-probs = Markovfun.counts_to_probs(counts)
-Markovfun.sentence_from_probs_hash(probs)
+sentences = Markovfun::Util.get_sentences("bible.txt")
+counts = Markovfun::Trigram.get_counts(sentences)
+probs = Markovfun::Trigram.counts_to_probs(counts)
+Markovfun::Trigram.sentence_from_probs_hash(probs, 4)
 ```
+### Sample Sentence!
+From "The Beautiful and Damned":
+"I liked him tremendously--ah, she had enjoyed a rather romantic figure, a scholar, a recluse, a tower of erudition."

data/lib/markovfun/trigram.rb ADDED Viewed

@@ -0,0 +1,104 @@
+require "markovfun/version"
+require "markovfun/util"
+module Markovfun
+  module Trigram
+    include Markovfun::Util
+    # Generates a sentence, given a file.
+    def self.sentence_from_file(filename, min_length)
+      sentences = get_sentences(filename)
+      counts = get_counts(sentences)
+      probs = counts_to_probs(counts)
+      sentence_from_probs_hash(probs, min_length)
+    end
+    # Returns a counts hash, given a list of sentences.
+    # The keys to the hash are all observed combinations of [prev2, prev1],
+    # where prev2 and prev1 are the two previous words.
+    # The values are hashes, in which the keys are words (cur) that have followed
+    # prev2 and prev1, and the values are the number of occurrences.
+    def self.get_counts(sentences)
+      counts_hash = {}
+      sentences.each do |sent|
+        # nil denotes the beginnings and ends of sentences
+        sent = [nil, nil] + sent + [nil]
+        sent.zip(sent[1..-1], sent[2..-1]).each do |prev2, prev1, cur|
+          counts_hash[[prev2, prev1]] ||= {}
+          if !(counts_hash[[prev2, prev1]][cur])
+            counts_hash[[prev2, prev1]][cur] = 1
+          else
+            counts_hash[[prev2, prev1]][cur] += 1
+          end
+        end
+      end
+      counts_hash
+    end
+    # Generates a probability hash, given a counts hash.
+    # Similar to counts_hash, except containing the probability that a word
+    # follows two preceding words (as opposed to number of occurrences).
+    def self.counts_to_probs(counts_hash)
+      probs_hash = {}
+      counts_hash.each do |prev, cur_freq|
+        probs_hash[prev] ||= {}
+        cur_freq.each do |cur, freq|
+          prob = freq.to_f / cur_freq.values.reduce(:+)
+          probs_hash[prev][cur] = prob
+        end
+      end
+      probs_hash
+    end
+    # Samples a word at random, given a hash mapping words to their probabilities.
+    def self.sample_word(probs_hash)
+      score = rand
+      probs_hash.each do |word, prob|
+        return word if score < prob
+        score -= prob
+      end
+    end
+    # Generates a sample sentence, given a probability hash.
+    def self.sample_sentence(probs_hash)
+      prev2 = nil
+      prev1 = nil
+      out = []
+      while true
+        cur = sample_word(probs_hash[[prev2, prev1]])
+        if cur.nil?
+          return out
+        else
+          out << cur
+          prev2 = prev1
+          prev1 = cur
+        end
+      end
+    end
+    # Generates a sentence from a probability hash.
+    def self.sentence_from_probs_hash(probs, min_length)
+      sent = []
+      while score_sentence(sent, probs) > 30 || sent.length < min_length
+        sent = sample_sentence(probs)
+      end
+      sent = sent[0..-2].join(" ") + "."
+      sent
+    end
+    # Scores a sentence by its total surprisal (the sum of -log2 of each trigram
+    # probability); a lower score means the sentence is more likely within the corpus.
+    def self.score_sentence(sent, probs)
+      total_surprise = 0
+      sent = sent[0..-2].split(" ").push(".")
+      sent = [nil, nil] + sent + [nil]
+      sent.zip(sent[1..-1], sent[2..-1]).each do |prev2, prev1, cur|
+        total_surprise += -Math.log(probs[[prev2, prev1]][cur], 2)
+      end
+      total_surprise
+    end
+  end
+end

data/lib/markovfun/util.rb ADDED Viewed

@@ -0,0 +1,27 @@
+require "markovfun/version"
+module Markovfun
+  module Util
+    # Gets lines from a file.
+    def self.get_lines(filename)
+      file = File.open(filename, "r")
+      data = file.read
+      file.close
+      lines = data.split("\n")
+      lines.map! { |l| l.strip.split(" ") }
+    end
+    # Gets sentences from a file.
+    def self.get_sentences(filename)
+      file = File.open(filename, "r")
+      data = file.read
+      file.close
+      data.gsub!(/\n/, " ")
+      data.gsub!(/"/,"")
+      sentences = data.split(".")
+      sentences.map! { |s| s.strip.split(" ").push(".") }
+      sentences.select! { |s| s[0].capitalize == s[0] }
+    end
+  end
+end

data/lib/markovfun/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Markovfun
-  VERSION = "0.0.2"
+  VERSION = "0.0.3"
 end

data/lib/markovfun.rb CHANGED Viewed

@@ -1,123 +1,2 @@
-require "markovfun/version"
-require 'pry'
-module Markovfun
-  # Generates a sentence, given a file.
-  def self.sentence_from_file(filename)
-    sentences = get_sentences(filename)
-    counts = buildcounts(sentences)
-    probs = counts_to_probs(counts)
-    sentence_from_probs_hash(probs)
-  end
-  # Gets lines from a file.
-  def self.get_lines(filename)
-    file = File.open(filename, "r")
-    data = file.read
-    file.close
-    lines = data.split("\n")
-    lines.map! { |l| l.strip.split(" ") }
-  end
-  # Gets sentences from a file.
-  def self.get_sentences(filename)
-    file = File.open(filename, "r")
-    data = file.read
-    file.close
-    data.gsub!(/\n/, "")
-    data.gsub!(/"/,"")
-    sentences = data.split(".")
-    sentences.map! { |s| s.strip.split(" ").push(".") }
-    sentences.select! { |s| s[0].capitalize == s[0] }
-  end
-  # Returns a counts hash, given a list of sentences.
-  # The keys to the hash are all observed combinations of [prev2, prev1],
-  # where prev2 and prev1 are the two previous words.
-  # The values are hashes, in which the keys are words (cur) that have followed
-  # prev2 and prev1, and the values are the number of occurrences.
-  def self.get_counts(sentences)
-    counts_hash = {}
-    sentences.each do |sent|
-      # nil denotes the beginnings and ends of sentences
-      sent = [nil, nil] + sent + [nil]
-      sent.zip(sent[1..-1], sent[2..-1]).each do |prev2, prev1, cur|
-        counts_hash[[prev2, prev1]] ||= {}
-        if !(counts_hash[[prev2, prev1]][cur])
-          counts_hash[[prev2, prev1]][cur] = 1
-        else
-          counts_hash[[prev2, prev1]][cur] += 1
-        end
-      end
-    end
-    counts_hash
-  end
-  # Generates a probability hash, given a counts hash.
-  # Similar to counts_hash, except containing the probability that a word
-  # follows two preceding words (as opposed to number of occurrences).
-  def self.counts_to_probs(counts_hash)
-    probs_hash = {}
-    counts_hash.each do |prev, cur_freq|
-      probs_hash[prev] ||= {}
-      cur_freq.each do |cur, freq|
-        prob = freq.to_f / cur_freq.values.reduce(:+)
-        probs_hash[prev][cur] = prob
-      end
-    end
-    probs_hash
-  end
-  # Generates a sample word, given a probability hash.
-  def self.sample_word(probs_hash)
-    score = rand
-    probs_hash.each do |word, prob|
-      return word if score < prob
-      score -= prob
-    end
-  end
-  # Generates a sample sentence, given a probability hash.
-  def self.sample_sentence(probs_hash)
-    prev2 = nil
-    prev1 = nil
-    out = []
-    while true
-      cur = sample_word(probs_hash[[prev2, prev1]])
-      if cur.nil?
-        return out
-      else
-        out << cur
-        prev2 = prev1
-        prev1 = cur
-      end
-    end
-  end
-  # Scores a sentence, depending on the likelihood that it occurs
-  # within a corpus.
-  def self.score_sentence(sent, probs)
-    total_surprise = 0
-    sent = [nil, nil] + sent + [nil]
-    sent.zip(sent[1..-1], sent[2..-1]).each do |prev2, prev1, cur|
-      total_surprise += -Math.log(probs[[prev2, prev1]][cur], 2)
-    end
-    total_surprise
-  end
-  # Generates a sentence from a probability hash.
-  def self.sentence_from_probs_hash(probs)
-    sent = []
-    while score_sentence(sent, probs) > 30 || sent.length < 4
-      sent = sample_sentence(probs)
-    end
-    puts "score: #{score_sentence(sent, probs)}"
-    sent = sent[0..-2].join(" ") + "."
-    puts sent
-    sent
-  end
-end
+require 'markovfun/trigram'
+require 'markovfun/util'

data/markovfun.gemspec CHANGED Viewed

@@ -2,6 +2,7 @@
 lib = File.expand_path('../lib', __FILE__)
 $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
 require 'markovfun/version'
+require 'pry'
 Gem::Specification.new do |spec|
   spec.name          = "markovfun"

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: markovfun
 version: !ruby/object:Gem::Version
-  version: 0.0.2
+  version: 0.0.3
   prerelease:
 platform: ruby
 authors:
@@ -56,6 +56,8 @@ files:
 - README.md
 - Rakefile
 - lib/markovfun.rb
+- lib/markovfun/trigram.rb
+- lib/markovfun/util.rb
 - lib/markovfun/version.rb
 - markovfun.gemspec
 homepage: https://github.com/mariapacana/markovfun