RubyGems - marky_markov - Versions diffs - 0.3.0 → 0.3.2 - Mend

marky_markov 0.3.0 → 0.3.2

Files changed (8) hide show

data/README.md +2 -2
data/lib/marky_markov.rb +12 -6
data/lib/marky_markov/markov_dictionary.rb +13 -10
data/lib/marky_markov/markov_sentence_generator.rb +59 -34
data/lib/marky_markov/persistent_dictionary.rb +4 -2
data/marky_markov.gemspec +3 -2
data/spec/marky_markov/marky_markov_spec.rb +64 -0
metadata +5 -4

data/README.md CHANGED Viewed

@@ -7,8 +7,8 @@ in Ruby. It can be used both from the command-line and as a library within your
 NOTE: 0.3.0 now uses arrays with multiple entries per word instead of a
 hash key for each word with the value representing number of occurrences.
 While a less elegant solution, it leads to faster text generation. We
-are also now using msgpack instead of yajl-json to store the dictionary
-which should lead to faster usage in the command-line app.
+are also now using ox instead of yajl-json to store the dictionary
+as yajl-json does not appear to support arrays within hashes properly.
 NOTE: In the transition between 0.1.3 to 0.2.0 MarkyMarkov has added the
 ability to generate proper sentences (generate_n_sentences) instead of simply a

data/lib/marky_markov.rb CHANGED Viewed

@@ -4,12 +4,12 @@
 require_relative 'marky_markov/persistent_dictionary'
 require_relative 'marky_markov/markov_sentence_generator'
-# @version = 0.3.0
+# @version = 0.3.2
 # @author Matt Furden
 # Module containing TemporaryDictionary and Dictionary for creation of
 # Markov Chain Dictionaries and generating sentences from those dictionaries.
 module MarkyMarkov
-  VERSION = '0.3.0'
+  VERSION = '0.3.2'
   class TemporaryDictionary
     # Create a new Temporary Markov Chain Dictionary and sentence generator for use.
@@ -26,6 +26,12 @@ module MarkyMarkov
       @sentence = MarkovSentenceGenerator.new(@dictionary)
     end
+    # Returns the MarkovDictionary object's dictionary hash.
+    # @return [Hash] the MarkovDictionary hash.
+    def dictionary
+      @dictionary.dictionary
+    end
     # Parses a given file and adds the sentences it contains to the current dictionary.
     #
     # @example Open a text file and add its contents to the dictionary.
@@ -93,13 +99,13 @@ module MarkyMarkov
     end
     # @since 0.1.4
-    # Modify respond_to? to include generate_n_words and generate_n_sentences
+    # Modify respond_to_missing? to include generate_n_words and generate_n_sentences
     # method_missing implementation.
-    def respond_to?(method_sym, include_private = false)
+    def respond_to_missing?(method_sym, include_private)
       if method_sym.to_s =~ /^generate_(\d*)_word[s]*$/
-        generate_n_words($1.to_i)
+        true
       elsif method_sym.to_s =~ /^generate_(\d*)_sentence[s]*$/
-        generate_n_sentences($1.to_i)
+        true
       else
         super
       end

data/lib/marky_markov/markov_dictionary.rb CHANGED Viewed

@@ -1,19 +1,20 @@
 # @private
-class MarkovDictionary
-  attr_accessor :dictionary, :depth
-  def initialize(depth=2)
-    @dictionary = {}
+class MarkovDictionary # :nodoc:
+  attr_reader :dictionary, :depth
+  def initialize(depth=2) @dictionary = {}
     @depth = depth
+    @split_words = /([.?!])|[\s]+/
+    @split_sentence = /(?<=[.!?])\s+/
   end
   # If File does not exist.
-  class FileNotFoundError < Exception
+  class FileNotFoundError < Exception # :nodoc:
   end
   # Open supplied text file:
   def open_source(source)
     if File.exists?(source)
-      File.open(source, "r").read.split
+      File.open(source, "r").read.split(@split_sentence)
     else
       raise FileNotFoundError.new("#{source} does not exist!")
     end
@@ -36,10 +37,12 @@ class MarkovDictionary
   # @example Add a string
   #   parse_source("Hi, how are you doing?", false)
   def parse_source(source, file=true)
-    contents = file ? open_source(source) : contents = source.split
-    contents.each_cons(@depth+1) do |words|
-       self.add_word(words[0..-2], words[-1])
+    contents = file ? open_source(source) : contents = source.split(@split_sentence)
+    contents.map! {|sentence| sentence.gsub(/["()]/,"")}
+    contents.each do |sentence|
+      sentence.split(@split_words).each_cons(@depth+1) do |words|
+        self.add_word(words[0..-2], words[-1])
+      end
     end
-    @dictionary[contents.last(@depth)] ||= []
   end
 end

data/lib/marky_markov/markov_sentence_generator.rb CHANGED Viewed

@@ -1,47 +1,67 @@
+# Messing about with the NullObject pattern, can't apply it in too many
+# places in this one. Need to evaluate what else could be used in this
+# aside from my first instinct of defaulting to []
 # @private
-class MarkovSentenceGenerator
+class NullObject # :nodoc:
+  def method_missing (*args, &block)
+    self
+  end
+  def nil?; true; end
+  def <<(*); end
+  def to_str; end
+  def to_ary; []; end
+end
+# @private
+NULL_OBJECT = NullObject.new # :nodoc:
+# @private
+class MarkovSentenceGenerator # :nodoc:
   def initialize(dictionary)
     @dictionary = dictionary
     @depth = @dictionary.depth
   end
-  # Returns a random word via picking a random key from the dictionary.
+  # Returns a random word via picking a random key from the dictionary.
   # In the case of the TwoWordDictionary, it returns two words to ensure
   # that the sentence will have a valid two word string to pick the next
   # word from.
-  #
+  #
   # @return [String] a string containing a random dictionary key.
   def random_word
-    keys = @dictionary.dictionary.keys
-    keys[rand(keys.length)]
+    words = @dictionary.dictionary.keys
+    words[rand(words.length)]
   end
   # Generates a random capitalized word via picking a random key from the
   # dictionary and recurring if the word is lowercase.
   #
   # (see #random_word)
-  def random_capitalized_word(attempts=0)
-    keys = @dictionary.dictionary.keys
-    x = keys[rand(keys.length)]
-    if /[A-Z]/ =~ x[0]
-      return x
-    elsif attempts < 30
-      # If you don't find a capitalized word after 30 attempts, just use
-      # a lowercase word as there may be no capitals in the dicationary.
-      random_capitalized_word(attempts+1)
-    else
-      random_word
+  def random_capitalized_word
+    attempts = 0
+    # If you don't find a capitalized word after 15 attempts, just use
+    # a lowercase word as there may be no capitals in the dictionary.
+    until attempts > 15
+      attempts += 1
+      words = @dictionary.dictionary.keys
+      random_choice = words[rand(words.length)]
+      if random_choice[0] =~ /[A-Z]/
+        return random_choice
+      end
     end
+    random_word
   end
-  # Returns a word based upon the likelyhood of it appearing after the supplied word.
-  #
+  # Returns a word based upon the likelihood of it appearing after the supplied word.
+  #
   def weighted_random(lastword)
     # If word has no words in its dictionary (last word in source text file)
     # have it pick a random word to display instead.
-    if word = @dictionary.dictionary[lastword]
-      word.sample
-    end
+    @dictionary.dictionary.fetch(lastword, NULL_OBJECT).sample
+  end
+  def punctuation?(word)
+    word =~ /[.!?]/
   end
   # Generates a sentence of (wordcount) length using the weighted_random function.
@@ -52,10 +72,13 @@ class MarkovSentenceGenerator
     sentence = []
     sentence.concat(random_capitalized_word)
     (wordcount-1).times do
-      if word = weighted_random(sentence.last(@depth))
-        sentence << word
-      else
+      word = weighted_random(sentence.last(@depth))
+      if punctuation?(word[0])
+        sentence[-1] = sentence.last.dup << word
+      elsif word.nil?
         sentence.concat(random_capitalized_word)
+      else
+        sentence << word
       end
     end
     sentence.pop(sentence.length-wordcount)
@@ -68,19 +91,21 @@ class MarkovSentenceGenerator
   # @return [String] the sentence(s) generated.
   def generate_sentence(sentencecount)
     sentence = []
+    # Find out how many actual keys are in the dictionary.
+    key_count = @dictionary.dictionary.keys.length
+    # If less than 30 keys, use that plus five as your maximum sentence length.
+    maximum_length = key_count < 30 ? key_count + 5 : 30
     sentencecount.times do
-      # Find out how many actual keys are in the dictionary.
-      key_count = @dictionary.dictionary.keys.length
-      # If less than 30 keys, use that plus five as your maximum sentence length.
-      maximum_length = key_count < 30 ? key_count + 5 : 30
-      stop_at_index = sentence.count + maximum_length
+      wordcount = 0
       sentence.concat(random_capitalized_word)
-      until (/[.!?]/ =~ sentence.last[-1])
+      until (punctuation?(sentence.last[-1])) || wordcount > maximum_length
+        wordcount += 1
         word = weighted_random(sentence.last(@depth))
-        sentence << word unless word.nil?
-        # If a word ending with a . ! or ?  isn't found after 30 words,
-        # just add a period as there may be none in the dictionary.
-        sentence[-1] << "." if word.nil? || sentence.count > stop_at_index
+        if punctuation?(word)
+          sentence[-1] = sentence.last.dup << word
+        else
+          sentence << word
+        end
       end
     end
     sentence.join(' ')

data/lib/marky_markov/persistent_dictionary.rb CHANGED Viewed

@@ -2,9 +2,9 @@ require 'ox'
 require_relative 'markov_dictionary'
 # @private
-class PersistentDictionary < MarkovDictionary
+class PersistentDictionary < MarkovDictionary # :nodoc:
-  class DepthNotInRangeError < Exception
+  class DepthNotInRangeError < Exception # :nodoc:
   end
   # Creates a PersistentDictionary object using the supplied dictionary file.
@@ -19,6 +19,8 @@ class PersistentDictionary < MarkovDictionary
       raise DepthNotInRangeError.new("Depth must be between 1 and 5")
     end
     @dictionarylocation = dictionary
+    @split_words = /([.?!])|[\s]+/
+    @split_sentence = /(?<=[.!?])\s+/
     self.open_dictionary
   end

data/marky_markov.gemspec CHANGED Viewed

@@ -13,8 +13,8 @@ Gem::Specification.new do |s|
   ## If your rubyforge_project name is different, then edit it and comment out
   ## the sub! line in the Rakefile
   s.name              = 'marky_markov'
-  s.version           = '0.3.0'
-  s.date              = '2012-02-09'
+  s.version           = '0.3.2'
+  s.date              = '2012-02-12'
   s.rubyforge_project = 'marky_markov'
   ## Make sure your summary is short. The description may be as long
@@ -69,6 +69,7 @@ Gem::Specification.new do |s|
     marky_markov.gemspec
     spec/marky_markov/markov_dictionary_spec.rb
     spec/marky_markov/markov_sentence_generator_spec.rb
+    spec/marky_markov/marky_markov_spec.rb
     spec/marky_markov/persistent_dictionary_spec.rb
     spec/spec_helper.rb
     spec/test.txt

data/spec/marky_markov/marky_markov_spec.rb ADDED Viewed

@@ -0,0 +1,64 @@
+require 'spec_helper'
+describe MarkyMarkov do
+  context "TemporaryDictionary" do
+    before(:each) do
+      @textsource = "spec/test.txt"
+      @dictionary = MarkyMarkov::TemporaryDictionary.new
+      @onedictcompare = { ["The"] => ["cat"],
+                       ["cat"] => ["likes"],
+                     ["likes"] => ["pie"],
+                       ["pie"] => ["and"],
+                       ["and"] => ["chainsaws"],
+                 ["chainsaws"] => []}
+      @twodictcompare = {["The", "cat"] => ["likes"],
+                   ["and", "chainsaws"] => [],
+                       ["cat", "likes"] => ["pie"],
+                       ["likes", "pie"] => ["and"],
+                         ["pie", "and"] => ["chainsaws"]}
+    end
+    it "should be able to parse a string" do
+      @dictionary.parse_string "The cat likes pie and chainsaws"
+      @dictionary.dictionary.should eql(@twodictcompare)
+    end
+    it "should generate the right number of sentences" do
+    end
+    it "should create the right number of words" do
+    end
+  end
+  context "PersistentDictionary" do
+    before(:each) do
+      @textsource = "spec/test.txt"
+      @dictionary = MarkyMarkov::Dictionary.new(@textsource)
+      @onedictcompare = { ["The"] => ["cat"],
+                       ["cat"] => ["likes"],
+                     ["likes"] => ["pie"],
+                       ["pie"] => ["and"],
+                       ["and"] => ["chainsaws"],
+                 ["chainsaws"] => []}
+      @twodictcompare = {["The", "cat"] => ["likes"],
+                   ["and", "chainsaws"] => [],
+                       ["cat", "likes"] => ["pie"],
+                       ["likes", "pie"] => ["and"],
+                         ["pie", "and"] => ["chainsaws"]}
+    end
+    it "should load the saved dictionary" do
+    end
+    it "should be able to parse a string" do
+      @dictionary.parse_string "The cat likes pie and chainsaws"
+      @dictionary.dictionary.should include(@twodictcompare)
+    end
+    it "should generate the right number of sentences" do
+    end
+    it "should create the right number of words" do
+    end
+  end
+end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: marky_markov
 version: !ruby/object:Gem::Version
-  version: 0.3.0
+  version: 0.3.2
   prerelease:
 platform: ruby
 authors:
@@ -9,11 +9,11 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-02-09 00:00:00.000000000 Z
+date: 2012-02-12 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: ox
-  requirement: &70184695396840 !ruby/object:Gem::Requirement
+  requirement: &70164029504260 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
@@ -21,7 +21,7 @@ dependencies:
         version: '1.4'
   type: :runtime
   prerelease: false
-  version_requirements: *70184695396840
+  version_requirements: *70164029504260
 description: ! "MarkyMarkov makes it easy to generate simply Markov Chains based upon
   input from\n  either a source file or a string. While usable as a module in your
   code it can also be called on\n  from the command line and piped into like a standard
@@ -42,6 +42,7 @@ files:
 - marky_markov.gemspec
 - spec/marky_markov/markov_dictionary_spec.rb
 - spec/marky_markov/markov_sentence_generator_spec.rb
+- spec/marky_markov/marky_markov_spec.rb
 - spec/marky_markov/persistent_dictionary_spec.rb
 - spec/spec_helper.rb
 - spec/test.txt