RubyGems - marky_markov - Versions diffs - 0.3.0 → 0.3.2 - Mend

marky_markov 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

data/README.md +2 -2
data/lib/marky_markov.rb +12 -6
data/lib/marky_markov/markov_dictionary.rb +13 -10
data/lib/marky_markov/markov_sentence_generator.rb +59 -34
data/lib/marky_markov/persistent_dictionary.rb +4 -2
data/marky_markov.gemspec +3 -2
data/spec/marky_markov/marky_markov_spec.rb +64 -0
metadata +5 -4

data/README.md CHANGED Viewed

@@ -7,8 +7,8 @@ in Ruby. It can be used both from the command-line and as a library within your
 NOTE: 0.3.0 now uses arrays with multiple entries per word instead of a
 hash key for each word with the value representing number of occurrences.
 While a less elegant solution, it leads to faster text generation. We
-are also now using msgpack instead of yajl-json to store the dictionary
-which should lead to faster usage in the command-line app.
+are also now using ox instead of yajl-json to store the dictionary
+as yajl-json does not appear to support arrays within hashes properly.
 NOTE: In the transition between 0.1.3 to 0.2.0 MarkyMarkov has added the
 ability to generate proper sentences (generate_n_sentences) instead of simply a

data/lib/marky_markov.rb CHANGED Viewed

@@ -4,12 +4,12 @@
 require_relative 'marky_markov/persistent_dictionary'
 require_relative 'marky_markov/markov_sentence_generator'
-# @version = 0.3.0
+# @version = 0.3.2
 # @author Matt Furden
 # Module containing TemporaryDictionary and Dictionary for creation of
 # Markov Chain Dictionaries and generating sentences from those dictionaries.
 module MarkyMarkov
-  VERSION = '0.3.0'
+  VERSION = '0.3.2'
   class TemporaryDictionary
     # Create a new Temporary Markov Chain Dictionary and sentence generator for use.
@@ -26,6 +26,12 @@ module MarkyMarkov
       @sentence = MarkovSentenceGenerator.new(@dictionary)
     end
+    # Returns the MarkovDictionary object's dictionary hash.
+    # @return [Hash] the MarkovDictionary hash.
+    def dictionary
+      @dictionary.dictionary
+    end
     # Parses a given file and adds the sentences it contains to the current dictionary.
     #
     # @example Open a text file and add its contents to the dictionary.
@@ -93,13 +99,13 @@ module MarkyMarkov
     end
     # @since 0.1.4
-    # Modify respond_to? to include generate_n_words and generate_n_sentences
+    # Modify respond_to_missing? to include generate_n_words and generate_n_sentences
     # method_missing implementation.
-    def respond_to?(method_sym, include_private = false)
+    def respond_to_missing?(method_sym, include_private)
       if method_sym.to_s =~ /^generate_(\d*)_word[s]*$/
-        generate_n_words($1.to_i)
+        true
       elsif method_sym.to_s =~ /^generate_(\d*)_sentence[s]*$/
-        generate_n_sentences($1.to_i)
+        true
       else
         super
       end

data/lib/marky_markov/markov_dictionary.rb CHANGED Viewed

@@ -1,19 +1,20 @@
 # @private
-class MarkovDictionary
-  attr_accessor :dictionary, :depth
-  def initialize(depth=2)
-    @dictionary = {}
+class MarkovDictionary # :nodoc:
+  attr_reader :dictionary, :depth
+  def initialize(depth=2) @dictionary = {}
     @depth = depth
+    @split_words = /([.?!])|[\s]+/
+    @split_sentence = /(?<=[.!?])\s+/
   end
   # If File does not exist.
-  class FileNotFoundError < Exception
+  class FileNotFoundError < Exception # :nodoc:
   end
   # Open supplied text file:
   def open_source(source)
     if File.exists?(source)
-      File.open(source, "r").read.split
+      File.open(source, "r").read.split(@split_sentence)
     else
       raise FileNotFoundError.new("#{source} does not exist!")
     end
@@ -36,10 +37,12 @@ class MarkovDictionary
   # @example Add a string
   #   parse_source("Hi, how are you doing?", false)
   def parse_source(source, file=true)
-    contents = file ? open_source(source) : contents = source.split
-    contents.each_cons(@depth+1) do |words|
-       self.add_word(words[0..-2], words[-1])
+    contents = file ? open_source(source) : contents = source.split(@split_sentence)
+    contents.map! {|sentence| sentence.gsub(/["()]/,"")}
+    contents.each do |sentence|
+      sentence.split(@split_words).each_cons(@depth+1) do |words|
+        self.add_word(words[0..-2], words[-1])
+      end
     end
-    @dictionary[contents.last(@depth)] ||= []
   end
 end

data/lib/marky_markov/markov_sentence_generator.rb CHANGED Viewed

@@ -1,47 +1,67 @@
+# Messing about with the NullObject pattern, can't apply it in too many
+# places in this one. Need to evaluate what else could be used in this
+# aside from my first instinct of defaulting to []
 # @private
-class MarkovSentenceGenerator
+class NullObject # :nodoc:
+  def method_missing (*args, &block)
+    self
+  end
+  def nil?; true; end
+  def <<(*); end
+  def to_str; end
+  def to_ary; []; end
+end
+# @private
+NULL_OBJECT = NullObject.new # :nodoc:
+# @private
+class MarkovSentenceGenerator # :nodoc:
   def initialize(dictionary)
     @dictionary = dictionary
     @depth = @dictionary.depth
   end
-  # Returns a random word via picking a random key from the dictionary.
+  # Returns a random word via picking a random key from the dictionary.
   # In the case of the TwoWordDictionary, it returns two words to ensure
   # that the sentence will have a valid two word string to pick the next
   # word from.
-  #
+  # wordslength
   # @return [String] a string containing a random dictionary key.
   def random_word
-    keys = @dictionary.dictionary.keys
-    keys[rand(keys.length)]
+    words = @dictionary.dictionary.keys
+    words[rand(words.length)]
   end
   # Generates a random capitalized word via picking a random key from the
   # dictionary and recurring if the word is lowercase.
   #
   # (see #random_word)
-  def random_capitalized_word(attempts=0)
-    keys = @dictionary.dictionary.keys
-    x = keys[rand(keys.length)]
-    if /[A-Z]/ =~ x[0]
-      return x
-    elsif attempts < 30
-      # If you don't find a capitalized word after 30 attempts, just use
-      # a lowercase word as there may be no capitals in the dicationary.
-      random_capitalized_word(attempts+1)
-    else
-      random_word
+  def random_capitalized_word
+    attempts = 0
+    # If you don't find a capitalized word after 15 attempts, just use
+    # a lowercase word as there may be no capitals in the dictionary.
+    until attempts > 15
+      attempts += 1
+      words = @dictionary.dictionary.keys
+      random_choice = words[rand(words.length)]
+      if random_choice[0] =~ /[A-Z]/
+        return random_choice
+      end
     end
+    random_word
   end
-  # Returns a word based upon the likelyhood of it appearing after the supplied word.
-  #
+  # Returns a word based upon the likelihood of it appearing after the supplied word.
+  #
   def weighted_random(lastword)
     # If word has no words in its dictionary (last word in source text file)
     # have it pick a random word to display instead.
-    if word = @dictionary.dictionary[lastword]
-      word.sample
-    end
+    @dictionary.dictionary.fetch(lastword, NULL_OBJECT).sample
+  end
+  def punctuation?(word)
+    word =~ /[.!?]/
   end
   # Generates a sentence of (wordcount) length using the weighted_random function.
@@ -52,10 +72,13 @@ class MarkovSentenceGenerator
     sentence = []
     sentence.concat(random_capitalized_word)
     (wordcount-1).times do
-      if word = weighted_random(sentence.last(@depth))
-        sentence << word
-      else
+      word = weighted_random(sentence.last(@depth))
+      if punctuation?(word[0])
+        sentence[-1] = sentence.last.dup << word
+      elsif word.nil?
         sentence.concat(random_capitalized_word)
+      else
+        sentence << word
       end
     end
     sentence.pop(sentence.length-wordcount)
@@ -68,19 +91,21 @@ class MarkovSentenceGenerator
   # @return [String] the sentence(s) generated.
   def generate_sentence(sentencecount)
     sentence = []
+    # Find out how many actual keys are in the dictionary.
+    key_count = @dictionary.dictionary.keys.length
+    # If less than 30 keys, use that plus five as your maximum sentence length.
+    maximum_length = key_count < 30 ? key_count + 5 : 30
     sentencecount.times do
-      # Find out how many actual keys are in the dictionary.
-      key_count = @dictionary.dictionary.keys.length
-      # If less than 30 keys, use that plus five as your maximum sentence length.
-      maximum_length = key_count < 30 ? key_count + 5 : 30
-      stop_at_index = sentence.count + maximum_length
+      wordcount = 0
       sentence.concat(random_capitalized_word)
-      until (/[.!?]/ =~ sentence.last[-1])
+      until (punctuation?(sentence.last[-1])) || wordcount > maximum_length
+        wordcount += 1
         word = weighted_random(sentence.last(@depth))
-        sentence << word unless word.nil?
-        # If a word ending with a . ! or ?  isn't found after 30 words,
-        # just add a period as there may be none in the dictionary.
-        sentence[-1] << "." if word.nil? || sentence.count > stop_at_index
+        if punctuation?(word)
+          sentence[-1] = sentence.last.dup << word
+        else
+          sentence << word
+        end
       end
     end
     sentence.join(' ')

data/lib/marky_markov/persistent_dictionary.rb CHANGED Viewed

@@ -2,9 +2,9 @@ require 'ox'
 require_relative 'markov_dictionary'
 # @private
-class PersistentDictionary < MarkovDictionary
+class PersistentDictionary < MarkovDictionary # :nodoc:
-  class DepthNotInRangeError < Exception
+  class DepthNotInRangeError < Exception # :nodoc:
   end
   # Creates a PersistentDictionary object using the supplied dictionary file.
@@ -19,6 +19,8 @@ class PersistentDictionary < MarkovDictionary
       raise DepthNotInRangeError.new("Depth must be between 1 and 5")
     end
     @dictionarylocation = dictionary
+    @split_words = /([.?!])|[\s]+/
+    @split_sentence = /(?<=[.!?])\s+/
     self.open_dictionary
   end

data/marky_markov.gemspec CHANGED Viewed

@@ -13,8 +13,8 @@ Gem::Specification.new do |s|
   ## If your rubyforge_project name is different, then edit it and comment out
   ## the sub! line in the Rakefile
   s.name              = 'marky_markov'
-  s.version           = '0.3.0'
-  s.date              = '2012-02-09'
+  s.version           = '0.3.2'
+  s.date              = '2012-02-12'
   s.rubyforge_project = 'marky_markov'
   ## Make sure your summary is short. The description may be as long
@@ -69,6 +69,7 @@ Gem::Specification.new do |s|
     marky_markov.gemspec
     spec/marky_markov/markov_dictionary_spec.rb
     spec/marky_markov/markov_sentence_generator_spec.rb
+    spec/marky_markov/marky_markov_spec.rb
     spec/marky_markov/persistent_dictionary_spec.rb
     spec/spec_helper.rb
     spec/test.txt

data/spec/marky_markov/marky_markov_spec.rb ADDED Viewed

@@ -0,0 +1,64 @@
+require 'spec_helper'
+describe MarkyMarkov do
+  context "TemporaryDictionary" do
+    before(:each) do
+      @textsource = "spec/test.txt"
+      @dictionary = MarkyMarkov::TemporaryDictionary.new
+      @onedictcompare = { ["The"] => ["cat"],
+                       ["cat"] => ["likes"],
+                     ["likes"] => ["pie"],
+                       ["pie"] => ["and"],
+                       ["and"] => ["chainsaws"],
+                 ["chainsaws"] => []}
+      @twodictcompare = {["The", "cat"] => ["likes"],
+                   ["and", "chainsaws"] => [],
+                       ["cat", "likes"] => ["pie"],
+                       ["likes", "pie"] => ["and"],
+                         ["pie", "and"] => ["chainsaws"]}
+    end
+    it "should be able to parse a string" do
+      @dictionary.parse_string "The cat likes pie and chainsaws"
+      @dictionary.dictionary.should eql(@twodictcompare)
+    end
+    it "should generate the right number of sentences" do
+    end
+    it "should create the right number of words" do
+    end
+  end
+  context "PersistentDictionary" do
+    before(:each) do
+      @textsource = "spec/test.txt"
+      @dictionary = MarkyMarkov::Dictionary.new(@textsource)
+      @onedictcompare = { ["The"] => ["cat"],
+                       ["cat"] => ["likes"],
+                     ["likes"] => ["pie"],
+                       ["pie"] => ["and"],
+                       ["and"] => ["chainsaws"],
+                 ["chainsaws"] => []}
+      @twodictcompare = {["The", "cat"] => ["likes"],
+                   ["and", "chainsaws"] => [],
+                       ["cat", "likes"] => ["pie"],
+                       ["likes", "pie"] => ["and"],
+                         ["pie", "and"] => ["chainsaws"]}
+    end
+    it "should load the saved dictionary" do
+    end
+    it "should be able to parse a string" do
+      @dictionary.parse_string "The cat likes pie and chainsaws"
+      @dictionary.dictionary.should include(@twodictcompare)
+    end
+    it "should generate the right number of sentences" do
+    end
+    it "should create the right number of words" do
+    end
+  end
+end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: marky_markov
 version: !ruby/object:Gem::Version
-  version: 0.3.0
+  version: 0.3.2
   prerelease:
 platform: ruby
 authors:
@@ -9,11 +9,11 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-02-09 00:00:00.000000000 Z
+date: 2012-02-12 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: ox
-  requirement: &70184695396840 !ruby/object:Gem::Requirement
+  requirement: &70164029504260 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
@@ -21,7 +21,7 @@ dependencies:
         version: '1.4'
   type: :runtime
   prerelease: false
-  version_requirements: *70184695396840
+  version_requirements: *70164029504260
 description: ! "MarkyMarkov makes it easy to generate simply Markov Chains based upon
   input from\n  either a source file or a string. While usable as a module in your
   code it can also be called on\n  from the command line and piped into like a standard
@@ -42,6 +42,7 @@ files:
 - marky_markov.gemspec
 - spec/marky_markov/markov_dictionary_spec.rb
 - spec/marky_markov/markov_sentence_generator_spec.rb
+- spec/marky_markov/marky_markov_spec.rb
 - spec/marky_markov/persistent_dictionary_spec.rb
 - spec/spec_helper.rb
 - spec/test.txt