RubyGems - marky_markov - Versions diffs - 0.2.0 → 0.3.0 - Mend

marky_markov 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

data/README.md +11 -5
data/lib/marky_markov.rb +2 -2
data/lib/marky_markov/markov_dictionary.rb +4 -5
data/lib/marky_markov/markov_sentence_generator.rb +13 -15
data/lib/marky_markov/persistent_dictionary.rb +6 -7
data/marky_markov.gemspec +82 -0
data/spec/marky_markov/markov_dictionary_spec.rb +17 -28
data/spec/textdictcompare.mmd +35 -1
metadata +8 -10

data/README.md CHANGED Viewed

@@ -4,6 +4,12 @@ Marky Markov and the Funky Sentences
 Marky Markov is a naïve experiment in Markov Chain generation implemented
 in Ruby. It can be used both from the command-line and as a library within your code.
+NOTE: 0.3.0 now uses arrays with multiple entries per word instead of a
+hash key for each word with the value representing the number of occurrences.
+While a less elegant solution, it leads to faster text generation. We
+are also now using ox instead of yajl-ruby to store the dictionary,
+which should lead to faster usage in the command-line app.
 NOTE: In the transition between 0.1.3 to 0.2.0 MarkyMarkov has added the
 ability to generate proper sentences (generate_n_sentences) instead of simply a
 maximum number of words. The command-line app has changed to sentences as its default
@@ -46,9 +52,9 @@ number in there, well, you can!
     markov.generate_20_words
 The default dictionary depth is two words.
- `{"I hope"     => {"this" => 1},
-  "hope this"  => {"makes" => 1},
-  "this makes" => {"sense" => 1}}`
+ `{["I", "hope"]    => ["this"],
+  ["hope", "this"]  => ["makes"],
+  ["this", "makes"] => ["sense"]}`
 but it can be set to a depth between 1 and 5 upon dictionary creation,
 though really any higher than 3 and it starts to simply print passages
 from the source text.
@@ -56,8 +62,8 @@ from the source text.
     markov = MarkyMarkov::Dictionary.new('dictionary', 3)
 creates a dictionary with a depth of three words.
-`{"I hope this" => {"makes" => 1},
-  "hope this makes" => {"sense" => 1}`
+`{["I", "hope", "this"]     => ["makes"],
+  ["hope", "this", "makes"] => ["sense"]}`
 If you want to delete a dictionary you call it upon the Dictionary class itself while
 passing in the filename/location.

data/lib/marky_markov.rb CHANGED Viewed

@@ -4,12 +4,12 @@
 require_relative 'marky_markov/persistent_dictionary'
 require_relative 'marky_markov/markov_sentence_generator'
-# @version = 0.2.0
+# @version = 0.3.0
 # @author Matt Furden
 # Module containing TemporaryDictionary and Dictionary for creation of
 # Markov Chain Dictionaries and generating sentences from those dictionaries.
 module MarkyMarkov
-  VERSION = '0.2.0'
+  VERSION = '0.3.0'
   class TemporaryDictionary
     # Create a new Temporary Markov Chain Dictionary and sentence generator for use.

data/lib/marky_markov/markov_dictionary.rb CHANGED Viewed

@@ -24,9 +24,8 @@ class MarkovDictionary
   # @example Adding a word
   #   add_word("Hello", "world")
   def add_word(rootword, followedby)
-    @dictionary[rootword] ||= Hash.new(0)
-    @dictionary[rootword][followedby] ||= 0
-    @dictionary[rootword][followedby] += 1
+    @dictionary[rootword] ||= []
+    @dictionary[rootword] << followedby
   end
   # Given a source of text, be it a text file (file=true) or a string (file=false)
@@ -39,8 +38,8 @@ class MarkovDictionary
   def parse_source(source, file=true)
     contents = file ? open_source(source) : contents = source.split
     contents.each_cons(@depth+1) do |words|
-       self.add_word(words[0..-2].join(' '), words[-1])
+       self.add_word(words[0..-2], words[-1])
     end
-    @dictionary[contents.last(@depth).join(' ')] ||= Hash.new(0)
+    @dictionary[contents.last(@depth)] ||= []
   end
 end

data/lib/marky_markov/markov_sentence_generator.rb CHANGED Viewed

@@ -39,16 +39,9 @@ class MarkovSentenceGenerator
   def weighted_random(lastword)
     # If word has no words in its dictionary (last word in source text file)
     # have it pick a random word to display instead.
-    @dictionary.dictionary.fetch(lastword, random_word)
-      total = @dictionary.dictionary[lastword].values.inject(:+)
-      return random_word if total.nil?
-      random = rand(total)+1
-      @dictionary.dictionary[lastword].each do |word, occurs|
-        random -= occurs
-        if random <= 0
-          return word
-        end
-      end
+    if word = @dictionary.dictionary[lastword]
+      word.sample
+    end
   end
   # Generates a sentence of (wordcount) length using the weighted_random function.
@@ -57,9 +50,13 @@ class MarkovSentenceGenerator
   # @return [String] the words, hopefully forming sentences generated.
   def generate(wordcount)
     sentence = []
-    sentence.concat(random_capitalized_word.split)
+    sentence.concat(random_capitalized_word)
     (wordcount-1).times do
-      sentence.concat(weighted_random(sentence.last(@depth).join(' ')).split)
+      if word = weighted_random(sentence.last(@depth))
+        sentence << word
+      else
+        sentence.concat(random_capitalized_word)
+      end
     end
     sentence.pop(sentence.length-wordcount)
     sentence.join(' ')
@@ -77,12 +74,13 @@ class MarkovSentenceGenerator
       # If less than 30 keys, use that plus five as your maximum sentence length.
       maximum_length = key_count < 30 ? key_count + 5 : 30
       stop_at_index = sentence.count + maximum_length
-      sentence.concat(random_capitalized_word.split)
+      sentence.concat(random_capitalized_word)
       until (/[.!?]/ =~ sentence.last[-1])
-        sentence.concat(weighted_random(sentence.last(@depth).join(' ')).split)
+        word = weighted_random(sentence.last(@depth))
+        sentence << word unless word.nil?
         # If a word ending with a . ! or ?  isn't found after 30 words,
         # just add a period as there may be none in the dictionary.
-        sentence[-1] << "." if sentence.count > stop_at_index
+        sentence[-1] << "." if word.nil? || sentence.count > stop_at_index
       end
     end
     sentence.join(' ')

data/lib/marky_markov/persistent_dictionary.rb CHANGED Viewed

@@ -1,4 +1,4 @@
-require 'yajl'
+require 'ox'
 require_relative 'markov_dictionary'
 # @private
@@ -28,10 +28,9 @@ class PersistentDictionary < MarkovDictionary
   # otherwise it creates an empty hash.
   def open_dictionary
     if File.exists?(@dictionarylocation)
-      File.open(@dictionarylocation,'r').each do |f|
-        @depth = f[0].to_i
-        @dictionary = Yajl::Parser.parse(f[1..-1])
-      end
+      file = File.new(@dictionarylocation, 'r').read
+        @depth = file[0].to_i
+        @dictionary = Ox.parse_obj(file[1..-1])
     else
       @dictionary = {}
     end
@@ -40,9 +39,9 @@ class PersistentDictionary < MarkovDictionary
   # Saves the PersistentDictionary objects @dictionary hash
   # to disk, prefixed with the depth, as serialized by Ox.dump.
   def save_dictionary!
-    json = Yajl::Encoder.encode(@dictionary)
+    packed = Ox.dump(@dictionary)
     File.open(@dictionarylocation, 'w') do |f|
-      f.puts @depth.to_s + json
+      f.write @depth.to_s + packed
     end
     true
   end

data/marky_markov.gemspec ADDED Viewed

@@ -0,0 +1,82 @@
+## This is the rakegem gemspec template. Make sure you read and understand
+## all of the comments. Some sections require modification, and others can
+## be deleted if you don't need them. Once you understand the contents of
+## this file, feel free to delete any comments that begin with two hash marks.
+## You can find comprehensive Gem::Specification documentation, at
+## http://docs.rubygems.org/read/chapter/20
+Gem::Specification.new do |s|
+  s.specification_version = 2 if s.respond_to? :specification_version=
+  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
+  s.rubygems_version = '1.3.5'
+  ## Leave these as is; they will be modified for you by the rake gemspec task.
+  ## If your rubyforge_project name is different, then edit it and comment out
+  ## the sub! line in the Rakefile
+  s.name              = 'marky_markov'
+  s.version           = '0.3.0'
+  s.date              = '2012-02-09'
+  s.rubyforge_project = 'marky_markov'
+  ## Make sure your summary is short. The description may be as long
+  ## as you like.
+  s.summary     = "Simple Markov Chain generation available in the command-line"
+  s.description = "MarkyMarkov makes it easy to generate simply Markov Chains based upon input from
+  either a source file or a string. While usable as a module in your code it can also be called on
+  from the command line and piped into like a standard UNIX application."
+  ## List the primary authors. If there are a bunch of authors, it's probably
+  ## better to set the email to an email list or something. If you don't have
+  ## a custom homepage, consider using your GitHub URL or the like.
+  s.authors  = ["Matt Furden"]
+  s.email    = 'mfurden@gmail.com'
+  s.homepage = 'http://www.thefurd.com'
+  ## This gets added to the $LOAD_PATH so that 'lib/NAME.rb' can be required as
+  ## require 'NAME.rb' or 'lib/NAME/file.rb' can be required as require 'NAME/file.rb'
+  s.require_paths = %w[lib]
+  ## This section is only necessary if you have C extensions.
+  #s.require_paths << 'ext'
+  #s.extensions = %w[ext/extconf.rb]
+  ## If your gem includes any executables, list them here.
+  s.executables = ["marky_markov"]
+  ## Specify any RDoc options here. You'll want to add your README and
+  ## LICENSE files to the extra_rdoc_files list.
+  s.rdoc_options = ["--charset=UTF-8"]
+  s.extra_rdoc_files = %w[README.md]
+  ## List your runtime dependencies here. Runtime dependencies are those
+  ## that are needed for an end user to actually USE your code.
+  s.add_dependency('ox', "~> 1.4")
+  ## List your development dependencies here. Development dependencies are
+  ## those that are only needed during development
+  #s.add_development_dependency('DEVDEPNAME', [">= 1.1.0", "< 2.0.0"])
+  ## Leave this section as-is. It will be automatically generated from the
+  ## contents of your Git repository via the gemspec task. DO NOT REMOVE
+  ## THE MANIFEST COMMENTS, they are used as delimiters by the task.
+  # = MANIFEST =
+  s.files = %w[
+    README.md
+    bin/marky_markov
+    lib/marky_markov.rb
+    lib/marky_markov/markov_dictionary.rb
+    lib/marky_markov/markov_sentence_generator.rb
+    lib/marky_markov/persistent_dictionary.rb
+    marky_markov.gemspec
+    spec/marky_markov/markov_dictionary_spec.rb
+    spec/marky_markov/markov_sentence_generator_spec.rb
+    spec/marky_markov/persistent_dictionary_spec.rb
+    spec/spec_helper.rb
+    spec/test.txt
+    spec/textdictcompare.mmd
+  ]
+  # = MANIFEST =
+  ## Test files will be grabbed from the file list. Make sure the path glob
+  ## matches what you actually use.
+  s.test_files = s.files.select { |path| path =~ /^test\/test_.*\.rb/ }
+end

data/spec/marky_markov/markov_dictionary_spec.rb CHANGED Viewed

@@ -5,19 +5,13 @@ describe MarkovDictionary do
     before(:each) do
       @onetextsource = "spec/test.txt"
       @onedict = MarkovDictionary.new(1)
-      @onedict.parse_source("Hello how are you doing today", false)
-      @onestringdict = {"Hello"   => {"how"        => 1},
-                        "how"     => {"are"        => 1},
-                        "are"     => {"you"        => 1},
-                        "you"     => {"doing"      => 1},
-                        "doing"   => {"today"      => 1},
-                        "today"   => {} }
-      @onetextdict = {"The" => {"cat"=>1},
-                      "and" => {"chainsaws"=>1},
-                      "cat" => {"likes"=>1},
-                      "chainsaws" => {},
-                      "likes" => {"pie"=>1},
-                      "pie" => {"and"=>1} }
+      @onedict.parse_source("The cat likes pie and chainsaws", false)
+      @onetextdict = { ["The"] => ["cat"],
+                       ["cat"] => ["likes"],
+                     ["likes"] => ["pie"],
+                       ["pie"] => ["and"],
+                       ["and"] => ["chainsaws"],
+                 ["chainsaws"] => []}
     end
     it "can open a file" do
@@ -30,8 +24,8 @@ describe MarkovDictionary do
     end
     it "can add a word to the dictionary" do
-      @onedict.add_word("to", "be")
-      @onedict.dictionary.should include("to" => {"be" => 1})
+      @onedict.add_word(["to"], "be")
+      @onedict.dictionary.should include(["to"] => ["be"])
     end
     it "create a dictionary via parsing a text file" do
@@ -41,7 +35,7 @@ describe MarkovDictionary do
     end
     it "builds a one word dictionary properly" do
-      @onedict.dictionary.should eql(@onestringdict)
+      @onedict.dictionary.should eql(@onetextdict)
     end
   end
@@ -50,21 +44,16 @@ describe MarkovDictionary do
       @twodict = MarkovDictionary.new
       @twodict.parse_source("The cat likes pie and chainsaws", false)
       @twotextsource = "spec/test.txt"
-      @twostringdict = { "The cat"       => { "likes"     => 1},
-                         "cat likes"     => { "pie"       => 1 },
-                         "likes pie"     => {"and"        => 1 },
-                         "pie and"       => { "chainsaws" => 1 },
-                         "and chainsaws" => {} }
-      @twotextdict = {"The cat"          => {"likes"      => 1},
-                      "cat likes"        => {"pie"        => 1},
-                      "likes pie"        => {"and"        => 1},
-                      "pie and"          => {"chainsaws"  => 1},
-                      "and chainsaws"    => {}}
+      @twotextdict = {["The", "cat"] => ["likes"],
+                  ["and", "chainsaws"] => [],
+                      ["cat", "likes"] => ["pie"],
+                      ["likes", "pie"] => ["and"],
+                        ["pie", "and"] => ["chainsaws"]}
     end
     it "can add a word to the two-word dictionary" do
-      @twodict.add_word("Zebras like", "kung-fu")
-      @twodict.dictionary.should eql(@twostringdict.merge( {"Zebras like" => {"kung-fu" => 1}} ))
+      @twodict.add_word(["Zebras", "like"], "kung-fu")
+      @twodict.dictionary.should eql(@twotextdict.merge( {["Zebras", "like"] => ["kung-fu"]} ))
     end
     it "create a two-word dictionary via parsing a text file" do

data/spec/textdictcompare.mmd CHANGED Viewed

@@ -1 +1,35 @@
-2{"The cat":{"likes":1},"cat likes":{"pie":1},"likes pie":{"and":1},"pie and":{"chainsaws":1},"and chainsaws":{}}
+2<h>
+  <a>
+    <s>The</s>
+    <s>cat</s>
+  </a>
+  <a>
+    <s>likes</s>
+  </a>
+  <a>
+    <s>cat</s>
+    <s>likes</s>
+  </a>
+  <a>
+    <s>pie</s>
+  </a>
+  <a>
+    <s>likes</s>
+    <s>pie</s>
+  </a>
+  <a>
+    <s>and</s>
+  </a>
+  <a>
+    <s>pie</s>
+    <s>and</s>
+  </a>
+  <a>
+    <s>chainsaws</s>
+  </a>
+  <a>
+    <s>and</s>
+    <s>chainsaws</s>
+  </a>
+  <a/>
+</h>

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: marky_markov
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.3.0
   prerelease:
 platform: ruby
 authors:
@@ -9,22 +9,19 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-02-08 00:00:00.000000000 Z
+date: 2012-02-09 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
-  name: yajl-ruby
-  requirement: &70254574670860 !ruby/object:Gem::Requirement
+  name: ox
+  requirement: &70184695396840 !ruby/object:Gem::Requirement
     none: false
     requirements:
-    - - ! '>='
+    - - ~>
       - !ruby/object:Gem::Version
-        version: 1.1.0
-    - - <
-      - !ruby/object:Gem::Version
-        version: 2.0.0
+        version: '1.4'
   type: :runtime
   prerelease: false
-  version_requirements: *70254574670860
+  version_requirements: *70184695396840
 description: ! "MarkyMarkov makes it easy to generate simply Markov Chains based upon
   input from\n  either a source file or a string. While usable as a module in your
   code it can also be called on\n  from the command line and piped into like a standard
@@ -42,6 +39,7 @@ files:
 - lib/marky_markov/markov_dictionary.rb
 - lib/marky_markov/markov_sentence_generator.rb
 - lib/marky_markov/persistent_dictionary.rb
+- marky_markov.gemspec
 - spec/marky_markov/markov_dictionary_spec.rb
 - spec/marky_markov/markov_sentence_generator_spec.rb
 - spec/marky_markov/persistent_dictionary_spec.rb