markov-generator 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/lib/markov.rb +2 -0
- data/lib/markov/dictionary.rb +76 -0
- data/lib/markov/generator.rb +47 -103
- data/lib/markov/parser.rb +1 -1
- data/lib/markov/token.rb +11 -0
- data/lib/markov/util.rb +44 -0
- data/markov-generator.gemspec +9 -4
- data/test/generator_test.rb +7 -7
- data/test/test_bulk_markov.rb +2 -1
- data/test/test_markov.rb +9 -5
- data/test/texts/alice.txt +3328 -0
- data/test/texts/cthulhu.txt +1118 -0
- data/test/texts/grimm.txt +9173 -0
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: d847256699f3f91abcbf0302c2be407bf97cb653
+  data.tar.gz: 19e95af2ef8f231c56f1ac9e76716518c1f8b057
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f06aef8afefd6f20daeee4cb77d7a72daa87126331caeaa9deeda9d5beef1f77c2908d59167bb91b66e170fa92fdffdb6b464b92506f43c03563a65d98ae0737
+  data.tar.gz: 997fbd9590015db2bb8137baee2f83488a1979367ef2e516a322d67f871369d8a0e394d02241e67c76cbb933b34c16977f48226d41ee95fc4d442a33fd386998
data/VERSION
CHANGED
@@ -1 +1 @@
-0.10.0
+0.11.0
data/lib/markov.rb
CHANGED

data/lib/markov/dictionary.rb
ADDED
@@ -0,0 +1,76 @@
+
+require 'securerandom'
+
+class Markov::Dictionary
+  include Markov::Util
+
+  def initialize(depth)
+    @depth = depth
+
+    @dictionary = {}
+    @start_words = {}
+
+    srand
+  end
+
+  def empty?
+    @dictionary.empty?
+  end
+
+  def dump_startwords
+    @start_words.keys.each do |start_words|
+      puts "#{start_words} -> #{tokens_to_sentence @dictionary[start_words]}"
+    end
+  end
+
+  def dump_dictionary
+    @dictionary.keys.each do |keys|
+      following = @dictionary[keys]
+      sentence = []
+      following.each do |word|
+        sentence << "#{word.to_s},"
+      end
+      s = sentence.join(" ")
+      puts "#{keys} => #{s.slice(0,s.length-1)}"
+    end
+  end
+
+  def add_to_start_words(tokens)
+    return if tokens[0].kind != :word
+
+    tokens[0].word = tokens[0].word.capitalize
+    start_words = tokens_to_words tokens
+
+    @start_words[start_words] ||= tokens
+  end
+
+  def add_to_dictionary(tokens)
+    token = tokens.last
+    return if token == nil || token.word == ""
+
+    key_words = tokens_to_words tokens[0, @depth-1]
+
+    @dictionary[key_words] ||= []
+    @dictionary[key_words] << token
+  end
+
+  def select_start_words
+    @start_words[ @start_words.keys[random_number( @start_words.keys.length-1)]]
+  end
+
+  def select_next_token(tokens)
+    token = @dictionary[ tokens_to_words(tokens)]
+
+    return Markov::Token.new("X", :noop) if token == nil
+    token[random_number(tokens.length-1)]
+  end
+
+  def select_next_word(tokens)
+    token = nil
+    begin
+      token = select_next_token(tokens)
+    end until token.kind == :word
+    token
+  end
+
+end
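
For orientation (not part of the package diff): a minimal sketch of how the new Markov::Dictionary is driven, mirroring the calls the generator makes below. The depth, the sample words, and the token kinds are illustrative assumptions; Markov::Token.new(word, kind) is used as it appears elsewhere in this release.

  # Illustrative sketch only -- depth, words, and kinds are made-up values.
  require 'markov'

  dict   = Markov::Dictionary.new(3)    # depth 3, as the generator passes its @depth through
  tokens = %w[the quick brown].map { |w| Markov::Token.new(w, :word) }

  dict.add_to_start_words tokens[0, 2]  # capitalizes the first word and records a sentence opener
  dict.add_to_dictionary  tokens        # keys ["The", "quick"] to the trailing token "brown"

  seed = dict.select_start_words        # => the stored start tokens
  dict.select_next_token(seed).word     # => "brown" (an "X" :noop token on a lookup miss)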
data/lib/markov/generator.rb
CHANGED
@@ -2,15 +2,15 @@
 require 'securerandom'
 
 class Markov::Generator
-
+  include Markov::Util
+
   def initialize(depth)
     @depth = depth
-
-    @dictionary = {}
-    @start_words = {}
     @unparsed_sentences = []
     @tokens = []
 
+    @dict = Markov::Dictionary.new(depth)
+
     srand
   end
 
@@ -35,8 +35,8 @@ class Markov::Generator
 
       # need to store the words in both the dictionary
      # and the list of start words
-      add_to_start_words word_seq[0, @depth-1]
-      add_to_dictionary word_seq
+      @dict.add_to_start_words word_seq[0, @depth-1]
+      @dict.add_to_dictionary word_seq
 
      token = parser.next_token
      state = :sentence
@@ -48,10 +48,10 @@ class Markov::Generator
        word_seq << token
 
        # add to the dictionary
-        add_to_dictionary word_seq
+        @dict.add_to_dictionary word_seq
 
        # stop current sequence and start again
-        if token.kind == :stop
+        if token == nil || token.kind == :stop
          word_seq = []
          state = :start
        end
@@ -61,12 +61,13 @@ class Markov::Generator
    rescue => e
      # nothing to rescue
      puts e
+      puts e.backtrace
    end
 
  end # end parse_text
 
-  def generate_sentence(min_length=
-    if @
+  def generate_sentence(min_length=15)
+    if @dict.empty?
      raise EmptyDictionaryError.new("The dictionary is empty! Parse a source file/string first!")
    end
 
@@ -74,42 +75,54 @@ class Markov::Generator
    complete_sentence = false
 
    # initialize
-    select_start_words.each {|w| tokens << w}
+    @dict.select_start_words.each {|w| tokens << w}
    prev_token = tokens.last
 
    begin
-      token = select_next_token tokens.last(@depth-1)
+      token = @dict.select_next_token tokens.last(@depth-1)
 
-      if token.kind == :
-        token = select_next_word tokens.last(@depth-1) if prev_token.kind == :special
+      if token.kind == :word
        tokens << token
+        prev_token = token
      elsif token.kind == :special
-
-
+        if prev_token.kind == :word
+          tokens << token
+          prev_token = token
+        end
+      elsif token.kind == :stop
+        if prev_token.kind == :word
+          tokens << token
+          prev_token = token
+        end
      elsif token.kind == :noop
-
-
-
-
+        if prev_token.kind == :word
+          tokens << Markov::Token.new(".", :stop)
+        end
+        # start a new sentence
+        @dict.select_start_words.each {|w| tokens << w}
+        prev_token = tokens.last
      end
 
-
+      if (token.kind == :stop) && (tokens.size > min_length)
+        #puts "-- DONE(#{tokens.size}) #{tokens_to_debug tokens}"
+        return tokens_to_sentence tokens
+      end
 
-
-
-
-
-
-
-
+      # default circuit-breaker
+      if tokens.size > min_length * 4
+        # restart
+        tokens = []
+        complete_sentence = false
+
+        # initialize
+        @dict.select_start_words.each {|w| tokens << w}
+        prev_token = tokens.last
      end
 
-      # circuit-breaker
-      complete_sentence = true if tokens.size > min_length*2
    end until complete_sentence
 
    tokens_to_sentence tokens
-  end
+  end
 
  def dump_startwords
    @start_words.keys.each do |start_words|
@@ -118,80 +131,11 @@ class Markov::Generator
  end
 
  def dump_dictionary
-    @dictionary.keys.each do |keys|
-      following = @dictionary[keys]
-      sentence = []
-      following.each do |word|
-        sentence << "#{word.to_s},"
-      end
-      s = sentence.join(" ")
-      puts "#{keys} => #{s.slice(0,s.length-1)}"
-    end
-  end
-
-  private
-
-  def add_to_start_words(tokens)
-    return if tokens[0].kind != :word
-
-    tokens[0].word = tokens[0].word.capitalize
-    start_words = tokens_to_words tokens
-
-    @start_words[start_words] ||= tokens
+    @dict.dump_dictionary
  end
 
-  def add_to_dictionary(tokens)
-
-    return if token.word == ""
-
-    key_words = tokens_to_words tokens[0, @depth-1]
-
-    @dictionary[key_words] ||= []
-    @dictionary[key_words] << token
-  end
-
-  def tokens_to_words(tokens)
-    words = []
-    tokens.each do |t|
-      words << t.word
-    end
-    words
-  end
-
-  def tokens_to_sentence(tokens)
-    s = ""
-    tokens.each do |t|
-      if t.kind != :word
-        s << t.word
-      else
-        s << " " + t.word
-      end
-    end
-
-    s[1, s.length-1]
-  end
-
-  def select_start_words
-    @start_words[ @start_words.keys[random_number( @start_words.keys.length-1)]]
-  end
-
-  def select_next_token(tokens)
-    token = @dictionary[ tokens_to_words(tokens)]
-
-    return Token.new("X", :noop) if token == nil
-    token[random_number(tokens.length-1)]
-  end
-
-  def select_next_word(tokens)
-    token = nil
-    begin
-      token = select_next_token(tokens)
-    end until token.kind == :word
-    token
-  end
-
-  def random_number(upper_limit)
-    (SecureRandom.random_number * upper_limit).to_i
+  def dump_startwords
+    @dict.dump_startwords
  end
 
 end
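
As a usage sketch (again, not part of the diff), the refactored generator can be driven much like the test script at the end of this listing; the depth, corpus path, and minimum length below are illustrative placeholders.

  # Illustrative sketch -- depth, path, and min_length are placeholders.
  require 'markov'

  generator = Markov::Generator.new(2)
  generator.parse_text "./test/texts/grimm.txt"  # the test script passes a path the same way
  puts generator.generate_sentence(20)           # returns once a :stop token lands past 20 tokens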
data/lib/markov/parser.rb
CHANGED
data/lib/markov/token.rb
CHANGED
data/lib/markov/util.rb
ADDED
@@ -0,0 +1,44 @@
+
+require 'securerandom'
+
+module Markov::Util
+
+  def tokens_to_words(tokens)
+    words = []
+    tokens.each do |t|
+      words << t.word
+    end
+    words
+  end
+
+  def tokens_to_sentence(tokens)
+    s = ""
+    tokens.each do |t|
+      if t.kind != :word
+        s << t.word
+      else
+        s << " " + t.word
+      end
+    end
+
+    s[1, s.length-1]
+  end
+
+  def tokens_to_debug(tokens)
+    s = ""
+    tokens.each do |t|
+      if t.kind != :word
+        s << " " + t.to_symbol
+      else
+        s << " " + t.word
+      end
+    end
+
+    s[1, s.length-1]
+  end
+
+  def random_number(upper_limit)
+    (SecureRandom.random_number * upper_limit).to_i
+  end
+
+end
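
A small sketch (not part of the diff) of what the shared tokens_to_sentence helper produces; the token values and kinds below are assumptions based on how the generator treats :word, :special, and :stop tokens.

  # Illustrative sketch -- token values and kinds are assumed for the example.
  require 'markov'

  class UtilDemo
    include Markov::Util
  end

  tokens = [
    Markov::Token.new("hello", :word),
    Markov::Token.new(",",     :special),
    Markov::Token.new("world", :word),
    Markov::Token.new(".",     :stop)
  ]

  puts UtilDemo.new.tokens_to_sentence(tokens)   # => "hello, world."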
data/markov-generator.gemspec
CHANGED
@@ -2,16 +2,16 @@
 # DO NOT EDIT THIS FILE DIRECTLY
 # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
 # -*- encoding: utf-8 -*-
-# stub: markov-generator 0.10.0 ruby lib
+# stub: markov-generator 0.11.0 ruby lib
 
 Gem::Specification.new do |s|
   s.name = "markov-generator"
-  s.version = "0.10.0"
+  s.version = "0.11.0"
 
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.require_paths = ["lib"]
   s.authors = ["Michael Kuehl"]
-  s.date = "2016-01-
+  s.date = "2016-01-12"
   s.description = "A Markov Chain text generator library"
   s.email = "hello@ratchet.cc"
   s.extra_rdoc_files = [
@@ -29,14 +29,19 @@ Gem::Specification.new do |s|
     "Rakefile",
     "VERSION",
     "lib/markov.rb",
+    "lib/markov/dictionary.rb",
     "lib/markov/generator.rb",
     "lib/markov/parser.rb",
     "lib/markov/token.rb",
+    "lib/markov/util.rb",
     "markov-generator.gemspec",
     "test/generator_test.rb",
     "test/test_bulk_markov.rb",
     "test/test_markov.rb",
-    "test/test_parser.rb"
+    "test/test_parser.rb",
+    "test/texts/alice.txt",
+    "test/texts/cthulhu.txt",
+    "test/texts/grimm.txt"
   ]
   s.homepage = "http://github.com/ratchetcc/markov-generator"
   s.licenses = ["MIT"]
|
data/test/generator_test.rb
CHANGED
@@ -1,15 +1,15 @@
+$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
 
-require 'markov
+require 'markov'
 
-markov = Markov
-markov.
-markov.parse_source_file "./generator_test1.txt"
+markov = Markov.generator
+markov.parse_text "./test/texts/generator_test.txt"
 
+#markov.dump_startwords
 #markov.dump_dictionary
-
-markov.dump_dictionary_stats
+puts ""
 
 1..5.times do
   puts "#{markov.generate_sentence}"
+  puts ""
 end
-