RubyGems - markov_chains - Versions diffs - 0.0.0 → 0.1.0 - Mend

markov_chains 0.0.0 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

checksums.yaml +4 -4
data/.gitignore +1 -0
data/Gemfile +2 -0
data/README.md +5 -0
data/lib/markov_chains/dictionary.rb +57 -49
data/lib/markov_chains/generator.rb +9 -10
data/lib/markov_chains/version.rb +1 -1
data/test/test_dictionary.rb +6 -6
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 045674e5b0e005db622a8d1c6bae356074e88049
-  data.tar.gz: a52da2defd03a2cc01f5d8312cf5481cd27b2b20
+  metadata.gz: ee16106b2590f0bc8e9eda0d97a5fcc271ee22f3
+  data.tar.gz: 6858012c3c1f13b9f29992dcf4b61a955424d687
 SHA512:
-  metadata.gz: cf4da09f7460ecf5506a17eaeb2fde1ea04ea72dbc4d2f1f951d3f9b79a37a818168f9e4d796f56e1d921aed5f08be3d32427ae7f6e2ef46ffa26c6b9340711f
-  data.tar.gz: e40a5e57101f9bfd448e2ee9520ec240ecfaa3236e30ec31150af23a12a23d15be2f3211a58a1811bc60fba2999e5ca985bb8eec769cfcd8a218f62ccc6995d2
+  metadata.gz: d52d880b9f62e9eab776176cd03bc5513442a078fa992a5c0f157a1a15465d63a08386cfa15c0bc50801fef944809c824892bc495de566de02c8cf85447119f4
+  data.tar.gz: b80a72e8a71bb2c1b90355b1347e7b41032794716ad9cab3ad820062d32634265e9bd33e4b3de1ea6c1a647bc06cf063a8a21466763a235d97207930dbfb3a3a

data/.gitignore CHANGED Viewed

@@ -12,3 +12,4 @@
 *.o
 *.a
 mkmf.log
+*.gem

data/Gemfile CHANGED Viewed

@@ -2,3 +2,5 @@ source 'https://rubygems.org'
 # Specify your gem's dependencies in markov-chains.gemspec
 gemspec
+gem 'minitest'

data/README.md CHANGED Viewed

@@ -29,6 +29,11 @@ Generate a number of sentences, here 5:
     generator.get_sentences(5)
+## Change log
+* Version 0.1.0 - Added an order attribute to the generator for higher-order Markov chains and rewrote most of the internal code to handle order
+* Version 0.0.0 - Initial release
 ## Contributing
 1. Fork it ( https://github.com/[justindomingue]/markov-chains/fork )

data/lib/markov_chains/dictionary.rb CHANGED Viewed

@@ -1,75 +1,83 @@
 module MarkovChains
   class Dictionary
+    attr_reader :order
     # Initializes the dictionary with a text source.
     #
-    # @example Create a new dictionary
+    # @example Create a new dictionary of order 1
     #   MarkovChains::Dictionary.new(string)
+    # @example Create a new dictionary of order 2
+    #   MarkovChains::Dictionary.new(string, 2)
+    #
     # @param [String] the text source
+    # @param [int] the order of the dictionary. The order is the "memory" of the dictionary, meaning that an order <n> dictionary will consider <n> words to generate the next one. Orders of 1 or 2 are typical. With an order greater than 3, the generated sentences will be the same as the source.
     #
-    def initialize(text)
-      @words = Hash.new
+    def initialize(text, order = 1)
+      @order = order
+      @words_for = Hash.new
       @start_words = Array.new
-      wordlist = text.split
+      # Standardize input text
+      text.delete! "\n"
-      # Add first word as possible start word
-      @start_words.push wordlist[0]
+      # Process each sentence
-      # Process each word
-      wordlist.each_with_index do |word, index|
-          add(word, wordlist[index + 1]) if index <= wordlist.size - 2
-      end
+      # <sentences> has format sentence+terminator:
+      #   ["sent1", "term1", "sent2", "term2", ...]
+      seps = /([.!?]+)/
+      sentences = text.split seps
+      sentences.each_slice(2) { |s,t| process_sentence(s.strip,t) }
     end
-    # Returns a word based on the likelihood of it appearing after the input word
+    # Processes a single sentence with terminator
     #
-    # @example Get a word likely to appear next to the word 'It'
-    #   get('It')
+    # @example Process a sentence
+    #   process_sentence("It is sunny today", "!")
     #
-    # @param [String] word for which we want a possible next word
-    # @return [String] word that is likely to follow the input word
+    # @param [String] sentence to process
+    # @param [Character] sentence terminator
     #
-    def get(word)
-      return "" if !@words[word]
-      followers = @words[word]
-      sum = followers.inject(0) {|sum,kv| sum += kv[1]}
-      random = rand(sum)+1
-      partial_sum = 0
-      next_word = followers.find do |word, count|
-        partial_sum += count
-        partial_sum >= random
-      end.first
-      next_word
+    private def process_sentence(sentence, terminator)
+      # Consider phrases/words/clauses separators when splitting
+      seps = "([,;:])"
+      # Split <sentence> into words
+      words = sentence.gsub(/[^#{seps}\w'\s]/, "").gsub(/(#{seps})\s+/, '\1').split(/\s+|#{seps}/)
+      words << terminator
+      # Add <@order> start words to the list
+      @start_words << words[0, @order]
+      # Add the words to the frequency hash <words_for>
+      until words.size < @order + 1 do
+        (@words_for[words[0, @order]] ||= []) << words[@order]
+        words.shift
+      end
     end
-    # Returns a word beginning a sentence seen in the source
+    # Returns a word based on the likelihood of it appearing after the input array of words
     #
-    # @example Get a start word
-    #   get_start_word()
-    # @return [String] a possible start word
+    # @example Get a word likely to appear next to the word 'It'
+    #   get(['It'])           # => 'has'
+    # @example Get a word likely to appear after the words 'It has' (with a dictionary of order 2)
+    #   get(['It', 'has'])    # => 'been'
     #
-    def get_start_word
-      @start_words.sample
+    # @param [[String]] array of words for which we want a possible next word
+    # @return [String] word that is likely to follow the input
+    #
+    def get(words)
+      (@words_for[words] || []).sample
     end
-  private
-    # Adds word-next_word combination to the dictionary
+    # Returns a list of words beginning a sentence seen in the source
     #
-    # @example Adding a word in a order 1 dictionary
-    #   add_word("It", "has")
-    # @example Adding words in a order m dictionary
-    #   add_word("It has", "been")
+    # @example Get start words
+    #   get_start_words   # => ['It', 'has']
     #
-    # @param word [string] root word
-    # @param next_word [string] word following the root word
+    # @return [[String]] array of words that could start a sentence
     #
-    def add(word, next_word)
-      @words[word] = Hash.new(0) if !@words[word]
-      @words[word][next_word] += 1
-      @start_words.push(next_word) if word.end_with? '.' and !word.nil?
+    def get_start_words
+      @start_words.sample
     end
   end
-end
+end

data/lib/markov_chains/generator.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 module MarkovChains
   class Generator
     # Initializes the generator
     #
     # @example Create a new generator
@@ -8,8 +8,8 @@ module MarkovChains
     #
     # @param Text source to generate sentences from
     #
-    def initialize(text)
-      @dict = MarkovChains::Dictionary.new(text)
+    def initialize(text, order = 1)
+      @dict = MarkovChains::Dictionary.new(text, order)
     end
     # Returns a given number of randomly generated sentences
@@ -24,17 +24,16 @@ module MarkovChains
       sentences = []
       n.times do
-        sentence = ""
-        word = @dict.get_start_word
-        until sentence.scan(/\.|\?|!/).size == 1
-          sentence << word << " "
-          word = @dict.get(word)
+        sentence = @dict.get_start_words
+        while nw = @dict.get(sentence[-@dict.order, @dict.order])
+          sentence << nw
         end
-        sentences.push sentence
+        sentences << (sentence[0...-1].join(" ").gsub(/\s([,;:])/, '\1') << sentence.last)
       end
       sentences
     end
   end
-end
+end

data/lib/markov_chains/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module MarkovChains
-  VERSION = '0.0.0'
+  VERSION = '0.1.0'
 end

data/test/test_dictionary.rb CHANGED Viewed

@@ -4,16 +4,16 @@ require 'markov_chains'
 class DictionaryTest < Minitest::Test
   def setup
     source = "Why did the chicken cross the road?"
-    @dict = MarkovChains::Dictionary.new(source)
+    @order1 = MarkovChains::Dictionary.new(source)
   end
   def test_get
-    assert_equal "did", @dict.get("Why")
-    assert_equal true, %w(chicken road).include?( @dict.get("the"))
-    assert_equal "", @dict.get("Not there...")
+    assert_equal "did", @order1.get(["Why"])
+    assert_equal true, %w(chicken road).include?(@order1.get(["the"]))
+    assert_equal nil, @order1.get(["Not there..."])
   end
   def test_get_start_word
-    assert_equal "Why", @dict.get_start_word()
+    assert_equal ["Why"], @order1.get_start_words
   end
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: markov_chains
 version: !ruby/object:Gem::Version
-  version: 0.0.0
+  version: 0.1.0
 platform: ruby
 authors:
 - justindomingue
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-12-30 00:00:00.000000000 Z
+date: 2015-01-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler