my_first_markov 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/my_first_markov.rb +13 -1
- data/lib/my_first_markov/chain.rb +82 -15
- data/lib/my_first_markov/version.rb +1 -1
- data/test/sample_text.txt +5 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4118db4aac92b92a6507de32613bfebe291803b1
+  data.tar.gz: 0bca4ef8652db717d917657c50aaadab0a9c1c50
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 98b0d4ea9dd4ed5a29bd1590e383d6c012c6b72000fb7f1ff4330a95bd7efa5da526c5f7a0e7b0ead1c5fd403143a540324a8b65ac1166158887b118f64c60ef
+  data.tar.gz: a8cbb0c688c22a5ac68b0499f507f6f5a101a94091c2cec9f04f98ba58937b09997c8093708bc12a64143a591e7e9130b539d5989d01b8fef7bcf40a48605a5a
data/bin/my_first_markov.rb
CHANGED
@@ -24,11 +24,23 @@ if File.basename(__FILE__) == File.basename($PROGRAM_NAME)

     $0 a ./test/sample_text.txt [random_next] character
     > p
+
+    OR
+
+    $0 <firstN> <file-glob of entry observations> <split_on: word* | character> <next_method: first>
     EOH
     warn(msg)
     exit
+  elsif starting_entry =~ /\-\-first/
+    next_method = starting_entry.dup[2..-1]
+    starting_entry = nil
+    #puts "calling MyFirstMarkov::Chain.file_to_entries(#{file}, #{split_on.inspect}, #{starting_entry.inspect}, #{next_method.inspect})"
+    entries, _na, next_method, count = MyFirstMarkov::Chain.file_to_entries(file, split_on, starting_entry, next_method)
+    mc = MyFirstMarkov::Chain.new(entries)
+    puts mc.send(next_method, count)
+    exit
   end

-  puts "calling MyFirstMarkov::Chain.from_file(#{file}, #{split_on.inspect}, #{starting_entry.inspect}, #{next_method.inspect})"
+  #puts "calling MyFirstMarkov::Chain.from_file(#{file}, #{split_on.inspect}, #{starting_entry.inspect}, #{next_method.inspect})"
   puts MyFirstMarkov::Chain.from_file(file, split_on, starting_entry, next_method)
 end
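Read end to end, the new --first branch above boils down to roughly the following standalone calls. This is a sketch only: the require_relative path and the first3 argument are illustrative, and a trailing count such as the 3 in first3 is split off inside Chain.file_to_entries (the heredoc above calls this form firstN).

    require_relative '../lib/my_first_markov/chain'  # illustrative load path

    file        = './test/sample_text.txt'
    split_on    = 'word'
    next_method = 'first3'   # method name plus an optional count

    # Mirrors the new elsif branch: parse the argument, build the chain, dispatch.
    entries, _starting_entry, next_method, count =
      MyFirstMarkov::Chain.file_to_entries(file, split_on, nil, next_method)

    mc = MyFirstMarkov::Chain.new(entries)
    puts mc.send(next_method, count)  # with DEFAULT_DEBUG on, a JSON map of entry => observation count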
data/lib/my_first_markov/chain.rb
CHANGED
@@ -1,8 +1,14 @@
+# ./bin/my_first_markov.rb --first ./test/sample_text.txt
+# ./bin/my_first_markov.rb apple ./test/sample_text.txt
+# ./bin/my_first_markov.rb apple ./test/sample_text.txt most_likely_next
+require 'json'
 module MyFirstMarkov
   class Chain
+    DEFAULT_COUNT = 5
+    DEFAULT_DEBUG = true

     def self.next_methods
-      ["random_next", "most_likely_next"]
+      ["random_next", "most_likely_next", "first"]
     end

     def self.default_next_method
@@ -17,12 +23,29 @@ module MyFirstMarkov
       split_on_values.first
     end

+    def self.from_downcase_file(file, split_on, starting_entry, next_method)
+      entries, starting_entry, next_method, count = file_to_entries(file, split_on, starting_entry, next_method)
+      return from_entries(entries.map(&:downcase), starting_entry, next_method, count)
+    end
+
     def self.from_file(file, split_on, starting_entry, next_method)
+      from_entries(*file_to_entries(file, split_on, starting_entry, next_method))
+    end
+
+    def self.file_to_entries(file, split_on, starting_entry, next_method)
       unless split_on && MyFirstMarkov::Chain.split_on_values.include?(split_on.downcase)
         split_on = MyFirstMarkov::Chain.default_split_on_value
       end

-
+      if next_method
+        if matches = next_method.match(/^(\D+)(\d+)$/)
+          next_method = matches[1]
+          count = matches[2]
+          unless MyFirstMarkov::Chain.next_methods.include?(next_method.downcase)
+            next_method = MyFirstMarkov::Chain.default_next_method
+          end
+        end
+      else
         next_method = MyFirstMarkov::Chain.default_next_method
       end

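The /^(\D+)(\d+)$/ match in file_to_entries is what lets callers fold an optional count onto the end of the next_method argument. A quick illustration with made-up values; note the count stays a string here, and the query methods call count.to_i on it later.

    # Splitting a combined next_method argument (illustrative values only).
    matches = "most_likely_next3".match(/^(\D+)(\d+)$/)
    matches[1]                           # => "most_likely_next"
    matches[2]                           # => "3"

    "random_next".match(/^(\D+)(\d+)$/)  # => nil, so count stays nil and DEFAULT_COUNT is used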
@@ -34,14 +57,20 @@ module MyFirstMarkov
       ("word" == split_on.downcase) ? entries = data.split : entries = data.split(//)
       entries ||= []

-
+      #puts "return [#{entries.inspect}, #{starting_entry.inspect}, #{next_method.inspect}, #{count || DEFAULT_COUNT}]"
+      return [entries, starting_entry, next_method, count || DEFAULT_COUNT]
+    end
+
+    def self.from_entries(entries, starting_entry, next_method, count)
+      new(entries).send(next_method.downcase, starting_entry, count)
     end

-    def initialize(
+    def initialize(ordered_entries, debug=DEFAULT_DEBUG)
+      @debug = debug
       @entries = Hash.new
-
-      next_entry_idx = next_idx_or_nil(index,
-      add(entry,
+      ordered_entries.each_with_index do |entry, index|
+        next_entry_idx = next_idx_or_nil(index, ordered_entries.size)
+        add(entry, ordered_entries[next_entry_idx]) if next_entry_idx
       end
     end

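The rewritten constructor walks the ordered entries pairwise and counts every observed transition. On a tiny input it builds a nested count hash along these lines; this is a sketch that assumes next_idx_or_nil (unchanged and not shown in this diff) returns the following index and nil for the last entry, as the loop implies.

    chain = MyFirstMarkov::Chain.new(%w[apple this apple is a text])
    # @entries now holds "entry => { next_entry => observations }", roughly:
    #   { "apple" => { "this" => 1, "is" => 1 },
    #     "this"  => { "apple" => 1 },
    #     "is"    => { "a" => 1 },
    #     "a"     => { "text" => 1 } }
    # "text" is last, has no successor, and so never becomes a key.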
@@ -50,36 +79,74 @@ module MyFirstMarkov
       @entries[entry][next_entry] += 1
     end

-    def
+    def first(count=nil)
+      count ||= DEFAULT_COUNT
+      # @entries.keys.sort {|a,b| num_observations_for(b) <=> num_observations_for(a) }.take(count)
+      results = @entries.keys.reduce({}) { |memo, key|
+        memo[key] = num_observations_for(key); memo
+      }.sort { |a,b| num_observations_for(b.first) <=> num_observations_for(a.first) }
+       .take(count.to_i)
+
+      if (@debug)
+        results.reduce({}) { |memo, ary| memo[ary.first] = ary.last; memo }.to_json
+      else
+        #results.first
+        results.map(&:first).to_json # the "entry" part, not the "num_observations"
+      end
+    end
+
+    def most_likely_next(entry, count=nil)
+      count ||= DEFAULT_COUNT
       _next(entry) do |observation_total, next_entries_and_observations|
-        next_entries_and_observations
+        results = next_entries_and_observations
           .sort {|a,b| b.last <=> a.last} # sort (in reverse) by observations
-          .
-
+          .take(count.to_i) # choose the array(s) with the largest observation (could be many with same #)
+
+        if (@debug)
+          # debug:
+          results.reduce({}) { |memo, ary| memo[ary.first] = ary.last; memo }.to_json
+        else
+          results.map(&:first).to_json # the "entry" part, not the "num_observations"
+        end
       end
     end

-    def random_next(entry)
+    def random_next(entry, count=nil)
+      count ||= 1
+      #puts "called w/ entry: #{entry.inspect}, count: #{count.inspect}"
       _next(entry) do |observation_total, next_entries_and_observations|
         random_threshold = rand(observation_total) + 1
         partial_observation_sum = 0

-        next_entries_and_observations.
+        results = next_entries_and_observations.select { |next_entry, num_observations|
          partial_observation_sum += num_observations
          partial_observation_sum >= random_threshold
-        }.
+        }.take(count.to_i)
+
+        if (@debug)
+          # debug:
+          #{ result.first => result.last }.to_json
+          results.reduce({}) { |memo, ary| memo[ary.first] = ary.last; memo }.to_json
+        else
+          #result.first # the "entry" part, not the "num_observations"
+          results.map(&:first).to_json # the "entry" part, not the "num_observations"
+        end
       end
     end


     private

+    def num_observations_for(entry)
+      @entries[entry].reduce(0) {|sum,entry_observations| sum += entry_observations.last}
+    end
+
     def _next(entry, &block)
       return "" unless @entries.key?(entry)

       # remember each entry contains a hash of the form {subsequent_entry: num_of_observations, other_subsequent_entry: num_of_observaions, ...}
       # calling reduce on a hash converts to an array [[s_entry, observation_count], ...]
-      num_of_observations =
+      num_of_observations = num_observations_for(entry)
       return block.call(num_of_observations, @entries[entry])
     end

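With DEFAULT_DEBUG left at true, the three query methods now return JSON strings that map entries to observation counts (or just the entries, as JSON, when debug is off). A hedged sketch of the new call shapes; the file path is the gem's own test fixture, and exact output depends on the input text.

    chain = MyFirstMarkov::Chain.new(File.read('./test/sample_text.txt').split)

    chain.first(3)                      # JSON: the 3 entries with the most observed successors
    chain.most_likely_next('apple', 2)  # JSON: the 2 most frequent successors of "apple"
    chain.random_next('apple')          # JSON: one successor, chosen in proportion to its count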
data/test/sample_text.txt
CHANGED
@@ -1 +1,5 @@
-apple this apple is a text
+apple this apple is a text
+this apple was a text
+this is a big apple text
+this could be an apple big old apple text
+this apple is not apple text apple