my_first_markov 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b8b6a08834fbbc7a6062885517e7288865901c91
4
- data.tar.gz: 609072cc5f6d1d661e668a93ae3c8399c4483595
3
+ metadata.gz: 4118db4aac92b92a6507de32613bfebe291803b1
4
+ data.tar.gz: 0bca4ef8652db717d917657c50aaadab0a9c1c50
5
5
  SHA512:
6
- metadata.gz: 49998299be2bee0c59d3916b341043a97d26e17e32e3e8c0af13f94d33ade05f781f518154bfbcd3eacacfa6a71953c0291f40fa2f630acee729eaacb53bfa12
7
- data.tar.gz: 17eb6a296ebe795f099fa5f4ff581e77533d6f4d13a87bb3535e5e5c81415dffd74454dd1fd6b4d06ac0322f1f47c45266a0b3de7a2f1159494919817c056781
6
+ metadata.gz: 98b0d4ea9dd4ed5a29bd1590e383d6c012c6b72000fb7f1ff4330a95bd7efa5da526c5f7a0e7b0ead1c5fd403143a540324a8b65ac1166158887b118f64c60ef
7
+ data.tar.gz: a8cbb0c688c22a5ac68b0499f507f6f5a101a94091c2cec9f04f98ba58937b09997c8093708bc12a64143a591e7e9130b539d5989d01b8fef7bcf40a48605a5a
@@ -24,11 +24,23 @@ if File.basename(__FILE__) == File.basename($PROGRAM_NAME)
24
24
 
25
25
  $0 a ./test/sample_text.txt [random_next] character
26
26
  > p
27
+
28
+ OR
29
+
30
+ $0 <firstN> <file-glob of entry observations> <split_on: word* | character> <next_method: first>
27
31
  EOH
28
32
  warn(msg)
29
33
  exit
34
+ elsif starting_entry =~ /\-\-first/
35
+ next_method = starting_entry.dup[2..-1]
36
+ starting_entry = nil
37
+ #puts "calling MyFirstMarkov::Chain.file_to_entries(#{file}, #{split_on.inspect}, #{starting_entry.inspect}, #{next_method.inspect})"
38
+ entries, _na, next_method, count = MyFirstMarkov::Chain.file_to_entries(file, split_on, starting_entry, next_method)
39
+ mc = MyFirstMarkov::Chain.new(entries)
40
+ puts mc.send(next_method, count)
41
+ exit
30
42
  end
31
43
 
32
- puts "calling MyFirstMarkov::Chain.from_file(#{file}, #{split_on.inspect}, #{starting_entry.inspect}, #{next_method.inspect})"
44
+ #puts "calling MyFirstMarkov::Chain.from_file(#{file}, #{split_on.inspect}, #{starting_entry.inspect}, #{next_method.inspect})"
33
45
  puts MyFirstMarkov::Chain.from_file(file, split_on, starting_entry, next_method)
34
46
  end
@@ -1,8 +1,14 @@
1
+ # ./bin/my_first_markov.rb --first ./test/sample_text.txt
2
+ # ./bin/my_first_markov.rb apple ./test/sample_text.txt
3
+ # ./bin/my_first_markov.rb apple ./test/sample_text.txt most_likely_next
4
+ require 'json'
1
5
  module MyFirstMarkov
2
6
  class Chain
7
+ DEFAULT_COUNT = 5
8
+ DEFAULT_DEBUG = true
3
9
 
4
10
  def self.next_methods
5
- ["random_next", "most_likely_next"]
11
+ ["random_next", "most_likely_next", "first"]
6
12
  end
7
13
 
8
14
  def self.default_next_method
@@ -17,12 +23,29 @@ module MyFirstMarkov
17
23
  split_on_values.first
18
24
  end
19
25
 
26
+ def self.from_downcase_file(file, split_on, starting_entry, next_method)
27
+ entries, starting_entry, next_method, count = file_to_entries(file, split_on, starting_entry, next_method)
28
+ return from_entries(entries.map(&:downcase), starting_entry, next_method, count)
29
+ end
30
+
20
31
  def self.from_file(file, split_on, starting_entry, next_method)
32
+ from_entries(*file_to_entries(file, split_on, starting_entry, next_method))
33
+ end
34
+
35
+ def self.file_to_entries(file, split_on, starting_entry, next_method)
21
36
  unless split_on && MyFirstMarkov::Chain.split_on_values.include?(split_on.downcase)
22
37
  split_on = MyFirstMarkov::Chain.default_split_on_value
23
38
  end
24
39
 
25
- unless next_method && MyFirstMarkov::Chain.next_methods.include?(next_method.downcase)
40
+ if next_method
41
+ if matches = next_method.match(/^(\D+)(\d+)$/)
42
+ next_method = matches[1]
43
+ count = matches[2]
44
+ unless MyFirstMarkov::Chain.next_methods.include?(next_method.downcase)
45
+ next_method = MyFirstMarkov::Chain.default_next_method
46
+ end
47
+ end
48
+ else
26
49
  next_method = MyFirstMarkov::Chain.default_next_method
27
50
  end
28
51
 
@@ -34,14 +57,20 @@ module MyFirstMarkov
34
57
  ("word" == split_on.downcase) ? entries = data.split : entries = data.split(//)
35
58
  entries ||= []
36
59
 
37
- new(entries).send(next_method.downcase, starting_entry)
60
+ #puts "return [#{entries.inspect}, #{starting_entry.inspect}, #{next_method.inspect}, #{count || DEFAULT_COUNT}]"
61
+ return [entries, starting_entry, next_method, count || DEFAULT_COUNT]
62
+ end
63
+
64
+ def self.from_entries(entries, starting_entry, next_method, count)
65
+ new(entries).send(next_method.downcase, starting_entry, count)
38
66
  end
39
67
 
40
- def initialize(ordered_list)
68
+ def initialize(ordered_entries, debug=DEFAULT_DEBUG)
69
+ @debug = debug
41
70
  @entries = Hash.new
42
- ordered_list.each_with_index do |entry, index|
43
- next_entry_idx = next_idx_or_nil(index, ordered_list.size)
44
- add(entry, ordered_list[next_entry_idx]) if next_entry_idx
71
+ ordered_entries.each_with_index do |entry, index|
72
+ next_entry_idx = next_idx_or_nil(index, ordered_entries.size)
73
+ add(entry, ordered_entries[next_entry_idx]) if next_entry_idx
45
74
  end
46
75
  end
47
76
 
@@ -50,36 +79,74 @@ module MyFirstMarkov
50
79
  @entries[entry][next_entry] += 1
51
80
  end
52
81
 
53
- def most_likely_next(entry)
82
+ def first(count=nil)
83
+ count ||= DEFAULT_COUNT
84
+ # @entries.keys.sort {|a,b| num_observations_for(b) <=> num_observations_for(a) }.take(count)
85
+ results = @entries.keys.reduce({}) { |memo, key|
86
+ memo[key] = num_observations_for(key); memo
87
+ }.sort { |a,b| num_observations_for(b.first) <=> num_observations_for(a.first) }
88
+ .take(count.to_i)
89
+
90
+ if (@debug)
91
+ results.reduce({}) { |memo, ary| memo[ary.first] = ary.last; memo }.to_json
92
+ else
93
+ #results.first
94
+ results.map(&:first).to_json # the "entry" part, not the "num_observations"
95
+ end
96
+ end
97
+
98
+ def most_likely_next(entry, count=nil)
99
+ count ||= DEFAULT_COUNT
54
100
  _next(entry) do |observation_total, next_entries_and_observations|
55
- next_entries_and_observations
101
+ results = next_entries_and_observations
56
102
  .sort {|a,b| b.last <=> a.last} # sort (in reverse) by observations
57
- .first # choose an array with the largest observation (could be many with same #)
58
- .first # the "entry" part, not the "num_observations"
103
+ .take(count.to_i) # choose the array(s) with the largest observation (could be many with same #)
104
+
105
+ if (@debug)
106
+ # debug:
107
+ results.reduce({}) { |memo, ary| memo[ary.first] = ary.last; memo }.to_json
108
+ else
109
+ results.map(&:first).to_json # the "entry" part, not the "num_observations"
110
+ end
59
111
  end
60
112
  end
61
113
 
62
- def random_next(entry)
114
+ def random_next(entry, count=nil)
115
+ count ||= 1
116
+ #puts "called w/ entry: #{entry.inspect}, count: #{count.inspect}"
63
117
  _next(entry) do |observation_total, next_entries_and_observations|
64
118
  random_threshold = rand(observation_total) + 1
65
119
  partial_observation_sum = 0
66
120
 
67
- next_entries_and_observations.find { |next_entry, num_observations|
121
+ results = next_entries_and_observations.select { |next_entry, num_observations|
68
122
  partial_observation_sum += num_observations
69
123
  partial_observation_sum >= random_threshold
70
- }.first # we want the "entry" not the "num_observations"
124
+ }.take(count.to_i)
125
+
126
+ if (@debug)
127
+ # debug:
128
+ #{ result.first => result.last }.to_json
129
+ results.reduce({}) { |memo, ary| memo[ary.first] = ary.last; memo }.to_json
130
+ else
131
+ #result.first # the "entry" part, not the "num_observations"
132
+ results.map(&:first).to_json # the "entry" part, not the "num_observations"
133
+ end
71
134
  end
72
135
  end
73
136
 
74
137
 
75
138
  private
76
139
 
140
+ def num_observations_for(entry)
141
+ @entries[entry].reduce(0) {|sum,entry_observations| sum += entry_observations.last}
142
+ end
143
+
77
144
  def _next(entry, &block)
78
145
  return "" unless @entries.key?(entry)
79
146
 
80
147
  # remember each entry contains a hash of the form {subsequent_entry: num_of_observations, other_subsequent_entry: num_of_observations, ...}
81
148
  # calling reduce on a hash converts to an array [[s_entry, observation_count], ...]
82
- num_of_observations = @entries[entry].reduce(0) {|sum,entry_observations| sum += entry_observations.last}
149
+ num_of_observations = num_observations_for(entry)
83
150
  return block.call(num_of_observations, @entries[entry])
84
151
  end
85
152
 
@@ -1,3 +1,3 @@
1
1
  module MyFirstMarkov
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
data/test/sample_text.txt CHANGED
@@ -1 +1,5 @@
1
- apple this apple is a text\nthis apple was a text\nthis is a big apple text\nthis could be an apple big old apple text\nthis apple is not apple text apple
1
+ apple this apple is a text
2
+ this apple was a text
3
+ this is a big apple text
4
+ this could be an apple big old apple text
5
+ this apple is not apple text apple
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: my_first_markov
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jay Tee