my_first_markov 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b8b6a08834fbbc7a6062885517e7288865901c91
4
- data.tar.gz: 609072cc5f6d1d661e668a93ae3c8399c4483595
3
+ metadata.gz: 4118db4aac92b92a6507de32613bfebe291803b1
4
+ data.tar.gz: 0bca4ef8652db717d917657c50aaadab0a9c1c50
5
5
  SHA512:
6
- metadata.gz: 49998299be2bee0c59d3916b341043a97d26e17e32e3e8c0af13f94d33ade05f781f518154bfbcd3eacacfa6a71953c0291f40fa2f630acee729eaacb53bfa12
7
- data.tar.gz: 17eb6a296ebe795f099fa5f4ff581e77533d6f4d13a87bb3535e5e5c81415dffd74454dd1fd6b4d06ac0322f1f47c45266a0b3de7a2f1159494919817c056781
6
+ metadata.gz: 98b0d4ea9dd4ed5a29bd1590e383d6c012c6b72000fb7f1ff4330a95bd7efa5da526c5f7a0e7b0ead1c5fd403143a540324a8b65ac1166158887b118f64c60ef
7
+ data.tar.gz: a8cbb0c688c22a5ac68b0499f507f6f5a101a94091c2cec9f04f98ba58937b09997c8093708bc12a64143a591e7e9130b539d5989d01b8fef7bcf40a48605a5a
@@ -24,11 +24,23 @@ if File.basename(__FILE__) == File.basename($PROGRAM_NAME)
24
24
 
25
25
  $0 a ./test/sample_text.txt [random_next] character
26
26
  > p
27
+
28
+ OR
29
+
30
+ $0 <firstN> <file-glob of entry observations> <split_on: word* | character> <next_method: first>
27
31
  EOH
28
32
  warn(msg)
29
33
  exit
34
+ elsif starting_entry =~ /\-\-first/
35
+ next_method = starting_entry.dup[2..-1]
36
+ starting_entry = nil
37
+ #puts "calling MyFirstMarkov::Chain.file_to_entries(#{file}, #{split_on.inspect}, #{starting_entry.inspect}, #{next_method.inspect})"
38
+ entries, _na, next_method, count = MyFirstMarkov::Chain.file_to_entries(file, split_on, starting_entry, next_method)
39
+ mc = MyFirstMarkov::Chain.new(entries)
40
+ puts mc.send(next_method, count)
41
+ exit
30
42
  end
31
43
 
32
- puts "calling MyFirstMarkov::Chain.from_file(#{file}, #{split_on.inspect}, #{starting_entry.inspect}, #{next_method.inspect})"
44
+ #puts "calling MyFirstMarkov::Chain.from_file(#{file}, #{split_on.inspect}, #{starting_entry.inspect}, #{next_method.inspect})"
33
45
  puts MyFirstMarkov::Chain.from_file(file, split_on, starting_entry, next_method)
34
46
  end
@@ -1,8 +1,14 @@
1
+ # ./bin/my_first_markov.rb --first ./test/sample_text.txt
2
+ # ./bin/my_first_markov.rb apple ./test/sample_text.txt
3
+ # ./bin/my_first_markov.rb apple ./test/sample_text.txt most_likely_next
4
+ require 'json'
1
5
  module MyFirstMarkov
2
6
  class Chain
7
+ DEFAULT_COUNT = 5
8
+ DEFAULT_DEBUG = true
3
9
 
4
10
  def self.next_methods
5
- ["random_next", "most_likely_next"]
11
+ ["random_next", "most_likely_next", "first"]
6
12
  end
7
13
 
8
14
  def self.default_next_method
@@ -17,12 +23,29 @@ module MyFirstMarkov
17
23
  split_on_values.first
18
24
  end
19
25
 
26
+ def self.from_downcase_file(file, split_on, starting_entry, next_method)
27
+ entries, starting_entry, next_method, count = file_to_entries(file, split_on, starting_entry, next_method)
28
+ return from_entries(entries.map(&:downcase), starting_entry, next_method, count)
29
+ end
30
+
20
31
  def self.from_file(file, split_on, starting_entry, next_method)
32
+ from_entries(*file_to_entries(file, split_on, starting_entry, next_method))
33
+ end
34
+
35
+ def self.file_to_entries(file, split_on, starting_entry, next_method)
21
36
  unless split_on && MyFirstMarkov::Chain.split_on_values.include?(split_on.downcase)
22
37
  split_on = MyFirstMarkov::Chain.default_split_on_value
23
38
  end
24
39
 
25
- unless next_method && MyFirstMarkov::Chain.next_methods.include?(next_method.downcase)
40
+ if next_method
41
+ if matches = next_method.match(/^(\D+)(\d+)$/)
42
+ next_method = matches[1]
43
+ count = matches[2]
44
+ unless MyFirstMarkov::Chain.next_methods.include?(next_method.downcase)
45
+ next_method = MyFirstMarkov::Chain.default_next_method
46
+ end
47
+ end
48
+ else
26
49
  next_method = MyFirstMarkov::Chain.default_next_method
27
50
  end
28
51
 
@@ -34,14 +57,20 @@ module MyFirstMarkov
34
57
  ("word" == split_on.downcase) ? entries = data.split : entries = data.split(//)
35
58
  entries ||= []
36
59
 
37
- new(entries).send(next_method.downcase, starting_entry)
60
+ #puts "return [#{entries.inspect}, #{starting_entry.inspect}, #{next_method.inspect}, #{count || DEFAULT_COUNT}]"
61
+ return [entries, starting_entry, next_method, count || DEFAULT_COUNT]
62
+ end
63
+
64
+ def self.from_entries(entries, starting_entry, next_method, count)
65
+ new(entries).send(next_method.downcase, starting_entry, count)
38
66
  end
39
67
 
40
- def initialize(ordered_list)
68
+ def initialize(ordered_entries, debug=DEFAULT_DEBUG)
69
+ @debug = debug
41
70
  @entries = Hash.new
42
- ordered_list.each_with_index do |entry, index|
43
- next_entry_idx = next_idx_or_nil(index, ordered_list.size)
44
- add(entry, ordered_list[next_entry_idx]) if next_entry_idx
71
+ ordered_entries.each_with_index do |entry, index|
72
+ next_entry_idx = next_idx_or_nil(index, ordered_entries.size)
73
+ add(entry, ordered_entries[next_entry_idx]) if next_entry_idx
45
74
  end
46
75
  end
47
76
 
@@ -50,36 +79,74 @@ module MyFirstMarkov
50
79
  @entries[entry][next_entry] += 1
51
80
  end
52
81
 
53
- def most_likely_next(entry)
82
+ def first(count=nil)
83
+ count ||= DEFAULT_COUNT
84
+ # @entries.keys.sort {|a,b| num_observations_for(b) <=> num_observations_for(a) }.take(count)
85
+ results = @entries.keys.reduce({}) { |memo, key|
86
+ memo[key] = num_observations_for(key); memo
87
+ }.sort { |a,b| num_observations_for(b.first) <=> num_observations_for(a.first) }
88
+ .take(count.to_i)
89
+
90
+ if (@debug)
91
+ results.reduce({}) { |memo, ary| memo[ary.first] = ary.last; memo }.to_json
92
+ else
93
+ #results.first
94
+ results.map(&:first).to_json # the "entry" part, not the "num_observations"
95
+ end
96
+ end
97
+
98
+ def most_likely_next(entry, count=nil)
99
+ count ||= DEFAULT_COUNT
54
100
  _next(entry) do |observation_total, next_entries_and_observations|
55
- next_entries_and_observations
101
+ results = next_entries_and_observations
56
102
  .sort {|a,b| b.last <=> a.last} # sort (in reverse) by observations
57
- .first # choose an array with the largest observation (could be many with same #)
58
- .first # the "entry" part, not the "num_observations"
103
+ .take(count.to_i) # choose the array(s) with the largest observation (could be many with same #)
104
+
105
+ if (@debug)
106
+ # debug:
107
+ results.reduce({}) { |memo, ary| memo[ary.first] = ary.last; memo }.to_json
108
+ else
109
+ results.map(&:first).to_json # the "entry" part, not the "num_observations"
110
+ end
59
111
  end
60
112
  end
61
113
 
62
- def random_next(entry)
114
+ def random_next(entry, count=nil)
115
+ count ||= 1
116
+ #puts "called w/ entry: #{entry.inspect}, count: #{count.inspect}"
63
117
  _next(entry) do |observation_total, next_entries_and_observations|
64
118
  random_threshold = rand(observation_total) + 1
65
119
  partial_observation_sum = 0
66
120
 
67
- next_entries_and_observations.find { |next_entry, num_observations|
121
+ results = next_entries_and_observations.select { |next_entry, num_observations|
68
122
  partial_observation_sum += num_observations
69
123
  partial_observation_sum >= random_threshold
70
- }.first # we want the "entry" not the "num_observations"
124
+ }.take(count.to_i)
125
+
126
+ if (@debug)
127
+ # debug:
128
+ #{ result.first => result.last }.to_json
129
+ results.reduce({}) { |memo, ary| memo[ary.first] = ary.last; memo }.to_json
130
+ else
131
+ #result.first # the "entry" part, not the "num_observations"
132
+ results.map(&:first).to_json # the "entry" part, not the "num_observations"
133
+ end
71
134
  end
72
135
  end
73
136
 
74
137
 
75
138
  private
76
139
 
140
+ def num_observations_for(entry)
141
+ @entries[entry].reduce(0) {|sum,entry_observations| sum += entry_observations.last}
142
+ end
143
+
77
144
  def _next(entry, &block)
78
145
  return "" unless @entries.key?(entry)
79
146
 
80
147
  # remember each entry contains a hash of the form {subsequent_entry: num_of_observations, other_subsequent_entry: num_of_observations, ...}
81
148
  # calling reduce on a hash converts to an array [[s_entry, observation_count], ...]
82
- num_of_observations = @entries[entry].reduce(0) {|sum,entry_observations| sum += entry_observations.last}
149
+ num_of_observations = num_observations_for(entry)
83
150
  return block.call(num_of_observations, @entries[entry])
84
151
  end
85
152
 
@@ -1,3 +1,3 @@
1
1
  module MyFirstMarkov
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
data/test/sample_text.txt CHANGED
@@ -1 +1,5 @@
1
- apple this apple is a text\nthis apple was a text\nthis is a big apple text\nthis could be an apple big old apple text\nthis apple is not apple text apple
1
+ apple this apple is a text
2
+ this apple was a text
3
+ this is a big apple text
4
+ this could be an apple big old apple text
5
+ this apple is not apple text apple
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: my_first_markov
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jay Tee