my_first_markov 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/my_first_markov.rb +13 -1
- data/lib/my_first_markov/chain.rb +82 -15
- data/lib/my_first_markov/version.rb +1 -1
- data/test/sample_text.txt +5 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4118db4aac92b92a6507de32613bfebe291803b1
+  data.tar.gz: 0bca4ef8652db717d917657c50aaadab0a9c1c50
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 98b0d4ea9dd4ed5a29bd1590e383d6c012c6b72000fb7f1ff4330a95bd7efa5da526c5f7a0e7b0ead1c5fd403143a540324a8b65ac1166158887b118f64c60ef
+  data.tar.gz: a8cbb0c688c22a5ac68b0499f507f6f5a101a94091c2cec9f04f98ba58937b09997c8093708bc12a64143a591e7e9130b539d5989d01b8fef7bcf40a48605a5a
data/bin/my_first_markov.rb
CHANGED
@@ -24,11 +24,23 @@ if File.basename(__FILE__) == File.basename($PROGRAM_NAME)

     $0 a ./test/sample_text.txt [random_next] character
     > p
+
+    OR
+
+    $0 <firstN> <file-glob of entry observations> <split_on: word* | character> <next_method: first>
     EOH
     warn(msg)
     exit
+  elsif starting_entry =~ /\-\-first/
+    next_method = starting_entry.dup[2..-1]
+    starting_entry = nil
+    #puts "calling MyFirstMarkov::Chain.file_to_entries(#{file}, #{split_on.inspect}, #{starting_entry.inspect}, #{next_method.inspect})"
+    entries, _na, next_method, count = MyFirstMarkov::Chain.file_to_entries(file, split_on, starting_entry, next_method)
+    mc = MyFirstMarkov::Chain.new(entries)
+    puts mc.send(next_method, count)
+    exit
   end

-  puts "calling MyFirstMarkov::Chain.from_file(#{file}, #{split_on.inspect}, #{starting_entry.inspect}, #{next_method.inspect})"
+  #puts "calling MyFirstMarkov::Chain.from_file(#{file}, #{split_on.inspect}, #{starting_entry.inspect}, #{next_method.inspect})"
   puts MyFirstMarkov::Chain.from_file(file, split_on, starting_entry, next_method)
 end
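Read end to end, the new --first branch above boils down to roughly the following standalone calls. This is a sketch only: the require_relative path and the first3 argument are illustrative, and a trailing count such as the 3 in first3 is split off inside Chain.file_to_entries (the heredoc above calls this form firstN).

    require_relative '../lib/my_first_markov/chain'  # illustrative load path

    file        = './test/sample_text.txt'
    split_on    = 'word'
    next_method = 'first3'   # method name plus an optional count

    # Mirrors the new elsif branch: parse the argument, build the chain, dispatch.
    entries, _starting_entry, next_method, count =
      MyFirstMarkov::Chain.file_to_entries(file, split_on, nil, next_method)

    mc = MyFirstMarkov::Chain.new(entries)
    puts mc.send(next_method, count)  # with DEFAULT_DEBUG on, a JSON map of entry => observation count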
data/lib/my_first_markov/chain.rb
CHANGED
@@ -1,8 +1,14 @@
+# ./bin/my_first_markov.rb --first ./test/sample_text.txt
+# ./bin/my_first_markov.rb apple ./test/sample_text.txt
+# ./bin/my_first_markov.rb apple ./test/sample_text.txt most_likely_next
+require 'json'
 module MyFirstMarkov
   class Chain
+    DEFAULT_COUNT = 5
+    DEFAULT_DEBUG = true

     def self.next_methods
-      ["random_next", "most_likely_next"]
+      ["random_next", "most_likely_next", "first"]
     end

     def self.default_next_method
@@ -17,12 +23,29 @@ module MyFirstMarkov
       split_on_values.first
     end

+    def self.from_downcase_file(file, split_on, starting_entry, next_method)
+      entries, starting_entry, next_method, count = file_to_entries(file, split_on, starting_entry, next_method)
+      return from_entries(entries.map(&:downcase), starting_entry, next_method, count)
+    end
+
     def self.from_file(file, split_on, starting_entry, next_method)
+      from_entries(*file_to_entries(file, split_on, starting_entry, next_method))
+    end
+
+    def self.file_to_entries(file, split_on, starting_entry, next_method)
       unless split_on && MyFirstMarkov::Chain.split_on_values.include?(split_on.downcase)
         split_on = MyFirstMarkov::Chain.default_split_on_value
       end

-
+      if next_method
+        if matches = next_method.match(/^(\D+)(\d+)$/)
+          next_method = matches[1]
+          count = matches[2]
+          unless MyFirstMarkov::Chain.next_methods.include?(next_method.downcase)
+            next_method = MyFirstMarkov::Chain.default_next_method
+          end
+        end
+      else
         next_method = MyFirstMarkov::Chain.default_next_method
       end

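The /^(\D+)(\d+)$/ match in file_to_entries is what lets callers fold an optional count onto the end of the next_method argument. A quick illustration with made-up values; note the count stays a string here, and the query methods call count.to_i on it later.

    # Splitting a combined next_method argument (illustrative values only).
    matches = "most_likely_next3".match(/^(\D+)(\d+)$/)
    matches[1]                           # => "most_likely_next"
    matches[2]                           # => "3"

    "random_next".match(/^(\D+)(\d+)$/)  # => nil, so count stays nil and DEFAULT_COUNT is used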
@@ -34,14 +57,20 @@ module MyFirstMarkov
       ("word" == split_on.downcase) ? entries = data.split : entries = data.split(//)
       entries ||= []

-
+      #puts "return [#{entries.inspect}, #{starting_entry.inspect}, #{next_method.inspect}, #{count || DEFAULT_COUNT}]"
+      return [entries, starting_entry, next_method, count || DEFAULT_COUNT]
+    end
+
+    def self.from_entries(entries, starting_entry, next_method, count)
+      new(entries).send(next_method.downcase, starting_entry, count)
     end

-    def initialize(
+    def initialize(ordered_entries, debug=DEFAULT_DEBUG)
+      @debug = debug
       @entries = Hash.new
-
-      next_entry_idx = next_idx_or_nil(index,
-      add(entry,
+      ordered_entries.each_with_index do |entry, index|
+        next_entry_idx = next_idx_or_nil(index, ordered_entries.size)
+        add(entry, ordered_entries[next_entry_idx]) if next_entry_idx
       end
     end

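The rewritten constructor walks the ordered entries pairwise and counts every observed transition. On a tiny input it builds a nested count hash along these lines; this is a sketch that assumes next_idx_or_nil (unchanged and not shown in this diff) returns the following index and nil for the last entry, as the loop implies.

    chain = MyFirstMarkov::Chain.new(%w[apple this apple is a text])
    # @entries now holds "entry => { next_entry => observations }", roughly:
    #   { "apple" => { "this" => 1, "is" => 1 },
    #     "this"  => { "apple" => 1 },
    #     "is"    => { "a" => 1 },
    #     "a"     => { "text" => 1 } }
    # "text" is last, has no successor, and so never becomes a key.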
@@ -50,36 +79,74 @@ module MyFirstMarkov
       @entries[entry][next_entry] += 1
     end

-    def
+    def first(count=nil)
+      count ||= DEFAULT_COUNT
+      # @entries.keys.sort {|a,b| num_observations_for(b) <=> num_observations_for(a) }.take(count)
+      results = @entries.keys.reduce({}) { |memo, key|
+        memo[key] = num_observations_for(key); memo
+      }.sort { |a,b| num_observations_for(b.first) <=> num_observations_for(a.first) }
+       .take(count.to_i)
+
+      if (@debug)
+        results.reduce({}) { |memo, ary| memo[ary.first] = ary.last; memo }.to_json
+      else
+        #results.first
+        results.map(&:first).to_json # the "entry" part, not the "num_observations"
+      end
+    end
+
+    def most_likely_next(entry, count=nil)
+      count ||= DEFAULT_COUNT
       _next(entry) do |observation_total, next_entries_and_observations|
-        next_entries_and_observations
+        results = next_entries_and_observations
           .sort {|a,b| b.last <=> a.last} # sort (in reverse) by observations
-          .
-
+          .take(count.to_i) # choose the array(s) with the largest observation (could be many with same #)
+
+        if (@debug)
+          # debug:
+          results.reduce({}) { |memo, ary| memo[ary.first] = ary.last; memo }.to_json
+        else
+          results.map(&:first).to_json # the "entry" part, not the "num_observations"
+        end
       end
     end

-    def random_next(entry)
+    def random_next(entry, count=nil)
+      count ||= 1
+      #puts "called w/ entry: #{entry.inspect}, count: #{count.inspect}"
       _next(entry) do |observation_total, next_entries_and_observations|
         random_threshold = rand(observation_total) + 1
         partial_observation_sum = 0

-        next_entries_and_observations.
+        results = next_entries_and_observations.select { |next_entry, num_observations|
          partial_observation_sum += num_observations
          partial_observation_sum >= random_threshold
-        }.
+        }.take(count.to_i)
+
+        if (@debug)
+          # debug:
+          #{ result.first => result.last }.to_json
+          results.reduce({}) { |memo, ary| memo[ary.first] = ary.last; memo }.to_json
+        else
+          #result.first # the "entry" part, not the "num_observations"
+          results.map(&:first).to_json # the "entry" part, not the "num_observations"
+        end
       end
     end


     private

+    def num_observations_for(entry)
+      @entries[entry].reduce(0) {|sum,entry_observations| sum += entry_observations.last}
+    end
+
     def _next(entry, &block)
       return "" unless @entries.key?(entry)

       # remember each entry contains a hash of the form {subsequent_entry: num_of_observations, other_subsequent_entry: num_of_observaions, ...}
       # calling reduce on a hash converts to an array [[s_entry, observation_count], ...]
-      num_of_observations =
+      num_of_observations = num_observations_for(entry)
       return block.call(num_of_observations, @entries[entry])
     end

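With DEFAULT_DEBUG left at true, the three query methods now return JSON strings that map entries to observation counts (or just the entries, as JSON, when debug is off). A hedged sketch of the new call shapes; the file path is the gem's own test fixture, and exact output depends on the input text.

    chain = MyFirstMarkov::Chain.new(File.read('./test/sample_text.txt').split)

    chain.first(3)                      # JSON: the 3 entries with the most observed successors
    chain.most_likely_next('apple', 2)  # JSON: the 2 most frequent successors of "apple"
    chain.random_next('apple')          # JSON: one successor, chosen in proportion to its count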
data/test/sample_text.txt
CHANGED
@@ -1 +1,5 @@
-apple this apple is a text
+apple this apple is a text
+this apple was a text
+this is a big apple text
+this could be an apple big old apple text
+this apple is not apple text apple