my_first_markov 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/my_first_markov.rb +13 -1
- data/lib/my_first_markov/chain.rb +82 -15
- data/lib/my_first_markov/version.rb +1 -1
- data/test/sample_text.txt +5 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4118db4aac92b92a6507de32613bfebe291803b1
|
4
|
+
data.tar.gz: 0bca4ef8652db717d917657c50aaadab0a9c1c50
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 98b0d4ea9dd4ed5a29bd1590e383d6c012c6b72000fb7f1ff4330a95bd7efa5da526c5f7a0e7b0ead1c5fd403143a540324a8b65ac1166158887b118f64c60ef
|
7
|
+
data.tar.gz: a8cbb0c688c22a5ac68b0499f507f6f5a101a94091c2cec9f04f98ba58937b09997c8093708bc12a64143a591e7e9130b539d5989d01b8fef7bcf40a48605a5a
|
data/bin/my_first_markov.rb
CHANGED
@@ -24,11 +24,23 @@ if File.basename(__FILE__) == File.basename($PROGRAM_NAME)
|
|
24
24
|
|
25
25
|
$0 a ./test/sample_text.txt [random_next] character
|
26
26
|
> p
|
27
|
+
|
28
|
+
OR
|
29
|
+
|
30
|
+
$0 <firstN> <file-glob of entry observations> <split_on: word* | character> <next_method: first>
|
27
31
|
EOH
|
28
32
|
warn(msg)
|
29
33
|
exit
|
34
|
+
elsif starting_entry =~ /\-\-first/
|
35
|
+
next_method = starting_entry.dup[2..-1]
|
36
|
+
starting_entry = nil
|
37
|
+
#puts "calling MyFirstMarkov::Chain.file_to_entries(#{file}, #{split_on.inspect}, #{starting_entry.inspect}, #{next_method.inspect})"
|
38
|
+
entries, _na, next_method, count = MyFirstMarkov::Chain.file_to_entries(file, split_on, starting_entry, next_method)
|
39
|
+
mc = MyFirstMarkov::Chain.new(entries)
|
40
|
+
puts mc.send(next_method, count)
|
41
|
+
exit
|
30
42
|
end
|
31
43
|
|
32
|
-
puts "calling MyFirstMarkov::Chain.from_file(#{file}, #{split_on.inspect}, #{starting_entry.inspect}, #{next_method.inspect})"
|
44
|
+
#puts "calling MyFirstMarkov::Chain.from_file(#{file}, #{split_on.inspect}, #{starting_entry.inspect}, #{next_method.inspect})"
|
33
45
|
puts MyFirstMarkov::Chain.from_file(file, split_on, starting_entry, next_method)
|
34
46
|
end
|
@@ -1,8 +1,14 @@
|
|
1
|
+
# ./bin/my_first_markov.rb --first ./test/sample_text.txt
|
2
|
+
# ./bin/my_first_markov.rb apple ./test/sample_text.txt
|
3
|
+
# ./bin/my_first_markov.rb apple ./test/sample_text.txt most_likely_next
|
4
|
+
require 'json'
|
1
5
|
module MyFirstMarkov
|
2
6
|
class Chain
|
7
|
+
DEFAULT_COUNT = 5
|
8
|
+
DEFAULT_DEBUG = true
|
3
9
|
|
4
10
|
def self.next_methods
|
5
|
-
["random_next", "most_likely_next"]
|
11
|
+
["random_next", "most_likely_next", "first"]
|
6
12
|
end
|
7
13
|
|
8
14
|
def self.default_next_method
|
@@ -17,12 +23,29 @@ module MyFirstMarkov
|
|
17
23
|
split_on_values.first
|
18
24
|
end
|
19
25
|
|
26
|
+
def self.from_downcase_file(file, split_on, starting_entry, next_method)
|
27
|
+
entries, starting_entry, next_method, count = file_to_entries(file, split_on, starting_entry, next_method)
|
28
|
+
return from_entries(entries.map(&:downcase), starting_entry, next_method, count)
|
29
|
+
end
|
30
|
+
|
20
31
|
def self.from_file(file, split_on, starting_entry, next_method)
|
32
|
+
from_entries(*file_to_entries(file, split_on, starting_entry, next_method))
|
33
|
+
end
|
34
|
+
|
35
|
+
def self.file_to_entries(file, split_on, starting_entry, next_method)
|
21
36
|
unless split_on && MyFirstMarkov::Chain.split_on_values.include?(split_on.downcase)
|
22
37
|
split_on = MyFirstMarkov::Chain.default_split_on_value
|
23
38
|
end
|
24
39
|
|
25
|
-
|
40
|
+
if next_method
|
41
|
+
if matches = next_method.match(/^(\D+)(\d+)$/)
|
42
|
+
next_method = matches[1]
|
43
|
+
count = matches[2]
|
44
|
+
unless MyFirstMarkov::Chain.next_methods.include?(next_method.downcase)
|
45
|
+
next_method = MyFirstMarkov::Chain.default_next_method
|
46
|
+
end
|
47
|
+
end
|
48
|
+
else
|
26
49
|
next_method = MyFirstMarkov::Chain.default_next_method
|
27
50
|
end
|
28
51
|
|
@@ -34,14 +57,20 @@ module MyFirstMarkov
|
|
34
57
|
("word" == split_on.downcase) ? entries = data.split : entries = data.split(//)
|
35
58
|
entries ||= []
|
36
59
|
|
37
|
-
|
60
|
+
#puts "return [#{entries.inspect}, #{starting_entry.inspect}, #{next_method.inspect}, #{count || DEFAULT_COUNT}]"
|
61
|
+
return [entries, starting_entry, next_method, count || DEFAULT_COUNT]
|
62
|
+
end
|
63
|
+
|
64
|
+
def self.from_entries(entries, starting_entry, next_method, count)
|
65
|
+
new(entries).send(next_method.downcase, starting_entry, count)
|
38
66
|
end
|
39
67
|
|
40
|
-
def initialize(
|
68
|
+
def initialize(ordered_entries, debug=DEFAULT_DEBUG)
|
69
|
+
@debug = debug
|
41
70
|
@entries = Hash.new
|
42
|
-
|
43
|
-
next_entry_idx = next_idx_or_nil(index,
|
44
|
-
add(entry,
|
71
|
+
ordered_entries.each_with_index do |entry, index|
|
72
|
+
next_entry_idx = next_idx_or_nil(index, ordered_entries.size)
|
73
|
+
add(entry, ordered_entries[next_entry_idx]) if next_entry_idx
|
45
74
|
end
|
46
75
|
end
|
47
76
|
|
@@ -50,36 +79,74 @@ module MyFirstMarkov
|
|
50
79
|
@entries[entry][next_entry] += 1
|
51
80
|
end
|
52
81
|
|
53
|
-
def
|
82
|
+
def first(count=nil)
|
83
|
+
count ||= DEFAULT_COUNT
|
84
|
+
# @entries.keys.sort {|a,b| num_observations_for(b) <=> num_observations_for(a) }.take(count)
|
85
|
+
results = @entries.keys.reduce({}) { |memo, key|
|
86
|
+
memo[key] = num_observations_for(key); memo
|
87
|
+
}.sort { |a,b| num_observations_for(b.first) <=> num_observations_for(a.first) }
|
88
|
+
.take(count.to_i)
|
89
|
+
|
90
|
+
if (@debug)
|
91
|
+
results.reduce({}) { |memo, ary| memo[ary.first] = ary.last; memo }.to_json
|
92
|
+
else
|
93
|
+
#results.first
|
94
|
+
results.map(&:first).to_json # the "entry" part, not the "num_observations"
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
def most_likely_next(entry, count=nil)
|
99
|
+
count ||= DEFAULT_COUNT
|
54
100
|
_next(entry) do |observation_total, next_entries_and_observations|
|
55
|
-
next_entries_and_observations
|
101
|
+
results = next_entries_and_observations
|
56
102
|
.sort {|a,b| b.last <=> a.last} # sort (in reverse) by observations
|
57
|
-
.
|
58
|
-
|
103
|
+
.take(count.to_i) # choose the array(s) with the largest observation (could be many with same #)
|
104
|
+
|
105
|
+
if (@debug)
|
106
|
+
# debug:
|
107
|
+
results.reduce({}) { |memo, ary| memo[ary.first] = ary.last; memo }.to_json
|
108
|
+
else
|
109
|
+
results.map(&:first).to_json # the "entry" part, not the "num_observations"
|
110
|
+
end
|
59
111
|
end
|
60
112
|
end
|
61
113
|
|
62
|
-
def random_next(entry)
|
114
|
+
def random_next(entry, count=nil)
|
115
|
+
count ||= 1
|
116
|
+
#puts "called w/ entry: #{entry.inspect}, count: #{count.inspect}"
|
63
117
|
_next(entry) do |observation_total, next_entries_and_observations|
|
64
118
|
random_threshold = rand(observation_total) + 1
|
65
119
|
partial_observation_sum = 0
|
66
120
|
|
67
|
-
next_entries_and_observations.
|
121
|
+
results = next_entries_and_observations.select { |next_entry, num_observations|
|
68
122
|
partial_observation_sum += num_observations
|
69
123
|
partial_observation_sum >= random_threshold
|
70
|
-
}.
|
124
|
+
}.take(count.to_i)
|
125
|
+
|
126
|
+
if (@debug)
|
127
|
+
# debug:
|
128
|
+
#{ result.first => result.last }.to_json
|
129
|
+
results.reduce({}) { |memo, ary| memo[ary.first] = ary.last; memo }.to_json
|
130
|
+
else
|
131
|
+
#result.first # the "entry" part, not the "num_observations"
|
132
|
+
results.map(&:first).to_json # the "entry" part, not the "num_observations"
|
133
|
+
end
|
71
134
|
end
|
72
135
|
end
|
73
136
|
|
74
137
|
|
75
138
|
private
|
76
139
|
|
140
|
+
def num_observations_for(entry)
|
141
|
+
@entries[entry].reduce(0) {|sum,entry_observations| sum += entry_observations.last}
|
142
|
+
end
|
143
|
+
|
77
144
|
def _next(entry, &block)
|
78
145
|
return "" unless @entries.key?(entry)
|
79
146
|
|
80
147
|
# remember each entry contains a hash of the form {subsequent_entry: num_of_observations, other_subsequent_entry: num_of_observations, ...}
|
81
148
|
# calling reduce on a hash converts to an array [[s_entry, observation_count], ...]
|
82
|
-
num_of_observations =
|
149
|
+
num_of_observations = num_observations_for(entry)
|
83
150
|
return block.call(num_of_observations, @entries[entry])
|
84
151
|
end
|
85
152
|
|
data/test/sample_text.txt
CHANGED
@@ -1 +1,5 @@
|
|
1
|
-
apple this apple is a text
|
1
|
+
apple this apple is a text
|
2
|
+
this apple was a text
|
3
|
+
this is a big apple text
|
4
|
+
this could be an apple big old apple text
|
5
|
+
this apple is not apple text apple
|