markov-generator 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/lib/markov.rb +2 -0
- data/lib/markov/dictionary.rb +76 -0
- data/lib/markov/generator.rb +47 -103
- data/lib/markov/parser.rb +1 -1
- data/lib/markov/token.rb +11 -0
- data/lib/markov/util.rb +44 -0
- data/markov-generator.gemspec +9 -4
- data/test/generator_test.rb +7 -7
- data/test/test_bulk_markov.rb +2 -1
- data/test/test_markov.rb +9 -5
- data/test/texts/alice.txt +3328 -0
- data/test/texts/cthulhu.txt +1118 -0
- data/test/texts/grimm.txt +9173 -0
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: d847256699f3f91abcbf0302c2be407bf97cb653
+  data.tar.gz: 19e95af2ef8f231c56f1ac9e76716518c1f8b057
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f06aef8afefd6f20daeee4cb77d7a72daa87126331caeaa9deeda9d5beef1f77c2908d59167bb91b66e170fa92fdffdb6b464b92506f43c03563a65d98ae0737
+  data.tar.gz: 997fbd9590015db2bb8137baee2f83488a1979367ef2e516a322d67f871369d8a0e394d02241e67c76cbb933b34c16977f48226d41ee95fc4d442a33fd386998
data/VERSION
CHANGED
@@ -1 +1 @@
-0.10.0
+0.11.0
data/lib/markov.rb
CHANGED

data/lib/markov/dictionary.rb
ADDED
@@ -0,0 +1,76 @@
+
+require 'securerandom'
+
+class Markov::Dictionary
+  include Markov::Util
+
+  def initialize(depth)
+    @depth = depth
+
+    @dictionary = {}
+    @start_words = {}
+
+    srand
+  end
+
+  def empty?
+    @dictionary.empty?
+  end
+
+  def dump_startwords
+    @start_words.keys.each do |start_words|
+      puts "#{start_words} -> #{tokens_to_sentence @dictionary[start_words]}"
+    end
+  end
+
+  def dump_dictionary
+    @dictionary.keys.each do |keys|
+      following = @dictionary[keys]
+      sentence = []
+      following.each do |word|
+        sentence << "#{word.to_s},"
+      end
+      s = sentence.join(" ")
+      puts "#{keys} => #{s.slice(0,s.length-1)}"
+    end
+  end
+
+  def add_to_start_words(tokens)
+    return if tokens[0].kind != :word
+
+    tokens[0].word = tokens[0].word.capitalize
+    start_words = tokens_to_words tokens
+
+    @start_words[start_words] ||= tokens
+  end
+
+  def add_to_dictionary(tokens)
+    token = tokens.last
+    return if token == nil || token.word == ""
+
+    key_words = tokens_to_words tokens[0, @depth-1]
+
+    @dictionary[key_words] ||= []
+    @dictionary[key_words] << token
+  end
+
+  def select_start_words
+    @start_words[ @start_words.keys[random_number( @start_words.keys.length-1)]]
+  end
+
+  def select_next_token(tokens)
+    token = @dictionary[ tokens_to_words(tokens)]
+
+    return Markov::Token.new("X", :noop) if token == nil
+    token[random_number(tokens.length-1)]
+  end
+
+  def select_next_word(tokens)
+    token = nil
+    begin
+      token = select_next_token(tokens)
+    end until token.kind == :word
+    token
+  end
+
+end
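
For orientation (not part of the package diff): a minimal sketch of how the new Markov::Dictionary is driven, mirroring the calls the generator makes below. The depth, the sample words, and the token kinds are illustrative assumptions; Markov::Token.new(word, kind) is used as it appears elsewhere in this release.

  # Illustrative sketch only -- depth, words, and kinds are made-up values.
  require 'markov'

  dict   = Markov::Dictionary.new(3)    # depth 3, as the generator passes its @depth through
  tokens = %w[the quick brown].map { |w| Markov::Token.new(w, :word) }

  dict.add_to_start_words tokens[0, 2]  # capitalizes the first word and records a sentence opener
  dict.add_to_dictionary  tokens        # keys ["The", "quick"] to the trailing token "brown"

  seed = dict.select_start_words        # => the stored start tokens
  dict.select_next_token(seed).word     # => "brown" (an "X" :noop token on a lookup miss)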
data/lib/markov/generator.rb
CHANGED
@@ -2,15 +2,15 @@
 require 'securerandom'
 
 class Markov::Generator
-
+  include Markov::Util
+
   def initialize(depth)
     @depth = depth
-
-    @dictionary = {}
-    @start_words = {}
     @unparsed_sentences = []
     @tokens = []
 
+    @dict = Markov::Dictionary.new(depth)
+
     srand
   end
 
@@ -35,8 +35,8 @@ class Markov::Generator
 
       # need to store the words in both the dictionary
      # and the list of start words
-      add_to_start_words word_seq[0, @depth-1]
-      add_to_dictionary word_seq
+      @dict.add_to_start_words word_seq[0, @depth-1]
+      @dict.add_to_dictionary word_seq
 
      token = parser.next_token
      state = :sentence
@@ -48,10 +48,10 @@ class Markov::Generator
        word_seq << token
 
        # add to the dictionary
-        add_to_dictionary word_seq
+        @dict.add_to_dictionary word_seq
 
        # stop current sequence and start again
-        if token.kind == :stop
+        if token == nil || token.kind == :stop
          word_seq = []
          state = :start
        end
@@ -61,12 +61,13 @@ class Markov::Generator
    rescue => e
      # nothing to rescue
      puts e
+      puts e.backtrace
    end
 
  end # end parse_text
 
-  def generate_sentence(min_length=
-    if @
+  def generate_sentence(min_length=15)
+    if @dict.empty?
      raise EmptyDictionaryError.new("The dictionary is empty! Parse a source file/string first!")
    end
 
@@ -74,42 +75,54 @@ class Markov::Generator
    complete_sentence = false
 
    # initialize
-    select_start_words.each {|w| tokens << w}
+    @dict.select_start_words.each {|w| tokens << w}
    prev_token = tokens.last
 
    begin
-      token = select_next_token tokens.last(@depth-1)
+      token = @dict.select_next_token tokens.last(@depth-1)
 
-      if token.kind == :
-        token = select_next_word tokens.last(@depth-1) if prev_token.kind == :special
+      if token.kind == :word
        tokens << token
+        prev_token = token
      elsif token.kind == :special
-
-
+        if prev_token.kind == :word
+          tokens << token
+          prev_token = token
+        end
+      elsif token.kind == :stop
+        if prev_token.kind == :word
+          tokens << token
+          prev_token = token
+        end
      elsif token.kind == :noop
-
-
-
-
+        if prev_token.kind == :word
+          tokens << Markov::Token.new(".", :stop)
+        end
+        # start a new sentence
+        @dict.select_start_words.each {|w| tokens << w}
+        prev_token = tokens.last
      end
 
-
+      if (token.kind == :stop) && (tokens.size > min_length)
+        #puts "-- DONE(#{tokens.size}) #{tokens_to_debug tokens}"
+        return tokens_to_sentence tokens
+      end
 
-
-
-
-
-
-
-
+      # default circuit-breaker
+      if tokens.size > min_length * 4
+        # restart
+        tokens = []
+        complete_sentence = false
+
+        # initialize
+        @dict.select_start_words.each {|w| tokens << w}
+        prev_token = tokens.last
      end
 
-      # circuit-breaker
-      complete_sentence = true if tokens.size > min_length*2
    end until complete_sentence
 
    tokens_to_sentence tokens
-  end
+  end
 
  def dump_startwords
    @start_words.keys.each do |start_words|
@@ -118,80 +131,11 @@ class Markov::Generator
  end
 
  def dump_dictionary
-    @dictionary.keys.each do |keys|
-      following = @dictionary[keys]
-      sentence = []
-      following.each do |word|
-        sentence << "#{word.to_s},"
-      end
-      s = sentence.join(" ")
-      puts "#{keys} => #{s.slice(0,s.length-1)}"
-    end
-  end
-
-  private
-
-  def add_to_start_words(tokens)
-    return if tokens[0].kind != :word
-
-    tokens[0].word = tokens[0].word.capitalize
-    start_words = tokens_to_words tokens
-
-    @start_words[start_words] ||= tokens
+    @dict.dump_dictionary
  end
 
-  def add_to_dictionary(tokens)
-
-    return if token.word == ""
-
-    key_words = tokens_to_words tokens[0, @depth-1]
-
-    @dictionary[key_words] ||= []
-    @dictionary[key_words] << token
-  end
-
-  def tokens_to_words(tokens)
-    words = []
-    tokens.each do |t|
-      words << t.word
-    end
-    words
-  end
-
-  def tokens_to_sentence(tokens)
-    s = ""
-    tokens.each do |t|
-      if t.kind != :word
-        s << t.word
-      else
-        s << " " + t.word
-      end
-    end
-
-    s[1, s.length-1]
-  end
-
-  def select_start_words
-    @start_words[ @start_words.keys[random_number( @start_words.keys.length-1)]]
-  end
-
-  def select_next_token(tokens)
-    token = @dictionary[ tokens_to_words(tokens)]
-
-    return Token.new("X", :noop) if token == nil
-    token[random_number(tokens.length-1)]
-  end
-
-  def select_next_word(tokens)
-    token = nil
-    begin
-      token = select_next_token(tokens)
-    end until token.kind == :word
-    token
-  end
-
-  def random_number(upper_limit)
-    (SecureRandom.random_number * upper_limit).to_i
+  def dump_startwords
+    @dict.dump_startwords
  end
 
 end
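
As a usage sketch (again, not part of the diff), the refactored generator can be driven much like the test script at the end of this listing; the depth, corpus path, and minimum length below are illustrative placeholders.

  # Illustrative sketch -- depth, path, and min_length are placeholders.
  require 'markov'

  generator = Markov::Generator.new(2)
  generator.parse_text "./test/texts/grimm.txt"  # the test script passes a path the same way
  puts generator.generate_sentence(20)           # returns once a :stop token lands past 20 tokens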
data/lib/markov/parser.rb
CHANGED
data/lib/markov/token.rb
CHANGED
data/lib/markov/util.rb
ADDED
@@ -0,0 +1,44 @@
+
+require 'securerandom'
+
+module Markov::Util
+
+  def tokens_to_words(tokens)
+    words = []
+    tokens.each do |t|
+      words << t.word
+    end
+    words
+  end
+
+  def tokens_to_sentence(tokens)
+    s = ""
+    tokens.each do |t|
+      if t.kind != :word
+        s << t.word
+      else
+        s << " " + t.word
+      end
+    end
+
+    s[1, s.length-1]
+  end
+
+  def tokens_to_debug(tokens)
+    s = ""
+    tokens.each do |t|
+      if t.kind != :word
+        s << " " + t.to_symbol
+      else
+        s << " " + t.word
+      end
+    end
+
+    s[1, s.length-1]
+  end
+
+  def random_number(upper_limit)
+    (SecureRandom.random_number * upper_limit).to_i
+  end
+
+end
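
A small sketch (not part of the diff) of what the shared tokens_to_sentence helper produces; the token values and kinds below are assumptions based on how the generator treats :word, :special, and :stop tokens.

  # Illustrative sketch -- token values and kinds are assumed for the example.
  require 'markov'

  class UtilDemo
    include Markov::Util
  end

  tokens = [
    Markov::Token.new("hello", :word),
    Markov::Token.new(",",     :special),
    Markov::Token.new("world", :word),
    Markov::Token.new(".",     :stop)
  ]

  puts UtilDemo.new.tokens_to_sentence(tokens)   # => "hello, world."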
data/markov-generator.gemspec
CHANGED
@@ -2,16 +2,16 @@
 # DO NOT EDIT THIS FILE DIRECTLY
 # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
 # -*- encoding: utf-8 -*-
-# stub: markov-generator 0.10.0 ruby lib
+# stub: markov-generator 0.11.0 ruby lib
 
 Gem::Specification.new do |s|
   s.name = "markov-generator"
-  s.version = "0.10.0"
+  s.version = "0.11.0"
 
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.require_paths = ["lib"]
   s.authors = ["Michael Kuehl"]
-  s.date = "2016-01-
+  s.date = "2016-01-12"
   s.description = "A Markov Chain text generator library"
   s.email = "hello@ratchet.cc"
   s.extra_rdoc_files = [
@@ -29,14 +29,19 @@ Gem::Specification.new do |s|
     "Rakefile",
     "VERSION",
     "lib/markov.rb",
+    "lib/markov/dictionary.rb",
     "lib/markov/generator.rb",
     "lib/markov/parser.rb",
     "lib/markov/token.rb",
+    "lib/markov/util.rb",
     "markov-generator.gemspec",
     "test/generator_test.rb",
     "test/test_bulk_markov.rb",
     "test/test_markov.rb",
-    "test/test_parser.rb"
+    "test/test_parser.rb",
+    "test/texts/alice.txt",
+    "test/texts/cthulhu.txt",
+    "test/texts/grimm.txt"
   ]
   s.homepage = "http://github.com/ratchetcc/markov-generator"
   s.licenses = ["MIT"]
|
data/test/generator_test.rb
CHANGED
@@ -1,15 +1,15 @@
+$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
 
-require 'markov
+require 'markov'
 
-markov = Markov
-markov.
-markov.parse_source_file "./generator_test1.txt"
+markov = Markov.generator
+markov.parse_text "./test/texts/generator_test.txt"
 
+#markov.dump_startwords
 #markov.dump_dictionary
-
-markov.dump_dictionary_stats
+puts ""
 
 1..5.times do
   puts "#{markov.generate_sentence}"
+  puts ""
 end
-