markov-generator 0.10.0 → 0.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/lib/markov.rb +2 -0
- data/lib/markov/dictionary.rb +76 -0
- data/lib/markov/generator.rb +47 -103
- data/lib/markov/parser.rb +1 -1
- data/lib/markov/token.rb +11 -0
- data/lib/markov/util.rb +44 -0
- data/markov-generator.gemspec +9 -4
- data/test/generator_test.rb +7 -7
- data/test/test_bulk_markov.rb +2 -1
- data/test/test_markov.rb +9 -5
- data/test/texts/alice.txt +3328 -0
- data/test/texts/cthulhu.txt +1118 -0
- data/test/texts/grimm.txt +9173 -0
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d847256699f3f91abcbf0302c2be407bf97cb653
|
4
|
+
data.tar.gz: 19e95af2ef8f231c56f1ac9e76716518c1f8b057
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f06aef8afefd6f20daeee4cb77d7a72daa87126331caeaa9deeda9d5beef1f77c2908d59167bb91b66e170fa92fdffdb6b464b92506f43c03563a65d98ae0737
|
7
|
+
data.tar.gz: 997fbd9590015db2bb8137baee2f83488a1979367ef2e516a322d67f871369d8a0e394d02241e67c76cbb933b34c16977f48226d41ee95fc4d442a33fd386998
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.10.0
|
1
|
+
0.11.0
|
data/lib/markov.rb
CHANGED
data/lib/markov/dictionary.rb
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
|
2
|
+
require 'securerandom'
|
3
|
+
|
4
|
+
# Storage for a Markov chain.
#
# Maps a key (the array of words of the preceding tokens) to the list of
# tokens that were observed after that key, and separately remembers which
# token sequences may open a sentence.
class Markov::Dictionary
  include Markov::Util

  # @param depth [Integer] chain depth; dictionary keys are built from the
  #   first depth-1 tokens of every sequence passed to #add_to_dictionary
  def initialize(depth)
    @depth = depth

    @dictionary = {}
    @start_words = {}

    # Kept from the original: reseeds Kernel's default random generator.
    srand
  end

  # @return [Boolean] true when no sequence has been stored yet
  def empty?
    @dictionary.empty?
  end

  # Prints every start key together with the followers stored for it.
  def dump_startwords
    @start_words.each_key do |start_words|
      # A start key may have no dictionary entry (its follower was skipped
      # by #add_to_dictionary), so fall back to an empty list.
      followers = @dictionary[start_words] || []
      puts "#{start_words} -> #{tokens_to_sentence followers}"
    end
  end

  # Prints every key and the comma-separated list of its followers.
  def dump_dictionary
    @dictionary.each do |keys, following|
      puts "#{keys} => #{following.map(&:to_s).join(', ')}"
    end
  end

  # Registers +tokens+ as a possible sentence opening.
  #
  # Nothing is stored when the list is empty or does not begin with a :word
  # token. The first token is capitalized in place (the token object itself
  # is modified), and only the first sequence seen for a given key is kept.
  #
  # @param tokens [Array] leading tokens of a sentence
  def add_to_start_words(tokens)
    first = tokens.first
    return if first.nil? || first.kind != :word

    first.word = first.word.capitalize
    @start_words[tokens_to_words(tokens)] ||= tokens
  end

  # Records the last token of +tokens+ as a follower of the key formed by
  # the first depth-1 tokens. Missing or empty-word tokens are ignored.
  #
  # @param tokens [Array] the current token window
  def add_to_dictionary(tokens)
    token = tokens.last
    return if token.nil? || token.word == ""

    key_words = tokens_to_words tokens[0, @depth - 1]
    (@dictionary[key_words] ||= []) << token
  end

  # Picks one stored sentence opening at random.
  #
  # random_number's upper limit is exclusive, so the full key count is
  # passed; passing count-1 would make the last entry unreachable.
  #
  # @return [Array, nil] the start tokens, or nil when none are stored
  def select_start_words
    keys = @start_words.keys
    @start_words[keys[random_number(keys.length)]]
  end

  # Picks a random follower for the key formed by +tokens+.
  #
  # The index is drawn from the size of the follower list, not from the
  # size of the key.
  #
  # @param tokens [Array] the preceding tokens that form the lookup key
  # @return [Markov::Token] a follower, or a :noop token for an unknown key
  def select_next_token(tokens)
    candidates = @dictionary[tokens_to_words(tokens)]
    return Markov::Token.new("X", :noop) if candidates.nil?

    candidates[random_number(candidates.length)]
  end

  # Picks a random :word follower for the key formed by +tokens+.
  #
  # Filters the followers instead of retrying random draws, so the call
  # cannot spin forever when the key is unknown or has no :word follower.
  #
  # @param tokens [Array] the preceding tokens that form the lookup key
  # @return [Markov::Token] a :word follower, or a :noop token when there
  #   is none
  def select_next_word(tokens)
    candidates = @dictionary[tokens_to_words(tokens)] || []
    words = candidates.select { |t| t.kind == :word }
    return Markov::Token.new("X", :noop) if words.empty?

    words[random_number(words.length)]
  end
end
|
data/lib/markov/generator.rb
CHANGED
@@ -2,15 +2,15 @@
|
|
2
2
|
require 'securerandom'
|
3
3
|
|
4
4
|
class Markov::Generator
|
5
|
-
|
5
|
+
include Markov::Util
|
6
|
+
|
6
7
|
def initialize(depth)
|
7
8
|
@depth = depth
|
8
|
-
|
9
|
-
@dictionary = {}
|
10
|
-
@start_words = {}
|
11
9
|
@unparsed_sentences = []
|
12
10
|
@tokens = []
|
13
11
|
|
12
|
+
@dict = Markov::Dictionary.new(depth)
|
13
|
+
|
14
14
|
srand
|
15
15
|
end
|
16
16
|
|
@@ -35,8 +35,8 @@ class Markov::Generator
|
|
35
35
|
|
36
36
|
# need to store the words in both the dictionary
|
37
37
|
# and the list of start words
|
38
|
-
add_to_start_words word_seq[0, @depth-1]
|
39
|
-
add_to_dictionary word_seq
|
38
|
+
@dict.add_to_start_words word_seq[0, @depth-1]
|
39
|
+
@dict.add_to_dictionary word_seq
|
40
40
|
|
41
41
|
token = parser.next_token
|
42
42
|
state = :sentence
|
@@ -48,10 +48,10 @@ class Markov::Generator
|
|
48
48
|
word_seq << token
|
49
49
|
|
50
50
|
# add to the dictionary
|
51
|
-
add_to_dictionary word_seq
|
51
|
+
@dict.add_to_dictionary word_seq
|
52
52
|
|
53
53
|
# stop current sequence and start again
|
54
|
-
if token.kind == :stop
|
54
|
+
if token == nil || token.kind == :stop
|
55
55
|
word_seq = []
|
56
56
|
state = :start
|
57
57
|
end
|
@@ -61,12 +61,13 @@ class Markov::Generator
|
|
61
61
|
rescue => e
|
62
62
|
# nothing to rescue
|
63
63
|
puts e
|
64
|
+
puts e.backtrace
|
64
65
|
end
|
65
66
|
|
66
67
|
end # end parse_text
|
67
68
|
|
68
|
-
def generate_sentence(min_length=
|
69
|
-
if @
|
69
|
+
def generate_sentence(min_length=15)
|
70
|
+
if @dict.empty?
|
70
71
|
raise EmptyDictionaryError.new("The dictionary is empty! Parse a source file/string first!")
|
71
72
|
end
|
72
73
|
|
@@ -74,42 +75,54 @@ class Markov::Generator
|
|
74
75
|
complete_sentence = false
|
75
76
|
|
76
77
|
# initialize
|
77
|
-
select_start_words.each {|w| tokens << w}
|
78
|
+
@dict.select_start_words.each {|w| tokens << w}
|
78
79
|
prev_token = tokens.last
|
79
80
|
|
80
81
|
begin
|
81
|
-
token = select_next_token tokens.last(@depth-1)
|
82
|
+
token = @dict.select_next_token tokens.last(@depth-1)
|
82
83
|
|
83
|
-
if token.kind == :
|
84
|
-
token = select_next_word tokens.last(@depth-1) if prev_token.kind == :special
|
84
|
+
if token.kind == :word
|
85
85
|
tokens << token
|
86
|
+
prev_token = token
|
86
87
|
elsif token.kind == :special
|
87
|
-
|
88
|
-
|
88
|
+
if prev_token.kind == :word
|
89
|
+
tokens << token
|
90
|
+
prev_token = token
|
91
|
+
end
|
92
|
+
elsif token.kind == :stop
|
93
|
+
if prev_token.kind == :word
|
94
|
+
tokens << token
|
95
|
+
prev_token = token
|
96
|
+
end
|
89
97
|
elsif token.kind == :noop
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
98
|
+
if prev_token.kind == :word
|
99
|
+
tokens << Markov::Token.new(".", :stop)
|
100
|
+
end
|
101
|
+
# start a new sentence
|
102
|
+
@dict.select_start_words.each {|w| tokens << w}
|
103
|
+
prev_token = tokens.last
|
94
104
|
end
|
95
105
|
|
96
|
-
|
106
|
+
if (token.kind == :stop) && (tokens.size > min_length)
|
107
|
+
#puts "-- DONE(#{tokens.size}) #{tokens_to_debug tokens}"
|
108
|
+
return tokens_to_sentence tokens
|
109
|
+
end
|
97
110
|
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
111
|
+
# default circuit-breaker
|
112
|
+
if tokens.size > min_length * 4
|
113
|
+
# restart
|
114
|
+
tokens = []
|
115
|
+
complete_sentence = false
|
116
|
+
|
117
|
+
# initialize
|
118
|
+
@dict.select_start_words.each {|w| tokens << w}
|
119
|
+
prev_token = tokens.last
|
105
120
|
end
|
106
121
|
|
107
|
-
# circuit-breaker
|
108
|
-
complete_sentence = true if tokens.size > min_length*2
|
109
122
|
end until complete_sentence
|
110
123
|
|
111
124
|
tokens_to_sentence tokens
|
112
|
-
end
|
125
|
+
end
|
113
126
|
|
114
127
|
def dump_startwords
|
115
128
|
@start_words.keys.each do |start_words|
|
@@ -118,80 +131,11 @@ class Markov::Generator
|
|
118
131
|
end
|
119
132
|
|
120
133
|
def dump_dictionary
|
121
|
-
@
|
122
|
-
following = @dictionary[keys]
|
123
|
-
sentence = []
|
124
|
-
following.each do |word|
|
125
|
-
sentence << "#{word.to_s},"
|
126
|
-
end
|
127
|
-
s = sentence.join(" ")
|
128
|
-
puts "#{keys} => #{s.slice(0,s.length-1)}"
|
129
|
-
end
|
130
|
-
end
|
131
|
-
|
132
|
-
private
|
133
|
-
|
134
|
-
def add_to_start_words(tokens)
|
135
|
-
return if tokens[0].kind != :word
|
136
|
-
|
137
|
-
tokens[0].word = tokens[0].word.capitalize
|
138
|
-
start_words = tokens_to_words tokens
|
139
|
-
|
140
|
-
@start_words[start_words] ||= tokens
|
134
|
+
@dict.dump_dictionary
|
141
135
|
end
|
142
136
|
|
143
|
-
def
|
144
|
-
|
145
|
-
return if token.word == ""
|
146
|
-
|
147
|
-
key_words = tokens_to_words tokens[0, @depth-1]
|
148
|
-
|
149
|
-
@dictionary[key_words] ||= []
|
150
|
-
@dictionary[key_words] << token
|
151
|
-
end
|
152
|
-
|
153
|
-
def tokens_to_words(tokens)
|
154
|
-
words = []
|
155
|
-
tokens.each do |t|
|
156
|
-
words << t.word
|
157
|
-
end
|
158
|
-
words
|
159
|
-
end
|
160
|
-
|
161
|
-
def tokens_to_sentence(tokens)
|
162
|
-
s = ""
|
163
|
-
tokens.each do |t|
|
164
|
-
if t.kind != :word
|
165
|
-
s << t.word
|
166
|
-
else
|
167
|
-
s << " " + t.word
|
168
|
-
end
|
169
|
-
end
|
170
|
-
|
171
|
-
s[1, s.length-1]
|
172
|
-
end
|
173
|
-
|
174
|
-
def select_start_words
|
175
|
-
@start_words[ @start_words.keys[random_number( @start_words.keys.length-1)]]
|
176
|
-
end
|
177
|
-
|
178
|
-
def select_next_token(tokens)
|
179
|
-
token = @dictionary[ tokens_to_words(tokens)]
|
180
|
-
|
181
|
-
return Token.new("X", :noop) if token == nil
|
182
|
-
token[random_number(tokens.length-1)]
|
183
|
-
end
|
184
|
-
|
185
|
-
def select_next_word(tokens)
|
186
|
-
token = nil
|
187
|
-
begin
|
188
|
-
token = select_next_token(tokens)
|
189
|
-
end until token.kind == :word
|
190
|
-
token
|
191
|
-
end
|
192
|
-
|
193
|
-
def random_number(upper_limit)
|
194
|
-
(SecureRandom.random_number * upper_limit).to_i
|
137
|
+
def dump_startwords
|
138
|
+
@dict.dump_startwords
|
195
139
|
end
|
196
140
|
|
197
141
|
end
|
data/lib/markov/parser.rb
CHANGED
data/lib/markov/token.rb
CHANGED
data/lib/markov/util.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
|
2
|
+
require 'securerandom'
|
3
|
+
|
4
|
+
# Helper methods mixed into the Markov classes: conversions from token
# lists to words or text, and a random index helper.
module Markov::Util
  # @param tokens [Array] tokens responding to #word
  # @return [Array] the word of every token, in order
  def tokens_to_words(tokens)
    tokens.map(&:word)
  end

  # Joins tokens into readable text: :word tokens are preceded by a space,
  # every other kind is appended directly to the previous token.
  #
  # Only a leading space is removed from the result, so a sequence that
  # starts with a non-word token keeps its first character. An empty list
  # yields an empty string.
  #
  # @param tokens [Array] tokens responding to #kind and #word
  # @return [String]
  def tokens_to_sentence(tokens)
    sentence = tokens.map { |t| t.kind == :word ? " #{t.word}" : t.word }.join
    sentence.sub(/\A /, "")
  end

  # Debug rendering: tokens separated by single spaces, with non-word
  # tokens shown through #to_symbol instead of their word.
  #
  # @param tokens [Array] tokens responding to #kind, #word and #to_symbol
  # @return [String] empty string for an empty list
  def tokens_to_debug(tokens)
    tokens.map { |t| t.kind == :word ? t.word : t.to_symbol }.join(" ")
  end

  # Draws a random integer by scaling a float from [0, 1) and truncating.
  #
  # For a positive +upper_limit+ the result lies in 0...upper_limit (the
  # limit itself is never returned); for 0 the result is 0.
  #
  # @param upper_limit [Integer] exclusive upper bound
  # @return [Integer]
  def random_number(upper_limit)
    (SecureRandom.random_number * upper_limit).to_i
  end
end
|
data/markov-generator.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: markov-generator 0.10.0 ruby lib
|
5
|
+
# stub: markov-generator 0.11.0 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "markov-generator"
|
9
|
-
s.version = "0.10.0"
|
9
|
+
s.version = "0.11.0"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Michael Kuehl"]
|
14
|
-
s.date = "2016-01-
|
14
|
+
s.date = "2016-01-12"
|
15
15
|
s.description = "A Markov Chain text generator library"
|
16
16
|
s.email = "hello@ratchet.cc"
|
17
17
|
s.extra_rdoc_files = [
|
@@ -29,14 +29,19 @@ Gem::Specification.new do |s|
|
|
29
29
|
"Rakefile",
|
30
30
|
"VERSION",
|
31
31
|
"lib/markov.rb",
|
32
|
+
"lib/markov/dictionary.rb",
|
32
33
|
"lib/markov/generator.rb",
|
33
34
|
"lib/markov/parser.rb",
|
34
35
|
"lib/markov/token.rb",
|
36
|
+
"lib/markov/util.rb",
|
35
37
|
"markov-generator.gemspec",
|
36
38
|
"test/generator_test.rb",
|
37
39
|
"test/test_bulk_markov.rb",
|
38
40
|
"test/test_markov.rb",
|
39
|
-
"test/test_parser.rb"
|
41
|
+
"test/test_parser.rb",
|
42
|
+
"test/texts/alice.txt",
|
43
|
+
"test/texts/cthulhu.txt",
|
44
|
+
"test/texts/grimm.txt"
|
40
45
|
]
|
41
46
|
s.homepage = "http://github.com/ratchetcc/markov-generator"
|
42
47
|
s.licenses = ["MIT"]
|
data/test/generator_test.rb
CHANGED
@@ -1,15 +1,15 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
1
2
|
|
2
|
-
require 'markov
|
3
|
+
require 'markov'
|
3
4
|
|
4
|
-
markov = Markov
|
5
|
-
markov.
|
6
|
-
markov.parse_source_file "./generator_test1.txt"
|
5
|
+
markov = Markov.generator
|
6
|
+
markov.parse_text "./test/texts/generator_test.txt"
|
7
7
|
|
8
|
+
#markov.dump_startwords
|
8
9
|
#markov.dump_dictionary
|
9
|
-
|
10
|
-
markov.dump_dictionary_stats
|
10
|
+
puts ""
|
11
11
|
|
12
12
|
1..5.times do
|
13
13
|
puts "#{markov.generate_sentence}"
|
14
|
+
puts ""
|
14
15
|
end
|
15
|
-
|