markov-generator 0.9.2 → 0.10.0
- checksums.yaml +4 -4
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/VERSION +1 -1
- data/lib/markov.rb +13 -0
- data/lib/markov/generator.rb +161 -261
- data/lib/markov/parser.rb +77 -0
- data/lib/markov/token.rb +11 -0
- data/markov-generator.gemspec +12 -8
- data/test/generator_test.rb +2 -2
- data/test/test_bulk_markov.rb +19 -0
- data/test/test_markov.rb +13 -0
- data/test/test_parser.rb +13 -0
- metadata +11 -7
- data/test/file_parser_test.rb +0 -25
- data/test/file_parser_test.txt +0 -11
- data/test/helper.rb +0 -34
- data/test/test_markov_generator.rb +0 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 09d0c8e708f75e9c610108c700c74fb6f9db6dbc
+  data.tar.gz: 7173ff93d857e356b0149a33de3743392264cff5
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 53cf62b9c8ec50f55112fe94e6e55eac01e3ebb5ffcff56c7a896f148761ce681d4f6968bf42190bceecf8175ea8b58fa0a8af4f13806e42b1852e2cf667f17d
+  data.tar.gz: c3d757b5b3841b47ae2df2b7e0236fb03ca7fadf60556cad53a245727aadbbe808859f03855d2567a52d6c66f3d6d90d2bf1ff46f12d3607ef87608a382f4ff8
data/.ruby-gemset
ADDED
@@ -0,0 +1 @@
+markov
data/.ruby-version
ADDED
@@ -0,0 +1 @@
+ruby-2.2.4
data/VERSION
CHANGED
@@ -1 +1 @@
-0.9.2
+0.10.0
data/lib/markov.rb
ADDED
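The +13 lines of the new lib/markov.rb entry point are not expanded by the diff viewer. A plausible sketch, inferred from the compact class definitions elsewhere in this diff (Markov::Generator, Markov::Parser, Markov::Token) and the Markov.generator(3) factory the new tests call; the exact body is an assumption:

module Markov
  # Hypothetical factory; the new tests call Markov.generator(depth).
  def self.generator(depth)
    Generator.new(depth)
  end
end

require 'markov/token'
require 'markov/parser'
require 'markov/generator'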
data/lib/markov/generator.rb
CHANGED
@@ -1,297 +1,197 @@
 
 require 'securerandom'
 
-
-
-
-
-end
-
-class Generator
-
-  attr_reader :depth
+class Markov::Generator
+
+  def initialize(depth)
+    @depth = depth
 
-
-
-
-
-    @split_words = /([,.?!])|[\s]/
-    @replace_chars = /[„':;_"()]/
-
-    @dictionary = {}
-    @start_words = {}
-    @unparsed_sentences = []
-    @tokens = []
-    srand
-  end
+    @dictionary = {}
+    @start_words = {}
+    @unparsed_sentences = []
+    @tokens = []
 
-
-
+    srand
+  end
+
+  def parse_text(source)
 
-
-
+    parser = Markov::Parser.new
+    parser.load_text source
 
-
-
-    parse_text
-  end
+    state = :start # :start, :word, :special, :stop
+    word_seq = []
 
-
-
-    if File.exists?(source)
-      sentences = File.open(source, "r").read.force_encoding(Encoding::UTF_8).split(@split_sentence)
-    else
-      raise FileNotFoundError.new("#{source} does not exist!")
-    end
-
-    sentences.each do |sentence|
-      add_unparsed_sentence sentence
-    end
-
-    parse_text
-
-  end
-
-  def generate_sentence(min_length=20)
-    if @dictionary.empty?
-      raise EmptyDictionaryError.new("The dictionary is empty! Parse a source file/string first!")
-    end
-
-    tokens = []
-    complete_sentence = false
-
-    # initialize
-    select_start_words.each {|w| tokens << w}
-    prev_token = tokens.last
-
-    begin
-      token = select_next_token tokens.last(@depth-1)
-
-      if token.kind == :stop
-        token = select_next_word tokens.last(@depth-1) if prev_token.kind == :special
-        tokens << token
-      elsif token.kind == :special
-        token = select_next_word tokens.last(@depth-1) if prev_token.kind == :special
-        tokens << token
-      elsif token.kind == :noop
-        token = Token.new(".", :stop)
-        tokens[tokens.length-1] = token
-      else
-        tokens << token
-      end
+    begin
+      while token = parser.next_token
 
-
-
-
-
-
-
-      else
-        complete_sentence = true
+        if state == :start
+          word_seq << token
+
+          # fill the array
+          (@depth-word_seq.size).times do
+            word_seq << parser.next_token
          end
+
+          # need to store the words in both the dictionary
+          # and the list of start words
+          add_to_start_words word_seq[0, @depth-1]
+          add_to_dictionary word_seq
+
+          token = parser.next_token
+          state = :sentence
        end
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-  def dump_dictionary
-    @dictionary.keys.each do |words|
-      following = @dictionary[words]
-      sentence = "#{words[0]},#{words[1]},"
-      following.each do |s|
-        sentence << "#{s.word},"
+        if state == :sentence
+          # move the array one position
+          word_seq.slice!(0)
+          word_seq << token
+
+          # add to the dictionary
+          add_to_dictionary word_seq
+
+          # stop current sequence and start again
+          if token.kind == :stop
+            word_seq = []
+            state = :start
+          end
        end
 
-      puts "#{sentence.slice(0,sentence.length-1)}"
      end
+    rescue => e
+      # nothing to rescue
+      puts e
    end
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    begin
-      while token = next_token
-
-        if state == :start
-          word_seq << token
-
-          # fill the array
-          (@depth-word_seq.size).times do
-            word_seq << next_token
-          end
-
-          # need to store the words in both the dictionary
-          # and the list of start words
-          add_to_start_words word_seq[0, @depth-1]
-          add_to_dictionary word_seq
-
-          token = next_token
-          state = :sentence
-        end
-
-        if state == :sentence
-          # move the array one position
-          word_seq.slice!(0)
-          word_seq << token
-
-          # add to the dictionary
-          add_to_dictionary word_seq
-
-          # stop current sequence and start again
-          if token.kind == :stop
-            word_seq = []
-            state = :start
-          end
-        end
-
-      end # end while
-
-    rescue
-      # nothing to rescue
+  end # end parse_text
+
+  def generate_sentence(min_length=20)
+    if @dictionary.empty?
+      raise EmptyDictionaryError.new("The dictionary is empty! Parse a source file/string first!")
+    end
+
+    tokens = []
+    complete_sentence = false
+
+    # initialize
+    select_start_words.each {|w| tokens << w}
+    prev_token = tokens.last
+
+    begin
+      token = select_next_token tokens.last(@depth-1)
+
+      if token.kind == :stop
+        token = select_next_word tokens.last(@depth-1) if prev_token.kind == :special
+        tokens << token
+      elsif token.kind == :special
+        token = select_next_word tokens.last(@depth-1) if prev_token.kind == :special
+        tokens << token
+      elsif token.kind == :noop
+        token = Token.new(".", :stop)
+        tokens[tokens.length-1] = token
+      else
+        tokens << token
      end
 
-
-
-  def next_token
+      prev_token = token
 
-    if
-
-
-
-
-        if word.include?(",")
-          @tokens << Token.new(",", :special)
-        elsif word.include?("?")
-          @tokens << Token.new("?", :stop)
-        elsif word.include?("!")
-          @tokens << Token.new("!", :stop)
-        elsif word.include?(".")
-          @tokens << Token.new(".", :stop)
-        elsif word == ""
-          # skip blanks
-        else
-          @tokens << Token.new(word, :word)
-        end
-      end
+      if token.kind == :stop
+        if tokens.size < min_length
+          select_start_words.each {|w| tokens << w}
+          prev_token = tokens.last
        else
-
+          complete_sentence = true
        end
      end
 
-
-
-
-      nil
-    end # end next_token
+      # circuit-breaker
+      complete_sentence = true if tokens.size > min_length*2
+    end until complete_sentence
 
-
-
-
-
-
-
-    end
-
-  end
-
-  def add_to_start_words(tokens)
-    return if tokens[0].kind != :word
-
-    tokens[0].word = tokens[0].word.capitalize
-    start_words = tokens_to_words tokens
-
-    @start_words[start_words] ||= tokens
-
-  end
-
-  def add_to_dictionary(tokens)
-    token = tokens.last
-    return if token.word == ""
-
-    key_words = tokens_to_words tokens[0, @depth-1]
-
-    @dictionary[key_words] ||= []
-    @dictionary[key_words] << token
+    tokens_to_sentence tokens
+  end #end generate_sentence
+
+  def dump_startwords
+    @start_words.keys.each do |start_words|
+      puts "#{start_words}"
    end
-
-
-
-
-
+  end
+
+  def dump_dictionary
+    @dictionary.keys.each do |keys|
+      following = @dictionary[keys]
+      sentence = []
+      following.each do |word|
+        sentence << "#{word.to_s},"
      end
-
+      s = sentence.join(" ")
+      puts "#{keys} => #{s.slice(0,s.length-1)}"
    end
-
-
-
-
-
-
-    else
-      s << " " + t.word
-    end
-  end
+  end
+
+  private
+
+  def add_to_start_words(tokens)
+    return if tokens[0].kind != :word
 
-
-
+    tokens[0].word = tokens[0].word.capitalize
+    start_words = tokens_to_words tokens
 
-
-
-
+    @start_words[start_words] ||= tokens
+  end
+
+  def add_to_dictionary(tokens)
+    token = tokens.last
+    return if token.word == ""
 
-
-    token = @dictionary[ tokens_to_words(tokens)]
-
-    return Token.new("X", :noop) if token == nil
-    token[random_number(tokens.length-1)]
-  end
+    key_words = tokens_to_words tokens[0, @depth-1]
 
-
-
-
-
-
-
+    @dictionary[key_words] ||= []
+    @dictionary[key_words] << token
+  end
+
+  def tokens_to_words(tokens)
+    words = []
+    tokens.each do |t|
+      words << t.word
    end
-
-
-
+    words
+  end
+
+  def tokens_to_sentence(tokens)
+    s = ""
+    tokens.each do |t|
+      if t.kind != :word
+        s << t.word
+      else
+        s << " " + t.word
+      end
    end
+
+    s[1, s.length-1]
+  end
+
+  def select_start_words
+    @start_words[ @start_words.keys[random_number( @start_words.keys.length-1)]]
+  end
+
+  def select_next_token(tokens)
+    token = @dictionary[ tokens_to_words(tokens)]
+
+    return Token.new("X", :noop) if token == nil
+    token[random_number(tokens.length-1)]
+  end
+
+  def select_next_word(tokens)
+    token = nil
+    begin
+      token = select_next_token(tokens)
+    end until token.kind == :word
+    token
+  end
+
+  def random_number(upper_limit)
+    (SecureRandom.random_number * upper_limit).to_i
  end
 
-end
+end
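The rewrite moves all file handling and tokenizing into Markov::Parser and leaves the generator with just the chain logic: each @dictionary key is the depth-1 preceding words, and its value is the list of tokens observed after that prefix (see add_to_dictionary and select_next_token above). A minimal illustration of the layout, using plain strings where the gem stores Token objects:

# Hypothetical data for depth = 3: keys hold @depth-1 = 2 preceding words.
dictionary = {
  ["The", "quick"]   => ["brown"],
  ["quick", "brown"] => ["fox", "dog"]
}

# select_next_token draws one observed follower at random:
puts dictionary[["quick", "brown"]].sample   # => "fox" or "dog"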
data/lib/markov/parser.rb
ADDED
@@ -0,0 +1,77 @@
+
+class Markov::Parser
+
+  def initialize
+    @split_sentence = /(?<=[.?!])\s+/
+    @split_words = /([,.?!])|[\s]/
+    @replace_chars = /[„':;_"()]/
+
+    @unparsed_sentences = []
+    @tokens = []
+  end
+
+  class FileNotFoundError < Exception # :nodoc:
+  end
+
+  class EmptyDictionaryError < Exception # :nodoc:
+  end
+
+  def load_text(source)
+
+    if File.exists?(source)
+      sentences = File.open(source, "r").read.force_encoding(Encoding::UTF_8).split(@split_sentence)
+    else
+      raise FileNotFoundError.new("#{source} does not exist!")
+    end
+
+    sentences.each do |sentence|
+      add_unparsed_sentence sentence
+    end
+
+  end
+
+  def next_token
+
+    if @tokens.empty?
+      sentence = @unparsed_sentences.slice!(0)
+      if sentence
+        sentence.each do |word|
+
+          if word.include?(",")
+            @tokens << Markov::Token.new(",", :special)
+          elsif word.include?("?")
+            @tokens << Markov::Token.new("?", :stop)
+          elsif word.include?("!")
+            @tokens << Markov::Token.new("!", :stop)
+          elsif word.include?(".")
+            @tokens << Markov::Token.new(".", :stop)
+          elsif word == ""
+            # skip blanks
+          else
+            @tokens << Markov::Token.new(word, :word)
+          end
+        end
+      else
+        @tokens = nil
+      end
+    end
+
+    return @tokens.slice!(0) if @tokens
+
+    @tokens = []
+    nil
+  end # end next_token
+
+  private
+
+  def add_unparsed_sentence(sentence)
+
+    sentence.gsub!(@replace_chars, "")
+    words = sentence.split(@split_words)
+    if words && !words.empty?
+      @unparsed_sentences << words
+    end
+
+  end # add_unparsed_sentence
+
+end
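The parser turns commas into :special tokens, sentence enders (. ? !) into :stop tokens, and everything else into :word tokens, after stripping the characters in @replace_chars. A rough usage sketch (seed.txt is a hypothetical input file):

parser = Markov::Parser.new
parser.load_text "seed.txt"   # raises FileNotFoundError if the file is missing

# "Where do we go?" streams out as four :word tokens and one :stop token.
while token = parser.next_token
  puts "#{token.kind}: #{token.word}"
end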
data/lib/markov/token.rb
ADDED
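The +11 lines of lib/markov/token.rb are not expanded either. Judging from its use in this diff (Token.new(word, kind), the t.word and t.kind readers, the token.word= writer in add_to_start_words, and to_s in dump_dictionary), a likely sketch; treat the body as an assumption:

class Markov::Token
  attr_accessor :word   # add_to_start_words reassigns word
  attr_reader :kind     # :word, :special, :stop or :noop

  def initialize(word, kind)
    @word = word
    @kind = kind
  end

  def to_s
    @word
  end
end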
data/markov-generator.gemspec
CHANGED
@@ -2,16 +2,16 @@
 # DO NOT EDIT THIS FILE DIRECTLY
 # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
 # -*- encoding: utf-8 -*-
-# stub: markov-generator 0.9.2 ruby lib
+# stub: markov-generator 0.10.0 ruby lib
 
 Gem::Specification.new do |s|
   s.name = "markov-generator"
-  s.version = "0.9.2"
+  s.version = "0.10.0"
 
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.require_paths = ["lib"]
   s.authors = ["Michael Kuehl"]
-  s.date = "
+  s.date = "2016-01-11"
   s.description = "A Markov Chain text generator library"
   s.email = "hello@ratchet.cc"
   s.extra_rdoc_files = [
@@ -20,23 +20,27 @@ Gem::Specification.new do |s|
   ]
   s.files = [
     ".document",
+    ".ruby-gemset",
+    ".ruby-version",
     "Gemfile",
     "Gemfile.lock",
     "LICENSE.txt",
     "README.rdoc",
     "Rakefile",
     "VERSION",
+    "lib/markov.rb",
     "lib/markov/generator.rb",
+    "lib/markov/parser.rb",
+    "lib/markov/token.rb",
     "markov-generator.gemspec",
-    "test/file_parser_test.rb",
-    "test/file_parser_test.txt",
     "test/generator_test.rb",
-    "test/helper.rb",
-    "test/test_markov_generator.rb"
+    "test/test_bulk_markov.rb",
+    "test/test_markov.rb",
+    "test/test_parser.rb"
   ]
   s.homepage = "http://github.com/ratchetcc/markov-generator"
   s.licenses = ["MIT"]
-  s.rubygems_version = "2.4.
+  s.rubygems_version = "2.4.8"
   s.summary = "Markov Chain text generator"
 
   if s.respond_to? :specification_version then
data/test/generator_test.rb
CHANGED
@@ -5,8 +5,8 @@ markov = Markov::Generator.new
 markov.parse_source_file "./generator_test2.txt"
 markov.parse_source_file "./generator_test1.txt"
 
-markov.dump_dictionary
-markov.dump_start_words
+#markov.dump_dictionary
+#markov.dump_start_words
 markov.dump_dictionary_stats
 
 1..5.times do
data/test/test_bulk_markov.rb
ADDED
@@ -0,0 +1,19 @@
+$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
+
+require 'markov'
+
+source_dir = ARGV[0]
+
+markov = Markov.generator(3)
+
+Dir["#{source_dir}/*.txt"].each do | f |
+  puts "*** Analyzing '#{f}' "
+  markov.parse_text f
+end
+
+#markov.dump_startwords
+#markov.dump_dictionary
+
+1..5.times do
+  puts "\n#{markov.generate_sentence}"
+end
data/test/test_markov.rb
ADDED
@@ -0,0 +1,13 @@
+$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
+
+require 'markov'
+
+source = ARGV[0]
+
+markov = Markov.generator(3)
+markov.parse_text source
+
+#markov.dump_startwords
+markov.dump_dictionary
+
+puts "#{markov.generate_sentence}"
data/test/test_parser.rb
ADDED
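The +13 lines of test/test_parser.rb are not expanded. Mirroring test_markov.rb above, it most plausibly drives the parser directly; a guess along these lines:

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))

require 'markov'

source = ARGV[0]

parser = Markov::Parser.new
parser.load_text source

while token = parser.next_token
  puts token
end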
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: markov-generator
 version: !ruby/object:Gem::Version
-  version: 0.9.2
+  version: 0.10.0
 platform: ruby
 authors:
 - Michael Kuehl
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2016-01-11 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: shoulda
@@ -89,19 +89,23 @@ extra_rdoc_files:
 - README.rdoc
 files:
 - ".document"
+- ".ruby-gemset"
+- ".ruby-version"
 - Gemfile
 - Gemfile.lock
 - LICENSE.txt
 - README.rdoc
 - Rakefile
 - VERSION
+- lib/markov.rb
 - lib/markov/generator.rb
+- lib/markov/parser.rb
+- lib/markov/token.rb
 - markov-generator.gemspec
-- test/file_parser_test.rb
-- test/file_parser_test.txt
 - test/generator_test.rb
-- test/helper.rb
-- test/test_markov_generator.rb
+- test/test_bulk_markov.rb
+- test/test_markov.rb
+- test/test_parser.rb
 homepage: http://github.com/ratchetcc/markov-generator
 licenses:
 - MIT
@@ -122,7 +126,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.4.
+rubygems_version: 2.4.8
 signing_key:
 specification_version: 4
 summary: Markov Chain text generator
data/test/file_parser_test.rb
DELETED
@@ -1,25 +0,0 @@
|
|
1
|
-
|
2
|
-
require 'markov/generator'
|
3
|
-
|
4
|
-
#markov = Markov::Generator.new
|
5
|
-
#markov.parse_source_file "./test_seed.txt"
|
6
|
-
|
7
|
-
#puts "#{markov.generate_sentence}"
|
8
|
-
|
9
|
-
#@split_words = /([',.?!\n-])|[\s]+/
|
10
|
-
#@split_sentence = /(?<=[.!?\n])\s+/
|
11
|
-
|
12
|
-
split_sentence = /(?<=[.?!])\s+/
|
13
|
-
split_words = /([,.?!])|[\s]/
|
14
|
-
replace_chars = /[„':;_"()]/
|
15
|
-
|
16
|
-
#source = "./file_parser_test.txt"
|
17
|
-
source = "./seed_alts1.txt"
|
18
|
-
|
19
|
-
sentences = File.open(source, "r").read.force_encoding(Encoding::UTF_8).split(split_sentence)
|
20
|
-
|
21
|
-
sentences.each do |sentence|
|
22
|
-
puts sentence
|
23
|
-
puts sentence.gsub!( replace_chars, "")
|
24
|
-
puts "#{sentence.split(split_words)}"
|
25
|
-
end
|
data/test/file_parser_test.txt
DELETED
@@ -1,11 +0,0 @@
-The most merciful thing in the world, I think, is the inability of the human mind to correlate all its contents. We live on a placid island of
-ignorance, in the midst of black seas of infinity-and it was not meant that we should voyage far.
-
-The_sciences, each straining in - its own
-direction, "have", hitherto harmed us little; but (some day) the piecing
-together of dissociated "knowledge" will open up such terrifying vistas of
-reality, and of our frightful 'position' therein, that we shall either go
-mad from the revelation or flee from the light into the peace and safety
-of a new dark age.
-
-Where do we go? Nowhere! she said.
data/test/helper.rb
DELETED
@@ -1,34 +0,0 @@
-require 'simplecov'
-
-module SimpleCov::Configuration
-  def clean_filters
-    @filters = []
-  end
-end
-
-SimpleCov.configure do
-  clean_filters
-  load_adapter 'test_frameworks'
-end
-
-ENV["COVERAGE"] && SimpleCov.start do
-  add_filter "/.rvm/"
-end
-require 'rubygems'
-require 'bundler'
-begin
-  Bundler.setup(:default, :development)
-rescue Bundler::BundlerError => e
-  $stderr.puts e.message
-  $stderr.puts "Run `bundle install` to install missing gems"
-  exit e.status_code
-end
-require 'test/unit'
-require 'shoulda'
-
-$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
-$LOAD_PATH.unshift(File.dirname(__FILE__))
-require 'markov_generator'
-
-class Test::Unit::TestCase
-end