markov-generator 0.9.2 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/VERSION +1 -1
- data/lib/markov.rb +13 -0
- data/lib/markov/generator.rb +161 -261
- data/lib/markov/parser.rb +77 -0
- data/lib/markov/token.rb +11 -0
- data/markov-generator.gemspec +12 -8
- data/test/generator_test.rb +2 -2
- data/test/test_bulk_markov.rb +19 -0
- data/test/test_markov.rb +13 -0
- data/test/test_parser.rb +13 -0
- metadata +11 -7
- data/test/file_parser_test.rb +0 -25
- data/test/file_parser_test.txt +0 -11
- data/test/helper.rb +0 -34
- data/test/test_markov_generator.rb +0 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 09d0c8e708f75e9c610108c700c74fb6f9db6dbc
+  data.tar.gz: 7173ff93d857e356b0149a33de3743392264cff5
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 53cf62b9c8ec50f55112fe94e6e55eac01e3ebb5ffcff56c7a896f148761ce681d4f6968bf42190bceecf8175ea8b58fa0a8af4f13806e42b1852e2cf667f17d
+  data.tar.gz: c3d757b5b3841b47ae2df2b7e0236fb03ca7fadf60556cad53a245727aadbbe808859f03855d2567a52d6c66f3d6d90d2bf1ff46f12d3607ef87608a382f4ff8
data/.ruby-gemset
ADDED
@@ -0,0 +1 @@
+markov
data/.ruby-version
ADDED
@@ -0,0 +1 @@
+ruby-2.2.4
data/VERSION
CHANGED
@@ -1 +1 @@
-0.9.2
+0.10.0
data/lib/markov.rb
ADDED
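The 13 added lines of lib/markov.rb are not rendered in this diff. Judging from how the new test scripts use it (require 'markov' followed by Markov.generator(3)), the entry point plausibly defines the Markov namespace, requires the three class files, and exposes a factory method. A sketch under those assumptions, not the released source:

    module Markov
      # Convenience factory used by the test scripts: Markov.generator(3)
      # builds a generator whose chain depth is 3.
      def self.generator(depth)
        Markov::Generator.new(depth)
      end
    end

    require 'markov/token'
    require 'markov/parser'
    require 'markov/generator'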
data/lib/markov/generator.rb
CHANGED
@@ -1,297 +1,197 @@
 
 require 'securerandom'
 
-
-
-
-
-end
-
-class Generator
-
-  attr_reader :depth
+class Markov::Generator
+
+  def initialize(depth)
+    @depth = depth
 
-
-
-
-
-    @split_words = /([,.?!])|[\s]/
-    @replace_chars = /[„':;_"()]/
-
-    @dictionary = {}
-    @start_words = {}
-    @unparsed_sentences = []
-    @tokens = []
-    srand
-  end
+    @dictionary = {}
+    @start_words = {}
+    @unparsed_sentences = []
+    @tokens = []
 
-
-
+    srand
+  end
+
+  def parse_text(source)
 
-
-
+    parser = Markov::Parser.new
+    parser.load_text source
 
-
-
-    parse_text
-  end
+    state = :start # :start, :word, :special, :stop
+    word_seq = []
 
-
-
-    if File.exists?(source)
-      sentences = File.open(source, "r").read.force_encoding(Encoding::UTF_8).split(@split_sentence)
-    else
-      raise FileNotFoundError.new("#{source} does not exist!")
-    end
-
-    sentences.each do |sentence|
-      add_unparsed_sentence sentence
-    end
-
-    parse_text
-
-  end
-
-  def generate_sentence(min_length=20)
-    if @dictionary.empty?
-      raise EmptyDictionaryError.new("The dictionary is empty! Parse a source file/string first!")
-    end
-
-    tokens = []
-    complete_sentence = false
-
-    # initialize
-    select_start_words.each {|w| tokens << w}
-    prev_token = tokens.last
-
-    begin
-      token = select_next_token tokens.last(@depth-1)
-
-      if token.kind == :stop
-        token = select_next_word tokens.last(@depth-1) if prev_token.kind == :special
-        tokens << token
-      elsif token.kind == :special
-        token = select_next_word tokens.last(@depth-1) if prev_token.kind == :special
-        tokens << token
-      elsif token.kind == :noop
-        token = Token.new(".", :stop)
-        tokens[tokens.length-1] = token
-      else
-        tokens << token
-      end
+    begin
+      while token = parser.next_token
 
-
-
-
-
-
-
-      else
-        complete_sentence = true
+        if state == :start
+          word_seq << token
+
+          # fill the array
+          (@depth-word_seq.size).times do
+            word_seq << parser.next_token
           end
+
+          # need to store the words in both the dictionary
+          # and the list of start words
+          add_to_start_words word_seq[0, @depth-1]
+          add_to_dictionary word_seq
+
+          token = parser.next_token
+          state = :sentence
         end
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-  def dump_dictionary
-    @dictionary.keys.each do |words|
-      following = @dictionary[words]
-      sentence = "#{words[0]},#{words[1]},"
-      following.each do |s|
-        sentence << "#{s.word},"
+        if state == :sentence
+          # move the array one position
+          word_seq.slice!(0)
+          word_seq << token
+
+          # add to the dictionary
+          add_to_dictionary word_seq
+
+          # stop current sequence and start again
+          if token.kind == :stop
+            word_seq = []
+            state = :start
+          end
         end
 
-      puts "#{sentence.slice(0,sentence.length-1)}"
       end
+    rescue => e
+      # nothing to rescue
+      puts e
     end
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    begin
-      while token = next_token
-
-        if state == :start
-          word_seq << token
-
-          # fill the array
-          (@depth-word_seq.size).times do
-            word_seq << next_token
-          end
-
-          # need to store the words in both the dictionary
-          # and the list of start words
-          add_to_start_words word_seq[0, @depth-1]
-          add_to_dictionary word_seq
-
-          token = next_token
-          state = :sentence
-        end
-
-        if state == :sentence
-          # move the array one position
-          word_seq.slice!(0)
-          word_seq << token
-
-          # add to the dictionary
-          add_to_dictionary word_seq
-
-          # stop current sequence and start again
-          if token.kind == :stop
-            word_seq = []
-            state = :start
-          end
-        end
-
-      end # end while
-
-    rescue
-      # nothing to rescue
+  end # end parse_text
+
+  def generate_sentence(min_length=20)
+    if @dictionary.empty?
+      raise EmptyDictionaryError.new("The dictionary is empty! Parse a source file/string first!")
+    end
+
+    tokens = []
+    complete_sentence = false
+
+    # initialize
+    select_start_words.each {|w| tokens << w}
+    prev_token = tokens.last
+
+    begin
+      token = select_next_token tokens.last(@depth-1)
+
+      if token.kind == :stop
+        token = select_next_word tokens.last(@depth-1) if prev_token.kind == :special
+        tokens << token
+      elsif token.kind == :special
+        token = select_next_word tokens.last(@depth-1) if prev_token.kind == :special
+        tokens << token
+      elsif token.kind == :noop
+        token = Token.new(".", :stop)
+        tokens[tokens.length-1] = token
+      else
+        tokens << token
       end
 
-
-
-  def next_token
+      prev_token = token
 
-    if
-
-
-
-
-        if word.include?(",")
-          @tokens << Token.new(",", :special)
-        elsif word.include?("?")
-          @tokens << Token.new("?", :stop)
-        elsif word.include?("!")
-          @tokens << Token.new("!", :stop)
-        elsif word.include?(".")
-          @tokens << Token.new(".", :stop)
-        elsif word == ""
-          # skip blanks
-        else
-          @tokens << Token.new(word, :word)
-        end
-      end
+      if token.kind == :stop
+        if tokens.size < min_length
+          select_start_words.each {|w| tokens << w}
+          prev_token = tokens.last
         else
-
+          complete_sentence = true
         end
       end
 
-
-
-
-    nil
-  end # end next_token
+      # circuit-breaker
+      complete_sentence = true if tokens.size > min_length*2
+    end until complete_sentence
 
-
-
-
-
-
-
-      end
-
-    end
-
-  def add_to_start_words(tokens)
-    return if tokens[0].kind != :word
-
-    tokens[0].word = tokens[0].word.capitalize
-    start_words = tokens_to_words tokens
-
-    @start_words[start_words] ||= tokens
-
-  end
-
-  def add_to_dictionary(tokens)
-    token = tokens.last
-    return if token.word == ""
-
-    key_words = tokens_to_words tokens[0, @depth-1]
-
-    @dictionary[key_words] ||= []
-    @dictionary[key_words] << token
+    tokens_to_sentence tokens
+  end #end generate_sentence
+
+  def dump_startwords
+    @start_words.keys.each do |start_words|
+      puts "#{start_words}"
     end
-
-
-
-
-
+  end
+
+  def dump_dictionary
+    @dictionary.keys.each do |keys|
+      following = @dictionary[keys]
+      sentence = []
+      following.each do |word|
+        sentence << "#{word.to_s},"
       end
-
+      s = sentence.join(" ")
+      puts "#{keys} => #{s.slice(0,s.length-1)}"
     end
-
-
-
-
-
-
-      else
-        s << " " + t.word
-      end
-    end
+  end
+
+  private
+
+  def add_to_start_words(tokens)
+    return if tokens[0].kind != :word
 
-
-
+    tokens[0].word = tokens[0].word.capitalize
+    start_words = tokens_to_words tokens
 
-
-
-
+    @start_words[start_words] ||= tokens
+  end
+
+  def add_to_dictionary(tokens)
+    token = tokens.last
+    return if token.word == ""
 
-
-    token = @dictionary[ tokens_to_words(tokens)]
-
-    return Token.new("X", :noop) if token == nil
-    token[random_number(tokens.length-1)]
-  end
+    key_words = tokens_to_words tokens[0, @depth-1]
 
-
-
-
-
-
-
+    @dictionary[key_words] ||= []
+    @dictionary[key_words] << token
+  end
+
+  def tokens_to_words(tokens)
+    words = []
+    tokens.each do |t|
+      words << t.word
     end
-
-
-
+    words
+  end
+
+  def tokens_to_sentence(tokens)
+    s = ""
+    tokens.each do |t|
+      if t.kind != :word
+        s << t.word
+      else
+        s << " " + t.word
+      end
    end
+
+    s[1, s.length-1]
+  end
+
+  def select_start_words
+    @start_words[ @start_words.keys[random_number( @start_words.keys.length-1)]]
+  end
+
+  def select_next_token(tokens)
+    token = @dictionary[ tokens_to_words(tokens)]
+
+    return Token.new("X", :noop) if token == nil
+    token[random_number(tokens.length-1)]
+  end
+
+  def select_next_word(tokens)
+    token = nil
+    begin
+      token = select_next_token(tokens)
+    end until token.kind == :word
+    token
+  end
+
+  def random_number(upper_limit)
+    (SecureRandom.random_number * upper_limit).to_i
   end
 
-end
+end
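A note on the data structures at work here: with depth 3, add_to_dictionary keys each two-word prefix (tokens_to_words of word_seq[0, 2]) to the list of tokens observed immediately after that prefix, while add_to_start_words records the capitalized sentence openers. Roughly, the state looks like this (an illustrative sketch with invented sample words, not code from the gem):

    # Hypothetical contents after parsing some text with depth = 3.
    # Keys are arrays of words; values hold Markov::Token objects.
    @dictionary = {
      ["the", "black"]  => [Markov::Token.new("seas", :word), Markov::Token.new(",", :special)],
      ["black", "seas"] => [Markov::Token.new("of", :word)],
    }
    # Start keys are capitalized by add_to_start_words before storage.
    @start_words = {
      ["The", "black"] => [Markov::Token.new("The", :word), Markov::Token.new("black", :word)],
    }

generate_sentence then walks this chain: it seeds tokens with a random start sequence and repeatedly looks up the last depth-1 words to pick the next token until a stop token lands past min_length.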
data/lib/markov/parser.rb
ADDED
@@ -0,0 +1,77 @@
+
+class Markov::Parser
+
+  def initialize
+    @split_sentence = /(?<=[.?!])\s+/
+    @split_words = /([,.?!])|[\s]/
+    @replace_chars = /[„':;_"()]/
+
+    @unparsed_sentences = []
+    @tokens = []
+  end
+
+  class FileNotFoundError < Exception # :nodoc:
+  end
+
+  class EmptyDictionaryError < Exception # :nodoc:
+  end
+
+  def load_text(source)
+
+    if File.exists?(source)
+      sentences = File.open(source, "r").read.force_encoding(Encoding::UTF_8).split(@split_sentence)
+    else
+      raise FileNotFoundError.new("#{source} does not exist!")
+    end
+
+    sentences.each do |sentence|
+      add_unparsed_sentence sentence
+    end
+
+  end
+
+  def next_token
+
+    if @tokens.empty?
+      sentence = @unparsed_sentences.slice!(0)
+      if sentence
+        sentence.each do |word|
+
+          if word.include?(",")
+            @tokens << Markov::Token.new(",", :special)
+          elsif word.include?("?")
+            @tokens << Markov::Token.new("?", :stop)
+          elsif word.include?("!")
+            @tokens << Markov::Token.new("!", :stop)
+          elsif word.include?(".")
+            @tokens << Markov::Token.new(".", :stop)
+          elsif word == ""
+            # skip blanks
+          else
+            @tokens << Markov::Token.new(word, :word)
+          end
+        end
+      else
+        @tokens = nil
+      end
+    end
+
+    return @tokens.slice!(0) if @tokens
+
+    @tokens = []
+    nil
+  end # end next_token
+
+  private
+
+  def add_unparsed_sentence(sentence)
+
+    sentence.gsub!(@replace_chars, "")
+    words = sentence.split(@split_words)
+    if words && !words.empty?
+      @unparsed_sentences << words
+    end
+
+  end # add_unparsed_sentence
+
+end
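Since load_text and next_token form the parser's whole public surface, the class can also be exercised on its own. A minimal driver (sample.txt is a hypothetical input file):

    $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
    require 'markov'

    parser = Markov::Parser.new
    parser.load_text "sample.txt"   # raises FileNotFoundError if the file is missing

    # next_token returns nil once every queued sentence has been consumed.
    while token = parser.next_token
      puts "#{token.kind}: #{token.word}"
    end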
data/lib/markov/token.rb
ADDED
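The 11 added lines of lib/markov/token.rb are likewise not rendered here. From the call sites in the generator and parser (Markov::Token.new(word, kind), a readable kind, a writable word used when capitalizing start words, and to_s in dump_dictionary), a plausible sketch, again an assumption rather than the released source:

    class Markov::Token

      attr_accessor :word  # reassigned by Generator#add_to_start_words
      attr_reader   :kind  # :word, :special, :stop, or :noop

      def initialize(word, kind)
        @word = word
        @kind = kind
      end

      def to_s
        @word
      end
    end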
data/markov-generator.gemspec
CHANGED
@@ -2,16 +2,16 @@
 # DO NOT EDIT THIS FILE DIRECTLY
 # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
 # -*- encoding: utf-8 -*-
-# stub: markov-generator 0.9.2 ruby lib
+# stub: markov-generator 0.10.0 ruby lib
 
 Gem::Specification.new do |s|
   s.name = "markov-generator"
-  s.version = "0.9.2"
+  s.version = "0.10.0"
 
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.require_paths = ["lib"]
   s.authors = ["Michael Kuehl"]
-  s.date = "
+  s.date = "2016-01-11"
   s.description = "A Markov Chain text generator library"
   s.email = "hello@ratchet.cc"
   s.extra_rdoc_files = [
@@ -20,23 +20,27 @@ Gem::Specification.new do |s|
   ]
   s.files = [
     ".document",
+    ".ruby-gemset",
+    ".ruby-version",
     "Gemfile",
     "Gemfile.lock",
     "LICENSE.txt",
     "README.rdoc",
     "Rakefile",
     "VERSION",
+    "lib/markov.rb",
     "lib/markov/generator.rb",
+    "lib/markov/parser.rb",
+    "lib/markov/token.rb",
     "markov-generator.gemspec",
-    "test/file_parser_test.rb",
-    "test/file_parser_test.txt",
     "test/generator_test.rb",
-    "test/helper.rb",
-    "test/test_markov_generator.rb",
+    "test/test_bulk_markov.rb",
+    "test/test_markov.rb",
+    "test/test_parser.rb"
   ]
   s.homepage = "http://github.com/ratchetcc/markov-generator"
   s.licenses = ["MIT"]
-  s.rubygems_version = "2.4.
+  s.rubygems_version = "2.4.8"
   s.summary = "Markov Chain text generator"
 
   if s.respond_to? :specification_version then
data/test/generator_test.rb
CHANGED
@@ -5,8 +5,8 @@ markov = Markov::Generator.new
 markov.parse_source_file "./generator_test2.txt"
 markov.parse_source_file "./generator_test1.txt"
 
-markov.dump_dictionary
-markov.dump_start_words
+#markov.dump_dictionary
+#markov.dump_start_words
 markov.dump_dictionary_stats
 
 1..5.times do
data/test/test_bulk_markov.rb
ADDED
@@ -0,0 +1,19 @@
+$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
+
+require 'markov'
+
+source_dir = ARGV[0]
+
+markov = Markov.generator(3)
+
+Dir["#{source_dir}/*.txt"].each do | f |
+  puts "*** Analyzing '#{f}' "
+  markov.parse_text f
+end
+
+#markov.dump_startwords
+#markov.dump_dictionary
+
+1..5.times do
+  puts "\n#{markov.generate_sentence}"
+end
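A side note on the 1..5.times do ... end construct these scripts share: Ruby parses it as 1..(5.times { ... }), so the block still runs five times only because 5.times executes it before returning 5 to a Range that is immediately discarded. The conventional spelling of the same loop would be:

    5.times do
      puts "\n#{markov.generate_sentence}"
    end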
data/test/test_markov.rb
ADDED
@@ -0,0 +1,13 @@
+$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
+
+require 'markov'
+
+source = ARGV[0]
+
+markov = Markov.generator(3)
+markov.parse_text source
+
+#markov.dump_startwords
+markov.dump_dictionary
+
+puts "#{markov.generate_sentence}"
data/test/test_parser.rb
ADDED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: markov-generator
 version: !ruby/object:Gem::Version
-  version: 0.9.2
+  version: 0.10.0
 platform: ruby
 authors:
 - Michael Kuehl
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2016-01-11 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: shoulda
@@ -89,19 +89,23 @@ extra_rdoc_files:
 - README.rdoc
 files:
 - ".document"
+- ".ruby-gemset"
+- ".ruby-version"
 - Gemfile
 - Gemfile.lock
 - LICENSE.txt
 - README.rdoc
 - Rakefile
 - VERSION
+- lib/markov.rb
 - lib/markov/generator.rb
+- lib/markov/parser.rb
+- lib/markov/token.rb
 - markov-generator.gemspec
-- test/file_parser_test.rb
-- test/file_parser_test.txt
 - test/generator_test.rb
-- test/helper.rb
-- test/test_markov_generator.rb
+- test/test_bulk_markov.rb
+- test/test_markov.rb
+- test/test_parser.rb
 homepage: http://github.com/ratchetcc/markov-generator
 licenses:
 - MIT
@@ -122,7 +126,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.4.
+rubygems_version: 2.4.8
 signing_key:
 specification_version: 4
 summary: Markov Chain text generator
data/test/file_parser_test.rb
DELETED
@@ -1,25 +0,0 @@
-
-require 'markov/generator'
-
-#markov = Markov::Generator.new
-#markov.parse_source_file "./test_seed.txt"
-
-#puts "#{markov.generate_sentence}"
-
-#@split_words = /([',.?!\n-])|[\s]+/
-#@split_sentence = /(?<=[.!?\n])\s+/
-
-split_sentence = /(?<=[.?!])\s+/
-split_words = /([,.?!])|[\s]/
-replace_chars = /[„':;_"()]/
-
-#source = "./file_parser_test.txt"
-source = "./seed_alts1.txt"
-
-sentences = File.open(source, "r").read.force_encoding(Encoding::UTF_8).split(split_sentence)
-
-sentences.each do |sentence|
-  puts sentence
-  puts sentence.gsub!( replace_chars, "")
-  puts "#{sentence.split(split_words)}"
-end
data/test/file_parser_test.txt
DELETED
@@ -1,11 +0,0 @@
-The most merciful thing in the world, I think, is the inability of the human mind to correlate all its contents. We live on a placid island of
-ignorance, in the midst of black seas of infinity-and it was not meant that we should voyage far.
-
-The_sciences, each straining in - its own
-direction, "have", hitherto harmed us little; but (some day) the piecing
-together of dissociated "knowledge" will open up such terrifying vistas of
-reality, and of our frightful 'position' therein, that we shall either go
-mad from the revelation or flee from the light into the peace and safety
-of a new dark age.
-
-Where do we go? Nowhere! she said.
data/test/helper.rb
DELETED
@@ -1,34 +0,0 @@
-require 'simplecov'
-
-module SimpleCov::Configuration
-  def clean_filters
-    @filters = []
-  end
-end
-
-SimpleCov.configure do
-  clean_filters
-  load_adapter 'test_frameworks'
-end
-
-ENV["COVERAGE"] && SimpleCov.start do
-  add_filter "/.rvm/"
-end
-require 'rubygems'
-require 'bundler'
-begin
-  Bundler.setup(:default, :development)
-rescue Bundler::BundlerError => e
-  $stderr.puts e.message
-  $stderr.puts "Run `bundle install` to install missing gems"
-  exit e.status_code
-end
-require 'test/unit'
-require 'shoulda'
-
-$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
-$LOAD_PATH.unshift(File.dirname(__FILE__))
-require 'markov_generator'
-
-class Test::Unit::TestCase
-end