markov-generator 0.10.0 → 0.11.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 09d0c8e708f75e9c610108c700c74fb6f9db6dbc
4
- data.tar.gz: 7173ff93d857e356b0149a33de3743392264cff5
3
+ metadata.gz: d847256699f3f91abcbf0302c2be407bf97cb653
4
+ data.tar.gz: 19e95af2ef8f231c56f1ac9e76716518c1f8b057
5
5
  SHA512:
6
- metadata.gz: 53cf62b9c8ec50f55112fe94e6e55eac01e3ebb5ffcff56c7a896f148761ce681d4f6968bf42190bceecf8175ea8b58fa0a8af4f13806e42b1852e2cf667f17d
7
- data.tar.gz: c3d757b5b3841b47ae2df2b7e0236fb03ca7fadf60556cad53a245727aadbbe808859f03855d2567a52d6c66f3d6d90d2bf1ff46f12d3607ef87608a382f4ff8
6
+ metadata.gz: f06aef8afefd6f20daeee4cb77d7a72daa87126331caeaa9deeda9d5beef1f77c2908d59167bb91b66e170fa92fdffdb6b464b92506f43c03563a65d98ae0737
7
+ data.tar.gz: 997fbd9590015db2bb8137baee2f83488a1979367ef2e516a322d67f871369d8a0e394d02241e67c76cbb933b34c16977f48226d41ee95fc4d442a33fd386998
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.10.0
1
+ 0.11.0
@@ -1,8 +1,10 @@
1
1
 
2
2
  module Markov
3
3
 
4
+ require 'markov/util'
4
5
  require 'markov/token'
5
6
  require 'markov/parser'
7
+ require 'markov/dictionary'
6
8
  require 'markov/generator'
7
9
 
8
10
  def generator(depth=3)
@@ -0,0 +1,76 @@
1
+
2
+ require 'securerandom'
3
+
4
+ class Markov::Dictionary
5
+ include Markov::Util
6
+
7
+ def initialize(depth)
8
+ @depth = depth
9
+
10
+ @dictionary = {}
11
+ @start_words = {}
12
+
13
+ srand
14
+ end
15
+
16
+ def empty?
17
+ @dictionary.empty?
18
+ end
19
+
20
+ def dump_startwords
21
+ @start_words.keys.each do |start_words|
22
+ puts "#{start_words} -> #{tokens_to_sentence @dictionary[start_words]}"
23
+ end
24
+ end
25
+
26
+ def dump_dictionary
27
+ @dictionary.keys.each do |keys|
28
+ following = @dictionary[keys]
29
+ sentence = []
30
+ following.each do |word|
31
+ sentence << "#{word.to_s},"
32
+ end
33
+ s = sentence.join(" ")
34
+ puts "#{keys} => #{s.slice(0,s.length-1)}"
35
+ end
36
+ end
37
+
38
+ def add_to_start_words(tokens)
39
+ return if tokens[0].kind != :word
40
+
41
+ tokens[0].word = tokens[0].word.capitalize
42
+ start_words = tokens_to_words tokens
43
+
44
+ @start_words[start_words] ||= tokens
45
+ end
46
+
47
+ def add_to_dictionary(tokens)
48
+ token = tokens.last
49
+ return if token == nil || token.word == ""
50
+
51
+ key_words = tokens_to_words tokens[0, @depth-1]
52
+
53
+ @dictionary[key_words] ||= []
54
+ @dictionary[key_words] << token
55
+ end
56
+
57
+ def select_start_words
58
+ @start_words[ @start_words.keys[random_number( @start_words.keys.length-1)]]
59
+ end
60
+
61
+ def select_next_token(tokens)
62
+ token = @dictionary[ tokens_to_words(tokens)]
63
+
64
+ return Markov::Token.new("X", :noop) if token == nil
65
+ token[random_number(tokens.length-1)]
66
+ end
67
+
68
+ def select_next_word(tokens)
69
+ token = nil
70
+ begin
71
+ token = select_next_token(tokens)
72
+ end until token.kind == :word
73
+ token
74
+ end
75
+
76
+ end
@@ -2,15 +2,15 @@
2
2
  require 'securerandom'
3
3
 
4
4
  class Markov::Generator
5
-
5
+ include Markov::Util
6
+
6
7
  def initialize(depth)
7
8
  @depth = depth
8
-
9
- @dictionary = {}
10
- @start_words = {}
11
9
  @unparsed_sentences = []
12
10
  @tokens = []
13
11
 
12
+ @dict = Markov::Dictionary.new(depth)
13
+
14
14
  srand
15
15
  end
16
16
 
@@ -35,8 +35,8 @@ class Markov::Generator
35
35
 
36
36
  # need to store the words in both the dictionary
37
37
  # and the list of start words
38
- add_to_start_words word_seq[0, @depth-1]
39
- add_to_dictionary word_seq
38
+ @dict.add_to_start_words word_seq[0, @depth-1]
39
+ @dict.add_to_dictionary word_seq
40
40
 
41
41
  token = parser.next_token
42
42
  state = :sentence
@@ -48,10 +48,10 @@ class Markov::Generator
48
48
  word_seq << token
49
49
 
50
50
  # add to the dictionary
51
- add_to_dictionary word_seq
51
+ @dict.add_to_dictionary word_seq
52
52
 
53
53
  # stop current sequence and start again
54
- if token.kind == :stop
54
+ if token == nil || token.kind == :stop
55
55
  word_seq = []
56
56
  state = :start
57
57
  end
@@ -61,12 +61,13 @@ class Markov::Generator
61
61
  rescue => e
62
62
  # nothing to rescue
63
63
  puts e
64
+ puts e.backtrace
64
65
  end
65
66
 
66
67
  end # end parse_text
67
68
 
68
- def generate_sentence(min_length=20)
69
- if @dictionary.empty?
69
+ def generate_sentence(min_length=15)
70
+ if @dict.empty?
70
71
  raise EmptyDictionaryError.new("The dictionary is empty! Parse a source file/string first!")
71
72
  end
72
73
 
@@ -74,42 +75,54 @@ class Markov::Generator
74
75
  complete_sentence = false
75
76
 
76
77
  # initialize
77
- select_start_words.each {|w| tokens << w}
78
+ @dict.select_start_words.each {|w| tokens << w}
78
79
  prev_token = tokens.last
79
80
 
80
81
  begin
81
- token = select_next_token tokens.last(@depth-1)
82
+ token = @dict.select_next_token tokens.last(@depth-1)
82
83
 
83
- if token.kind == :stop
84
- token = select_next_word tokens.last(@depth-1) if prev_token.kind == :special
84
+ if token.kind == :word
85
85
  tokens << token
86
+ prev_token = token
86
87
  elsif token.kind == :special
87
- token = select_next_word tokens.last(@depth-1) if prev_token.kind == :special
88
- tokens << token
88
+ if prev_token.kind == :word
89
+ tokens << token
90
+ prev_token = token
91
+ end
92
+ elsif token.kind == :stop
93
+ if prev_token.kind == :word
94
+ tokens << token
95
+ prev_token = token
96
+ end
89
97
  elsif token.kind == :noop
90
- token = Token.new(".", :stop)
91
- tokens[tokens.length-1] = token
92
- else
93
- tokens << token
98
+ if prev_token.kind == :word
99
+ tokens << Markov::Token.new(".", :stop)
100
+ end
101
+ # start a new sentence
102
+ @dict.select_start_words.each {|w| tokens << w}
103
+ prev_token = tokens.last
94
104
  end
95
105
 
96
- prev_token = token
106
+ if (token.kind == :stop) && (tokens.size > min_length)
107
+ #puts "-- DONE(#{tokens.size}) #{tokens_to_debug tokens}"
108
+ return tokens_to_sentence tokens
109
+ end
97
110
 
98
- if token.kind == :stop
99
- if tokens.size < min_length
100
- select_start_words.each {|w| tokens << w}
101
- prev_token = tokens.last
102
- else
103
- complete_sentence = true
104
- end
111
+ # default circuit-breaker
112
+ if tokens.size > min_length * 4
113
+ # restart
114
+ tokens = []
115
+ complete_sentence = false
116
+
117
+ # initialize
118
+ @dict.select_start_words.each {|w| tokens << w}
119
+ prev_token = tokens.last
105
120
  end
106
121
 
107
- # circuit-breaker
108
- complete_sentence = true if tokens.size > min_length*2
109
122
  end until complete_sentence
110
123
 
111
124
  tokens_to_sentence tokens
112
- end #end generate_sentence
125
+ end
113
126
 
114
127
  def dump_startwords
115
128
  @start_words.keys.each do |start_words|
@@ -118,80 +131,11 @@ class Markov::Generator
118
131
  end
119
132
 
120
133
  def dump_dictionary
121
- @dictionary.keys.each do |keys|
122
- following = @dictionary[keys]
123
- sentence = []
124
- following.each do |word|
125
- sentence << "#{word.to_s},"
126
- end
127
- s = sentence.join(" ")
128
- puts "#{keys} => #{s.slice(0,s.length-1)}"
129
- end
130
- end
131
-
132
- private
133
-
134
- def add_to_start_words(tokens)
135
- return if tokens[0].kind != :word
136
-
137
- tokens[0].word = tokens[0].word.capitalize
138
- start_words = tokens_to_words tokens
139
-
140
- @start_words[start_words] ||= tokens
134
+ @dict.dump_dictionary
141
135
  end
142
136
 
143
- def add_to_dictionary(tokens)
144
- token = tokens.last
145
- return if token.word == ""
146
-
147
- key_words = tokens_to_words tokens[0, @depth-1]
148
-
149
- @dictionary[key_words] ||= []
150
- @dictionary[key_words] << token
151
- end
152
-
153
- def tokens_to_words(tokens)
154
- words = []
155
- tokens.each do |t|
156
- words << t.word
157
- end
158
- words
159
- end
160
-
161
- def tokens_to_sentence(tokens)
162
- s = ""
163
- tokens.each do |t|
164
- if t.kind != :word
165
- s << t.word
166
- else
167
- s << " " + t.word
168
- end
169
- end
170
-
171
- s[1, s.length-1]
172
- end
173
-
174
- def select_start_words
175
- @start_words[ @start_words.keys[random_number( @start_words.keys.length-1)]]
176
- end
177
-
178
- def select_next_token(tokens)
179
- token = @dictionary[ tokens_to_words(tokens)]
180
-
181
- return Token.new("X", :noop) if token == nil
182
- token[random_number(tokens.length-1)]
183
- end
184
-
185
- def select_next_word(tokens)
186
- token = nil
187
- begin
188
- token = select_next_token(tokens)
189
- end until token.kind == :word
190
- token
191
- end
192
-
193
- def random_number(upper_limit)
194
- (SecureRandom.random_number * upper_limit).to_i
137
+ def dump_startwords
138
+ @dict.dump_startwords
195
139
  end
196
140
 
197
141
  end
@@ -59,7 +59,7 @@ class Markov::Parser
59
59
  return @tokens.slice!(0) if @tokens
60
60
 
61
61
  @tokens = []
62
- nil
62
+ nil
63
63
  end # end next_token
64
64
 
65
65
  private
@@ -8,4 +8,15 @@ class Markov::Token < Struct.new(:word, :kind)
8
8
  def to_s
9
9
  "#{kind}(#{word})"
10
10
  end
11
+
12
+ def to_symbol
13
+ if kind == :word
14
+ "WORD"
15
+ elsif kind == :special
16
+ "S(#{word})"
17
+ else
18
+ "STOP(#{word})"
19
+ end
20
+ end
21
+
11
22
  end
@@ -0,0 +1,44 @@
1
+
2
+ require 'securerandom'
3
+
4
+ module Markov::Util
5
+
6
+ def tokens_to_words(tokens)
7
+ words = []
8
+ tokens.each do |t|
9
+ words << t.word
10
+ end
11
+ words
12
+ end
13
+
14
+ def tokens_to_sentence(tokens)
15
+ s = ""
16
+ tokens.each do |t|
17
+ if t.kind != :word
18
+ s << t.word
19
+ else
20
+ s << " " + t.word
21
+ end
22
+ end
23
+
24
+ s[1, s.length-1]
25
+ end
26
+
27
+ def tokens_to_debug(tokens)
28
+ s = ""
29
+ tokens.each do |t|
30
+ if t.kind != :word
31
+ s << " " + t.to_symbol
32
+ else
33
+ s << " " + t.word
34
+ end
35
+ end
36
+
37
+ s[1, s.length-1]
38
+ end
39
+
40
+ def random_number(upper_limit)
41
+ (SecureRandom.random_number * upper_limit).to_i
42
+ end
43
+
44
+ end
@@ -2,16 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: markov-generator 0.10.0 ruby lib
5
+ # stub: markov-generator 0.11.0 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "markov-generator"
9
- s.version = "0.10.0"
9
+ s.version = "0.11.0"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib"]
13
13
  s.authors = ["Michael Kuehl"]
14
- s.date = "2016-01-11"
14
+ s.date = "2016-01-12"
15
15
  s.description = "A Markov Chain text generator library"
16
16
  s.email = "hello@ratchet.cc"
17
17
  s.extra_rdoc_files = [
@@ -29,14 +29,19 @@ Gem::Specification.new do |s|
29
29
  "Rakefile",
30
30
  "VERSION",
31
31
  "lib/markov.rb",
32
+ "lib/markov/dictionary.rb",
32
33
  "lib/markov/generator.rb",
33
34
  "lib/markov/parser.rb",
34
35
  "lib/markov/token.rb",
36
+ "lib/markov/util.rb",
35
37
  "markov-generator.gemspec",
36
38
  "test/generator_test.rb",
37
39
  "test/test_bulk_markov.rb",
38
40
  "test/test_markov.rb",
39
- "test/test_parser.rb"
41
+ "test/test_parser.rb",
42
+ "test/texts/alice.txt",
43
+ "test/texts/cthulhu.txt",
44
+ "test/texts/grimm.txt"
40
45
  ]
41
46
  s.homepage = "http://github.com/ratchetcc/markov-generator"
42
47
  s.licenses = ["MIT"]
@@ -1,15 +1,15 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
1
2
 
2
- require 'markov/generator'
3
+ require 'markov'
3
4
 
4
- markov = Markov::Generator.new
5
- markov.parse_source_file "./generator_test2.txt"
6
- markov.parse_source_file "./generator_test1.txt"
5
+ markov = Markov.generator
6
+ markov.parse_text "./test/texts/generator_test.txt"
7
7
 
8
+ #markov.dump_startwords
8
9
  #markov.dump_dictionary
9
- #markov.dump_start_words
10
- markov.dump_dictionary_stats
10
+ puts ""
11
11
 
12
12
  1..5.times do
13
13
  puts "#{markov.generate_sentence}"
14
+ puts ""
14
15
  end
15
-