markov-generator 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 09d0c8e708f75e9c610108c700c74fb6f9db6dbc
4
- data.tar.gz: 7173ff93d857e356b0149a33de3743392264cff5
3
+ metadata.gz: d847256699f3f91abcbf0302c2be407bf97cb653
4
+ data.tar.gz: 19e95af2ef8f231c56f1ac9e76716518c1f8b057
5
5
  SHA512:
6
- metadata.gz: 53cf62b9c8ec50f55112fe94e6e55eac01e3ebb5ffcff56c7a896f148761ce681d4f6968bf42190bceecf8175ea8b58fa0a8af4f13806e42b1852e2cf667f17d
7
- data.tar.gz: c3d757b5b3841b47ae2df2b7e0236fb03ca7fadf60556cad53a245727aadbbe808859f03855d2567a52d6c66f3d6d90d2bf1ff46f12d3607ef87608a382f4ff8
6
+ metadata.gz: f06aef8afefd6f20daeee4cb77d7a72daa87126331caeaa9deeda9d5beef1f77c2908d59167bb91b66e170fa92fdffdb6b464b92506f43c03563a65d98ae0737
7
+ data.tar.gz: 997fbd9590015db2bb8137baee2f83488a1979367ef2e516a322d67f871369d8a0e394d02241e67c76cbb933b34c16977f48226d41ee95fc4d442a33fd386998
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.10.0
1
+ 0.11.0
@@ -1,8 +1,10 @@
1
1
 
2
2
  module Markov
3
3
 
4
+ require 'markov/util'
4
5
  require 'markov/token'
5
6
  require 'markov/parser'
7
+ require 'markov/dictionary'
6
8
  require 'markov/generator'
7
9
 
8
10
  def generator(depth=3)
@@ -0,0 +1,76 @@
1
+
2
+ require 'securerandom'
3
+
4
# Holds the data the Markov generator samples from:
#
# * @dictionary  maps a key (an array of word strings, built from the first
#                depth-1 tokens of a sequence) to the list of tokens that were
#                seen following that key. Duplicates are kept, so frequent
#                followers are proportionally more likely to be selected.
# * @start_words maps a key (array of word strings) to the token sequence that
#                was first registered for it as a sentence opening.
#
# Helper methods (tokens_to_words, tokens_to_sentence, random_number) come
# from Markov::Util.
class Markov::Dictionary
  include Markov::Util

  # depth - size of the token window; keys are built from depth-1 tokens.
  def initialize(depth)
    @depth = depth

    @dictionary = {}
    @start_words = {}

    # Kept from the original implementation: reseeds Kernel's generator.
    # Selection below goes through Markov::Util#random_number.
    srand
  end

  # True while nothing has been added to the transition table.
  def empty?
    @dictionary.empty?
  end

  # Prints every start key together with the tokens recorded after it.
  def dump_startwords
    @start_words.each_key do |start_words|
      # A start key is not guaranteed to have a dictionary entry (e.g. when
      # add_to_dictionary skipped the sequence), so fall back to an empty list.
      followers = @dictionary.fetch(start_words, [])
      puts "#{start_words} -> #{tokens_to_sentence followers}"
    end
  end

  # Prints every key with a comma-separated list of its follower tokens.
  def dump_dictionary
    @dictionary.each do |keys, following|
      puts "#{keys} => #{following.map(&:to_s).join(', ')}"
    end
  end

  # Registers tokens as a possible sentence opening.
  #
  # Ignored when the list is empty or does not begin with a :word token.
  # NOTE: capitalizes the first token's word in place, so the caller's token
  # object is modified. Only the first sequence seen for a key is kept.
  def add_to_start_words(tokens)
    first = tokens.first
    return unless first && first.kind == :word

    first.word = first.word.capitalize
    @start_words[tokens_to_words(tokens)] ||= tokens
  end

  # Records the last token of the sequence as a follower of the key built
  # from the first depth-1 tokens. Sequences ending in nil or in a token with
  # an empty word are ignored.
  def add_to_dictionary(tokens)
    token = tokens.last
    return if token.nil? || token.word == ""

    key_words = tokens_to_words tokens[0, @depth-1]

    @dictionary[key_words] ||= []
    @dictionary[key_words] << token
  end

  # Returns a randomly chosen start sequence (array of tokens), or nil when
  # no start words have been registered.
  def select_start_words
    keys = @start_words.keys
    # random_number's upper bound is exclusive, so pass the full length;
    # passing length-1 would make the last key unreachable.
    @start_words[keys[random_number(keys.length)]]
  end

  # Returns a randomly chosen follower of the given key tokens, or a
  # Token("X", :noop) when the key is unknown.
  def select_next_token(tokens)
    candidates = @dictionary[tokens_to_words(tokens)]

    return Markov::Token.new("X", :noop) if candidates.nil?

    # Index into the candidate list (not the key), over its whole length.
    candidates[random_number(candidates.length)]
  end

  # Returns a randomly chosen follower of kind :word for the given key
  # tokens. When the key is unknown or has no :word follower, returns a
  # Token("X", :noop) instead of retrying forever.
  def select_next_word(tokens)
    candidates = @dictionary[tokens_to_words(tokens)] || []
    words = candidates.select { |t| t.kind == :word }

    return Markov::Token.new("X", :noop) if words.empty?

    words[random_number(words.length)]
  end

end
@@ -2,15 +2,15 @@
2
2
  require 'securerandom'
3
3
 
4
4
  class Markov::Generator
5
-
5
+ include Markov::Util
6
+
6
7
  def initialize(depth)
7
8
  @depth = depth
8
-
9
- @dictionary = {}
10
- @start_words = {}
11
9
  @unparsed_sentences = []
12
10
  @tokens = []
13
11
 
12
+ @dict = Markov::Dictionary.new(depth)
13
+
14
14
  srand
15
15
  end
16
16
 
@@ -35,8 +35,8 @@ class Markov::Generator
35
35
 
36
36
  # need to store the words in both the dictionary
37
37
  # and the list of start words
38
- add_to_start_words word_seq[0, @depth-1]
39
- add_to_dictionary word_seq
38
+ @dict.add_to_start_words word_seq[0, @depth-1]
39
+ @dict.add_to_dictionary word_seq
40
40
 
41
41
  token = parser.next_token
42
42
  state = :sentence
@@ -48,10 +48,10 @@ class Markov::Generator
48
48
  word_seq << token
49
49
 
50
50
  # add to the dictionary
51
- add_to_dictionary word_seq
51
+ @dict.add_to_dictionary word_seq
52
52
 
53
53
  # stop current sequence and start again
54
- if token.kind == :stop
54
+ if token == nil || token.kind == :stop
55
55
  word_seq = []
56
56
  state = :start
57
57
  end
@@ -61,12 +61,13 @@ class Markov::Generator
61
61
  rescue => e
62
62
  # nothing to rescue
63
63
  puts e
64
+ puts e.backtrace
64
65
  end
65
66
 
66
67
  end # end parse_text
67
68
 
68
- def generate_sentence(min_length=20)
69
- if @dictionary.empty?
69
+ def generate_sentence(min_length=15)
70
+ if @dict.empty?
70
71
  raise EmptyDictionaryError.new("The dictionary is empty! Parse a source file/string first!")
71
72
  end
72
73
 
@@ -74,42 +75,54 @@ class Markov::Generator
74
75
  complete_sentence = false
75
76
 
76
77
  # initialize
77
- select_start_words.each {|w| tokens << w}
78
+ @dict.select_start_words.each {|w| tokens << w}
78
79
  prev_token = tokens.last
79
80
 
80
81
  begin
81
- token = select_next_token tokens.last(@depth-1)
82
+ token = @dict.select_next_token tokens.last(@depth-1)
82
83
 
83
- if token.kind == :stop
84
- token = select_next_word tokens.last(@depth-1) if prev_token.kind == :special
84
+ if token.kind == :word
85
85
  tokens << token
86
+ prev_token = token
86
87
  elsif token.kind == :special
87
- token = select_next_word tokens.last(@depth-1) if prev_token.kind == :special
88
- tokens << token
88
+ if prev_token.kind == :word
89
+ tokens << token
90
+ prev_token = token
91
+ end
92
+ elsif token.kind == :stop
93
+ if prev_token.kind == :word
94
+ tokens << token
95
+ prev_token = token
96
+ end
89
97
  elsif token.kind == :noop
90
- token = Token.new(".", :stop)
91
- tokens[tokens.length-1] = token
92
- else
93
- tokens << token
98
+ if prev_token.kind == :word
99
+ tokens << Markov::Token.new(".", :stop)
100
+ end
101
+ # start a new sentence
102
+ @dict.select_start_words.each {|w| tokens << w}
103
+ prev_token = tokens.last
94
104
  end
95
105
 
96
- prev_token = token
106
+ if (token.kind == :stop) && (tokens.size > min_length)
107
+ #puts "-- DONE(#{tokens.size}) #{tokens_to_debug tokens}"
108
+ return tokens_to_sentence tokens
109
+ end
97
110
 
98
- if token.kind == :stop
99
- if tokens.size < min_length
100
- select_start_words.each {|w| tokens << w}
101
- prev_token = tokens.last
102
- else
103
- complete_sentence = true
104
- end
111
+ # default circuit-breaker
112
+ if tokens.size > min_length * 4
113
+ # restart
114
+ tokens = []
115
+ complete_sentence = false
116
+
117
+ # initialize
118
+ @dict.select_start_words.each {|w| tokens << w}
119
+ prev_token = tokens.last
105
120
  end
106
121
 
107
- # circuit-breaker
108
- complete_sentence = true if tokens.size > min_length*2
109
122
  end until complete_sentence
110
123
 
111
124
  tokens_to_sentence tokens
112
- end #end generate_sentence
125
+ end
113
126
 
114
127
  def dump_startwords
115
128
  @start_words.keys.each do |start_words|
@@ -118,80 +131,11 @@ class Markov::Generator
118
131
  end
119
132
 
120
133
  def dump_dictionary
121
- @dictionary.keys.each do |keys|
122
- following = @dictionary[keys]
123
- sentence = []
124
- following.each do |word|
125
- sentence << "#{word.to_s},"
126
- end
127
- s = sentence.join(" ")
128
- puts "#{keys} => #{s.slice(0,s.length-1)}"
129
- end
130
- end
131
-
132
- private
133
-
134
- def add_to_start_words(tokens)
135
- return if tokens[0].kind != :word
136
-
137
- tokens[0].word = tokens[0].word.capitalize
138
- start_words = tokens_to_words tokens
139
-
140
- @start_words[start_words] ||= tokens
134
+ @dict.dump_dictionary
141
135
  end
142
136
 
143
- def add_to_dictionary(tokens)
144
- token = tokens.last
145
- return if token.word == ""
146
-
147
- key_words = tokens_to_words tokens[0, @depth-1]
148
-
149
- @dictionary[key_words] ||= []
150
- @dictionary[key_words] << token
151
- end
152
-
153
- def tokens_to_words(tokens)
154
- words = []
155
- tokens.each do |t|
156
- words << t.word
157
- end
158
- words
159
- end
160
-
161
- def tokens_to_sentence(tokens)
162
- s = ""
163
- tokens.each do |t|
164
- if t.kind != :word
165
- s << t.word
166
- else
167
- s << " " + t.word
168
- end
169
- end
170
-
171
- s[1, s.length-1]
172
- end
173
-
174
- def select_start_words
175
- @start_words[ @start_words.keys[random_number( @start_words.keys.length-1)]]
176
- end
177
-
178
- def select_next_token(tokens)
179
- token = @dictionary[ tokens_to_words(tokens)]
180
-
181
- return Token.new("X", :noop) if token == nil
182
- token[random_number(tokens.length-1)]
183
- end
184
-
185
- def select_next_word(tokens)
186
- token = nil
187
- begin
188
- token = select_next_token(tokens)
189
- end until token.kind == :word
190
- token
191
- end
192
-
193
- def random_number(upper_limit)
194
- (SecureRandom.random_number * upper_limit).to_i
137
+ def dump_startwords
138
+ @dict.dump_startwords
195
139
  end
196
140
 
197
141
  end
@@ -59,7 +59,7 @@ class Markov::Parser
59
59
  return @tokens.slice!(0) if @tokens
60
60
 
61
61
  @tokens = []
62
- nil
62
+ nil
63
63
  end # end next_token
64
64
 
65
65
  private
@@ -8,4 +8,15 @@ class Markov::Token < Struct.new(:word, :kind)
8
8
  def to_s
9
9
  "#{kind}(#{word})"
10
10
  end
11
+
12
# Compact label for debug output.
#
# Returns "WORD" for :word tokens (the text itself is omitted),
# "S(<word>)" for :special tokens, "STOP(<word>)" for :stop tokens, and
# "<KIND>(<word>)" with the upper-cased kind for anything else, so that
# e.g. :noop tokens are not mislabelled as STOP.
def to_symbol
  case kind
  when :word
    "WORD"
  when :special
    "S(#{word})"
  when :stop
    "STOP(#{word})"
  else
    "#{kind.to_s.upcase}(#{word})"
  end
end
21
+
11
22
  end
@@ -0,0 +1,44 @@
1
+
2
+ require 'securerandom'
3
+
4
# Helper methods mixed into the Markov classes: conversions from token lists
# to word lists / printable strings, and a small random-index helper.
#
# Tokens are expected to respond to #word and #kind (and #to_symbol for
# tokens_to_debug).
module Markov
  module Util

    # Returns the #word of every token, in order.
    def tokens_to_words(tokens)
      tokens.map(&:word)
    end

    # Joins tokens into a sentence string: :word tokens are preceded by a
    # single space, every other kind is appended directly (so punctuation
    # attaches to the preceding word). The space in front of a leading word
    # is removed.
    #
    # Returns "" for an empty list. A leading non-word token is kept intact.
    def tokens_to_sentence(tokens)
      sentence = tokens.map { |t| t.kind == :word ? " " + t.word : t.word }.join
      # Only strip the separator we added ourselves, never real content.
      sentence.sub(/\A /, "")
    end

    # Space-separated debug rendering: :word tokens show their text, all
    # other tokens show #to_symbol. Returns "" for an empty list.
    def tokens_to_debug(tokens)
      tokens.map { |t| t.kind == :word ? t.word : t.to_symbol }.join(" ")
    end

    # Returns a random Integer r with 0 <= r < upper_limit for a positive
    # upper_limit (the bound is exclusive), and 0 when upper_limit is 0.
    # To pick an index into an array, pass the array's length.
    def random_number(upper_limit)
      (SecureRandom.random_number * upper_limit).to_i
    end

  end
end
@@ -2,16 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: markov-generator 0.10.0 ruby lib
5
+ # stub: markov-generator 0.11.0 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "markov-generator"
9
- s.version = "0.10.0"
9
+ s.version = "0.11.0"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib"]
13
13
  s.authors = ["Michael Kuehl"]
14
- s.date = "2016-01-11"
14
+ s.date = "2016-01-12"
15
15
  s.description = "A Markov Chain text generator library"
16
16
  s.email = "hello@ratchet.cc"
17
17
  s.extra_rdoc_files = [
@@ -29,14 +29,19 @@ Gem::Specification.new do |s|
29
29
  "Rakefile",
30
30
  "VERSION",
31
31
  "lib/markov.rb",
32
+ "lib/markov/dictionary.rb",
32
33
  "lib/markov/generator.rb",
33
34
  "lib/markov/parser.rb",
34
35
  "lib/markov/token.rb",
36
+ "lib/markov/util.rb",
35
37
  "markov-generator.gemspec",
36
38
  "test/generator_test.rb",
37
39
  "test/test_bulk_markov.rb",
38
40
  "test/test_markov.rb",
39
- "test/test_parser.rb"
41
+ "test/test_parser.rb",
42
+ "test/texts/alice.txt",
43
+ "test/texts/cthulhu.txt",
44
+ "test/texts/grimm.txt"
40
45
  ]
41
46
  s.homepage = "http://github.com/ratchetcc/markov-generator"
42
47
  s.licenses = ["MIT"]
@@ -1,15 +1,15 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
1
2
 
2
- require 'markov/generator'
3
+ require 'markov'
3
4
 
4
- markov = Markov::Generator.new
5
- markov.parse_source_file "./generator_test2.txt"
6
- markov.parse_source_file "./generator_test1.txt"
5
+ markov = Markov.generator
6
+ markov.parse_text "./test/texts/generator_test.txt"
7
7
 
8
+ #markov.dump_startwords
8
9
  #markov.dump_dictionary
9
- #markov.dump_start_words
10
- markov.dump_dictionary_stats
10
+ puts ""
11
11
 
12
12
  1..5.times do
13
13
  puts "#{markov.generate_sentence}"
14
+ puts ""
14
15
  end
15
-