markov-generator 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 2aa1e1c86fea5b2b44acbc8fde22a4cbffc9f17f
4
+ data.tar.gz: 5ab92534b88d461d14cb603b1d83e0ea4b3488ef
5
+ SHA512:
6
+ metadata.gz: 3199d76a06a3b42f1b92e8ba3d0ebfaeff5020ac52ad8016a8c8f3206811f45fdfda797f457d0789e738738215f0c5ad7cb08972c883a4194f79f313f9a1baa7
7
+ data.tar.gz: 1aafb17d5e58b38b8f77d049a4af7d3edabceffb85e7ed435cdec312bcbb5020a3d9aea8b402aec84282617490a4d28a6c8622184a702b61f63f5234f036be01
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/Gemfile ADDED
@@ -0,0 +1,14 @@
1
# Fetch gems over TLS; a plain-http source lets gem downloads be tampered with in transit.
source "https://rubygems.org"
# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "shoulda", ">= 0"
  gem "rdoc", "~> 3.12"
  gem "bundler", "~> 1.0"
  gem "jeweler", "~> 2.0.1"
  gem "simplecov", ">= 0"
end
data/Gemfile.lock ADDED
@@ -0,0 +1,80 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ activesupport (4.2.1)
5
+ i18n (~> 0.7)
6
+ json (~> 1.7, >= 1.7.7)
7
+ minitest (~> 5.1)
8
+ thread_safe (~> 0.3, >= 0.3.4)
9
+ tzinfo (~> 1.1)
10
+ addressable (2.3.8)
11
+ builder (3.2.2)
12
+ descendants_tracker (0.0.4)
13
+ thread_safe (~> 0.3, >= 0.3.1)
14
+ docile (1.1.5)
15
+ faraday (0.9.1)
16
+ multipart-post (>= 1.2, < 3)
17
+ git (1.2.9.1)
18
+ github_api (0.12.3)
19
+ addressable (~> 2.3)
20
+ descendants_tracker (~> 0.0.4)
21
+ faraday (~> 0.8, < 0.10)
22
+ hashie (>= 3.3)
23
+ multi_json (>= 1.7.5, < 2.0)
24
+ nokogiri (~> 1.6.3)
25
+ oauth2
26
+ hashie (3.4.1)
27
+ highline (1.7.2)
28
+ i18n (0.7.0)
29
+ jeweler (2.0.1)
30
+ builder
31
+ bundler (>= 1.0)
32
+ git (>= 1.2.5)
33
+ github_api
34
+ highline (>= 1.6.15)
35
+ nokogiri (>= 1.5.10)
36
+ rake
37
+ rdoc
38
+ json (1.8.2)
39
+ jwt (1.5.0)
40
+ mini_portile (0.6.2)
41
+ minitest (5.6.1)
42
+ multi_json (1.11.0)
43
+ multi_xml (0.5.5)
44
+ multipart-post (2.0.0)
45
+ nokogiri (1.6.6.2)
46
+ mini_portile (~> 0.6.0)
47
+ oauth2 (1.0.0)
48
+ faraday (>= 0.8, < 0.10)
49
+ jwt (~> 1.0)
50
+ multi_json (~> 1.3)
51
+ multi_xml (~> 0.5)
52
+ rack (~> 1.2)
53
+ rack (1.6.1)
54
+ rake (10.4.2)
55
+ rdoc (3.12.2)
56
+ json (~> 1.4)
57
+ shoulda (3.5.0)
58
+ shoulda-context (~> 1.0, >= 1.0.1)
59
+ shoulda-matchers (>= 1.4.1, < 3.0)
60
+ shoulda-context (1.2.1)
61
+ shoulda-matchers (2.8.0)
62
+ activesupport (>= 3.0.0)
63
+ simplecov (0.10.0)
64
+ docile (~> 1.1.0)
65
+ json (~> 1.8)
66
+ simplecov-html (~> 0.10.0)
67
+ simplecov-html (0.10.0)
68
+ thread_safe (0.3.5)
69
+ tzinfo (1.2.2)
70
+ thread_safe (~> 0.1)
71
+
72
+ PLATFORMS
73
+ ruby
74
+
75
+ DEPENDENCIES
76
+ bundler (~> 1.0)
77
+ jeweler (~> 2.0.1)
78
+ rdoc (~> 3.12)
79
+ shoulda
80
+ simplecov
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2015 Michael Kuehl
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,19 @@
1
+ = markov_generator
2
+
3
+ A Markov Chain text generator library.
4
+
5
+ == Contributing to markov_generator
6
+
7
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
8
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it.
9
+ * Fork the project.
10
+ * Start a feature/bugfix branch.
11
+ * Commit and push until you are happy with your contribution.
12
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
13
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate it to its own commit so I can cherry-pick around it.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2015 Michael Kuehl. See LICENSE.txt for
18
+ further details.
19
+
data/Rakefile ADDED
@@ -0,0 +1,51 @@
1
# encoding: utf-8

require 'rubygems'
require 'bundler'

# Activate the :default and :development gem groups. When Bundler cannot do
# so, print its message plus a hint and exit with Bundler's own status code.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => error
  $stderr.puts error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit error.status_code
end

require 'rake'
require 'jeweler'

# Gem packaging tasks. The block receives a Gem::Specification; see
# http://guides.rubygems.org/specification-reference/ for more options.
# Dependencies are defined in the Gemfile.
Jeweler::Tasks.new do |spec|
  spec.name        = "markov-generator"
  spec.homepage    = "http://github.com/ratchetcc/markov-generator"
  spec.license     = "MIT"
  spec.summary     = "Markov Chain text generator"
  spec.description = "A Markov Chain text generator library"
  spec.email       = "hello@ratchet.cc"
  spec.authors     = ["Michael Kuehl"]
end
Jeweler::RubygemsDotOrgTasks.new

require 'rake/testtask'

# `rake test` runs every test/**/test_*.rb with lib/ and test/ on the load path.
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.pattern = 'test/**/test_*.rb'
  t.verbose = true
end

desc "Code coverage detail"
task :simplecov do
  # test/helper.rb only starts SimpleCov when COVERAGE is set.
  ENV['COVERAGE'] = "true"
  Rake::Task['test'].execute
end

task default: :test

require 'rdoc/task'

# RDoc generation; the title carries the contents of VERSION when that file exists.
Rake::RDocTask.new do |doc|
  version =
    if File.exist?('VERSION')
      File.read('VERSION')
    else
      ""
    end

  doc.rdoc_dir = 'rdoc'
  doc.title = "markov-generator #{version}"
  doc.rdoc_files.include('README*')
  doc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.9.0
@@ -0,0 +1,289 @@
1
+
2
+ require 'securerandom'
3
+
4
module Markov

  # Internal value object for one parsed unit of text.
  # +word+ is the text itself; +kind+ is one of :word, :special, :stop or :noop.
  Token = Struct.new(:word, :kind)

  # Markov chain text generator.
  #
  # Text is parsed into a dictionary that maps every run of <tt>depth - 1</tt>
  # consecutive words to the tokens observed directly after that run.
  # Sentences are generated by starting from a recorded sentence opening and
  # repeatedly picking a random successor for the last <tt>depth - 1</tt> tokens.
  class Generator

    # Number of tokens per dictionary entry (key of depth-1 words + 1 successor).
    attr_reader :depth

    # Raised by #parse_source_file when the given path does not exist.
    class FileNotFoundError < StandardError # :nodoc:
    end

    # Raised by #generate_sentence when nothing usable has been parsed yet.
    class EmptyDictionaryError < StandardError # :nodoc:
    end

    # Token kind for each punctuation mark that the word splitter isolates.
    PUNCTUATION_KINDS = {
      "'" => :special,
      "," => :special,
      ":" => :special,
      ";" => :special,
      "-" => :special,
      "?" => :stop,
      "!" => :stop,
      "." => :stop
    }.freeze
    private_constant :PUNCTUATION_KINDS

    # depth:: length of the token window used to build the chain (default 3).
    def initialize(depth=3)
      @depth = depth
      # The captured punctuation is kept by String#split as separate elements.
      # ':' and ';' are part of the class so that "said:" yields "said" and ":"
      # instead of the whole word being replaced by a bare ":" token.
      @split_words = /([',.?!:;\n-])|\s+/
      @split_sentence = /(?<=[.!?\n])\s+/
      @dictionary = {}
      @start_words = {}
      @unparsed_sentences = []
      @tokens = []
    end

    # Adds +sentence+ (a String, possibly containing several sentences) to the
    # dictionary. May be called repeatedly; the dictionary accumulates.
    def parse_string(sentence)
      add_unparsed_sentence sentence
      parse_text
    end

    # Reads the file at +source+ as UTF-8, splits it into sentences and adds
    # them to the dictionary. May be called repeatedly.
    #
    # Raises FileNotFoundError when +source+ does not exist.
    def parse_source_file(source)
      raise FileNotFoundError, "#{source} does not exist!" unless File.exist?(source)

      # File.read closes the handle, unlike a bare File.open(...).read.
      text = File.read(source).force_encoding(Encoding::UTF_8)
      text.split(@split_sentence).each do |sentence|
        add_unparsed_sentence sentence
      end

      parse_text
    end

    # Generates text of roughly +min_length+ tokens and returns it as a String.
    #
    # Sentences are chained until a sentence ends with at least +min_length+
    # tokens collected; generation is cut off unconditionally once more than
    # <tt>min_length * 2</tt> tokens exist.
    #
    # Raises EmptyDictionaryError when no text (or no sentence opening that
    # begins with a word) has been parsed.
    def generate_sentence(min_length=20)
      if @dictionary.empty? || @start_words.empty?
        raise EmptyDictionaryError, "The dictionary is empty! Parse a source file/string first!"
      end

      # dup: the arrays stored in @start_words must not grow with the output
      tokens = select_start_words.dup
      prev_token = tokens.last
      complete_sentence = false

      until complete_sentence
        context = tokens.last(@depth-1)
        token = select_next_token context

        # never emit punctuation directly after a :special token
        if prev_token.kind == :special && (token.kind == :stop || token.kind == :special)
          token = select_next_word context
        end

        if token.kind == :noop
          # dead end in the chain: close the sentence in place of the last token
          token = Token.new(".", :stop)
          tokens[-1] = token
        else
          tokens << token
        end

        prev_token = token

        if token.kind == :stop
          if tokens.size < min_length
            tokens.concat select_start_words
            prev_token = tokens.last
          else
            complete_sentence = true
          end
        end

        # circuit-breaker
        complete_sentence = true if tokens.size > min_length*2
      end

      tokens_to_sentence tokens
    end

    # Prints every recorded sentence opening, comma separated, one per line.
    def dump_start_words
      @start_words.each_key do |words|
        puts words.join(",")
      end
    end

    # Prints every dictionary entry as "key words...,successors...", one per line.
    def dump_dictionary
      @dictionary.each do |words, following|
        puts((words + following.map(&:word)).join(","))
      end
    end

    private

    # Drains the token stream into the dictionary using a sliding window of
    # @depth tokens. The first full window of each sentence is also recorded
    # as a sentence opening. A :stop token ends the sentence and empties the
    # window. Input that ends before a window is full is ignored.
    def parse_text
      window = []

      while (token = next_token)
        window << token
        next if window.size < @depth

        if window.size > @depth
          # move the window one position
          window.shift
        else
          # first full window since the last :stop => sentence opening
          add_to_start_words window[0, @depth-1]
        end

        add_to_dictionary window

        # stop current sequence and start again
        window = [] if token.kind == :stop
      end
    end

    # Returns the next Token of the pending input, or nil when all pending
    # sentences are consumed. @tokens always stays an Array so that text
    # added later can still be parsed.
    def next_token
      while @tokens.empty?
        parts = @unparsed_sentences.shift
        return nil unless parts
        @tokens = tokenize(parts)
      end

      @tokens.shift
    end

    # Converts the pieces produced by String#split into Tokens, dropping the
    # empty strings split leaves between adjacent separators and bare newlines.
    def tokenize(parts)
      parts.each_with_object([]) do |part, tokens|
        next if part.empty? || part == "\n"
        tokens << Token.new(part, PUNCTUATION_KINDS.fetch(part, :word))
      end
    end

    # Queues +sentence+ for parsing after removing characters that never form
    # a token (double quotes, low-9 quotes, underscores, parentheses).
    # The caller's string is not modified.
    #
    # NOTE(review): the original code also listed ' : - and , here but threw
    # the gsub results away; those are kept because the tokenizer has explicit
    # handling for them - confirm whether stripping them was ever intended.
    def add_unparsed_sentence(sentence)
      cleaned = sentence.to_s.gsub(/["\u201E_()]/, "")

      parts = cleaned.split(@split_words)
      @unparsed_sentences << parts unless parts.empty?
    end

    # Records +tokens+ as a sentence opening if it begins with a word.
    # The first word is capitalized in place, so the dictionary key built
    # from the same Token objects matches the opening.
    def add_to_start_words(tokens)
      return if tokens.empty? || tokens[0].kind != :word

      tokens[0].word = tokens[0].word.capitalize
      start_words = tokens_to_words tokens

      @start_words[start_words] ||= tokens
    end

    # Registers the last token of +tokens+ as a successor of the words of the
    # first @depth-1 tokens.
    def add_to_dictionary(tokens)
      token = tokens.last
      return if token.word == ""

      key_words = tokens_to_words tokens[0, @depth-1]

      (@dictionary[key_words] ||= []) << token
    end

    # Returns the word strings of +tokens+ (used as Hash keys).
    def tokens_to_words(tokens)
      tokens.map(&:word)
    end

    # Joins +tokens+ into text: words are preceded by a space, every other
    # token is attached directly to the text before it.
    def tokens_to_sentence(tokens)
      parts = tokens.map { |t| t.kind == :word ? " #{t.word}" : t.word }
      parts.join.lstrip
    end

    # Returns a random recorded sentence opening (Array of Tokens).
    def select_start_words
      openings = @start_words.values
      openings[random_number(openings.length)]
    end

    # Returns a random successor of +tokens+, or a :noop Token when the
    # sequence is unknown. Successors seen more often are stored more often
    # and are therefore picked proportionally more often.
    def select_next_token(tokens)
      candidates = @dictionary[tokens_to_words(tokens)]

      return Token.new("X", :noop) if candidates.nil? || candidates.empty?
      candidates[random_number(candidates.length)]
    end

    # Like select_next_token, but only considers :word successors. Returns a
    # :noop Token when there is none (instead of retrying forever).
    def select_next_word(tokens)
      candidates = @dictionary[tokens_to_words(tokens)] || []
      words = candidates.select { |t| t.kind == :word }

      return Token.new("X", :noop) if words.empty?
      words[random_number(words.length)]
    end

    # Returns a random Integer n with 0 <= n < upper_limit (0 if upper_limit < 1).
    def random_number(upper_limit)
      return 0 if upper_limit < 1
      SecureRandom.random_number(upper_limit).to_i
    end
  end

end
282
+
283
+ #markov = Markov::Generator.new
284
+
285
+ #Dir["../../public/text/seed_*"].each do | f |
286
+ # markov.parse_source_file f
287
+ #end
288
+
289
+ #markov.dump_dictionary
data/test/helper.rb ADDED
@@ -0,0 +1,34 @@
1
require 'simplecov'

# Adds a helper to SimpleCov's configuration DSL that empties its filter list.
module SimpleCov::Configuration
  def clean_filters
    @filters = []
  end
end

SimpleCov.configure do
  clean_filters
  load_adapter 'test_frameworks'
end

# Coverage is only collected when COVERAGE is set (see the simplecov rake task).
ENV["COVERAGE"] && SimpleCov.start do
  add_filter "/.rvm/"
end
require 'rubygems'
require 'bundler'
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'test/unit'
require 'shoulda'

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))
# The library ships as lib/markov/generator.rb; there is no lib/markov_generator.rb.
require 'markov/generator'

class Test::Unit::TestCase
end
@@ -0,0 +1,7 @@
1
require 'helper'

# Basic behavioural tests for Markov::Generator's public API.
class TestMarkovGenerator < Test::Unit::TestCase
  should "default to a depth of 3" do
    assert_equal 3, Markov::Generator.new.depth
  end

  should "expose a custom depth" do
    assert_equal 4, Markov::Generator.new(4).depth
  end

  should "raise EmptyDictionaryError when nothing has been parsed" do
    assert_raises(Markov::Generator::EmptyDictionaryError) do
      Markov::Generator.new.generate_sentence
    end
  end

  should "raise FileNotFoundError for a missing source file" do
    assert_raises(Markov::Generator::FileNotFoundError) do
      Markov::Generator.new.parse_source_file("no/such/seed_file.txt")
    end
  end

  should "reproduce a single unambiguous sentence" do
    generator = Markov::Generator.new
    generator.parse_string("The quick brown fox jumps over the lazy dog.")

    # Only one chain exists, so the output is deterministic.
    assert_equal "The quick brown fox jumps over the lazy dog.",
                 generator.generate_sentence(5)
  end
end
metadata ADDED
@@ -0,0 +1,125 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: markov-generator
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.9.0
5
+ platform: ruby
6
+ authors:
7
+ - Michael Kuehl
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-05-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: shoulda
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rdoc
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.12'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.12'
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: jeweler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 2.0.1
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 2.0.1
69
+ - !ruby/object:Gem::Dependency
70
+ name: simplecov
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: A Markov Chain text generator library
84
+ email: hello@ratchet.cc
85
+ executables: []
86
+ extensions: []
87
+ extra_rdoc_files:
88
+ - LICENSE.txt
89
+ - README.rdoc
90
+ files:
91
+ - ".document"
92
+ - Gemfile
93
+ - Gemfile.lock
94
+ - LICENSE.txt
95
+ - README.rdoc
96
+ - Rakefile
97
+ - VERSION
98
+ - lib/markov/generator.rb
99
+ - test/helper.rb
100
+ - test/test_markov_generator.rb
101
+ homepage: http://github.com/ratchetcc/markov-generator
102
+ licenses:
103
+ - MIT
104
+ metadata: {}
105
+ post_install_message:
106
+ rdoc_options: []
107
+ require_paths:
108
+ - lib
109
+ required_ruby_version: !ruby/object:Gem::Requirement
110
+ requirements:
111
+ - - ">="
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
114
+ required_rubygems_version: !ruby/object:Gem::Requirement
115
+ requirements:
116
+ - - ">="
117
+ - !ruby/object:Gem::Version
118
+ version: '0'
119
+ requirements: []
120
+ rubyforge_project:
121
+ rubygems_version: 2.4.5
122
+ signing_key:
123
+ specification_version: 4
124
+ summary: Markov Chain text generator
125
+ test_files: []