literate_randomizer 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,10 @@
1
+ %w{version markov}.each do |file|
2
+ require File.join(File.dirname(__FILE__),"literate_randomizer", file)
3
+ end
4
+
5
module LiterateRandomizer
  # Convenience constructor: builds a MarkovChain from the given options hash.
  # The options are handed to MarkovChain.new untouched.
  def self.create(options = {})
    MarkovChain.new(options)
  end
end
@@ -0,0 +1,173 @@
1
+ #source: http://openmonkey.com/blog/2008/10/23/using-markov-chains-to-provide-english-language-seed-data-for-your-rails-application/
2
+ # Tim Riley
3
+ # Gemified by Shane Brinkman-Davis
4
+
5
module LiterateRandomizer
  # Generates pseudo-English words, sentences and paragraphs by walking a
  # first-order Markov chain built from a body of source text.
  #
  # The misspelled identifiers ("sentance", "sentances", "source_sentances")
  # are part of the public interface and are kept for compatibility.
  class MarkovChain
    # Sentence-ending punctuation; "." appears 8 times out of 10 entries, so a
    # uniform pick from this list favours it accordingly.
    DEFAULT_PUNCTUATION_DISTRIBUTION = %w{. . . . . . . . ? !}.freeze

    attr_accessor :randomizer, :init_options, :punctuation_distribution
    attr_reader :markov_words, :words, :first_words

    # options:
    #   :source_material => string OR
    #   :source_material_file => filename
    #   :randomizer - responds to .rand(limit) - this primarily exists for testing
    #   :punctuation_distribution => DEFAULT_PUNCTUATION_DISTRIBUTION - punctuation is randomly selected from this array
    def initialize(options={})
      @init_options = options
      # Read the randomizer from the options; the accessor is still nil here.
      @randomizer = options[:randomizer] || Random.new
      # Each instance gets its own copy of the default so that mutating it
      # through the accessor cannot leak into other instances.
      @punctuation_distribution = options[:punctuation_distribution] || DEFAULT_PUNCTUATION_DISTRIBUTION.dup

      populate
    end

    # Absolute path of the text file used when no source material is supplied.
    def default_source_material
      File.expand_path File.join(File.dirname(__FILE__),"..","..","data","the_lost_world_by_arthur_conan_doyle.txt")
    end

    # Returns the source text as a string.
    # options:
    #   :source_material => string
    #   :source_material_file => filename
    # Falls back to reading default_source_material when neither is given.
    def source_material(options=init_options)
      options[:source_material] || File.read(options[:source_material_file] || default_source_material)
    end

    # Record one observation of next_word directly following word.
    def chain_add(word, next_word)
      markov_words[word] ||= Hash.new(0)
      markov_words[word][next_word] += 1
    end

    # Reduce a raw token to the run that starts and ends with an ASCII letter;
    # apostrophes and hyphens inside that run are kept. Returns "" when the
    # token is nil or contains no letter.
    def scrub_word(word)
      word &&= word[/[A-Za-z][A-Za-z'-]*/]
      word &&= word[/[A-Za-z'-]*[A-Za-z]/]
      (word && word.strip) || ""
    end

    # Split a string on whitespace and return the non-empty scrubbed words.
    def scrub_word_list(word_list)
      word_list.split(/[\s]+/).map { |token| scrub_word(token) }.reject { |scrubbed| scrubbed.empty? }
    end

    # Upper-case the first character only, leaving the rest untouched.
    # An empty string is returned unchanged.
    def capitalize(word)
      return word if word.empty?
      word[0, 1].upcase + word[1..-1]
    end

    # Split the source text into sentence fragments. The pattern contains a
    # capture group, so String#split also returns the matched separators as
    # elements of the result.
    def source_sentances
      source_material.split(/([.?!"]\s|--| ')+/)
    end

    # remove all dead-end words (entries whose follower table is empty)
    def prune_markov_words
      @markov_words.delete_if { |_word, followers| followers.empty? }
    end

    # Build @words (every word seen), @first_words (first word of each
    # fragment) and @markov_words (word => {follower => count}).
    def populate_markov_words
      @markov_words = {}
      @words = {}
      @first_words = {}
      source_sentances.each do |sentance|
        word_list = scrub_word_list sentance
        # NOTE(review): fragments that scrub to an empty list (e.g. the
        # separators returned by source_sentances) record a nil key here, so
        # first_word can return nil. Left as-is so that existing counts and
        # seeded output stay stable - confirm before changing.
        @first_words[word_list[0]] = true
        word_list.each { |word| @words[word] = true }
        word_list.each_cons(2) { |word, follower| chain_add word, follower }
      end
      prune_markov_words
    end

    # Pre-compute, for every chained word, the total number of follower
    # observations; next_word uses it as the bound for its weighted draw.
    def populate_markov_sum
      @markov_weighted_sum = {}
      @markov_words.each do |word, followers|
        @markov_weighted_sum[word] = followers.values.inject(0, :+)
      end
    end

    # (Re)build all lookup tables from the source material.
    def populate
      # Drop memoized key lists so a re-populate is reflected by the pickers.
      @cached_word_keys = @cached_first_word_keys = @cached_markov_word_keys = nil
      populate_markov_words
      populate_markov_sum
    end

    # Resolve a count option: an Integer is returned as-is; for a range a value
    # in min...max is drawn (the upper bound is exclusive, which existing
    # seeded output relies on). A range whose min equals its max yields that
    # value without consulting the randomizer.
    def rand_count(r)
      return r if r.kind_of? Integer
      low = r.min
      span = r.max - low
      return low if span.zero?
      rand(span) + low
    end

    def inspect
      "#<#{self.class}: #{@words.length} words, #{@markov_words.length} word-chains, #{@first_words.length} first_words>"
    end

    # Pick a follower of word at random, weighted by how often it followed
    # word in the source. Returns nil when word has no recorded followers.
    def next_word(word)
      followers = markov_words[word]
      return unless followers
      target = rand(@markov_weighted_sum[word]) + 1
      running_total = 0
      follower, _count = followers.find do |_candidate, count|
        running_total += count
        running_total >= target
      end
      follower
    end

    # Delegate to the configured randomizer.
    def rand(limit=nil)
      @randomizer.rand(limit)
    end

    # return a random word
    def word
      @cached_word_keys ||= words.keys
      @cached_word_keys[rand(@cached_word_keys.length)]
    end

    # return a random first word of a sentence
    def first_word
      @cached_first_word_keys ||= first_words.keys
      @cached_first_word_keys[rand(@cached_first_word_keys.length)]
    end

    # return a random word that has at least one recorded follower
    def markov_word
      @cached_markov_word_keys ||= markov_words.keys
      @cached_markov_word_keys[rand(@cached_markov_word_keys.length)]
    end

    # return a random entry of punctuation_distribution
    def punctuation
      @punctuation_distribution[rand(@punctuation_distribution.length)]
    end

    # return a random sentence
    # options:
    #   * :first_word => nil - the start word
    #   * :words => range or int - number of words in the sentence
    #   * :punctuation => nil - punctuation to end the sentence with (nil == randomly selected from punctuation_distribution)
    def sentance(options={})
      word = options[:first_word] || self.markov_word
      count = rand_count(options[:words] || (3..15))
      punctuation = options[:punctuation] || self.punctuation

      # next_word is advanced once per slot, including after the final one, so
      # the sequence of random draws is the same for every requested length.
      chain = Array.new(count) do
        current = word
        word = next_word(word)
        current
      end
      # A dead-end leaves nil slots; compact makes the sentence shorter instead.
      capitalize(chain.compact.join(" ") + punctuation)
    end

    # return a random paragraph
    # options:
    #   * :first_word => nil - the first word of the paragraph
    #   * :words => range or int - number of words in each sentence
    #   * :sentances => range or int - number of sentences in the paragraph
    #   * :punctuation => nil - punctuation to end the paragraph with (nil == randomly selected from punctuation_distribution)
    def paragraph(options={})
      count = rand_count(options[:sentances] || (5..15))

      Array.new(count) do |i|
        op = options.clone
        # :punctuation only applies to the last sentence, :first_word to the first.
        op.delete :punctuation unless i == count - 1
        op.delete :first_word unless i == 0
        sentance op
      end.join(" ")
    end
  end
end
@@ -0,0 +1,3 @@
1
module LiterateRandomizer
  # Release version of the gem; read by the gemspec.
  VERSION = '0.1.0'
end
@@ -0,0 +1,21 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'literate_randomizer/version'
5
+
6
# Packaging metadata for the literate_randomizer gem.
Gem::Specification.new do |spec|
  spec.name          = "literate_randomizer"
  spec.version       = LiterateRandomizer::VERSION
  spec.authors       = ["Shane Brinkman-Davis"]
  spec.email         = ["shanebdavis@gmail.com"]
  spec.description   = %q{A random sentence and paragraph generator gem. Using Markov chains, this generates near-english prose.}
  spec.summary       = %q{A random sentence and paragraph generator gem. Using Markov chains, this generates near-english prose.}
  spec.homepage      = ""

  # Package every file tracked by git; executables and test files are
  # selected from that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency 'rspec', '~> 2.6.0'
end
@@ -0,0 +1,70 @@
1
+ require File.join(File.dirname(__FILE__),"..","lib","literate_randomizer")
2
+
3
describe LiterateRandomizer do

  # Returns a shared randomizer with its random source re-seeded, so every
  # example starts from the same random sequence. The instance is built once
  # and kept in a global; options are only honoured on that first call.
  def new_lr(options={})
    $lr ||= LiterateRandomizer.create options
    $lr.randomizer = Random.new(1)
    $lr
  end

  it "should be possible to create a randomizer" do
    lr = new_lr
    lr.should_not == nil
  end

  it "words.length should be the number of words in the file" do
    new_lr.words.length.should == 9143
  end

  it "first_words.length should be the number words starting sentances in the file" do
    new_lr.first_words.length.should == 754
  end

  it "source_sentances.length should be the number of sentances in the file" do
    new_lr.source_sentances.length.should == 10699
    # Compare against the number of distinct first words, not the character
    # length of a single random first word.
    new_lr.source_sentances.length.should > new_lr.first_words.length
  end

  it "word should return a random word" do
    new_lr.word.should == "own"
  end

  it "sentance should return a random sentance" do
    new_lr.sentance.should == "Bad form of my own chances are a riding-whip!"
  end

  it "sentance length should work" do
    new_lr.sentance(:words => 1).should == "Bad?"
    new_lr.sentance(:words => 3).should == "Bad job for?"
    new_lr.sentance(:words => 5).should == "Bad job for a final?"
    new_lr.sentance(:words => 7).should == "Bad job for a final credit of?"
    new_lr.sentance(:words => 9).should == "Bad job for a final credit of the side?"
    new_lr.sentance(:words => 2..7).should == "Bad job for a final credit?"
  end

  it "successive calls should vary" do
    lr = new_lr
    lr.sentance.should == "Bad form of my own chances are a riding-whip!"
    lr.sentance.should == "Hit you that book down below as his tattered sketch-book which held."
    lr.sentance.should == "Seated upon their journey up my sleeve and incalculable people start to-morrow!"
  end

  it "paragraph should work" do
    lr = new_lr
    lr.paragraph.should == "Bad form of my own chances are a riding-whip! Hit you that book down below as his tattered sketch-book which held. Seated upon their journey up my sleeve and incalculable people start to-morrow! Telling you propose to this half-educated age of the bushes at last supreme! Placed over us. Rubbing his strong sunlight struck me and Fate with the effect of. Columns until he came at a. Elusive enemies while beneath the main river up in it because on. Fully justified in the big as the bank of that the. Variety of photographs said for the words!"
  end

  it "paragraph sentance and word counts should work" do
    new_lr.paragraph(:sentances => 5, :words=>3).should == "Bad job for? Discreetly vague way. Melee in the. Gleam of a. Puffing red-faced irascible."
    new_lr.paragraph(:sentances => 2..4, :words=>3).should == "Bad job for? Discreetly vague way. Melee in the."
  end

  it "first_word should work" do
    new_lr.paragraph(:first_word => "A",:sentances => 5, :words=>3).should == "A roaring rumbling. Instanced a most. Melee in the. Gleam of a. Puffing red-faced irascible."
  end

  it "punctuation should work" do
    new_lr.paragraph(:punctuation => "!!!",:sentances => 5, :words=>3).should == "Bad job for? Discreetly vague way. Melee in the. Gleam of a. Puffing as a!!!"
  end
end
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: literate_randomizer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Shane Brinkman-Davis
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-10-18 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 2.6.0
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 2.6.0
30
+ description: A random sentence and paragraph generator gem. Using Markov chains, this
31
+ generates near-english prose.
32
+ email:
33
+ - shanebdavis@gmail.com
34
+ executables: []
35
+ extensions: []
36
+ extra_rdoc_files: []
37
+ files:
38
+ - .gitignore
39
+ - Gemfile
40
+ - LICENSE.txt
41
+ - README.md
42
+ - Rakefile
43
+ - data/the_lost_world_by_arthur_conan_doyle.txt
44
+ - lib/literate_randomizer.rb
45
+ - lib/literate_randomizer/markov.rb
46
+ - lib/literate_randomizer/version.rb
47
+ - literate_randomizer.gemspec
48
+ - spec/literate_randomizer_spec.rb
49
+ homepage: ''
50
+ licenses: []
51
+ post_install_message:
52
+ rdoc_options: []
53
+ require_paths:
54
+ - lib
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ none: false
57
+ requirements:
58
+ - - ! '>='
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ! '>='
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ requirements: []
68
+ rubyforge_project:
69
+ rubygems_version: 1.8.24
70
+ signing_key:
71
+ specification_version: 3
72
+ summary: A random sentence and paragraph generator gem. Using Markov chains, this
73
+ generates near-english prose.
74
+ test_files:
75
+ - spec/literate_randomizer_spec.rb