literate_randomizer 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +59 -0
- data/Rakefile +1 -0
- data/data/the_lost_world_by_arthur_conan_doyle.txt +8380 -0
- data/lib/literate_randomizer.rb +10 -0
- data/lib/literate_randomizer/markov.rb +173 -0
- data/lib/literate_randomizer/version.rb +3 -0
- data/literate_randomizer.gemspec +21 -0
- data/spec/literate_randomizer_spec.rb +70 -0
- metadata +75 -0
@@ -0,0 +1,173 @@
|
|
1
|
+
#source: http://openmonkey.com/blog/2008/10/23/using-markov-chains-to-provide-english-language-seed-data-for-your-rails-application/
|
2
|
+
# Tim Riley
|
3
|
+
# Gemified by Shane Brinkman-Davis
|
4
|
+
|
5
|
+
module LiterateRandomizer
  # Generates pseudo-English words, sentences and paragraphs from a
  # first-order Markov chain built over a body of source text.
  #
  # The chain is built once, in #initialize. For every word that is followed
  # by another word inside a source sentence, #markov_words stores a table of
  # follower => occurrence count; #next_word then picks a follower with
  # probability proportional to that count.
  #
  # The misspelled public names (sentance, source_sentances, :sentances) are
  # part of the published interface and are kept as they are.
  class MarkovChain
    # Sentence-ending punctuation is drawn uniformly from this array, so the
    # repeated "." entries make a full stop the most likely ending.
    DEFAULT_PUNCTUATION_DISTRIBUTION = %w{. . . . . . . . ? !}

    attr_accessor :randomizer, :init_options, :punctuation_distribution
    attr_reader :markov_words, :words, :first_words

    # options:
    #   :source_material => string OR
    #   :source_material_file => filename
    #   :randomizer - responds to .rand(limit) - this primarily exists for testing
    #   :punctuation_distribution => DEFAULT_PUNCTUATION_DISTRIBUTION - punctuation is randomly selected from this array
    def initialize(options={})
      @init_options = options
      # Read the option itself. The previous code consulted the (still nil)
      # randomizer accessor, so a caller-supplied :randomizer was ignored.
      @randomizer = options[:randomizer] || Random.new
      @punctuation_distribution = options[:punctuation_distribution] || DEFAULT_PUNCTUATION_DISTRIBUTION

      populate
    end

    # Absolute path of the text file used when no source material is given.
    def default_source_material
      File.expand_path File.join(File.dirname(__FILE__),"..","..","data","the_lost_world_by_arthur_conan_doyle.txt")
    end

    # Returns the text the chain is built from.
    #
    # options:
    #   :source_material => string
    #   :source_material_file => filename
    #
    # An explicit string wins; otherwise the named file is read, falling back
    # to #default_source_material.
    def source_material(options=init_options)
      options[:source_material] || File.read(options[:source_material_file] || default_source_material)
    end

    # Records one observation of next_word directly following word.
    def chain_add(word, next_word)
      markov_words[word] ||= Hash.new(0)
      markov_words[word][next_word] += 1
    end

    # Reduces a raw token to its first run of letters, apostrophes and
    # hyphens, with any trailing apostrophes/hyphens trimmed so the result
    # starts and ends with a letter. Returns "" when the token contains no
    # letter at all (or is nil).
    def scrub_word(word)
      word &&= word[/[A-Za-z][A-Za-z'-]*/]
      word &&= word[/[A-Za-z'-]*[A-Za-z]/]
      (word && word.strip) || ""
    end

    # Splits a string on whitespace and returns the scrubbed, non-empty words.
    def scrub_word_list(word_list)
      word_list.split(/[\s]+/).map { |token| scrub_word(token) }.reject(&:empty?)
    end

    # Returns a copy of word with its first character upcased. An empty
    # string is returned as "" instead of raising.
    def capitalize(word)
      return "" if word.empty?
      word[0, 1].upcase + word[1..-1]
    end

    # Splits the source material into sentence-like fragments. Because the
    # pattern contains a capture group, String#split also returns the
    # captured separators as elements of the result.
    def source_sentances
      source_material.split(/([.?!"]\s|--| ')+/)
    end

    # remove all dead-end words (entries whose follower table is empty)
    def prune_markov_words
      # The previous code called delete on @markov_key, which is never
      # assigned; the table being pruned is @markov_words.
      @markov_words.delete_if { |_word, followers| followers.empty? }
    end

    # Rebuilds @markov_words, @words and @first_words from the source text.
    def populate_markov_words
      @markov_words = {}
      @words = {}
      @first_words = {}
      source_sentances.each do |sentance|
        word_list = scrub_word_list sentance
        # NOTE(review): a fragment without any word (for example a captured
        # separator) stores a nil key here. Kept as is because changing it
        # alters first_words.length and the sequence #first_word draws from.
        @first_words[word_list[0]] = true
        word_list.each_with_index do |word, index|
          @words[word] = true
          following = word_list[index+1]
          chain_add word, following if following
        end
      end
      prune_markov_words
    end

    # Caches, for every chain word, the total number of follower
    # observations; #next_word uses it as the upper bound of its draw.
    def populate_markov_sum
      @markov_weighted_sum = {}
      @markov_words.each do |word, followers|
        @markov_weighted_sum[word] = followers.values.inject(0) { |sum, count| sum + count }
      end
    end

    # Builds the chain and its cached weights.
    def populate
      populate_markov_words
      populate_markov_sum
    end

    # Resolves a count option: an Integer is returned unchanged, a range
    # yields a random value from r.min up to but NOT including r.max. The
    # exclusive upper bound is existing behaviour and is kept, since changing
    # it would change every seeded output.
    def rand_count(r)
      return r if r.kind_of? Integer
      span = r.max - r.min
      # Random#rand rejects a limit of 0, so a single-value range such as
      # 4..4 is answered directly (no random number is consumed).
      return r.min if span.zero?
      rand(span) + r.min
    end

    def inspect
      "#<#{self.class}: #{@words.length} words, #{@markov_words.length} word-chains, #{@first_words.length} first_words>"
    end

    # Returns a random follower of word, weighted by how often it followed
    # word in the source, or nil when word has no recorded followers.
    def next_word(word)
      followers = markov_words[word]
      return unless followers

      target = rand(@markov_weighted_sum[word]) + 1
      running_total = 0
      chosen = followers.find do |_candidate, count|
        running_total += count
        running_total >= target
      end
      chosen.first
    end

    # Delegates to the configured randomizer; limit is forwarded unchanged.
    def rand(limit=nil)
      @randomizer.rand(limit)
    end

    # return a random word
    def word
      @cached_word_keys ||= words.keys
      @cached_word_keys[rand(@cached_word_keys.length)]
    end

    # return a random first word of a sentance
    def first_word
      @cached_first_word_keys ||= first_words.keys
      @cached_first_word_keys[rand(@cached_first_word_keys.length)]
    end

    # return a random word that has at least one recorded follower
    def markov_word
      @cached_markov_word_keys ||= markov_words.keys
      @cached_markov_word_keys[rand(@cached_markov_word_keys.length)]
    end

    # return a random entry of punctuation_distribution
    def punctuation
      @punctuation_distribution[rand(@punctuation_distribution.length)]
    end

    # return a random sentance
    # options:
    #   * :first_word => nil - the start word
    #   * :words => range or int - number of words in sentance
    #   * :punctuation => nil - punctuation to end the sentance with (nil == randomly selected from punctuation_distribution)
    #
    # The sentance is shorter than requested when the chain reaches a word
    # without followers.
    def sentance(options={})
      # The three draws below happen in this order on purpose: seeded output
      # depends on the order in which random numbers are consumed.
      word = options[:first_word] || markov_word
      count = rand_count(options[:words] || (3..15))
      ending = options[:punctuation] || punctuation

      # Each step emits the current word and then advances the chain, so the
      # chain is advanced once more than strictly needed; that extra draw is
      # kept for the same reason.
      chain = count.times.collect do
        word.tap { word = next_word(word) }
      end
      capitalize(chain.compact.join(" ") + ending)
    end

    # return a random paragraph
    # options:
    #   * :first_word => nil - the first word of the paragraph
    #   * :words => range or int - number of words in sentance
    #   * :sentances => range or int - number of sentances in paragraph
    #   * :punctuation => nil - punctuation to end the paragraph with (nil == randomly selected from punctuation_distribution)
    def paragraph(options={})
      count = rand_count(options[:sentances] || (5..15))

      sentances = count.times.collect do |i|
        op = options.dup
        # :punctuation applies to the last sentance only, :first_word to the
        # first one only.
        op.delete :punctuation unless i==count-1
        op.delete :first_word unless i==0
        sentance op
      end
      sentances.join(" ")
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'literate_randomizer/version'
|
5
|
+
|
6
|
+
# Gem specification for literate_randomizer. The packaged file list is taken
# from `git ls-files`, so the gem must be built inside a git checkout.
Gem::Specification.new do |spec|
  # Identity and authorship.
  spec.name          = "literate_randomizer"
  spec.version       = LiterateRandomizer::VERSION
  spec.authors       = ["Shane Brinkman-Davis"]
  spec.email         = ["shanebdavis@gmail.com"]
  spec.description   = %q{A random sentence and paragraph generator gem. Using Markov chains, this generates near-english prose.}
  spec.summary       = %q{A random sentence and paragraph generator gem. Using Markov chains, this generates near-english prose.}
  spec.homepage      = ""

  # Packaged files: everything tracked by git; executables and test files
  # are picked out of that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Development-only dependency used by the spec suite.
  spec.add_development_dependency 'rspec', '~> 2.6.0'
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),"..","lib","literate_randomizer")
|
2
|
+
|
3
|
+
# Golden-value specs: every example re-seeds the randomizer with
# Random.new(1), so the expected strings are tied to that seed and to the
# source text the randomizer was built from.
describe LiterateRandomizer do

  # Returns the shared randomizer with its random source reset to seed 1.
  # The instance is created once and memoized in $lr, so `options` only has
  # an effect on the very first call.
  def new_lr(options={})
    $lr ||= LiterateRandomizer.create options
    $lr.randomizer = Random.new(1)
    $lr
  end

  it "should be possible to create a randomizer" do
    lr = new_lr
    lr.should_not == nil
  end

  it "words.length should be the number of words in the file" do
    new_lr.words.length.should == 9143
  end

  it "first_words.length should be the number words starting sentances in the file" do
    new_lr.first_words.length.should == 754
  end

  it "source_sentances.length should be the number of sentances in the file" do
    new_lr.source_sentances.length.should == 10699
    # Compare against the number of distinct first words. The previous
    # expectation used first_word.length, i.e. the character count of one
    # randomly chosen word.
    new_lr.source_sentances.length.should > new_lr.first_words.length
  end

  it "word should return a random word" do
    new_lr.word.should == "own"
  end

  it "sentance should return a random sentance" do
    new_lr.sentance.should == "Bad form of my own chances are a riding-whip!"
  end

  it "sentance length should work" do
    new_lr.sentance(:words => 1).should == "Bad?"
    new_lr.sentance(:words => 3).should == "Bad job for?"
    new_lr.sentance(:words => 5).should == "Bad job for a final?"
    new_lr.sentance(:words => 7).should == "Bad job for a final credit of?"
    new_lr.sentance(:words => 9).should == "Bad job for a final credit of the side?"
    new_lr.sentance(:words => 2..7).should == "Bad job for a final credit?"
  end

  it "successive calls should vary" do
    lr = new_lr
    lr.sentance.should == "Bad form of my own chances are a riding-whip!"
    lr.sentance.should == "Hit you that book down below as his tattered sketch-book which held."
    lr.sentance.should == "Seated upon their journey up my sleeve and incalculable people start to-morrow!"
  end

  it "paragraph should work" do
    lr = new_lr
    lr.paragraph.should == "Bad form of my own chances are a riding-whip! Hit you that book down below as his tattered sketch-book which held. Seated upon their journey up my sleeve and incalculable people start to-morrow! Telling you propose to this half-educated age of the bushes at last supreme! Placed over us. Rubbing his strong sunlight struck me and Fate with the effect of. Columns until he came at a. Elusive enemies while beneath the main river up in it because on. Fully justified in the big as the bank of that the. Variety of photographs said for the words!"
  end

  # Previously a second example named "first_word should work"; it exercises
  # the :sentances and :words options, so it is named for them.
  it "paragraph :sentances and :words options should work" do
    new_lr.paragraph(:sentances => 5, :words=>3).should == "Bad job for? Discreetly vague way. Melee in the. Gleam of a. Puffing red-faced irascible."
    new_lr.paragraph(:sentances => 2..4, :words=>3).should == "Bad job for? Discreetly vague way. Melee in the."
  end

  it "first_word should work" do
    new_lr.paragraph(:first_word => "A",:sentances => 5, :words=>3).should == "A roaring rumbling. Instanced a most. Melee in the. Gleam of a. Puffing red-faced irascible."
  end

  it "punctuation should work" do
    new_lr.paragraph(:punctuation => "!!!",:sentances => 5, :words=>3).should == "Bad job for? Discreetly vague way. Melee in the. Gleam of a. Puffing as a!!!"
  end
end
|
metadata
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: literate_randomizer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Shane Brinkman-Davis
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-10-18 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rspec
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 2.6.0
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 2.6.0
|
30
|
+
description: A random sentence and paragraph generator gem. Using Markov chains, this
|
31
|
+
generates near-english prose.
|
32
|
+
email:
|
33
|
+
- shanebdavis@gmail.com
|
34
|
+
executables: []
|
35
|
+
extensions: []
|
36
|
+
extra_rdoc_files: []
|
37
|
+
files:
|
38
|
+
- .gitignore
|
39
|
+
- Gemfile
|
40
|
+
- LICENSE.txt
|
41
|
+
- README.md
|
42
|
+
- Rakefile
|
43
|
+
- data/the_lost_world_by_arthur_conan_doyle.txt
|
44
|
+
- lib/literate_randomizer.rb
|
45
|
+
- lib/literate_randomizer/markov.rb
|
46
|
+
- lib/literate_randomizer/version.rb
|
47
|
+
- literate_randomizer.gemspec
|
48
|
+
- spec/literate_randomizer_spec.rb
|
49
|
+
homepage: ''
|
50
|
+
licenses: []
|
51
|
+
post_install_message:
|
52
|
+
rdoc_options: []
|
53
|
+
require_paths:
|
54
|
+
- lib
|
55
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
56
|
+
none: false
|
57
|
+
requirements:
|
58
|
+
- - ! '>='
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '0'
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ! '>='
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
67
|
+
requirements: []
|
68
|
+
rubyforge_project:
|
69
|
+
rubygems_version: 1.8.24
|
70
|
+
signing_key:
|
71
|
+
specification_version: 3
|
72
|
+
summary: A random sentence and paragraph generator gem. Using Markov chains, this
|
73
|
+
generates near-english prose.
|
74
|
+
test_files:
|
75
|
+
- spec/literate_randomizer_spec.rb
|