markov_chain 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +11 -0
- data/Gemfile.lock +18 -0
- data/History.txt +3 -0
- data/README.rdoc +27 -0
- data/Rakefile +26 -0
- data/VERSION +1 -0
- data/lib/dictionaries/american_english +98569 -0
- data/lib/markov_chain.rb +124 -0
- data/test/markov_chain_test.rb +13 -0
- data/test/test_helper.rb +9 -0
- metadata +103 -0
data/lib/markov_chain.rb
ADDED
@@ -0,0 +1,124 @@
|
|
1
|
+
require 'singleton'
|
2
|
+
|
3
|
+
# Character-level Markov chain.
#
# The chain records, for every character, how often each other character
# directly follows it in the training strings. The single shared chain is the
# Singleton instance (MarkovChain.instance); the class-level helpers
# +generate+ and +load_dictionary!+ operate on that instance.
class MarkovChain
  include Singleton

  class << self
    # Builds a list of strings that all start with +seed+ by repeatedly
    # appending characters drawn from the chain.
    #
    # Every intermediate prefix produced while growing a word is kept, each
    # entry is stripped of characters outside a-z, and duplicates are dropped.
    #
    # seed::    String of at least two characters.
    # options:: Hash overriding any of:
    #           :max_size::        maximum number of entries returned (default 100)
    #           :max_word_length:: words stop growing at this length
    #                              (default seed.length + 8)
    #           :max_retries::     number of passes that add nothing new before
    #                              giving up (default 1000)
    #
    # Loads the default dictionary first if none has been loaded yet.
    #
    # Returns an Array of Strings sorted by ascending length.
    # Raises ArgumentError when +seed+ is shorter than two characters.
    def generate(seed, options = {})
      raise ArgumentError, 'Seed must be at least two characters' unless seed.length > 1

      defaults = {
        :max_size        => 100,
        :max_word_length => seed.length + 8,
        :max_retries     => 1000
      }
      options = defaults.merge options

      load_dictionary! unless dictionary_loaded?

      chain   = instance
      words   = [seed]
      retries = 0

      while words.length < options[:max_size] && retries < options[:max_retries]
        previous_words = words.dup
        word           = seed.dup
        previous_word  = ''

        # Grow the word one character at a time. The chain returns '' when the
        # last character has no known follower, which leaves the word unchanged
        # and ends this inner loop.
        while word.length < options[:max_word_length] && word != previous_word
          previous_word = word.dup
          word << chain.get(word.slice(word.length - 1, 1))
          words.push word.dup
        end

        words = words.map { |w| w.gsub(/[^a-z]/, '') }.uniq

        # A pass that produced nothing new counts against the retry budget so
        # the outer loop always terminates.
        retries += 1 if words == previous_words
      end

      # The inner loop can overshoot :max_size, so trim before sorting.
      # (start, length) slicing yields [] for a size of 0, where the inclusive
      # range 0..-1 would have returned every word.
      words = words[0, options[:max_size]] || []

      words.sort { |a, b| a.length <=> b.length }
    end

    # Feeds every line of a word list into the shared chain.
    #
    # file:: a Symbol naming a file in the bundled +dictionaries+ directory
    #        next to this source file, or a path to any word list with one
    #        entry per line.
    #
    # Each call adds to the counts already in the chain; nothing is reset.
    # Records the expanded path (see +dictionary+) and marks the dictionary as
    # loaded. Errors from opening or reading the file propagate to the caller.
    def load_dictionary!(file = :american_english)
      if file.is_a?(Symbol)
        file = File.join File.dirname(__FILE__), 'dictionaries', file.to_s
      end

      # Iterate over the file's lines, not over the path string itself.
      File.foreach(file) do |line|
        instance.add_str line.strip
      end

      @dictionary_loaded = true
      @dictionary = File.expand_path file
    end

    # True once load_dictionary! has completed at least once.
    def dictionary_loaded?
      !!@dictionary_loaded
    end

    # Expanded path of the most recently loaded dictionary, or nil.
    def dictionary
      @dictionary
    end
  end

  def initialize
    # Maps a character to a Hash of { following character => occurrence count }.
    @chars = {}
  end

  # Records every adjacent character pair of +str+ in the chain.
  # Strings shorter than two characters contribute nothing.
  def add_str(str)
    each_char(str).each_cons(2) do |current, following|
      add current, following
    end
  end

  # Increments the count of +next_char+ appearing directly after +char+.
  def add(char, next_char)
    @chars[char] ||= Hash.new(0)
    @chars[char][next_char] += 1
  end

  # Picks a follower of +char+ at random, weighted by how often each follower
  # was recorded. Returns '' when +char+ has no recorded followers.
  def get(char)
    followers = @chars[char]
    return '' unless followers

    total     = followers.values.inject(0) { |acc, count| acc + count }
    threshold = rand(total) + 1 # uniform in 1..total
    running   = 0

    # Walk the cumulative counts; the first follower whose running total
    # reaches the threshold is the weighted pick.
    followers.each do |candidate, count|
      running += count
      return candidate if running >= threshold
    end

    ''
  end

  private

  # Splits +str+ into single characters (newlines included, via /./m).
  # With a block, yields each character; without one, returns them as an Array.
  def each_char(str)
    return str.scan(/./m) unless block_given?

    str.scan(/./m) { |char| yield char }
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper'
|
2
|
+
|
3
|
+
# Smoke tests for MarkovChain's class-level API.
# NOTE(review): the `should` macro is not defined in this file; presumably it
# comes from test_helper — confirm.
class MarkovChainTest < Test::Unit::TestCase
  should 'load default dictionary' do
    MarkovChain.load_dictionary!

    assert MarkovChain.dictionary_loaded?

    # Only the file name is compared; the directory depends on the install path.
    loaded_name = File.basename(MarkovChain.dictionary)
    assert_equal 'american_english', loaded_name
  end

  should 'generate 100 words by default' do
    generated = MarkovChain.generate('foo')
    assert_equal 100, generated.length
  end
end
|
data/test/test_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: markov_chain
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Alex Rabarts
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-10-31 00:00:00 +00:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: rake
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
version: "0"
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: jeweler
|
37
|
+
prerelease: false
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
hash: 3
|
44
|
+
segments:
|
45
|
+
- 0
|
46
|
+
version: "0"
|
47
|
+
type: :development
|
48
|
+
version_requirements: *id002
|
49
|
+
description: Markov chain generator
|
50
|
+
email: alexrabarts@gmail.com
|
51
|
+
executables: []
|
52
|
+
|
53
|
+
extensions: []
|
54
|
+
|
55
|
+
extra_rdoc_files:
|
56
|
+
- README.rdoc
|
57
|
+
files:
|
58
|
+
- Gemfile
|
59
|
+
- Gemfile.lock
|
60
|
+
- History.txt
|
61
|
+
- README.rdoc
|
62
|
+
- Rakefile
|
63
|
+
- VERSION
|
64
|
+
- lib/dictionaries/american_english
|
65
|
+
- lib/markov_chain.rb
|
66
|
+
- test/markov_chain_test.rb
|
67
|
+
- test/test_helper.rb
|
68
|
+
has_rdoc: true
|
69
|
+
homepage: http://github.com/alexrabarts/markov_chain
|
70
|
+
licenses: []
|
71
|
+
|
72
|
+
post_install_message:
|
73
|
+
rdoc_options: []
|
74
|
+
|
75
|
+
require_paths:
|
76
|
+
- lib
|
77
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
78
|
+
none: false
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
hash: 3
|
83
|
+
segments:
|
84
|
+
- 0
|
85
|
+
version: "0"
|
86
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
87
|
+
none: false
|
88
|
+
requirements:
|
89
|
+
- - ">="
|
90
|
+
- !ruby/object:Gem::Version
|
91
|
+
hash: 3
|
92
|
+
segments:
|
93
|
+
- 0
|
94
|
+
version: "0"
|
95
|
+
requirements: []
|
96
|
+
|
97
|
+
rubyforge_project:
|
98
|
+
rubygems_version: 1.4.2
|
99
|
+
signing_key:
|
100
|
+
specification_version: 3
|
101
|
+
summary: A simple Markov chain generator.
|
102
|
+
test_files: []
|
103
|
+
|