marky_markov 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +11 -5
- data/lib/marky_markov.rb +2 -2
- data/lib/marky_markov/markov_dictionary.rb +4 -5
- data/lib/marky_markov/markov_sentence_generator.rb +13 -15
- data/lib/marky_markov/persistent_dictionary.rb +6 -7
- data/marky_markov.gemspec +82 -0
- data/spec/marky_markov/markov_dictionary_spec.rb +17 -28
- data/spec/textdictcompare.mmd +35 -1
- metadata +8 -10
data/README.md
CHANGED
@@ -4,6 +4,12 @@ Marky Markov and the Funky Sentences
|
|
4
4
|
Marky Markov is a naïve experiment in Markov Chain generation implemented
|
5
5
|
in Ruby. It can be used both from the command-line and as a library within your code.
|
6
6
|
|
7
|
+
NOTE: 0.3.0 now uses arrays with multiple entries per word instead of a
|
8
|
+
hash key for each word with the value representing number of occurrences.
|
9
|
+
While a less elegant solution, it leads to faster text generation. We
|
10
|
+
are also now using ox instead of yajl-json to store the dictionary
|
11
|
+
which should lead to faster usage in the command-line app.
|
12
|
+
|
7
13
|
NOTE: In the transition from 0.1.3 to 0.2.0 MarkyMarkov has added the
|
8
14
|
ability to generate proper sentences (generate_n_sentences) instead of simply a
|
9
15
|
maximum number of words. The command-line app has changed to sentences as its default
|
@@ -46,9 +52,9 @@ number in there, well, you can!
|
|
46
52
|
markov.generate_20_words
|
47
53
|
|
48
54
|
The default dictionary depth is two words.
|
49
|
-
`{"I hope"
|
50
|
-
"hope this" =>
|
51
|
-
"this makes" =>
|
55
|
+
`{["I", "hope"] => ["this"],
|
56
|
+
["hope", "this"] => ["makes"],
|
57
|
+
["this", "makes"] => ["sense"]}`
|
52
58
|
but it can be set to a depth between 1 and 5 upon dictionary creation,
|
53
59
|
though really any higher than 3 and it starts to simply print passages
|
54
60
|
from the source text.
|
@@ -56,8 +62,8 @@ from the source text.
|
|
56
62
|
markov = MarkyMarkov::Dictionary.new('dictionary', 3)
|
57
63
|
|
58
64
|
creates a dictionary with a depth of three words.
|
59
|
-
`{"I hope this"
|
60
|
-
"hope this makes" =>
|
65
|
+
`{["I", "hope", "this"] => ["makes"],
|
66
|
+
["hope", "this", "makes"] => ["sense"]}`
|
61
67
|
|
62
68
|
If you want to delete a dictionary you call it upon the Dictionary class itself while
|
63
69
|
passing in the filename/location.
|
data/lib/marky_markov.rb
CHANGED
@@ -4,12 +4,12 @@
|
|
4
4
|
require_relative 'marky_markov/persistent_dictionary'
|
5
5
|
require_relative 'marky_markov/markov_sentence_generator'
|
6
6
|
|
7
|
-
# @version = 0.
|
7
|
+
# @version = 0.3.0
|
8
8
|
# @author Matt Furden
|
9
9
|
# Module containing TemporaryDictionary and Dictionary for creation of
|
10
10
|
# Markov Chain Dictionaries and generating sentences from those dictionaries.
|
11
11
|
module MarkyMarkov
|
12
|
-
VERSION = '0.
|
12
|
+
VERSION = '0.3.0'
|
13
13
|
|
14
14
|
class TemporaryDictionary
|
15
15
|
# Create a new Temporary Markov Chain Dictionary and sentence generator for use.
|
@@ -24,9 +24,8 @@ class MarkovDictionary
|
|
24
24
|
# @example Adding a word
|
25
25
|
# add_word("Hello", "world")
|
26
26
|
def add_word(rootword, followedby)
|
27
|
-
@dictionary[rootword] ||=
|
28
|
-
@dictionary[rootword]
|
29
|
-
@dictionary[rootword][followedby] += 1
|
27
|
+
@dictionary[rootword] ||= []
|
28
|
+
@dictionary[rootword] << followedby
|
30
29
|
end
|
31
30
|
|
32
31
|
# Given a source of text, be it a text file (file=true) or a string (file=false)
|
@@ -39,8 +38,8 @@ class MarkovDictionary
|
|
39
38
|
def parse_source(source, file=true)
|
40
39
|
contents = file ? open_source(source) : contents = source.split
|
41
40
|
contents.each_cons(@depth+1) do |words|
|
42
|
-
self.add_word(words[0..-2]
|
41
|
+
self.add_word(words[0..-2], words[-1])
|
43
42
|
end
|
44
|
-
@dictionary[contents.last(@depth)
|
43
|
+
@dictionary[contents.last(@depth)] ||= []
|
45
44
|
end
|
46
45
|
end
|
@@ -39,16 +39,9 @@ class MarkovSentenceGenerator
|
|
39
39
|
def weighted_random(lastword)
|
40
40
|
# If word has no words in its dictionary (last word in source text file)
|
41
41
|
# have it pick a random word to display instead.
|
42
|
-
@dictionary.dictionary
|
43
|
-
|
44
|
-
|
45
|
-
random = rand(total)+1
|
46
|
-
@dictionary.dictionary[lastword].each do |word, occurs|
|
47
|
-
random -= occurs
|
48
|
-
if random <= 0
|
49
|
-
return word
|
50
|
-
end
|
51
|
-
end
|
42
|
+
if word = @dictionary.dictionary[lastword]
|
43
|
+
word.sample
|
44
|
+
end
|
52
45
|
end
|
53
46
|
|
54
47
|
# Generates a sentence of (wordcount) length using the weighted_random function.
|
@@ -57,9 +50,13 @@ class MarkovSentenceGenerator
|
|
57
50
|
# @return [String] the words, hopefully forming sentences generated.
|
58
51
|
def generate(wordcount)
|
59
52
|
sentence = []
|
60
|
-
sentence.concat(random_capitalized_word
|
53
|
+
sentence.concat(random_capitalized_word)
|
61
54
|
(wordcount-1).times do
|
62
|
-
|
55
|
+
if word = weighted_random(sentence.last(@depth))
|
56
|
+
sentence << word
|
57
|
+
else
|
58
|
+
sentence.concat(random_capitalized_word)
|
59
|
+
end
|
63
60
|
end
|
64
61
|
sentence.pop(sentence.length-wordcount)
|
65
62
|
sentence.join(' ')
|
@@ -77,12 +74,13 @@ class MarkovSentenceGenerator
|
|
77
74
|
# If less than 30 keys, use that plus five as your maximum sentence length.
|
78
75
|
maximum_length = key_count < 30 ? key_count + 5 : 30
|
79
76
|
stop_at_index = sentence.count + maximum_length
|
80
|
-
sentence.concat(random_capitalized_word
|
77
|
+
sentence.concat(random_capitalized_word)
|
81
78
|
until (/[.!?]/ =~ sentence.last[-1])
|
82
|
-
|
79
|
+
word = weighted_random(sentence.last(@depth))
|
80
|
+
sentence << word unless word.nil?
|
83
81
|
# If a word ending with a . ! or ? isn't found after 30 words,
|
84
82
|
# just add a period as there may be none in the dictionary.
|
85
|
-
sentence[-1] << "." if sentence.count > stop_at_index
|
83
|
+
sentence[-1] << "." if word.nil? || sentence.count > stop_at_index
|
86
84
|
end
|
87
85
|
end
|
88
86
|
sentence.join(' ')
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require '
|
1
|
+
require 'ox'
|
2
2
|
require_relative 'markov_dictionary'
|
3
3
|
|
4
4
|
# @private
|
@@ -28,10 +28,9 @@ class PersistentDictionary < MarkovDictionary
|
|
28
28
|
# otherwise it creates an empty hash.
|
29
29
|
def open_dictionary
|
30
30
|
if File.exists?(@dictionarylocation)
|
31
|
-
File.
|
32
|
-
@depth =
|
33
|
-
@dictionary =
|
34
|
-
end
|
31
|
+
file = File.new(@dictionarylocation, 'r').read
|
32
|
+
@depth = file[0].to_i
|
33
|
+
@dictionary = Ox.parse_obj(file[1..-1])
|
35
34
|
else
|
36
35
|
@dictionary = {}
|
37
36
|
end
|
@@ -40,9 +39,9 @@ class PersistentDictionary < MarkovDictionary
|
|
40
39
|
# Saves the PersistentDictionary objects @dictionary hash
|
41
40
|
# to disk as the depth followed by the Ox-serialized dictionary.
|
42
41
|
def save_dictionary!
|
43
|
-
|
42
|
+
packed = Ox.dump(@dictionary)
|
44
43
|
File.open(@dictionarylocation, 'w') do |f|
|
45
|
-
f.
|
44
|
+
f.write @depth.to_s + packed
|
46
45
|
end
|
47
46
|
true
|
48
47
|
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
## This is the rakegem gemspec template. Make sure you read and understand
|
2
|
+
## all of the comments. Some sections require modification, and others can
|
3
|
+
## be deleted if you don't need them. Once you understand the contents of
|
4
|
+
## this file, feel free to delete any comments that begin with two hash marks.
|
5
|
+
## You can find comprehensive Gem::Specification documentation, at
|
6
|
+
## http://docs.rubygems.org/read/chapter/20
|
7
|
+
Gem::Specification.new do |s|
|
8
|
+
s.specification_version = 2 if s.respond_to? :specification_version=
|
9
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
10
|
+
s.rubygems_version = '1.3.5'
|
11
|
+
|
12
|
+
## Leave these as is they will be modified for you by the rake gemspec task.
|
13
|
+
## If your rubyforge_project name is different, then edit it and comment out
|
14
|
+
## the sub! line in the Rakefile
|
15
|
+
s.name = 'marky_markov'
|
16
|
+
s.version = '0.3.0'
|
17
|
+
s.date = '2012-02-09'
|
18
|
+
s.rubyforge_project = 'marky_markov'
|
19
|
+
|
20
|
+
## Make sure your summary is short. The description may be as long
|
21
|
+
## as you like.
|
22
|
+
s.summary = "Simple Markov Chain generation available in the command-line"
|
23
|
+
s.description = "MarkyMarkov makes it easy to generate simply Markov Chains based upon input from
|
24
|
+
either a source file or a string. While usable as a module in your code it can also be called on
|
25
|
+
from the command line and piped into like a standard UNIX application."
|
26
|
+
|
27
|
+
## List the primary authors. If there are a bunch of authors, it's probably
|
28
|
+
## better to set the email to an email list or something. If you don't have
|
29
|
+
## a custom homepage, consider using your GitHub URL or the like.
|
30
|
+
s.authors = ["Matt Furden"]
|
31
|
+
s.email = 'mfurden@gmail.com'
|
32
|
+
s.homepage = 'http://www.thefurd.com'
|
33
|
+
|
34
|
+
## This gets added to the $LOAD_PATH so that 'lib/NAME.rb' can be required as
|
35
|
+
## require 'NAME.rb' or'/lib/NAME/file.rb' can be as require 'NAME/file.rb'
|
36
|
+
s.require_paths = %w[lib]
|
37
|
+
|
38
|
+
## This section is only necessary if you have C extensions.
|
39
|
+
#s.require_paths << 'ext'
|
40
|
+
#s.extensions = %w[ext/extconf.rb]
|
41
|
+
|
42
|
+
## If your gem includes any executables, list them here.
|
43
|
+
s.executables = ["marky_markov"]
|
44
|
+
|
45
|
+
## Specify any RDoc options here. You'll want to add your README and
|
46
|
+
## LICENSE files to the extra_rdoc_files list.
|
47
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
48
|
+
s.extra_rdoc_files = %w[README.md]
|
49
|
+
|
50
|
+
## List your runtime dependencies here. Runtime dependencies are those
|
51
|
+
## that are needed for an end user to actually USE your code.
|
52
|
+
s.add_dependency('ox', "~> 1.4")
|
53
|
+
|
54
|
+
## List your development dependencies here. Development dependencies are
|
55
|
+
## those that are only needed during development
|
56
|
+
#s.add_development_dependency('DEVDEPNAME', [">= 1.1.0", "< 2.0.0"])
|
57
|
+
|
58
|
+
## Leave this section as-is. It will be automatically generated from the
|
59
|
+
## contents of your Git repository via the gemspec task. DO NOT REMOVE
|
60
|
+
## THE MANIFEST COMMENTS, they are used as delimiters by the task.
|
61
|
+
# = MANIFEST =
|
62
|
+
s.files = %w[
|
63
|
+
README.md
|
64
|
+
bin/marky_markov
|
65
|
+
lib/marky_markov.rb
|
66
|
+
lib/marky_markov/markov_dictionary.rb
|
67
|
+
lib/marky_markov/markov_sentence_generator.rb
|
68
|
+
lib/marky_markov/persistent_dictionary.rb
|
69
|
+
marky_markov.gemspec
|
70
|
+
spec/marky_markov/markov_dictionary_spec.rb
|
71
|
+
spec/marky_markov/markov_sentence_generator_spec.rb
|
72
|
+
spec/marky_markov/persistent_dictionary_spec.rb
|
73
|
+
spec/spec_helper.rb
|
74
|
+
spec/test.txt
|
75
|
+
spec/textdictcompare.mmd
|
76
|
+
]
|
77
|
+
# = MANIFEST =
|
78
|
+
|
79
|
+
## Test files will be grabbed from the file list. Make sure the path glob
|
80
|
+
## matches what you actually use.
|
81
|
+
s.test_files = s.files.select { |path| path =~ /^test\/test_.*\.rb/ }
|
82
|
+
end
|
@@ -5,19 +5,13 @@ describe MarkovDictionary do
|
|
5
5
|
before(:each) do
|
6
6
|
@onetextsource = "spec/test.txt"
|
7
7
|
@onedict = MarkovDictionary.new(1)
|
8
|
-
@onedict.parse_source("
|
9
|
-
@
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
@onetextdict = {"The" => {"cat"=>1},
|
16
|
-
"and" => {"chainsaws"=>1},
|
17
|
-
"cat" => {"likes"=>1},
|
18
|
-
"chainsaws" => {},
|
19
|
-
"likes" => {"pie"=>1},
|
20
|
-
"pie" => {"and"=>1} }
|
8
|
+
@onedict.parse_source("The cat likes pie and chainsaws", false)
|
9
|
+
@onetextdict = { ["The"] => ["cat"],
|
10
|
+
["cat"] => ["likes"],
|
11
|
+
["likes"] => ["pie"],
|
12
|
+
["pie"] => ["and"],
|
13
|
+
["and"] => ["chainsaws"],
|
14
|
+
["chainsaws"] => []}
|
21
15
|
end
|
22
16
|
|
23
17
|
it "can open a file" do
|
@@ -30,8 +24,8 @@ describe MarkovDictionary do
|
|
30
24
|
end
|
31
25
|
|
32
26
|
it "can add a word to the dictionary" do
|
33
|
-
@onedict.add_word("to", "be")
|
34
|
-
@onedict.dictionary.should include("to" =>
|
27
|
+
@onedict.add_word(["to"], "be")
|
28
|
+
@onedict.dictionary.should include(["to"] => ["be"])
|
35
29
|
end
|
36
30
|
|
37
31
|
it "create a dictionary via parsing a text file" do
|
@@ -41,7 +35,7 @@ describe MarkovDictionary do
|
|
41
35
|
end
|
42
36
|
|
43
37
|
it "builds a one word dictionary properly" do
|
44
|
-
@onedict.dictionary.should eql(@
|
38
|
+
@onedict.dictionary.should eql(@onetextdict)
|
45
39
|
end
|
46
40
|
end
|
47
41
|
|
@@ -50,21 +44,16 @@ describe MarkovDictionary do
|
|
50
44
|
@twodict = MarkovDictionary.new
|
51
45
|
@twodict.parse_source("The cat likes pie and chainsaws", false)
|
52
46
|
@twotextsource = "spec/test.txt"
|
53
|
-
@
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
@twotextdict = {"The cat" => {"likes" => 1},
|
59
|
-
"cat likes" => {"pie" => 1},
|
60
|
-
"likes pie" => {"and" => 1},
|
61
|
-
"pie and" => {"chainsaws" => 1},
|
62
|
-
"and chainsaws" => {}}
|
47
|
+
@twotextdict = {["The", "cat"] => ["likes"],
|
48
|
+
["and", "chainsaws"] => [],
|
49
|
+
["cat", "likes"] => ["pie"],
|
50
|
+
["likes", "pie"] => ["and"],
|
51
|
+
["pie", "and"] => ["chainsaws"]}
|
63
52
|
end
|
64
53
|
|
65
54
|
it "can add a word to the two-word dictionary" do
|
66
|
-
@twodict.add_word("Zebras like", "kung-fu")
|
67
|
-
@twodict.dictionary.should eql(@
|
55
|
+
@twodict.add_word(["Zebras", "like"], "kung-fu")
|
56
|
+
@twodict.dictionary.should eql(@twotextdict.merge( {["Zebras", "like"] => ["kung-fu"]} ))
|
68
57
|
end
|
69
58
|
|
70
59
|
it "create a two-word dictionary via parsing a text file" do
|
data/spec/textdictcompare.mmd
CHANGED
@@ -1 +1,35 @@
|
|
1
|
-
2
|
1
|
+
2<h>
|
2
|
+
<a>
|
3
|
+
<s>The</s>
|
4
|
+
<s>cat</s>
|
5
|
+
</a>
|
6
|
+
<a>
|
7
|
+
<s>likes</s>
|
8
|
+
</a>
|
9
|
+
<a>
|
10
|
+
<s>cat</s>
|
11
|
+
<s>likes</s>
|
12
|
+
</a>
|
13
|
+
<a>
|
14
|
+
<s>pie</s>
|
15
|
+
</a>
|
16
|
+
<a>
|
17
|
+
<s>likes</s>
|
18
|
+
<s>pie</s>
|
19
|
+
</a>
|
20
|
+
<a>
|
21
|
+
<s>and</s>
|
22
|
+
</a>
|
23
|
+
<a>
|
24
|
+
<s>pie</s>
|
25
|
+
<s>and</s>
|
26
|
+
</a>
|
27
|
+
<a>
|
28
|
+
<s>chainsaws</s>
|
29
|
+
</a>
|
30
|
+
<a>
|
31
|
+
<s>and</s>
|
32
|
+
<s>chainsaws</s>
|
33
|
+
</a>
|
34
|
+
<a/>
|
35
|
+
</h>
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: marky_markov
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,22 +9,19 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02-
|
12
|
+
date: 2012-02-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
16
|
-
requirement: &
|
15
|
+
name: ox
|
16
|
+
requirement: &70184695396840 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
|
-
- -
|
19
|
+
- - ~>
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version: 1.
|
22
|
-
- - <
|
23
|
-
- !ruby/object:Gem::Version
|
24
|
-
version: 2.0.0
|
21
|
+
version: '1.4'
|
25
22
|
type: :runtime
|
26
23
|
prerelease: false
|
27
|
-
version_requirements: *
|
24
|
+
version_requirements: *70184695396840
|
28
25
|
description: ! "MarkyMarkov makes it easy to generate simply Markov Chains based upon
|
29
26
|
input from\n either a source file or a string. While usable as a module in your
|
30
27
|
code it can also be called on\n from the command line and piped into like a standard
|
@@ -42,6 +39,7 @@ files:
|
|
42
39
|
- lib/marky_markov/markov_dictionary.rb
|
43
40
|
- lib/marky_markov/markov_sentence_generator.rb
|
44
41
|
- lib/marky_markov/persistent_dictionary.rb
|
42
|
+
- marky_markov.gemspec
|
45
43
|
- spec/marky_markov/markov_dictionary_spec.rb
|
46
44
|
- spec/marky_markov/markov_sentence_generator_spec.rb
|
47
45
|
- spec/marky_markov/persistent_dictionary_spec.rb
|