marky_markov 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +11 -5
- data/lib/marky_markov.rb +2 -2
- data/lib/marky_markov/markov_dictionary.rb +4 -5
- data/lib/marky_markov/markov_sentence_generator.rb +13 -15
- data/lib/marky_markov/persistent_dictionary.rb +6 -7
- data/marky_markov.gemspec +82 -0
- data/spec/marky_markov/markov_dictionary_spec.rb +17 -28
- data/spec/textdictcompare.mmd +35 -1
- metadata +8 -10
data/README.md
CHANGED
@@ -4,6 +4,12 @@ Marky Markov and the Funky Sentences
|
|
4
4
|
Marky Markov is a naïve experiment in Markov Chain generation implemented
|
5
5
|
in Ruby. It can be used both from the command-line and as a library within your code.
|
6
6
|
|
7
|
+
NOTE: 0.3.0 now uses arrays with multiple entries per word instead of a
|
8
|
+
hash key for each word with the value representing number of occurrences.
|
9
|
+
While a less elegant solution, it leads to faster text generation. We
|
10
|
+
are also now using ox instead of yajl-json to store the dictionary
|
11
|
+
which should lead to faster usage in the command-line app.
|
12
|
+
|
7
13
|
NOTE: In the transition from 0.1.3 to 0.2.0 MarkyMarkov has added the
|
8
14
|
ability to generate proper sentences (generate_n_sentences) instead of simply a
|
9
15
|
maximum number of words. The command-line app has changed to sentences as its default
|
@@ -46,9 +52,9 @@ number in there, well, you can!
|
|
46
52
|
markov.generate_20_words
|
47
53
|
|
48
54
|
The default dictionary depth is two words.
|
49
|
-
`{"I hope"
|
50
|
-
"hope this" =>
|
51
|
-
"this makes" =>
|
55
|
+
`{["I", "hope"] => ["this"],
|
56
|
+
["hope", "this"] => ["makes"],
|
57
|
+
["this", "makes"] => ["sense"]}`
|
52
58
|
but it can be set to a depth between 1 and 5 upon dictionary creation,
|
53
59
|
though really any higher than 3 and it starts to simply print passages
|
54
60
|
from the source text.
|
@@ -56,8 +62,8 @@ from the source text.
|
|
56
62
|
markov = MarkyMarkov::Dictionary.new('dictionary', 3)
|
57
63
|
|
58
64
|
creates a dictionary with a depth of three words.
|
59
|
-
`{"I hope this"
|
60
|
-
"hope this makes" =>
|
65
|
+
`{["I", "hope", "this"] => ["makes"],
|
66
|
+
["hope", "this", "makes"] => ["sense"]}`
|
61
67
|
|
62
68
|
If you want to delete a dictionary you call it upon the Dictionary class itself while
|
63
69
|
passing in the filename/location.
|
data/lib/marky_markov.rb
CHANGED
@@ -4,12 +4,12 @@
|
|
4
4
|
require_relative 'marky_markov/persistent_dictionary'
|
5
5
|
require_relative 'marky_markov/markov_sentence_generator'
|
6
6
|
|
7
|
-
# @version = 0.
|
7
|
+
# @version = 0.3.0
|
8
8
|
# @author Matt Furden
|
9
9
|
# Module containing TemporaryDictionary and Dictionary for creation of
|
10
10
|
# Markov Chain Dictionaries and generating sentences from those dictionaries.
|
11
11
|
module MarkyMarkov
|
12
|
-
VERSION = '0.
|
12
|
+
VERSION = '0.3.0'
|
13
13
|
|
14
14
|
class TemporaryDictionary
|
15
15
|
# Create a new Temporary Markov Chain Dictionary and sentence generator for use.
|
@@ -24,9 +24,8 @@ class MarkovDictionary
|
|
24
24
|
# @example Adding a word
|
25
25
|
# add_word("Hello", "world")
|
26
26
|
def add_word(rootword, followedby)
|
27
|
-
@dictionary[rootword] ||=
|
28
|
-
@dictionary[rootword]
|
29
|
-
@dictionary[rootword][followedby] += 1
|
27
|
+
@dictionary[rootword] ||= []
|
28
|
+
@dictionary[rootword] << followedby
|
30
29
|
end
|
31
30
|
|
32
31
|
# Given a source of text, be it a text file (file=true) or a string (file=false)
|
@@ -39,8 +38,8 @@ class MarkovDictionary
|
|
39
38
|
def parse_source(source, file=true)
|
40
39
|
contents = file ? open_source(source) : contents = source.split
|
41
40
|
contents.each_cons(@depth+1) do |words|
|
42
|
-
self.add_word(words[0..-2]
|
41
|
+
self.add_word(words[0..-2], words[-1])
|
43
42
|
end
|
44
|
-
@dictionary[contents.last(@depth)
|
43
|
+
@dictionary[contents.last(@depth)] ||= []
|
45
44
|
end
|
46
45
|
end
|
@@ -39,16 +39,9 @@ class MarkovSentenceGenerator
|
|
39
39
|
def weighted_random(lastword)
|
40
40
|
# If word has no words in its dictionary (last word in source text file)
|
41
41
|
# have it pick a random word to display instead.
|
42
|
-
@dictionary.dictionary
|
43
|
-
|
44
|
-
|
45
|
-
random = rand(total)+1
|
46
|
-
@dictionary.dictionary[lastword].each do |word, occurs|
|
47
|
-
random -= occurs
|
48
|
-
if random <= 0
|
49
|
-
return word
|
50
|
-
end
|
51
|
-
end
|
42
|
+
if word = @dictionary.dictionary[lastword]
|
43
|
+
word.sample
|
44
|
+
end
|
52
45
|
end
|
53
46
|
|
54
47
|
# Generates a sentence of (wordcount) length using the weighted_random function.
|
@@ -57,9 +50,13 @@ class MarkovSentenceGenerator
|
|
57
50
|
# @return [String] the words, hopefully forming sentences generated.
|
58
51
|
def generate(wordcount)
|
59
52
|
sentence = []
|
60
|
-
sentence.concat(random_capitalized_word
|
53
|
+
sentence.concat(random_capitalized_word)
|
61
54
|
(wordcount-1).times do
|
62
|
-
|
55
|
+
if word = weighted_random(sentence.last(@depth))
|
56
|
+
sentence << word
|
57
|
+
else
|
58
|
+
sentence.concat(random_capitalized_word)
|
59
|
+
end
|
63
60
|
end
|
64
61
|
sentence.pop(sentence.length-wordcount)
|
65
62
|
sentence.join(' ')
|
@@ -77,12 +74,13 @@ class MarkovSentenceGenerator
|
|
77
74
|
# If less than 30 keys, use that plus five as your maximum sentence length.
|
78
75
|
maximum_length = key_count < 30 ? key_count + 5 : 30
|
79
76
|
stop_at_index = sentence.count + maximum_length
|
80
|
-
sentence.concat(random_capitalized_word
|
77
|
+
sentence.concat(random_capitalized_word)
|
81
78
|
until (/[.!?]/ =~ sentence.last[-1])
|
82
|
-
|
79
|
+
word = weighted_random(sentence.last(@depth))
|
80
|
+
sentence << word unless word.nil?
|
83
81
|
# If a word ending with a . ! or ? isn't found after 30 words,
|
84
82
|
# just add a period as there may be none in the dictionary.
|
85
|
-
sentence[-1] << "." if sentence.count > stop_at_index
|
83
|
+
sentence[-1] << "." if word.nil? || sentence.count > stop_at_index
|
86
84
|
end
|
87
85
|
end
|
88
86
|
sentence.join(' ')
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require '
|
1
|
+
require 'ox'
|
2
2
|
require_relative 'markov_dictionary'
|
3
3
|
|
4
4
|
# @private
|
@@ -28,10 +28,9 @@ class PersistentDictionary < MarkovDictionary
|
|
28
28
|
# otherwise it creates an empty hash.
|
29
29
|
def open_dictionary
|
30
30
|
if File.exists?(@dictionarylocation)
|
31
|
-
File.
|
32
|
-
@depth =
|
33
|
-
@dictionary =
|
34
|
-
end
|
31
|
+
file = File.new(@dictionarylocation, 'r').read
|
32
|
+
@depth = file[0].to_i
|
33
|
+
@dictionary = Ox.parse_obj(file[1..-1])
|
35
34
|
else
|
36
35
|
@dictionary = {}
|
37
36
|
end
|
@@ -40,9 +39,9 @@ class PersistentDictionary < MarkovDictionary
|
|
40
39
|
# Saves the PersistentDictionary objects @dictionary hash
|
41
40
|
# to disk, prefixed by the depth, as Ox-serialized XML.
|
42
41
|
def save_dictionary!
|
43
|
-
|
42
|
+
packed = Ox.dump(@dictionary)
|
44
43
|
File.open(@dictionarylocation, 'w') do |f|
|
45
|
-
f.
|
44
|
+
f.write @depth.to_s + packed
|
46
45
|
end
|
47
46
|
true
|
48
47
|
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
## This is the rakegem gemspec template. Make sure you read and understand
|
2
|
+
## all of the comments. Some sections require modification, and others can
|
3
|
+
## be deleted if you don't need them. Once you understand the contents of
|
4
|
+
## this file, feel free to delete any comments that begin with two hash marks.
|
5
|
+
## You can find comprehensive Gem::Specification documentation, at
|
6
|
+
## http://docs.rubygems.org/read/chapter/20
|
7
|
+
Gem::Specification.new do |s|
|
8
|
+
s.specification_version = 2 if s.respond_to? :specification_version=
|
9
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
10
|
+
s.rubygems_version = '1.3.5'
|
11
|
+
|
12
|
+
## Leave these as is they will be modified for you by the rake gemspec task.
|
13
|
+
## If your rubyforge_project name is different, then edit it and comment out
|
14
|
+
## the sub! line in the Rakefile
|
15
|
+
s.name = 'marky_markov'
|
16
|
+
s.version = '0.3.0'
|
17
|
+
s.date = '2012-02-09'
|
18
|
+
s.rubyforge_project = 'marky_markov'
|
19
|
+
|
20
|
+
## Make sure your summary is short. The description may be as long
|
21
|
+
## as you like.
|
22
|
+
s.summary = "Simple Markov Chain generation available in the command-line"
|
23
|
+
s.description = "MarkyMarkov makes it easy to generate simply Markov Chains based upon input from
|
24
|
+
either a source file or a string. While usable as a module in your code it can also be called on
|
25
|
+
from the command line and piped into like a standard UNIX application."
|
26
|
+
|
27
|
+
## List the primary authors. If there are a bunch of authors, it's probably
|
28
|
+
## better to set the email to an email list or something. If you don't have
|
29
|
+
## a custom homepage, consider using your GitHub URL or the like.
|
30
|
+
s.authors = ["Matt Furden"]
|
31
|
+
s.email = 'mfurden@gmail.com'
|
32
|
+
s.homepage = 'http://www.thefurd.com'
|
33
|
+
|
34
|
+
## This gets added to the $LOAD_PATH so that 'lib/NAME.rb' can be required as
|
35
|
+
## require 'NAME.rb' or'/lib/NAME/file.rb' can be as require 'NAME/file.rb'
|
36
|
+
s.require_paths = %w[lib]
|
37
|
+
|
38
|
+
## This section is only necessary if you have C extensions.
|
39
|
+
#s.require_paths << 'ext'
|
40
|
+
#s.extensions = %w[ext/extconf.rb]
|
41
|
+
|
42
|
+
## If your gem includes any executables, list them here.
|
43
|
+
s.executables = ["marky_markov"]
|
44
|
+
|
45
|
+
## Specify any RDoc options here. You'll want to add your README and
|
46
|
+
## LICENSE files to the extra_rdoc_files list.
|
47
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
48
|
+
s.extra_rdoc_files = %w[README.md]
|
49
|
+
|
50
|
+
## List your runtime dependencies here. Runtime dependencies are those
|
51
|
+
## that are needed for an end user to actually USE your code.
|
52
|
+
s.add_dependency('ox', "~> 1.4")
|
53
|
+
|
54
|
+
## List your development dependencies here. Development dependencies are
|
55
|
+
## those that are only needed during development
|
56
|
+
#s.add_development_dependency('DEVDEPNAME', [">= 1.1.0", "< 2.0.0"])
|
57
|
+
|
58
|
+
## Leave this section as-is. It will be automatically generated from the
|
59
|
+
## contents of your Git repository via the gemspec task. DO NOT REMOVE
|
60
|
+
## THE MANIFEST COMMENTS, they are used as delimiters by the task.
|
61
|
+
# = MANIFEST =
|
62
|
+
s.files = %w[
|
63
|
+
README.md
|
64
|
+
bin/marky_markov
|
65
|
+
lib/marky_markov.rb
|
66
|
+
lib/marky_markov/markov_dictionary.rb
|
67
|
+
lib/marky_markov/markov_sentence_generator.rb
|
68
|
+
lib/marky_markov/persistent_dictionary.rb
|
69
|
+
marky_markov.gemspec
|
70
|
+
spec/marky_markov/markov_dictionary_spec.rb
|
71
|
+
spec/marky_markov/markov_sentence_generator_spec.rb
|
72
|
+
spec/marky_markov/persistent_dictionary_spec.rb
|
73
|
+
spec/spec_helper.rb
|
74
|
+
spec/test.txt
|
75
|
+
spec/textdictcompare.mmd
|
76
|
+
]
|
77
|
+
# = MANIFEST =
|
78
|
+
|
79
|
+
## Test files will be grabbed from the file list. Make sure the path glob
|
80
|
+
## matches what you actually use.
|
81
|
+
s.test_files = s.files.select { |path| path =~ /^test\/test_.*\.rb/ }
|
82
|
+
end
|
@@ -5,19 +5,13 @@ describe MarkovDictionary do
|
|
5
5
|
before(:each) do
|
6
6
|
@onetextsource = "spec/test.txt"
|
7
7
|
@onedict = MarkovDictionary.new(1)
|
8
|
-
@onedict.parse_source("
|
9
|
-
@
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
@onetextdict = {"The" => {"cat"=>1},
|
16
|
-
"and" => {"chainsaws"=>1},
|
17
|
-
"cat" => {"likes"=>1},
|
18
|
-
"chainsaws" => {},
|
19
|
-
"likes" => {"pie"=>1},
|
20
|
-
"pie" => {"and"=>1} }
|
8
|
+
@onedict.parse_source("The cat likes pie and chainsaws", false)
|
9
|
+
@onetextdict = { ["The"] => ["cat"],
|
10
|
+
["cat"] => ["likes"],
|
11
|
+
["likes"] => ["pie"],
|
12
|
+
["pie"] => ["and"],
|
13
|
+
["and"] => ["chainsaws"],
|
14
|
+
["chainsaws"] => []}
|
21
15
|
end
|
22
16
|
|
23
17
|
it "can open a file" do
|
@@ -30,8 +24,8 @@ describe MarkovDictionary do
|
|
30
24
|
end
|
31
25
|
|
32
26
|
it "can add a word to the dictionary" do
|
33
|
-
@onedict.add_word("to", "be")
|
34
|
-
@onedict.dictionary.should include("to" =>
|
27
|
+
@onedict.add_word(["to"], "be")
|
28
|
+
@onedict.dictionary.should include(["to"] => ["be"])
|
35
29
|
end
|
36
30
|
|
37
31
|
it "create a dictionary via parsing a text file" do
|
@@ -41,7 +35,7 @@ describe MarkovDictionary do
|
|
41
35
|
end
|
42
36
|
|
43
37
|
it "builds a one word dictionary properly" do
|
44
|
-
@onedict.dictionary.should eql(@
|
38
|
+
@onedict.dictionary.should eql(@onetextdict)
|
45
39
|
end
|
46
40
|
end
|
47
41
|
|
@@ -50,21 +44,16 @@ describe MarkovDictionary do
|
|
50
44
|
@twodict = MarkovDictionary.new
|
51
45
|
@twodict.parse_source("The cat likes pie and chainsaws", false)
|
52
46
|
@twotextsource = "spec/test.txt"
|
53
|
-
@
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
@twotextdict = {"The cat" => {"likes" => 1},
|
59
|
-
"cat likes" => {"pie" => 1},
|
60
|
-
"likes pie" => {"and" => 1},
|
61
|
-
"pie and" => {"chainsaws" => 1},
|
62
|
-
"and chainsaws" => {}}
|
47
|
+
@twotextdict = {["The", "cat"] => ["likes"],
|
48
|
+
["and", "chainsaws"] => [],
|
49
|
+
["cat", "likes"] => ["pie"],
|
50
|
+
["likes", "pie"] => ["and"],
|
51
|
+
["pie", "and"] => ["chainsaws"]}
|
63
52
|
end
|
64
53
|
|
65
54
|
it "can add a word to the two-word dictionary" do
|
66
|
-
@twodict.add_word("Zebras like", "kung-fu")
|
67
|
-
@twodict.dictionary.should eql(@
|
55
|
+
@twodict.add_word(["Zebras", "like"], "kung-fu")
|
56
|
+
@twodict.dictionary.should eql(@twotextdict.merge( {["Zebras", "like"] => ["kung-fu"]} ))
|
68
57
|
end
|
69
58
|
|
70
59
|
it "create a two-word dictionary via parsing a text file" do
|
data/spec/textdictcompare.mmd
CHANGED
@@ -1 +1,35 @@
|
|
1
|
-
2
|
1
|
+
2<h>
|
2
|
+
<a>
|
3
|
+
<s>The</s>
|
4
|
+
<s>cat</s>
|
5
|
+
</a>
|
6
|
+
<a>
|
7
|
+
<s>likes</s>
|
8
|
+
</a>
|
9
|
+
<a>
|
10
|
+
<s>cat</s>
|
11
|
+
<s>likes</s>
|
12
|
+
</a>
|
13
|
+
<a>
|
14
|
+
<s>pie</s>
|
15
|
+
</a>
|
16
|
+
<a>
|
17
|
+
<s>likes</s>
|
18
|
+
<s>pie</s>
|
19
|
+
</a>
|
20
|
+
<a>
|
21
|
+
<s>and</s>
|
22
|
+
</a>
|
23
|
+
<a>
|
24
|
+
<s>pie</s>
|
25
|
+
<s>and</s>
|
26
|
+
</a>
|
27
|
+
<a>
|
28
|
+
<s>chainsaws</s>
|
29
|
+
</a>
|
30
|
+
<a>
|
31
|
+
<s>and</s>
|
32
|
+
<s>chainsaws</s>
|
33
|
+
</a>
|
34
|
+
<a/>
|
35
|
+
</h>
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: marky_markov
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,22 +9,19 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02-
|
12
|
+
date: 2012-02-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
16
|
-
requirement: &
|
15
|
+
name: ox
|
16
|
+
requirement: &70184695396840 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
|
-
- -
|
19
|
+
- - ~>
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version: 1.
|
22
|
-
- - <
|
23
|
-
- !ruby/object:Gem::Version
|
24
|
-
version: 2.0.0
|
21
|
+
version: '1.4'
|
25
22
|
type: :runtime
|
26
23
|
prerelease: false
|
27
|
-
version_requirements: *
|
24
|
+
version_requirements: *70184695396840
|
28
25
|
description: ! "MarkyMarkov makes it easy to generate simply Markov Chains based upon
|
29
26
|
input from\n either a source file or a string. While usable as a module in your
|
30
27
|
code it can also be called on\n from the command line and piped into like a standard
|
@@ -42,6 +39,7 @@ files:
|
|
42
39
|
- lib/marky_markov/markov_dictionary.rb
|
43
40
|
- lib/marky_markov/markov_sentence_generator.rb
|
44
41
|
- lib/marky_markov/persistent_dictionary.rb
|
42
|
+
- marky_markov.gemspec
|
45
43
|
- spec/marky_markov/markov_dictionary_spec.rb
|
46
44
|
- spec/marky_markov/markov_sentence_generator_spec.rb
|
47
45
|
- spec/marky_markov/persistent_dictionary_spec.rb
|