marky_markov 0.3.0 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +2 -2
- data/lib/marky_markov.rb +12 -6
- data/lib/marky_markov/markov_dictionary.rb +13 -10
- data/lib/marky_markov/markov_sentence_generator.rb +59 -34
- data/lib/marky_markov/persistent_dictionary.rb +4 -2
- data/marky_markov.gemspec +3 -2
- data/spec/marky_markov/marky_markov_spec.rb +64 -0
- metadata +5 -4
data/README.md
CHANGED
@@ -7,8 +7,8 @@ in Ruby. It can be used both from the command-line and as a library within your
|
|
7
7
|
NOTE: 0.3.0 now uses arrays with multiple entries per word instead of a
|
8
8
|
hash key for each word with the value representing number of occurrences.
|
9
9
|
While a less elegant solution, it leads to faster text generation. We
|
10
|
-
are also now using
|
11
|
-
|
10
|
+
are also now using ox instead of yajl-json to store the dictionary
|
11
|
+
as yajl-json does not appear to support arrays within hashes properly.
|
12
12
|
|
13
13
|
NOTE: In the transition between 0.1.3 to 0.2.0 MarkyMarkov has added the
|
14
14
|
ability to generate proper sentences (generate_n_sentences) instead of simply a
|
data/lib/marky_markov.rb
CHANGED
@@ -4,12 +4,12 @@
|
|
4
4
|
require_relative 'marky_markov/persistent_dictionary'
|
5
5
|
require_relative 'marky_markov/markov_sentence_generator'
|
6
6
|
|
7
|
-
# @version = 0.3.
|
7
|
+
# @version = 0.3.2
|
8
8
|
# @author Matt Furden
|
9
9
|
# Module containing TemporaryDictionary and Dictionary for creation of
|
10
10
|
# Markov Chain Dictionaries and generating sentences from those dictionaries.
|
11
11
|
module MarkyMarkov
|
12
|
-
VERSION = '0.3.
|
12
|
+
VERSION = '0.3.2'
|
13
13
|
|
14
14
|
class TemporaryDictionary
|
15
15
|
# Create a new Temporary Markov Chain Dictionary and sentence generator for use.
|
@@ -26,6 +26,12 @@ module MarkyMarkov
|
|
26
26
|
@sentence = MarkovSentenceGenerator.new(@dictionary)
|
27
27
|
end
|
28
28
|
|
29
|
+
# Returns the MarkovDictionary object's dictionary hash.
|
30
|
+
# @return [Hash] the MarkovDictionary hash.
|
31
|
+
def dictionary
|
32
|
+
@dictionary.dictionary
|
33
|
+
end
|
34
|
+
|
29
35
|
# Parses a given file and adds the sentences it contains to the current dictionary.
|
30
36
|
#
|
31
37
|
# @example Open a text file and add its contents to the dictionary.
|
@@ -93,13 +99,13 @@ module MarkyMarkov
|
|
93
99
|
end
|
94
100
|
|
95
101
|
# @since 0.1.4
|
96
|
-
# Modify
|
102
|
+
# Modify respond_to_missing? to include generate_n_words and generate_n_sentences
|
97
103
|
# method_missing implementation.
|
98
|
-
def
|
104
|
+
def respond_to_missing?(method_sym, include_private)
|
99
105
|
if method_sym.to_s =~ /^generate_(\d*)_word[s]*$/
|
100
|
-
|
106
|
+
true
|
101
107
|
elsif method_sym.to_s =~ /^generate_(\d*)_sentence[s]*$/
|
102
|
-
|
108
|
+
true
|
103
109
|
else
|
104
110
|
super
|
105
111
|
end
|
@@ -1,19 +1,20 @@
|
|
1
1
|
# @private
|
2
|
-
class MarkovDictionary
|
3
|
-
|
4
|
-
def initialize(depth=2)
|
5
|
-
@dictionary = {}
|
2
|
+
class MarkovDictionary # :nodoc:
|
3
|
+
attr_reader :dictionary, :depth
|
4
|
+
def initialize(depth=2) @dictionary = {}
|
6
5
|
@depth = depth
|
6
|
+
@split_words = /([.?!])|[\s]+/
|
7
|
+
@split_sentence = /(?<=[.!?])\s+/
|
7
8
|
end
|
8
9
|
|
9
10
|
# If File does not exist.
|
10
|
-
class FileNotFoundError < Exception
|
11
|
+
class FileNotFoundError < Exception # :nodoc:
|
11
12
|
end
|
12
13
|
|
13
14
|
# Open supplied text file:
|
14
15
|
def open_source(source)
|
15
16
|
if File.exists?(source)
|
16
|
-
File.open(source, "r").read.split
|
17
|
+
File.open(source, "r").read.split(@split_sentence)
|
17
18
|
else
|
18
19
|
raise FileNotFoundError.new("#{source} does not exist!")
|
19
20
|
end
|
@@ -36,10 +37,12 @@ class MarkovDictionary
|
|
36
37
|
# @example Add a string
|
37
38
|
# parse_source("Hi, how are you doing?", false)
|
38
39
|
def parse_source(source, file=true)
|
39
|
-
contents = file ? open_source(source) : contents = source.split
|
40
|
-
contents.
|
41
|
-
|
40
|
+
contents = file ? open_source(source) : contents = source.split(@split_sentence)
|
41
|
+
contents.map! {|sentence| sentence.gsub(/["()]/,"")}
|
42
|
+
contents.each do |sentence|
|
43
|
+
sentence.split(@split_words).each_cons(@depth+1) do |words|
|
44
|
+
self.add_word(words[0..-2], words[-1])
|
45
|
+
end
|
42
46
|
end
|
43
|
-
@dictionary[contents.last(@depth)] ||= []
|
44
47
|
end
|
45
48
|
end
|
@@ -1,47 +1,67 @@
|
|
1
|
+
# Messing about with the NullObject pattern, can't apply it in too many
|
2
|
+
# places in this one. Need to evaluate what else could be used in this
|
3
|
+
# aside from my first instinct of defaulting to []
|
1
4
|
# @private
|
2
|
-
class
|
5
|
+
class NullObject # :nodoc:
|
6
|
+
def method_missing (*args, &block)
|
7
|
+
self
|
8
|
+
end
|
9
|
+
def nil?; true; end
|
10
|
+
def <<(*); end
|
11
|
+
def to_str; end
|
12
|
+
def to_ary; []; end
|
13
|
+
end
|
14
|
+
|
15
|
+
# @private
|
16
|
+
NULL_OBJECT = NullObject.new # :nodoc:
|
17
|
+
|
18
|
+
# @private
|
19
|
+
class MarkovSentenceGenerator # :nodoc:
|
3
20
|
def initialize(dictionary)
|
4
21
|
@dictionary = dictionary
|
5
22
|
@depth = @dictionary.depth
|
6
23
|
end
|
7
24
|
|
8
|
-
# Returns a random word
|
25
|
+
# Returns a random word via picking a random key from the dictionary.
|
9
26
|
# In the case of the TwoWordDictionary, it returns two words to ensure
|
10
27
|
# that the sentence will have a valid two word string to pick the next
|
11
28
|
# word from.
|
12
|
-
#
|
29
|
+
# wordslength
|
13
30
|
# @return [String] a string containing a random dictionary key.
|
14
31
|
def random_word
|
15
|
-
|
16
|
-
|
32
|
+
words = @dictionary.dictionary.keys
|
33
|
+
words[rand(words.length)]
|
17
34
|
end
|
18
35
|
|
19
36
|
# Generates a random capitalized word via picking a random key from the
|
20
37
|
# dictionary and recurring if the word is lowercase.
|
21
38
|
#
|
22
39
|
# (see #random_word)
|
23
|
-
def random_capitalized_word
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
40
|
+
def random_capitalized_word
|
41
|
+
attempts = 0
|
42
|
+
# If you don't find a capitalized word after 15 attempts, just use
|
43
|
+
# a lowercase word as there may be no capitals in the dictionary.
|
44
|
+
until attempts > 15
|
45
|
+
attempts += 1
|
46
|
+
words = @dictionary.dictionary.keys
|
47
|
+
random_choice = words[rand(words.length)]
|
48
|
+
if random_choice[0] =~ /[A-Z]/
|
49
|
+
return random_choice
|
50
|
+
end
|
34
51
|
end
|
52
|
+
random_word
|
35
53
|
end
|
36
54
|
|
37
|
-
# Returns a word based upon the
|
38
|
-
#
|
55
|
+
# Returns a word based upon the likelihood of it appearing after the supplied word.
|
56
|
+
#
|
39
57
|
def weighted_random(lastword)
|
40
58
|
# If word has no words in its dictionary (last word in source text file)
|
41
59
|
# have it pick a random word to display instead.
|
42
|
-
|
43
|
-
|
44
|
-
|
60
|
+
@dictionary.dictionary.fetch(lastword, NULL_OBJECT).sample
|
61
|
+
end
|
62
|
+
|
63
|
+
def punctuation?(word)
|
64
|
+
word =~ /[.!?]/
|
45
65
|
end
|
46
66
|
|
47
67
|
# Generates a sentence of (wordcount) length using the weighted_random function.
|
@@ -52,10 +72,13 @@ class MarkovSentenceGenerator
|
|
52
72
|
sentence = []
|
53
73
|
sentence.concat(random_capitalized_word)
|
54
74
|
(wordcount-1).times do
|
55
|
-
|
56
|
-
|
57
|
-
|
75
|
+
word = weighted_random(sentence.last(@depth))
|
76
|
+
if punctuation?(word[0])
|
77
|
+
sentence[-1] = sentence.last.dup << word
|
78
|
+
elsif word.nil?
|
58
79
|
sentence.concat(random_capitalized_word)
|
80
|
+
else
|
81
|
+
sentence << word
|
59
82
|
end
|
60
83
|
end
|
61
84
|
sentence.pop(sentence.length-wordcount)
|
@@ -68,19 +91,21 @@ class MarkovSentenceGenerator
|
|
68
91
|
# @return [String] the sentence(s) generated.
|
69
92
|
def generate_sentence(sentencecount)
|
70
93
|
sentence = []
|
94
|
+
# Find out how many actual keys are in the dictionary.
|
95
|
+
key_count = @dictionary.dictionary.keys.length
|
96
|
+
# If less than 30 keys, use that plus five as your maximum sentence length.
|
97
|
+
maximum_length = key_count < 30 ? key_count + 5 : 30
|
71
98
|
sentencecount.times do
|
72
|
-
|
73
|
-
key_count = @dictionary.dictionary.keys.length
|
74
|
-
# If less than 30 keys, use that plus five as your maximum sentence length.
|
75
|
-
maximum_length = key_count < 30 ? key_count + 5 : 30
|
76
|
-
stop_at_index = sentence.count + maximum_length
|
99
|
+
wordcount = 0
|
77
100
|
sentence.concat(random_capitalized_word)
|
78
|
-
until (
|
101
|
+
until (punctuation?(sentence.last[-1])) || wordcount > maximum_length
|
102
|
+
wordcount += 1
|
79
103
|
word = weighted_random(sentence.last(@depth))
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
104
|
+
if punctuation?(word)
|
105
|
+
sentence[-1] = sentence.last.dup << word
|
106
|
+
else
|
107
|
+
sentence << word
|
108
|
+
end
|
84
109
|
end
|
85
110
|
end
|
86
111
|
sentence.join(' ')
|
@@ -2,9 +2,9 @@ require 'ox'
|
|
2
2
|
require_relative 'markov_dictionary'
|
3
3
|
|
4
4
|
# @private
|
5
|
-
class PersistentDictionary < MarkovDictionary
|
5
|
+
class PersistentDictionary < MarkovDictionary # :nodoc:
|
6
6
|
|
7
|
-
class DepthNotInRangeError < Exception
|
7
|
+
class DepthNotInRangeError < Exception # :nodoc:
|
8
8
|
end
|
9
9
|
|
10
10
|
# Creates a PersistentDictionary object using the supplied dictionary file.
|
@@ -19,6 +19,8 @@ class PersistentDictionary < MarkovDictionary
|
|
19
19
|
raise DepthNotInRangeError.new("Depth must be between 1 and 5")
|
20
20
|
end
|
21
21
|
@dictionarylocation = dictionary
|
22
|
+
@split_words = /([.?!])|[\s]+/
|
23
|
+
@split_sentence = /(?<=[.!?])\s+/
|
22
24
|
self.open_dictionary
|
23
25
|
end
|
24
26
|
|
data/marky_markov.gemspec
CHANGED
@@ -13,8 +13,8 @@ Gem::Specification.new do |s|
|
|
13
13
|
## If your rubyforge_project name is different, then edit it and comment out
|
14
14
|
## the sub! line in the Rakefile
|
15
15
|
s.name = 'marky_markov'
|
16
|
-
s.version = '0.3.
|
17
|
-
s.date = '2012-02-
|
16
|
+
s.version = '0.3.2'
|
17
|
+
s.date = '2012-02-12'
|
18
18
|
s.rubyforge_project = 'marky_markov'
|
19
19
|
|
20
20
|
## Make sure your summary is short. The description may be as long
|
@@ -69,6 +69,7 @@ Gem::Specification.new do |s|
|
|
69
69
|
marky_markov.gemspec
|
70
70
|
spec/marky_markov/markov_dictionary_spec.rb
|
71
71
|
spec/marky_markov/markov_sentence_generator_spec.rb
|
72
|
+
spec/marky_markov/marky_markov_spec.rb
|
72
73
|
spec/marky_markov/persistent_dictionary_spec.rb
|
73
74
|
spec/spec_helper.rb
|
74
75
|
spec/test.txt
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe MarkyMarkov do
|
4
|
+
context "TemporaryDictionary" do
|
5
|
+
before(:each) do
|
6
|
+
@textsource = "spec/test.txt"
|
7
|
+
@dictionary = MarkyMarkov::TemporaryDictionary.new
|
8
|
+
@onedictcompare = { ["The"] => ["cat"],
|
9
|
+
["cat"] => ["likes"],
|
10
|
+
["likes"] => ["pie"],
|
11
|
+
["pie"] => ["and"],
|
12
|
+
["and"] => ["chainsaws"],
|
13
|
+
["chainsaws"] => []}
|
14
|
+
@twodictcompare = {["The", "cat"] => ["likes"],
|
15
|
+
["and", "chainsaws"] => [],
|
16
|
+
["cat", "likes"] => ["pie"],
|
17
|
+
["likes", "pie"] => ["and"],
|
18
|
+
["pie", "and"] => ["chainsaws"]}
|
19
|
+
end
|
20
|
+
|
21
|
+
it "should be able to parse a string" do
|
22
|
+
@dictionary.parse_string "The cat likes pie and chainsaws"
|
23
|
+
@dictionary.dictionary.should eql(@twodictcompare)
|
24
|
+
end
|
25
|
+
|
26
|
+
it "should generate the right number of sentences" do
|
27
|
+
end
|
28
|
+
|
29
|
+
it "should create the right number of words" do
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
context "PersistentDictionary" do
|
34
|
+
before(:each) do
|
35
|
+
@textsource = "spec/test.txt"
|
36
|
+
@dictionary = MarkyMarkov::Dictionary.new(@textsource)
|
37
|
+
@onedictcompare = { ["The"] => ["cat"],
|
38
|
+
["cat"] => ["likes"],
|
39
|
+
["likes"] => ["pie"],
|
40
|
+
["pie"] => ["and"],
|
41
|
+
["and"] => ["chainsaws"],
|
42
|
+
["chainsaws"] => []}
|
43
|
+
@twodictcompare = {["The", "cat"] => ["likes"],
|
44
|
+
["and", "chainsaws"] => [],
|
45
|
+
["cat", "likes"] => ["pie"],
|
46
|
+
["likes", "pie"] => ["and"],
|
47
|
+
["pie", "and"] => ["chainsaws"]}
|
48
|
+
end
|
49
|
+
|
50
|
+
it "should load the saved dictionary" do
|
51
|
+
end
|
52
|
+
|
53
|
+
it "should be able to parse a string" do
|
54
|
+
@dictionary.parse_string "The cat likes pie and chainsaws"
|
55
|
+
@dictionary.dictionary.should include(@twodictcompare)
|
56
|
+
end
|
57
|
+
|
58
|
+
it "should generate the right number of sentences" do
|
59
|
+
end
|
60
|
+
|
61
|
+
it "should create the right number of words" do
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: marky_markov
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02-
|
12
|
+
date: 2012-02-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ox
|
16
|
-
requirement: &
|
16
|
+
requirement: &70164029504260 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
version: '1.4'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70164029504260
|
25
25
|
description: ! "MarkyMarkov makes it easy to generate simply Markov Chains based upon
|
26
26
|
input from\n either a source file or a string. While usable as a module in your
|
27
27
|
code it can also be called on\n from the command line and piped into like a standard
|
@@ -42,6 +42,7 @@ files:
|
|
42
42
|
- marky_markov.gemspec
|
43
43
|
- spec/marky_markov/markov_dictionary_spec.rb
|
44
44
|
- spec/marky_markov/markov_sentence_generator_spec.rb
|
45
|
+
- spec/marky_markov/marky_markov_spec.rb
|
45
46
|
- spec/marky_markov/persistent_dictionary_spec.rb
|
46
47
|
- spec/spec_helper.rb
|
47
48
|
- spec/test.txt
|