marky_markov 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +2 -2
- data/lib/marky_markov.rb +12 -6
- data/lib/marky_markov/markov_dictionary.rb +13 -10
- data/lib/marky_markov/markov_sentence_generator.rb +59 -34
- data/lib/marky_markov/persistent_dictionary.rb +4 -2
- data/marky_markov.gemspec +3 -2
- data/spec/marky_markov/marky_markov_spec.rb +64 -0
- metadata +5 -4
data/README.md
CHANGED
@@ -7,8 +7,8 @@ in Ruby. It can be used both from the command-line and as a library within your
|
|
7
7
|
NOTE: 0.3.0 now uses arrays with multiple entries per word instead of a
|
8
8
|
hash key for each word with the value representing number of occurrences.
|
9
9
|
While a less elegant solution, it leads to faster text generation. We
|
10
|
-
are also now using
|
11
|
-
|
10
|
+
are also now using ox instead of yajl-json to store the dictionary
|
11
|
+
as yajl-json does not appear to support arrays within hashes properly.
|
12
12
|
|
13
13
|
NOTE: In the transition between 0.1.3 to 0.2.0 MarkyMarkov has added the
|
14
14
|
ability to generate proper sentences (generate_n_sentences) instead of simply a
|
data/lib/marky_markov.rb
CHANGED
@@ -4,12 +4,12 @@
|
|
4
4
|
require_relative 'marky_markov/persistent_dictionary'
|
5
5
|
require_relative 'marky_markov/markov_sentence_generator'
|
6
6
|
|
7
|
-
# @version = 0.3.
|
7
|
+
# @version = 0.3.2
|
8
8
|
# @author Matt Furden
|
9
9
|
# Module containing TemporaryDictionary and Dictionary for creation of
|
10
10
|
# Markov Chain Dictionaries and generating sentences from those dictionaries.
|
11
11
|
module MarkyMarkov
|
12
|
-
VERSION = '0.3.
|
12
|
+
VERSION = '0.3.2'
|
13
13
|
|
14
14
|
class TemporaryDictionary
|
15
15
|
# Create a new Temporary Markov Chain Dictionary and sentence generator for use.
|
@@ -26,6 +26,12 @@ module MarkyMarkov
|
|
26
26
|
@sentence = MarkovSentenceGenerator.new(@dictionary)
|
27
27
|
end
|
28
28
|
|
29
|
+
# Returns the MarkovDictionary object's dictionary hash.
|
30
|
+
# @return [Hash] the MarkovDictionary hash.
|
31
|
+
def dictionary
|
32
|
+
@dictionary.dictionary
|
33
|
+
end
|
34
|
+
|
29
35
|
# Parses a given file and adds the sentences it contains to the current dictionary.
|
30
36
|
#
|
31
37
|
# @example Open a text file and add its contents to the dictionary.
|
@@ -93,13 +99,13 @@ module MarkyMarkov
|
|
93
99
|
end
|
94
100
|
|
95
101
|
# @since 0.1.4
|
96
|
-
# Modify
|
102
|
+
# Modify respond_to_missing? to include generate_n_words and generate_n_sentences
|
97
103
|
# method_missing implementation.
|
98
|
-
def
|
104
|
+
def respond_to_missing?(method_sym, include_private)
|
99
105
|
if method_sym.to_s =~ /^generate_(\d*)_word[s]*$/
|
100
|
-
|
106
|
+
true
|
101
107
|
elsif method_sym.to_s =~ /^generate_(\d*)_sentence[s]*$/
|
102
|
-
|
108
|
+
true
|
103
109
|
else
|
104
110
|
super
|
105
111
|
end
|
@@ -1,19 +1,20 @@
|
|
1
1
|
# @private
|
2
|
-
class MarkovDictionary
|
3
|
-
|
4
|
-
def initialize(depth=2)
|
5
|
-
@dictionary = {}
|
2
|
+
class MarkovDictionary # :nodoc:
|
3
|
+
attr_reader :dictionary, :depth
|
4
|
+
def initialize(depth=2) @dictionary = {}
|
6
5
|
@depth = depth
|
6
|
+
@split_words = /([.?!])|[\s]+/
|
7
|
+
@split_sentence = /(?<=[.!?])\s+/
|
7
8
|
end
|
8
9
|
|
9
10
|
# If File does not exist.
|
10
|
-
class FileNotFoundError < Exception
|
11
|
+
class FileNotFoundError < Exception # :nodoc:
|
11
12
|
end
|
12
13
|
|
13
14
|
# Open supplied text file:
|
14
15
|
def open_source(source)
|
15
16
|
if File.exists?(source)
|
16
|
-
File.open(source, "r").read.split
|
17
|
+
File.open(source, "r").read.split(@split_sentence)
|
17
18
|
else
|
18
19
|
raise FileNotFoundError.new("#{source} does not exist!")
|
19
20
|
end
|
@@ -36,10 +37,12 @@ class MarkovDictionary
|
|
36
37
|
# @example Add a string
|
37
38
|
# parse_source("Hi, how are you doing?", false)
|
38
39
|
def parse_source(source, file=true)
|
39
|
-
contents = file ? open_source(source) : contents = source.split
|
40
|
-
contents.
|
41
|
-
|
40
|
+
contents = file ? open_source(source) : contents = source.split(@split_sentence)
|
41
|
+
contents.map! {|sentence| sentence.gsub(/["()]/,"")}
|
42
|
+
contents.each do |sentence|
|
43
|
+
sentence.split(@split_words).each_cons(@depth+1) do |words|
|
44
|
+
self.add_word(words[0..-2], words[-1])
|
45
|
+
end
|
42
46
|
end
|
43
|
-
@dictionary[contents.last(@depth)] ||= []
|
44
47
|
end
|
45
48
|
end
|
@@ -1,47 +1,67 @@
|
|
1
|
+
# Messing about with the NullObject pattern, can't apply it in too many
|
2
|
+
# places in this one. Need to evaluate what else could be used in this
|
3
|
+
# aside from my first instinct of defaulting to []
|
1
4
|
# @private
|
2
|
-
class
|
5
|
+
class NullObject # :nodoc:
|
6
|
+
def method_missing (*args, &block)
|
7
|
+
self
|
8
|
+
end
|
9
|
+
def nil?; true; end
|
10
|
+
def <<(*); end
|
11
|
+
def to_str; end
|
12
|
+
def to_ary; []; end
|
13
|
+
end
|
14
|
+
|
15
|
+
# @private
|
16
|
+
NULL_OBJECT = NullObject.new # :nodoc:
|
17
|
+
|
18
|
+
# @private
|
19
|
+
class MarkovSentenceGenerator # :nodoc:
|
3
20
|
def initialize(dictionary)
|
4
21
|
@dictionary = dictionary
|
5
22
|
@depth = @dictionary.depth
|
6
23
|
end
|
7
24
|
|
8
|
-
# Returns a random word
|
25
|
+
# Returns a random word via picking a random key from the dictionary.
|
9
26
|
# In the case of the TwoWordDictionary, it returns two words to ensure
|
10
27
|
# that the sentence will have a valid two word string to pick the next
|
11
28
|
# word from.
|
12
|
-
#
|
29
|
+
#
|
13
30
|
# @return [String] a string containing a random dictionary key.
|
14
31
|
def random_word
|
15
|
-
|
16
|
-
|
32
|
+
words = @dictionary.dictionary.keys
|
33
|
+
words[rand(words.length)]
|
17
34
|
end
|
18
35
|
|
19
36
|
# Generates a random capitalized word via picking a random key from the
|
20
37
|
# dictionary and recurring if the word is lowercase.
|
21
38
|
#
|
22
39
|
# (see #random_word)
|
23
|
-
def random_capitalized_word
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
40
|
+
def random_capitalized_word
|
41
|
+
attempts = 0
|
42
|
+
# If you don't find a capitalized word after 15 attempts, just use
|
43
|
+
# a lowercase word as there may be no capitals in the dictionary.
|
44
|
+
until attempts > 15
|
45
|
+
attempts += 1
|
46
|
+
words = @dictionary.dictionary.keys
|
47
|
+
random_choice = words[rand(words.length)]
|
48
|
+
if random_choice[0] =~ /[A-Z]/
|
49
|
+
return random_choice
|
50
|
+
end
|
34
51
|
end
|
52
|
+
random_word
|
35
53
|
end
|
36
54
|
|
37
|
-
# Returns a word based upon the
|
38
|
-
#
|
55
|
+
# Returns a word based upon the likelihood of it appearing after the supplied word.
|
56
|
+
#
|
39
57
|
def weighted_random(lastword)
|
40
58
|
# If word has no words in its dictionary (last word in source text file)
|
41
59
|
# have it pick a random word to display instead.
|
42
|
-
|
43
|
-
|
44
|
-
|
60
|
+
@dictionary.dictionary.fetch(lastword, NULL_OBJECT).sample
|
61
|
+
end
|
62
|
+
|
63
|
+
def punctuation?(word)
|
64
|
+
word =~ /[.!?]/
|
45
65
|
end
|
46
66
|
|
47
67
|
# Generates a sentence of (wordcount) length using the weighted_random function.
|
@@ -52,10 +72,13 @@ class MarkovSentenceGenerator
|
|
52
72
|
sentence = []
|
53
73
|
sentence.concat(random_capitalized_word)
|
54
74
|
(wordcount-1).times do
|
55
|
-
|
56
|
-
|
57
|
-
|
75
|
+
word = weighted_random(sentence.last(@depth))
|
76
|
+
if punctuation?(word[0])
|
77
|
+
sentence[-1] = sentence.last.dup << word
|
78
|
+
elsif word.nil?
|
58
79
|
sentence.concat(random_capitalized_word)
|
80
|
+
else
|
81
|
+
sentence << word
|
59
82
|
end
|
60
83
|
end
|
61
84
|
sentence.pop(sentence.length-wordcount)
|
@@ -68,19 +91,21 @@ class MarkovSentenceGenerator
|
|
68
91
|
# @return [String] the sentence(s) generated.
|
69
92
|
def generate_sentence(sentencecount)
|
70
93
|
sentence = []
|
94
|
+
# Find out how many actual keys are in the dictionary.
|
95
|
+
key_count = @dictionary.dictionary.keys.length
|
96
|
+
# If less than 30 keys, use that plus five as your maximum sentence length.
|
97
|
+
maximum_length = key_count < 30 ? key_count + 5 : 30
|
71
98
|
sentencecount.times do
|
72
|
-
|
73
|
-
key_count = @dictionary.dictionary.keys.length
|
74
|
-
# If less than 30 keys, use that plus five as your maximum sentence length.
|
75
|
-
maximum_length = key_count < 30 ? key_count + 5 : 30
|
76
|
-
stop_at_index = sentence.count + maximum_length
|
99
|
+
wordcount = 0
|
77
100
|
sentence.concat(random_capitalized_word)
|
78
|
-
until (
|
101
|
+
until (punctuation?(sentence.last[-1])) || wordcount > maximum_length
|
102
|
+
wordcount += 1
|
79
103
|
word = weighted_random(sentence.last(@depth))
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
104
|
+
if punctuation?(word)
|
105
|
+
sentence[-1] = sentence.last.dup << word
|
106
|
+
else
|
107
|
+
sentence << word
|
108
|
+
end
|
84
109
|
end
|
85
110
|
end
|
86
111
|
sentence.join(' ')
|
@@ -2,9 +2,9 @@ require 'ox'
|
|
2
2
|
require_relative 'markov_dictionary'
|
3
3
|
|
4
4
|
# @private
|
5
|
-
class PersistentDictionary < MarkovDictionary
|
5
|
+
class PersistentDictionary < MarkovDictionary # :nodoc:
|
6
6
|
|
7
|
-
class DepthNotInRangeError < Exception
|
7
|
+
class DepthNotInRangeError < Exception # :nodoc:
|
8
8
|
end
|
9
9
|
|
10
10
|
# Creates a PersistentDictionary object using the supplied dictionary file.
|
@@ -19,6 +19,8 @@ class PersistentDictionary < MarkovDictionary
|
|
19
19
|
raise DepthNotInRangeError.new("Depth must be between 1 and 5")
|
20
20
|
end
|
21
21
|
@dictionarylocation = dictionary
|
22
|
+
@split_words = /([.?!])|[\s]+/
|
23
|
+
@split_sentence = /(?<=[.!?])\s+/
|
22
24
|
self.open_dictionary
|
23
25
|
end
|
24
26
|
|
data/marky_markov.gemspec
CHANGED
@@ -13,8 +13,8 @@ Gem::Specification.new do |s|
|
|
13
13
|
## If your rubyforge_project name is different, then edit it and comment out
|
14
14
|
## the sub! line in the Rakefile
|
15
15
|
s.name = 'marky_markov'
|
16
|
-
s.version = '0.3.
|
17
|
-
s.date = '2012-02-
|
16
|
+
s.version = '0.3.2'
|
17
|
+
s.date = '2012-02-12'
|
18
18
|
s.rubyforge_project = 'marky_markov'
|
19
19
|
|
20
20
|
## Make sure your summary is short. The description may be as long
|
@@ -69,6 +69,7 @@ Gem::Specification.new do |s|
|
|
69
69
|
marky_markov.gemspec
|
70
70
|
spec/marky_markov/markov_dictionary_spec.rb
|
71
71
|
spec/marky_markov/markov_sentence_generator_spec.rb
|
72
|
+
spec/marky_markov/marky_markov_spec.rb
|
72
73
|
spec/marky_markov/persistent_dictionary_spec.rb
|
73
74
|
spec/spec_helper.rb
|
74
75
|
spec/test.txt
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe MarkyMarkov do
|
4
|
+
context "TemporaryDictionary" do
|
5
|
+
before(:each) do
|
6
|
+
@textsource = "spec/test.txt"
|
7
|
+
@dictionary = MarkyMarkov::TemporaryDictionary.new
|
8
|
+
@onedictcompare = { ["The"] => ["cat"],
|
9
|
+
["cat"] => ["likes"],
|
10
|
+
["likes"] => ["pie"],
|
11
|
+
["pie"] => ["and"],
|
12
|
+
["and"] => ["chainsaws"],
|
13
|
+
["chainsaws"] => []}
|
14
|
+
@twodictcompare = {["The", "cat"] => ["likes"],
|
15
|
+
["and", "chainsaws"] => [],
|
16
|
+
["cat", "likes"] => ["pie"],
|
17
|
+
["likes", "pie"] => ["and"],
|
18
|
+
["pie", "and"] => ["chainsaws"]}
|
19
|
+
end
|
20
|
+
|
21
|
+
it "should be able to parse a string" do
|
22
|
+
@dictionary.parse_string "The cat likes pie and chainsaws"
|
23
|
+
@dictionary.dictionary.should eql(@twodictcompare)
|
24
|
+
end
|
25
|
+
|
26
|
+
it "should generate the right number of sentences" do
|
27
|
+
end
|
28
|
+
|
29
|
+
it "should create the right number of words" do
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
context "PersistentDictionary" do
|
34
|
+
before(:each) do
|
35
|
+
@textsource = "spec/test.txt"
|
36
|
+
@dictionary = MarkyMarkov::Dictionary.new(@textsource)
|
37
|
+
@onedictcompare = { ["The"] => ["cat"],
|
38
|
+
["cat"] => ["likes"],
|
39
|
+
["likes"] => ["pie"],
|
40
|
+
["pie"] => ["and"],
|
41
|
+
["and"] => ["chainsaws"],
|
42
|
+
["chainsaws"] => []}
|
43
|
+
@twodictcompare = {["The", "cat"] => ["likes"],
|
44
|
+
["and", "chainsaws"] => [],
|
45
|
+
["cat", "likes"] => ["pie"],
|
46
|
+
["likes", "pie"] => ["and"],
|
47
|
+
["pie", "and"] => ["chainsaws"]}
|
48
|
+
end
|
49
|
+
|
50
|
+
it "should load the saved dictionary" do
|
51
|
+
end
|
52
|
+
|
53
|
+
it "should be able to parse a string" do
|
54
|
+
@dictionary.parse_string "The cat likes pie and chainsaws"
|
55
|
+
@dictionary.dictionary.should include(@twodictcompare)
|
56
|
+
end
|
57
|
+
|
58
|
+
it "should generate the right number of sentences" do
|
59
|
+
end
|
60
|
+
|
61
|
+
it "should create the right number of words" do
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: marky_markov
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02-
|
12
|
+
date: 2012-02-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ox
|
16
|
-
requirement: &
|
16
|
+
requirement: &70164029504260 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
version: '1.4'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70164029504260
|
25
25
|
description: ! "MarkyMarkov makes it easy to generate simply Markov Chains based upon
|
26
26
|
input from\n either a source file or a string. While usable as a module in your
|
27
27
|
code it can also be called on\n from the command line and piped into like a standard
|
@@ -42,6 +42,7 @@ files:
|
|
42
42
|
- marky_markov.gemspec
|
43
43
|
- spec/marky_markov/markov_dictionary_spec.rb
|
44
44
|
- spec/marky_markov/markov_sentence_generator_spec.rb
|
45
|
+
- spec/marky_markov/marky_markov_spec.rb
|
45
46
|
- spec/marky_markov/persistent_dictionary_spec.rb
|
46
47
|
- spec/spec_helper.rb
|
47
48
|
- spec/test.txt
|