marky_markov 0.3.4 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +20 -0
- data/lib/marky_markov.rb +4 -4
- data/lib/marky_markov/markov_dictionary.rb +9 -2
- data/lib/marky_markov/markov_sentence_generator.rb +14 -3
- data/lib/marky_markov/persistent_dictionary.rb +5 -5
- data/marky_markov.gemspec +4 -3
- data/spec/marky_markov/marky_markov_spec.rb +96 -30
- data/spec/spec_helper.rb +0 -1
- metadata +14 -8
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright 2014 Matt Furden
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
Software), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/lib/marky_markov.rb
CHANGED
@@ -4,12 +4,12 @@
|
|
4
4
|
require_relative 'marky_markov/persistent_dictionary'
|
5
5
|
require_relative 'marky_markov/markov_sentence_generator'
|
6
6
|
|
7
|
-
# @version = 0.3.4
|
7
|
+
# @version = 0.3.5
|
8
8
|
# @author Matt Furden
|
9
9
|
# Module containing TemporaryDictionary and Dictionary for creation of
|
10
10
|
# Markov Chain Dictionaries and generating sentences from those dictionaries.
|
11
11
|
module MarkyMarkov
|
12
|
-
VERSION = '0.3.4'
|
12
|
+
VERSION = '0.3.5'
|
13
13
|
|
14
14
|
class TemporaryDictionary
|
15
15
|
# Create a new Temporary Markov Chain Dictionary and sentence generator for use.
|
@@ -150,8 +150,8 @@ module MarkyMarkov
|
|
150
150
|
# Alternatively, pass in a MarkyMarkov::Dictionary object in
|
151
151
|
# directly and it will delete that objects dictionary from disk.
|
152
152
|
#
|
153
|
-
# @note To ensure that someone doesn't pass in something that shouldn't
|
154
|
-
# be deleted by accident, the filetype .mmd is added to the end of the
|
153
|
+
# @note To ensure that someone doesn't pass in something that shouldn't
|
154
|
+
# be deleted by accident, the filetype .mmd is added to the end of the
|
155
155
|
# supplied argument, so do not include the extension when calling the method.
|
156
156
|
#
|
157
157
|
# @example Delete the dictionary located at '~/markov_dictionary.mmd'
|
@@ -3,7 +3,7 @@ class MarkovDictionary # :nodoc:
|
|
3
3
|
attr_reader :dictionary, :depth
|
4
4
|
def initialize(depth=2) @dictionary = {}
|
5
5
|
@depth = depth
|
6
|
-
@split_words = /([
|
6
|
+
@split_words = /(\.\s+)|(\.$)|([?!])|[\s]+/
|
7
7
|
@split_sentence = /(?<=[.!?])\s+/
|
8
8
|
end
|
9
9
|
|
@@ -39,7 +39,14 @@ class MarkovDictionary # :nodoc:
|
|
39
39
|
# @example Add a string
|
40
40
|
# parse_source("Hi, how are you doing?", false)
|
41
41
|
def parse_source(source, file=true)
|
42
|
-
|
42
|
+
if !source.nil?
|
43
|
+
contents = file ? open_source(source) : contents = source.split(@split_sentence)
|
44
|
+
else
|
45
|
+
contents = []
|
46
|
+
end
|
47
|
+
if( !contents.empty? && !['.', '!', '?'].include?( contents[-1].strip[-1] ) )
|
48
|
+
contents[-1] = contents[-1].strip + '.'
|
49
|
+
end
|
43
50
|
contents.map! {|sentence| sentence.gsub(/["()]/,"")}
|
44
51
|
contents.each do |sentence|
|
45
52
|
sentence.split(@split_words).each_cons(@depth+1) do |words|
|
@@ -15,6 +15,10 @@ end
|
|
15
15
|
# @private
|
16
16
|
NULL_OBJECT = NullObject.new # :nodoc:
|
17
17
|
|
18
|
+
# @private
|
19
|
+
class EmptyDictionaryError < Exception # :nodoc:
|
20
|
+
end
|
21
|
+
|
18
22
|
# @private
|
19
23
|
class MarkovSentenceGenerator # :nodoc:
|
20
24
|
def initialize(dictionary)
|
@@ -22,7 +26,7 @@ class MarkovSentenceGenerator # :nodoc:
|
|
22
26
|
@depth = @dictionary.depth
|
23
27
|
end
|
24
28
|
|
25
|
-
# Returns a random word
|
29
|
+
# Returns a random word via picking a random key from the dictionary.
|
26
30
|
# In the case of the TwoWordDictionary, it returns two words to ensure
|
27
31
|
# that the sentence will have a valid two word string to pick the next
|
28
32
|
# word from.
|
@@ -61,7 +65,7 @@ class MarkovSentenceGenerator # :nodoc:
|
|
61
65
|
end
|
62
66
|
|
63
67
|
def punctuation?(word)
|
64
|
-
word =~ /[
|
68
|
+
( word =~ /[!?]/ || word == '.' )
|
65
69
|
end
|
66
70
|
|
67
71
|
# Generates a sentence of (wordcount) length using the weighted_random function.
|
@@ -69,12 +73,16 @@ class MarkovSentenceGenerator # :nodoc:
|
|
69
73
|
# @param [Int] wordcount The number of words you want the generated string to contain.
|
70
74
|
# @return [String] the words, hopefully forming sentences generated.
|
71
75
|
def generate(wordcount)
|
76
|
+
if @dictionary.dictionary.empty?
|
77
|
+
raise EmptyDictionaryError.new("The dictionary is empty! Parse a source file/string!")
|
78
|
+
end
|
72
79
|
sentence = []
|
73
80
|
sentence.concat(random_capitalized_word)
|
74
81
|
(wordcount-1).times do
|
75
82
|
word = weighted_random(sentence.last(@depth))
|
76
83
|
if punctuation?(word)
|
77
84
|
sentence[-1] = sentence.last.dup << word
|
85
|
+
sentence.concat(random_capitalized_word)
|
78
86
|
elsif word.nil?
|
79
87
|
sentence.concat(random_capitalized_word)
|
80
88
|
else
|
@@ -89,7 +97,10 @@ class MarkovSentenceGenerator # :nodoc:
|
|
89
97
|
#
|
90
98
|
# @param [Int] sentencecount The number of sentences you want the generated string to contain.
|
91
99
|
# @return [String] the sentence(s) generated.
|
92
|
-
def generate_sentence(sentencecount)
|
100
|
+
def generate_sentence(sentencecount)
|
101
|
+
if @dictionary.dictionary.empty?
|
102
|
+
raise EmptyDictionaryError.new("The dictionary is empty! Parse a source file/string!")
|
103
|
+
end
|
93
104
|
sentence = []
|
94
105
|
# Find out how many actual keys are in the dictionary.
|
95
106
|
key_count = @dictionary.dictionary.keys.length
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require '
|
1
|
+
require 'msgpack'
|
2
2
|
require_relative 'markov_dictionary'
|
3
3
|
|
4
4
|
# @private
|
@@ -30,9 +30,9 @@ class PersistentDictionary < MarkovDictionary # :nodoc:
|
|
30
30
|
# otherwise it creates an empty hash.
|
31
31
|
def open_dictionary
|
32
32
|
if File.exists?(@dictionarylocation)
|
33
|
-
file = File.new(@dictionarylocation, '
|
33
|
+
file = File.new(@dictionarylocation, 'rb').read
|
34
34
|
@depth = file[0].to_i
|
35
|
-
@dictionary =
|
35
|
+
@dictionary = MessagePack.unpack(file[1..-1])
|
36
36
|
else
|
37
37
|
@dictionary = {}
|
38
38
|
end
|
@@ -41,8 +41,8 @@ class PersistentDictionary < MarkovDictionary # :nodoc:
|
|
41
41
|
# Saves the PersistentDictionary objects @dictionary hash
|
42
42
|
# to disk in JSON format.
|
43
43
|
def save_dictionary!
|
44
|
-
packed =
|
45
|
-
File.open(@dictionarylocation, '
|
44
|
+
packed = @dictionary.to_msgpack
|
45
|
+
File.open(@dictionarylocation, 'wb') do |f|
|
46
46
|
f.write @depth.to_s + packed
|
47
47
|
end
|
48
48
|
true
|
data/marky_markov.gemspec
CHANGED
@@ -13,8 +13,8 @@ Gem::Specification.new do |s|
|
|
13
13
|
## If your rubyforge_project name is different, then edit it and comment out
|
14
14
|
## the sub! line in the Rakefile
|
15
15
|
s.name = 'marky_markov'
|
16
|
-
s.version = '0.3.4'
|
17
|
-
s.date = '
|
16
|
+
s.version = '0.3.5'
|
17
|
+
s.date = '2014-03-17'
|
18
18
|
s.rubyforge_project = 'marky_markov'
|
19
19
|
|
20
20
|
## Make sure your summary is short. The description may be as long
|
@@ -49,7 +49,7 @@ Gem::Specification.new do |s|
|
|
49
49
|
|
50
50
|
## List your runtime dependencies here. Runtime dependencies are those
|
51
51
|
## that are needed for an end user to actually USE your code.
|
52
|
-
s.add_dependency('
|
52
|
+
s.add_dependency('msgpack')
|
53
53
|
|
54
54
|
## List your development dependencies here. Development dependencies are
|
55
55
|
## those that are only needed during development
|
@@ -60,6 +60,7 @@ Gem::Specification.new do |s|
|
|
60
60
|
## THE MANIFEST COMMENTS, they are used as delimiters by the task.
|
61
61
|
# = MANIFEST =
|
62
62
|
s.files = %w[
|
63
|
+
MIT-LICENSE
|
63
64
|
README.md
|
64
65
|
bin/marky_markov
|
65
66
|
lib/marky_markov.rb
|
@@ -1,65 +1,131 @@
|
|
1
1
|
require 'spec_helper'
|
2
|
+
require 'marky_markov'
|
2
3
|
|
3
4
|
describe MarkyMarkov do
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
5
|
+
|
6
|
+
let(:onedictcompare) {
|
7
|
+
{ ["The"] => ["cat"],
|
8
|
+
["cat"] => ["likes"],
|
9
|
+
["likes"] => ["pie"],
|
10
|
+
["pie"] => ["and"],
|
11
|
+
["and"] => ["chainsaws"],
|
12
|
+
["chainsaws"] => ["!"]}
|
13
|
+
}
|
14
|
+
let(:twodictcompare) {
|
15
|
+
{["The", "cat"] => ["likes"],
|
16
|
+
["and", "chainsaws"] => ["!"],
|
17
|
+
["cat", "likes"] => ["pie"],
|
18
|
+
["likes", "pie"] => ["and"],
|
19
|
+
["pie", "and"] => ["chainsaws"]}
|
20
|
+
}
|
18
21
|
|
19
22
|
context "TemporaryDictionary" do
|
20
|
-
|
21
|
-
@dictionary = MarkyMarkov::TemporaryDictionary.new
|
22
|
-
end
|
23
|
+
let(:dictionary) { MarkyMarkov::TemporaryDictionary.new }
|
23
24
|
|
24
25
|
it "should be able to parse a string" do
|
25
|
-
|
26
|
-
|
26
|
+
dictionary.parse_string "The cat likes pie and chainsaws!"
|
27
|
+
dictionary.dictionary.should eql(twodictcompare)
|
27
28
|
end
|
28
29
|
|
29
30
|
it "should generate the right number of sentences" do
|
30
|
-
|
31
|
-
sentence =
|
31
|
+
dictionary.parse_string "Hey man. How are you doing? Let's get pie!"
|
32
|
+
sentence = dictionary.generate_5_sentences
|
32
33
|
sentence.should have(5).scan(/[.?!]/)
|
33
34
|
end
|
34
35
|
|
35
36
|
it "should create the right number of words" do
|
36
|
-
|
37
|
-
sentence =
|
37
|
+
dictionary.parse_string "Hey man. How are you doing? Let's get pie!"
|
38
|
+
sentence = dictionary.generate_10_words
|
38
39
|
sentence.split.should have(10).words
|
39
40
|
end
|
41
|
+
|
42
|
+
it "should not choke on parsing empty string" do
|
43
|
+
lambda {
|
44
|
+
dictionary.parse_string ""
|
45
|
+
}.should_not raise_error
|
46
|
+
end
|
47
|
+
|
48
|
+
it "should not choke on parsing nil" do
|
49
|
+
lambda {
|
50
|
+
dictionary.parse_string nil
|
51
|
+
}.should_not raise_error
|
52
|
+
end
|
53
|
+
it "should raise EmptyDictionaryError if you try to generate from empty dictionary" do
|
54
|
+
lambda {
|
55
|
+
dictionary.parse_string nil
|
56
|
+
dictionary.generate_1_sentences
|
57
|
+
}.should raise_error(EmptyDictionaryError)
|
58
|
+
end
|
59
|
+
|
60
|
+
context "if the sentence doesn't finish with a punctuation" do
|
61
|
+
# null objects?
|
62
|
+
it "should not have trailing spaces in a row" do
|
63
|
+
dictionary.parse_string "I have a pen somewhere "
|
64
|
+
dictionary.generate_4_sentences.should_not match( / / )
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
context "parsing web addresses" do
|
69
|
+
it "should treat 'example.net' as single word" do
|
70
|
+
dictionary.parse_string "i am at example.net now."
|
71
|
+
dictionary.dictionary.values.should include( ['example.net'] )
|
72
|
+
end
|
73
|
+
it "should not break up 'example.net'" do
|
74
|
+
dictionary.parse_string "i am at example.net now."
|
75
|
+
dictionary.dictionary.values.should_not include( ['example'] )
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
context "handling 'http://...'" do
|
80
|
+
# previously, anything containing '.' was considered punctuation
|
81
|
+
it "should generate sentence with space before 'http//:...'" do
|
82
|
+
dictionary.parse_string "I'm viewing some stuff which is at http://example.net now."
|
83
|
+
dictionary.generate_4_sentences.should_not match( /\whttp/ )
|
84
|
+
end
|
85
|
+
end
|
86
|
+
context "when using key depth of 1 word" do
|
87
|
+
let(:depth1dict) { MarkyMarkov::TemporaryDictionary.new(1) }
|
88
|
+
it "should not raise 'negative array size'" do
|
89
|
+
depth1dict.parse_string "short text. with many. full. stops."
|
90
|
+
lambda {
|
91
|
+
depth1dict.generate_15_words
|
92
|
+
}.should_not raise_error
|
93
|
+
end
|
94
|
+
end
|
40
95
|
end
|
41
96
|
|
42
97
|
context "PersistentDictionary" do
|
43
|
-
|
44
|
-
|
45
|
-
|
98
|
+
let!(:dictionary) do |dict|
|
99
|
+
MarkyMarkov::Dictionary.new("spec/data/temptextdict").tap do |d|
|
100
|
+
d.parse_file "spec/data/test.txt"
|
101
|
+
end
|
46
102
|
end
|
47
103
|
|
48
104
|
it "should be able to save a dictionary" do
|
49
|
-
|
105
|
+
dictionary.save_dictionary!.should eql(true)
|
50
106
|
end
|
51
107
|
|
52
108
|
it "should be able to load an existing dictionary" do
|
53
109
|
otherdict = MarkyMarkov::Dictionary.new("spec/data/textdictcompare")
|
54
|
-
|
110
|
+
dictionary.dictionary.should eql(otherdict.dictionary)
|
55
111
|
end
|
56
112
|
|
57
113
|
it "should load the saved dictionary" do
|
58
|
-
|
114
|
+
dictionary.dictionary.should include(twodictcompare)
|
115
|
+
end
|
116
|
+
|
117
|
+
it "should have the correct failure when dictionary is empty: words" do
|
118
|
+
emptydict = MarkyMarkov::Dictionary.new("spec/data/nothing")
|
119
|
+
expect {emptydict.generate_10_words}.to raise_error(EmptyDictionaryError)
|
120
|
+
end
|
121
|
+
|
122
|
+
it "should have the correct failure when dictionary is empty: sentences" do
|
123
|
+
emptydict = MarkyMarkov::Dictionary.new("spec/data/nothing")
|
124
|
+
expect {emptydict.generate_10_sentences}.to raise_error(EmptyDictionaryError)
|
59
125
|
end
|
60
126
|
|
61
127
|
after do
|
62
|
-
PersistentDictionary.delete_dictionary!(
|
128
|
+
PersistentDictionary.delete_dictionary!(dictionary)
|
63
129
|
end
|
64
130
|
end
|
65
131
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: marky_markov
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.4
|
4
|
+
version: 0.3.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,19 +9,24 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2014-03-17 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
16
|
-
requirement:
|
15
|
+
name: msgpack
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
|
-
- -
|
19
|
+
- - ! '>='
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version: '
|
21
|
+
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements:
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
25
30
|
description: ! "MarkyMarkov makes it easy to generate simply Markov Chains based upon
|
26
31
|
input from\n either a source file or a string. While usable as a module in your
|
27
32
|
code it can also be called on\n from the command line and piped into like a standard
|
@@ -33,6 +38,7 @@ extensions: []
|
|
33
38
|
extra_rdoc_files:
|
34
39
|
- README.md
|
35
40
|
files:
|
41
|
+
- MIT-LICENSE
|
36
42
|
- README.md
|
37
43
|
- bin/marky_markov
|
38
44
|
- lib/marky_markov.rb
|
@@ -66,7 +72,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
66
72
|
version: '0'
|
67
73
|
requirements: []
|
68
74
|
rubyforge_project: marky_markov
|
69
|
-
rubygems_version: 1.8.
|
75
|
+
rubygems_version: 1.8.24
|
70
76
|
signing_key:
|
71
77
|
specification_version: 2
|
72
78
|
summary: Simple Markov Chain generation available in the command-line
|