marky_markov 0.3.4 → 0.3.5
Sign up to get free protection for your applications and to get access to all the features.
- data/MIT-LICENSE +20 -0
- data/lib/marky_markov.rb +4 -4
- data/lib/marky_markov/markov_dictionary.rb +9 -2
- data/lib/marky_markov/markov_sentence_generator.rb +14 -3
- data/lib/marky_markov/persistent_dictionary.rb +5 -5
- data/marky_markov.gemspec +4 -3
- data/spec/marky_markov/marky_markov_spec.rb +96 -30
- data/spec/spec_helper.rb +0 -1
- metadata +14 -8
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright 2014 Matt Furden
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/lib/marky_markov.rb
CHANGED
@@ -4,12 +4,12 @@
|
|
4
4
|
require_relative 'marky_markov/persistent_dictionary'
|
5
5
|
require_relative 'marky_markov/markov_sentence_generator'
|
6
6
|
|
7
|
-
# @version = 0.3.
|
7
|
+
# @version = 0.3.5
|
8
8
|
# @author Matt Furden
|
9
9
|
# Module containing TemporaryDictionary and Dictionary for creation of
|
10
10
|
# Markov Chain Dictionaries and generating sentences from those dictionaries.
|
11
11
|
module MarkyMarkov
|
12
|
-
VERSION = '0.3.
|
12
|
+
VERSION = '0.3.5'
|
13
13
|
|
14
14
|
class TemporaryDictionary
|
15
15
|
# Create a new Temporary Markov Chain Dictionary and sentence generator for use.
|
@@ -150,8 +150,8 @@ module MarkyMarkov
|
|
150
150
|
# Alternatively, pass in a MarkyMarkov::Dictionary object in
|
151
151
|
# directly and it will delete that object's dictionary from disk.
|
152
152
|
#
|
153
|
-
# @note To ensure that someone doesn't pass in something that shouldn't
|
154
|
-
# be deleted by accident, the filetype .mmd is added to the end of the
|
153
|
+
# @note To ensure that someone doesn't pass in something that shouldn't
|
154
|
+
# be deleted by accident, the filetype .mmd is added to the end of the
|
155
155
|
# supplied argument, so do not include the extension when calling the method.
|
156
156
|
#
|
157
157
|
# @example Delete the dictionary located at '~/markov_dictionary.mmd'
|
@@ -3,7 +3,7 @@ class MarkovDictionary # :nodoc:
|
|
3
3
|
attr_reader :dictionary, :depth
|
4
4
|
def initialize(depth=2) @dictionary = {}
|
5
5
|
@depth = depth
|
6
|
-
@split_words = /([
|
6
|
+
@split_words = /(\.\s+)|(\.$)|([?!])|[\s]+/
|
7
7
|
@split_sentence = /(?<=[.!?])\s+/
|
8
8
|
end
|
9
9
|
|
@@ -39,7 +39,14 @@ class MarkovDictionary # :nodoc:
|
|
39
39
|
# @example Add a string
|
40
40
|
# parse_source("Hi, how are you doing?", false)
|
41
41
|
def parse_source(source, file=true)
|
42
|
-
|
42
|
+
if !source.nil?
|
43
|
+
contents = file ? open_source(source) : contents = source.split(@split_sentence)
|
44
|
+
else
|
45
|
+
contents = []
|
46
|
+
end
|
47
|
+
if( !contents.empty? && !['.', '!', '?'].include?( contents[-1].strip[-1] ) )
|
48
|
+
contents[-1] = contents[-1].strip + '.'
|
49
|
+
end
|
43
50
|
contents.map! {|sentence| sentence.gsub(/["()]/,"")}
|
44
51
|
contents.each do |sentence|
|
45
52
|
sentence.split(@split_words).each_cons(@depth+1) do |words|
|
@@ -15,6 +15,10 @@ end
|
|
15
15
|
# @private
|
16
16
|
NULL_OBJECT = NullObject.new # :nodoc:
|
17
17
|
|
18
|
+
# @private
|
19
|
+
class EmptyDictionaryError < Exception # :nodoc:
|
20
|
+
end
|
21
|
+
|
18
22
|
# @private
|
19
23
|
class MarkovSentenceGenerator # :nodoc:
|
20
24
|
def initialize(dictionary)
|
@@ -22,7 +26,7 @@ class MarkovSentenceGenerator # :nodoc:
|
|
22
26
|
@depth = @dictionary.depth
|
23
27
|
end
|
24
28
|
|
25
|
-
# Returns a random word
|
29
|
+
# Returns a random word via picking a random key from the dictionary.
|
26
30
|
# In the case of the TwoWordDictionary, it returns two words to ensure
|
27
31
|
# that the sentence will have a valid two word string to pick the next
|
28
32
|
# word from.
|
@@ -61,7 +65,7 @@ class MarkovSentenceGenerator # :nodoc:
|
|
61
65
|
end
|
62
66
|
|
63
67
|
def punctuation?(word)
|
64
|
-
word =~ /[
|
68
|
+
( word =~ /[!?]/ || word == '.' )
|
65
69
|
end
|
66
70
|
|
67
71
|
# Generates a sentence of (wordcount) length using the weighted_random function.
|
@@ -69,12 +73,16 @@ class MarkovSentenceGenerator # :nodoc:
|
|
69
73
|
# @param [Int] wordcount The number of words you want the generated string to contain.
|
70
74
|
# @return [String] the words, hopefully forming sentences generated.
|
71
75
|
def generate(wordcount)
|
76
|
+
if @dictionary.dictionary.empty?
|
77
|
+
raise EmptyDictionaryError.new("The dictionary is empty! Parse a source file/string!")
|
78
|
+
end
|
72
79
|
sentence = []
|
73
80
|
sentence.concat(random_capitalized_word)
|
74
81
|
(wordcount-1).times do
|
75
82
|
word = weighted_random(sentence.last(@depth))
|
76
83
|
if punctuation?(word)
|
77
84
|
sentence[-1] = sentence.last.dup << word
|
85
|
+
sentence.concat(random_capitalized_word)
|
78
86
|
elsif word.nil?
|
79
87
|
sentence.concat(random_capitalized_word)
|
80
88
|
else
|
@@ -89,7 +97,10 @@ class MarkovSentenceGenerator # :nodoc:
|
|
89
97
|
#
|
90
98
|
# @param [Int] sentencecount The number of sentences you want the generated string to contain.
|
91
99
|
# @return [String] the sentence(s) generated.
|
92
|
-
def generate_sentence(sentencecount)
|
100
|
+
def generate_sentence(sentencecount)
|
101
|
+
if @dictionary.dictionary.empty?
|
102
|
+
raise EmptyDictionaryError.new("The dictionary is empty! Parse a source file/string!")
|
103
|
+
end
|
93
104
|
sentence = []
|
94
105
|
# Find out how many actual keys are in the dictionary.
|
95
106
|
key_count = @dictionary.dictionary.keys.length
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require '
|
1
|
+
require 'msgpack'
|
2
2
|
require_relative 'markov_dictionary'
|
3
3
|
|
4
4
|
# @private
|
@@ -30,9 +30,9 @@ class PersistentDictionary < MarkovDictionary # :nodoc:
|
|
30
30
|
# otherwise it creates an empty hash.
|
31
31
|
def open_dictionary
|
32
32
|
if File.exists?(@dictionarylocation)
|
33
|
-
file = File.new(@dictionarylocation, '
|
33
|
+
file = File.new(@dictionarylocation, 'rb').read
|
34
34
|
@depth = file[0].to_i
|
35
|
-
@dictionary =
|
35
|
+
@dictionary = MessagePack.unpack(file[1..-1])
|
36
36
|
else
|
37
37
|
@dictionary = {}
|
38
38
|
end
|
@@ -41,8 +41,8 @@ class PersistentDictionary < MarkovDictionary # :nodoc:
|
|
41
41
|
# Saves the PersistentDictionary object's @dictionary hash
|
42
42
|
# to disk in MessagePack format (prefixed with the dictionary depth).
|
43
43
|
def save_dictionary!
|
44
|
-
packed =
|
45
|
-
File.open(@dictionarylocation, '
|
44
|
+
packed = @dictionary.to_msgpack
|
45
|
+
File.open(@dictionarylocation, 'wb') do |f|
|
46
46
|
f.write @depth.to_s + packed
|
47
47
|
end
|
48
48
|
true
|
data/marky_markov.gemspec
CHANGED
@@ -13,8 +13,8 @@ Gem::Specification.new do |s|
|
|
13
13
|
## If your rubyforge_project name is different, then edit it and comment out
|
14
14
|
## the sub! line in the Rakefile
|
15
15
|
s.name = 'marky_markov'
|
16
|
-
s.version = '0.3.
|
17
|
-
s.date = '
|
16
|
+
s.version = '0.3.5'
|
17
|
+
s.date = '2014-03-17'
|
18
18
|
s.rubyforge_project = 'marky_markov'
|
19
19
|
|
20
20
|
## Make sure your summary is short. The description may be as long
|
@@ -49,7 +49,7 @@ Gem::Specification.new do |s|
|
|
49
49
|
|
50
50
|
## List your runtime dependencies here. Runtime dependencies are those
|
51
51
|
## that are needed for an end user to actually USE your code.
|
52
|
-
s.add_dependency('
|
52
|
+
s.add_dependency('msgpack')
|
53
53
|
|
54
54
|
## List your development dependencies here. Development dependencies are
|
55
55
|
## those that are only needed during development
|
@@ -60,6 +60,7 @@ Gem::Specification.new do |s|
|
|
60
60
|
## THE MANIFEST COMMENTS, they are used as delimiters by the task.
|
61
61
|
# = MANIFEST =
|
62
62
|
s.files = %w[
|
63
|
+
MIT-LICENSE
|
63
64
|
README.md
|
64
65
|
bin/marky_markov
|
65
66
|
lib/marky_markov.rb
|
@@ -1,65 +1,131 @@
|
|
1
1
|
require 'spec_helper'
|
2
|
+
require 'marky_markov'
|
2
3
|
|
3
4
|
describe MarkyMarkov do
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
5
|
+
|
6
|
+
let(:onedictcompare) {
|
7
|
+
{ ["The"] => ["cat"],
|
8
|
+
["cat"] => ["likes"],
|
9
|
+
["likes"] => ["pie"],
|
10
|
+
["pie"] => ["and"],
|
11
|
+
["and"] => ["chainsaws"],
|
12
|
+
["chainsaws"] => ["!"]}
|
13
|
+
}
|
14
|
+
let(:twodictcompare) {
|
15
|
+
{["The", "cat"] => ["likes"],
|
16
|
+
["and", "chainsaws"] => ["!"],
|
17
|
+
["cat", "likes"] => ["pie"],
|
18
|
+
["likes", "pie"] => ["and"],
|
19
|
+
["pie", "and"] => ["chainsaws"]}
|
20
|
+
}
|
18
21
|
|
19
22
|
context "TemporaryDictionary" do
|
20
|
-
|
21
|
-
@dictionary = MarkyMarkov::TemporaryDictionary.new
|
22
|
-
end
|
23
|
+
let(:dictionary) { MarkyMarkov::TemporaryDictionary.new }
|
23
24
|
|
24
25
|
it "should be able to parse a string" do
|
25
|
-
|
26
|
-
|
26
|
+
dictionary.parse_string "The cat likes pie and chainsaws!"
|
27
|
+
dictionary.dictionary.should eql(twodictcompare)
|
27
28
|
end
|
28
29
|
|
29
30
|
it "should generate the right number of sentences" do
|
30
|
-
|
31
|
-
sentence =
|
31
|
+
dictionary.parse_string "Hey man. How are you doing? Let's get pie!"
|
32
|
+
sentence = dictionary.generate_5_sentences
|
32
33
|
sentence.should have(5).scan(/[.?!]/)
|
33
34
|
end
|
34
35
|
|
35
36
|
it "should create the right number of words" do
|
36
|
-
|
37
|
-
sentence =
|
37
|
+
dictionary.parse_string "Hey man. How are you doing? Let's get pie!"
|
38
|
+
sentence = dictionary.generate_10_words
|
38
39
|
sentence.split.should have(10).words
|
39
40
|
end
|
41
|
+
|
42
|
+
it "should not choke on parsing empty string" do
|
43
|
+
lambda {
|
44
|
+
dictionary.parse_string ""
|
45
|
+
}.should_not raise_error
|
46
|
+
end
|
47
|
+
|
48
|
+
it "should not choke on parsing nil" do
|
49
|
+
lambda {
|
50
|
+
dictionary.parse_string nil
|
51
|
+
}.should_not raise_error
|
52
|
+
end
|
53
|
+
it "should raise EmptyDictionaryError if you try to generate from empty dictionary" do
|
54
|
+
lambda {
|
55
|
+
dictionary.parse_string nil
|
56
|
+
dictionary.generate_1_sentences
|
57
|
+
}.should raise_error(EmptyDictionaryError)
|
58
|
+
end
|
59
|
+
|
60
|
+
context "if the sentence doesn't finish with a punctuation" do
|
61
|
+
# null objects?
|
62
|
+
it "should not have trailing spaces in a row" do
|
63
|
+
dictionary.parse_string "I have a pen somewhere "
|
64
|
+
dictionary.generate_4_sentences.should_not match( / / )
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
context "parsing web addresses" do
|
69
|
+
it "should treat 'example.net' as single word" do
|
70
|
+
dictionary.parse_string "i am at example.net now."
|
71
|
+
dictionary.dictionary.values.should include( ['example.net'] )
|
72
|
+
end
|
73
|
+
it "should not break up 'example.net'" do
|
74
|
+
dictionary.parse_string "i am at example.net now."
|
75
|
+
dictionary.dictionary.values.should_not include( ['example'] )
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
context "handling 'http://...'" do
|
80
|
+
# previously, anything containing '.' was considered punctuation
|
81
|
+
it "should generate sentence with space before 'http//:...'" do
|
82
|
+
dictionary.parse_string "I'm viewing some stuff which is at http://example.net now."
|
83
|
+
dictionary.generate_4_sentences.should_not match( /\whttp/ )
|
84
|
+
end
|
85
|
+
end
|
86
|
+
context "when using key depth of 1 word" do
|
87
|
+
let(:depth1dict) { MarkyMarkov::TemporaryDictionary.new(1) }
|
88
|
+
it "should not raise 'negative array size'" do
|
89
|
+
depth1dict.parse_string "short text. with many. full. stops."
|
90
|
+
lambda {
|
91
|
+
depth1dict.generate_15_words
|
92
|
+
}.should_not raise_error
|
93
|
+
end
|
94
|
+
end
|
40
95
|
end
|
41
96
|
|
42
97
|
context "PersistentDictionary" do
|
43
|
-
|
44
|
-
|
45
|
-
|
98
|
+
let!(:dictionary) do |dict|
|
99
|
+
MarkyMarkov::Dictionary.new("spec/data/temptextdict").tap do |d|
|
100
|
+
d.parse_file "spec/data/test.txt"
|
101
|
+
end
|
46
102
|
end
|
47
103
|
|
48
104
|
it "should be able to save a dictionary" do
|
49
|
-
|
105
|
+
dictionary.save_dictionary!.should eql(true)
|
50
106
|
end
|
51
107
|
|
52
108
|
it "should be able to load an existing dictionary" do
|
53
109
|
otherdict = MarkyMarkov::Dictionary.new("spec/data/textdictcompare")
|
54
|
-
|
110
|
+
dictionary.dictionary.should eql(otherdict.dictionary)
|
55
111
|
end
|
56
112
|
|
57
113
|
it "should load the saved dictionary" do
|
58
|
-
|
114
|
+
dictionary.dictionary.should include(twodictcompare)
|
115
|
+
end
|
116
|
+
|
117
|
+
it "should have the correct failure when dictionary is empty: words" do
|
118
|
+
emptydict = MarkyMarkov::Dictionary.new("spec/data/nothing")
|
119
|
+
expect {emptydict.generate_10_words}.to raise_error(EmptyDictionaryError)
|
120
|
+
end
|
121
|
+
|
122
|
+
it "should have the correct failure when dictionary is empty: sentences" do
|
123
|
+
emptydict = MarkyMarkov::Dictionary.new("spec/data/nothing")
|
124
|
+
expect {emptydict.generate_10_sentences}.to raise_error(EmptyDictionaryError)
|
59
125
|
end
|
60
126
|
|
61
127
|
after do
|
62
|
-
PersistentDictionary.delete_dictionary!(
|
128
|
+
PersistentDictionary.delete_dictionary!(dictionary)
|
63
129
|
end
|
64
130
|
end
|
65
131
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: marky_markov
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,19 +9,24 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2014-03-17 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
16
|
-
requirement:
|
15
|
+
name: msgpack
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
|
-
- -
|
19
|
+
- - ! '>='
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version: '
|
21
|
+
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements:
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
25
30
|
description: ! "MarkyMarkov makes it easy to generate simply Markov Chains based upon
|
26
31
|
input from\n either a source file or a string. While usable as a module in your
|
27
32
|
code it can also be called on\n from the command line and piped into like a standard
|
@@ -33,6 +38,7 @@ extensions: []
|
|
33
38
|
extra_rdoc_files:
|
34
39
|
- README.md
|
35
40
|
files:
|
41
|
+
- MIT-LICENSE
|
36
42
|
- README.md
|
37
43
|
- bin/marky_markov
|
38
44
|
- lib/marky_markov.rb
|
@@ -66,7 +72,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
66
72
|
version: '0'
|
67
73
|
requirements: []
|
68
74
|
rubyforge_project: marky_markov
|
69
|
-
rubygems_version: 1.8.
|
75
|
+
rubygems_version: 1.8.24
|
70
76
|
signing_key:
|
71
77
|
specification_version: 2
|
72
78
|
summary: Simple Markov Chain generation available in the command-line
|