marky_markov 0.1.3 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +30 -18
- data/bin/marky_markov +12 -18
- data/lib/marky_markov.rb +32 -9
- data/lib/marky_markov/markov_sentence_generator.rb +31 -2
- data/lib/marky_markov/persistent_dictionary.rb +2 -2
- data/spec/test.txt +1 -0
- metadata +4 -3
data/README.md
CHANGED
@@ -4,6 +4,11 @@ Marky Markov and the Funky Sentences
|
|
4
4
|
Marky Markov is a naïve experiment in Markov Chain generation implemented
|
5
5
|
in Ruby. It can be used both from the command-line and as a library within your code.
|
6
6
|
|
7
|
+
NOTE: In the transition from 0.1.3 to 0.2.0, MarkyMarkov has added the
|
8
|
+
ability to generate proper sentences (generate_n_sentences) instead of simply a
|
9
|
+
maximum number of words. The command-line app has changed to sentences as its default
|
10
|
+
behavior.
|
11
|
+
|
7
12
|
# Installation
|
8
13
|
|
9
14
|
gem install marky_markov
|
@@ -17,8 +22,8 @@ temporary dictionary that will not be saved to disk.
|
|
17
22
|
markov = MarkyMarkov::TemporaryDictionary.new
|
18
23
|
markov.parse_string "These words will be added to the temporary dictionary."
|
19
24
|
markov.parse_file "filename.txt"
|
20
|
-
puts markov.
|
21
|
-
puts markov.generate_n_words
|
25
|
+
puts markov.generate_n_sentences 5
|
26
|
+
puts markov.generate_n_words 200
|
22
27
|
markov.clear!
|
23
28
|
|
24
29
|
Dictionary creates or opens a persistent dictionary at a location defined by its
|
@@ -31,18 +36,22 @@ of the dictionary name.
|
|
31
36
|
markov.parse_file "#{ENV["HOME"]}/Documents/largefileindocs.txt"
|
32
37
|
markov.parse_file "anotherfileyay.txt"
|
33
38
|
puts markov.generate_n_words 10
|
39
|
+
puts markov.generate_n_sentences 2
|
34
40
|
markov.save_dictionary! # Saves the modified dictionary/creates one if it didn't exist.
|
35
41
|
|
36
|
-
If you keep looking at generate_n_words and wonder why you can't put a
|
42
|
+
If you keep looking at generate_n_words or generate_n_sentences and wonder why you can't put a
|
37
43
|
number in there, well, you can!
|
38
44
|
|
45
|
+
markov.generate_7_sentences
|
39
46
|
markov.generate_20_words
|
40
47
|
|
41
48
|
The default dictionary depth is two words.
|
42
49
|
`{"I hope" => {"this" => 1},
|
43
50
|
"hope this" => {"makes" => 1},
|
44
51
|
"this makes" => {"sense" => 1}}`
|
45
|
-
but it can be set to a depth between 1 and
|
52
|
+
but it can be set to a depth between 1 and 5 upon dictionary creation,
|
53
|
+
though really any higher than 3 and it starts to simply print passages
|
54
|
+
from the source text.
|
46
55
|
|
47
56
|
markov = MarkyMarkov::Dictionary.new('dictionary', 3)
|
48
57
|
|
@@ -72,18 +81,18 @@ on different files to continue adding to your dictionary file.
|
|
72
81
|
|
73
82
|
## Say Some Words
|
74
83
|
|
75
|
-
marky_markov speak -
|
84
|
+
marky_markov speak -c 3
|
76
85
|
|
77
|
-
Will use the dictionary to create
|
78
|
-
is passed it will default to
|
86
|
+
Will use the dictionary to create three sentences. If no number
|
87
|
+
is passed it will default to five sentences.
|
79
88
|
|
80
89
|
## Temporary Dictionaries
|
81
90
|
|
82
|
-
marky_markov speak -s other-file.txt -
|
91
|
+
marky_markov speak -s other-file.txt -c 8
|
83
92
|
|
84
93
|
Generates a temporary dictionary based on the source file passed to it
|
85
94
|
and uses that to speak. Here we're loading other-file.txt and
|
86
|
-
restricting the generated
|
95
|
+
restricting the generated text to 8 sentences.
|
87
96
|
|
88
97
|
## STDIN, Pipe Away!
|
89
98
|
|
@@ -99,12 +108,15 @@ though the results are nonsense without a substantial text base to work
|
|
99
108
|
from.
|
100
109
|
|
101
110
|
Usage: marky_markov COMMAND [OPTIONS]
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
+
|
112
|
+
Commands:
|
113
|
+
speak: Generate Markov Chain sentence (5 sentences by default)
|
114
|
+
listen [sentence]: Generate Markov Chain sentence from supplied string.
|
115
|
+
read [file]: Add words to dictionary from supplied text file
|
116
|
+
|
117
|
+
Options
|
118
|
+
-d, --dictionary LOCATION Use custom dictionary location
|
119
|
+
-c, --sentencecount NUMBER Set number of sentences generated
|
120
|
+
-s, --source FILE Generate and use temporary dictionary from source text
|
121
|
+
--reset WARNING: Deletes default dictionary.
|
122
|
+
-h, --help Display this screen
|
data/bin/marky_markov
CHANGED
@@ -6,23 +6,23 @@ require 'marky_markov'
|
|
6
6
|
|
7
7
|
options = {}
|
8
8
|
opt_parser = OptionParser.new do |opts|
|
9
|
-
opts.banner = "Usage:
|
9
|
+
opts.banner = "Usage: marky_markov COMMAND [OPTIONS]"
|
10
10
|
opts.separator ""
|
11
11
|
opts.separator "Commands:"
|
12
|
-
opts.separator " speak: Generate Markov Chain sentence (
|
12
|
+
opts.separator " speak: Generate Markov Chain sentence (5 sentences by default)"
|
13
13
|
opts.separator " listen [sentence]: Generate Markov Chain sentence from supplied string."
|
14
14
|
opts.separator " read [file]: Add words to dictionary from supplied text file"
|
15
15
|
opts.separator ""
|
16
16
|
opts.separator "Options"
|
17
17
|
|
18
18
|
options[:dictionary] = "#{ENV["HOME"]}/.marky_markov_dictionary"
|
19
|
-
opts.on('-
|
19
|
+
opts.on('-d', '--dictionary LOCATION', 'Use custom dictionary location') do |file|
|
20
20
|
options[:dictionary] = file
|
21
21
|
end
|
22
22
|
|
23
|
-
options[:
|
24
|
-
opts.on('-
|
25
|
-
options[:
|
23
|
+
options[:sentencecount] = 5
|
24
|
+
opts.on('-c', '--sentencecount NUMBER', 'Set number of sentences generated') do |number|
|
25
|
+
options[:sentencecount] = number.to_i
|
26
26
|
end
|
27
27
|
|
28
28
|
options[:source] = nil
|
@@ -31,12 +31,6 @@ opt_parser = OptionParser.new do |opts|
|
|
31
31
|
options[:source] = file
|
32
32
|
end
|
33
33
|
|
34
|
-
options[:depth] = 2
|
35
|
-
opts.on('-d', '--depth 1-9', 'Set dictionary depth. The higher the number
|
36
|
-
, the less random the sentence will be. Must be between 1 and 9.') do |number|
|
37
|
-
options[:depth] = number.to_i
|
38
|
-
end
|
39
|
-
|
40
34
|
options[:resetdictionary] = false
|
41
35
|
opts.on('--reset', "WARNING: Deletes default dictionary." ) do
|
42
36
|
options[:resetdictionary] = true
|
@@ -58,7 +52,7 @@ end
|
|
58
52
|
case ARGV[0]
|
59
53
|
when "speak"
|
60
54
|
if options[:source]
|
61
|
-
markov = MarkyMarkov::TemporaryDictionary.new
|
55
|
+
markov = MarkyMarkov::TemporaryDictionary.new
|
62
56
|
markov.parse_file(options[:source])
|
63
57
|
else
|
64
58
|
unless File.exists?("#{options[:dictionary]}.mmd")
|
@@ -68,22 +62,22 @@ when "speak"
|
|
68
62
|
end
|
69
63
|
markov = MarkyMarkov::Dictionary.new(options[:dictionary])
|
70
64
|
end
|
71
|
-
STDOUT.puts markov.
|
65
|
+
STDOUT.puts markov.generate_n_sentences(options[:sentencecount])
|
72
66
|
when "read"
|
73
67
|
source = ARGV[1] || options[:source]
|
74
|
-
markov = MarkyMarkov::Dictionary.new(options[:dictionary]
|
68
|
+
markov = MarkyMarkov::Dictionary.new(options[:dictionary])
|
75
69
|
markov.parse_file(source)
|
76
70
|
markov.save_dictionary!
|
77
71
|
STDOUT.puts "Added #{source} to dictionary."
|
78
72
|
when "listen"
|
79
73
|
markov = MarkyMarkov::TemporaryDictionary.new
|
80
74
|
markov.parse_string(STDIN.tty? ? ARGV[1] : STDIN.read)
|
81
|
-
STDOUT.puts markov.
|
75
|
+
STDOUT.puts markov.generate_n_sentences(options[:sentencecount])
|
82
76
|
else
|
83
77
|
unless STDIN.tty?
|
84
|
-
markov = MarkyMarkov::TemporaryDictionary.new
|
78
|
+
markov = MarkyMarkov::TemporaryDictionary.new
|
85
79
|
markov.parse_string(STDIN.read)
|
86
|
-
STDOUT.puts markov.
|
80
|
+
STDOUT.puts markov.generate_n_sentences(options[:sentencecount])
|
87
81
|
else
|
88
82
|
STDOUT.puts opt_parser
|
89
83
|
end
|
data/lib/marky_markov.rb
CHANGED
@@ -4,12 +4,12 @@
|
|
4
4
|
require_relative 'marky_markov/persistent_dictionary'
|
5
5
|
require_relative 'marky_markov/markov_sentence_generator'
|
6
6
|
|
7
|
-
# @version = 0.
|
7
|
+
# @version = 0.2.0
|
8
8
|
# @author Matt Furden
|
9
9
|
# Module containing TemporaryDictionary and Dictionary for creation of
|
10
10
|
# Markov Chain Dictionaries and generating sentences from those dictionaries.
|
11
11
|
module MarkyMarkov
|
12
|
-
VERSION = '0.
|
12
|
+
VERSION = '0.2.0'
|
13
13
|
|
14
14
|
class TemporaryDictionary
|
15
15
|
# Create a new Temporary Markov Chain Dictionary and sentence generator for use.
|
@@ -49,39 +49,62 @@ module MarkyMarkov
|
|
49
49
|
#
|
50
50
|
# @example Generate a 40 word long string of words.
|
51
51
|
# markov.generate_n_words(40)
|
52
|
+
# @example Generate a 10 word long string of words with method_missing.
|
53
|
+
# markov.generate_10_words
|
52
54
|
# @param [Int] wordcount the number of words you want generated.
|
53
55
|
# @return [String] the sentence generated by the dictionary.
|
54
56
|
def generate_n_words(wordcount)
|
55
57
|
@sentence.generate(wordcount)
|
56
58
|
end
|
57
59
|
|
58
|
-
#
|
59
|
-
#
|
60
|
+
# Generates n sentences using the dictionary generated via
|
61
|
+
# parse_string or parse_file. A sentence is defined as beginning with a
|
62
|
+
# capitalized word and ending with either a . ! or ?
|
60
63
|
#
|
61
|
-
# @since 0.
|
64
|
+
# @since 0.2.0
|
65
|
+
# @example Generate three sentences.
|
66
|
+
# markov.generate_n_sentences(3)
|
67
|
+
# @example Generate six sentences with method_missing.
|
68
|
+
# markov.generate_6_sentences
|
69
|
+
# @param [Int] sentencecount the number of sentences you want generated.
|
70
|
+
# @return [String] the sentences generated by the dictionary.
|
71
|
+
def generate_n_sentences(sentencecount)
|
72
|
+
@sentence.generate_sentence(sentencecount)
|
73
|
+
end
|
74
|
+
|
75
|
+
# Dynamically call generate_n_words or generate_n_sentences
|
76
|
+
# if an Int is substituted for the n in the method call.
|
77
|
+
#
|
78
|
+
# @since 0.1.4
|
62
79
|
# @example Generate a 40 and a 1 word long string of words.
|
63
80
|
# markov.generate_40_words
|
64
81
|
# markov.generate_1_word
|
82
|
+
# @example Generate 2 sentences
|
83
|
+
# markov.generate_2_sentences
|
65
84
|
# @return [String] the sentence generated by the dictionary.
|
66
85
|
def method_missing(method_sym, *args, &block)
|
67
86
|
if method_sym.to_s =~ /^generate_(\d*)_word[s]*$/
|
68
87
|
generate_n_words($1.to_i)
|
88
|
+
elsif method_sym.to_s =~ /^generate_(\d*)_sentence[s]*$/
|
89
|
+
generate_n_sentences($1.to_i)
|
69
90
|
else
|
70
91
|
super
|
71
92
|
end
|
72
93
|
end
|
73
94
|
|
74
|
-
# @since 0.1.
|
75
|
-
# Modify respond_to? to include generate_n_words
|
95
|
+
# @since 0.1.4
|
96
|
+
# Modify respond_to? to include generate_n_words and generate_n_sentences
|
97
|
+
# method_missing implementation.
|
76
98
|
def respond_to?(method_sym, include_private = false)
|
77
99
|
if method_sym.to_s =~ /^generate_(\d*)_word[s]*$/
|
78
|
-
|
100
|
+
generate_n_words($1.to_i)
|
101
|
+
elsif method_sym.to_s =~ /^generate_(\d*)_sentence[s]*$/
|
102
|
+
generate_n_sentences($1.to_i)
|
79
103
|
else
|
80
104
|
super
|
81
105
|
end
|
82
106
|
end
|
83
107
|
|
84
|
-
|
85
108
|
# Clears the temporary dictionary's hash, useful for keeping
|
86
109
|
# the same dictionary object but removing the words it has learned.
|
87
110
|
#
|
@@ -20,13 +20,17 @@ class MarkovSentenceGenerator
|
|
20
20
|
# dictionary and recurring if the word is lowercase.
|
21
21
|
#
|
22
22
|
# (see #random_word)
|
23
|
-
def random_capitalized_word
|
23
|
+
def random_capitalized_word(attempts=0)
|
24
24
|
keys = @dictionary.dictionary.keys
|
25
25
|
x = keys[rand(keys.length)]
|
26
26
|
if /[A-Z]/ =~ x[0]
|
27
27
|
return x
|
28
|
+
elsif attempts < 30
|
29
|
+
# If you don't find a capitalized word after 30 attempts, just use
|
30
|
+
# a lowercase word as there may be no capitals in the dictionary.
|
31
|
+
random_capitalized_word(attempts+1)
|
28
32
|
else
|
29
|
-
|
33
|
+
random_word
|
30
34
|
end
|
31
35
|
end
|
32
36
|
|
@@ -49,6 +53,8 @@ class MarkovSentenceGenerator
|
|
49
53
|
|
50
54
|
# Generates a sentence of (wordcount) length using the weighted_random function.
|
51
55
|
#
|
56
|
+
# @param [Int] wordcount The number of words you want the generated string to contain.
|
57
|
+
# @return [String] the generated words, hopefully forming sentences.
|
52
58
|
def generate(wordcount)
|
53
59
|
sentence = []
|
54
60
|
sentence.concat(random_capitalized_word.split)
|
@@ -58,4 +64,27 @@ class MarkovSentenceGenerator
|
|
58
64
|
sentence.pop(sentence.length-wordcount)
|
59
65
|
sentence.join(' ')
|
60
66
|
end
|
67
|
+
|
68
|
+
# Generates (sentencecount) sentences using the weighted_random function.
|
69
|
+
#
|
70
|
+
# @param [Int] sentencecount The number of sentences you want the generated string to contain.
|
71
|
+
# @return [String] the sentence(s) generated.
|
72
|
+
def generate_sentence(sentencecount)
|
73
|
+
sentence = []
|
74
|
+
sentencecount.times do
|
75
|
+
# Find out how many actual keys are in the dictionary.
|
76
|
+
key_count = @dictionary.dictionary.keys.length
|
77
|
+
# If less than 30 keys, use that plus five as your maximum sentence length.
|
78
|
+
maximum_length = key_count < 30 ? key_count + 5 : 30
|
79
|
+
stop_at_index = sentence.count + maximum_length
|
80
|
+
sentence.concat(random_capitalized_word.split)
|
81
|
+
until (/[.!?]/ =~ sentence.last[-1])
|
82
|
+
sentence.concat(weighted_random(sentence.last(@depth).join(' ')).split)
|
83
|
+
# If a word ending with a . ! or ? isn't found after 30 words,
|
84
|
+
# just add a period as there may be none in the dictionary.
|
85
|
+
sentence[-1] << "." if sentence.count > stop_at_index
|
86
|
+
end
|
87
|
+
end
|
88
|
+
sentence.join(' ')
|
89
|
+
end
|
61
90
|
end
|
@@ -15,8 +15,8 @@ class PersistentDictionary < MarkovDictionary
|
|
15
15
|
attr_reader :dictionarylocation, :depth
|
16
16
|
def initialize(dictionary, depth=2)
|
17
17
|
@depth = depth
|
18
|
-
unless (1..
|
19
|
-
raise DepthNotInRangeError.new("Depth must be between 1 and
|
18
|
+
unless (1..5).include?(depth)
|
19
|
+
raise DepthNotInRangeError.new("Depth must be between 1 and 5")
|
20
20
|
end
|
21
21
|
@dictionarylocation = dictionary
|
22
22
|
self.open_dictionary
|
data/spec/test.txt
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
The cat likes pie and chainsaws
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: marky_markov
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2012-02-08 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: yajl-ruby
|
16
|
-
requirement: &
|
16
|
+
requirement: &70254574670860 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -24,7 +24,7 @@ dependencies:
|
|
24
24
|
version: 2.0.0
|
25
25
|
type: :runtime
|
26
26
|
prerelease: false
|
27
|
-
version_requirements: *
|
27
|
+
version_requirements: *70254574670860
|
28
28
|
description: ! "MarkyMarkov makes it easy to generate simply Markov Chains based upon
|
29
29
|
input from\n either a source file or a string. While usable as a module in your
|
30
30
|
code it can also be called on\n from the command line and piped into like a standard
|
@@ -46,6 +46,7 @@ files:
|
|
46
46
|
- spec/marky_markov/markov_sentence_generator_spec.rb
|
47
47
|
- spec/marky_markov/persistent_dictionary_spec.rb
|
48
48
|
- spec/spec_helper.rb
|
49
|
+
- spec/test.txt
|
49
50
|
- spec/textdictcompare.mmd
|
50
51
|
homepage: http://www.thefurd.com
|
51
52
|
licenses: []
|