marky_markov 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md ADDED
@@ -0,0 +1,90 @@
1
+ Marky Markov and the Funky Sentences
2
+ ====================================
3
+
4
+ Marky Markov is a naïve experiment in Markov Chain generation implemented
5
+ in Ruby. It can be used both from the command-line and as a library within your code.
6
+
7
+ # Installation
8
+
9
+ gem install marky_markov
10
+
11
+
12
+ # Module Usage
13
+
14
+ A basic usage of the TemporaryDictionary, which parses strings and files into a
15
+ temporary dictionary that will not be saved to disk.
16
+
17
+ require 'marky_markov'
18
+ markov = MarkyMarkov::TemporaryDictionary.new
19
+ markov.parse_string "These words will be added to the temporary dictionary."
20
+ markov.parse_file "filename.txt"
21
+ puts markov.generate_n_words 50
22
+ puts markov.generate_n_words 3000
23
+ markov.clear!
24
+
25
+ Dictionary creates or opens a persistent dictionary at a location defined by its
26
+ initializer, and will allow you to build and save a dictionary over multiple runs.
27
+ To ensure existing files aren't overwritten, the system appends .mmd to the end
28
+ of the dictionary name.
29
+
30
+ require 'marky_markov'
31
+ markov = MarkyMarkov::Dictionary.new('dictionary') # Saves/opens dictionary.mmd
32
+ markov.parse_file "#{ENV["HOME"]}/Documents/largefileindocs.txt"
33
+ markov.parse_file "anotherfileyay.txt"
34
+ puts markov.generate_n_words 10
35
+ markov.save_dictionary! # Saves the modified dictionary/creates one if it didn't exist.
36
+
37
+ If you want to delete a dictionary you call it upon the Dictionary class itself while
38
+ passing in the filename/location.
39
+
40
+ MarkyMarkov::Dictionary.delete_dictionary!('dictionary')
41
+
42
+
43
+ # Command-Line Usage
44
+
45
+ ## Build a Dictionary
46
+
47
+ marky_markov read textfile.txt
48
+
49
+ to build your word probability dictionary. You can run the command
50
+ on different files to continue adding to your dictionary file.
51
+
52
+
53
+ ## Say Some Words
54
+
55
+ marky_markov speak -w 30
56
+
57
+ Will use the dictionary to create a 30 word long sentence. If no number
58
+ is passed it will default to 200 words.
59
+
60
+ ## Temporary Dictionaries
61
+
62
+ marky_markov speak -s other-file.txt -w 20
63
+
64
+ Generates a temporary dictionary based on the source file passed to it
65
+ and uses that to speak. Here we're loading other-file.txt and
66
+ restricting the generated sentence to 20 words.
67
+
68
+ ## STDIN, Pipe Away!
69
+
70
+ echo "Hello, how are you" | marky_markov
71
+
72
+ Marky-Markov is compatible with other STDIN/STDOUT command-line
73
+ applications and can accept STDIN.
74
+
75
+ marky_markov listen "Bullfighting is difficult on the moon"
76
+
77
+ You can also supply a string as an argument to generate the text with,
78
+ though the results are nonsense without a substantial text base to work
79
+ from.
80
+
81
+ Usage: marky_markov COMMAND [OPTIONS]
82
+ Commands:
83
+ speak: Generate Markov Chain sentence (default wordcount of 200)
84
+ listen [sentence]: Generate Markov Chain sentence from supplied string.
85
+ read [file]: Add words to dictionary from supplied text file
86
+ Options
87
+ -d, --dictionary FILE Use custom dictionary location
88
+ -w, --wordcount NUMBER Set number of words generated
89
+ -s, --source FILE Generate and use temporary dictionary from source text
90
+ -h, --help Display this screen
data/bin/marky_markov ADDED
@@ -0,0 +1,86 @@
1
#!/usr/bin/env ruby
# Command-line front end for the MarkyMarkov Markov Chain generator.
#
# Commands:
#   speak             - generate words from the persistent dictionary, or from
#                       a temporary dictionary built from --source.
#   listen [sentence] - generate words from the supplied string (or STDIN).
#   read [file]       - add a text file to the persistent dictionary.
# With no command, piped STDIN is used as the source text; otherwise the
# usage screen is printed.

require 'optparse'
require 'marky_markov'

options = {}
opt_parser = OptionParser.new do |opts|
  opts.banner = "Usage: marky_markov COMMAND [OPTIONS]"
  opts.separator ""
  opts.separator "Commands:"
  opts.separator "    speak: Generate Markov Chain sentence (default wordcount of 200)"
  opts.separator "    listen [sentence]: Generate Markov Chain sentence from supplied string."
  opts.separator "    read [file]: Add words to dictionary from supplied text file"
  opts.separator ""
  opts.separator "Options"

  options[:dictionary] = "#{ENV["HOME"]}/.marky_markov_dictionary"
  opts.on('-d', '--dictionary FILE', 'Use custom dictionary location') do |file|
    options[:dictionary] = file
  end

  # Integer coercion makes OptionParser reject non-numeric values instead of
  # silently turning them into 0.
  options[:wordcount] = 200
  opts.on('-w', '--wordcount NUMBER', Integer, 'Set number of words generated') do |number|
    options[:wordcount] = number
  end

  options[:source] = nil
  opts.on('-s', '--source FILE',
          'Generate and use temporary dictionary from source text') do |file|
    options[:source] = file
  end

  options[:resetdictionary] = false
  opts.on('--reset', "WARNING: Deletes default dictionary.") do
    options[:resetdictionary] = true
  end

  opts.on('-h', '--help', 'Display this screen') do
    STDOUT.puts opts
    exit
  end
end

begin
  opt_parser.parse!
rescue OptionParser::ParseError => e
  # Unknown switches / bad values: show the problem and the usage screen
  # rather than an uncaught exception backtrace.
  STDERR.puts e.message
  STDERR.puts opt_parser
  exit(false)
end

if options[:resetdictionary]
  STDOUT.puts MarkyMarkov::Dictionary.delete_dictionary!(options[:dictionary])
  exit
end

begin
  case ARGV[0]
  when "speak"
    if options[:source]
      markov = MarkyMarkov::TemporaryDictionary.new
      markov.parse_file(options[:source])
    else
      dictionary_file = "#{options[:dictionary]}.mmd"
      unless File.exist?(dictionary_file)
        STDERR.puts "Dictionary file #{dictionary_file} does not exist. Cannot generate sentence."
        STDERR.puts "Please build a dictionary with read or use the --source option with speak."
        exit(false)
      end
      markov = MarkyMarkov::Dictionary.new(options[:dictionary])
    end
    STDOUT.puts markov.generate_n_words(options[:wordcount])
  when "read"
    source = ARGV[1] || options[:source]
    if source.nil?
      STDERR.puts "No source file supplied. Usage: marky_markov read FILE"
      exit(false)
    end
    markov = MarkyMarkov::Dictionary.new(options[:dictionary])
    markov.parse_file(source)
    markov.save_dictionary!
    STDOUT.puts "Added #{source} to dictionary."
  when "listen"
    # Prefer piped input; fall back to the sentence given as an argument.
    text = STDIN.tty? ? ARGV[1] : STDIN.read
    if text.nil?
      STDERR.puts "No sentence supplied. Usage: marky_markov listen \"some text\""
      exit(false)
    end
    markov = MarkyMarkov::TemporaryDictionary.new
    markov.parse_string(text)
    STDOUT.puts markov.generate_n_words(options[:wordcount])
  else
    if STDIN.tty?
      STDOUT.puts opt_parser
    else
      markov = MarkyMarkov::TemporaryDictionary.new
      markov.parse_string(STDIN.read)
      STDOUT.puts markov.generate_n_words(options[:wordcount])
    end
  end
rescue OneWordDictionary::FileNotFoundError => e
  # Raised by parse_file when the source text file is missing.
  STDERR.puts e.message
  exit(false)
end
@@ -0,0 +1,85 @@
1
+ #!/usr/bin/env ruby -i
2
+ #A Markov Chain generator.
3
+
4
+ require_relative 'marky_markov/persistent_dictionary'
5
+ require_relative 'marky_markov/two_word_sentence_generator'
6
+
7
+ module MarkyMarkov
8
+ VERSION = '0.1.0'
9
+
10
+ class TemporaryDictionary
11
+ # Create a new Temporary Markov Chain Dictionary and sentence generator for use.
12
+ # @example Create a new Temporary Dictionary.
13
+ # markov = MarkyMarkov::TemporaryDictionary.new
14
+ # @return [Object] a MarkyMarkov::TemporaryDictionary object.
15
+ def initialize
16
+ @dictionary = TwoWordDictionary.new
17
+ @sentence = TwoWordSentenceGenerator.new(@dictionary)
18
+ end
19
+ # Parses a given file and adds the sentences it contains to the current dictionary.
20
+ #
21
+ # @example Open a text file and add its contents to the dictionary.
22
+ # markov.parse_file "text.txt"
23
+ # @param [String] location path of the file you want to add to the dictionary.
24
+ def parse_file(location)
25
+ @dictionary.parse_source(location, true)
26
+ end
27
+ # Parses a given string and adds them to the current dictionary.
28
+ #
29
+ # @example Add a string to the dictionary.
30
+ # markov.parse_string "I could really go for some Chicken Makhani."
31
+ # @param [String] string the sentence you want to add to the dictionary.
32
+ def parse_string(string)
33
+ @dictionary.parse_source(string, false)
34
+ end
35
+ # Generates a sentence/sentences of n words using the dictionary generated via
36
+ # parse_string or parse_file.
37
+ #
38
+ # @example Generate a 40 word long string of words.
39
+ # markov.generate_n_words(40)
40
+ # @param [Int] wordcount the number of words you want generated.
41
+ # @return [String] the sentence generated by the dictionary.
42
+ def generate_n_words(wordcount)
43
+ @sentence.generate(wordcount)
44
+ end
45
+ # Clears the temporary dictionary's hash, useful for keeping
46
+ # the same dictionary object but removing the words it has learned.
47
+ #
48
+ # @example Clear the Dictionary hash.
49
+ # markov.clear!
50
+ def clear!
51
+ @dictionary.dictionary.clear
52
+ end
53
+ end
54
+
55
+ class Dictionary < TemporaryDictionary
56
+ # Open (or create if it doesn't exist) a Persistent Markov Chain Dictionary
57
+ # and sentence generator for use.
58
+ #
59
+ # @example Create a new Persistent Dictionary object.
60
+ # markov = MarkyMarkov::Dictionary.new("#{ENV["HOME"]}/markov_dictionary")
61
+ def initialize(location)
62
+ @dictionary = PersistentDictionary.new(location)
63
+ @sentence = TwoWordSentenceGenerator.new(@dictionary)
64
+ end
65
+ # Save the Persistent Dictionary file into JSON format for later use.
66
+ #
67
+ # @example Save the dictionary to disk.
68
+ # markov.save_dictionary!
69
+ def save_dictionary!
70
+ @dictionary.save_dictionary!
71
+ end
72
+ # Class Method: Takes a dictionary location/name and deletes it from the file-system.
73
+ #
74
+ # @note To ensure that someone doesn't pass in something that shouldn't be deleted by accident,
75
+ # the filetype .mmd is added to the end of the supplied argument, so do not include the
76
+ # extension when calling the method.
77
+ #
78
+ # @example Delete the dictionary located at '~/markov_dictionary.mmd'
79
+ # MarkyMarkov::Dictionary.delete_dictionary!("#{ENV["HOME"]}/markov_dictionary")
80
+ # @param [String] location location/name of the dictionary file to be deleted.
81
+ def self.delete_dictionary!(location)
82
+ PersistentDictionary.delete_dictionary!(location)
83
+ end
84
+ end
85
+ end
@@ -0,0 +1,32 @@
1
# Markov dictionary keyed on a single word. Maps each word to a Hash of the
# words seen immediately after it and how many times each one followed.
class OneWordDictionary
  # Raised by #open_source when the requested file is not on disk.
  # Inherits from StandardError so an ordinary `rescue` can catch it.
  class FileNotFoundError < StandardError
  end

  attr_accessor :dictionary

  def initialize
    @dictionary = {}
  end

  # Reads a text file and splits it into whitespace-separated words.
  #
  # @param [String] source path of the file to read.
  # @return [Array<String>] the words in the file, in order.
  # @raise [FileNotFoundError] if the file does not exist.
  def open_source(source)
    raise FileNotFoundError, "#{source} does not exist!" unless File.exist?(source)

    # File.read closes the handle for us.
    File.read(source).split
  end

  # Records one occurrence of +followedby+ appearing after +rootword+.
  #
  # @return [Integer] the updated occurrence count.
  def add_word(rootword, followedby)
    followers = (@dictionary[rootword] ||= Hash.new(0))
    # fetch with an explicit 0 also covers hashes that have no default
    # value (e.g. ones assigned directly through the accessor).
    followers[followedby] = followers.fetch(followedby, 0) + 1
  end

  # Adds every adjacent word pair of the source to the dictionary.
  #
  # @param [String] source a file path when +file+ is true, otherwise the text itself.
  # @param [Boolean] file whether +source+ names a file.
  def parse_source(source, file=true)
    contents = file ? open_source(source) : source.split
    # Nothing to learn from empty input; avoid registering a nil key.
    return if contents.empty?

    contents.each_cons(2) do |first, second|
      add_word(first, second)
    end
    # Special case: the last word has nothing following it, but must still
    # exist as a key.
    @dictionary[contents.last] ||= Hash.new(0)
  end
end
@@ -0,0 +1,35 @@
1
# Generates text from a one-word Markov dictionary: each next word is drawn
# from the words recorded as following the previous one, weighted by count.
class OneWordSentenceGenerator
  # @param dictionary an object whose #dictionary method returns the
  #   word => { follower => count } Hash.
  def initialize(dictionary)
    @dictionary = dictionary
  end

  # Picks a key of the dictionary at random.
  #
  # @return [String, nil] a random key, or nil when the dictionary is empty.
  def random_word
    @dictionary.dictionary.keys.sample
  end

  # Picks a follower of +lastword+ with probability proportional to how
  # often it was recorded. Falls back to a random key when +lastword+ is
  # unknown or has no followers (e.g. the last word of a source text).
  #
  # @param [String] lastword the word to continue from.
  # @return [String, nil] the chosen word.
  def weighted_random(lastword)
    followers = @dictionary.dictionary[lastword]
    return random_word if followers.nil? || followers.empty?

    total = followers.values.inject(:+)
    remaining = rand(total) + 1
    followers.each do |word, occurs|
      remaining -= occurs
      return word if remaining <= 0
    end
    # Only reached if the stored counts do not add up (e.g. all zero).
    random_word
  end

  # Builds a space-separated string of +wordcount+ words.
  #
  # @param [Integer] wordcount number of words to generate.
  # @return [String] the generated text; empty when +wordcount+ is below 1
  #   or the dictionary has no words.
  def generate(wordcount)
    first_word = random_word
    return '' if wordcount < 1 || first_word.nil?

    sentence = [first_word]
    (wordcount - 1).times do
      sentence << weighted_random(sentence.last)
    end
    sentence.join(' ')
  end
end
@@ -0,0 +1,36 @@
1
+ require 'yajl'
2
+ require_relative 'two_word_dictionary'
3
+
4
# A two-word Markov dictionary that is loaded from and saved to a JSON file
# on disk. The extension ".mmd" is always appended to the supplied location.
class PersistentDictionary < TwoWordDictionary
  # @param [String] dictionary location/name of the dictionary, without the
  #   ".mmd" extension.
  def initialize(dictionary)
    @dictionarylocation = "#{dictionary}.mmd"
    open_dictionary
  end

  # Loads the dictionary from disk, or starts an empty one if the file does
  # not exist yet.
  def open_dictionary
    @dictionary =
      if File.exist?(@dictionarylocation)
        # NOTE(review): the `|| {}` guards against the parser yielding nil
        # for an empty file - confirm against Yajl's behaviour.
        File.open(@dictionarylocation, 'r') { |f| Yajl::Parser.parse(f) } || {}
      else
        {}
      end
  end

  # Writes the dictionary to disk as JSON.
  #
  # @return [true]
  def save_dictionary!
    json = Yajl::Encoder.encode(@dictionary)
    File.open(@dictionarylocation, 'w') do |f|
      f.puts json
    end
    true
  end

  # Deletes the dictionary file for the given location.
  #
  # @param [String] dictionary location/name of the dictionary, without the
  #   ".mmd" extension.
  # @return [String, false] a confirmation message when the file was
  #   deleted, false when no such file exists.
  # @raise [ArgumentError] if no location is supplied (a class method has
  #   no instance location to fall back on).
  def self.delete_dictionary!(dictionary = nil)
    raise ArgumentError, 'a dictionary location is required' if dictionary.nil?

    mmd = "#{dictionary}.mmd"
    return false unless File.exist?(mmd)

    File.delete(mmd)
    "Deleted #{mmd}"
  end
end
@@ -0,0 +1,11 @@
1
+ require_relative 'one_word_dictionary'
2
+
3
# Markov dictionary keyed on pairs of consecutive words ("first second"),
# each mapping to the words seen after that pair and their counts.
class TwoWordDictionary < OneWordDictionary
  # Adds every consecutive word triple of the source to the dictionary.
  #
  # @param [String] source a file path when +file+ is true, otherwise the text itself.
  # @param [Boolean] file whether +source+ names a file.
  def parse_source(source, file=true)
    contents = file ? open_source(source) : source.split
    # Nothing to learn from empty input; avoid registering an "" key.
    return if contents.empty?

    contents.each_cons(3) do |first, second, third|
      add_word("#{first} #{second}", third)
    end
    # The final pair has no follower but must still exist as a key.
    @dictionary[contents.last(2).join(' ')] ||= Hash.new(0)
  end
end
@@ -0,0 +1,13 @@
1
+ require_relative 'one_word_sentence_generator'
2
+
3
# Generates text from a two-word Markov dictionary: each next word is chosen
# from the followers of the last two words generated so far.
class TwoWordSentenceGenerator < OneWordSentenceGenerator
  # Builds a space-separated string of at most +wordcount+ words.
  #
  # @param [Integer] wordcount number of words to generate.
  # @return [String] the generated text; empty when +wordcount+ is below 1
  #   or the dictionary has no keys.
  def generate(wordcount)
    seed = random_word
    return '' if seed.nil? || wordcount < 1

    # A dictionary key holds up to two words, so the seed may already
    # contribute more than one word.
    sentence = seed.split
    (wordcount - 1).times do
      sentence.concat(weighted_random(sentence.last(2).join(' ')).split)
    end
    # Trim any surplus; unlike pop with a computed count this cannot be
    # handed a negative size when fewer words than requested were produced.
    sentence.first(wordcount).join(' ')
  end
end
@@ -0,0 +1,45 @@
1
+ require 'spec_helper.rb'
2
+
3
# Specs for OneWordDictionary: reading sources, recording word pairs and
# building the dictionary from a string or a text file.
describe OneWordDictionary do
  before(:each) do
    @textsource = "spec/test.txt"
    @dict = OneWordDictionary.new
    @dict.parse_source("Hello how are you doing today", false)
    @stringdict = {"Hello" => {"how" => 1},
                   "how" => {"are" => 1},
                   "are" => {"you" => 1},
                   "you" => {"doing" => 1},
                   "doing" => {"today" => 1},
                   "today" => {} }
    @textdict = {"The" => {"cat"=>1},
                 "and" => {"chainsaws"=>1},
                 "cat" => {"likes"=>1},
                 "chainsaws" => {},
                 "likes" => {"pie"=>1},
                 "pie" => {"and"=>1} }
  end

  it "can open a file" do
    @dict.open_source(@textsource).should_not be_nil
  end

  it "should give a FileNotFoundError if the file doesn't exist" do
    # The matcher must be an argument of `to`; on a separate line it is
    # never passed and the expectation does not check anything useful.
    expect { @dict.open_source("thisisntreal") }.to raise_error(
      OneWordDictionary::FileNotFoundError, "thisisntreal does not exist!"
    )
  end

  it "can add a word to the dictionary" do
    @dict.add_word("to", "be")
    @dict.dictionary.should include("to" => {"be" => 1})
  end

  it "create a dictionary via parsing a text file" do
    @dict.dictionary = {}
    @dict.parse_source(@textsource)
    @dict.dictionary.should eql(@textdict)
  end

  it "builds a one word dictionary properly" do
    @dict.dictionary.should eql(@stringdict)
  end
end
@@ -0,0 +1,25 @@
1
+ require 'spec_helper.rb'
2
+
3
+ describe OneWordSentenceGenerator do
4
+ before(:each) do
5
+ @dict = OneWordDictionary.new
6
+ @dict.parse_source("Hello man how are you today", false)
7
+ @sentence = OneWordSentenceGenerator.new(@dict)
8
+ end
9
+
10
+ it "can pick a random word" do
11
+
12
+ end
13
+
14
+ it "can choose a weighted random word" do
15
+
16
+ end
17
+
18
+ it "will use a random word if the word does not exist" do
19
+
20
+ end
21
+
22
+ it "generates a sentence of the appropriate length" do
23
+ @sentence.generate(20).split.count.should eql(20)
24
+ end
25
+ end
@@ -0,0 +1,22 @@
1
+ require 'spec_helper'
2
+
3
+ describe PersistentDictionary do
4
+ before do
5
+ @dict = PersistentDictionary.new("spec/textdict")
6
+ @dict.parse_source("spec/test.txt")
7
+ end
8
+
9
+ it "should be able to save a dictionary" do
10
+ @dict.save_dictionary!.should eql(true)
11
+ end
12
+
13
+ it "should be able to load an existing dictionary" do
14
+ otherdict = PersistentDictionary.new("spec/textdictcompare")
15
+ @dict.dictionary.should eql(otherdict.dictionary)
16
+ end
17
+
18
+ after do
19
+ PersistentDictionary.delete_dictionary!("spec/textdict")
20
+ end
21
+ end
22
+
@@ -0,0 +1,31 @@
1
+ require 'spec_helper'
2
+
3
+ describe TwoWordDictionary do
4
+ before(:each) do
5
+ @dict = TwoWordDictionary.new
6
+ @dict.parse_source("The cat likes pie and chainsaws", false)
7
+ @textsource = "spec/test.txt"
8
+ @stringdict = { "The cat" => { "likes" => 1},
9
+ "cat likes" => { "pie" => 1 },
10
+ "likes pie" => {"and" => 1 },
11
+ "pie and" => { "chainsaws" => 1 },
12
+ "and chainsaws" => {} }
13
+ @textdict = {"The cat" => {"likes" => 1},
14
+ "cat likes" => {"pie" => 1},
15
+ "likes pie" => {"and" => 1},
16
+ "pie and" => {"chainsaws" => 1},
17
+ "and chainsaws" => {}}
18
+ end
19
+
20
+ it "can add a word to the two-word dictionary" do
21
+ @dict.add_word("Zebras like", "kung-fu")
22
+ @dict.dictionary.should eql(@stringdict.merge( {"Zebras like" => {"kung-fu" => 1}} ))
23
+ end
24
+
25
+ it "create a two-word dictionary via parsing a text file" do
26
+ @dict.dictionary = {}
27
+ @dict.parse_source(@textsource)
28
+ @dict.dictionary.should eql(@textdict)
29
+ end
30
+ end
31
+
@@ -0,0 +1,13 @@
1
+ require 'spec_helper'
2
+
3
+ describe TwoWordSentenceGenerator do
4
+ before(:each) do
5
+ @dict = TwoWordDictionary.new
6
+ @dict.parse_source("Hello man how are you today", false)
7
+ @sentence = TwoWordSentenceGenerator.new(@dict)
8
+ end
9
+
10
+ it "generates a sentence of the appropriate length" do
11
+ @sentence.generate(20).split.count.should eql(20)
12
+ end
13
+ end
@@ -0,0 +1 @@
1
+ require 'marky_markov'
metadata ADDED
@@ -0,0 +1,79 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: marky_markov
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Matt Furden
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-02-06 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: yajl-ruby
16
+ requirement: &70100468290580 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 1.1.0
22
+ - - <
23
+ - !ruby/object:Gem::Version
24
+ version: 2.0.0
25
+ type: :runtime
26
+ prerelease: false
27
+ version_requirements: *70100468290580
28
+ description: ! "MarkyMarkov makes it easy to generate simple Markov Chains based upon
29
+ input from\n either a source file or a string. While usable as a module in your
30
+ code it can also be called on\n from the command line and piped into like a standard
31
+ UNIX application."
32
+ email: mfurden@gmail.com
33
+ executables:
34
+ - marky_markov
35
+ extensions: []
36
+ extra_rdoc_files:
37
+ - README.md
38
+ files:
39
+ - README.md
40
+ - bin/marky_markov
41
+ - lib/marky_markov.rb
42
+ - lib/marky_markov/one_word_dictionary.rb
43
+ - lib/marky_markov/one_word_sentence_generator.rb
44
+ - lib/marky_markov/persistent_dictionary.rb
45
+ - lib/marky_markov/two_word_dictionary.rb
46
+ - lib/marky_markov/two_word_sentence_generator.rb
47
+ - spec/marky_markov/one_word_dictionary_spec.rb
48
+ - spec/marky_markov/one_word_sentence_spec.rb
49
+ - spec/marky_markov/persistent_dictionary_spec.rb
50
+ - spec/marky_markov/two_word_dict_spec.rb
51
+ - spec/marky_markov/two_word_sentence_spec.rb
52
+ - spec/spec_helper.rb
53
+ homepage: http://www.thefurd.com
54
+ licenses: []
55
+ post_install_message:
56
+ rdoc_options:
57
+ - --charset=UTF-8
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ required_rubygems_version: !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ! '>='
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
72
+ requirements: []
73
+ rubyforge_project: marky_markov
74
+ rubygems_version: 1.8.15
75
+ signing_key:
76
+ specification_version: 2
77
+ summary: Simple Markov Chain generation available in the command-line
78
+ test_files: []
79
+ has_rdoc: