marky_markov 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md ADDED
@@ -0,0 +1,90 @@
1
+ Marky Markov and the Funky Sentences
2
+ ====================================
3
+
4
+ Marky Markov is a naïve experiment in Markov Chain generation implemented
5
+ in Ruby. It can be used both from the command-line and as a library within your code.
6
+
7
+ # Installation
8
+
9
+ gem install marky_markov
10
+
11
+
12
+ # Module Usage
13
+
14
+ A basic usage of the TemporaryDictionary, which parses strings and files into a
15
+ temporary dictionary that will not be saved to disk.
16
+
17
+ require 'marky_markov'
18
+ markov = MarkyMarkov::TemporaryDictionary.new
19
+ markov.parse_string "These words will be added to the temporary dictionary."
20
+ markov.parse_file "filename.txt"
21
+ puts markov.generate_n_words 50
22
+ puts markov.generate_n_words 3000
23
+ markov.clear!
24
+
25
+ Dictionary creates or opens a persistent dictionary at a location defined by its
26
+ initializer, and will allow you to build and save a dictionary over multiple runs.
27
+ To ensure existing files aren't overwritten, the system appends .mmd to the end
28
+ of the dictionary name.
29
+
30
+ require 'marky_markov'
31
+ markov = MarkyMarkov::Dictionary.new('dictionary') # Saves/opens dictionary.mmd
32
+ markov.parse_file "#{ENV["HOME"]}/Documents/largefileindocs.txt"
33
+ markov.parse_file "anotherfileyay.txt"
34
+ puts markov.generate_n_words 10
35
+ markov.save_dictionary! # Saves the modified dictionary/creates one if it didn't exist.
36
+
37
+ If you want to delete a dictionary, call delete_dictionary! on the Dictionary class itself,
38
+ passing in the filename/location.
39
+
40
+ MarkyMarkov::Dictionary.delete_dictionary!('dictionary')
41
+
42
+
43
+ # Command-Line Usage
44
+
45
+ ## Build a Dictionary
46
+
47
+ marky_markov read textfile.txt
48
+
49
+ to build your word probability dictionary. You can run the command
50
+ on different files to continue adding to your dictionary file.
51
+
52
+
53
+ ## Say Some Words
54
+
55
+ marky_markov speak -w 30
56
+
57
+ Will use the dictionary to create a 30 word long sentence. If no number
58
+ is passed it will default to 200 words.
59
+
60
+ ## Temporary Dictionaries
61
+
62
+ marky_markov speak -s other-file.txt -w 20
63
+
64
+ Generates a temporary dictionary based on the source file passed to it
65
+ and uses that to speak. Here we're loading other-file.txt and
66
+ restricting the generated sentence to 20 words.
67
+
68
+ ## STDIN, Pipe Away!
69
+
70
+ echo "Hello, how are you" | marky_markov
71
+
72
+ Marky-Markov is compatible with other STDIN/STDOUT command-line
73
+ applications and can accept STDIN.
74
+
75
+ marky_markov listen "Bullfighting is difficult on the moon"
76
+
77
+ You can also supply a string as an argument to generate the text with,
78
+ though the results are nonsense without a substantial text base to work
79
+ from.
80
+
81
+ Usage: marky_markov COMMAND [OPTIONS]
82
+ Commands:
83
+ speak: Generate Markov Chain sentence (default wordcount of 200)
84
+ listen [sentence]: Generate Markov Chain sentence from supplied string.
85
+ read [file]: Add words to dictionary from supplied text file
86
+ Options
87
+ -d, --dictionary FILE Use custom dictionary location
88
+ -w, --wordcount NUMBER Set number of words generated
89
+ -s, --source FILE Generate and use temporary dictionary from source text
90
+ -h, --help Display this screen
data/bin/marky_markov ADDED
@@ -0,0 +1,86 @@
1
+ #!/usr/bin/env ruby -i
2
+ #A Markov Chain generator.
3
+
4
+ require 'optparse'
5
+ require 'marky_markov'
6
+
7
+ #if __FILE__ == $0
8
# Command-line front end for MarkyMarkov.
#
# Parses the global options, then dispatches on the first positional argument
# (speak / read / listen). With no command, text piped in on STDIN is used as
# a one-off source; otherwise the usage screen is printed.
options = {
  :dictionary      => "#{ENV["HOME"]}/.marky_markov_dictionary",
  :wordcount       => 200,
  :source          => nil,
  :resetdictionary => false
}

opt_parser = OptionParser.new do |opts|
  opts.banner = "Usage: marky_markov COMMAND [OPTIONS]"
  opts.separator ""
  opts.separator "Commands:"
  opts.separator "    speak: Generate Markov Chain sentence (default wordcount of 200)"
  opts.separator "    listen [sentence]: Generate Markov Chain sentence from supplied string."
  opts.separator "    read [file]: Add words to dictionary from supplied text file"
  opts.separator ""
  opts.separator "Options"

  opts.on('-d', '--dictionary FILE', 'Use custom dictionary location') do |file|
    options[:dictionary] = file
  end

  # Integer coercion makes OptionParser reject non-numeric values instead of
  # silently turning them into 0.
  opts.on('-w', '--wordcount NUMBER', Integer, 'Set number of words generated') do |number|
    options[:wordcount] = number
  end

  opts.on('-s', '--source FILE',
          'Generate and use temporary dictionary from source text') do |file|
    options[:source] = file
  end

  opts.on('--reset', "WARNING: Deletes default dictionary.") do
    options[:resetdictionary] = true
  end

  opts.on('-h', '--help', 'Display this screen') do
    STDOUT.puts opt_parser
    exit
  end
end

begin
  opt_parser.parse!
rescue OptionParser::ParseError => e
  # Report bad options as a usage error rather than a backtrace.
  STDERR.puts e.message
  STDERR.puts opt_parser
  exit(false)
end

if options[:resetdictionary]
  dictionary_file = "#{options[:dictionary]}.mmd"
  if File.exist?(dictionary_file)
    MarkyMarkov::Dictionary.delete_dictionary!(options[:dictionary])
    STDOUT.puts "Deleted #{dictionary_file}"
  else
    STDOUT.puts "No dictionary found at #{dictionary_file}"
  end
  exit
end

case ARGV[0]
when "speak"
  if options[:source]
    markov = MarkyMarkov::TemporaryDictionary.new
    markov.parse_file(options[:source])
  else
    dictionary_file = "#{options[:dictionary]}.mmd"
    unless File.exist?(dictionary_file)
      STDERR.puts "Dictionary file #{dictionary_file} does not exist. Cannot generate sentence."
      STDERR.puts "Please build a dictionary with read or use the --source option with speak."
      exit(false)
    end
    markov = MarkyMarkov::Dictionary.new(options[:dictionary])
  end
  STDOUT.puts markov.generate_n_words(options[:wordcount])
when "read"
  # The file may be given positionally or through --source.
  source = ARGV[1] || options[:source]
  if source.nil?
    STDERR.puts "No file supplied. Usage: marky_markov read FILE"
    exit(false)
  end
  markov = MarkyMarkov::Dictionary.new(options[:dictionary])
  markov.parse_file(source)
  markov.save_dictionary!
  STDOUT.puts "Added #{source} to dictionary."
when "listen"
  # Piped input wins over the positional sentence.
  text = STDIN.tty? ? ARGV[1] : STDIN.read
  if text.nil?
    STDERR.puts "No sentence supplied. Usage: marky_markov listen \"some words\""
    exit(false)
  end
  markov = MarkyMarkov::TemporaryDictionary.new
  markov.parse_string(text)
  STDOUT.puts markov.generate_n_words(options[:wordcount])
else
  if STDIN.tty?
    STDOUT.puts opt_parser
  else
    markov = MarkyMarkov::TemporaryDictionary.new
    markov.parse_string(STDIN.read)
    STDOUT.puts markov.generate_n_words(options[:wordcount])
  end
end
86
+ #end
@@ -0,0 +1,85 @@
1
+ #!/usr/bin/env ruby -i
2
+ #A Markov Chain generator.
3
+
4
+ require_relative 'marky_markov/persistent_dictionary'
5
+ require_relative 'marky_markov/two_word_sentence_generator'
6
+
7
# Public entry points for building Markov chain dictionaries and generating
# text from them.
module MarkyMarkov
  VERSION = '0.1.0'

  # A two-word Markov chain dictionary that is never written to disk by this class.
  class TemporaryDictionary
    # Create a new Temporary Markov Chain Dictionary and sentence generator for use.
    #
    # @example Create a new Temporary Dictionary.
    #   markov = MarkyMarkov::TemporaryDictionary.new
    # @return [MarkyMarkov::TemporaryDictionary] a new, empty dictionary object.
    def initialize
      use_dictionary(TwoWordDictionary.new)
    end

    # Parses a given file and adds the sentences it contains to the current dictionary.
    #
    # @example Open a text file and add its contents to the dictionary.
    #   markov.parse_file "text.txt"
    # @param [String] location path of the file you want to add to the dictionary.
    def parse_file(location)
      @dictionary.parse_source(location, true)
    end

    # Parses a given string and adds its words to the current dictionary.
    #
    # @example Add a string to the dictionary.
    #   markov.parse_string "I could really go for some Chicken Makhani."
    # @param [String] string the sentence you want to add to the dictionary.
    def parse_string(string)
      @dictionary.parse_source(string, false)
    end

    # Generates a sentence/sentences of n words using the dictionary generated via
    # parse_string or parse_file.
    #
    # @example Generate a 40 word long string of words.
    #   markov.generate_n_words(40)
    # @param [Integer] wordcount the number of words you want generated.
    # @return [String] the sentence generated by the dictionary.
    def generate_n_words(wordcount)
      @sentence.generate(wordcount)
    end

    # Clears the dictionary's hash, useful for keeping the same dictionary
    # object but removing the words it has learned.
    #
    # @example Clear the Dictionary hash.
    #   markov.clear!
    def clear!
      @dictionary.dictionary.clear
    end

    private

    # Stores the backing dictionary and builds the sentence generator around
    # it, so both initializers wire things up the same way.
    def use_dictionary(dictionary)
      @dictionary = dictionary
      @sentence = TwoWordSentenceGenerator.new(@dictionary)
    end
  end

  # A Markov chain dictionary backed by a file that can be saved and reopened.
  class Dictionary < TemporaryDictionary
    # Open (or create if it doesn't exist) a Persistent Markov Chain Dictionary
    # and sentence generator for use.
    #
    # @example Create a new Persistent Dictionary object.
    #   markov = MarkyMarkov::Dictionary.new("#{ENV["HOME"]}/markov_dictionary")
    # @param [String] location location/name of the dictionary file, without the .mmd extension.
    def initialize(location)
      use_dictionary(PersistentDictionary.new(location))
    end

    # Save the Persistent Dictionary file into JSON format for later use.
    #
    # @example Save the dictionary to disk.
    #   markov.save_dictionary!
    def save_dictionary!
      @dictionary.save_dictionary!
    end

    # Class Method: Takes a dictionary location/name and deletes it from the file-system.
    #
    # @note To ensure that someone doesn't pass in something that shouldn't be deleted by accident,
    #   the filetype .mmd is added to the end of the supplied argument, so do not include the
    #   extension when calling the method.
    #
    # @example Delete the dictionary located at '~/markov_dictionary.mmd'
    #   MarkyMarkov::Dictionary.delete_dictionary!("#{ENV["HOME"]}/markov_dictionary")
    # @param [String] location location/name of the dictionary file to be deleted.
    def self.delete_dictionary!(location)
      PersistentDictionary.delete_dictionary!(location)
    end
  end
end
@@ -0,0 +1,32 @@
1
# Markov chain dictionary keyed on single words.
#
# The dictionary is a Hash mapping each word to a Hash of the words seen
# directly after it and how many times each one followed.
class OneWordDictionary
  # Raised by #open_source when the requested file does not exist.
  class FileNotFoundError < StandardError
  end

  attr_accessor :dictionary

  def initialize
    @dictionary = {}
  end

  # Reads a text file and splits it into whitespace-separated words.
  #
  # @param [String] source path of the file to read.
  # @return [Array<String>] the words of the file, in order.
  # @raise [FileNotFoundError] if no file exists at +source+.
  def open_source(source)
    raise FileNotFoundError, "#{source} does not exist!" unless File.exist?(source)

    # File.read closes the handle itself, unlike File.open(...).read.
    File.read(source).split
  end

  # Records one more occurrence of +followedby+ directly after +rootword+.
  #
  # @param [String] rootword the key the follower is stored under.
  # @param [String] followedby the word seen after +rootword+.
  # @return [Integer] the updated occurrence count.
  def add_word(rootword, followedby)
    followers = (@dictionary[rootword] ||= Hash.new(0))
    # fetch with an explicit 0: inner hashes that were not created here (for
    # example a dictionary assigned through the accessor) may have no default.
    followers[followedby] = followers.fetch(followedby, 0) + 1
  end

  # Adds every consecutive pair of words from a file or a string.
  #
  # @param [String] source a file path when +file+ is true, otherwise the text itself.
  # @param [Boolean] file whether +source+ names a file to read.
  def parse_source(source, file=true)
    contents = file ? open_source(source) : source.split
    # An empty source has no last word; do not register a nil key for it.
    return if contents.empty?

    contents.each_cons(2) do |first, second|
      add_word(first, second)
    end
    # Special case for last word in source as it has no words following it.
    @dictionary[contents.last] ||= Hash.new(0)
  end
end
@@ -0,0 +1,35 @@
1
# Generates text by walking a dictionary object whose #dictionary method
# returns a Hash of word => { following word => occurrence count }.
class OneWordSentenceGenerator
  # @param [#dictionary] dictionary object exposing the word hash to walk.
  def initialize(dictionary)
    @dictionary = dictionary
  end

  # Picks one key of the dictionary at random.
  #
  # @return [String, nil] a random key, or nil when the dictionary is empty.
  def random_word
    @dictionary.dictionary.keys.sample
  end

  # Picks a word to follow +lastword+, weighted by how often each follower
  # was recorded.
  #
  # If the word is unknown or has no followers (the last word of a source
  # text), a random dictionary key is returned instead.
  #
  # @param [String] lastword the word (dictionary key) to continue from.
  # @return [String, nil] the chosen next word.
  def weighted_random(lastword)
    followers = @dictionary.dictionary[lastword]
    return random_word if followers.nil? || followers.empty?

    total = followers.values.inject(0, :+)
    return random_word if total <= 0

    # Walk the followers, subtracting each count until the draw is used up.
    remaining = rand(total) + 1
    followers.each do |word, occurs|
      remaining -= occurs
      return word if remaining <= 0
    end
    random_word
  end

  # Builds a space-separated string of +wordcount+ words.
  #
  # @param [Integer] wordcount the number of words to generate.
  # @return [String] the generated text; empty when +wordcount+ is not
  #   positive or the dictionary has no words.
  def generate(wordcount)
    return '' if wordcount <= 0 || @dictionary.dictionary.empty?

    sentence = [random_word]
    (wordcount - 1).times do
      sentence << weighted_random(sentence.last)
    end
    sentence.join(' ')
  end
end
@@ -0,0 +1,36 @@
1
+ require 'yajl'
2
+ require_relative 'two_word_dictionary'
3
+
4
# A TwoWordDictionary that is loaded from, and saved to, a JSON file whose
# name is the supplied location with ".mmd" appended.
class PersistentDictionary < TwoWordDictionary
  # @param [String] dictionary location/name of the dictionary file, without the .mmd extension.
  def initialize(dictionary)
    @dictionarylocation = "#{dictionary}.mmd"
    open_dictionary
  end

  # Loads the dictionary hash from disk, or starts an empty one when the
  # file does not exist yet.
  def open_dictionary
    @dictionary =
      if File.exist?(@dictionarylocation)
        File.open(@dictionarylocation, 'r') { |f| Yajl::Parser.parse(f) }
      else
        {}
      end
  end

  # Writes the dictionary hash to disk as JSON.
  #
  # @return [true]
  def save_dictionary!
    json = Yajl::Encoder.encode(@dictionary)
    File.open(@dictionarylocation, 'w') do |f|
      f.puts json
    end
    true
  end

  # Deletes the dictionary file for the given location/name.
  #
  # @param [String] dictionary location/name of the dictionary, without the .mmd extension.
  # @return [String, false] a "Deleted ..." message when the file was removed,
  #   false when there was no such file.
  # @raise [ArgumentError] if no location is given.
  def self.delete_dictionary!(dictionary=nil)
    # The old default read an instance variable that is never set at class
    # level, so calling without an argument could only fail; say so clearly.
    raise ArgumentError, "dictionary location is required" if dictionary.nil?

    mmd = "#{dictionary}.mmd"
    return false unless File.exist?(mmd)

    File.delete(mmd)
    "Deleted #{mmd}"
  end
end
@@ -0,0 +1,11 @@
1
+ require_relative 'one_word_dictionary'
2
+
3
# Markov chain dictionary keyed on pairs of consecutive words
# ("first second" => { "third" => count }).
class TwoWordDictionary < OneWordDictionary
  # Adds every consecutive word triple from a file or a string.
  #
  # @param [String] source a file path when +file+ is true, otherwise the text itself.
  # @param [Boolean] file whether +source+ names a file to read.
  def parse_source(source, file=true)
    contents = file ? open_source(source) : source.split
    # An empty source would otherwise register an empty-string key.
    return if contents.empty?

    contents.each_cons(3) do |first, second, third|
      add_word("#{first} #{second}", third)
    end
    # The final pair has nothing following it; make sure it still exists as a key.
    @dictionary[contents.last(2).join(' ')] ||= Hash.new(0)
  end
end
@@ -0,0 +1,13 @@
1
+ require_relative 'one_word_sentence_generator'
2
+
3
# Sentence generator for dictionaries keyed on word pairs: each step looks up
# the last two generated words to choose what comes next.
class TwoWordSentenceGenerator < OneWordSentenceGenerator
  # Builds a space-separated string of at most +wordcount+ words.
  #
  # @param [Integer] wordcount the number of words to generate.
  # @return [String] the generated text; empty when +wordcount+ is not
  #   positive or no starting key is available.
  def generate(wordcount)
    return '' if wordcount <= 0

    seed = random_word
    return '' if seed.nil?

    sentence = seed.split
    (wordcount - 1).times do
      # A fallback key can add two words at once, so we may finish early.
      break if sentence.length >= wordcount

      sentence.concat(weighted_random(sentence.last(2).join(' ')).split)
    end
    # first(n) trims any overshoot and, unlike pop with a negative count,
    # does not raise when fewer words than requested were produced.
    sentence.first(wordcount).join(' ')
  end
end
@@ -0,0 +1,45 @@
1
+ require 'spec_helper.rb'
2
+
3
# Specs for OneWordDictionary: file loading, word counting and parsing of
# both strings and the spec/test.txt fixture.
describe OneWordDictionary do
  before(:each) do
    @textsource = "spec/test.txt"
    @dict = OneWordDictionary.new
    @dict.parse_source("Hello how are you doing today", false)
    @stringdict = {"Hello" => {"how" => 1},
                   "how" => {"are" => 1},
                   "are" => {"you" => 1},
                   "you" => {"doing" => 1},
                   "doing" => {"today" => 1},
                   "today" => {} }
    @textdict = {"The" => {"cat"=>1},
                 "and" => {"chainsaws"=>1},
                 "cat" => {"likes"=>1},
                 "chainsaws" => {},
                 "likes" => {"pie"=>1},
                 "pie" => {"and"=>1} }
  end

  it "can open a file" do
    @dict.open_source(@textsource).should_not be_nil
  end

  it "should give a FileNotFoundError if the file doesn't exist" do
    # The matcher has to be an argument of `to`; with a line break right after
    # `.to` the raise_error call is parsed as a separate statement.
    expect { @dict.open_source("thisisntreal") }.to raise_error(
      OneWordDictionary::FileNotFoundError, "thisisntreal does not exist!")
  end

  it "can add a word to the dictionary" do
    @dict.add_word("to", "be")
    @dict.dictionary.should include("to" => {"be" => 1})
  end

  it "create a dictionary via parsing a text file" do
    @dict.dictionary = {}
    @dict.parse_source(@textsource)
    @dict.dictionary.should eql(@textdict)
  end

  it "builds a one word dictionary properly" do
    @dict.dictionary.should eql(@stringdict)
  end
end
@@ -0,0 +1,25 @@
1
+ require 'spec_helper.rb'
2
+
3
# Specs for OneWordSentenceGenerator built on a six-word source in which
# every word has exactly one follower, except the final word "today".
describe OneWordSentenceGenerator do
  before(:each) do
    @dict = OneWordDictionary.new
    @dict.parse_source("Hello man how are you today", false)
    @sentence = OneWordSentenceGenerator.new(@dict)
  end

  it "can pick a random word" do
    @dict.dictionary.keys.should include(@sentence.random_word)
  end

  it "can choose a weighted random word" do
    # "Hello" is only ever followed by "man", so the choice is deterministic.
    @sentence.weighted_random("Hello").should eql("man")
  end

  it "will use a random word if the word does not exist" do
    # "today" ends the source text, so no following word exists for it and
    # the generator has to fall back to a random dictionary key.
    @dict.dictionary.keys.should include(@sentence.weighted_random("today"))
  end

  it "generates a sentence of the appropriate length" do
    @sentence.generate(20).split.count.should eql(20)
  end
end
@@ -0,0 +1,22 @@
1
+ require 'spec_helper'
2
+
3
# Save/load checks for PersistentDictionary, built from the spec/test.txt
# fixture and removed again after every example.
describe PersistentDictionary do
  before(:each) do
    @dict = PersistentDictionary.new("spec/textdict")
    @dict.parse_source("spec/test.txt")
  end

  after(:each) do
    PersistentDictionary.delete_dictionary!("spec/textdict")
  end

  it "should be able to save a dictionary" do
    result = @dict.save_dictionary!
    result.should eql(true)
  end

  it "should be able to load an existing dictionary" do
    # spec/textdictcompare.mmd is the stored reference dictionary.
    reference = PersistentDictionary.new("spec/textdictcompare")
    @dict.dictionary.should eql(reference.dictionary)
  end
end
22
+
@@ -0,0 +1,31 @@
1
+ require 'spec_helper'
2
+
3
# Specs for TwoWordDictionary: the same six-word sentence is parsed once from
# a string and once from the spec/test.txt fixture.
describe TwoWordDictionary do
  before(:each) do
    @textsource = "spec/test.txt"
    @dict = TwoWordDictionary.new
    @dict.parse_source("The cat likes pie and chainsaws", false)
    @stringdict = {
      "The cat"       => { "likes" => 1 },
      "cat likes"     => { "pie" => 1 },
      "likes pie"     => { "and" => 1 },
      "pie and"       => { "chainsaws" => 1 },
      "and chainsaws" => {}
    }
    @textdict = {
      "The cat"       => { "likes" => 1 },
      "cat likes"     => { "pie" => 1 },
      "likes pie"     => { "and" => 1 },
      "pie and"       => { "chainsaws" => 1 },
      "and chainsaws" => {}
    }
  end

  it "can add a word to the two-word dictionary" do
    @dict.add_word("Zebras like", "kung-fu")
    with_zebras = @stringdict.merge({ "Zebras like" => { "kung-fu" => 1 } })
    @dict.dictionary.should eql(with_zebras)
  end

  it "create a two-word dictionary via parsing a text file" do
    # Start from an empty hash so only the file contents are compared.
    @dict.dictionary = {}
    @dict.parse_source(@textsource)
    @dict.dictionary.should eql(@textdict)
  end
end
31
+
@@ -0,0 +1,13 @@
1
+ require 'spec_helper'
2
+
3
# Length check for TwoWordSentenceGenerator on a six-word source sentence.
describe TwoWordSentenceGenerator do
  before(:each) do
    source = TwoWordDictionary.new
    source.parse_source("Hello man how are you today", false)
    @dict = source
    @sentence = TwoWordSentenceGenerator.new(@dict)
  end

  it "generates a sentence of the appropriate length" do
    words = @sentence.generate(20).split
    words.count.should eql(20)
  end
end
@@ -0,0 +1 @@
1
+ require 'marky_markov'
metadata ADDED
@@ -0,0 +1,79 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: marky_markov
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Matt Furden
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-02-06 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: yajl-ruby
16
+ requirement: &70100468290580 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 1.1.0
22
+ - - <
23
+ - !ruby/object:Gem::Version
24
+ version: 2.0.0
25
+ type: :runtime
26
+ prerelease: false
27
+ version_requirements: *70100468290580
28
+ description: ! "MarkyMarkov makes it easy to generate simple Markov Chains based upon
29
+ input from\n either a source file or a string. While usable as a module in your
30
+ code it can also be called on\n from the command line and piped into like a standard
31
+ UNIX application."
32
+ email: mfurden@gmail.com
33
+ executables:
34
+ - marky_markov
35
+ extensions: []
36
+ extra_rdoc_files:
37
+ - README.md
38
+ files:
39
+ - README.md
40
+ - bin/marky_markov
41
+ - lib/marky_markov.rb
42
+ - lib/marky_markov/one_word_dictionary.rb
43
+ - lib/marky_markov/one_word_sentence_generator.rb
44
+ - lib/marky_markov/persistent_dictionary.rb
45
+ - lib/marky_markov/two_word_dictionary.rb
46
+ - lib/marky_markov/two_word_sentence_generator.rb
47
+ - spec/marky_markov/one_word_dictionary_spec.rb
48
+ - spec/marky_markov/one_word_sentence_spec.rb
49
+ - spec/marky_markov/persistent_dictionary_spec.rb
50
+ - spec/marky_markov/two_word_dict_spec.rb
51
+ - spec/marky_markov/two_word_sentence_spec.rb
52
+ - spec/spec_helper.rb
53
+ homepage: http://www.thefurd.com
54
+ licenses: []
55
+ post_install_message:
56
+ rdoc_options:
57
+ - --charset=UTF-8
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ required_rubygems_version: !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ! '>='
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
72
+ requirements: []
73
+ rubyforge_project: marky_markov
74
+ rubygems_version: 1.8.15
75
+ signing_key:
76
+ specification_version: 2
77
+ summary: Simple Markov Chain generation available in the command-line
78
+ test_files: []
79
+ has_rdoc: