marky_markov 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -4,6 +4,12 @@ Marky Markov and the Funky Sentences
4
4
  Marky Markov is a naïve experiment in Markov Chain generation implemented
5
5
  in Ruby. It can be used both from the command-line and as a library within your code.
6
6
 
7
+ NOTE: 0.3.0 now uses arrays with multiple entries per word instead of a
8
+ hash key for each word with the value representing number of occurrences.
9
+ While a less elegant solution, it leads to faster text generation. We
10
+ are also now using Ox instead of yajl-json to store the dictionary
11
+ which should lead to faster usage in the command-line app.
12
+
7
13
  NOTE: In the transition between 0.1.3 to 0.2.0 MarkyMarkov has added the
8
14
  ability to generate proper sentences (generate_n_sentences) instead of simply a
9
15
  maximum number of words. The command-line app has changed to sentences as its default
@@ -46,9 +52,9 @@ number in there, well, you can!
46
52
  markov.generate_20_words
47
53
 
48
54
  The default dictionary depth is two words.
49
- `{"I hope" => {"this" => 1},
50
- "hope this" => {"makes" => 1},
51
- "this makes" => {"sense" => 1}}`
55
+ `{["I", "hope"] => ["this"],
56
+ ["hope", "this"] => ["makes"],
57
+ ["this", "makes"] => ["sense"]}`
52
58
  but it can be set to a depth between 1 and 5 upon dictionary creation,
53
59
  though really any higher than 3 and it starts to simply print passages
54
60
  from the source text.
@@ -56,8 +62,8 @@ from the source text.
56
62
  markov = MarkyMarkov::Dictionary.new('dictionary', 3)
57
63
 
58
64
  creates a dictionary with a depth of three words.
59
- `{"I hope this" => {"makes" => 1},
60
- "hope this makes" => {"sense" => 1}`
65
+ `{["I", "hope", "this"] => ["makes"],
66
+ ["hope", "this", "makes"] => ["sense"]}`
61
67
 
62
68
  If you want to delete a dictionary you call it upon the Dictionary class itself while
63
69
  passing in the filename/location.
data/lib/marky_markov.rb CHANGED
@@ -4,12 +4,12 @@
4
4
  require_relative 'marky_markov/persistent_dictionary'
5
5
  require_relative 'marky_markov/markov_sentence_generator'
6
6
 
7
- # @version = 0.2.0
7
+ # @version = 0.3.0
8
8
  # @author Matt Furden
9
9
  # Module containing TemporaryDictionary and Dictionary for creation of
10
10
  # Markov Chain Dictionaries and generating sentences from those dictionaries.
11
11
  module MarkyMarkov
12
- VERSION = '0.2.0'
12
+ VERSION = '0.3.0'
13
13
 
14
14
  class TemporaryDictionary
15
15
  # Create a new Temporary Markov Chain Dictionary and sentence generator for use.
@@ -24,9 +24,8 @@ class MarkovDictionary
24
24
  # @example Adding a word
25
25
  # add_word("Hello", "world")
26
26
  def add_word(rootword, followedby)
27
- @dictionary[rootword] ||= Hash.new(0)
28
- @dictionary[rootword][followedby] ||= 0
29
- @dictionary[rootword][followedby] += 1
27
+ @dictionary[rootword] ||= []
28
+ @dictionary[rootword] << followedby
30
29
  end
31
30
 
32
31
  # Given a source of text, be it a text file (file=true) or a string (file=false)
@@ -39,8 +38,8 @@ class MarkovDictionary
39
38
  def parse_source(source, file=true)
40
39
  contents = file ? open_source(source) : contents = source.split
41
40
  contents.each_cons(@depth+1) do |words|
42
- self.add_word(words[0..-2].join(' '), words[-1])
41
+ self.add_word(words[0..-2], words[-1])
43
42
  end
44
- @dictionary[contents.last(@depth).join(' ')] ||= Hash.new(0)
43
+ @dictionary[contents.last(@depth)] ||= []
45
44
  end
46
45
  end
@@ -39,16 +39,9 @@ class MarkovSentenceGenerator
39
39
  def weighted_random(lastword)
40
40
  # If word has no words in its dictionary (last word in source text file)
41
41
  # have it pick a random word to display instead.
42
- @dictionary.dictionary.fetch(lastword, random_word)
43
- total = @dictionary.dictionary[lastword].values.inject(:+)
44
- return random_word if total.nil?
45
- random = rand(total)+1
46
- @dictionary.dictionary[lastword].each do |word, occurs|
47
- random -= occurs
48
- if random <= 0
49
- return word
50
- end
51
- end
42
+ if word = @dictionary.dictionary[lastword]
43
+ word.sample
44
+ end
52
45
  end
53
46
 
54
47
  # Generates a sentence of (wordcount) length using the weighted_random function.
@@ -57,9 +50,13 @@ class MarkovSentenceGenerator
57
50
  # @return [String] the words, hopefully forming sentences generated.
58
51
  def generate(wordcount)
59
52
  sentence = []
60
- sentence.concat(random_capitalized_word.split)
53
+ sentence.concat(random_capitalized_word)
61
54
  (wordcount-1).times do
62
- sentence.concat(weighted_random(sentence.last(@depth).join(' ')).split)
55
+ if word = weighted_random(sentence.last(@depth))
56
+ sentence << word
57
+ else
58
+ sentence.concat(random_capitalized_word)
59
+ end
63
60
  end
64
61
  sentence.pop(sentence.length-wordcount)
65
62
  sentence.join(' ')
@@ -77,12 +74,13 @@ class MarkovSentenceGenerator
77
74
  # If less than 30 keys, use that plus five as your maximum sentence length.
78
75
  maximum_length = key_count < 30 ? key_count + 5 : 30
79
76
  stop_at_index = sentence.count + maximum_length
80
- sentence.concat(random_capitalized_word.split)
77
+ sentence.concat(random_capitalized_word)
81
78
  until (/[.!?]/ =~ sentence.last[-1])
82
- sentence.concat(weighted_random(sentence.last(@depth).join(' ')).split)
79
+ word = weighted_random(sentence.last(@depth))
80
+ sentence << word unless word.nil?
83
81
  # If a word ending with a . ! or ? isn't found after 30 words,
84
82
  # just add a period as there may be none in the dictionary.
85
- sentence[-1] << "." if sentence.count > stop_at_index
83
+ sentence[-1] << "." if word.nil? || sentence.count > stop_at_index
86
84
  end
87
85
  end
88
86
  sentence.join(' ')
@@ -1,4 +1,4 @@
1
- require 'yajl'
1
+ require 'ox'
2
2
  require_relative 'markov_dictionary'
3
3
 
4
4
  # @private
@@ -28,10 +28,9 @@ class PersistentDictionary < MarkovDictionary
28
28
  # otherwise it creates an empty hash.
29
29
  def open_dictionary
30
30
  if File.exists?(@dictionarylocation)
31
- File.open(@dictionarylocation,'r').each do |f|
32
- @depth = f[0].to_i
33
- @dictionary = Yajl::Parser.parse(f[1..-1])
34
- end
31
+ file = File.new(@dictionarylocation, 'r').read
32
+ @depth = file[0].to_i
33
+ @dictionary = Ox.parse_obj(file[1..-1])
35
34
  else
36
35
  @dictionary = {}
37
36
  end
@@ -40,9 +39,9 @@ class PersistentDictionary < MarkovDictionary
40
39
  # Saves the PersistentDictionary objects @dictionary hash
41
40
  # to disk in Ox's XML format.
42
41
  def save_dictionary!
43
- json = Yajl::Encoder.encode(@dictionary)
42
+ packed = Ox.dump(@dictionary)
44
43
  File.open(@dictionarylocation, 'w') do |f|
45
- f.puts @depth.to_s + json
44
+ f.write @depth.to_s + packed
46
45
  end
47
46
  true
48
47
  end
@@ -0,0 +1,82 @@
1
+ ## This is the rakegem gemspec template. Make sure you read and understand
2
+ ## all of the comments. Some sections require modification, and others can
3
+ ## be deleted if you don't need them. Once you understand the contents of
4
+ ## this file, feel free to delete any comments that begin with two hash marks.
5
+ ## You can find comprehensive Gem::Specification documentation, at
6
+ ## http://docs.rubygems.org/read/chapter/20
7
+ Gem::Specification.new do |s|
8
+ s.specification_version = 2 if s.respond_to? :specification_version=
9
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
10
+ s.rubygems_version = '1.3.5'
11
+
12
+ ## Leave these as is they will be modified for you by the rake gemspec task.
13
+ ## If your rubyforge_project name is different, then edit it and comment out
14
+ ## the sub! line in the Rakefile
15
+ s.name = 'marky_markov'
16
+ s.version = '0.3.0'
17
+ s.date = '2012-02-09'
18
+ s.rubyforge_project = 'marky_markov'
19
+
20
+ ## Make sure your summary is short. The description may be as long
21
+ ## as you like.
22
+ s.summary = "Simple Markov Chain generation available in the command-line"
23
+ s.description = "MarkyMarkov makes it easy to generate simply Markov Chains based upon input from
24
+ either a source file or a string. While usable as a module in your code it can also be called on
25
+ from the command line and piped into like a standard UNIX application."
26
+
27
+ ## List the primary authors. If there are a bunch of authors, it's probably
28
+ ## better to set the email to an email list or something. If you don't have
29
+ ## a custom homepage, consider using your GitHub URL or the like.
30
+ s.authors = ["Matt Furden"]
31
+ s.email = 'mfurden@gmail.com'
32
+ s.homepage = 'http://www.thefurd.com'
33
+
34
+ ## This gets added to the $LOAD_PATH so that 'lib/NAME.rb' can be required as
35
+ ## require 'NAME.rb' or'/lib/NAME/file.rb' can be as require 'NAME/file.rb'
36
+ s.require_paths = %w[lib]
37
+
38
+ ## This section is only necessary if you have C extensions.
39
+ #s.require_paths << 'ext'
40
+ #s.extensions = %w[ext/extconf.rb]
41
+
42
+ ## If your gem includes any executables, list them here.
43
+ s.executables = ["marky_markov"]
44
+
45
+ ## Specify any RDoc options here. You'll want to add your README and
46
+ ## LICENSE files to the extra_rdoc_files list.
47
+ s.rdoc_options = ["--charset=UTF-8"]
48
+ s.extra_rdoc_files = %w[README.md]
49
+
50
+ ## List your runtime dependencies here. Runtime dependencies are those
51
+ ## that are needed for an end user to actually USE your code.
52
+ s.add_dependency('ox', "~> 1.4")
53
+
54
+ ## List your development dependencies here. Development dependencies are
55
+ ## those that are only needed during development
56
+ #s.add_development_dependency('DEVDEPNAME', [">= 1.1.0", "< 2.0.0"])
57
+
58
+ ## Leave this section as-is. It will be automatically generated from the
59
+ ## contents of your Git repository via the gemspec task. DO NOT REMOVE
60
+ ## THE MANIFEST COMMENTS, they are used as delimiters by the task.
61
+ # = MANIFEST =
62
+ s.files = %w[
63
+ README.md
64
+ bin/marky_markov
65
+ lib/marky_markov.rb
66
+ lib/marky_markov/markov_dictionary.rb
67
+ lib/marky_markov/markov_sentence_generator.rb
68
+ lib/marky_markov/persistent_dictionary.rb
69
+ marky_markov.gemspec
70
+ spec/marky_markov/markov_dictionary_spec.rb
71
+ spec/marky_markov/markov_sentence_generator_spec.rb
72
+ spec/marky_markov/persistent_dictionary_spec.rb
73
+ spec/spec_helper.rb
74
+ spec/test.txt
75
+ spec/textdictcompare.mmd
76
+ ]
77
+ # = MANIFEST =
78
+
79
+ ## Test files will be grabbed from the file list. Make sure the path glob
80
+ ## matches what you actually use.
81
+ s.test_files = s.files.select { |path| path =~ /^test\/test_.*\.rb/ }
82
+ end
@@ -5,19 +5,13 @@ describe MarkovDictionary do
5
5
  before(:each) do
6
6
  @onetextsource = "spec/test.txt"
7
7
  @onedict = MarkovDictionary.new(1)
8
- @onedict.parse_source("Hello how are you doing today", false)
9
- @onestringdict = {"Hello" => {"how" => 1},
10
- "how" => {"are" => 1},
11
- "are" => {"you" => 1},
12
- "you" => {"doing" => 1},
13
- "doing" => {"today" => 1},
14
- "today" => {} }
15
- @onetextdict = {"The" => {"cat"=>1},
16
- "and" => {"chainsaws"=>1},
17
- "cat" => {"likes"=>1},
18
- "chainsaws" => {},
19
- "likes" => {"pie"=>1},
20
- "pie" => {"and"=>1} }
8
+ @onedict.parse_source("The cat likes pie and chainsaws", false)
9
+ @onetextdict = { ["The"] => ["cat"],
10
+ ["cat"] => ["likes"],
11
+ ["likes"] => ["pie"],
12
+ ["pie"] => ["and"],
13
+ ["and"] => ["chainsaws"],
14
+ ["chainsaws"] => []}
21
15
  end
22
16
 
23
17
  it "can open a file" do
@@ -30,8 +24,8 @@ describe MarkovDictionary do
30
24
  end
31
25
 
32
26
  it "can add a word to the dictionary" do
33
- @onedict.add_word("to", "be")
34
- @onedict.dictionary.should include("to" => {"be" => 1})
27
+ @onedict.add_word(["to"], "be")
28
+ @onedict.dictionary.should include(["to"] => ["be"])
35
29
  end
36
30
 
37
31
  it "create a dictionary via parsing a text file" do
@@ -41,7 +35,7 @@ describe MarkovDictionary do
41
35
  end
42
36
 
43
37
  it "builds a one word dictionary properly" do
44
- @onedict.dictionary.should eql(@onestringdict)
38
+ @onedict.dictionary.should eql(@onetextdict)
45
39
  end
46
40
  end
47
41
 
@@ -50,21 +44,16 @@ describe MarkovDictionary do
50
44
  @twodict = MarkovDictionary.new
51
45
  @twodict.parse_source("The cat likes pie and chainsaws", false)
52
46
  @twotextsource = "spec/test.txt"
53
- @twostringdict = { "The cat" => { "likes" => 1},
54
- "cat likes" => { "pie" => 1 },
55
- "likes pie" => {"and" => 1 },
56
- "pie and" => { "chainsaws" => 1 },
57
- "and chainsaws" => {} }
58
- @twotextdict = {"The cat" => {"likes" => 1},
59
- "cat likes" => {"pie" => 1},
60
- "likes pie" => {"and" => 1},
61
- "pie and" => {"chainsaws" => 1},
62
- "and chainsaws" => {}}
47
+ @twotextdict = {["The", "cat"] => ["likes"],
48
+ ["and", "chainsaws"] => [],
49
+ ["cat", "likes"] => ["pie"],
50
+ ["likes", "pie"] => ["and"],
51
+ ["pie", "and"] => ["chainsaws"]}
63
52
  end
64
53
 
65
54
  it "can add a word to the two-word dictionary" do
66
- @twodict.add_word("Zebras like", "kung-fu")
67
- @twodict.dictionary.should eql(@twostringdict.merge( {"Zebras like" => {"kung-fu" => 1}} ))
55
+ @twodict.add_word(["Zebras", "like"], "kung-fu")
56
+ @twodict.dictionary.should eql(@twotextdict.merge( {["Zebras", "like"] => ["kung-fu"]} ))
68
57
  end
69
58
 
70
59
  it "create a two-word dictionary via parsing a text file" do
@@ -1 +1,35 @@
1
- 2{"The cat":{"likes":1},"cat likes":{"pie":1},"likes pie":{"and":1},"pie and":{"chainsaws":1},"and chainsaws":{}}
1
+ 2<h>
2
+ <a>
3
+ <s>The</s>
4
+ <s>cat</s>
5
+ </a>
6
+ <a>
7
+ <s>likes</s>
8
+ </a>
9
+ <a>
10
+ <s>cat</s>
11
+ <s>likes</s>
12
+ </a>
13
+ <a>
14
+ <s>pie</s>
15
+ </a>
16
+ <a>
17
+ <s>likes</s>
18
+ <s>pie</s>
19
+ </a>
20
+ <a>
21
+ <s>and</s>
22
+ </a>
23
+ <a>
24
+ <s>pie</s>
25
+ <s>and</s>
26
+ </a>
27
+ <a>
28
+ <s>chainsaws</s>
29
+ </a>
30
+ <a>
31
+ <s>and</s>
32
+ <s>chainsaws</s>
33
+ </a>
34
+ <a/>
35
+ </h>
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: marky_markov
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,22 +9,19 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-08 00:00:00.000000000 Z
12
+ date: 2012-02-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
- name: yajl-ruby
16
- requirement: &70254574670860 !ruby/object:Gem::Requirement
15
+ name: ox
16
+ requirement: &70184695396840 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
- - - ! '>='
19
+ - - ~>
20
20
  - !ruby/object:Gem::Version
21
- version: 1.1.0
22
- - - <
23
- - !ruby/object:Gem::Version
24
- version: 2.0.0
21
+ version: '1.4'
25
22
  type: :runtime
26
23
  prerelease: false
27
- version_requirements: *70254574670860
24
+ version_requirements: *70184695396840
28
25
  description: ! "MarkyMarkov makes it easy to generate simply Markov Chains based upon
29
26
  input from\n either a source file or a string. While usable as a module in your
30
27
  code it can also be called on\n from the command line and piped into like a standard
@@ -42,6 +39,7 @@ files:
42
39
  - lib/marky_markov/markov_dictionary.rb
43
40
  - lib/marky_markov/markov_sentence_generator.rb
44
41
  - lib/marky_markov/persistent_dictionary.rb
42
+ - marky_markov.gemspec
45
43
  - spec/marky_markov/markov_dictionary_spec.rb
46
44
  - spec/marky_markov/markov_sentence_generator_spec.rb
47
45
  - spec/marky_markov/persistent_dictionary_spec.rb