marky_markov 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -4,6 +4,12 @@ Marky Markov and the Funky Sentences
4
4
  Marky Markov is a naïve experiment in Markov Chain generation implemented
5
5
  in Ruby. It can be used both from the command-line and as a library within your code.
6
6
 
7
+ NOTE: 0.3.0 now uses arrays with multiple entries per word instead of a
8
+ hash key for each word with the value representing number of occurrences.
9
+ While a less elegant solution, it leads to faster text generation. We
10
+ are also now using ox instead of yajl-json to store the dictionary
11
+ which should lead to faster usage in the command-line app.
12
+
7
13
  NOTE: In the transition between 0.1.3 to 0.2.0 MarkyMarkov has added the
8
14
  ability to generate proper sentences (generate_n_sentences) instead of simply a
9
15
  maximum number of words. The command-line app has changed to sentences as its default
@@ -46,9 +52,9 @@ number in there, well, you can!
46
52
  markov.generate_20_words
47
53
 
48
54
  The default dictionary depth is two words.
49
- `{"I hope" => {"this" => 1},
50
- "hope this" => {"makes" => 1},
51
- "this makes" => {"sense" => 1}}`
55
+ `{["I", "hope"] => ["this"],
56
+ ["hope", "this"] => ["makes"],
57
+ ["this", "makes"] => ["sense"]}`
52
58
  but it can be set to a depth between 1 and 5 upon dictionary creation,
53
59
  though really any higher than 3 and it starts to simply print passages
54
60
  from the source text.
@@ -56,8 +62,8 @@ from the source text.
56
62
  markov = MarkyMarkov::Dictionary.new('dictionary', 3)
57
63
 
58
64
  creates a dictionary with a depth of three words.
59
- `{"I hope this" => {"makes" => 1},
60
- "hope this makes" => {"sense" => 1}`
65
+ `{["I", "hope", "this"] => ["makes"],
66
+ ["hope", "this", "makes"] => ["sense"]}`
61
67
 
62
68
  If you want to delete a dictionary you call it upon the Dictionary class itself while
63
69
  passing in the filename/location.
data/lib/marky_markov.rb CHANGED
@@ -4,12 +4,12 @@
4
4
  require_relative 'marky_markov/persistent_dictionary'
5
5
  require_relative 'marky_markov/markov_sentence_generator'
6
6
 
7
- # @version = 0.2.0
7
+ # @version = 0.3.0
8
8
  # @author Matt Furden
9
9
  # Module containing TemporaryDictionary and Dictionary for creation of
10
10
  # Markov Chain Dictionaries and generating sentences from those dictionaries.
11
11
  module MarkyMarkov
12
- VERSION = '0.2.0'
12
+ VERSION = '0.3.0'
13
13
 
14
14
  class TemporaryDictionary
15
15
  # Create a new Temporary Markov Chain Dictionary and sentence generator for use.
@@ -24,9 +24,8 @@ class MarkovDictionary
24
24
  # @example Adding a word
25
25
  # add_word("Hello", "world")
26
26
  def add_word(rootword, followedby)
27
- @dictionary[rootword] ||= Hash.new(0)
28
- @dictionary[rootword][followedby] ||= 0
29
- @dictionary[rootword][followedby] += 1
27
+ @dictionary[rootword] ||= []
28
+ @dictionary[rootword] << followedby
30
29
  end
31
30
 
32
31
  # Given a source of text, be it a text file (file=true) or a string (file=false)
@@ -39,8 +38,8 @@ class MarkovDictionary
39
38
  def parse_source(source, file=true)
40
39
  contents = file ? open_source(source) : contents = source.split
41
40
  contents.each_cons(@depth+1) do |words|
42
- self.add_word(words[0..-2].join(' '), words[-1])
41
+ self.add_word(words[0..-2], words[-1])
43
42
  end
44
- @dictionary[contents.last(@depth).join(' ')] ||= Hash.new(0)
43
+ @dictionary[contents.last(@depth)] ||= []
45
44
  end
46
45
  end
@@ -39,16 +39,9 @@ class MarkovSentenceGenerator
39
39
  def weighted_random(lastword)
40
40
  # If word has no words in its dictionary (last word in source text file)
41
41
  # have it pick a random word to display instead.
42
- @dictionary.dictionary.fetch(lastword, random_word)
43
- total = @dictionary.dictionary[lastword].values.inject(:+)
44
- return random_word if total.nil?
45
- random = rand(total)+1
46
- @dictionary.dictionary[lastword].each do |word, occurs|
47
- random -= occurs
48
- if random <= 0
49
- return word
50
- end
51
- end
42
+ if word = @dictionary.dictionary[lastword]
43
+ word.sample
44
+ end
52
45
  end
53
46
 
54
47
  # Generates a sentence of (wordcount) length using the weighted_random function.
@@ -57,9 +50,13 @@ class MarkovSentenceGenerator
57
50
  # @return [String] the words, hopefully forming sentences generated.
58
51
  def generate(wordcount)
59
52
  sentence = []
60
- sentence.concat(random_capitalized_word.split)
53
+ sentence.concat(random_capitalized_word)
61
54
  (wordcount-1).times do
62
- sentence.concat(weighted_random(sentence.last(@depth).join(' ')).split)
55
+ if word = weighted_random(sentence.last(@depth))
56
+ sentence << word
57
+ else
58
+ sentence.concat(random_capitalized_word)
59
+ end
63
60
  end
64
61
  sentence.pop(sentence.length-wordcount)
65
62
  sentence.join(' ')
@@ -77,12 +74,13 @@ class MarkovSentenceGenerator
77
74
  # If less than 30 keys, use that plus five as your maximum sentence length.
78
75
  maximum_length = key_count < 30 ? key_count + 5 : 30
79
76
  stop_at_index = sentence.count + maximum_length
80
- sentence.concat(random_capitalized_word.split)
77
+ sentence.concat(random_capitalized_word)
81
78
  until (/[.!?]/ =~ sentence.last[-1])
82
- sentence.concat(weighted_random(sentence.last(@depth).join(' ')).split)
79
+ word = weighted_random(sentence.last(@depth))
80
+ sentence << word unless word.nil?
83
81
  # If a word ending with a . ! or ? isn't found after 30 words,
84
82
  # just add a period as there may be none in the dictionary.
85
- sentence[-1] << "." if sentence.count > stop_at_index
83
+ sentence[-1] << "." if word.nil? || sentence.count > stop_at_index
86
84
  end
87
85
  end
88
86
  sentence.join(' ')
@@ -1,4 +1,4 @@
1
- require 'yajl'
1
+ require 'ox'
2
2
  require_relative 'markov_dictionary'
3
3
 
4
4
  # @private
@@ -28,10 +28,9 @@ class PersistentDictionary < MarkovDictionary
28
28
  # otherwise it creates an empty hash.
29
29
  def open_dictionary
30
30
  if File.exists?(@dictionarylocation)
31
- File.open(@dictionarylocation,'r').each do |f|
32
- @depth = f[0].to_i
33
- @dictionary = Yajl::Parser.parse(f[1..-1])
34
- end
31
+ file = File.new(@dictionarylocation, 'r').read
32
+ @depth = file[0].to_i
33
+ @dictionary = Ox.parse_obj(file[1..-1])
35
34
  else
36
35
  @dictionary = {}
37
36
  end
@@ -40,9 +39,9 @@ class PersistentDictionary < MarkovDictionary
40
39
  # Saves the PersistentDictionary objects @dictionary hash
41
40
  # to disk in Ox's XML object format.
42
41
  def save_dictionary!
43
- json = Yajl::Encoder.encode(@dictionary)
42
+ packed = Ox.dump(@dictionary)
44
43
  File.open(@dictionarylocation, 'w') do |f|
45
- f.puts @depth.to_s + json
44
+ f.write @depth.to_s + packed
46
45
  end
47
46
  true
48
47
  end
@@ -0,0 +1,82 @@
1
+ ## This is the rakegem gemspec template. Make sure you read and understand
2
+ ## all of the comments. Some sections require modification, and others can
3
+ ## be deleted if you don't need them. Once you understand the contents of
4
+ ## this file, feel free to delete any comments that begin with two hash marks.
5
+ ## You can find comprehensive Gem::Specification documentation, at
6
+ ## http://docs.rubygems.org/read/chapter/20
7
+ Gem::Specification.new do |s|
8
+ s.specification_version = 2 if s.respond_to? :specification_version=
9
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
10
+ s.rubygems_version = '1.3.5'
11
+
12
+ ## Leave these as is; they will be modified for you by the rake gemspec task.
13
+ ## If your rubyforge_project name is different, then edit it and comment out
14
+ ## the sub! line in the Rakefile
15
+ s.name = 'marky_markov'
16
+ s.version = '0.3.0'
17
+ s.date = '2012-02-09'
18
+ s.rubyforge_project = 'marky_markov'
19
+
20
+ ## Make sure your summary is short. The description may be as long
21
+ ## as you like.
22
+ s.summary = "Simple Markov Chain generation available in the command-line"
23
+ s.description = "MarkyMarkov makes it easy to generate simply Markov Chains based upon input from
24
+ either a source file or a string. While usable as a module in your code it can also be called on
25
+ from the command line and piped into like a standard UNIX application."
26
+
27
+ ## List the primary authors. If there are a bunch of authors, it's probably
28
+ ## better to set the email to an email list or something. If you don't have
29
+ ## a custom homepage, consider using your GitHub URL or the like.
30
+ s.authors = ["Matt Furden"]
31
+ s.email = 'mfurden@gmail.com'
32
+ s.homepage = 'http://www.thefurd.com'
33
+
34
+ ## This gets added to the $LOAD_PATH so that 'lib/NAME.rb' can be required as
35
+ ## require 'NAME.rb' or '/lib/NAME/file.rb' can be required as require 'NAME/file.rb'
36
+ s.require_paths = %w[lib]
37
+
38
+ ## This section is only necessary if you have C extensions.
39
+ #s.require_paths << 'ext'
40
+ #s.extensions = %w[ext/extconf.rb]
41
+
42
+ ## If your gem includes any executables, list them here.
43
+ s.executables = ["marky_markov"]
44
+
45
+ ## Specify any RDoc options here. You'll want to add your README and
46
+ ## LICENSE files to the extra_rdoc_files list.
47
+ s.rdoc_options = ["--charset=UTF-8"]
48
+ s.extra_rdoc_files = %w[README.md]
49
+
50
+ ## List your runtime dependencies here. Runtime dependencies are those
51
+ ## that are needed for an end user to actually USE your code.
52
+ s.add_dependency('ox', "~> 1.4")
53
+
54
+ ## List your development dependencies here. Development dependencies are
55
+ ## those that are only needed during development
56
+ #s.add_development_dependency('DEVDEPNAME', [">= 1.1.0", "< 2.0.0"])
57
+
58
+ ## Leave this section as-is. It will be automatically generated from the
59
+ ## contents of your Git repository via the gemspec task. DO NOT REMOVE
60
+ ## THE MANIFEST COMMENTS, they are used as delimiters by the task.
61
+ # = MANIFEST =
62
+ s.files = %w[
63
+ README.md
64
+ bin/marky_markov
65
+ lib/marky_markov.rb
66
+ lib/marky_markov/markov_dictionary.rb
67
+ lib/marky_markov/markov_sentence_generator.rb
68
+ lib/marky_markov/persistent_dictionary.rb
69
+ marky_markov.gemspec
70
+ spec/marky_markov/markov_dictionary_spec.rb
71
+ spec/marky_markov/markov_sentence_generator_spec.rb
72
+ spec/marky_markov/persistent_dictionary_spec.rb
73
+ spec/spec_helper.rb
74
+ spec/test.txt
75
+ spec/textdictcompare.mmd
76
+ ]
77
+ # = MANIFEST =
78
+
79
+ ## Test files will be grabbed from the file list. Make sure the path glob
80
+ ## matches what you actually use.
81
+ s.test_files = s.files.select { |path| path =~ /^test\/test_.*\.rb/ }
82
+ end
@@ -5,19 +5,13 @@ describe MarkovDictionary do
5
5
  before(:each) do
6
6
  @onetextsource = "spec/test.txt"
7
7
  @onedict = MarkovDictionary.new(1)
8
- @onedict.parse_source("Hello how are you doing today", false)
9
- @onestringdict = {"Hello" => {"how" => 1},
10
- "how" => {"are" => 1},
11
- "are" => {"you" => 1},
12
- "you" => {"doing" => 1},
13
- "doing" => {"today" => 1},
14
- "today" => {} }
15
- @onetextdict = {"The" => {"cat"=>1},
16
- "and" => {"chainsaws"=>1},
17
- "cat" => {"likes"=>1},
18
- "chainsaws" => {},
19
- "likes" => {"pie"=>1},
20
- "pie" => {"and"=>1} }
8
+ @onedict.parse_source("The cat likes pie and chainsaws", false)
9
+ @onetextdict = { ["The"] => ["cat"],
10
+ ["cat"] => ["likes"],
11
+ ["likes"] => ["pie"],
12
+ ["pie"] => ["and"],
13
+ ["and"] => ["chainsaws"],
14
+ ["chainsaws"] => []}
21
15
  end
22
16
 
23
17
  it "can open a file" do
@@ -30,8 +24,8 @@ describe MarkovDictionary do
30
24
  end
31
25
 
32
26
  it "can add a word to the dictionary" do
33
- @onedict.add_word("to", "be")
34
- @onedict.dictionary.should include("to" => {"be" => 1})
27
+ @onedict.add_word(["to"], "be")
28
+ @onedict.dictionary.should include(["to"] => ["be"])
35
29
  end
36
30
 
37
31
  it "create a dictionary via parsing a text file" do
@@ -41,7 +35,7 @@ describe MarkovDictionary do
41
35
  end
42
36
 
43
37
  it "builds a one word dictionary properly" do
44
- @onedict.dictionary.should eql(@onestringdict)
38
+ @onedict.dictionary.should eql(@onetextdict)
45
39
  end
46
40
  end
47
41
 
@@ -50,21 +44,16 @@ describe MarkovDictionary do
50
44
  @twodict = MarkovDictionary.new
51
45
  @twodict.parse_source("The cat likes pie and chainsaws", false)
52
46
  @twotextsource = "spec/test.txt"
53
- @twostringdict = { "The cat" => { "likes" => 1},
54
- "cat likes" => { "pie" => 1 },
55
- "likes pie" => {"and" => 1 },
56
- "pie and" => { "chainsaws" => 1 },
57
- "and chainsaws" => {} }
58
- @twotextdict = {"The cat" => {"likes" => 1},
59
- "cat likes" => {"pie" => 1},
60
- "likes pie" => {"and" => 1},
61
- "pie and" => {"chainsaws" => 1},
62
- "and chainsaws" => {}}
47
+ @twotextdict = {["The", "cat"] => ["likes"],
48
+ ["and", "chainsaws"] => [],
49
+ ["cat", "likes"] => ["pie"],
50
+ ["likes", "pie"] => ["and"],
51
+ ["pie", "and"] => ["chainsaws"]}
63
52
  end
64
53
 
65
54
  it "can add a word to the two-word dictionary" do
66
- @twodict.add_word("Zebras like", "kung-fu")
67
- @twodict.dictionary.should eql(@twostringdict.merge( {"Zebras like" => {"kung-fu" => 1}} ))
55
+ @twodict.add_word(["Zebras", "like"], "kung-fu")
56
+ @twodict.dictionary.should eql(@twotextdict.merge( {["Zebras", "like"] => ["kung-fu"]} ))
68
57
  end
69
58
 
70
59
  it "create a two-word dictionary via parsing a text file" do
@@ -1 +1,35 @@
1
- 2{"The cat":{"likes":1},"cat likes":{"pie":1},"likes pie":{"and":1},"pie and":{"chainsaws":1},"and chainsaws":{}}
1
+ 2<h>
2
+ <a>
3
+ <s>The</s>
4
+ <s>cat</s>
5
+ </a>
6
+ <a>
7
+ <s>likes</s>
8
+ </a>
9
+ <a>
10
+ <s>cat</s>
11
+ <s>likes</s>
12
+ </a>
13
+ <a>
14
+ <s>pie</s>
15
+ </a>
16
+ <a>
17
+ <s>likes</s>
18
+ <s>pie</s>
19
+ </a>
20
+ <a>
21
+ <s>and</s>
22
+ </a>
23
+ <a>
24
+ <s>pie</s>
25
+ <s>and</s>
26
+ </a>
27
+ <a>
28
+ <s>chainsaws</s>
29
+ </a>
30
+ <a>
31
+ <s>and</s>
32
+ <s>chainsaws</s>
33
+ </a>
34
+ <a/>
35
+ </h>
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: marky_markov
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,22 +9,19 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-08 00:00:00.000000000 Z
12
+ date: 2012-02-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
- name: yajl-ruby
16
- requirement: &70254574670860 !ruby/object:Gem::Requirement
15
+ name: ox
16
+ requirement: &70184695396840 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
- - - ! '>='
19
+ - - ~>
20
20
  - !ruby/object:Gem::Version
21
- version: 1.1.0
22
- - - <
23
- - !ruby/object:Gem::Version
24
- version: 2.0.0
21
+ version: '1.4'
25
22
  type: :runtime
26
23
  prerelease: false
27
- version_requirements: *70254574670860
24
+ version_requirements: *70184695396840
28
25
  description: ! "MarkyMarkov makes it easy to generate simply Markov Chains based upon
29
26
  input from\n either a source file or a string. While usable as a module in your
30
27
  code it can also be called on\n from the command line and piped into like a standard
@@ -42,6 +39,7 @@ files:
42
39
  - lib/marky_markov/markov_dictionary.rb
43
40
  - lib/marky_markov/markov_sentence_generator.rb
44
41
  - lib/marky_markov/persistent_dictionary.rb
42
+ - marky_markov.gemspec
45
43
  - spec/marky_markov/markov_dictionary_spec.rb
46
44
  - spec/marky_markov/markov_sentence_generator_spec.rb
47
45
  - spec/marky_markov/persistent_dictionary_spec.rb