marky_markov 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -7,8 +7,8 @@ in Ruby. It can be used both from the command-line and as a library within your
7
7
  NOTE: 0.3.0 now uses arrays with multiple entries per word instead of a
8
8
  hash key for each word with the value representing number of occurrences.
9
9
  While a less elegant solution, it leads to faster text generation. We
10
- are also now using msgpack instead of yajl-json to store the dictionary
11
- which should lead to faster usage in the command-line app.
10
+ are also now using ox instead of yajl-json to store the dictionary
11
+ as yajl-json does not appear to support arrays within hashes properly.
12
12
 
13
13
  NOTE: In the transition between 0.1.3 to 0.2.0 MarkyMarkov has added the
14
14
  ability to generate proper sentences (generate_n_sentences) instead of simply a
data/lib/marky_markov.rb CHANGED
@@ -4,12 +4,12 @@
4
4
  require_relative 'marky_markov/persistent_dictionary'
5
5
  require_relative 'marky_markov/markov_sentence_generator'
6
6
 
7
- # @version = 0.3.0
7
+ # @version = 0.3.2
8
8
  # @author Matt Furden
9
9
  # Module containing TemporaryDictionary and Dictionary for creation of
10
10
  # Markov Chain Dictionaries and generating sentences from those dictionaries.
11
11
  module MarkyMarkov
12
- VERSION = '0.3.0'
12
+ VERSION = '0.3.2'
13
13
 
14
14
  class TemporaryDictionary
15
15
  # Create a new Temporary Markov Chain Dictionary and sentence generator for use.
@@ -26,6 +26,12 @@ module MarkyMarkov
26
26
  @sentence = MarkovSentenceGenerator.new(@dictionary)
27
27
  end
28
28
 
29
+ # Returns the MarkovDictionary object's dictionary hash.
30
+ # @return [Hash] the MarkovDictionary hash.
31
+ def dictionary
32
+ @dictionary.dictionary
33
+ end
34
+
29
35
  # Parses a given file and adds the sentences it contains to the current dictionary.
30
36
  #
31
37
  # @example Open a text file and add its contents to the dictionary.
@@ -93,13 +99,13 @@ module MarkyMarkov
93
99
  end
94
100
 
95
101
  # @since 0.1.4
96
- # Modify respond_to? to include generate_n_words and generate_n_sentences
102
+ # Modify respond_to_missing? to include generate_n_words and generate_n_sentences
97
103
  # method_missing implementation.
98
- def respond_to?(method_sym, include_private = false)
104
+ def respond_to_missing?(method_sym, include_private)
99
105
  if method_sym.to_s =~ /^generate_(\d*)_word[s]*$/
100
- generate_n_words($1.to_i)
106
+ true
101
107
  elsif method_sym.to_s =~ /^generate_(\d*)_sentence[s]*$/
102
- generate_n_sentences($1.to_i)
108
+ true
103
109
  else
104
110
  super
105
111
  end
@@ -1,19 +1,20 @@
1
1
  # @private
2
- class MarkovDictionary
3
- attr_accessor :dictionary, :depth
4
- def initialize(depth=2)
5
- @dictionary = {}
2
+ class MarkovDictionary # :nodoc:
3
+ attr_reader :dictionary, :depth
4
+ def initialize(depth=2) @dictionary = {}
6
5
  @depth = depth
6
+ @split_words = /([.?!])|[\s]+/
7
+ @split_sentence = /(?<=[.!?])\s+/
7
8
  end
8
9
 
9
10
  # If File does not exist.
10
- class FileNotFoundError < Exception
11
+ class FileNotFoundError < Exception # :nodoc:
11
12
  end
12
13
 
13
14
  # Open supplied text file:
14
15
  def open_source(source)
15
16
  if File.exists?(source)
16
- File.open(source, "r").read.split
17
+ File.open(source, "r").read.split(@split_sentence)
17
18
  else
18
19
  raise FileNotFoundError.new("#{source} does not exist!")
19
20
  end
@@ -36,10 +37,12 @@ class MarkovDictionary
36
37
  # @example Add a string
37
38
  # parse_source("Hi, how are you doing?", false)
38
39
  def parse_source(source, file=true)
39
- contents = file ? open_source(source) : contents = source.split
40
- contents.each_cons(@depth+1) do |words|
41
- self.add_word(words[0..-2], words[-1])
40
+ contents = file ? open_source(source) : contents = source.split(@split_sentence)
41
+ contents.map! {|sentence| sentence.gsub(/["()]/,"")}
42
+ contents.each do |sentence|
43
+ sentence.split(@split_words).each_cons(@depth+1) do |words|
44
+ self.add_word(words[0..-2], words[-1])
45
+ end
42
46
  end
43
- @dictionary[contents.last(@depth)] ||= []
44
47
  end
45
48
  end
@@ -1,47 +1,67 @@
1
+ # Messing about with the NullObject pattern, can't apply it in too many
2
+ # places in this one. Need to evaluate what else could be used in this
3
+ # aside from my first instinct of defaulting to []
1
4
  # @private
2
- class MarkovSentenceGenerator
5
+ class NullObject # :nodoc:
6
+ def method_missing (*args, &block)
7
+ self
8
+ end
9
+ def nil?; true; end
10
+ def <<(*); end
11
+ def to_str; end
12
+ def to_ary; []; end
13
+ end
14
+
15
+ # @private
16
+ NULL_OBJECT = NullObject.new # :nodoc:
17
+
18
+ # @private
19
+ class MarkovSentenceGenerator # :nodoc:
3
20
  def initialize(dictionary)
4
21
  @dictionary = dictionary
5
22
  @depth = @dictionary.depth
6
23
  end
7
24
 
8
- # Returns a random word via picking a random key from the dictionary.
25
+ # Returns a random word via picking a random key from the dictionary.
9
26
  # In the case of the TwoWordDictionary, it returns two words to ensure
10
27
  # that the sentence will have a valid two word string to pick the next
11
28
  # word from.
12
- #
29
+ #
13
30
  # @return [String] a string containing a random dictionary key.
14
31
  def random_word
15
- keys = @dictionary.dictionary.keys
16
- keys[rand(keys.length)]
32
+ words = @dictionary.dictionary.keys
33
+ words[rand(words.length)]
17
34
  end
18
35
 
19
36
  # Generates a random capitalized word via picking a random key from the
20
37
  # dictionary and recurring if the word is lowercase.
21
38
  #
22
39
  # (see #random_word)
23
- def random_capitalized_word(attempts=0)
24
- keys = @dictionary.dictionary.keys
25
- x = keys[rand(keys.length)]
26
- if /[A-Z]/ =~ x[0]
27
- return x
28
- elsif attempts < 30
29
- # If you don't find a capitalized word after 30 attempts, just use
30
- # a lowercase word as there may be no capitals in the dicationary.
31
- random_capitalized_word(attempts+1)
32
- else
33
- random_word
40
+ def random_capitalized_word
41
+ attempts = 0
42
+ # If you don't find a capitalized word after 15 attempts, just use
43
+ # a lowercase word as there may be no capitals in the dictionary.
44
+ until attempts > 15
45
+ attempts += 1
46
+ words = @dictionary.dictionary.keys
47
+ random_choice = words[rand(words.length)]
48
+ if random_choice[0] =~ /[A-Z]/
49
+ return random_choice
50
+ end
34
51
  end
52
+ random_word
35
53
  end
36
54
 
37
- # Returns a word based upon the likelyhood of it appearing after the supplied word.
38
- #
55
+ # Returns a word based upon the likelihood of it appearing after the supplied word.
56
+ #
39
57
  def weighted_random(lastword)
40
58
  # If word has no words in its dictionary (last word in source text file)
41
59
  # have it pick a random word to display instead.
42
- if word = @dictionary.dictionary[lastword]
43
- word.sample
44
- end
60
+ @dictionary.dictionary.fetch(lastword, NULL_OBJECT).sample
61
+ end
62
+
63
+ def punctuation?(word)
64
+ word =~ /[.!?]/
45
65
  end
46
66
 
47
67
  # Generates a sentence of (wordcount) length using the weighted_random function.
@@ -52,10 +72,13 @@ class MarkovSentenceGenerator
52
72
  sentence = []
53
73
  sentence.concat(random_capitalized_word)
54
74
  (wordcount-1).times do
55
- if word = weighted_random(sentence.last(@depth))
56
- sentence << word
57
- else
75
+ word = weighted_random(sentence.last(@depth))
76
+ if punctuation?(word[0])
77
+ sentence[-1] = sentence.last.dup << word
78
+ elsif word.nil?
58
79
  sentence.concat(random_capitalized_word)
80
+ else
81
+ sentence << word
59
82
  end
60
83
  end
61
84
  sentence.pop(sentence.length-wordcount)
@@ -68,19 +91,21 @@ class MarkovSentenceGenerator
68
91
  # @return [String] the sentence(s) generated.
69
92
  def generate_sentence(sentencecount)
70
93
  sentence = []
94
+ # Find out how many actual keys are in the dictionary.
95
+ key_count = @dictionary.dictionary.keys.length
96
+ # If less than 30 keys, use that plus five as your maximum sentence length.
97
+ maximum_length = key_count < 30 ? key_count + 5 : 30
71
98
  sentencecount.times do
72
- # Find out how many actual keys are in the dictionary.
73
- key_count = @dictionary.dictionary.keys.length
74
- # If less than 30 keys, use that plus five as your maximum sentence length.
75
- maximum_length = key_count < 30 ? key_count + 5 : 30
76
- stop_at_index = sentence.count + maximum_length
99
+ wordcount = 0
77
100
  sentence.concat(random_capitalized_word)
78
- until (/[.!?]/ =~ sentence.last[-1])
101
+ until (punctuation?(sentence.last[-1])) || wordcount > maximum_length
102
+ wordcount += 1
79
103
  word = weighted_random(sentence.last(@depth))
80
- sentence << word unless word.nil?
81
- # If a word ending with a . ! or ? isn't found after 30 words,
82
- # just add a period as there may be none in the dictionary.
83
- sentence[-1] << "." if word.nil? || sentence.count > stop_at_index
104
+ if punctuation?(word)
105
+ sentence[-1] = sentence.last.dup << word
106
+ else
107
+ sentence << word
108
+ end
84
109
  end
85
110
  end
86
111
  sentence.join(' ')
@@ -2,9 +2,9 @@ require 'ox'
2
2
  require_relative 'markov_dictionary'
3
3
 
4
4
  # @private
5
- class PersistentDictionary < MarkovDictionary
5
+ class PersistentDictionary < MarkovDictionary # :nodoc:
6
6
 
7
- class DepthNotInRangeError < Exception
7
+ class DepthNotInRangeError < Exception # :nodoc:
8
8
  end
9
9
 
10
10
  # Creates a PersistentDictionary object using the supplied dictionary file.
@@ -19,6 +19,8 @@ class PersistentDictionary < MarkovDictionary
19
19
  raise DepthNotInRangeError.new("Depth must be between 1 and 5")
20
20
  end
21
21
  @dictionarylocation = dictionary
22
+ @split_words = /([.?!])|[\s]+/
23
+ @split_sentence = /(?<=[.!?])\s+/
22
24
  self.open_dictionary
23
25
  end
24
26
 
data/marky_markov.gemspec CHANGED
@@ -13,8 +13,8 @@ Gem::Specification.new do |s|
13
13
  ## If your rubyforge_project name is different, then edit it and comment out
14
14
  ## the sub! line in the Rakefile
15
15
  s.name = 'marky_markov'
16
- s.version = '0.3.0'
17
- s.date = '2012-02-09'
16
+ s.version = '0.3.2'
17
+ s.date = '2012-02-12'
18
18
  s.rubyforge_project = 'marky_markov'
19
19
 
20
20
  ## Make sure your summary is short. The description may be as long
@@ -69,6 +69,7 @@ Gem::Specification.new do |s|
69
69
  marky_markov.gemspec
70
70
  spec/marky_markov/markov_dictionary_spec.rb
71
71
  spec/marky_markov/markov_sentence_generator_spec.rb
72
+ spec/marky_markov/marky_markov_spec.rb
72
73
  spec/marky_markov/persistent_dictionary_spec.rb
73
74
  spec/spec_helper.rb
74
75
  spec/test.txt
@@ -0,0 +1,64 @@
1
+ require 'spec_helper'
2
+
3
+ describe MarkyMarkov do
4
+ context "TemporaryDictionary" do
5
+ before(:each) do
6
+ @textsource = "spec/test.txt"
7
+ @dictionary = MarkyMarkov::TemporaryDictionary.new
8
+ @onedictcompare = { ["The"] => ["cat"],
9
+ ["cat"] => ["likes"],
10
+ ["likes"] => ["pie"],
11
+ ["pie"] => ["and"],
12
+ ["and"] => ["chainsaws"],
13
+ ["chainsaws"] => []}
14
+ @twodictcompare = {["The", "cat"] => ["likes"],
15
+ ["and", "chainsaws"] => [],
16
+ ["cat", "likes"] => ["pie"],
17
+ ["likes", "pie"] => ["and"],
18
+ ["pie", "and"] => ["chainsaws"]}
19
+ end
20
+
21
+ it "should be able to parse a string" do
22
+ @dictionary.parse_string "The cat likes pie and chainsaws"
23
+ @dictionary.dictionary.should eql(@twodictcompare)
24
+ end
25
+
26
+ it "should generate the right number of sentences" do
27
+ end
28
+
29
+ it "should create the right number of words" do
30
+ end
31
+ end
32
+
33
+ context "PersistentDictionary" do
34
+ before(:each) do
35
+ @textsource = "spec/test.txt"
36
+ @dictionary = MarkyMarkov::Dictionary.new(@textsource)
37
+ @onedictcompare = { ["The"] => ["cat"],
38
+ ["cat"] => ["likes"],
39
+ ["likes"] => ["pie"],
40
+ ["pie"] => ["and"],
41
+ ["and"] => ["chainsaws"],
42
+ ["chainsaws"] => []}
43
+ @twodictcompare = {["The", "cat"] => ["likes"],
44
+ ["and", "chainsaws"] => [],
45
+ ["cat", "likes"] => ["pie"],
46
+ ["likes", "pie"] => ["and"],
47
+ ["pie", "and"] => ["chainsaws"]}
48
+ end
49
+
50
+ it "should load the saved dictionary" do
51
+ end
52
+
53
+ it "should be able to parse a string" do
54
+ @dictionary.parse_string "The cat likes pie and chainsaws"
55
+ @dictionary.dictionary.should include(@twodictcompare)
56
+ end
57
+
58
+ it "should generate the right number of sentences" do
59
+ end
60
+
61
+ it "should create the right number of words" do
62
+ end
63
+ end
64
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: marky_markov
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-09 00:00:00.000000000 Z
12
+ date: 2012-02-12 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ox
16
- requirement: &70184695396840 !ruby/object:Gem::Requirement
16
+ requirement: &70164029504260 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,7 +21,7 @@ dependencies:
21
21
  version: '1.4'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70184695396840
24
+ version_requirements: *70164029504260
25
25
  description: ! "MarkyMarkov makes it easy to generate simply Markov Chains based upon
26
26
  input from\n either a source file or a string. While usable as a module in your
27
27
  code it can also be called on\n from the command line and piped into like a standard
@@ -42,6 +42,7 @@ files:
42
42
  - marky_markov.gemspec
43
43
  - spec/marky_markov/markov_dictionary_spec.rb
44
44
  - spec/marky_markov/markov_sentence_generator_spec.rb
45
+ - spec/marky_markov/marky_markov_spec.rb
45
46
  - spec/marky_markov/persistent_dictionary_spec.rb
46
47
  - spec/spec_helper.rb
47
48
  - spec/test.txt