marky_markov 0.3.0 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -7,8 +7,8 @@ in Ruby. It can be used both from the command-line and as a library within your
7
7
  NOTE: 0.3.0 now uses arrays with multiple entries per word instead of a
8
8
  hash key for each word with the value representing number of occurrences.
9
9
  While a less elegant solution, it leads to faster text generation. We
10
- are also now using msgpack instead of yajl-json to store the dictionary
11
- which should lead to faster usage in the command-line app.
10
+ are also now using ox instead of yajl-json to store the dictionary
11
+ as yajl-json does not appear to support arrays within hashes properly.
12
12
 
13
13
  NOTE: In the transition between 0.1.3 to 0.2.0 MarkyMarkov has added the
14
14
  ability to generate proper sentences (generate_n_sentences) instead of simply a
data/lib/marky_markov.rb CHANGED
@@ -4,12 +4,12 @@
4
4
  require_relative 'marky_markov/persistent_dictionary'
5
5
  require_relative 'marky_markov/markov_sentence_generator'
6
6
 
7
- # @version = 0.3.0
7
+ # @version = 0.3.2
8
8
  # @author Matt Furden
9
9
  # Module containing TemporaryDictionary and Dictionary for creation of
10
10
  # Markov Chain Dictionaries and generating sentences from those dictionaries.
11
11
  module MarkyMarkov
12
- VERSION = '0.3.0'
12
+ VERSION = '0.3.2'
13
13
 
14
14
  class TemporaryDictionary
15
15
  # Create a new Temporary Markov Chain Dictionary and sentence generator for use.
@@ -26,6 +26,12 @@ module MarkyMarkov
26
26
  @sentence = MarkovSentenceGenerator.new(@dictionary)
27
27
  end
28
28
 
29
+ # Returns the MarkovDictionary object's dictionary hash.
30
+ # @return [Hash] the MarkovDictionary hash.
31
+ def dictionary
32
+ @dictionary.dictionary
33
+ end
34
+
29
35
  # Parses a given file and adds the sentences it contains to the current dictionary.
30
36
  #
31
37
  # @example Open a text file and add its contents to the dictionary.
@@ -93,13 +99,13 @@ module MarkyMarkov
93
99
  end
94
100
 
95
101
  # @since 0.1.4
96
- # Modify respond_to? to include generate_n_words and generate_n_sentences
102
+ # Modify respond_to_missing? to include generate_n_words and generate_n_sentences
97
103
  # method_missing implementation.
98
- def respond_to?(method_sym, include_private = false)
104
+ def respond_to_missing?(method_sym, include_private)
99
105
  if method_sym.to_s =~ /^generate_(\d*)_word[s]*$/
100
- generate_n_words($1.to_i)
106
+ true
101
107
  elsif method_sym.to_s =~ /^generate_(\d*)_sentence[s]*$/
102
- generate_n_sentences($1.to_i)
108
+ true
103
109
  else
104
110
  super
105
111
  end
@@ -1,19 +1,20 @@
1
1
  # @private
2
- class MarkovDictionary
3
- attr_accessor :dictionary, :depth
4
- def initialize(depth=2)
5
- @dictionary = {}
2
+ class MarkovDictionary # :nodoc:
3
+ attr_reader :dictionary, :depth
4
+ def initialize(depth=2) @dictionary = {}
6
5
  @depth = depth
6
+ @split_words = /([.?!])|[\s]+/
7
+ @split_sentence = /(?<=[.!?])\s+/
7
8
  end
8
9
 
9
10
  # If File does not exist.
10
- class FileNotFoundError < Exception
11
+ class FileNotFoundError < Exception # :nodoc:
11
12
  end
12
13
 
13
14
  # Open supplied text file:
14
15
  def open_source(source)
15
16
  if File.exists?(source)
16
- File.open(source, "r").read.split
17
+ File.open(source, "r").read.split(@split_sentence)
17
18
  else
18
19
  raise FileNotFoundError.new("#{source} does not exist!")
19
20
  end
@@ -36,10 +37,12 @@ class MarkovDictionary
36
37
  # @example Add a string
37
38
  # parse_source("Hi, how are you doing?", false)
38
39
  def parse_source(source, file=true)
39
- contents = file ? open_source(source) : contents = source.split
40
- contents.each_cons(@depth+1) do |words|
41
- self.add_word(words[0..-2], words[-1])
40
+ contents = file ? open_source(source) : contents = source.split(@split_sentence)
41
+ contents.map! {|sentence| sentence.gsub(/["()]/,"")}
42
+ contents.each do |sentence|
43
+ sentence.split(@split_words).each_cons(@depth+1) do |words|
44
+ self.add_word(words[0..-2], words[-1])
45
+ end
42
46
  end
43
- @dictionary[contents.last(@depth)] ||= []
44
47
  end
45
48
  end
@@ -1,47 +1,67 @@
1
+ # Messing about with the NullObject pattern, can't apply it in too many
2
+ # places in this one. Need to evaluate what else could be used in this
3
+ # aside from my first instinct of defaulting to []
1
4
  # @private
2
- class MarkovSentenceGenerator
5
+ class NullObject # :nodoc:
6
+ def method_missing (*args, &block)
7
+ self
8
+ end
9
+ def nil?; true; end
10
+ def <<(*); end
11
+ def to_str; end
12
+ def to_ary; []; end
13
+ end
14
+
15
+ # @private
16
+ NULL_OBJECT = NullObject.new # :nodoc:
17
+
18
+ # @private
19
+ class MarkovSentenceGenerator # :nodoc:
3
20
  def initialize(dictionary)
4
21
  @dictionary = dictionary
5
22
  @depth = @dictionary.depth
6
23
  end
7
24
 
8
- # Returns a random word via picking a random key from the dictionary.
25
+ # Returns a random word via picking a random key from the dictionary.
9
26
  # In the case of the TwoWordDictionary, it returns two words to ensure
10
27
  # that the sentence will have a valid two word string to pick the next
11
28
  # word from.
12
- #
29
+ #
13
30
  # @return [String] a string containing a random dictionary key.
14
31
  def random_word
15
- keys = @dictionary.dictionary.keys
16
- keys[rand(keys.length)]
32
+ words = @dictionary.dictionary.keys
33
+ words[rand(words.length)]
17
34
  end
18
35
 
19
36
  # Generates a random capitalized word via picking a random key from the
20
37
  # dictionary and recurring if the word is lowercase.
21
38
  #
22
39
  # (see #random_word)
23
- def random_capitalized_word(attempts=0)
24
- keys = @dictionary.dictionary.keys
25
- x = keys[rand(keys.length)]
26
- if /[A-Z]/ =~ x[0]
27
- return x
28
- elsif attempts < 30
29
- # If you don't find a capitalized word after 30 attempts, just use
30
- # a lowercase word as there may be no capitals in the dicationary.
31
- random_capitalized_word(attempts+1)
32
- else
33
- random_word
40
+ def random_capitalized_word
41
+ attempts = 0
42
+ # If you don't find a capitalized word after 15 attempts, just use
43
+ # a lowercase word as there may be no capitals in the dictionary.
44
+ until attempts > 15
45
+ attempts += 1
46
+ words = @dictionary.dictionary.keys
47
+ random_choice = words[rand(words.length)]
48
+ if random_choice[0] =~ /[A-Z]/
49
+ return random_choice
50
+ end
34
51
  end
52
+ random_word
35
53
  end
36
54
 
37
- # Returns a word based upon the likelyhood of it appearing after the supplied word.
38
- #
55
+ # Returns a word based upon the likelihood of it appearing after the supplied word.
56
+ #
39
57
  def weighted_random(lastword)
40
58
  # If word has no words in its dictionary (last word in source text file)
41
59
  # have it pick a random word to display instead.
42
- if word = @dictionary.dictionary[lastword]
43
- word.sample
44
- end
60
+ @dictionary.dictionary.fetch(lastword, NULL_OBJECT).sample
61
+ end
62
+
63
+ def punctuation?(word)
64
+ word =~ /[.!?]/
45
65
  end
46
66
 
47
67
  # Generates a sentence of (wordcount) length using the weighted_random function.
@@ -52,10 +72,13 @@ class MarkovSentenceGenerator
52
72
  sentence = []
53
73
  sentence.concat(random_capitalized_word)
54
74
  (wordcount-1).times do
55
- if word = weighted_random(sentence.last(@depth))
56
- sentence << word
57
- else
75
+ word = weighted_random(sentence.last(@depth))
76
+ if punctuation?(word[0])
77
+ sentence[-1] = sentence.last.dup << word
78
+ elsif word.nil?
58
79
  sentence.concat(random_capitalized_word)
80
+ else
81
+ sentence << word
59
82
  end
60
83
  end
61
84
  sentence.pop(sentence.length-wordcount)
@@ -68,19 +91,21 @@ class MarkovSentenceGenerator
68
91
  # @return [String] the sentence(s) generated.
69
92
  def generate_sentence(sentencecount)
70
93
  sentence = []
94
+ # Find out how many actual keys are in the dictionary.
95
+ key_count = @dictionary.dictionary.keys.length
96
+ # If less than 30 keys, use that plus five as your maximum sentence length.
97
+ maximum_length = key_count < 30 ? key_count + 5 : 30
71
98
  sentencecount.times do
72
- # Find out how many actual keys are in the dictionary.
73
- key_count = @dictionary.dictionary.keys.length
74
- # If less than 30 keys, use that plus five as your maximum sentence length.
75
- maximum_length = key_count < 30 ? key_count + 5 : 30
76
- stop_at_index = sentence.count + maximum_length
99
+ wordcount = 0
77
100
  sentence.concat(random_capitalized_word)
78
- until (/[.!?]/ =~ sentence.last[-1])
101
+ until (punctuation?(sentence.last[-1])) || wordcount > maximum_length
102
+ wordcount += 1
79
103
  word = weighted_random(sentence.last(@depth))
80
- sentence << word unless word.nil?
81
- # If a word ending with a . ! or ? isn't found after 30 words,
82
- # just add a period as there may be none in the dictionary.
83
- sentence[-1] << "." if word.nil? || sentence.count > stop_at_index
104
+ if punctuation?(word)
105
+ sentence[-1] = sentence.last.dup << word
106
+ else
107
+ sentence << word
108
+ end
84
109
  end
85
110
  end
86
111
  sentence.join(' ')
@@ -2,9 +2,9 @@ require 'ox'
2
2
  require_relative 'markov_dictionary'
3
3
 
4
4
  # @private
5
- class PersistentDictionary < MarkovDictionary
5
+ class PersistentDictionary < MarkovDictionary # :nodoc:
6
6
 
7
- class DepthNotInRangeError < Exception
7
+ class DepthNotInRangeError < Exception # :nodoc:
8
8
  end
9
9
 
10
10
  # Creates a PersistentDictionary object using the supplied dictionary file.
@@ -19,6 +19,8 @@ class PersistentDictionary < MarkovDictionary
19
19
  raise DepthNotInRangeError.new("Depth must be between 1 and 5")
20
20
  end
21
21
  @dictionarylocation = dictionary
22
+ @split_words = /([.?!])|[\s]+/
23
+ @split_sentence = /(?<=[.!?])\s+/
22
24
  self.open_dictionary
23
25
  end
24
26
 
data/marky_markov.gemspec CHANGED
@@ -13,8 +13,8 @@ Gem::Specification.new do |s|
13
13
  ## If your rubyforge_project name is different, then edit it and comment out
14
14
  ## the sub! line in the Rakefile
15
15
  s.name = 'marky_markov'
16
- s.version = '0.3.0'
17
- s.date = '2012-02-09'
16
+ s.version = '0.3.2'
17
+ s.date = '2012-02-12'
18
18
  s.rubyforge_project = 'marky_markov'
19
19
 
20
20
  ## Make sure your summary is short. The description may be as long
@@ -69,6 +69,7 @@ Gem::Specification.new do |s|
69
69
  marky_markov.gemspec
70
70
  spec/marky_markov/markov_dictionary_spec.rb
71
71
  spec/marky_markov/markov_sentence_generator_spec.rb
72
+ spec/marky_markov/marky_markov_spec.rb
72
73
  spec/marky_markov/persistent_dictionary_spec.rb
73
74
  spec/spec_helper.rb
74
75
  spec/test.txt
@@ -0,0 +1,64 @@
1
+ require 'spec_helper'
2
+
3
+ describe MarkyMarkov do
4
+ context "TemporaryDictionary" do
5
+ before(:each) do
6
+ @textsource = "spec/test.txt"
7
+ @dictionary = MarkyMarkov::TemporaryDictionary.new
8
+ @onedictcompare = { ["The"] => ["cat"],
9
+ ["cat"] => ["likes"],
10
+ ["likes"] => ["pie"],
11
+ ["pie"] => ["and"],
12
+ ["and"] => ["chainsaws"],
13
+ ["chainsaws"] => []}
14
+ @twodictcompare = {["The", "cat"] => ["likes"],
15
+ ["and", "chainsaws"] => [],
16
+ ["cat", "likes"] => ["pie"],
17
+ ["likes", "pie"] => ["and"],
18
+ ["pie", "and"] => ["chainsaws"]}
19
+ end
20
+
21
+ it "should be able to parse a string" do
22
+ @dictionary.parse_string "The cat likes pie and chainsaws"
23
+ @dictionary.dictionary.should eql(@twodictcompare)
24
+ end
25
+
26
+ it "should generate the right number of sentences" do
27
+ end
28
+
29
+ it "should create the right number of words" do
30
+ end
31
+ end
32
+
33
+ context "PersistentDictionary" do
34
+ before(:each) do
35
+ @textsource = "spec/test.txt"
36
+ @dictionary = MarkyMarkov::Dictionary.new(@textsource)
37
+ @onedictcompare = { ["The"] => ["cat"],
38
+ ["cat"] => ["likes"],
39
+ ["likes"] => ["pie"],
40
+ ["pie"] => ["and"],
41
+ ["and"] => ["chainsaws"],
42
+ ["chainsaws"] => []}
43
+ @twodictcompare = {["The", "cat"] => ["likes"],
44
+ ["and", "chainsaws"] => [],
45
+ ["cat", "likes"] => ["pie"],
46
+ ["likes", "pie"] => ["and"],
47
+ ["pie", "and"] => ["chainsaws"]}
48
+ end
49
+
50
+ it "should load the saved dictionary" do
51
+ end
52
+
53
+ it "should be able to parse a string" do
54
+ @dictionary.parse_string "The cat likes pie and chainsaws"
55
+ @dictionary.dictionary.should include(@twodictcompare)
56
+ end
57
+
58
+ it "should generate the right number of sentences" do
59
+ end
60
+
61
+ it "should create the right number of words" do
62
+ end
63
+ end
64
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: marky_markov
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-09 00:00:00.000000000 Z
12
+ date: 2012-02-12 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ox
16
- requirement: &70184695396840 !ruby/object:Gem::Requirement
16
+ requirement: &70164029504260 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,7 +21,7 @@ dependencies:
21
21
  version: '1.4'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70184695396840
24
+ version_requirements: *70164029504260
25
25
  description: ! "MarkyMarkov makes it easy to generate simply Markov Chains based upon
26
26
  input from\n either a source file or a string. While usable as a module in your
27
27
  code it can also be called on\n from the command line and piped into like a standard
@@ -42,6 +42,7 @@ files:
42
42
  - marky_markov.gemspec
43
43
  - spec/marky_markov/markov_dictionary_spec.rb
44
44
  - spec/marky_markov/markov_sentence_generator_spec.rb
45
+ - spec/marky_markov/marky_markov_spec.rb
45
46
  - spec/marky_markov/persistent_dictionary_spec.rb
46
47
  - spec/spec_helper.rb
47
48
  - spec/test.txt