markov_chains 0.0.0 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 045674e5b0e005db622a8d1c6bae356074e88049
4
- data.tar.gz: a52da2defd03a2cc01f5d8312cf5481cd27b2b20
3
+ metadata.gz: ee16106b2590f0bc8e9eda0d97a5fcc271ee22f3
4
+ data.tar.gz: 6858012c3c1f13b9f29992dcf4b61a955424d687
5
5
  SHA512:
6
- metadata.gz: cf4da09f7460ecf5506a17eaeb2fde1ea04ea72dbc4d2f1f951d3f9b79a37a818168f9e4d796f56e1d921aed5f08be3d32427ae7f6e2ef46ffa26c6b9340711f
7
- data.tar.gz: e40a5e57101f9bfd448e2ee9520ec240ecfaa3236e30ec31150af23a12a23d15be2f3211a58a1811bc60fba2999e5ca985bb8eec769cfcd8a218f62ccc6995d2
6
+ metadata.gz: d52d880b9f62e9eab776176cd03bc5513442a078fa992a5c0f157a1a15465d63a08386cfa15c0bc50801fef944809c824892bc495de566de02c8cf85447119f4
7
+ data.tar.gz: b80a72e8a71bb2c1b90355b1347e7b41032794716ad9cab3ad820062d32634265e9bd33e4b3de1ea6c1a647bc06cf063a8a21466763a235d97207930dbfb3a3a
data/.gitignore CHANGED
@@ -12,3 +12,4 @@
12
12
  *.o
13
13
  *.a
14
14
  mkmf.log
15
+ *.gem
data/Gemfile CHANGED
@@ -2,3 +2,5 @@ source 'https://rubygems.org'
2
2
 
3
3
  # Specify your gem's dependencies in markov-chains.gemspec
4
4
  gemspec
5
+
6
+ gem 'minitest'
data/README.md CHANGED
@@ -29,6 +29,11 @@ Generate a number of sentences, here 5:
29
29
 
30
30
  generator.get_sentences(5)
31
31
 
32
+ ## Change log
33
+
34
+ * Version 0.1.0 - Added an order attribute to the generator for higher-order Markov chains and rewrote most of the internal code to handle order
35
+ * Version 0.0.0 - Initial release
36
+
32
37
  ## Contributing
33
38
 
34
39
  1. Fork it ( https://github.com/[justindomingue]/markov-chains/fork )
@@ -1,75 +1,83 @@
1
1
  module MarkovChains
2
2
  class Dictionary
3
+ attr_reader :order
3
4
 
4
5
  # Initializes the dictionary with a text source.
5
6
  #
6
- # @example Create a new dictionary
7
+ # @example Create a new dictionary of order 1
7
8
  # MarkovChains::Dictionary.new(string)
9
+ # @example Create a new dictionary of order 2
10
+ # MarkovChains::Dictionary.new(string, 2)
11
+ #
8
12
  # @param [String] the text source
13
+ # @param [int] the order of the dictionary. The order is the "memory" of the dictionary, meaning that an order <n> dictionary will consider <n> words to generate the next one. Orders of 1 or 2 are typical. With an order greater than 3, the generated sentences will be the same as the source.
9
14
  #
10
- def initialize(text)
11
- @words = Hash.new
15
+ def initialize(text, order = 1)
16
+ @order = order
17
+ @words_for = Hash.new
12
18
  @start_words = Array.new
13
19
 
14
- wordlist = text.split
20
+ # Standardize input text
21
+ text.delete! "\n"
15
22
 
16
- # Add first word as possible start word
17
- @start_words.push wordlist[0]
23
+ # Process each sentence
18
24
 
19
- # Process each word
20
- wordlist.each_with_index do |word, index|
21
- add(word, wordlist[index + 1]) if index <= wordlist.size - 2
22
- end
25
+ # <sentences> has format sentence+terminator:
26
+ # ["sent1", "term1", "sent2", "term2", ...]
27
+ seps = /([.!?]+)/
28
+ sentences = text.split seps
29
+ sentences.each_slice(2) { |s,t| process_sentence(s.strip,t) }
23
30
  end
24
-
25
- # Returns a word based on the likelihood of it appearing after the input word
31
+
32
+ # Processes a single sentence with terminator
26
33
  #
27
- # @example Get a word likely to appear next to the word 'It'
28
- # get('It')
34
+ # @example Process a sentence
35
+ # process_sentence("It is sunny today", "!")
29
36
  #
30
- # @param [String] word for which we want a possible next word
31
- # @return [String] word that is likely to follow the input word
37
+ # @param [String] sentence to process
38
+ # @param [Character] sentence terminator
32
39
  #
33
- def get(word)
34
- return "" if !@words[word]
35
- followers = @words[word]
36
- sum = followers.inject(0) {|sum,kv| sum += kv[1]}
37
- random = rand(sum)+1
38
- partial_sum = 0
39
- next_word = followers.find do |word, count|
40
- partial_sum += count
41
- partial_sum >= random
42
- end.first
43
- next_word
40
+ private def process_sentence(sentence, terminator)
41
+ # Consider phrases/words/clauses separators when splitting
42
+ seps = "([,;:])"
43
+
44
+ # Split <sentence> into words
45
+ words = sentence.gsub(/[^#{seps}\w'\s]/, "").gsub(/(#{seps})\s+/, '\1').split(/\s+|#{seps}/)
46
+ words << terminator
47
+
48
+ # Add <@order> start words to the list
49
+ @start_words << words[0, @order]
50
+
51
+ # Add the words to the frequency hash <words_for>
52
+ until words.size < @order + 1 do
53
+ (@words_for[words[0, @order]] ||= []) << words[@order]
54
+ words.shift
55
+ end
44
56
  end
45
-
46
- # Returns a word beginning a sentence seen in the source
57
+
58
+ # Returns a word based on the likelihood of it appearing after the input array of words
47
59
  #
48
- # @example Get a start word
49
- # get_start_word()
50
- # @return [String] a possible start word
60
+ # @example Get a word likely to appear next to the word 'It'
61
+ # get(['It']) # => 'has'
62
+ # @example Get a word likely to appear after the words 'It has' (with a dictionary of order 2)
63
+ # get(['It', 'has']) # => 'been'
51
64
  #
52
- def get_start_word
53
- @start_words.sample
65
+ # @param [[String]] array of words for which we want a possible next word
66
+ # @return [String] word that is likely to follow the input
67
+ #
68
+ def get(words)
69
+ (@words_for[words] || []).sample
54
70
  end
55
71
 
56
- private
57
-
58
- # Adds word-next_word combination to the dictionary
72
+ # Returns a list of words beginning a sentence seen in the source
59
73
  #
60
- # @example Adding a word in a order 1 dictionary
61
- # add_word("It", "has")
62
- # @example Adding words in a order m dictionary
63
- # add_word("It has", "been")
74
+ # @example Get start words
75
+ # get_start_words # => ['It', 'has']
64
76
  #
65
- # @param word [string] root word
66
- # @param next_word [string] word following the root word
77
+ # @return [[String]] array of words that could start a sentence
67
78
  #
68
- def add(word, next_word)
69
- @words[word] = Hash.new(0) if !@words[word]
70
- @words[word][next_word] += 1
71
- @start_words.push(next_word) if word.end_with? '.' and !word.nil?
79
+ def get_start_words
80
+ @start_words.sample
72
81
  end
73
-
74
82
  end
75
- end
83
+ end
@@ -1,6 +1,6 @@
1
1
  module MarkovChains
2
2
  class Generator
3
-
3
+
4
4
  # Initializes the generator
5
5
  #
6
6
  # @example Create a new generator
@@ -8,8 +8,8 @@ module MarkovChains
8
8
  #
9
9
  # @param Text source to generate sentences from
10
10
  #
11
- def initialize(text)
12
- @dict = MarkovChains::Dictionary.new(text)
11
+ def initialize(text, order = 1)
12
+ @dict = MarkovChains::Dictionary.new(text, order)
13
13
  end
14
14
 
15
15
  # Returns a given number of randomly generated sentences
@@ -24,17 +24,16 @@ module MarkovChains
24
24
  sentences = []
25
25
 
26
26
  n.times do
27
- sentence = ""
28
- word = @dict.get_start_word
29
- until sentence.scan(/\.|\?|!/).size == 1
30
- sentence << word << " "
31
- word = @dict.get(word)
27
+ sentence = @dict.get_start_words
28
+
29
+ while nw = @dict.get(sentence[-@dict.order, @dict.order])
30
+ sentence << nw
32
31
  end
33
32
 
34
- sentences.push sentence
33
+ sentences << (sentence[0...-1].join(" ").gsub(/\s([,;:])/, '\1') << sentence.last)
35
34
  end
36
35
 
37
36
  sentences
38
37
  end
39
38
  end
40
- end
39
+ end
@@ -1,3 +1,3 @@
1
1
  module MarkovChains
2
- VERSION = '0.0.0'
2
+ VERSION = '0.1.0'
3
3
  end
@@ -4,16 +4,16 @@ require 'markov_chains'
4
4
  class DictionaryTest < Minitest::Test
5
5
  def setup
6
6
  source = "Why did the chicken cross the road?"
7
- @dict = MarkovChains::Dictionary.new(source)
7
+ @order1 = MarkovChains::Dictionary.new(source)
8
8
  end
9
-
9
+
10
10
  def test_get
11
- assert_equal "did", @dict.get("Why")
12
- assert_equal true, %w(chicken road).include?( @dict.get("the"))
13
- assert_equal "", @dict.get("Not there...")
11
+ assert_equal "did", @order1.get(["Why"])
12
+ assert_equal true, %w(chicken road).include?(@order1.get(["the"]))
13
+ assert_equal nil, @order1.get(["Not there..."])
14
14
  end
15
15
 
16
16
  def test_get_start_word
17
- assert_equal "Why", @dict.get_start_word()
17
+ assert_equal ["Why"], @order1.get_start_words
18
18
  end
19
19
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: markov_chains
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - justindomingue
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-12-30 00:00:00.000000000 Z
11
+ date: 2015-01-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler