markov_chains 0.0.0 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 045674e5b0e005db622a8d1c6bae356074e88049
4
- data.tar.gz: a52da2defd03a2cc01f5d8312cf5481cd27b2b20
3
+ metadata.gz: ee16106b2590f0bc8e9eda0d97a5fcc271ee22f3
4
+ data.tar.gz: 6858012c3c1f13b9f29992dcf4b61a955424d687
5
5
  SHA512:
6
- metadata.gz: cf4da09f7460ecf5506a17eaeb2fde1ea04ea72dbc4d2f1f951d3f9b79a37a818168f9e4d796f56e1d921aed5f08be3d32427ae7f6e2ef46ffa26c6b9340711f
7
- data.tar.gz: e40a5e57101f9bfd448e2ee9520ec240ecfaa3236e30ec31150af23a12a23d15be2f3211a58a1811bc60fba2999e5ca985bb8eec769cfcd8a218f62ccc6995d2
6
+ metadata.gz: d52d880b9f62e9eab776176cd03bc5513442a078fa992a5c0f157a1a15465d63a08386cfa15c0bc50801fef944809c824892bc495de566de02c8cf85447119f4
7
+ data.tar.gz: b80a72e8a71bb2c1b90355b1347e7b41032794716ad9cab3ad820062d32634265e9bd33e4b3de1ea6c1a647bc06cf063a8a21466763a235d97207930dbfb3a3a
data/.gitignore CHANGED
@@ -12,3 +12,4 @@
12
12
  *.o
13
13
  *.a
14
14
  mkmf.log
15
+ *.gem
data/Gemfile CHANGED
@@ -2,3 +2,5 @@ source 'https://rubygems.org'
2
2
 
3
3
  # Specify your gem's dependencies in markov-chains.gemspec
4
4
  gemspec
5
+
6
+ gem 'minitest'
data/README.md CHANGED
@@ -29,6 +29,11 @@ Generate a number of sentences, here 5:
29
29
 
30
30
  generator.get_sentences(5)
31
31
 
32
+ ## Change log
33
+
34
+ * Version 0.1.0 - Added an order attribute to the generator for higher-order Markov chains and rewrote most of the internal code to handle order
35
+ * Version 0.0.0 - Initial release
36
+
32
37
  ## Contributing
33
38
 
34
39
  1. Fork it ( https://github.com/[justindomingue]/markov-chains/fork )
@@ -1,75 +1,83 @@
1
1
  module MarkovChains
2
2
  class Dictionary
3
+ attr_reader :order
3
4
 
4
5
  # Initializes the dictionary with a text source.
5
6
  #
6
- # @example Create a new dictionary
7
+ # @example Create a new dictionary of order 1
7
8
  # MarkovChains::Dictionary.new(string)
9
+ # @example Create a new dictionary of order 2
10
+ # MarkovChains::Dictionary.new(string, 2)
11
+ #
8
12
  # @param [String] the text source
13
+ # @param [int] the order of the dictionary. The order is the "memory" of the dictionary, meaning that an order <n> dictionary will consider <n> words to generate the next one. Orders of 1 or 2 are typical. With an order of more than 3, the generated sentences will be the same as the source.
9
14
  #
10
- def initialize(text)
11
- @words = Hash.new
15
+ def initialize(text, order = 1)
16
+ @order = order
17
+ @words_for = Hash.new
12
18
  @start_words = Array.new
13
19
 
14
- wordlist = text.split
20
+ # Standardize input text
21
+ text.delete! "\n"
15
22
 
16
- # Add first word as possible start word
17
- @start_words.push wordlist[0]
23
+ # Process each sentence
18
24
 
19
- # Process each word
20
- wordlist.each_with_index do |word, index|
21
- add(word, wordlist[index + 1]) if index <= wordlist.size - 2
22
- end
25
+ # <sentences> has format sentence+terminator:
26
+ # ["sent1", "term1", "sent2", "term2", ...]
27
+ seps = /([.!?]+)/
28
+ sentences = text.split seps
29
+ sentences.each_slice(2) { |s,t| process_sentence(s.strip,t) }
23
30
  end
24
-
25
- # Returns a word based on the likelihood of it appearing after the input word
31
+
32
+ # Processes a single sentence with terminator
26
33
  #
27
- # @example Get a word likely to appear next to the word 'It'
28
- # get('It')
34
+ # @example Process a sentence
35
+ # process_sentence("It is sunny today", "!")
29
36
  #
30
- # @param [String] word for which we want a possible next word
31
- # @return [String] word that is likely to follow the input word
37
+ # @param [String] sentence to process
38
+ # @param [Character] sentence terminator
32
39
  #
33
- def get(word)
34
- return "" if !@words[word]
35
- followers = @words[word]
36
- sum = followers.inject(0) {|sum,kv| sum += kv[1]}
37
- random = rand(sum)+1
38
- partial_sum = 0
39
- next_word = followers.find do |word, count|
40
- partial_sum += count
41
- partial_sum >= random
42
- end.first
43
- next_word
40
+ private def process_sentence(sentence, terminator)
41
+ # Consider phrases/words/clauses separators when splitting
42
+ seps = "([,;:])"
43
+
44
+ # Split <sentence> into words
45
+ words = sentence.gsub(/[^#{seps}\w'\s]/, "").gsub(/(#{seps})\s+/, '\1').split(/\s+|#{seps}/)
46
+ words << terminator
47
+
48
+ # Add <@order> start words to the list
49
+ @start_words << words[0, @order]
50
+
51
+ # Add the words to the frequency hash <words_for>
52
+ until words.size < @order + 1 do
53
+ (@words_for[words[0, @order]] ||= []) << words[@order]
54
+ words.shift
55
+ end
44
56
  end
45
-
46
- # Returns a word beginning a sentence seen in the source
57
+
58
+ # Returns a word based on the likelihood of it appearing after the input array of words
47
59
  #
48
- # @example Get a start word
49
- # get_start_word()
50
- # @return [String] a possible start word
60
+ # @example Get a word likely to appear next to the word 'It'
61
+ # get(['It']) # => 'has'
62
+ # @example Get a word likely to appear after the words 'It has' (with a dictionary of order 2)
63
+ # get(['It', 'has']) # => 'been'
51
64
  #
52
- def get_start_word
53
- @start_words.sample
65
+ # @param [[String]] array of words for which we want a possible next word
66
+ # @return [String] word that is likely to follow the input
67
+ #
68
+ def get(words)
69
+ (@words_for[words] || []).sample
54
70
  end
55
71
 
56
- private
57
-
58
- # Adds word-next_word combination to the dictionary
72
+ # Returns a list of words beginning a sentence seen in the source
59
73
  #
60
- # @example Adding a word in a order 1 dictionary
61
- # add_word("It", "has")
62
- # @example Adding words in a order m dictionary
63
- # add_word("It has", "been")
74
+ # @example Get start words
75
+ # get_start_words # => ['It', 'has']
64
76
  #
65
- # @param word [string] root word
66
- # @param next_word [string] word following the root word
77
+ # @return [[String]] array of words that could start a sentence
67
78
  #
68
- def add(word, next_word)
69
- @words[word] = Hash.new(0) if !@words[word]
70
- @words[word][next_word] += 1
71
- @start_words.push(next_word) if word.end_with? '.' and !word.nil?
79
+ def get_start_words
80
+ @start_words.sample
72
81
  end
73
-
74
82
  end
75
- end
83
+ end
@@ -1,6 +1,6 @@
1
1
  module MarkovChains
2
2
  class Generator
3
-
3
+
4
4
  # Initializes the generator
5
5
  #
6
6
  # @example Create a new generator
@@ -8,8 +8,8 @@ module MarkovChains
8
8
  #
9
9
  # @param Text source to generate sentences from
10
10
  #
11
- def initialize(text)
12
- @dict = MarkovChains::Dictionary.new(text)
11
+ def initialize(text, order = 1)
12
+ @dict = MarkovChains::Dictionary.new(text, order)
13
13
  end
14
14
 
15
15
  # Returns a given number of randomly generated sentences
@@ -24,17 +24,16 @@ module MarkovChains
24
24
  sentences = []
25
25
 
26
26
  n.times do
27
- sentence = ""
28
- word = @dict.get_start_word
29
- until sentence.scan(/\.|\?|!/).size == 1
30
- sentence << word << " "
31
- word = @dict.get(word)
27
+ sentence = @dict.get_start_words
28
+
29
+ while nw = @dict.get(sentence[-@dict.order, @dict.order])
30
+ sentence << nw
32
31
  end
33
32
 
34
- sentences.push sentence
33
+ sentences << (sentence[0...-1].join(" ").gsub(/\s([,;:])/, '\1') << sentence.last)
35
34
  end
36
35
 
37
36
  sentences
38
37
  end
39
38
  end
40
- end
39
+ end
@@ -1,3 +1,3 @@
1
1
  module MarkovChains
2
- VERSION = '0.0.0'
2
+ VERSION = '0.1.0'
3
3
  end
@@ -4,16 +4,16 @@ require 'markov_chains'
4
4
  class DictionaryTest < Minitest::Test
5
5
  def setup
6
6
  source = "Why did the chicken cross the road?"
7
- @dict = MarkovChains::Dictionary.new(source)
7
+ @order1 = MarkovChains::Dictionary.new(source)
8
8
  end
9
-
9
+
10
10
  def test_get
11
- assert_equal "did", @dict.get("Why")
12
- assert_equal true, %w(chicken road).include?( @dict.get("the"))
13
- assert_equal "", @dict.get("Not there...")
11
+ assert_equal "did", @order1.get(["Why"])
12
+ assert_equal true, %w(chicken road).include?(@order1.get(["the"]))
13
+ assert_equal nil, @order1.get(["Not there..."])
14
14
  end
15
15
 
16
16
  def test_get_start_word
17
- assert_equal "Why", @dict.get_start_word()
17
+ assert_equal ["Why"], @order1.get_start_words
18
18
  end
19
19
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: markov_chains
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - justindomingue
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-12-30 00:00:00.000000000 Z
11
+ date: 2015-01-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler