markov_chains 0.0.0 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/Gemfile +2 -0
- data/README.md +5 -0
- data/lib/markov_chains/dictionary.rb +57 -49
- data/lib/markov_chains/generator.rb +9 -10
- data/lib/markov_chains/version.rb +1 -1
- data/test/test_dictionary.rb +6 -6
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ee16106b2590f0bc8e9eda0d97a5fcc271ee22f3
|
4
|
+
data.tar.gz: 6858012c3c1f13b9f29992dcf4b61a955424d687
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d52d880b9f62e9eab776176cd03bc5513442a078fa992a5c0f157a1a15465d63a08386cfa15c0bc50801fef944809c824892bc495de566de02c8cf85447119f4
|
7
|
+
data.tar.gz: b80a72e8a71bb2c1b90355b1347e7b41032794716ad9cab3ad820062d32634265e9bd33e4b3de1ea6c1a647bc06cf063a8a21466763a235d97207930dbfb3a3a
|
data/.gitignore
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -29,6 +29,11 @@ Generate a number of sentences, here 5:
|
|
29
29
|
|
30
30
|
generator.get_sentences(5)
|
31
31
|
|
32
|
+
## Change log
|
33
|
+
|
34
|
+
* Version 0.1.0 - Added an order attribute to the generator for higher-order Markov chains and rewrote most of the internal code to handle order
|
35
|
+
* Version 0.0.0 - Initial release
|
36
|
+
|
32
37
|
## Contributing
|
33
38
|
|
34
39
|
1. Fork it ( https://github.com/justindomingue/markov-chains/fork )
|
@@ -1,75 +1,83 @@
|
|
1
1
|
module MarkovChains
|
2
2
|
class Dictionary
|
3
|
+
attr_reader :order
|
3
4
|
|
4
5
|
# Initializes the dictionary with a text source.
|
5
6
|
#
|
6
|
-
# @example Create a new dictionary
|
7
|
+
# @example Create a new dictionary of order 1
|
7
8
|
# MarkovChains::Dictionary.new(string)
|
9
|
+
# @example Create a new dictionary of order 2
|
10
|
+
# MarkovChains::Dictionary.new(string, 2)
|
11
|
+
#
|
8
12
|
# @param [String] the text source
|
13
|
+
# @param [Integer] the order of the dictionary. The order is the "memory" of the dictionary, meaning that an order <n> dictionary will consider <n> words to generate the next one. Orders of 1 or 2 are typical. With an order greater than 3, the generated sentences will be the same as the source.
|
9
14
|
#
|
10
|
-
def initialize(text)
|
11
|
-
@
|
15
|
+
def initialize(text, order = 1)
|
16
|
+
@order = order
|
17
|
+
@words_for = Hash.new
|
12
18
|
@start_words = Array.new
|
13
19
|
|
14
|
-
|
20
|
+
# Standardize input text
|
21
|
+
text.delete! "\n"
|
15
22
|
|
16
|
-
#
|
17
|
-
@start_words.push wordlist[0]
|
23
|
+
# Process each sentence
|
18
24
|
|
19
|
-
#
|
20
|
-
|
21
|
-
|
22
|
-
|
25
|
+
# <sentences> has format sentence+terminator:
|
26
|
+
# ["sent1", "term1", "sent2", "term2", ...]
|
27
|
+
seps = /([.!?]+)/
|
28
|
+
sentences = text.split seps
|
29
|
+
sentences.each_slice(2) { |s,t| process_sentence(s.strip,t) }
|
23
30
|
end
|
24
|
-
|
25
|
-
#
|
31
|
+
|
32
|
+
# Processes a single sentence with terminator
|
26
33
|
#
|
27
|
-
# @example
|
28
|
-
#
|
34
|
+
# @example Process a sentence
|
35
|
+
# process_sentence("It is sunny today", "!")
|
29
36
|
#
|
30
|
-
# @param [String]
|
31
|
-
# @
|
37
|
+
# @param [String] sentence to process
|
38
|
+
# @param [String] sentence terminator (one or more of ".", "!" or "?")
|
32
39
|
#
|
33
|
-
def
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
40
|
+
private def process_sentence(sentence, terminator)
|
41
|
+
# Consider phrases/words/clauses separators when splitting
|
42
|
+
seps = "([,;:])"
|
43
|
+
|
44
|
+
# Split <sentence> into words
|
45
|
+
words = sentence.gsub(/[^#{seps}\w'\s]/, "").gsub(/(#{seps})\s+/, '\1').split(/\s+|#{seps}/)
|
46
|
+
words << terminator
|
47
|
+
|
48
|
+
# Add <@order> start words to the list
|
49
|
+
@start_words << words[0, @order]
|
50
|
+
|
51
|
+
# Add the words to the frequency hash <words_for>
|
52
|
+
until words.size < @order + 1 do
|
53
|
+
(@words_for[words[0, @order]] ||= []) << words[@order]
|
54
|
+
words.shift
|
55
|
+
end
|
44
56
|
end
|
45
|
-
|
46
|
-
# Returns a word
|
57
|
+
|
58
|
+
# Returns a word based on the likelihood of it appearing after the input array of words
|
47
59
|
#
|
48
|
-
# @example Get a
|
49
|
-
#
|
50
|
-
# @
|
60
|
+
# @example Get a word likely to appear next to the word 'It'
|
61
|
+
# get(['It']) # => 'has'
|
62
|
+
# @example Get a word likely to appear after the words 'It has' (with a dictionary of order 2)
|
63
|
+
# get(['It', 'has']) # => 'been'
|
51
64
|
#
|
52
|
-
|
53
|
-
|
65
|
+
# @param [[String]] array of words for which we want a possible next word
|
66
|
+
# @return [String] word that is likely to follow the input
|
67
|
+
#
|
68
|
+
def get(words)
|
69
|
+
(@words_for[words] || []).sample
|
54
70
|
end
|
55
71
|
|
56
|
-
|
57
|
-
|
58
|
-
# Adds word-next_word combination to the dictionary
|
72
|
+
# Returns a list of words beginning a sentence seen in the source
|
59
73
|
#
|
60
|
-
# @example
|
61
|
-
#
|
62
|
-
# @example Adding words in a order m dictionary
|
63
|
-
# add_word("It has", "been")
|
74
|
+
# @example Get the start words of a sentence
|
75
|
+
# get_start_words # => ['It', 'has']
|
64
76
|
#
|
65
|
-
# @
|
66
|
-
# @param next_word [string] word following the root word
|
77
|
+
# @return [[String]] array of words that could start a sentence
|
67
78
|
#
|
68
|
-
def
|
69
|
-
@
|
70
|
-
@words[word][next_word] += 1
|
71
|
-
@start_words.push(next_word) if word.end_with? '.' and !word.nil?
|
79
|
+
def get_start_words
|
80
|
+
@start_words.sample
|
72
81
|
end
|
73
|
-
|
74
82
|
end
|
75
|
-
end
|
83
|
+
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module MarkovChains
|
2
2
|
class Generator
|
3
|
-
|
3
|
+
|
4
4
|
# Initializes the generator
|
5
5
|
#
|
6
6
|
# @example Create a new generator
|
@@ -8,8 +8,8 @@ module MarkovChains
|
|
8
8
|
#
|
9
9
|
# @param Text source to generate sentences from
|
10
10
|
#
|
11
|
-
def initialize(text)
|
12
|
-
@dict = MarkovChains::Dictionary.new(text)
|
11
|
+
def initialize(text, order = 1)
|
12
|
+
@dict = MarkovChains::Dictionary.new(text, order)
|
13
13
|
end
|
14
14
|
|
15
15
|
# Returns a given number of randomly generated sentences
|
@@ -24,17 +24,16 @@ module MarkovChains
|
|
24
24
|
sentences = []
|
25
25
|
|
26
26
|
n.times do
|
27
|
-
sentence =
|
28
|
-
|
29
|
-
|
30
|
-
sentence <<
|
31
|
-
word = @dict.get(word)
|
27
|
+
sentence = @dict.get_start_words
|
28
|
+
|
29
|
+
while nw = @dict.get(sentence[-@dict.order, @dict.order])
|
30
|
+
sentence << nw
|
32
31
|
end
|
33
32
|
|
34
|
-
sentences.
|
33
|
+
sentences << (sentence[0...-1].join(" ").gsub(/\s([,;:])/, '\1') << sentence.last)
|
35
34
|
end
|
36
35
|
|
37
36
|
sentences
|
38
37
|
end
|
39
38
|
end
|
40
|
-
end
|
39
|
+
end
|
data/test/test_dictionary.rb
CHANGED
@@ -4,16 +4,16 @@ require 'markov_chains'
|
|
4
4
|
class DictionaryTest < Minitest::Test
|
5
5
|
def setup
|
6
6
|
source = "Why did the chicken cross the road?"
|
7
|
-
@
|
7
|
+
@order1 = MarkovChains::Dictionary.new(source)
|
8
8
|
end
|
9
|
-
|
9
|
+
|
10
10
|
def test_get
|
11
|
-
assert_equal "did", @
|
12
|
-
assert_equal true, %w(chicken road).include?(
|
13
|
-
assert_equal
|
11
|
+
assert_equal "did", @order1.get(["Why"])
|
12
|
+
assert_equal true, %w(chicken road).include?(@order1.get(["the"]))
|
13
|
+
assert_equal nil, @order1.get(["Not there..."])
|
14
14
|
end
|
15
15
|
|
16
16
|
def test_get_start_word
|
17
|
-
assert_equal "Why", @
|
17
|
+
assert_equal ["Why"], @order1.get_start_words
|
18
18
|
end
|
19
19
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: markov_chains
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- justindomingue
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-01-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|