rubykov 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +11 -1
- data/lib/rubykov/markov_model.rb +41 -27
- data/lib/rubykov/text_generator.rb +45 -6
- data/license.md +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9827c73272cba93ae8557afe9cd9158175f5710c
|
4
|
+
data.tar.gz: 3cafc24fa4870f60450dae6d2a5aa692ee3f72c7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b62a34df1580d94914ca0fe4bb171ba5d7449857e3f7323d72958930a912e9b69e67664eb23ce1b7562f14168890a8b46c549d754848951eaa94998ee875074f
|
7
|
+
data.tar.gz: bc3f35303e83992f01ec220d032a70371682e4a173912807219a34cd924f1416defc62cdf014bb30ffccf9fbb3b2abb42cf8371b0264659ad0637f4490945545
|
data/README.md
CHANGED
@@ -1,7 +1,17 @@
|
|
1
|
-
|
1
|
+
# Rubykov
|
2
2
|
|
3
3
|
Sensible, easy Markov chains in Ruby.
|
4
4
|
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Simply add:
|
8
|
+
|
9
|
+
```
|
10
|
+
gem 'rubykov'
|
11
|
+
```
|
12
|
+
|
13
|
+
to your Gemfile.
|
14
|
+
|
5
15
|
## Usage
|
6
16
|
|
7
17
|
Markov models are generated by an order and a set of training data.
|
data/lib/rubykov/markov_model.rb
CHANGED
@@ -3,38 +3,22 @@ module Rubykov
|
|
3
3
|
def initialize(order, training_data)
|
4
4
|
raise ArgumentError unless order.is_a? Integer
|
5
5
|
raise ArgumentError unless training_data.is_a? Array
|
6
|
-
|
7
|
-
training_data.each_cons(order + 1).each do |datum|
|
8
|
-
key = datum.first(order)
|
9
|
-
value = datum.last
|
10
|
-
if representation.include? key
|
11
|
-
representation[key] << value
|
12
|
-
else
|
13
|
-
representation[key] = [value]
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|
6
|
+
|
17
7
|
@order = order
|
18
|
-
@representation =
|
8
|
+
@representation = {}
|
9
|
+
add_data_to_model(training_data)
|
10
|
+
end
|
11
|
+
|
12
|
+
# Feeds additional training data into an already constructed model.
#
# The new transitions are merged into the existing ones by
# +add_data_to_model+; nothing that was learned earlier is discarded.
#
# @param training_data [Array] sequence of items to learn transitions from
# @return whatever +add_data_to_model+ returns
def train(training_data)
  add_data_to_model(training_data)
end
|
20
15
|
|
21
16
|
def chain
|
22
|
-
|
23
|
-
|
24
|
-
current_state.each do |word|
|
25
|
-
y << word
|
26
|
-
end
|
17
|
+
chain_enumerator
|
18
|
+
end
|
27
19
|
|
28
|
-
|
29
|
-
|
30
|
-
break
|
31
|
-
else
|
32
|
-
next_word = @representation[current_state].sample
|
33
|
-
y << next_word
|
34
|
-
current_state = current_state.last(@order-1) + [next_word]
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
20
|
+
# Starts a chain from a caller-chosen state instead of a random one.
#
# @param seed_state [Array] the state the walk begins with; its items are
#   the first items the chain yields
# @return [Enumerator] the enumerator built by +chain_enumerator+
def chain_with_seed(seed_state)
  chain_enumerator(seed_state)
end
|
39
23
|
|
40
24
|
def states
|
@@ -44,5 +28,35 @@ module Rubykov
|
|
44
28
|
# Exposes the transition table of the model.
#
# The returned object is the model's own internal hash, not a copy, so
# changes made to it by the caller affect the model.
#
# @return [Hash] state (Array) => Array of followers observed after it
def transitions
  @representation
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
# Records every transition found in +training_data+ in the model.
#
# The data is scanned with a sliding window of <tt>@order + 1</tt> items:
# the first <tt>@order</tt> items form the state and the last item is
# stored as a follower of that state. Followers are appended rather than
# de-duplicated, so a follower seen several times is listed several times.
# Data shorter than the window adds nothing.
#
# @param training_data [Array] sequence of items to learn from
def add_data_to_model(training_data)
  training_data.each_cons(@order + 1) do |window|
    *state, follower = window
    (@representation[state] ||= []) << follower
  end
end
|
45
|
+
|
46
|
+
# Builds an enumerator that walks the model starting at +seed_state+.
#
# The enumerator first yields the items of the seed state itself, then
# repeatedly samples one follower of the current state and yields it. The
# walk stops only when it reaches a state with no recorded followers, so on
# a model containing cycles it can be endless; callers are expected to
# bound it (e.g. with +take+ or +take_while+).
#
# @param seed_state [Array] state to start from; defaults to +states.sample+
# @return [Enumerator]
def chain_enumerator(seed_state = states.sample)
  Enumerator.new do |output|
    seed_state.each { |word| output << word }

    current_state = seed_state
    loop do
      followers = @representation[current_state]
      break if followers.nil?

      next_word = followers.sample
      output << next_word
      # Slide the window by one item. Trimming the extended state to @order
      # items (instead of calling last(@order - 1) on the old state) also
      # works for an order of 0, where last(-1) would raise ArgumentError.
      current_state = (current_state + [next_word]).last(@order)
    end
  end
end
|
47
61
|
end
|
48
62
|
end
|
@@ -1,19 +1,58 @@
|
|
1
1
|
module Rubykov
|
2
2
|
class TextGenerator < MarkovModel
|
3
3
|
def initialize(order, training_text)
|
4
|
-
super(order, training_text
|
4
|
+
super(order, massage_training_text(training_text))
|
5
|
+
end
|
6
|
+
|
7
|
+
# Adds more training text to an existing generator.
#
# The text is first turned into a token list by +massage_training_text+ and
# the tokens are then handed to the superclass implementation of +train+.
#
# @param training_text [String] raw text to learn from
# @return whatever the superclass +train+ returns
def train(training_text)
  tokens = massage_training_text(training_text)
  super(tokens)
end
|
6
10
|
|
7
11
|
def character_limited_output(desired_length)
|
8
12
|
length = 0
|
9
|
-
chain.take_while
|
10
|
-
length += (word.length + 1)
|
11
|
-
length < desired_length
|
12
|
-
end.join(' ').gsub(' .', '.').capitalize
|
13
|
+
words_to_sentences(chain.take_while { |word| length += (word.length + 1); length < desired_length } )
|
13
14
|
end
|
14
15
|
|
15
16
|
def word_limited_output(desired_length)
|
16
|
-
chain.take(desired_length)
|
17
|
+
words_to_sentences(chain.take(desired_length))
|
18
|
+
end
|
19
|
+
|
20
|
+
# Generates text containing +desired_length+ finished sentences.
#
# Words are drawn from +chain+; every token recognised by
# +is_sentence_finisher?+ counts as the end of one sentence. When a chain
# runs out before enough sentences were produced, a fresh chain is started
# and its words are appended. The token that completes the last requested
# sentence is included, so the result ends with its punctuation.
#
# NOTE: if the chains never yield a sentence-finishing token this loop does
# not terminate.
#
# @param desired_length [Integer] number of sentences wanted; values of 0
#   or less produce an empty string
# @return [String] the generated sentences as formatted by +words_to_sentences+
def sentence_limited_output(desired_length)
  finished = 0
  words = []
  while finished < desired_length
    chain.each do |word|
      words << word
      finished += 1 if is_sentence_finisher?(word)
      break if finished >= desired_length
    end
  end
  words_to_sentences(words)
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
# Turns a list of generated tokens into a single string.
#
# The tokens are first passed through +massage_word_list+ and the result is
# joined with single spaces.
#
# @param words [Array<String>] tokens in output order
# @return [String]
def words_to_sentences(words)
  merged = massage_word_list(words)
  merged.join(' ')
end
|
37
|
+
|
38
|
+
# Glues punctuation tokens back onto the word in front of them.
#
# A token consisting only of characters that are neither word characters
# nor whitespace is appended to the previous entry of the result; every
# other token becomes an entry of its own. A punctuation token at the very
# start of the list has nothing to attach to and is kept as its own entry
# (previously this case raised NoMethodError on nil).
#
# The input array and its strings are left unmodified.
#
# @param words [Array<String>] tokens in output order
# @return [Array<String>] tokens with punctuation merged into the preceding word
def massage_word_list(words)
  words.each_with_object([]) do |word, merged|
    if !merged.empty? && word =~ /\A[^\w\s]+\z/
      # += builds a new string, so the caller's token is not mutated.
      merged[-1] += word
    else
      merged << word
    end
  end
end
|
49
|
+
|
50
|
+
# Tells whether +word+ is a token made up solely of sentence-ending
# punctuation, i.e. a non-empty run of ".", "!" and "?".
#
# The whole token must match: the pattern is anchored with \A and \z (not
# the per-line ^ and $) and requires at least one character, so neither an
# empty string nor a multi-line string with a punctuation-only line counts.
#
# @param word [String] a single token
# @return [Boolean]
def is_sentence_finisher?(word)
  word =~ /\A[?!.]+\z/ ? true : false
end
|
53
|
+
|
54
|
+
# Converts raw training text into the token list the model is trained on.
#
# The text is downcased, a space is inserted in front of every run of
# characters that are neither word characters nor whitespace, and the
# result is split on whitespace. Punctuation therefore becomes a token of
# its own when it follows a word ("Hello, world." => hello , world .).
#
# Word characters are matched with the Unicode-aware [[:word:]] class
# rather than the ASCII-only \w, so accented letters stay inside their word
# instead of being split off as punctuation.
#
# Only the text *before* a punctuation run is separated from it: in
# "don't" the apostrophe stays attached to what follows ("don", "'t").
#
# @param training_text [String] raw text
# @return [Array<String>] lower-case tokens
def massage_training_text(training_text)
  spaced = training_text.downcase.gsub(/[^[:word:]\s]+/) { |punctuation| " #{punctuation}" }
  spaced.split(' ')
end
|
18
57
|
end
|
19
58
|
end
|
data/license.md
CHANGED