rubykov 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +11 -1
- data/lib/rubykov/markov_model.rb +41 -27
- data/lib/rubykov/text_generator.rb +45 -6
- data/license.md +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9827c73272cba93ae8557afe9cd9158175f5710c
|
4
|
+
data.tar.gz: 3cafc24fa4870f60450dae6d2a5aa692ee3f72c7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b62a34df1580d94914ca0fe4bb171ba5d7449857e3f7323d72958930a912e9b69e67664eb23ce1b7562f14168890a8b46c549d754848951eaa94998ee875074f
|
7
|
+
data.tar.gz: bc3f35303e83992f01ec220d032a70371682e4a173912807219a34cd924f1416defc62cdf014bb30ffccf9fbb3b2abb42cf8371b0264659ad0637f4490945545
|
data/README.md
CHANGED
@@ -1,7 +1,17 @@
|
|
1
|
-
|
1
|
+
# Rubykov
|
2
2
|
|
3
3
|
Sensible, easy Markov chains in Ruby.
|
4
4
|
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Simply add:
|
8
|
+
|
9
|
+
```
|
10
|
+
gem 'rubykov'
|
11
|
+
```
|
12
|
+
|
13
|
+
to your Gemfile.
|
14
|
+
|
5
15
|
## Usage
|
6
16
|
|
7
17
|
Markov models are generated by an order and a set of training data.
|
data/lib/rubykov/markov_model.rb
CHANGED
@@ -3,38 +3,22 @@ module Rubykov
|
|
3
3
|
def initialize(order, training_data)
|
4
4
|
raise ArgumentError unless order.is_a? Integer
|
5
5
|
raise ArgumentError unless training_data.is_a? Array
|
6
|
-
|
7
|
-
training_data.each_cons(order + 1).each do |datum|
|
8
|
-
key = datum.first(order)
|
9
|
-
value = datum.last
|
10
|
-
if representation.include? key
|
11
|
-
representation[key] << value
|
12
|
-
else
|
13
|
-
representation[key] = [value]
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|
6
|
+
|
17
7
|
@order = order
|
18
|
-
@representation =
|
8
|
+
@representation = {}
|
9
|
+
add_data_to_model(training_data)
|
10
|
+
end
|
11
|
+
|
12
|
+
def train(training_data)
|
13
|
+
add_data_to_model(training_data)
|
19
14
|
end
|
20
15
|
|
21
16
|
def chain
|
22
|
-
|
23
|
-
|
24
|
-
current_state.each do |word|
|
25
|
-
y << word
|
26
|
-
end
|
17
|
+
chain_enumerator
|
18
|
+
end
|
27
19
|
|
28
|
-
|
29
|
-
|
30
|
-
break
|
31
|
-
else
|
32
|
-
next_word = @representation[current_state].sample
|
33
|
-
y << next_word
|
34
|
-
current_state = current_state.last(@order-1) + [next_word]
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
20
|
+
def chain_with_seed(seed_state)
|
21
|
+
chain_enumerator(seed_state)
|
38
22
|
end
|
39
23
|
|
40
24
|
def states
|
@@ -44,5 +28,35 @@ module Rubykov
|
|
44
28
|
def transitions
|
45
29
|
@representation
|
46
30
|
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
def add_data_to_model(training_data)
|
35
|
+
training_data.each_cons(@order + 1).each do |datum|
|
36
|
+
key = datum.first(@order)
|
37
|
+
value = datum.last
|
38
|
+
if @representation.include? key
|
39
|
+
@representation[key] << value
|
40
|
+
else
|
41
|
+
@representation[key] = [value]
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def chain_enumerator(seed_state = states.sample)
|
47
|
+
Enumerator.new do |output|
|
48
|
+
current_state = seed_state
|
49
|
+
current_state.each do |word|
|
50
|
+
output << word
|
51
|
+
end
|
52
|
+
|
53
|
+
loop do
|
54
|
+
break if @representation[current_state].nil?
|
55
|
+
next_word = @representation[current_state].sample
|
56
|
+
output << next_word
|
57
|
+
current_state = current_state.last(@order-1) + [next_word]
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
47
61
|
end
|
48
62
|
end
|
data/lib/rubykov/text_generator.rb
CHANGED
@@ -1,19 +1,58 @@
|
|
1
1
|
module Rubykov
|
2
2
|
class TextGenerator < MarkovModel
|
3
3
|
def initialize(order, training_text)
|
4
|
-
super(order, training_text
|
4
|
+
super(order, massage_training_text(training_text))
|
5
|
+
end
|
6
|
+
|
7
|
+
def train(training_text)
|
8
|
+
super(massage_training_text(training_text))
|
5
9
|
end
|
6
10
|
|
7
11
|
def character_limited_output(desired_length)
|
8
12
|
length = 0
|
9
|
-
chain.take_while
|
10
|
-
length += (word.length + 1)
|
11
|
-
length < desired_length
|
12
|
-
end.join(' ').gsub(' .', '.').capitalize
|
13
|
+
words_to_sentences(chain.take_while { |word| length += (word.length + 1); length < desired_length } )
|
13
14
|
end
|
14
15
|
|
15
16
|
def word_limited_output(desired_length)
|
16
|
-
chain.take(desired_length)
|
17
|
+
words_to_sentences(chain.take(desired_length))
|
18
|
+
end
|
19
|
+
|
20
|
+
def sentence_limited_output(desired_length)
|
21
|
+
length = 0
|
22
|
+
output = []
|
23
|
+
while length < desired_length
|
24
|
+
output += chain.take_while do |word|
|
25
|
+
length += 1 if is_sentence_finisher?(word)
|
26
|
+
length < desired_length
|
27
|
+
end
|
28
|
+
end
|
29
|
+
words_to_sentences(output)
|
30
|
+
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
def words_to_sentences(words)
|
35
|
+
massage_word_list(words).join(' ')
|
36
|
+
end
|
37
|
+
|
38
|
+
def massage_word_list(words)
|
39
|
+
output = []
|
40
|
+
words.each_with_index do |word, index|
|
41
|
+
if word =~ /^[^\w\s]+$/
|
42
|
+
output[-1] += word
|
43
|
+
else
|
44
|
+
output << word
|
45
|
+
end
|
46
|
+
end
|
47
|
+
output
|
48
|
+
end
|
49
|
+
|
50
|
+
def is_sentence_finisher?(word)
|
51
|
+
word =~ (/^[?!.]*$/)
|
52
|
+
end
|
53
|
+
|
54
|
+
def massage_training_text(training_text)
|
55
|
+
training_text.downcase.gsub(/[^\w\s]+/) { " #{$&}" }.split(' ')
|
17
56
|
end
|
18
57
|
end
|
19
58
|
end
|
data/license.md
CHANGED