rubykov 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1bdb3e4295782a7bf03967d2f4de67aef65fa64b
4
- data.tar.gz: 16e45e2b424bb261476afc2dae7619af27f9bcb6
3
+ metadata.gz: 9827c73272cba93ae8557afe9cd9158175f5710c
4
+ data.tar.gz: 3cafc24fa4870f60450dae6d2a5aa692ee3f72c7
5
5
  SHA512:
6
- metadata.gz: 3570f5ad7f21b459e7f31575b08b4dd27e799189c081b54352bdc2d0bdf5ff04d0672efb664896eec00787951394cc5953c151cb659f7e87b1269ff7dee71a3f
7
- data.tar.gz: 8b0fd9ba3513aaf47e58e66bd0f7a252df8a529c29cd45f43ea00547b5782784a502c8deadaed6af757ca1ad4f80f1315378fe22b72a6733fcfaed20f1a885d3
6
+ metadata.gz: b62a34df1580d94914ca0fe4bb171ba5d7449857e3f7323d72958930a912e9b69e67664eb23ce1b7562f14168890a8b46c549d754848951eaa94998ee875074f
7
+ data.tar.gz: bc3f35303e83992f01ec220d032a70371682e4a173912807219a34cd924f1416defc62cdf014bb30ffccf9fbb3b2abb42cf8371b0264659ad0637f4490945545
data/README.md CHANGED
@@ -1,7 +1,17 @@
1
- ### Rubykov
1
+ # Rubykov
2
2
 
3
3
  Sensible, easy Markov chains in Ruby.
4
4
 
5
+ ## Installation
6
+
7
+ Simply add:
8
+
9
+ ```
10
+ gem 'rubykov'
11
+ ```
12
+
13
+ to your Gemfile.
14
+
5
15
  ## Usage
6
16
 
7
17
  Markov models are generated by an order and a set of training data.
@@ -3,38 +3,22 @@ module Rubykov
3
3
  def initialize(order, training_data)
4
4
  raise ArgumentError unless order.is_a? Integer
5
5
  raise ArgumentError unless training_data.is_a? Array
6
- chain_data = {}.tap do |representation|
7
- training_data.each_cons(order + 1).each do |datum|
8
- key = datum.first(order)
9
- value = datum.last
10
- if representation.include? key
11
- representation[key] << value
12
- else
13
- representation[key] = [value]
14
- end
15
- end
16
- end
6
+
17
7
  @order = order
18
- @representation = chain_data
8
+ @representation = {}
9
+ add_data_to_model(training_data)
10
+ end
11
+
12
+ def train(training_data)
13
+ add_data_to_model(training_data)
19
14
  end
20
15
 
21
16
  def chain
22
- Enumerator.new do |y|
23
- current_state = @representation.keys.sample
24
- current_state.each do |word|
25
- y << word
26
- end
17
+ chain_enumerator
18
+ end
27
19
 
28
- loop do
29
- if @representation[current_state].nil?
30
- break
31
- else
32
- next_word = @representation[current_state].sample
33
- y << next_word
34
- current_state = current_state.last(@order-1) + [next_word]
35
- end
36
- end
37
- end
20
+ def chain_with_seed(seed_state)
21
+ chain_enumerator(seed_state)
38
22
  end
39
23
 
40
24
  def states
@@ -44,5 +28,35 @@ module Rubykov
44
28
  def transitions
45
29
  @representation
46
30
  end
31
+
32
+ private
33
+
34
+ def add_data_to_model(training_data)
35
+ training_data.each_cons(@order + 1).each do |datum|
36
+ key = datum.first(@order)
37
+ value = datum.last
38
+ if @representation.include? key
39
+ @representation[key] << value
40
+ else
41
+ @representation[key] = [value]
42
+ end
43
+ end
44
+ end
45
+
46
+ def chain_enumerator(seed_state = states.sample)
47
+ Enumerator.new do |output|
48
+ current_state = seed_state
49
+ current_state.each do |word|
50
+ output << word
51
+ end
52
+
53
+ loop do
54
+ break if @representation[current_state].nil?
55
+ next_word = @representation[current_state].sample
56
+ output << next_word
57
+ current_state = current_state.last(@order-1) + [next_word]
58
+ end
59
+ end
60
+ end
47
61
  end
48
62
  end
@@ -1,19 +1,58 @@
1
1
  module Rubykov
2
2
  class TextGenerator < MarkovModel
3
3
  def initialize(order, training_text)
4
- super(order, training_text.downcase.gsub('.', ' .').split(' '))
4
+ super(order, massage_training_text(training_text))
5
+ end
6
+
7
+ def train(training_text)
8
+ super(massage_training_text(training_text))
5
9
  end
6
10
 
7
11
  def character_limited_output(desired_length)
8
12
  length = 0
9
- chain.take_while do |word|
10
- length += (word.length + 1)
11
- length < desired_length
12
- end.join(' ').gsub(' .', '.').capitalize
13
+ words_to_sentences(chain.take_while { |word| length += (word.length + 1); length < desired_length } )
13
14
  end
14
15
 
15
16
  def word_limited_output(desired_length)
16
- chain.take(desired_length).join(' ').gsub(' .', '.').capitalize
17
+ words_to_sentences(chain.take(desired_length))
18
+ end
19
+
20
+ def sentence_limited_output(desired_length)
21
+ length = 0
22
+ output = []
23
+ while length < desired_length
24
+ output += chain.take_while do |word|
25
+ length += 1 if is_sentence_finisher?(word)
26
+ length < desired_length
27
+ end
28
+ end
29
+ words_to_sentences(output)
30
+ end
31
+
32
+ private
33
+
34
+ def words_to_sentences(words)
35
+ massage_word_list(words).join(' ')
36
+ end
37
+
38
+ def massage_word_list(words)
39
+ output = []
40
+ words.each_with_index do |word, index|
41
+ if word =~ /^[^\w\s]+$/
42
+ output[-1] += word
43
+ else
44
+ output << word
45
+ end
46
+ end
47
+ output
48
+ end
49
+
50
+ def is_sentence_finisher?(word)
51
+ word =~ (/^[?!.]*$/)
52
+ end
53
+
54
+ def massage_training_text(training_text)
55
+ training_text.downcase.gsub(/[^\w\s]+/) { " #{$&}" }.split(' ')
17
56
  end
18
57
  end
19
58
  end
data/license.md CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) <2014> <Evan Hemsley>
3
+ Copyright (c) 2014 Evan Hemsley
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rubykov
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Evan Hemsley