twitter_ebooks 2.0.8 → 2.0.9
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +1 -1
- data/lib/twitter_ebooks/model.rb +1 -1
- data/lib/twitter_ebooks/suffix.rb +6 -1
- data/lib/twitter_ebooks/version.rb +1 -1
- metadata +1 -1
data/README.md
CHANGED
data/lib/twitter_ebooks/model.rb
CHANGED
@@ -74,7 +74,7 @@ module Ebooks
|
|
74
74
|
|
75
75
|
def make_statement(limit=140, generator=nil)
|
76
76
|
responding = !generator.nil?
|
77
|
-
generator
|
77
|
+
generator ||= SuffixGenerator.build(@sentences)
|
78
78
|
tweet = ""
|
79
79
|
|
80
80
|
while (tokens = generator.generate(3, :bigrams)) do
|
data/lib/twitter_ebooks/suffix.rb
CHANGED
@@ -1,4 +1,8 @@
|
|
1
1
|
module Ebooks
|
2
|
+
# This generator uses data identical to the markov model, but
|
3
|
+
# instead of making a chain by looking up bigrams it uses the
|
4
|
+
# positions to randomly replace suffixes in one sentence with
|
5
|
+
# matching suffixes in another
|
2
6
|
class SuffixGenerator
|
3
7
|
def self.build(sentences)
|
4
8
|
SuffixGenerator.new(sentences)
|
@@ -48,6 +52,7 @@ module Ebooks
|
|
48
52
|
break if next_token.nil?
|
49
53
|
|
50
54
|
alternatives = (n == :unigrams) ? @unigrams[next_token] : @bigrams[token][next_token]
|
55
|
+
# Filter out suffixes from previous sentences
|
51
56
|
alternatives.reject! { |a| a[1] == INTERIM || used.include?(a[0]) }
|
52
57
|
varsites[i] = alternatives unless alternatives.empty?
|
53
58
|
end
|
@@ -62,6 +67,7 @@ module Ebooks
|
|
62
67
|
suffix = @sentences[alt[0]][alt[1]..-1]
|
63
68
|
potential = tokens[0..start+1] + suffix
|
64
69
|
|
70
|
+
# Ensure we're not just rebuilding some segment of another sentence
|
65
71
|
unless verbatim.find { |v| NLP.subseq?(v, potential) || NLP.subseq?(potential, v) }
|
66
72
|
used << alt[0]
|
67
73
|
variant = potential
|
@@ -75,7 +81,6 @@ module Ebooks
|
|
75
81
|
tokens = variant if variant
|
76
82
|
end
|
77
83
|
|
78
|
-
|
79
84
|
tokens
|
80
85
|
end
|
81
86
|
end
|