lita-markov 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/lita/handlers/markov/engine.rb +8 -23
- data/lita-markov.gemspec +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 899ea3cf061e0e9e60046fa630ecc85ac0c62d3c
|
4
|
+
data.tar.gz: c9c47683d42548b71f6b9adcf17130786d0e7ae3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f52a9534465220eadd3b53cee38d98169f04640696deb168a4c7dcc549b3f19587c6d508046c950b99438894d962f527a8c4f8affbf36bdd8b58b92ecb4e4548
|
7
|
+
data.tar.gz: ee9904cfdaef57ca3757407511e90763fac96173b0897973cd6616a21915b3a8a41da460f56fd9767c85e35f56069d4c77774a7afa1ac400be8274393470d31a
|
@@ -36,8 +36,8 @@ class Lita::Handlers::Markov
|
|
36
36
|
|
37
37
|
return if words.length == 0
|
38
38
|
|
39
|
-
# Capitalize the first word
|
40
|
-
words = [words[0].capitalize] + words.slice(1..-1)
|
39
|
+
# Capitalize the first word and add a period at the end
|
40
|
+
words = [words[0].capitalize] + words.slice(1..-1) + ['.']
|
41
41
|
|
42
42
|
# Iterate over it one step at a time in sets of `@depth + 1`
|
43
43
|
words.each_cons(@depth + 1) do |words|
|
@@ -99,10 +99,6 @@ class Lita::Handlers::Markov
|
|
99
99
|
state.split(' ').last
|
100
100
|
end
|
101
101
|
|
102
|
-
def is_punctuation?(string)
|
103
|
-
PUNCTUATION.any? { |p| string == p }
|
104
|
-
end
|
105
|
-
|
106
102
|
def get_next_state(user, current_state)
|
107
103
|
states = @db[:dictionary]
|
108
104
|
.where(user: user, current_state: current_state)
|
@@ -133,7 +129,7 @@ class Lita::Handlers::Markov
|
|
133
129
|
|
134
130
|
sentence << next_state
|
135
131
|
|
136
|
-
if
|
132
|
+
if next_state == '.'
|
137
133
|
ended_with_punctuation = true
|
138
134
|
break
|
139
135
|
end
|
@@ -146,7 +142,7 @@ class Lita::Handlers::Markov
|
|
146
142
|
chain
|
147
143
|
end
|
148
144
|
|
149
|
-
STRING_SEPARATOR =
|
145
|
+
STRING_SEPARATOR = /\s+/
|
150
146
|
|
151
147
|
def separate_string string
|
152
148
|
# Including the punctuation in group so they'll be included in the
|
@@ -157,33 +153,22 @@ class Lita::Handlers::Markov
|
|
157
153
|
.select { |w| !w.empty? }
|
158
154
|
end
|
159
155
|
|
160
|
-
PUNCTUATION = [',', '.', '!', '?']
|
161
|
-
|
162
156
|
# Don't allow anything besides letters, digits, whitespace, and punctuation
|
163
|
-
|
157
|
+
NON_WORD_CHARACTERS = /[^\w\d'"“”’:+-]/
|
164
158
|
|
165
159
|
HYPERLINKS = /http[^\s]+/
|
166
160
|
SIMPLE_CODE_BLOCK = /`[^`]+`/
|
167
161
|
EXTENDED_CODE_BLOCK = /```.+```/m
|
168
|
-
|
169
|
-
REPEATED_PUNCTUATION = /([.!?])[.!?]+/
|
170
|
-
BASIC_PUNCTUATION = /([;,.!?])/
|
171
|
-
|
162
|
+
REPEATED_WHITESPACE = /\s+/
|
172
163
|
|
173
164
|
def sanitize_string string
|
174
165
|
string = string
|
175
166
|
.gsub(HYPERLINKS, ''.freeze) # Remove any hyperlinks
|
176
167
|
.gsub(SIMPLE_CODE_BLOCK, ''.freeze) # Remove code blocks and illegal characters
|
177
168
|
.gsub(EXTENDED_CODE_BLOCK, ''.freeze)
|
178
|
-
.gsub(
|
179
|
-
.gsub(
|
180
|
-
.gsub(BASIC_PUNCTUATION, '\1 '.freeze) # Put whitespace after punctuation for proper separation
|
169
|
+
.gsub(NON_WORD_CHARACTERS, ' '.freeze) # Convert non-word characters into whitespace
|
170
|
+
.gsub(REPEATED_WHITESPACE, ' '.freeze) # Convert repeated whitespace into just single spaces
|
181
171
|
.strip()
|
182
|
-
|
183
|
-
ends_with_punctuation = PUNCTUATION.any? { |p| string.end_with? p }
|
184
|
-
string = string+'.'.freeze unless ends_with_punctuation
|
185
|
-
|
186
|
-
string
|
187
172
|
end
|
188
173
|
end
|
189
174
|
end
|
data/lita-markov.gemspec
CHANGED