lita-markov 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/lita/handlers/markov.rb +66 -11
- data/lib/lita/handlers/markov/engine.rb +17 -10
- data/lita-markov.gemspec +2 -1
- data/templates/backlog_form.erb +8 -0
- metadata +17 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 34357a93d6cd917570538738c72f1bc0d301b78b
|
4
|
+
data.tar.gz: 4fa6a56d9c25baf00d141bb9d593bc11bf3fc469
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a139b7b1e4618b420addacdaf0732885d02defcbc0c741186c892efb85b7d9dba0ea230b6dffd544903cac599f257f73624158340cf7baf10cf1cb745f84189a
|
7
|
+
data.tar.gz: 7737301aa793730960cc142892586d64d6a8e5310760fe4ad868d792610d04240ba253f2bd69c24148c3c5be2cd842efa4d9bce7fbef8a698c111c3aac2c682a
|
data/lib/lita/handlers/markov.rb
CHANGED
@@ -1,10 +1,13 @@
|
|
1
1
|
# Forward definition of Markov handler class
|
2
2
|
class Lita::Handlers::Markov < Lita::Handler; end
|
3
3
|
|
4
|
+
require 'oj'
|
4
5
|
require 'lita/handlers/markov/engine'
|
5
6
|
|
6
7
|
module Lita::Handlers
|
7
8
|
class Markov
|
9
|
+
template_root File.expand_path('../../../../templates', __FILE__)
|
10
|
+
|
8
11
|
config :database_url, type: String, required: true
|
9
12
|
|
10
13
|
route(/.+/, :ingest, command: false)
|
@@ -13,8 +16,15 @@ module Lita::Handlers
|
|
13
16
|
'markov USER' => 'Generate a markov chain from the given user.'
|
14
17
|
})
|
15
18
|
|
19
|
+
http.get '/markov/backlog', :backlog_form
|
20
|
+
http.post '/markov/upload_backlog', :upload_backlog
|
21
|
+
|
22
|
+
# Share the engine instance between all instances of the bot
|
23
|
+
def self.engine(instance)
|
24
|
+
@engine ||= Engine.new(instance.config.database_url)
|
25
|
+
end
|
16
26
|
def engine
|
17
|
-
|
27
|
+
self.class.engine(self)
|
18
28
|
end
|
19
29
|
|
20
30
|
def ingest(chat)
|
@@ -47,22 +57,67 @@ module Lita::Handlers
|
|
47
57
|
end
|
48
58
|
end
|
49
59
|
|
50
|
-
def
|
51
|
-
|
60
|
+
def backlog_form(request, response)
|
61
|
+
render_backlog_form response
|
52
62
|
end
|
53
63
|
|
54
|
-
def
|
55
|
-
|
56
|
-
|
57
|
-
|
64
|
+
def upload_backlog(request, response)
|
65
|
+
t0 = Time.now
|
66
|
+
|
67
|
+
response.headers['Content-Type'] = 'text/plain'
|
68
|
+
|
69
|
+
multipart = Rack::Multipart.parse_multipart request.env
|
70
|
+
tempfile = multipart.values.first[:tempfile]
|
71
|
+
|
72
|
+
begin
|
73
|
+
messages = Oj.load File.read(tempfile.path).strip, :mode => :strict
|
74
|
+
rescue Oj::ParseError => error
|
75
|
+
response.write error.message
|
76
|
+
return
|
77
|
+
end
|
78
|
+
|
79
|
+
messages.select! { |m| m['type'] == 'message' }
|
58
80
|
|
59
|
-
|
81
|
+
users = {}
|
82
|
+
find_user = proc do |id|
|
83
|
+
users[id] ||= Lita::User.fuzzy_find id
|
84
|
+
end
|
85
|
+
|
86
|
+
meta_tag_regex = /<(\w|[!|@])+>/
|
87
|
+
|
88
|
+
count = 0
|
89
|
+
messages.each do |message|
|
90
|
+
count += 1
|
91
|
+
|
92
|
+
begin
|
93
|
+
text = message['text'.freeze]
|
94
|
+
next unless text
|
95
|
+
|
96
|
+
user = find_user.call message['user'.freeze]
|
97
|
+
unless user
|
98
|
+
response.write "User not found for message ##{count}: #{message['user']}\n"
|
99
|
+
next
|
100
|
+
end
|
60
101
|
|
61
|
-
|
102
|
+
message = text.gsub meta_tag_regex, ''.freeze
|
103
|
+
|
104
|
+
engine.ingest user.id, message
|
105
|
+
|
106
|
+
if count % 1000 == 0
|
107
|
+
response.write "Processed #{count} messages\n"
|
108
|
+
end
|
109
|
+
rescue => error
|
110
|
+
response.write "Error writing message ##{count}: #{error.inspect}\n"
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
response.write "Processed #{count} total messages in #{Time.now - t0} seconds\n"
|
62
115
|
end
|
63
116
|
|
64
|
-
|
65
|
-
|
117
|
+
private
|
118
|
+
|
119
|
+
def render_backlog_form(response)
|
120
|
+
response.write render_template('backlog_form')
|
66
121
|
end
|
67
122
|
|
68
123
|
Lita.register_handler self
|
@@ -146,35 +146,42 @@ class Lita::Handlers::Markov
|
|
146
146
|
chain
|
147
147
|
end
|
148
148
|
|
149
|
+
STRING_SEPARATOR = /([.!?])|\s+/
|
150
|
+
|
149
151
|
def separate_string string
|
150
152
|
# Including the punctuation in group so they'll be included in the
|
151
153
|
# split results
|
152
154
|
string
|
153
|
-
.split(
|
154
|
-
.map { |w| w.strip }
|
155
|
+
.split(STRING_SEPARATOR)
|
156
|
+
.map { |w| w.strip!; w }
|
155
157
|
.select { |w| !w.empty? }
|
156
158
|
end
|
157
159
|
|
158
|
-
PUNCTUATION = ['.', '!', '?']
|
160
|
+
PUNCTUATION = [',', '.', '!', '?']
|
159
161
|
|
160
162
|
# Don't allow anything besides letters, digits, whitespace, and punctuation
|
161
163
|
ILLEGAL_CHARACTERS = /[^\w\d\s:;,.!?#@]/
|
162
164
|
|
165
|
+
HYPERLINKS = /http[^\s]+/
|
163
166
|
SIMPLE_CODE_BLOCK = /`[^`]+`/
|
164
167
|
EXTENDED_CODE_BLOCK = /```.+```/m
|
165
168
|
|
169
|
+
REPEATED_PUNCTUATION = /([.!?])[.!?]+/
|
170
|
+
BASIC_PUNCTUATION = /([;,.!?])/
|
171
|
+
|
172
|
+
|
166
173
|
def sanitize_string string
|
167
174
|
string = string
|
168
|
-
.
|
169
|
-
.gsub(
|
170
|
-
.gsub(
|
171
|
-
.gsub(
|
172
|
-
.gsub(
|
173
|
-
.gsub(
|
175
|
+
.gsub(HYPERLINKS, ''.freeze) # Remove any hyperlinks
|
176
|
+
.gsub(SIMPLE_CODE_BLOCK, ''.freeze) # Remove code blocks and illegal characters
|
177
|
+
.gsub(EXTENDED_CODE_BLOCK, ''.freeze)
|
178
|
+
.gsub(ILLEGAL_CHARACTERS, ''.freeze)
|
179
|
+
.gsub(REPEATED_PUNCTUATION, '\1'.freeze) # Trim down repeated punctuation
|
180
|
+
.gsub(BASIC_PUNCTUATION, '\1 '.freeze) # Put whitespace after punctuation for proper separation
|
174
181
|
.strip()
|
175
182
|
|
176
183
|
ends_with_punctuation = PUNCTUATION.any? { |p| string.end_with? p }
|
177
|
-
string = string+'.' unless ends_with_punctuation
|
184
|
+
string = string+'.'.freeze unless ends_with_punctuation
|
178
185
|
|
179
186
|
string
|
180
187
|
end
|
data/lita-markov.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "lita-markov"
|
3
|
-
spec.version = "1.0.2"
|
3
|
+
spec.version = "1.1.0"
|
4
4
|
spec.authors = ["Dirk Gadsden"]
|
5
5
|
spec.email = ["dirk@dirk.to"]
|
6
6
|
spec.description = "Markov chains for Lita."
|
@@ -17,6 +17,7 @@ Gem::Specification.new do |spec|
|
|
17
17
|
spec.add_runtime_dependency "sequel", "~> 4.28.0"
|
18
18
|
spec.add_runtime_dependency "mysql2", "~> 0.4.1"
|
19
19
|
spec.add_runtime_dependency "pg", "~> 0.18.4"
|
20
|
+
spec.add_runtime_dependency "oj", "~> 2.13.1"
|
20
21
|
|
21
22
|
spec.add_development_dependency "bundler", "~> 1.3"
|
22
23
|
spec.add_development_dependency "pry-byebug"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lita-markov
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.2
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dirk Gadsden
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: 0.18.4
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: oj
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 2.13.1
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 2.13.1
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: bundler
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -157,6 +171,7 @@ files:
|
|
157
171
|
- spec/lita/handlers/markov_spec.rb
|
158
172
|
- spec/spec_helper.rb
|
159
173
|
- templates/.gitkeep
|
174
|
+
- templates/backlog_form.erb
|
160
175
|
homepage: http://github.com/dirk/lita-markov
|
161
176
|
licenses: []
|
162
177
|
metadata:
|
@@ -177,7 +192,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
177
192
|
version: '0'
|
178
193
|
requirements: []
|
179
194
|
rubyforge_project:
|
180
|
-
rubygems_version: 2.4.5
|
195
|
+
rubygems_version: 2.4.5.1
|
181
196
|
signing_key:
|
182
197
|
specification_version: 4
|
183
198
|
summary: Markov chains for Lita.
|