lita-markov 1.0.2 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7b5c291ff7b19be493ce427ed3ef9d49eac2ba4d
4
- data.tar.gz: bec9e862f464c5ed4daeeea15adf946bb22335b7
3
+ metadata.gz: 34357a93d6cd917570538738c72f1bc0d301b78b
4
+ data.tar.gz: 4fa6a56d9c25baf00d141bb9d593bc11bf3fc469
5
5
  SHA512:
6
- metadata.gz: 5f82af9abdb8fcd6c6413e6f384cf884034d34b4be5a63a6e6f28357d9465a02b81a215447ea16196730aca584ab7bb229b4e8de03bcdcc4615ab9d94c9ab722
7
- data.tar.gz: b7bc2ad646600648117a2013986d56a02ad803cba950e8e81167f429f688742c5fe08625ae53fa294e507c041f9bbb09500a30d9c8ad2f144bf423fd4f0d6ca8
6
+ metadata.gz: a139b7b1e4618b420addacdaf0732885d02defcbc0c741186c892efb85b7d9dba0ea230b6dffd544903cac599f257f73624158340cf7baf10cf1cb745f84189a
7
+ data.tar.gz: 7737301aa793730960cc142892586d64d6a8e5310760fe4ad868d792610d04240ba253f2bd69c24148c3c5be2cd842efa4d9bce7fbef8a698c111c3aac2c682a
@@ -1,10 +1,13 @@
1
1
  # Forward definition of Markov handler class
2
2
  class Lita::Handlers::Markov < Lita::Handler; end
3
3
 
4
+ require 'oj'
4
5
  require 'lita/handlers/markov/engine'
5
6
 
6
7
  module Lita::Handlers
7
8
  class Markov
9
+ template_root File.expand_path('../../../../templates', __FILE__)
10
+
8
11
  config :database_url, type: String, required: true
9
12
 
10
13
  route(/.+/, :ingest, command: false)
@@ -13,8 +16,15 @@ module Lita::Handlers
13
16
  'markov USER' => 'Generate a markov chain from the given user.'
14
17
  })
15
18
 
19
+ http.get '/markov/backlog', :backlog_form
20
+ http.post '/markov/upload_backlog', :upload_backlog
21
+
22
+ # Share the engine instance between all instances of the bot
23
+ def self.engine(instance)
24
+ @engine ||= Engine.new(instance.config.database_url)
25
+ end
16
26
  def engine
17
- @engine ||= Engine.new config.database_url
27
+ self.class.engine(self)
18
28
  end
19
29
 
20
30
  def ingest(chat)
@@ -47,22 +57,67 @@ module Lita::Handlers
47
57
  end
48
58
  end
49
59
 
50
- def save_dictionary(name, dictionary)
51
- redis.set key_for_user(name), dictionary.to_json
60
+ def backlog_form(request, response)
61
+ render_backlog_form response
52
62
  end
53
63
 
54
- def dictionary_for_user(name)
55
- key = key_for_user name
56
- dictionary = Dictionary.new name
57
- json = redis.get key
64
+ def upload_backlog(request, response)
65
+ t0 = Time.now
66
+
67
+ response.headers['Content-Type'] = 'text/plain'
68
+
69
+ multipart = Rack::Multipart.parse_multipart request.env
70
+ tempfile = multipart.values.first[:tempfile]
71
+
72
+ begin
73
+ messages = Oj.load File.read(tempfile.path).strip, :mode => :strict
74
+ rescue Oj::ParseError => error
75
+ response.write error.message
76
+ return
77
+ end
78
+
79
+ messages.select! { |m| m['type'] == 'message' }
58
80
 
59
- dictionary.load_json(json) if json
81
+ users = {}
82
+ find_user = proc do |id|
83
+ users[id] ||= Lita::User.fuzzy_find id
84
+ end
85
+
86
+ meta_tag_regex = /<(\w|[!|@])+>/
87
+
88
+ count = 0
89
+ messages.each do |message|
90
+ count += 1
91
+
92
+ begin
93
+ text = message['text'.freeze]
94
+ next unless text
95
+
96
+ user = find_user.call message['user'.freeze]
97
+ unless user
98
+ response.write "User not found for message ##{count}: #{message['user']}\n"
99
+ next
100
+ end
60
101
 
61
- dictionary
102
+ message = text.gsub meta_tag_regex, ''.freeze
103
+
104
+ engine.ingest user.id, message
105
+
106
+ if count % 1000 == 0
107
+ response.write "Processed #{count} messages\n"
108
+ end
109
+ rescue => error
110
+ response.write "Error writing message ##{count}: #{error.inspect}\n"
111
+ end
112
+ end
113
+
114
+ response.write "Processed #{count} total messages in #{Time.now - t0} seconds\n"
62
115
  end
63
116
 
64
- def key_for_user(name)
65
- REDIS_KEY_PREFIX+name.downcase
117
+ private
118
+
119
+ def render_backlog_form(response)
120
+ response.write render_template('backlog_form')
66
121
  end
67
122
 
68
123
  Lita.register_handler self
@@ -146,35 +146,42 @@ class Lita::Handlers::Markov
146
146
  chain
147
147
  end
148
148
 
149
+ STRING_SEPARATOR = /([.!?])|\s+/
150
+
149
151
  def separate_string string
150
152
  # Including the punctuation in group so they'll be included in the
151
153
  # split results
152
154
  string
153
- .split(/([.!?])|\s+/)
154
- .map { |w| w.strip }
155
+ .split(STRING_SEPARATOR)
156
+ .map { |w| w.strip!; w }
155
157
  .select { |w| !w.empty? }
156
158
  end
157
159
 
158
- PUNCTUATION = ['.', '!', '?']
160
+ PUNCTUATION = [',', '.', '!', '?']
159
161
 
160
162
  # Don't allow anything besides letters, digits, whitespace, and puncutation
161
163
  ILLEGAL_CHARACTERS = /[^\w\d\s:;,.!?#@]/
162
164
 
165
+ HYPERLINKS = /http[^\s]+/
163
166
  SIMPLE_CODE_BLOCK = /`[^`]+`/
164
167
  EXTENDED_CODE_BLOCK = /```.+```/m
165
168
 
169
+ REPEATED_PUNCTUATION = /([.!?])[.!?]+/
170
+ BASIC_PUNCTUATION = /([;,.!?])/
171
+
172
+
166
173
  def sanitize_string string
167
174
  string = string
168
- .strip()
169
- .gsub(/http[^\s]+/, '') # Remove any hyperlinks
170
- .gsub(SIMPLE_CODE_BLOCK, '') # Remove code blocks and illegal characters
171
- .gsub(EXTENDED_CODE_BLOCK, '')
172
- .gsub(ILLEGAL_CHARACTERS, '')
173
- .gsub(/([:;,.!?])/, '\1 ') # Put whitespace after punctuation for proper separation
175
+ .gsub(HYPERLINKS, ''.freeze) # Remove any hyperlinks
176
+ .gsub(SIMPLE_CODE_BLOCK, ''.freeze) # Remove code blocks and illegal characters
177
+ .gsub(EXTENDED_CODE_BLOCK, ''.freeze)
178
+ .gsub(ILLEGAL_CHARACTERS, ''.freeze)
179
+ .gsub(REPEATED_PUNCTUATION, '\1'.freeze) # Trim down repeated punctuation
180
+ .gsub(BASIC_PUNCTUATION, '\1 '.freeze) # Put whitespace after punctuation for proper separation
174
181
  .strip()
175
182
 
176
183
  ends_with_punctuation = PUNCTUATION.any? { |p| string.end_with? p }
177
- string = string+'.' unless ends_with_punctuation
184
+ string = string+'.'.freeze unless ends_with_punctuation
178
185
 
179
186
  string
180
187
  end
data/lita-markov.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "lita-markov"
3
- spec.version = "1.0.2"
3
+ spec.version = "1.1.0"
4
4
  spec.authors = ["Dirk Gadsden"]
5
5
  spec.email = ["dirk@dirk.to"]
6
6
  spec.description = "Markov chains for Lita."
@@ -17,6 +17,7 @@ Gem::Specification.new do |spec|
17
17
  spec.add_runtime_dependency "sequel", "~> 4.28.0"
18
18
  spec.add_runtime_dependency "mysql2", "~> 0.4.1"
19
19
  spec.add_runtime_dependency "pg", "~> 0.18.4"
20
+ spec.add_runtime_dependency "oj", "~> 2.13.1"
20
21
 
21
22
  spec.add_development_dependency "bundler", "~> 1.3"
22
23
  spec.add_development_dependency "pry-byebug"
@@ -0,0 +1,8 @@
1
+ <html>
2
+ <body>
3
+ <form action="/markov/upload_backlog" method="post" enctype="multipart/form-data">
4
+ <h3>Select Slack JSON archive to upload:</h3>
5
+ <p><input type="file" name="backlog" /></p>
6
+ <p><input type="submit" /></p>
7
+ </form>
8
+ </html>
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lita-markov
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dirk Gadsden
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: 0.18.4
69
+ - !ruby/object:Gem::Dependency
70
+ name: oj
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: 2.13.1
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: 2.13.1
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: bundler
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -157,6 +171,7 @@ files:
157
171
  - spec/lita/handlers/markov_spec.rb
158
172
  - spec/spec_helper.rb
159
173
  - templates/.gitkeep
174
+ - templates/backlog_form.erb
160
175
  homepage: http://github.com/dirk/lita-markov
161
176
  licenses: []
162
177
  metadata:
@@ -177,7 +192,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
177
192
  version: '0'
178
193
  requirements: []
179
194
  rubyforge_project:
180
- rubygems_version: 2.4.5
195
+ rubygems_version: 2.4.5.1
181
196
  signing_key:
182
197
  specification_version: 4
183
198
  summary: Markov chains for Lita.