lita-markov 0.0.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 80906c8495bb1dcbbc33b970047390e606b429c8
4
- data.tar.gz: 85f84a17b45289e6c86334318322eebba6724b8f
3
+ metadata.gz: 627ba42fa0054e9a6cd2d46bbde5cb6510f1b985
4
+ data.tar.gz: c53c2b0f565930850bd0a60085d6d02c69128eee
5
5
  SHA512:
6
- metadata.gz: f7c18adeba628b7e6cc48a22922fd745595104ce6846c919b773ab475e7c22b4d2e852d747055adadaec74b0dc881cab36254b305ab4c2df179c6a77e1b118e3
7
- data.tar.gz: e4fce6fb0f3d70d04d2153fb202d1740c223abadc475a18a6629ba5080f211219dc58a0817bd36d58d6529801b8da7a2eaeb700585b7bb69d7ff1c20daf45145
6
+ metadata.gz: b634fa615bb4960166da153db753ea5c8f05f9dece26a856626fbdd8b73be6ee887ce559a55fdfa17d215085e9d61b9e62c40d65a89630ef813fba33a756c7d5
7
+ data.tar.gz: 885beb293a6f002273be56e374700e56d2739bd451076ccffc5114d7a261cc9aee76c4e6fabb379dc564d62301fe9ab7c0a15d9c9cf6cf3659ba5fe0c1ea682f
data/.gitignore CHANGED
@@ -1,2 +1,3 @@
1
+ .DS_Store
1
2
  Gemfile.lock
2
3
  tmp
data/README.md CHANGED
@@ -11,6 +11,17 @@ Add `lita-markov` to your Lita instance's Gemfile:
11
11
  gem 'lita-markov'
12
12
  ```
13
13
 
14
+ Configure the database URL for your SQL database
15
+ ([Sequel](http://sequel.jeremyevans.net/) is used for
16
+ communicating with databases):
17
+
18
+ ```ruby
19
+ # lita_config.rb
20
+ Lita.configure do |config|
21
+ config.handlers.markov.database_url = ENV['DATABASE_URL']
22
+ end
23
+ ```
24
+
14
25
  ## Usage
15
26
 
16
27
  The bot will automatically ingest all messages into the SQL-backed Markov
@@ -0,0 +1,172 @@
1
+ require 'sequel'
2
+
3
+ class Lita::Handlers::Markov
4
+ class Engine
5
+ class EmptyDictionaryError < StandardError; end
6
+
7
+ # Default development database URL
8
+ DEFAULT_DATABASE_URL = 'mysql2://root@localhost/lita-markov'
9
+
10
+ attr_accessor :handler
11
+ attr_reader :db
12
+
13
# Connects to the dictionary database and makes sure the `dictionary`
# table exists.
#
# handler - Optional handler. When given, its `config.database_url` is used
#           if it is set; otherwise DEFAULT_DATABASE_URL is used.
def initialize(handler = nil)
  @handler = handler
  @depth = 2

  # Read the configured URL first and only then fall back to the default.
  # Assigning the default first would turn the `||=` into a no-op and the
  # handler configuration would never be consulted.
  database_url = handler.config.database_url if handler
  database_url ||= DEFAULT_DATABASE_URL

  @db = Sequel.connect database_url

  @db.create_table?(:dictionary) do
    column :user, String, null: false          # The user the states are associated with
    column :current_state, String, null: false # Word(s) the user has "said"
    column :next_state, String, null: false    # Word that follows that word
    column :frequency, Integer, null: false    # Frequency that the next word follows the current state/word

    primary_key [:user, :current_state, :next_state]
  end
end
31
+
32
+ # user - Username of the user
33
+ # string - String of words that the user has just said (ideally a sentence)
34
# Learns the word transitions contained in a message.
#
# user   - Identifier of the user who wrote the message
# string - Text the user has just said (ideally a sentence)
def ingest(user, string)
  words = separate_string(sanitize_string(string))

  return if words.empty?

  # Capitalize the first word
  words = [words.first.capitalize] + words.drop(1)

  # Slide a window of `@depth + 1` words over the message: the first
  # `@depth` words form the current state, the last word is what follows.
  words.each_cons(@depth + 1) do |window|
    current_state = window.first(@depth).join(' ')
    next_state = window.last

    add_entry user, current_state, next_state
  end
end
51
+
52
# Records one observed transition for a user: bumps the frequency of an
# existing row or inserts a new row with frequency 1.
#
# user          - Identifier of the user
# current_state - Word(s) the user has said
# next_state    - Word that followed the current state
def add_entry(user, current_state, next_state)
  dictionary = @db[:dictionary]

  entry = {
    user: user,
    current_state: current_state,
    next_state: next_state
  }

  @db.transaction do
    # Increment inside the database instead of reading the value into Ruby
    # and writing it back; `update` reports how many rows it touched.
    updated = dictionary.where(entry).update(frequency: Sequel.expr(:frequency) + 1)

    # Nothing was updated, so this transition has not been seen before
    dictionary.insert(entry.merge(frequency: 1)) if updated.zero?
  end
end
72
+
73
# Picks a random word to start a sentence with, preferring states that
# begin with a capital letter.
#
# user - Optional user identifier; when given only that user's states are
#        considered, otherwise states of all users are.
#
# Raises EmptyDictionaryError when there are no states to pick from.
def random_capitalized_word(user = nil)
  dictionary = @db[:dictionary]
  dictionary = dictionary.where(user: user) unless user.nil?

  states = dictionary.map(:current_state)
  capitalized_states = states.select { |state| state =~ /\A[A-Z]/ }

  state = (capitalized_states.empty? ? states : capitalized_states).sample

  raise EmptyDictionaryError, 'No data for user' if state.nil?

  state.split(' ').first
end

# Picks a random second word among the states whose first word is
# `first_word`.
#
# first_word - Word the state has to start with
# user       - Optional user identifier restricting the lookup
#
# Raises EmptyDictionaryError when no state starts with `first_word`.
def random_second_word(first_word, user = nil)
  dictionary = @db[:dictionary]
  dictionary = dictionary.where(user: user) unless user.nil?

  # The space before the wildcard anchors the match on the whole first
  # word, so that "He" does not match the state "Hello there".
  states = dictionary
    .where(Sequel.like(:current_state, "#{first_word} %"))
    .map(:current_state)

  state = states.sample

  raise EmptyDictionaryError, 'No data for user' if state.nil?

  state.split(' ').last
end

# Whether the string ends with one of the PUNCTUATION marks.
def is_punctuation?(string)
  PUNCTUATION.any? { |p| string.end_with? p }
end

# Draws the word following `current_state` for the user, weighted by how
# often each candidate has been recorded. Returns nil without candidates.
def get_next_state(user, current_state)
  states = @db[:dictionary]
    .where(user: user, current_state: current_state)
    .select(:next_state, :frequency)
    .all

  # Repeat every candidate `frequency` times so a uniform sample is weighted
  distribution = states.flat_map do |state|
    [state[:next_state]] * state[:frequency]
  end

  distribution.sample
end

# Generates a sentence from the states recorded for the user.
#
# user   - Identifier of the user
# length - Maximum number of words/punctuation tokens (default 30)
#
# Raises EmptyDictionaryError when nothing is recorded for the user.
def generate_sentence_for(user, length = 30)
  # Seed the sentence from this user's own states
  first_word = random_capitalized_word user
  second_word = random_second_word first_word, user

  sentence = [first_word, second_word]

  while sentence.length < length
    current_state = sentence.last(@depth).join ' '

    next_state = get_next_state user, current_state

    # Stop if we failed to find a next state
    break if next_state.nil?

    sentence << next_state

    break if is_punctuation? next_state
  end

  # Only closing punctuation is attached without a space; a sentence that
  # ended on a regular word keeps all of its words separated.
  return sentence.join(' ') unless is_punctuation?(sentence.last)

  sentence[0..-2].join(' ') + sentence.last
end
138
+
139
# Splits a string into words and sentence-ending punctuation tokens.
#
# string - Text to split
#
# Returns an Array of non-empty Strings; '.', '!' and '?' become tokens of
# their own, whitespace is dropped.
def separate_string(string)
  # The punctuation is in a capture group so that `split` keeps it in the
  # results instead of discarding it like the whitespace separators.
  string
    .split(/([.!?])|\s+/)
    .map(&:strip)
    .reject(&:empty?)
end
147
+
148
PUNCTUATION = ['.', '!', '?'].freeze

# Don't allow anything besides letters, digits, whitespace, and punctuation
# (`\w` already covers the digits)
ILLEGAL_CHARACTERS = /[^\w\s:;,.!?#@]/

SIMPLE_CODE_BLOCK = /`[^`]+`/
# Non-greedy, so that text between two separate fenced blocks is kept
EXTENDED_CODE_BLOCK = /```.+?```/m

# Cleans a chat message up for ingestion: removes hyperlinks, code blocks
# and illegal characters, puts a space after punctuation and makes sure
# the result ends with a punctuation mark.
#
# string - Raw message text
#
# Returns the sanitized String.
def sanitize_string(string)
  string = string
    .strip
    .gsub(%r{https?://\S+}, '')    # Remove hyperlinks (but not words like "httpd")
    .gsub(EXTENDED_CODE_BLOCK, '') # Remove fenced blocks first, while their fences are intact
    .gsub(SIMPLE_CODE_BLOCK, '')   # Then remove inline code
    .gsub(ILLEGAL_CHARACTERS, '')
    .gsub(/([:;,.!?])/, '\1 ')     # Put whitespace after punctuation for proper separation
    .strip

  string.end_with?(*PUNCTUATION) ? string : "#{string}."
end
171
+ end
172
+ end
@@ -1,8 +1,13 @@
1
+ # Forward definition of Markov handler class
2
+ class Lita::Handlers::Markov < Lita::Handler; end
3
+
4
+ require 'lita/handlers/markov/engine'
5
+
1
6
  module Lita::Handlers
2
- class Markov < Lita::Handler
3
- Dictionary = MarkyMarkov::PersistentJSONDictionary
7
+ class Markov
8
+ attr_reader :engine
4
9
 
5
- REDIS_KEY_PREFIX = 'lita-markov:'
10
+ config :database_url
6
11
 
7
12
  route(/.+/, :ingest, command: false)
8
13
 
@@ -10,6 +15,12 @@ module Lita::Handlers
10
15
  'markov USER' => 'Generate a markov chain from the given user.'
11
16
  })
12
17
 
18
# Runs the superclass constructor with the robot, then creates the engine
# for this handler, handing the handler itself to it.
def initialize(robot)
  super

  @engine = Engine.new(self)
end
23
+
13
24
  def ingest(chat)
14
25
  # Don't ingest messages addressed to ourselves
15
26
  return if chat.command?
@@ -17,26 +28,20 @@ module Lita::Handlers
17
28
  message = chat.matches[0].strip
18
29
 
19
30
  # Get the mention name (ie. 'dirk') of the user
20
- name = chat.user.mention_name
21
- dictionary = dictionary_for_user name
22
-
23
- # Passing `false` to indicate it's a string and not a file name
24
- dictionary.parse_source message, false
31
+ id = chat.user.id
25
32
 
26
- save_dictionary name, dictionary
33
+ @engine.ingest id, message
27
34
  end
28
35
 
29
36
  def generate(chat)
30
37
  name = chat.matches[0][0].strip
31
-
32
- dictionary = dictionary_for_user name
33
- generator = MarkovSentenceGenerator.new dictionary
38
+ id = Lita::User.fuzzy_find(name).id
34
39
 
35
40
  begin
36
- sentence = generator.generate_sentence 1
41
+ sentence = @engine.generate_sentence_for id
37
42
 
38
43
  chat.reply sentence
39
- rescue EmptyDictionaryError
44
+ rescue Engine::EmptyDictionaryError
40
45
  chat.reply "Looks like #{name} hasn't said anything!"
41
46
  end
42
47
  end
data/lib/lita-markov.rb CHANGED
@@ -4,7 +4,6 @@ Lita.load_locales Dir[File.expand_path(
4
4
  File.join("..", "..", "locales", "*.yml"), __FILE__
5
5
  )]
6
6
 
7
- require 'marky_markov/persistent_json_dictionary'
8
7
  require 'lita/handlers/markov'
9
8
 
10
9
  # Lita::Handlers::Markov.template_root File.expand_path(
data/lita-markov.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "lita-markov"
3
- spec.version = "0.0.1"
3
+ spec.version = "1.0.0"
4
4
  spec.authors = ["Dirk Gadsden"]
5
5
  spec.email = ["dirk@dirk.to"]
6
6
  spec.description = "Markov chains for Lita."
@@ -13,9 +13,10 @@ Gem::Specification.new do |spec|
13
13
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
14
14
  spec.require_paths = ["lib"]
15
15
 
16
- spec.add_runtime_dependency "lita", ">= 4.6"
17
- spec.add_runtime_dependency "marky_markov", "~> 0.3.5"
18
- spec.add_runtime_dependency "oj", "~> 2.13.1"
16
+ spec.add_runtime_dependency "lita", ">= 4.6"
17
+ spec.add_runtime_dependency "sequel", "~> 4.28.0"
18
+ spec.add_runtime_dependency "mysql2", "~> 0.4.1"
19
+ spec.add_runtime_dependency "pg", "~> 0.18.4"
19
20
 
20
21
  spec.add_development_dependency "bundler", "~> 1.3"
21
22
  spec.add_development_dependency "pry-byebug"
@@ -0,0 +1,54 @@
1
+ require 'spec_helper'
2
+ require 'pry'
3
+
4
# Specs for the Markov engine: message sanitizing, word separation,
# ingestion into the dictionary table and sentence generation.
describe Lita::Handlers::Markov::Engine do
  let(:dictionary) { subject.db[:dictionary] }

  # Every example starts with an empty dictionary table
  before do
    dictionary.delete
  end

  it 'will sanitize links from a message' do
    sanitized = subject.sanitize_string('hello https://www.example.com world!')

    expect(sanitized).to eql('hello world!')
  end

  it 'will remove code blocks from a message' do
    sanitized = subject.sanitize_string('I have `code in` me.')

    expect(sanitized).to eql('I have me.')
  end

  it 'will remove illegal characters from a message' do
    sanitized = subject.sanitize_string('I have a bad % character.')

    expect(sanitized).to eql('I have a bad character.')
  end

  it 'will separate a string into words' do
    tokens = subject.separate_string("I am\n so totally\tseparated.")

    expect(tokens).to eql(['I', 'am', 'so', 'totally', 'separated', '.'])
  end

  it 'will ingest messages' do
    subject.ingest('user', 'hello big, fun world!')

    # The first state is stored with its first word capitalized
    first_state = dictionary.where(current_state: 'Hello big,')
    expect(first_state.count).to eql(1)

    # The last state, leading into the punctuation, is stored as well
    last_state = dictionary.where(current_state: 'fun world', next_state: '!')
    expect(last_state.count).to eql(1)

    subject.ingest('user', 'Hello big, fun planet!')

    # Seeing "Hello big," -> "fun" a second time raises its frequency
    repeated = dictionary.where(current_state: 'Hello big,', next_state: 'fun')
    expect(repeated.get(:frequency)).to eql(2)
  end

  it 'will generate a sentence' do
    subject.ingest('user', 'Hello cruel world.')

    expect(subject.generate_sentence_for('user')).to eql('Hello cruel world.')
  end
end
@@ -3,7 +3,7 @@ require 'pry'
3
3
 
4
4
  describe Lita::Handlers::Markov, lita_handler: true do
5
5
  before(:each) do
6
- Lita.redis.flushall
6
+ subject.engine.db[:dictionary].delete
7
7
  end
8
8
 
9
9
  it "won't call #ingest for non-command messages" do
@@ -13,16 +13,6 @@ describe Lita::Handlers::Markov, lita_handler: true do
13
13
  send_command 'bar'
14
14
  end
15
15
 
16
- it "will ingest a message into that person's dictionary" do
17
- send_message 'hello markov world'
18
- send_message 'hello markov planet'
19
-
20
- dictionary = subject.dictionary_for_user user.mention_name
21
-
22
- # Check that the messages made it into the dictionary
23
- expect(dictionary.dictionary[['hello', 'markov']]).to eql ['world', 'planet']
24
- end
25
-
26
16
  it 'will build a sentence' do
27
17
  send_message 'I love cookies!'
28
18
  send_message 'I love pancakes!'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lita-markov
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dirk Gadsden
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-23 00:00:00.000000000 Z
11
+ date: 2015-11-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: lita
@@ -25,33 +25,47 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: '4.6'
27
27
  - !ruby/object:Gem::Dependency
28
- name: marky_markov
28
+ name: sequel
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 0.3.5
33
+ version: 4.28.0
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 0.3.5
40
+ version: 4.28.0
41
41
  - !ruby/object:Gem::Dependency
42
- name: oj
42
+ name: mysql2
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 2.13.1
47
+ version: 0.4.1
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: 2.13.1
54
+ version: 0.4.1
55
+ - !ruby/object:Gem::Dependency
56
+ name: pg
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 0.18.4
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 0.18.4
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: bundler
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -136,11 +150,11 @@ files:
136
150
  - Rakefile
137
151
  - lib/lita-markov.rb
138
152
  - lib/lita/handlers/markov.rb
139
- - lib/marky_markov/persistent_json_dictionary.rb
153
+ - lib/lita/handlers/markov/engine.rb
140
154
  - lita-markov.gemspec
141
155
  - locales/en.yml
156
+ - spec/lita/handlers/markov/engine_spec.rb
142
157
  - spec/lita/handlers/markov_spec.rb
143
- - spec/marky_markov/persistent_json_dictionary_spec.rb
144
158
  - spec/spec_helper.rb
145
159
  - templates/.gitkeep
146
160
  homepage: http://github.com/dirk/lita-markov
@@ -163,11 +177,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
163
177
  version: '0'
164
178
  requirements: []
165
179
  rubyforge_project:
166
- rubygems_version: 2.4.5.1
180
+ rubygems_version: 2.4.5
167
181
  signing_key:
168
182
  specification_version: 4
169
183
  summary: Markov chains for Lita.
170
184
  test_files:
185
+ - spec/lita/handlers/markov/engine_spec.rb
171
186
  - spec/lita/handlers/markov_spec.rb
172
- - spec/marky_markov/persistent_json_dictionary_spec.rb
173
187
  - spec/spec_helper.rb
@@ -1,34 +0,0 @@
1
- require 'marky_markov'
2
- require 'oj'
3
-
4
- module MarkyMarkov
5
- class PersistentJSONDictionary < ::PersistentDictionary
6
- def initialize(*args)
7
- super(*args)
8
-
9
- @dictionary = {}
10
- @capitalized_words = []
11
- end
12
-
13
- # No-op instead of reading from the filesystem
14
- def open_dictionary
15
- nil
16
- end
17
-
18
- def load_json(json)
19
- data = Oj.load json
20
-
21
- @depth = data['depth']
22
- @dictionary = data['dictionary']
23
- @capitalized_words = data['capitalized_words']
24
- end
25
-
26
- def to_json
27
- Oj.dump(
28
- 'depth' => @depth,
29
- 'dictionary' => @dictionary,
30
- 'capitalized_words' => @capitalized_words
31
- )
32
- end
33
- end
34
- end
@@ -1,28 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe MarkyMarkov::PersistentJSONDictionary do
4
- subject { MarkyMarkov::PersistentJSONDictionary.new 'whoa' }
5
-
6
- it 'initializes with sensible defaults' do
7
- expect(subject.dictionary).to eq({})
8
- end
9
-
10
- it 'saves a dictionary' do
11
- subject.add_word ['a', 'b'], 'c'
12
-
13
- json = subject.to_json
14
-
15
- expect(json).to eql '{"depth":2,"dictionary":{"^#1":[["a","b"],["c"]]},"capitalized_words":[]}'
16
- end
17
-
18
- it 'saves and loads a dictionary' do
19
- subject.add_word ['a', 'b'], 'c'
20
-
21
- json = subject.to_json
22
-
23
- new_dictionary = MarkyMarkov::PersistentJSONDictionary.new 'whoa-another-one'
24
- new_dictionary.load_json json
25
-
26
- expect(new_dictionary.dictionary).to eql(['a', 'b'] => ['c'])
27
- end
28
- end