lita-markov 0.0.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 80906c8495bb1dcbbc33b970047390e606b429c8
4
- data.tar.gz: 85f84a17b45289e6c86334318322eebba6724b8f
3
+ metadata.gz: 627ba42fa0054e9a6cd2d46bbde5cb6510f1b985
4
+ data.tar.gz: c53c2b0f565930850bd0a60085d6d02c69128eee
5
5
  SHA512:
6
- metadata.gz: f7c18adeba628b7e6cc48a22922fd745595104ce6846c919b773ab475e7c22b4d2e852d747055adadaec74b0dc881cab36254b305ab4c2df179c6a77e1b118e3
7
- data.tar.gz: e4fce6fb0f3d70d04d2153fb202d1740c223abadc475a18a6629ba5080f211219dc58a0817bd36d58d6529801b8da7a2eaeb700585b7bb69d7ff1c20daf45145
6
+ metadata.gz: b634fa615bb4960166da153db753ea5c8f05f9dece26a856626fbdd8b73be6ee887ce559a55fdfa17d215085e9d61b9e62c40d65a89630ef813fba33a756c7d5
7
+ data.tar.gz: 885beb293a6f002273be56e374700e56d2739bd451076ccffc5114d7a261cc9aee76c4e6fabb379dc564d62301fe9ab7c0a15d9c9cf6cf3659ba5fe0c1ea682f
data/.gitignore CHANGED
@@ -1,2 +1,3 @@
1
+ .DS_Store
1
2
  Gemfile.lock
2
3
  tmp
data/README.md CHANGED
@@ -11,6 +11,17 @@ Add `lita-markov` to your Lita instance's Gemfile:
11
11
  gem 'lita-markov'
12
12
  ```
13
13
 
14
+ Configure the database URL for your SQL database
15
+ ([Sequel](http://sequel.jeremyevans.net/) is used for
16
+ communicating with databases):
17
+
18
+ ```ruby
19
+ # lita_config.rb
20
+ Lita.configure do |config|
21
+ config.handlers.markov.database_url = ENV['DATABASE_URL']
22
+ end
23
+ ```
24
+
14
25
  ## Usage
15
26
 
16
27
  The bot will automatically ingest all messages into the Redis-backed Markov
@@ -0,0 +1,172 @@
1
+ require 'sequel'
2
+
3
+ class Lita::Handlers::Markov
4
+ class Engine
5
+ class EmptyDictionaryError < StandardError; end
6
+
7
+ # Default development database URL
8
+ DEFAULT_DATABASE_URL = 'mysql2://root@localhost/lita-markov'
9
+
10
+ attr_accessor :handler
11
+ attr_reader :db
12
+
13
+ def initialize(handler = nil)
14
+ @handler = handler
15
+ @depth = 2
16
+
17
+ database_url = DEFAULT_DATABASE_URL
18
+ database_url ||= handler.config.database_url if handler
19
+
20
+ @db = Sequel.connect database_url
21
+
22
+ @db.create_table?(:dictionary) do
23
+ column :user, String, null: false # The user the states are associated with
24
+ column :current_state, String, null: false # Word(s) the user has "said"
25
+ column :next_state, String, null: false # Word that follows that word
26
+ column :frequency, Integer, null: false # Frequency that the next word follows the current state/word
27
+
28
+ primary_key [:user, :current_state, :next_state]
29
+ end
30
+ end
31
+
32
+ # user - Username of the user
33
+ # string - String of words that the user has just said (ideally a sentence)
34
+ def ingest user, string
35
+ string = sanitize_string string
36
+ words = separate_string string
37
+
38
+ return if words.length == 0
39
+
40
+ # Capitalize the first word
41
+ words = [words[0].capitalize] + words.slice(1..-1)
42
+
43
+ # Iterate over it one step at a time in sets of `@depth + 1`
44
+ words.each_cons(@depth + 1) do |words|
45
+ current_state = words[0]+' '+words[1]
46
+ next_state = words[2]
47
+
48
+ add_entry user, current_state, next_state
49
+ end # words.each_cons
50
+ end # def ingest
51
+
52
+ def add_entry user, current_state, next_state
53
+ dictionary = @db[:dictionary]
54
+
55
+ @db.transaction do
56
+ entry = {
57
+ user: user,
58
+ current_state: current_state,
59
+ next_state: next_state
60
+ }
61
+
62
+ if dictionary.where(entry).any?
63
+ # Entry is already present, so increment its frequency
64
+ frequency = dictionary.where(entry).get(:frequency)
65
+
66
+ dictionary.where(entry).update frequency: frequency + 1
67
+ else
68
+ dictionary.insert entry.merge(frequency: 1)
69
+ end
70
+ end
71
+ end
72
+
73
+ def random_capitalized_word
74
+ states = @db[:dictionary].map(:current_state)
75
+
76
+ capitalized_states = states.select do |state|
77
+ /^[A-Z]/ =~ state
78
+ end
79
+
80
+ if capitalized_states.length > 0
81
+ state = capitalized_states.sample
82
+ else
83
+ state = states.sample
84
+ end
85
+
86
+ raise EmptyDictionaryError, 'No data for user' if state.nil?
87
+
88
+ return state.split(' ').first
89
+ end
90
+
91
+ def random_second_word(first_word)
92
+ states = @db[:dictionary]
93
+ .where(Sequel.like(:current_state, first_word+'%'))
94
+ .map(:current_state)
95
+
96
+ state = states.sample
97
+ state.split(' ').last
98
+ end
99
+
100
+ def is_punctuation?(string)
101
+ PUNCTUATION.any? { |p| string.end_with? p }
102
+ end
103
+
104
+ def get_next_state(user, current_state)
105
+ states = @db[:dictionary]
106
+ .where(user: user, current_state: current_state)
107
+ .select(:next_state, :frequency)
108
+ .all
109
+
110
+ distribution = states.flat_map do |state|
111
+ Array.new(state[:frequency]) { state[:next_state] }
112
+ end
113
+
114
+ distribution.sample
115
+ end
116
+
117
+ def generate_sentence_for(user, length = 30)
118
+ first_word = random_capitalized_word
119
+ second_word = random_second_word first_word
120
+
121
+ sentence = [first_word, second_word]
122
+
123
+ while sentence.length < length
124
+ current_state = sentence.slice(sentence.length - @depth, @depth).join ' '
125
+
126
+ next_state = get_next_state user, current_state
127
+
128
+ # Stop if we failed to find a next state
129
+ break if next_state.nil?
130
+
131
+ sentence << next_state
132
+
133
+ break if is_punctuation? next_state
134
+ end
135
+
136
+ sentence.slice(0..-2).join(' ') + sentence.last
137
+ end
138
+
139
+ def separate_string string
140
+ # Put the punctuation in a capture group so that it is kept in the
141
+ # split results
142
+ string
143
+ .split(/([.!?])|\s+/)
144
+ .map { |w| w.strip }
145
+ .select { |w| !w.empty? }
146
+ end
147
+
148
+ PUNCTUATION = ['.', '!', '?']
149
+
150
+ # Don't allow anything besides letters, digits, whitespace, and punctuation
151
+ ILLEGAL_CHARACTERS = /[^\w\d\s:;,.!?#@]/
152
+
153
+ SIMPLE_CODE_BLOCK = /`[^`]+`/
154
+ EXTENDED_CODE_BLOCK = /```.+```/m
155
+
156
+ def sanitize_string string
157
+ string = string
158
+ .strip()
159
+ .gsub(/http[^\s]+/, '') # Remove any hyperlinks
160
+ .gsub(SIMPLE_CODE_BLOCK, '') # Remove code blocks and illegal characters
161
+ .gsub(EXTENDED_CODE_BLOCK, '')
162
+ .gsub(ILLEGAL_CHARACTERS, '')
163
+ .gsub(/([:;,.!?])/, '\1 ') # Put whitespace after punctuation for proper separation
164
+ .strip()
165
+
166
+ ends_with_punctuation = PUNCTUATION.any? { |p| string.end_with? p }
167
+ string = string+'.' unless ends_with_punctuation
168
+
169
+ string
170
+ end
171
+ end
172
+ end
@@ -1,8 +1,13 @@
1
+ # Forward definition of Markov handler class
2
+ class Lita::Handlers::Markov < Lita::Handler; end
3
+
4
+ require 'lita/handlers/markov/engine'
5
+
1
6
  module Lita::Handlers
2
- class Markov < Lita::Handler
3
- Dictionary = MarkyMarkov::PersistentJSONDictionary
7
+ class Markov
8
+ attr_reader :engine
4
9
 
5
- REDIS_KEY_PREFIX = 'lita-markov:'
10
+ config :database_url
6
11
 
7
12
  route(/.+/, :ingest, command: false)
8
13
 
@@ -10,6 +15,12 @@ module Lita::Handlers
10
15
  'markov USER' => 'Generate a markov chain from the given user.'
11
16
  })
12
17
 
18
+ def initialize(robot)
19
+ super(robot)
20
+
21
+ @engine = Engine.new self
22
+ end
23
+
13
24
  def ingest(chat)
14
25
  # Don't ingest messages addressed to ourselves
15
26
  return if chat.command?
@@ -17,26 +28,20 @@ module Lita::Handlers
17
28
  message = chat.matches[0].strip
18
29
 
19
30
  # Get the mention name (ie. 'dirk') of the user
20
- name = chat.user.mention_name
21
- dictionary = dictionary_for_user name
22
-
23
- # Passing `false` to indicate it's a string and not a file name
24
- dictionary.parse_source message, false
31
+ id = chat.user.id
25
32
 
26
- save_dictionary name, dictionary
33
+ @engine.ingest id, message
27
34
  end
28
35
 
29
36
  def generate(chat)
30
37
  name = chat.matches[0][0].strip
31
-
32
- dictionary = dictionary_for_user name
33
- generator = MarkovSentenceGenerator.new dictionary
38
+ id = Lita::User.fuzzy_find(name).id
34
39
 
35
40
  begin
36
- sentence = generator.generate_sentence 1
41
+ sentence = @engine.generate_sentence_for id
37
42
 
38
43
  chat.reply sentence
39
- rescue EmptyDictionaryError
44
+ rescue Engine::EmptyDictionaryError
40
45
  chat.reply "Looks like #{name} hasn't said anything!"
41
46
  end
42
47
  end
data/lib/lita-markov.rb CHANGED
@@ -4,7 +4,6 @@ Lita.load_locales Dir[File.expand_path(
4
4
  File.join("..", "..", "locales", "*.yml"), __FILE__
5
5
  )]
6
6
 
7
- require 'marky_markov/persistent_json_dictionary'
8
7
  require 'lita/handlers/markov'
9
8
 
10
9
  # Lita::Handlers::Markov.template_root File.expand_path(
data/lita-markov.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "lita-markov"
3
- spec.version = "0.0.1"
3
+ spec.version = "1.0.0"
4
4
  spec.authors = ["Dirk Gadsden"]
5
5
  spec.email = ["dirk@dirk.to"]
6
6
  spec.description = "Markov chains for Lita."
@@ -13,9 +13,10 @@ Gem::Specification.new do |spec|
13
13
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
14
14
  spec.require_paths = ["lib"]
15
15
 
16
- spec.add_runtime_dependency "lita", ">= 4.6"
17
- spec.add_runtime_dependency "marky_markov", "~> 0.3.5"
18
- spec.add_runtime_dependency "oj", "~> 2.13.1"
16
+ spec.add_runtime_dependency "lita", ">= 4.6"
17
+ spec.add_runtime_dependency "sequel", "~> 4.28.0"
18
+ spec.add_runtime_dependency "mysql2", "~> 0.4.1"
19
+ spec.add_runtime_dependency "pg", "~> 0.18.4"
19
20
 
20
21
  spec.add_development_dependency "bundler", "~> 1.3"
21
22
  spec.add_development_dependency "pry-byebug"
@@ -0,0 +1,54 @@
1
+ require 'spec_helper'
2
+ require 'pry'
3
+
4
+ describe Lita::Handlers::Markov::Engine do
5
+ before(:each) do
6
+ subject.db[:dictionary].delete
7
+ end
8
+
9
+ it 'will sanitize links from a message' do
10
+ message = 'hello https://www.example.com world!'
11
+
12
+ expect(subject.sanitize_string(message)).to eql 'hello world!'
13
+ end
14
+
15
+ it 'will remove code blocks from a message' do
16
+ message = 'I have `code in` me.'
17
+
18
+ expect(subject.sanitize_string(message)).to eql 'I have me.'
19
+ end
20
+
21
+ it 'will remove illegal characters from a message' do
22
+ message = 'I have a bad % character.'
23
+
24
+ expect(subject.sanitize_string(message)).to eql 'I have a bad character.'
25
+ end
26
+
27
+ it 'will separate a string into words' do
28
+ string = "I am\n so totally\tseparated."
29
+
30
+ expect(subject.separate_string(string)).to eql ['I', 'am', 'so', 'totally', 'separated', '.']
31
+ end
32
+
33
+ it 'will ingest messages' do
34
+ dictionary = subject.db[:dictionary]
35
+
36
+ subject.ingest('user', 'hello big, fun world!')
37
+
38
+ # Check that the first state made it in and is capitalized
39
+ expect(dictionary.where(current_state: 'Hello big,').count).to eql 1
40
+ # Check that the last state made it in
41
+ expect(dictionary.where(current_state: 'fun world', next_state: '!').count).to eql 1
42
+
43
+ subject.ingest('user', 'Hello big, fun planet!')
44
+
45
+ # Check that the frequency of the "Hello big," -> "fun" state went up
46
+ expect(dictionary.where(current_state: 'Hello big,', next_state: 'fun').get(:frequency)).to eql 2
47
+ end
48
+
49
+ it 'will generate a sentence' do
50
+ subject.ingest('user', 'Hello cruel world.')
51
+
52
+ expect(subject.generate_sentence_for 'user').to eql 'Hello cruel world.'
53
+ end
54
+ end
@@ -3,7 +3,7 @@ require 'pry'
3
3
 
4
4
  describe Lita::Handlers::Markov, lita_handler: true do
5
5
  before(:each) do
6
- Lita.redis.flushall
6
+ subject.engine.db[:dictionary].delete
7
7
  end
8
8
 
9
9
  it "won't call #ingest for non-command messages" do
@@ -13,16 +13,6 @@ describe Lita::Handlers::Markov, lita_handler: true do
13
13
  send_command 'bar'
14
14
  end
15
15
 
16
- it "will ingest a message into that person's dictionary" do
17
- send_message 'hello markov world'
18
- send_message 'hello markov planet'
19
-
20
- dictionary = subject.dictionary_for_user user.mention_name
21
-
22
- # Check that the messages made it into the dictionary
23
- expect(dictionary.dictionary[['hello', 'markov']]).to eql ['world', 'planet']
24
- end
25
-
26
16
  it 'will build a sentence' do
27
17
  send_message 'I love cookies!'
28
18
  send_message 'I love pancakes!'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lita-markov
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dirk Gadsden
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-23 00:00:00.000000000 Z
11
+ date: 2015-11-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: lita
@@ -25,33 +25,47 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: '4.6'
27
27
  - !ruby/object:Gem::Dependency
28
- name: marky_markov
28
+ name: sequel
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 0.3.5
33
+ version: 4.28.0
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 0.3.5
40
+ version: 4.28.0
41
41
  - !ruby/object:Gem::Dependency
42
- name: oj
42
+ name: mysql2
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 2.13.1
47
+ version: 0.4.1
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: 2.13.1
54
+ version: 0.4.1
55
+ - !ruby/object:Gem::Dependency
56
+ name: pg
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 0.18.4
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 0.18.4
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: bundler
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -136,11 +150,11 @@ files:
136
150
  - Rakefile
137
151
  - lib/lita-markov.rb
138
152
  - lib/lita/handlers/markov.rb
139
- - lib/marky_markov/persistent_json_dictionary.rb
153
+ - lib/lita/handlers/markov/engine.rb
140
154
  - lita-markov.gemspec
141
155
  - locales/en.yml
156
+ - spec/lita/handlers/markov/engine_spec.rb
142
157
  - spec/lita/handlers/markov_spec.rb
143
- - spec/marky_markov/persistent_json_dictionary_spec.rb
144
158
  - spec/spec_helper.rb
145
159
  - templates/.gitkeep
146
160
  homepage: http://github.com/dirk/lita-markov
@@ -163,11 +177,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
163
177
  version: '0'
164
178
  requirements: []
165
179
  rubyforge_project:
166
- rubygems_version: 2.4.5.1
180
+ rubygems_version: 2.4.5
167
181
  signing_key:
168
182
  specification_version: 4
169
183
  summary: Markov chains for Lita.
170
184
  test_files:
185
+ - spec/lita/handlers/markov/engine_spec.rb
171
186
  - spec/lita/handlers/markov_spec.rb
172
- - spec/marky_markov/persistent_json_dictionary_spec.rb
173
187
  - spec/spec_helper.rb
@@ -1,34 +0,0 @@
1
- require 'marky_markov'
2
- require 'oj'
3
-
4
- module MarkyMarkov
5
- class PersistentJSONDictionary < ::PersistentDictionary
6
- def initialize(*args)
7
- super(*args)
8
-
9
- @dictionary = {}
10
- @capitalized_words = []
11
- end
12
-
13
- # No-op instead of reading from the filesystem
14
- def open_dictionary
15
- nil
16
- end
17
-
18
- def load_json(json)
19
- data = Oj.load json
20
-
21
- @depth = data['depth']
22
- @dictionary = data['dictionary']
23
- @capitalized_words = data['capitalized_words']
24
- end
25
-
26
- def to_json
27
- Oj.dump(
28
- 'depth' => @depth,
29
- 'dictionary' => @dictionary,
30
- 'capitalized_words' => @capitalized_words
31
- )
32
- end
33
- end
34
- end
@@ -1,28 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe MarkyMarkov::PersistentJSONDictionary do
4
- subject { MarkyMarkov::PersistentJSONDictionary.new 'whoa' }
5
-
6
- it 'initializes with sensible defaults' do
7
- expect(subject.dictionary).to eq({})
8
- end
9
-
10
- it 'saves a dictionary' do
11
- subject.add_word ['a', 'b'], 'c'
12
-
13
- json = subject.to_json
14
-
15
- expect(json).to eql '{"depth":2,"dictionary":{"^#1":[["a","b"],["c"]]},"capitalized_words":[]}'
16
- end
17
-
18
- it 'saves and loads a dictionary' do
19
- subject.add_word ['a', 'b'], 'c'
20
-
21
- json = subject.to_json
22
-
23
- new_dictionary = MarkyMarkov::PersistentJSONDictionary.new 'whoa-another-one'
24
- new_dictionary.load_json json
25
-
26
- expect(new_dictionary.dictionary).to eql(['a', 'b'] => ['c'])
27
- end
28
- end