elephrame 0.4.6 → 0.5

Sign up to get free protection for your applications and to get access to all the features.
File without changes
@@ -1,5 +1,7 @@
1
1
  require 'elephrame'
2
2
 
3
+ # we define our bot by telling elephrame how often it should post,
4
+ # and where it should load our tracery rules from
3
5
  bot = Elephrame::Bots::TraceryBot.new('10s', 'tracery_files')
4
6
 
5
7
  # this code happens automatically in the framework
@@ -1,6 +1,7 @@
1
1
  require_relative '../rest/rest'
2
2
  require_relative '../streaming/streaming'
3
3
  require_relative './tracery'
4
+ require_relative './generative'
4
5
  require_relative '../bot'
5
6
 
6
7
  module Elephrame
@@ -62,17 +63,17 @@ module Elephrame
62
63
  ##
63
64
  # create a new TraceryBot
64
65
  # @param interval [String] a string representing the interval to post
65
- # @param tracery_dir [String] a string with the path to the directory
66
- # containing all of the tracery grammer rules.
66
+ # @param dirs [Array<String>] an array of strings with paths to directories
67
+ # containing tracery grammar rules
67
68
  # @return [Elephrame::Bots::TraceryBot]
68
69
 
69
- def initialize interval, tracery_dir
70
+ def initialize interval, *dirs
70
71
  super()
71
72
 
72
73
  # set up our bot stuff
73
74
  setup_scheduler interval
74
75
  setup_streaming
75
- setup_tracery tracery_dir
76
+ setup_tracery dirs
76
77
  end
77
78
 
78
79
  ##
@@ -82,13 +83,322 @@ module Elephrame
82
83
 
83
84
  def run
84
85
  run_scheduled &Proc.new
86
+
87
+ # if we have any logic for on_reply, we run that
88
+ # otherwise we go past it and wait for our scheduler to finish
89
+ run_reply unless @on_reply.nil?
90
+ @scheduler.join
91
+ end
92
+ end
93
+
94
+ ##
95
+ # A basic Ebooks bot template
96
+
97
+ class EbooksBot < GenerativeBot
98
+ attr :update_interval,
99
+ :old_id,
100
+ :scrape_filter
101
+
102
+ PrivacyLevels = ['public', 'unlisted', 'private', 'direct']
103
+ APILimit = 280
104
+ RetryTime = '6m'
105
+
106
+ ##
107
+ # Creates a new Ebooks bot
108
+ #
109
+ # @param interval [String] how often should the bot post on its own
110
+ # @param opts [Hash] options for the bot
111
+ # @option opt cw [String]
112
+ # @option opt update_interval [String] how often to scrape new posts
113
+ # from the accounts the bot follows
114
+ # @option opt retry_limit [Integer] the amount of times to retry
115
+ # generating a post
116
+ # @option opt model_filename [String] path to a file where we
117
+ # will save our backing ebooks model data
118
+ # @option opt filter_filename [String] path to a file where we
119
+ # will save our internal filtered words data
120
+ # @option opt visibility [String] the posting level the bot will default to
121
+ # @option opt scrape_privacy [String] the highest privacy the bot should
122
+ # scrape for content
123
+
124
+ def initialize(interval, opts = {})
125
+ super
126
+
127
+ # add our manual update command
128
+ add_privileged_command 'update' do
129
+ fetch_new_posts
130
+ end
131
+
132
+ # set some defaults for our internal vars
133
+ level = PrivacyLevels.index(opts[:scrape_privacy]) || 0
134
+ @scrape_filter = /(#{PrivacyLevels[0..level].join('|')})/
135
+ @update_interval = opts[:update_interval] || '2d'
136
+
137
+ # if we don't have a newest post id saved then we fetch one
138
+ # for each account
139
+ if @model_hash[:last_id].empty?
140
+ @old_id = {}
141
+
142
+ @following.each do |account|
143
+ # get the newest post from this account and save the id
144
+ newest_id = @client.statuses(account,
145
+ exclude_reblogs: true,
146
+ limit: 1).first.id
147
+ @model_hash[:last_id][account] = newest_id
148
+ @old_id[account] = newest_id
149
+ end
150
+ end
151
+
152
+ # if our model's tokens are empty that means we have an empty model
153
+ fetch_old_posts if @model_hash[:model].tokens.empty?
154
+ end
155
+
156
+ ##
157
+ # Method to go and fetch all posts
158
+ # should be run first
159
+
160
+ def fetch_old_posts
161
+ begin
162
+ # init some vars to keep track of where we are
163
+ api_calls = 1
164
+ errored = false
165
+ new_posts = { statuses: [],
166
+ mentions: [] }
167
+
168
+ # for each account we're following
169
+ @following.each do |account|
170
+ # okay so
171
+ # we keep track of how many get requests we're doing and before
172
+ # the limit (300) we schedule for 5min and go on, saving what we got
173
+ posts = @client.statuses(account,
174
+ exclude_reblogs: true,
175
+ limit: 40,
176
+ max_id: @old_id[account])
177
+
178
+ # while we still have posts and haven't gotten near the api limit
179
+ while not posts.size.zero? and api_calls < APILimit
180
+ posts.each do |post|
181
+
182
+ # add the new post to our hash
183
+ if post.visibility =~ @scrape_filter
184
+ new_posts = add_post_to_hash post, new_posts
185
+ end
186
+
187
+ # set our cached id to the latest post id
188
+ @old_id[account] = post.id
189
+ end
190
+
191
+ # fetch more posts
192
+ posts = @client.statuses(account,
193
+ exclude_reblogs: true,
194
+ limit: 40,
195
+ max_id: @old_id[account])
196
+ api_calls += 1
197
+ end
198
+
199
+ break if api_calls >= APILimit
200
+ end
201
+
202
+ rescue
203
+ errored = true
204
+
205
+ ensure
206
+ # consume our posts, and then save our model
207
+ @model_hash[:model].consume! new_posts
208
+ save_file(@model_filename,
209
+ @model_hash.collect {|key, value| value.to_hash }.to_yaml)
210
+
211
+ # if we have more than our limit of api calls
212
+ # or we errored out that means we need to check again
213
+ if api_calls >= APILimit or errored
214
+ @scheduler.in RetryTime do
215
+ fetch_old_posts
216
+ end
217
+ end
218
+ end
219
+ end
220
+
221
+ ##
222
+ # Fetch posts from the accounts the bot follows
223
+
224
+ def fetch_new_posts
225
+ begin
226
+ # set up some vars for tracking our progress
227
+ added_posts = { statuses: [],
228
+ mentions: [] }
229
+ api_calls = 1
230
+ errored = false
231
+
232
+ # for each account we're following
233
+ @following.each do |account|
234
+ # get 40 posts at a time, where we left off
235
+ posts = @client.statuses(account,
236
+ exclude_reblogs: true,
237
+ limit: 40,
238
+ since_id: @model_hash[:last_id][account])
239
+
240
+ # while we have posts to process and we haven't
241
+ # gotten near the api limit
242
+ while not posts.size.zero? and api_calls < APILimit
243
+ posts.reverse_each do |post|
244
+ # save our post id for next loop
245
+ @model_hash[:last_id][account] = post.id
246
+
247
+ # if the post matches our set visibility we add it to our hash
248
+ if post.visibility =~ @scrape_filter
249
+ added_posts = add_post_to_hash post, added_posts
250
+ end
251
+ end
252
+
253
+ # fetch more posts
254
+ posts = @client.statuses(account,
255
+ exclude_reblogs: true,
256
+ limit: 40,
257
+ since_id: @model_hash[:last_id][account])
258
+ api_calls += 1
259
+ end
260
+
261
+ # in case we hit our api limit between calls
262
+ break if api_calls >= APILimit
263
+ end
264
+
265
+ rescue
266
+ # if we've hit here then we've errored out
267
+ errored = true
268
+
269
+ ensure
270
+ # consume our new posts, and add them to our original hash
271
+ @model_hash[:model].consume! added_posts
272
+
273
+ if api_calls >= APILimit or errored
274
+ @scheduler.in RetryTime do
275
+ fetch_new_posts
276
+ end
277
+ end
278
+
279
+ # then we save
280
+ save_file(@model_filename,
281
+ @model_hash.collect {|key, value| value.to_hash }.to_yaml)
282
+ end
283
+ end
284
+
285
+ ##
286
+ # Run the Ebooks bot
287
+
288
+ def run
289
+ # set up our scheduler to scrape posts
290
+ @scheduler.repeat @update_interval do
291
+ fetch_new_posts
292
+ end
293
+
294
+ # call generativebot's run method
295
+ super
296
+ end
297
+
298
+ private
299
+
300
+ ##
301
+ # adds a post into the +post_hash+ hash
302
+ # makes sure it gets put under the appropriate key
303
+ #
304
+ # @param post [Mastodon::Status]
305
+
306
+ def add_post_to_hash post, hash
307
+ # make sure we strip out the html crap
308
+ post.class
309
+ .module_eval { alias_method :content, :strip } if @strip_html
85
310
 
86
- unless @on_reply.nil?
87
- run_reply
311
+ # decide which array the post should go into, based
312
+ # on if it's a reply or not
313
+ # also make sure to strip out any account names
314
+ if post.in_reply_to_id.nil? or post.mentions.size.zero?
315
+ hash[:statuses] << post.content
88
316
  else
89
- @scheduler.join
317
+ hash[:mentions] << post.content.gsub(/@.+?(@.+?)?\s/, '')
318
+ end
319
+
320
+ hash
321
+ end
322
+ end
323
+
324
+ ##
325
+ # A more general purpose markov bot. Reads in data from a supplied source
326
+
327
+ class MarkovBot < GenerativeBot
328
+
329
+ ##
330
+ # Creates a new Markov bot
331
+ #
332
+ # @param interval [String] how often should the bot post on its own
333
+ # @param sources [Array] all of the sources for the bot. either
334
+ # folders or files
335
+ # @param opts [Hash] options for the bot
336
+ # @option opt cw [String]
337
+ # @option opt retry_limit [Integer] the amount of times to retry
338
+ # generating a post
339
+ # @option opt model_filename [String] path to a file where we
340
+ # will save our backing ebooks model data
341
+ # @option opt filter_filename [String] path to a file where we
342
+ # will save our internal filtered words data
343
+ # @option opt visibility [String] the posting level the bot will default to
344
+
345
+ def initialize(interval, *sources, **options)
346
+ super(interval, options)
347
+
348
+ raise 'no sources provided!' if sources.empty?
349
+
350
+ # initialize the model to contain the specified source text
351
+ if @model_hash[:model].tokens.empty?
352
+ sources.each do |source|
353
+ if Dir.exists? source
354
+ Dir.open source do |file|
355
+ next if file =~ /^\.\.?$/
356
+ read_and_consume "#{source}/#{file}"
357
+ end
358
+ elsif File.exists? source
359
+ read_and_consume source
360
+ else
361
+ raise "source #{source} could not be loaded"
362
+ end
363
+ end
364
+
365
+ save_file(@model_filename,
366
+ @model_hash[:model].to_hash.to_yaml)
90
367
  end
91
368
  end
369
+
370
+ private
371
+
372
+ ##
373
+ # reads a file in and adds it into the model
374
+ #
375
+ # @param file [String] path to a file
376
+
377
+ def read_and_consume file
378
+ @model_hash[:model].consume! File.read(file)
379
+ end
380
+
381
+
382
+ =begin
383
+ ##
384
+ # scrapes text from a provided url
385
+ #
386
+ # @param url [String] a url
387
+ # @returns [Boolean]
388
+
389
+ def download_and_consume url
390
+ uri = URI.parse url
391
+ errored = false
392
+
393
+ begin
394
+
395
+ rescue
396
+ errored = true
397
+ end
398
+
399
+ errored
400
+ end
401
+ =end
92
402
  end
93
403
  end
94
404
  end
@@ -0,0 +1,244 @@
1
+ module Elephrame
2
+ module Bots
3
+ class GenerativeBot < BaseBot
4
+ include Elephrame::Streaming
5
+ include Elephrame::Reply
6
+ include Elephrame::Scheduler
7
+ include Elephrame::Command
8
+
9
+ attr_accessor :cw
10
+ attr :filter,
11
+ :filter_words,
12
+ :filter_by,
13
+ :following,
14
+ :model,
15
+ :char_limit,
16
+ :retry_limit,
17
+ :visibility,
18
+ :model_hash,
19
+ :model_filename,
20
+ :filter_filename
21
+
22
+ backup_method :post, :actually_post
23
+ SavedFileName = 'model.yml'
24
+ SavedFilterFileName = 'filter.yml'
25
+
26
+ def initialize(interval, options = {})
27
+ require 'moo_ebooks'
28
+ require 'yaml'
29
+
30
+ # initialize our botness
31
+ super()
32
+
33
+ # setup our various classes
34
+ setup_streaming
35
+ setup_scheduler interval
36
+ setup_command
37
+
38
+ # set some defaults and initialize some vars
39
+ @model_hash = { model: Ebooks::Model.new,
40
+ last_id: {} }
41
+ @filter = /^$/
42
+ @filter_words = []
43
+ @following = []
44
+ @char_limit = @client.instance.max_toot_chars || 500
45
+ @retry_limit = options[:retry_limit] || 10
46
+ @cw = options[:cw] || 'markov post'
47
+ @visibility = options[:visibility] || 'unlisted'
48
+ @model_filename = options[:model_filename] || SavedFileName
49
+ @filter_filename = options[:filter_filename] || SavedFilterFileName
50
+
51
+ # load our model if it exists
52
+ if File.exists? @model_filename
53
+ values = load_file(@model_filename)
54
+ @model_hash[:model] = Ebooks::Model.from_hash(values.first)
55
+ @model_hash[:last_id] = values.last
56
+ end
57
+
58
+ @filter_words = load_file(@filter_filename) if File.exists? @filter_filename
59
+
60
+ # add our default commands
61
+ #
62
+ # !delete will delete the status it's in reply to
63
+ add_privileged_command 'delete' do |bot, content, status|
64
+ @client.destroy_status(status.in_reply_to_id)
65
+ end
66
+
67
+ # !filter will add every word from the post into the word filter
68
+ add_privileged_command 'filter' do |bot, content, status|
69
+ content.split.each do |word|
70
+ add_filter_word word
71
+ end
72
+ save_file @filter_filename, @filter_words.to_yaml
73
+ bot.reply("'#{content}' added to internal filter")
74
+ end
75
+
76
+ # add a help command that explains the other commands
77
+ add_privileged_command 'help' do |bot|
78
+ bot.reply(default_help)
79
+ end
80
+
81
+ # set up a default for replying
82
+ on_reply do |bot, status|
83
+ # retry our status creation until we get something that
84
+ # passes our filters
85
+ @retry_limit.times do
86
+ text = @model_hash[:model].reply(status
87
+ .content
88
+ .gsub(/@.+?(@.+?)?\s/, ''),
89
+ @char_limit)
90
+ break unless bot.reply_with_mentions(text,
91
+ spoiler: @cw).nil?
92
+ end
93
+ end
94
+
95
+ # get our own account id and save the ids of the accounts
96
+ # we're following
97
+ acct_id = @client.verify_credentials.id
98
+ @client.following(acct_id).each do |account|
99
+ @following << account.id
100
+ end
101
+ end
102
+
103
+ ##
104
+ # Runs the bot
105
+
106
+ def run
107
+ # see scheduler.rb
108
+ run_scheduled do |bot|
109
+ @retry_limit.times do
110
+ text = @model_hash[:model].update(@char_limit)
111
+ break unless bot.post(text,
112
+ spoiler: @cw,
113
+ visibility: @visibility).nil?
114
+ end
115
+ end
116
+
117
+ # we do this because run_commands accepts a block that
118
+ # will run when it doesn't find a command in a mention
119
+ # this should work. :shrug:
120
+ run_commands do |bot, status|
121
+ @on_reply.call(bot, status)
122
+ end
123
+ end
124
+
125
+ ##
126
+ # generates a default help message for the default commands
127
+ # if you add custom commands add a `custom_command_help` method
128
+ # that returns a string. it will be added to the end of this
129
+ #
130
+ # @returns [String] default help text
131
+
132
+ def default_help
133
+ txt = []
134
+ txt << "#{@prefix}delete -- deletes the status that the command post is replying to"
135
+ txt << "#{@prefix}filter -- adds all words from the command post into the internal filter"
136
+ txt << "#{@prefix}help -- replies with this help text"
137
+ txt << custom_command_help if respond_to? :custom_command_help
138
+ txt.join "\n"
139
+ end
140
+
141
+ ##
142
+ # adds a command that can only be executed by someone
143
+ # that the bot follows
144
+ #
145
+ # @param cmd [String] a command to add
146
+ # @param block [Proc] the code to execute when +cmd+ is received
147
+
148
+ def add_privileged_command cmd, &block
149
+ add_command cmd do |bot, content, status|
150
+ if @following.include? status.account.id
151
+ block.call(bot, content, status)
152
+ end
153
+ end
154
+ end
155
+
156
+ ##
157
+ # loads a yaml file containing our model data
158
+ #
159
+ # @param filename [String] file to read in from
160
+
161
+ def load_file filename
162
+ YAML.load_file(filename)
163
+ end
164
+
165
+ ##
166
+ # Saves a yaml file containing our model data
167
+ #
168
+ # @param filename [String] file to write out to
169
+
170
+ def save_file filename, data
171
+ File.write(filename, data)
172
+ end
173
+
174
+ ##
175
+ # Sets the filter regex
176
+ # if arg is a string array, 'or's the strings together
177
+ # if it's a regexp it just sets it to the value
178
+ #
179
+ # @param arg [Array<String>,String,Regexp]
180
+
181
+ def filter= arg
182
+ arg = arg.join('|') if arg.kind_of? Array
183
+ arg = /#{arg}/ unless arg.kind_of? Regexp
184
+ @filter = arg
185
+ end
186
+
187
+ ##
188
+ # Returns a string representing all of the current
189
+ # words being checked in the filter
190
+ #
191
+ # @returns [String] comma separated list of all filter words
192
+
193
+ def filter_words
194
+ @filter_words.join(', ')
195
+ end
196
+
197
+ ##
198
+ # Adds a word into the filter list
199
+ #
200
+ # @param word [String]
201
+
202
+ def add_filter_word(word)
203
+ @filter_words << word
204
+ filter = @filter_words
205
+ end
206
+
207
+ ##
208
+ # Accepts a block to check the post against before posting
209
+ #
210
+ # @param block [Proc]
211
+
212
+ def filter_by &block
213
+ @filter_by = block
214
+ end
215
+
216
+ ##
217
+ # Checks the proposed post against the filters
218
+ # only posts if the text passes the filters
219
+ #
220
+ # @param text [String] the generated text to check against the filters before posting
221
+ # @param options [Hash] a hash of arguments to pass to post
222
+ # @option options rules [String] the grammar rules to load
223
+ # @option options visibility [String] visibility level
224
+ # @option options spoiler [String] text to use as content warning
225
+ # @option options reply_id [String] id of post to reply to
226
+ # @option options hide_media [Bool] should we hide media?
227
+ # @option options media [Array<String>] array of file paths
228
+
229
+ def filter_and_post(text, *options)
230
+ opts = Hash[*options]
231
+
232
+ # default passed to false and then see if
233
+ # the supplied text gets through our filters
234
+ passed = false
235
+ passed = !(text =~ @filter)
236
+ passed = @filter_by.call(text) unless @filter_by.nil?
237
+
238
+ actually_post(text, **opts) if passed
239
+ end
240
+
241
+ alias_method :post, :filter_and_post
242
+ end
243
+ end
244
+ end
@@ -12,29 +12,32 @@ module Elephrame
12
12
  # loads all of our tracery files into our +files+ hash
13
13
  # if a file is named 'default' then we load that into +grammar+
14
14
  #
15
- # @param dir [String] path to the directory containing the tracery rules
15
+ # @param dirs [Array<String>] paths to the directories containing the tracery rules
16
16
 
17
- def setup_tracery dir_path
17
+ def setup_tracery *dirs
18
18
  raise "Provided path not a directory" unless Dir.exist?(dir_path)
19
19
 
20
20
  @grammar = {}
21
- Dir.open(dir_path) do |dir|
22
- dir.each do |file|
23
- # skip our current and parent dir
24
- next if file =~ /^\.\.?$/
25
21
 
26
- # read the rule file into the files hash
27
- @grammar[file.split('.').first] =
28
- createGrammar(JSON.parse(File.read("#{dir_path}/#{file}")))
22
+ dirs.each do |directory|
23
+ Dir.open(directory) do |dir|
24
+ dir.each do |file|
25
+ # skip our current and parent dir
26
+ next if file =~ /^\.\.?$/
27
+
28
+ # read the rule file into the files hash
29
+ @grammar[file.split('.').first] =
30
+ createGrammar(JSON.parse(File.read("#{dir_path}/#{file}")))
31
+ end
29
32
  end
30
33
  end
31
-
34
+
32
35
  # go ahead and makes a default mention-handler
33
36
  # if we have a reply rule file
34
37
  unless @grammar['reply'].nil?
35
- on_reply { |bot|
38
+ on_reply do |bot|
36
39
  bot.reply_with_mentions('#default#', rules: 'reply')
37
- }
40
+ end
38
41
  end
39
42
  end
40
43
 
@@ -23,7 +23,7 @@ module Elephrame
23
23
  @schedule = j
24
24
  yield(self)
25
25
  end
26
- @scheduler.join unless not @streamer.nil?
26
+ @scheduler.join if @streamer.nil?
27
27
  end
28
28
 
29
29
  alias_method :run, :run_scheduled
@@ -35,7 +35,7 @@ module Elephrame
35
35
  # @param usage [String]
36
36
 
37
37
  def set_help usage
38
- add_command 'help' do |bot, content, status|
38
+ add_command 'help' do |bot|
39
39
  bot.reply("#{usage}")
40
40
  end
41
41
  end
@@ -43,7 +43,7 @@ module Elephrame
43
43
  # unless that account is our own, or the tagged account
44
44
  # has #NoBot
45
45
  mentions = @mention_data[:mentions].collect do |m|
46
- "@#{m.acct}" unless m.acct == @username or no_bot? m.id
46
+ "@#{m.acct}" unless m.acct == @username or @client.account(m.id).no_bot?
47
47
  end.join ' '
48
48
 
49
49
  reply("#{mentions.strip} #{text}", *options)
@@ -11,8 +11,9 @@ module Elephrame
11
11
  # Creates the stream client
12
12
 
13
13
  def setup_streaming
14
- stream_uri = @client.instance()
15
- .attributes['urls']['streaming_api'].gsub(/^wss?/, 'https')
14
+ stream_uri = @client.instance
15
+ .attributes['urls']['streaming_api']
16
+ .gsub(/^wss?/, 'https')
16
17
  @streamer = Mastodon::Streaming::Client.new(base_url: stream_uri,
17
18
  bearer_token: ENV['TOKEN'])
18
19
  end