elephrame 0.4.6 → 0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -1,5 +1,7 @@
1
1
  require 'elephrame'
2
2
 
3
+ # we define our bot by telling elephrame how often it should post,
4
+ # and where it should load our tracery rules from
3
5
  bot = Elephrame::Bots::TraceryBot.new('10s', 'tracery_files')
4
6
 
5
7
  # this code happens automatically in the framework
@@ -1,6 +1,7 @@
1
1
  require_relative '../rest/rest'
2
2
  require_relative '../streaming/streaming'
3
3
  require_relative './tracery'
4
+ require_relative './generative'
4
5
  require_relative '../bot'
5
6
 
6
7
  module Elephrame
@@ -62,17 +63,17 @@ module Elephrame
62
63
  ##
63
64
  # create a new TraceryBot
64
65
  # @param interval [String] a string representing the interval to post
65
- # @param tracery_dir [String] a string with the path to the directory
66
- # containing all of the tracery grammer rules.
66
+ # @param dirs [Array<String>] an array of strings with paths to directories
67
+ # containing tracery grammar rules
67
68
  # @return [Elephrame::Bots::TraceryBot]
68
69
 
69
- def initialize interval, tracery_dir
70
+ def initialize interval, *dirs
70
71
  super()
71
72
 
72
73
  # set up our bot stuff
73
74
  setup_scheduler interval
74
75
  setup_streaming
75
- setup_tracery tracery_dir
76
+ setup_tracery dirs
76
77
  end
77
78
 
78
79
  ##
@@ -82,13 +83,322 @@ module Elephrame
82
83
 
83
84
  def run
84
85
  run_scheduled &Proc.new
86
+
87
+ # if we have any logic for on_reply, we run that
88
+ # otherwise we go past it and wait for our scheduler to finish
89
+ run_reply unless @on_reply.nil?
90
+ @scheduler.join
91
+ end
92
+ end
93
+
94
+ ##
95
+ # A basic Ebooks bot template
96
+
97
+ class EbooksBot < GenerativeBot
98
+ attr :update_interval,
99
+ :old_id,
100
+ :scrape_filter
101
+
102
+ PrivacyLevels = ['public', 'unlisted', 'private', 'direct']
103
+ APILimit = 280
104
+ RetryTime = '6m'
105
+
106
+ ##
107
+ # Creates a new Ebooks bot
108
+ #
109
+ # @param interval [String] how often the bot should post on its own
110
+ # @param opts [Hash] options for the bot
111
+ # @option opt cw [String] content warning (spoiler text) for generated posts
112
+ # @option opt update_interval [String] how often to scrape new posts
113
+ # from the accounts the bot follows
114
+ # @option opt retry_limit [Integer] the amount of times to retry
115
+ # generating a post
116
+ # @option opt model_filename [String] path to a file where we
117
+ # will save our backing ebooks model data
118
+ # @option opt filter_filename [String] path to a file where we
119
+ # will save our internal filtered words data
120
+ # @option opt visibility [String] the posting level the bot will default to
121
+ # @option opt scrape_privacy [String] the highest privacy the bot should
122
+ # scrape for content
123
+
124
+ def initialize(interval, opts = {})
125
+ super
126
+
127
+ # add our manual update command
128
+ add_privileged_command 'update' do
129
+ fetch_new_posts
130
+ end
131
+
132
+ # set some defaults for our internal vars
133
+ level = PrivacyLevels.index(opts[:scrape_privacy]) || 0
134
+ @scrape_filter = /(#{PrivacyLevels[0..level].join('|')})/
135
+ @update_interval = opts[:update_interval] || '2d'
136
+
137
+ # if we don't have a newest post id saved yet then we fetch one
138
+ # for each account
139
+ if @model_hash[:last_id].empty?
140
+ @old_id = {}
141
+
142
+ @following.each do |account|
143
+ # get the newest post from this account and save the id
144
+ newest_id = @client.statuses(account,
145
+ exclude_reblogs: true,
146
+ limit: 1).first.id
147
+ @model_hash[:last_id][account] = newest_id
148
+ @old_id[account] = newest_id
149
+ end
150
+ end
151
+
152
+ # if our model's tokens are empty that means we have an empty model
153
+ fetch_old_posts if @model_hash[:model].tokens.empty?
154
+ end
155
+
156
+ ##
157
+ # Method to go and fetch all posts
158
+ # should be run first
159
+
160
+ def fetch_old_posts
161
+ begin
162
+ # init some vars to keep track of where we are
163
+ api_calls = 1
164
+ errored = false
165
+ new_posts = { statuses: [],
166
+ mentions: [] }
167
+
168
+ # for each account we're following
169
+ @following.each do |account|
170
+ # okay so
171
+ # we keep track of how many get requests we're doing and before
172
+ # the limit (300) we schedule a retry (see RetryTime) and go on, saving what we got
173
+ posts = @client.statuses(account,
174
+ exclude_reblogs: true,
175
+ limit: 40,
176
+ max_id: @old_id[account])
177
+
178
+ # while we still have posts and haven't gotten near the api limit
179
+ while not posts.size.zero? and api_calls < APILimit
180
+ posts.each do |post|
181
+
182
+ # add the new post to our hash
183
+ if post.visibility =~ @scrape_filter
184
+ new_posts = add_post_to_hash post, new_posts
185
+ end
186
+
187
+ # set our cached id to the latest post id
188
+ @old_id[account] = post.id
189
+ end
190
+
191
+ # fetch more posts
192
+ posts = @client.statuses(account,
193
+ exclude_reblogs: true,
194
+ limit: 40,
195
+ max_id: @old_id[account])
196
+ api_calls += 1
197
+ end
198
+
199
+ break if api_calls >= APILimit
200
+ end
201
+
202
+ rescue
203
+ errored = true
204
+
205
+ ensure
206
+ # consume our posts, and then save our model
207
+ @model_hash[:model].consume! new_posts
208
+ save_file(@model_filename,
209
+ @model_hash.collect {|key, value| value.to_hash }.to_yaml)
210
+
211
+ # if we have more than our limit of api calls
212
+ # or we errored out that means we need to check again
213
+ if api_calls >= APILimit or errored
214
+ @scheduler.in RetryTime do
215
+ fetch_old_posts
216
+ end
217
+ end
218
+ end
219
+ end
220
+
221
+ ##
222
+ # Fetch posts from the accounts the bot follows
223
+
224
+ def fetch_new_posts
225
+ begin
226
+ # set up some vars for tracking our progress
227
+ added_posts = { statuses: [],
228
+ mentions: [] }
229
+ api_calls = 1
230
+ errored = false
231
+
232
+ # for each account we're following
233
+ @following.each do |account|
234
+ # get 40 posts at a time, where we left off
235
+ posts = @client.statuses(account,
236
+ exclude_reblogs: true,
237
+ limit: 40,
238
+ since_id: @model_hash[:last_id][account])
239
+
240
+ # while we have posts to process and we haven't
241
+ # gotten near the api limit
242
+ while not posts.size.zero? and api_calls < APILimit
243
+ posts.reverse_each do |post|
244
+ # save our post id for next loop
245
+ @model_hash[:last_id][account] = post.id
246
+
247
+ # if the post matches our set visibility we add it to our hash
248
+ if post.visibility =~ @scrape_filter
249
+ added_posts = add_post_to_hash post, added_posts
250
+ end
251
+ end
252
+
253
+ # fetch more posts
254
+ posts = @client.statuses(account,
255
+ exclude_reblogs: true,
256
+ limit: 40,
257
+ since_id: @model_hash[:last_id][account])
258
+ api_calls += 1
259
+ end
260
+
261
+ # in case we hit our api limit between calls
262
+ break if api_calls >= APILimit
263
+ end
264
+
265
+ rescue
266
+ # if we've hit here then we've errored out
267
+ errored = true
268
+
269
+ ensure
270
+ # consume our new posts, and add them to our original hash
271
+ @model_hash[:model].consume! added_posts
272
+
273
+ if api_calls >= APILimit or errored
274
+ @scheduler.in RetryTime do
275
+ fetch_new_posts
276
+ end
277
+ end
278
+
279
+ # then we save
280
+ save_file(@model_filename,
281
+ @model_hash.collect {|key, value| value.to_hash }.to_yaml)
282
+ end
283
+ end
284
+
285
+ ##
286
+ # Run the Ebooks bot
287
+
288
+ def run
289
+ # set up our scheduler to scrape posts
290
+ @scheduler.repeat @update_interval do
291
+ fetch_new_posts
292
+ end
293
+
294
+ # call generativebot's run method
295
+ super
296
+ end
297
+
298
+ private
299
+
300
+ ##
301
+ # adds a post into the +post_hash+ hash
302
+ # makes sure it gets put under the appropriate key
303
+ #
304
+ # @param post [Mastodon::Status]
305
+
306
+ def add_post_to_hash post, hash
307
+ # make sure we strip out the html crap
308
+ post.class
309
+ .module_eval { alias_method :content, :strip } if @strip_html
85
310
 
86
- unless @on_reply.nil?
87
- run_reply
311
+ # decide which array the post should go into, based
312
+ # on if it's a reply or not
313
+ # also make sure to strip out any account names
314
+ if post.in_reply_to_id.nil? or post.mentions.size.zero?
315
+ hash[:statuses] << post.content
88
316
  else
89
- @scheduler.join
317
+ hash[:mentions] << post.content.gsub(/@.+?(@.+?)?\s/, '')
318
+ end
319
+
320
+ hash
321
+ end
322
+ end
323
+
324
+ ##
325
+ # A more general purpose markov bot. Reads in data from a supplied source
326
+
327
+ class MarkovBot < GenerativeBot
328
+
329
+ ##
330
+ # Creates a new Markov bot
331
+ #
332
+ # @param interval [String] how often the bot should post on its own
333
+ # @param sources [Array] all of the sources for the bot. either
334
+ # folders or files
335
+ # @param opts [Hash] options for the bot
336
+ # @option opt cw [String] content warning (spoiler text) for generated posts
337
+ # @option opt retry_limit [Integer] the amount of times to retry
338
+ # generating a post
339
+ # @option opt model_filename [String] path to a file where we
340
+ # will save our backing ebooks model data
341
+ # @option opt filter_filename [String] path to a file where we
342
+ # will save our internal filtered words data
343
+ # @option opt visibility [String] the posting level the bot will default to
344
+
345
+ def initialize(interval, *sources, **options)
346
+ super(interval, options)
347
+
348
+ raise 'no sources provided!' if sources.empty?
349
+
350
+ # initialize the model to contain the specified source text
351
+ if @model_hash[:model].tokens.empty?
352
+ sources.each do |source|
353
+ if Dir.exists? source
354
+ Dir.open source do |file|
355
+ next if file =~ /^\.\.?$/
356
+ read_and_consume "#{source}/#{file}"
357
+ end
358
+ elsif File.exists? source
359
+ read_and_consume source
360
+ else
361
+ raise "source #{source} could not be loaded"
362
+ end
363
+ end
364
+
365
+ save_file(@model_filename,
366
+ @model_hash[:model].to_hash.to_yaml)
90
367
  end
91
368
  end
369
+
370
+ private
371
+
372
+ ##
373
+ # reads a file in and adds it into the model
374
+ #
375
+ # @param file [String] path to a file
376
+
377
+ def read_and_consume file
378
+ @model_hash[:model].consume! File.read(file)
379
+ end
380
+
381
+
382
+ =begin
383
+ ##
384
+ # scrapes text from a provided url
385
+ #
386
+ # @param url [String] a url
387
+ # @returns [Boolean]
388
+
389
+ def download_and_consume url
390
+ uri = URI.parse url
391
+ errored = false
392
+
393
+ begin
394
+
395
+ rescue
396
+ errored = true
397
+ end
398
+
399
+ errored
400
+ end
401
+ =end
92
402
  end
93
403
  end
94
404
  end
@@ -0,0 +1,244 @@
1
+ module Elephrame
2
+ module Bots
3
+ class GenerativeBot < BaseBot
4
+ include Elephrame::Streaming
5
+ include Elephrame::Reply
6
+ include Elephrame::Scheduler
7
+ include Elephrame::Command
8
+
9
+ attr_accessor :cw
10
+ attr :filter,
11
+ :filter_words,
12
+ :filter_by,
13
+ :following,
14
+ :model,
15
+ :char_limit,
16
+ :retry_limit,
17
+ :visibility,
18
+ :model_hash,
19
+ :model_filename,
20
+ :filter_filename
21
+
22
+ backup_method :post, :actually_post
23
+ SavedFileName = 'model.yml'
24
+ SavedFilterFileName = 'filter.yml'
25
+
26
+ def initialize(interval, options = {})
27
+ require 'moo_ebooks'
28
+ require 'yaml'
29
+
30
+ # initialize our botness
31
+ super()
32
+
33
+ # setup our various classes
34
+ setup_streaming
35
+ setup_scheduler interval
36
+ setup_command
37
+
38
+ # set some defaults and initialize some vars
39
+ @model_hash = { model: Ebooks::Model.new,
40
+ last_id: {} }
41
+ @filter = /^$/
42
+ @filter_words = []
43
+ @following = []
44
+ @char_limit = @client.instance.max_toot_chars || 500
45
+ @retry_limit = options[:retry_limit] || 10
46
+ @cw = options[:cw] || 'markov post'
47
+ @visibility = options[:visibility] || 'unlisted'
48
+ @model_filename = options[:model_filename] || SavedFileName
49
+ @filter_filename = options[:filter_filename] || SavedFilterFileName
50
+
51
+ # load our model if it exists
52
+ if File.exists? @model_filename
53
+ values = load_file(@model_filename)
54
+ @model_hash[:model] = Ebooks::Model.from_hash(values.first)
55
+ @model_hash[:last_id] = values.last
56
+ end
57
+
58
+ @filter_words = load_file(@filter_filename) if File.exists? @filter_filename
59
+
60
+ # add our default commands
61
+ #
62
+ # !delete will delete the status it's in reply to
63
+ add_privileged_command 'delete' do |bot, content, status|
64
+ @client.destroy_status(status.in_reply_to_id)
65
+ end
66
+
67
+ # !filter will add every word from the post into the word filter
68
+ add_privileged_command 'filter' do |bot, content, status|
69
+ content.split.each do |word|
70
+ add_filter_word word
71
+ end
72
+ save_file @filter_filename, @filter_words.to_yaml
73
+ bot.reply("'#{content}' added to internal filter")
74
+ end
75
+
76
+ # add a help command that explains the other commands
77
+ add_privileged_command 'help' do |bot|
78
+ bot.reply(default_help)
79
+ end
80
+
81
+ # set up a default for replying
82
+ on_reply do |bot, status|
83
+ # retry our status creation until we get something that
84
+ # passes our filters
85
+ @retry_limit.times do
86
+ text = @model_hash[:model].reply(status
87
+ .content
88
+ .gsub(/@.+?(@.+?)?\s/, ''),
89
+ @char_limit)
90
+ break unless bot.reply_with_mentions(text,
91
+ spoiler: @cw).nil?
92
+ end
93
+ end
94
+
95
+ # get our own account id and save the ids of the accounts
96
+ # we're following
97
+ acct_id = @client.verify_credentials.id
98
+ @client.following(acct_id).each do |account|
99
+ @following << account.id
100
+ end
101
+ end
102
+
103
+ ##
104
+ # Runs the bot
105
+
106
+ def run
107
+ # see scheduler.rb
108
+ run_scheduled do |bot|
109
+ @retry_limit.times do
110
+ text = @model_hash[:model].update(@char_limit)
111
+ break unless bot.post(text,
112
+ spoiler: @cw,
113
+ visibility: @visibility).nil?
114
+ end
115
+ end
116
+
117
+ # we do this because run_commands accepts a block that
118
+ # will run when it doesn't find a command in a mention
119
+ # this should work. :shrug:
120
+ run_commands do |bot, status|
121
+ @on_reply.call(bot, status)
122
+ end
123
+ end
124
+
125
+ ##
126
+ # generates a default help message for the default commands
127
+ # if you add custom commands add a `custom_command_help` method
128
+ # that returns a string. it will be added to the end of this
129
+ #
130
+ # @return [String] default help text
131
+
132
+ def default_help
133
+ txt = []
134
+ txt << "#{@prefix}delete -- deletes the status that the command post is replying to"
135
+ txt << "#{@prefix}filter -- adds all words from the command post into the internal filter"
136
+ txt << "#{@prefix}help -- replies with this help text"
137
+ txt << custom_command_help if respond_to? :custom_command_help
138
+ txt.join "\n"
139
+ end
140
+
141
+ ##
142
+ # adds a command that can only be executed by someone
143
+ # that the bot follows
144
+ #
145
+ # @param cmd [String] a command to add
146
+ # @param block [Proc] the code to execute when +cmd+ is received
147
+
148
+ def add_privileged_command cmd, &block
149
+ add_command cmd do |bot, content, status|
150
+ if @following.include? status.account.id
151
+ block.call(bot, content, status)
152
+ end
153
+ end
154
+ end
155
+
156
+ ##
157
+ # loads a yaml file containing our model data
158
+ #
159
+ # @param filename [String] file to read in from
160
+
161
+ def load_file filename
162
+ YAML.load_file(filename)
163
+ end
164
+
165
+ ##
166
+ # Saves a yaml file containing our model data
167
+ #
168
+ # @param filename [String] file to write out to
169
+
170
+ def save_file filename, data
171
+ File.write(filename, data)
172
+ end
173
+
174
+ ##
175
+ # Sets the filter regex
176
+ # if arg is a string array, 'or's the strings together
177
+ # if it's a regexp it just sets it to the value
178
+ #
179
+ # @param arg [Array<String>,String,Regexp]
180
+
181
+ def filter= arg
182
+ arg = arg.join('|') if arg.kind_of? Array
183
+ arg = /#{arg}/ unless arg.kind_of? Regexp
184
+ @filter = arg
185
+ end
186
+
187
+ ##
188
+ # Returns a string representing all of the current
189
+ # words being checked in the filter
190
+ #
191
+ # @return [String] comma separated list of all filter words
192
+
193
+ def filter_words
194
+ @filter_words.join(', ')
195
+ end
196
+
197
+ ##
198
+ # Adds a word into the filter list
199
+ #
200
+ # @param word [String]
201
+
202
+ def add_filter_word(word)
203
+ @filter_words << word
204
+ filter = @filter_words
205
+ end
206
+
207
+ ##
208
+ # Accepts a block to check the post against before posting
209
+ #
210
+ # @param block [Proc]
211
+
212
+ def filter_by &block
213
+ @filter_by = block
214
+ end
215
+
216
+ ##
217
+ # Checks the proposed post against the filters
218
+ # only posts if the text passes the filters
219
+ #
220
+ # @param text [String] the generated text to check against the filters before posting
221
+ # @param options [Hash] a hash of arguments to pass to post
222
+ # @option options rules [String] the grammar rules to load
223
+ # @option options visibility [String] visibility level
224
+ # @option options spoiler [String] text to use as content warning
225
+ # @option options reply_id [String] id of post to reply to
226
+ # @option options hide_media [Bool] should we hide media?
227
+ # @option options media [Array<String>] array of file paths
228
+
229
+ def filter_and_post(text, *options)
230
+ opts = Hash[*options]
231
+
232
+ # default passed to false and then see if
233
+ # the supplied text gets through our filters
234
+ passed = false
235
+ passed = !(text =~ @filter)
236
+ passed = @filter_by.call(text) unless @filter_by.nil?
237
+
238
+ actually_post(text, **opts) if passed
239
+ end
240
+
241
+ alias_method :post, :filter_and_post
242
+ end
243
+ end
244
+ end
@@ -12,29 +12,32 @@ module Elephrame
12
12
  # loads all of our tracery files into our +files+ hash
13
13
  # if a file is named 'default' then we load that into +grammar+
14
14
  #
15
- # @param dir [String] path to the directory containing the tracery rules
15
+ # @param dirs [Array<String>] paths to the directories containing the tracery rules
16
16
 
17
- def setup_tracery dir_path
17
+ def setup_tracery *dirs
18
18
  raise "Provided path not a directory" unless Dir.exist?(dir_path)
19
19
 
20
20
  @grammar = {}
21
- Dir.open(dir_path) do |dir|
22
- dir.each do |file|
23
- # skip our current and parent dir
24
- next if file =~ /^\.\.?$/
25
21
 
26
- # read the rule file into the files hash
27
- @grammar[file.split('.').first] =
28
- createGrammar(JSON.parse(File.read("#{dir_path}/#{file}")))
22
+ dirs.each do |directory|
23
+ Dir.open(directory) do |dir|
24
+ dir.each do |file|
25
+ # skip our current and parent dir
26
+ next if file =~ /^\.\.?$/
27
+
28
+ # read the rule file into the files hash
29
+ @grammar[file.split('.').first] =
30
+ createGrammar(JSON.parse(File.read("#{dir_path}/#{file}")))
31
+ end
29
32
  end
30
33
  end
31
-
34
+
32
35
  # go ahead and makes a default mention-handler
33
36
  # if we have a reply rule file
34
37
  unless @grammar['reply'].nil?
35
- on_reply { |bot|
38
+ on_reply do |bot|
36
39
  bot.reply_with_mentions('#default#', rules: 'reply')
37
- }
40
+ end
38
41
  end
39
42
  end
40
43
 
@@ -23,7 +23,7 @@ module Elephrame
23
23
  @schedule = j
24
24
  yield(self)
25
25
  end
26
- @scheduler.join unless not @streamer.nil?
26
+ @scheduler.join if @streamer.nil?
27
27
  end
28
28
 
29
29
  alias_method :run, :run_scheduled
@@ -35,7 +35,7 @@ module Elephrame
35
35
  # @param usage [String]
36
36
 
37
37
  def set_help usage
38
- add_command 'help' do |bot, content, status|
38
+ add_command 'help' do |bot|
39
39
  bot.reply("#{usage}")
40
40
  end
41
41
  end
@@ -43,7 +43,7 @@ module Elephrame
43
43
  # unless that account is our own, or the tagged account
44
44
  # has #NoBot
45
45
  mentions = @mention_data[:mentions].collect do |m|
46
- "@#{m.acct}" unless m.acct == @username or no_bot? m.id
46
+ "@#{m.acct}" unless m.acct == @username or @client.account(m.id).no_bot?
47
47
  end.join ' '
48
48
 
49
49
  reply("#{mentions.strip} #{text}", *options)
@@ -11,8 +11,9 @@ module Elephrame
11
11
  # Creates the stream client
12
12
 
13
13
  def setup_streaming
14
- stream_uri = @client.instance()
15
- .attributes['urls']['streaming_api'].gsub(/^wss?/, 'https')
14
+ stream_uri = @client.instance
15
+ .attributes['urls']['streaming_api']
16
+ .gsub(/^wss?/, 'https')
16
17
  @streamer = Mastodon::Streaming::Client.new(base_url: stream_uri,
17
18
  bearer_token: ENV['TOKEN'])
18
19
  end