spreadsheet_agent 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/spreadsheet_agent.rb +418 -0
- data/lib/spreadsheet_agent/db.rb +55 -0
- data/lib/spreadsheet_agent/error.rb +12 -0
- data/lib/spreadsheet_agent/runner.rb +306 -0
- data/test/agent_bin/othergoal_agent.rb +17 -0
- data/test/agent_bin/testgoal_agent.rb +18 -0
- data/test/spreadsheet_agent_db_test.rb +13 -0
- data/test/spreadsheet_agent_runner_test.rb +468 -0
- data/test/spreadsheet_agent_test.rb +411 -0
- metadata +157 -0
@@ -0,0 +1,418 @@
|
|
1
|
+
# Author: Darin London
|
2
|
+
# The license of this source is "MIT Licence"
|
3
|
+
|
4
|
+
require 'spreadsheet_agent/db'
|
5
|
+
require 'socket'
|
6
|
+
require 'open3'
|
7
|
+
require 'capture_io'
|
8
|
+
require 'mail'
|
9
|
+
|
10
|
+
# A Distributed Agent System using Google Spreadsheets
|
11
|
+
#
|
12
|
+
# Version 0.01
|
13
|
+
#
|
14
|
+
# SpreadsheetAgent is a framework for creating massively distributed pipelines
|
15
|
+
# across many different servers, each using the same google spreadsheet as a
|
16
|
+
# control panel. It is extensible and flexible. It doesn't specify what
|
17
|
+
# goals any pipeline should be working towards, or which goals are prerequisites
|
18
|
+
# for other goals, but it does provide logic for easily defining these relationships
|
19
|
+
# based on your own needs. It does this by providing a subsumption architecture,
|
20
|
+
# whereby many small, highly focused agents are written to perform specific goals,
|
21
|
+
# and also know what resources they require to perform them. Agents can be coded to
|
22
|
+
# subsume other agents upon successful completion. In addition, it is
|
23
|
+
# designed from the beginning to support the creation of simple human-computational
|
24
|
+
# workflows.
|
25
|
+
#
|
26
|
+
# SpreadsheetAgent requires GoogleDrive[http://rubygems.org/gems/google_drive], and works with a Google Spreadsheet with some or all worksheets
|
27
|
+
# formatted according to the following:
|
28
|
+
# * The top row of a page to be processed holds the field names for all entry records in subsequent rows
|
29
|
+
# * You can define any fields necessary, but you must specify a 'ready' and a 'complete' field
|
30
|
+
# * You must define at least 1 key field, and the key field must be specified as required in the :config (see SpreadsheetAgent::Db)
|
31
|
+
# * You should then define fields named for agent_bin/#{ field_name }_agent.rb for each agent that you plan to deploy in your pipeline
|
32
|
+
#
|
33
|
+
module SpreadsheetAgent
|
34
|
+
|
35
|
+
# SpreadsheetAgent::Agent is designed to make it easy to create a single task which connects to
|
36
|
+
# a field within a record on a page within the configured SpreadsheetAgent compatible Google Spreadsheet,
|
37
|
+
# runs, and reports whether the job completed or ended in error. An agent can be configured to only run
|
38
|
+
# when certain prerequisite fields have completed. The data in these fields can be filled in by other
|
39
|
+
# SpreadsheetAgent::Agents, SpreadsheetAgent::Runners, or humans. Compute node configuration is available
|
40
|
+
# to prevent the agent from running more than a certain number of instances of itself, or not run if certain
|
41
|
+
# other agents or processes are running on the node. Finally, an agent can be configured to subsume another
|
42
|
+
# agent, and fill in the completion field for that agent in addition to its own when it completes successfully.
|
43
|
+
#
|
44
|
+
# extends SpreadsheetAgent::Db
|
45
|
+
class Agent < SpreadsheetAgent::Db
|
46
|
+
|
47
|
+
# The name of the field in the page to which the agent should report status
|
48
|
+
attr_accessor :agent_name
|
49
|
+
|
50
|
+
# The name of the Page on the Google Spreadsheet that contains the record to be worked on by the agent
|
51
|
+
attr_accessor :page_name
|
52
|
+
|
53
|
+
# hash of key-value pairs. The keys are defined in config/agent.conf.yml. The values
|
54
|
+
# specify the values for those fields in the record on the page for which the agent is running.
|
55
|
+
# All keys configured as 'required: 1' in config/agent.conf.yml must be included in the keys hash
|
56
|
+
attr_accessor :keys
|
57
|
+
|
58
|
+
# Boolean. When true, the agent code will print verbosely to STDERR. When false, and the process!
|
59
|
+
# returns a failure status, the agent will email all stdout and stderr to the email specified in the
|
60
|
+
# :config send_to value
|
61
|
+
attr_accessor :debug
|
62
|
+
|
63
|
+
# Optional array of prerequisite fields that must contain a 1 in them for the record on the page before
|
64
|
+
# the agent will attempt to run
|
65
|
+
attr_accessor :prerequisites
|
66
|
+
|
67
|
+
# Optional integer. This works on Linux with ps. The agent will not attempt to run if there are
|
68
|
+
# max_selves instances running
|
69
|
+
attr_accessor :max_selves
|
70
|
+
|
71
|
+
# Hash of process_name to number of max_instances. This works on Linux with ps. If the agent detects
|
72
|
+
# the specified number of max_instances of the given process (based on a line match), it will not
|
73
|
+
# attempt to run
|
74
|
+
attr_accessor :conflicts_with
|
75
|
+
|
76
|
+
# Array of fields on the record which this agent subsumes. If the agent completes successfully these
|
77
|
+
# fields will be updated with a 1 in addition to the field for the agent
|
78
|
+
attr_accessor :subsumes
|
79
|
+
|
80
|
+
# Readonly access to the GoogleDrive::Worksheet that is being accessed by the agent.
|
81
|
+
attr_reader :worksheet
|
82
|
+
|
83
|
+
# create a new SpreadsheetAgent::Agent with the following:
|
84
|
+
# == required configuration parameters:
|
85
|
+
# * agent_name
|
86
|
+
# * page_name
|
87
|
+
# * keys
|
88
|
+
#
|
89
|
+
# == optional parameters:
|
90
|
+
# * config_file: (see SpreadsheetAgent::Db)
|
91
|
+
# * debug
|
92
|
+
# * prerequisites
|
93
|
+
# * max_selves
|
94
|
+
# * conflicts_with
|
95
|
+
# * subsumes
|
96
|
+
#
|
97
|
+
# Builds an agent from the +attributes+ hash.
#
# Reads :agent_name, :page_name and :keys (all required), :config_file,
# :debug, :max_selves, and the optional :prerequisites, :conflicts_with and
# :subsumes collections. Collections are cloned so that later changes to the
# caller's objects do not affect the agent.
#
# Raises SpreadsheetAgentError when agent_name, page_name or keys is missing.
# Calls build_db and then looks up the worksheet titled +page_name+.
def initialize(attributes)
  @agent_name = attributes[:agent_name]
  @page_name  = attributes[:page_name]
  @keys       = attributes[:keys].clone

  incomplete = !@agent_name || !@page_name || !@keys
  raise SpreadsheetAgentError, "agent_name, page_name, and keys attributes are required!" if incomplete

  # build_db reads @config_file, so it has to be assigned first
  @config_file = attributes[:config_file]
  build_db

  @worksheet  = @db.worksheet_by_title(@page_name)
  @debug      = attributes[:debug]
  @max_selves = attributes[:max_selves]

  # optional collections stay unset unless the caller supplied them
  @prerequisites  = attributes[:prerequisites].clone  if attributes[:prerequisites]
  @conflicts_with = attributes[:conflicts_with].clone if attributes[:conflicts_with]
  @subsumes       = attributes[:subsumes].clone       if attributes[:subsumes]
end
|
121
|
+
|
122
|
+
# If the agent does not have any conflicting processes (max_selves or conflicts_with)
|
123
|
+
# and if the entry is ready (field 'ready' has a 1), and all prerequisite fields have a 1,
|
124
|
+
# gets the GoogleDrive::List record, and passes it to the supplied agent_code PROC as argument.
|
125
|
+
# This PROC must return a required boolean field indicating success or failure, and an optional
|
126
|
+
# hash of key - value fields that will be updated on the GoogleDrive::List record. Note, the updates
|
127
|
+
# are made regardless of the value of success. In fact, the agent can be configured to update
|
128
|
+
# different fields based on success or failure. Also, note that any value can be stored in the
|
129
|
+
# hash. This allows the agent to communicate any useful information to the google spreadsheet for other
|
130
|
+
# agents (SpreadsheetAgent::Agent, SpreadsheetAgent::Runner, or human) to use. The PROC must try at all
|
131
|
+
# costs to avoid terminating. If an error is encountered, it should return false for the success field
|
132
|
+
# to signal that the process failed. If no errors are encountered it should return true for the success
|
133
|
+
# field.
|
134
|
+
#
|
135
|
+
# Exits successfully, enters a 1 in the agent_name field
|
136
|
+
# agent.process! do |entry|
|
137
|
+
# true
|
138
|
+
# end
|
139
|
+
#
|
140
|
+
# Same, but also updates the 'notice' field in the record along with the 1 in the agent_name field
|
141
|
+
# agent.process! do |entry|
|
142
|
+
# [true, {:notice => 'There were 30 files processed'}]
|
143
|
+
# end
|
144
|
+
#
|
145
|
+
# Fails, enters F:#{hostname} in the agent_name field
|
146
|
+
# agent.process! do |entry|
|
147
|
+
# false
# end
|
148
|
+
#
|
149
|
+
# Same, but also updates the 'notice' field in the record along with the failure notice
|
150
|
+
# agent.process! do |entry|
|
151
|
+
# [false, {:notice => 'There were 10 files left to process!' }]
|
152
|
+
# end
|
153
|
+
#
|
154
|
+
# This agent passes different parameters based on success or failure
|
155
|
+
# agent.process! do |entry|
|
156
|
+
# if $success
|
157
|
+
# true
|
158
|
+
# else
|
159
|
+
# [ false, {:notice => 'there were 10 remaining files'}]
|
160
|
+
# end
|
161
|
+
# end
|
162
|
+
#
|
163
|
+
# Runs +agent_code+ against this agent's entry when nothing prevents it.
#
# Unless @debug is set, output is captured with CaptureIO for the duration
# of the call. The capture is always stopped before returning, including on
# the early exits (conflicts, missing entry, entry not runnable), so the
# process never keeps running with redirected output.
#
# Returns:
# * true  when a conflict was detected, the entry was not runnable, or
#   +agent_code+ ran without raising (whether it reported success or failure)
# * false when no entry matched the keys, or when an exception was raised;
#   in the latter case the captured output is passed to mail_error
def process!(&agent_code)
  @worksheet.reload

  capture_output = nil
  unless @debug
    capture_output = CaptureIO.new
    capture_output.start
  end

  failed = false
  begin
    return true if has_conflicts

    runnable, entry = run_entry
    return false unless entry
    return true unless runnable

    # agent_code returns either a bare boolean or [boolean, updates_hash]
    success, update_entry = agent_code.call(entry)
    if success
      complete_entry(update_entry)
    else
      fail_entry(update_entry)
    end
    true
  rescue
    # written while the capture is still active so it ends up in the mailed report
    $stderr.puts "#{ $! }"
    failed = true
    false
  ensure
    # runs on every exit path above, including the early returns
    if capture_output
      captured = capture_output.stop
      mail_error(captured) if failed
    end
  end
end
|
197
|
+
|
198
|
+
# Returns the GoogleDrive::List object for the specified keys
|
199
|
+
# Finds the first row of @worksheet.list whose key fields equal @keys.
#
# A key field from @config['key_fields'] takes part in the comparison when
# it is marked "required" or when a value for it is present in @keys.
#
# Returns the matching row, or nil when there is no worksheet or no row
# matches (previously the no-match case leaked the return value of the
# row iteration, which is truthy and made callers believe a row was found).
def get_entry
  return nil unless @worksheet

  key_fields = @config['key_fields']
  # the set of fields to compare is the same for every row, so compute it once
  compared_keys = key_fields.keys.reject do |key_field|
    !key_fields[key_field]["required"] && !@keys[key_field]
  end

  @worksheet.list.find do |this_row|
    compared_keys.all? { |key| this_row[key] == @keys[key] }
  end
end
|
218
|
+
|
219
|
+
private
|
220
|
+
|
221
|
+
# Scans the output of `ps -eo pid,command` for processes that should keep
# this agent from running.
#
# * With @max_selves set, lines containing the basename of $0 are counted;
#   reaching @max_selves is a conflict.
# * With @conflicts_with set (pattern => limit), lines matching a pattern
#   are counted; reaching that pattern's limit is a conflict.
#
# Lines that look like an editor session (emacs/vi/vim/pico) and the line of
# this very process are ignored. The own process is recognised by comparing
# the PID column exactly, so unrelated processes whose PID or command line
# merely contains the same digits are still counted.
#
# Returns nil when neither limit is configured, true on conflict or when
# the process list could not be inspected, false otherwise.
def has_conflicts
  return unless @max_selves || @conflicts_with # nothing conflicts here

  self_name = File.basename($0)
  own_pid = Process.pid.to_s
  running_conflicters = Hash.new(0)

  begin
    # capture3 waits for ps and closes every pipe, so no descriptors or
    # zombie processes are left behind
    ps_listing, _ps_errors, _ps_status = Open3.capture3('ps', '-eo', 'pid,command')

    ps_listing.each_line do |line|
      next if line.match(/emacs\s+|vim*\s+|pico\s+/)
      next if line.strip.split(/\s+/, 2).first == own_pid

      # plain substring test: the script name is not a regular expression
      if @max_selves && line.include?(self_name)
        running_conflicters[@agent_name] += 1
        if running_conflicters[@agent_name] == @max_selves
          $stderr.puts "max_selves limit reached" if @debug
          return true
        end
      end

      next unless @conflicts_with

      @conflicts_with.keys.each do |conflicter|
        next unless line.match(conflicter)

        running_conflicters[conflicter] += 1
        if running_conflicters[conflicter] >= @conflicts_with[conflicter]
          $stderr.puts "conflicts with #{ conflicter }" if @debug
          return true
        end
      end
    end

    false
  rescue
    # being unable to check is treated as a conflict, the agent stays idle
    $stderr.puts "Couldnt check conflicts #{ $! }" if @debug
    true
  end
end
|
275
|
+
|
276
|
+
# this call initiates a race resistant attempt to make sure that there is only 1
|
277
|
+
# clear 'winner' among N potential agents attempting to run the same goal on the
|
278
|
+
# same spreadsheet agent's cell
|
279
|
+
# Decides whether this agent may work on its entry and, if so, claims it.
#
# Returns nil when no entry matches the keys. Otherwise returns a pair
# [runnable, entry]; runnable is false when the entry is not ready, is
# already complete, already carries an r:/1/F: status for this agent, has an
# unfinished prerequisite, or when the claim was lost to another host.
#
# The claim writes "r:<hostname>" into the agent's field, saves, waits three
# seconds, reloads the worksheet and checks that the hostname in the field is
# still this machine's.
def run_entry
  entry = get_entry

  # human readable "key value key value " prefix used in debug messages
  described_keys = @keys.keys.select { |k| @config['key_fields'][k] && @keys[k] }
  label = described_keys.inject('') do |text, key|
    text + [ key, @keys[key] ].join(' ') + " "
  end

  if !entry
    $stderr.puts "#{ label } is not supported on #{ @page_name }" if @debug
    return
  end

  if entry['ready'] != "1"
    $stderr.puts "#{ label } is not ready to run #{ @agent_name }" if @debug
    return false, entry
  end

  if entry['complete'] == "1"
    $stderr.puts "All goals are completed for #{ label }" if @debug
    return false, entry
  end

  current = entry[@agent_name]
  if current
    status, running_hostname = current.split(':')

    if status == 'r'
      $stderr.puts " #{ label } is already running #{ @agent_name } on #{ running_hostname }" if @debug
      return false, entry
    elsif status == "1"
      $stderr.puts " #{ label } has already run #{ @agent_name }" if @debug
      return false, entry
    elsif status == 'F'
      $stderr.puts " #{ label } has already Failed #{ @agent_name }" if @debug
      return false, entry
    end
  end

  if @prerequisites
    @prerequisites.each do |prereq_field|
      next if entry[prereq_field] == "1"

      $stderr.puts " #{ label } has not finished #{ prereq_field }" if @debug
      return false, entry
    end
  end

  # claim the cell for this machine, then give competing agents time to
  # overwrite it before looking again
  hostname = Socket.gethostname
  begin
    entry.update @agent_name => "r:#{ hostname }"
    @worksheet.save
    sleep 3
    @worksheet.reload
  rescue GoogleDrive::Error
    # this is a collision, which is to be treated as if it is not runnable
    $stderr.puts " #{ label } lost #{ @agent_name } on #{hostname}" if @debug
    return false, entry
  end

  _status, winning_hostname = entry[@agent_name].split(':')
  return true, entry if hostname == winning_hostname

  $stderr.puts " #{ label } lost #{ @agent_name } on #{hostname}" if @debug
  return false, entry
end
|
357
|
+
|
358
|
+
# Marks the agent's goal as done on its entry.
#
# +update_entry+ is the optional hash of extra field values returned by the
# agent code (nil is treated as an empty hash). A 1 is added to it for every
# field in @subsumes and for the agent's own field, then the hash is written
# to the row returned by get_entry and the worksheet is saved.
# Returns whatever @worksheet.save returns.
def complete_entry(update_entry)
  update_entry = {} if update_entry.nil?

  (@subsumes || []).each do |subsumed_agent|
    update_entry[subsumed_agent] = 1
  end
  update_entry[@agent_name] = 1

  get_entry.update(update_entry)
  @worksheet.save
end
|
374
|
+
|
375
|
+
# Marks the agent's goal as failed on its entry.
#
# +update_entry+ is the optional hash of extra field values returned by the
# agent code (nil is treated as an empty hash). The agent's own field is set
# to "F:<hostname>", the hash is written to the row returned by get_entry and
# the worksheet is saved. Returns whatever @worksheet.save returns.
def fail_entry(update_entry)
  failure_fields = update_entry.nil? ? { } : update_entry
  failure_fields[@agent_name] = "F:#{ Socket.gethostname }"

  get_entry.update(failure_fields)
  @worksheet.save
end
|
385
|
+
|
386
|
+
# Mails +error_message+ (converted with to_s) through smtp.gmail.com.
#
# Sender, recipient and SMTP credentials come from @config ('reply_email',
# 'send_to', 'guser', 'gpass'); the subject is the hostname, the agent's
# keys and the agent name.
#
# Delivery is best effort: any error is reported on $stderr and swallowed so
# that a mail problem never takes the agent down.
def mail_error(error_message)
  key_summary = @keys.keys.map { |key| [key, @keys[key] ].join(' ') + " " }.join
  hostname = Socket.gethostname
  prefix = [hostname, key_summary, @agent_name ].join(' ')

  begin
    # The blocks handed to Mail.defaults and Mail.new are evaluated in the
    # context of Mail's own objects, where @config is not this agent's
    # @config. Everything they need is therefore copied into local
    # variables, which the blocks can still see.
    smtp_user     = @config['guser']
    smtp_password = @config['gpass']
    mail_from     = @config['reply_email']
    mail_to       = @config['send_to']
    mail_body     = error_message.to_s

    Mail.defaults do
      delivery_method :smtp, {
        :address => "smtp.gmail.com",
        :port => 587,
        :domain => hostname,
        :user_name => smtp_user,
        :password => smtp_password,
        :authentication => 'plain',
        :enable_starttls_auto => true }
    end

    mail = Mail.new do
      from mail_from
      to mail_to
      subject prefix
      body mail_body
    end

    mail.deliver!
  rescue
    # keep the best-effort contract, but leave a trace of the lost report
    $stderr.puts "Could not mail error report for #{ prefix }: #{ $! }"
  end
end
|
417
|
+
end
|
418
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# Author: Darin London
|
2
|
+
# The license of this source is "MIT Licence"
|
3
|
+
|
4
|
+
require 'google_drive'
|
5
|
+
require 'psych'
|
6
|
+
|
7
|
+
module SpreadsheetAgent
|
8
|
+
|
9
|
+
# SpreadsheetAgent::Db is a class that is meant to be extended by SpreadsheetAgent classes. It
|
10
|
+
# stores shared code to instantiate and provide access to a GoogleDrive object and
|
11
|
+
# GoogleDrive::Spreadsheet object for use by the extending classes to access their Google Spreadsheets
|
12
|
+
class Db

  # The spreadsheet object returned by the session's spreadsheet_by_title
  # for the configured 'spreadsheet_name'. Set by build_db; read-only.
  attr_reader :db

  # The object returned by GoogleDrive.login for the guser and gpass in the
  # :config. Set by build_db; read-only.
  attr_reader :session

  # The data structure loaded from the YAML :config_file. Set by build_db;
  # read-only.
  attr_reader :config

  # Path of the YAML configuration file. Extending classes assign
  # @config_file before calling build_db to override the default, which is
  # ../config/agent.conf.yml relative to the directory containing the
  # calling script $0. Read-only from the outside.
  attr_reader :config_file

  # This is for internal use by SpreadsheetAgent classes that extend SpreadsheetAgent::Db
  #
  # Loads the configuration, verifies that at least one key field is marked
  # as required, logs in and looks up the configured spreadsheet.
  #
  # Raises SpreadsheetAgentError when the configuration has no required key
  # field; this includes a configuration without a usable 'key_fields'
  # section, which previously surfaced as a NoMethodError.
  def build_db
    build_config
    unless required_key_field?
      raise SpreadsheetAgentError, "Your configuration must have at least one required key_fields key"
    end
    @session = GoogleDrive.login(@config['guser'], @config['gpass'])
    @db = @session.spreadsheet_by_title(@config['spreadsheet_name'])
  end

  private

  # Loads @config from @config_file, falling back to the default location
  # next to the calling script when no file was given.
  def build_config
    @config_file = find_bin + '../config/agent.conf.yml' if @config_file.nil?
    @config = Psych.load_file(@config_file)
  end

  # Absolute directory of the calling script $0, with a trailing slash.
  def find_bin
    File.expand_path(File.dirname($0)) + '/'
  end

  # True when 'key_fields' is a hash in which at least one field definition
  # has a truthy 'required' value.
  def required_key_field?
    key_fields = @config.is_a?(Hash) ? @config['key_fields'] : nil
    return false unless key_fields.is_a?(Hash)

    key_fields.values.any? { |options| options.is_a?(Hash) && options['required'] }
  end
end
|
55
|
+
end
|