spreadsheet_agent 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/spreadsheet_agent.rb +418 -0
- data/lib/spreadsheet_agent/db.rb +55 -0
- data/lib/spreadsheet_agent/error.rb +12 -0
- data/lib/spreadsheet_agent/runner.rb +306 -0
- data/test/agent_bin/othergoal_agent.rb +17 -0
- data/test/agent_bin/testgoal_agent.rb +18 -0
- data/test/spreadsheet_agent_db_test.rb +13 -0
- data/test/spreadsheet_agent_runner_test.rb +468 -0
- data/test/spreadsheet_agent_test.rb +411 -0
- metadata +157 -0
@@ -0,0 +1,418 @@
|
|
1
|
+
# Author: Darin London
|
2
|
+
# The license of this source is "MIT Licence"
|
3
|
+
|
4
|
+
require 'spreadsheet_agent/db'
|
5
|
+
require 'socket'
|
6
|
+
require 'open3'
|
7
|
+
require 'capture_io'
|
8
|
+
require 'mail'
|
9
|
+
|
10
|
+
# A Distributed Agent System using Google Spreadsheets
|
11
|
+
#
|
12
|
+
# Version 0.01
|
13
|
+
#
|
14
|
+
# SpreadsheetAgent is a framework for creating massively distributed pipelines
|
15
|
+
# across many different servers, each using the same google spreadsheet as a
|
16
|
+
# control panel. It is extensible and flexible. It doesn't specify what
|
17
|
+
# goals any pipeline should be working towards, or which goals are prerequisites
|
18
|
+
# for other goals, but it does provide logic for easily defining these relationships
|
19
|
+
# based on your own needs. It does this by providing a subsumption architecture,
|
20
|
+
# whereby many small, highly focused agents are written to perform specific goals,
|
21
|
+
# and also know what resources they require to perform them. Agents can be coded to
|
22
|
+
# subsume other agents upon successful completion. In addition, it is
|
23
|
+
# designed from the beginning to support the creation of simple human-computational
|
24
|
+
# workflows.
|
25
|
+
#
|
26
|
+
# SpreadsheetAgent requires GoogleDrive[http://rubygems.org/gems/google_drive], and works with a Google Spreadsheet with some or all worksheets
|
27
|
+
# formatted according to the following:
|
28
|
+
# * The top row of a page to be processed holds the field names for all entry records in subsequent rows
|
29
|
+
# * You can define any fields necessary, but you must specify a 'ready' and a 'complete' field
|
30
|
+
# * You must define at least 1 key field, and the key field must be specified as required in the :config (see SpreadsheetAgent::Db)
|
31
|
+
# * You should then define fields named for agent_bin/#{ field_name }_agent.rb for each agent that you plan to deploy in your pipeline
|
32
|
+
#
|
33
|
+
module SpreadsheetAgent
|
34
|
+
|
35
|
+
# SpreadsheetAgent::Agent is designed to make it easy to create a single task which connects to
|
36
|
+
# a field within a record on a page within the configured SpreadsheetAgent compatible Google Spreadsheet,
|
37
|
+
# runs, and reports whether the job completed or ended in error. An agent can be configured to only run
|
38
|
+
# when certain prerequisite fields have completed. The data in these fields can be filled in by other
|
39
|
+
# SpreadsheetAgent::Agents, SpreadsheetAgent::Runners, or humans. Compute node configuration is available
|
40
|
+
# to prevent the agent from running more than a certain number of instances of itself, or not run if certain
|
41
|
+
# other agents or processes are running on the node. Finally, an agent can be configured to subsume another
|
42
|
+
# agent, and fill in the completion field for that agent in addition to its own when it completes successfully.
|
43
|
+
#
|
44
|
+
# extends SpreadsheetAgent::Db
|
45
|
+
class Agent < SpreadsheetAgent::Db
|
46
|
+
|
47
|
+
# Name of the field (column) on the page in which this agent reports its status.
attr_accessor :agent_name

# Title of the page in the Google Spreadsheet that holds the record the agent works on.
attr_accessor :page_name

# Hash of key field name => value identifying the record for which the agent runs.
# The key field names are defined in config/agent.conf.yml, and every key configured
# there as 'required: 1' must be present in this hash.
attr_accessor :keys

# Boolean. When true, the agent prints verbosely to STDERR. When false, output is
# captured while process! runs and, if process! hits an error, it is mailed to the
# address given by the :config send_to value.
attr_accessor :debug

# Optional array of prerequisite field names. Each of these fields must contain a 1
# for the record before the agent will attempt to run.
attr_accessor :prerequisites

# Optional integer. This works on Linux with ps. The agent will not attempt to run
# once max_selves instances of itself are found running.
attr_accessor :max_selves

# Optional hash of process_name => max_instances. This works on Linux with ps. When
# the given number of processes whose ps line matches process_name is found, the
# agent will not attempt to run.
attr_accessor :conflicts_with

# Optional array of fields on the record which this agent subsumes. When the agent
# completes successfully these fields receive a 1 in addition to the agent's own field.
attr_accessor :subsumes

# Read-only access to the GoogleDrive::Worksheet that is being accessed by the agent.
attr_reader :worksheet
|
82
|
+
|
83
|
+
# Creates a new SpreadsheetAgent::Agent from a hash of attributes.
#
# == required configuration parameters:
# * agent_name
# * page_name
# * keys
#
# == optional parameters:
# * config_file: (see SpreadsheetAgent::Db)
# * debug
# * prerequisites
# * max_selves
# * conflicts_with
# * subsumes
#
# Raises SpreadsheetAgentError when any required parameter is missing. The keys,
# prerequisites, conflicts_with and subsumes collections are cloned, so later changes
# to the caller's objects do not affect the agent.
def initialize(attributes)
  @agent_name = attributes[:agent_name]
  @page_name = attributes[:page_name]

  # Validate before cloning: on Rubies where nil cannot be cloned, a missing :keys
  # would otherwise surface as a TypeError instead of the intended error.
  unless @agent_name && @page_name && attributes[:keys]
    raise SpreadsheetAgentError, "agent_name, page_name, and keys attributes are required!"
  end
  @keys = attributes[:keys].clone

  # build_db reads @config_file (nil selects the default location) and sets @db.
  @config_file = attributes[:config_file]
  build_db

  @worksheet = @db.worksheet_by_title(@page_name)
  @debug = attributes[:debug]
  @max_selves = attributes[:max_selves]

  @prerequisites = attributes[:prerequisites].clone if attributes[:prerequisites]
  @conflicts_with = attributes[:conflicts_with].clone if attributes[:conflicts_with]
  @subsumes = attributes[:subsumes].clone if attributes[:subsumes]
end
|
121
|
+
|
122
|
+
# Runs the supplied agent_code block for the record identified by :keys.
#
# The block is only called when the agent has no conflicting processes (max_selves or
# conflicts_with), the entry is ready (field 'ready' has a 1), and run_entry reports the
# record as runnable (which includes all prerequisite fields having a 1). The block
# receives the GoogleDrive::List record as its argument and must return a required
# boolean indicating success or failure, optionally followed by a hash of key - value
# fields to update on the record. The updates are made regardless of the value of
# success, so an agent can report different fields on success and on failure, and any
# value can be stored to communicate with other agents (SpreadsheetAgent::Agent,
# SpreadsheetAgent::Runner, or human).
#
# The block should avoid terminating: on error it should return false for success.
# If it raises anyway, the error is printed to STDERR and, unless :debug is set, the
# captured output is mailed via mail_error.
#
# Returns true when the agent did not run because of a conflict or because the entry
# was not runnable, and when the block ran and its result (success or failure) was
# recorded. Returns false when no record matches :keys or when an error was raised.
#
# Exits successfully, enters a 1 in the agent_name field
#   agent.process! do |entry|
#     true
#   end
#
# Same, but also updates the 'notice' field in the record along with the 1 in the agent_name field
#   agent.process! do |entry|
#     [true, {:notice => 'There were 30 files processed'}]
#   end
#
# Fails, enters F:#{hostname} in the agent_name field
#   agent.process! do |entry|
#     false
#   end
#
# Same, but also updates the 'notice' field in the record along with the failure notice
#   agent.process! do |entry|
#     [false, {:notice => 'There were 10 files left to process!' }]
#   end
#
# This agent passes different parameters based on success or failure
#   agent.process! do |entry|
#     if success
#       true
#     else
#       [ false, {:notice => 'there were 10 remaining files'}]
#     end
#   end
#
def process!(&agent_code)
  @worksheet.reload

  # Output is only captured when not debugging, so debug output stays visible.
  capture_output = nil
  unless @debug
    capture_output = CaptureIO.new
    capture_output.start
  end

  no_problems = true
  captured = nil
  begin
    return true if has_conflicts
    runnable, entry = run_entry
    return false unless entry
    return true unless runnable

    success, update_entry = agent_code.call(entry)
    if success
      complete_entry(update_entry)
    else
      fail_entry(update_entry)
    end
  rescue
    # Printed while the capture is still active so the message is part of the report.
    $stderr.puts "#{ $! }"
    no_problems = false
  ensure
    # Always stop the capture, including on the early returns above; otherwise the
    # capture started before the begin block would never be stopped.
    captured = capture_output.stop unless capture_output.nil?
  end

  mail_error(captured) unless capture_output.nil? || no_problems
  no_problems
end
|
197
|
+
|
198
|
+
# Returns the first row of the worksheet list whose key fields equal the values in
# :keys, or nil when there is no worksheet or no row matches.
#
# A key field from @config['key_fields'] takes part in the comparison when it is
# configured as required or when a value for it was supplied in :keys; optional key
# fields without a supplied value are ignored.
def get_entry
  return nil unless @worksheet

  key_fields = @config['key_fields']
  # The set of compared fields does not depend on the row, so compute it once.
  compared_fields = key_fields.keys.select do |key_field|
    key_fields[key_field]["required"] || @keys[key_field]
  end

  @worksheet.list.each do |this_row|
    return this_row if compared_fields.all? { |key| this_row[key] == @keys[key] }
  end

  # Explicit nil: falling through would return the value of each, which is not nil,
  # and callers rely on nil to detect a missing record.
  nil
end
|
218
|
+
|
219
|
+
private
|
220
|
+
|
221
|
+
# Reports whether the agent must not run because of processes already running on
# this node.
#
# Returns false when neither max_selves nor conflicts_with is configured. Otherwise
# the process listing is scanned line by line, ignoring editor processes (emacs, vi/vim,
# pico) and this process itself:
# * with max_selves set, returns true once that many lines contain the basename of $0;
# * with conflicts_with set, returns true once the number of lines matching one of its
#   keys reaches the limit stored for that key.
# Returns true as well when the listing cannot be obtained or scanned, so that an
# agent with configured limits never runs unchecked.
#
# process_lines is optional: an enumerable of ps output lines ("pid command") to scan
# instead of running ps. It defaults to the output of list_processes.
def has_conflicts(process_lines = nil)
  return false unless @max_selves || @conflicts_with # nothing conflicts here

  self_name = File.basename($0)
  own_pid = Process.pid.to_s
  selves = 0
  running_conflicters = Hash.new(0)

  begin
    process_lines ||= list_processes
    process_lines.each do |line|
      next if line.match(/emacs\s+|vim*\s+|pico\s+/)
      # Compare the pid column only; matching the pid anywhere in the line would also
      # skip unrelated processes whose pid or arguments merely contain these digits.
      next if line.split.first == own_pid

      if @max_selves && line.include?(self_name)
        selves += 1
        if selves >= @max_selves
          $stderr.puts "max_selves limit reached" if @debug
          return true
        end
      end

      next unless @conflicts_with

      @conflicts_with.each do |conflicter, max_instances|
        next unless line.match(conflicter)

        running_conflicters[conflicter] += 1
        if running_conflicters[conflicter] >= max_instances
          $stderr.puts "conflicts with #{ conflicter }" if @debug
          return true
        end
      end
    end
    false
  rescue
    $stderr.puts "Couldnt check conflicts #{ $! }" if @debug
    true
  end
end

# Returns the lines printed by `ps -eo pid,command`. Raises when ps cannot be run or
# exits unsuccessfully. Open3.capture3 waits for ps and closes all of its pipes.
def list_processes
  listing, _errors, status = Open3.capture3('ps', '-eo', 'pid,command')
  raise "ps exited with status #{ status.exitstatus }" unless status.success?

  listing.lines
end
|
275
|
+
|
276
|
+
# Decides whether this agent may run for the record identified by :keys, and if so
# makes a race-resistant attempt to be the single 'winner' among N agents trying to
# run the same goal on the same spreadsheet cell.
#
# Returns nil when no record matches :keys. Otherwise returns [runnable, entry]:
# runnable is false when the record is not ready, is already complete, already shows a
# running (r), finished (1) or failed (F) status for this agent, has an unfinished
# prerequisite, or when the claim on the cell is lost. To claim the cell the agent
# writes "r:<hostname>" into its field, saves, waits 3 seconds, reloads, and checks
# that the hostname stored in the field is still its own.
def run_entry
  entry = get_entry

  described_keys = @keys.keys.select { |k| @config['key_fields'][k] && @keys[k] }
  output = described_keys.inject('') do |text, key|
    text + [ key, @keys[key] ].join(' ') + " "
  end

  unless entry
    $stderr.puts "#{ output } is not supported on #{ @page_name }" if @debug
    return
  end

  # Every refusal reports (in debug mode) and answers with the same pair.
  refuse = lambda do |message|
    $stderr.puts message if @debug
    [false, entry]
  end

  return refuse.call("#{ output } is not ready to run #{ @agent_name }") unless entry['ready'] == "1"
  return refuse.call("All goals are completed for #{ output }") if entry['complete'] == "1"

  if entry[@agent_name]
    status, running_hostname = entry[@agent_name].split(':')
    already = case status
              when 'r' then " #{ output } is already running #{ @agent_name } on #{ running_hostname }"
              when "1" then " #{ output } has already run #{ @agent_name }"
              when 'F' then " #{ output } has already Failed #{ @agent_name }"
              end
    return refuse.call(already) if already
  end

  if @prerequisites
    @prerequisites.each do |prereq_field|
      next if entry[prereq_field] == "1"

      return refuse.call(" #{ output } has not finished #{ prereq_field }")
    end
  end

  # first attempt to set the hostname of the machine as the value of the agent
  hostname = Socket.gethostname
  lost = " #{ output } lost #{ @agent_name } on #{hostname}"
  begin
    entry.update @agent_name => "r:#{ hostname }"
    @worksheet.save
    sleep 3
    @worksheet.reload
  rescue GoogleDrive::Error
    # this is a collision, which is to be treated as if it is not runnable
    return refuse.call(lost)
  end

  _status, running_hostname = entry[@agent_name].split(':')
  return [true, entry] if hostname == running_hostname

  refuse.call(lost)
end
|
357
|
+
|
358
|
+
# Records successful completion on the record: writes a 1 into the agent's own field
# and into every subsumed field, together with any extra fields in update_entry
# (a hash, or nil for none), then saves the worksheet.
#
# The values are merged into a copy, so the hash handed back by the agent code is
# never modified and may be frozen.
def complete_entry(update_entry)
  updates = update_entry.nil? ? {} : update_entry.dup

  if @subsumes
    @subsumes.each { |subsumed_agent| updates[subsumed_agent] = 1 }
  end
  updates[@agent_name] = 1

  entry = get_entry
  entry.update updates
  @worksheet.save
end
|
374
|
+
|
375
|
+
# Records failure on the record: writes "F:<hostname>" into the agent's own field,
# together with any extra fields in update_entry (a hash, or nil for none), then saves
# the worksheet.
#
# The values are merged into a copy, so the hash handed back by the agent code is
# never modified and may be frozen.
def fail_entry(update_entry)
  updates = update_entry.nil? ? {} : update_entry.dup
  updates[@agent_name] = "F:#{ Socket.gethostname }"

  entry = get_entry
  entry.update updates
  @worksheet.save
end
|
385
|
+
|
386
|
+
# Mails error_message (converted with to_s) through smtp.gmail.com, logging in with the
# 'guser' and 'gpass' config values, from the 'reply_email' address to the 'send_to'
# address. The subject is the hostname, the key name/value pairs and the agent name.
#
# Delivery is best effort: any error while configuring or sending is reported on
# STDERR and otherwise ignored.
def mail_error(error_message)
  output = @keys.keys.map { |key| [key, @keys[key] ].join(' ') + " " }.join
  prefix = [Socket.gethostname, output, @agent_name ].join(' ')

  # The blocks given to Mail.defaults and Mail.new are evaluated with instance_eval, so
  # inside them @config would be looked up on the Mail object (and be nil). Copy what is
  # needed into locals, which the blocks can still see.
  smtp_user = @config['guser']
  smtp_password = @config['gpass']
  sender = @config['reply_email']
  recipient = @config['send_to']
  message_body = error_message.to_s

  begin
    Mail.defaults do
      delivery_method :smtp, {
        :address => "smtp.gmail.com",
        :port => 587,
        :domain => Socket.gethostname,
        :user_name => smtp_user,
        :password => smtp_password,
        :authentication => 'plain',
        :enable_starttls_auto => true }
    end

    mail = Mail.new do
      from sender
      to recipient
      subject prefix
      body message_body
    end

    mail.deliver!
  rescue
    # Never let a reporting problem take the agent down, but do not hide it either.
    $stderr.puts "Could not mail error report: #{ $! }"
  end
end
|
417
|
+
end
|
418
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# Author: Darin London
|
2
|
+
# The license of this source is "MIT Licence"
|
3
|
+
|
4
|
+
require 'google_drive'
|
5
|
+
require 'psych'
|
6
|
+
|
7
|
+
module SpreadsheetAgent
|
8
|
+
|
9
|
+
# SpreadsheetAgent::Db is a class that is meant to be extended by SpreadsheetAgent classes. It
# stores shared code to load the configuration, log in to GoogleDrive, and look up the configured
# GoogleDrive::Spreadsheet for use by the extending classes to access their Google Spreadsheets.
class Db

  # The GoogleDrive::Spreadsheet object found by the 'spreadsheet_name' title in the :config. It can
  # be used to query information from the google spreadsheet using its API. Read-only.
  attr_reader :db

  # The GoogleDrive session returned by logging in with the guser and gpass in the :config. Read-only.
  attr_reader :session

  # The hash that is loaded from the YAML :config_file. Read-only.
  attr_reader :config

  # Path of the YAML configuration file. Extending classes set @config_file before calling build_db
  # to override the location; it must be a path to a file which matches the template in
  # config/agent.conf.yml. When it is nil, ../config/agent.conf.yml relative to the directory
  # containing the calling script $0 is loaded. Read-only.
  attr_reader :config_file

  # This is for internal use by SpreadsheetAgent classes that extend SpreadsheetAgent::Db.
  #
  # Loads the configuration, then logs in and stores the session in @session and the spreadsheet
  # in @db. Raises SpreadsheetAgentError unless the configuration has a 'key_fields' hash with at
  # least one entry whose 'required' value is set.
  def build_db
    build_config
    unless required_key_field?
      raise SpreadsheetAgentError, "Your configuration must have at least one required key_fields key"
    end
    @session = GoogleDrive.login(@config['guser'], @config['gpass'])
    @db = @session.spreadsheet_by_title(@config['spreadsheet_name'])
  end

  private

  # True when @config holds a 'key_fields' hash in which at least one field is marked 'required'.
  # A missing or malformed section counts as "no required key" rather than raising NoMethodError.
  def required_key_field?
    return false unless @config.is_a?(Hash)

    key_fields = @config['key_fields']
    return false unless key_fields.is_a?(Hash)

    key_fields.any? { |_name, options| options.is_a?(Hash) && options['required'] }
  end

  # Loads @config from @config_file, falling back to the default location when none was given.
  def build_config
    if @config_file.nil?
      @config_file = find_bin + '../config/agent.conf.yml'
    end
    @config = Psych.load_file(@config_file)
  end

  # Absolute path, with trailing slash, of the directory containing the calling script $0.
  def find_bin
    File.expand_path(File.dirname($0)) + '/'
  end
end
|
55
|
+
end
|