taverna-scufl 0.7.1

@@ -0,0 +1,24 @@
+ module Document # :nodoc:
+
+   #Input or output data
+   #
+   #value - the data value or a (possibly nested) list of data values
+   class Data
+     attr_accessor :value, :annotation
+
+     def initialize(value=nil, annotation=nil)
+       @value = value
+       @annotation = annotation
+     end
+
+     def eql?(other)
+       @value.eql?(other.value) and @annotation.eql?(other.annotation)
+     end
+
+     def ==(other)
+       @value == other.value and @annotation == other.annotation
+     end
+
+   end
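+
+   # A brief usage sketch (illustrative, not part of the original source);
+   # the value and annotation below are made up:
+   #
+   #   datum = Document::Data.new(["a", "b"], "example annotation")
+   #   datum.value                                            # => ["a", "b"]
+   #   datum == Document::Data.new(["a", "b"], "example annotation")  # => true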
+
+ end
@@ -0,0 +1,91 @@
+ require 'rexml/document'
+
+ module Document
+
+   class Report
+
+     attr_reader :processors
+     attr_accessor :id, :status
+
+     def initialize
+       @processors = Array.new
+     end
+
+     def self.from_xml(xml)
+       Reader.read(xml)
+     end
+
+     def self.from_document(document)
+       Reader.read(document)
+     end
+
+   end
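+
+   # A brief usage sketch (illustrative, not part of the original source);
+   # xml_string stands in for a real Taverna workflow progress report:
+   #
+   #   report = Document::Report.from_xml(xml_string)
+   #   report.status                         # e.g. "COMPLETE"
+   #   report.processors.each do |processor|
+   #     puts "#{processor.name}: #{processor.status}"
+   #   end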
+
+   class Processor
+     attr_accessor :name, :status, :time, :total, :number
+   end
+
+   class Reader
+
+     def self.read(report)
+       if report.kind_of?(REXML::Document)
+         document = report
+       else
+         document = REXML::Document.new(report)
+       end
+       root = document.root
+
+       return nil if not root
+
+       raise root.name + " doesn't appear to be a workflow report!" if root.name != "workflowReport"
+
+       create_report(root)
+     end
+
+     def self.create_report(element)
+       report = Report.new
+
+       id = element.attribute('workflowId')
+       report.id = id.value if id
+
+       status = element.attribute('workflowStatus')
+       report.status = status.value if status
+
+       element.elements['processorList'].each_element('processor') { |processor|
+         add_processor(processor, report)
+       }
+
+       report
+     end
+
+     def self.add_processor(element, report)
+       processor = Processor.new
+
+       name = element.attribute('name')
+       processor.name = name.value if name
+
+       if element.has_elements?
+         first_element = element.elements[1]
+         case first_element.name
+         when 'ProcessComplete'
+           processor.status = 'COMPLETE'
+           processor.time = first_element.attribute('TimeStamp')
+         when 'ProcessScheduled'
+           processor.status = 'SCHEDULED'
+           processor.time = first_element.attribute('TimeStamp')
+         when 'InvokingWithIteration'
+           processor.status = 'ITERATING'
+           processor.time = first_element.attribute('TimeStamp')
+           processor.number = first_element.attribute('IterationNumber')
+           processor.total = first_element.attribute('IterationTotal')
+         when 'ServiceFailure'
+           processor.status = 'FAILED'
+           processor.time = first_element.attribute('TimeStamp')
+         else
+           processor.status = 'UNKNOWN'
+         end
+       end
+       report.processors.push processor
+     end
+
+   end
+
+ end
@@ -0,0 +1,673 @@
+ require 'rubygems'
+ require 'builder'
+ require 'uri'
+ require 'date'
+ require 'rexml/document'
+ require 'net/http'
+ require 'baclava/reader'
+ require 'baclava/writer'
+ require 'document/report'
+ require 'document/data'
+
+ module Enactor # :nodoc:
+
+   #Base class for Taverna service errors.
+   class TavernaServiceError < StandardError
+   end
+
+   #Job did not complete.
+   #Raised by execute_sync()
+   class NotCompleteError < TavernaServiceError
+     def initialize(job_url, status)
+       super("Job #{job_url} not complete, status: #{status}")
+     end
+   end
+
+   #Could not create resource.
+   class CouldNotCreateError < TavernaServiceError
+     def initialize(url)
+       super("Expected 201 Created when uploading #{url}")
+     end
+   end
+
+
+   #Status messages that can be returned from Client#get_job_status().
+   #
+   #If finished?(status) is true, this means the job is finished,
+   #either successfully (COMPLETE), unsuccessfully (CANCELLED, FAILED), or
+   #that the job is no longer in the database (DESTROYED).
+   #
+   #When a job has just been created it will be in status NEW; after that
+   #it will immediately be on a queue and in the state QUEUED. Once the
+   #job has been picked up by a worker it will be in INITIALISING; this
+   #state might include the startup time of the worker and the time spent
+   #downloading the workflow and input data to the worker. The state PAUSED
+   #is not currently used. The FAILING state can occur if the workflow
+   #engine crashed; after clean-up, or if the workflow itself failed, the
+   #state will be FAILED.
+   #
+   #The job might at any time be set to the state CANCELLING by the user,
+   #which will stop execution of the workflow, leading to the state
+   #CANCELLED.
+   #
+   #If the workflow execution completed, the state will be set to COMPLETE,
+   #after which the workflow result data should be available using
+   #get_job_outputs_doc().
+   #
+   #If data about the job has been lost (probably because it's too old
+   #or has been deleted by the user), the state will be DESTROYED.
+   class Status
+     NEW = "NEW"
+     QUEUED = "QUEUED"
+     INITIALISING = "INITIALISING"
+     PAUSED = "PAUSED"
+     FAILING = "FAILING"
+     CANCELLING = "CANCELLING"
+     CANCELLED = "CANCELLED"
+     COMPLETE = "COMPLETE"
+     FAILED = "FAILED"
+     DESTROYED = "DESTROYED"
+     FINISHED = [COMPLETE, CANCELLED, DESTROYED, FAILED]
+     ALL = [NEW, QUEUED, INITIALISING, PAUSED, FAILING,
+            CANCELLING, CANCELLED, COMPLETE, FAILED, DESTROYED]
+
+     #Return true if the status is a finished status.
+     #
+     #This would normally include COMPLETE, CANCELLED, DESTROYED and FAILED.
+     def Status.finished?(status)
+       return FINISHED.include?(status)
+     end
+
+     #Check if a string is a valid status.
+     def Status.valid?(status)
+       ALL.include?(status)
+     end
+   end
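+
+   # A brief usage sketch (illustrative, not part of the original source):
+   #
+   #   Enactor::Status.valid?("COMPLETE")                     # => true
+   #   Enactor::Status.finished?(Enactor::Status::QUEUED)     # => false
+   #   Enactor::Status.finished?(Enactor::Status::FAILED)     # => true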
+
+   #Client library for accessing a Taverna Remote execution service.
+   #
+   #Since the service is a REST interface, this library reflects that to
+   #a certain degree and many of the methods return URLs to be used by
+   #other methods.
+   #
+   #The main methods of interest are, in order of a normal execution:
+   #
+   # execute_sync() -- Given a scufl document or the URL for a previously
+   #   uploaded workflow, and data as a hash or URL for previously
+   #   uploaded data, submit the job for execution, wait for completion
+   #   (or a timeout) and retrieve results. This is a blocking
+   #   convenience method that can be used instead of the methods below.
+   #
+   # upload_workflow() -- Given a scufl document as a string, upload the
+   #   workflow to the server for later execution. Return the URL for the
+   #   created workflow resource that can be used with submit_job().
+   #
+   # upload_data() -- Given a hash of input values to a
+   #   workflow run, upload the data to the user's collection.
+   #   Return the URL for the created data resource that can be used with
+   #   submit_job().
+   #
+   # submit_job() -- Given the URL for a workflow resource and optionally
+   #   the URL for an input data resource, submit the job to the server
+   #   to be executed. Return the URL to the created job resource.
+   #
+   # get_job_status() -- Get the status of the job. Return one of the values
+   #   from Status.
+   #
+   # finished?() -- Return true if the job is in a finished state. Note
+   #   that this also includes failed states.
+   #
+   # wait_for_job() -- Wait until the job has finished execution, or a
+   #   maximum timeout is exceeded.
+   #
+   # get_job_outputs() -- Get the outputs produced by the job. Return a
+   #   hash whose values are strings, lists of strings, or deeper lists.
+   #
+   #In addition to the stated exceptions, most of these methods may also raise
+   #Net::HTTPError or InvalidResponseError if anything goes wrong in communicating with the service.
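+   #
+   #A brief usage sketch of the blocking path (illustrative, not part of the
+   #original source); the URL, credentials, file name and input port name are
+   #hypothetical, and the input hash follows the conventions described above:
+   #
+   #  client = Enactor::Client.new("http://myserver.com:8080/tavernaService/v1/",
+   #                               "alice", "secret")
+   #  workflow_xml = File.read("workflow.xml")
+   #  inputs = { "gene_id" => "BRCA1" }
+   #  outputs = client.execute_sync(workflow_xml, nil, inputs)
+   #  outputs.each { |port, data| puts "#{port}: #{data.value.inspect}" } if outputs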
+   class Client
+
+     #Namespaces used by various XML documents.
+     NAMESPACES = {
+       :xscufl => 'http://org.embl.ebi.escience/xscufl/0.1alpha',
+       :baclava => 'http://org.embl.ebi.escience/baclava/0.1alpha',
+       :service => 'http://taverna.sf.net/service',
+       :xlink => 'http://www.w3.org/1999/xlink',
+       :dcterms => 'http://purl.org/dc/terms/'
+     }
+
+     #Mime types used by the REST protocol.
+     #
+     #See net.sf.taverna.service.interfaces.TavernaConstants.java
+     MIME_TYPES = {
+       :rest => 'application/vnd.taverna.rest+xml',       # For most of the REST documents
+       :scufl => 'application/vnd.taverna.scufl+xml',     # For Taverna workflows
+       :baclava => 'application/vnd.taverna.baclava+xml', # For Taverna's Baclava data documents
+       :report => 'application/vnd.taverna.report+xml',   # For Taverna's internal progress reports
+       :console => 'text/plain'                           # For Taverna's console
+     }
+
+     DEFAULT_TIMEOUT = 5 * 60 # in seconds
+     DEFAULT_REFRESH = 0.5    # in seconds
+
+     #Construct a Taverna remote execution service client accessing the service
+     #at the given base URL.
+     #
+     #Note that this constructor will not attempt to verify the URL or the
+     #credentials. To verify, call get_user_url(), which requires authentication.
+     #
+     #url -- The base URL for the service, normally ending in /v1/, for example:
+     #  "http://myserver.com:8080/tavernaService/v1/"
+     #
+     #username -- The username of a user that has been previously created or
+     #  registered in the web interface of the service.
+     #
+     #password -- The password of the user. Note that the password will be sent
+     #  over the wire using unencrypted HTTP Basic Auth, unless the URL starts
+     #  with "https".
+     def initialize(url, username, password)
+       @url = url
+       @username = username
+       @password = password
+     end
+
+     #private
+
+     #Get the capabilities document as a REXML::Document.
+     #
+     #This document contains the links to the main collections of the service.
+     def get_capabilities_doc
+       url = URI.parse(@url)
+       request = Net::HTTP::Get.new(url.path)
+       request['Accept'] = MIME_TYPES[:rest]
+       request.basic_auth @username, @password
+       response = Net::HTTP.start(url.host, url.port) {|http|
+         http.request(request)
+       }
+       response.value
+       REXML::Document.new(response.body)
+     end
+
+     #Get the URL for the current user's home on the server.
+     def get_user_url
+       capabilities_doc = get_capabilities_doc()
+       #currentUser = capabilities_doc.root.elements["{#{NAMESPACES[:service]}}currentUser"]
+       current_user = capabilities_doc.root.elements['currentUser']
+       current_user_url = current_user.attributes.get_attribute_ns(NAMESPACES[:xlink], 'href').value
+
+       url = URI.parse(current_user_url)
+       request = Net::HTTP::Get.new(url.path)
+       request['Accept'] = MIME_TYPES[:rest]
+       request.basic_auth @username, @password
+       response = Net::HTTP.start(url.host, url.port) {|http|
+         http.request(request)
+       }
+       response.error! unless response.kind_of?(Net::HTTPSuccess) or response.kind_of?(Net::HTTPRedirection)
+       response.header['Location']
+     end
+
+     #Get the user document as a REXML::Document object.
+     #
+     #This document contains the links to the user-owned collections,
+     #such as where to upload workflows and jobs.
+     def get_user_doc
+       url = URI.parse(get_user_url())
+       request = Net::HTTP::Get.new(url.path)
+       request['Accept'] = MIME_TYPES[:rest]
+       request.basic_auth @username, @password
+       response = Net::HTTP.start(url.host, url.port) {|http|
+         http.request(request)
+       }
+       response.value
+       REXML::Document.new(response.body)
+     end
+
+     #Get the URL to a user-owned collection.
+     #
+     #collection -- The collection name, either "workflows" or "datas"
+     def get_user_collection_url(collection)
+       user_doc = get_user_doc()
+
+       #collections = user_doc.root.elements["{#{NAMESPACES[:service]}}#{collection}"]
+       collections = user_doc.root.elements[collection]
+       return collections.attributes.get_attribute_ns(NAMESPACES[:xlink], 'href').value
+     end
+
+     #Get the URL to the output document for a job.
+     #
+     #It generally only makes sense to call this function if
+     #get_job_status() == Status::COMPLETE, but no check is enforced here.
+     #
+     #Return the URL to a data document produced by the job, or nil if the
+     #job has not (yet) produced any output.
+     #
+     #job_url -- The URL to a job resource previously created using
+     #  submit_job().
+     def get_job_outputs_url(job_url)
+       job_document = get_xml_doc(job_url)
+       #outputs_element = job_document.root.elements["{#{NAMESPACES[:service]}}outputs"]
+       outputs_element = job_document.root.elements['outputs']
+       return nil if not outputs_element
+       outputs_element.attributes.get_attribute_ns(NAMESPACES[:xlink], 'href').value
+     end
+
+     #Get the output document for a job.
+     #
+     #Return the output document as a REXML::Document object, or nil
+     #if the job doesn't have an output document (yet). This document can be
+     #parsed using parse_data_doc().
+     #
+     #job_url -- The URL to a job resource previously created using
+     #  submit_job().
+     def get_job_outputs_doc(job_url)
+       outputs_url = get_job_outputs_url(job_url)
+       return nil if not outputs_url
+       get_xml_doc(outputs_url, MIME_TYPES[:baclava])
+     end
+
+     #Retrieve an XML document from the given URL.
+     #
+     #Return the retrieved document as a REXML::Document.
+     #
+     #doc_url -- The URL to a resource retrievable as an XML document
+     #
+     #mime_type -- The mime type to request using the Accept header, by default
+     #  MIME_TYPES[:rest]
+     def get_xml_doc(doc_url, mime_type=MIME_TYPES[:rest])
+       url = URI.parse(doc_url)
+       request = Net::HTTP::Get.new(url.path)
+       request['Accept'] = mime_type
+       request.basic_auth @username, @password
+       response = Net::HTTP.start(url.host, url.port) {|http|
+         http.request(request)
+       }
+       response.value
+       REXML::Document.new(response.body)
+     end
+
+     #Return the size of an XML document at the given URL without
+     #fetching the document itself.
+     #
+     #doc_url -- The URL of the resource to find the size of
+     #
+     #mime_type -- The mime type to request using the Accept header, by default
+     #  MIME_TYPES[:rest]
+     def get_xml_doc_size(doc_url, mime_type=MIME_TYPES[:rest])
+       url = URI.parse(doc_url)
+       request = Net::HTTP::Head.new(url.path)
+       request['Accept'] = mime_type
+       request.basic_auth @username, @password
+       response = Net::HTTP.start(url.host, url.port) {|http|
+         http.request(request)
+       }
+       response.content_length
+     end
+
+     #Parse a data document as returned from get_job_outputs_doc().
+     #
+     #Return a hash where the keys are strings, matching the names of
+     #ports of the workflow. The values are Document::Data objects.
+     #
+     #xml_document -- A data document as a REXML::Document. Such a document can
+     #  be created using create_data_doc()
+     def parse_data_doc(xml_document)
+       Baclava::Reader.read(xml_document)
+     end
+
+     #Upload a data document to the current user's collection.
+     #
+     #Return the URL of the created data resource.
+     #
+     #xml_document -- A data document as a REXML::Document. Such a document can
+     #  be created using create_data_doc()
+     #
+     #Raises:
+     # CouldNotCreateError -- If the service returned 200 OK instead of
+     #   creating the resource
+     def upload_data_doc(xml_document)
+       datas_url = get_user_collection_url("datas")
+       upload_to_collection(datas_url, xml_document.to_s, MIME_TYPES[:baclava])
+     end
+
+     #Test if the URL is valid for this server.
+     def url_valid?(url)
+       url = URI.parse(url)
+       req = Net::HTTP::Head.new(url.path)
+       req.basic_auth @username, @password
+       Net::HTTP.start(url.host, url.port) {|http|
+         http.request(req)
+       }.kind_of?(Net::HTTPSuccess)
+     end
+
+     #Upload data by POST-ing to the given URL.
+     #
+     #Return the URL of the created resource if the request succeeded with
+     #201 Created.
+     #
+     #Raises:
+     # CouldNotCreateError -- If the service returned 200 OK instead of
+     #   creating the resource
+     # Net::HTTPError -- If any other HTTP result code (including errors)
+     #   was returned
+     #
+     #url -- The URL of the collection to POST to,
+     #  normally retrieved using get_user_collection_url().
+     #
+     #data -- The data to upload as a string
+     #
+     #content_type -- The MIME type of the data to upload. Typically the value
+     #  of one of the MIME_TYPES constants. For data uploaded to the "datas" user
+     #  collection this would be MIME_TYPES[:baclava], and for workflows uploaded
+     #  to the "workflows" collection, MIME_TYPES[:scufl]. Any other XML document
+     #  from the NAMESPACES[:service] namespace has the mime type MIME_TYPES[:rest]
+     def upload_to_collection(url, data, content_type)
+       url = URI.parse(url)
+       request = Net::HTTP::Post.new(url.path)
+       request.body = data
+       request['Accept'] = MIME_TYPES[:rest]
+       request['Content-Type'] = content_type
+       request.basic_auth @username, @password
+       response = Net::HTTP.start(url.host, url.port) {|http|
+         http.request(request)
+       }
+       response.value
+       raise CouldNotCreateError.new(url) unless response.kind_of?(Net::HTTPCreated)
+       response.header['Location']
+     end
+
+     #Create a data document to be uploaded with upload_data_doc().
+     #
+     #Return the data document as a REXML::Document. This data document can be
+     #parsed using parse_data_doc()
+     #
+     #hash -- A hash where the keys are strings, matching the names of input
+     #  ports of the workflow to run. The values are Document::Data objects.
+     def create_data_doc(hash)
+       Baclava::Writer.write_doc(hash)
+     end
+
+     #Create a job document for submission with submit_job().
+     #
+     #Return the job document as a REXML::Document.
+     #
+     #workflow_url -- The URL of a workflow previously uploaded using
+     #  upload_workflow()
+     #
+     #inputs_url -- The (optional) URL of an input document previously
+     #  uploaded using upload_data_doc()
+     def create_job_doc(workflow_url, inputs_url=nil)
+       xml = Builder::XmlMarkup.new
+       xml.instruct!
+       REXML::Document.new(xml.job('xmlns' => NAMESPACES[:service], 'xmlns:xlink' => NAMESPACES[:xlink]) {
+         xml.inputs('xlink:href' => inputs_url) if inputs_url
+         xml.workflow('xlink:href' => workflow_url)
+       })
+     end
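+
+     # For illustration (not part of the original source), the generated job
+     # document has roughly this shape; the resource URLs are hypothetical:
+     #
+     #   <?xml version="1.0" encoding="UTF-8"?>
+     #   <job xmlns="http://taverna.sf.net/service"
+     #        xmlns:xlink="http://www.w3.org/1999/xlink">
+     #     <inputs xlink:href="http://myserver.com:8080/tavernaService/v1/datas/1"/>
+     #     <workflow xlink:href="http://myserver.com:8080/tavernaService/v1/workflows/1"/>
+     #   </job>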
+
+     #Submit a job to be queued for execution on the server.
+     #
+     #Return the URL to the job resource.
+     #
+     #job_document -- A job document created with create_job_doc(), specifying
+     #  the workflow to run and its inputs.
+     #
+     #Raises:
+     # CouldNotCreateError -- If the service returned 200 OK instead of
+     #   creating the resource
+     def submit_job_doc(job_document)
+       jobs_url = get_user_collection_url("jobs")
+       upload_to_collection(jobs_url, job_document.to_s, MIME_TYPES[:rest])
+     end
+
+     public
+
+     #Get the status of a previously submitted job.
+     #
+     #Return the status as a string, one of the values from Status.
+     #
+     #job_url -- The URL to a job resource previously created using
+     #  submit_job().
+     def get_job_status(job_url)
+       job_document = get_xml_doc(job_url)
+       #status = job_document.elements["{#{NAMESPACES[:service]}}status"]
+       status = job_document.root.elements['status']
+       # TODO: For future checks, use:
+       #status_url = status.attributes.get_attribute_ns(NAMESPACES[:xlink], 'href').value
+       status.text
+     end
+
+     #Get the date a previously submitted job was created.
+     #
+     #Return the date as a DateTime object.
+     #
+     #job_url -- The URL to a job resource previously created using
+     #  submit_job().
+     def get_job_created_date(job_url)
+       job_document = get_xml_doc(job_url)
+       #created = job_document.elements["{#{NAMESPACES[:dcterms]}}created"]
+       created = job_document.root.elements['dcterms:created'].text
+       DateTime.parse(created)
+     end
+
+     #Get the date a previously submitted job was last modified.
+     #
+     #Return the date as a DateTime object.
+     #
+     #job_url -- The URL to a job resource previously created using
+     #  submit_job().
+     def get_job_modified_date(job_url)
+       job_document = get_xml_doc(job_url)
+       #modified = job_document.elements["{#{NAMESPACES[:dcterms]}}modified"]
+       modified = job_document.root.elements['dcterms:modified'].text
+       DateTime.parse(modified)
+     end
+
+     #Get the job's internal progress report. This might be available
+     #while the job is running.
+     #
+     #Return the internal progress report as a Document::Report object.
+     #
+     #job_url -- The URL to a job resource previously created using submit_job().
+     def get_job_report(job_url)
+       job_document = get_xml_doc(job_url)
+       #report_element = job_document.elements["{#{NAMESPACES[:service]}}report"]
+       report_element = job_document.root.elements['report']
+       report_url = report_element.attributes.get_attribute_ns(NAMESPACES[:xlink], 'href').value
+       # TODO: Cache report_url per job
+       job_report_document = get_xml_doc(report_url, MIME_TYPES[:report])
+       Document::Report.from_document(job_report_document)
+     end
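+
+     # A brief usage sketch (illustrative, not part of the original source);
+     # client is a Client instance and job_url a URL returned by submit_job():
+     #
+     #   report = client.get_job_report(job_url)
+     #   report.processors.each do |processor|
+     #     puts "#{processor.name}: #{processor.status}"
+     #   end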
+
+     #Get the outputs of a job.
+     #
+     #Return the job outputs as a hash where the keys are strings,
+     #matching the names of output ports of the workflow. The values are
+     #Document::Data objects. If no outputs exist, nil is returned instead.
+     #
+     #job_url -- The URL to a job resource previously created using
+     #  submit_job().
+     def get_job_outputs(job_url)
+       job_outputs = get_job_outputs_doc(job_url)
+       return nil unless job_outputs
+       parse_data_doc(job_outputs)
+     end
+
+     #Get the size of the outputs of a job.
+     #
+     #Return the size of the outputs of a job in bytes.
+     #If no outputs exist, nil is returned instead.
+     #
+     #job_url -- The URL to a job resource previously created using
+     #  submit_job().
+     def get_job_outputs_size(job_url)
+       outputs_url = get_job_outputs_url(job_url)
+       return nil if not outputs_url
+       get_xml_doc_size(outputs_url, MIME_TYPES[:baclava])
+     end
+
+     #Check if a job has finished in one way or another.
+     #
+     #Note that the job might have finished unsuccessfully. To check
+     #if a job is actually complete, check:
+     #
+     # get_job_status(job_url) == Status::COMPLETE
+     #
+     #Return true if the job is in a finished state, that is,
+     #Status.finished?(get_job_status(job_url)) is true.
+     #
+     #job_url -- The URL to a job resource previously created using
+     #  submit_job().
+     def finished?(job_url)
+       status = get_job_status(job_url)
+       Status.finished?(status)
+     end
+
+     #Submit a job to be queued for execution on the server.
+     #
+     #Return the URL to the created job resource.
+     #
+     #workflow_url -- The URL of a workflow previously uploaded using
+     #  upload_workflow()
+     #
+     #inputs_url -- The (optional) URL of an input resource previously
+     #  uploaded using upload_data()
+     #
+     #Raises:
+     # CouldNotCreateError -- If the service returned 200 OK instead of
+     #   creating the resource
+     def submit_job(workflow_url, inputs_url=nil)
+       job_document = create_job_doc(workflow_url, inputs_url)
+       submit_job_doc(job_document)
+     end
+
+     #Upload data to be used with submit_job().
+     #
+     #Return the URL to the created data resource.
+     #
+     #hash -- A hash where the keys are strings, matching the names of input
+     #  ports of the workflow to run. The values can be strings, lists of strings,
+     #  or deeper lists.
+     #
+     #Raises:
+     # CouldNotCreateError -- If the service returned 200 OK instead of
+     #   creating the resource
+     def upload_data(hash)
+       inputs = create_data_doc(hash)
+       upload_data_doc(inputs)
+     end
+
+     #Check if the workflow exists on the server.
+     #
+     #workflow_url -- The URL to a workflow previously uploaded using
+     #  upload_workflow().
+     def workflow_exists?(workflow_url)
+       url_valid?(workflow_url)
+     end
+
+     #Check if the username and password are valid for the service.
+     def service_valid?
+       begin
+         get_user_url
+         true
+       rescue
+         false
+       end
+     end
+
+     #Upload a workflow XML document to the current user's collection.
+     #
+     #Return the URL of the created workflow resource.
+     #
+     #workflow_xml -- The Taverna scufl workflow as a string
+     #
+     #Raises:
+     # CouldNotCreateError -- If the service returned 200 OK instead of
+     #   creating the resource
+     def upload_workflow(workflow_xml)
+       workflows_url = get_user_collection_url("workflows")
+       upload_to_collection(workflows_url, workflow_xml, MIME_TYPES[:scufl])
+     end
+
+     #Wait (blocking) for a job to finish, or until a maximum timeout
+     #has been reached.
+     #
+     #Return the status of the job. If the timeout was reached, the job may
+     #still be running and the returned status will not be a finished state.
+     #
+     #job_url -- The URL to a job resource previously created using
+     #  submit_job().
+     #
+     #timeout -- The maximum number of seconds (as a float) to wait for the job.
+     #  The default value is DEFAULT_TIMEOUT.
+     #
+     #refresh -- In seconds (as a float), how often to check the job's
+     #  status while waiting. The default value is DEFAULT_REFRESH.
+     def wait_for_job(job_url, timeout=DEFAULT_TIMEOUT, refresh=DEFAULT_REFRESH)
+       now = Time.now
+       _until = now + timeout
+       while _until > Time.now and not finished?(job_url)
+         now = Time.now # finished?() might have taken a while
+         sleep [[refresh, _until - now].min, 0].max
+         now = Time.now # after the sleep
+       end
+       get_job_status(job_url)
+     end
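+
+     # A brief usage sketch of the non-blocking flow (illustrative, not part of
+     # the original source); client is a Client instance, and the file name and
+     # input port name are hypothetical:
+     #
+     #   workflow_url = client.upload_workflow(File.read("workflow.xml"))
+     #   inputs_url   = client.upload_data("gene_id" => "BRCA1")
+     #   job_url      = client.submit_job(workflow_url, inputs_url)
+     #   client.wait_for_job(job_url, 120)
+     #   outputs = client.get_job_outputs(job_url) if client.finished?(job_url)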
+
+     #Execute a workflow and wait until it's finished.
+     #
+     #This will block until the workflow has been executed by the server, and
+     #return the result of the workflow run.
+     #
+     #Return the parsed output document as a hash where the keys are
+     #strings, matching the names of output ports of the workflow. The
+     #values are Document::Data objects. If the workflow
+     #did not produce any output, nil might be returned instead.
+     #
+     #workflow_xml -- The workflow as a Taverna scufl XML string. This *or* the
+     #  workflow_url parameter is required.
+     #
+     #workflow_url -- The URL to a workflow previously uploaded using
+     #  upload_workflow(). This *or* the workflow_xml parameter is required.
+     #
+     #inputs -- The (optional) inputs to the workflow, either as a Baclava
+     #  XML document (string), or as a hash where the keys are
+     #  strings, matching the names of input ports of the workflow. The
+     #  values can be strings, lists of strings, or deeper lists.
+     #
+     #timeout -- The maximum number of seconds (as a float) to wait for the job.
+     #  The default value is DEFAULT_TIMEOUT.
+     #
+     #refresh -- In seconds (as a float), how often to check the job's
+     #  status while waiting. The default value is DEFAULT_REFRESH.
+     #
+     #Raises:
+     # NotCompleteError -- If the job did not complete, for instance because
+     #   the timeout was reached before completion.
+     #
+     # Net::HTTPError -- If any step in submitting or requesting the status and
+     #   result of the job failed.
+     def execute_sync(workflow_xml=nil, workflow_url=nil, inputs=nil,
+                      timeout=DEFAULT_TIMEOUT, refresh=DEFAULT_REFRESH)
+       raise TypeError.new("workflow_xml or workflow_url must be given") unless workflow_xml or workflow_url
+       raise TypeError.new("Only one of workflow_xml and workflow_url can be given") if workflow_xml and workflow_url
+
+       workflow_url = upload_workflow(workflow_xml) if workflow_xml
+       inputs_url = upload_data(inputs) if inputs
+
+       job_url = submit_job(workflow_url, inputs_url)
+       status = wait_for_job(job_url, timeout, refresh)
+
+       raise NotCompleteError.new(job_url, status) if status != Status::COMPLETE
+
+       get_job_outputs(job_url)
+     end
+
+   end
+
+ end
+