mannie-taverna-scufl 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENCE +165 -0
- data/README +133 -0
- data/lib/baclava/reader.rb +61 -0
- data/lib/baclava/writer.rb +82 -0
- data/lib/document/data.rb +24 -0
- data/lib/document/report.rb +91 -0
- data/lib/enactor/client.rb +673 -0
- data/lib/scufl/dot.rb +248 -0
- data/lib/scufl/model.rb +153 -0
- data/lib/scufl/parser.rb +162 -0
- data/test/fixtures/animal.xml +10 -0
- data/test/fixtures/colouranimal.xml +14 -0
- data/test/tc_client.rb +191 -0
- data/test/ts_taverna.rb +4 -0
- metadata +95 -0
@@ -0,0 +1,91 @@
|
|
1
|
+
require 'rexml/document'

module Document

  # In-memory representation of a Taverna workflow progress report.
  #
  # Build instances with Report.from_xml (an XML string) or
  # Report.from_document (a REXML::Document); both delegate to Reader.
  class Report
    # Array of Processor entries, one per <processor> in the report.
    attr_reader :processors
    # Workflow id and overall workflow status, taken from the
    # workflowReport element's attributes (nil when absent).
    attr_accessor :id, :status

    def initialize
      @processors = Array.new
    end

    # Parse a workflow report from an XML string.
    def self.from_xml(xml)
      Reader.read(xml)
    end

    # Parse a workflow report from a REXML::Document.
    def self.from_document(document)
      Reader.read(document)
    end

  end

  # Progress information for a single processor in the workflow.
  # Any attribute may be nil when the report did not include it.
  class Processor
    attr_accessor :name, :status, :time, :total, :number
  end

  # Parses workflowReport XML documents into Report objects.
  class Reader

    # Parse +report+, either an XML string or a REXML::Document.
    #
    # Returns a Report, or nil if the document has no root element.
    # Raises RuntimeError if the root element is not 'workflowReport'.
    def self.read(report)
      if report.kind_of?(REXML::Document)
        document = report
      else
        document = REXML::Document.new(report)
      end
      root = document.root

      return nil unless root

      # Original message concatenated root.name straight onto the text
      # ("fooDoesn't appear..."); interpolate with a separator instead.
      raise "#{root.name} doesn't appear to be a workflow report!" if root.name != "workflowReport"

      create_report(root)
    end

    # Build a Report from the workflowReport root element.
    def self.create_report(element)
      report = Report.new

      id = element.attribute('workflowId')
      report.id = id.value if id

      status = element.attribute('workflowStatus')
      report.status = status.value if status

      element.elements['processorList'].each_element('processor') { |processor|
        add_processor(processor, report)
      }

      report
    end

    # Translate one <processor> element into a Processor and append it
    # to report.processors. The processor's first child element (if
    # any) determines its status and timing attributes; an element with
    # no children leaves status as nil.
    def self.add_processor(element, report)
      processor = Processor.new

      name = element.attribute('name')
      processor.name = name.value if name

      if element.has_elements?
        first_element = element.elements[1]
        case first_element.name
        when 'ProcessComplete'
          processor.status = 'COMPLETE'
          processor.time = first_element.attribute('TimeStamp')
        when 'ProcessScheduled'
          processor.status = 'SCHEDULED'
          processor.time = first_element.attribute('TimeStamp')
        when 'InvokingWithIteration'
          processor.status = 'ITERATING'
          processor.time = first_element.attribute('TimeStamp')
          processor.number = first_element.attribute('IterationNumber')
          processor.total = first_element.attribute('IterationTotal')
        when 'ServiceFailure'
          processor.status = 'FAILED'
          processor.time = first_element.attribute('TimeStamp')
        else
          processor.status = 'UNKNOWN'
        end
      end
      report.processors.push processor
    end

  end

end
|
@@ -0,0 +1,673 @@
|
|
1
|
+
require 'rubygems'

# Standard library
require 'date'
require 'net/http'
require 'rexml/document'
require 'uri'

# Gems
require 'builder'

# Local
require 'baclava/reader'
require 'baclava/writer'
require 'document/data'
require 'document/report'
|
10
|
+
|
11
|
+
module Enactor # :nodoc:
|
12
|
+
|
13
|
+
#Base class for Taverna service errors.
|
14
|
+
#Base class for Taverna service errors.
class TavernaServiceError < StandardError
end

#Job did not complete.
#Thrown by execute_sync()
class NotCompleteError < TavernaServiceError
  # job_url -- URL of the job resource that did not complete.
  # status  -- the job's status when the wait gave up.
  def initialize(job_url, status)
    super("Job #{job_url} not complete, status: #{status}")
  end
end

#Could not create resource.
class CouldNotCreateError < TavernaServiceError
  # url -- URL of the collection that refused to create the resource.
  def initialize(url)
    # Original wrote the literal text "#url" (interpolation braces were
    # missing); interpolate the actual URL into the message.
    super("Expected 201 Created when uploading #{url}")
  end
end
|
31
|
+
|
32
|
+
|
33
|
+
#Status messages that can be returned from TavernaService.get_job_status().
|
34
|
+
#
|
35
|
+
#If finished?(status) is true, this means the job is finished,
|
36
|
+
#either successfully (COMPLETE), unsuccessfully (CANCELLED, FAILED), or
|
37
|
+
#that the job is no longer in the database (DESTROYED).
|
38
|
+
#
|
39
|
+
#When a job has just been created it will be in status NEW, after that
|
40
|
+
#it will immediately be on a queue and in the state QUEUED. Once the
|
41
|
+
#job has been picked up by a worker it will be in INITIALISING, this
|
42
|
+
#state might include the startup time of the worker and while downloading
|
43
|
+
#the workflow and input data to the worker. The state PAUSED is not
|
44
|
+
#currently used. The FAILING state can occur if the workflow engine
|
45
|
+
#crashed, after clean-up or if the workflow itself failed, the state
|
46
|
+
#will be FAILED.
|
47
|
+
#
|
48
|
+
#The job might at any time be set to the state CANCELLING by the user,
|
49
|
+
#which will stop execution of the workflow, leading to the state
|
50
|
+
#CANCELLED.
|
51
|
+
#
|
52
|
+
#If the workflow execution completed the state will be set to COMPLETE,
|
53
|
+
#after which the workflow result data should be available by using
|
54
|
+
#get_job_outputs_doc().
|
55
|
+
#
|
56
|
+
#If data about the job has been lost (probably because it's too old
|
57
|
+
#or has been deleted by the user), the state will be DESTROYED.
|
58
|
+
class Status
  NEW = "NEW"
  QUEUED = "QUEUED"
  INITIALISING = "INITIALISING"
  PAUSED = "PAUSED"
  FAILING = "FAILING"
  CANCELLING = "CANCELLING"
  CANCELLED = "CANCELLED"
  COMPLETE = "COMPLETE"
  FAILED = "FAILED"
  DESTROYED = "DESTROYED"

  # Terminal states: the job will make no further progress.
  FINISHED = [COMPLETE, CANCELLED, DESTROYED, FAILED].freeze

  # Every recognised status value. PAUSED was previously missing here,
  # which made Status.valid?("PAUSED") return false for a declared
  # status constant.
  ALL = [NEW, QUEUED, INITIALISING, PAUSED, FAILING,
         CANCELLING, CANCELLED, COMPLETE, FAILED, DESTROYED].freeze

  #Return True if the status is a finished status.
  #
  #This would normally include COMPLETE, CANCELLED, DESTROYED and FAILED.
  def Status.finished?(status)
    FINISHED.include?(status)
  end

  #Check if a string is a valid status.
  def Status.valid?(status)
    ALL.include?(status)
  end
end
|
85
|
+
|
86
|
+
#Client library for accessing a Taverna Remote execution service.
|
87
|
+
#
|
88
|
+
#Since the service is a rest interface, this library reflects that to
|
89
|
+
#a certain degree and many of the methods return URLs to be used by
|
90
|
+
#other methods.
|
91
|
+
#
|
92
|
+
# The main methods of interest are - in order of a normal execution:
|
93
|
+
#
|
94
|
+
# execute_sync() -- Given a scufl document or the URL for a previously
|
95
|
+
# uploaded workflow, and data as a hash or URL for previously
|
96
|
+
# uploaded data, submit job for execution, wait for completion
|
97
|
+
# (or a timeout) and retrieve results. This is a blocking
|
98
|
+
# convenience method that can be used instead of the methods below.
|
99
|
+
#
|
100
|
+
# upload_workflow() -- Given a scufl document as a string, upload the
|
101
|
+
# workflow to the server for later execution. Return the URL for the
|
102
|
+
# created workflow resource that can be used with submit_job()
|
103
|
+
#
|
104
|
+
# upload_data()-- Given a hash of input values to a
|
105
|
+
# workflow run, upload the data to the user's collection.
|
106
|
+
# Return the URL for the created data resource that can be used with
|
107
|
+
# submit_job()
|
108
|
+
#
|
109
|
+
# submit_job() -- Given the URL for a workflow resource and optionally
|
110
|
+
# the URL for a input data resource, submit the a to the server
|
111
|
+
# to be executed. Return the URL to the created job resource.
|
112
|
+
#
|
113
|
+
# get_job_status() -- Get the status of the job. Return one of the values from
|
114
|
+
# Status.
|
115
|
+
#
|
116
|
+
# finished?() -- Return True if the job is in a finished state. Note
|
117
|
+
# that this also includes failed states.
|
118
|
+
#
|
119
|
+
# wait_for_job() -- Wait until job has finished execution, or a maximum
|
120
|
+
# timeout is exceeded.
|
121
|
+
#
|
122
|
+
# get_job_outputs() -- Get the outputs produced by job. Return a
|
123
|
+
# hash which values are strings, lists of strings,
|
124
|
+
# or deeper lists.
|
125
|
+
#
|
126
|
+
#Most or all of these methods might in addition to stated exceptions also raise
|
127
|
+
#Net::HTTPError or InvalidResponseError if anything goes wrong in communicating with the service.
|
128
|
+
class Client

  #Name spaces used by various XML documents.
  NAMESPACES = {
    :xscufl  => 'http://org.embl.ebi.escience/xscufl/0.1alpha',
    :baclava => 'http://org.embl.ebi.escience/baclava/0.1alpha',
    :service => 'http://taverna.sf.net/service',
    :xlink   => 'http://www.w3.org/1999/xlink',
    :dcterms => 'http://purl.org/dc/terms/'
  }.freeze

  #Mime types used by the rest protocol.
  #
  # See net.sf.taverna.service.interfaces.TavernaConstants.java
  MIME_TYPES = {
    :rest    => 'application/vnd.taverna.rest+xml',    # For most of the rest documents
    :scufl   => 'application/vnd.taverna.scufl+xml',   # For Taverna workflows
    :baclava => 'application/vnd.taverna.baclava+xml', # For Taverna's Baclava data documents
    :report  => 'application/vnd.taverna.report+xml',  # For Taverna's internal progress reports
    :console => 'text/plain'                           # For Taverna's console
  }.freeze

  DEFAULT_TIMEOUT = 5 * 60 # Max seconds to wait for a job
  DEFAULT_REFRESH = 0.5    # Seconds between job status polls

  #Construct a Taverna remote execution service client accessing the service
  #at the given base URL.
  #
  #The constructor does not attempt to verify the URL or the credentials.
  #To verify, call get_user_url() which requires authentication.
  #
  #url -- The base URL for the service, normally ending in /v1/, for example:
  # "http://myserver.com:8080/tavernaService/v1/"
  #
  #username -- The username of a user previously created or registered in
  # the web interface of the service.
  #
  #password -- The password of the user. Note that the password will be sent
  # over the wire using unencrypted HTTP Basic Auth, unless the URL starts
  # with "https".
  def initialize(url, username, password)
    @url = url
    @username = username
    @password = password
  end

  #Get the capabilities document as a REXML::Document.
  #
  #This document contains the links to the main collections of the service.
  #(Equivalent to fetching the base URL as a rest document.)
  def get_capabilities_doc
    get_xml_doc(@url)
  end

  #Get the URL for the current user's home on the server.
  #
  #Raises a Net::HTTP exception unless the server answers with a success
  #or redirect status; returns the Location header of the answer.
  def get_user_url
    capabilities_doc = get_capabilities_doc
    current_user = capabilities_doc.root.elements['currentUser']
    current_user_url = current_user.attributes.get_attribute_ns(NAMESPACES[:xlink], 'href').value

    url = URI.parse(current_user_url)
    request = Net::HTTP::Get.new(url.path)
    request['Accept'] = MIME_TYPES[:rest]
    request.basic_auth @username, @password
    response = Net::HTTP.start(url.host, url.port) { |http| http.request(request) }
    response.error! unless response.kind_of?(Net::HTTPSuccess) or response.kind_of?(Net::HTTPRedirection)
    response.header['Location']
  end

  #Get the user document as a REXML::Document object.
  #
  #This document contains the links to the user owned collections,
  #such as where to upload workflows and jobs.
  def get_user_doc
    get_xml_doc(get_user_url)
  end

  #Get the URL to a user-owned collection.
  #
  #collection -- The collection name, either "workflows" or "datas"
  def get_user_collection_url(collection)
    user_doc = get_user_doc
    collections = user_doc.root.elements[collection]
    collections.attributes.get_attribute_ns(NAMESPACES[:xlink], 'href').value
  end

  #Get the URL to the output document for a job, or nil if the job has
  #not (yet) produced any output.
  #
  #It generally only makes sense to call this function if
  #get_job_status() == Status::COMPLETE, but no check is enforced here.
  #
  #job_url -- The URL to a job resource previously created using
  # submit_job().
  def get_job_outputs_url(job_url)
    job_document = get_xml_doc(job_url)
    outputs_element = job_document.root.elements['outputs']
    return nil unless outputs_element
    outputs_element.attributes.get_attribute_ns(NAMESPACES[:xlink], 'href').value
  end

  #Get the output document for a job as a REXML::Document, or nil if the
  #job didn't have an output document (yet). This document can be parsed
  #using parse_data_doc().
  #
  #job_url -- The URL to a job resource previously created using
  # submit_job().
  def get_job_outputs_doc(job_url)
    outputs_url = get_job_outputs_url(job_url)
    return nil unless outputs_url
    get_xml_doc(outputs_url, MIME_TYPES[:baclava])
  end

  #Retrieve the XML document at doc_url as a REXML::Document.
  #
  #mime_type -- The mime-type to request using the Accept header, by
  # default MIME_TYPES[:rest]
  #
  #Raises a Net::HTTP exception unless the response is a success.
  def get_xml_doc(doc_url, mime_type = MIME_TYPES[:rest])
    url = URI.parse(doc_url)
    request = Net::HTTP::Get.new(url.path)
    request['Accept'] = mime_type
    request.basic_auth @username, @password
    response = Net::HTTP.start(url.host, url.port) { |http| http.request(request) }
    response.value # raises unless the response is a success
    REXML::Document.new(response.body)
  end

  #Return the size (Content-Length, in bytes) of the document at doc_url
  #without fetching the document itself (uses a HEAD request).
  #
  #mime_type -- The mime-type to request using the Accept header, by
  # default MIME_TYPES[:rest]
  def get_xml_doc_size(doc_url, mime_type = MIME_TYPES[:rest])
    url = URI.parse(doc_url)
    request = Net::HTTP::Head.new(url.path)
    request['Accept'] = mime_type
    request.basic_auth @username, @password
    response = Net::HTTP.start(url.host, url.port) { |http| http.request(request) }
    response.content_length
  end

  #Parse a data document as returned from get_job_outputs_doc().
  #
  #Return a hash where the keys are strings matching the names of ports
  #of the workflow and the values are Document::Data objects.
  def parse_data_doc(xml_document)
    Baclava::Reader.read(xml_document)
  end

  #Upload a data document (created with create_data_doc()) to the current
  #user's "datas" collection and return the URL of the created resource.
  #
  #Raises CouldNotCreateError if the service did not answer 201 Created.
  def upload_data_doc(xml_document)
    datas_url = get_user_collection_url("datas")
    upload_to_collection(datas_url, xml_document.to_s, MIME_TYPES[:baclava])
  end

  #Tests if the url answers a HEAD request with a success status.
  def url_valid?(url)
    url = URI.parse(url)
    request = Net::HTTP::Head.new(url.path)
    request.basic_auth @username, @password
    Net::HTTP.start(url.host, url.port) { |http| http.request(request) }.kind_of?(Net::HTTPSuccess)
  end

  #Upload data by POST-ing to the given collection URL.
  #
  #Return the URL (Location header) of the created resource if the
  #request succeeded with 201 Created.
  #
  #url -- The URL of the collection to POST to, normally retrieved using
  # get_user_collection_url().
  #
  #data -- The data to upload as a string.
  #
  #content_type -- The MIME type of the data to upload:
  # MIME_TYPES[:baclava] for the "datas" collection,
  # MIME_TYPES[:scufl] for the "workflows" collection, and
  # MIME_TYPES[:rest] for other service documents.
  #
  #Raises:
  # CouldNotCreateError -- If the service returned a success code other
  #  than 201 Created
  # Net::HTTP exceptions -- For any error result code
  def upload_to_collection(url, data, content_type)
    url = URI.parse(url)
    request = Net::HTTP::Post.new(url.path)
    request.body = data
    request['Accept'] = MIME_TYPES[:rest]
    request['Content-Type'] = content_type
    request.basic_auth @username, @password
    response = Net::HTTP.start(url.host, url.port) { |http| http.request(request) }
    response.value # raises unless the response is a success
    # Was `raise CouldNotCreateError(url, response)`, which called the
    # class like a method (NoMethodError); the exception takes only the URL.
    raise CouldNotCreateError.new(url) unless response.kind_of?(Net::HTTPCreated)
    response.header['Location']
  end

  #Create a data document to be uploaded with upload_data_doc().
  #
  #Return the document as a REXML::Document; it can be parsed back with
  #parse_data_doc().
  #
  #hash -- A hash where the keys are strings matching the names of input
  # ports of the workflow to run, and the values are Document::Data objects.
  def create_data_doc(hash)
    Baclava::Writer.write_doc(hash)
  end

  #Create a job document for submission with submit_job_doc().
  #
  #Return the job document as a REXML::Document.
  #
  #workflow_url -- The URL of a workflow previously uploaded using
  # upload_workflow()
  #
  #inputs_url -- The (optional) URL of a input document previously
  # uploaded using upload_data_doc()
  def create_job_doc(workflow_url, inputs_url = nil)
    xml = Builder::XmlMarkup.new
    xml.instruct!
    REXML::Document.new(xml.job('xmlns' => NAMESPACES[:service], 'xmlns:xlink' => NAMESPACES[:xlink]) {
      xml.inputs('xlink:href' => inputs_url) if inputs_url
      xml.workflow('xlink:href' => workflow_url)
    })
  end

  #Submit a job document (from create_job_doc()) to the user's "jobs"
  #collection and return the URL of the created job resource.
  #
  #Raises CouldNotCreateError if the service did not answer 201 Created.
  def submit_job_doc(job_document)
    jobs_url = get_user_collection_url("jobs")
    upload_to_collection(jobs_url, job_document.to_s, MIME_TYPES[:rest])
  end

  public

  #Get the status of a previously submitted job.
  #
  #Return the status as a string, one of the values from Status.
  #
  #job_url -- The URL to a job resource previously created using
  # submit_job().
  def get_job_status(job_url)
    job_document = get_xml_doc(job_url)
    status = job_document.root.elements['status']
    status.text
  end

  #Get the date a previously submitted job was created, as a DateTime.
  #
  #job_url -- The URL to a job resource previously created using
  # submit_job().
  def get_job_created_date(job_url)
    job_document = get_xml_doc(job_url)
    created = job_document.root.elements['dcterms:created'].text
    DateTime.parse(created)
  end

  #Get the date a previously submitted job was last modified, as a
  #DateTime.
  #
  #job_url -- The URL to a job resource previously created using
  # submit_job().
  def get_job_modified_date(job_url)
    job_document = get_xml_doc(job_url)
    modified = job_document.root.elements['dcterms:modified'].text
    DateTime.parse(modified)
  end

  #Get the job's internal progress report as a Document::Report. This
  #might be available while the job is still running.
  #
  #job_url -- The URL to a job resource previously created using
  # submit_job().
  def get_job_report(job_url)
    job_document = get_xml_doc(job_url)
    report_element = job_document.root.elements['report']
    report_url = report_element.attributes.get_attribute_ns(NAMESPACES[:xlink], 'href').value
    # TODO: Cache report_url per job
    job_report_document = get_xml_doc(report_url, MIME_TYPES[:report])
    Document::Report.from_document(job_report_document)
  end

  #Get the outputs of a job as a hash where the keys are strings matching
  #the names of output ports and the values are Document::Data objects.
  #If no outputs exist, nil is returned instead.
  #
  #job_url -- The URL to a job resource previously created using
  # submit_job().
  def get_job_outputs(job_url)
    job_outputs = get_job_outputs_doc(job_url)
    return nil unless job_outputs
    parse_data_doc(job_outputs)
  end

  #Get the size of a job's output document in bytes, or nil if no
  #outputs exist (yet).
  #
  #job_url -- The URL to a job resource previously created using
  # submit_job().
  def get_job_outputs_size(job_url)
    outputs_url = get_job_outputs_url(job_url)
    return nil unless outputs_url
    get_xml_doc_size(outputs_url, MIME_TYPES[:baclava])
  end

  #Check if a job has finished in one way or another.
  #
  #Note that the job might have finished unsuccessfully. To check if a
  #job is actually complete, check:
  #
  # get_job_status(job_url) == Status::COMPLETE
  #
  #job_url -- The URL to a job resource previously created using
  # submit_job().
  def finished?(job_url)
    Status.finished?(get_job_status(job_url))
  end

  #Submit a job to be queued for execution on the server.
  #
  #Return the URL to the created job resource.
  #
  #workflow_url -- The URL of a workflow previously uploaded using
  # upload_workflow()
  #
  #inputs_url -- The (optional) URL of a input resource previously
  # uploaded using upload_data()
  #
  #Raises CouldNotCreateError if the service did not answer 201 Created.
  def submit_job(workflow_url, inputs_url = nil)
    job_document = create_job_doc(workflow_url, inputs_url)
    submit_job_doc(job_document)
  end

  #Upload data to be used with submit_job().
  #
  #Return the URL to the created data resource.
  #
  #hash -- A hash where the keys are strings matching the names of input
  # ports of the workflow to run.
  #
  #Raises CouldNotCreateError if the service did not answer 201 Created.
  def upload_data(hash)
    inputs = create_data_doc(hash)
    upload_data_doc(inputs)
  end

  #Checks if the workflow exists on the server.
  #
  #workflow_url -- The URL to a workflow previously uploaded using
  # upload_workflow().
  def workflow_exists?(workflow_url)
    url_valid?(workflow_url)
  end

  #Checks if the service URL, username and password are valid.
  def service_valid?
    get_user_url
    true
  rescue StandardError
    false
  end

  #Upload a workflow XML document to the current user's collection.
  #
  #Return the URL of the created workflow resource.
  #
  #workflow_xml -- The Taverna scufl workflow as a string
  #
  #Raises CouldNotCreateError if the service did not answer 201 Created.
  def upload_workflow(workflow_xml)
    workflows_url = get_user_collection_url("workflows")
    upload_to_collection(workflows_url, workflow_xml, MIME_TYPES[:scufl])
  end

  #Wait (blocking) for a job to finish, or until a maximum timeout has
  #been reached, then return the job's (possibly unfinished) status.
  #
  #job_url -- The URL to a job resource previously created using
  # submit_job().
  #
  #timeout -- The maximum number of seconds (as a float) to wait for the
  # job. The default value is DEFAULT_TIMEOUT.
  #
  #refresh -- In seconds (as a float), how often to check the job's
  # status while waiting. The default value is DEFAULT_REFRESH.
  def wait_for_job(job_url, timeout = DEFAULT_TIMEOUT, refresh = DEFAULT_REFRESH)
    deadline = Time.now + timeout
    until Time.now >= deadline or finished?(job_url)
      now = Time.now # finished?() might have taken a while
      # Sleep until the next poll, but never past the deadline and
      # never a negative amount.
      sleep [[refresh, deadline - now].min, 0].max
    end
    get_job_status(job_url)
  end

  #Execute a workflow and wait until it's finished.
  #
  #This will block until the workflow has been executed by the server,
  #then return the parsed output document as a hash where the keys are
  #strings matching the names of output ports and the values are
  #Document::Data objects. If the workflow did not produce any output,
  #nil might be returned instead.
  #
  #workflow_xml -- The workflow as a Taverna scufl XML string. This *or*
  # the workflow_url parameter is required.
  #
  #workflow_url -- The URL to a workflow previously uploaded using
  # upload_workflow(). This *or* the workflow_xml parameter is required.
  #
  #inputs -- The (optional) inputs to the workflow as a hash of input
  # port name (String) => value.
  #
  #timeout -- The maximum number of seconds (as a float) to wait for the
  # job. The default value is DEFAULT_TIMEOUT.
  #
  #refresh -- In seconds (as a float), how often to check the job's
  # status while waiting. The default value is DEFAULT_REFRESH.
  #
  #Raises:
  # TypeError -- Unless exactly one of workflow_xml/workflow_url is given.
  # NotCompleteError -- If the job did not complete, for instance because
  #  the timeout was reached before completion.
  def execute_sync(workflow_xml = nil, workflow_url = nil, inputs = nil,
                   timeout = DEFAULT_TIMEOUT, refresh = DEFAULT_REFRESH)
    raise TypeError.new("workflow_xml or workflow_url must be given") unless workflow_xml or workflow_url
    raise TypeError.new("Only one of workflow_xml and workflow_url can be given") if workflow_xml and workflow_url

    workflow_url = upload_workflow(workflow_xml) if workflow_xml
    inputs_url = upload_data(inputs) if inputs

    job_url = submit_job(workflow_url, inputs_url)
    status = wait_for_job(job_url, timeout, refresh)

    raise NotCompleteError.new(job_url, status) if status != Status::COMPLETE

    get_job_outputs(job_url)
  end

end
|
671
|
+
|
672
|
+
end
|
673
|
+
|