taverna-scufl 0.7.1
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog.rdoc +8 -0
- data/LICENCE +165 -0
- data/README.rdoc +133 -0
- data/lib/baclava/reader.rb +61 -0
- data/lib/baclava/writer.rb +82 -0
- data/lib/document/data.rb +24 -0
- data/lib/document/report.rb +91 -0
- data/lib/enactor/client.rb +673 -0
- data/lib/scufl/dot.rb +248 -0
- data/lib/scufl/model.rb +310 -0
- data/lib/scufl/parser.rb +185 -0
- data/test/fixtures/animal.xml +10 -0
- data/test/fixtures/colouranimal.xml +14 -0
- data/test/tc_client.rb +191 -0
- data/test/ts_taverna.rb +4 -0
- metadata +119 -0
@@ -0,0 +1,24 @@
|
|
1
|
+
module Document # :nodoc:

  # Input or output data for a workflow run.
  #
  # value      -- the data value or a (possibly nested) list of data values
  # annotation -- optional annotation attached to the value
  class Data
    attr_accessor :value, :annotation

    def initialize(value = nil, annotation = nil)
      @value = value
      @annotation = annotation
    end

    # Strict equality: true when +other+ is a Data whose value and
    # annotation are both eql?.
    # BUG FIX: the original raised NoMethodError when compared against a
    # non-Data object instead of returning false.
    def eql?(other)
      other.is_a?(Data) && @value.eql?(other.value) && @annotation.eql?(other.annotation)
    end

    # Loose equality using == on value and annotation.
    def ==(other)
      other.is_a?(Data) && @value == other.value && @annotation == other.annotation
    end

    # Objects that define eql? must also define hash so that equal
    # instances behave correctly as Hash keys and Set members.
    def hash
      [@value, @annotation].hash
    end

  end

end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
module Document

  # A workflow progress report parsed from the Taverna service's
  # workflowReport XML document.
  class Report

    # Processor reports collected from the processorList element.
    attr_reader :processors
    attr_accessor :id, :status

    def initialize
      @processors = Array.new
    end

    # Parse a report from an XML string.
    def self.from_xml(xml)
      Reader.read(xml)
    end

    # Parse a report from a REXML::Document.
    def self.from_document(document)
      Reader.read(document)
    end

  end

  # Progress information for a single processor in the workflow.
  class Processor
    attr_accessor :name, :status, :time, :total, :number
  end

  # Parses workflowReport XML into Report/Processor objects.
  class Reader

    # report -- an XML string or a REXML::Document.
    #
    # Returns a Report, or nil if the document has no root element.
    # Raises RuntimeError if the root element is not "workflowReport".
    def self.read(report)
      if report.kind_of?(REXML::Document)
        document = report
      else
        document = REXML::Document.new(report)
      end
      root = document.root

      return nil if not root

      # BUG FIX: the original message concatenated the element name directly
      # onto "Doesn't appear...", producing e.g. "fooDoesn't appear...".
      raise "#{root.name} doesn't appear to be a workflow report!" if root.name != "workflowReport"

      create_report(root)
    end

    def self.create_report(element)
      report = Report.new

      id = element.attribute('workflowId')
      report.id = id.value if id

      status = element.attribute('workflowStatus')
      report.status = status.value if status

      # Guard against a report without a processorList element.
      processor_list = element.elements['processorList']
      if processor_list
        processor_list.each_element('processor') { |processor|
          add_processor(processor, report)
        }
      end

      report
    end

    def self.add_processor(element, report)
      processor = Processor.new

      name = element.attribute('name')
      processor.name = name.value if name

      # The first child element (if any) carries the processor's state.
      # A processor with no child elements keeps status nil.
      if element.has_elements?
        first_element = element.elements[1]
        case first_element.name
        when 'ProcessComplete'
          processor.status = 'COMPLETE'
          processor.time = attribute_value(first_element, 'TimeStamp')
        when 'ProcessScheduled'
          processor.status = 'SCHEDULED'
          processor.time = attribute_value(first_element, 'TimeStamp')
        when 'InvokingWithIteration'
          processor.status = 'ITERATING'
          processor.time = attribute_value(first_element, 'TimeStamp')
          processor.number = attribute_value(first_element, 'IterationNumber')
          processor.total = attribute_value(first_element, 'IterationTotal')
        when 'ServiceFailure'
          processor.status = 'FAILED'
          processor.time = attribute_value(first_element, 'TimeStamp')
        else
          processor.status = 'UNKNOWN'
        end
      end
      report.processors.push processor
    end

    # Attribute string value, or nil if the attribute is absent.
    # CONSISTENCY FIX: the original stored raw REXML::Attribute objects for
    # time/number/total but plain strings for name/id/status; normalise all
    # of them to strings (and avoid NoMethodError on missing attributes).
    def self.attribute_value(element, name)
      attribute = element.attribute(name)
      attribute ? attribute.value : nil
    end

  end

end
|
@@ -0,0 +1,673 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'builder'
|
3
|
+
require "uri"
|
4
|
+
require 'rexml/document'
|
5
|
+
require 'net/http'
|
6
|
+
require 'baclava/reader'
|
7
|
+
require 'baclava/writer'
|
8
|
+
require 'document/report'
|
9
|
+
require 'document/data'
|
10
|
+
|
11
|
+
module Enactor # :nodoc:
|
12
|
+
|
13
|
+
#Base class for Taverna service errors.
|
14
|
+
class TavernaServiceError < StandardError
|
15
|
+
end
|
16
|
+
|
17
|
+
#Job did not complete.
|
18
|
+
#Thrown by execute_sync()
|
19
|
+
class NotCompleteError < TavernaServiceError
|
20
|
+
def initialize(job_url, status)
|
21
|
+
super("Job #{job_url} not complete, status: #{status}")
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
#Could not create resource.
|
26
|
+
class CouldNotCreateError < TavernaServiceError
|
27
|
+
def initialize(url)
|
28
|
+
super("Expected 201 Created when uploading #url")
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
|
33
|
+
#Status messages that can be returned from TavernaService.get_job_status().
|
34
|
+
#
|
35
|
+
#If finished?(status) is true, this means the job is finished,
|
36
|
+
#either successfully (COMPLETE), unsuccessfully (CANCELLED, FAILED), or
|
37
|
+
#that the job is no longer in the database (DESTROYED).
|
38
|
+
#
|
39
|
+
#When a job has just been created it will be in status NEW, after that
|
40
|
+
#it will immediately be on a queue and in the state QUEUED. Once the
|
41
|
+
#job has been picked up by a worker it will be in INITIALISING, this
|
42
|
+
#state might include the startup time of the worker and while downloading
|
43
|
+
#the workflow and input data to the worker. The state PAUSED is not
|
44
|
+
#currently used. The FAILING state can occur if the workflow engine
|
45
|
+
#crashed, after clean-up or if the workflow itself failed, the state
|
46
|
+
#will be FAILED.
|
47
|
+
#
|
48
|
+
#The job might at any time be set to the state CANCELLING by the user,
|
49
|
+
#which will stop execution of the workflow, leading to the state
|
50
|
+
#CANCELLED.
|
51
|
+
#
|
52
|
+
#If the workflow execution completed the state will be set to COMPLETE,
|
53
|
+
#after which the workflow result data should be available by using
|
54
|
+
#get_job_outputs_doc().
|
55
|
+
#
|
56
|
+
#If data about the job has been lost (probably because it's too old
|
57
|
+
#or has been deleted by the user), the state will be DESTROYED.
|
58
|
+
class Status
|
59
|
+
NEW = "NEW"
|
60
|
+
QUEUED = "QUEUED"
|
61
|
+
INITIALISING = "INITIALISING"
|
62
|
+
PAUSED = "PAUSED"
|
63
|
+
FAILING = "FAILING"
|
64
|
+
CANCELLING = "CANCELLING"
|
65
|
+
CANCELLED = "CANCELLED"
|
66
|
+
COMPLETE = "COMPLETE"
|
67
|
+
FAILED = "FAILED"
|
68
|
+
DESTROYED = "DESTROYED"
|
69
|
+
FINISHED = [COMPLETE, CANCELLED, DESTROYED, FAILED]
|
70
|
+
ALL = [NEW, QUEUED, INITIALISING, FAILING,
|
71
|
+
CANCELLING, CANCELLED, COMPLETE, FAILED, DESTROYED]
|
72
|
+
|
73
|
+
#Return True if the status is a finished status.
|
74
|
+
#
|
75
|
+
#This would normally include COMPLETE, CANCELLED, DESTROYED and FAILED.
|
76
|
+
def Status.finished?(status)
|
77
|
+
return FINISHED.include?(status)
|
78
|
+
end
|
79
|
+
|
80
|
+
#Check if a string is a valid status.
|
81
|
+
def Status.valid?(status)
|
82
|
+
ALL.include?(status)
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
  # Client library for accessing a Taverna Remote execution service.
  #
  # Since the service is a REST interface, this library reflects that to
  # a certain degree and many of the methods return URLs to be used by
  # other methods.
  #
  # The main methods of interest are - in order of a normal execution:
  #
  # execute_sync() -- Given a scufl document or the URL for a previously
  #   uploaded workflow, and data as a hash or URL for previously
  #   uploaded data, submit job for execution, wait for completion
  #   (or a timeout) and retrieve results. This is a blocking
  #   convenience method that can be used instead of the methods below.
  #
  # upload_workflow() -- Given a scufl document as a string, upload the
  #   workflow to the server for later execution. Return the URL for the
  #   created workflow resource that can be used with submit_job().
  #
  # upload_data() -- Given a hash of input values to a workflow run,
  #   upload the data to the user's collection. Return the URL for the
  #   created data resource that can be used with submit_job().
  #
  # submit_job() -- Given the URL for a workflow resource and optionally
  #   the URL for an input data resource, submit the job to the server
  #   to be executed. Return the URL to the created job resource.
  #
  # get_job_status() -- Get the status of the job. Return one of the
  #   values from Status.
  #
  # finished?() -- Return true if the job is in a finished state. Note
  #   that this also includes failed states.
  #
  # wait_for_job() -- Wait until job has finished execution, or a maximum
  #   timeout is exceeded.
  #
  # get_job_outputs() -- Get the outputs produced by job. Return a
  #   hash whose values are strings, lists of strings, or deeper lists.
  #
  # Most or all of these methods might in addition to stated exceptions
  # also raise Net::HTTPError or InvalidResponseError if anything goes
  # wrong in communicating with the service.
  class Client

    # Name spaces used by various XML documents.
    NAMESPACES = {
      :xscufl => 'http://org.embl.ebi.escience/xscufl/0.1alpha',
      :baclava => 'http://org.embl.ebi.escience/baclava/0.1alpha',
      :service => 'http://taverna.sf.net/service',
      :xlink => 'http://www.w3.org/1999/xlink',
      :dcterms => 'http://purl.org/dc/terms/'
    }

    # Mime types used by the rest protocol.
    #
    # See net.sf.taverna.service.interfaces.TavernaConstants.java
    MIME_TYPES = {
      :rest => 'application/vnd.taverna.rest+xml', # For most of the rest documents
      :scufl => 'application/vnd.taverna.scufl+xml', # For Taverna workflows
      :baclava => 'application/vnd.taverna.baclava+xml', # For Taverna's Baclava data documents
      :report => 'application/vnd.taverna.report+xml', # For Taverna's internal progress reports
      :console => 'text/plain' # For Taverna's console
    }

    DEFAULT_TIMEOUT = 5 * 60 # in seconds, used by wait_for_job/execute_sync
    DEFAULT_REFRESH = 0.5 # in seconds, poll interval while waiting

    # Construct a Taverna remote execution service client accessing the
    # service at the given base URL.
    #
    # Note that this constructor will not attempt to verify the URL or the
    # credentials. To verify, call get_user_url() which requires
    # authentication.
    #
    # url -- The base URL for the service, normally ending in /v1/, for
    #   example: "http://myserver.com:8080/tavernaService/v1/"
    #
    # username -- The username of a user that has been previously created
    #   or registered in the web interface of the service.
    #
    # password -- The password of the user. Note that the password will be
    #   sent over the wire using unencrypted HTTP Basic Auth, unless the
    #   URL starts with "https".
    def initialize(url, username, password)
      @url = url
      @username = username
      @password = password
    end
|
174
|
+
|
175
|
+
    # NOTE(review): "private" was commented out in the original source, so
    # the methods below are in fact public — left as-is to avoid breaking
    # callers.
    #private

    # Get the capabilities document as a REXML::Document.
    #
    # This document contains the links to the main collections of the
    # service.
    def get_capabilities_doc
      url = URI.parse(@url)
      request = Net::HTTP::Get.new(url.path)
      request['Accept'] = MIME_TYPES[:rest]
      request.basic_auth @username, @password
      response = Net::HTTP.start(url.host, url.port) {|http|
        http.request(request)
      }
      # Net::HTTPResponse#value raises unless the response is a 2xx success.
      response.value
      REXML::Document.new(response.body)
    end

    # Get the URL for the current user's home on the server.
    def get_user_url
      capabilities_doc = get_capabilities_doc()
      #currentUser = capabilities_doc.root.elements["{#{NAMESPACES[:service]}}currentUser"]
      current_user = capabilities_doc.root.elements['currentUser']
      current_user_url = current_user.attributes.get_attribute_ns(NAMESPACES[:xlink], 'href').value

      url = URI.parse(current_user_url)
      request = Net::HTTP::Get.new(url.path)
      request['Accept'] = MIME_TYPES[:rest]
      request.basic_auth @username, @password
      response = Net::HTTP.start(url.host, url.port) {|http|
        http.request(request)
      }
      response.error! unless response.kind_of?(Net::HTTPSuccess) or response.kind_of?(Net::HTTPRedirection)
      # NOTE(review): relies on the service answering with a redirect whose
      # Location header points at the user resource; on a plain 200 this
      # returns nil — confirm against the service's behaviour.
      response.header['Location']
    end

    # Get the user document as an REXML::Document object.
    #
    # This document contains the links to the user owned collections,
    # such as where to upload workflows and jobs.
    def get_user_doc
      url = URI.parse(get_user_url())
      request = Net::HTTP::Get.new(url.path)
      request['Accept'] = MIME_TYPES[:rest]
      request.basic_auth @username, @password
      response = Net::HTTP.start(url.host, url.port) {|http|
        http.request(request)
      }
      response.value
      REXML::Document.new(response.body)
    end

    # Get the URL to a user-owned collection.
    #
    # collection -- The collection name, either "workflows" or "datas"
    # (also used with "jobs" by submit_job_doc).
    def get_user_collection_url(collection)
      user_doc = get_user_doc()

      #collections = user_doc.root.elements["{#{NAMESPACES[:service]}}#{collection}"]
      collections = user_doc.root.elements[collection]
      return collections.attributes.get_attribute_ns(NAMESPACES[:xlink], 'href').value
    end

    # Get the URL to the output document for a job.
    #
    # It generally only makes sense to call this function if
    # get_job_status() == Status::COMPLETE, but no check is enforced here.
    #
    # Return the URL to a data document produced by the job, or nil if the
    # job has not (yet) produced any output.
    #
    # job_url -- The URL to a job resource previously created using
    #   submit_job().
    def get_job_outputs_url(job_url)
      job_document = get_xml_doc(job_url)
      #outputs_element = job_document.root.elements["{#{NAMESPACES[:service]}}outputs"]
      outputs_element = job_document.root.elements['outputs']
      return nil if not outputs_element
      outputs_element.attributes.get_attribute_ns(NAMESPACES[:xlink], 'href').value
    end

    # Get the output document for a job.
    #
    # Return the output document as an REXML::Document object, or nil
    # if the job didn't have an output document (yet). This document can
    # be parsed using parse_data_doc().
    #
    # job_url -- The URL to a job resource previously created using
    #   submit_job().
    def get_job_outputs_doc(job_url)
      outputs_url = get_job_outputs_url(job_url)
      return nil if not outputs_url
      get_xml_doc(outputs_url, MIME_TYPES[:baclava])
    end

    # Retrieve an XML document from the given URL.
    #
    # Return the retrieved document as a REXML::Document.
    #
    # doc_url -- The URL to a resource retrievable as an XML document
    #
    # mimeType -- The mime-type to request using the Accept header, by
    #   default MIME_TYPES[:rest]
    def get_xml_doc(doc_url, mimeType=MIME_TYPES[:rest])
      url = URI.parse(doc_url)
      request = Net::HTTP::Get.new(url.path)
      request['Accept'] = mimeType
      request.basic_auth @username, @password
      response = Net::HTTP.start(url.host, url.port) {|http|
        http.request(request)
      }
      response.value
      REXML::Document.new(response.body)
    end

    # Return the size of an XML document from the given URL without
    # fetching the document (issues a HEAD request).
    #
    # Return the Content-Length of the document in bytes, or nil if the
    # server did not report one.
    #
    # doc_url -- The URL of the resource to find the size of
    #
    # mimeType -- The mime-type to request using the Accept header, by
    #   default MIME_TYPES[:rest]
    def get_xml_doc_size(doc_url, mimeType=MIME_TYPES[:rest])
      url = URI.parse(doc_url)
      request = Net::HTTP::Head.new(url.path)
      request['Accept'] = mimeType
      request.basic_auth @username, @password
      response = Net::HTTP.start(url.host, url.port) {|http|
        http.request(request)
      }
      response.content_length
    end

    # Parse a data document as returned from get_job_outputs_doc().
    #
    # Return a hash where the keys are strings, matching the names of
    # ports of the workflow. The values are Document::Data objects.
    #
    # xml_document -- A data document as a REXML::Document. This data
    #   document can be created using create_data_doc()
    def parse_data_doc(xml_document)
      Baclava::Reader.read(xml_document)
    end

    # Upload a data document to the current user's collection.
    #
    # Return the URL of the created data resource.
    #
    # xml_document -- A data document as a REXML::Document. This data
    #   document can be created using create_data_doc()
    #
    # Raises:
    #   CouldNotCreateError -- If the service returned 200 OK instead of
    #     creating the resource
    def upload_data_doc(xml_document)
      datas_url = get_user_collection_url("datas")
      upload_to_collection(datas_url, xml_document.to_s, MIME_TYPES[:baclava])
    end

    # Tests if the url is valid for this server (HEAD request succeeds
    # with a 2xx response).
    def url_valid?(url)
      url = URI.parse(url)
      req = Net::HTTP::Head.new(url.path)
      req.basic_auth @username, @password
      Net::HTTP.start(url.host, url.port) {|http|
        http.request(req)
      }.kind_of?(Net::HTTPSuccess)
    end
|
344
|
+
|
345
|
+
#Upload data by POST-ing to given URL.
|
346
|
+
#
|
347
|
+
#Return the URL of the created resource if the request succeeded with
|
348
|
+
#201 Created.
|
349
|
+
#
|
350
|
+
#Raises:
|
351
|
+
# CouldNotCreateError -- If the service returned 200 OK instead of
|
352
|
+
# creating the resource
|
353
|
+
# Net::HTTPError -- If any other HTTP result code (including errors)
|
354
|
+
# was returned
|
355
|
+
#
|
356
|
+
#url -- The URL of the collection of where to POST,
|
357
|
+
# normally retrieved using get_user_collection_url().
|
358
|
+
#
|
359
|
+
#data -- The data to upload as a string
|
360
|
+
#
|
361
|
+
#content_type -- The MIME type of the data to upload. Typically the value
|
362
|
+
# of one of the MimeTypes constants. For data uploaded to the "datas" user
|
363
|
+
# collection this would be MIME_TYPES[:baclava], and for workflow to the "
|
364
|
+
# workflows" collection, MIME_TYPES[:scufl]. Any other XML documents from
|
365
|
+
# the NAMESPACES[:service] namespace has the mime type MIME_TYPES[:rest]
|
366
|
+
def upload_to_collection(url, data, content_type)
|
367
|
+
url = URI.parse(url)
|
368
|
+
request = Net::HTTP::Post.new(url.path)
|
369
|
+
request.body = data
|
370
|
+
request['Accept'] = MIME_TYPES[:rest]
|
371
|
+
request['Content-Type'] = content_type
|
372
|
+
request.basic_auth @username, @password
|
373
|
+
response = Net::HTTP.start(url.host, url.port) {|http|
|
374
|
+
http.request(request)
|
375
|
+
}
|
376
|
+
response.value
|
377
|
+
raise CouldNotCreateError(url, response) unless response.kind_of?(Net::HTTPCreated)
|
378
|
+
response.header['Location']
|
379
|
+
end
|
380
|
+
|
381
|
+
#Create a data document to be uploaded with upload_data_doc().
|
382
|
+
#
|
383
|
+
#Return the data document a REXML::Document. This data document can be parsed using
|
384
|
+
#parse_data_doc()
|
385
|
+
#
|
386
|
+
#hash -- A hash where the keys are strings, matching the names of input
|
387
|
+
# ports of the workflow to run. The values are Document::Data objects.
|
388
|
+
#
|
389
|
+
def create_data_doc(hash)
|
390
|
+
Baclava::Writer.write_doc(hash)
|
391
|
+
end
|
392
|
+
|
393
|
+
#Create a job document for submission with submit_job().
|
394
|
+
#
|
395
|
+
#Return the job document as XML.
|
396
|
+
#
|
397
|
+
#workflow_url -- The URL of a workflow previously uploaded using
|
398
|
+
# upload_workflow()
|
399
|
+
#
|
400
|
+
#inputs_url -- The (optional) URL of a input document previously
|
401
|
+
# uploaded using upload_data_doc()
|
402
|
+
def create_job_doc(workflow_url, inputs_url=nil)
|
403
|
+
xml = Builder::XmlMarkup.new
|
404
|
+
xml.instruct!
|
405
|
+
REXML::Document.new(xml.job('xmlns' => NAMESPACES[:service], 'xmlns:xlink' => NAMESPACES[:xlink]) {
|
406
|
+
xml.inputs('xlink:href' => inputs_url) if inputs_url
|
407
|
+
xml.workflow('xlink:href' => workflow_url)
|
408
|
+
})
|
409
|
+
end
|
410
|
+
|
411
|
+
#Submit a job to be queued for execution on the server.
|
412
|
+
#
|
413
|
+
#Return the URL to the job resource.
|
414
|
+
#
|
415
|
+
#job_document -- A job document created with create_job_doc() specifying
|
416
|
+
# the workflow to run with which inputs.
|
417
|
+
#
|
418
|
+
#Raises:
|
419
|
+
# CouldNotCreateError -- If the service returned 200 OK instead of
|
420
|
+
# creating the resource
|
421
|
+
def submit_job_doc(job_document)
|
422
|
+
jobsURL = get_user_collection_url("jobs")
|
423
|
+
upload_to_collection(jobsURL, job_document.to_s, MIME_TYPES[:rest])
|
424
|
+
end
|
425
|
+
|
426
|
+
public
|
427
|
+
|
428
|
+
#Get the status of a previously submitted job.
|
429
|
+
#
|
430
|
+
#Return the status as a string, one of the values from Status.
|
431
|
+
#
|
432
|
+
#job_url -- The URL to a job resource previously created using
|
433
|
+
# submit_job().
|
434
|
+
def get_job_status(job_url)
|
435
|
+
job_document = get_xml_doc(job_url)
|
436
|
+
#status = job_document.elements["{#{NAMESPACES[:service]}}status"]
|
437
|
+
status = job_document.root.elements['status']
|
438
|
+
# TODO: For future checks, use:
|
439
|
+
#status_url = status.attributes.get_attribute_ns(NAMESPACES[:xlink], 'href').value
|
440
|
+
status.text
|
441
|
+
end
|
442
|
+
|
443
|
+
#Get the date a previously submitted job was created.
|
444
|
+
#
|
445
|
+
#Return the date as a Datetime object.
|
446
|
+
#
|
447
|
+
#job_url -- The URL to a job resource previously created using
|
448
|
+
# submit_job().
|
449
|
+
def get_job_created_date(job_url)
|
450
|
+
job_document = get_xml_doc(job_url)
|
451
|
+
#created = job_document.elements["{#{NAMESPACES[:dcterms]}}created"]
|
452
|
+
created = job_document.root.elements['dcterms:created'].text
|
453
|
+
DateTime.parse(created)
|
454
|
+
end
|
455
|
+
|
456
|
+
#Get the date a previously submitted job was last modified.
|
457
|
+
#
|
458
|
+
#Return the date as a Datetime object.
|
459
|
+
#
|
460
|
+
#job_url -- The URL to a job resource previously created using
|
461
|
+
# submit_job().
|
462
|
+
def get_job_modified_date(job_url)
|
463
|
+
job_document = get_xml_doc(job_url)
|
464
|
+
#modified = job_document.elements["{#{NAMESPACES[:dcterms]}}modified"]
|
465
|
+
modified = job_document.root.elements['dcterms:modified'].text
|
466
|
+
DateTime.parse(modified)
|
467
|
+
end
|
468
|
+
|
469
|
+
#Get the job's internal progress report. This might be available
|
470
|
+
#while the job is running.
|
471
|
+
#
|
472
|
+
#Return the internal progress report as a Document::Report object.
|
473
|
+
#
|
474
|
+
#job_url -- The URL to a job resource previously created using submit_job().
|
475
|
+
def get_job_report(job_url)
|
476
|
+
job_document = get_xml_doc(job_url)
|
477
|
+
#report_element = job_document.elements["{#{NAMESPACES[:service]}}report"]
|
478
|
+
report_element = job_document.root.elements['report']
|
479
|
+
report_url = report_element.attributes.get_attribute_ns(NAMESPACES[:xlink], 'href').value
|
480
|
+
# TODO: Cache report_url per job
|
481
|
+
job_report_document = get_xml_doc(report_url, MIME_TYPES[:report])
|
482
|
+
Document::Report.from_document(job_report_document)
|
483
|
+
end
|
484
|
+
|
485
|
+
#Get the outputs of a job.
|
486
|
+
#
|
487
|
+
#Return the job outputs as a hash where the keys are strings,
|
488
|
+
#matching the names of output ports of the workflow. The values are
|
489
|
+
#Document::Data objects. If no outputs exists, nil is returned instead.
|
490
|
+
#
|
491
|
+
#job_url -- The URL to a job resource previously created using
|
492
|
+
# submit_job().
|
493
|
+
def get_job_outputs(job_url)
|
494
|
+
job_outputs = get_job_outputs_doc(job_url)
|
495
|
+
return nil unless job_outputs
|
496
|
+
parse_data_doc(job_outputs)
|
497
|
+
end
|
498
|
+
|
499
|
+
#Get the size of the outputs of a job.
|
500
|
+
#
|
501
|
+
#Return the size of the outputs of a job in kilobytes.
|
502
|
+
#If no outputs exists, nil is returned instead.
|
503
|
+
#
|
504
|
+
#job_url -- The URL to a job resource previously created using
|
505
|
+
# submit_job().
|
506
|
+
def get_job_outputs_size(job_url)
|
507
|
+
outputs_url = get_job_outputs_url(job_url)
|
508
|
+
return nil if not outputs_url
|
509
|
+
get_xml_doc_size(outputs_url, MIME_TYPES[:baclava])
|
510
|
+
end
|
511
|
+
|
512
|
+
#Check if a job has finished in one way or another.
|
513
|
+
#
|
514
|
+
#Note that the job might have finished unsuccessfully. To check
|
515
|
+
#if a job is actually complete, check::
|
516
|
+
#
|
517
|
+
# get_job_status(job_url) == Status::COMPLETE.
|
518
|
+
#
|
519
|
+
#Return True if the job is in a finished state, that is that the
|
520
|
+
#Status.finished?(get_job_status()) is True.
|
521
|
+
#
|
522
|
+
#job_url -- The URL to a job resource previously created using
|
523
|
+
# #submit_job().
|
524
|
+
#
|
525
|
+
def finished?(job_url)
|
526
|
+
status = get_job_status(job_url)
|
527
|
+
Status.finished?(status)
|
528
|
+
end
|
529
|
+
|
530
|
+
#Submit a job to be queued for execution on the server.
|
531
|
+
#
|
532
|
+
#Return the URL to the created job resource.
|
533
|
+
#
|
534
|
+
#workflow_url -- The URL of a workflow previously uploaded using
|
535
|
+
# upload_workflow()
|
536
|
+
#
|
537
|
+
#inputs_url -- The (optional) URL of a input resource previously
|
538
|
+
# uploaded using upload_data()
|
539
|
+
#
|
540
|
+
#Raises:
|
541
|
+
# CouldNotCreateError -- If the service returned 200 OK instead of
|
542
|
+
# creating the resource
|
543
|
+
#
|
544
|
+
def submit_job(workflow_url, inputs_url=nil)
|
545
|
+
job_document = create_job_doc(workflow_url, inputs_url)
|
546
|
+
submit_job_doc(job_document)
|
547
|
+
end
|
548
|
+
|
549
|
+
#Upload data to be used with submit_job().
|
550
|
+
#
|
551
|
+
#Return the URL to the created data resource.
|
552
|
+
#
|
553
|
+
#hash -- A hash where the keys are strings, matching the names of input
|
554
|
+
# ports of the workflow to run. The values can be strings, lists of strings, or deeper
|
555
|
+
# lists.
|
556
|
+
#
|
557
|
+
#Raises:
|
558
|
+
# CouldNotCreateError -- If the service returned 200 OK instead of
|
559
|
+
# creating the resource
|
560
|
+
def upload_data(hash)
|
561
|
+
inputs = create_data_doc(hash)
|
562
|
+
upload_data_doc(inputs)
|
563
|
+
end
|
564
|
+
|
565
|
+
#Checks if the workflow exists on the server
|
566
|
+
#
|
567
|
+
#workflow_url -- The URL to a workflow previously uploaded using
|
568
|
+
# upload_workflow().
|
569
|
+
def workflow_exists?(workflow_url)
|
570
|
+
url_valid?(workflow_url)
|
571
|
+
end
|
572
|
+
|
573
|
+
#Checks if the username and password is valid for the service
|
574
|
+
def service_valid?
|
575
|
+
begin
|
576
|
+
get_user_url
|
577
|
+
true
|
578
|
+
rescue
|
579
|
+
false
|
580
|
+
end
|
581
|
+
end
|
582
|
+
|
583
|
+
#Upload a workflow XML document to the current users' collection.
|
584
|
+
#
|
585
|
+
#Return the URL of the created workflow resource.
|
586
|
+
#
|
587
|
+
#workflow_xml -- The Taverna scufl workflow as a string
|
588
|
+
#
|
589
|
+
#Raises:
|
590
|
+
# CouldNotCreateError -- If the service returned 200 OK instead of
|
591
|
+
# creating the resource
|
592
|
+
def upload_workflow(workflow_xml)
|
593
|
+
workflows_url = get_user_collection_url("workflows")
|
594
|
+
upload_to_collection(workflows_url, workflow_xml, MIME_TYPES[:scufl])
|
595
|
+
end
|
596
|
+
|
597
|
+
#Wait (blocking) for a job to finish, or until a maximum timeout
|
598
|
+
#has been reached.
|
599
|
+
#
|
600
|
+
#Return the status of the job. If the
|
601
|
+
#
|
602
|
+
#job_url -- The URL to a job resource previously created using
|
603
|
+
# submit_job().
|
604
|
+
#
|
605
|
+
#timeout -- The maximum number of seconds (as a float) to wait for job.
|
606
|
+
# The default value is DEFAULT_TIMEOUT.
|
607
|
+
#
|
608
|
+
#refresh -- In seconds (as a float), how often to check the job's
|
609
|
+
# status while waiting. The default value is DEFAULT_REFRESH.
|
610
|
+
def wait_for_job(job_url, timeout=DEFAULT_TIMEOUT, refresh=DEFAULT_REFRESH)
|
611
|
+
now = Time.now
|
612
|
+
_until = now + timeout
|
613
|
+
while _until > Time.now and not finished?(job_url)
|
614
|
+
now = Time.now # finished?() might have taken a while
|
615
|
+
sleep [[refresh, _until-now].min, 0].max
|
616
|
+
now = Time.now # after the sleep
|
617
|
+
end
|
618
|
+
get_job_status(job_url)
|
619
|
+
end
|
620
|
+
|
621
|
+
#Execute a workflow and wait until it's finished.
|
622
|
+
#
|
623
|
+
#This will block until the workflow has been executed by the server, and
|
624
|
+
#return the result of the workflow run.
|
625
|
+
#
|
626
|
+
#Return the parsed output document as a hash where the keys are
|
627
|
+
#strings, matching the names of output ports of the workflow. The
|
628
|
+
#values are Document::Data objects. If the workflow
|
629
|
+
#did not produce any output, nil might be returned instead.
|
630
|
+
#
|
631
|
+
#workflow_xml -- The workflow as a Taverna scufl XML string. This *or* the
|
632
|
+
# workflow_url parameter is required.
|
633
|
+
#
|
634
|
+
#workflow_url -- The URL to a workflow previously uploaded using
|
635
|
+
# upload_workflow(). This *or* the workflow_xml parameter is required.
|
636
|
+
#
|
637
|
+
#inputs -- The (optional) inputs to the workflow, either as a Baclava
|
638
|
+
# XML document (string), or as a hash where the keys are
|
639
|
+
# strings, matching the names of input ports of the workflow. The
|
640
|
+
# values can be strings, lists of strings, or deeper lists.
|
641
|
+
#
|
642
|
+
#timeout -- The maximum number of seconds (as a float) to wait for job.
|
643
|
+
# The default value is DEFAULT_TIMEOUT.
|
644
|
+
#
|
645
|
+
#refresh -- In seconds (as a float), how often to check the job's
|
646
|
+
# status while waiting. The default value is DEFAULT_REFRESH.
|
647
|
+
#
|
648
|
+
#Raises:
|
649
|
+
# NotCompleteError -- If the job did not complete, for instance because
|
650
|
+
# the timeout was reached before completion.
|
651
|
+
#
|
652
|
+
# urllib2.HTTPError -- If any step in submitting or requesting the status and
|
653
|
+
# result of the job failed.
|
654
|
+
def execute_sync(workflow_xml=nil, workflow_url=nil, inputs=nil,
|
655
|
+
timeout=DEFAULT_TIMEOUT, refresh=DEFAULT_REFRESH)
|
656
|
+
raise TypeError.new("workflow_xml or worklowURL must be given") unless workflow_xml or workflow_url
|
657
|
+
raise TypeError.new("Only one of workflow_xml and workflow_url can be given") if workflow_xml and workflow_url
|
658
|
+
|
659
|
+
workflow_url = upload_workflow(workflow_xml) if workflow_xml
|
660
|
+
inputs_url = upload_data(inputs) if inputs
|
661
|
+
|
662
|
+
job_url = submit_job(workflow_url, inputs_url)
|
663
|
+
status = wait_for_job(job_url, timeout, refresh)
|
664
|
+
|
665
|
+
raise NotCompleteError.new(job_url, status) if status != Status::COMPLETE
|
666
|
+
|
667
|
+
get_job_outputs(job_url)
|
668
|
+
end
|
669
|
+
|
670
|
+
end
|
671
|
+
|
672
|
+
end
|
673
|
+
|