tap 0.18.0 → 0.19.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,374 +0,0 @@
1
- require 'tap/app'
2
- require 'tap/schema/utils'
3
- require 'tap/schema/parser'
4
-
5
- module Tap
6
- class App
7
# Builds a schema into this app.
#
# schema::  a Schema, or an object that is passed to Schema.new when it is
#           not already a Schema
# options:: merged over the defaults {:clean => true, :validate => true}
#
#   :resources:: when set, schema.resolve! is run with a block that looks
#                each id up as resources[type][id]
#   :clean::     when truthy, reset is called before the schema is built
#   :validate::  handed to schema.build! as its validate argument
#
# Returns the result of schema.build!.
def build(schema, options={})
  settings = {:clean => true, :validate => true}.merge(options)

  schema = Schema.new(schema) unless schema.kind_of?(Schema)

  lookup = settings[:resources]
  schema.resolve! {|type, id| lookup[type][id] } if lookup

  reset if settings[:clean]
  schema.build!(self, settings[:validate])
end
29
-
30
# Creates a new Schema from the current state of the app.
#
# Each [task, inputs] entry of queue.to_a is added to the schema, each
# middleware is registered via schema.use (iterating middleware in
# reverse), and the schema tasks are then renamed to consecutive integers
# starting at 0, in the order of schema.tasks.keys.
#
# If a block is given, it is yielded each (type, resource) pair of
# schema.resources. Returns the schema.
def to_schema
  schema = Schema.new

  queue.to_a.each {|task, inputs| schema.add(task, inputs) }
  middleware.reverse_each {|m| schema.use(m) }

  # re-key the tasks by position
  schema.tasks.keys.each_with_index do |key, position|
    schema.rename(key, position)
  end

  if block_given?
    schema.resources.each_pair {|type, resource| yield(type, resource) }
  end

  schema
end
54
- end
55
-
56
- class Schema
57
class << self
  # Parses str as YAML and creates a new schema from the result. A truthy
  # document is passed through Utils.symbolize first; a falsy document
  # results in a schema built from an empty hash.
  #
  # NOTE(review): depending on the Ruby/Psych version, YAML.load may
  # instantiate arbitrary objects -- confirm str comes from a trusted
  # source.
  def load(str)
    document = YAML.load(str)
    document = document ? Utils.symbolize(document) : {}
    new(document)
  end

  # Reads the file at path and loads its content as a schema (see load).
  def load_file(path)
    content = File.read(path)
    load(content)
  end
end
67
-
68
- include Utils
69
-
70
- # A hash of task schema describing individual tasks in a workflow. Tasks
71
- # only require a class, but may contain configurations and even arguments
72
- # for enque. Individual tasks may be a hash or an array. The tasks are
73
- # resolved if they take one of these forms:
74
- #
75
- # tasks:
76
- # key: {:class: TaskClass, ...}
77
- # key: [TaskClass, ...]
78
- #
79
- attr_reader :tasks
80
-
81
- # An array of join schema that describe how to join tasks together. Joins
82
- # have arrays of inputs and outputs that reference task keys. Individual
83
- # joins may be a hash or an array. The joins are resolved if they take
84
- # one of these forms:
85
- #
86
- # joins:
87
- # - [[inputs], [outputs], {:class: JoinClass, ...}]
88
- # - [[inputs], [outputs], [JoinClass, ...]]
89
- #
90
- attr_reader :joins
91
-
92
- # An array of [key, [args]] data that indicates the tasks and arguments
93
- # to be added to an application during build. A key may be specified
94
- # alone if tasks[key] is an array; in that case, the arguments remaining
95
- # in tasks[key] after instantiation will be used.
96
- #
97
- # queue:
98
- # - key # uses tasks[key]
99
- # - [key, [1, 2, 3]] # enques tasks[key] with [1, 2, 3]
100
- #
101
- attr_reader :queue
102
-
103
- # An array of middleware to build onto the app.
104
- attr_reader :middleware
105
-
106
- # The app used to build self
107
- attr_reader :app
108
-
109
# Initializes a new schema from a hash. The :tasks, :joins, :queue, and
# :middleware values are used directly (they are not copied); a missing
# or falsy :tasks defaults to an empty hash and the others default to an
# empty array. A new schema has no app, and so is not built.
def initialize(schema={})
  @app = nil

  @tasks = schema[:tasks] || {}
  @joins, @queue, @middleware = [:joins, :queue, :middleware].collect do |key|
    schema[key] || []
  end
end
117
-
118
# Adds node to the schema, along with every task collected from node by
# collect_tasks. Each collected task is stored in tasks under the task
# itself, with task.to_hash as the value. The distinct joins found on
# those tasks are appended to joins as [inputs, outputs, join.to_hash].
#
# If inputs is given, [node, inputs] is appended to the queue.
# Returns self.
def add(node, inputs=nil)
  found = collect_tasks(node).collect do |task|
    tasks[task] = task.to_hash
    task.joins
  end

  found.flatten.uniq.each do |join|
    joins << [join.inputs, join.outputs, join.to_hash]
  end

  queue << [node, inputs] if inputs
  self
end
132
-
133
# Appends middleware.to_hash to the middleware array. Returns self.
def use(middleware)
  entry = middleware.to_hash
  self.middleware << entry
  self
end
137
-
138
# Returns a hash of the resources in self, grouped by type:
#
#   :task::       the values of tasks
#   :join::       the third member of each join
#   :middleware:: the middleware array itself
#
def resources
  grouped = {}
  grouped[:task] = tasks.values
  grouped[:join] = joins.collect {|entry| entry[2] }
  grouped[:middleware] = middleware
  grouped
end
145
-
146
# Renames the current_key task to new_key. References in joins and
# queue are updated by rename. Raises an error if built? or if the
# specified task does not exist. Returns self.
#
# Existence is checked with has_key? (not by the truthiness of the task
# value) so that a task whose value is nil, which resolve! tolerates, can
# still be renamed, and so that a failed rename leaves tasks untouched.
def rename(current_key, new_key)
  raise "cannot rename if built" if built?

  unless tasks.has_key?(current_key)
    raise "unknown task: #{current_key.inspect}"
  end

  # rename task
  tasks[new_key] = tasks.delete(current_key)

  # update join references (both the inputs and the outputs)
  joins.each do |inputs, outputs, join|
    [inputs, outputs].each do |keys|
      keys.each_index do |index|
        keys[index] = new_key if keys[index] == current_key
      end
    end
  end

  # update queue references, note both array ([key, inputs]) and
  # reference-style (key) entries must be handled
  queue.each_index do |index|
    entry = queue[index]

    if entry.kind_of?(Array)
      entry[0] = new_key if entry[0] == current_key
    elsif entry == current_key
      queue[index] = new_key
    end
  end

  self
end
187
-
188
# Resolves the tasks, joins, and middleware in self using resolve
# (presumably provided by Utils -- defined outside this method), and
# fills in missing queue inputs. Requires a block, which is yielded a
# resource type and an id:
#
#   tasks::      (:task, id || key), a nil/false task is treated as {}
#   joins::      (:join, id || 'join'), a nil/false join is treated as {}
#   middleware:: (:middleware, id)
#
# Queue entries are rewritten as [key, inputs], where tasks[key] is used
# when the entry has no inputs. Returns self.
def resolve!
  tasks.each_pair do |key, spec|
    tasks[key] = resolve(spec || {}) {|id| yield(:task, id || key) }
  end

  joins.collect! do |inputs, outputs, spec|
    resolved = resolve(spec || {}) {|id| yield(:join, id || 'join') }
    [inputs, outputs, resolved]
  end

  middleware.collect! do |spec|
    resolve(spec) {|id| yield(:middleware, id) }
  end

  queue.collect! do |key, inputs|
    [key, inputs || tasks[key]]
  end

  self
end
216
-
217
# Checks that self can be built, collecting every problem before raising:
#
# * tasks, joins, and middleware that are not resolved?
# * join inputs and outputs that are not keys in tasks
# * queue entries whose key is not in tasks, or whose args are not an
#   array
#
# Raises a single error listing all problems, one per line (prefixed by
# a count when there is more than one). Returns self if there are none.
def validate!
  errors = []

  tasks.each_value do |task|
    errors << "unresolvable task: #{task.inspect}" unless resolved?(task)
  end

  joins.each do |inputs, outputs, join|
    errors << "unresolvable join: #{join.inspect}" unless resolved?(join)

    inputs.each do |key|
      errors << "missing join input: #{key.inspect}" unless tasks.has_key?(key)
    end

    outputs.each do |key|
      errors << "missing join output: #{key.inspect}" unless tasks.has_key?(key)
    end
  end

  queue.each do |key, args|
    if !tasks.has_key?(key)
      errors << "missing task: #{key}"
    elsif !args.kind_of?(Array)
      errors << "non-array args: #{args.inspect}"
    end
  end

  middleware.each do |m|
    errors << "unresolvable middleware: #{m.inspect}" unless resolved?(m)
  end

  return self if errors.empty?

  prefix = errors.length > 1 ? "#{errors.length} schema errors\n" : ""
  raise "#{prefix}#{errors.join("\n")}\n"
end
271
-
272
# Removes references to tasks that are not in self. Join inputs and
# outputs without a task are removed in place, then joins left without
# any inputs or without any outputs are removed, as are queue entries
# whose key is not a task. Returns self.
def cleanup!
  joins.reject! do |inputs, outputs, join|
    # drop the keys that do not reference a task
    inputs.reject! {|key| !tasks.has_key?(key) }
    outputs.reject! {|key| !tasks.has_key?(key) }

    # orphan joins are removed
    inputs.empty? || outputs.empty?
  end

  # drop queue entries without a task
  queue.reject! {|key, inputs| !tasks.has_key?(key) }

  self
end
292
-
293
# Builds self onto app, in this order:
#
# 1. validate! is run (unless validate is false/nil)
# 2. each task is replaced by instantiate(task, app); tasks is frozen
# 3. each join is replaced by the result of joining its instantiated
#    input and output tasks; joins is frozen
# 4. each middleware is replaced by instantiate(middleware, app);
#    middleware is frozen
# 5. each queue entry is enqued on app with its inputs splatted; queue is
#    then cleared and frozen
#
# Afterwards app is recorded, so built? is true. Returns self.
# instantiate is defined outside this method (presumably by Utils).
def build!(app, validate=true)
  validate! if validate

  # instantiate tasks
  tasks.keys.each {|key| tasks[key] = instantiate(tasks[key], app) }
  tasks.freeze

  # build the workflow
  joins.collect! do |input_keys, output_keys, spec|
    sources = input_keys.collect {|key| tasks[key] }
    targets = output_keys.collect {|key| tasks[key] }
    instantiate(spec, app).join(sources, targets)
  end
  joins.freeze

  # utilize middleware
  middleware.collect! {|spec| instantiate(spec, app) }
  middleware.freeze

  # enque tasks
  queue.each {|key, inputs| app.enq(tasks[key], *inputs) }
  queue.clear.freeze

  @app = app
  self
end
325
-
326
# Returns true once an app has been recorded for self (see build!).
def built?
  !@app.nil?
end
329
-
330
# Enques the task identified by key onto the queue of the app, with
# inputs passed as a single array. Raises an error unless built?, or if
# tasks has no (truthy) entry for key. Returns the task.
def enque(key, *inputs)
  raise "cannot enque unless built" unless built?

  task = tasks[key]
  raise "unknown task: #{key.inspect}" unless task

  app.queue.enq(task, inputs)
  task
end
342
-
343
# Creates a hash dump of self, with the :tasks, :joins, :queue, and
# :middleware of self as values (the values are not copied).
def to_hash
  hash = {}
  hash[:tasks] = tasks
  hash[:joins] = joins
  hash[:queue] = queue
  hash[:middleware] = middleware
  hash
end
351
-
352
- # Converts self to a hash and serializes it to YAML.
353
- def dump(io=nil)
354
- YAML.dump(to_hash, io)
355
- end
356
-
357
- protected
358
-
359
# helper to collect task, plus all tasks reachable from task through the
# inputs and outputs of its joins (recursively). Tasks already in
# collection are skipped, so cyclic workflows terminate. Returns
# collection.
def collect_tasks(task, collection=[]) # :nodoc:
  return collection if collection.include?(task)

  collection << task
  task.joins.each do |join|
    neighbors = join.inputs + join.outputs
    neighbors.each {|neighbor| collect_tasks(neighbor, collection) }
  end

  collection
end
373
- end
374
- end
@@ -1,425 +0,0 @@
1
- require 'shellwords'
2
- require 'tap/schema'
3
-
4
- module Tap
5
- class Schema
6
class << self
  # Parses argv (ARGV by default) using a new Parser and returns the
  # schema of the parser.
  def parse(argv=ARGV)
    parser = Parser.new(argv)
    parser.schema
  end
end
11
-
12
- # A parser for workflow schema defined on the command line.
13
- #
14
- # == Syntax
15
- #
16
- # The command line syntax can be thought of as a series of ARGV arrays
17
- # connected by breaks. The arrays define tasks (ie nodes) in a workflow
18
- # while the breaks define joins. These are the available breaks:
19
- #
20
- # break meaning
21
- # -- default delimiter, no join
22
- # --: sequence join
23
- # --[][] multi-join (sequence, fork, merge)
24
- #
25
- # As an example, this defines three tasks (a, b, c) and sequences the
26
- # b and c tasks:
27
- #
28
- # schema = Parser.new("a -- b --: c").schema
29
- # schema.tasks # => {0 => ["a"], 1 => ["b"], 2 => ["c"]}
30
- # schema.joins # => [[[1],[2]]]
31
- #
32
- # In the example, the indicies of the tasks participating in the sequence
33
- # are inferred as the last and next tasks in the schema. Alternatively
34
- # the tasks participating in the sequence may be written out directly;
35
- # these also sequence b to c.
36
- #
37
- # schema = Parser.new("a -- b -- c --1:2").schema
38
- # schema.tasks
39
- # # => {
40
- # # 0 => ["a"],
41
- # # 1 => ["b"],
42
- # # 2 => ["c"]
43
- # # }
44
- # schema.joins
45
- # # => [
46
- # # [[1],[2]]
47
- # # ]
48
- #
49
- # schema = Parser.new("a --1:2 b -- c").schema
50
- # schema.tasks
51
- # # => {
52
- # # 0 => ["a"],
53
- # # 1 => ["b"],
54
- # # 2 => ["c"]
55
- # # }
56
- # schema.joins
57
- # # => [
58
- # # [[1],[2]]
59
- # # ]
60
- #
61
- # ==== Multi-Join Syntax
62
- #
63
- # The multi-join syntax allows the specification of arbitrary joins.
64
- # Starting with a few examples:
65
- #
66
- # example meaning
67
- # --[][] last.sequence(next)
68
- # --[1][2] 1.sequence(2)
69
- # --[1][2,3] 1.fork(2,3)
70
- # --[1,2][3] 3.merge(1,2)
71
- #
72
- # The meaning of the bracket breaks seems to be changing but note that
73
- # the sequences, forks, and (unsynchronized) merges are all variations
74
- # of a multi-way join. Internally the breaks are interpreted like this:
75
- #
76
- # join = Join.new
77
- # join.join(inputs, outputs)
78
- #
79
- # To specify another class of join, or to specify join configurations,
80
- # add a string in the format "configs.class" where the configs are the
81
- # single-letter configuration flags and class is a lookup for the join
82
- # class.
83
- #
84
- # example interpretation
85
- # --:s Join.new(:splat => true)
86
- # --1:2is Join.new(:iterate => true, :splat => true)
87
- # --[][]q.sync Sync.new(:enq => true)
88
- # --[][].sync Sync.new
89
- #
90
- # If you can stand the syntax, you can also specify a full argv after
91
- # the bracket, just be sure to enclose the whole break in quotes.
92
- #
93
- # example interpretation
94
- # "--1:2 join -i -s" Join.new(:iterate => true, :splat => true)
95
- # "--[][] sync --enq" Sync.new(:enq => true)
96
- #
97
- # ==== Escapes and End Flags
98
- #
99
- # Breaks can be escaped by enclosing them in '-.' and '.-' delimiters;
100
- # any number of arguments may be enclosed within the escape. After the
101
- # end delimiter, breaks are active once again.
102
- #
103
- # schema = Parser.new("a -- b -- c").schema
104
- # schema.tasks
105
- # # => {
106
- # # 0 => ["a"],
107
- # # 1 => ["b"],
108
- # # 2 => ["c"]
109
- # # }
110
- #
111
- # schema = Parser.new("a -. -- b .- -- c").schema
112
- # schema.tasks
113
- # # => {
114
- # # 0 => ["a", "--", "b"],
115
- # # 1 => ["c"]
116
- # # }
117
- #
118
- # Parsing continues until the end of argv, or until an end flag '---' is
119
- # reached. The end flag may also be escaped.
120
- #
121
- # schema = Parser.new("a -- b --- c").schema
122
- # schema.tasks
123
- # # => {
124
- # # 0 => ["a"],
125
- # # 1 => ["b"]
126
- # # }
127
- #
128
- class Parser
129
-
130
# A set of parsing routines used internally by Tap::Schema::Parser,
# modularized for ease of testing, and potential re-use. parse_sequence
# requires that <tt>current_index</tt> and <tt>previous_index</tt> be
# implemented in the including class.
module Utils
  module_function

  # The escape begin argument
  ESCAPE_BEGIN = "-."

  # The escape end argument
  ESCAPE_END = ".-"

  # The parser end flag
  END_FLAG = "---"

  # Matches any breaking arg. Examples:
  #
  #   --
  #   --1:2
  #   --[1][2]
  #   --[1,2,3][4,5,6]is.join
  #   --.middleware
  #
  # After the match:
  #
  #   $1:: The string after the break
  #        (ex: '--' => '', '--:' => ':', '--[1,2][3,4]is.join' => '[1,2][3,4]is.join')
  #
  BREAK = /\A--(\z|[\d\:\[\.].*\z)/

  # Matches a sequence break. Examples:
  #
  #   :
  #   1:
  #   :2
  #   1:2:3
  #
  # After the match:
  #
  #   $1:: The sequence string after the break.
  #        (ex: ':' => ':', '1:2' => '1:2', '1:' => '1:', ':2' => ':2')
  #   $2:: The modifier string.
  #        (ex: ':i' => 'i', '1:2is' => 'is')
  #
  SEQUENCE = /\A(\d*(?::\d*)+)(.*)\z/

  # Matches a generic join break. Examples:
  #
  #   "[1,2,3][4,5,6] join -i -s"
  #   [1,2,3][4,5,6]is.join
  #   [1,2][3,4]
  #   [1][2]
  #
  # After the match:
  #
  #   $1:: The inputs string.
  #        (ex: '[1,2,3][4,5,6]' => '1,2,3')
  #   $2:: The outputs string.
  #        (ex: '[1,2,3][4,5,6]' => '4,5,6')
  #   $3:: The modifier string.
  #        (ex: '[][]is' => 'is')
  #
  JOIN = /\A\[([\d,]*)\]\[([\d,]*)\](.*)\z/

  # Matches a join modifier. After the match:
  #
  #   $1:: The modifier flag string.
  #        (ex: 'is.sync' => 'is')
  #   $2:: The class string.
  #        (ex: 'is.sync' => 'sync')
  #
  # The flags are restricted to ASCII letters. (The range A-z would also
  # admit the punctuation between 'Z' and 'a', such as '_' and '[', and
  # so turn a modifier like 'my_join' into a series of flags.)
  JOIN_MODIFIER = /\A([A-Za-z]*)(?:\.(.*))?\z/

  # Matches a generic middleware break. Examples:
  #
  #   ". middleware --flag"
  #   .middleware
  #
  # After the match:
  #
  #   $1:: The modifier string.
  #        (ex: '.middleware' => 'middleware')
  #
  MIDDLEWARE = /\A\.(.*)\z/

  # Parses an indicies str along commas (or along regexp, if specified),
  # and collects the indicies as integers. Empty segments are skipped. Ex:
  #
  #   parse_indicies('')                  # => []
  #   parse_indicies('1')                 # => [1]
  #   parse_indicies('1,2,3')             # => [1,2,3]
  #
  def parse_indicies(str, regexp=/,+/)
    indicies = []
    str.split(regexp).each do |n|
      indicies << n.to_i unless n.empty?
    end
    indicies
  end

  # Parses the match of a SEQUENCE regexp into an array of [input_indicies,
  # output_indicies, metadata] arrays. The inputs correspond to $1 and
  # $2 for the match. The previous and current index are assumed if $1
  # starts and/or ends with a colon. The metadata is omitted when the
  # modifier is empty; otherwise each array receives its own copy of the
  # metadata, so that modifying one join cannot affect the others.
  #
  #   parse_sequence("1:2:3", '')
  #   # => [
  #   # [[1], [2]],
  #   # [[2], [3]],
  #   # ]
  #
  #   parse_sequence(":1:2:", 'is')
  #   # => [
  #   # [[:previous_index], [1], ['join', '-i', '-s']],
  #   # [[1], [2], ['join', '-i', '-s']],
  #   # [[2], [:current_index], ['join', '-i', '-s']],
  #   # ]
  #
  def parse_sequence(one, two)
    indicies = parse_indicies(one, /:+/)
    indicies.unshift previous_index if one[0] == ?:
    indicies << current_index if one[-1] == ?:

    # pair up each index with the one following it
    sequences = []
    while indicies.length > 1
      sequences << [[indicies.shift], [indicies[0]]]
    end

    if argv = parse_join_modifier(two)
      sequences.each do |sequence|
        sequence << argv.dup
      end
    end

    sequences
  end

  # Parses the match of a JOIN regexp into a [input_indicies,
  # output_indicies, metadata] array. The inputs correspond to $1, $2,
  # and $3 for the match. The metadata is omitted when the modifier is
  # empty. A join type of 'join' is assumed when flags are given without
  # a class.
  #
  #   parse_join("1", "2,3", "")          # => [[1], [2,3]]
  #   parse_join("", "", "is.type")       # => [[], [], ['type', '-i', '-s']]
  #   parse_join("", "", "type -i -s")    # => [[], [], ['type', '-i', '-s']]
  #
  def parse_join(one, two, three)
    join = [parse_indicies(one), parse_indicies(two)]

    if argv = parse_join_modifier(three)
      join << argv
    end

    join
  end

  # Parses a join modifier string into an argv. Returns nil for an empty
  # modifier. A modifier in the "flags.class" format becomes
  # [class, '-f', '-l', ...], using 'join' if no class is specified. Any
  # other modifier is split into an argv using Shellwords.
  def parse_join_modifier(modifier)
    case modifier
    when ""
      nil
    when JOIN_MODIFIER
      argv = [$2 == nil || $2.empty? ? 'join' : $2]
      $1.split("").each {|char| argv << "-#{char}"}
      argv
    else
      Shellwords.shellwords(modifier)
    end
  end

  # Parses the match of a MIDDLEWARE regexp into a metadata array.
  # The input corresponds to $1 for the match. Currently this
  # method is an alias for Shellwords.shellwords.
  def parse_middleware(one)
    Shellwords.shellwords(one)
  end
end
308
-
309
- include Utils
310
-
311
- # The schema into which tasks are being parsed
312
- attr_reader :schema
313
-
314
# Initializes a new parser by immediately parsing argv (see parse).
def initialize(argv=[])
  self.parse(argv)
end
317
-
318
# Iterates through the argv splitting out task and join definitions.
# Parse is non-destructive to argv: a non-string argv is duplicated
# before it is handed to parse!, while a string argv is passed as-is
# (parse! splits a string into a new array using Shellwords).
def parse(argv)
  argv = argv.dup unless argv.kind_of?(String)
  parse!(argv)
end
325
-
326
# Same as parse, but removes parsed args from argv (a string argv is
# first split into a new array using Shellwords). Parsing resets schema
# to a new Schema and proceeds one argument at a time:
#
# * between ESCAPE_BEGIN and ESCAPE_END, arguments are added to the
#   current task verbatim
# * END_FLAG stops parsing; the remaining arguments are left in argv
# * a BREAK starts a new task and is interpreted by parse_break
# * anything else is added to the current task
#
# Afterwards every task key that is not the output of a join is added to
# the schema queue. Returns the schema.
def parse!(argv)
  @schema = Schema.new

  # nothing to parse
  return schema if argv.empty?

  argv = Shellwords.shellwords(argv) if argv.kind_of?(String)

  # ensure parsing begins with a break
  argv.unshift('--') unless argv[0] =~ BREAK

  @current_index = -1
  @current = nil
  escaping = false

  until argv.empty?
    arg = argv.shift

    # while escaping, arguments are added as-is
    # until an escape-end argument
    if escaping
      if arg == ESCAPE_END
        escaping = false
      else
        current << arg
      end

      next
    end

    case arg
    when ESCAPE_BEGIN then escaping = true
    when END_FLAG     then break
    when BREAK
      # a breaking argument begins a new task; the
      # remainder of the break may modify the schema
      modifier = $1
      @current_index += 1
      @current = nil
      parse_break(modifier)
    else
      # all other args (both inputs and configurations)
      # belong to the current task
      current << arg
    end
  end

  # the queue consists of all tasks
  # not used as a join output
  unjoined = schema.joins.inject(schema.tasks.keys) do |keys, join|
    keys - join[1]
  end
  schema.queue.concat(unjoined)

  schema
end
389
-
390
- protected
391
-
392
- # The index of the task currently being parsed.
393
- attr_reader :current_index # :nodoc:
394
-
395
# Returns the argv for the task currently being parsed. The argv is
# looked up (see task) on first use and cached until @current is reset.
def current
  @current = task(current_index) unless @current
  @current
end
398
-
399
# helper returning the schema task at the specified index; the task
# is initialized to an empty array if it is not already set
def task(index) # :nodoc:
  tasks = schema.tasks
  tasks[index] ||= []
end
403
-
404
# returns the index before current_index. No check is made on the
# result, which is negative when current_index is less than 1.
def previous_index # :nodoc:
  current_index.pred
end
408
-
409
# determines the type of break from arg (the string following the '--'
# of a break) and modifies the schema appropriately: a sequence adds
# one or more joins, a join adds a single join, and a middleware adds
# a middleware. An empty arg does nothing. Raises an ArgumentError
# for any other arg.
def parse_break(arg) # :nodoc:
  case arg
  when ""
    nil
  when SEQUENCE
    schema.joins.concat(parse_sequence(*Regexp.last_match.captures))
  when JOIN
    schema.joins << parse_join(*Regexp.last_match.captures)
  when MIDDLEWARE
    schema.middleware << parse_middleware(*Regexp.last_match.captures)
  else
    raise ArgumentError, "invalid break argument: #{arg}"
  end
end
423
- end
424
- end
425
- end