tap 0.18.0 → 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,374 +0,0 @@
1
- require 'tap/app'
2
- require 'tap/schema/utils'
3
- require 'tap/schema/parser'
4
-
5
- module Tap
6
- class App
# Builds the workflow described by schema onto this app.
#
# schema may be a Schema, or any object accepted by Schema.new. Options:
#
#   :clean      call reset on self before building (default true)
#   :validate   forwarded to Schema#build! (default true)
#   :resources  optional; when present the schema is resolved first and
#               each (type, id) pair is looked up as resources[type][id]
#
# Returns whatever Schema#build! returns.
def build(schema, options={})
  defaults = {:clean => true, :validate => true}
  options = defaults.merge(options)

  schema = Schema.new(schema) unless schema.kind_of?(Schema)

  lookup = options[:resources]
  schema.resolve! {|type, id| lookup[type][id] } if lookup

  reset if options[:clean]
  schema.build!(self, options[:validate])
end
29
-
# Assembles a new Schema from the current state of the app: every queue
# entry is added with Schema#add, middleware are registered in reverse
# order with Schema#use, and the task keys are renamed to consecutive
# integers starting at 0.
#
# If a block is given, each (type, resource) pair of schema.resources is
# yielded to it. Returns the schema.
def to_schema
  schema = Schema.new
  queue.to_a.each {|task, inputs| schema.add(task, inputs) }
  middleware.reverse_each {|m| schema.use(m) }

  # snapshot the keys up front; rename mutates schema.tasks
  schema.tasks.keys.each_with_index do |task, index|
    schema.rename(task, index)
  end

  if block_given?
    schema.resources.each_pair {|type, resource| yield(type, resource) }
  end

  schema
end
54
- end
55
-
56
- class Schema
class << self
  # Parses str as YAML and builds a new Schema from the symbolized
  # result. A document that loads as nil/false produces an empty schema.
  #
  # NOTE(review): YAML.load is used as-is; if str can come from an
  # untrusted source, confirm whether a restricted loader is required.
  def load(str)
    data = YAML.load(str)
    data = data ? Utils.symbolize(data) : {}
    new(data)
  end

  # Reads the file at path and loads its contents (see load).
  def load_file(path)
    str = File.read(path)
    load(str)
  end
end
67
-
68
- include Utils
69
-
70
- # A hash of task schema describing individual tasks in a workflow. Tasks
71
- # only require a class, but may contain configurations and even arguments
72
- # for enque. Individual tasks may be a hash or an array. The tasks are
73
- # resolved if they take one of these forms:
74
- #
75
- # tasks:
76
- # key: {:class: TaskClass, ...}
77
- # key: [TaskClass, ...]
78
- #
79
- attr_reader :tasks
80
-
81
- # An array of join schema that describe how to join tasks together. Joins
82
- # have arrays of inputs and outputs that reference task keys. Individual
83
- # joins may be a hash or an array. The joins are resolved if they take
84
- # one of these forms:
85
- #
86
- # joins:
87
- # - [[inputs], [outputs], {:class: JoinClass, ...}]
88
- # - [[inputs], [outputs], [JoinClass, ...]]
89
- #
90
- attr_reader :joins
91
-
92
- # An array of [key, [args]] data that indicates the tasks and arguments
93
- # to be added to an application during build. A key may be specified
94
- # alone if tasks[key] is an array; in that case, the arguments remaining
95
- # in tasks[key] after instantiation will be used.
96
- #
97
- # queue:
98
- # - key # uses tasks[key]
99
- # - [key, [1, 2, 3]] # enques tasks[key] with [1, 2, 3]
100
- #
101
- attr_reader :queue
102
-
103
- # An array of middleware to build onto the app.
104
- attr_reader :middleware
105
-
106
- # The app used to build self
107
- attr_reader :app
108
-
# Initializes a schema from a hash with the optional keys :tasks,
# :joins, :queue and :middleware. Missing (or nil/false) entries default
# to an empty hash for tasks and empty arrays for the rest. The schema
# starts out unbuilt (app is nil).
def initialize(schema={})
  @app        = nil
  @middleware = schema[:middleware] || []
  @queue      = schema[:queue] || []
  @joins      = schema[:joins] || []
  @tasks      = schema[:tasks] || {}
end
117
-
# Adds node, and every task reachable from node through joins, to the
# schema. Each task is stored under itself as key with task.to_hash as
# value, and each distinct join among those tasks is appended to joins
# as [inputs, outputs, join.to_hash]. When inputs is given, [node,
# inputs] is also appended to the queue. Returns self.
def add(node, inputs=nil)
  join_lists = collect_tasks(node).collect do |task|
    tasks[task] = task.to_hash
    task.joins
  end

  join_lists.flatten.uniq.each do |join|
    joins << [join.inputs, join.outputs, join.to_hash]
  end

  queue << [node, inputs] if inputs
  self
end
132
-
# Appends the hash form of middleware (middleware.to_hash) to the
# schema's middleware list. Returns self.
def use(middleware)
  entry = middleware.to_hash
  self.middleware << entry
  self
end
137
-
# Returns a hash of the resource definitions in self, grouped by type:
# the task values under :task, the third member of each join entry
# under :join, and the middleware array under :middleware.
def resources
  task_specs = tasks.values
  join_specs = joins.map {|entry| entry[2] }

  {:task => task_specs, :join => join_specs, :middleware => middleware}
end
145
-
# Renames the current_key task to new_key. References in joins and
# queue are updated by rename. Raises an error if built? or if the
# specified task does not exist. Returns self.
#
# A task whose value is nil is a valid, existing task (resolve! fills
# such entries in later), so existence is decided by key membership
# rather than by the truthiness of the stored value. Nothing is
# modified when an error is raised.
def rename(current_key, new_key)
  raise "cannot rename if built" if built?

  unless tasks.has_key?(current_key)
    raise "unknown task: #{current_key.inspect}"
  end

  # rename task
  tasks[new_key] = tasks.delete(current_key)

  # update join references
  joins.each do |inputs, outputs, join|
    [inputs, outputs].each do |keys|
      keys.each_index do |index|
        keys[index] = new_key if keys[index] == current_key
      end
    end
  end

  # update queue references, note both array ([key, args]) and
  # reference-style (bare key) entries must be handled
  queue.each_index do |index|
    entry = queue[index]

    if entry.kind_of?(Array)
      entry[0] = new_key if entry[0] == current_key
    elsif entry == current_key
      queue[index] = new_key
    end
  end

  self
end
187
-
# Resolves tasks, joins and middleware in place using resolve, and
# normalizes the queue. The block receives (type, id) and supplies the
# resource for that reference:
#
#   tasks       yield(:task, id || key), nil tasks are treated as {}
#   joins       yield(:join, id || 'join'), nil joins are treated as {}
#   middleware  yield(:middleware, id)
#
# Queue entries become [key, inputs], where missing inputs fall back to
# tasks[key]. Returns self.
def resolve!
  tasks.each_pair do |key, task|
    tasks[key] = resolve(task || {}) {|id| yield(:task, id || key) }
  end

  joins.collect! do |inputs, outputs, join|
    resolved = resolve(join || {}) {|id| yield(:join, id || 'join') }
    [inputs, outputs, resolved]
  end

  middleware.collect! do |m|
    resolve(m) {|id| yield(:middleware, id) }
  end

  queue.collect! do |(key, inputs)|
    args = inputs || tasks[key]
    [key, args]
  end

  self
end
216
-
# Checks that self can be built and raises a single error listing every
# problem found:
#
# * tasks, joins and middleware that are not resolved?
# * join inputs/outputs that do not reference a task key
# * queue entries that reference a missing task, or whose args are not
#   an Array
#
# When more than one problem is found the message is prefixed with a
# count. Returns self if there are no errors.
def validate!
  errors = []

  tasks.each_value do |task|
    errors << "unresolvable task: #{task.inspect}" unless resolved?(task)
  end

  joins.each do |inputs, outputs, join|
    errors << "unresolvable join: #{join.inspect}" unless resolved?(join)

    inputs.each do |key|
      errors << "missing join input: #{key.inspect}" unless tasks.has_key?(key)
    end

    outputs.each do |key|
      errors << "missing join output: #{key.inspect}" unless tasks.has_key?(key)
    end
  end

  queue.each do |(key, args)|
    if !tasks.has_key?(key)
      errors << "missing task: #{key}"
    elsif !args.kind_of?(Array)
      errors << "non-array args: #{args.inspect}"
    end
  end

  middleware.each do |m|
    errors << "unresolvable middleware: #{m.inspect}" unless resolved?(m)
  end

  return self if errors.empty?

  prefix = errors.length > 1 ? "#{errors.length} schema errors\n" : ""
  raise "#{prefix}#{errors.join("\n")}\n"
end
271
-
# Removes dangling references from self, in place:
#
# * join inputs and outputs that do not name an existing task
# * joins left without any input or without any output
# * queue entries whose key does not name an existing task
#
# Returns self.
def cleanup!
  joins.delete_if do |inputs, outputs, join|
    # prune missing inputs, then missing outputs
    [inputs, outputs].each do |keys|
      keys.delete_if {|key| !tasks.has_key?(key) }
    end

    # a join with nothing on one side is an orphan
    inputs.empty? || outputs.empty?
  end

  # drop queue entries without a task
  queue.delete_if {|(key, inputs)| !tasks.has_key?(key) }

  self
end
292
-
# Builds self onto app, replacing the schema data with live objects:
#
# 1. runs validate! unless validate is false/nil
# 2. replaces every task definition with instantiate(task, app)
# 3. replaces every join with the result of joining its instantiated
#    input and output tasks
# 4. replaces every middleware definition with its instance
# 5. enques each queued task on app with its inputs, then empties queue
#
# tasks, joins, middleware and queue are frozen along the way and app
# is recorded, after which built? is true. Returns self.
def build!(app, validate=true)
  validate! if validate

  # instantiate tasks
  tasks.each_pair {|key, task| tasks[key] = instantiate(task, app) }
  tasks.freeze

  # build the workflow
  joins.collect! do |inputs, outputs, join|
    sources = inputs.collect {|key| tasks[key] }
    targets = outputs.collect {|key| tasks[key] }
    instantiate(join, app).join(sources, targets)
  end
  joins.freeze

  # utilize middleware
  middleware.collect! {|m| instantiate(m, app) }
  middleware.freeze

  # enque tasks
  queue.each {|(key, inputs)| app.enq(tasks[key], *inputs) }
  queue.clear.freeze

  @app = app
  self
end
325
-
# True once an app has been recorded for self (see build!).
def built?
  !@app.nil?
end
329
-
# Enques the built task stored under key onto the app queue, with
# inputs as its argument array. Raises an error unless built?, or if
# there is no task for key. Returns the task.
def enque(key, *inputs)
  raise "cannot enque unless built" unless built?

  task = tasks[key]
  raise "unknown task: #{key.inspect}" unless task

  app.queue.enq(task, inputs)
  task
end
342
-
# Creates a hash dump of self with the keys :tasks, :joins, :queue and
# :middleware, each mapped to the current value of the same-named
# attribute.
def to_hash
  [:tasks, :joins, :queue, :middleware].inject({}) do |hash, name|
    hash[name] = send(name)
    hash
  end
end
351
-
# Serializes the hash form of self (see to_hash) as YAML. The io
# argument is handed to YAML.dump unchanged, and the result of
# YAML.dump is returned.
def dump(io=nil)
  hash = to_hash
  YAML.dump(hash, io)
end
356
-
357
- protected
358
-
# helper to collect task plus every task connected to it through joins
# (as an input or an output, transitively). Each task appears once, in
# the order it is first reached; collection is appended to and returned.
def collect_tasks(task, collection=[]) # :nodoc:
  return collection if collection.include?(task)

  collection << task
  task.joins.each do |join|
    members = join.inputs + join.outputs
    members.each {|member| collect_tasks(member, collection) }
  end

  collection
end
373
- end
374
- end
@@ -1,425 +0,0 @@
1
- require 'shellwords'
2
- require 'tap/schema'
3
-
4
- module Tap
5
- class Schema
class << self
  # Parses argv (ARGV by default) with a new Parser and returns the
  # resulting schema.
  def parse(argv=ARGV)
    parser = Parser.new(argv)
    parser.schema
  end
end
11
-
12
- # A parser for workflow schema defined on the command line.
13
- #
14
- # == Syntax
15
- #
16
- # The command line syntax can be thought of as a series of ARGV arrays
17
- # connected by breaks. The arrays define tasks (ie nodes) in a workflow
18
- # while the breaks define joins. These are the available breaks:
19
- #
20
- # break meaning
21
- # -- default delimiter, no join
22
- # --: sequence join
23
- # --[][] multi-join (sequence, fork, merge)
24
- #
25
- # As an example, this defines three tasks (a, b, c) and sequences the
26
- # b and c tasks:
27
- #
28
- # schema = Parser.new("a -- b --: c").schema
29
- # schema.tasks # => {0 => ["a"], 1 => ["b"], 2 => ["c"]}
30
- # schema.joins # => [[[1],[2]]]
31
- #
32
- # In the example, the indicies of the tasks participating in the sequence
33
- # are inferred as the last and next tasks in the schema. Alternatively
34
- # the tasks participating in the sequence may be written out directly;
35
- # these also sequence b to c.
36
- #
37
- # schema = Parser.new("a -- b -- c --1:2").schema
38
- # schema.tasks
39
- # # => {
40
- # # 0 => ["a"],
41
- # # 1 => ["b"],
42
- # # 2 => ["c"]
43
- # # }
44
- # schema.joins
45
- # # => [
46
- # # [[1],[2]]
47
- # # ]
48
- #
49
- # schema = Parser.new("a --1:2 b -- c").schema
50
- # schema.tasks
51
- # # => {
52
- # # 0 => ["a"],
53
- # # 1 => ["b"],
54
- # # 2 => ["c"]
55
- # # }
56
- # schema.joins
57
- # # => [
58
- # # [[1],[2]]
59
- # # ]
60
- #
61
- # ==== Multi-Join Syntax
62
- #
63
- # The multi-join syntax allows the specification of arbitrary joins.
64
- # Starting with a few examples:
65
- #
66
- # example meaning
67
- # --[][] last.sequence(next)
68
- # --[1][2] 1.sequence(2)
69
- # --[1][2,3] 1.fork(2,3)
70
- # --[1,2][3] 3.merge(1,2)
71
- #
72
- # The meaning of the bracket breaks seems to be changing but note that
73
- # the sequences, forks, and (unsynchronized) merges are all variations
74
- # of a multi-way join. Internally the breaks are interpreted like this:
75
- #
76
- # join = Join.new
77
- # join.join(inputs, outputs)
78
- #
79
- # To specify another class of join, or to specify join configurations,
80
- # add a string in the format "configs.class" where the configs are the
81
- # single-letter configuration flags and class is a lookup for the join
82
- # class.
83
- #
84
- # example interpretation
85
- # --:s Join.new(:splat => true)
86
- # --1:2is Join.new(:iterate => true, :splat => true)
87
- # --[][]q.sync Sync.new(:enq => true)
88
- # --[][].sync Sync.new
89
- #
90
- # If you can stand the syntax, you can also specify a full argv after
91
- # the bracket, just be sure to enclose the whole break in quotes.
92
- #
93
- # example interpretation
94
- # "--1:2 join -i -s" Join.new(:iterate => true, :splat => true)
95
- # "--[][] sync --enq" Sync.new(:enq => true)
96
- #
97
- # ==== Escapes and End Flags
98
- #
99
- # Breaks can be escaped by enclosing them in '-.' and '.-' delimiters;
100
- # any number of arguments may be enclosed within the escape. After the
101
- # end delimiter, breaks are active once again.
102
- #
103
- # schema = Parser.new("a -- b -- c").schema
104
- # schema.tasks
105
- # # => {
106
- # # 0 => ["a"],
107
- # # 1 => ["b"],
108
- # # 2 => ["c"]
109
- # # }
110
- #
111
- # schema = Parser.new("a -. -- b .- -- c").schema
112
- # schema.tasks
113
- # # => {
114
- # # 0 => ["a", "--", "b"],
115
- # # 1 => ["c"]
116
- # # }
117
- #
118
- # Parsing continues until the end of argv, or until an end flag '---' is
119
- # reached. The end flag may also be escaped.
120
- #
121
- # schema = Parser.new("a -- b --- c").schema
122
- # schema.tasks
123
- # # => {
124
- # # 0 => ["a"],
125
- # # 1 => ["b"]
126
- # # }
127
- #
class Parser

  # A set of parsing routines used internally by Tap::Schema::Parser,
  # modularized for ease of testing, and potential re-use. parse_sequence
  # requires that <tt>current_index</tt> and <tt>previous_index</tt> be
  # implemented in the including class; the other methods are standalone.
  module Utils
    module_function

    # The escape begin argument
    ESCAPE_BEGIN = "-."

    # The escape end argument
    ESCAPE_END = ".-"

    # The parser end flag
    END_FLAG = "---"

    # Matches any breaking arg. Examples:
    #
    #   --
    #   --1:2
    #   --[1][2]
    #   --[1,2,3][4,5,6]is.join
    #   --.middleware
    #
    # After the match:
    #
    #   $1:: The string after the break
    #        (ex: '--' => '', '--:' => ':', '--[1,2][3,4]is.join' => '[1,2][3,4]is.join')
    #
    BREAK = /\A--(\z|[\d\:\[\.].*\z)/

    # Matches a sequence break. Examples:
    #
    #   :
    #   1:
    #   :2
    #   1:2:3
    #
    # After the match:
    #
    #   $1:: The sequence string after the break.
    #        (ex: ':' => ':', '1:2' => '1:2', '1:' => '1:', ':2' => ':2')
    #   $2:: The modifier string.
    #        (ex: ':i' => 'i', '1:2is' => 'is')
    #
    SEQUENCE = /\A(\d*(?::\d*)+)(.*)\z/

    # Matches a generic join break. Examples:
    #
    #   "[1,2,3][4,5,6] join -i -s"
    #   [1,2,3][4,5,6]is.join
    #   [1,2][3,4]
    #   [1][2]
    #
    # After the match:
    #
    #   $1:: The inputs string.
    #        (ex: '[1,2,3][4,5,6]' => '1,2,3')
    #   $2:: The outputs string.
    #        (ex: '[1,2,3][4,5,6]' => '4,5,6')
    #   $3:: The modifier string.
    #        (ex: '[][]is' => 'is')
    #
    JOIN = /\A\[([\d,]*)\]\[([\d,]*)\](.*)\z/

    # Matches a join modifier. After the match:
    #
    #   $1:: The modifier flag string.
    #        (ex: 'is.sync' => 'is')
    #   $2:: The class string.
    #        (ex: 'is.sync' => 'sync')
    #
    # Flags are restricted to ASCII letters. The range is spelled
    # A-Za-z on purpose: A-z would also admit the punctuation that sits
    # between 'Z' and 'a' ('[', '\', ']', '^', '_' and '`') and turn
    # a modifier such as 'my_join' into a list of single-letter flags.
    #
    JOIN_MODIFIER = /\A([A-Za-z]*)(?:\.(.*))?\z/

    # Matches a generic middleware break. Examples:
    #
    #   ". middleware --flag"
    #   .middleware
    #
    # After the match:
    #
    #   $1:: The modifier string.
    #        (ex: '.middleware' => 'middleware')
    #
    MIDDLEWARE = /\A\.(.*)\z/

    # Parses an indicies str along commas, and collects the indicies
    # as integers. Ex:
    #
    #   parse_indicies('')                  # => []
    #   parse_indicies('1')                 # => [1]
    #   parse_indicies('1,2,3')             # => [1,2,3]
    #
    # An alternate separator may be given as regexp; empty fields are
    # skipped.
    def parse_indicies(str, regexp=/,+/)
      indicies = []
      str.split(regexp).each do |n|
        indicies << n.to_i unless n.empty?
      end
      indicies
    end

    # Parses the match of a SEQUENCE regexp into an array of
    # [input_indicies, output_indicies, metadata] arrays. The inputs
    # correspond to $1 and $2 for the match. The previous and current
    # index are assumed if $1 starts and/or ends with a colon. The
    # metadata is only present when a modifier is given.
    #
    #   parse_sequence("1:2:3", '')
    #   # => [
    #   # [[1], [2]],
    #   # [[2], [3]],
    #   # ]
    #
    #   parse_sequence(":1:2:", 'is')
    #   # => [
    #   # [[:previous_index], [1], ['join', '-i', '-s']],
    #   # [[1], [2], ['join', '-i', '-s']],
    #   # [[2], [:current_index], ['join', '-i', '-s']],
    #   # ]
    #
    def parse_sequence(one, two)
      indicies = parse_indicies(one, /:+/)
      indicies.unshift previous_index if one[0] == ?:
      indicies << current_index if one[-1] == ?:

      # pair each index with its successor
      sequences = []
      while indicies.length > 1
        sequences << [[indicies.shift], [indicies[0]]]
      end

      # note every sequence shares the same argv object
      if argv = parse_join_modifier(two)
        sequences.each do |sequence|
          sequence << argv
        end
      end

      sequences
    end

    # Parses the match of a JOIN regexp into a [input_indicies,
    # output_indicies, metadata] array. The inputs correspond to $1, $2,
    # and $3 for the match. The metadata is omitted when the modifier is
    # empty; otherwise a join type of 'join' is assumed unless specified.
    #
    #   parse_join("1", "2,3", "")         # => [[1], [2,3]]
    #   parse_join("", "", "is.type")      # => [[], [], ['type', '-i', '-s']]
    #   parse_join("", "", "type -i -s")   # => [[], [], ['type', '-i', '-s']]
    #
    def parse_join(one, two, three)
      join = [parse_indicies(one), parse_indicies(two)]

      if argv = parse_join_modifier(three)
        join << argv
      end

      join
    end

    # Parses a join modifier string into an argv. Returns nil for an
    # empty modifier. A modifier in the "flags.class" format becomes
    # [class, '-f', '-l', ...] with 'join' as the default class; any
    # other string is split into an argv using Shellwords.
    def parse_join_modifier(modifier)
      case modifier
      when ""
        nil
      when JOIN_MODIFIER
        # $1 and $2 are set by the JOIN_MODIFIER match above
        argv = [$2 == nil || $2.empty? ? 'join' : $2]
        $1.split("").each {|char| argv << "-#{char}"}
        argv
      else
        Shellwords.shellwords(modifier)
      end
    end

    # Parses the match of a MIDDLEWARE regexp into metadata array.
    # The input corresponds to $1 for the match. Currently this
    # method is an alias for Shellwords.shellwords.
    def parse_middleware(one)
      Shellwords.shellwords(one)
    end
  end

  include Utils

  # The schema into which tasks are being parsed
  attr_reader :schema

  # Creates a new parser and parses argv into schema (see parse).
  def initialize(argv=[])
    parse(argv)
  end

  # Iterates through the argv splitting out task and join definitions.
  # Parse is non-destructive to argv: an array argv is duplicated before
  # parsing, and a string argv is split into a new array using
  # Shellwords. Returns the schema.
  def parse(argv)
    parse!(argv.kind_of?(String) ? argv : argv.dup)
  end

  # Same as parse, but removes parsed args from an array argv. Args
  # following an end flag are left in argv. Each call starts a new
  # schema, which is returned.
  def parse!(argv)
    @schema = Schema.new

    # prevent the addition of an empty task to schema
    return schema if argv.empty?

    argv = Shellwords.shellwords(argv) if argv.kind_of?(String)

    # guarantee the first arg opens a task
    argv.unshift('--') unless argv[0] =~ BREAK

    @current_index = -1
    @current = nil
    escape = false
    while !argv.empty?
      arg = argv.shift

      # if escaping, add escaped arguments
      # until an escape-end argument
      if escape
        if arg == ESCAPE_END
          escape = false
        else
          current << arg
        end

        next
      end

      case arg
      when ESCAPE_BEGIN
        # begin escaping if indicated
        escape = true

      when END_FLAG
        # break on an end-flag
        break

      when BREAK
        # a breaking argument was reached
        @current_index += 1
        @current = nil

        # parse the break string for any
        # schema modifications
        parse_break($1)

      else
        # add all other non-breaking args to
        # the current argv; this includes
        # both inputs and configurations
        current << arg

      end
    end

    # determine the queue as all tasks not
    # used as a join output
    queue = schema.tasks.keys
    schema.joins.each {|join| queue -= join[1] }
    schema.queue.concat(queue)

    schema
  end

  protected

  # The index of the task currently being parsed.
  attr_reader :current_index # :nodoc:

  # the argv of the task currently being parsed, created in the schema
  # on first use
  def current
    @current ||= task(current_index)
  end

  # helper to initialize a task at the specified index
  def task(index) # :nodoc:
    schema.tasks[index] ||= []
  end

  # returns current_index-1. No range check is made, so the result is
  # negative when the current task is the first one.
  def previous_index # :nodoc:
    current_index - 1
  end

  # determines the type of break and modifies self appropriately
  def parse_break(arg) # :nodoc:
    case arg
    when ""
      # a plain break; nothing to add
    when SEQUENCE
      schema.joins.concat parse_sequence($1, $2)
    when JOIN
      schema.joins << parse_join($1, $2, $3)
    when MIDDLEWARE
      schema.middleware << parse_middleware($1)
    else
      raise ArgumentError, "invalid break argument: #{arg}"
    end
  end
end
424
- end
425
- end