pipelines 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/pipelines.rb +314 -0
- metadata +44 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 616bf7c8beb7ad8ba7f11cce119849f592fb3f74
|
4
|
+
data.tar.gz: 52bec1e786aeb7f9a7594c7e295001ab9e7ffec1
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 8196a97dcb6f7116527af650626054f9bb81b1c31af55dba17b317b025e76ea7beac0d297c77aa9857edf1faf6a23ef4b498323cbfcd5301700155943cb629e2
|
7
|
+
data.tar.gz: a426d7a00fd930213491055ffffd293dba7fbce8c04f454773cced30cc2977088d54899b1b53a7d53c21b0304a9a4739fec9631cc92ac947c8f3952f87dd2e5a
|
data/lib/pipelines.rb
ADDED
@@ -0,0 +1,314 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'monitor'
|
3
|
+
|
4
|
+
# A pipeline of tasks ("segments") that can run serially, in parallel, or in
# any nested combination of the two.
#
# Subclasses implement #run (and optionally #notify). Inside #run, segments
# are started with #invoke and grouped with #serial / #parallel blocks. When a
# directory is given, every segment result is cached there as YAML and reused
# on later runs instead of running the segment again.
class Pipeline
  attr_reader :dir, :ended_at, :exception, :invocations, :name, :input,
              :output, :started_at, :stats, :thread_lock, :threads, :topdir

  # Builds a pipeline or a segment.
  #
  # @param args [Array] an optional working directory, optionally followed by
  #   an options Hash. Recognized options: :type (:serial or :parallel,
  #   defaults to :serial), :parent, :order, :input, :started_at and :force
  #   (ignore an existing lock file).
  def initialize(*args)
    # Work on a copy so the caller's Hash is not emptied by the deletes below.
    options = args.last.is_a?(Hash) ? args.pop.dup : {}
    dir = args.first

    # @parent has to be known before anything is inherited from it.
    @parent = options.delete(:parent) # This is nil only for the top level pipeline.
    @type = options.delete(:type) || :serial
    @order = options.delete(:order)
    @input = options.delete(:input)

    @dir = dir
    @topdir = @parent ? @parent.topdir : dir

    @serial_count = 0
    @parallel_count = 0

    # The lock and the stats are shared by the whole pipeline tree.
    @thread_lock = @parent ? @parent.thread_lock : Monitor.new
    @stats = @parent ? @parent.stats : {}

    @name = underscore self.class.name.split('::')[-1]
    @output = serial? ? nil : []

    @threads = []
    Thread.abort_on_exception = true

    @options = options
    @step = nil
    @ended_at = nil
    @started_at = options[:started_at] || Time.now
    @invocations = 0

    Dir.mkdir(@dir) unless @dir.nil? || Dir.exist?(@dir)

    if @parent.nil? # This is the top level pipeline.
      # Wrap this instance's #run so that it takes the lock file first,
      # releases it afterwards and then calls #notify.
      class << self
        alias_method :unlocked_run, :run

        def run(*args, &block)
          lock
          begin
            result = unlocked_run(*args, &block)
          ensure
            # Release the lock even when the run fails; otherwise every later
            # run is refused until the file is removed by hand.
            unlock
          end
          notify

          result
        end
      end
    end
  end


  # Evaluates the block in a child pipeline whose segments run one after
  # another.
  #
  # @param args [Object, nil] input for the child; defaults to the current input.
  def serial(args=nil, &block)
    pipeline(:serial, args, &block)
  end


  # Evaluates the block in a child pipeline whose segments each run in their
  # own thread.
  #
  # @param args [Object, nil] input for the child; defaults to the current input.
  def parallel(args=nil, &block)
    pipeline(:parallel, args, &block)
  end


  # Runs one segment, or reuses its cached result when the cache file exists.
  #
  # @param klass [Class] the segment class; it is instantiated with this
  #   pipeline as its parent.
  # @param args [Array] arguments forwarded to the segment's #run.
  def invoke(klass, *args)
    @invocations += 1

    dir = File.join(@dir, description(klass)) unless @dir.nil?
    segment = klass.new dir, :order => @order, :parent => self

    output_file = segment_cache segment
    if output_file && File.exist?(output_file)
      segment.puts "\033[0;35mSkipping\033[0m"
      cached = load_cached_output(output_file)

      if serial?
        @output = cached
        @input = cached
      elsif parallel?
        @thread_lock.synchronize { @output << cached }
      end
    else
      segment.puts "\033[0;32mRunning\033[0m"

      if serial?
        dispatch(segment, output_file, *args)
        @input = @output
      elsif parallel?
        @threads << Thread.new { dispatch(segment, output_file, *args) }
      end
    end
  end


  # Label used for a segment's directory and cache file:
  # "[order-]invocation-class_name".
  def description(klass)
    label = "#{@invocations}-#{underscore(klass.name.split('::')[-1])}"
    @order.nil? ? label : "#{@order}-#{label}"
  end


  # Prints a prefixed line while holding the shared lock, so that output from
  # different threads does not interleave.
  def puts(string='')
    @thread_lock.synchronize { unlocked_puts(string) }

    nil # Behave like Kernel.puts
  end


  # Same as #puts, but without taking the shared lock.
  def unlocked_puts(string='')
    prefix =
      if self.class == Pipeline || @parent.nil? # If @parent is missing, it is in the top level block.
        @order ? "\033[32m[#{@order}][#{@invocations}]" : "\033[33m[#{@invocations}]"
      elsif @order
        "\033[32m[#{@order}][#{@name}][#{@parent.invocations}]"
      else
        "\033[32m[#{@name}][#{@parent.invocations}]"
      end

    Kernel.puts "#{prefix}\033[0m #{string}"
    STDOUT.flush
  end


  private

  def serial?
    @type == :serial
  end


  def parallel?
    @type == :parallel
  end


  # Runs the block inside a child pipeline of the given mode and merges the
  # child's output into this pipeline.
  def pipeline(mode, args=nil, &block)
    begin
      if parallel? # When inside parallel.
        worker = Thread.new do
          sub = child(mode, args)
          sub.instance_eval(&block)
          sub.threads.each(&:join) # Could be a parallel block inside a parallel block.
          @thread_lock.synchronize do
            @output << (mode == :parallel ? sub.output.flatten(1) : sub.output)
          end
        end
        @threads << worker
      elsif serial?
        sub = child(mode, args)
        sub.instance_eval(&block)
        sub.threads.each(&:join)
        @output = (mode == :parallel ? sub.output.flatten(1) : sub.output)
        @input = @output
      end
    rescue Exception => e # Always re-raised below, so nothing is swallowed.
      @exception = e
      notify
      raise
    end
  end


  def notify
    # This should be implemented in the subclasses.
  end


  def run
    # This should be implemented in the subclasses.
  end


  # Calls the segment's #run. Explicit args win; otherwise the current input is
  # passed once more than one segment has been invoked on this pipeline.
  # A non-empty options Hash is appended as the last argument.
  def run_with_args(segment, args, options)
    call_args =
      if !args.empty?
        args
      elsif @invocations > 1
        [@input]
      else
        []
      end
    call_args += [options] unless options.empty?

    segment.send :run, *call_args
  end


  # Runs the segment, writes its result to the cache file (if any) and records
  # it as this pipeline's output.
  def dispatch(segment, output_file, *args)
    options = args.last.is_a?(Hash) ? args.pop : {}

    # A #run that takes no parameters is called bare; any other arity
    # (optional arguments result in negative arity) goes through run_with_args.
    output =
      if segment.method(:run).arity.zero?
        segment.send :run
      else
        run_with_args(segment, args, options)
      end

    if output_file
      File.open(output_file, "w") { |f| f.write({:data => output}.to_yaml) }
    end

    if serial?
      @output = output
    elsif parallel?
      @thread_lock.synchronize { @output << output }
    end
  end


  # Reads a segment result back from the cache file written by #dispatch.
  def load_cached_output(path)
    yaml = File.read(path)
    # The cache is a symbol-keyed Hash written by this class. Psych 4 made
    # YAML.load an alias of safe_load, which rejects symbols, so prefer
    # unsafe_load where it exists. NOTE(review): this trusts the files in the
    # pipeline directory; do not point a pipeline at untrusted cache files.
    cached = YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(yaml) : YAML.load(yaml)
    cached[:data]
  end


  # Path of the segment's YAML cache file, or nil when no directory is in use.
  def segment_cache(segment)
    File.join(@dir, "#{description(segment.class)}.yaml") if segment.dir
  end


  # Creates the child pipeline for a serial/parallel block. Its order label is
  # this pipeline's label plus "S<n>" or "P<n>".
  def child(type, args=nil)
    order =
      case type
      when :serial
        @serial_count += 1
        "#{@order}S#{@serial_count}"
      when :parallel
        @parallel_count += 1
        "#{@order}P#{@parallel_count}"
      end

    Pipeline.new(@dir, :type => type, :input => args || @input, :parent => self, :order => order, :started_at => started_at)
  end


  # Converts a CamelCase name to snake_case.
  def underscore(string)
    string.gsub(/::/, '/').
      gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2').
      gsub(/([a-z\d])([A-Z])/, '\1_\2').
      tr("-", "_").downcase
  end


  # Writes the lock file (containing this process id) into the pipeline
  # directory.
  #
  # @raise [RuntimeError] when the lock file already exists and :force was not
  #   given.
  def lock
    return if @dir.nil?

    lock = File.join @dir, 'lock'
    if !@options[:force] && File.exist?(lock)
      raise "Another instance of the pipelines seems to be running.\nPlease remove #{lock} if that is not the case."
    end

    File.open(lock, 'w') { |f| f.write Process.pid }
  end


  # Removes the lock file and records the end time.
  def unlock
    unless @dir.nil?
      lock = File.join @dir, 'lock'
      # Tolerate a lock that is already gone, so that the cleanup cannot mask
      # the error that ended the run.
      File.delete lock if File.exist?(lock)
    end

    @ended_at = Time.now
  end


  # Stores values in the shared stats under this object's name.
  def set_stats(values)
    @thread_lock.synchronize { @stats[@name] = values }
  end


  # Returns the shared stats stored under this object's name.
  def get_stats
    @thread_lock.synchronize { @stats[@name] }
  end
end
|
metadata
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pipelines
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Sujoy Gupta
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-09-25 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: A simple way to build a pipeline of tasks. These tasks can be configured
|
14
|
+
to run in serial, parallel or any combination thereof.
|
15
|
+
email: sujoyg@gmail.com
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- lib/pipelines.rb
|
21
|
+
homepage: https://github.com/sujoyg/pipelines
|
22
|
+
licenses: []
|
23
|
+
metadata: {}
|
24
|
+
post_install_message:
|
25
|
+
rdoc_options: []
|
26
|
+
require_paths:
|
27
|
+
- lib
|
28
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
29
|
+
requirements:
|
30
|
+
- - '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
34
|
+
requirements:
|
35
|
+
- - '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
requirements: []
|
39
|
+
rubyforge_project:
|
40
|
+
rubygems_version: 2.1.4
|
41
|
+
signing_key:
|
42
|
+
specification_version: 4
|
43
|
+
summary: Pipelines
|
44
|
+
test_files: []
|