pipelines 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/pipelines.rb +314 -0
- metadata +44 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 616bf7c8beb7ad8ba7f11cce119849f592fb3f74
|
4
|
+
data.tar.gz: 52bec1e786aeb7f9a7594c7e295001ab9e7ffec1
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 8196a97dcb6f7116527af650626054f9bb81b1c31af55dba17b317b025e76ea7beac0d297c77aa9857edf1faf6a23ef4b498323cbfcd5301700155943cb629e2
|
7
|
+
data.tar.gz: a426d7a00fd930213491055ffffd293dba7fbce8c04f454773cced30cc2977088d54899b1b53a7d53c21b0304a9a4739fec9631cc92ac947c8f3952f87dd2e5a
|
data/lib/pipelines.rb
ADDED
@@ -0,0 +1,314 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'monitor'
|
3
|
+
|
4
|
+
# A pipeline of tasks ("segments") that can be composed to run serially, in
# parallel, or in any nested combination of the two.
#
# A concrete pipeline subclasses Pipeline and implements #run, typically by
# calling #serial / #parallel with a block that calls #invoke for every segment
# class. Segments are themselves Pipeline subclasses implementing #run.
#
# When a directory is given, every segment's result is cached as YAML inside it
# and a `lock` file guards the top-level pipeline against concurrent runs.
class Pipeline
  attr_reader :dir
  attr_reader :ended_at
  attr_reader :exception
  attr_reader :invocations
  attr_reader :name
  attr_reader :input
  attr_reader :output
  attr_reader :started_at
  attr_reader :stats
  attr_reader :thread_lock
  attr_reader :threads
  attr_reader :topdir

  # Creates a pipeline.
  #
  # args - an optional working directory, optionally followed by an options
  #        Hash. Recognized options: :type (:serial, the default, or
  #        :parallel), :parent, :order, :input, :started_at and :force.
  #
  # The directory is created when it does not exist yet. A pipeline without a
  # :parent is the top-level one; its #run is wrapped so that the run happens
  # between #lock and #unlock and is followed by #notify.
  def initialize(*args)
    # Work on a copy so that the caller's hash is not modified by the deletes.
    options = args.last.is_a?(Hash) ? args.pop.dup : {}
    dir = args.first

    # The parent must be known before anything is inherited from it.
    @parent = options.delete(:parent) # This is nil only for the top level pipeline.
    @topdir = @parent ? @parent.topdir : dir
    @dir = dir
    @type = options.delete(:type) || :serial

    @serial_count = 0
    @parallel_count = 0

    # The lock and the stats are shared by the whole pipeline tree.
    @thread_lock = @parent ? @parent.thread_lock : Monitor.new
    @stats = @parent ? @parent.stats : {}

    @name = underscore self.class.name.split('::')[-1]
    @order = options.delete(:order)

    @input = options.delete(:input)
    @output = serial? ? nil : []

    @threads = []
    Thread.abort_on_exception = true

    @options = options
    @step = nil
    @ended_at = nil
    @started_at = options[:started_at] || Time.now
    @invocations = 0

    Dir.mkdir(@dir) unless @dir.nil? || Dir.exist?(@dir)

    return unless @parent.nil? # Only the top level pipeline is wrapped.

    class << self
      alias_method :unlocked_run, :run

      def run(*args, &block)
        lock
        begin
          output = unlocked_run(*args, &block)
        ensure
          # Release the lock even when the run fails, so that a failed run
          # does not block the next one.
          unlock
        end
        notify

        output
      end
    end
  end


  # Runs the block as a serial sub-pipeline. `args`, when given, replaces the
  # current input as the sub-pipeline's input.
  def serial(args=nil, &block)
    pipeline(:serial, args, &block)
  end


  # Runs the block as a parallel sub-pipeline. `args`, when given, replaces the
  # current input as the sub-pipeline's input.
  def parallel(args=nil, &block)
    pipeline(:parallel, args, &block)
  end


  # Instantiates the segment class `klass` and runs it with `args`, or reuses
  # the cached result when the segment's YAML cache file already exists.
  #
  # In a serial pipeline the result becomes both the output and the input of
  # the next invocation. In a parallel pipeline the segment runs on its own
  # thread (collected in #threads) and its result is appended to the output.
  def invoke(klass, *args)
    @invocations += 1

    dir = File.join(@dir, "#{description(klass)}") unless @dir.nil?
    segment = klass.new dir, :order => @order, :parent => self

    output_file = segment_cache segment
    if output_file && File.exist?(output_file)
      segment.puts "\033[0;35mSkipping\033[0m"
      # The cache is written by #dispatch with a Symbol key, which the safe
      # loader behind YAML.load rejects on newer Psych versions.
      # NOTE(review): this trusts the contents of the pipeline directory.
      yaml = File.read(output_file)
      cached = YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(yaml) : YAML.load(yaml)
      output = cached[:data]

      if serial?
        @output = output
        @input = output
      elsif parallel?
        @thread_lock.synchronize do
          @output << output
        end
      end
    else
      segment.puts "\033[0;32mRunning\033[0m"

      if serial?
        dispatch(segment, output_file, *args)
        @input = @output
      elsif parallel?
        @threads << Thread.new do
          dispatch(segment, output_file, *args)
        end
      end
    end
  end


  # Returns the label used for the directory and cache file of the current
  # invocation of `klass`: "<order>-<invocations>-<underscored class name>",
  # without the order prefix when this pipeline has no order.
  def description(klass)
    label = "#{@invocations}-#{underscore(klass.name.split('::')[-1])}"
    @order.nil? ? label : "#{@order}-#{label}"
  end


  # Prints `string` with this pipeline's prefix while holding the shared lock.
  def puts(string='')
    @thread_lock.synchronize do
      unlocked_puts(string)
    end

    nil # Behave like Kernel.puts
  end


  # Prints `string` with this pipeline's prefix without taking the shared lock,
  # then flushes STDOUT.
  def unlocked_puts(string='')
    if self.class == Pipeline || @parent.nil? # If @parent is missing, it is in the top level block.
      if @order
        Kernel.puts "\033[32m[#{@order}][#{@invocations}]\033[0m #{string}"
      else
        Kernel.puts "\033[33m[#{@invocations}]\033[0m #{string}"
      end
    else
      if @order
        Kernel.puts "\033[32m[#{@order}][#{@name}][#{@parent.invocations}]\033[0m #{string}"
      else
        Kernel.puts "\033[32m[#{@name}][#{@parent.invocations}]\033[0m #{string}"
      end
    end

    STDOUT.flush
  end


  private

  def serial?
    @type == :serial
  end


  def parallel?
    @type == :parallel
  end


  # Builds a child pipeline of the given mode and evaluates the block in its
  # context, waiting for the threads the child started.
  #
  # Inside a parallel pipeline the child runs on its own thread and its output
  # is appended to this pipeline's output; inside a serial pipeline it runs
  # inline and its output becomes this pipeline's output and next input. The
  # output of a parallel child is flattened by one level.
  #
  # Any exception is recorded in @exception and passed to #notify before being
  # re-raised, which is why Exception itself is rescued here.
  def pipeline(mode, args=nil, &block)
    begin
      if parallel? # When inside parallel.
        worker = Thread.new do
          pipeline = child(mode, args)
          pipeline.instance_eval(&block)
          pipeline.threads.each { |t| t.join } # Could be a parallel block inside a parallel block.
          @thread_lock.synchronize do
            @output << (mode == :parallel ? pipeline.output.flatten(1) : pipeline.output)
          end
        end
        @threads << worker
      elsif serial?
        pipeline = child(mode, args)
        pipeline.instance_eval(&block)
        pipeline.threads.each { |t| t.join }
        @output = (mode == :parallel ? pipeline.output.flatten(1) : pipeline.output)
        @input = @output
      end
    rescue Exception => e
      @exception = e
      notify
      raise
    end
  end


  def notify
    # This should be implemented in the subclasses.
  end


  def run
    # This should be implemented in the subclasses.
  end


  # Calls the segment's #run with the explicit `args` when there are any.
  # Otherwise the current input is passed, but only from the second invocation
  # on. A non-empty `options` hash is appended as the last argument.
  #
  # NOTE(review): @invocations is read when the segment actually runs; in a
  # parallel pipeline later #invoke calls may already have incremented it.
  def run_with_args(segment, args, options)
    arguments = if !args.empty?
                  args.dup
                elsif @invocations > 1
                  [@input]
                else
                  []
                end
    arguments << options unless options.empty?

    segment.send :run, *arguments
  end


  # Runs the segment, writes its result to `output_file` (when there is one)
  # and records it as this pipeline's output.
  def dispatch(segment, output_file, *args)
    options = args.last.is_a?(Hash) ? args.pop : {}

    # Optional arguments result in negative arity, so anything other than zero
    # means that #run accepts arguments.
    output = if segment.method(:run).arity.zero?
               segment.send :run
             else
               run_with_args(segment, args, options)
             end

    if output_file
      File.open(output_file, "w") do |f|
        f.write({:data => output}.to_yaml)
      end
    end

    if serial?
      @output = output
    elsif parallel?
      @thread_lock.synchronize do
        @output << output
      end
    end
  end


  # Path of the YAML cache file for the segment, or nil when the segment has no
  # directory.
  def segment_cache(segment)
    File.join(@dir, "#{description(segment.class)}.yaml") if segment.dir
  end


  # Creates the child pipeline for a serial or parallel block. Its order is
  # this pipeline's order followed by "S<n>" or "P<n>", where n counts the
  # blocks of that type opened so far.
  def child(type, args=nil)
    order = if type == :serial
              @serial_count += 1
              "#{@order}S#{@serial_count}"
            elsif type == :parallel
              @parallel_count += 1
              "#{@order}P#{@parallel_count}"
            end

    Pipeline.new(@dir, :type => type, :input => args || @input, :parent => self, :order => order, :started_at => started_at)
  end


  # Converts a CamelCase name to snake_case.
  def underscore(string)
    string.gsub(/::/, '/').
      gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2').
      gsub(/([a-z\d])([A-Z])/, '\1_\2').
      tr("-", "_").downcase
  end


  # Writes the process id to the `lock` file in the pipeline directory. Raises
  # when the file already exists, unless the :force option was given. Does
  # nothing for a pipeline without a directory.
  def lock
    return if @dir.nil?

    path = File.join @dir, 'lock'
    if !@options[:force] && File.exist?(path)
      raise "Another instance of the pipelines seems to be running.\nPlease remove #{path} if that is not the case."
    end

    File.open(path, 'w') do |f|
      f.write $$
    end
  end


  # Removes the lock file, when there is one, and records the end time.
  def unlock
    unless @dir.nil?
      path = File.join @dir, 'lock'
      # The file may already be gone; do not fail while cleaning up.
      File.delete path if File.exist?(path)
    end

    @ended_at = Time.now
  end


  # Stores `values` in the shared stats under this pipeline's name.
  def set_stats(values)
    @thread_lock.synchronize do
      @stats[@name] = values
    end
  end


  # Returns the shared stats stored under this pipeline's name.
  def get_stats
    @thread_lock.synchronize do
      @stats[@name]
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pipelines
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Sujoy Gupta
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-09-25 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: A simple way to build a pipeline of tasks. These tasks can be configured
|
14
|
+
to run in serial, parallel or any combination thereof.
|
15
|
+
email: sujoyg@gmail.com
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- lib/pipelines.rb
|
21
|
+
homepage: https://github.com/sujoyg/pipelines
|
22
|
+
licenses: []
|
23
|
+
metadata: {}
|
24
|
+
post_install_message:
|
25
|
+
rdoc_options: []
|
26
|
+
require_paths:
|
27
|
+
- lib
|
28
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
29
|
+
requirements:
|
30
|
+
- - '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
34
|
+
requirements:
|
35
|
+
- - '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
requirements: []
|
39
|
+
rubyforge_project:
|
40
|
+
rubygems_version: 2.1.4
|
41
|
+
signing_key:
|
42
|
+
specification_version: 4
|
43
|
+
summary: Pipelines
|
44
|
+
test_files: []
|