pipelines 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/pipelines.rb +314 -0
  3. metadata +44 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 616bf7c8beb7ad8ba7f11cce119849f592fb3f74
4
+ data.tar.gz: 52bec1e786aeb7f9a7594c7e295001ab9e7ffec1
5
+ SHA512:
6
+ metadata.gz: 8196a97dcb6f7116527af650626054f9bb81b1c31af55dba17b317b025e76ea7beac0d297c77aa9857edf1faf6a23ef4b498323cbfcd5301700155943cb629e2
7
+ data.tar.gz: a426d7a00fd930213491055ffffd293dba7fbce8c04f454773cced30cc2977088d54899b1b53a7d53c21b0304a9a4739fec9631cc92ac947c8f3952f87dd2e5a
data/lib/pipelines.rb ADDED
@@ -0,0 +1,314 @@
1
+ require 'yaml'
2
+ require 'monitor'
3
+
4
# Base class for building pipelines of tasks ("segments") that run serially,
# in parallel, or in any nesting of the two.
#
# A Pipeline instance plays one of three roles, all visible in this class:
#
# * top-level pipeline - created without a :parent option. Its +run+ is wrapped
#   so that a lock file is taken in +dir+ before the run and released after it.
# * child pipeline     - a plain Pipeline created by +serial+ / +parallel+; the
#   block given to those methods is instance_eval'd against it.
# * segment            - an instance of the class handed to +invoke+; its +run+
#   method does the work and its return value is the segment's output.
#
# Segment outputs are cached as YAML files next to the segment directories, so
# a segment whose cache file already exists is skipped on later runs.
class Pipeline
  attr_reader :dir
  attr_reader :ended_at
  attr_reader :exception
  attr_reader :invocations
  attr_reader :name
  attr_reader :input
  attr_reader :output
  attr_reader :started_at
  attr_reader :stats
  attr_reader :thread_lock
  attr_reader :threads
  attr_reader :topdir

  # Creates a pipeline, child pipeline or segment.
  #
  # args - optional working directory (first positional argument; may be
  #        omitted or nil, in which case nothing is cached or locked), followed
  #        by an optional trailing Hash of options:
  #        :type       - :serial (default) or :parallel.
  #        :parent     - the enclosing Pipeline; nil only for the top level.
  #        :order      - label used in directory names and log prefixes.
  #        :input      - initial input handed to invoked segments.
  #        :started_at - start time to inherit (defaults to Time.now).
  #        Remaining options are kept in @options (e.g. :force, read by #lock).
  def initialize(*args)
    options = args.last.is_a?(Hash) ? args.pop : {}
    dir = args.first

    # @parent has to be assigned before anything derived from it; computing
    # @topdir first would always see a nil parent.
    @parent = options.delete(:parent) # This is nil only for the top level pipeline.
    @type = options.delete(:type) || :serial

    @topdir = @parent ? @parent.topdir : dir
    @dir = dir

    @serial_count = 0
    @parallel_count = 0

    # The lock and the stats hash are shared by the whole pipeline tree.
    @thread_lock = @parent ? @parent.thread_lock : Monitor.new
    @stats = @parent ? @parent.stats : {}

    @name = underscore self.class.name.split('::')[-1]
    @order = options.delete(:order)

    @input = options.delete(:input)
    @output = serial? ? nil : []

    @threads = []
    # NOTE: process-wide setting; an exception in any thread is re-raised in
    # the main thread.
    Thread.abort_on_exception = true

    @options = options
    @step = nil
    @ended_at = nil
    @started_at = options[:started_at] || Time.now
    @invocations = 0

    Dir.mkdir(@dir) unless @dir.nil? || Dir.exist?(@dir)

    if @parent.nil? # This is the top level pipeline.
      # Wrap this instance's +run+ (normally defined by a subclass) so that it
      # is bracketed by lock/unlock and followed by notify.
      class << self
        alias_method :unlocked_run, :run

        def run(*args, &block)
          lock
          begin
            output = unlocked_run(*args, &block)
          ensure
            # Release the lock even when the run fails, otherwise the stale
            # lock file blocks every later run.
            unlock
          end
          notify

          output
        end
      end
    end
  end

  # Runs the block as a serial child pipeline.
  #
  # args - optional input for the child; defaults to this pipeline's input.
  def serial(args=nil, &block)
    pipeline(:serial, args, &block)
  end

  # Runs the block as a parallel child pipeline.
  #
  # args - optional input for the child; defaults to this pipeline's input.
  def parallel(args=nil, &block)
    pipeline(:parallel, args, &block)
  end

  # Instantiates +klass+ as a segment of this pipeline and runs it, or reuses
  # its cached output when the segment's cache file already exists.
  #
  # klass - a class whose instances respond to +run+ (a Pipeline subclass).
  # args  - explicit arguments for the segment's +run+; a trailing Hash is
  #         passed as the last argument.
  #
  # In a serial pipeline the segment runs immediately and its output becomes
  # the input of the next invocation. In a parallel pipeline the segment runs
  # in a new thread that is appended to +threads+.
  def invoke(klass, *args)
    @invocations += 1
    # Decide on the invoking thread whether this segment receives @input.
    # Worker threads must not read @invocations later: it keeps growing while
    # they run, which made the decision depend on thread timing.
    pass_input = @invocations > 1

    dir = File.join(@dir, description(klass)) unless @dir.nil?
    segment = klass.new dir, :order => @order, :parent => self

    output_file = segment_cache segment
    if output_file && File.exist?(output_file)
      segment.puts "\033[0;35mSkipping\033[0m"
      output = load_cached_output(output_file)

      if serial?
        @output = output
        @input = output
      elsif parallel?
        @thread_lock.synchronize { @output << output }
      end
    else
      segment.puts "\033[0;32mRunning\033[0m"

      if serial?
        run_and_record_segment(segment, output_file, args, pass_input)
        @input = @output
      elsif parallel?
        @threads << Thread.new do
          run_and_record_segment(segment, output_file, args, pass_input)
        end
      end
    end
  end

  # Builds the "<order>-<invocation>-<underscored class name>" label used for
  # segment directories and cache files; the order part is left out when this
  # pipeline has no order.
  def description(klass)
    [@order, @invocations, underscore(klass.name.split('::').last)].compact.join('-')
  end

  # Prints +string+ with this pipeline's prefix while holding the shared lock.
  # Returns nil.
  def puts(string='')
    @thread_lock.synchronize do
      unlocked_puts(string)
    end

    nil # Behave like Kernel.puts
  end

  # Same as #puts but without taking the shared lock.
  def unlocked_puts(string='')
    prefix =
      if self.class == Pipeline || @parent.nil? # If @parent is missing, it is in the top level block.
        @order ? "\033[32m[#{@order}][#{@invocations}]\033[0m" : "\033[33m[#{@invocations}]\033[0m"
      elsif @order
        "\033[32m[#{@order}][#{@name}][#{@parent.invocations}]\033[0m"
      else
        "\033[32m[#{@name}][#{@parent.invocations}]\033[0m"
      end

    Kernel.puts "#{prefix} #{string}"
    STDOUT.flush
  end

  private

  def serial?
    @type == :serial
  end

  def parallel?
    @type == :parallel
  end

  # Creates a child pipeline of the given mode, evaluates the block against it
  # and folds the child's output into this pipeline.
  #
  # Inside a parallel pipeline the child runs in its own thread and its output
  # is appended to @output; inside a serial pipeline it runs inline and its
  # output replaces @output and @input.
  def pipeline(mode, args=nil, &block)
    if parallel? # When inside parallel.
      # Create the child on the calling thread so the order counters are not
      # updated concurrently by several workers.
      kid = child(mode, args)
      worker = Thread.new do
        kid.instance_eval(&block)
        kid.threads.each(&:join) # Could be a parallel block inside a parallel block.
        @thread_lock.synchronize do
          @output << merged_output_of(kid, mode)
        end
      end
      @threads << worker
    elsif serial?
      kid = child(mode, args)
      kid.instance_eval(&block)
      kid.threads.each(&:join)
      @output = merged_output_of(kid, mode)
      @input = @output
    end
  rescue Exception => e # Deliberately broad: record, notify, then re-raise unchanged.
    @exception = e
    notify
    raise
  end

  # Output of a finished child: a parallel child's list of outputs is
  # flattened by one level, a serial child's output is used as is.
  def merged_output_of(kid, mode)
    mode == :parallel ? kid.output.flatten(1) : kid.output
  end

  def notify
    # This should be implemented in the subclasses.
  end

  def run
    # This should be implemented in the subclasses.
  end

  # Calls the segment's +run+ with the right argument list.
  #
  # Explicit +args+ win; otherwise @input is passed when +pass_input+ is true;
  # otherwise nothing is passed. A non-empty +options+ Hash is always appended
  # as the last argument.
  def run_with_args(segment, args, options, pass_input = @invocations > 1)
    call_args =
      if !args.empty?
        args.dup
      elsif pass_input
        [@input]
      else
        []
      end
    call_args << options unless options.empty?

    segment.send :run, *call_args
  end

  # Runs the segment, caches and records its output. The decision whether to
  # pass @input is taken from the current invocation count.
  def dispatch(segment, output_file, *args)
    run_and_record_segment(segment, output_file, args, @invocations > 1)
  end

  # Runs the segment, writes its output to +output_file+ (when given) as YAML
  # and stores it in @output (replacing it in serial mode, appending under the
  # shared lock in parallel mode).
  def run_and_record_segment(segment, output_file, args, pass_input)
    args = args.dup
    options = args.last.is_a?(Hash) ? args.pop : {}

    # A +run+ that takes no arguments at all is called bare; any other arity
    # (optional arguments give a negative arity) goes through run_with_args.
    output = if segment.method(:run).arity.zero?
               segment.send :run
             else
               run_with_args(segment, args, options, pass_input)
             end

    if output_file
      File.open(output_file, "w") do |f|
        f.write({:data => output}.to_yaml)
      end
    end

    if serial?
      @output = output
    elsif parallel?
      @thread_lock.synchronize do
        @output << output
      end
    end
  end

  # Reads a segment output previously written by run_and_record_segment.
  #
  # SECURITY NOTE: the cache may hold arbitrary Ruby objects and aliases, so a
  # full (unsafe) YAML load is needed to round-trip it; under Psych 4
  # YAML.load is a restricted load that rejects such documents. Only point a
  # pipeline at directories whose contents you trust.
  def load_cached_output(path)
    yaml = File.read(path)
    document = YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(yaml) : YAML.load(yaml)
    document[:data]
  end

  # Path of the segment's YAML cache file, or nil when the segment has no
  # directory (i.e. the pipeline runs without a working directory).
  def segment_cache(segment)
    File.join(@dir, "#{description(segment.class)}.yaml") if segment.dir
  end

  # Creates a child pipeline of the given type whose order label extends this
  # pipeline's label with S<n> (serial) or P<n> (parallel).
  def child(type, args=nil)
    order = case type
            when :serial
              @serial_count += 1
              "#{@order}S#{@serial_count}"
            when :parallel
              @parallel_count += 1
              "#{@order}P#{@parallel_count}"
            end

    Pipeline.new(@dir, :type => type, :input => args || @input, :parent => self, :order => order, :started_at => started_at)
  end

  # Converts a CamelCase constant name to snake_case ("::" becomes "/").
  def underscore(string)
    string.gsub(/::/, '/').
      gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2').
      gsub(/([a-z\d])([A-Z])/, '\1_\2').
      tr("-", "_").downcase
  end

  # Creates the lock file "<dir>/lock" containing this process id.
  #
  # Raises RuntimeError when the lock file already exists, unless the :force
  # option was given. Does nothing when the pipeline has no directory.
  def lock
    return if @dir.nil?

    path = File.join @dir, 'lock'
    # Without :force the file is created exclusively, so checking for an
    # existing lock and taking it is a single atomic step.
    mode = File::WRONLY | File::CREAT | (@options[:force] ? File::TRUNC : File::EXCL)
    File.open(path, mode) do |f|
      f.write Process.pid
    end
  rescue Errno::EEXIST
    raise "Another instance of the pipelines seems to be running.\nPlease remove #{path} if that is not the case."
  end

  # Removes the lock file (if the pipeline has a directory) and records the
  # end time.
  def unlock
    unless @dir.nil?
      path = File.join @dir, 'lock'
      # The lock may already have been removed by hand, as the error message
      # raised by #lock suggests.
      File.delete(path) if File.exist?(path)
    end

    @ended_at = Time.now
  end

  # Stores +values+ in the shared stats hash under this pipeline's name.
  def set_stats(values)
    @thread_lock.synchronize do
      @stats[@name] = values
    end
  end

  # Returns the stats stored under this pipeline's name.
  def get_stats
    @thread_lock.synchronize do
      @stats[@name]
    end
  end
end
metadata ADDED
@@ -0,0 +1,44 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pipelines
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Sujoy Gupta
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-09-25 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A simple way to build a pipeline of tasks. These tasks can be configured
14
+ to run in serial, parallel or any combination thereof.
15
+ email: sujoyg@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/pipelines.rb
21
+ homepage: https://github.com/sujoyg/pipelines
22
+ licenses: []
23
+ metadata: {}
24
+ post_install_message:
25
+ rdoc_options: []
26
+ require_paths:
27
+ - lib
28
+ required_ruby_version: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ requirements: []
39
+ rubyforge_project:
40
+ rubygems_version: 2.1.4
41
+ signing_key:
42
+ specification_version: 4
43
+ summary: Pipelines
44
+ test_files: []