pipelines 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/pipelines.rb +314 -0
  3. metadata +44 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 616bf7c8beb7ad8ba7f11cce119849f592fb3f74
4
+ data.tar.gz: 52bec1e786aeb7f9a7594c7e295001ab9e7ffec1
5
+ SHA512:
6
+ metadata.gz: 8196a97dcb6f7116527af650626054f9bb81b1c31af55dba17b317b025e76ea7beac0d297c77aa9857edf1faf6a23ef4b498323cbfcd5301700155943cb629e2
7
+ data.tar.gz: a426d7a00fd930213491055ffffd293dba7fbce8c04f454773cced30cc2977088d54899b1b53a7d53c21b0304a9a4739fec9631cc92ac947c8f3952f87dd2e5a
data/lib/pipelines.rb ADDED
@@ -0,0 +1,314 @@
1
+ require 'yaml'
2
+ require 'monitor'
3
+
4
# A Pipeline runs "segments" (subclasses of Pipeline that implement #run)
# either one after another (+serial+) or in separate threads (+parallel+).
#
# When a working directory is given, each segment's output is written to a
# YAML file in that directory and reused on later runs instead of running
# the segment again. The top level pipeline (the one created without a
# :parent option) additionally guards its #run with a lock file in that
# directory.
class Pipeline
  attr_reader :dir
  attr_reader :ended_at
  attr_reader :exception
  attr_reader :invocations
  attr_reader :name
  attr_reader :input
  attr_reader :output
  attr_reader :started_at
  attr_reader :stats
  attr_reader :thread_lock
  attr_reader :threads
  attr_reader :topdir

  # Creates a pipeline or segment.
  #
  # args - an optional working directory (String or nil), optionally followed
  #        by an options Hash. Recognised options:
  #        :type       - :serial (default) or :parallel.
  #        :parent     - the enclosing Pipeline; nil for the top level one.
  #        :order      - label used to prefix log lines and cache names.
  #        :input      - initial input value.
  #        :started_at - start time to inherit (defaults to Time.now).
  #        :force      - remains in @options; makes #lock ignore an existing
  #                      lock file.
  #
  # Side effects: creates the working directory if missing and sets
  # Thread.abort_on_exception globally.
  def initialize(*args)
    options = args.last.is_a?(Hash) ? args.pop : {}
    dir = args.first

    # @parent has to be assigned before anything derived from it; otherwise
    # the parent is never consulted and topdir is always this instance's dir.
    @parent = options.delete(:parent) # This is nil only for the top level pipeline.
    @topdir = @parent ? @parent.topdir : dir
    @dir = dir
    @type = options.delete(:type) || :serial

    @serial_count = 0
    @parallel_count = 0

    # The lock and the stats hash are shared by the whole pipeline tree.
    @thread_lock = @parent ? @parent.thread_lock : Monitor.new
    @stats = @parent ? @parent.stats : {}

    @name = underscore self.class.name.split('::')[-1]
    @order = options.delete(:order)

    @input = options.delete(:input)
    @output = serial? ? nil : []

    @threads = []
    Thread.abort_on_exception = true

    @options = options
    @step = nil
    @ended_at = nil
    @started_at = options[:started_at] || Time.now
    @invocations = 0

    unless @dir.nil?
      # Dir.exists? was removed in Ruby 3.2; exist? works on every version.
      Dir.mkdir(@dir) unless Dir.exist?(@dir)
    end

    if @parent.nil? # This is the top level pipeline.
      class << self
        alias_method :unlocked_run, :run

        # Wraps the subclass' #run with the lock file and the notify hook.
        def run(*args, &block)
          lock
          begin
            output = unlocked_run(*args, &block)
          ensure
            # Always release the lock, otherwise a failed run blocks every
            # later run until the lock file is removed by hand.
            unlock
          end
          notify

          output
        end
      end
    end
  end


  # Runs the block in a nested serial pipeline. +args+, when given, becomes
  # the nested pipeline's input instead of the current input.
  def serial(args=nil, &block)
    pipeline(:serial, args, &block)
  end


  # Runs the block in a nested parallel pipeline. +args+, when given, becomes
  # the nested pipeline's input instead of the current input.
  def parallel(args=nil, &block)
    pipeline(:parallel, args, &block)
  end


  # Instantiates +klass+ as a segment of this pipeline and runs it, unless a
  # cached output file for it already exists, in which case the cached data
  # is used instead.
  #
  # klass - a Pipeline subclass implementing #run.
  # args  - arguments forwarded to the segment's #run; a trailing Hash is
  #         passed on as its last argument.
  #
  # In a serial pipeline the output also becomes the input of the next
  # segment. In a parallel pipeline a fresh run happens in a new thread
  # (collected in #threads) and outputs are appended to #output.
  def invoke(klass, *args)
    @invocations += 1

    dir = File.join(@dir, description(klass)) unless @dir.nil?
    segment = klass.new dir, :order => @order, :parent => self

    output_file = segment_cache segment
    if output_file && File.exist?(output_file)
      segment.puts "\033[0;35mSkipping\033[0m"
      output = load_cache(output_file)[:data]

      if serial?
        @output = output
        @input = output
      elsif parallel?
        @thread_lock.synchronize do
          @output << output
        end
      end
    else
      segment.puts "\033[0;32mRunning\033[0m"

      if serial?
        dispatch(segment, output_file, *args)
        @input = @output
      elsif parallel?
        thread = Thread.new do
          dispatch(segment, output_file, *args)
        end
        @threads << thread
      end
    end
  end


  # Builds the name used for a segment's directory and cache file:
  # "[order-]invocations-underscored_class_name".
  def description(klass)
    base = "#{@invocations}-#{underscore(klass.name.split('::')[-1])}"
    @order.nil? ? base : "#{@order}-#{base}"
  end


  # Prints a prefixed line while holding the shared lock, so lines written
  # from different threads are not interleaved.
  def puts(string='')
    @thread_lock.synchronize do
      unlocked_puts(string)
    end

    nil # Behave like Kernel.puts
  end


  # Prints a prefixed line without taking the lock. Plain pipelines are
  # prefixed with their own invocation count, segments with their name and
  # their parent's invocation count.
  def unlocked_puts(string='')
    prefix = if self.class == Pipeline || @parent.nil? # If @parent is missing, it is in the top level block.
      if @order
        "\033[32m[#{@order}][#{@invocations}]\033[0m"
      else
        "\033[33m[#{@invocations}]\033[0m"
      end
    else
      if @order
        "\033[32m[#{@order}][#{@name}][#{@parent.invocations}]\033[0m"
      else
        "\033[32m[#{@name}][#{@parent.invocations}]\033[0m"
      end
    end

    Kernel.puts "#{prefix} #{string}"
    STDOUT.flush
  end


  private

  def serial?
    @type == :serial
  end


  def parallel?
    @type == :parallel
  end


  # Creates a child pipeline of the given mode and evaluates the block in
  # its context. Inside a parallel pipeline the child runs in its own
  # thread; inside a serial one it runs inline and its output becomes this
  # pipeline's output and next input.
  def pipeline(mode, args=nil, &block)
    if parallel? # When inside parallel.
      thread = Thread.new do
        pipeline = child(mode, args)
        pipeline.instance_eval(&block)
        pipeline.threads.each(&:join) # Could be a parallel block inside a parallel block.
        @thread_lock.synchronize do
          @output << (mode == :parallel ? pipeline.output.flatten(1) : pipeline.output)
        end
      end
      @threads << thread
    elsif serial?
      pipeline = child(mode, args)
      pipeline.instance_eval(&block)
      pipeline.threads.each(&:join)
      @output = (mode == :parallel ? pipeline.output.flatten(1) : pipeline.output)
      @input = @output
    end
  rescue Exception => e
    # Deliberately broad: the failure is recorded and reported through the
    # notify hook, then re-raised unchanged.
    @exception = e
    notify
    raise
  end


  def notify
    # This should be implemented in the subclasses.
  end


  def run
    # This should be implemented in the subclasses.
  end


  # Calls the segment's #run. Explicit +args+ win; otherwise every
  # invocation after the first receives the current input. A non-empty
  # +options+ Hash is appended as the last argument.
  def run_with_args(segment, args, options)
    call_args = if !args.empty?
      args
    elsif @invocations > 1
      [@input]
    else
      []
    end
    call_args += [options] unless options.empty?

    segment.send :run, *call_args
  end


  # Runs the segment, writes its output to +output_file+ (when given) and
  # records the output on this pipeline.
  def dispatch(segment, output_file, *args)
    options = args.last.is_a?(Hash) ? args.pop : {}

    # Arity 0 means #run takes nothing; required (positive) or optional
    # (negative) parameters both go through run_with_args.
    output = if segment.method(:run).arity.zero?
      segment.send :run
    else
      run_with_args(segment, args, options)
    end

    if output_file
      File.open(output_file, "w") do |f|
        f.write({:data => output}.to_yaml)
      end
    end

    if serial?
      @output = output
    elsif parallel?
      @thread_lock.synchronize do
        @output << output
      end
    end
  end


  # Reads a cache file written by #dispatch.
  #
  # The cache is produced by this class via to_yaml and may contain aliases
  # or arbitrary objects, which YAML.load rejects under Psych 4. The
  # unrestricted loader is used where it exists. NOTE(review): this trusts
  # the contents of the working directory; do not point it at untrusted files.
  def load_cache(path)
    content = File.read(path)
    YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(content) : YAML.load(content)
  end


  # Path of the segment's cache file, or nil when the segment has no
  # directory (i.e. caching is disabled).
  def segment_cache(segment)
    File.join(@dir, "#{description(segment.class)}.yaml") if segment.dir
  end


  # Builds a nested pipeline that shares this pipeline's directory and start
  # time. Its order label is this one's label plus "S<n>" or "P<n>".
  def child(type, args=nil)
    order = if type == :serial
      @serial_count += 1
      "#{@order}S#{@serial_count}"
    elsif type == :parallel
      @parallel_count += 1
      "#{@order}P#{@parallel_count}"
    end

    Pipeline.new(@dir, :type => type, :input => args || @input, :parent => self, :order => order, :started_at => started_at)
  end


  # Converts a CamelCase name to snake_case ("FooBar" -> "foo_bar").
  def underscore(string)
    string.gsub(/::/, '/').
      gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2').
      gsub(/([a-z\d])([A-Z])/, '\1_\2').
      tr("-", "_").downcase
  end


  # Creates the lock file (containing the process id) in the working
  # directory. Raises RuntimeError if it already exists and :force was not
  # given. Does nothing without a working directory.
  def lock
    return if @dir.nil?

    lock = File.join @dir, 'lock'
    if !@options[:force] && File.exist?(lock)
      raise "Another instance of the pipelines seems to be running.\nPlease remove #{lock} if that is not the case."
    end

    File.open(lock, 'w') do |f|
      f.write $$
    end
  end


  # Removes the lock file, if any, and records the end time.
  def unlock
    unless @dir.nil?
      lock = File.join @dir, 'lock'
      # Tolerate a missing lock so cleanup never hides the original error.
      File.delete lock if File.exist?(lock)
    end

    @ended_at = Time.now
  end


  # Stores +values+ in the shared stats hash under this segment's name.
  def set_stats(values)
    @thread_lock.synchronize do
      @stats[@name] = values
    end
  end


  # Returns the stats stored under this segment's name.
  def get_stats
    @thread_lock.synchronize do
      @stats[@name]
    end
  end
end
metadata ADDED
@@ -0,0 +1,44 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pipelines
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Sujoy Gupta
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-09-25 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A simple way to build a pipeline of tasks. These tasks can be configured
14
+ to run in serial, parallel or any combination thereof.
15
+ email: sujoyg@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/pipelines.rb
21
+ homepage: https://github.com/sujoyg/pipelines
22
+ licenses: []
23
+ metadata: {}
24
+ post_install_message:
25
+ rdoc_options: []
26
+ require_paths:
27
+ - lib
28
+ required_ruby_version: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ requirements: []
39
+ rubyforge_project:
40
+ rubygems_version: 2.1.4
41
+ signing_key:
42
+ specification_version: 4
43
+ summary: Pipelines
44
+ test_files: []