madcat 1.0.7

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ *.swp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in madcat.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,25 @@
1
+ Copyright (C) 2012, moe@busyloop.net
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a
4
+ copy of this software and associated documentation files (the "Software"),
5
+ to deal in the Software without restriction, including without limitation
6
+ the rights to use, copy, modify, merge, publish, pulverize, distribute,
7
+ synergize, compost, defenestrate, sublicense, and/or sell copies of the
8
+ Software, and to permit persons to whom the Software is furnished to do
9
+ so, subject to the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be included
12
+ in all copies or substantial portions of the Software.
13
+
14
+ If the Author of the Software (the "Author") needs a place to crash and
15
+ you have a sofa available, you should maybe give the Author a break and
16
+ let him sleep on your couch.
17
+
18
+ If you are caught in a dire situation wherein you only have enough time
19
+ to save one person out of a group, and the Author is a member of that
20
+ group, you must save the Author.
21
+
22
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
23
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO BLAH BLAH BLAH ISN'T IT FUNNY
24
+ HOW UPPER-CASE MAKES IT SOUND LIKE THE LICENSE IS ANGRY AND SHOUTING AT YOU.
25
+
data/README.md ADDED
@@ -0,0 +1,10 @@
1
+ # Madcat
2
+
3
+ Experimental JSON pipeline.
4
+
5
+
6
+ ## Installation
7
+
8
+ $ gem install madcat
9
+
10
+
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
data/bin/madcat ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'madcat'
4
+
5
+ MadCat::Cli.run!(ARGV)
6
+
@@ -0,0 +1,79 @@
1
+ module MadCat
2
+ # Common Helpers
3
+ module Helpers
4
+ module ClassMethods
5
+ #
6
+ require 'open4'
7
+ require 'hashie/mash'
8
# Spawn +argv+ through Open4, feeding it +stdin+, and collect what it
# wrote to stdout and stderr.
#
# @param [Array] argv Command (executable followed by its arguments)
# @param [String] stdin Data handed to the child as standard input
# @return [Hashie::Mash] with the keys status (exit status), stdin,
#   stdout, stderr and cmd (the argv that was passed in)
def exec!(argv, stdin='')
  # Open4 appends the child's output to these buffers.
  out_buf = ''
  err_buf = ''

  # NOTE(review): 'quiet' and 'raise' are passed through to Open4.spawn
  # unchanged; see the open4 documentation for their exact semantics.
  spawn_opts = {
    'quiet'  => true,
    'raise'  => true,
    'stdin'  => stdin,
    'stdout' => out_buf,
    'stderr' => err_buf
  }
  child = Open4.spawn(argv, spawn_opts)

  Hashie::Mash.new(
    :status => child.exitstatus,
    :stdin  => stdin,
    :stdout => out_buf,
    :stderr => err_buf,
    :cmd    => argv
  )
end
17
+
18
# Walk +stream+ item by item and hand each one, split on +delim+, to the
# caller's block. The item is split as-is (no chomp), so the last field
# keeps any trailing line terminator.
#
# @param [IO] stream Source that is iterated with #each (default: STDIN)
# @param [String] delim Field delimiter (default: tab)
# @yield [Array<String>] the fields of the current line
def stream_as_array(stream=STDIN, delim="\t")
  stream.each { |row| yield(row.split(delim)) }
end
27
+
28
+ # preamble code by Starr Horne;
29
+ # https://github.com/starrhorne/preamble/
30
+ require 'yaml'
31
# Split +text+ into a leading YAML preamble and the remaining body.
#
# The preamble is the run of lines enclosed by the first two lines that
# consist solely of "---" (surrounding whitespace is ignored). Blank lines
# before the opening marker are skipped; everything after the closing
# marker is the body.
#
# @param [String] text Document starting with a "---" delimited preamble
# @return [Array] two elements: the preamble as parsed by YAML.load, and
#   the body as a String (its lines joined with "\n")
# @raise [RuntimeError] if the first non-blank line is not "---"
def yaml_preamble(text)
  preamble_lines = []
  body_lines = []
  state = :before_preamble

  text.split("\n").each do |line|
    stripped = line.strip

    case state
    when :before_preamble
      if stripped == "---"
        state = :preamble
      elsif !stripped.empty?
        raise "First line must begin with ---"
      end
    when :preamble
      if stripped == "---"
        state = :after_preamble
      else
        preamble_lines << line
      end
    when :after_preamble
      body_lines << line
    end
  end

  # The lines were split on "\n" above, so they must be re-joined with
  # "\n"; joining without a separator would fuse all preamble keys (and
  # all body lines) into a single line.
  #
  # NOTE(review): YAML.load may instantiate arbitrary objects on older
  # Psych versions; consider YAML.safe_load if +text+ is untrusted.
  [YAML.load(preamble_lines.join("\n")), body_lines.join("\n")]
end
71
+ end
72
+
73
# Mixin hook: whatever includes this module is also extended with
# ClassMethods, so the helpers become callable on the includer itself.
#
# @param [Module] base The class or module doing the include
def self.included(base)
  base.extend(ClassMethods)
end
76
+
77
+ self.extend ClassMethods
78
+ end
79
+ end
@@ -0,0 +1,3 @@
1
module MadCat
  # Gem version string. Frozen so that no caller can mutate the shared
  # constant in place.
  VERSION = "1.0.7".freeze
end
data/lib/madcat.rb ADDED
@@ -0,0 +1,345 @@
1
+ require "madcat/version"
2
+ require "madcat/helpers"
3
+
4
+ require "logger"
5
+ require "optix"
6
+ require "json"
7
+ require "yaml"
8
+ require "deep_merge"
9
+ require "chronic_duration"
10
+
11
+ module MadCat
12
+ CAT = <<EOCAT
13
+ ,
14
+ ,-. _,---._ __ / \\
15
+ / ) .-' `./ / \\
16
+ ( ( ,' `/ /|
17
+ \\ `-" \\'\\ / |
18
+ `. , \\ \\ / |
19
+ /`. ,'-`----Y |
20
+ ( ; | '
21
+ | ,-. ,-' | /
22
+ | | ( | #{("v"+MadCat::VERSION).rjust(10)} | /
23
+ ) | \\ `.___________|/
24
+ `--' `--'
25
+ EOCAT
26
# Command-line front end.
#
# Declares the options via the Optix DSL and implements the main program:
# read a JSON or YAML document (from the first positional argument if it
# is a readable file, otherwise from STDIN), pick the job list stored
# under "pipeline_<key>", run it through MadCat::LitterBox and report the
# per-job results.
#
# Exit codes used by the exec block: 0 on success (and after --ls),
# 1 if any job was reported as an error, 2 on a positional argument that
# is not of the form key=value.
module Cli
  Optix::command do
    params "[filename] [key=value] [...]"
    #text "FIXME: docs needed"

    # show the banner in the help text
    CAT.split("\n").each { |line| text line }

    # global options
    opt :path, "Search path for pipeline executables", :default => '$MADCAT_PATH || $PATH'
    opt :meta_key, "Insert metadata (read-only) on this key (OFF = no meta)", :default => '__pipeline__'
    opt :key, "Pipeline key", :default => 'default'
    opt :trace, "Trace all i/o", :default => false
    opt :log_level, "Loglevel (fatal|error|warn|info|debug)", :default => 'warn', :short => :L
    opt :ls, "List available pipelines to stdout and exit", :short => :l
    # The flag suppresses output (see `unless opts[:quiet_stdout]` below),
    # so the help text must say so.
    opt :quiet_stdout, "Do not print leaf stdout to stdout", :default => false, :short => :Q
    opt :quiet_stderr, "Do not print stderr to stderr", :default => false
    opt :marker, "Insert informative marker above forwarded stdout/stderr", :default => false
    opt :version, "Print version and exit"

    trigger :version do
      STDERR.puts "madcat v#{VERSION}"
    end

    # NOTE(review): this block reads the global ARGV rather than its +argv+
    # parameter; that is only equivalent when run! is invoked with ARGV
    # (as bin/madcat does) -- confirm against Optix before changing.
    exec do |cmd, opts, argv|
      start_time = Time.now

      logger = Logger.new(STDERR)
      logger.level = Logger.const_get(opts[:log_level].upcase)
      logger.level = Logger::DEBUG if opts[:trace]

      # read input from ARGV[0] or STDIN
      if ARGV[0] && !ARGV[0].include?('=') && File.readable?(ARGV[0])
        blob = File.read(ARGV.shift)
        if blob[0..1] == '#!'
          # remove shebang from input, if present
          blob = blob.lines.to_a[1..-1].join
        end
      else
        blob = STDIN.read
      end

      # accept initial input as YAML or JSON
      # NOTE(review): YAML.load on untrusted input may instantiate
      # arbitrary objects on older Psych versions.
      if blob[0..2] == '---'
        blob = YAML.load(blob)
      else
        blob = JSON.parse(blob)
      end

      # Listing happens before the selected pipeline is validated, so that
      # --ls also works on documents without a "pipeline_default" key.
      if opts[:ls]
        blob.keys.each do |key|
          if key =~ /^pipeline_/ && key !~ /^pipeline_.*_path$/
            puts key.sub(/^pipeline_/, '')
          end
        end
        exit 0
      end

      # the key that contains our pipeline must be called
      # pipeline_$KEY (where $KEY defaults to 'default')
      pipeline_key = "pipeline_#{opts[:key]}"

      unless blob.include? pipeline_key
        raise RuntimeError, "Pipeline key '#{pipeline_key}' not found in input"
      end

      unless blob[pipeline_key].is_a? Array
        raise RuntimeError, "Value of pipeline key '#{pipeline_key}' is not an Array"
      end

      # An empty job list would otherwise fail further down with a
      # NoMethodError on nil when the first job id is derived.
      if blob[pipeline_key].empty?
        raise RuntimeError, "Pipeline key '#{pipeline_key}' has no jobs defined"
      end

      # a search path given in the document overrides the option
      if blob["#{pipeline_key}_path"].is_a? String
        opts[:path] = blob["#{pipeline_key}_path"]
      end

      opts[:path] = ENV['MADCAT_PATH'] || ENV['PATH'] if opts[:path] == '$MADCAT_PATH || $PATH'
      bin_path = opts[:path].split(':')

      meta = { 'id' => pipeline_key,
               'current' => blob[pipeline_key][0],
               'todo' => blob[pipeline_key],
               'done' => [],
               'path' => bin_path }

      # add read-only meta-data to the json?
      meta_key = nil
      if opts[:meta_key] != 'OFF'
        meta_key = opts[:meta_key]
        blob[meta_key] = meta
      end

      # key=value from ARGV; a dotted key ("a.b.c") addresses a nested
      # hash, intermediate levels are created (or replaced when they are
      # not hashes).
      ARGV.each do |arg|
        unless arg.include? '='
          STDERR.puts "Error: Positional arguments must be given as key=value"
          STDERR.puts "Invalid argument: #{arg}"
          exit 2
        end
        k, v = arg.split('=', 2)

        path_keys = k.split('.')
        node = blob
        path_keys.each_with_index do |e, i|
          node = node[e] = (i == path_keys.length - 1 ? v : (node[e].is_a?(Hash) ? node[e] : {}))
        end
      end

      first_id = "#{meta['todo'][0].split(' ')[0]}:0"
      results = MadCat::LitterBox.new(logger).run(blob.to_json, first_id, opts[:trace], 0, meta, meta_key)
      end_time = Time.now

      sum_jobs_duration = 0
      error = false
      results.each do |r|
        indent = ' ' * (r[:depth] || 0)
        line = "#{indent}#{r['id']} exit:#{r[:exit]} time:#{r['duration'].round(2)} stdin:#{r[:stdin].length} stdout:#{r[:stdout].length} stderr:#{r[:stderr].length}"
        sum_jobs_duration += r['duration']

        if r[:exit] != 0 || r.include?(:exception_type)
          if r[:exception_type] == 'PipelineStop'
            # a job that printed nothing ends its branch of the pipeline
            if 0 == r[:todo].length
              line += " FIN"
            else
              line += " BREAK:#{r[:todo].length}"
            end
            logger.info line
          else
            line += " ERROR"
            logger.error line
            # be verbose, hoomins love that!
            logger.fatal "ERROR: job '#{r['id']}' #{r[:exception_type]}, dumping..."
            logger.fatal "-- DUMP for job #{r['id']} ------------------"

            r.to_yaml.split("\n").each do |dump_line|
              next if dump_line[0..2] == '---'
              logger.fatal dump_line
            end
            error = true
          end
        else
          # last job of a branch: forward its output
          if 0 == r[:todo].length && 0 < r[:stdout].length
            line += " FIN+"
            unless opts[:quiet_stdout]
              STDOUT.puts "-- STDOUT by #{r['id']} ---------------" if opts[:marker]
              STDOUT.puts r[:stdout]
            end
          end
          logger.info line
        end

        if opts[:quiet_stderr] == false && 0 < r[:stderr].length
          STDERR.puts "-- STDERR by #{r['id']} ---------------" if opts[:marker]
          STDERR.puts r[:stderr]
        end
      end

      logger.info "pipeline_#{opts[:key]} finished, #{results.length} jobs"

      total_duration = end_time - start_time
      total_duration_text = ChronicDuration.output(total_duration.round(2), :format => :chrono)

      avg_per_job = ChronicDuration.output((total_duration / results.length).round(2), :format => :chrono)

      # time not spent inside the jobs themselves
      in_madcat = total_duration - sum_jobs_duration
      in_madcat_text = ChronicDuration.output(in_madcat.round(2), :format => :chrono)
      mad_prc = (in_madcat / total_duration * 100).round(2)
      logger.info "time total: #{total_duration_text}, avg: #{avg_per_job}, in madcat: #{in_madcat_text} (#{mad_prc}%)"

      exit 1 if error
      exit 0
    end
  end

  # Parse +argv+ with Optix and run the command declared above.
  #
  # @param [Array<String>] argv Command-line arguments (default: ARGV)
  def self.run!(argv=ARGV)
    Optix::invoke!(argv)
  end
end
201
+
202
# Executes a pipeline of external jobs.
#
# A job is an executable that receives the current JSON document on stdin
# and prints a JSON document (or an array of documents) on stdout. Each
# printed document is merged into the input under the job's name and fed
# to the next job, recursively.
class LitterBox
  class ErrorFromJSON < RuntimeError; end
  class NonZeroExit < RuntimeError; end
  class MalformedInput < RuntimeError; end
  class MalformedOutput < RuntimeError; end
  class PipelineStop < RuntimeError; end

  # @param [Logger] logger Receives the trace output (only used by #run
  #   when tracing is enabled)
  def initialize(logger)
    @logger = logger
  end

  # Run the first job listed in meta_in['todo'] on +blob+, then recurse
  # into the remaining jobs once per document the job printed.
  #
  # This method does not let StandardError escape: a failure is recorded
  # on this job's result hash under :exception_type, :exception_message
  # and :exception_backtrace, and the results collected so far are
  # returned.
  #
  # @param [String] blob JSON document handed to the job
  # @param [String] id Identifier of this job invocation
  # @param [Boolean] trace Log the full job i/o at debug level
  # @param [Integer] depth Nesting level (0 for the first job)
  # @param [Hash] meta_in Pipeline metadata; the keys 'id', 'todo', 'done'
  #   and 'path' are read. The hash is deep-copied, not modified.
  # @param [String, nil] meta_key If set, the metadata is inserted into
  #   the document under this key
  # @return [Array<Hash>] one result hash for this job followed by the
  #   results of all jobs run downstream of it
  def run(blob, id=:unknown, trace=false, depth=0, meta_in=nil, meta_key=nil)
    begin
      # scaffold the results-object as early as possible
      # because this will also be returned and augmented
      # when an exception occurs.
      results = [{'id' => id, :exit => -1, :stdin => blob,
                  'duration' => -1, :stdout => '', :stderr => ''}]

      blob = JSON.parse(blob)

      meta = Marshal.load(Marshal.dump(meta_in)) # deep_copy

      todo = meta['todo']
      raise RuntimeError, 'Pipeline has no jobs defined?!' if todo.length == 0

      # a job is "<executable> [args...]"; the executable doubles as job name
      cmd = todo.shift.split(' ')
      job = cmd[0]

      # find our executable: first match on the search path wins
      bin_path = meta['path']
      bin = bin_path.map { |dir| File.join(dir, job) }.find { |candidate| File.executable?(candidate) }

      if bin.nil?
        raise RuntimeError, "Executable #{job} not found in #{bin_path.join(':')}"
      end

      # update meta
      meta['current'] = job

      # add read-only metadata
      blob[meta_key] = meta if meta_key

      # run our executable
      blob['*'] = job
      stdin = blob.to_json
      results[0][:stdin] = stdin # we modified it above
      start_time = Time.now
      job_out = MadCat::Helpers::exec!([bin] + cmd[1..-1], stdin)
      duration = (Time.now - start_time).to_f
      results[0].merge!({:bin => bin, :exit => job_out.status, 'duration' => duration,
                         :stdout => job_out.stdout, :stderr => job_out.stderr, :todo => todo,
                         :depth => depth })

      trace_job(results[0], meta, id) if trace

      # alert on exit code != 0
      raise NonZeroExit, "#{job_out.status}" if job_out.status != 0

      # no output? game over.
      raise PipelineStop if job_out.stdout == ''

      # parse output and wrap JSON errors
      begin
        blobs = JSON.parse(job_out.stdout)
      rescue => e
        raise MalformedOutput, "Could not parse JSON: #{e.message}" if 0 < todo.length
        # The last job of a pipeline may print arbitrary text. There is
        # nothing to inspect or to pass on, so continue with no documents
        # instead of tripping over nil below.
        blobs = []
      end
      blobs = [blobs] unless blobs.is_a? Array

      # raise on json-key 'err'
      blobs.each do |out_blob|
        raise ErrorFromJSON, out_blob['err'] if out_blob.include? 'err'
      end

      # oob mode
      meta['done'] << { results[0]['id'] => results[0].select { |k, v| ['duration', :exit].include? k } }

      # feed blob to next stage of the pipeline
      blobs.each_with_index do |new_inner_blob, i|
        break if 0 == todo.length

        # store output blob under jobid
        next_blob = Marshal.load(Marshal.dump(blob)) # deep_copy
        next_blob[job] ||= {}
        next_blob[job].deep_merge! new_inner_blob

        results += run(next_blob.to_json, "#{todo[0].split(' ')[0]}:#{i}", trace, depth + 1, meta, meta_key)
      end
      results
    rescue => e
      # Downstream runs handle their own exceptions, so anything that
      # arrives here belongs to this job, which is always results[0]
      # (results[-1] may already be the result of a downstream job).
      results[0][:exception_type] = e.class.name.split('::')[-1]
      results[0][:exception_message] = e.message
      results[0][:exception_backtrace] = e.backtrace

      # bubble up
      results
    end
  end

  # Shorthand for the logger passed to the constructor.
  def l
    @logger
  end

  private

  # Log the chain of finished jobs and a YAML dump of +result+ at debug level.
  def trace_job(result, meta, id)
    indent = 0
    meta['done'].each do |node|
      l.debug "## #{' ' * indent}#{node.keys[0]}"
      indent += 1
    end
    l.debug "## #{' ' * indent}#{id} <--"

    l.debug "#" * 70
    # zomg HACK: append a newline to stdin/stdout/stderr to convince
    # the yaml serializer to print them as block literals.
    # a bit evil, but it makes the trace-output much more readable...
    [:stdin, :stdout, :stderr].each do |key|
      if 0 < result[key].length && result[key][-1] != "\n"
        result[key] += "\n"
      end
    end

    result.to_yaml.split("\n").each do |line|
      next if line[0..2] == '---'
      l.debug line
    end
    l.debug "#" * 70
  end
end
345
+ end
data/madcat.gemspec ADDED
@@ -0,0 +1,24 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/madcat/version', __FILE__)
3
+
4
# Gem specification for madcat.
Gem::Specification.new do |gem|
  gem.authors       = ["Moe"]
  gem.email         = ["moe@busyloop.net"]
  gem.description   = %q{Katzeklo, Katzeklo...}
  gem.summary       = %q{...ja das macht die Katze froh}
  gem.homepage      = ""

  # `git ls-files` prints one path per line. Split on the newline only:
  # splitting on $\ (nil by default) falls back to whitespace splitting and
  # would tear apart file names that contain spaces.
  gem.files         = `git ls-files`.split("\n")
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "madcat"
  gem.require_paths = ["lib"]
  gem.version       = MadCat::VERSION

  # runtime dependencies
  gem.add_dependency 'optix'
  gem.add_dependency 'open4'
  gem.add_dependency 'hashie'
  gem.add_dependency 'json'
  gem.add_dependency 'chronic_duration'
  gem.add_dependency 'deep_merge'
end
metadata ADDED
@@ -0,0 +1,123 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: madcat
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.7
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Moe
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-09-13 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: optix
16
+ requirement: &22553000 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *22553000
25
+ - !ruby/object:Gem::Dependency
26
+ name: open4
27
+ requirement: &22552320 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *22552320
36
+ - !ruby/object:Gem::Dependency
37
+ name: hashie
38
+ requirement: &22551900 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :runtime
45
+ prerelease: false
46
+ version_requirements: *22551900
47
+ - !ruby/object:Gem::Dependency
48
+ name: json
49
+ requirement: &22551400 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ type: :runtime
56
+ prerelease: false
57
+ version_requirements: *22551400
58
+ - !ruby/object:Gem::Dependency
59
+ name: chronic_duration
60
+ requirement: &22550760 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ type: :runtime
67
+ prerelease: false
68
+ version_requirements: *22550760
69
+ - !ruby/object:Gem::Dependency
70
+ name: deep_merge
71
+ requirement: &22550140 !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ type: :runtime
78
+ prerelease: false
79
+ version_requirements: *22550140
80
+ description: Katzeklo, Katzeklo...
81
+ email:
82
+ - moe@busyloop.net
83
+ executables:
84
+ - madcat
85
+ extensions: []
86
+ extra_rdoc_files: []
87
+ files:
88
+ - .gitignore
89
+ - Gemfile
90
+ - LICENSE
91
+ - README.md
92
+ - Rakefile
93
+ - bin/madcat
94
+ - lib/madcat.rb
95
+ - lib/madcat/helpers.rb
96
+ - lib/madcat/version.rb
97
+ - madcat.gemspec
98
+ homepage: ''
99
+ licenses: []
100
+ post_install_message:
101
+ rdoc_options: []
102
+ require_paths:
103
+ - lib
104
+ required_ruby_version: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ required_rubygems_version: !ruby/object:Gem::Requirement
111
+ none: false
112
+ requirements:
113
+ - - ! '>='
114
+ - !ruby/object:Gem::Version
115
+ version: '0'
116
+ requirements: []
117
+ rubyforge_project:
118
+ rubygems_version: 1.8.10
119
+ signing_key:
120
+ specification_version: 3
121
+ summary: ! '...ja das macht die Katze froh'
122
+ test_files: []
123
+ has_rdoc: