madcat 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ *.swp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in madcat.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,25 @@
1
+ Copyright (C) 2012, moe@busyloop.net
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a
4
+ copy of this software and associated documentation files (the "Software"),
5
+ to deal in the Software without restriction, including without limitation
6
+ the rights to use, copy, modify, merge, publish, pulverize, distribute,
7
+ synergize, compost, defenestrate, sublicense, and/or sell copies of the
8
+ Software, and to permit persons to whom the Software is furnished to do
9
+ so, subject to the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be included
12
+ in all copies or substantial portions of the Software.
13
+
14
+ If the Author of the Software (the "Author") needs a place to crash and
15
+ you have a sofa available, you should maybe give the Author a break and
16
+ let him sleep on your couch.
17
+
18
+ If you are caught in a dire situation wherein you only have enough time
19
+ to save one person out of a group, and the Author is a member of that
20
+ group, you must save the Author.
21
+
22
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
23
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO BLAH BLAH BLAH ISN'T IT FUNNY
24
+ HOW UPPER-CASE MAKES IT SOUND LIKE THE LICENSE IS ANGRY AND SHOUTING AT YOU.
25
+
data/README.md ADDED
@@ -0,0 +1,10 @@
1
+ # Madcat
2
+
3
+ Experimental JSON pipeline.
4
+
5
+
6
+ ## Installation
7
+
8
+ $ gem install madcat
9
+
10
+
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
data/bin/madcat ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'madcat'
4
+
5
+ MadCat::Cli.run!(ARGV)
6
+
@@ -0,0 +1,79 @@
1
+ module MadCat
2
+ # Common Helpers
3
+ module Helpers
4
+ module ClassMethods
5
+ #
6
+ require 'open4'
7
+ require 'hashie/mash'
8
# Spawn a command through Open4 and report how it went.
#
# The two empty strings handed to Open4 as 'stdout' and 'stderr' are the
# same objects that end up in the returned Mash.
# NOTE(review): this relies on Open4 appending the child's output to them
# in place - confirm against the open4 documentation.
#
# @param [Array] argv Command
# @param [String] stdin Stdin data
# @return [Hashie::Mash] keys: status (exit status), stdin, stdout, stderr, cmd
def exec!(argv, stdin='')
  captured_out = ''
  captured_err = ''
  spawn_options = {
    'quiet'  => true,
    'raise'  => true,
    'stdin'  => stdin,
    'stdout' => captured_out,
    'stderr' => captured_err
  }
  child_status = Open4.spawn(argv, spawn_options)

  Hashie::Mash.new(
    :status => child_status.exitstatus,
    :stdin  => stdin,
    :stdout => captured_out,
    :stderr => captured_err,
    :cmd    => argv
  )
end
17
+
18
# Iterate over a stream and hand every line to the caller's block as an
# Array of fields (the line split on the delimiter).
#
# The line is split as-is: a trailing newline stays attached to the last
# field.
#
# @param [IO] stream Stream to read from
# @param [String] delim Field delimiter
# @yield [Array<String>] the fields of one line
def stream_as_array(stream=STDIN, delim="\t")
  stream.each { |raw_line| yield(raw_line.split(delim)) }
end
27
+
28
# preamble code by Starr Horne;
# https://github.com/starrhorne/preamble/
require 'yaml'

# Split a text into its leading YAML preamble and the remaining body.
#
# The preamble is the block between the first two lines that consist of
# "---" (surrounding whitespace ignored). Blank lines before the opening
# "---" are skipped; any other content there is an error. Everything after
# the closing "---" is the body.
#
# The text is split on "\n", so the collected lines are re-joined with "\n".
# (Joining without a separator glued all preamble lines into a single line,
# which broke every preamble with more than one key, and stripped the line
# breaks from the body.)
#
# NOTE(review): the preamble is passed to YAML::load; if the text can come
# from an untrusted source, consider YAML.safe_load.
#
# @param [String] text Document that starts with a "---" delimited preamble
# @return [Array] two elements: the parsed preamble, the body String
# @raise [RuntimeError] if non-blank content precedes the opening "---"
def yaml_preamble(text)
  preamble_lines = []
  body_lines = []
  state = :before_preamble

  text.split("\n").each do |line|
    stripped = line.strip

    case state
    when :before_preamble
      next if stripped == ""
      raise "First line must begin with ---" unless stripped == "---"
      state = :preamble
    when :preamble
      if stripped == "---"
        state = :after_preamble
      else
        # keep the unstripped line: indentation is significant in YAML
        preamble_lines << line
      end
    when :after_preamble
      body_lines << line
    else
      raise "Invalid State: #{ state }"
    end
  end

  [YAML::load(preamble_lines.join("\n")), body_lines.join("\n")]
end
71
+ end
72
+
73
# Hook called with the including class/module: extends it with
# ClassMethods so the helpers become callable directly on it.
#
# @param [Module] base The class or module that included this one
def self.included(base)
  base.extend(ClassMethods)
end
76
+
77
+ self.extend ClassMethods
78
+ end
79
+ end
@@ -0,0 +1,3 @@
1
# Release version of the madcat gem.
module MadCat
  # Version string; the gemspec reads it as the gem version and the CLI
  # prints it for --version.
  VERSION = '1.0.7'
end
data/lib/madcat.rb ADDED
@@ -0,0 +1,345 @@
1
+ require "madcat/version"
2
+ require "madcat/helpers"
3
+
4
+ require "logger"
5
+ require "optix"
6
+ require "json"
7
+ require "yaml"
8
+ require "deep_merge"
9
+ require "chronic_duration"
10
+
11
+ module MadCat
12
+ CAT = <<EOCAT
13
+ ,
14
+ ,-. _,---._ __ / \\
15
+ / ) .-' `./ / \\
16
+ ( ( ,' `/ /|
17
+ \\ `-" \\'\\ / |
18
+ `. , \\ \\ / |
19
+ /`. ,'-`----Y |
20
+ ( ; | '
21
+ | ,-. ,-' | /
22
+ | | ( | #{("v"+MadCat::VERSION).rjust(10)} | /
23
+ ) | \\ `.___________|/
24
+ `--' `--'
25
+ EOCAT
26
# Command-line front end.
#
# Declares the options via the Optix DSL. The exec block then:
#   1. reads the pipeline document (file named by the first argument, or
#      STDIN) as YAML (when it starts with "---") or JSON,
#   2. applies key=value overrides from the command line,
#   3. runs the pipeline stored under "pipeline_<key>" through LitterBox,
#   4. logs one summary line per job, forwards leaf stdout / job stderr and
#      exits 1 if any job failed, 0 otherwise.
module Cli
  Optix::command do
    params "[filename] [key=value] [...]"
    #text "FIXME: docs needed"

    CAT.split("\n").each { |line| text line }

    # global options
    opt :path, "Search path for pipeline executables", :default => '$MADCAT_PATH || $PATH'
    opt :meta_key, "Insert metadata (read-only) on this key (OFF = no meta)", :default => '__pipeline__'
    opt :key, "Pipeline key", :default => 'default'
    opt :trace, "Trace all i/o", :default => false
    opt :log_level, "Loglevel (fatal|error|warn|info|debug)", :default => 'warn', :short => :L
    opt :ls, "List available pipelines to stdout and exit", :short => :l
    # The flag suppresses leaf stdout; the help text used to claim the opposite.
    opt :quiet_stdout, "Do not print leaf stdout to stdout", :default => false, :short => :Q
    opt :quiet_stderr, "Do not print stderr to stderr", :default => false
    opt :marker, "Insert informative marker above forwarded stdout/stderr", :default => false
    opt :version, "Print version and exit"

    trigger :version do
      STDERR.puts "madcat v#{VERSION}"
    end

    exec do |cmd, opts, argv|
      start_time = Time.now

      logger = Logger.new(STDERR)
      logger.level = Logger.const_get(opts[:log_level].upcase)
      logger.level = Logger::DEBUG if opts[:trace]

      # read input from ARGV[0] or STDIN
      # NOTE(review): this block works on the global ARGV rather than on the
      # `argv` it is handed, so Cli.run! with a custom array is only honoured
      # for option parsing - confirm what Optix passes before switching.
      if ARGV[0] && !ARGV[0].include?('=') && File.readable?(ARGV[0])
        blob = File.read(ARGV.shift)
        # remove shebang from input, if present
        blob = blob.lines.to_a[1..-1].join if blob[0..1] == '#!'
      else
        blob = STDIN.read
      end

      # accept initial input as YAML or JSON
      # NOTE(review): YAML.load on user-supplied input; consider safe_load.
      if blob[0..2] == '---'
        blob = YAML.load(blob)
      else
        blob = JSON.parse(blob)
      end

      # Listing must not depend on the selected pipeline being present, so
      # it runs before the pipeline key is validated.
      if opts[:ls]
        blob.keys.each do |key|
          if key =~ /^pipeline_/ && key !~ /^pipeline_.*_path$/
            puts key.sub(/^pipeline_/,'')
          end
        end
        exit 0
      end

      # the key that contains our pipeline must be called
      # pipeline_$KEY (where $KEY defaults to 'default')
      pipeline_key = "pipeline_#{opts[:key]}"

      unless blob.include? pipeline_key
        raise RuntimeError, "Pipeline key '#{pipeline_key}' not found in input"
      end

      unless blob[pipeline_key].is_a? Array
        raise RuntimeError, "Value of pipeline key '#{pipeline_key}' is not an Array"
      end

      # a "<pipeline_key>_path" String in the document overrides --path
      if blob["#{pipeline_key}_path"].is_a? String
        opts[:path] = blob["#{pipeline_key}_path"]
      end

      # resolve the symbolic default of --path
      opts[:path] = ENV['MADCAT_PATH'] || ENV['PATH'] if opts[:path] == '$MADCAT_PATH || $PATH'
      bin_path = opts[:path].split(':')

      meta = { 'id' => pipeline_key,
               'current' => blob[pipeline_key][0],
               'todo' => blob[pipeline_key],
               'done' => [],
               'path' => bin_path }

      # add read-only meta-data to the json?
      meta_key = nil
      if opts[:meta_key] != 'OFF'
        meta_key = opts[:meta_key]
        blob[meta_key] = meta
      end

      # key=value from ARGV; a dotted key ("a.b.c=v") addresses nested
      # hashes, creating (or replacing non-Hash) intermediate levels
      ARGV.each do |arg|
        unless arg.include? '='
          STDERR.puts "Error: Positional arguments must be given as key=value"
          STDERR.puts "Invalid argument: #{arg}"
          exit 2
        end
        k, v = arg.split('=', 2)

        parts = k.split('.')
        node = blob
        parts.each_with_index do |part, i|
          node = node[part] = i == parts.length-1 ? v : (node[part].is_a?(Hash) ? node[part] : {})
        end
      end

      first_id = "#{meta['todo'][0].split(' ')[0]}:0"
      results = MadCat::LitterBox.new(logger).run(blob.to_json, first_id, opts[:trace], 0, meta, meta_key)
      end_time = Time.now

      sum_jobs_duration = 0
      error = false
      results.each do |r|
        indent = ' ' * (r[:depth] || 0)
        line = "#{indent}#{r['id']} exit:#{r[:exit]} time:#{r['duration'].round(2)} stdin:#{r[:stdin].length} stdout:#{r[:stdout].length} stderr:#{r[:stderr].length}"
        sum_jobs_duration += r['duration']

        if r[:exit] != 0 || r.include?(:exception_type)
          if r[:exception_type] == 'PipelineStop'
            # a job that printed nothing ends its branch; not an error
            if 0 == r[:todo].length
              line += " FIN"
            else
              line += " BREAK:#{r[:todo].length}"
            end
            logger.info line
          else
            line += " ERROR"
            logger.error line
            # be verbose, hoomins love that!
            logger.fatal "ERROR: job '#{r['id']}' #{r[:exception_type]}, dumping..."
            logger.fatal "-- DUMP for job #{r['id']} ------------------"

            r.to_yaml.split("\n").each do |dump_line|
              next if dump_line[0..2] == '---'
              logger.fatal dump_line
            end
            error = true
          end
        else
          # successful last job of a branch that produced output
          if 0 == r[:todo].length && 0 < r[:stdout].length
            line += " FIN+"
            unless opts[:quiet_stdout]
              STDOUT.puts "-- STDOUT by #{r['id']} ---------------" if opts[:marker]
              STDOUT.puts r[:stdout]
            end
          end
          logger.info line
        end

        if opts[:quiet_stderr] == false && 0 < r[:stderr].length
          STDERR.puts "-- STDERR by #{r['id']} ---------------" if opts[:marker]
          STDERR.puts r[:stderr]
        end
      end

      logger.info "pipeline_#{opts[:key]} finished, #{results.length} jobs"

      total_duration = end_time - start_time
      total_duration_text = ChronicDuration.output(total_duration.round(2), :format => :chrono)

      avg_per_job = ChronicDuration.output((total_duration / results.length).round(2), :format => :chrono)

      # wall-clock time not accounted for by the jobs themselves
      in_madcat = total_duration - sum_jobs_duration
      in_madcat_text = ChronicDuration.output(in_madcat.round(2), :format => :chrono)
      mad_prc = (in_madcat / total_duration * 100).round(2)
      logger.info "time total: #{total_duration_text}, avg: #{avg_per_job}, in madcat: #{in_madcat_text} (#{mad_prc}%)"

      exit 1 if error
      exit 0
    end
  end

  # Parse the given arguments and run the command declared above.
  #
  # @param [Array<String>] argv Command-line arguments (defaults to ARGV)
  def self.run!(argv=ARGV)
    Optix::invoke!(argv)
  end
end
201
+
202
# Runs a pipeline of external executables.
#
# Each job receives a JSON document on stdin. If it prints JSON on stdout,
# that output is merged into the document under the job's name and passed
# to the next job; a JSON Array fans out into one downstream run per
# element. Failures never propagate as exceptions: they are recorded on the
# job's result record.
class LitterBox
  class ErrorFromJSON < RuntimeError; end
  class NonZeroExit < RuntimeError; end
  class MalformedInput < RuntimeError; end
  class MalformedOutput < RuntimeError; end
  class PipelineStop < RuntimeError; end

  # @param [Logger] logger Receives the trace output (debug level)
  def initialize(logger)
    @logger = logger
  end

  # Run the first job listed in meta_in['todo'], then recurse into the
  # remaining jobs for every output document.
  #
  # @param [String] blob JSON document fed to the job
  # @param [Object] id Identifier stored in the result record
  # @param [Boolean] trace Dump the job's i/o to the logger
  # @param [Integer] depth Nesting depth stored in the result record
  # @param [Hash] meta_in Pipeline state ('id', 'todo', 'done', 'path');
  #   it is deep-copied, the caller's object is not modified
  # @param [String, nil] meta_key Key under which the state is inserted
  #   into the document (nil = do not insert)
  # @return [Array<Hash>] one record for this job followed by the records
  #   of all downstream jobs. On failure (including the PipelineStop
  #   marker) this job's record carries :exception_type, :exception_message
  #   and :exception_backtrace.
  def run(blob, id=:unknown, trace=false, depth=0, meta_in=nil, meta_key=nil)
    # scaffold the results-object as early as possible
    # because this will also be returned and augmented
    # when an exception occurs.
    results = [{'id' => id, :exit => -1, :stdin => blob,
                'duration' => -1, :stdout => '', :stderr => ''}]

    begin
      blob = JSON.parse(blob)

      meta = Marshal.load(Marshal.dump(meta_in)) # deep_copy

      todo = meta['todo']
      raise RuntimeError, 'Pipeline has no jobs defined?!' if todo.length == 0

      # "name arg1 arg2" -> executable name plus its arguments
      cmd = todo.shift.split(' ')
      job = cmd[0]

      # find our executable
      bin_path = meta['path']
      bin = find_executable(bin_path, job)
      if bin.nil?
        raise RuntimeError, "Executable #{job} not found in #{bin_path.join(':')}"
      end

      # update meta
      meta['current'] = job

      # add read-only metadata
      blob[meta_key] = meta if meta_key

      # run our executable
      blob['*'] = job
      stdin = blob.to_json
      results[0][:stdin] = stdin # we modified it above
      start_time = Time.now
      p = MadCat::Helpers::exec!([bin] + cmd[1..-1], stdin)
      duration = (Time.now - start_time).to_f
      results[0].merge!({:bin => bin, :exit => p.status, 'duration' => duration,
                         :stdout => p.stdout, :stderr => p.stderr, :todo => todo,
                         :depth => depth })

      trace_job(meta, id, results[0]) if trace

      # alert on exit code != 0
      raise NonZeroExit, "#{p.status}" if p.status != 0

      # no output? game over.
      raise PipelineStop if p.stdout == ''

      blobs = parse_output(p.stdout, todo)

      # raise on json-key 'err'
      blobs.each do |out_blob|
        if out_blob.is_a?(Hash) && out_blob.include?('err')
          raise ErrorFromJSON, out_blob['err']
        end
      end

      # oob mode
      meta['done'] << { results[0]['id'] => results[0].select { |k, _v| ['duration', :exit].include? k } }

      # feed blob to next stage of the pipeline
      blobs.each_with_index do |new_inner_blob, i|
        break if 0 == todo.length

        # store output blob under jobid
        blob_ = Marshal.load(Marshal.dump(blob)) # deep_copy
        blob_[job] ||= {}
        blob_[job].deep_merge! new_inner_blob

        results += run(blob_.to_json, "#{todo[0].split(' ')[0]}:#{i}", trace, depth+1, meta, meta_key)
      end
    rescue => e
      # Annotate THIS job's record. (results[-1] would be a downstream
      # job's record once children have been appended.)
      failed = results[0]
      failed[:exception_type] = e.class.name.split('::')[-1]
      failed[:exception_message] = e.message
      failed[:exception_backtrace] = e.backtrace
    end

    # bubble up
    results
  end

  # Shorthand for the logger.
  def l
    @logger
  end

  private

  # Return the first "<dir>/<name>" from the search path that is
  # executable, or nil.
  def find_executable(search_path, name)
    search_path.map { |dir| File.join(dir, name) }.find { |candidate| File.executable?(candidate) }
  end

  # Parse a job's stdout into an Array of output documents.
  #
  # Unparseable output is an error only while jobs remain; the last job of
  # a branch may print arbitrary text, which yields no documents.
  def parse_output(stdout, todo)
    begin
      parsed = JSON.parse(stdout)
    rescue => parse_error
      raise MalformedOutput, "Could not parse JSON: #{parse_error.message}" if 0 < todo.length
      return []
    end
    parsed.is_a?(Array) ? parsed : [parsed]
  end

  # Write the chain of finished jobs and a YAML dump of the result record
  # to the logger at debug level.
  def trace_job(meta, id, result)
    done = meta['done']
    done.each_with_index do |node, level|
      l.debug "## #{' ' * level}#{node.keys[0]}"
    end
    l.debug "## #{' ' * done.length}#{id} <--"

    l.debug "#" * 70
    # zomg HACK: append a newline to stdin/stdout/stderr to convince
    # the yaml serializer to print them as block literals.
    # a bit evil, but it makes the trace-output much more readable...
    # (the modified strings stay in the result record)
    [:stdin, :stdout, :stderr].each do |key|
      if 0 < result[key].length && result[key][-1] != "\n"
        result[key] += "\n"
      end
    end

    result.to_yaml.split("\n").each do |line|
      next if line[0..2] == '---'
      l.debug line
    end
    l.debug "#" * 70
  end
end
345
+ end
data/madcat.gemspec ADDED
@@ -0,0 +1,24 @@
1
# -*- encoding: utf-8 -*-
require File.expand_path('../lib/madcat/version', __FILE__)

# Packaging metadata for the madcat gem. The file list comes from git, so
# only tracked files are packaged.
Gem::Specification.new do |spec|
  spec.authors       = ["Moe"]
  spec.email         = ["moe@busyloop.net"]
  spec.description   = %q{Katzeklo, Katzeklo...}
  spec.summary       = %q{...ja das macht die Katze froh}
  spec.homepage      = ""

  tracked_files = `git ls-files`.split($\)
  spec.files         = tracked_files
  # everything under bin/ is installed as an executable
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.name          = "madcat"
  spec.require_paths = ["lib"]
  spec.version       = MadCat::VERSION

  # runtime dependencies
  spec.add_dependency 'optix'
  spec.add_dependency 'open4'
  spec.add_dependency 'hashie'
  spec.add_dependency 'json'
  spec.add_dependency 'chronic_duration'
  spec.add_dependency 'deep_merge'
end
metadata ADDED
@@ -0,0 +1,123 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: madcat
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.7
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Moe
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-09-13 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: optix
16
+ requirement: &22553000 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *22553000
25
+ - !ruby/object:Gem::Dependency
26
+ name: open4
27
+ requirement: &22552320 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *22552320
36
+ - !ruby/object:Gem::Dependency
37
+ name: hashie
38
+ requirement: &22551900 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :runtime
45
+ prerelease: false
46
+ version_requirements: *22551900
47
+ - !ruby/object:Gem::Dependency
48
+ name: json
49
+ requirement: &22551400 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ type: :runtime
56
+ prerelease: false
57
+ version_requirements: *22551400
58
+ - !ruby/object:Gem::Dependency
59
+ name: chronic_duration
60
+ requirement: &22550760 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ type: :runtime
67
+ prerelease: false
68
+ version_requirements: *22550760
69
+ - !ruby/object:Gem::Dependency
70
+ name: deep_merge
71
+ requirement: &22550140 !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ type: :runtime
78
+ prerelease: false
79
+ version_requirements: *22550140
80
+ description: Katzeklo, Katzeklo...
81
+ email:
82
+ - moe@busyloop.net
83
+ executables:
84
+ - madcat
85
+ extensions: []
86
+ extra_rdoc_files: []
87
+ files:
88
+ - .gitignore
89
+ - Gemfile
90
+ - LICENSE
91
+ - README.md
92
+ - Rakefile
93
+ - bin/madcat
94
+ - lib/madcat.rb
95
+ - lib/madcat/helpers.rb
96
+ - lib/madcat/version.rb
97
+ - madcat.gemspec
98
+ homepage: ''
99
+ licenses: []
100
+ post_install_message:
101
+ rdoc_options: []
102
+ require_paths:
103
+ - lib
104
+ required_ruby_version: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ required_rubygems_version: !ruby/object:Gem::Requirement
111
+ none: false
112
+ requirements:
113
+ - - ! '>='
114
+ - !ruby/object:Gem::Version
115
+ version: '0'
116
+ requirements: []
117
+ rubyforge_project:
118
+ rubygems_version: 1.8.10
119
+ signing_key:
120
+ specification_version: 3
121
+ summary: ! '...ja das macht die Katze froh'
122
+ test_files: []
123
+ has_rdoc: