azkaban-rb 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- data/azkaban-rb.gemspec +1 -0
- data/example/Rakefile +9 -2
- data/lib/azkaban-rb.rb +1 -0
- data/lib/azkaban-rb/tasks.rb +5 -1
- data/lib/azkaban-rb/version.rb +1 -1
- data/lib/azkaban-rb/visualization.rb +221 -0
- metadata +53 -55
data/azkaban-rb.gemspec
CHANGED
data/example/Rakefile
CHANGED
@@ -23,7 +23,7 @@ task :clean_job_conf do
|
|
23
23
|
end
|
24
24
|
|
25
25
|
props :base do
|
26
|
-
set "udf.import.list" => "oink.,
|
26
|
+
set "udf.import.list" => "oink.,org.apache.pig.piggybank."
|
27
27
|
set "hadoop.job.ugi" => "#{@@user_name},hadoop"
|
28
28
|
set "hdfs.default.classpath.dir" => config["hdfs_classpath"]
|
29
29
|
set "jvm.args" => config["jvm_args"] if config["jvm_args"]
|
@@ -72,6 +72,12 @@ end
|
|
72
72
|
|
73
73
|
task :default => :zip
|
74
74
|
|
75
|
+
desc "Visualize the data flow (requires GraphViz installed)"
|
76
|
+
task :visualize do
|
77
|
+
rakeGraph = RakeGraph.new
|
78
|
+
rakeGraph.visualize("ExampleAzkabanDataflow", "example_azkaban_dataflow.png")
|
79
|
+
end
|
80
|
+
|
75
81
|
# Create a run task for each pig job so we can run using Rake. Parameter substitution is done automatically.
|
76
82
|
Rake.application.tasks.find_all do |task|
|
77
83
|
if task.job && task.job.instance_of?(Azkaban::PigJob)
|
@@ -82,4 +88,5 @@ Rake.application.tasks.find_all do |task|
|
|
82
88
|
`bin/pig #{parameters} #{script}`
|
83
89
|
end
|
84
90
|
end
|
85
|
-
end
|
91
|
+
end
|
92
|
+
|
data/lib/azkaban-rb.rb
CHANGED
data/lib/azkaban-rb/tasks.rb
CHANGED
@@ -63,7 +63,7 @@ module Azkaban
|
|
63
63
|
HTTP::Message.mime_type_handler = Proc.new { |path| Azkaban::mime_type_handler(path) }
|
64
64
|
|
65
65
|
class JobFile
|
66
|
-
attr_reader :read_locks, :write_locks, :task, :
|
66
|
+
attr_reader :read_locks, :write_locks, :task, :uses_arg
|
67
67
|
|
68
68
|
@output_dir = "conf/"
|
69
69
|
|
@@ -154,6 +154,7 @@ module Azkaban
|
|
154
154
|
end
|
155
155
|
|
156
156
|
def uses(name)
|
157
|
+
@uses_arg = name
|
157
158
|
set "pig.script"=>name
|
158
159
|
end
|
159
160
|
|
@@ -174,6 +175,7 @@ module Azkaban
|
|
174
175
|
end
|
175
176
|
|
176
177
|
def uses(name)
|
178
|
+
@uses_arg = name
|
177
179
|
set "job.class"=>name
|
178
180
|
end
|
179
181
|
end
|
@@ -185,6 +187,7 @@ module Azkaban
|
|
185
187
|
end
|
186
188
|
|
187
189
|
def uses(name)
|
190
|
+
@uses_arg = name
|
188
191
|
set "java.class"=>name
|
189
192
|
end
|
190
193
|
end
|
@@ -196,6 +199,7 @@ module Azkaban
|
|
196
199
|
end
|
197
200
|
|
198
201
|
def uses(text)
|
202
|
+
@uses_arg = text
|
199
203
|
set "command"=>text
|
200
204
|
end
|
201
205
|
end
|
data/lib/azkaban-rb/version.rb
CHANGED
data/lib/azkaban-rb/visualization.rb
ADDED
@@ -0,0 +1,221 @@
|
|
1
|
+
require 'graphviz_r'
|
2
|
+
|
3
|
+
class RakeGraph
|
4
|
+
attr_reader :tasks
|
5
|
+
|
6
|
+
def initialize(namespaces = nil)
|
7
|
+
@namespaces = namespaces
|
8
|
+
@tasks = {}
|
9
|
+
Rake.application.tasks.find_all{ |task| (not task.job.nil?)}.each do |task|
|
10
|
+
tasks[RakeGraph.task_name(task)] = task if (task.job.read_locks.size + task.job.write_locks.size) > 0
|
11
|
+
end
|
12
|
+
@nodes = {}
|
13
|
+
@edges = []
|
14
|
+
construct_graph()
|
15
|
+
end
|
16
|
+
|
17
|
+
def RakeGraph.task_name(task)
|
18
|
+
task_name = "TASK#{task}"
|
19
|
+
task_name = task_name.gsub(/[^0-9a-z ]/i, '')
|
20
|
+
return task_name
|
21
|
+
end
|
22
|
+
|
23
|
+
def RakeGraph.data_name(name)
|
24
|
+
name = "DATA"+name.gsub(/[^0-9a-z ]/i, '')
|
25
|
+
return name
|
26
|
+
end
|
27
|
+
|
28
|
+
def task_in_namespace(task)
|
29
|
+
return true if @namespaces.nil? or @namespaces.size == 0
|
30
|
+
return (task.scope & @namespaces).size > 0
|
31
|
+
end
|
32
|
+
|
33
|
+
def find_prereq(task, prereq)
|
34
|
+
scopes = Array.new(task.scope)
|
35
|
+
while prereq.start_with? '^'
|
36
|
+
scopes.pop
|
37
|
+
prereq.slice!(0)
|
38
|
+
end
|
39
|
+
return RakeGraph.task_name(scopes.join('')+prereq)
|
40
|
+
end
|
41
|
+
|
42
|
+
def construct_graph()
|
43
|
+
# first add all of the task nodes
|
44
|
+
@tasks.each do |task_name, task|
|
45
|
+
next unless task_in_namespace(task)
|
46
|
+
node = TaskNode.new(task)
|
47
|
+
@nodes[node.name] = node
|
48
|
+
end
|
49
|
+
|
50
|
+
# now add all of the edges and data nodes
|
51
|
+
data_nodes = {}
|
52
|
+
@nodes.each do |name, node|
|
53
|
+
task = node.task
|
54
|
+
# find all prereq tasks
|
55
|
+
# task.prerequisites.each do |prereq|
|
56
|
+
# prereq = find_prereq(task, prereq)
|
57
|
+
# next unless @nodes.has_key?(prereq)
|
58
|
+
# @edges << TaskEdge.new(prereq, node.name)
|
59
|
+
# end
|
60
|
+
# find all data reads
|
61
|
+
task.job.read_locks.each do |read_lock|
|
62
|
+
data_name = RakeGraph.data_name(read_lock)
|
63
|
+
data_nodes[data_name] = DataNode.new(read_lock) unless data_nodes.has_key? data_name
|
64
|
+
@edges << DataEdge.new(data_name, node.name)
|
65
|
+
end
|
66
|
+
# find all data writes
|
67
|
+
task.job.write_locks.each do |write_lock|
|
68
|
+
data_name = RakeGraph.data_name(write_lock)
|
69
|
+
data_nodes[data_name] = DataNode.new(write_lock) unless data_nodes.has_key? data_name
|
70
|
+
@edges << DataEdge.new(node.name, data_name)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
data_nodes.each do |key, value|
|
74
|
+
@nodes[key] = value
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
class Node
|
79
|
+
attr_reader :name, :type
|
80
|
+
|
81
|
+
def initialize(name, type)
|
82
|
+
@name = name
|
83
|
+
@type = type
|
84
|
+
end
|
85
|
+
|
86
|
+
def fontcolor
|
87
|
+
return '#000000'
|
88
|
+
end
|
89
|
+
|
90
|
+
def to_s
|
91
|
+
return "#{@type}: #{@name}"
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
class TaskNode < Node
|
96
|
+
attr_reader :task
|
97
|
+
|
98
|
+
def initialize(task)
|
99
|
+
super(RakeGraph.task_name(task), task.job.class.to_s)
|
100
|
+
@task = task
|
101
|
+
end
|
102
|
+
|
103
|
+
def label
|
104
|
+
label = "<#{@task}<br/>#{@task.job.uses_arg}>"
|
105
|
+
return label.to_sym
|
106
|
+
end
|
107
|
+
|
108
|
+
def shape
|
109
|
+
return :ellipse
|
110
|
+
end
|
111
|
+
|
112
|
+
def fillcolor
|
113
|
+
case @type
|
114
|
+
when 'Azkaban::PigJob'
|
115
|
+
return '#e7a5a5'
|
116
|
+
when 'Azkaban::JavaJob'
|
117
|
+
return '#E7C6A5'
|
118
|
+
when 'Azkaban::CommandJob'
|
119
|
+
return '#e7e6a5'
|
120
|
+
end
|
121
|
+
return ""
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
class DataNode < Node
|
126
|
+
attr_reader :filename
|
127
|
+
|
128
|
+
def initialize(filename)
|
129
|
+
super(RakeGraph.data_name(filename), "data")
|
130
|
+
@filename = filename
|
131
|
+
end
|
132
|
+
|
133
|
+
def label
|
134
|
+
label = @filename
|
135
|
+
return "<#{label}>".to_sym
|
136
|
+
end
|
137
|
+
|
138
|
+
def shape
|
139
|
+
return :box
|
140
|
+
end
|
141
|
+
|
142
|
+
def fillcolor
|
143
|
+
return '#d2e3f3'
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
class Edge
|
148
|
+
attr_reader :source, :dest, :type
|
149
|
+
|
150
|
+
def initialize(source, dest)
|
151
|
+
@source = source
|
152
|
+
@dest = dest
|
153
|
+
end
|
154
|
+
|
155
|
+
def to_s
|
156
|
+
return "#{source} >> #{dest}"
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
160
|
+
class TaskEdge < Edge
|
161
|
+
def initialize(source, dest)
|
162
|
+
super(source, dest)
|
163
|
+
@type = "task"
|
164
|
+
end
|
165
|
+
|
166
|
+
def style
|
167
|
+
return :dotted
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
171
|
+
class DataEdge < Edge
|
172
|
+
def initialize(source, dest)
|
173
|
+
super(source, dest)
|
174
|
+
@type = "data"
|
175
|
+
end
|
176
|
+
|
177
|
+
def style
|
178
|
+
:solid
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
182
|
+
def visualize(name, output_file)
|
183
|
+
g = GraphvizR.new name
|
184
|
+
g.graph[:label => name]
|
185
|
+
add_nodes(g)
|
186
|
+
add_edges(g)
|
187
|
+
g.output output_file
|
188
|
+
end
|
189
|
+
|
190
|
+
def add_nodes(g)
|
191
|
+
@nodes.each do |name, node|
|
192
|
+
g[name] [:label => @label_block.nil? ? node.label : @label_block.call(node),
|
193
|
+
:shape => @shape_block.nil? ? node.shape : @shape_block.call(node),
|
194
|
+
:fillcolor => @fillcolor_block.nil? ? node.fillcolor : @fillcolor_block.call(node),
|
195
|
+
:style => :filled,
|
196
|
+
:fontcolor => @fontcolor_block.nil? ? node.fontcolor : @fontcolor_block.call(node)]
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
def add_edges(g)
|
201
|
+
@edges.each do |edge|
|
202
|
+
(g[edge.source]>>g[edge.dest])[:style => edge.style]
|
203
|
+
end
|
204
|
+
end
|
205
|
+
|
206
|
+
def set_label(&block)
|
207
|
+
@label_block = block
|
208
|
+
end
|
209
|
+
|
210
|
+
def set_fillcolor(&block)
|
211
|
+
@fillcolor_block = block
|
212
|
+
end
|
213
|
+
|
214
|
+
def set_fontcolor(&block)
|
215
|
+
@fontcolor_block = block
|
216
|
+
end
|
217
|
+
|
218
|
+
def set_shape(&block)
|
219
|
+
@shape_block = block
|
220
|
+
end
|
221
|
+
end
|
metadata
CHANGED
@@ -1,41 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: azkaban-rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash: 19
|
5
4
|
prerelease:
|
6
|
-
|
7
|
-
- 0
|
8
|
-
- 0
|
9
|
-
- 6
|
10
|
-
version: 0.0.6
|
5
|
+
version: 0.0.7
|
11
6
|
platform: ruby
|
12
7
|
authors:
|
13
|
-
- Matt Hayes
|
8
|
+
- Matt Hayes
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
12
|
|
18
|
-
date: 2011-08-
|
13
|
+
date: 2011-08-23 00:00:00 -07:00
|
14
|
+
default_executable:
|
19
15
|
dependencies:
|
20
|
-
- !ruby/object:Gem::Dependency
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: httpclient
|
18
|
+
prerelease: false
|
19
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
20
|
+
none: false
|
21
|
+
requirements:
|
22
|
+
- - ~>
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: 2.1.6
|
25
|
+
type: :runtime
|
26
|
+
version_requirements: *id001
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: GraphvizR
|
29
|
+
prerelease: false
|
30
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
31
|
+
none: false
|
32
|
+
requirements:
|
33
|
+
- - ~>
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: 0.5.1
|
36
|
+
type: :runtime
|
37
|
+
version_requirements: *id002
|
36
38
|
description: azkaban-rb allows Azkaban jobs to be modeled as rake tasks
|
37
39
|
email:
|
38
|
-
- matthew.terence.hayes@gmail.com
|
40
|
+
- matthew.terence.hayes@gmail.com
|
39
41
|
executables: []
|
40
42
|
|
41
43
|
extensions: []
|
@@ -43,22 +45,24 @@ extensions: []
|
|
43
45
|
extra_rdoc_files: []
|
44
46
|
|
45
47
|
files:
|
46
|
-
- .gitignore
|
47
|
-
- Gemfile
|
48
|
-
- Rakefile
|
49
|
-
- azkaban-rb.gemspec
|
50
|
-
- example/.gitignore
|
51
|
-
- example/Rakefile
|
52
|
-
- example/bin/pig
|
53
|
-
- example/data/input.txt
|
54
|
-
- example/example_config.yml
|
55
|
-
- example/hadoop-lzo-0.4.9.jar
|
56
|
-
- example/pig-0.9.0-core.jar
|
57
|
-
- example/src/test.pig
|
58
|
-
- example/src/test2.pig
|
59
|
-
- lib/azkaban-rb.rb
|
60
|
-
- lib/azkaban-rb/tasks.rb
|
61
|
-
- lib/azkaban-rb/version.rb
|
48
|
+
- .gitignore
|
49
|
+
- Gemfile
|
50
|
+
- Rakefile
|
51
|
+
- azkaban-rb.gemspec
|
52
|
+
- example/.gitignore
|
53
|
+
- example/Rakefile
|
54
|
+
- example/bin/pig
|
55
|
+
- example/data/input.txt
|
56
|
+
- example/example_config.yml
|
57
|
+
- example/hadoop-lzo-0.4.9.jar
|
58
|
+
- example/pig-0.9.0-core.jar
|
59
|
+
- example/src/test.pig
|
60
|
+
- example/src/test2.pig
|
61
|
+
- lib/azkaban-rb.rb
|
62
|
+
- lib/azkaban-rb/tasks.rb
|
63
|
+
- lib/azkaban-rb/version.rb
|
64
|
+
- lib/azkaban-rb/visualization.rb
|
65
|
+
has_rdoc: true
|
62
66
|
homepage: https://github.com/matthayes/azkaban-rb
|
63
67
|
licenses: []
|
64
68
|
|
@@ -66,29 +70,23 @@ post_install_message:
|
|
66
70
|
rdoc_options: []
|
67
71
|
|
68
72
|
require_paths:
|
69
|
-
- lib
|
73
|
+
- lib
|
70
74
|
required_ruby_version: !ruby/object:Gem::Requirement
|
71
75
|
none: false
|
72
76
|
requirements:
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
segments:
|
77
|
-
- 0
|
78
|
-
version: "0"
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: "0"
|
79
80
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
80
81
|
none: false
|
81
82
|
requirements:
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
segments:
|
86
|
-
- 0
|
87
|
-
version: "0"
|
83
|
+
- - ">="
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: "0"
|
88
86
|
requirements: []
|
89
87
|
|
90
88
|
rubyforge_project: azkaban-rb
|
91
|
-
rubygems_version: 1.
|
89
|
+
rubygems_version: 1.5.1
|
92
90
|
signing_key:
|
93
91
|
specification_version: 3
|
94
92
|
summary: Azkaban job generation using Ruby
|