azkaban-rb 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/azkaban-rb.gemspec +1 -0
- data/example/Rakefile +9 -2
- data/lib/azkaban-rb.rb +1 -0
- data/lib/azkaban-rb/tasks.rb +5 -1
- data/lib/azkaban-rb/version.rb +1 -1
- data/lib/azkaban-rb/visualization.rb +221 -0
- metadata +53 -55
data/azkaban-rb.gemspec
CHANGED
data/example/Rakefile
CHANGED
@@ -23,7 +23,7 @@ task :clean_job_conf do
|
|
23
23
|
end
|
24
24
|
|
25
25
|
props :base do
|
26
|
-
set "udf.import.list" => "oink.,
|
26
|
+
set "udf.import.list" => "oink.,org.apache.pig.piggybank."
|
27
27
|
set "hadoop.job.ugi" => "#{@@user_name},hadoop"
|
28
28
|
set "hdfs.default.classpath.dir" => config["hdfs_classpath"]
|
29
29
|
set "jvm.args" => config["jvm_args"] if config["jvm_args"]
|
@@ -72,6 +72,12 @@ end
|
|
72
72
|
|
73
73
|
task :default => :zip
|
74
74
|
|
75
|
+
# Rake task that draws the example's job/data graph with RakeGraph and writes
# it to example_azkaban_dataflow.png.
desc "Visualize the data flow (requires GraphViz installed)"
task :visualize do
  RakeGraph.new.visualize("ExampleAzkabanDataflow", "example_azkaban_dataflow.png")
end
|
80
|
+
|
75
81
|
# Create a run task for each pig job so we can run using Rake. Parameter substitution is done automatically.
|
76
82
|
Rake.application.tasks.find_all do |task|
|
77
83
|
if task.job && task.job.instance_of?(Azkaban::PigJob)
|
@@ -82,4 +88,5 @@ Rake.application.tasks.find_all do |task|
|
|
82
88
|
`bin/pig #{parameters} #{script}`
|
83
89
|
end
|
84
90
|
end
|
85
|
-
end
|
91
|
+
end
|
92
|
+
|
data/lib/azkaban-rb.rb
CHANGED
data/lib/azkaban-rb/tasks.rb
CHANGED
@@ -63,7 +63,7 @@ module Azkaban
|
|
63
63
|
HTTP::Message.mime_type_handler = Proc.new { |path| Azkaban::mime_type_handler(path) }
|
64
64
|
|
65
65
|
class JobFile
|
66
|
-
attr_reader :read_locks, :write_locks, :task, :
|
66
|
+
attr_reader :read_locks, :write_locks, :task, :uses_arg
|
67
67
|
|
68
68
|
@output_dir = "conf/"
|
69
69
|
|
@@ -154,6 +154,7 @@ module Azkaban
|
|
154
154
|
end
|
155
155
|
|
156
156
|
# Chooses the Pig script for this job.
#
# The raw argument is remembered in @uses_arg and the same value is handed to
# `set` under the "pig.script" key.
def uses(name)
  @uses_arg = name
  set("pig.script" => name)
end
|
159
160
|
|
@@ -174,6 +175,7 @@ module Azkaban
|
|
174
175
|
end
|
175
176
|
|
176
177
|
# Chooses the job class for this job.
#
# The raw argument is remembered in @uses_arg and the same value is handed to
# `set` under the "job.class" key.
def uses(name)
  @uses_arg = name
  set("job.class" => name)
end
|
179
181
|
end
|
@@ -185,6 +187,7 @@ module Azkaban
|
|
185
187
|
end
|
186
188
|
|
187
189
|
# Chooses the Java class for this job.
#
# The raw argument is remembered in @uses_arg and the same value is handed to
# `set` under the "java.class" key.
def uses(name)
  @uses_arg = name
  set("java.class" => name)
end
|
190
193
|
end
|
@@ -196,6 +199,7 @@ module Azkaban
|
|
196
199
|
end
|
197
200
|
|
198
201
|
# Chooses the command text for this job.
#
# The raw argument is remembered in @uses_arg and the same value is handed to
# `set` under the "command" key.
def uses(text)
  @uses_arg = text
  set("command" => text)
end
|
201
205
|
end
|
data/lib/azkaban-rb/version.rb
CHANGED
@@ -0,0 +1,221 @@
|
|
1
|
+
require 'graphviz_r'
|
2
|
+
|
3
|
+
# Builds a graph of job-carrying Rake tasks and the data they lock, and renders
# it through GraphvizR.
#
# Task nodes are created for Rake tasks whose job has at least one read or
# write lock.  Data nodes are created from the lock names.  An edge runs from
# each data node to every task that reads it, and from each task to every data
# node it writes.
class RakeGraph
  # Replacements for the characters that are markup inside the HTML-like
  # labels ("<...>") produced by the node classes below.
  HTML_ESCAPES = { '&' => '&amp;', '<' => '&lt;', '>' => '&gt;' }.freeze

  # Hash of graph task name => Rake task, for every task that has a job with
  # at least one lock.
  attr_reader :tasks

  # namespaces - optional collection of scope names.  When nil or empty every
  #              collected task becomes a node; otherwise only tasks whose
  #              scope shares at least one entry with it.
  def initialize(namespaces = nil)
    @namespaces = namespaces
    @tasks = {}
    Rake.application.tasks.each do |task|
      job = task.job
      next if job.nil?
      next unless (job.read_locks.size + job.write_locks.size) > 0
      @tasks[RakeGraph.task_name(task)] = task
    end
    @nodes = {}
    @edges = []
    construct_graph
  end

  # Returns the graph identifier for a task (or task name): "TASK" followed by
  # its string form with everything except letters, digits and spaces removed.
  def self.task_name(task)
    "TASK#{task}".gsub(/[^0-9a-z ]/i, '')
  end

  # Returns the graph identifier for a data name: "DATA" followed by the name
  # with everything except letters, digits and spaces removed.
  def self.data_name(name)
    "DATA" + name.gsub(/[^0-9a-z ]/i, '')
  end

  # Returns the string form of +text+ with &, < and > replaced by entities so
  # that it can be embedded in an HTML-like label without being read as markup.
  def self.escape_html(text)
    text.to_s.gsub(/[&<>]/) { |char| HTML_ESCAPES[char] }
  end

  # True when no namespace filter is set, or when the task's scope and the
  # filter have at least one entry in common.
  def task_in_namespace(task)
    return true if @namespaces.nil? || @namespaces.empty?
    !(task.scope & @namespaces).empty?
  end

  # Resolves a prerequisite name, relative to +task+'s scope, to a graph task
  # name.  Every leading '^' on the prerequisite drops one trailing scope
  # entry before the remaining scope entries and the name are joined.
  #
  # The prerequisite is only read: the caller's string is never modified, so
  # frozen strings are accepted as well.
  def find_prereq(task, prereq)
    name = prereq.to_s
    levels_up = name[/\A\^*/].size
    scopes = Array.new(task.scope)
    scopes.pop(levels_up)
    RakeGraph.task_name(scopes.join('') + name[levels_up..-1])
  end

  # Fills @nodes and @edges from @tasks.
  def construct_graph
    # first add all of the task nodes
    @tasks.each_value do |task|
      next unless task_in_namespace(task)
      node = TaskNode.new(task)
      @nodes[node.name] = node
    end

    # now add all of the edges and data nodes; data nodes are collected in a
    # separate hash so @nodes is not modified while it is being iterated
    data_nodes = {}
    @nodes.each_value do |node|
      job = node.task.job
      # Task-to-task prerequisite edges are currently disabled:
      # node.task.prerequisites.each do |prereq|
      #   prereq = find_prereq(node.task, prereq)
      #   next unless @nodes.has_key?(prereq)
      #   @edges << TaskEdge.new(prereq, node.name)
      # end

      # data reads: data -> task
      job.read_locks.each do |read_lock|
        data_name = RakeGraph.data_name(read_lock)
        data_nodes[data_name] ||= DataNode.new(read_lock)
        @edges << DataEdge.new(data_name, node.name)
      end

      # data writes: task -> data
      job.write_locks.each do |write_lock|
        data_name = RakeGraph.data_name(write_lock)
        data_nodes[data_name] ||= DataNode.new(write_lock)
        @edges << DataEdge.new(node.name, data_name)
      end
    end
    @nodes.merge!(data_nodes)
  end

  # Common state for graph nodes: a graph identifier and a type string.
  class Node
    attr_reader :name, :type

    def initialize(name, type)
      @name = name
      @type = type
    end

    # Default label colour (black).
    def fontcolor
      '#000000'
    end

    def to_s
      "#{@type}: #{@name}"
    end
  end

  # Node for a Rake task; its type is the class name of the task's job.
  class TaskNode < Node
    # Fill colour per job class name; any other type gets an empty string.
    FILL_COLORS = {
      'Azkaban::PigJob' => '#e7a5a5',
      'Azkaban::JavaJob' => '#E7C6A5',
      'Azkaban::CommandJob' => '#e7e6a5'
    }.freeze

    attr_reader :task

    def initialize(task)
      super(RakeGraph.task_name(task), task.job.class.to_s)
      @task = task
    end

    # HTML-like label: the task's string form and the job's uses_arg on two
    # lines.  Both parts are escaped so that text such as a shell command
    # containing '>' or '&&' is not interpreted as markup.
    def label
      title = RakeGraph.escape_html(@task)
      detail = RakeGraph.escape_html(@task.job.uses_arg)
      "<#{title}<br/>#{detail}>".to_sym
    end

    def shape
      :ellipse
    end

    def fillcolor
      FILL_COLORS.fetch(@type, "")
    end
  end

  # Node for a piece of data named by a read or write lock.
  class DataNode < Node
    attr_reader :filename

    def initialize(filename)
      super(RakeGraph.data_name(filename), "data")
      @filename = filename
    end

    # HTML-like label holding the escaped lock name.
    def label
      "<#{RakeGraph.escape_html(@filename)}>".to_sym
    end

    def shape
      :box
    end

    def fillcolor
      '#d2e3f3'
    end
  end

  # Directed edge between two node identifiers.
  class Edge
    attr_reader :source, :dest, :type

    def initialize(source, dest)
      @source = source
      @dest = dest
    end

    def to_s
      "#{source} >> #{dest}"
    end
  end

  # Edge between two tasks, drawn dotted.
  class TaskEdge < Edge
    def initialize(source, dest)
      super(source, dest)
      @type = "task"
    end

    def style
      :dotted
    end
  end

  # Edge between a task and a data node, drawn solid.
  class DataEdge < Edge
    def initialize(source, dest)
      super(source, dest)
      @type = "data"
    end

    def style
      :solid
    end
  end

  # Renders the graph.
  #
  # name        - passed to GraphvizR.new and used as the graph label.
  # output_file - passed to GraphvizR#output.
  def visualize(name, output_file)
    g = GraphvizR.new name
    g.graph[:label => name]
    add_nodes(g)
    add_edges(g)
    g.output output_file
  end

  # Adds every node to +g+.  Each attribute comes from the matching block
  # registered via set_label / set_shape / set_fillcolor / set_fontcolor when
  # there is one, otherwise from the node itself.
  def add_nodes(g)
    @nodes.each do |name, node|
      label = node_attribute(@label_block, node, :label)
      shape = node_attribute(@shape_block, node, :shape)
      fillcolor = node_attribute(@fillcolor_block, node, :fillcolor)
      fontcolor = node_attribute(@fontcolor_block, node, :fontcolor)
      g[name][:label => label,
              :shape => shape,
              :fillcolor => fillcolor,
              :style => :filled,
              :fontcolor => fontcolor]
    end
  end

  # Adds every edge to +g+, styled according to the edge's own style.
  def add_edges(g)
    @edges.each do |edge|
      (g[edge.source] >> g[edge.dest])[:style => edge.style]
    end
  end

  # Registers a block that is called with a node and returns its label.
  def set_label(&block)
    @label_block = block
  end

  # Registers a block that is called with a node and returns its fill colour.
  def set_fillcolor(&block)
    @fillcolor_block = block
  end

  # Registers a block that is called with a node and returns its font colour.
  def set_fontcolor(&block)
    @fontcolor_block = block
  end

  # Registers a block that is called with a node and returns its shape.
  def set_shape(&block)
    @shape_block = block
  end

  private

  # Value of one node attribute: the override block's result when a block is
  # registered, otherwise the node's own method of that name.
  def node_attribute(override, node, attribute)
    override.nil? ? node.send(attribute) : override.call(node)
  end
end
|
metadata
CHANGED
@@ -1,41 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: azkaban-rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash: 19
|
5
4
|
prerelease:
|
6
|
-
|
7
|
-
- 0
|
8
|
-
- 0
|
9
|
-
- 6
|
10
|
-
version: 0.0.6
|
5
|
+
version: 0.0.7
|
11
6
|
platform: ruby
|
12
7
|
authors:
|
13
|
-
- Matt Hayes
|
8
|
+
- Matt Hayes
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
12
|
|
18
|
-
date: 2011-08-
|
13
|
+
date: 2011-08-23 00:00:00 -07:00
|
14
|
+
default_executable:
|
19
15
|
dependencies:
|
20
|
-
- !ruby/object:Gem::Dependency
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: httpclient
|
18
|
+
prerelease: false
|
19
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
20
|
+
none: false
|
21
|
+
requirements:
|
22
|
+
- - ~>
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: 2.1.6
|
25
|
+
type: :runtime
|
26
|
+
version_requirements: *id001
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: GraphvizR
|
29
|
+
prerelease: false
|
30
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
31
|
+
none: false
|
32
|
+
requirements:
|
33
|
+
- - ~>
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: 0.5.1
|
36
|
+
type: :runtime
|
37
|
+
version_requirements: *id002
|
36
38
|
description: azkaban-rb allows Azkaban jobs to be modeled as rake tasks
|
37
39
|
email:
|
38
|
-
- matthew.terence.hayes@gmail.com
|
40
|
+
- matthew.terence.hayes@gmail.com
|
39
41
|
executables: []
|
40
42
|
|
41
43
|
extensions: []
|
@@ -43,22 +45,24 @@ extensions: []
|
|
43
45
|
extra_rdoc_files: []
|
44
46
|
|
45
47
|
files:
|
46
|
-
- .gitignore
|
47
|
-
- Gemfile
|
48
|
-
- Rakefile
|
49
|
-
- azkaban-rb.gemspec
|
50
|
-
- example/.gitignore
|
51
|
-
- example/Rakefile
|
52
|
-
- example/bin/pig
|
53
|
-
- example/data/input.txt
|
54
|
-
- example/example_config.yml
|
55
|
-
- example/hadoop-lzo-0.4.9.jar
|
56
|
-
- example/pig-0.9.0-core.jar
|
57
|
-
- example/src/test.pig
|
58
|
-
- example/src/test2.pig
|
59
|
-
- lib/azkaban-rb.rb
|
60
|
-
- lib/azkaban-rb/tasks.rb
|
61
|
-
- lib/azkaban-rb/version.rb
|
48
|
+
- .gitignore
|
49
|
+
- Gemfile
|
50
|
+
- Rakefile
|
51
|
+
- azkaban-rb.gemspec
|
52
|
+
- example/.gitignore
|
53
|
+
- example/Rakefile
|
54
|
+
- example/bin/pig
|
55
|
+
- example/data/input.txt
|
56
|
+
- example/example_config.yml
|
57
|
+
- example/hadoop-lzo-0.4.9.jar
|
58
|
+
- example/pig-0.9.0-core.jar
|
59
|
+
- example/src/test.pig
|
60
|
+
- example/src/test2.pig
|
61
|
+
- lib/azkaban-rb.rb
|
62
|
+
- lib/azkaban-rb/tasks.rb
|
63
|
+
- lib/azkaban-rb/version.rb
|
64
|
+
- lib/azkaban-rb/visualization.rb
|
65
|
+
has_rdoc: true
|
62
66
|
homepage: https://github.com/matthayes/azkaban-rb
|
63
67
|
licenses: []
|
64
68
|
|
@@ -66,29 +70,23 @@ post_install_message:
|
|
66
70
|
rdoc_options: []
|
67
71
|
|
68
72
|
require_paths:
|
69
|
-
- lib
|
73
|
+
- lib
|
70
74
|
required_ruby_version: !ruby/object:Gem::Requirement
|
71
75
|
none: false
|
72
76
|
requirements:
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
segments:
|
77
|
-
- 0
|
78
|
-
version: "0"
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: "0"
|
79
80
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
80
81
|
none: false
|
81
82
|
requirements:
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
segments:
|
86
|
-
- 0
|
87
|
-
version: "0"
|
83
|
+
- - ">="
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: "0"
|
88
86
|
requirements: []
|
89
87
|
|
90
88
|
rubyforge_project: azkaban-rb
|
91
|
-
rubygems_version: 1.
|
89
|
+
rubygems_version: 1.5.1
|
92
90
|
signing_key:
|
93
91
|
specification_version: 3
|
94
92
|
summary: Azkaban job generation using Ruby
|