azkaban-rb 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -19,4 +19,5 @@ Gem::Specification.new do |s|
19
19
  s.require_paths = ["lib"]
20
20
 
21
21
  s.add_dependency "httpclient", "~> 2.1.6"
22
+ s.add_dependency "GraphvizR", "~> 0.5.1"
22
23
  end
@@ -23,7 +23,7 @@ task :clean_job_conf do
23
23
  end
24
24
 
25
25
  props :base do
26
- set "udf.import.list" => "oink.,com.linkedin.pig.,com.linkedin.pig.date.,org.apache.pig.piggybank.,com.linkedin.pig.characters."
26
+ set "udf.import.list" => "oink.,org.apache.pig.piggybank."
27
27
  set "hadoop.job.ugi" => "#{@@user_name},hadoop"
28
28
  set "hdfs.default.classpath.dir" => config["hdfs_classpath"]
29
29
  set "jvm.args" => config["jvm_args"] if config["jvm_args"]
@@ -72,6 +72,12 @@ end
72
72
 
73
73
  task :default => :zip
74
74
 
75
+ desc "Visualize the data flow (requires GraphViz installed)"
76
+ task :visualize do
77
+ rakeGraph = RakeGraph.new
78
+ rakeGraph.visualize("ExampleAzkabanDataflow", "example_azkaban_dataflow.png")
79
+ end
80
+
75
81
  # Create a run task for each pig job so we can run using Rake. Parameter substitution is done automatically.
76
82
  Rake.application.tasks.find_all do |task|
77
83
  if task.job && task.job.instance_of?(Azkaban::PigJob)
@@ -82,4 +88,5 @@ Rake.application.tasks.find_all do |task|
82
88
  `bin/pig #{parameters} #{script}`
83
89
  end
84
90
  end
85
- end
91
+ end
92
+
@@ -1,5 +1,6 @@
1
1
  require "azkaban-rb/version"
2
2
  require "azkaban-rb/tasks"
3
+ require "azkaban-rb/visualization"
3
4
 
4
5
  module Azkaban
5
6
  module Rb
@@ -63,7 +63,7 @@ module Azkaban
63
63
  HTTP::Message.mime_type_handler = Proc.new { |path| Azkaban::mime_type_handler(path) }
64
64
 
65
65
  class JobFile
66
- attr_reader :read_locks, :write_locks, :task, :uses
66
+ attr_reader :read_locks, :write_locks, :task, :uses_arg
67
67
 
68
68
  @output_dir = "conf/"
69
69
 
@@ -154,6 +154,7 @@ module Azkaban
154
154
  end
155
155
 
156
156
  def uses(name)
157
+ @uses_arg = name
157
158
  set "pig.script"=>name
158
159
  end
159
160
 
@@ -174,6 +175,7 @@ module Azkaban
174
175
  end
175
176
 
176
177
  def uses(name)
178
+ @uses_arg = name
177
179
  set "job.class"=>name
178
180
  end
179
181
  end
@@ -185,6 +187,7 @@ module Azkaban
185
187
  end
186
188
 
187
189
  def uses(name)
190
+ @uses_arg = name
188
191
  set "java.class"=>name
189
192
  end
190
193
  end
@@ -196,6 +199,7 @@ module Azkaban
196
199
  end
197
200
 
198
201
  def uses(text)
202
+ @uses_arg = text
199
203
  set "command"=>text
200
204
  end
201
205
  end
@@ -1,5 +1,5 @@
1
1
  module Azkaban
2
2
  module Rb
3
- VERSION = "0.0.6"
3
+ VERSION = "0.0.7"
4
4
  end
5
5
  end
@@ -0,0 +1,221 @@
1
+ require 'graphviz_r'
2
+
3
+ class RakeGraph
4
+ attr_reader :tasks
5
+
6
+ def initialize(namespaces = nil)
7
+ @namespaces = namespaces
8
+ @tasks = {}
9
+ Rake.application.tasks.find_all{ |task| (not task.job.nil?)}.each do |task|
10
+ tasks[RakeGraph.task_name(task)] = task if (task.job.read_locks.size + task.job.write_locks.size) > 0
11
+ end
12
+ @nodes = {}
13
+ @edges = []
14
+ construct_graph()
15
+ end
16
+
17
+ def RakeGraph.task_name(task)
18
+ task_name = "TASK#{task}"
19
+ task_name = task_name.gsub(/[^0-9a-z ]/i, '')
20
+ return task_name
21
+ end
22
+
23
+ def RakeGraph.data_name(name)
24
+ name = "DATA"+name.gsub(/[^0-9a-z ]/i, '')
25
+ return name
26
+ end
27
+
28
+ def task_in_namespace(task)
29
+ return true if @namespaces.nil? or @namespaces.size == 0
30
+ return (task.scope & @namespaces).size > 0
31
+ end
32
+
33
+ def find_prereq(task, prereq)
34
+ scopes = Array.new(task.scope)
35
+ while prereq.start_with? '^'
36
+ scopes.pop
37
+ prereq.slice!(0)
38
+ end
39
+ return RakeGraph.task_name(scopes.join('')+prereq)
40
+ end
41
+
42
+ def construct_graph()
43
+ # first add all of the task nodes
44
+ @tasks.each do |task_name, task|
45
+ next unless task_in_namespace(task)
46
+ node = TaskNode.new(task)
47
+ @nodes[node.name] = node
48
+ end
49
+
50
+ # now add all of the edges and data nodes
51
+ data_nodes = {}
52
+ @nodes.each do |name, node|
53
+ task = node.task
54
+ # find all prereq tasks
55
+ # task.prerequisites.each do |prereq|
56
+ # prereq = find_prereq(task, prereq)
57
+ # next unless @nodes.has_key?(prereq)
58
+ # @edges << TaskEdge.new(prereq, node.name)
59
+ # end
60
+ # find all data reads
61
+ task.job.read_locks.each do |read_lock|
62
+ data_name = RakeGraph.data_name(read_lock)
63
+ data_nodes[data_name] = DataNode.new(read_lock) unless data_nodes.has_key? data_name
64
+ @edges << DataEdge.new(data_name, node.name)
65
+ end
66
+ # find all data writes
67
+ task.job.write_locks.each do |write_lock|
68
+ data_name = RakeGraph.data_name(write_lock)
69
+ data_nodes[data_name] = DataNode.new(write_lock) unless data_nodes.has_key? data_name
70
+ @edges << DataEdge.new(node.name, data_name)
71
+ end
72
+ end
73
+ data_nodes.each do |key, value|
74
+ @nodes[key] = value
75
+ end
76
+ end
77
+
78
+ class Node
79
+ attr_reader :name, :type
80
+
81
+ def initialize(name, type)
82
+ @name = name
83
+ @type = type
84
+ end
85
+
86
+ def fontcolor
87
+ return '#000000'
88
+ end
89
+
90
+ def to_s
91
+ return "#{@type}: #{@name}"
92
+ end
93
+ end
94
+
95
+ class TaskNode < Node
96
+ attr_reader :task
97
+
98
+ def initialize(task)
99
+ super(RakeGraph.task_name(task), task.job.class.to_s)
100
+ @task = task
101
+ end
102
+
103
+ def label
104
+ label = "<#{@task}<br/>#{@task.job.uses_arg}>"
105
+ return label.to_sym
106
+ end
107
+
108
+ def shape
109
+ return :ellipse
110
+ end
111
+
112
+ def fillcolor
113
+ case @type
114
+ when 'Azkaban::PigJob'
115
+ return '#e7a5a5'
116
+ when 'Azkaban::JavaJob'
117
+ return '#E7C6A5'
118
+ when 'Azkaban::CommandJob'
119
+ return '#e7e6a5'
120
+ end
121
+ return ""
122
+ end
123
+ end
124
+
125
+ class DataNode < Node
126
+ attr_reader :filename
127
+
128
+ def initialize(filename)
129
+ super(RakeGraph.data_name(filename), "data")
130
+ @filename = filename
131
+ end
132
+
133
+ def label
134
+ label = @filename
135
+ return "<#{label}>".to_sym
136
+ end
137
+
138
+ def shape
139
+ return :box
140
+ end
141
+
142
+ def fillcolor
143
+ return '#d2e3f3'
144
+ end
145
+ end
146
+
147
+ class Edge
148
+ attr_reader :source, :dest, :type
149
+
150
+ def initialize(source, dest)
151
+ @source = source
152
+ @dest = dest
153
+ end
154
+
155
+ def to_s
156
+ return "#{source} >> #{dest}"
157
+ end
158
+ end
159
+
160
+ class TaskEdge < Edge
161
+ def initialize(source, dest)
162
+ super(source, dest)
163
+ @type = "task"
164
+ end
165
+
166
+ def style
167
+ return :dotted
168
+ end
169
+ end
170
+
171
+ class DataEdge < Edge
172
+ def initialize(source, dest)
173
+ super(source, dest)
174
+ @type = "data"
175
+ end
176
+
177
+ def style
178
+ :solid
179
+ end
180
+ end
181
+
182
+ def visualize(name, output_file)
183
+ g = GraphvizR.new name
184
+ g.graph[:label => name]
185
+ add_nodes(g)
186
+ add_edges(g)
187
+ g.output output_file
188
+ end
189
+
190
+ def add_nodes(g)
191
+ @nodes.each do |name, node|
192
+ g[name] [:label => @label_block.nil? ? node.label : @label_block.call(node),
193
+ :shape => @shape_block.nil? ? node.shape : @shape_block.call(node),
194
+ :fillcolor => @fillcolor_block.nil? ? node.fillcolor : @fillcolor_block.call(node),
195
+ :style => :filled,
196
+ :fontcolor => @fontcolor_block.nil? ? node.fontcolor : @fontcolor_block.call(node)]
197
+ end
198
+ end
199
+
200
+ def add_edges(g)
201
+ @edges.each do |edge|
202
+ (g[edge.source]>>g[edge.dest])[:style => edge.style]
203
+ end
204
+ end
205
+
206
+ def set_label(&block)
207
+ @label_block = block
208
+ end
209
+
210
+ def set_fillcolor(&block)
211
+ @fillcolor_block = block
212
+ end
213
+
214
+ def set_fontcolor(&block)
215
+ @fontcolor_block = block
216
+ end
217
+
218
+ def set_shape(&block)
219
+ @shape_block = block
220
+ end
221
+ end
metadata CHANGED
@@ -1,41 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: azkaban-rb
3
3
  version: !ruby/object:Gem::Version
4
- hash: 19
5
4
  prerelease:
6
- segments:
7
- - 0
8
- - 0
9
- - 6
10
- version: 0.0.6
5
+ version: 0.0.7
11
6
  platform: ruby
12
7
  authors:
13
- - Matt Hayes
8
+ - Matt Hayes
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
12
 
18
- date: 2011-08-22 00:00:00 Z
13
+ date: 2011-08-23 00:00:00 -07:00
14
+ default_executable:
19
15
  dependencies:
20
- - !ruby/object:Gem::Dependency
21
- name: httpclient
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
24
- none: false
25
- requirements:
26
- - - ~>
27
- - !ruby/object:Gem::Version
28
- hash: 7
29
- segments:
30
- - 2
31
- - 1
32
- - 6
33
- version: 2.1.6
34
- type: :runtime
35
- version_requirements: *id001
16
+ - !ruby/object:Gem::Dependency
17
+ name: httpclient
18
+ prerelease: false
19
+ requirement: &id001 !ruby/object:Gem::Requirement
20
+ none: false
21
+ requirements:
22
+ - - ~>
23
+ - !ruby/object:Gem::Version
24
+ version: 2.1.6
25
+ type: :runtime
26
+ version_requirements: *id001
27
+ - !ruby/object:Gem::Dependency
28
+ name: GraphvizR
29
+ prerelease: false
30
+ requirement: &id002 !ruby/object:Gem::Requirement
31
+ none: false
32
+ requirements:
33
+ - - ~>
34
+ - !ruby/object:Gem::Version
35
+ version: 0.5.1
36
+ type: :runtime
37
+ version_requirements: *id002
36
38
  description: azkaban-rb allows Azkaban jobs to be modeled as rake tasks
37
39
  email:
38
- - matthew.terence.hayes@gmail.com
40
+ - matthew.terence.hayes@gmail.com
39
41
  executables: []
40
42
 
41
43
  extensions: []
@@ -43,22 +45,24 @@ extensions: []
43
45
  extra_rdoc_files: []
44
46
 
45
47
  files:
46
- - .gitignore
47
- - Gemfile
48
- - Rakefile
49
- - azkaban-rb.gemspec
50
- - example/.gitignore
51
- - example/Rakefile
52
- - example/bin/pig
53
- - example/data/input.txt
54
- - example/example_config.yml
55
- - example/hadoop-lzo-0.4.9.jar
56
- - example/pig-0.9.0-core.jar
57
- - example/src/test.pig
58
- - example/src/test2.pig
59
- - lib/azkaban-rb.rb
60
- - lib/azkaban-rb/tasks.rb
61
- - lib/azkaban-rb/version.rb
48
+ - .gitignore
49
+ - Gemfile
50
+ - Rakefile
51
+ - azkaban-rb.gemspec
52
+ - example/.gitignore
53
+ - example/Rakefile
54
+ - example/bin/pig
55
+ - example/data/input.txt
56
+ - example/example_config.yml
57
+ - example/hadoop-lzo-0.4.9.jar
58
+ - example/pig-0.9.0-core.jar
59
+ - example/src/test.pig
60
+ - example/src/test2.pig
61
+ - lib/azkaban-rb.rb
62
+ - lib/azkaban-rb/tasks.rb
63
+ - lib/azkaban-rb/version.rb
64
+ - lib/azkaban-rb/visualization.rb
65
+ has_rdoc: true
62
66
  homepage: https://github.com/matthayes/azkaban-rb
63
67
  licenses: []
64
68
 
@@ -66,29 +70,23 @@ post_install_message:
66
70
  rdoc_options: []
67
71
 
68
72
  require_paths:
69
- - lib
73
+ - lib
70
74
  required_ruby_version: !ruby/object:Gem::Requirement
71
75
  none: false
72
76
  requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- hash: 3
76
- segments:
77
- - 0
78
- version: "0"
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: "0"
79
80
  required_rubygems_version: !ruby/object:Gem::Requirement
80
81
  none: false
81
82
  requirements:
82
- - - ">="
83
- - !ruby/object:Gem::Version
84
- hash: 3
85
- segments:
86
- - 0
87
- version: "0"
83
+ - - ">="
84
+ - !ruby/object:Gem::Version
85
+ version: "0"
88
86
  requirements: []
89
87
 
90
88
  rubyforge_project: azkaban-rb
91
- rubygems_version: 1.8.7
89
+ rubygems_version: 1.5.1
92
90
  signing_key:
93
91
  specification_version: 3
94
92
  summary: Azkaban job generation using Ruby