azkaban-rb 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ config.yml
2
+ azkaban-rb-test.zip
3
+ job/
4
+ jobs/
data/example/Rakefile ADDED
@@ -0,0 +1,73 @@
1
# Example Rakefile: generates Azkaban job files for two Pig jobs and a final
# command job, bundles them with the jars and Pig sources into a zip, and
# deploys that zip to Azkaban. Settings are read from config.yml in the
# current directory.
$:.unshift(File.expand_path('../lib/'))

require 'azkaban-rb'
require 'yaml'

# File.exist? replaces File.exists?, which was removed in Ruby 3.2.
raise 'need config.yml' unless File.exist?('config.yml')

config = YAML.load(File.read('config.yml'))

# Plain locals instead of @@class variables: accessing a class variable from
# the top level raises RuntimeError on Ruby 3.0+. Every block below is a
# closure, so it sees these locals just as it already sees `config`.
job_namespace = 'azkaban-rb-test'
user_name = config["user_name"]
hdfs_root = "/user/#{user_name}"
zip_name = "#{job_namespace}.zip"
azkaban_path = job_namespace

Azkaban::JobFile.output_dir = "jobs/"

# Staging directory whose contents end up in the zip.
job_dir = "job"

desc "Remove all generated files"
task :clean_job_conf do
  `rm -rf #{Azkaban::JobFile.output_dir}` if File.exist? Azkaban::JobFile.output_dir
end

props :base do
  set "udf.import.list" => "oink.,com.linkedin.pig.,com.linkedin.pig.date.,org.apache.pig.piggybank.,com.linkedin.pig.characters."
  set "hadoop.job.ugi" => "#{user_name},hadoop"
  set "hdfs.default.classpath.dir" => config["hdfs_classpath"]
  set "jvm.args" => config["jvm_args"] if config["jvm_args"]
  set "classpath" => "pig-0.9.0-core.jar,hadoop-lzo-0.4.9.jar"
  set "param.job_root" => hdfs_root
end

namespace job_namespace.to_sym do

  pig_job :test do
    uses "src/test.pig"
    reads "#{hdfs_root}/input.txt", :as => "input"
    writes "#{hdfs_root}/input_grouped.txt", :as => "output"
  end

  pig_job :test2 => :test do
    uses "src/test2.pig"
    reads "#{hdfs_root}/input_grouped.txt", :as => "input"
    writes "#{hdfs_root}/input_ordered.txt", :as => "output"
  end

  command_job :all => :test2 do
    uses 'echo "Running the final job"'
  end
end

task :clean_job_dir do
  `rm -rf #{job_dir}` if File.exist? "#{job_dir}"
end

task :clean => [:clean_job_conf,:clean_job_dir] do
  `rm -rf #{zip_name}`
end

# Stages job files, jars and Pig sources in job_dir, zips the staged files,
# and moves the archive next to this Rakefile.
task :zip => [:clean, :base, "#{job_namespace}:all".to_sym] do
  `mkdir #{job_dir}`
  `cp #{Azkaban::JobFile.output_dir}/* #{job_dir}`
  `cp *.jar #{job_dir}`
  `cp -r src #{job_dir}/src`
  `cd #{job_dir}; zip -r #{zip_name} *; cd ..; mv #{job_dir}/#{zip_name} .`
end

task :deploy => :zip do
  Azkaban.deploy(config['azkaban_uri'], azkaban_path, zip_name)
end

task :default => :zip
data/example/bin/pig ADDED
@@ -0,0 +1,202 @@
1
+ #!/usr/bin/env bash
2
+
3
+ # Licensed to the Apache Software Foundation (ASF) under one
4
+ # or more contributor license agreements. See the NOTICE file
5
+ # distributed with this work for additional information
6
+ # regarding copyright ownership. The ASF licenses this file
7
+ # to you under the Apache License, Version 2.0 (the
8
+ # "License"); you may not use this file except in compliance
9
+ # with the License. You may obtain a copy of the License at
10
+ #
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ # See the License for the specific language governing permissions and
17
+ # limitations under the License.
18
+
19
+ #
20
+ # The Pig command script
21
+ #
22
+ # Environment Variables
23
+ #
24
+ # JAVA_HOME The java implementation to use. Required.
25
+ #
26
+ # PIG_CLASSPATH Extra Java CLASSPATH entries.
27
+ #
28
+ # PIG_HEAPSIZE The maximum amount of heap to use, in MB.
29
+ # Default is 1000.
30
+ #
31
+ # PIG_OPTS Extra Java runtime options.
32
+ #
33
+ # PIG_CONF_DIR Alternate conf dir. Default is ${PIG_HOME}/conf.
34
+ #
35
+ # PIG_ROOT_LOGGER The root appender. Default is INFO,console,DRFA
36
+ #
37
+ # PIG_HADOOP_VERSION Version of hadoop to run with. Default is 20 (0.20).
38
+ #
39
+ # HBASE_CONF_DIR - Optionally, the HBase configuration to run against
40
+ # when using HBaseStorage
41
+
42
+ cygwin=false
43
+ case "`uname`" in
44
+ CYGWIN*) cygwin=true;;
45
+ esac
46
+ debug=false
47
+
48
+ # filter command line parameter
49
+ for f in $@; do
50
+ if [[ $f = "-secretDebugCmd" ]]; then
51
+ debug=true
52
+ else
53
+ remaining="${remaining} $f"
54
+ fi
55
+ done
56
+
57
+ # resolve links - $0 may be a softlink
58
+ this="${BASH_SOURCE-$0}"
59
+ while [ -h "$this" ]; do
60
+ ls=`ls -ld "$this"`
61
+ link=`expr "$ls" : '.*-> \(.*\)$'`
62
+ if expr "$link" : '.*/.*' > /dev/null; then
63
+ this="$link"
64
+ else
65
+ this=`dirname "$this"`/"$link"
66
+ fi
67
+ done
68
+
69
+ # convert relative path to absolute path
70
+ bin=`dirname "$this"`
71
+ script=`basename "$this"`
72
+ bin=`unset CDPATH; cd "$bin"; pwd`
73
+ this="$bin/$script"
74
+
75
+ # the root of the Pig installation
76
+ export PIG_HOME=`dirname "$this"`/..
77
+
78
+ #check to see if the conf dir is given as an optional argument
79
+ if [ $# -gt 1 ]
80
+ then
81
+ if [ "--config" = "$1" ]
82
+ then
83
+ shift
84
+ confdir=$1
85
+ shift
86
+ PIG_CONF_DIR=$confdir
87
+ fi
88
+ fi
89
+
90
+ # Allow alternate conf dir location.
91
+ PIG_CONF_DIR="${PIG_CONF_DIR:-$PIG_HOME/conf}"
92
+
93
+ if [ -f "${PIG_CONF_DIR}/pig-env.sh" ]; then
94
+ . "${PIG_CONF_DIR}/pig-env.sh"
95
+ fi
96
+
97
+ # some Java parameters
98
+ if [ "$JAVA_HOME" != "" ]; then
99
+ #echo "run java in $JAVA_HOME"
100
+ JAVA_HOME=$JAVA_HOME
101
+ fi
102
+
103
+ if [ "$JAVA_HOME" = "" ]; then
104
+ echo "Error: JAVA_HOME is not set."
105
+ exit 1
106
+ fi
107
+
108
+ JAVA=$JAVA_HOME/bin/java
109
+ JAVA_HEAP_MAX=-Xmx1000m
110
+
111
+ # check envvars which might override default args
112
+ if [ "$PIG_HEAPSIZE" != "" ]; then
113
+ JAVA_HEAP_MAX="-Xmx""$PIG_HEAPSIZE""m"
114
+ fi
115
+
116
+ # CLASSPATH initially contains $PIG_CONF_DIR
117
+ CLASSPATH="${PIG_CONF_DIR}"
118
+ CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
119
+
120
+ # for developers, add Pig classes to CLASSPATH
121
+ if [ -d "$PIG_HOME/build/classes" ]; then
122
+ CLASSPATH=${CLASSPATH}:$PIG_HOME/build/classes
123
+ fi
124
+ if [ -d "$PIG_HOME/build/test/classes" ]; then
125
+ CLASSPATH=${CLASSPATH}:$PIG_HOME/build/test/classes
126
+ fi
127
+
128
+ # so that filenames w/ spaces are handled correctly in loops below
129
+ IFS=
130
+
131
+ # for releases, add core pig to CLASSPATH
132
+ for f in $PIG_HOME/pig-*-core.jar; do
133
+ CLASSPATH=${CLASSPATH}:$f;
134
+ done
135
+
136
+ # during development pig jar might be in build
137
+ for f in $PIG_HOME/build/pig-*-SNAPSHOT.jar; do
138
+ CLASSPATH=${CLASSPATH}:$f;
139
+ done
140
+
141
+ # Set the version for Hadoop, default to 20
142
+ PIG_HADOOP_VERSION="${PIG_HADOOP_VERSION:-20}"
143
+ # add libs to CLASSPATH. There can be more than one version of the hadoop
144
+ # libraries in the lib dir, so don't blindly add them all. Only add the one
145
+ # that matches PIG_HADOOP_VERSION.
146
+ for f in $PIG_HOME/lib/*.jar; do
147
+ filename=`basename $f`
148
+ IS_HADOOP=`echo $filename | grep hadoop`
149
+ if [ "${IS_HADOOP}x" == "x" ]; then
150
+ CLASSPATH=${CLASSPATH}:$f;
151
+ else
152
+ IS_RIGHT_VER=`echo $f | grep hadoop${PIG_HADOOP_VERSION}.jar`
153
+ if [ "${IS_RIGHT_VER}x" != "x" ]; then
154
+ CLASSPATH=${CLASSPATH}:$f;
155
+ fi
156
+ fi
157
+ done
158
+
159
+ # if using HBase, likely want to include HBase config
160
+ HBASE_CONF_DIR=${HBASE_CONF_DIR:-/etc/hbase}
161
+ if [ -n "$HBASE_CONF_DIR" ] && [ -d "$HBASE_CONF_DIR" ]; then
162
+ CLASSPATH=$HBASE_CONF_DIR:$CLASSPATH
163
+ fi
164
+
165
+ # add user-specified CLASSPATH last
166
+ if [ "$PIG_CLASSPATH" != "" ]; then
167
+ CLASSPATH=${CLASSPATH}:${PIG_CLASSPATH}
168
+ fi
169
+
170
+ # default log directory & file
171
+ if [ "$PIG_LOG_DIR" = "" ]; then
172
+ PIG_LOG_DIR="$PIG_HOME/logs"
173
+ fi
174
+ if [ "$PIG_LOGFILE" = "" ]; then
175
+ PIG_LOGFILE='pig.log'
176
+ fi
177
+
178
+ # cygwin path translation
179
+ if $cygwin; then
180
+ CLASSPATH=`cygpath -p -w "$CLASSPATH"`
181
+ PIG_HOME=`cygpath -d "$PIG_HOME"`
182
+ PIG_LOG_DIR=`cygpath -d "$PIG_LOG_DIR"`
183
+ fi
184
+
185
+ # restore ordinary behaviour
186
+ unset IFS
187
+
188
+ CLASS=org.apache.pig.Main
189
+
190
+ PIG_OPTS="$PIG_OPTS -Dpig.log.dir=$PIG_LOG_DIR"
191
+ PIG_OPTS="$PIG_OPTS -Dpig.log.file=$PIG_LOGFILE"
192
+ PIG_OPTS="$PIG_OPTS -Dpig.home.dir=$PIG_HOME"
193
+ PIG_OPTS="$PIG_OPTS -Dpig.root.logger=${PIG_ROOT_LOGGER:-INFO,console,DRFA}"
194
+
195
+ # run it
196
+ if [ "$debug" == "true" ]; then
197
+ echo "dry run:"
198
+ echo "$JAVA" $JAVA_HEAP_MAX $PIG_OPTS -classpath "$CLASSPATH" $CLASS ${remaining}
199
+ echo
200
+ else
201
+ exec "$JAVA" $JAVA_HEAP_MAX $PIG_OPTS -classpath "$CLASSPATH" $CLASS ${remaining}
202
+ fi
@@ -0,0 +1,100 @@
1
+ A 15
2
+ C 18
3
+ C 84
4
+ B 45
5
+ C 16
6
+ E 46
7
+ E 32
8
+ D 63
9
+ C 71
10
+ B 87
11
+ A 76
12
+ D 61
13
+ E 18
14
+ A 69
15
+ A 53
16
+ D 2
17
+ E 60
18
+ A 69
19
+ A 2
20
+ E 23
21
+ B 63
22
+ C 47
23
+ D 89
24
+ B 74
25
+ B 31
26
+ A 81
27
+ A 26
28
+ B 99
29
+ E 72
30
+ B 38
31
+ D 12
32
+ E 26
33
+ D 1
34
+ B 64
35
+ E 21
36
+ C 24
37
+ D 68
38
+ B 82
39
+ D 80
40
+ E 94
41
+ A 98
42
+ D 85
43
+ A 90
44
+ C 13
45
+ B 95
46
+ D 96
47
+ B 74
48
+ A 96
49
+ E 1
50
+ A 11
51
+ E 28
52
+ C 21
53
+ A 15
54
+ C 64
55
+ A 53
56
+ C 57
57
+ A 58
58
+ E 89
59
+ D 42
60
+ E 43
61
+ C 74
62
+ C 93
63
+ C 29
64
+ A 94
65
+ D 79
66
+ D 33
67
+ E 82
68
+ C 13
69
+ C 91
70
+ D 82
71
+ D 31
72
+ D 19
73
+ A 11
74
+ B 90
75
+ C 52
76
+ C 46
77
+ D 43
78
+ D 78
79
+ C 8
80
+ D 94
81
+ B 58
82
+ D 22
83
+ D 40
84
+ A 11
85
+ A 77
86
+ B 84
87
+ C 8
88
+ A 44
89
+ E 94
90
+ C 42
91
+ E 1
92
+ B 78
93
+ A 55
94
+ D 10
95
+ C 88
96
+ E 83
97
+ E 10
98
+ B 95
99
+ E 43
100
+ B 20
@@ -0,0 +1,4 @@
1
+ user_name: TODO
2
+ jvm_args: TODO
3
+ hdfs_classpath: TODO
4
+ azkaban_uri: TODO
Binary file
Binary file
@@ -0,0 +1,7 @@
1
+ rmf $output
2
+ inp = load '$input' as (type:chararray,count:int);
3
+
4
+ inp_grouped = GROUP inp BY type;
5
+ inp_grouped = FOREACH inp_grouped GENERATE group as type, SUM(inp.count) as count;
6
+
7
+ store inp_grouped into '$output';
@@ -0,0 +1,6 @@
1
+ rmf $output
2
+ inp = load '$input' as (type:chararray,count:int);
3
+
4
+ inp_ordered = ORDER inp BY count DESC;
5
+
6
+ STORE inp_ordered INTO '$output';
@@ -1,5 +1,11 @@
1
1
  require 'httpclient'
2
2
 
3
# Reopens Rake::Task so that a task can carry the job object built for it.
module Rake
  class Task
    # Job associated with this task; readable and writable.
    attr_reader :job
    attr_writer :job
  end
end
8
+
3
9
  module Azkaban
4
10
 
5
11
  def self.deploy(uri, path, zip_file)
@@ -57,13 +63,17 @@ module Azkaban
57
63
  HTTP::Message.mime_type_handler = Proc.new { |path| Azkaban::mime_type_handler(path) }
58
64
 
59
65
  class JobFile
66
+ attr_reader :read_locks, :write_locks, :task, :uses
60
67
 
61
68
  @output_dir = "conf/"
62
69
 
63
70
  def initialize(task, ext)
71
+ task.job = self
64
72
  @task = task
65
73
  @ext = ext
66
74
  @args = {}
75
+ @read_locks = []
76
+ @write_locks = []
67
77
  end
68
78
 
69
79
  class << self
@@ -76,6 +86,16 @@ module Azkaban
76
86
  end
77
87
  end
78
88
 
89
# Declares that this job reads +name+: the name is added to the job's read
# locks, and any trailing options are passed to handle_read_write_options.
def reads(name, *options)
  @read_locks.push(name)
  handle_read_write_options(options, name)
end
93
+
94
# Declares that this job writes +name+: the name is added to the job's write
# locks, and any trailing options are passed to handle_read_write_options.
def writes(name, *options)
  @write_locks.push(name)
  handle_read_write_options(options, name)
end
98
+
79
99
  def write
80
100
  if @args.size > 0
81
101
  file_name = @task.name.gsub(":", "-") + @ext
@@ -94,18 +114,74 @@ module Azkaban
94
114
 
95
115
  private
96
116
 
117
# Applies the options passed to #reads / #writes.
#
# options - the splatted trailing arguments of the caller; only the first
#           element is examined
# name    - the path being read or written
#
# When the first option is a Hash with a truthy :as entry, sets the job
# property "param.<as>" to +name+. Anything else is ignored.
def handle_read_write_options(options, name)
  opts = options.first
  # is_a? rather than instance_of? so Hash subclasses are accepted as well.
  return unless opts.is_a?(Hash) && opts[:as]

  set "param.#{opts[:as]}" => name
end
123
+
97
124
  def create_properties_file(file_name, props)
98
125
  unless File.exists? Azkaban::JobFile.output_dir
99
126
  Dir.mkdir Azkaban::JobFile.output_dir
100
127
  end
101
128
  file = File.new(Azkaban::JobFile.output_dir + file_name, "w+")
129
+ if @read_locks && @read_locks.size > 0
130
+ file.write("read.lock=#{@read_locks.join(",")}\n")
131
+ end
132
+ if @write_locks && @write_locks.size > 0
133
+ file.write("write.lock=#{@write_locks.join(",")}\n")
134
+ end
102
135
  props.each do |k,v|
103
136
  file.write("#{k}=#{v}\n")
104
137
  end
105
138
  file.close
106
139
  end
107
140
  end
141
+
142
# Job whose "type" property is "pig".
class PigJob < JobFile
  # Same arguments as JobFile#initialize; additionally sets type=pig.
  def initialize(task, ext)
    super
    set("type" => "pig")
  end

  # Sets the "pig.script" property to +name+.
  def uses(name)
    set("pig.script" => name)
  end
end
152
+
153
# Job whose "type" property is "java".
class JavaJob < JobFile
  # Same arguments as JobFile#initialize; additionally sets type=java.
  def initialize(task, ext)
    super
    set("type" => "java")
  end

  # Sets the "job.class" property to +name+.
  def uses(name)
    set("job.class" => name)
  end
end
163
+
164
# Job configured through the "java.class" property.
class JavaProcessJob < JobFile
  # Same arguments as JobFile#initialize; additionally sets type=java.
  # NOTE(review): this is the same type value JavaJob uses; a distinct
  # process-job type may have been intended. Confirm against the Azkaban
  # job type documentation.
  def initialize(task, ext)
    super
    set("type" => "java")
  end

  # Sets the "java.class" property to +name+.
  def uses(name)
    set("java.class" => name)
  end
end
108
174
 
175
# Job whose "type" property is "command".
class CommandJob < JobFile
  # Same arguments as JobFile#initialize; additionally sets type=command.
  def initialize(task, ext)
    super
    set("type" => "command")
  end

  # Sets the "command" property to +text+.
  def uses(text)
    set("command" => text)
  end
end
109
185
  end
110
186
 
111
187
  def props(*args, &b)
@@ -118,12 +194,29 @@ def props(*args, &b)
118
194
  end
119
195
  end
120
196
 
121
- def job(*args,&b)
122
- task(*args) do |t|
123
- unless b.nil?
124
- job = Azkaban::JobFile.new(t, ".job")
125
- job.instance_eval(&b)
126
- job.write
127
- end
197
# Task-definition helpers. Each one hands its arguments and optional block to
# make_job together with the job class to instantiate.

# Defines a task backed by a plain Azkaban::JobFile.
def job(*args, &block)
  make_job(Azkaban::JobFile, args, block)
end

# Defines a task backed by an Azkaban::PigJob.
def pig_job(*args, &block)
  make_job(Azkaban::PigJob, args, block)
end

# Defines a task backed by an Azkaban::JavaJob.
def java_job(*args, &block)
  make_job(Azkaban::JavaJob, args, block)
end

# Defines a task backed by an Azkaban::JavaProcessJob.
def java_process_job(*args, &block)
  make_job(Azkaban::JavaProcessJob, args, block)
end

# Defines a task backed by an Azkaban::CommandJob.
def command_job(*args, &block)
  make_job(Azkaban::CommandJob, args, block)
end
216
+
217
# Defines a task whose action writes the job file, and builds the job object
# attached to that task.
#
# job_class - class to instantiate; called as job_class.new(task, ".job")
# args      - arguments forwarded to `task`
# b         - optional proc, evaluated in the context of the new job via
#             instance_eval to configure it
#
# Returns the job, so callers get the same kind of value whether or not a
# configuration block was given.
def make_job(job_class, args, b)
  # The task action only runs when the task is invoked; by then new_job has
  # been assigned and configured, so the closure sees the finished job.
  new_job = nil
  rake_task = task(*args) { new_job.write }
  new_job = job_class.new(rake_task, ".job")
  new_job.instance_eval(&b) if b
  new_job
end
@@ -1,5 +1,5 @@
1
1
  module Azkaban
2
2
  module Rb
3
- VERSION = "0.0.4"
3
+ VERSION = "0.0.5"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,8 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: azkaban-rb
3
3
  version: !ruby/object:Gem::Version
4
+ hash: 21
4
5
  prerelease:
5
- version: 0.0.4
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 5
10
+ version: 0.0.5
6
11
  platform: ruby
7
12
  authors:
8
13
  - Matt Hayes
@@ -10,8 +15,7 @@ autorequire:
10
15
  bindir: bin
11
16
  cert_chain: []
12
17
 
13
- date: 2011-06-23 00:00:00 -07:00
14
- default_executable:
18
+ date: 2011-08-20 00:00:00 Z
15
19
  dependencies:
16
20
  - !ruby/object:Gem::Dependency
17
21
  name: httpclient
@@ -21,6 +25,11 @@ dependencies:
21
25
  requirements:
22
26
  - - ~>
23
27
  - !ruby/object:Gem::Version
28
+ hash: 7
29
+ segments:
30
+ - 2
31
+ - 1
32
+ - 6
24
33
  version: 2.1.6
25
34
  type: :runtime
26
35
  version_requirements: *id001
@@ -38,10 +47,18 @@ files:
38
47
  - Gemfile
39
48
  - Rakefile
40
49
  - azkaban-rb.gemspec
50
+ - example/.gitignore
51
+ - example/Rakefile
52
+ - example/bin/pig
53
+ - example/data/input.txt
54
+ - example/example_config.yml
55
+ - example/hadoop-lzo-0.4.9.jar
56
+ - example/pig-0.9.0-core.jar
57
+ - example/src/test.pig
58
+ - example/src/test2.pig
41
59
  - lib/azkaban-rb.rb
42
60
  - lib/azkaban-rb/tasks.rb
43
61
  - lib/azkaban-rb/version.rb
44
- has_rdoc: true
45
62
  homepage: https://github.com/matthayes/azkaban-rb
46
63
  licenses: []
47
64
 
@@ -55,17 +72,23 @@ required_ruby_version: !ruby/object:Gem::Requirement
55
72
  requirements:
56
73
  - - ">="
57
74
  - !ruby/object:Gem::Version
75
+ hash: 3
76
+ segments:
77
+ - 0
58
78
  version: "0"
59
79
  required_rubygems_version: !ruby/object:Gem::Requirement
60
80
  none: false
61
81
  requirements:
62
82
  - - ">="
63
83
  - !ruby/object:Gem::Version
84
+ hash: 3
85
+ segments:
86
+ - 0
64
87
  version: "0"
65
88
  requirements: []
66
89
 
67
90
  rubyforge_project: azkaban-rb
68
- rubygems_version: 1.6.2
91
+ rubygems_version: 1.8.7
69
92
  signing_key:
70
93
  specification_version: 3
71
94
  summary: Azkaban job generation using Ruby