azkaban-rb 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,4 @@
1
+ config.yml
2
+ azkaban-rb-test.zip
3
+ job/
4
+ jobs/
data/example/Rakefile ADDED
@@ -0,0 +1,73 @@
1
+ $:.unshift(File.expand_path('../lib/'))
2
+
3
+ require 'azkaban-rb'
4
+ require 'yaml'
5
+
6
+ raise 'need config.yml' unless File.exists?('config.yml')
7
+
8
+ config = YAML.load(File.read('config.yml'))
9
+
10
+ @@job_namespace = 'azkaban-rb-test'
11
+ @@user_name = config["user_name"]
12
+ @@hdfs_root = "/user/#{@@user_name}"
13
+ @@zip_name = "#{@@job_namespace}.zip"
14
+ @@azkaban_path = @@job_namespace
15
+
16
+ Azkaban::JobFile.output_dir = "jobs/"
17
+
18
+ job_dir = "job"
19
+
20
+ desc "Remove all generated files"
21
+ task :clean_job_conf do
22
+ `rm -rf #{Azkaban::JobFile.output_dir}` if File.exists? Azkaban::JobFile.output_dir
23
+ end
24
+
25
+ props :base do
26
+ set "udf.import.list" => "oink.,com.linkedin.pig.,com.linkedin.pig.date.,org.apache.pig.piggybank.,com.linkedin.pig.characters."
27
+ set "hadoop.job.ugi" => "#{@@user_name},hadoop"
28
+ set "hdfs.default.classpath.dir" => config["hdfs_classpath"]
29
+ set "jvm.args" => config["jvm_args"] if config["jvm_args"]
30
+ set "classpath" => "pig-0.9.0-core.jar,hadoop-lzo-0.4.9.jar"
31
+ set "param.job_root" => @@hdfs_root
32
+ end
33
+
34
+ namespace @@job_namespace.to_sym do
35
+
36
+ pig_job :test do
37
+ uses "src/test.pig"
38
+ reads "#{@@hdfs_root}/input.txt", :as => "input"
39
+ writes "#{@@hdfs_root}/input_grouped.txt", :as => "output"
40
+ end
41
+
42
+ pig_job :test2 => :test do
43
+ uses "src/test2.pig"
44
+ reads "#{@@hdfs_root}/input_grouped.txt", :as => "input"
45
+ writes "#{@@hdfs_root}/input_ordered.txt", :as => "output"
46
+ end
47
+
48
+ command_job :all => :test2 do
49
+ uses 'echo "Running the final job"'
50
+ end
51
+ end
52
+
53
+ task :clean_job_dir do
54
+ `rm -rf #{job_dir}` if File.exists? "#{job_dir}"
55
+ end
56
+
57
+ task :clean => [:clean_job_conf,:clean_job_dir] do
58
+ `rm -rf #{@@zip_name}`
59
+ end
60
+
61
+ task :zip => [:clean, :base, "#{@@job_namespace}:all".to_sym] do
62
+ `mkdir #{job_dir}`
63
+ `cp #{Azkaban::JobFile.output_dir}/* #{job_dir}`
64
+ `cp *.jar #{job_dir}`
65
+ `cp -r src #{job_dir}/src`
66
+ `cd #{job_dir}; zip -r #{@@zip_name} *; cd ..; mv #{job_dir}/#{@@zip_name} .`
67
+ end
68
+
69
+ task :deploy => :zip do
70
+ Azkaban.deploy(config['azkaban_uri'], @@azkaban_path, @@zip_name)
71
+ end
72
+
73
+ task :default => :zip
data/example/bin/pig ADDED
@@ -0,0 +1,202 @@
1
+ #!/usr/bin/env bash
2
+
3
+ # Licensed to the Apache Software Foundation (ASF) under one
4
+ # or more contributor license agreements. See the NOTICE file
5
+ # distributed with this work for additional information
6
+ # regarding copyright ownership. The ASF licenses this file
7
+ # to you under the Apache License, Version 2.0 (the
8
+ # "License"); you may not use this file except in compliance
9
+ # with the License. You may obtain a copy of the License at
10
+ #
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ # See the License for the specific language governing permissions and
17
+ # limitations under the License.
18
+
19
+ #
20
+ # The Pig command script
21
+ #
22
+ # Environment Variables
23
+ #
24
+ # JAVA_HOME The java implementation to use. Overrides JAVA_HOME.
25
+ #
26
+ # PIG_CLASSPATH Extra Java CLASSPATH entries.
27
+ #
28
+ # PIG_HEAPSIZE The maximum amount of heap to use, in MB.
29
+ # Default is 1000.
30
+ #
31
+ # PIG_OPTS Extra Java runtime options.
32
+ #
33
+ # PIG_CONF_DIR Alternate conf dir. Default is ${PIG_HOME}/conf.
34
+ #
35
+ # PIG_ROOT_LOGGER The root appender. Default is INFO,console
36
+ #
37
+ # PIG_HADOOP_VERSION Version of hadoop to run with. Default is 20 (0.20).
38
+ #
39
+ # HBASE_CONF_DIR - Optionally, the HBase configuration to run against
40
+ # when using HBaseStorage
41
+
42
+ cygwin=false
43
+ case "`uname`" in
44
+ CYGWIN*) cygwin=true;;
45
+ esac
46
+ debug=false
47
+
48
+ # filter command line parameter
49
+ for f in $@; do
50
+ if [[ $f = "-secretDebugCmd" ]]; then
51
+ debug=true
52
+ else
53
+ remaining="${remaining} $f"
54
+ fi
55
+ done
56
+
57
+ # resolve links - $0 may be a softlink
58
+ this="${BASH_SOURCE-$0}"
59
+ while [ -h "$this" ]; do
60
+ ls=`ls -ld "$this"`
61
+ link=`expr "$ls" : '.*-> \(.*\)$'`
62
+ if expr "$link" : '.*/.*' > /dev/null; then
63
+ this="$link"
64
+ else
65
+ this=`dirname "$this"`/"$link"
66
+ fi
67
+ done
68
+
69
+ # convert relative path to absolute path
70
+ bin=`dirname "$this"`
71
+ script=`basename "$this"`
72
+ bin=`unset CDPATH; cd "$bin"; pwd`
73
+ this="$bin/$script"
74
+
75
+ # the root of the Pig installation
76
+ export PIG_HOME=`dirname "$this"`/..
77
+
78
+ #check to see if the conf dir is given as an optional argument
79
+ if [ $# -gt 1 ]
80
+ then
81
+ if [ "--config" = "$1" ]
82
+ then
83
+ shift
84
+ confdir=$1
85
+ shift
86
+ PIG_CONF_DIR=$confdir
87
+ fi
88
+ fi
89
+
90
+ # Allow alternate conf dir location.
91
+ PIG_CONF_DIR="${PIG_CONF_DIR:-$PIG_HOME/conf}"
92
+
93
+ if [ -f "${PIG_CONF_DIR}/pig-env.sh" ]; then
94
+ . "${PIG_CONF_DIR}/pig-env.sh"
95
+ fi
96
+
97
+ # some Java parameters
98
+ if [ "$JAVA_HOME" != "" ]; then
99
+ #echo "run java in $JAVA_HOME"
100
+ JAVA_HOME=$JAVA_HOME
101
+ fi
102
+
103
+ if [ "$JAVA_HOME" = "" ]; then
104
+ echo "Error: JAVA_HOME is not set."
105
+ exit 1
106
+ fi
107
+
108
+ JAVA=$JAVA_HOME/bin/java
109
+ JAVA_HEAP_MAX=-Xmx1000m
110
+
111
+ # check envvars which might override default args
112
+ if [ "$PIG_HEAPSIZE" != "" ]; then
113
+ JAVA_HEAP_MAX="-Xmx""$PIG_HEAPSIZE""m"
114
+ fi
115
+
116
+ # CLASSPATH initially contains $PIG_CONF_DIR
117
+ CLASSPATH="${PIG_CONF_DIR}"
118
+ CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
119
+
120
+ # for developers, add Pig classes to CLASSPATH
121
+ if [ -d "$PIG_HOME/build/classes" ]; then
122
+ CLASSPATH=${CLASSPATH}:$PIG_HOME/build/classes
123
+ fi
124
+ if [ -d "$PIG_HOME/build/test/classes" ]; then
125
+ CLASSPATH=${CLASSPATH}:$PIG_HOME/build/test/classes
126
+ fi
127
+
128
+ # so that filenames w/ spaces are handled correctly in loops below
129
+ IFS=
130
+
131
+ # for releases, add core pig to CLASSPATH
132
+ for f in $PIG_HOME/pig-*-core.jar; do
133
+ CLASSPATH=${CLASSPATH}:$f;
134
+ done
135
+
136
+ # during development pig jar might be in build
137
+ for f in $PIG_HOME/build/pig-*-SNAPSHOT.jar; do
138
+ CLASSPATH=${CLASSPATH}:$f;
139
+ done
140
+
141
 + # Set the version for Hadoop, default to 20
142
+ PIG_HADOOP_VERSION="${PIG_HADOOP_VERSION:-20}"
143
+ # add libs to CLASSPATH. There can be more than one version of the hadoop
144
+ # libraries in the lib dir, so don't blindly add them all. Only add the one
145
+ # that matche PIG_HADOOP_VERSION.
146
+ for f in $PIG_HOME/lib/*.jar; do
147
+ filename=`basename $f`
148
+ IS_HADOOP=`echo $filename | grep hadoop`
149
+ if [ "${IS_HADOOP}x" == "x" ]; then
150
+ CLASSPATH=${CLASSPATH}:$f;
151
+ else
152
+ IS_RIGHT_VER=`echo $f | grep hadoop${PIG_HADOOP_VERSION}.jar`
153
+ if [ "${IS_RIGHT_VER}x" != "x" ]; then
154
+ CLASSPATH=${CLASSPATH}:$f;
155
+ fi
156
+ fi
157
+ done
158
+
159
+ # if using HBase, likely want to include HBase config
160
+ HBASE_CONF_DIR=${HBASE_CONF_DIR:-/etc/hbase}
161
+ if [ -n "$HBASE_CONF_DIR" ] && [ -d "$HBASE_CONF_DIR" ]; then
162
+ CLASSPATH=$HBASE_CONF_DIR:$CLASSPATH
163
+ fi
164
+
165
+ # add user-specified CLASSPATH last
166
+ if [ "$PIG_CLASSPATH" != "" ]; then
167
+ CLASSPATH=${CLASSPATH}:${PIG_CLASSPATH}
168
+ fi
169
+
170
+ # default log directory & file
171
+ if [ "$PIG_LOG_DIR" = "" ]; then
172
+ PIG_LOG_DIR="$PIG_HOME/logs"
173
+ fi
174
+ if [ "$PIG_LOGFILE" = "" ]; then
175
+ PIG_LOGFILE='pig.log'
176
+ fi
177
+
178
+ # cygwin path translation
179
+ if $cygwin; then
180
+ CLASSPATH=`cygpath -p -w "$CLASSPATH"`
181
+ PIG_HOME=`cygpath -d "$PIG_HOME"`
182
+ PIG_LOG_DIR=`cygpath -d "$PIG_LOG_DIR"`
183
+ fi
184
+
185
+ # restore ordinary behaviour
186
+ unset IFS
187
+
188
+ CLASS=org.apache.pig.Main
189
+
190
+ PIG_OPTS="$PIG_OPTS -Dpig.log.dir=$PIG_LOG_DIR"
191
+ PIG_OPTS="$PIG_OPTS -Dpig.log.file=$PIG_LOGFILE"
192
+ PIG_OPTS="$PIG_OPTS -Dpig.home.dir=$PIG_HOME"
193
+ PIG_OPTS="$PIG_OPTS -Dpig.root.logger=${PIG_ROOT_LOGGER:-INFO,console,DRFA}"
194
+
195
+ # run it
196
+ if [ "$debug" == "true" ]; then
197
+ echo "dry run:"
198
+ echo "$JAVA" $JAVA_HEAP_MAX $PIG_OPTS -classpath "$CLASSPATH" $CLASS ${remaining}
199
+ echo
200
+ else
201
+ exec "$JAVA" $JAVA_HEAP_MAX $PIG_OPTS -classpath "$CLASSPATH" $CLASS ${remaining}
202
+ fi
@@ -0,0 +1,100 @@
1
+ A 15
2
+ C 18
3
+ C 84
4
+ B 45
5
+ C 16
6
+ E 46
7
+ E 32
8
+ D 63
9
+ C 71
10
+ B 87
11
+ A 76
12
+ D 61
13
+ E 18
14
+ A 69
15
+ A 53
16
+ D 2
17
+ E 60
18
+ A 69
19
+ A 2
20
+ E 23
21
+ B 63
22
+ C 47
23
+ D 89
24
+ B 74
25
+ B 31
26
+ A 81
27
+ A 26
28
+ B 99
29
+ E 72
30
+ B 38
31
+ D 12
32
+ E 26
33
+ D 1
34
+ B 64
35
+ E 21
36
+ C 24
37
+ D 68
38
+ B 82
39
+ D 80
40
+ E 94
41
+ A 98
42
+ D 85
43
+ A 90
44
+ C 13
45
+ B 95
46
+ D 96
47
+ B 74
48
+ A 96
49
+ E 1
50
+ A 11
51
+ E 28
52
+ C 21
53
+ A 15
54
+ C 64
55
+ A 53
56
+ C 57
57
+ A 58
58
+ E 89
59
+ D 42
60
+ E 43
61
+ C 74
62
+ C 93
63
+ C 29
64
+ A 94
65
+ D 79
66
+ D 33
67
+ E 82
68
+ C 13
69
+ C 91
70
+ D 82
71
+ D 31
72
+ D 19
73
+ A 11
74
+ B 90
75
+ C 52
76
+ C 46
77
+ D 43
78
+ D 78
79
+ C 8
80
+ D 94
81
+ B 58
82
+ D 22
83
+ D 40
84
+ A 11
85
+ A 77
86
+ B 84
87
+ C 8
88
+ A 44
89
+ E 94
90
+ C 42
91
+ E 1
92
+ B 78
93
+ A 55
94
+ D 10
95
+ C 88
96
+ E 83
97
+ E 10
98
+ B 95
99
+ E 43
100
+ B 20
@@ -0,0 +1,4 @@
1
+ user_name: TODO
2
+ jvm_args: TODO
3
+ hdfs_classpath: TODO
4
+ azkaban_uri: TODO
Binary file
Binary file
@@ -0,0 +1,7 @@
1
+ rmf $output
2
+ inp = load '$input' as (type:chararray,count:int);
3
+
4
+ inp_grouped = GROUP inp BY type;
5
+ inp_grouped = FOREACH inp_grouped GENERATE group as type, SUM(inp.count) as count;
6
+
7
+ store inp_grouped into '$output';
@@ -0,0 +1,6 @@
1
+ rmf $output
2
+ inp = load '$input' as (type:chararray,count:int);
3
+
4
+ inp_ordered = ORDER inp BY count DESC;
5
+
6
+ STORE inp_ordered INTO '$output';
@@ -1,5 +1,11 @@
1
1
  require 'httpclient'
2
2
 
3
+ module Rake
4
+ class Task
5
+ attr_accessor :job
6
+ end
7
+ end
8
+
3
9
  module Azkaban
4
10
 
5
11
  def self.deploy(uri, path, zip_file)
@@ -57,13 +63,17 @@ module Azkaban
57
63
  HTTP::Message.mime_type_handler = Proc.new { |path| Azkaban::mime_type_handler(path) }
58
64
 
59
65
  class JobFile
66
+ attr_reader :read_locks, :write_locks, :task, :uses
60
67
 
61
68
  @output_dir = "conf/"
62
69
 
63
70
  def initialize(task, ext)
71
+ task.job = self
64
72
  @task = task
65
73
  @ext = ext
66
74
  @args = {}
75
+ @read_locks = []
76
+ @write_locks = []
67
77
  end
68
78
 
69
79
  class << self
@@ -76,6 +86,16 @@ module Azkaban
76
86
  end
77
87
  end
78
88
 
89
+ def reads(name, *options)
90
+ @read_locks << name
91
+ handle_read_write_options(options, name)
92
+ end
93
+
94
+ def writes(name, *options)
95
+ @write_locks << name
96
+ handle_read_write_options(options, name)
97
+ end
98
+
79
99
  def write
80
100
  if @args.size > 0
81
101
  file_name = @task.name.gsub(":", "-") + @ext
@@ -94,18 +114,74 @@ module Azkaban
94
114
 
95
115
  private
96
116
 
117
+ def handle_read_write_options(options, name)
118
+ options = options[0] if options.size > 0
119
+ if options && options.instance_of?(Hash) && options[:as]
120
+ set "param.#{options[:as]}" => name
121
+ end
122
+ end
123
+
97
124
  def create_properties_file(file_name, props)
98
125
  unless File.exists? Azkaban::JobFile.output_dir
99
126
  Dir.mkdir Azkaban::JobFile.output_dir
100
127
  end
101
128
  file = File.new(Azkaban::JobFile.output_dir + file_name, "w+")
129
+ if @read_locks && @read_locks.size > 0
130
+ file.write("read.lock=#{@read_locks.join(",")}\n")
131
+ end
132
+ if @write_locks && @write_locks.size > 0
133
+ file.write("write.lock=#{@write_locks.join(",")}\n")
134
+ end
102
135
  props.each do |k,v|
103
136
  file.write("#{k}=#{v}\n")
104
137
  end
105
138
  file.close
106
139
  end
107
140
  end
141
+
142
+ class PigJob < JobFile
143
+ def initialize(task, ext)
144
+ super(task,ext)
145
+ set "type"=>"pig"
146
+ end
147
+
148
+ def uses(name)
149
+ set "pig.script"=>name
150
+ end
151
+ end
152
+
153
+ class JavaJob < JobFile
154
+ def initialize(task, ext)
155
+ super(task,ext)
156
+ set "type"=>"java"
157
+ end
158
+
159
+ def uses(name)
160
+ set "job.class"=>name
161
+ end
162
+ end
163
+
164
+ class JavaProcessJob < JobFile
165
+ def initialize(task, ext)
166
+ super(task,ext)
167
+ set "type"=>"java"
168
+ end
169
+
170
+ def uses(name)
171
+ set "java.class"=>name
172
+ end
173
+ end
108
174
 
175
+ class CommandJob < JobFile
176
+ def initialize(task, ext)
177
+ super(task,ext)
178
+ set "type"=>"command"
179
+ end
180
+
181
+ def uses(text)
182
+ set "command"=>text
183
+ end
184
+ end
109
185
  end
110
186
 
111
187
  def props(*args, &b)
@@ -118,12 +194,29 @@ def props(*args, &b)
118
194
  end
119
195
  end
120
196
 
121
- def job(*args,&b)
122
- task(*args) do |t|
123
- unless b.nil?
124
- job = Azkaban::JobFile.new(t, ".job")
125
- job.instance_eval(&b)
126
- job.write
127
- end
197
+ def job(*args,&b)
198
+ make_job(Azkaban::JobFile, args, b)
199
+ end
200
+
201
+ def pig_job(*args,&b)
202
+ make_job(Azkaban::PigJob, args, b)
203
+ end
204
+
205
+ def java_job(*args,&b)
206
+ make_job(Azkaban::JavaJob, args, b)
207
+ end
208
+
209
+ def java_process_job(*args,&b)
210
+ make_job(Azkaban::JavaProcessJob, args, b)
211
+ end
212
+
213
+ def command_job(*args,&b)
214
+ make_job(Azkaban::CommandJob, args, b)
215
+ end
216
+
217
+ def make_job(job_class,args,b)
218
+ job = job_class.new(task(*args) { job.write }, ".job")
219
+ unless b.nil?
220
+ job.instance_eval(&b)
128
221
  end
129
222
  end
@@ -1,5 +1,5 @@
1
1
  module Azkaban
2
2
  module Rb
3
- VERSION = "0.0.4"
3
+ VERSION = "0.0.5"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,8 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: azkaban-rb
3
3
  version: !ruby/object:Gem::Version
4
+ hash: 21
4
5
  prerelease:
5
- version: 0.0.4
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 5
10
+ version: 0.0.5
6
11
  platform: ruby
7
12
  authors:
8
13
  - Matt Hayes
@@ -10,8 +15,7 @@ autorequire:
10
15
  bindir: bin
11
16
  cert_chain: []
12
17
 
13
- date: 2011-06-23 00:00:00 -07:00
14
- default_executable:
18
+ date: 2011-08-20 00:00:00 Z
15
19
  dependencies:
16
20
  - !ruby/object:Gem::Dependency
17
21
  name: httpclient
@@ -21,6 +25,11 @@ dependencies:
21
25
  requirements:
22
26
  - - ~>
23
27
  - !ruby/object:Gem::Version
28
+ hash: 7
29
+ segments:
30
+ - 2
31
+ - 1
32
+ - 6
24
33
  version: 2.1.6
25
34
  type: :runtime
26
35
  version_requirements: *id001
@@ -38,10 +47,18 @@ files:
38
47
  - Gemfile
39
48
  - Rakefile
40
49
  - azkaban-rb.gemspec
50
+ - example/.gitignore
51
+ - example/Rakefile
52
+ - example/bin/pig
53
+ - example/data/input.txt
54
+ - example/example_config.yml
55
+ - example/hadoop-lzo-0.4.9.jar
56
+ - example/pig-0.9.0-core.jar
57
+ - example/src/test.pig
58
+ - example/src/test2.pig
41
59
  - lib/azkaban-rb.rb
42
60
  - lib/azkaban-rb/tasks.rb
43
61
  - lib/azkaban-rb/version.rb
44
- has_rdoc: true
45
62
  homepage: https://github.com/matthayes/azkaban-rb
46
63
  licenses: []
47
64
 
@@ -55,17 +72,23 @@ required_ruby_version: !ruby/object:Gem::Requirement
55
72
  requirements:
56
73
  - - ">="
57
74
  - !ruby/object:Gem::Version
75
+ hash: 3
76
+ segments:
77
+ - 0
58
78
  version: "0"
59
79
  required_rubygems_version: !ruby/object:Gem::Requirement
60
80
  none: false
61
81
  requirements:
62
82
  - - ">="
63
83
  - !ruby/object:Gem::Version
84
+ hash: 3
85
+ segments:
86
+ - 0
64
87
  version: "0"
65
88
  requirements: []
66
89
 
67
90
  rubyforge_project: azkaban-rb
68
- rubygems_version: 1.6.2
91
+ rubygems_version: 1.8.7
69
92
  signing_key:
70
93
  specification_version: 3
71
94
  summary: Azkaban job generation using Ruby