jashmenn-poolparty-extensions 0.1.0 → 0.1.1
- data/VERSION.yml +1 -1
- data/lib/extensions/convenience_helpers.rb +4 -0
- data/lib/extensions/ganglia/ganglia.rb +22 -0
- data/lib/extensions/ganglia/templates/hadoop-metrics.properties.erb +65 -0
- data/lib/extensions/hadoop/hadoop.rb +319 -0
- data/lib/extensions/hadoop/templates/core-site.xml.erb +64 -0
- data/lib/extensions/hadoop/templates/hadoop-env.sh +3 -0
- data/lib/extensions/hadoop/templates/hadoop-site.xml.erb +9 -0
- data/lib/extensions/hadoop/templates/hadoop_hosts.erb +0 -0
- data/lib/extensions/hadoop/templates/hdfs-site.xml.erb +47 -0
- data/lib/extensions/hadoop/templates/init.d/hadoop-datanode +119 -0
- data/lib/extensions/hadoop/templates/init.d/hadoop-jobtracker +119 -0
- data/lib/extensions/hadoop/templates/init.d/hadoop-namenode +119 -0
- data/lib/extensions/hadoop/templates/init.d/hadoop-secondarynamenode +119 -0
- data/lib/extensions/hadoop/templates/init.d/hadoop-tasktracker +119 -0
- data/lib/extensions/hadoop/templates/jvm.conf +12 -0
- data/lib/extensions/hadoop/templates/log4j.properties.erb +94 -0
- data/lib/extensions/hadoop/templates/mapred-site.xml.erb +70 -0
- data/lib/extensions/hive/hive.rb +118 -0
- metadata +18 -2

data/lib/extensions/hadoop/templates/init.d/hadoop-secondarynamenode
@@ -0,0 +1,119 @@
+#! /bin/sh
+#
+# skeleton example file to build /etc/init.d/ scripts.
+# This file should be used to construct scripts for /etc/init.d.
+#
+# Written by Miquel van Smoorenburg <miquels@cistron.nl>.
+# Modified for Debian
+# by Ian Murdock <imurdock@gnu.ai.mit.edu>.
+# Further changes by Javier Fernandez-Sanguino <jfs@debian.org>
+#
+# Version: @(#)skeleton 1.9 26-Feb-2001 miquels@cistron.nl
+#
+### BEGIN INIT INFO
+# Provides:          hadoop-secondarynamenode
+# Required-Start:    $network $local_fs
+# Required-Stop:
+# Should-Start:      $named
+# Should-Stop:
+# Default-Start:     2 3 4 5
+# Default-Stop:      0 1 6
+# Short-Description: Hadoop secondarynamenode daemon
+### END INIT INFO
+
+set -e
+
+# Include hadoop defaults if available
+if [ -f /etc/default/hadoop ] ; then
+  . /etc/default/hadoop
+fi
+
+PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
+DAEMON_SCRIPT=$HADOOP_HOME/bin/hadoop-daemon.sh
+NAME=hadoop-secondarynamenode
+DESC="Hadoop secondarynamenode daemon"
+
+test -x $DAEMON_SCRIPT || exit 0
+
+LOGDIR=$HADOOP_LOG_DIR
+PIDFILE=/var/run/hadoop/secondarynamenode.pid
+DODTIME=3   # Time to wait for the server to die, in seconds
+            # If this value is set too low you might not
+            # let some servers die gracefully and
+            # 'restart' will not work
+
+get_running_pid() {
+  pid=$(ps axw -eo pid,command | tr 'A-Z' 'a-z' | grep org.apache.hadoop | grep secondarynamenode | grep java | awk '{print $1}')
+}
+
+running() {
+  get_running_pid
+  [ -z "$pid" ] && return 1
+  return 0
+}
+
+start() {
+  su -s /bin/sh hadoop -c "$HADOOP_HOME/bin/hadoop-daemon.sh start secondarynamenode"
+}
+stop() {
+  su -s /bin/sh hadoop -c "$HADOOP_HOME/bin/hadoop-daemon.sh stop secondarynamenode"
+}
+
+
+case "$1" in
+  start)
+    echo -n "Starting $DESC: "
+    start
+    if running ; then
+      echo "$NAME."
+    else
+      echo "ERROR."
+    fi
+    ;;
+  stop)
+    echo -n "Stopping $DESC: "
+    stop
+    if ! running ; then
+      echo "$NAME."
+    else
+      echo "ERROR."
+    fi
+    ;;
+  force-stop)
+    echo -n "Forcefully stopping $DESC: "
+    get_running_pid
+    kill -9 $pid
+    if ! running ; then
+      echo "$NAME."
+    else
+      echo " ERROR."
+    fi
+    ;;
+  force-reload)
+    # check whether $DAEMON is running. If so, restart
+    running && $0 restart
+    ;;
+  restart)
+    echo -n "Restarting $DESC: "
+    stop
+    [ -n "$DODTIME" ] && sleep $DODTIME
+    $0 start
+    ;;
+  status)
+    echo -n "$NAME is "
+    if running ; then
+      echo "running"
+    else
+      echo "not running."
+      exit 1
+    fi
+    ;;
+  *)
+    N=/etc/init.d/$NAME
+    # echo "Usage: $N {start|stop|restart|reload|force-reload}" >&2
+    echo "Usage: $N {start|stop|restart|force-reload|status|force-stop}" >&2
+    exit 1
+    ;;
+esac
+
+exit 0

data/lib/extensions/hadoop/templates/init.d/hadoop-tasktracker
@@ -0,0 +1,119 @@
+#! /bin/sh
+#
+# skeleton example file to build /etc/init.d/ scripts.
+# This file should be used to construct scripts for /etc/init.d.
+#
+# Written by Miquel van Smoorenburg <miquels@cistron.nl>.
+# Modified for Debian
+# by Ian Murdock <imurdock@gnu.ai.mit.edu>.
+# Further changes by Javier Fernandez-Sanguino <jfs@debian.org>
+#
+# Version: @(#)skeleton 1.9 26-Feb-2001 miquels@cistron.nl
+#
+### BEGIN INIT INFO
+# Provides:          hadoop-tasktracker
+# Required-Start:    $network $local_fs
+# Required-Stop:
+# Should-Start:      $named
+# Should-Stop:
+# Default-Start:     2 3 4 5
+# Default-Stop:      0 1 6
+# Short-Description: Hadoop tasktracker daemon
+### END INIT INFO
+
+set -e
+
+# Include hadoop defaults if available
+if [ -f /etc/default/hadoop ] ; then
+  . /etc/default/hadoop
+fi
+
+PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
+DAEMON_SCRIPT=$HADOOP_HOME/bin/hadoop-daemon.sh
+NAME=hadoop-tasktracker
+DESC="Hadoop tasktracker daemon"
+
+test -x $DAEMON_SCRIPT || exit 0
+
+LOGDIR=$HADOOP_LOG_DIR
+PIDFILE=/var/run/hadoop/tasktracker.pid
+DODTIME=3   # Time to wait for the server to die, in seconds
+            # If this value is set too low you might not
+            # let some servers die gracefully and
+            # 'restart' will not work
+
+get_running_pid() {
+  pid=$(ps axw -eo pid,command | tr 'A-Z' 'a-z' | grep org.apache.hadoop | grep tasktracker | grep java | awk '{print $1}')
+}
+
+running() {
+  get_running_pid
+  [ -z "$pid" ] && return 1
+  return 0
+}
+
+start() {
+  su -s /bin/sh hadoop -c "$HADOOP_HOME/bin/hadoop-daemon.sh start tasktracker"
+}
+stop() {
+  su -s /bin/sh hadoop -c "$HADOOP_HOME/bin/hadoop-daemon.sh stop tasktracker"
+}
+
+
+case "$1" in
+  start)
+    echo -n "Starting $DESC: "
+    start
+    if running ; then
+      echo "$NAME."
+    else
+      echo "ERROR."
+    fi
+    ;;
+  stop)
+    echo -n "Stopping $DESC: "
+    stop
+    if ! running ; then
+      echo "$NAME."
+    else
+      echo "ERROR."
+    fi
+    ;;
+  force-stop)
+    echo -n "Forcefully stopping $DESC: "
+    get_running_pid
+    kill -9 $pid
+    if ! running ; then
+      echo "$NAME."
+    else
+      echo " ERROR."
+    fi
+    ;;
+  force-reload)
+    # check whether $DAEMON is running. If so, restart
+    running && $0 restart
+    ;;
+  restart)
+    echo -n "Restarting $DESC: "
+    stop
+    [ -n "$DODTIME" ] && sleep $DODTIME
+    $0 start
+    ;;
+  status)
+    echo -n "$NAME is "
+    if running ; then
+      echo "running"
+    else
+      echo "not running."
+      exit 1
+    fi
+    ;;
+  *)
+    N=/etc/init.d/$NAME
+    # echo "Usage: $N {start|stop|restart|reload|force-reload}" >&2
+    echo "Usage: $N {start|stop|restart|force-reload|status|force-stop}" >&2
+    exit 1
+    ;;
+esac
+
+exit 0
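
The init.d templates above differ only in the daemon they manage; on a provisioned node they end up as LSB init scripts under /etc/init.d/. A minimal host-side usage sketch (illustrative commands for a Debian/Ubuntu image, not part of the gem):

# register the script in the runlevels declared by its LSB header
sudo update-rc.d hadoop-tasktracker defaults
# start the daemon, then confirm it is up ("status" exits 1 when it is not running)
sudo /etc/init.d/hadoop-tasktracker start
sudo /etc/init.d/hadoop-tasktracker status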

data/lib/extensions/hadoop/templates/jvm.conf
@@ -0,0 +1,12 @@
+# /etc/jvm
+#
+# This file defines the default system JVM search order. Each
+# JVM should list their JAVA_HOME compatible directory in this file.
+# The default system JVM is the first one available from top to
+# bottom.
+
+/usr/lib/jvm/java-6-sun
+/usr/lib/jvm/java-gcj
+/usr/lib/jvm/ia32-java-1.5.0-sun
+/usr/lib/jvm/java-1.5.0-sun
+/usr

data/lib/extensions/hadoop/templates/log4j.properties.erb
@@ -0,0 +1,94 @@
+# Define some default values that can be overridden by system properties
+hadoop.root.logger=INFO,console
+hadoop.log.dir=.
+hadoop.log.file=hadoop.log
+
+# Define the root logger to the system property "hadoop.root.logger".
+log4j.rootLogger=${hadoop.root.logger}, EventCounter
+
+# Logging Threshold
+log4j.threshhold=ALL
+
+#
+# Daily Rolling File Appender
+#
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Rollover at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# console
+# Add "console" to rootlogger above if you want to use this
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+
+#
+# TaskLog Appender
+#
+
+#Default values
+hadoop.tasklog.taskid=null
+hadoop.tasklog.noKeepSplits=4
+hadoop.tasklog.totalLogFileSize=100
+hadoop.tasklog.purgeLogSplits=true
+hadoop.tasklog.logsRetainHours=12
+
+log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
+log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
+log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
+
+log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
+log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+
+#
+# Rolling File Appender
+#
+
+#log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+#log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Logfile size and 30-day backups
+#log4j.appender.RFA.MaxFileSize=1MB
+#log4j.appender.RFA.MaxBackupIndex=30
+
+#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+#
+# FSNamesystem Audit logging
+# All audit events are logged at INFO level
+#
+log4j.logger.org.apache.hadoop.fs.FSNamesystem.audit=WARN
+
+# Custom Logging levels
+
+log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
+log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
+#log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
+
+# Jets3t library
+log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
+
+#
+# Event Counter Appender
+# Sends counts of logging messages at different severity levels to Hadoop Metrics.
+#
+log4j.appender.EventCounter=org.apache.hadoop.metrics.jvm.EventCounter

data/lib/extensions/hadoop/templates/mapred-site.xml.erb
@@ -0,0 +1,70 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+<property>
+  <name>mapred.job.tracker</name>
+  <value><%= @node[:poolparty][:current_master] %>:54311</value>
+  <description>The host and port that the MapReduce job tracker runs
+  at. If "local", then jobs are run in-process as a single map
+  and reduce task.
+  </description>
+</property>
+
+
+<property>
+  <name>mapred.task.tracker.report.address</name>
+  <value><%= @node[:poolparty][:hadoop_this_nodes_ip] %>:0</value>
+  <description>The interface and port that task tracker server listens on.
+  Since it is only connected to by the tasks, it uses the local interface.
+  EXPERT ONLY. Should only be changed if your host does not have the loopback
+  interface.</description>
+</property>
+
+<property>
+  <name>mapred.local.dir</name>
+  <value><%= @node[:poolparty][:hadoop_data_dir] %>/mapred/local</value>
+</property>
+
+<property>
+  <name>mapred.system.dir</name>
+  <value><%= @node[:poolparty][:hadoop_data_dir] %>/mapred/system</value>
+</property>
+
+<property>
+  <name>mapred.tasktracker.dns.interface</name>
+  <value>eth0</value>
+</property>
+
+<property>
+  <name>mapred.job.tracker.persist.jobstatus.active</name>
+  <value>true</value>
+</property>
+
+
+
+
+
+<property>
+  <name>mapred.child.java.opts</name>
+  <value>-Xmx200m -Djava.net.preferIPv4Stack=true</value>
+  <description>Java opts for the task tracker child processes.
+  The following symbol, if present, will be interpolated: @taskid@ is replaced
+  by current TaskID. Any other occurrences of '@' will go unchanged.
+  For example, to enable verbose gc logging to a file named for the taskid in
+  /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
+        -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
+
+  TaskManager.VMFlags=-Djava.net.preferIPv6Addresses=true -Djava.net.preferIPv4Stack=false
+
+  The configuration variable mapred.child.ulimit can be used to control the
+  maximum virtual memory of the child processes.
+  </description>
+</property>
+
+
+
+</configuration>

data/lib/extensions/hive/hive.rb
@@ -0,0 +1,118 @@
+=begin rdoc
+  In
+=end
+
+module PoolParty
+  module Plugin
+    class Hive < Plugin
+      def before_load(o={}, &block)
+        do_once do
+          # install_from_bin
+          install_from_src
+          set_environment_variables
+          create_hdfs_directories
+        end
+      end
+
+      def install_from_bin
+        has_exec "wget #{hive_dist} -O /usr/local/src/hive-0.3.0-hadoop-0.19.0-dev.tar.gz",
+          :not_if => "test -e /usr/local/src/hive-0.3.0-hadoop-0.19.0-dev.tar.gz"
+        has_exec "cd /usr/local/src && tar -xvvf /usr/local/src/hive-0.3.0-hadoop-0.19.0-dev.tar.gz",
+          :not_if => "test -e #{hive_home}"
+        has_exec "mv /usr/local/src/hive-0.3.0-hadoop-0.19.0-dev #{hive_home}",
+          :not_if => "test -e #{hive_home}"
+      end
+
+      # doesn't really work
+      def install_from_src
+        install_dependent_packages
+        download_and_build_src
+      end
+
+      def install_dependent_packages
+        has_package :name => "subversion"
+        has_package :name => "ant"
+      end
+
+      def download_and_build_src
+        has_exec "svn co #{hive_repo} #{src_dir} -r#{hive_revision}",
+          :not_if => "test -e #{src_dir}/build.xml"
+        has_exec "cd #{src_dir} && wget --no-check-certificate https://issues.apache.org/jira/secure/attachment/12409779/hive-487.3.patch",
+          :not_if => "test -e #{src_dir}/hive-487.3.patch"
+        has_exec "cd #{src_dir} && patch -p0 < hive-487.3.patch && mv hive-487.3.patch hive-487.3.patch.applied",
+          :not_if => "test -e #{src_dir}/hive-487.3.patch.applied"
+        has_exec "cd #{src_dir} && ant -Dhadoop.version=\\\"#{hadoop_version}\\\" package",
+          :not_if => "test -e #{hive_home}/README.txt"
+        has_exec "mv #{src_dir}/build/dist #{hive_home}",
+          :not_if => "test -e #{hive_home}"
+      end
+
+      # todo, pull from parent
+      def set_environment_variables
+        has_file :name => "/root/.hadoop-etc-env.sh", :content => <<-EOF
+export HADOOP_HOME=#{hadoop_home}
+export HADOOP=$HADOOP_HOME/bin/hadoop
+export HIVE_HOME=#{hive_home}
+export PATH=$HADOOP_HOME/bin:$HIVE_HOME/bin:$PATH
+        EOF
+        has_line_in_file :file => "/root/.profile", :line => "source /root/.hadoop-etc-env.sh"
+      end
+
+      def create_hdfs_directories
+        has_exec "#{hadoop_home}/bin/hadoop fs -mkdir /tmp",
+          :not_if  => "#{hadoop_home}/bin/hadoop fs -ls /tmp",
+          :only_if => "test -e #{hadoop_data_dir}/dfs && (ps aux | grep org.apache.hadoop.hdfs.server.namenode.NameNode | grep -v grep)"
+
+        has_exec "#{hadoop_home}/bin/hadoop fs -mkdir /user/hive/warehouse",
+          :not_if  => "#{hadoop_home}/bin/hadoop fs -ls /user/hive/warehouse",
+          :only_if => "test -e #{hadoop_data_dir}/dfs && (ps aux | grep org.apache.hadoop.hdfs.server.namenode.NameNode | grep -v grep)"
+
+        has_exec "#{hadoop_home}/bin/hadoop fs -chmod g+w /tmp",
+          :not_if  => "#{hadoop_home}/bin/hadoop fs -ls /tmp", # todo, check perms
+          :only_if => "test -e #{hadoop_data_dir}/dfs && (ps aux | grep org.apache.hadoop.hdfs.server.namenode.NameNode | grep -v grep)"
+
+        has_exec "#{hadoop_home}/bin/hadoop fs -chmod g+w /user/hive/warehouse",
+          :not_if  => "#{hadoop_home}/bin/hadoop fs -ls /user/hive/warehouse",
+          :only_if => "test -e #{hadoop_data_dir}/dfs && (ps aux | grep org.apache.hadoop.hdfs.server.namenode.NameNode | grep -v grep)"
+      end
+
+      private
+
+      def hive_dist
+        "http://www.apache.org/dist/hadoop/hive/hive-0.3.0/hive-0.3.0-hadoop-0.19.0-dev.tar.gz"
+      end
+
+      def src_dir
+        "/usr/local/src/hive"
+      end
+
+      def hive_home
+        "/usr/local/hive"
+      end
+
+      def hive_repo
+        # "http://svn.apache.org/repos/asf/hadoop/hive/tags/release-0.3.0/"
+        "http://svn.apache.org/repos/asf/hadoop/hive/trunk"
+      end
+
+      def hive_revision
+        "781069"
+      end
+
+      ### TODO the values below should pull from parent e.g. the hadoop plugin
+      def hadoop_home
+        "/usr/local/hadoop"
+      end
+
+      def hadoop_data_dir
+        "/mnt/hadoop-data"
+      end
+
+      def hadoop_version
+        "0.20.0"
+      end
+
+    end
+  end
+end
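
Once the Hive plugin has converged, the environment file it writes at /root/.hadoop-etc-env.sh puts both hadoop and hive on root's PATH. A small sanity-check sketch, assuming the NameNode is already running on the node (these commands are illustrative and not part of the gem):

. /root/.hadoop-etc-env.sh
# the warehouse directory created by create_hdfs_directories should exist and be group-writable
$HADOOP_HOME/bin/hadoop fs -ls /user/hive/warehouse
# run a trivial statement through the Hive CLI installed under $HIVE_HOME/bin
$HIVE_HOME/bin/hive -e 'SHOW TABLES;'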