jashmenn-poolparty-extensions 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,119 @@
+ #! /bin/sh
+ #
+ # skeleton example file to build /etc/init.d/ scripts.
+ # This file should be used to construct scripts for /etc/init.d.
+ #
+ # Written by Miquel van Smoorenburg <miquels@cistron.nl>.
+ # Modified for Debian
+ # by Ian Murdock <imurdock@gnu.ai.mit.edu>.
+ # Further changes by Javier Fernandez-Sanguino <jfs@debian.org>
+ #
+ # Version: @(#)skeleton 1.9 26-Feb-2001 miquels@cistron.nl
+ #
+ ### BEGIN INIT INFO
+ # Provides: hadoop-secondarynamenode
+ # Required-Start: $network $local_fs
+ # Required-Stop:
+ # Should-Start: $named
+ # Should-Stop:
+ # Default-Start: 2 3 4 5
+ # Default-Stop: 0 1 6
+ # Short-Description: Hadoop secondarynamenode daemon
+ ### END INIT INFO
+
+ set -e
+
+ # Include hadoop defaults if available
+ if [ -f /etc/default/hadoop ] ; then
+ . /etc/default/hadoop
+ fi
+
+ PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
+ DAEMON_SCRIPT=$HADOOP_HOME/bin/hadoop-daemon.sh
+ NAME=hadoop-secondarynamenode
+ DESC="Hadoop secondarynamenode daemon"
+
+ test -x $DAEMON_SCRIPT || exit 0
+
+ LOGDIR=$HADOOP_LOG_DIR
+ PIDFILE=/var/run/hadoop/secondarynamenode.pid
+ DODTIME=3 # Time to wait for the server to die, in seconds
+ # If this value is set too low you might not
+ # let some servers die gracefully and
+ # 'restart' will not work
+
+ get_running_pid() {
+ pid=$(ps axw -eo pid,command | tr 'A-Z' 'a-z' | grep org.apache.hadoop | grep secondarynamenode | grep java | awk '{print $1}')
+ }
+
+ running() {
+ get_running_pid
+ [ -z "$pid" ] && return 1
+ return 0
+ }
+
+ start() {
+ su -s /bin/sh hadoop -c "$HADOOP_HOME/bin/hadoop-daemon.sh start secondarynamenode"
+ }
+ stop() {
+ su -s /bin/sh hadoop -c "$HADOOP_HOME/bin/hadoop-daemon.sh stop secondarynamenode"
+ }
+
+
+ case "$1" in
+ start)
+ echo -n "Starting $DESC: "
+ start
+ if running ; then
+ echo "$NAME."
+ else
+ echo "ERROR."
+ fi
+ ;;
+ stop)
+ echo -n "Stopping $DESC: "
+ stop
+ if ! running ; then
+ echo 'ERROR'
+ else
+ echo "$NAME."
+ fi
+ ;;
+ force-stop)
+ echo -n "Forcefully stopping $DESC: "
+ get_running_pid
+ kill -9 $pid
+ if ! running ; then
+ echo "$NAME."
+ else
+ echo " ERROR."
+ fi
+ ;;
+ force-reload)
+ # check whether $DAEMON is running. If so, restart
+ running && $0 restart
+ ;;
+ restart)
+ echo -n "Restarting $DESC: "
+ stop
+ [ -n "$DODTIME" ] && sleep $DODTIME
+ $0 start
+ ;;
+ status)
+ echo -n "$NAME is "
+ if running ; then
+ echo "running"
+ else
+ echo "not running."
+ exit 1
+ fi
+ ;;
+ *)
+ N=/etc/init.d/$NAME
+ # echo "Usage: $N {start|stop|restart|reload|force-reload}" >&2
+ echo "Usage: $N {start|stop|restart|force-reload|status|force-stop}" >&2
+ exit 1
+ ;;
+ esac
+
+ exit 0
@@ -0,0 +1,119 @@
+ #! /bin/sh
+ #
+ # skeleton example file to build /etc/init.d/ scripts.
+ # This file should be used to construct scripts for /etc/init.d.
+ #
+ # Written by Miquel van Smoorenburg <miquels@cistron.nl>.
+ # Modified for Debian
+ # by Ian Murdock <imurdock@gnu.ai.mit.edu>.
+ # Further changes by Javier Fernandez-Sanguino <jfs@debian.org>
+ #
+ # Version: @(#)skeleton 1.9 26-Feb-2001 miquels@cistron.nl
+ #
+ ### BEGIN INIT INFO
+ # Provides: hadoop-tasktracker
+ # Required-Start: $network $local_fs
+ # Required-Stop:
+ # Should-Start: $named
+ # Should-Stop:
+ # Default-Start: 2 3 4 5
+ # Default-Stop: 0 1 6
+ # Short-Description: Hadoop tasktracker daemon
+ ### END INIT INFO
+
+ set -e
+
+ # Include hadoop defaults if available
+ if [ -f /etc/default/hadoop ] ; then
+ . /etc/default/hadoop
+ fi
+
+ PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
+ DAEMON_SCRIPT=$HADOOP_HOME/bin/hadoop-daemon.sh
+ NAME=hadoop-tasktracker
+ DESC="Hadoop tasktracker daemon"
+
+ test -x $DAEMON_SCRIPT || exit 0
+
+ LOGDIR=$HADOOP_LOG_DIR
+ PIDFILE=/var/run/hadoop/tasktracker.pid
+ DODTIME=3 # Time to wait for the server to die, in seconds
+ # If this value is set too low you might not
+ # let some servers die gracefully and
+ # 'restart' will not work
+
+ get_running_pid() {
+ pid=$(ps axw -eo pid,command | tr 'A-Z' 'a-z' | grep org.apache.hadoop | grep tasktracker | grep java | awk '{print $1}')
+ }
+
+ running() {
+ get_running_pid
+ [ -z "$pid" ] && return 1
+ return 0
+ }
+
+ start() {
+ su -s /bin/sh hadoop -c "$HADOOP_HOME/bin/hadoop-daemon.sh start tasktracker"
+ }
+ stop() {
+ su -s /bin/sh hadoop -c "$HADOOP_HOME/bin/hadoop-daemon.sh stop tasktracker"
+ }
+
+
+ case "$1" in
+ start)
+ echo -n "Starting $DESC: "
+ start
+ if running ; then
+ echo "$NAME."
+ else
+ echo "ERROR."
+ fi
+ ;;
+ stop)
+ echo -n "Stopping $DESC: "
+ stop
+ if ! running ; then
+ echo 'ERROR'
+ else
+ echo "$NAME."
+ fi
+ ;;
+ force-stop)
+ echo -n "Forcefully stopping $DESC: "
+ get_running_pid
+ kill -9 $pid
+ if ! running ; then
+ echo "$NAME."
+ else
+ echo " ERROR."
+ fi
+ ;;
+ force-reload)
+ # check whether $DAEMON is running. If so, restart
+ running && $0 restart
+ ;;
+ restart)
+ echo -n "Restarting $DESC: "
+ stop
+ [ -n "$DODTIME" ] && sleep $DODTIME
+ $0 start
+ ;;
+ status)
+ echo -n "$NAME is "
+ if running ; then
+ echo "running"
+ else
+ echo "not running."
+ exit 1
+ fi
+ ;;
+ *)
+ N=/etc/init.d/$NAME
+ # echo "Usage: $N {start|stop|restart|reload|force-reload}" >&2
+ echo "Usage: $N {start|stop|restart|force-reload|status|force-stop}" >&2
+ exit 1
+ ;;
+ esac
+
+ exit 0
@@ -0,0 +1,12 @@
+ # /etc/jvm
+ #
+ # This file defines the default system JVM search order. Each
+ # JVM should list their JAVA_HOME compatible directory in this file.
+ # The default system JVM is the first one available from top to
+ # bottom.
+
+ /usr/lib/jvm/java-6-sun
+ /usr/lib/jvm/java-gcj
+ /usr/lib/jvm/ia32-java-1.5.0-sun
+ /usr/lib/jvm/java-1.5.0-sun
+ /usr
@@ -0,0 +1,94 @@
+ # Define some default values that can be overridden by system properties
+ hadoop.root.logger=INFO,console
+ hadoop.log.dir=.
+ hadoop.log.file=hadoop.log
+
+ # Define the root logger to the system property "hadoop.root.logger".
+ log4j.rootLogger=${hadoop.root.logger}, EventCounter
+
+ # Logging Threshold
+ log4j.threshhold=ALL
+
+ #
+ # Daily Rolling File Appender
+ #
+
+ log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+ log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+ # Rollover at midnight
+ log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+ # 30-day backup
+ #log4j.appender.DRFA.MaxBackupIndex=30
+ log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+ # Pattern format: Date LogLevel LoggerName LogMessage
+ log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+ # Debugging Pattern format
+ #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+ #
+ # console
+ # Add "console" to rootlogger above if you want to use this
+ #
+
+ log4j.appender.console=org.apache.log4j.ConsoleAppender
+ log4j.appender.console.target=System.err
+ log4j.appender.console.layout=org.apache.log4j.PatternLayout
+ log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+
+ #
+ # TaskLog Appender
+ #
+
+ #Default values
+ hadoop.tasklog.taskid=null
+ hadoop.tasklog.noKeepSplits=4
+ hadoop.tasklog.totalLogFileSize=100
+ hadoop.tasklog.purgeLogSplits=true
+ hadoop.tasklog.logsRetainHours=12
+
+ log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
+ log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
+ log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
+
+ log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
+ log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+
+ #
+ # Rolling File Appender
+ #
+
+ #log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+ #log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+ # Logfile size and 30-day backups
+ #log4j.appender.RFA.MaxFileSize=1MB
+ #log4j.appender.RFA.MaxBackupIndex=30
+
+ #log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+ #log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
+ #log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+ #
+ # FSNamesystem Audit logging
+ # All audit events are logged at INFO level
+ #
+ log4j.logger.org.apache.hadoop.fs.FSNamesystem.audit=WARN
+
+ # Custom Logging levels
+
+ log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
+ log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
+ #log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
+
+ # Jets3t library
+ log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
+
+ #
+ # Event Counter Appender
+ # Sends counts of logging messages at different severity levels to Hadoop Metrics.
+ #
+ log4j.appender.EventCounter=org.apache.hadoop.metrics.jvm.EventCounter
@@ -0,0 +1,70 @@
+ <?xml version="1.0"?>
+ <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+ <!-- Put site-specific property overrides in this file. -->
+
+ <configuration>
+
+ <property>
+ <name>mapred.job.tracker</name>
+ <value><%= @node[:poolparty][:current_master] %>:54311</value>
+ <description>The host and port that the MapReduce job tracker runs
+ at. If "local", then jobs are run in-process as a single map
+ and reduce task.
+ </description>
+ </property>
+
+
+ <property>
+ <name>mapred.task.tracker.report.address</name>
+ <value><%= @node[:poolparty][:hadoop_this_nodes_ip] %>:0</value>
+ <description>The interface and port that task tracker server listens on.
+ Since it is only connected to by the tasks, it uses the local interface.
+ EXPERT ONLY. Should only be changed if your host does not have the loopback
+ interface.</description>
+ </property>
+
+ <property>
+ <name>mapred.local.dir</name>
+ <value><%= @node[:poolparty][:hadoop_data_dir] %>/mapred/local</value>
+ </property>
+
+ <property>
+ <name>mapred.system.dir</name>
+ <value><%= @node[:poolparty][:hadoop_data_dir] %>/mapred/system</value>
+ </property>
+
+ <property>
+ <name>mapred.tasktracker.dns.interface</name>
+ <value>eth0</value>
+ </property>
+
+ <property>
+ <name>mapred.job.tracker.persist.jobstatus.active</name>
+ <value>true</value>
+ </property>
+
+
+
+
+
+ <property>
+ <name>mapred.child.java.opts</name>
+ <value>-Xmx200m -Djava.net.preferIPv4Stack=true</value>
+ <description>Java opts for the task tracker child processes.
+ The following symbol, if present, will be interpolated: @taskid@ is replaced
+ by current TaskID. Any other occurrences of '@' will go unchanged.
+ For example, to enable verbose gc logging to a file named for the taskid in
+ /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
+ -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
+
+ TaskManager.VMFlags=-Djava.net.preferIPv6Addresses=true -Djava.net.preferIPv4Stack=false
+
+ The configuration variable mapred.child.ulimit can be used to control the
+ maximum virtual memory of the child processes.
+ </description>
+ </property>
+
+
+
+ </configuration>
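
The <%= @node[...] %> markers in the file above are ERB tags, not literal XML: PoolParty renders the template against per-node attributes (such as the current master's address) before writing out mapred-site.xml. The snippet below is a minimal, hypothetical sketch of that rendering step; the file name and the attribute values are placeholders and are not taken from this package.

    require 'erb'

    # Hypothetical node attributes standing in for what PoolParty supplies.
    @node = {
      :poolparty => {
        :current_master       => "10.0.0.10",
        :hadoop_this_nodes_ip => "10.0.0.11",
        :hadoop_data_dir      => "/mnt/hadoop-data"
      }
    }

    # <%= @node[...] %> tags resolve against the instance variables
    # visible in this binding.
    template = ERB.new(File.read("mapred-site.xml.erb"))
    puts template.result(binding)   # prints the resolved mapred-site.xml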
@@ -0,0 +1,118 @@
+ =begin rdoc
+ In
+ =end
+
+ module PoolParty
+ module Plugin
+ class Hive < Plugin
+ def before_load(o={}, &block)
+ do_once do
+ # install_from_bin
+ install_from_src
+ set_environment_variables
+ create_hdfs_directories
+ end
+ end
+
+ def install_from_bin
+ has_exec "wget #{hive_dist} -O /usr/local/src/hive-0.3.0-hadoop-0.19.0-dev.tar.gz",
+ :not_if => "test -e /usr/local/src/hive-0.3.0-hadoop-0.19.0-dev.tar.gz"
+ has_exec "cd /usr/local/src && tar -xvvf /usr/local/src/hive-0.3.0-hadoop-0.19.0-dev.tar.gz",
+ :not_if => "test -e #{hive_home}"
+ has_exec "mv /usr/local/src/hive-0.3.0-hadoop-0.19.0-dev #{hive_home}",
+ :not_if => "test -e #{hive_home}"
+ end
+
+ # doesn't really work
+ def install_from_src
+ install_dependent_packages
+ download_and_build_src
+ end
+
+ def install_dependent_packages
+ has_package :name => "subversion"
+ has_package :name => "ant"
+ end
+
+ def download_and_build_src
+ has_exec "svn co #{hive_repo} #{src_dir} -r#{hive_revision}",
+ :not_if => "test -e #{src_dir}/build.xml"
+ has_exec "cd #{src_dir} && wget --no-check-certificate https://issues.apache.org/jira/secure/attachment/12409779/hive-487.3.patch",
+ :not_if => "test -e #{src_dir}/hive-487.3.patch"
+ has_exec "cd #{src_dir} && patch -p0 < hive-487.3.patch && mv hive-487.3.patch hive-487.3.patch.applied",
+ :not_if => "test -e #{src_dir}/hive-487.3.patch.applied"
+ has_exec "cd #{src_dir} && ant -Dhadoop.version=\\\"#{hadoop_version}\\\" package",
+ :not_if => "test -e #{hive_home}/README.txt"
+ has_exec "mv #{src_dir}/build/dist #{hive_home}",
+ :not_if => "test -e #{hive_home}"
+ end
+
+ # todo, pull from parent
+ def set_environment_variables
+ has_file :name => "/root/.hadoop-etc-env.sh", :content => <<-EOF
+ export HADOOP_HOME=#{hadoop_home}
+ export HADOOP=$HADOOP_HOME/bin/hadoop
+ export HIVE_HOME=#{hive_home}
+ export PATH=$HADOOP_HOME/bin:$HIVE_HOME/bin:$PATH
+ EOF
+ has_line_in_file :file => "/root/.profile", :line => "source /root/.hadoop-etc-env.sh"
+ end
+
+ def create_hdfs_directories
+ has_exec "#{hadoop_home}/bin/hadoop fs -mkdir /tmp",
+ :not_if => "#{hadoop_home}/bin/hadoop fs -ls /tmp",
+ :only_if => "test -e #{hadoop_data_dir}/dfs && (ps aux | grep org.apache.hadoop.hdfs.server.namenode.NameNode | grep -v grep)"
+
+ has_exec "#{hadoop_home}/bin/hadoop fs -mkdir /user/hive/warehouse",
+ :not_if => "#{hadoop_home}/bin/hadoop fs -ls /user/hive/warehouse",
+ :only_if => "test -e #{hadoop_data_dir}/dfs && (ps aux | grep org.apache.hadoop.hdfs.server.namenode.NameNode | grep -v grep)"
+
+ has_exec "#{hadoop_home}/bin/hadoop fs -chmod g+w /tmp",
+ :not_if => "#{hadoop_home}/bin/hadoop fs -ls /tmp", # todo, check perms
+ :only_if => "test -e #{hadoop_data_dir}/dfs && (ps aux | grep org.apache.hadoop.hdfs.server.namenode.NameNode | grep -v grep)"
+
+ has_exec "#{hadoop_home}/bin/hadoop fs -chmod g+w /user/hive/warehouse",
+ :not_if => "#{hadoop_home}/bin/hadoop fs -ls /user/hive/warehouse",
+ :only_if => "test -e #{hadoop_data_dir}/dfs && (ps aux | grep org.apache.hadoop.hdfs.server.namenode.NameNode | grep -v grep)"
+ end
+
+ private
+
+ def hive_dist
+ "http://www.apache.org/dist/hadoop/hive/hive-0.3.0/hive-0.3.0-hadoop-0.19.0-dev.tar.gz"
+ end
+
+ def src_dir
+ "/usr/local/src/hive"
+ end
+
+ def hive_home
+ "/usr/local/hive"
+ end
+
+ def hive_repo
+ # "http://svn.apache.org/repos/asf/hadoop/hive/tags/release-0.3.0/"
+ "http://svn.apache.org/repos/asf/hadoop/hive/trunk"
+ end
+
+ def hive_revision
+ "781069"
+ end
+
+ ### TODO the values below should pull from parent e.g. the hadoop plugin
+ def hadoop_home
+ "/usr/local/hadoop"
+ end
+
+ def hadoop_data_dir
+ "/mnt/hadoop-data"
+ end
+
+ def hadoop_version
+ "0.20.0"
+ end
+
+ end
+ end
+ end
+
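
For context, the Hive class above follows the usual PoolParty pattern: a Plugin subclass whose resource declarations (has_exec, has_package, has_file, has_line_in_file) are evaluated when the plugin is loaded into a cloud. The sketch below shows how such a plugin might be wired into a clouds.rb definition; the pool and cloud names, the require name, and the assumption that the class is exposed as a hive directive inside a cloud block are illustrative guesses, not taken from this package.

    require "rubygems"
    require "poolparty"
    require "poolparty-extensions"   # assumed require name for this gem

    pool :hadoop do
      cloud :master do
        # Assumes PoolParty exposes the Plugin subclass above as a `hive`
        # directive, as its plugins conventionally are; before_load then
        # installs Hive from source, sets the environment variables, and
        # creates the HDFS warehouse directories.
        hive
      end
    end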