jashmenn-poolparty-extensions 0.1.0 → 0.1.1

@@ -0,0 +1,119 @@
+ #! /bin/sh
+ #
+ # skeleton example file to build /etc/init.d/ scripts.
+ # This file should be used to construct scripts for /etc/init.d.
+ #
+ # Written by Miquel van Smoorenburg <miquels@cistron.nl>.
+ # Modified for Debian
+ # by Ian Murdock <imurdock@gnu.ai.mit.edu>.
+ # Further changes by Javier Fernandez-Sanguino <jfs@debian.org>
+ #
+ # Version: @(#)skeleton 1.9 26-Feb-2001 miquels@cistron.nl
+ #
+ ### BEGIN INIT INFO
+ # Provides:          hadoop-secondarynamenode
+ # Required-Start:    $network $local_fs
+ # Required-Stop:
+ # Should-Start:      $named
+ # Should-Stop:
+ # Default-Start:     2 3 4 5
+ # Default-Stop:      0 1 6
+ # Short-Description: Hadoop secondarynamenode daemon
+ ### END INIT INFO
+
+ set -e
+
+ # Include hadoop defaults if available
+ if [ -f /etc/default/hadoop ] ; then
+   . /etc/default/hadoop
+ fi
+
+ PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
+ DAEMON_SCRIPT=$HADOOP_HOME/bin/hadoop-daemon.sh
+ NAME=hadoop-secondarynamenode
+ DESC="Hadoop secondarynamenode daemon"
+
+ test -x $DAEMON_SCRIPT || exit 0
+
+ LOGDIR=$HADOOP_LOG_DIR
+ PIDFILE=/var/run/hadoop/secondarynamenode.pid
+ DODTIME=3   # Time to wait for the server to die, in seconds.
+             # If this value is set too low you might not
+             # allow some servers to die gracefully and
+             # 'restart' will not work.
+
+ get_running_pid() {
+   pid=$(ps axw -eo pid,command | tr 'A-Z' 'a-z' | grep org.apache.hadoop | grep secondarynamenode | grep java | awk '{print $1}')
+ }
+
+ running() {
+   get_running_pid
+   [ -z "$pid" ] && return 1
+   return 0
+ }
+
+ start() {
+   su -s /bin/sh hadoop -c "$HADOOP_HOME/bin/hadoop-daemon.sh start secondarynamenode"
+ }
+ stop() {
+   su -s /bin/sh hadoop -c "$HADOOP_HOME/bin/hadoop-daemon.sh stop secondarynamenode"
+ }
+
+
+ case "$1" in
+   start)
+     echo -n "Starting $DESC: "
+     start
+     if running ; then
+       echo "$NAME."
+     else
+       echo "ERROR."
+     fi
+     ;;
+   stop)
+     echo -n "Stopping $DESC: "
+     stop
+     if ! running ; then
+       echo 'ERROR'
+     else
+       echo "$NAME."
+     fi
+     ;;
+   force-stop)
+     echo -n "Forcefully stopping $DESC: "
+     get_running_pid
+     kill -9 $pid
+     if ! running ; then
+       echo "$NAME."
+     else
+       echo " ERROR."
+     fi
+     ;;
+   force-reload)
+     # check whether $DAEMON is running. If so, restart
+     running && $0 restart
+     ;;
+   restart)
+     echo -n "Restarting $DESC: "
+     stop
+     [ -n "$DODTIME" ] && sleep $DODTIME
+     $0 start
+     ;;
+   status)
+     echo -n "$NAME is "
+     if running ; then
+       echo "running"
+     else
+       echo "not running."
+       exit 1
+     fi
+     ;;
+   *)
+     N=/etc/init.d/$NAME
+     # echo "Usage: $N {start|stop|restart|reload|force-reload}" >&2
+     echo "Usage: $N {start|stop|restart|force-reload|status|force-stop}" >&2
+     exit 1
+     ;;
+ esac
+
+ exit 0
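Not part of the diff: a minimal sketch of how an LSB init script like the one above could be registered on a Debian-style node, assuming the gem's recipes have installed it as /etc/init.d/hadoop-secondarynamenode (the destination path is an assumption, not something this diff states):

  # assumes the script above is at /etc/init.d/hadoop-secondarynamenode
  chmod +x /etc/init.d/hadoop-secondarynamenode
  update-rc.d hadoop-secondarynamenode defaults   # register for the Default-Start runlevels (2 3 4 5)
  /etc/init.d/hadoop-secondarynamenode start      # or: service hadoop-secondarynamenode start
  /etc/init.d/hadoop-secondarynamenode status

The same steps would apply to the hadoop-tasktracker script in the next hunk.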
@@ -0,0 +1,119 @@
+ #! /bin/sh
+ #
+ # skeleton example file to build /etc/init.d/ scripts.
+ # This file should be used to construct scripts for /etc/init.d.
+ #
+ # Written by Miquel van Smoorenburg <miquels@cistron.nl>.
+ # Modified for Debian
+ # by Ian Murdock <imurdock@gnu.ai.mit.edu>.
+ # Further changes by Javier Fernandez-Sanguino <jfs@debian.org>
+ #
+ # Version: @(#)skeleton 1.9 26-Feb-2001 miquels@cistron.nl
+ #
+ ### BEGIN INIT INFO
+ # Provides:          hadoop-tasktracker
+ # Required-Start:    $network $local_fs
+ # Required-Stop:
+ # Should-Start:      $named
+ # Should-Stop:
+ # Default-Start:     2 3 4 5
+ # Default-Stop:      0 1 6
+ # Short-Description: Hadoop tasktracker daemon
+ ### END INIT INFO
+
+ set -e
+
+ # Include hadoop defaults if available
+ if [ -f /etc/default/hadoop ] ; then
+   . /etc/default/hadoop
+ fi
+
+ PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
+ DAEMON_SCRIPT=$HADOOP_HOME/bin/hadoop-daemon.sh
+ NAME=hadoop-tasktracker
+ DESC="Hadoop tasktracker daemon"
+
+ test -x $DAEMON_SCRIPT || exit 0
+
+ LOGDIR=$HADOOP_LOG_DIR
+ PIDFILE=/var/run/hadoop/tasktracker.pid
+ DODTIME=3   # Time to wait for the server to die, in seconds.
+             # If this value is set too low you might not
+             # allow some servers to die gracefully and
+             # 'restart' will not work.
+
+ get_running_pid() {
+   pid=$(ps axw -eo pid,command | tr 'A-Z' 'a-z' | grep org.apache.hadoop | grep tasktracker | grep java | awk '{print $1}')
+ }
+
+ running() {
+   get_running_pid
+   [ -z "$pid" ] && return 1
+   return 0
+ }
+
+ start() {
+   su -s /bin/sh hadoop -c "$HADOOP_HOME/bin/hadoop-daemon.sh start tasktracker"
+ }
+ stop() {
+   su -s /bin/sh hadoop -c "$HADOOP_HOME/bin/hadoop-daemon.sh stop tasktracker"
+ }
+
+
+ case "$1" in
+   start)
+     echo -n "Starting $DESC: "
+     start
+     if running ; then
+       echo "$NAME."
+     else
+       echo "ERROR."
+     fi
+     ;;
+   stop)
+     echo -n "Stopping $DESC: "
+     stop
+     if ! running ; then
+       echo 'ERROR'
+     else
+       echo "$NAME."
+     fi
+     ;;
+   force-stop)
+     echo -n "Forcefully stopping $DESC: "
+     get_running_pid
+     kill -9 $pid
+     if ! running ; then
+       echo "$NAME."
+     else
+       echo " ERROR."
+     fi
+     ;;
+   force-reload)
+     # check whether $DAEMON is running. If so, restart
+     running && $0 restart
+     ;;
+   restart)
+     echo -n "Restarting $DESC: "
+     stop
+     [ -n "$DODTIME" ] && sleep $DODTIME
+     $0 start
+     ;;
+   status)
+     echo -n "$NAME is "
+     if running ; then
+       echo "running"
+     else
+       echo "not running."
+       exit 1
+     fi
+     ;;
+   *)
+     N=/etc/init.d/$NAME
+     # echo "Usage: $N {start|stop|restart|reload|force-reload}" >&2
+     echo "Usage: $N {start|stop|restart|force-reload|status|force-stop}" >&2
+     exit 1
+     ;;
+ esac
+
+ exit 0
@@ -0,0 +1,12 @@
+ # /etc/jvm
+ #
+ # This file defines the default system JVM search order. Each
+ # JVM should list its JAVA_HOME-compatible directory in this file.
+ # The default system JVM is the first one available from top to
+ # bottom.
+
+ /usr/lib/jvm/java-6-sun
+ /usr/lib/jvm/java-gcj
+ /usr/lib/jvm/ia32-java-1.5.0-sun
+ /usr/lib/jvm/java-1.5.0-sun
+ /usr
@@ -0,0 +1,94 @@
+ # Define some default values that can be overridden by system properties
+ hadoop.root.logger=INFO,console
+ hadoop.log.dir=.
+ hadoop.log.file=hadoop.log
+
+ # Define the root logger to the system property "hadoop.root.logger".
+ log4j.rootLogger=${hadoop.root.logger}, EventCounter
+
+ # Logging Threshold
+ log4j.threshhold=ALL
+
+ #
+ # Daily Rolling File Appender
+ #
+
+ log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+ log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+ # Rollover at midnight
+ log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+ # 30-day backup
+ #log4j.appender.DRFA.MaxBackupIndex=30
+ log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+ # Pattern format: Date LogLevel LoggerName LogMessage
+ log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+ # Debugging Pattern format
+ #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+ #
+ # console
+ # Add "console" to rootlogger above if you want to use this
+ #
+
+ log4j.appender.console=org.apache.log4j.ConsoleAppender
+ log4j.appender.console.target=System.err
+ log4j.appender.console.layout=org.apache.log4j.PatternLayout
+ log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+
+ #
+ # TaskLog Appender
+ #
+
+ # Default values
+ hadoop.tasklog.taskid=null
+ hadoop.tasklog.noKeepSplits=4
+ hadoop.tasklog.totalLogFileSize=100
+ hadoop.tasklog.purgeLogSplits=true
+ hadoop.tasklog.logsRetainHours=12
+
+ log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
+ log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
+ log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
+
+ log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
+ log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+
+ #
+ # Rolling File Appender
+ #
+
+ #log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+ #log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+ # Logfile size and 30-day backups
+ #log4j.appender.RFA.MaxFileSize=1MB
+ #log4j.appender.RFA.MaxBackupIndex=30
+
+ #log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+ #log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
+ #log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+ #
+ # FSNamesystem Audit logging
+ # All audit events are logged at INFO level
+ #
+ log4j.logger.org.apache.hadoop.fs.FSNamesystem.audit=WARN
+
+ # Custom Logging levels
+
+ log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
+ log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
+ #log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
+
+ # Jets3t library
+ log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
+
+ #
+ # Event Counter Appender
+ # Sends counts of logging messages at different severity levels to Hadoop Metrics.
+ #
+ log4j.appender.EventCounter=org.apache.hadoop.metrics.jvm.EventCounter
@@ -0,0 +1,70 @@
+ <?xml version="1.0"?>
+ <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+ <!-- Put site-specific property overrides in this file. -->
+
+ <configuration>
+
+   <property>
+     <name>mapred.job.tracker</name>
+     <value><%= @node[:poolparty][:current_master] %>:54311</value>
+     <description>The host and port that the MapReduce job tracker runs
+       at. If "local", then jobs are run in-process as a single map
+       and reduce task.
+     </description>
+   </property>
+
+   <property>
+     <name>mapred.task.tracker.report.address</name>
+     <value><%= @node[:poolparty][:hadoop_this_nodes_ip] %>:0</value>
+     <description>The interface and port that task tracker server listens on.
+       Since it is only connected to by the tasks, it uses the local interface.
+       EXPERT ONLY. Should only be changed if your host does not have the loopback
+       interface.</description>
+   </property>
+
+   <property>
+     <name>mapred.local.dir</name>
+     <value><%= @node[:poolparty][:hadoop_data_dir] %>/mapred/local</value>
+   </property>
+
+   <property>
+     <name>mapred.system.dir</name>
+     <value><%= @node[:poolparty][:hadoop_data_dir] %>/mapred/system</value>
+   </property>
+
+   <property>
+     <name>mapred.tasktracker.dns.interface</name>
+     <value>eth0</value>
+   </property>
+
+   <property>
+     <name>mapred.job.tracker.persist.jobstatus.active</name>
+     <value>true</value>
+   </property>
+
+   <property>
+     <name>mapred.child.java.opts</name>
+     <value>-Xmx200m -Djava.net.preferIPv4Stack=true</value>
+     <description>Java opts for the task tracker child processes.
+       The following symbol, if present, will be interpolated: @taskid@ is replaced
+       by current TaskID. Any other occurrences of '@' will go unchanged.
+       For example, to enable verbose gc logging to a file named for the taskid in
+       /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
+         -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
+
+       TaskManager.VMFlags=-Djava.net.preferIPv6Addresses=true -Djava.net.preferIPv4Stack=false
+
+       The configuration variable mapred.child.ulimit can be used to control the
+       maximum virtual memory of the child processes.
+     </description>
+   </property>
+
+ </configuration>
@@ -0,0 +1,118 @@
+ =begin rdoc
+   Installs Apache Hive for PoolParty: builds Hive from source, sets the
+   required environment variables, and creates the HDFS warehouse directories.
+ =end
+
+ module PoolParty
+   module Plugin
+     class Hive < Plugin
+       def before_load(o={}, &block)
+         do_once do
+           # install_from_bin
+           install_from_src
+           set_environment_variables
+           create_hdfs_directories
+         end
+       end
+
+       def install_from_bin
+         has_exec "wget #{hive_dist} -O /usr/local/src/hive-0.3.0-hadoop-0.19.0-dev.tar.gz",
+           :not_if => "test -e /usr/local/src/hive-0.3.0-hadoop-0.19.0-dev.tar.gz"
+         has_exec "cd /usr/local/src && tar -xvvf /usr/local/src/hive-0.3.0-hadoop-0.19.0-dev.tar.gz",
+           :not_if => "test -e #{hive_home}"
+         has_exec "mv /usr/local/src/hive-0.3.0-hadoop-0.19.0-dev #{hive_home}",
+           :not_if => "test -e #{hive_home}"
+       end
+
+       # doesn't really work
+       def install_from_src
+         install_dependent_packages
+         download_and_build_src
+       end
+
+       def install_dependent_packages
+         has_package :name => "subversion"
+         has_package :name => "ant"
+       end
+
+       def download_and_build_src
+         has_exec "svn co #{hive_repo} #{src_dir} -r#{hive_revision}",
+           :not_if => "test -e #{src_dir}/build.xml"
+         has_exec "cd #{src_dir} && wget --no-check-certificate https://issues.apache.org/jira/secure/attachment/12409779/hive-487.3.patch",
+           :not_if => "test -e #{src_dir}/hive-487.3.patch"
+         has_exec "cd #{src_dir} && patch -p0 < hive-487.3.patch && mv hive-487.3.patch hive-487.3.patch.applied",
+           :not_if => "test -e #{src_dir}/hive-487.3.patch.applied"
+         has_exec "cd #{src_dir} && ant -Dhadoop.version=\\\"#{hadoop_version}\\\" package",
+           :not_if => "test -e #{hive_home}/README.txt"
+         has_exec "mv #{src_dir}/build/dist #{hive_home}",
+           :not_if => "test -e #{hive_home}"
+       end
+
+       # todo, pull from parent
+       def set_environment_variables
+         has_file :name => "/root/.hadoop-etc-env.sh", :content => <<-EOF
+ export HADOOP_HOME=#{hadoop_home}
+ export HADOOP=$HADOOP_HOME/bin/hadoop
+ export HIVE_HOME=#{hive_home}
+ export PATH=$HADOOP_HOME/bin:$HIVE_HOME/bin:$PATH
+ EOF
+         has_line_in_file :file => "/root/.profile", :line => "source /root/.hadoop-etc-env.sh"
+       end
+
+       def create_hdfs_directories
+         has_exec "#{hadoop_home}/bin/hadoop fs -mkdir /tmp",
+           :not_if => "#{hadoop_home}/bin/hadoop fs -ls /tmp",
+           :only_if => "test -e #{hadoop_data_dir}/dfs && (ps aux | grep org.apache.hadoop.hdfs.server.namenode.NameNode | grep -v grep)"
+
+         has_exec "#{hadoop_home}/bin/hadoop fs -mkdir /user/hive/warehouse",
+           :not_if => "#{hadoop_home}/bin/hadoop fs -ls /user/hive/warehouse",
+           :only_if => "test -e #{hadoop_data_dir}/dfs && (ps aux | grep org.apache.hadoop.hdfs.server.namenode.NameNode | grep -v grep)"
+
+         has_exec "#{hadoop_home}/bin/hadoop fs -chmod g+w /tmp",
+           :not_if => "#{hadoop_home}/bin/hadoop fs -ls /tmp", # todo, check perms
+           :only_if => "test -e #{hadoop_data_dir}/dfs && (ps aux | grep org.apache.hadoop.hdfs.server.namenode.NameNode | grep -v grep)"
+
+         has_exec "#{hadoop_home}/bin/hadoop fs -chmod g+w /user/hive/warehouse",
+           :not_if => "#{hadoop_home}/bin/hadoop fs -ls /user/hive/warehouse",
+           :only_if => "test -e #{hadoop_data_dir}/dfs && (ps aux | grep org.apache.hadoop.hdfs.server.namenode.NameNode | grep -v grep)"
+       end
+
+       private
+
+       def hive_dist
+         "http://www.apache.org/dist/hadoop/hive/hive-0.3.0/hive-0.3.0-hadoop-0.19.0-dev.tar.gz"
+       end
+
+       def src_dir
+         "/usr/local/src/hive"
+       end
+
+       def hive_home
+         "/usr/local/hive"
+       end
+
+       def hive_repo
+         # "http://svn.apache.org/repos/asf/hadoop/hive/tags/release-0.3.0/"
+         "http://svn.apache.org/repos/asf/hadoop/hive/trunk"
+       end
+
+       def hive_revision
+         "781069"
+       end
+
+       ### TODO the values below should pull from parent e.g. the hadoop plugin
+       def hadoop_home
+         "/usr/local/hadoop"
+       end
+
+       def hadoop_data_dir
+         "/mnt/hadoop-data"
+       end
+
+       def hadoop_version
+         "0.20.0"
+       end
+
+     end
+   end
+ end
+
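Not part of the gem diff: a rough smoke test, assuming the Hive plugin above has converged on a node, the environment file written by set_environment_variables is in place, and the NameNode is running so create_hdfs_directories could complete:

  # source the env file the plugin writes, then poke Hive and HDFS
  . /root/.hadoop-etc-env.sh
  $HADOOP fs -ls /user/hive/warehouse   # warehouse dir created by create_hdfs_directories
  hive -e 'SHOW TABLES;'                # Hive CLI should start and return an (empty) table list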