auser-poolparty-extensions 0.0.7 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +15 -0
- data/Rakefile +2 -2
- data/VERSION.yml +2 -2
- data/lib/extensions/bash_alias.rb +3 -0
- data/lib/extensions/convenience_helpers.rb +89 -0
- data/lib/extensions/development_gem_package.rb +3 -2
- data/lib/extensions/ganglia/ganglia.rb +219 -0
- data/lib/extensions/ganglia/templates/bin/gmetad.erb +38 -0
- data/lib/extensions/ganglia/templates/bin/gmond.erb +38 -0
- data/lib/extensions/ganglia/templates/ganglia-web-conf.php.erb +191 -0
- data/lib/extensions/ganglia/templates/gmetad.conf.erb +116 -0
- data/lib/extensions/ganglia/templates/gmond.conf.erb +426 -0
- data/lib/extensions/ganglia/templates/hadoop-metrics.properties.erb +65 -0
- data/lib/extensions/hadoop/hadoop.rb +427 -0
- data/lib/extensions/hadoop/templates/core-site.xml.erb +62 -0
- data/lib/extensions/hadoop/templates/hadoop-env.sh +4 -0
- data/lib/extensions/hadoop/templates/hadoop-site.xml.erb +9 -0
- data/lib/extensions/hadoop/templates/hadoop_hosts.erb +0 -0
- data/lib/extensions/hadoop/templates/hdfs-site.xml.erb +48 -0
- data/lib/extensions/hadoop/templates/init.d/hadoop-datanode +125 -0
- data/lib/extensions/hadoop/templates/init.d/hadoop-jobtracker +125 -0
- data/lib/extensions/hadoop/templates/init.d/hadoop-namenode +124 -0
- data/lib/extensions/hadoop/templates/init.d/hadoop-secondarynamenode +124 -0
- data/lib/extensions/hadoop/templates/init.d/hadoop-tasktracker +124 -0
- data/lib/extensions/hadoop/templates/jvm.conf +12 -0
- data/lib/extensions/hadoop/templates/log4j.properties.erb +94 -0
- data/lib/extensions/hadoop/templates/mapred-site.xml.erb +70 -0
- data/lib/extensions/hive/hive.rb +138 -0
- data/lib/poolparty-extensions.rb +5 -2
- metadata +28 -4
@@ -0,0 +1,65 @@
|
|
1
|
+
|
2
|
+
# Configuration of the "dfs" context for null
|
3
|
+
dfs.class=org.apache.hadoop.metrics.spi.NullContext
|
4
|
+
|
5
|
+
# Configuration of the "dfs" context for file
|
6
|
+
#dfs.class=org.apache.hadoop.metrics.file.FileContext
|
7
|
+
#dfs.period=10
|
8
|
+
#dfs.fileName=/tmp/dfsmetrics.log
|
9
|
+
|
10
|
+
# Configuration of the "dfs" context for ganglia
|
11
|
+
# dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext
|
12
|
+
# dfs.period=10
|
13
|
+
# dfs.servers=localhost:8649
|
14
|
+
|
15
|
+
|
16
|
+
# Configuration of the "mapred" context for null
|
17
|
+
mapred.class=org.apache.hadoop.metrics.spi.NullContext
|
18
|
+
|
19
|
+
# Configuration of the "mapred" context for file
|
20
|
+
#mapred.class=org.apache.hadoop.metrics.file.FileContext
|
21
|
+
#mapred.period=10
|
22
|
+
#mapred.fileName=/tmp/mrmetrics.log
|
23
|
+
|
24
|
+
# Configuration of the "mapred" context for ganglia
|
25
|
+
# mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext
|
26
|
+
# mapred.period=10
|
27
|
+
# mapred.servers=localhost:8649
|
28
|
+
|
29
|
+
|
30
|
+
# Configuration of the "jvm" context for null
|
31
|
+
jvm.class=org.apache.hadoop.metrics.spi.NullContext
|
32
|
+
|
33
|
+
# Configuration of the "jvm" context for file
|
34
|
+
#jvm.class=org.apache.hadoop.metrics.file.FileContext
|
35
|
+
#jvm.period=10
|
36
|
+
#jvm.fileName=/tmp/jvmmetrics.log
|
37
|
+
|
38
|
+
# Configuration of the "jvm" context for ganglia
|
39
|
+
# jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext
|
40
|
+
# jvm.period=10
|
41
|
+
# jvm.servers=localhost:8649
|
42
|
+
#
|
43
|
+
<% if @node[:poolparty][:hadoop_ganglia_monitoring_enabled] %>
|
44
|
+
|
45
|
+
# The *.servers values below should match the udp_send_channel address in your /etc/gmond.conf (look for the line which says mcast_join=<IP ADDRESS>).
|
46
|
+
|
47
|
+
dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
|
48
|
+
dfs.period=10
|
49
|
+
dfs.servers=<%= @node[:poolparty][:ganglia_first_node_in_clusters_ip] %>:8649
|
50
|
+
|
51
|
+
mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
|
52
|
+
mapred.period=10
|
53
|
+
mapred.servers=<%= @node[:poolparty][:ganglia_first_node_in_clusters_ip] %>:8649
|
54
|
+
|
55
|
+
jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
|
56
|
+
jvm.period=10
|
57
|
+
jvm.servers=<%= @node[:poolparty][:ganglia_first_node_in_clusters_ip] %>:8649
|
58
|
+
|
59
|
+
rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
|
60
|
+
rpc.period=10
|
61
|
+
rpc.servers=<%= @node[:poolparty][:ganglia_first_node_in_clusters_ip] %>:8649
|
62
|
+
|
63
|
+
<% end %>
|
64
|
+
|
65
|
+
# vim: ft=conf
|
@@ -0,0 +1,427 @@
|
|
1
|
+
=begin rdoc
|
2
|
+
|
3
|
+
== Overview
|
4
|
+
Install a hadoop cluster
|
5
|
+
|
6
|
+
== Requirements
|
7
|
+
You'll need apache and php enabled in your clouds.rb. For example:
|
8
|
+
|
9
|
+
apache do
|
10
|
+
enable_php5
|
11
|
+
end
|
12
|
+
|
13
|
+
== Bugs
|
14
|
+
This assumes your clouds are named "hadoop_master", "hadoop_slave" and (optionally) "hadoop_tasktracker". TODO: pass these names in as variables.
|
15
|
+
|
16
|
+
== References
|
17
|
+
=end
|
18
|
+
|
19
|
+
|
20
|
+
|
21
|
+
module PoolParty
|
22
|
+
module Plugin
|
23
|
+
class Hadoop < Plugin
|
24
|
+
# Runs the provisioning steps for a Hadoop node, in order, inside +do_once+.
# add_users_and_groups is intentionally left disabled, as in the original sequence.
def before_load(o={}, &block)
  provisioning_steps = [
    :install_jdk,
    :install_dependencies,
    # :add_users_and_groups,
    :create_keys,
    :connect_keys,
    :build,
    :configure,
    :format_hdfs,
    :create_aliases
  ]

  do_once do
    # map + last keeps the block's value equal to the final step's result
    provisioning_steps.map { |step| send(step) }.last
  end
end
|
37
|
+
|
38
|
+
# Runs the steps that depend on the currently running nodes:
# host aliases, ssh configuration, and the masters/slaves files.
def perform_just_in_time_operations
  late_steps = [:create_reference_hosts, :create_ssh_configs, :create_master_and_slaves_files]
  late_steps.map { |step| send(step) }.last
end
|
43
|
+
|
44
|
+
# Installs the Sun Java 6 JDK package and writes /etc/jvm from the jvm.conf template.
def install_jdk
  # accept the sun license agreements. see: http://www.davidpashley.com/blog/debian/java-license
  %w[jdk jre].each do |flavour|
    has_exec "echo sun-java6-#{flavour} shared/accepted-sun-dlj-v1-1 select true | /usr/bin/debconf-set-selections"
  end

  has_package(:name => "sun-java6-jdk")

  has_file(:name => "/etc/jvm") do
    mode 0644
    template "jvm.conf"
  end
end
|
54
|
+
|
55
|
+
# Installs the dnsutils package
# (presumably for the `dig` lookup issued by create_reference_hosts).
def install_dependencies
  has_package :name => "dnsutils"
end
|
58
|
+
|
59
|
+
# Declares the "hadoop" group, the "hadoop" user in that group, and its home directory.
# Not currently invoked from before_load (the call there is commented out).
def add_users_and_groups
  account = "hadoop"

  has_group account, :action => :create
  has_user account, :gid => account
  has_directory "/home/#{account}", :owner => account, :mode => "755"

  # TODO - ssh key code below needs to turn into these lines. those should become plugins
  # has_ssh_key :user => "hadoop", :name => "hadoop_id_rsa", :create => true
  # has_authorized_key :user => "hadoop", :name => "hadoop_id_rsa"
end
|
68
|
+
|
69
|
+
# Generates the cluster's passphrase-less RSA key pair on the local machine,
# unless the private key file already exists.
#
# Returns nil when the key already exists, otherwise the result of the
# ssh-keygen invocation (true/false/nil, as returned by Kernel#system).
def create_keys
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2
  return if File.exist?(hadoop_id_rsa)

  FileUtils.mkdir_p(cloud_keys_dir)

  # todo, make into variables
  keygen = ["ssh-keygen", "-t", "rsa", "-N", "", "-f", hadoop_id_rsa]
  puts keygen.join(" ")
  # argv form avoids shell quoting problems when the key path contains spaces
  system(*keygen)
end
|
77
|
+
|
78
|
+
# everything below should become methods and/or plugins
|
79
|
+
# Installs the locally generated key pair for the hadoop user on the node:
# uploads the private key, moves it into ~/.ssh with strict permissions, and
# appends the public key to authorized_keys.
def connect_keys
  # has_exec "ssh-keygen -t rsa -N '' -f /home/hadoop/.ssh/id_rsa", :user => "hadoop", :not_if => "test -e /home/hadoop/.ssh/id_rsa"

  staging_dir = "#{home_dir}/ssh"
  ssh_dir     = "#{home_dir}/.ssh"
  key_name    = hadoop_id_rsa_base

  # so annoying, chef/rsync/something doesn't copy over dotfiles, so upload it as non-dot
  has_directory :name => staging_dir
  has_directory :name => ssh_dir
  # File.read closes the handle; the previous open(...).read left it open
  has_file :name => "#{staging_dir}/#{key_name}", :content => File.read(hadoop_id_rsa)
  # use key_name here too, so the mv source always matches the file uploaded above
  has_exec "mv #{staging_dir}/#{key_name} #{ssh_dir}/#{key_name}"
  has_exec "chmod 600 #{ssh_dir}/#{key_name}"
  has_exec "chmod 700 #{ssh_dir}"
  has_exec "rm -rf #{staging_dir}"

  # setup authorized keys
  authorized_keys = "#{ssh_dir}/authorized_keys"
  has_exec "touch #{authorized_keys}"
  has_exec "chmod 644 #{authorized_keys}"
  has_exec "chown -R #{user} #{ssh_dir}"
  has_line_in_file :file => authorized_keys, :line => File.read("#{hadoop_id_rsa}.pub")
end
|
97
|
+
|
98
|
+
# Registers a host alias (e.g. master0, slave1) for every running node via `ghost modify`.
# Nodes with a private DNS name are resolved with dig on the target machine;
# otherwise the node's :ip value is used directly.
def create_reference_hosts
  each_node_with_type_and_index do |node, kind, position|
    dns_name = node[:private_dns_name]
    address = dns_name ? "`dig +short #{dns_name}`" : node[:ip]
    has_exec "ghost modify #{kind}#{position} #{address}"
  end
end
|
107
|
+
|
108
|
+
# Primes ssh for the hadoop user: connects once to every known node alias and to
# localhost with host-key checking disabled, then installs ~/.ssh/config pointing
# at the cluster identity file.
def create_ssh_configs
  identity_file = "#{home_dir}/.ssh/#{hadoop_id_rsa_base}"
  ssh_probe = "ssh -o 'StrictHostKeyChecking no' -i #{identity_file}"

  each_node_with_type_and_index do |_node, kind, position|
    host_alias = "#{kind}#{position}"
    has_exec "#{ssh_probe} #{host_alias} echo",
      :user => user, # verify the host key
      :ignore_failure => true,
      :only_if => "grep #{host_alias} /etc/hosts"
  end

  # NOTE(review): leading whitespace inside this heredoc could not be recovered from the source view
  ssh_config = <<EOF
Host *
IdentityFile #{identity_file}
EOF

  # has_file("#{home_dir}/.ssh/known_hosts", :template => "known_hosts.erb", :owner => user, :group => user, :mode => "600") # clear out the known hosts file

  has_exec "#{ssh_probe} localhost echo", :user => user # verify the host key

  # uploaded under a non-dot name first, then moved into ~/.ssh
  has_file("#{home_dir}/ssh_config", :content => ssh_config)
  has_exec "mv #{home_dir}/ssh_config #{home_dir}/.ssh/config"
  has_exec "chmod 600 #{home_dir}/.ssh/config"
  has_exec "chown #{user}:#{group} #{home_dir}/.ssh/config"
end
|
131
|
+
|
132
|
+
# Downloads and unpacks the Hadoop 0.20.0 tarball under /usr/local/src, runs
# build_src on the unpacked tree, then moves it to hadoop_install_dir.
# Each shell step carries a :not_if guard so it is skipped once its result exists.
def build
  src_root  = "/usr/local/src"
  tarball   = "#{src_root}/hadoop-0.20.0.tar.gz"
  work_tree = "#{src_root}/hadoop"

  has_directory src_root

  has_exec "wget http://www.gossipcheck.com/mirrors/apache/hadoop/core/hadoop-0.20.0/hadoop-0.20.0.tar.gz -O #{tarball}",
    :not_if => "test -e #{tarball}"
  has_exec "cd #{src_root} && tar -xzf hadoop-0.20.0.tar.gz",
    :not_if => "test -e #{work_tree}"
  has_exec "mv #{src_root}/hadoop-0.20.0 #{work_tree}",
    :not_if => "test -e #{work_tree}/hadoop-0.20.0-core.jar"
  has_exec "chown -R #{user}:#{group} #{work_tree}",
    :not_if => "test -e #{hadoop_install_dir}"

  build_src

  has_exec "mv #{work_tree} #{hadoop_install_dir}",
    :not_if => "test -e #{hadoop_install_dir}"
end
|
146
|
+
|
147
|
+
# Patches and rebuilds the Hadoop core jar inside /usr/local/src/hadoop:
# keeps a copy of the stock jar under orig/, downloads the patch(es), applies
# every patch not yet marked ".applied", runs `ant jar`, and notifies the
# "upgrade-core-hadoop-jar" exec so the rebuilt jar is copied into hadoop_install_dir.
def build_src
  has_directory "/usr/local/src/hadoop/orig"
  has_exec "cp /usr/local/src/hadoop/hadoop-0.20.0-core.jar /usr/local/src/hadoop/orig/hadoop-0.20.0-core.jar",
    :not_if => "test -e /usr/local/src/hadoop/orig/hadoop-0.20.0-core.jar"

  has_package "ant"
  has_package "zlib1g-dev"
  has_directory "/usr/local/src/hadoop/patches"

  # get whatever patches you want here
  has_wget("https://issues.apache.org/jira/secure/attachment/12407207/HADOOP-4675-v7.patch", "/usr/local/src/hadoop/patches/0001-ganglia31-HADOOP-4675-v7.patch")

  # apply them
  has_exec "cd /usr/local/src/hadoop && for PATCH in `find patches -type f | grep -v applied`; do (patch -p0 < ${PATCH}); mv ${PATCH} ${PATCH}.applied ; done"

  # probably need to restart hadoop somewhere here

  # declared with :action => :nothing; triggered by the notification from the ant build below
  has_exec :name => "upgrade-core-hadoop-jar",
    # :command => "cp /usr/local/src/hadoop/build/hadoop-0.20.1-dev-core.jar /usr/local/src/hadoop/hadoop-0.20.0-core.jar",
    :command => "cp -f /usr/local/src/hadoop/build/hadoop-0.20.1-dev-core.jar #{hadoop_install_dir}/hadoop-0.20.0-core.jar",
    :action => :nothing

  has_exec "export JAVA_HOME=/usr/lib/jvm/java-6-sun && cd /usr/local/src/hadoop && ant jar" do
    not_if "test -e /usr/local/src/hadoop/build/hadoop-0.20.1-dev-core.jar"
    notifies get_exec("upgrade-core-hadoop-jar"), :run
  end
end # closes build_src; this `end` was missing, leaving the method (and the class) unbalanced
|
173
|
+
|
174
|
+
# Directory the built Hadoop tree is moved into.
def hadoop_install_dir
  '/usr/local/hadoop'
end
|
177
|
+
|
178
|
+
# Publishes the master's hostname and the HDFS default port as the variables
# "current_master" and "hadoop_fs_default_port", inside +do_once+.
#
# master_hostname - host alias of the master (default "master0")
# port            - filesystem port as a string (default "54310")
def set_current_master(master_hostname="master0", port="54310")
  do_once do
    # todo, both could eventually be made more dynamic here
    has_variable(:name => "current_master", :value => master_hostname)
    has_variable(:name => "hadoop_fs_default_port", :value => port)
  end
end
|
184
|
+
|
185
|
+
# Writes Hadoop's configuration: installs the ghost gem, renders the conf/
# templates, creates the data directory tree, and publishes the variables
# the templates consume.
def configure
  has_gem_package("bjeanes-ghost")

  # renders one template into the install dir with mode 0644
  render_conf = lambda do |relative_path, template_name|
    has_file(:name => hadoop_install_dir/relative_path) do
      mode 0644
      template template_name
    end
  end

  # declares a directory owned by the hadoop user with mode 755
  owned_dir = lambda do |path|
    has_directory path, :owner => user, :mode => "755"
  end

  render_conf.call("conf/hadoop-env.sh", "hadoop-env.sh")

  # this isn't the number of nodes, this is the block replication level
  # this should be able to be configured in the hadoop config
  has_variable "block_replication_level", :value => 5

  owned_dir.call(hadoop_data_dir)
  has_exec "chgrp -R #{group} #{hadoop_data_dir}"

  %w{logs name data mapred temp}.each do |folder|
    owned_dir.call(hadoop_data_dir/folder)
  end
  owned_dir.call(hadoop_data_dir/:temp/:dfs/:data)

  %w{local system temp}.each do |folder|
    owned_dir.call(hadoop_data_dir/:temp/:mapred/folder)
  end

  has_variable "hadoop_data_dir", :value => hadoop_data_dir
  has_variable "hadoop_mapred_dir", :value => hadoop_data_dir/:mapred

  has_variable("hadoop_this_nodes_ip", :value => lambda{ %Q{%x[curl http://169.254.169.254/latest/meta-data/local-ipv4]}})

  %w{core hdfs mapred}.each do |config|
    render_conf.call("conf/#{config}-site.xml", "#{config}-site.xml.erb")
  end

  render_conf.call("conf/log4j.properties", "log4j.properties.erb")
end
|
227
|
+
|
228
|
+
# Placeholder: the counting logic below is commented out, so this returns nil.
def number_of_running_nodes_in_pool
  # clouds.keys.inject(0) { |sum,cloud_name| sum = sum + clouds[cloud_name].nodes(:status => 'running').size; sum }
  nil
end
|
231
|
+
|
232
|
+
# Configures a master node: records the current master, then installs the
# datanode, jobtracker, namenode and secondarynamenode services.
def configure_master
  # create_master_and_slaves_files
  set_current_master

  master_roles = %w{datanode jobtracker namenode secondarynamenode}
  master_roles.each { |role| has_hadoop_service(role) }
end
|
240
|
+
|
241
|
+
# Configures a slave node: records the current master, then delegates to
# configure_datanode and configure_tasktracker.
def configure_slave
  set_current_master

  slave_roles = %w{datanode tasktracker}
  slave_roles.each { |role| send("configure_#{role}") }
end
|
248
|
+
|
249
|
+
# Records the current master and installs the tasktracker service.
def configure_tasktracker
  set_current_master
  has_hadoop_service 'tasktracker'
end
|
253
|
+
|
254
|
+
# Records the current master and installs the datanode service.
def configure_datanode
  set_current_master
  has_hadoop_service 'datanode'
end
|
258
|
+
|
259
|
+
# Installs the init script for one Hadoop daemon and declares its service
# as enabled and running.
#
# hadoop_role - daemon name, e.g. "datanode"; selects init.d/hadoop-<role>
def has_hadoop_service(hadoop_role)
  service_name = "hadoop-#{hadoop_role}"

  has_file(:name => "/etc/init.d/#{service_name}") do
    mode 0755
    template "init.d/#{service_name}"
  end

  has_service service_name, :enabled => true, :running => true, :supports => [:restart]
end
|
266
|
+
|
267
|
+
# Prepares the data directory and formats the HDFS namenode, skipping the
# format when <data dir>/dfs/name already exists.
def format_hdfs
  has_directory hadoop_data_dir, :mode => "770"
  has_exec "chown -R #{user}:#{group} #{hadoop_data_dir}"

  format_command = "#{hadoop_install_dir}/bin/hadoop namenode -format"
  # alternative guard: "test -e #{hadoop_data_dir}/hadoop-hadoop/dfs"
  # this guard depends on if you have user-based data directories in core-site.xml
  already_formatted = "test -e #{hadoop_data_dir}/dfs/name"

  has_exec format_command, :not_if => already_formatted, :user => user
end
|
276
|
+
|
277
|
+
# stuff for examples
|
278
|
+
|
279
|
+
# Prepares the example job by downloading its sample input texts.
def prep_example_job
  download_sample_data()
end
|
282
|
+
|
283
|
+
# Runs the example job end to end: start Hadoop, load the sample data into
# HDFS, then launch the job.
def run_example_job
  job_steps = [:start_hadoop, :copy_sample_data_to_hdfs, :start_the_job]
  job_steps.map { |step| send(step) }.last
end
|
288
|
+
|
289
|
+
# Runs bin/start-all.sh from the install dir as the hadoop user.
def start_hadoop
  launcher = hadoop_install_dir/"bin/start-all.sh"
  has_exec launcher, :user => user
end
|
293
|
+
|
294
|
+
# Downloads three Project Gutenberg texts into /tmp/gutenberg (skipping files
# that already exist) and hands the directory to the hadoop user.
def download_sample_data
  sample_dir = "/tmp/gutenberg"
  texts = [
    ["http://www.gutenberg.org/files/20417/20417.txt", "outline-of-science.txt"],
    ["http://www.gutenberg.org/dirs/etext04/7ldvc10.txt", "7ldvc10.txt"],
    ["http://www.gutenberg.org/files/4300/4300.txt", "ulysses.txt"]
  ]

  has_directory sample_dir, :mode => "770", :owner => user

  # todo, create has_wget
  texts.each do |url, filename|
    target = "#{sample_dir}/#{filename}"
    has_exec "wget #{url} -O #{target}", :not_if => "test -e #{target}"
  end

  has_exec "chown -R #{user}:#{group} #{sample_dir}"
end
|
305
|
+
|
306
|
+
# Refreshes the sample data in HDFS: removes any existing "gutenberg" and
# "gutenberg-output" directories, then copies /tmp/gutenberg in unless the
# listing already contains ulysses.
def copy_sample_data_to_hdfs
  hadoop = "#{hadoop_install_dir}/bin/hadoop"
  as_hadoop_user = "sudo -H -u #{user} #{hadoop}"

  %w[gutenberg gutenberg-output].each do |hdfs_dir|
    has_exec "#{hadoop} dfs -rmr #{hdfs_dir}", :user => user,
      :only_if => "#{as_hadoop_user} dfs -ls #{hdfs_dir}"
  end

  has_exec "#{hadoop} dfs -copyFromLocal /tmp/gutenberg gutenberg",
    :not_if => "#{as_hadoop_user} dfs -ls gutenberg | grep ulysses",
    :user => user
end
|
315
|
+
|
316
|
+
# Launches the wordcount example from the bundled examples jar,
# reading "gutenberg" and writing "gutenberg-output".
def start_the_job
  hadoop = "#{hadoop_install_dir}/bin/hadoop"
  examples_jar = "#{hadoop_install_dir}/hadoop-0.20.0-examples.jar"

  has_exec "#{hadoop} jar #{examples_jar} wordcount gutenberg gutenberg-output", :user => user
end
|
320
|
+
|
321
|
+
# Writes conf/masters and conf/slaves, one alias per line (master0, master1, …
# / slave0, slave1, …), numbered by each running node's position.
def create_master_and_slaves_files
  masters_file = master_nodes.each_with_index.map { |_node, position| "master#{position}\n" }.join
  slaves_file  = slave_nodes.each_with_index.map { |_node, position| "slave#{position}\n" }.join

  # dont need tasktracker nodes here b/c this is for the dfs

  has_file(hadoop_install_dir/:conf/:masters, :content => masters_file)
  has_file(hadoop_install_dir/:conf/:slaves, :content => slaves_file)
end
|
338
|
+
|
339
|
+
# Defines the "cd-hadoop" bash alias, which pushd's into the Hadoop install directory.
def create_aliases
  # derive the path from hadoop_install_dir so the alias cannot drift from the install location
  has_bash_alias :name => "cd-hadoop", :value => "pushd #{hadoop_install_dir}"
end
|
342
|
+
|
343
|
+
# Creates a client account: the system user, its ~/.ssh directory, and an HDFS
# home at /user/<username> owned by that user.
#
# username - login name of the client user
def create_client_user(username)
  hadoop = "#{hadoop_install_dir}/bin/hadoop"
  as_hadoop_user = "sudo -H -u #{user} #{hadoop}"
  hdfs_home = "/user/#{username}"

  has_user(username)
  has_directory("/home/#{username}/.ssh", :mode => "700", :owner => username, :group => username)

  # create the HDFS home only when /user is listable and the home is absent
  has_exec "#{hadoop} fs -mkdir #{hdfs_home}", :user => user,
    :only_if => "#{as_hadoop_user} fs -ls /user",
    :not_if => "#{as_hadoop_user} fs -ls #{hdfs_home}"

  has_exec "#{hadoop} fs -chown #{username} #{hdfs_home}", :user => user,
    :only_if => "#{as_hadoop_user} fs -ls #{hdfs_home}"
end
|
353
|
+
|
354
|
+
private
|
355
|
+
# The "keys" directory next to the pool spec file.
def cloud_keys_dir
  spec_dir = File.dirname(pool_specfile)
  spec_dir/:keys
end
|
358
|
+
|
359
|
+
# Full local path of the cluster's private key file.
def hadoop_id_rsa
  [cloud_keys_dir, hadoop_id_rsa_base].join("/")
end
|
362
|
+
|
363
|
+
# File name (without directory) of the cluster's private key.
def hadoop_id_rsa_base
  'hadoop_id_rsa'
end
|
366
|
+
|
367
|
+
# Root directory for Hadoop's data on each node.
def hadoop_data_dir
  '/mnt/hadoop-data'
end
|
370
|
+
|
371
|
+
# Home directory of the account that runs Hadoop.
# Alternative noted by the author: "/home/hadoop".
def home_dir
  '/root'
end
|
376
|
+
|
377
|
+
# Account that runs Hadoop and owns its files.
# Alternative noted by the author: hadoop.
def user
  'root'
end
|
382
|
+
|
383
|
+
# Group that owns Hadoop's files.
# Alternative noted by the author: hadoop.
def group
  'root'
end
|
388
|
+
|
389
|
+
# Declares an exec that appends +line+ to +file+ unless grep already finds it there.
#
# file - path of the file to edit
# line - text to ensure is present (quoted via String#safe_quote)
def my_line_in_file(file, line)
  quoted = line.safe_quote
  present_check = "grep -q '#{quoted}' #{file}"

  has_exec "line_in_#{file}_#{quoted}" do
    command "#{present_check} || echo '#{quoted}' >> #{file}"
    not_if present_check
  end
end
|
395
|
+
|
396
|
+
# Running nodes of the :hadoop_master cloud, or [] when there are none to report.
def master_nodes
  running = clouds[:hadoop_master].andand.nodes(:status => 'running')
  running || []
end
|
399
|
+
|
400
|
+
# Running nodes of the :hadoop_slave cloud, or [] when there are none to report.
def slave_nodes
  running = clouds[:hadoop_slave].andand.nodes(:status => 'running')
  running || []
end
|
403
|
+
|
404
|
+
# Running nodes of the :hadoop_tasktracker cloud, or [] when there are none to report.
def tasktracker_nodes
  running = clouds[:hadoop_tasktracker].andand.nodes(:status => 'running')
  running || []
end
|
407
|
+
|
408
|
+
# Node kinds iterated by each_node_with_type_and_index; each one has a matching <kind>_nodes method.
def node_types
  ["master", "slave", "tasktracker"]
end
|
411
|
+
|
412
|
+
# for each node type, yield all the running nodes with an index
|
413
|
+
# Calls the block with (node, node_type, index) for every running node,
# grouped by node type; the index restarts at 0 for each type.
def each_node_with_type_and_index(&block)
  node_types.each do |kind|
    running = send("#{kind}_nodes")
    running.each_with_index { |node, position| block.call(node, kind, position) }
  end
end
|
420
|
+
|
421
|
+
# Declares an exec that downloads +source_url+ to +location+ with wget
# (certificate checking disabled), skipped when +location+ already exists.
def has_wget(source_url, location)
  download = "wget --no-check-certificate #{source_url} -O #{location}"
  has_exec download, :not_if => "test -e #{location}"
end
|
424
|
+
|
425
|
+
end
|
426
|
+
end
|
427
|
+
end
|