mobilize-hdfs 1.0.06 → 1.0.07
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +6 -6
- data/lib/mobilize-hdfs/handlers/hdfs.rb +11 -8
- data/lib/mobilize-hdfs/version.rb +1 -1
- data/mobilize-hdfs.gemspec +2 -2
- data/test/hdfs_job_rows.yml +2 -2
- data/test/mobilize-hdfs_test.rb +2 -2
- metadata +5 -5
data/README.md
CHANGED
@@ -228,16 +228,16 @@ same cluster as your first.
|
|
228
228
|
Meta
|
229
229
|
----
|
230
230
|
|
231
|
-
* Code: `git clone git://github.com/
|
232
|
-
* Home: <https://github.com/
|
233
|
-
* Bugs: <https://github.com/
|
231
|
+
* Code: `git clone git://github.com/dena/mobilize-hdfs.git`
|
232
|
+
* Home: <https://github.com/dena/mobilize-hdfs>
|
233
|
+
* Bugs: <https://github.com/dena/mobilize-hdfs/issues>
|
234
234
|
* Gems: <http://rubygems.org/gems/mobilize-hdfs>
|
235
235
|
|
236
236
|
<a name='section_Author'></a>
|
237
237
|
Author
|
238
238
|
------
|
239
239
|
|
240
|
-
Cassio Paes-Leme :: cpaesleme@
|
240
|
+
Cassio Paes-Leme :: cpaesleme@dena.com :: @cpaesleme
|
241
241
|
|
242
|
-
[mobilize-base]: https://github.com/
|
243
|
-
[mobilize-ssh]: https://github.com/
|
242
|
+
[mobilize-base]: https://github.com/dena/mobilize-base
|
243
|
+
[mobilize-ssh]: https://github.com/dena/mobilize-ssh
|
@@ -37,7 +37,7 @@ module Mobilize
|
|
37
37
|
cluster, cluster_path = Hdfs.resolve_path(path)
|
38
38
|
gateway_node = Hadoop.gateway_node(cluster)
|
39
39
|
#need to direct stderr to dev null since hdfs throws errors at being headed off
|
40
|
-
command = "((#{Hadoop.exec_path(cluster)} fs -fs '#{Hdfs.
|
40
|
+
command = "((#{Hadoop.exec_path(cluster)} fs -fs '#{Hdfs.root(cluster)}' -cat '#{cluster_path}'"
|
41
41
|
command += " | head -c #{Hadoop.read_limit}) > out.txt 2> /dev/null) && cat out.txt"
|
42
42
|
response = Ssh.run(gateway_node,command,user)
|
43
43
|
if response.length==Hadoop.read_limit
|
@@ -53,7 +53,8 @@ module Mobilize
|
|
53
53
|
elsif Hadoop.clusters.keys.include?(path.split("/").first)
|
54
54
|
return path.split("/").ie{|p| [p.first,"/#{p[1..-1].join("/")}"]}
|
55
55
|
else
|
56
|
-
|
56
|
+
#default cluster, slash on the front
|
57
|
+
return [Hadoop.output_cluster,"/#{path.to_s}"]
|
57
58
|
end
|
58
59
|
end
|
59
60
|
|
@@ -111,18 +112,21 @@ module Mobilize
|
|
111
112
|
source_path = params['source']
|
112
113
|
target_path = params['target']
|
113
114
|
user = params['user']
|
114
|
-
|
115
|
-
|
116
|
-
if source_cluster.nil?
|
117
|
-
#not hdfs
|
115
|
+
begin
|
116
|
+
#check for source in gsheet format
|
118
117
|
gdrive_slot = Gdrive.slot_worker_by_path(stage_path)
|
119
118
|
#return blank response if there are no slots available
|
120
119
|
return nil unless gdrive_slot
|
121
120
|
source_dst = s.source_dsts(gdrive_slot).first
|
121
|
+
in_string = source_dst.read(user)
|
122
122
|
Gdrive.unslot_worker_by_path(stage_path)
|
123
|
-
|
123
|
+
rescue
|
124
|
+
#try hdfs
|
125
|
+
source_cluster, source_cluster_path = Hdfs.resolve_path(source_path)
|
124
126
|
source_path = "#{source_cluster}#{source_cluster_path}"
|
125
127
|
source_dst = Dataset.find_or_create_by_handler_and_path("hdfs",source_path)
|
128
|
+
in_string = source_dst.read(user)
|
129
|
+
raise "No data found at hdfs://#{source_path}" unless in_string.to_s.length>0
|
126
130
|
end
|
127
131
|
|
128
132
|
#determine cluster for target
|
@@ -137,7 +141,6 @@ module Mobilize
|
|
137
141
|
end
|
138
142
|
|
139
143
|
target_path = "#{target_cluster}#{target_cluster_path}"
|
140
|
-
in_string = source_dst.read(user)
|
141
144
|
out_string = Hdfs.write(target_path,in_string,user)
|
142
145
|
|
143
146
|
out_url = "hdfs://#{Hadoop.output_cluster}#{Hadoop.output_dir}hdfs/#{stage_path}/out"
|
data/mobilize-hdfs.gemspec
CHANGED
@@ -10,11 +10,11 @@ Gem::Specification.new do |gem|
|
|
10
10
|
gem.email = ["cpaesleme@ngmoco.com"]
|
11
11
|
gem.description = %q{Adds hdfs read, write, and copy support to mobilize-ssh}
|
12
12
|
gem.summary = %q{Adds hdfs read, write, and copy support to mobilize-ssh}
|
13
|
-
gem.homepage = "http://github.com/
|
13
|
+
gem.homepage = "http://github.com/dena/mobilize-hdfs"
|
14
14
|
|
15
15
|
gem.files = `git ls-files`.split($/)
|
16
16
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
17
17
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
18
18
|
gem.require_paths = ["lib"]
|
19
|
-
gem.add_runtime_dependency "mobilize-ssh","1.1.
|
19
|
+
gem.add_runtime_dependency "mobilize-ssh","1.1.07"
|
20
20
|
end
|
data/test/hdfs_job_rows.yml
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
stage1: hdfs.write target:"/user/mobilize/test/test_hdfs_1.out",
|
6
6
|
source:"Runner_mobilize(test)/test_hdfs_1.in"
|
7
7
|
stage2: hdfs.copy source:"/user/mobilize/test/test_hdfs_1.out",
|
8
|
-
target:"test_cluster_2/user/mobilize/test/
|
9
|
-
stage3: hdfs.read source:"/user/mobilize/test/test_hdfs_1_copy.out"
|
8
|
+
target:"test_cluster_2/user/mobilize/test/test_hdfs_1_copy.out",
|
9
|
+
stage3: hdfs.read source:"test_cluster_2/user/mobilize/test/test_hdfs_1_copy.out"
|
10
10
|
stage4: gsheet.write source:"hdfs://test_cluster_2/user/mobilize/test/test_hdfs_1_copy.out",
|
11
11
|
target:"Runner_mobilize(test)/test_hdfs_1_copy.out"
|
data/test/mobilize-hdfs_test.rb
CHANGED
@@ -22,7 +22,7 @@ describe "Mobilize" do
|
|
22
22
|
|
23
23
|
puts "add test_source data"
|
24
24
|
hdfs_1_sheet = Mobilize::Gsheet.find_or_create_by_path("#{r.path.split("/")[0..-2].join("/")}/test_hdfs_1.in",gdrive_slot)
|
25
|
-
hdfs_1_tsv = ([
|
25
|
+
hdfs_1_tsv = ([%w{test0 test1 test2 test3 test4 test5 test6 test7 test8 test9}.join("\t")]*10).join("\n")
|
26
26
|
hdfs_1_sheet.write(hdfs_1_tsv,u.name)
|
27
27
|
|
28
28
|
jobs_sheet = r.gsheet(gdrive_slot)
|
@@ -41,7 +41,7 @@ describe "Mobilize" do
|
|
41
41
|
puts "jobtracker posted data to test sheet"
|
42
42
|
test_destination_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/test_hdfs_1_copy.out",gdrive_slot)
|
43
43
|
|
44
|
-
assert test_destination_sheet.
|
44
|
+
assert test_destination_sheet.read(u.name).length == 599
|
45
45
|
end
|
46
46
|
|
47
47
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mobilize-hdfs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.07
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-02-
|
12
|
+
date: 2013-02-22 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: mobilize-ssh
|
@@ -18,7 +18,7 @@ dependencies:
|
|
18
18
|
requirements:
|
19
19
|
- - '='
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version: 1.1.
|
21
|
+
version: 1.1.07
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
24
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -26,7 +26,7 @@ dependencies:
|
|
26
26
|
requirements:
|
27
27
|
- - '='
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
version: 1.1.
|
29
|
+
version: 1.1.07
|
30
30
|
description: Adds hdfs read, write, and copy support to mobilize-ssh
|
31
31
|
email:
|
32
32
|
- cpaesleme@ngmoco.com
|
@@ -50,7 +50,7 @@ files:
|
|
50
50
|
- test/mobilize-hdfs_test.rb
|
51
51
|
- test/redis-test.conf
|
52
52
|
- test/test_helper.rb
|
53
|
-
homepage: http://github.com/
|
53
|
+
homepage: http://github.com/dena/mobilize-hdfs
|
54
54
|
licenses: []
|
55
55
|
post_install_message:
|
56
56
|
rdoc_options: []
|