mobilize-hdfs 1.0.06 → 1.0.07

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -228,16 +228,16 @@ same cluster as your first.
228
228
  Meta
229
229
  ----
230
230
 
231
- * Code: `git clone git://github.com/ngmoco/mobilize-hdfs.git`
232
- * Home: <https://github.com/ngmoco/mobilize-hdfs>
233
- * Bugs: <https://github.com/ngmoco/mobilize-hdfs/issues>
231
+ * Code: `git clone git://github.com/dena/mobilize-hdfs.git`
232
+ * Home: <https://github.com/dena/mobilize-hdfs>
233
+ * Bugs: <https://github.com/dena/mobilize-hdfs/issues>
234
234
  * Gems: <http://rubygems.org/gems/mobilize-hdfs>
235
235
 
236
236
  <a name='section_Author'></a>
237
237
  Author
238
238
  ------
239
239
 
240
- Cassio Paes-Leme :: cpaesleme@ngmoco.com :: @cpaesleme
240
+ Cassio Paes-Leme :: cpaesleme@dena.com :: @cpaesleme
241
241
 
242
- [mobilize-base]: https://github.com/ngmoco/mobilize-base
243
- [mobilize-ssh]: https://github.com/ngmoco/mobilize-ssh
242
+ [mobilize-base]: https://github.com/dena/mobilize-base
243
+ [mobilize-ssh]: https://github.com/dena/mobilize-ssh
@@ -37,7 +37,7 @@ module Mobilize
37
37
  cluster, cluster_path = Hdfs.resolve_path(path)
38
38
  gateway_node = Hadoop.gateway_node(cluster)
39
39
  #need to direct stderr to dev null since hdfs throws errors at being headed off
40
- command = "((#{Hadoop.exec_path(cluster)} fs -fs '#{Hdfs.namenode_path(path)}' -cat #{cluster_path}"
40
+ command = "((#{Hadoop.exec_path(cluster)} fs -fs '#{Hdfs.root(cluster)}' -cat '#{cluster_path}'"
41
41
  command += " | head -c #{Hadoop.read_limit}) > out.txt 2> /dev/null) && cat out.txt"
42
42
  response = Ssh.run(gateway_node,command,user)
43
43
  if response.length==Hadoop.read_limit
@@ -53,7 +53,8 @@ module Mobilize
53
53
  elsif Hadoop.clusters.keys.include?(path.split("/").first)
54
54
  return path.split("/").ie{|p| [p.first,"/#{p[1..-1].join("/")}"]}
55
55
  else
56
- return [nil,nil]
56
+ #default cluster, slash on the front
57
+ return [Hadoop.output_cluster,"/#{path.to_s}"]
57
58
  end
58
59
  end
59
60
 
@@ -111,18 +112,21 @@ module Mobilize
111
112
  source_path = params['source']
112
113
  target_path = params['target']
113
114
  user = params['user']
114
- #check for source in hdfs format
115
- source_cluster, source_cluster_path = Hdfs.resolve_path(source_path)
116
- if source_cluster.nil?
117
- #not hdfs
115
+ begin
116
+ #check for source in gsheet format
118
117
  gdrive_slot = Gdrive.slot_worker_by_path(stage_path)
119
118
  #return blank response if there are no slots available
120
119
  return nil unless gdrive_slot
121
120
  source_dst = s.source_dsts(gdrive_slot).first
121
+ in_string = source_dst.read(user)
122
122
  Gdrive.unslot_worker_by_path(stage_path)
123
- else
123
+ rescue
124
+ #try hdfs
125
+ source_cluster, source_cluster_path = Hdfs.resolve_path(source_path)
124
126
  source_path = "#{source_cluster}#{source_cluster_path}"
125
127
  source_dst = Dataset.find_or_create_by_handler_and_path("hdfs",source_path)
128
+ in_string = source_dst.read(user)
129
+ raise "No data found at hdfs://#{source_path}" unless in_string.to_s.length>0
126
130
  end
127
131
 
128
132
  #determine cluster for target
@@ -137,7 +141,6 @@ module Mobilize
137
141
  end
138
142
 
139
143
  target_path = "#{target_cluster}#{target_cluster_path}"
140
- in_string = source_dst.read(user)
141
144
  out_string = Hdfs.write(target_path,in_string,user)
142
145
 
143
146
  out_url = "hdfs://#{Hadoop.output_cluster}#{Hadoop.output_dir}hdfs/#{stage_path}/out"
@@ -1,5 +1,5 @@
1
1
  module Mobilize
2
2
  module Hdfs
3
- VERSION = "1.0.06"
3
+ VERSION = "1.0.07"
4
4
  end
5
5
  end
@@ -10,11 +10,11 @@ Gem::Specification.new do |gem|
10
10
  gem.email = ["cpaesleme@ngmoco.com"]
11
11
  gem.description = %q{Adds hdfs read, write, and copy support to mobilize-ssh}
12
12
  gem.summary = %q{Adds hdfs read, write, and copy support to mobilize-ssh}
13
- gem.homepage = "http://github.com/ngmoco/mobilize-hdfs"
13
+ gem.homepage = "http://github.com/dena/mobilize-hdfs"
14
14
 
15
15
  gem.files = `git ls-files`.split($/)
16
16
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
17
17
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
18
  gem.require_paths = ["lib"]
19
- gem.add_runtime_dependency "mobilize-ssh","1.1.06"
19
+ gem.add_runtime_dependency "mobilize-ssh","1.1.07"
20
20
  end
@@ -5,7 +5,7 @@
5
5
  stage1: hdfs.write target:"/user/mobilize/test/test_hdfs_1.out",
6
6
  source:"Runner_mobilize(test)/test_hdfs_1.in"
7
7
  stage2: hdfs.copy source:"/user/mobilize/test/test_hdfs_1.out",
8
- target:"test_cluster_2/user/mobilize/test/test_hdfs_copy.out",
9
- stage3: hdfs.read source:"/user/mobilize/test/test_hdfs_1_copy.out"
8
+ target:"test_cluster_2/user/mobilize/test/test_hdfs_1_copy.out",
9
+ stage3: hdfs.read source:"test_cluster_2/user/mobilize/test/test_hdfs_1_copy.out"
10
10
  stage4: gsheet.write source:"hdfs://test_cluster_2/user/mobilize/test/test_hdfs_1_copy.out",
11
11
  target:"Runner_mobilize(test)/test_hdfs_1_copy.out"
@@ -22,7 +22,7 @@ describe "Mobilize" do
22
22
 
23
23
  puts "add test_source data"
24
24
  hdfs_1_sheet = Mobilize::Gsheet.find_or_create_by_path("#{r.path.split("/")[0..-2].join("/")}/test_hdfs_1.in",gdrive_slot)
25
- hdfs_1_tsv = ([(["test"]*10).join("\t")]*10).join("\n")
25
+ hdfs_1_tsv = ([%w{test0 test1 test2 test3 test4 test5 test6 test7 test8 test9}.join("\t")]*10).join("\n")
26
26
  hdfs_1_sheet.write(hdfs_1_tsv,u.name)
27
27
 
28
28
  jobs_sheet = r.gsheet(gdrive_slot)
@@ -41,7 +41,7 @@ describe "Mobilize" do
41
41
  puts "jobtracker posted data to test sheet"
42
42
  test_destination_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/test_hdfs_1_copy.out",gdrive_slot)
43
43
 
44
- assert test_destination_sheet.to_tsv.length == 499
44
+ assert test_destination_sheet.read(u.name).length == 599
45
45
  end
46
46
 
47
47
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mobilize-hdfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.06
4
+ version: 1.0.07
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-02-04 00:00:00.000000000 Z
12
+ date: 2013-02-22 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mobilize-ssh
@@ -18,7 +18,7 @@ dependencies:
18
18
  requirements:
19
19
  - - '='
20
20
  - !ruby/object:Gem::Version
21
- version: 1.1.06
21
+ version: 1.1.07
22
22
  type: :runtime
23
23
  prerelease: false
24
24
  version_requirements: !ruby/object:Gem::Requirement
@@ -26,7 +26,7 @@ dependencies:
26
26
  requirements:
27
27
  - - '='
28
28
  - !ruby/object:Gem::Version
29
- version: 1.1.06
29
+ version: 1.1.07
30
30
  description: Adds hdfs read, write, and copy support to mobilize-ssh
31
31
  email:
32
32
  - cpaesleme@ngmoco.com
@@ -50,7 +50,7 @@ files:
50
50
  - test/mobilize-hdfs_test.rb
51
51
  - test/redis-test.conf
52
52
  - test/test_helper.rb
53
- homepage: http://github.com/ngmoco/mobilize-hdfs
53
+ homepage: http://github.com/dena/mobilize-hdfs
54
54
  licenses: []
55
55
  post_install_message:
56
56
  rdoc_options: []