webhdfs-rlz 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/AUTHORS +2 -0
- data/COPYING +13 -0
- data/Gemfile +3 -0
- data/README.md +127 -0
- data/VERSION +1 -0
- data/lib/webhdfs.rb +2 -0
- data/lib/webhdfs/backport.rb +27 -0
- data/lib/webhdfs/client.rb +6 -0
- data/lib/webhdfs/client_v1.rb +411 -0
- data/lib/webhdfs/client_v2.rb +249 -0
- data/lib/webhdfs/exceptions.rb +15 -0
- data/lib/webhdfs/fileutils.rb +439 -0
- data/lib/webhdfs/kerberos.rb +52 -0
- data/lib/webhdfs/proxy.rb +28 -0
- data/lib/webhdfs/prueba.rb +41 -0
- data/lib/webhdfs/request.rb +175 -0
- data/lib/webhdfs/ssl.rb +44 -0
- data/lib/webhdfs/utilities.rb +25 -0
- data/spec/spec_helper.rb +2 -0
- data/test/test_helper.rb +20 -0
- data/test/webhdfs/fileutils.rb +69 -0
- data/webhdfs.gemspec +23 -0
- metadata +139 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 473d3b0eaffcc87f27f27d0f8dccf2bfe5fe2189
|
4
|
+
data.tar.gz: e0ade8a94b22d7eee85fa72077bac08b23858e8d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: b88255e72919b0141ad697487ad4cc3f9960c73f0cbdefd2e6738e1639d30e660580b9f6e7c232aaf2c352ada461b538131c2ea0da2229b21d0c3959b198e914
|
7
|
+
data.tar.gz: 354d2a40fecbc9e1fa63746f0d05b5b2627f7eeec9d8c4287682371713c260cfb04cf782648d6929f7936e665ad2987bbf05eb082fdf7ba83dd6ef6a66744c6a
|
data/AUTHORS
ADDED
data/COPYING
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
Copyright (C) 2012 Fluentd Project
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
# webhdfs - A client library implementation for Hadoop WebHDFS, and HttpFs, for Ruby
|
2
|
+
|
3
|
+
The webhdfs gem provides access to Hadoop WebHDFS (EXPERIMENTAL: and HttpFs). WebHDFS::Client is a client class, and WebHDFS::FileUtils is a utility like 'fileutils'.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
gem install webhdfs
|
8
|
+
|
9
|
+
## Usage
|
10
|
+
|
11
|
+
### WebHDFS::Client
|
12
|
+
|
13
|
+
For client object interface:
|
14
|
+
|
15
|
+
require 'webhdfs'
|
16
|
+
client = WebHDFS::Client.new(hostname, port)
|
17
|
+
# or with pseudo username authentication
|
18
|
+
client = WebHDFS::Client.new(hostname, port, username)
|
19
|
+
|
20
|
+
To create/append/read files:
|
21
|
+
|
22
|
+
client.create('/path/to/file', data)
|
23
|
+
client.create('/path/to/file', data, :overwrite => false, :blocksize => 268435456, :replication => 5, :permission => '0666')
|
24
|
+
|
25
|
+
# This does not require the whole data to be in memory; an IO object (e.g. an open File) is read chunk by chunk
|
26
|
+
client.create('/path/to/file', file_IO_handle, :overwrite => false, :permission => '0666')
|
27
|
+
|
28
|
+
client.append('/path/to/existing/file', data)
|
29
|
+
|
30
|
+
client.read('/path/to/target') #=> data
|
31
|
+
client.read('/path/to/target', :offset => 2048, :length => 1024) #=> data
|
32
|
+
|
33
|
+
To mkdir/rename/delete directories or files:
|
34
|
+
|
35
|
+
client.mkdir('/hdfs/dirname')
|
36
|
+
client.mkdir('/hdfs/dirname', :permission => '0777')
|
37
|
+
|
38
|
+
client.rename(original_path, dst_path)
|
39
|
+
|
40
|
+
client.delete(path)
|
41
|
+
client.delete(dir_path, :recursive => true)
|
42
|
+
|
43
|
+
To get status or list of files and directories:
|
44
|
+
|
45
|
+
client.stat(file_path) #=> key-value pairs for file status
|
46
|
+
client.list(dir_path) #=> list of key-value pairs for files in dir_path
|
47
|
+
|
48
|
+
The 'content_summary', 'checksum', 'homedir', 'chmod', 'chown', 'replication' and 'touch' methods are also available.
|
49
|
+
|
50
|
+
For known errors, automated retries are available. Set the `retry_known_errors` option to true.
|
51
|
+
|
52
|
+
#### To retry for LeaseExpiredException automatically
|
53
|
+
client.retry_known_errors = true
|
54
|
+
|
55
|
+
# client.retry_interval = 1 # [sec], default: 1
|
56
|
+
# client.retry_times = 1 # [times], default: 1
|
57
|
+
|
58
|
+
### WebHDFS::FileUtils
|
59
|
+
|
60
|
+
require 'webhdfs/fileutils'
|
61
|
+
WebHDFS::FileUtils.set_server(host, port)
|
62
|
+
# or
|
63
|
+
WebHDFS::FileUtils.set_server(host, port, username, doas)
|
64
|
+
|
65
|
+
WebHDFS::FileUtils.copy_from_local(localpath, hdfspath)
|
66
|
+
WebHDFS::FileUtils.copy_to_local(hdfspath, localpath)
|
67
|
+
|
68
|
+
WebHDFS::FileUtils.append(path, data)
|
69
|
+
|
70
|
+
### For HttpFs
|
71
|
+
|
72
|
+
For HttpFs instead of WebHDFS:
|
73
|
+
|
74
|
+
client = WebHDFS::Client.new('hostname', 14000)
|
75
|
+
client.httpfs_mode = true
|
76
|
+
|
77
|
+
client.read(path) #=> data
|
78
|
+
|
79
|
+
# or with webhdfs/fileutils
|
80
|
+
WebHDFS::FileUtils.set_server('hostname', 14000)
|
81
|
+
WebHDFS::FileUtils.set_httpfs_mode
|
82
|
+
WebHDFS::FileUtils.copy_to_local(remote_path, local_path)
|
83
|
+
|
84
|
+
### For HTTP Proxy servers
|
85
|
+
|
86
|
+
client = WebHDFS::Client.new('hostname', 14000, nil, nil, 'proxy.server.local', 8080) # username and doas come before the proxy arguments
|
87
|
+
client.proxy_user = 'jack' # if needed
|
88
|
+
client.proxy_pass = 'secret' # if needed
|
89
|
+
|
90
|
+
### For SSL
|
91
|
+
|
92
|
+
Note that net/https and openssl libraries must be available:
|
93
|
+
|
94
|
+
client = WebHDFS::Client.new('hostname', 4443)
|
95
|
+
client.ssl = true
|
96
|
+
client.ssl_ca_file = "/path/to/ca_file.pem" # if needed
|
97
|
+
client.ssl_verify_mode = :peer # if needed (:none or :peer)
|
98
|
+
client.ssl_version = :TLSv1 # if needed
|
99
|
+
|
100
|
+
### For Kerberos Authentication
|
101
|
+
|
102
|
+
Note that [gssapi](https://github.com/zenchild/gssapi) library must be available:
|
103
|
+
|
104
|
+
client = WebHDFS::Client.new('hostname', 14000)
|
105
|
+
client.kerberos = true
|
106
|
+
client.kerberos_keytab = "/path/to/project.keytab"
|
107
|
+
|
108
|
+
### For SSL Client Authentication
|
109
|
+
|
110
|
+
Note that openssl libraries must be available:
|
111
|
+
|
112
|
+
require 'openssl'
|
113
|
+
|
114
|
+
client = WebHDFS::Client.new(host, port)
|
115
|
+
client.ssl = true
|
116
|
+
client.ssl_key = OpenSSL::PKey::RSA.new(open('/path/to/key.pem'))
|
117
|
+
client.ssl_cert = OpenSSL::X509::Certificate.new(open('/path/to/cert.pem'))
|
118
|
+
|
119
|
+
## AUTHORS
|
120
|
+
|
121
|
+
* Kazuki Ohta <kazuki.ohta@gmail.com>
|
122
|
+
* TAGOMORI Satoshi <tagomoris@gmail.com>
|
123
|
+
|
124
|
+
## LICENSE
|
125
|
+
|
126
|
+
* Copyright: Copyright (c) 2012- Fluentd Project
|
127
|
+
* License: Apache License, Version 2.0
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.8.0
|
data/lib/webhdfs.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
if RUBY_VERSION =~ /^1\.8\./
|
2
|
+
require 'cgi'
|
3
|
+
|
4
|
+
def require_relative(relative_feature)
|
5
|
+
file = caller.first.split(/:\d/, 2).first
|
6
|
+
raise LoadError, "require_relative is called in #{Regexp.last_match(1)}"\
|
7
|
+
if /\A\((.*)\)/ =~ file
|
8
|
+
require File.expand_path(relative_feature, File.dirname(file))
|
9
|
+
end
|
10
|
+
|
11
|
+
module URI
  # Backport of URI.encode_www_form for Ruby 1.8.
  #
  # enum - an Enumerable of [key, value] pairs (an Array of pairs or a Hash).
  #        * value nil          -> "key"
  #        * value Array-like   -> "key=v1&key=v2"; a nil element yields a
  #                                bare "key"
  #        * anything else      -> "key=value"
  #
  # Keys and values are converted with #to_s before escaping, so Symbol keys
  # and Integer values are accepted in every branch.
  #
  # Returns the pairs joined with '&' as a String.
  def self.encode_www_form(enum)
    enum.map do |key, value|
      name = CGI.escape(key.to_s)
      if value.nil?
        name
      elsif value.respond_to?(:to_ary)
        value.to_ary.map do |item|
          # Always return a String from the block: a modifier `unless` would
          # evaluate to nil for nil items and drop the key from the output.
          item.nil? ? name.dup : "#{name}=#{CGI.escape(item.to_s)}"
        end.join('&')
      else
        "#{name}=#{CGI.escape(value.to_s)}"
      end
    end.join('&')
  end
end
|
27
|
+
end
|
@@ -0,0 +1,411 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'uri'
|
3
|
+
require 'json'
|
4
|
+
require 'addressable/uri'
|
5
|
+
|
6
|
+
require_relative 'utilities'
|
7
|
+
require_relative 'exceptions'
|
8
|
+
|
9
|
+
module WebHDFS
|
10
|
+
class ClientV1
|
11
|
+
# This hash table holds command options.
|
12
|
+
# Deliberately NOT frozen: each operation below registers its allowed option
# names with OPT_TABLE['<OP>'] = [...] while the class body is evaluated, so a
# frozen hash would raise ("can't modify frozen Hash") as soon as the file loads.
OPT_TABLE = {} # internal use only
|
13
|
+
KNOWN_ERRORS = ['LeaseExpiredException'].freeze
|
14
|
+
|
15
|
+
attr_accessor :host, :port, :username, :doas, :proxy_address, :proxy_port
|
16
|
+
attr_accessor :proxy_user, :proxy_pass
|
17
|
+
attr_accessor :open_timeout # default 30s (in ruby net/http)
|
18
|
+
attr_accessor :read_timeout # default 60s (in ruby net/http)
|
19
|
+
attr_accessor :httpfs_mode
|
20
|
+
attr_accessor :retry_known_errors # default false (not to retry)
|
21
|
+
attr_accessor :retry_times # default 1 (ignored when retry_known_errors is false)
|
22
|
+
attr_accessor :retry_interval # default 1 ([sec], ignored when retry_known_errors is false)
|
23
|
+
attr_accessor :ssl
|
24
|
+
attr_accessor :ssl_ca_file
|
25
|
+
attr_reader :ssl_verify_mode
|
26
|
+
attr_accessor :ssl_cert
|
27
|
+
attr_accessor :ssl_key
|
28
|
+
attr_accessor :ssl_version
|
29
|
+
attr_accessor :kerberos, :kerberos_keytab
|
30
|
+
attr_accessor :http_headers
|
31
|
+
|
32
|
+
SSL_VERIFY_MODES = [:none, :peer].freeze
|
33
|
+
def ssl_verify_mode=(mode)
|
34
|
+
unless SSL_VERIFY_MODES.include? mode
|
35
|
+
raise ArgumentError, "Invalid SSL verify mode #{mode.inspect}"
|
36
|
+
end
|
37
|
+
@ssl_verify_mode = mode
|
38
|
+
end
|
39
|
+
|
40
|
+
def initialize(host = 'localhost', port = 50_070, username = nil,
|
41
|
+
doas = nil, proxy_address = nil, proxy_port = nil,
|
42
|
+
http_headers = {})
|
43
|
+
@host = host
|
44
|
+
@port = port
|
45
|
+
@username = username
|
46
|
+
@doas = doas
|
47
|
+
@proxy_address = proxy_address
|
48
|
+
@proxy_port = proxy_port
|
49
|
+
@retry_known_errors = false
|
50
|
+
@retry_times = @retry_interval = 1
|
51
|
+
|
52
|
+
@httpfs_mode = false
|
53
|
+
|
54
|
+
@ssl = false
|
55
|
+
@ssl_ca_file = nil
|
56
|
+
@ssl_verify_mode = nil
|
57
|
+
@ssl_cert = @ssl_key = nil
|
58
|
+
@ssl_version = nil
|
59
|
+
|
60
|
+
@kerberos = false
|
61
|
+
@kerberos_keytab = nil
|
62
|
+
@http_headers = http_headers
|
63
|
+
end
|
64
|
+
|
65
|
+
# curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=CREATE
|
66
|
+
# [&overwrite=<true|false>][&blocksize=<LONG>]
|
67
|
+
# [&replication=<SHORT>]
|
68
|
+
# [&permission=<OCTAL>][&buffersize=<INT>]"
|
69
|
+
def create(path, body, options = {})
|
70
|
+
options = options.merge('data' => 'true') if @httpfs_mode
|
71
|
+
check_options(options, OPT_TABLE['CREATE'])
|
72
|
+
res = operate_requests('PUT', path, 'CREATE', options, body)
|
73
|
+
res.code == '201'
|
74
|
+
end
|
75
|
+
OPT_TABLE['CREATE'] = %w(overwrite blocksize replication permission
|
76
|
+
buffersize data)
|
77
|
+
|
78
|
+
# curl -i -X POST "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=
|
79
|
+
# APPEND[&buffersize=<INT>]"
|
80
|
+
def append(path, body, options = {})
|
81
|
+
options = options.merge('data' => 'true') if @httpfs_mode
|
82
|
+
check_options(options, OPT_TABLE['APPEND'])
|
83
|
+
res = operate_requests('POST', path, 'APPEND', options, body)
|
84
|
+
res.code == '200'
|
85
|
+
end
|
86
|
+
OPT_TABLE['APPEND'] = %w(buffersize data)
|
87
|
+
|
88
|
+
# curl -i -L "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=OPEN
|
89
|
+
# [&offset=<LONG>][&length=<LONG>][&buffersize=<INT>]"
|
90
|
+
def read(path, options = {})
|
91
|
+
check_options(options, OPT_TABLE['OPEN'])
|
92
|
+
res = operate_requests('GET', path, 'OPEN', options)
|
93
|
+
res.body
|
94
|
+
end
|
95
|
+
OPT_TABLE['OPEN'] = %w(offset length buffersize)
|
96
|
+
alias open read
|
97
|
+
|
98
|
+
# curl -i -X PUT "http://<HOST>:<PORT>/<PATH>?op=
|
99
|
+
# MKDIRS[&permission=<OCTAL>]"
|
100
|
+
def mkdir(path, options = {})
|
101
|
+
check_options(options, OPT_TABLE['MKDIRS'])
|
102
|
+
res = operate_requests('PUT', path, 'MKDIRS', options)
|
103
|
+
check_success_json(res, 'boolean')
|
104
|
+
end
|
105
|
+
OPT_TABLE['MKDIRS'] = ['permission']
|
106
|
+
alias mkdirs mkdir
|
107
|
+
|
108
|
+
# curl -i -X PUT "<HOST>:<PORT>/webhdfs/v1/<PATH>?op=
|
109
|
+
# RENAME&destination=<PATH>"
|
110
|
+
def rename(path, dest, options = {})
|
111
|
+
check_options(options, OPT_TABLE['RENAME'])
|
112
|
+
dest = '/' + dest unless dest.start_with?('/')
|
113
|
+
res = operate_requests('PUT', path, 'RENAME',
|
114
|
+
options.merge('destination' => dest))
|
115
|
+
check_success_json(res, 'boolean')
|
116
|
+
end
|
117
|
+
|
118
|
+
# curl -i -X DELETE "http://<host>:<port>/webhdfs/v1/<path>?op=DELETE
|
119
|
+
# [&recursive=<true|false>]"
|
120
|
+
def delete(path, options = {})
|
121
|
+
check_options(options, OPT_TABLE['DELETE'])
|
122
|
+
res = operate_requests('DELETE', path, 'DELETE', options)
|
123
|
+
check_success_json(res, 'boolean')
|
124
|
+
end
|
125
|
+
OPT_TABLE['DELETE'] = ['recursive']
|
126
|
+
|
127
|
+
# curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=GETFILESTATUS"
|
128
|
+
def stat(path, options = {})
|
129
|
+
check_options(options, OPT_TABLE['GETFILESTATUS'])
|
130
|
+
res = operate_requests('GET', path, 'GETFILESTATUS', options)
|
131
|
+
check_success_json(res, 'FileStatus')
|
132
|
+
end
|
133
|
+
alias getfilestatus stat
|
134
|
+
|
135
|
+
# curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=LISTSTATUS"
|
136
|
+
def list(path, options = {})
|
137
|
+
check_options(options, OPT_TABLE['LISTSTATUS'])
|
138
|
+
res = operate_requests('GET', path, 'LISTSTATUS', options)
|
139
|
+
check_success_json(res, 'FileStatuses')['FileStatus']
|
140
|
+
end
|
141
|
+
alias liststatus list
|
142
|
+
|
143
|
+
# curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=GETCONTENTSUMMARY"
|
144
|
+
def content_summary(path, options = {})
|
145
|
+
check_options(options, OPT_TABLE['GETCONTENTSUMMARY'])
|
146
|
+
res = operate_requests('GET', path, 'GETCONTENTSUMMARY', options)
|
147
|
+
check_success_json(res, 'ContentSummary')
|
148
|
+
end
|
149
|
+
alias getcontentsummary content_summary
|
150
|
+
|
151
|
+
# curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=GETFILECHECKSUM"
|
152
|
+
def checksum(path, options = {})
|
153
|
+
check_options(options, OPT_TABLE['GETFILECHECKSUM'])
|
154
|
+
res = operate_requests('GET', path, 'GETFILECHECKSUM', options)
|
155
|
+
check_success_json(res, 'FileChecksum')
|
156
|
+
end
|
157
|
+
alias getfilechecksum checksum
|
158
|
+
|
159
|
+
# curl -i "http://<HOST>:<PORT>/webhdfs/v1/?op=GETHOMEDIRECTORY"
|
160
|
+
def homedir(options = {})
|
161
|
+
check_options(options, OPT_TABLE['GETHOMEDIRECTORY'])
|
162
|
+
res = operate_requests('GET', '/', 'GETHOMEDIRECTORY', options)
|
163
|
+
check_success_json(res, 'Path')
|
164
|
+
end
|
165
|
+
alias gethomedirectory homedir
|
166
|
+
|
167
|
+
# curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=SETPERMISSION
|
168
|
+
# [&permission=<OCTAL>]"
|
169
|
+
def chmod(path, mode, options = {})
|
170
|
+
check_options(options, OPT_TABLE['SETPERMISSION'])
|
171
|
+
res = operate_requests('PUT', path, 'SETPERMISSION',
|
172
|
+
options.merge('permission' => mode))
|
173
|
+
res.code == '200'
|
174
|
+
end
|
175
|
+
alias setpermission chmod
|
176
|
+
|
177
|
+
# curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=SETOWNER
|
178
|
+
# [&owner=<USER>][&group=<GROUP>]"
|
179
|
+
def chown(path, options = {})
|
180
|
+
check_options(options, OPT_TABLE['SETOWNER'])
|
181
|
+
unless options.key?('owner') || options.key?('group') ||
|
182
|
+
options.key?(:owner) || options.key?(:group)
|
183
|
+
raise ArgumentError, "'chown' needs at least one of owner or group"
|
184
|
+
end
|
185
|
+
res = operate_requests('PUT', path, 'SETOWNER', options)
|
186
|
+
res.code == '200'
|
187
|
+
end
|
188
|
+
OPT_TABLE['SETOWNER'] = %w(owner group)
|
189
|
+
alias setowner chown
|
190
|
+
|
191
|
+
# curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=SETREPLICATION
|
192
|
+
# [&replication=<SHORT>]"
|
193
|
+
def replication(path, replnum, options = {})
|
194
|
+
check_options(options, OPT_TABLE['SETREPLICATION'])
|
195
|
+
res = operate_requests('PUT', path, 'SETREPLICATION',
|
196
|
+
options.merge('replication' => replnum.to_s))
|
197
|
+
check_success_json(res, 'boolean')
|
198
|
+
end
|
199
|
+
alias setreplication replication
|
200
|
+
|
201
|
+
# curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=SETTIMES
|
202
|
+
# [&modificationtime=<TIME>][&accesstime=<TIME>]"
|
203
|
+
# modificationtime: radix-10 long integer
|
204
|
+
# accesstime: radix-10 long integer
|
205
|
+
# Sets the modification and/or access time of +path+ (op=SETTIMES).
#
# path    - target path.
# options - Hash that must contain at least one of 'modificationtime' or
#           'accesstime' (String or Symbol keys). It is validated with
#           check_options and then forwarded to operate_requests unchanged.
#
# Returns true when the response code is '200', false otherwise.
# Raises ArgumentError when neither time option is present.
def touch(path, options = {})
  check_options(options, OPT_TABLE['SETTIMES'])
  time_keys = ['modificationtime', 'accesstime', :modificationtime, :accesstime]
  unless time_keys.any? { |key| options.key?(key) }
    # Name this method in the message; it previously said 'chown'.
    raise ArgumentError, "'touch' needs at least one of " \
                         'modificationtime or accesstime'
  end
  res = operate_requests('PUT', path, 'SETTIMES', options)
  res.code == '200'
end
|
215
|
+
OPT_TABLE['SETTIMES'] = %w(modificationtime accesstime)
|
216
|
+
alias settimes touch
|
217
|
+
|
218
|
+
# def delegation_token(user, options={}) # GETDELEGATIONTOKEN
|
219
|
+
# raise NotImplementedError
|
220
|
+
# end
|
221
|
+
# def renew_delegation_token(token, options={}) # RENEWDELEGATIONTOKEN
|
222
|
+
# raise NotImplementedError
|
223
|
+
# end
|
224
|
+
# def cancel_delegation_token(token, options={}) # CANCELDELEGATIONTOKEN
|
225
|
+
# raise NotImplementedError
|
226
|
+
# end
|
227
|
+
|
228
|
+
def build_path(path, op, params)
|
229
|
+
opts = if @username && @doas
|
230
|
+
{ 'op' => op, 'user.name' => @username, 'doas' => @doas }
|
231
|
+
elsif @username
|
232
|
+
{ 'op' => op, 'user.name' => @username }
|
233
|
+
elsif @doas
|
234
|
+
{ 'op' => op, 'doas' => @doas }
|
235
|
+
else
|
236
|
+
{ 'op' => op }
|
237
|
+
end
|
238
|
+
api_path(path) + '?' + URI.encode_www_form(params.merge(opts))
|
239
|
+
end
|
240
|
+
|
241
|
+
REDIRECTED_OPERATIONS = %w(APPEND CREATE OPEN GETFILECHECKSUM).freeze
|
242
|
+
def operate_requests(method, path, op, params = {}, payload = nil)
|
243
|
+
if !@httpfs_mode && REDIRECTED_OPERATIONS.include?(op)
|
244
|
+
res = request(@host, @port, method, path, op, params, nil)
|
245
|
+
unless res.is_a?(Net::HTTPRedirection) && res['location']
|
246
|
+
msg = 'NameNode returns non-redirection (or without location' \
|
247
|
+
" header), code:#{res.code}, body:#{res.body}."
|
248
|
+
raise WebHDFS::RequestFailedError, msg
|
249
|
+
end
|
250
|
+
uri = URI.parse(res['location'])
|
251
|
+
rpath = if uri.query
|
252
|
+
uri.path + '?' + uri.query
|
253
|
+
else
|
254
|
+
uri.path
|
255
|
+
end
|
256
|
+
request(uri.host, uri.port, method, rpath, nil, {},
|
257
|
+
payload, 'Content-Type' => 'application/octet-stream')
|
258
|
+
elsif @httpfs_mode && !payload.nil?
|
259
|
+
request(@host, @port, method, path, op, params,
|
260
|
+
payload, 'Content-Type' => 'application/octet-stream')
|
261
|
+
else
|
262
|
+
request(@host, @port, method, path, op, params, payload)
|
263
|
+
end
|
264
|
+
end
|
265
|
+
|
266
|
+
# IllegalArgumentException 400 Bad Request
|
267
|
+
# UnsupportedOperationException 400 Bad Request
|
268
|
+
# SecurityException 401 Unauthorized
|
269
|
+
# IOException 403 Forbidden
|
270
|
+
# FileNotFoundException 404 Not Found
|
271
|
+
# RuntimeException 500 Internal Server Error
|
272
|
+
# Performs one HTTP request against +host+:+port+ and returns the
# Net::HTTPResponse when it is a success (2xx) or a redirection (3xx).
#
# host, port - server to connect to (through the configured proxy, if any).
# method     - HTTP method name, e.g. 'GET', 'PUT', 'POST', 'DELETE'.
# path       - request path; it is escaped here, so pass it unescaped.
# op         - WebHDFS operation name. When given, the request target is
#              produced by build_path from the escaped path, +op+ and
#              +params+; when nil, the escaped path is used as-is.
# params     - Hash of query parameters (only used when +op+ is given).
# payload    - request body: a String, or an object responding to #read and
#              #size, which is streamed (allowed for PUT and POST only).
# header     - Hash of extra request headers, or nil.
# retries    - number of retries already performed (used by the recursion).
#
# Raises WebHDFS::KerberosError when the GSSAPI context cannot be set up,
# WebHDFS::ServerError when the connection fails or the server answers 500,
# WebHDFS::ClientError (400, or a streamed payload with a method other than
# PUT/POST), WebHDFS::SecurityError (401), WebHDFS::IOError (403),
# WebHDFS::FileNotFoundError (404) and WebHDFS::RequestFailedError for any
# other non-success code.
def request(host, port, method, path, op = nil, params = {},
            payload = nil, header = nil, retries = 0)
  conn = Net::HTTP.new(host, port, @proxy_address, @proxy_port)
  conn.proxy_user = @proxy_user if @proxy_user
  conn.proxy_pass = @proxy_pass if @proxy_pass
  conn.open_timeout = @open_timeout if @open_timeout
  conn.read_timeout = @read_timeout if @read_timeout

  # Keep +path+ itself unescaped: the retry below re-enters this method,
  # which escapes again, so handing it the escaped value would double-escape
  # the path (e.g. ' ' -> '%20' -> '%2520').
  escaped_path = Addressable::URI.escape(path) # make safe for transmission via HTTP
  request_path = if op
                   build_path(escaped_path, op, params)
                 else
                   escaped_path
                 end
  if @ssl
    conn.use_ssl = true
    conn.ca_file = @ssl_ca_file if @ssl_ca_file
    if @ssl_verify_mode
      require 'openssl'
      conn.verify_mode = case @ssl_verify_mode
                         when :none then OpenSSL::SSL::VERIFY_NONE
                         when :peer then OpenSSL::SSL::VERIFY_PEER
                         end
    end
    conn.cert = @ssl_cert if @ssl_cert
    conn.key = @ssl_key if @ssl_key
    conn.ssl_version = @ssl_version if @ssl_version
  end

  gsscli = nil
  if @kerberos
    # Lazy-loaded so that gssapi is only required when Kerberos is enabled.
    require 'base64'
    require 'gssapi'
    gsscli = GSSAPI::Simple.new(@host, 'HTTP', @kerberos_keytab)
    token = nil
    begin
      token = gsscli.init_context
    rescue => e
      raise WebHDFS::KerberosError, e.message
    end
    if header
      header['Authorization'] = "Negotiate #{Base64.strict_encode64(token)}"
    else
      header = { 'Authorization' =>
                 "Negotiate #{Base64.strict_encode64(token)}" }
    end
  else
    header = {} if header.nil?
    header = @http_headers.merge(header)
  end

  res = nil
  if !payload.nil? && payload.respond_to?(:read) &&
     payload.respond_to?(:size)
    # IO-like payload: stream it instead of loading it into memory.
    req = Net::HTTPGenericRequest.new(method, (payload ? true : false),
                                      true, request_path, header)
    raise WebHDFS::ClientError, 'Error accepting given IO resource as' \
      ' data payload, Not valid in methods' \
      ' other than PUT and POST' unless method == 'PUT' || method == 'POST'

    req.body_stream = payload
    req.content_length = payload.size
    begin
      res = conn.request(req)
    rescue => e
      raise WebHDFS::ServerError, 'Failed to connect to host' \
                                  " #{host}:#{port}, #{e.message}"
    end
  else
    begin
      res = conn.send_request(method, request_path, payload, header)
    rescue => e
      raise WebHDFS::ServerError, 'Failed to connect to host' \
                                  " #{host}:#{port}, #{e.message}"
    end
  end

  if @kerberos && res.code == '307'
    # Feed the token from the last WWW-Authenticate field back into the
    # GSSAPI context.
    itok = (res.header.get_fields('WWW-Authenticate') ||
            ['']).pop.split(/\s+/).last
    unless itok
      raise WebHDFS::KerberosError, 'Server does not return ' \
                                    'WWW-Authenticate header'
    end

    begin
      gsscli.init_context(Base64.strict_decode64(itok))
    rescue => e
      raise WebHDFS::KerberosError, e.message
    end
  end

  case res
  when Net::HTTPSuccess
    res
  when Net::HTTPRedirection
    res
  else
    message = if res.body && !res.body.empty?
                res.body.delete("\n")
              else
                'Response body is empty...'
              end

    if @retry_known_errors && retries < @retry_times
      detail = nil
      if message =~ /^\{"RemoteException":\{/
        begin
          detail = JSON.parse(message)
        rescue
          # ignore broken json response body
        end
      end
      if detail && detail['RemoteException'] &&
         KNOWN_ERRORS.include?(detail['RemoteException']['exception'])
        sleep @retry_interval if @retry_interval > 0
        # NOTE(review): an IO payload is not rewound before the retry;
        # confirm whether streamed uploads need that.
        return request(host, port, method, path, op, params, payload,
                       header, retries + 1)
      end
    end

    case res.code
    when '400'
      raise WebHDFS::ClientError, message
    when '401'
      raise WebHDFS::SecurityError, message
    when '403'
      raise WebHDFS::IOError, message
    when '404'
      raise WebHDFS::FileNotFoundError, message
    when '500'
      raise WebHDFS::ServerError, message
    else
      raise WebHDFS::RequestFailedError, "response code:#{res.code}, " \
                                         "message:#{message}"
    end
  end
end
|
410
|
+
end
|
411
|
+
end
|