webhdfs-rlz 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/AUTHORS +2 -0
- data/COPYING +13 -0
- data/Gemfile +3 -0
- data/README.md +127 -0
- data/VERSION +1 -0
- data/lib/webhdfs.rb +2 -0
- data/lib/webhdfs/backport.rb +27 -0
- data/lib/webhdfs/client.rb +6 -0
- data/lib/webhdfs/client_v1.rb +411 -0
- data/lib/webhdfs/client_v2.rb +249 -0
- data/lib/webhdfs/exceptions.rb +15 -0
- data/lib/webhdfs/fileutils.rb +439 -0
- data/lib/webhdfs/kerberos.rb +52 -0
- data/lib/webhdfs/proxy.rb +28 -0
- data/lib/webhdfs/prueba.rb +41 -0
- data/lib/webhdfs/request.rb +175 -0
- data/lib/webhdfs/ssl.rb +44 -0
- data/lib/webhdfs/utilities.rb +25 -0
- data/spec/spec_helper.rb +2 -0
- data/test/test_helper.rb +20 -0
- data/test/webhdfs/fileutils.rb +69 -0
- data/webhdfs.gemspec +23 -0
- metadata +139 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 473d3b0eaffcc87f27f27d0f8dccf2bfe5fe2189
+  data.tar.gz: e0ade8a94b22d7eee85fa72077bac08b23858e8d
+SHA512:
+  metadata.gz: b88255e72919b0141ad697487ad4cc3f9960c73f0cbdefd2e6738e1639d30e660580b9f6e7c232aaf2c352ada461b538131c2ea0da2229b21d0c3959b198e914
+  data.tar.gz: 354d2a40fecbc9e1fa63746f0d05b5b2627f7eeec9d8c4287682371713c260cfb04cf782648d6929f7936e665ad2987bbf05eb082fdf7ba83dd6ef6a66744c6a
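These are the standard RubyGems per-gem digests: SHA1 and SHA512 of the two members of the .gem tar archive. A minimal verification sketch, assuming the gem has been fetched and unpacked locally (the paths are illustrative, not part of this diff):

    require 'digest'

    # metadata.gz and data.tar.gz sit at the top level of the unpacked .gem
    %w(metadata.gz data.tar.gz).each do |name|
      path = File.join('webhdfs-rlz-0.8.0', name) # hypothetical unpack dir
      puts "#{name} SHA1:   #{Digest::SHA1.file(path).hexdigest}"
      puts "#{name} SHA512: #{Digest::SHA512.file(path).hexdigest}"
    end

Matching the output against checksums.yaml confirms the archive was not altered in transit.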
data/AUTHORS
ADDED
data/COPYING
ADDED
@@ -0,0 +1,13 @@
+Copyright (C) 2012 Fluentd Project
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,127 @@
+# webhdfs - A client library implementation for Hadoop WebHDFS, and HttpFs, for Ruby
+
+The webhdfs gem provides access to Hadoop WebHDFS (EXPERIMENTAL: and HttpFs). WebHDFS::Client is the client class, and WebHDFS::FileUtils is a utility modeled on Ruby's 'fileutils'.
+
+## Installation
+
+    gem install webhdfs
+
+## Usage
+
+### WebHDFS::Client
+
+For the client object interface:
+
+    require 'webhdfs'
+    client = WebHDFS::Client.new(hostname, port)
+    # or with pseudo username authentication
+    client = WebHDFS::Client.new(hostname, port, username)
+
+To create/append/read files:
+
+    client.create('/path/to/file', data)
+    client.create('/path/to/file', data, :overwrite => false, :blocksize => 268435456, :replication => 5, :permission => '0666')
+
+    # This does not require the whole data in memory; it can be read chunk by chunk, e.g. file data
+    client.create('/path/to/file', file_IO_handle, :overwrite => false, :permission => 0666)
+
+    client.append('/path/to/existing/file', data)
+
+    client.read('/path/to/target') #=> data
+    client.read('/path/to/target', :offset => 2048, :length => 1024) #=> data
+
+To mkdir/rename/delete directories or files:
+
+    client.mkdir('/hdfs/dirname')
+    client.mkdir('/hdfs/dirname', :permission => '0777')
+
+    client.rename(original_path, dst_path)
+
+    client.delete(path)
+    client.delete(dir_path, :recursive => true)
+
+To get the status or list of files and directories:
+
+    client.stat(file_path) #=> key-value pairs for file status
+    client.list(dir_path) #=> list of key-value pairs for files in dir_path
+
+The 'content_summary', 'checksum', 'homedir', 'chmod', 'chown', 'replication' and 'touch' methods are also available.
+
+Automated retries are available for known errors. Set the `retry_known_errors` option to true.
+
+    #### To retry for LeaseExpiredException automatically
+    client.retry_known_errors = true
+
+    # client.retry_interval = 1 # [sec], default: 1
+    # client.retry_times = 1 # [times], default: 1
+
+### WebHDFS::FileUtils
+
+    require 'webhdfs/fileutils'
+    WebHDFS::FileUtils.set_server(host, port)
+    # or
+    WebHDFS::FileUtils.set_server(host, port, username, doas)
+
+    WebHDFS::FileUtils.copy_from_local(localpath, hdfspath)
+    WebHDFS::FileUtils.copy_to_local(hdfspath, localpath)
+
+    WebHDFS::FileUtils.append(path, data)
+
+### For HttpFs
+
+For HttpFs instead of WebHDFS:
+
+    client = WebHDFS::Client.new('hostname', 14000)
+    client.httpfs_mode = true
+
+    client.read(path) #=> data
+
+    # or with webhdfs/fileutils
+    WebHDFS::FileUtils.set_server('hostname', 14000)
+    WebHDFS::FileUtils.set_httpfs_mode
+    WebHDFS::FileUtils.copy_to_local(remote_path, local_path)
+
+### For HTTP Proxy servers
+
+    client = WebHDFS::Client.new('hostname', 14000, nil, nil, 'proxy.server.local', 8080)
+    client.proxy_user = 'jack'   # if needed
+    client.proxy_pass = 'secret' # if needed
+
+### For SSL
+
+Note that the net/https and openssl libraries must be available:
+
+    client = WebHDFS::Client.new('hostname', 4443)
+    client.ssl = true
+    client.ssl_ca_file = "/path/to/ca_file.pem" # if needed
+    client.ssl_verify_mode = :peer # if needed (:none or :peer)
+    client.ssl_version = :TLSv1 # if needed
+
+### For Kerberos Authentication
+
+Note that the [gssapi](https://github.com/zenchild/gssapi) library must be available:
+
+    client = WebHDFS::Client.new('hostname', 14000)
+    client.kerberos = true
+    client.kerberos_keytab = "/path/to/project.keytab"
+
+### For SSL Client Authentication
+
+Note that the openssl library must be available:
+
+    require 'openssl'
+
+    client = WebHDFS::Client.new(host, port)
+    client.ssl = true
+    client.ssl_key = OpenSSL::PKey::RSA.new(open('/path/to/key.pem'))
+    client.ssl_cert = OpenSSL::X509::Certificate.new(open('/path/to/cert.pem'))
+
+## AUTHORS
+
+* Kazuki Ohta <kazuki.ohta@gmail.com>
+* TAGOMORI Satoshi <tagomoris@gmail.com>
+
+## LICENSE
+
+* Copyright: Copyright (c) 2012- Fluentd Project
+* License: Apache License, Version 2.0
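The metadata helpers named in the README (chmod, chown, replication, touch, content_summary) take a path plus an options hash, mirroring the WebHDFS SETPERMISSION/SETOWNER/SETREPLICATION/SETTIMES/GETCONTENTSUMMARY operations shown in client_v1.rb below. A minimal usage sketch, assuming a reachable NameNode (the host, user, and paths are illustrative):

    require 'webhdfs'

    client = WebHDFS::Client.new('namenode.local', 50070, 'hdfs') # hypothetical host

    client.chmod('/tmp/example.txt', '0644')            # SETPERMISSION
    client.chown('/tmp/example.txt', :owner => 'alice') # SETOWNER
    client.replication('/tmp/example.txt', 3)           # SETREPLICATION
    client.touch('/tmp/example.txt',
                 :modificationtime => Time.now.to_i * 1000) # SETTIMES, epoch millis
    summary = client.content_summary('/tmp') # GETCONTENTSUMMARY, key-value pairs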
data/VERSION
ADDED
@@ -0,0 +1 @@
+0.8.0
data/lib/webhdfs.rb
ADDED
data/lib/webhdfs/backport.rb
ADDED
@@ -0,0 +1,27 @@
+if RUBY_VERSION =~ /^1\.8\./
+  require 'cgi'
+
+  def require_relative(relative_feature)
+    file = caller.first.split(/:\d/, 2).first
+    raise LoadError, "require_relative is called in #{Regexp.last_match(1)}"\
+      if /\A\((.*)\)/ =~ file
+    require File.expand_path(relative_feature, File.dirname(file))
+  end
+
+  module URI
+    def self.encode_www_form(enum)
+      enum.map do |key, value|
+        if value.nil?
+          CGI.escape(key)
+        elsif value.respond_to?(:to_ary)
+          value.to_ary.map do |w|
+            str = CGI.escape(key)
+            str << '=' << CGI.escape(w) unless w.nil?
+          end.join('&')
+        else
+          CGI.escape(key.to_s) << '=' << CGI.escape(value.to_s)
+        end
+      end.join('&')
+    end
+  end
+end
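This 27-line hunk belongs to data/lib/webhdfs/backport.rb per the file list above (the two-line data/lib/webhdfs.rb entry point carries no hunk in this diff). On Ruby 1.8 it supplies require_relative and URI.encode_www_form, both missing from that version; the backported encoder matches modern stdlib behavior. An illustrative call (values are examples only):

    require 'uri'

    # Percent-encodes keys and values and joins pairs with '&', as used by
    # ClientV1#build_path below.
    URI.encode_www_form('op' => 'OPEN', 'offset' => 2048, 'length' => 1024)
    #=> "op=OPEN&offset=2048&length=1024"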
data/lib/webhdfs/client.rb
ADDED
data/lib/webhdfs/client_v1.rb
ADDED
@@ -0,0 +1,411 @@
+require 'net/http'
+require 'uri'
+require 'json'
+require 'addressable/uri'
+
+require_relative 'utilities'
+require_relative 'exceptions'
+
+module WebHDFS
+  class ClientV1
+    # This hash table holds command options.
+    OPT_TABLE = {}.freeze # internal use only
+    KNOWN_ERRORS = ['LeaseExpiredException'].freeze
+
+    attr_accessor :host, :port, :username, :doas, :proxy_address, :proxy_port
+    attr_accessor :proxy_user, :proxy_pass
+    attr_accessor :open_timeout # default 30s (in ruby net/http)
+    attr_accessor :read_timeout # default 60s (in ruby net/http)
+    attr_accessor :httpfs_mode
+    attr_accessor :retry_known_errors # default false (not to retry)
+    attr_accessor :retry_times # default 1 (ignored when retry_known_errors is false)
+    attr_accessor :retry_interval # default 1 ([sec], ignored when retry_known_errors is false)
+    attr_accessor :ssl
+    attr_accessor :ssl_ca_file
+    attr_reader :ssl_verify_mode
+    attr_accessor :ssl_cert
+    attr_accessor :ssl_key
+    attr_accessor :ssl_version
+    attr_accessor :kerberos, :kerberos_keytab
+    attr_accessor :http_headers
+
+    SSL_VERIFY_MODES = [:none, :peer].freeze
+    def ssl_verify_mode=(mode)
+      unless SSL_VERIFY_MODES.include? mode
+        raise ArgumentError, "Invalid SSL verify mode #{mode.inspect}"
+      end
+      @ssl_verify_mode = mode
+    end
+
+    def initialize(host = 'localhost', port = 50_070, username = nil,
+                   doas = nil, proxy_address = nil, proxy_port = nil,
+                   http_headers = {})
+      @host = host
+      @port = port
+      @username = username
+      @doas = doas
+      @proxy_address = proxy_address
+      @proxy_port = proxy_port
+      @retry_known_errors = false
+      @retry_times = @retry_interval = 1
+
+      @httpfs_mode = false
+
+      @ssl = false
+      @ssl_ca_file = nil
+      @ssl_verify_mode = nil
+      @ssl_cert = @ssl_key = nil
+      @ssl_version = nil
+
+      @kerberos = false
+      @kerberos_keytab = nil
+      @http_headers = http_headers
+    end
+
+    # curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=CREATE
+    #                 [&overwrite=<true|false>][&blocksize=<LONG>]
+    #                 [&replication=<SHORT>]
+    #                 [&permission=<OCTAL>][&buffersize=<INT>]"
+    def create(path, body, options = {})
+      options = options.merge('data' => 'true') if @httpfs_mode
+      check_options(options, OPT_TABLE['CREATE'])
+      res = operate_requests('PUT', path, 'CREATE', options, body)
+      res.code == '201'
+    end
+    OPT_TABLE['CREATE'] = %w(overwrite blocksize replication permission
+                             buffersize data)
+
+    # curl -i -X POST "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=
+    #                  APPEND[&buffersize=<INT>]"
+    def append(path, body, options = {})
+      options = options.merge('data' => 'true') if @httpfs_mode
+      check_options(options, OPT_TABLE['APPEND'])
+      res = operate_requests('POST', path, 'APPEND', options, body)
+      res.code == '200'
+    end
+    OPT_TABLE['APPEND'] = %w(buffersize data)
+
+    # curl -i -L "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=OPEN
+    #             [&offset=<LONG>][&length=<LONG>][&buffersize=<INT>]"
+    def read(path, options = {})
+      check_options(options, OPT_TABLE['OPEN'])
+      res = operate_requests('GET', path, 'OPEN', options)
+      res.body
+    end
+    OPT_TABLE['OPEN'] = %w(offset length buffersize)
+    alias open read
+
+    # curl -i -X PUT "http://<HOST>:<PORT>/<PATH>?op=
+    #                 MKDIRS[&permission=<OCTAL>]"
+    def mkdir(path, options = {})
+      check_options(options, OPT_TABLE['MKDIRS'])
+      res = operate_requests('PUT', path, 'MKDIRS', options)
+      check_success_json(res, 'boolean')
+    end
+    OPT_TABLE['MKDIRS'] = ['permission']
+    alias mkdirs mkdir
+
+    # curl -i -X PUT "<HOST>:<PORT>/webhdfs/v1/<PATH>?op=
+    #                 RENAME&destination=<PATH>"
+    def rename(path, dest, options = {})
+      check_options(options, OPT_TABLE['RENAME'])
+      dest = '/' + dest unless dest.start_with?('/')
+      res = operate_requests('PUT', path, 'RENAME',
+                             options.merge('destination' => dest))
+      check_success_json(res, 'boolean')
+    end
+
+    # curl -i -X DELETE "http://<host>:<port>/webhdfs/v1/<path>?op=DELETE
+    #                    [&recursive=<true|false>]"
+    def delete(path, options = {})
+      check_options(options, OPT_TABLE['DELETE'])
+      res = operate_requests('DELETE', path, 'DELETE', options)
+      check_success_json(res, 'boolean')
+    end
+    OPT_TABLE['DELETE'] = ['recursive']
+
+    # curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=GETFILESTATUS"
+    def stat(path, options = {})
+      check_options(options, OPT_TABLE['GETFILESTATUS'])
+      res = operate_requests('GET', path, 'GETFILESTATUS', options)
+      check_success_json(res, 'FileStatus')
+    end
+    alias getfilestatus stat
+
+    # curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=LISTSTATUS"
+    def list(path, options = {})
+      check_options(options, OPT_TABLE['LISTSTATUS'])
+      res = operate_requests('GET', path, 'LISTSTATUS', options)
+      check_success_json(res, 'FileStatuses')['FileStatus']
+    end
+    alias liststatus list
+
+    # curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=GETCONTENTSUMMARY"
+    def content_summary(path, options = {})
+      check_options(options, OPT_TABLE['GETCONTENTSUMMARY'])
+      res = operate_requests('GET', path, 'GETCONTENTSUMMARY', options)
+      check_success_json(res, 'ContentSummary')
+    end
+    alias getcontentsummary content_summary
+
+    # curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=GETFILECHECKSUM"
+    def checksum(path, options = {})
+      check_options(options, OPT_TABLE['GETFILECHECKSUM'])
+      res = operate_requests('GET', path, 'GETFILECHECKSUM', options)
+      check_success_json(res, 'FileChecksum')
+    end
+    alias getfilechecksum checksum
+
+    # curl -i "http://<HOST>:<PORT>/webhdfs/v1/?op=GETHOMEDIRECTORY"
+    def homedir(options = {})
+      check_options(options, OPT_TABLE['GETHOMEDIRECTORY'])
+      res = operate_requests('GET', '/', 'GETHOMEDIRECTORY', options)
+      check_success_json(res, 'Path')
+    end
+    alias gethomedirectory homedir
+
+    # curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=SETPERMISSION
+    #                 [&permission=<OCTAL>]"
+    def chmod(path, mode, options = {})
+      check_options(options, OPT_TABLE['SETPERMISSION'])
+      res = operate_requests('PUT', path, 'SETPERMISSION',
+                             options.merge('permission' => mode))
+      res.code == '200'
+    end
+    alias setpermission chmod
+
+    # curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=SETOWNER
+    #                 [&owner=<USER>][&group=<GROUP>]"
+    def chown(path, options = {})
+      check_options(options, OPT_TABLE['SETOWNER'])
+      unless options.key?('owner') || options.key?('group') ||
+             options.key?(:owner) || options.key?(:group)
+        raise ArgumentError, "'chown' needs at least one of owner or group"
+      end
+      res = operate_requests('PUT', path, 'SETOWNER', options)
+      res.code == '200'
+    end
+    OPT_TABLE['SETOWNER'] = %w(owner group)
+    alias setowner chown
+
+    # curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=SETREPLICATION
+    #                 [&replication=<SHORT>]"
+    def replication(path, replnum, options = {})
+      check_options(options, OPT_TABLE['SETREPLICATION'])
+      res = operate_requests('PUT', path, 'SETREPLICATION',
+                             options.merge('replication' => replnum.to_s))
+      check_success_json(res, 'boolean')
+    end
+    alias setreplication replication
+
+    # curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=SETTIMES
+    #                 [&modificationtime=<TIME>][&accesstime=<TIME>]"
+    # modificationtime: radix-10 long integer
+    # accesstime: radix-10 long integer
+    def touch(path, options = {})
+      check_options(options, OPT_TABLE['SETTIMES'])
+      unless options.key?('modificationtime') || options.key?('accesstime') ||
+             options.key?(:modificationtime) || options.key?(:accesstime)
+        raise ArgumentError, "'touch' needs at least one of " \
+                             'modificationtime or accesstime'
+      end
+      res = operate_requests('PUT', path, 'SETTIMES', options)
+      res.code == '200'
+    end
+    OPT_TABLE['SETTIMES'] = %w(modificationtime accesstime)
+    alias settimes touch
+
+    # def delegation_token(user, options={}) # GETDELEGATIONTOKEN
+    #   raise NotImplementedError
+    # end
+    # def renew_delegation_token(token, options={}) # RENEWDELEGATIONTOKEN
+    #   raise NotImplementedError
+    # end
+    # def cancel_delegation_token(token, options={}) # CANCELDELEGATIONTOKEN
+    #   raise NotImplementedError
+    # end
+
+    def build_path(path, op, params)
+      opts = if @username && @doas
+               { 'op' => op, 'user.name' => @username, 'doas' => @doas }
+             elsif @username
+               { 'op' => op, 'user.name' => @username }
+             elsif @doas
+               { 'op' => op, 'doas' => @doas }
+             else
+               { 'op' => op }
+             end
+      api_path(path) + '?' + URI.encode_www_form(params.merge(opts))
+    end
+
+    REDIRECTED_OPERATIONS = %w(APPEND CREATE OPEN GETFILECHECKSUM).freeze
+    def operate_requests(method, path, op, params = {}, payload = nil)
+      if !@httpfs_mode && REDIRECTED_OPERATIONS.include?(op)
+        res = request(@host, @port, method, path, op, params, nil)
+        unless res.is_a?(Net::HTTPRedirection) && res['location']
+          msg = 'NameNode returns non-redirection (or without location' \
+                " header), code:#{res.code}, body:#{res.body}."
+          raise WebHDFS::RequestFailedError, msg
+        end
+        uri = URI.parse(res['location'])
+        rpath = if uri.query
+                  uri.path + '?' + uri.query
+                else
+                  uri.path
+                end
+        request(uri.host, uri.port, method, rpath, nil, {},
+                payload, 'Content-Type' => 'application/octet-stream')
+      elsif @httpfs_mode && !payload.nil?
+        request(@host, @port, method, path, op, params,
+                payload, 'Content-Type' => 'application/octet-stream')
+      else
+        request(@host, @port, method, path, op, params, payload)
+      end
+    end
+
+    # IllegalArgumentException      400 Bad Request
+    # UnsupportedOperationException 400 Bad Request
+    # SecurityException             401 Unauthorized
+    # IOException                   403 Forbidden
+    # FileNotFoundException         404 Not Found
+    # RuntimeException              500 Internal Server Error
+    def request(host, port, method, path, op = nil, params = {},
+                payload = nil, header = nil, retries = 0)
+      conn = Net::HTTP.new(host, port, @proxy_address, @proxy_port)
+      conn.proxy_user = @proxy_user if @proxy_user
+      conn.proxy_pass = @proxy_pass if @proxy_pass
+      conn.open_timeout = @open_timeout if @open_timeout
+      conn.read_timeout = @read_timeout if @read_timeout
+
+      path = Addressable::URI.escape(path) # make safe for transmission via HTTP
+      request_path = if op
+                       build_path(path, op, params)
+                     else
+                       path
+                     end
+      if @ssl
+        conn.use_ssl = true
+        conn.ca_file = @ssl_ca_file if @ssl_ca_file
+        if @ssl_verify_mode
+          require 'openssl'
+          conn.verify_mode = case @ssl_verify_mode
+                             when :none then OpenSSL::SSL::VERIFY_NONE
+                             when :peer then OpenSSL::SSL::VERIFY_PEER
+                             end
+        end
+        conn.cert = @ssl_cert if @ssl_cert
+        conn.key = @ssl_key if @ssl_key
+        conn.ssl_version = @ssl_version if @ssl_version
+      end
+
+      gsscli = nil
+      if @kerberos
+        require 'base64'
+        require 'gssapi'
+        gsscli = GSSAPI::Simple.new(@host, 'HTTP', @kerberos_keytab)
+        token = nil
+        begin
+          token = gsscli.init_context
+        rescue => e
+          raise WebHDFS::KerberosError, e.message
+        end
+        if header
+          header['Authorization'] = "Negotiate #{Base64.strict_encode64(token)}"
+        else
+          header = { 'Authorization' =>
+                     "Negotiate #{Base64.strict_encode64(token)}" }
+        end
+      else
+        header = {} if header.nil?
+        header = @http_headers.merge(header)
+      end
+
+      res = nil
+      if !payload.nil? && payload.respond_to?(:read) &&
+         payload.respond_to?(:size)
+        req = Net::HTTPGenericRequest.new(method, (payload ? true : false),
+                                          true, request_path, header)
+        raise WebHDFS::ClientError, 'Error accepting given IO resource as' \
+              ' data payload, Not valid in methods' \
+              ' other than PUT and POST' unless method == 'PUT' || method == 'POST'
+
+        req.body_stream = payload
+        req.content_length = payload.size
+        begin
+          res = conn.request(req)
+        rescue => e
+          raise WebHDFS::ServerError, 'Failed to connect to host' \
+                                      " #{host}:#{port}, #{e.message}"
+        end
+      else
+        begin
+          res = conn.send_request(method, request_path, payload, header)
+        rescue => e
+          raise WebHDFS::ServerError, 'Failed to connect to host' \
+                                      " #{host}:#{port}, #{e.message}"
+        end
+      end
+
+      if @kerberos && res.code == '307'
+        itok = (res.header.get_fields('WWW-Authenticate') ||
+                ['']).pop.split(/\s+/).last
+        unless itok
+          raise WebHDFS::KerberosError, 'Server does not return ' \
+                                        'WWW-Authenticate header'
+        end
+
+        begin
+          gsscli.init_context(Base64.strict_decode64(itok))
+        rescue => e
+          raise WebHDFS::KerberosError, e.message
+        end
+      end
+
+      case res
+      when Net::HTTPSuccess
+        res
+      when Net::HTTPRedirection
+        res
+      else
+        message = if res.body && !res.body.empty?
+                    res.body.delete("\n")
+                  else
+                    'Response body is empty...'
+                  end
+
+        if @retry_known_errors && retries < @retry_times
+          detail = nil
+          if message =~ /^\{"RemoteException":\{/
+            begin
+              detail = JSON.parse(message)
+            rescue
+              # ignore broken json response body
+            end
+          end
+          if detail && detail['RemoteException'] &&
+             KNOWN_ERRORS.include?(detail['RemoteException']['exception'])
+            sleep @retry_interval if @retry_interval > 0
+            return request(host, port, method, path, op, params, payload,
+                           header, retries + 1)
+          end
+        end
+
+        case res.code
+        when '400'
+          raise WebHDFS::ClientError, message
+        when '401'
+          raise WebHDFS::SecurityError, message
+        when '403'
+          raise WebHDFS::IOError, message
+        when '404'
+          raise WebHDFS::FileNotFoundError, message
+        when '500'
+          raise WebHDFS::ServerError, message
+        else
+          raise WebHDFS::RequestFailedError, "response code:#{res.code}, " \
+                                             "message:#{message}"
+        end
+      end
+    end
+  end
+end
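Note that this 411-line hunk belongs to data/lib/webhdfs/client_v1.rb per the file list (the six-line data/lib/webhdfs/client.rb wrapper carries no hunk in this diff). Outside httpfs_mode, the redirected operations (APPEND, CREATE, OPEN, GETFILECHECKSUM) cost two round trips: the NameNode answers with a Location header and the client replays the request against the DataNode it names. A minimal end-to-end sketch against this client, assuming a reachable WebHDFS endpoint (host, user, and paths are illustrative):

    require 'webhdfs'

    client = WebHDFS::Client.new('namenode.local', 50070, 'hdfs') # hypothetical host
    client.open_timeout = 10         # fail fast when the NameNode is unreachable
    client.read_timeout = 120        # allow slow DataNode reads
    client.retry_known_errors = true # retry LeaseExpiredException once

    client.create('/tmp/demo.txt', "hello\n", :overwrite => true)
    client.append('/tmp/demo.txt', "world\n")
    puts client.read('/tmp/demo.txt', :offset => 0, :length => 6) #=> "hello\n"
    client.delete('/tmp/demo.txt')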