webhdfs 0.2 → 0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2
1
+ 0.3
@@ -0,0 +1,6 @@
1
+ require_relative 'client_v1'
2
+
3
module WebHDFS
  # Default WebHDFS client class. It adds nothing of its own: every method
  # and constant is inherited from ClientV1.
  class Client < ClientV1; end
end
@@ -0,0 +1,274 @@
1
+ require 'net/http'
2
+ require 'uri'
3
+ require 'json'
4
+
5
+ require_relative 'exceptions'
6
+
7
module WebHDFS
  # Client for the WebHDFS REST API, addressing paths under /webhdfs/v1.
  #
  # Each public operation validates its option keys against OPT_TABLE, sends
  # the HTTP request(s) through #operate_requests and turns the response into
  # a plain Ruby value. Responses that are neither success nor redirection are
  # raised as WebHDFS exceptions by #request.
  class ClientV1
    # Option names accepted by each operation, keyed by the WebHDFS "op" name.
    # Operations without an entry accept no options. Internal use only.
    OPT_TABLE = {
      'CREATE' => ['overwrite', 'blocksize', 'replication', 'permission', 'buffersize'],
      'APPEND' => ['buffersize'],
      'OPEN' => ['offset', 'length', 'buffersize'],
      'MKDIRS' => ['permission'],
      'DELETE' => ['recursive'],
      'SETOWNER' => ['owner', 'group'],
      'SETTIMES' => ['modificationtime', 'accesstime']
    }.freeze

    # Operations for which #operate_requests expects a redirect from the first
    # host and then repeats the request against the redirect location.
    REDIRECTED_OPERATIONS = ['APPEND', 'CREATE', 'OPEN', 'GETFILECHECKSUM'].freeze

    attr_accessor :host, :port, :username, :doas
    attr_accessor :open_timeout, :read_timeout

    # host/port - address used for the first request of every operation.
    # username  - sent as the 'user.name' query parameter when set.
    # doas      - sent as the 'doas' query parameter when set.
    def initialize(host='localhost', port=50070, username=nil, doas=nil)
      @host = host
      @port = port
      @username = username
      @doas = doas
      # Timeouts stay nil (Net::HTTP defaults) until set through the accessors.
      @open_timeout = nil
      @read_timeout = nil
    end

    # curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=CREATE
    #                 [&overwrite=<true|false>][&blocksize=<LONG>][&replication=<SHORT>]
    #                 [&permission=<OCTAL>][&buffersize=<INT>]"
    #
    # Sends +body+ to the redirect target. Returns true when the final
    # response code is '201'.
    def create(path, body, options={})
      check_options(options, OPT_TABLE['CREATE'])
      res = operate_requests('PUT', path, 'CREATE', options, body)
      res.code == '201'
    end

    # curl -i -X POST "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=APPEND[&buffersize=<INT>]"
    #
    # Sends +body+ to the redirect target. Returns true when the final
    # response code is '200'.
    def append(path, body, options={})
      check_options(options, OPT_TABLE['APPEND'])
      res = operate_requests('POST', path, 'APPEND', options, body)
      res.code == '200'
    end

    # curl -i -L "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=OPEN
    #             [&offset=<LONG>][&length=<LONG>][&buffersize=<INT>]"
    #
    # Returns the body of the final response.
    def read(path, options={})
      check_options(options, OPT_TABLE['OPEN'])
      res = operate_requests('GET', path, 'OPEN', options)
      res.body
    end
    alias :open :read

    # curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=MKDIRS[&permission=<OCTAL>]"
    #
    # Returns the 'boolean' value of the JSON response, or false when the
    # response is not a 200 JSON response.
    def mkdir(path, options={})
      check_options(options, OPT_TABLE['MKDIRS'])
      res = operate_requests('PUT', path, 'MKDIRS', options)
      check_success_json(res, 'boolean')
    end
    alias :mkdirs :mkdir

    # curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=RENAME&destination=<PATH>"
    #
    # A +dest+ without a leading '/' gets one prepended before it is sent.
    # Returns the 'boolean' value of the JSON response (false on a non-200 or
    # non-JSON response).
    def rename(path, dest, options={})
      check_options(options, OPT_TABLE['RENAME'])
      dest = '/' + dest unless dest.start_with?('/')
      res = operate_requests('PUT', path, 'RENAME', options.merge({'destination' => dest}))
      check_success_json(res, 'boolean')
    end

    # curl -i -X DELETE "http://<host>:<port>/webhdfs/v1/<path>?op=DELETE
    #                          [&recursive=<true|false>]"
    #
    # Returns the 'boolean' value of the JSON response (false on a non-200 or
    # non-JSON response).
    def delete(path, options={})
      check_options(options, OPT_TABLE['DELETE'])
      res = operate_requests('DELETE', path, 'DELETE', options)
      check_success_json(res, 'boolean')
    end

    # curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=GETFILESTATUS"
    #
    # Returns the parsed 'FileStatus' member of the JSON response.
    def stat(path, options={})
      check_options(options, OPT_TABLE['GETFILESTATUS'])
      res = operate_requests('GET', path, 'GETFILESTATUS', options)
      check_success_json(res, 'FileStatus')
    end
    alias :getfilestatus :stat

    # curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=LISTSTATUS"
    #
    # Returns the 'FileStatus' entry nested inside 'FileStatuses'.
    def list(path, options={})
      check_options(options, OPT_TABLE['LISTSTATUS'])
      res = operate_requests('GET', path, 'LISTSTATUS', options)
      check_success_json(res, 'FileStatuses')['FileStatus']
    end
    alias :liststatus :list

    # curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=GETCONTENTSUMMARY"
    #
    # Returns the parsed 'ContentSummary' member of the JSON response.
    def content_summary(path, options={})
      check_options(options, OPT_TABLE['GETCONTENTSUMMARY'])
      res = operate_requests('GET', path, 'GETCONTENTSUMMARY', options)
      check_success_json(res, 'ContentSummary')
    end
    alias :getcontentsummary :content_summary

    # curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=GETFILECHECKSUM"
    #
    # Returns the parsed 'FileChecksum' member of the JSON response.
    def checksum(path, options={})
      check_options(options, OPT_TABLE['GETFILECHECKSUM'])
      res = operate_requests('GET', path, 'GETFILECHECKSUM', options)
      check_success_json(res, 'FileChecksum')
    end
    alias :getfilechecksum :checksum

    # curl -i "http://<HOST>:<PORT>/webhdfs/v1/?op=GETHOMEDIRECTORY"
    #
    # Returns the 'Path' member of the JSON response.
    def homedir(options={})
      check_options(options, OPT_TABLE['GETHOMEDIRECTORY'])
      res = operate_requests('GET', '/', 'GETHOMEDIRECTORY', options)
      check_success_json(res, 'Path')
    end
    alias :gethomedirectory :homedir

    # curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=SETPERMISSION
    #                 [&permission=<OCTAL>]"
    #
    # +mode+ is sent as the 'permission' parameter via to_s, so pass the octal
    # digits as a String (e.g. '755'); an Integer literal such as 0755 would be
    # sent as its decimal form. Returns true when the response code is '200'.
    def chmod(path, mode, options={})
      check_options(options, OPT_TABLE['SETPERMISSION'])
      res = operate_requests('PUT', path, 'SETPERMISSION', options.merge({'permission' => mode}))
      res.code == '200'
    end
    alias :setpermission :chmod

    # curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=SETOWNER
    #                          [&owner=<USER>][&group=<GROUP>]"
    #
    # Raises ArgumentError unless 'owner' or 'group' is given.
    # Returns true when the response code is '200'.
    def chown(path, options={})
      check_options(options, OPT_TABLE['SETOWNER'])
      unless options.key?('owner') || options.key?('group')
        raise ArgumentError, "'chown' needs at least one of owner or group"
      end
      res = operate_requests('PUT', path, 'SETOWNER', options)
      res.code == '200'
    end
    alias :setowner :chown

    # curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=SETREPLICATION
    #                           [&replication=<SHORT>]"
    #
    # Returns the 'boolean' value of the JSON response (false on a non-200 or
    # non-JSON response).
    def replication(path, replnum, options={})
      check_options(options, OPT_TABLE['SETREPLICATION'])
      res = operate_requests('PUT', path, 'SETREPLICATION', options.merge({'replication' => replnum.to_s}))
      check_success_json(res, 'boolean')
    end
    alias :setreplication :replication

    # curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=SETTIMES
    #                           [&modificationtime=<TIME>][&accesstime=<TIME>]"
    # modificationtime: radix-10 long integer
    # accesstime: radix-10 long integer
    #
    # Raises ArgumentError unless 'modificationtime' or 'accesstime' is given.
    # Returns true when the response code is '200'.
    def touch(path, options={})
      check_options(options, OPT_TABLE['SETTIMES'])
      unless options.key?('modificationtime') || options.key?('accesstime')
        raise ArgumentError, "'touch' needs at least one of modificationtime or accesstime"
      end
      res = operate_requests('PUT', path, 'SETTIMES', options)
      res.code == '200'
    end
    alias :settimes :touch

    # def delegation_token(user, options={}) # GETDELEGATIONTOKEN
    #   raise NotImplementedError
    # end
    # def renew_delegation_token(token, options={}) # RENEWDELEGATIONTOKEN
    #   raise NotImplementedError
    # end
    # def cancel_delegation_token(token, options={}) # CANCELDELEGATIONTOKEN
    #   raise NotImplementedError
    # end

    # Raises ArgumentError naming every key of +options+ that is not listed in
    # +optdecl+. A nil +optdecl+ (operation without an OPT_TABLE entry) means
    # no option is allowed. Returns nil when all keys are valid.
    def check_options(options, optdecl=[])
      unknown = options.keys - (optdecl || [])
      # +unknown+ is already an Array of keys, so join it directly.
      raise ArgumentError, "no such option: #{unknown.join(' ')}" unless unknown.empty?
    end

    # Returns false unless +res+ has code '200' and content type
    # 'application/json'. Otherwise returns true when +attr+ is nil, or the
    # value stored under +attr+ in the parsed JSON body.
    def check_success_json(res, attr=nil)
      res.code == '200' && res.content_type == 'application/json' && (attr.nil? || JSON.parse(res.body)[attr])
    end

    # Prefixes +path+ with '/webhdfs/v1', inserting a '/' when +path+ does not
    # start with one.
    def api_path(path)
      if path.start_with?('/')
        '/webhdfs/v1' + path
      else
        '/webhdfs/v1/' + path
      end
    end

    # Builds the request path plus query string for +op+. The 'op', 'user.name'
    # and 'doas' entries are merged last, so they override same-named +params+.
    def build_path(path, op, params)
      opts = {'op' => op}
      opts['user.name'] = @username if @username
      opts['doas'] = @doas if @doas
      query = URI.encode_www_form(params.merge(opts))
      api_path(path) + '?' + query
    end

    # Performs +op+ against the configured host. For REDIRECTED_OPERATIONS the
    # first request is sent without a payload and must answer with a
    # redirection carrying a 'location' header; the payload is then sent to
    # that location. Raises WebHDFS::RequestFailedError when the redirect is
    # missing. Returns the final response.
    def operate_requests(method, path, op, params={}, payload=nil)
      return request(@host, @port, method, path, op, params, nil) unless REDIRECTED_OPERATIONS.include?(op)

      res = request(@host, @port, method, path, op, params, nil)
      unless res.is_a?(Net::HTTPRedirection) && res['location']
        msg = "NameNode returns non-redirection (or without location header), code:#{res.code}, body:#{res.body}."
        raise WebHDFS::RequestFailedError, msg
      end
      uri = URI.parse(res['location'])
      # The location already carries the full query, so it is reused verbatim.
      rpath = uri.query ? uri.path + '?' + uri.query : uri.path
      request(uri.host, uri.port, method, rpath, nil, {}, payload)
    end

    # Sends one HTTP request and returns the response when it is a success or
    # a redirection. Any other response is raised by status code:
    #
    # IllegalArgumentException      400 Bad Request
    # UnsupportedOperationException 400 Bad Request
    # SecurityException             401 Unauthorized
    # IOException                   403 Forbidden
    # FileNotFoundException         404 Not Found
    # RuntimeException              500 Internal Server Error
    #
    # Other codes raise WebHDFS::RequestFailedError. When +op+ is nil, +path+
    # is used as the request path unchanged.
    def request(host, port, method, path, op=nil, params={}, payload=nil)
      # Configure timeouts before connecting so open_timeout applies to the
      # connect itself.
      conn = Net::HTTP.new(host, port)
      conn.open_timeout = @open_timeout if @open_timeout
      conn.read_timeout = @read_timeout if @read_timeout

      request_path = op ? build_path(path, op, params) : path

      # The block form of start finishes the connection when the block exits.
      res = conn.start { |http| http.send_request(method, request_path, payload) }

      case res
      when Net::HTTPSuccess, Net::HTTPRedirection
        res
      else
        message = if res.body && !res.body.empty?
                    res.body.delete("\n")
                  else
                    'Response body is empty...'
                  end
        case res.code
        when '400'
          raise WebHDFS::ClientError, message
        when '401'
          raise WebHDFS::SecurityError, message
        when '403'
          raise WebHDFS::IOError, message
        when '404'
          raise WebHDFS::FileNotFoundError, message
        when '500'
          raise WebHDFS::ServerError, message
        else
          raise WebHDFS::RequestFailedError, "response code:#{res.code}, message:#{message}"
        end
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
module WebHDFS
  # Common ancestor of every exception defined by this library, so callers can
  # rescue WebHDFS::Error to catch any of them at once.
  class Error < StandardError; end

  # Raised for a 404 Not Found response.
  class FileNotFoundError < Error; end

  # Raised for a 403 Forbidden response.
  # NOTE: inside the WebHDFS namespace this name shadows Ruby's ::IOError.
  class IOError < Error; end
  # Raised for a 401 Unauthorized response.
  # NOTE: inside the WebHDFS namespace this name shadows Ruby's ::SecurityError.
  class SecurityError < Error; end

  # Raised for a 400 Bad Request response.
  class ClientError < Error; end
  # Raised for a 500 Internal Server Error response.
  class ServerError < Error; end

  # Raised for any other failed request, including a missing redirect.
  class RequestFailedError < Error; end
end
@@ -67,7 +67,7 @@ module WebHDFS
67
67
  #
68
68
  # Examples
69
69
  #
70
- # FileUtils.copy_from_local 'remote_file', 'local_file'
70
+ # FileUtils.copy_to_local 'remote_file', 'local_file'
71
71
  #
72
72
  def copy_to_local(path, file, options={})
73
73
  fu_check_options options, OPT_TABLE['copy_to_local']
@@ -88,7 +88,7 @@ module WebHDFS
88
88
  #
89
89
  # Examples
90
90
  #
91
- # FileUtils.copy_from_local 'local_file', 'remote_file'
91
+ # FileUtils.append 'remote_path', 'contents'
92
92
  #
93
93
  def append(path, body, options={})
94
94
  fu_check_options options, OPT_TABLE['append']
data/lib/webhdfs.rb CHANGED
@@ -1 +1,2 @@
1
1
  require File.join(File.dirname(__FILE__), 'webhdfs', 'fileutils.rb')
2
+ require File.join(File.dirname(__FILE__), 'webhdfs', 'client.rb')
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: webhdfs
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.2'
4
+ version: '0.3'
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-03-14 00:00:00.000000000Z
12
+ date: 2012-05-20 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rest-client
16
- requirement: &2157003340 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,15 @@ dependencies:
21
21
  version: 1.6.7
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2157003340
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 1.6.7
25
30
  - !ruby/object:Gem::Dependency
26
31
  name: rake
27
- requirement: &2157002860 !ruby/object:Gem::Requirement
32
+ requirement: !ruby/object:Gem::Requirement
28
33
  none: false
29
34
  requirements:
30
35
  - - ! '>='
@@ -32,10 +37,15 @@ dependencies:
32
37
  version: 0.9.2
33
38
  type: :development
34
39
  prerelease: false
35
- version_requirements: *2157002860
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: 0.9.2
36
46
  - !ruby/object:Gem::Dependency
37
47
  name: rdoc
38
- requirement: &2157002400 !ruby/object:Gem::Requirement
48
+ requirement: !ruby/object:Gem::Requirement
39
49
  none: false
40
50
  requirements:
41
51
  - - ! '>='
@@ -43,10 +53,15 @@ dependencies:
43
53
  version: '3.12'
44
54
  type: :development
45
55
  prerelease: false
46
- version_requirements: *2157002400
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '3.12'
47
62
  - !ruby/object:Gem::Dependency
48
63
  name: simplecov
49
- requirement: &2157001940 !ruby/object:Gem::Requirement
64
+ requirement: !ruby/object:Gem::Requirement
50
65
  none: false
51
66
  requirements:
52
67
  - - ! '>='
@@ -54,10 +69,15 @@ dependencies:
54
69
  version: 0.5.4
55
70
  type: :development
56
71
  prerelease: false
57
- version_requirements: *2157001940
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: 0.5.4
58
78
  - !ruby/object:Gem::Dependency
59
79
  name: rr
60
- requirement: &2157001480 !ruby/object:Gem::Requirement
80
+ requirement: !ruby/object:Gem::Requirement
61
81
  none: false
62
82
  requirements:
63
83
  - - ! '>='
@@ -65,7 +85,12 @@ dependencies:
65
85
  version: 1.0.0
66
86
  type: :development
67
87
  prerelease: false
68
- version_requirements: *2157001480
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: 1.0.0
69
94
  description: Ruby WebHDFS client
70
95
  email: kazuki.ohta@gmail.com
71
96
  executables: []
@@ -78,6 +103,9 @@ files:
78
103
  - Rakefile
79
104
  - VERSION
80
105
  - lib/webhdfs.rb
106
+ - lib/webhdfs/client.rb
107
+ - lib/webhdfs/client_v1.rb
108
+ - lib/webhdfs/exceptions.rb
81
109
  - lib/webhdfs/fileutils.rb
82
110
  - test/test_helper.rb
83
111
  - test/webhdfs/fileutils.rb
@@ -102,11 +130,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
102
130
  version: '0'
103
131
  requirements: []
104
132
  rubyforge_project:
105
- rubygems_version: 1.8.6
133
+ rubygems_version: 1.8.21
106
134
  signing_key:
107
135
  specification_version: 3
108
136
  summary: Ruby WebHDFS client
109
137
  test_files:
110
138
  - test/test_helper.rb
111
139
  - test/webhdfs/fileutils.rb
112
- has_rdoc: false