imw 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +34 -14
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/lib/imw.rb +9 -6
- data/lib/imw/{resources/archive.rb → archives.rb} +20 -10
- data/lib/imw/archives/rar.rb +19 -0
- data/lib/imw/archives/tar.rb +19 -0
- data/lib/imw/archives/tarbz2.rb +73 -0
- data/lib/imw/archives/targz.rb +73 -0
- data/lib/imw/archives/zip.rb +51 -0
- data/lib/imw/{resources/compressed_file.rb → compressed_files.rb} +16 -11
- data/lib/imw/compressed_files/bz2.rb +16 -0
- data/lib/imw/{resources → compressed_files}/compressible.rb +2 -4
- data/lib/imw/compressed_files/gz.rb +16 -0
- data/lib/imw/formats.rb +31 -0
- data/lib/imw/formats/delimited.rb +90 -0
- data/lib/imw/formats/excel.rb +125 -0
- data/lib/imw/formats/json.rb +51 -0
- data/lib/imw/formats/sgml.rb +69 -0
- data/lib/imw/formats/yaml.rb +51 -0
- data/lib/imw/resource.rb +108 -10
- data/lib/imw/schemes.rb +21 -0
- data/lib/imw/schemes/hdfs.rb +240 -0
- data/lib/imw/schemes/http.rb +166 -0
- data/lib/imw/schemes/local.rb +219 -0
- data/lib/imw/schemes/remote.rb +114 -0
- data/lib/imw/schemes/s3.rb +135 -0
- data/lib/imw/tools.rb +8 -0
- data/lib/imw/{transforms → tools}/archiver.rb +1 -1
- data/lib/imw/{transforms → tools}/transferer.rb +10 -10
- data/spec/imw/{resources/archive_spec.rb → archive_spec.rb} +3 -3
- data/spec/imw/{resources/archives_and_compressed → archives}/rar_spec.rb +2 -2
- data/spec/imw/{resources/archives_and_compressed → archives}/tar_spec.rb +2 -2
- data/spec/imw/{resources/archives_and_compressed → archives}/tarbz2_spec.rb +4 -4
- data/spec/imw/{resources/archives_and_compressed → archives}/targz_spec.rb +4 -4
- data/spec/imw/{resources/archives_and_compressed → archives}/zip_spec.rb +2 -2
- data/spec/imw/compressed_files/bz2_spec.rb +15 -0
- data/spec/imw/{resources → compressed_files}/compressible_spec.rb +1 -1
- data/spec/imw/compressed_files/gz_spec.rb +15 -0
- data/spec/imw/{resources/compressed_file_spec.rb → compressed_files_spec.rb} +3 -3
- data/spec/imw/{resources/formats → formats}/delimited_spec.rb +2 -2
- data/spec/imw/{resources/formats → formats}/json_spec.rb +2 -2
- data/spec/imw/{resources/formats → formats}/sgml_spec.rb +2 -2
- data/spec/imw/{resources/formats → formats}/yaml_spec.rb +2 -2
- data/spec/imw/resource_spec.rb +4 -4
- data/spec/imw/{resources/schemes → schemes}/hdfs_spec.rb +7 -7
- data/spec/imw/{resources/schemes → schemes}/http_spec.rb +2 -2
- data/spec/imw/{resources → schemes}/local_spec.rb +5 -5
- data/spec/imw/{resources → schemes}/remote_spec.rb +7 -3
- data/spec/imw/{resources/schemes → schemes}/s3_spec.rb +2 -2
- data/spec/imw/{transforms → tools}/archiver_spec.rb +2 -2
- data/spec/imw/tools/transferer_spec.rb +113 -0
- metadata +69 -71
- data/lib/imw/resources.rb +0 -118
- data/lib/imw/resources/archives_and_compressed.rb +0 -32
- data/lib/imw/resources/archives_and_compressed/bz2.rb +0 -18
- data/lib/imw/resources/archives_and_compressed/gz.rb +0 -18
- data/lib/imw/resources/archives_and_compressed/rar.rb +0 -23
- data/lib/imw/resources/archives_and_compressed/tar.rb +0 -23
- data/lib/imw/resources/archives_and_compressed/tarbz2.rb +0 -78
- data/lib/imw/resources/archives_and_compressed/targz.rb +0 -78
- data/lib/imw/resources/archives_and_compressed/zip.rb +0 -57
- data/lib/imw/resources/formats.rb +0 -32
- data/lib/imw/resources/formats/delimited.rb +0 -92
- data/lib/imw/resources/formats/excel.rb +0 -125
- data/lib/imw/resources/formats/json.rb +0 -53
- data/lib/imw/resources/formats/sgml.rb +0 -72
- data/lib/imw/resources/formats/yaml.rb +0 -53
- data/lib/imw/resources/local.rb +0 -198
- data/lib/imw/resources/remote.rb +0 -110
- data/lib/imw/resources/schemes.rb +0 -19
- data/lib/imw/resources/schemes/hdfs.rb +0 -242
- data/lib/imw/resources/schemes/http.rb +0 -161
- data/lib/imw/resources/schemes/s3.rb +0 -137
- data/lib/imw/transforms.rb +0 -8
- data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +0 -15
- data/spec/imw/resources/archives_and_compressed/gz_spec.rb +0 -15
- data/spec/imw/transforms/transferer_spec.rb +0 -113
data/lib/imw/resource.rb
CHANGED
@@ -1,32 +1,35 @@
|
|
1
1
|
require 'addressable/uri'
|
2
|
-
require 'imw/resources'
|
3
2
|
|
4
3
|
module IMW
|
5
4
|
|
5
|
+
# Define this constant in your configuration file to add your own
|
6
|
+
# URI handlers to IMW.
|
7
|
+
USER_DEFINED_HANDLERS = [] unless defined?(USER_DEFINED_HANDLERS)
|
8
|
+
|
6
9
|
# A resource can be anything addressable via a URI. Examples
|
7
10
|
# include local files, remote files, webpages, &c.
|
8
11
|
#
|
9
12
|
# The IMW::Resource class takes a URI as input and then dynamically
|
10
|
-
# extends itself with appropriate modules from IMW
|
11
|
-
#
|
13
|
+
# extends itself with appropriate modules from IMW. As an example,
|
14
|
+
# calling
|
12
15
|
#
|
13
16
|
# my_archive = IMW::Resource.new('/path/to/my/archive.tar.bz2')
|
14
17
|
#
|
15
18
|
# would return an IMW::Resource extended by
|
16
|
-
# IMW::
|
19
|
+
# IMW::Archives::Tarbz2 (among other modules) which
|
17
20
|
# therefore has methods for extracting, listing, and appending to
|
18
21
|
# the archive.
|
19
22
|
#
|
20
23
|
# Modules are so extended based on handlers defined in the
|
21
24
|
# <tt>imw/resources</tt> directory and accessible via
|
22
|
-
# IMW::
|
23
|
-
# defining the constant IMW::
|
24
|
-
#
|
25
|
+
# IMW::Resource.handlers. You can define your own handlers by
|
26
|
+
# defining the constant IMW::Resource::USER_DEFINED_HANDLERS in your
|
27
|
+
# configuration file.
|
25
28
|
#
|
26
29
|
# The modules extending a particular IMW::Resource instance can be
|
27
30
|
# listed as follows
|
28
31
|
#
|
29
|
-
# my_archive.resource_modules #=> [IMW::
|
32
|
+
# my_archive.resource_modules #=> [IMW::Local::Base, IMW::Local::File, IMW::Local::Compressible, IMW::Archives::Tarbz2]
|
30
33
|
#
|
31
34
|
# By default, resources are opened for reading. Passing in the
|
32
35
|
# appropriate <tt>:mode</tt> option changes this:
|
@@ -41,6 +44,9 @@ module IMW
|
|
41
44
|
#
|
42
45
|
# Read the documentation for modules in IMW::Resources to learn more
|
43
46
|
# about the various behaviors an IMW::Resource can acquire.
|
47
|
+
#
|
48
|
+
# You can also instantiate an IMW::Resource using IMW.open, which
|
49
|
+
# accepts all the same arguments as IMW::Resource.new.
|
44
50
|
class Resource
|
45
51
|
|
46
52
|
attr_reader :uri, :mode
|
@@ -66,9 +72,9 @@ module IMW
|
|
66
72
|
end
|
67
73
|
|
68
74
|
# Extend this resource with modules by passing it through a
|
69
|
-
# collection of handlers defined by IMW::
|
75
|
+
# collection of handlers defined by IMW::Resource.handlers.
|
70
76
|
def extend_appropriately!
|
71
|
-
|
77
|
+
self.class.extend_resource!(self)
|
72
78
|
end
|
73
79
|
|
74
80
|
# Set the URI of this resource by parsing the given +uri+ (if
|
@@ -186,5 +192,97 @@ module IMW
|
|
186
192
|
raise IMW::NoMethodError, "undefined method `#{method}' for #{self}, extended by #{resource_modules.join(', ')}"
|
187
193
|
end
|
188
194
|
end
|
195
|
+
|
196
|
+
# Iterate through IMW::Resource.handlers and extend the given
# +resource+ with every module whose handler condition matches.
#
# A Regexp handler is matched against the resource's URI (as a
# string), a Proc handler is called with the resource, and +true+
# always matches.
#
# @param [IMW::Resource] resource the resource to extend
# @return [IMW::Resource] the extended resource
# @raise [IMW::TypeError] if a handler is not a Regexp, Proc, or true
def self.extend_resource! resource
  handlers.each do |mod_name, handler|
    matched = case handler
              when Regexp    then handler =~ resource.uri.to_s
              when Proc      then handler.call(resource)
              when TrueClass then true
              else
                # Raise the exception class itself: IMW::TypeError("...")
                # with parentheses is parsed as a call to a method named
                # TypeError and would fail with NoMethodError.
                raise IMW::TypeError, "A handler must be Regexp, Proc, or true"
              end
    extend_resource_with_mod_or_string!(resource, mod_name) if matched
  end
  resource
end
|
214
|
+
|
215
|
+
# A list of handlers to match against each new resource.
|
216
|
+
#
|
217
|
+
# When an IMW::Resource is instantiated it eventually calls
|
218
|
+
# IMW::Resource.extend_resource! which will iterate through the
|
219
|
+
# handlers in IMW::Resource.handlers, extending the resource with
|
220
|
+
# modules whose handler conditions are satisfied.
|
221
|
+
#
|
222
|
+
# A handler is just an Array with two elements. The first should be
|
223
|
+
# a module or a string identifying a module.
|
224
|
+
#
|
225
|
+
# If the second element is a Regexp, the corresponding module will
|
226
|
+
# be used if the regexp matches the resource's URI (as a string)
|
227
|
+
#
|
228
|
+
# If the second element is a Proc, it will be called with the
|
229
|
+
# resource as its only argument and if it returns true then the
|
230
|
+
# module will be used.
|
231
|
+
#
|
232
|
+
# You can define your own handlers by appending them to
|
233
|
+
# IMW::USER_DEFINED_HANDLERS in your <tt>.imwrc</tt>
|
234
|
+
# file.
|
235
|
+
#
|
236
|
+
# The order in which handlers appear is significant --
|
237
|
+
# IMW::CompressedFiles::HANDLERS must be _before_
|
238
|
+
# IMW::Archives::HANDLERS, for example, because of (say)
|
239
|
+
# <tt>.tar.bz2</tt> files.
|
240
|
+
#
|
241
|
+
# @return [Array]
|
242
|
+
def self.handlers
|
243
|
+
# order is important!
|
244
|
+
#
|
245
|
+
#
|
246
|
+
#
|
247
|
+
# CompressedFiles must come before
|
248
|
+
# Archives because of tar.bz2 type files
|
249
|
+
IMW::Schemes::HANDLERS + IMW::CompressedFiles::HANDLERS + IMW::Archives::HANDLERS + IMW::Formats::HANDLERS + USER_DEFINED_HANDLERS
|
250
|
+
end
|
251
|
+
|
252
|
+
protected
|
253
|
+
# Extend +resource+ with +mod_or_string+.
#
# A Module is used directly.  Anything else is converted to a string
# such as "Mod::SubMod" and resolved one constant at a time, starting
# from the IMW namespace, so the string is looked up rather than
# evaluated as code.
#
# @param [IMW::Resource] resource the resource to extend
#
# @param [Module, String] mod_or_string the module or string
# representing a module to extend the resource with
# @return [IMW::Resource] the extended resource
# @raise [NameError] if the string does not name a known constant
def self.extend_resource_with_mod_or_string! resource, mod_or_string
  return resource.extend(mod_or_string) if mod_or_string.is_a?(Module)

  # Drop empty segments so that a leading "::" is tolerated.
  names = mod_or_string.to_s.split('::').reject { |name| name.empty? }
  raise NameError, "cannot resolve a module from #{mod_or_string.inspect}" if names.empty?

  mod = names.inject(IMW) { |scope, name| scope.const_get(name) }
  resource.extend(mod)
end
|
189
287
|
end
|
190
288
|
end
|
data/lib/imw/schemes.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
module IMW
|
2
|
+
module Schemes
|
3
|
+
autoload :Local, 'imw/schemes/local'
|
4
|
+
autoload :Remote, 'imw/schemes/remote'
|
5
|
+
autoload :S3, 'imw/schemes/s3'
|
6
|
+
autoload :HTTP, 'imw/schemes/http'
|
7
|
+
autoload :HTTPS, 'imw/schemes/http'
|
8
|
+
autoload :HDFS, 'imw/schemes/hdfs'
|
9
|
+
|
10
|
+
HANDLERS = [
|
11
|
+
["Schemes::Local::Base", Proc.new { |resource| resource.scheme == 'file' || resource.scheme.blank? } ],
|
12
|
+
["Schemes::Remote::Base", Proc.new { |resource| resource.scheme != 'file' && resource.scheme.present? } ],
|
13
|
+
["Schemes::S3", %r{^s3://} ],
|
14
|
+
["Schemes::HTTP", %r{^http://} ],
|
15
|
+
["Schemes::HTTPS", %r{^https://} ],
|
16
|
+
["Schemes::HDFS", %r{^hdfs://} ]
|
17
|
+
]
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
|
@@ -0,0 +1,240 @@
|
|
1
|
+
module IMW
|
2
|
+
module Schemes
|
3
|
+
|
4
|
+
# Defines methods for reading and writing data to/from an
|
5
|
+
# HDFS[http://hadoop.apache.org/common/docs/current/hdfs_design.html].
|
6
|
+
#
|
7
|
+
# Learn more about Hadoop[http://hadoop.apache.org] and the
|
8
|
+
# {Hadoop Distributed
|
9
|
+
# Filesystem}[http://hadoop.apache.org/common/docs/current/hdfs_design.html].
|
10
|
+
module HDFS
|
11
|
+
|
12
|
+
# Checks to see if this is a file or directory
|
13
|
+
def self.extended obj
|
14
|
+
obj.extend(obj.is_directory? ? HDFSDirectory : HDFSFile)
|
15
|
+
end
|
16
|
+
|
17
|
+
# Is this resource an HDFS resource?
|
18
|
+
#
|
19
|
+
# @return [true, false]
|
20
|
+
def on_hdfs?
|
21
|
+
true
|
22
|
+
end
|
23
|
+
alias_method :is_hdfs?, :on_hdfs?
|
24
|
+
|
25
|
+
# Copy this resource to the +new_uri+.
#
# The transfer is delegated to IMW::Tools::Transferer (the
# Transferer lives in the IMW::Tools namespace, not IMW::Transforms).
#
# @param [String, IMW::Resource] new_uri
# @return [IMW::Resource] the new resource
def cp new_uri
  IMW::Tools::Transferer.new(:cp, self, new_uri).transfer!
end
|
32
|
+
|
33
|
+
# Move this resource to the +new_uri+.
#
# The transfer is delegated to IMW::Tools::Transferer (the
# Transferer lives in the IMW::Tools namespace, not IMW::Transforms).
#
# @param [String, IMW::Resource] new_uri
# @return [IMW::Resource] the new resource
def mv new_uri
  IMW::Tools::Transferer.new(:mv, self, new_uri).transfer!
end
|
40
|
+
|
41
|
+
# Delete this resource from the HDFS.
|
42
|
+
#
|
43
|
+
# @option options [true,false] :skip_trash
|
44
|
+
def rm options={}
|
45
|
+
should_exist!("Cannot delete.")
|
46
|
+
args = [:rm]
|
47
|
+
args << '-skipTrash' if options[:skip] || options[:skip_trash] || options[:skipTrash]
|
48
|
+
args << path
|
49
|
+
HDFS.fs(*args)
|
50
|
+
self
|
51
|
+
end
|
52
|
+
alias_method :rm!, :rm
|
53
|
+
|
54
|
+
|
55
|
+
# Does this path exist on the HDFS?
|
56
|
+
#
|
57
|
+
# @return [true, false]
|
58
|
+
def exist?
|
59
|
+
return @exist unless @exist.nil?
|
60
|
+
refresh!
|
61
|
+
@exist
|
62
|
+
end
|
63
|
+
alias_method :exists?, :exist?
|
64
|
+
|
65
|
+
|
66
|
+
# Return the size (in bytes) of this resource on the HDFS.
|
67
|
+
#
|
68
|
+
# This value is cached.  Call +refresh!+ to refresh the cache
|
69
|
+
# manually.
|
70
|
+
#
|
71
|
+
# @return [Fixnum]
|
72
|
+
def size
|
73
|
+
return @size unless @size.nil?
|
74
|
+
refresh!
|
75
|
+
should_exist!("Cannot report size")
|
76
|
+
@size
|
77
|
+
end
|
78
|
+
|
79
|
+
# Return the number of directories contained at or below this
|
80
|
+
# path on the HDFS.
|
81
|
+
#
|
82
|
+
# This value is cached.  Call +refresh!+ to refresh the cache
|
83
|
+
# manually.
|
84
|
+
#
|
85
|
+
# @return [Fixnum]
|
86
|
+
def num_dirs
|
87
|
+
return @num_dirs unless @num_dirs.nil?
|
88
|
+
refresh!
|
89
|
+
should_exist!("Cannot report number of directories.")
|
90
|
+
@num_dirs
|
91
|
+
end
|
92
|
+
|
93
|
+
# Return the number of files contained at or below this path
|
94
|
+
# on the HDFS.
|
95
|
+
#
|
96
|
+
# This value is cached.  Call +refresh!+ to refresh the cache
|
97
|
+
# manually.
|
98
|
+
#
|
99
|
+
# @return [Fixnum]
|
100
|
+
def num_files
|
101
|
+
return @num_files unless @num_files.nil?
|
102
|
+
refresh!
|
103
|
+
should_exist!("Cannot report number of files.")
|
104
|
+
@num_files
|
105
|
+
end
|
106
|
+
|
107
|
+
# Is this resource an HDFS directory?
|
108
|
+
#
|
109
|
+
# @return [true, false]
|
110
|
+
def is_directory?
|
111
|
+
exist? && num_dirs > 0
|
112
|
+
end
|
113
|
+
|
114
|
+
# Refresh the cached file properties.
|
115
|
+
#
|
116
|
+
# @return [IMW::Resource] this resource
|
117
|
+
def refresh!
|
118
|
+
response = HDFS.fs(:count, path)
|
119
|
+
if response.blank? || response =~ /^Can not find listing for/
|
120
|
+
@exist = false
|
121
|
+
@num_dirs, @num_files, @size, @hdfs_path = false, false, false, false
|
122
|
+
else
|
123
|
+
@exist = true
|
124
|
+
parts = response.split
|
125
|
+
@num_dirs, @num_files, @size = parts[0..2].map(&:to_i)
|
126
|
+
@hdfs_path = parts.last
|
127
|
+
end
|
128
|
+
self
|
129
|
+
end
|
130
|
+
|
131
|
+
# Execute +command+ with +args+ on the Hadoop Distributed
# Filesystem (HDFS).
#
# If passed a block, yield each line of the output from the
# command, else just return the output.
#
# Try running `hadoop fs -help' for more information.
#
# NOTE(review): the arguments are interpolated into a shell command
# without escaping, so callers must not pass untrusted strings.
#
# @param [String, Symbol] command the command to run.
# @param [String, Symbol] args the arguments to pass the command
# (+nil+ entries are ignored)
# @yield [String] each line of the command's output
# @return [String] the command's output
def self.fs command, *args
  # Prefer #to_str when an argument provides it, but fall back to
  # #to_s so that Symbols (which have no #to_str) are accepted too.
  arguments = args.compact.map { |arg| arg.respond_to?(:to_str) ? arg.to_str : arg.to_s }.join(' ')
  command_string = "#{executable} fs -#{command} #{arguments}"
  # FIXME or else it just spams the screen when we do HDFS#refresh!
  # Compare as strings so that both :count and "count" are recognised.
  command_string += " 2>&1" if command.to_s == 'count'
  output = `#{command_string}`.chomp
  return output unless block_given?
  output.split("\n").each { |line| yield line }
end
|
155
|
+
|
156
|
+
protected
|
157
|
+
# Returns the path to the Hadoop executable.
|
158
|
+
#
|
159
|
+
# @return [String]
|
160
|
+
def self.executable
|
161
|
+
@executable ||= begin
|
162
|
+
string = `which hadoop`.chomp
|
163
|
+
raise IMW::Error.new("Could not find hadoop command. Is Hadoop installed?") if string.blank?
|
164
|
+
string
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
# Defines methods for reading data from HDFS files.
|
170
|
+
module HDFSFile
|
171
|
+
|
172
|
+
# Return the contents of this HDFS file as a string.
|
173
|
+
#
|
174
|
+
# Be VERY careful how you use this!
|
175
|
+
#
|
176
|
+
# @return [String]
|
177
|
+
def read
|
178
|
+
HDFS.fs(:cat, path)
|
179
|
+
end
|
180
|
+
|
181
|
+
# Iterate through each line of this HDFS resource.
|
182
|
+
#
|
183
|
+
# @yield [String] each line of the file
|
184
|
+
def each &block
|
185
|
+
HDFS.fs(:cat, path, &block)
|
186
|
+
end
|
187
|
+
|
188
|
+
# Return a handle on a StringIO object representing the
|
189
|
+
# content in this HDFS file.
|
190
|
+
#
|
191
|
+
# Be VERY careful how you use this! It is a StringIO object
|
192
|
+
# so the whole HDFS file is read into a string before
|
193
|
+
# returning the handle.
|
194
|
+
#
|
195
|
+
# @return [StringIO]
|
196
|
+
def io
|
197
|
+
@io ||= StringIO.new(read)
|
198
|
+
end
|
199
|
+
|
200
|
+
# Map over the lines of this HDFS resource.
#
# The lines are obtained by running the +cat+ command through
# HDFS.fs on this resource's path.
#
# @yield [String] each line of the file
# @return [Array] the result of the block on each line
def map &block
  # Plain accumulation instead of ActiveSupport's deprecated
  # +returning+ helper, so this works without that extension.
  output = []
  HDFS.fs(:cat, path) { |line| output << block.call(line) }
  output
end
|
211
|
+
|
212
|
+
end
|
213
|
+
|
214
|
+
# Defines methods for listing contents of HDFS directories.
|
215
|
+
module HDFSDirectory
|
216
|
+
|
217
|
+
# Return the paths of all files and directories directly below
# this directory on the HDFS.
#
# The paths are taken from the last whitespace-separated field of
# each line of +ls+ output; the "Found N items" summary line is
# skipped.
#
# @return [Array<String>]
def contents
  # Plain accumulation instead of ActiveSupport's deprecated
  # +returning+ helper, so this works without that extension.
  paths = []
  HDFS.fs(:ls, path) do |line|
    next if line =~ /^Found.*items$/
    paths << line.split.last
  end
  paths
end
|
229
|
+
|
230
|
+
# Return the resources directly below this directory on the
|
231
|
+
# HDFS.
|
232
|
+
#
|
233
|
+
# @return [Array<IMW::Resource>]
|
234
|
+
def resources
|
235
|
+
contents.map { |path| IMW.open(path) }
|
236
|
+
end
|
237
|
+
|
238
|
+
end
|
239
|
+
end
|
240
|
+
end
|
@@ -0,0 +1,166 @@
|
|
1
|
+
module IMW
|
2
|
+
module Schemes
|
3
|
+
|
4
|
+
# Defines methods for accessing a resource over HTTP. Uses
|
5
|
+
# RestClient to implement the basic HTTP verbs (GET, POST, PUT,
|
6
|
+
# DELETE, HEAD).
|
7
|
+
module HTTP
|
8
|
+
|
9
|
+
# Many websites have HTML content without an <tt>.html</tt>
|
10
|
+
# extension so automatically extend +obj+ with
|
11
|
+
# IMW::Formats::Html in this case.
|
12
|
+
def self.extended obj
|
13
|
+
obj.extend(IMW::Formats::Html) if obj.extension.blank?
|
14
|
+
end
|
15
|
+
|
16
|
+
# Is this resource being accessed via HTTP?
|
17
|
+
#
|
18
|
+
# @return [true, false]
|
19
|
+
def via_http?
|
20
|
+
true
|
21
|
+
end
|
22
|
+
|
23
|
+
# Copy this resource to the +new_uri+.
|
24
|
+
#
|
25
|
+
# @param [String, IMW::Resource] new_uri
|
26
|
+
# @return [IMW::Resource] the new resource
|
27
|
+
def cp new_uri
|
28
|
+
IMW::Tools::Transferer.new(:cp, self, new_uri).transfer!
|
29
|
+
end
|
30
|
+
|
31
|
+
|
32
|
+
# Return the basename of the URI or <tt>_index</tt> if it's
|
33
|
+
# blank, as in the case of <tt>http://www.google.com</tt>.
|
34
|
+
#
|
35
|
+
# @return [String]
|
36
|
+
def effective_basename
|
37
|
+
(basename.blank? || basename =~ %r{^/*$}) ? "_index" : basename
|
38
|
+
end
|
39
|
+
|
40
|
+
# Send a GET request to this resource's URI.
|
41
|
+
#
|
42
|
+
# If the response doesn't have HTTP code 2xx, a RestClient
|
43
|
+
# error will be raised.
|
44
|
+
#
|
45
|
+
# If a block is given then the response will be passed to the
|
46
|
+
# block, even in case of a non-2xx code.
|
47
|
+
#
|
48
|
+
# See the documentation for
|
49
|
+
# RestClient[http://rdoc.info/projects/archiloque/rest-client]
|
50
|
+
# for more information.
|
51
|
+
#
|
52
|
+
# @param [Hash] headers the headers to include in the request
|
53
|
+
# @yield [RestClient::Response] the response from the server
|
54
|
+
# @return [RestClient::Response] the response from the server
|
55
|
+
# @raise [RestClient::NotModified, RestClient::Unauthorized, RestClient::ResourceNotFound, RestClient::RequestFailed] error from RestClient on non-2xx response codes
|
56
|
+
def get headers={}, &block
|
57
|
+
make_restclient_request do
|
58
|
+
RestClient.get(uri.to_s, headers, &block)
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
# Send a POST request to this resource's URI with data
|
63
|
+
# +payload+.
|
64
|
+
#
|
65
|
+
# If the response doesn't have HTTP code 2xx, a RestClient
|
66
|
+
# error will be raised.
|
67
|
+
#
|
68
|
+
# If a block is given then the response will be passed to the
|
69
|
+
# block, even in case of a non-2xx code.
|
70
|
+
#
|
71
|
+
# See the documentation for
|
72
|
+
# RestClient[http://rdoc.info/projects/archiloque/rest-client]
|
73
|
+
# for more information.
|
74
|
+
#
|
75
|
+
# @param [Hash, String] payload the data to send
|
76
|
+
# @param [Hash] headers the headers to include in the request
|
77
|
+
# @yield [RestClient::Response] the response from the server
|
78
|
+
# @return [RestClient::Response] the response from the server
|
79
|
+
# @raise [RestClient::NotModified, RestClient::Unauthorized, RestClient::ResourceNotFound, RestClient::RequestFailed] error from RestClient on non-2xx response codes
|
80
|
+
def post payload, headers={}, &block
|
81
|
+
make_restclient_request do
|
82
|
+
RestClient.post(uri.to_s, payload, headers, &block)
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
# Send a PUT request to this resource's URI with data
|
87
|
+
# +payload+.
|
88
|
+
#
|
89
|
+
# If the response doesn't have HTTP code 2xx, a RestClient
|
90
|
+
# error will be raised.
|
91
|
+
#
|
92
|
+
# If a block is given then the response will be passed to the
|
93
|
+
# block, even in case of a non-2xx code.
|
94
|
+
#
|
95
|
+
# See the documentation for
|
96
|
+
# RestClient[http://rdoc.info/projects/archiloque/rest-client]
|
97
|
+
# for more information.
|
98
|
+
#
|
99
|
+
# @param [Hash, String] payload the data to send
|
100
|
+
# @param [Hash] headers the headers to include in the request
|
101
|
+
# @yield [RestClient::Response] the response from the server
|
102
|
+
# @return [RestClient::Response] the response from the server
|
103
|
+
# @raise [RestClient::NotModified, RestClient::Unauthorized, RestClient::ResourceNotFound, RestClient::RequestFailed] error from RestClient on non-2xx response codes
|
104
|
+
def put payload, headers={}, &block
|
105
|
+
make_restclient_request do
|
106
|
+
RestClient.put(uri.to_s, payload, headers, &block)
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
# Send a DELETE request to this resource's URI.
|
111
|
+
#
|
112
|
+
# If the response doesn't have HTTP code 2xx, a RestClient
|
113
|
+
# error will be raised.
|
114
|
+
#
|
115
|
+
# If a block is given then the response will be passed to the
|
116
|
+
# block, even in case of a non-2xx code.
|
117
|
+
#
|
118
|
+
# See the documentation for
|
119
|
+
# RestClient[http://rdoc.info/projects/archiloque/rest-client]
|
120
|
+
# for more information.
|
121
|
+
#
|
122
|
+
# @param [Hash] headers the headers to include in the request
|
123
|
+
# @yield [RestClient::Response] the response from the server
|
124
|
+
# @return [RestClient::Response] the response from the server
|
125
|
+
# @raise [RestClient::NotModified, RestClient::Unauthorized, RestClient::ResourceNotFound, RestClient::RequestFailed] error from RestClient on non-2xx response codes
|
126
|
+
def delete headers={}, &block
|
127
|
+
make_restclient_request do
|
128
|
+
RestClient.delete(uri.to_s, headers, &block)
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
# Send a HEAD request to this resource's URI.
|
133
|
+
#
|
134
|
+
# If the response doesn't have HTTP code 2xx, a RestClient
|
135
|
+
# error will be raised.
|
136
|
+
#
|
137
|
+
# If a block is given then the response will be passed to the
|
138
|
+
# block, even in case of a non-2xx code.
|
139
|
+
#
|
140
|
+
# See the documentation for
|
141
|
+
# RestClient[http://rdoc.info/projects/archiloque/rest-client]
|
142
|
+
# for more information.
|
143
|
+
#
|
144
|
+
# @param [Hash] headers the headers to include in the request
|
145
|
+
# @yield [RestClient::Response] the response from the server
|
146
|
+
# @return [RestClient::Response] the response from the server
|
147
|
+
# @raise [RestClient::NotModified, RestClient::Unauthorized, RestClient::ResourceNotFound, RestClient::RequestFailed] error from RestClient on non-2xx response codes
|
148
|
+
def head headers={}, &block
|
149
|
+
make_restclient_request do
|
150
|
+
RestClient.head(uri.to_s, headers, &block)
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
protected
|
155
|
+
def make_restclient_request &block # :nodoc
|
156
|
+
require 'restclient'
|
157
|
+
begin
|
158
|
+
yield
|
159
|
+
rescue RestClient::NotModified, RestClient::Unauthorized, RestClient::ResourceNotFound, RestClient::RequestFailed => e
|
160
|
+
raise IMW::NetworkError.new("#{e.class} -- #{e.message}")
|
161
|
+
end
|
162
|
+
end
|
163
|
+
end
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|