imw 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +34 -14
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/lib/imw.rb +9 -6
- data/lib/imw/{resources/archive.rb → archives.rb} +20 -10
- data/lib/imw/archives/rar.rb +19 -0
- data/lib/imw/archives/tar.rb +19 -0
- data/lib/imw/archives/tarbz2.rb +73 -0
- data/lib/imw/archives/targz.rb +73 -0
- data/lib/imw/archives/zip.rb +51 -0
- data/lib/imw/{resources/compressed_file.rb → compressed_files.rb} +16 -11
- data/lib/imw/compressed_files/bz2.rb +16 -0
- data/lib/imw/{resources → compressed_files}/compressible.rb +2 -4
- data/lib/imw/compressed_files/gz.rb +16 -0
- data/lib/imw/formats.rb +31 -0
- data/lib/imw/formats/delimited.rb +90 -0
- data/lib/imw/formats/excel.rb +125 -0
- data/lib/imw/formats/json.rb +51 -0
- data/lib/imw/formats/sgml.rb +69 -0
- data/lib/imw/formats/yaml.rb +51 -0
- data/lib/imw/resource.rb +108 -10
- data/lib/imw/schemes.rb +21 -0
- data/lib/imw/schemes/hdfs.rb +240 -0
- data/lib/imw/schemes/http.rb +166 -0
- data/lib/imw/schemes/local.rb +219 -0
- data/lib/imw/schemes/remote.rb +114 -0
- data/lib/imw/schemes/s3.rb +135 -0
- data/lib/imw/tools.rb +8 -0
- data/lib/imw/{transforms → tools}/archiver.rb +1 -1
- data/lib/imw/{transforms → tools}/transferer.rb +10 -10
- data/spec/imw/{resources/archive_spec.rb → archive_spec.rb} +3 -3
- data/spec/imw/{resources/archives_and_compressed → archives}/rar_spec.rb +2 -2
- data/spec/imw/{resources/archives_and_compressed → archives}/tar_spec.rb +2 -2
- data/spec/imw/{resources/archives_and_compressed → archives}/tarbz2_spec.rb +4 -4
- data/spec/imw/{resources/archives_and_compressed → archives}/targz_spec.rb +4 -4
- data/spec/imw/{resources/archives_and_compressed → archives}/zip_spec.rb +2 -2
- data/spec/imw/compressed_files/bz2_spec.rb +15 -0
- data/spec/imw/{resources → compressed_files}/compressible_spec.rb +1 -1
- data/spec/imw/compressed_files/gz_spec.rb +15 -0
- data/spec/imw/{resources/compressed_file_spec.rb → compressed_files_spec.rb} +3 -3
- data/spec/imw/{resources/formats → formats}/delimited_spec.rb +2 -2
- data/spec/imw/{resources/formats → formats}/json_spec.rb +2 -2
- data/spec/imw/{resources/formats → formats}/sgml_spec.rb +2 -2
- data/spec/imw/{resources/formats → formats}/yaml_spec.rb +2 -2
- data/spec/imw/resource_spec.rb +4 -4
- data/spec/imw/{resources/schemes → schemes}/hdfs_spec.rb +7 -7
- data/spec/imw/{resources/schemes → schemes}/http_spec.rb +2 -2
- data/spec/imw/{resources → schemes}/local_spec.rb +5 -5
- data/spec/imw/{resources → schemes}/remote_spec.rb +7 -3
- data/spec/imw/{resources/schemes → schemes}/s3_spec.rb +2 -2
- data/spec/imw/{transforms → tools}/archiver_spec.rb +2 -2
- data/spec/imw/tools/transferer_spec.rb +113 -0
- metadata +69 -71
- data/lib/imw/resources.rb +0 -118
- data/lib/imw/resources/archives_and_compressed.rb +0 -32
- data/lib/imw/resources/archives_and_compressed/bz2.rb +0 -18
- data/lib/imw/resources/archives_and_compressed/gz.rb +0 -18
- data/lib/imw/resources/archives_and_compressed/rar.rb +0 -23
- data/lib/imw/resources/archives_and_compressed/tar.rb +0 -23
- data/lib/imw/resources/archives_and_compressed/tarbz2.rb +0 -78
- data/lib/imw/resources/archives_and_compressed/targz.rb +0 -78
- data/lib/imw/resources/archives_and_compressed/zip.rb +0 -57
- data/lib/imw/resources/formats.rb +0 -32
- data/lib/imw/resources/formats/delimited.rb +0 -92
- data/lib/imw/resources/formats/excel.rb +0 -125
- data/lib/imw/resources/formats/json.rb +0 -53
- data/lib/imw/resources/formats/sgml.rb +0 -72
- data/lib/imw/resources/formats/yaml.rb +0 -53
- data/lib/imw/resources/local.rb +0 -198
- data/lib/imw/resources/remote.rb +0 -110
- data/lib/imw/resources/schemes.rb +0 -19
- data/lib/imw/resources/schemes/hdfs.rb +0 -242
- data/lib/imw/resources/schemes/http.rb +0 -161
- data/lib/imw/resources/schemes/s3.rb +0 -137
- data/lib/imw/transforms.rb +0 -8
- data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +0 -15
- data/spec/imw/resources/archives_and_compressed/gz_spec.rb +0 -15
- data/spec/imw/transforms/transferer_spec.rb +0 -113
@@ -1,19 +0,0 @@
|
|
1
|
-
module IMW
|
2
|
-
module Resources
|
3
|
-
module Schemes
|
4
|
-
autoload :S3, 'imw/resources/schemes/s3'
|
5
|
-
autoload :HTTP, 'imw/resources/schemes/http'
|
6
|
-
autoload :HTTPS, 'imw/resources/schemes/http'
|
7
|
-
autoload :HDFS, 'imw/resources/schemes/hdfs'
|
8
|
-
|
9
|
-
# Handlers which extend a resource with scheme specific methods.
|
10
|
-
SCHEME_HANDLERS = [
|
11
|
-
["Schemes::S3", %r{^s3://} ],
|
12
|
-
["Schemes::HTTP", %r{^http://} ],
|
13
|
-
["Schemes::HTTPS", %r{^https://} ],
|
14
|
-
["Schemes::HDFS", %r{^hdfs://} ]
|
15
|
-
]
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
@@ -1,242 +0,0 @@
|
|
1
|
-
module IMW
|
2
|
-
module Resources
|
3
|
-
module Schemes
|
4
|
-
|
5
|
-
# Defines methods for reading and writing data to/from an
|
6
|
-
# HDFS[http://hadoop.apache.org/common/docs/current/hdfs_design.html].
|
7
|
-
#
|
8
|
-
# Learn more about Hadoop[http://hadoop.apache.org] and the
|
9
|
-
# {Hadoop Distributed
|
10
|
-
# Filesystem}[http://hadoop.apache.org/common/docs/current/hdfs_design.html].
|
11
|
-
module HDFS
|
12
|
-
|
13
|
-
# Checks to see if this is a file or directory
|
14
|
-
def self.extended obj
|
15
|
-
obj.extend(obj.is_directory? ? HDFSDirectory : HDFSFile)
|
16
|
-
end
|
17
|
-
|
18
|
-
# Is this resource an HDFS resource?
|
19
|
-
#
|
20
|
-
# @return [true, false]
|
21
|
-
def on_hdfs?
|
22
|
-
true
|
23
|
-
end
|
24
|
-
alias_method :is_hdfs?, :on_hdfs?
|
25
|
-
|
26
|
-
# Copy this resource to the +new_uri+.
|
27
|
-
#
|
28
|
-
# @param [String, IMW::Resource] new_uri
|
29
|
-
# @return [IMW::Resource] the new resource
|
30
|
-
def cp new_uri
|
31
|
-
IMW::Transforms::Transferer.new(:cp, self, new_uri).transfer!
|
32
|
-
end
|
33
|
-
|
34
|
-
# Move this resource to the +new_uri+.
|
35
|
-
#
|
36
|
-
# @param [String, IMW::Resource] new_uri
|
37
|
-
# @return [IMW::Resource] the new resource
|
38
|
-
def mv new_uri
|
39
|
-
IMW::Transforms::Transferer.new(:mv, self, new_uri).transfer!
|
40
|
-
end
|
41
|
-
|
42
|
-
# Delete this resource from the HDFS.
|
43
|
-
#
|
44
|
-
# @option options [true,false] :skip_trash
|
45
|
-
def rm options={}
|
46
|
-
should_exist!("Cannot delete.")
|
47
|
-
args = [:rm]
|
48
|
-
args << '-skipTrash' if options[:skip] || options[:skip_trash] || options[:skipTrash]
|
49
|
-
args << path
|
50
|
-
HDFS.fs(*args)
|
51
|
-
self
|
52
|
-
end
|
53
|
-
alias_method :rm!, :rm
|
54
|
-
|
55
|
-
|
56
|
-
# Does this path exist on the HDFS?
|
57
|
-
#
|
58
|
-
# @return [true, false]
|
59
|
-
def exist?
|
60
|
-
return @exist unless @exist.nil?
|
61
|
-
refresh!
|
62
|
-
@exist
|
63
|
-
end
|
64
|
-
alias_method :exists?, :exist?
|
65
|
-
|
66
|
-
|
67
|
-
# Return the size (in bytes) of this resource on the HDFS.
|
68
|
-
#
|
69
|
-
# This value is cached. Call +refresh!+ to refresh the cache
|
70
|
-
# manually.
|
71
|
-
#
|
72
|
-
# @return [Fixnum]
|
73
|
-
def size
|
74
|
-
return @size unless @size.nil?
|
75
|
-
refresh!
|
76
|
-
should_exist!("Cannot report size")
|
77
|
-
@size
|
78
|
-
end
|
79
|
-
|
80
|
-
# Return the number of directories contained at or below this
|
81
|
-
# path on the HDFS.
|
82
|
-
#
|
83
|
-
# This value is cached. Call +refresh!+ to refresh the cache
|
84
|
-
# manually.
|
85
|
-
#
|
86
|
-
# @return [Fixnum]
|
87
|
-
def num_dirs
|
88
|
-
return @num_dirs unless @num_dirs.nil?
|
89
|
-
refresh!
|
90
|
-
should_exist!("Cannot report number of directories.")
|
91
|
-
@num_dirs
|
92
|
-
end
|
93
|
-
|
94
|
-
# Return the number of files contained at or below this path
|
95
|
-
# on the HDFS.
|
96
|
-
#
|
97
|
-
# This value is cached. Call +refresh!+ to refresh the cache
|
98
|
-
# manually.
|
99
|
-
#
|
100
|
-
# @return [Fixnum]
|
101
|
-
def num_files
|
102
|
-
return @num_files unless @num_files.nil?
|
103
|
-
refresh!
|
104
|
-
should_exist!("Cannot report number of files.")
|
105
|
-
@num_files
|
106
|
-
end
|
107
|
-
|
108
|
-
# Is this resource an HDFS directory?
|
109
|
-
#
|
110
|
-
# @return [true, false]
|
111
|
-
def is_directory?
|
112
|
-
exist? && num_dirs > 0
|
113
|
-
end
|
114
|
-
|
115
|
-
# Refresh the cached file properties.
|
116
|
-
#
|
117
|
-
# @return [IMW::Resource] this resource
|
118
|
-
def refresh!
|
119
|
-
response = HDFS.fs(:count, path)
|
120
|
-
if response.blank? || response =~ /^Can not find listing for/
|
121
|
-
@exist = false
|
122
|
-
@num_dirs, @num_files, @size, @hdfs_path = false, false, false, false
|
123
|
-
else
|
124
|
-
@exist = true
|
125
|
-
parts = response.split
|
126
|
-
@num_dirs, @num_files, @size = parts[0..2].map(&:to_i)
|
127
|
-
@hdfs_path = parts.last
|
128
|
-
end
|
129
|
-
self
|
130
|
-
end
|
131
|
-
|
132
|
-
# Execute +command+ with +args+ on the Hadoop Distributed
|
133
|
-
# Filesystem (HDFS).
|
134
|
-
#
|
135
|
-
# If passed a block, yield each line of the output from the
|
136
|
-
# command, else just return the output.
|
137
|
-
#
|
138
|
-
# Try running `hadoop fs -help' for more information.
|
139
|
-
#
|
140
|
-
# @param [String, Symbol] command the command to run.
|
141
|
-
# @param [String, Symbol] args the arguments to pass the command
|
142
|
-
# @yield [String] each line of the command's output
|
143
|
-
# @return [String] the command's output
|
144
|
-
def self.fs command, *args
|
145
|
-
command_string = "#{executable} fs -#{command} #{args.compact.map(&:to_str).join(' ')}"
|
146
|
-
command_string += " 2>&1" if command == :count # FIXME or else it just spams the screen when we do HDFS#refresh!
|
147
|
-
output = `#{command_string}`.chomp
|
148
|
-
if block_given?
|
149
|
-
output.split("\n").each do |line|
|
150
|
-
yield line
|
151
|
-
end
|
152
|
-
else
|
153
|
-
output
|
154
|
-
end
|
155
|
-
end
|
156
|
-
|
157
|
-
protected
|
158
|
-
# Returns the path to the Hadoop executable.
|
159
|
-
#
|
160
|
-
# @return [String]
|
161
|
-
def self.executable
|
162
|
-
@executable ||= begin
|
163
|
-
string = `which hadoop`.chomp
|
164
|
-
raise IMW::Error.new("Could not find hadoop command. Is Hadoop installed?") if string.blank?
|
165
|
-
string
|
166
|
-
end
|
167
|
-
end
|
168
|
-
end
|
169
|
-
|
170
|
-
# Defines methods for reading data from HDFS files.
|
171
|
-
module HDFSFile
|
172
|
-
|
173
|
-
# Return the contents of this HDFS file as a string.
|
174
|
-
#
|
175
|
-
# Be VERY careful how you use this!
|
176
|
-
#
|
177
|
-
# @return [String]
|
178
|
-
def read
|
179
|
-
HDFS.fs(:cat, path)
|
180
|
-
end
|
181
|
-
|
182
|
-
# Iterate through each line of this HDFS resource.
|
183
|
-
#
|
184
|
-
# @yield [String] each line of the file
|
185
|
-
def each &block
|
186
|
-
HDFS.fs(:cat, path, &block)
|
187
|
-
end
|
188
|
-
|
189
|
-
# Return a handle on a StringIO object representing the
|
190
|
-
# content in this HDFS file.
|
191
|
-
#
|
192
|
-
# Be VERY careful how you use this! It is a StringIO object
|
193
|
-
# so the whole HDFS file is read into a string before
|
194
|
-
# returning the handle.
|
195
|
-
#
|
196
|
-
# @return [StringIO]
|
197
|
-
def io
|
198
|
-
@io ||= StringIO.new(read)
|
199
|
-
end
|
200
|
-
|
201
|
-
# Map over the lines of this HDFS resource.
|
202
|
-
#
|
203
|
-
# @yield [String] each line of the file
|
204
|
-
# @return [Array] the result of the block on each line
|
205
|
-
def map &block
|
206
|
-
returning([]) do |output|
|
207
|
-
HDFS.fs(:cat, path) do |line|
|
208
|
-
output << block.call(line)
|
209
|
-
end
|
210
|
-
end
|
211
|
-
end
|
212
|
-
|
213
|
-
end
|
214
|
-
|
215
|
-
# Defines methods for listing contents of HDFS directories.
|
216
|
-
module HDFSDirectory
|
217
|
-
|
218
|
-
# Return the paths of all files and directories directly below
|
219
|
-
# this directory on the HDFS.
|
220
|
-
#
|
221
|
-
# @return [Array<String>]
|
222
|
-
def contents
|
223
|
-
returning([]) do |paths|
|
224
|
-
HDFS.fs(:ls, path) do |line|
|
225
|
-
next if line =~ /^Found.*items$/
|
226
|
-
paths << line.split.last
|
227
|
-
end
|
228
|
-
end
|
229
|
-
end
|
230
|
-
|
231
|
-
# Return the resources directly below this directory on the
|
232
|
-
# HDFS.
|
233
|
-
#
|
234
|
-
# @return [Array<IMW::Resource>]
|
235
|
-
def resources
|
236
|
-
contents.map { |path| IMW.open(path) }
|
237
|
-
end
|
238
|
-
|
239
|
-
end
|
240
|
-
end
|
241
|
-
end
|
242
|
-
end
|
@@ -1,161 +0,0 @@
|
|
1
|
-
module IMW
|
2
|
-
module Resources
|
3
|
-
module Schemes
|
4
|
-
|
5
|
-
# Defines methods for accessing a resource over HTTP. Uses
|
6
|
-
# RestClient to implement the basic HTTP verbs (GET, POST, PUT,
|
7
|
-
# DELETE, HEAD).
|
8
|
-
module HTTP
|
9
|
-
|
10
|
-
# Is this resource being accessed via HTTP?
|
11
|
-
#
|
12
|
-
# @return [true, false]
|
13
|
-
def via_http?
|
14
|
-
true
|
15
|
-
end
|
16
|
-
|
17
|
-
# Copy this resource to the +new_uri+.
|
18
|
-
#
|
19
|
-
# @param [String, IMW::Resource] new_uri
|
20
|
-
# @return [IMW::Resource] the new resource
|
21
|
-
def cp new_uri
|
22
|
-
IMW::Transforms::Transferer.new(:cp, self, new_uri).transfer!
|
23
|
-
end
|
24
|
-
|
25
|
-
|
26
|
-
# Return the basename of the URI or <tt>_index</tt> if it's
|
27
|
-
# blank, as in the case of <tt>http://www.google.com</tt>.
|
28
|
-
#
|
29
|
-
# @return [String]
|
30
|
-
def effective_basename
|
31
|
-
(basename.blank? || basename =~ %r{^/*$}) ? "_index" : basename
|
32
|
-
end
|
33
|
-
|
34
|
-
# Send a GET request to this resource's URI.
|
35
|
-
#
|
36
|
-
# If the response doesn't have HTTP code 2xx, a RestClient
|
37
|
-
# error will be raised.
|
38
|
-
#
|
39
|
-
# If a block is given then the response will be passed to the
|
40
|
-
# block, even in case of a non-2xx code.
|
41
|
-
#
|
42
|
-
# See the documentation for
|
43
|
-
# RestClient[http://rdoc.info/projects/archiloque/rest-client]
|
44
|
-
# for more information.
|
45
|
-
#
|
46
|
-
# @param [Hash] headers the headers to include in the request
|
47
|
-
# @yield [RestClient::Response] the response from the server
|
48
|
-
# @return [RestClient::Response] the response from the server
|
49
|
-
# @raise [RestClient::NotModified, RestClient::Unauthorized, RestClient::ResourceNotFound, RestClient::RequestFailed] error from RestClient on non-2xx response codes
|
50
|
-
def get headers={}, &block
|
51
|
-
make_restclient_request do
|
52
|
-
RestClient.get(uri.to_s, headers, &block)
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
# Send a POST request to this resource's URI with data
|
57
|
-
# +payload+.
|
58
|
-
#
|
59
|
-
# If the response doesn't have HTTP code 2xx, a RestClient
|
60
|
-
# error will be raised.
|
61
|
-
#
|
62
|
-
# If a block is given then the response will be passed to the
|
63
|
-
# block, even in case of a non-2xx code.
|
64
|
-
#
|
65
|
-
# See the documentation for
|
66
|
-
# RestClient[http://rdoc.info/projects/archiloque/rest-client]
|
67
|
-
# for more information.
|
68
|
-
#
|
69
|
-
# @param [Hash, String] payload the data to send
|
70
|
-
# @param [Hash] headers the headers to include in the request
|
71
|
-
# @yield [RestClient::Response] the response from the server
|
72
|
-
# @return [RestClient::Response] the response from the server
|
73
|
-
# @raise [RestClient::NotModified, RestClient::Unauthorized, RestClient::ResourceNotFound, RestClient::RequestFailed] error from RestClient on non-2xx response codes
|
74
|
-
def post payload, headers={}, &block
|
75
|
-
make_restclient_request do
|
76
|
-
RestClient.post(uri.to_s, payload, headers, &block)
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
# Send a PUT request to this resource's URI with data
|
81
|
-
# +payload+.
|
82
|
-
#
|
83
|
-
# If the response doesn't have HTTP code 2xx, a RestClient
|
84
|
-
# error will be raised.
|
85
|
-
#
|
86
|
-
# If a block is given then the response will be passed to the
|
87
|
-
# block, even in case of a non-2xx code.
|
88
|
-
#
|
89
|
-
# See the documentation for
|
90
|
-
# RestClient[http://rdoc.info/projects/archiloque/rest-client]
|
91
|
-
# for more information.
|
92
|
-
#
|
93
|
-
# @param [Hash, String] payload the data to send
|
94
|
-
# @param [Hash] headers the headers to include in the request
|
95
|
-
# @yield [RestClient::Response] the response from the server
|
96
|
-
# @return [RestClient::Response] the response from the server
|
97
|
-
# @raise [RestClient::NotModified, RestClient::Unauthorized, RestClient::ResourceNotFound, RestClient::RequestFailed] error from RestClient on non-2xx response codes
|
98
|
-
def put payload, headers={}, &block
|
99
|
-
make_restclient_request do
|
100
|
-
RestClient.put(uri.to_s, payload, headers, &block)
|
101
|
-
end
|
102
|
-
end
|
103
|
-
|
104
|
-
# Send a DELETE request to this resource's URI.
|
105
|
-
#
|
106
|
-
# If the response doesn't have HTTP code 2xx, a RestClient
|
107
|
-
# error will be raised.
|
108
|
-
#
|
109
|
-
# If a block is given then the response will be passed to the
|
110
|
-
# block, even in case of a non-2xx code.
|
111
|
-
#
|
112
|
-
# See the documentation for
|
113
|
-
# RestClient[http://rdoc.info/projects/archiloque/rest-client]
|
114
|
-
# for more information.
|
115
|
-
#
|
116
|
-
# @param [Hash] headers the headers to include in the request
|
117
|
-
# @yield [RestClient::Response] the response from the server
|
118
|
-
# @return [RestClient::Response] the response from the server
|
119
|
-
# @raise [RestClient::NotModified, RestClient::Unauthorized, RestClient::ResourceNotFound, RestClient::RequestFailed] error from RestClient on non-2xx response codes
|
120
|
-
def delete headers={}, &block
|
121
|
-
make_restclient_request do
|
122
|
-
RestClient.delete(uri.to_s, headers, &block)
|
123
|
-
end
|
124
|
-
end
|
125
|
-
|
126
|
-
# Send a HEAD request to this resource's URI.
|
127
|
-
#
|
128
|
-
# If the response doesn't have HTTP code 2xx, a RestClient
|
129
|
-
# error will be raised.
|
130
|
-
#
|
131
|
-
# If a block is given then the response will be passed to the
|
132
|
-
# block, even in case of a non-2xx code.
|
133
|
-
#
|
134
|
-
# See the documentation for
|
135
|
-
# RestClient[http://rdoc.info/projects/archiloque/rest-client]
|
136
|
-
# for more information.
|
137
|
-
#
|
138
|
-
# @param [Hash] headers the headers to include in the request
|
139
|
-
# @yield [RestClient::Response] the response from the server
|
140
|
-
# @return [RestClient::Response] the response from the server
|
141
|
-
# @raise [RestClient::NotModified, RestClient::Unauthorized, RestClient::ResourceNotFound, RestClient::RequestFailed] error from RestClient on non-2xx response codes
|
142
|
-
def head headers={}, &block
|
143
|
-
make_restclient_request do
|
144
|
-
RestClient.head(uri.to_s, headers, &block)
|
145
|
-
end
|
146
|
-
end
|
147
|
-
|
148
|
-
protected
|
149
|
-
def make_restclient_request &block # :nodoc
|
150
|
-
require 'restclient'
|
151
|
-
begin
|
152
|
-
yield
|
153
|
-
rescue RestClient::NotModified, RestClient::Unauthorized, RestClient::ResourceNotFound, RestClient::RequestFailed => e
|
154
|
-
raise IMW::NetworkError.new("#{e.class} -- #{e.message}")
|
155
|
-
end
|
156
|
-
end
|
157
|
-
end
|
158
|
-
end
|
159
|
-
end
|
160
|
-
end
|
161
|
-
|
@@ -1,137 +0,0 @@
|
|
1
|
-
module IMW
|
2
|
-
module Resources
|
3
|
-
module Schemes
|
4
|
-
|
5
|
-
# Defines methods for reading and writing data to {Amazon
|
6
|
-
# S3}[http://aws.amazon.com/s3] buckets.
|
7
|
-
#
|
8
|
-
# IMW.open('s3://my_bucket/path/to/some/file.csv')
|
9
|
-
#
|
10
|
-
# Learn more about {Amazon Web Services}[http://aws.amazon.com].
|
11
|
-
module S3
|
12
|
-
|
13
|
-
# For an S3 resource, the bucket is just the hostname.
|
14
|
-
#
|
15
|
-
# @return [String]
|
16
|
-
def bucket
|
17
|
-
host
|
18
|
-
end
|
19
|
-
|
20
|
-
# Is this resource an S3 resource?
|
21
|
-
#
|
22
|
-
# @return [true, false]
|
23
|
-
def on_s3?
|
24
|
-
true
|
25
|
-
end
|
26
|
-
alias_method :is_s3?, :on_s3?
|
27
|
-
|
28
|
-
# Copy this resource to the +new_uri+.
|
29
|
-
#
|
30
|
-
# @param [String, IMW::Resource] new_uri
|
31
|
-
# @return [IMW::Resource] the new resource
|
32
|
-
def cp new_uri
|
33
|
-
IMW::Transforms::Transferer.new(:cp, self, new_uri).transfer!
|
34
|
-
end
|
35
|
-
|
36
|
-
# The AWS::S3::S3Object corresponding to this resource.
|
37
|
-
def s3_object
|
38
|
-
self.class.make_connection!
|
39
|
-
@s3_object ||= AWS::S3::S3Object.new(path, bucket)
|
40
|
-
end
|
41
|
-
|
42
|
-
# Does this resource exist on S3?
|
43
|
-
#
|
44
|
-
# @return [true, false]
|
45
|
-
def exist?
|
46
|
-
s3_object.exists?
|
47
|
-
end
|
48
|
-
alias_method :exists?, :exist?
|
49
|
-
|
50
|
-
# Remove this resource from S3.
|
51
|
-
#
|
52
|
-
# @return [IMW::Resource] the deleted object
|
53
|
-
def rm
|
54
|
-
s3_object.delete
|
55
|
-
end
|
56
|
-
alias_method :rm!, :rm
|
57
|
-
|
58
|
-
# Return the S3N URL for this S3 object
|
59
|
-
#
|
60
|
-
# resource = IMW.open('s3://my_bucket/path/to/some/obj')
|
61
|
-
# resource.s3n_url
|
62
|
-
# => 's3n://my_bucket/path/to/some/obj'
|
63
|
-
#
|
64
|
-
# @return [String]
|
65
|
-
def s3n_url
|
66
|
-
uri.to_s.gsub(/^s3:/, 's3n:')
|
67
|
-
end
|
68
|
-
|
69
|
-
# Return the contents of this S3 object.
|
70
|
-
#
|
71
|
-
# @return [String]
|
72
|
-
def read
|
73
|
-
s3_object.value
|
74
|
-
end
|
75
|
-
|
76
|
-
# Store +source+ into +destination+.
|
77
|
-
#
|
78
|
-
# @param [String, IMW::Resource, #io] source
|
79
|
-
# @param [String, IMW::Resource, #path, #bucket] destination
|
80
|
-
# @return [IMW::Resource] the new S3 object
|
81
|
-
def self.put source, destination
|
82
|
-
source = IMW.open(source)
|
83
|
-
destintation = IMW.open(destination)
|
84
|
-
raise IMW::ArgumentError.new("destination must be on S3 -- #{destination.uri} given") unless destination.on_s3?
|
85
|
-
make_connection!
|
86
|
-
AWS::S3::S3Object.store(destination.path, source.io, destination.bucket)
|
87
|
-
destination
|
88
|
-
end
|
89
|
-
|
90
|
-
# Download +source+ from S3 into +destination+.
|
91
|
-
#
|
92
|
-
# @param [String, IMW::Resource, #path, #bucket] source
|
93
|
-
# @param [String, IMW::Resource, #write] destination
|
94
|
-
# @return [IMW::Resource] the new resource
|
95
|
-
def self.get source, destination
|
96
|
-
source = IMW.open(source)
|
97
|
-
destination = IMW.open(destination)
|
98
|
-
make_connection!
|
99
|
-
AWS::S3::Object.stream(source.path, source.bucket) do |chunk|
|
100
|
-
destination.write(chunk)
|
101
|
-
end
|
102
|
-
destination.close
|
103
|
-
destination.reopen
|
104
|
-
end
|
105
|
-
|
106
|
-
# Copy S3 resource +source+ to +destination+.
|
107
|
-
#
|
108
|
-
# @param [String, IMW::Resource, #path, #bucket] source
|
109
|
-
# @param [String, IMW::Resource, #path, #bucket] destination
|
110
|
-
# @return [IMW::Resource] the new resource
|
111
|
-
def self.copy source, destination
|
112
|
-
source = IMW.open(source)
|
113
|
-
destination = IMW.open(destination)
|
114
|
-
raise IMW::PathError.new("Bucket names must be non-blank and match to 'copy'") unless source.bucket.present? && destination.bucket.present? && source.bucket == destination.bucket
|
115
|
-
make_connection!
|
116
|
-
AWS::S3::Object.copy(source.path, destination.path, destination.bucket)
|
117
|
-
destination
|
118
|
-
end
|
119
|
-
|
120
|
-
protected
|
121
|
-
# Make an S3 connection.
|
122
|
-
#
|
123
|
-
# Uses settings defined in IMW::AWS_CREDENTIALS.
|
124
|
-
#
|
125
|
-
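# @return [AWS::S3::Connection] the cached connection returned by AWS::S3::Base.establish_connection! (return type presumed from the aws-s3 gem -- confirm)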
|
126
|
-
def self.make_connection!
|
127
|
-
return @connection if @connection
|
128
|
-
raise IMW::Error.new("Must define a constant IMW::AWS_CREDENTIALS with an :access_key_id and a :secret_access_key before using S3 resources") unless defined?(IMW::AWS_CREDENTIALS)
|
129
|
-
require 'aws/s3'
|
130
|
-
@connection = AWS::S3::Base.establish_connection!(IMW::AWS_CREDENTIALS)
|
131
|
-
end
|
132
|
-
|
133
|
-
end
|
134
|
-
end
|
135
|
-
end
|
136
|
-
end
|
137
|
-
|