aws-s3 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/INSTALL +4 -0
- data/README +28 -27
- data/Rakefile +33 -4
- data/lib/aws/s3.rb +1 -0
- data/lib/aws/s3/acl.rb +8 -3
- data/lib/aws/s3/base.rb +10 -8
- data/lib/aws/s3/bucket.rb +1 -5
- data/lib/aws/s3/connection.rb +48 -9
- data/lib/aws/s3/exceptions.rb +3 -0
- data/lib/aws/s3/extensions.rb +99 -9
- data/lib/aws/s3/logging.rb +152 -9
- data/lib/aws/s3/object.rb +30 -22
- data/lib/aws/s3/parsing.rb +0 -29
- data/lib/aws/s3/response.rb +2 -2
- data/lib/aws/s3/version.rb +2 -2
- data/support/faster-xml-simple/lib/faster_xml_simple.rb +30 -11
- data/support/faster-xml-simple/test/regression_test.rb +11 -5
- data/support/faster-xml-simple/test/test_helper.rb +17 -0
- data/support/faster-xml-simple/test/xml_simple_comparison_test.rb +2 -3
- data/test/acl_test.rb +13 -2
- data/test/connection_test.rb +8 -0
- data/test/extensions_test.rb +57 -4
- data/test/fixtures/loglines.yml +5 -0
- data/test/fixtures/logs.yml +7 -0
- data/test/logging_test.rb +54 -1
- data/test/object_test.rb +15 -0
- data/test/parsing_test.rb +0 -20
- data/test/remote/object_test.rb +49 -1
- metadata +5 -2
data/lib/aws/s3/exceptions.rb
CHANGED
data/lib/aws/s3/extensions.rb
CHANGED
@@ -2,12 +2,14 @@
|
|
2
2
|
|
3
3
|
class Hash
|
4
4
|
def to_query_string(include_question_mark = true)
|
5
|
-
return '' if empty?
|
6
5
|
query_string = ''
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
6
|
+
unless empty?
|
7
|
+
query_string << '?' if include_question_mark
|
8
|
+
query_string << inject([]) do |params, (key, value)|
|
9
|
+
params << "#{key}=#{value}"
|
10
|
+
end.join('&')
|
11
|
+
end
|
12
|
+
query_string
|
11
13
|
end
|
12
14
|
|
13
15
|
def to_normalized_options
|
@@ -25,7 +27,7 @@ end
|
|
25
27
|
|
26
28
|
class String
|
27
29
|
def previous!
|
28
|
-
self[-1]
|
30
|
+
self[-1] -= 1
|
29
31
|
self
|
30
32
|
end
|
31
33
|
|
@@ -34,7 +36,7 @@ class String
|
|
34
36
|
end
|
35
37
|
|
36
38
|
def to_header
|
37
|
-
downcase.
|
39
|
+
downcase.tr('_', '-')
|
38
40
|
end
|
39
41
|
|
40
42
|
# ActiveSupport adds an underscore method to String so let's just use that one if
|
@@ -43,7 +45,51 @@ class String
|
|
43
45
|
gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
|
44
46
|
gsub(/([a-z\d])([A-Z])/,'\1_\2').
|
45
47
|
downcase
|
46
|
-
end unless
|
48
|
+
end unless public_method_defined? :underscore
|
49
|
+
|
50
|
+
def utf8?
|
51
|
+
scan(/[^\x00-\xa0]/u) { |s| s.unpack('U') }
|
52
|
+
true
|
53
|
+
rescue ArgumentError
|
54
|
+
false
|
55
|
+
end
|
56
|
+
|
57
|
+
# All paths in S3 have to be valid unicode so this takes care of
|
58
|
+
# cleaning up any strings that aren't valid utf-8 according to String#utf8?
|
59
|
+
def remove_extended!
|
60
|
+
gsub!(/[\x80-\xFF]/) { "%02X" % $&[0] }
|
61
|
+
end
|
62
|
+
|
63
|
+
def remove_extended
|
64
|
+
dup.remove_extended!
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
class CoercibleString < String
|
69
|
+
class << self
|
70
|
+
def coerce(string)
|
71
|
+
new(string).coerce
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
def coerce
|
76
|
+
case self
|
77
|
+
when 'true': true
|
78
|
+
when 'false': false
|
79
|
+
when /^\d+$/: Integer(self)
|
80
|
+
when datetime_format: Time.parse(self)
|
81
|
+
else
|
82
|
+
self
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
private
|
87
|
+
# Lame hack since Date._parse is so accepting. S3 dates are of the form: '2006-10-29T23:14:47.000Z'
|
88
|
+
# so unless the string looks like that, don't even try, otherwise it might convert an object's
|
89
|
+
# key from something like '03 1-2-3-Apple-Tree.mp3' to Sat Feb 03 00:00:00 CST 2001.
|
90
|
+
def datetime_format
|
91
|
+
/^\d{4}-\d{2}-\d{2}\w\d{2}:\d{2}:\d{2}/
|
92
|
+
end
|
47
93
|
end
|
48
94
|
|
49
95
|
class Symbol
|
@@ -214,6 +260,50 @@ module SelectiveAttributeProxy
|
|
214
260
|
end
|
215
261
|
end
|
216
262
|
|
263
|
+
# When streaming data up, Net::HTTPGenericRequest hard codes a chunk size of 1k. For large files this
|
264
|
+
# is an unfortunately low chunk size, so here we make it use a much larger default size and move it into a method
|
265
|
+
# so that the implementation of send_request_with_body_stream doesn't need to be changed to change the chunk size (at least not any more
|
266
|
+
# than I've already had to...).
|
267
|
+
module Net
|
268
|
+
class HTTPGenericRequest
|
269
|
+
def send_request_with_body_stream(sock, ver, path, f)
|
270
|
+
raise ArgumentError, "Content-Length not given and Transfer-Encoding is not `chunked'" unless content_length() or chunked?
|
271
|
+
unless content_type()
|
272
|
+
warn 'net/http: warning: Content-Type did not set; using application/x-www-form-urlencoded' if $VERBOSE
|
273
|
+
set_content_type 'application/x-www-form-urlencoded'
|
274
|
+
end
|
275
|
+
write_header sock, ver, path
|
276
|
+
if chunked?
|
277
|
+
while s = f.read(chunk_size)
|
278
|
+
sock.write(sprintf("%x\r\n", s.length) << s << "\r\n")
|
279
|
+
end
|
280
|
+
sock.write "0\r\n\r\n"
|
281
|
+
else
|
282
|
+
while s = f.read(chunk_size)
|
283
|
+
sock.write s
|
284
|
+
end
|
285
|
+
end
|
286
|
+
end
|
287
|
+
|
288
|
+
def chunk_size
|
289
|
+
1048576 # 1 megabyte
|
290
|
+
end
|
291
|
+
end
|
292
|
+
|
293
|
+
# Net::HTTP before 1.8.4 doesn't have the use_ssl? method or the Delete request type
|
294
|
+
class HTTP
|
295
|
+
def use_ssl?
|
296
|
+
@use_ssl
|
297
|
+
end unless public_method_defined? :use_ssl?
|
298
|
+
|
299
|
+
class Delete < HTTPRequest
|
300
|
+
METHOD = 'DELETE'
|
301
|
+
REQUEST_HAS_BODY = false
|
302
|
+
RESPONSE_HAS_BODY = true
|
303
|
+
end unless const_defined? :Delete
|
304
|
+
end
|
305
|
+
end
|
306
|
+
|
217
307
|
class XmlGenerator < String #:nodoc:
|
218
308
|
attr_reader :xml
|
219
309
|
def initialize
|
@@ -222,4 +312,4 @@ class XmlGenerator < String #:nodoc:
|
|
222
312
|
build
|
223
313
|
end
|
224
314
|
end
|
225
|
-
#:startdoc:
|
315
|
+
#:startdoc:
|
data/lib/aws/s3/logging.rb
CHANGED
@@ -17,12 +17,25 @@ module AWS
|
|
17
17
|
#
|
18
18
|
# Now instead of logging right into the jukebox bucket, the logs will go into the bucket called jukebox-logs.
|
19
19
|
#
|
20
|
-
# Once logs have accumulated, you can access them using the <tt>
|
20
|
+
# Once logs have accumulated, you can access them using the <tt>logs</tt> method:
|
21
21
|
#
|
22
22
|
# pp Bucket.logs('jukebox')
|
23
|
-
# [#<AWS::S3::
|
24
|
-
# #<AWS::S3::
|
25
|
-
# #<AWS::S3::
|
23
|
+
# [#<AWS::S3::Logging::Log '/jukebox-logs/log-2006-11-14-07-15-24-2061C35880A310A1'>,
|
24
|
+
# #<AWS::S3::Logging::Log '/jukebox-logs/log-2006-11-14-08-15-27-D8EEF536EC09E6B3'>,
|
25
|
+
# #<AWS::S3::Logging::Log '/jukebox-logs/log-2006-11-14-08-15-29-355812B2B15BD789'>]
|
26
|
+
#
|
27
|
+
# Each log has a <tt>lines</tt> method that gives you information about each request in that log. All the fields are available
|
28
|
+
# as named methods. More information is available in Logging::Log::Line.
|
29
|
+
#
|
30
|
+
# logs = Bucket.logs('jukebox')
|
31
|
+
# log = logs.first
|
32
|
+
# line = log.lines.first
|
33
|
+
# line.operation
|
34
|
+
# # => 'REST.GET.LOGGING_STATUS'
|
35
|
+
# line.request_uri
|
36
|
+
# # => 'GET /jukebox?logging HTTP/1.1'
|
37
|
+
# line.remote_ip
|
38
|
+
# # => "67.165.183.125"
|
26
39
|
#
|
27
40
|
# Disabling logging is just as simple as enabling it:
|
28
41
|
#
|
@@ -70,7 +83,128 @@ module AWS
|
|
70
83
|
end
|
71
84
|
end
|
72
85
|
end
|
86
|
+
|
87
|
+
# A bucket log exposes requests made on the given bucket. Each line of the log represents a single request. The lines of a log
|
88
|
+
# can be accessed with the lines method.
|
89
|
+
#
|
90
|
+
# log = Bucket.logs_for('marcel').first
|
91
|
+
# log.lines
|
92
|
+
#
|
93
|
+
# More information about the logged requests can be found in the documentation for Log::Line.
|
94
|
+
class Log
|
95
|
+
def initialize(log_object) #:nodoc:
|
96
|
+
@log = log_object
|
97
|
+
end
|
98
|
+
|
99
|
+
# Returns the lines for the log. Each line is wrapped in a Log::Line.
|
100
|
+
def lines
|
101
|
+
log.value.map {|line| Line.new(line)}
|
102
|
+
end
|
103
|
+
memoized :lines
|
104
|
+
|
105
|
+
def inspect #:nodoc:
|
106
|
+
"#<%s:0x%s '%s'>" % [self.class.name, object_id, log.path]
|
107
|
+
end
|
73
108
|
|
109
|
+
private
|
110
|
+
attr_reader :log
|
111
|
+
|
112
|
+
# Each line of a log exposes the raw line, but it also has method accessors for all the fields of the logged request.
|
113
|
+
#
|
114
|
+
# The supported log line fields are listed in the S3 documentation: http://docs.amazonwebservices.com/AmazonS3/2006-03-01/LogFormat.html
|
115
|
+
#
|
116
|
+
# line = log.lines.first
|
117
|
+
# line.remote_ip
|
118
|
+
# # => '72.21.206.5'
|
119
|
+
#
|
120
|
+
# If a certain field does not apply to a given request (for example, the <tt>key</tt> field does not apply to a bucket request),
|
121
|
+
# or if it was unknown or unavailable, it will return <tt>nil</tt>.
|
122
|
+
#
|
123
|
+
# line.operation
|
124
|
+
# # => 'REST.GET.BUCKET'
|
125
|
+
# line.key
|
126
|
+
# # => nil
|
127
|
+
class Line < String
|
128
|
+
DATE = /\[([^\]]+)\]/
|
129
|
+
QUOTED_STRING = /"([^"]+)"/
|
130
|
+
REST = /(\S+)/
|
131
|
+
LINE_SCANNER = /#{DATE}|#{QUOTED_STRING}|#{REST}/
|
132
|
+
|
133
|
+
cattr_accessor :decorators
|
134
|
+
@@decorators = Hash.new {|hash, key| hash[key] = lambda {|entry| CoercibleString.coerce(entry)}}
|
135
|
+
cattr_reader :fields
|
136
|
+
@@fields = []
|
137
|
+
|
138
|
+
class << self
|
139
|
+
def field(name, offset, type = nil, &block) #:nodoc:
|
140
|
+
decorators[name] = block if block_given?
|
141
|
+
fields << name
|
142
|
+
class_eval(<<-EVAL, __FILE__, __LINE__)
|
143
|
+
def #{name}
|
144
|
+
value = parts[#{offset} - 1]
|
145
|
+
if value == '-'
|
146
|
+
nil
|
147
|
+
else
|
148
|
+
self.class.decorators[:#{name}].call(value)
|
149
|
+
end
|
150
|
+
end
|
151
|
+
memoized :#{name}
|
152
|
+
EVAL
|
153
|
+
end
|
154
|
+
|
155
|
+
# Time.parse doesn't like %d/%B/%Y:%H:%M:%S %z so we have to transform it unfortunately
|
156
|
+
def typecast_time(datetime) #:nodoc:
|
157
|
+
month = datetime[/[a-z]+/i]
|
158
|
+
datetime.sub!(%r|^(\w{2})/(\w{3})|, '\2/\1')
|
159
|
+
datetime.sub!(month, Date::ABBR_MONTHS[month.downcase].to_s)
|
160
|
+
datetime.sub!(':', ' ')
|
161
|
+
Time.parse(datetime)
|
162
|
+
end
|
163
|
+
end
|
164
|
+
|
165
|
+
def initialize(line) #:nodoc:
|
166
|
+
super(line)
|
167
|
+
@parts = parse
|
168
|
+
end
|
169
|
+
|
170
|
+
field(:owner, 1) {|entry| Owner.new('id' => entry) }
|
171
|
+
field :bucket, 2
|
172
|
+
field(:time, 3) {|entry| typecast_time(entry)}
|
173
|
+
field :remote_ip, 4
|
174
|
+
field(:requestor, 5) {|entry| Owner.new('id' => entry) }
|
175
|
+
field :request_id, 6
|
176
|
+
field :operation, 7
|
177
|
+
field :key, 8
|
178
|
+
field :request_uri, 9
|
179
|
+
field :http_status, 10
|
180
|
+
field :error_code, 11
|
181
|
+
field :bytes_sent, 12
|
182
|
+
field :object_size, 13
|
183
|
+
field :total_time, 14
|
184
|
+
field :turn_around_time, 15
|
185
|
+
field :referrer, 16
|
186
|
+
field :user_agent, 17
|
187
|
+
|
188
|
+
# Returns all fields of the line in a hash of the form <tt>:field_name => :field_value</tt>.
|
189
|
+
#
|
190
|
+
# line.attributes.values_at(:bucket, :key)
|
191
|
+
# # => ['marcel', 'kiss.jpg']
|
192
|
+
def attributes
|
193
|
+
self.class.fields.inject({}) do |attribute_hash, field|
|
194
|
+
attribute_hash[field] = send(field)
|
195
|
+
attribute_hash
|
196
|
+
end
|
197
|
+
end
|
198
|
+
|
199
|
+
private
|
200
|
+
attr_reader :parts
|
201
|
+
|
202
|
+
def parse
|
203
|
+
scan(LINE_SCANNER).flatten.compact
|
204
|
+
end
|
205
|
+
end
|
206
|
+
end
|
207
|
+
|
74
208
|
module Management #:nodoc:
|
75
209
|
def self.included(klass) #:nodoc:
|
76
210
|
klass.extend(ClassMethods)
|
@@ -120,11 +254,20 @@ module AWS
|
|
120
254
|
# Returns the collection of logs for the bucket named <tt>name</tt>.
|
121
255
|
#
|
122
256
|
# Bucket.logs_for 'marcel'
|
123
|
-
|
124
|
-
|
257
|
+
#
|
258
|
+
# Accepts the same options as Bucket.find, such as <tt>:max_keys</tt> and <tt>:marker</tt>.
|
259
|
+
def logs_for(name = nil, options = {})
|
260
|
+
if name.is_a?(Hash)
|
261
|
+
options = name
|
262
|
+
name = nil
|
263
|
+
end
|
264
|
+
|
265
|
+
name = bucket_name(name)
|
125
266
|
logging_status = logging_status_for(name)
|
126
267
|
return [] unless logging_status.logging_enabled?
|
127
|
-
objects(logging_status.target_bucket, :prefix => logging_status.target_prefix)
|
268
|
+
objects(logging_status.target_bucket, options.merge(:prefix => logging_status.target_prefix)).map do |log_object|
|
269
|
+
Log.new(log_object)
|
270
|
+
end
|
128
271
|
end
|
129
272
|
alias_method :logs, :logs_for
|
130
273
|
end
|
@@ -154,8 +297,8 @@ module AWS
|
|
154
297
|
self.class.logging_enabled_for?(name)
|
155
298
|
end
|
156
299
|
|
157
|
-
def logs
|
158
|
-
self.class.logs_for(name)
|
300
|
+
def logs(options = {})
|
301
|
+
self.class.logs_for(name, options)
|
159
302
|
end
|
160
303
|
end
|
161
304
|
end
|
data/lib/aws/s3/object.rb
CHANGED
@@ -5,11 +5,7 @@ module AWS
|
|
5
5
|
#
|
6
6
|
# You can store an object on S3 by specifying a key, its data and the name of the bucket you want to put it in:
|
7
7
|
#
|
8
|
-
# S3Object.store(
|
9
|
-
# 'headshot.jpg',
|
10
|
-
# File.open('headshot.jpg'),
|
11
|
-
# 'photos'
|
12
|
-
# )
|
8
|
+
# S3Object.store('me.jpg', open('headshot.jpg'), 'photos')
|
13
9
|
#
|
14
10
|
# The content type of the object will be inferred by its extension. If the appropriate content type can not be inferred, S3 defaults
|
15
11
|
# to <tt>binary/octet-stream</tt>.
|
@@ -19,7 +15,7 @@ module AWS
|
|
19
15
|
# file = 'black-flowers.m4a'
|
20
16
|
# S3Object.store(
|
21
17
|
# file,
|
22
|
-
#
|
18
|
+
# open(file),
|
23
19
|
# 'jukebox',
|
24
20
|
# :content_type => 'audio/mp4a-latm'
|
25
21
|
# )
|
@@ -41,7 +37,7 @@ module AWS
|
|
41
37
|
#
|
42
38
|
# Or stream it by passing a block to <tt>stream</tt>:
|
43
39
|
#
|
44
|
-
#
|
40
|
+
# open('song.mp3', 'w') do |file|
|
45
41
|
# S3Object.stream('song.mp3', 'jukebox') do |chunk|
|
46
42
|
# file.write chunk
|
47
43
|
# end
|
@@ -55,6 +51,9 @@ module AWS
|
|
55
51
|
#
|
56
52
|
# Other functionality includes:
|
57
53
|
#
|
54
|
+
# # Check if an object exists
|
55
|
+
# S3Object.exists? 'headshot.jpg', 'photos'
|
56
|
+
#
|
58
57
|
# # Copying an object
|
59
58
|
# S3Object.copy 'headshot.jpg', 'headshot2.jpg', 'photos'
|
60
59
|
#
|
@@ -166,6 +165,10 @@ module AWS
|
|
166
165
|
# the desired object's key, which functionally makes the key ordered one degree higher than the desired object key according to
|
167
166
|
# alphabetic ordering. This is a hack, but it should work around 99% of the time. I can't think of a scenario where it would return
|
168
167
|
# something incorrect.
|
168
|
+
|
169
|
+
# We need to ensure the key doesn't have extended characters but not uri escape it before doing the lookup and comparing since if the object exists,
|
170
|
+
# the key on S3 will have been normalized
|
171
|
+
key = key.remove_extended unless key.utf8?
|
169
172
|
bucket = Bucket.find(bucket_name(bucket), :marker => key.previous, :max_keys => 1)
|
170
173
|
# If our heuristic failed, trigger a NoSuchKey exception
|
171
174
|
if (object = bucket.objects.first) && object.key == key
|
@@ -178,9 +181,10 @@ module AWS
|
|
178
181
|
# Makes a copy of the object with <tt>key</tt> to <tt>copy_key</tt>.
|
179
182
|
def copy(key, copy_key, bucket = nil, options = {})
|
180
183
|
bucket = bucket_name(bucket)
|
181
|
-
original =
|
184
|
+
original = open(url_for(key, bucket))
|
182
185
|
default_options = {:content_type => original.content_type}
|
183
|
-
store(copy_key, original
|
186
|
+
store(copy_key, original, bucket, default_options.merge(options))
|
187
|
+
acl(copy_key, bucket, acl(key, bucket))
|
184
188
|
end
|
185
189
|
|
186
190
|
# Rename the object with key <tt>from</tt> to have key in <tt>to</tt>.
|
@@ -189,17 +193,25 @@ module AWS
|
|
189
193
|
delete(from, bucket)
|
190
194
|
end
|
191
195
|
|
192
|
-
# Fetch information about the
|
196
|
+
# Fetch information about the object with <tt>key</tt> from <tt>bucket</tt>. Information includes content type, content length,
|
193
197
|
# last modified time, and others.
|
194
198
|
#
|
195
199
|
# If the specified key does not exist, NoSuchKey is raised.
|
196
200
|
def about(key, bucket = nil, options = {})
|
197
201
|
response = head(path!(bucket, key, options), options)
|
198
|
-
if response.
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
202
|
+
raise NoSuchKey.new("No such key `#{key}'", bucket) if response.code == 404
|
203
|
+
About.new(response.headers)
|
204
|
+
end
|
205
|
+
|
206
|
+
# Checks if the object with <tt>key</tt> in <tt>bucket</tt> exists.
|
207
|
+
#
|
208
|
+
# S3Object.exists? 'kiss.jpg', 'marcel'
|
209
|
+
# # => true
|
210
|
+
def exists?(key, bucket = nil)
|
211
|
+
about(key, bucket)
|
212
|
+
true
|
213
|
+
rescue NoSuchKey
|
214
|
+
false
|
203
215
|
end
|
204
216
|
|
205
217
|
# Delete object with <tt>key</tt> from <tt>bucket</tt>.
|
@@ -214,14 +226,10 @@ module AWS
|
|
214
226
|
# may be desirable for very large files so they are not read into memory all at once.
|
215
227
|
#
|
216
228
|
# # Non streamed upload
|
217
|
-
# S3Object.store('
|
218
|
-
# 'hello world!',
|
219
|
-
# 'marcel')
|
229
|
+
# S3Object.store('greeting.txt', 'hello world!', 'marcel')
|
220
230
|
#
|
221
231
|
# # Streamed upload
|
222
|
-
# S3Object.store('roots.mpeg',
|
223
|
-
# File.open('roots.mpeg'),
|
224
|
-
# 'marcel')
|
232
|
+
# S3Object.store('roots.mpeg', open('roots.mpeg'), 'marcel')
|
225
233
|
def store(key, data, bucket = nil, options = {})
|
226
234
|
validate_key!(key)
|
227
235
|
# Must build path before inferring content type in case bucket is being used for options
|
@@ -577,7 +585,7 @@ module AWS
|
|
577
585
|
|
578
586
|
# Don't dump binary data :)
|
579
587
|
def inspect #:nodoc:
|
580
|
-
"
|
588
|
+
"#<%s:0x%s '%s'>" % [self.class, object_id, path]
|
581
589
|
end
|
582
590
|
|
583
591
|
private
|