aws-s3 0.2.1 → 0.3.0
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
- data/INSTALL +4 -0
- data/README +28 -27
- data/Rakefile +33 -4
- data/lib/aws/s3.rb +1 -0
- data/lib/aws/s3/acl.rb +8 -3
- data/lib/aws/s3/base.rb +10 -8
- data/lib/aws/s3/bucket.rb +1 -5
- data/lib/aws/s3/connection.rb +48 -9
- data/lib/aws/s3/exceptions.rb +3 -0
- data/lib/aws/s3/extensions.rb +99 -9
- data/lib/aws/s3/logging.rb +152 -9
- data/lib/aws/s3/object.rb +30 -22
- data/lib/aws/s3/parsing.rb +0 -29
- data/lib/aws/s3/response.rb +2 -2
- data/lib/aws/s3/version.rb +2 -2
- data/support/faster-xml-simple/lib/faster_xml_simple.rb +30 -11
- data/support/faster-xml-simple/test/regression_test.rb +11 -5
- data/support/faster-xml-simple/test/test_helper.rb +17 -0
- data/support/faster-xml-simple/test/xml_simple_comparison_test.rb +2 -3
- data/test/acl_test.rb +13 -2
- data/test/connection_test.rb +8 -0
- data/test/extensions_test.rb +57 -4
- data/test/fixtures/loglines.yml +5 -0
- data/test/fixtures/logs.yml +7 -0
- data/test/logging_test.rb +54 -1
- data/test/object_test.rb +15 -0
- data/test/parsing_test.rb +0 -20
- data/test/remote/object_test.rb +49 -1
- metadata +5 -2
data/lib/aws/s3/exceptions.rb
CHANGED
data/lib/aws/s3/extensions.rb
CHANGED
@@ -2,12 +2,14 @@
 
 class Hash
   def to_query_string(include_question_mark = true)
-    return '' if empty?
     query_string = ''
-
-
-
-
+    unless empty?
+      query_string << '?' if include_question_mark
+      query_string << inject([]) do |params, (key, value)|
+        params << "#{key}=#{value}"
+      end.join('&')
+    end
+    query_string
   end
 
   def to_normalized_options
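The rewrite above restructures Hash#to_query_string so the query string is built inside an unless empty? guard rather than via an early return. A minimal sketch of the resulting behavior (the hash contents here are illustrative, not from the gem):

    {}.to_query_string                            # => ""
    {'max-keys' => 2}.to_query_string             # => "?max-keys=2"
    {'prefix' => 'logs/'}.to_query_string(false)  # => "prefix=logs/"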
@@ -25,7 +27,7 @@ end
 
 class String
   def previous!
-    self[-1]
+    self[-1] -= 1
     self
   end
 
@@ -34,7 +36,7 @@ class String
   end
 
   def to_header
-    downcase.
+    downcase.tr('_', '-')
   end
 
   # ActiveSupport adds an underscore method to String so let's just use that one if
@@ -43,7 +45,51 @@ class String
     gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
     gsub(/([a-z\d])([A-Z])/,'\1_\2').
     downcase
-  end unless
+  end unless public_method_defined? :underscore
+
+  def utf8?
+    scan(/[^\x00-\xa0]/u) { |s| s.unpack('U') }
+    true
+  rescue ArgumentError
+    false
+  end
+
+  # All paths in in S3 have to be valid unicode so this takes care of
+  # cleaning up any strings that aren't valid utf-8 according to String#utf8?
+  def remove_extended!
+    gsub!(/[\x80-\xFF]/) { "%02X" % $&[0] }
+  end
+
+  def remove_extended
+    dup.remove_extended!
+  end
+end
+
+class CoercibleString < String
+  class << self
+    def coerce(string)
+      new(string).coerce
+    end
+  end
+
+  def coerce
+    case self
+    when 'true': true
+    when 'false': false
+    when /^\d+$/: Integer(self)
+    when datetime_format: Time.parse(self)
+    else
+      self
+    end
+  end
+
+  private
+    # Lame hack since Date._parse is so accepting. S3 dates are of the form: '2006-10-29T23:14:47.000Z'
+    # so unless the string looks like that, don't even try, otherwise it might convert an object's
+    # key from something like '03 1-2-3-Apple-Tree.mp3' to Sat Feb 03 00:00:00 CST 2001.
+    def datetime_format
+      /^\d{4}-\d{2}-\d{2}\w\d{2}:\d{2}:\d{2}/
+    end
 end
 
 class Symbol
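Two notes on the additions. The when 'true': true form is Ruby 1.8's colon syntax for case/when (later Rubies dropped it), which dates this code to the 1.8 era. And the datetime_format guard exists precisely so that keys like '03 1-2-3-Apple-Tree.mp3' are not handed to Time.parse. A sketch of the intended coercions, inferred from the case branches (inputs illustrative):

    CoercibleString.coerce('true')                      # => true
    CoercibleString.coerce('42')                        # => 42
    CoercibleString.coerce('2006-10-29T23:14:47.000Z')  # => a Time instance
    CoercibleString.coerce('kiss.jpg')                  # => the string, unchanged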
@@ -214,6 +260,50 @@ module SelectiveAttributeProxy
   end
 end
 
+# When streaming data up, Net::HTTPGenericRequest hard codes a chunk size of 1k. For large files this
+# is an unfortunately low chunk size, so here we make it use a much larger default size and move it into a method
+# so that the implementation of send_request_with_body_stream doesn't need to be changed to change the chunk size (at least not anymore
+# than I've already had to...).
+module Net
+  class HTTPGenericRequest
+    def send_request_with_body_stream(sock, ver, path, f)
+      raise ArgumentError, "Content-Length not given and Transfer-Encoding is not `chunked'" unless content_length() or chunked?
+      unless content_type()
+        warn 'net/http: warning: Content-Type did not set; using application/x-www-form-urlencoded' if $VERBOSE
+        set_content_type 'application/x-www-form-urlencoded'
+      end
+      write_header sock, ver, path
+      if chunked?
+        while s = f.read(chunk_size)
+          sock.write(sprintf("%x\r\n", s.length) << s << "\r\n")
+        end
+        sock.write "0\r\n\r\n"
+      else
+        while s = f.read(chunk_size)
+          sock.write s
+        end
+      end
+    end
+
+    def chunk_size
+      1048576 # 1 megabyte
+    end
+  end
+
+  # Net::HTTP before 1.8.4 doesn't have the use_ssl? method or the Delete request type
+  class HTTP
+    def use_ssl?
+      @use_ssl
+    end unless public_method_defined? :use_ssl?
+
+    class Delete < HTTPRequest
+      METHOD = 'DELETE'
+      REQUEST_HAS_BODY = false
+      RESPONSE_HAS_BODY = true
+    end unless const_defined? :Delete
+  end
+end
+
 class XmlGenerator < String #:nodoc:
   attr_reader :xml
   def initialize
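Because the streaming buffer size now lives in its own chunk_size method (1 MB instead of Net::HTTP's stock 1 KB), a client that wanted a different size could in principle re-open the class. This is a hypothetical tweak, not something the gem provides:

    # Hypothetical: not part of aws-s3; possible only because the patch
    # above isolates the buffer size in one method.
    module Net
      class HTTPGenericRequest
        def chunk_size
          4 * 1048576 # stream request bodies in 4 MB chunks
        end
      end
    end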
@@ -222,4 +312,4 @@ class XmlGenerator < String #:nodoc:
     build
   end
 end
-#:startdoc:
+#:startdoc:
data/lib/aws/s3/logging.rb
CHANGED
@@ -17,12 +17,25 @@ module AWS
     #
     # Now instead of logging right into the jukebox bucket, the logs will go into the bucket called jukebox-logs.
     #
-    # Once logs have accumulated, you can access them using the <tt>
+    # Once logs have accumulated, you can access them using the <tt>logs</tt> method:
     #
     #   pp Bucket.logs('jukebox')
-    #   [#<AWS::S3::
-    #   #<AWS::S3::
-    #   #<AWS::S3::
+    #   [#<AWS::S3::Logging::Log '/jukebox-logs/log-2006-11-14-07-15-24-2061C35880A310A1'>,
+    #    #<AWS::S3::Logging::Log '/jukebox-logs/log-2006-11-14-08-15-27-D8EEF536EC09E6B3'>,
+    #    #<AWS::S3::Logging::Log '/jukebox-logs/log-2006-11-14-08-15-29-355812B2B15BD789'>]
+    #
+    # Each log has a <tt>lines</tt> method that gives you information about each request in that log. All the fields are available
+    # as named methods. More information is available in Logging::Log::Line.
+    #
+    #   logs = Bucket.logs('jukebox')
+    #   log = logs.first
+    #   line = log.lines.first
+    #   line.operation
+    #   # => 'REST.GET.LOGGING_STATUS'
+    #   line.request_uri
+    #   # => 'GET /jukebox?logging HTTP/1.1'
+    #   line.remote_ip
+    #   # => "67.165.183.125"
     #
     # Disabling logging is just as simple as enabling it:
     #
@@ -70,7 +83,128 @@ module AWS
           end
         end
       end
+
+      # A bucket log exposes requests made on the given bucket. Lines of the log represent a single request. The lines of a log
+      # can be accessed with the lines method.
+      #
+      #   log = Bucket.logs_for('marcel').first
+      #   log.lines
+      #
+      # More information about the logged requests can be found in the documentation for Log::Line.
+      class Log
+        def initialize(log_object) #:nodoc:
+          @log = log_object
+        end
+
+        # Returns the lines for the log. Each line is wrapped in a Log::Line.
+        def lines
+          log.value.map {|line| Line.new(line)}
+        end
+        memoized :lines
+
+        def inspect #:nodoc:
+          "#<%s:0x%s '%s'>" % [self.class.name, object_id, log.path]
+        end
 
+        private
+          attr_reader :log
+
+          # Each line of a log exposes the raw line, but it also has method accessors for all the fields of the logged request.
+          #
+          # The list of supported log line fields are listed in the S3 documentation: http://docs.amazonwebservices.com/AmazonS3/2006-03-01/LogFormat.html
+          #
+          #   line = log.lines.first
+          #   line.remote_ip
+          #   # => '72.21.206.5'
+          #
+          # If a certain field does not apply to a given request (for example, the <tt>key</tt> field does not apply to a bucket request),
+          # or if it was unknown or unavailable, it will return <tt>nil</tt>.
+          #
+          #   line.operation
+          #   # => 'REST.GET.BUCKET'
+          #   line.key
+          #   # => nil
+          class Line < String
+            DATE = /\[([^\]]+)\]/
+            QUOTED_STRING = /"([^"]+)"/
+            REST = /(\S+)/
+            LINE_SCANNER = /#{DATE}|#{QUOTED_STRING}|#{REST}/
+
+            cattr_accessor :decorators
+            @@decorators = Hash.new {|hash, key| hash[key] = lambda {|entry| CoercibleString.coerce(entry)}}
+            cattr_reader :fields
+            @@fields = []
+
+            class << self
+              def field(name, offset, type = nil, &block) #:nodoc:
+                decorators[name] = block if block_given?
+                fields << name
+                class_eval(<<-EVAL, __FILE__, __LINE__)
+                  def #{name}
+                    value = parts[#{offset} - 1]
+                    if value == '-'
+                      nil
+                    else
+                      self.class.decorators[:#{name}].call(value)
+                    end
+                  end
+                  memoized :#{name}
+                EVAL
+              end
+
+              # Time.parse doesn't like %d/%B/%Y:%H:%M:%S %z so we have to transform it unfortunately
+              def typecast_time(datetime) #:nodoc:
+                month = datetime[/[a-z]+/i]
+                datetime.sub!(%r|^(\w{2})/(\w{3})|, '\2/\1')
+                datetime.sub!(month, Date::ABBR_MONTHS[month.downcase].to_s)
+                datetime.sub!(':', ' ')
+                Time.parse(datetime)
+              end
+            end
+
+            def initialize(line) #:nodoc:
+              super(line)
+              @parts = parse
+            end
+
+            field(:owner, 1) {|entry| Owner.new('id' => entry) }
+            field :bucket, 2
+            field(:time, 3) {|entry| typecast_time(entry)}
+            field :remote_ip, 4
+            field(:requestor, 5) {|entry| Owner.new('id' => entry) }
+            field :request_id, 6
+            field :operation, 7
+            field :key, 8
+            field :request_uri, 9
+            field :http_status, 10
+            field :error_code, 11
+            field :bytes_sent, 12
+            field :object_size, 13
+            field :total_time, 14
+            field :turn_around_time, 15
+            field :referrer, 16
+            field :user_agent, 17
+
+            # Returns all fields of the line in a hash of the form <tt>:field_name => :field_value</tt>.
+            #
+            #   line.attributes.values_at(:bucket, :key)
+            #   # => ['marcel', 'kiss.jpg']
+            def attributes
+              self.class.fields.inject({}) do |attribute_hash, field|
+                attribute_hash[field] = send(field)
+                attribute_hash
+              end
+            end
+
+            private
+              attr_reader :parts
+
+              def parse
+                scan(LINE_SCANNER).flatten.compact
+              end
+          end
+      end
+
       module Management #:nodoc:
         def self.included(klass) #:nodoc:
           klass.extend(ClassMethods)
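The field class method writes one accessor per log field through class_eval. Paraphrasing the heredoc, field :remote_ip, 4 generates roughly:

    def remote_ip
      value = parts[4 - 1]
      if value == '-'
        nil
      else
        self.class.decorators[:remote_ip].call(value)
      end
    end
    memoized :remote_ip

Unless a decorator block is supplied (as for :owner, :time and :requestor), the default decorator runs the raw token through CoercibleString.coerce, so a field like http_status comes back as an Integer rather than a String.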
@@ -120,11 +254,20 @@ module AWS
         # Returns the collection of logs for the bucket named <tt>name</tt>.
         #
         #   Bucket.logs_for 'marcel'
-
-
+        #
+        # Accepts the same options as Bucket.find, such as <tt>:max_keys</tt> and <tt>:marker</tt>.
+        def logs_for(name = nil, options = {})
+          if name.is_a?(Hash)
+            options = name
+            name = nil
+          end
+
+          name = bucket_name(name)
           logging_status = logging_status_for(name)
           return [] unless logging_status.logging_enabled?
-          objects(logging_status.target_bucket, :prefix => logging_status.target_prefix)
+          objects(logging_status.target_bucket, options.merge(:prefix => logging_status.target_prefix)).map do |log_object|
+            Log.new(log_object)
+          end
         end
         alias_method :logs, :logs_for
       end
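With the options pass-through, log listings can be paged the same way bucket listings are, and the name/options shuffle lets the bucket name be omitted entirely. Bucket and marker values below are illustrative:

    Bucket.logs_for('jukebox', :max_keys => 10)
    Bucket.logs(:marker => 'log-2006-11-14')  # options-only call; the name falls back to the gem's default bucket resolution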
@@ -154,8 +297,8 @@ module AWS
           self.class.logging_enabled_for?(name)
         end
 
-        def logs
-          self.class.logs_for(name)
+        def logs(options = {})
+          self.class.logs_for(name, options)
         end
       end
     end
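The same options now flow through the instance method as well. A minimal sketch, assuming the gem's logging_enabled? predicate and an illustrative bucket name:

    bucket = Bucket.find('jukebox')
    bucket.logs(:max_keys => 5) if bucket.logging_enabled?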
data/lib/aws/s3/object.rb
CHANGED
@@ -5,11 +5,7 @@ module AWS
     #
     # You can store an object on S3 by specifying a key, its data and the name of the bucket you want to put it in:
     #
-    #   S3Object.store(
-    #     'headshot.jpg',
-    #     File.open('headshot.jpg'),
-    #     'photos'
-    #   )
+    #   S3Object.store('me.jpg', open('headshot.jpg'), 'photos')
     #
     # The content type of the object will be inferred by its extension. If the appropriate content type can not be inferred, S3 defaults
     # to <tt>binary/octect-stream</tt>.
@@ -19,7 +15,7 @@ module AWS
     #   file = 'black-flowers.m4a'
     #   S3Object.store(
     #     file,
-    #
+    #     open(file),
     #     'jukebox',
     #     :content_type => 'audio/mp4a-latm'
     #   )
@@ -41,7 +37,7 @@ module AWS
     #
     # Or stream it by passing a block to <tt>stream</tt>:
     #
-    #
+    #   open('song.mp3', 'w') do |file|
     #     S3Object.stream('song.mp3', 'jukebox') do |chunk|
     #       file.write chunk
    #     end
@@ -55,6 +51,9 @@ module AWS
     #
     # Other functionality includes:
     #
+    #   # Check if an object exists?
+    #   S3Object.exists? 'headshot.jpg', 'photos'
+    #
     #   # Copying an object
     #   S3Object.copy 'headshot.jpg', 'headshot2.jpg', 'photos'
     #
@@ -166,6 +165,10 @@ module AWS
          # the desired object's key, which functionally makes the key ordered one degree higher than the desired object key according to
          # alphabetic ordering. This is a hack, but it should work around 99% of the time. I can't think of a scenario where it would return
          # something incorrect.
+
+          # We need to ensure the key doesn't have extended characters but not uri escape it before doing the lookup and comparing since if the object exists,
+          # the key on S3 will have been normalized
+          key = key.remove_extended unless key.utf8?
           bucket = Bucket.find(bucket_name(bucket), :marker => key.previous, :max_keys => 1)
           # If our heuristic failed, trigger a NoSuchKey exception
           if (object = bucket.objects.first) && object.key == key
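This is where the new String#utf8? and String#remove_extended extensions are used: a key that is not valid UTF-8 has its extended bytes rewritten before the :marker lookup so it matches the normalized key S3 stores. A rough sketch of the effect under Ruby 1.8 string semantics, which this gem targets (input illustrative):

    "caf\xE9".utf8?            # => false
    "caf\xE9".remove_extended  # => "cafE9" (each byte in \x80-\xFF is replaced by its hex value)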
@@ -178,9 +181,10 @@ module AWS
         # Makes a copy of the object with <tt>key</tt> to <tt>copy_name</tt>.
         def copy(key, copy_key, bucket = nil, options = {})
           bucket = bucket_name(bucket)
-          original =
+          original = open(url_for(key, bucket))
           default_options = {:content_type => original.content_type}
-          store(copy_key, original
+          store(copy_key, original, bucket, default_options.merge(options))
+          acl(copy_key, bucket, acl(key, bucket))
         end
 
         # Rename the object with key <tt>from</tt> to have key in <tt>to</tt>.
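Note what the added acl line buys: copy works by opening the object's URL and re-storing the body, and as of this version it also reads the source object's ACL and applies it to the copy, so permissions survive the round trip. Illustrative usage:

    S3Object.copy('headshot.jpg', 'headshot-backup.jpg', 'photos')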
@@ -189,17 +193,25 @@ module AWS
           delete(from, bucket)
         end
 
-        # Fetch information about the
+        # Fetch information about the object with <tt>key</tt> from <tt>bucket</tt>. Information includes content type, content length,
         # last modified time, and others.
         #
         # If the specified key does not exist, NoSuchKey is raised.
         def about(key, bucket = nil, options = {})
           response = head(path!(bucket, key, options), options)
-          if response.
-
-
-
-
+          raise NoSuchKey.new("No such key `#{key}'", bucket) if response.code == 404
+          About.new(response.headers)
+        end
+
+        # Checks if the object with <tt>key</tt> in <tt>bucket</tt> exists.
+        #
+        #   S3Object.exists? 'kiss.jpg', 'marcel'
+        #   # => true
+        def exists?(key, bucket = nil)
+          about(key, bucket)
+          true
+        rescue NoSuchKey
+          false
         end
 
         # Delete object with <tt>key</tt> from <tt>bucket</tt>.
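exists? is simply about wrapped in a rescue: the HEAD request raises NoSuchKey on a 404, which exists? converts to false. Key and bucket names are illustrative:

    S3Object.about('kiss.jpg', 'marcel')    # => an About object built from the response headers, or raises NoSuchKey
    S3Object.exists?('kiss.jpg', 'marcel')  # => true or false, never raises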
@@ -214,14 +226,10 @@ module AWS
         # may be desirable for very large files so they are not read into memory all at once.
         #
         #   # Non streamed upload
-        #   S3Object.store('
-        #     'hello world!',
-        #     'marcel')
+        #   S3Object.store('greeting.txt', 'hello world!', 'marcel')
         #
         #   # Streamed upload
-        #   S3Object.store('roots.mpeg',
-        #     File.open('roots.mpeg'),
-        #     'marcel')
+        #   S3Object.store('roots.mpeg', open('roots.mpeg'), 'marcel')
         def store(key, data, bucket = nil, options = {})
           validate_key!(key)
           # Must build path before infering content type in case bucket is being used for options
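Streamed uploads are where the Net::HTTPGenericRequest patch from extensions.rb pays off: when the data argument is IO-like, the body is streamed through chunk_size, so the file is never read into memory all at once. A sketch with illustrative names:

    File.open('roots.mpeg', 'rb') do |file|
      S3Object.store('roots.mpeg', file, 'marcel')  # body written in 1 MB chunks
    end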
@@ -577,7 +585,7 @@ module AWS
 
       # Don't dump binary data :)
       def inspect #:nodoc:
-        "
+        "#<%s:0x%s '%s'>" % [self.class, object_id, path]
       end
 
       private
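The new inspect mirrors the one added to Logging::Log: class name, object id and S3 path instead of the raw (possibly binary) value. Illustrative output:

    S3Object.find('kiss.jpg', 'marcel').inspect
    # => "#<AWS::S3::S3Object:0x... '/marcel/kiss.jpg'>"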