tus-server 0.2.0 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +205 -52
- data/lib/tus/checksum.rb +30 -17
- data/lib/tus/errors.rb +4 -0
- data/lib/tus/info.rb +16 -3
- data/lib/tus/input.rb +31 -0
- data/lib/tus/server.rb +96 -77
- data/lib/tus/storage/filesystem.rb +82 -28
- data/lib/tus/storage/gridfs.rb +161 -35
- data/lib/tus/storage/s3.rb +242 -0
- data/tus-server.gemspec +4 -2
- metadata +35 -4
- data/lib/tus/expirator.rb +0 -58
data/lib/tus/storage/gridfs.rb
CHANGED
@@ -1,72 +1,198 @@
|
|
1
1
|
require "mongo"
|
2
|
-
|
3
|
-
require "
|
2
|
+
|
3
|
+
require "tus/info"
|
4
|
+
require "tus/errors"
|
5
|
+
|
6
|
+
require "digest"
|
4
7
|
|
5
8
|
module Tus
|
6
9
|
module Storage
|
7
10
|
# Tus storage backend that keeps uploads in MongoDB GridFS.
#
# Every upload is one document in the bucket's files collection, looked up
# by `filename: uid`. Its content is stored as documents in the chunks
# collection, and the tus info hash is stored in the file document's
# "metadata" field (see #read_info / #update_info).
class Gridfs
  attr_reader :client, :prefix, :bucket, :chunk_size

  # client     - object responding to `database.fs(bucket_name:)`, which
  #              returns the GridFS bucket used for all operations
  # prefix     - GridFS bucket name (defaults to "fs")
  # chunk_size - chunk size to store on newly created files; when nil, the
  #              size of the first non-empty PATCH body is used instead
  #              (see #patch_file)
  def initialize(client:, prefix: "fs", chunk_size: nil)
    @client = client
    @prefix = prefix
    @bucket = @client.database.fs(bucket_name: @prefix)
    # `ensure_indexes!` is invoked through `send`, i.e. it is not part of the
    # bucket's public interface.
    @bucket.send(:ensure_indexes!)
    @chunk_size = chunk_size
  end

  # Inserts an empty GridFS file named `uid`. The content type is taken
  # from the "content_type" entry of the tus metadata, when present.
  def create_file(uid, info = {})
    tus_info = Tus::Info.new(info)
    content_type = tus_info.metadata["content_type"]

    file = Mongo::Grid::File.new("",
      filename: uid,
      metadata: {},
      chunk_size: chunk_size,
      content_type: content_type,
    )

    bucket.insert_one(file)
  end

  # Builds the file `uid` out of the already uploaded parts `part_uids`
  # (in that order) by re-pointing their chunks at a new file document,
  # then removes the part file documents.
  #
  # Returns the total length of the concatenated file.
  #
  # Raises Tus::Error when no parts are given, when some parts are missing,
  # or when the parts' chunk sizes make them impossible to concatenate.
  def concatenate(uid, part_uids, info = {})
    # Without this guard an empty list fell through to `file_infos.last[:_id]`
    # and failed with NoMethodError on nil.
    raise Tus::Error, "no parts given for concatenation" if part_uids.empty?

    file_infos = bucket.files_collection.find(filename: {"$in" => part_uids}).to_a
    file_infos.sort_by! { |file_info| part_uids.index(file_info[:filename]) }

    if file_infos.count != part_uids.count
      raise Tus::Error, "some parts for concatenation are missing"
    end

    # All parts except the last one must share a chunk size, because their
    # chunks are renumbered into a single sequence below.
    chunk_sizes = file_infos.map { |file_info| file_info[:chunkSize] }
    if chunk_sizes[0..-2].uniq.count > 1
      raise Tus::Error, "some parts have different chunk sizes, so they cannot be concatenated"
    end

    # The last part may use a different chunk size only if it consists of a
    # single chunk.
    if chunk_sizes.uniq != [chunk_sizes.last] && bucket.chunks_collection.find(files_id: file_infos.last[:_id]).count > 1
      raise Tus::Error, "last part has different chunk size and is composed of more than one chunk"
    end

    length = file_infos.inject(0) { |sum, file_info| sum + file_info[:length] }
    chunk_size = file_infos.first[:chunkSize]
    tus_info = Tus::Info.new(info)
    content_type = tus_info.metadata["content_type"]

    file = Mongo::Grid::File.new("",
      filename: uid,
      metadata: {},
      chunk_size: chunk_size,
      length: length,
      content_type: content_type,
    )

    bucket.insert_one(file)

    # Move each part's chunks to the new file, shifting their position `n`
    # by the number of chunks that were moved before them.
    file_infos.inject(0) do |offset, file_info|
      result = bucket.chunks_collection
        .find(files_id: file_info[:_id])
        .update_many("$set" => {files_id: file.id}, "$inc" => {n: offset})

      offset + result.modified_count
    end

    bucket.files_collection.delete_many(filename: {"$in" => part_uids})

    # server requires us to return the size of the concatenated file
    length
  end

  # Appends the content of `io` to the file `uid`.
  #
  # Unless this is the last chunk of the upload (the tus info has a length
  # and `io` fills the remaining length exactly), the input size must be a
  # multiple of the file's chunk size. When the file has no chunk size yet,
  # the size of this input becomes its chunk size.
  #
  # Raises Tus::NotFound if the file doesn't exist and Tus::Error if the
  # input size is not acceptable.
  def patch_file(uid, io, info = {})
    file_info = bucket.files_collection.find(filename: uid).first
    raise Tus::NotFound if file_info.nil?

    # An empty body contributes no chunks. It must not reach the code below:
    # with no chunk size set yet it would make the chunk size 0, which
    # raises ZeroDivisionError in the modulo check, and a persisted 0 would
    # never be replaced because `||=` treats 0 as truthy. Only refresh the
    # upload date in that case.
    if io.size.zero?
      return bucket.files_collection.find(filename: uid)
        .update_one("$set" => {uploadDate: Time.now.utc})
    end

    file_info[:md5] = Digest::MD5.new # hack for `Chunk.split` updating MD5
    file_info[:chunkSize] ||= io.size
    grid_info = Mongo::Grid::File::Info.new(Mongo::Options::Mapper.transform(file_info, Mongo::Grid::File::Info::MAPPINGS.invert))

    tus_info = Tus::Info.new(info)
    last_chunk = (tus_info.length && io.size == tus_info.remaining_length)

    if io.size % grid_info.chunk_size != 0 && !last_chunk
      raise Tus::Error,
        "Input has length #{io.size} but expected it to be a multiple of " \
        "chunk size #{grid_info.chunk_size} or for it to be the last chunk"
    end

    # New chunks are numbered after the ones that are already stored.
    offset = bucket.chunks_collection.find(files_id: grid_info.id).count
    chunks = Mongo::Grid::File::Chunk.split(io, grid_info, offset)

    bucket.chunks_collection.insert_many(chunks)
    chunks.each { |chunk| chunk.data.data.clear } # deallocate strings

    bucket.files_collection.find(filename: uid).update_one("$set" => {
      length: grid_info.length + io.size,
      uploadDate: Time.now.utc,
      chunkSize: grid_info.chunk_size,
    })
  end

  # Returns the tus info hash stored in the file document's "metadata".
  # Raises Tus::NotFound if the file doesn't exist.
  def read_info(uid)
    file_info = bucket.files_collection.find(filename: uid).first
    raise Tus::NotFound if file_info.nil?

    file_info.fetch("metadata")
  end

  # Replaces the tus info hash stored in the file document's "metadata".
  def update_info(uid, info)
    bucket.files_collection.find(filename: uid)
      .update_one("$set" => {metadata: info})
  end

  # Returns a Response that streams the content of file `uid` chunk by
  # chunk. When `range` is given, only the chunks overlapping it are
  # fetched and the first/last one is trimmed; `range.begin` and
  # `range.end` are used as inclusive byte offsets.
  #
  # Each yielded string is cleared as soon as the consumer asks for the
  # next one, so consumers must copy data they want to keep.
  #
  # Raises Tus::NotFound if the file doesn't exist.
  def get_file(uid, info = {}, range: nil)
    file_info = bucket.files_collection.find(filename: uid).first
    raise Tus::NotFound if file_info.nil?

    filter = {files_id: file_info[:_id]}

    chunk_start = chunk_stop = nil

    if range
      chunk_start = range.begin / file_info[:chunkSize] if range.begin
      chunk_stop = range.end / file_info[:chunkSize] if range.end

      filter[:n] = {}
      filter[:n].update("$gte" => chunk_start) if chunk_start
      filter[:n].update("$lte" => chunk_stop) if chunk_stop
    end

    chunks_view = bucket.chunks_collection.find(filter).read(bucket.read_preference).sort(n: 1)

    chunks = Enumerator.new do |yielder|
      chunks_view.each do |document|
        data = document[:data].data

        # Only the boundary chunks of a range need trimming; for all other
        # chunks byte_start/byte_stop stay nil and the chunk is sent whole.
        if document[:n] == chunk_start && document[:n] == chunk_stop
          byte_start = range.begin % file_info[:chunkSize]
          byte_stop = range.end % file_info[:chunkSize]
        elsif document[:n] == chunk_start
          byte_start = range.begin % file_info[:chunkSize]
          byte_stop = file_info[:chunkSize] - 1
        elsif document[:n] == chunk_stop
          byte_start = 0
          byte_stop = range.end % file_info[:chunkSize]
        end

        if byte_start && byte_stop
          partial_data = data[byte_start..byte_stop]
          yielder << partial_data
          partial_data.clear # deallocate chunk string
        else
          yielder << data
        end

        data.clear # deallocate chunk string
      end
    end

    Response.new(chunks: chunks, close: ->{chunks_view.close_query})
  end

  # Deletes the file `uid` through the bucket; does nothing if the file
  # doesn't exist.
  def delete_file(uid, info = {})
    file_info = bucket.files_collection.find(filename: uid).first
    bucket.delete(file_info.fetch("_id")) if file_info
  end

  # Deletes all files whose `uploadDate` is at or before `expiration_date`,
  # together with their chunks.
  def expire_files(expiration_date)
    file_infos = bucket.files_collection.find(uploadDate: {"$lte" => expiration_date}).to_a
    file_info_ids = file_infos.map { |info| info[:_id] }

    bucket.files_collection.delete_many(_id: {"$in" => file_info_ids})
    bucket.chunks_collection.delete_many(files_id: {"$in" => file_info_ids})
  end

  # Streaming body returned by #get_file: `each` yields the chunks and
  # `close` runs the given callable (used to close the chunks query).
  class Response
    def initialize(chunks:, close:)
      @chunks = chunks
      @close = close
    end

    def each(&block)
      @chunks.each(&block)
    end

    def close
      @close.call
    end
  end
end
|
72
198
|
end
|
@@ -0,0 +1,242 @@
|
|
1
|
+
require "aws-sdk"
|
2
|
+
|
3
|
+
require "tus/info"
|
4
|
+
require "tus/checksum"
|
5
|
+
require "tus/errors"
|
6
|
+
|
7
|
+
require "json"
|
8
|
+
require "cgi/util"
|
9
|
+
|
10
|
+
# Load the S3 service classes up front instead of on first use
# (presumably to avoid autoload races in the threads spawned by
# Tus::Storage::S3#concatenate -- TODO confirm).
Aws.eager_autoload!(services: ["S3"])
|
11
|
+
|
12
|
+
module Tus
|
13
|
+
module Storage
|
14
|
+
# Tus storage backend that keeps uploads on Amazon S3.
#
# An upload in progress is an S3 multipart upload whose id and uploaded
# parts are tracked in the tus info hash under "multipart_id" and
# "multipart_parts". The info hash itself is stored as JSON in a sibling
# object named "<uid>.info". All keys are placed under `prefix` when one
# is configured.
class S3
  # Every chunk except the last one must be at least this big (#patch_file).
  MIN_PART_SIZE = 5 * 1024 * 1024

  attr_reader :client, :bucket, :prefix, :upload_options

  # bucket         - name of the S3 bucket
  # prefix         - optional key prefix ("folder") for all objects
  # upload_options - options passed on when initiating multipart uploads
  # client_options - remaining keywords are passed to Aws::S3::Resource.new
  def initialize(bucket:, prefix: nil, upload_options: {}, **client_options)
    resource = Aws::S3::Resource.new(**client_options)

    @client = resource.client
    @bucket = resource.bucket(bucket)
    @prefix = prefix
    @upload_options = upload_options
  end

  # Initiates a multipart upload for `uid` and records its id and an empty
  # parts list in `info` (the hash is modified in place).
  #
  # Content type and filename from the tus metadata are turned into the
  # object's content type and content disposition.
  def create_file(uid, info = {})
    tus_info = Tus::Info.new(info)

    options = upload_options.dup
    options[:content_type] = tus_info.metadata["content_type"]

    if filename = tus_info.metadata["filename"]
      options[:content_disposition] ||= "inline"
      options[:content_disposition] += "; filename=\"#{CGI.escape(filename).gsub("+", " ")}\""
    end

    multipart_upload = object(uid).initiate_multipart_upload(options)

    info["multipart_id"] = multipart_upload.id
    info["multipart_parts"] = []
  end

  # Builds the object `uid` out of the already uploaded objects
  # `part_uids` (in that order) by copying each of them as one part of a
  # new multipart upload, then deletes the parts and their info objects.
  #
  # Returns the size of the concatenated object, like the Gridfs storage
  # does for the tus server.
  #
  # If anything fails, the multipart upload is aborted and the error is
  # re-raised.
  def concatenate(uid, part_uids, info = {})
    create_file(uid, info)

    multipart_upload = object(uid).multipart_upload(info["multipart_id"])

    queue = Queue.new
    part_uids.each_with_index do |part_uid, idx|
      queue << {
        copy_source: [bucket.name, object(part_uid).key].join("/"),
        part_number: idx + 1
      }
    end

    # Copy the parts with 10 worker threads pulling tasks off the queue.
    threads = 10.times.map do
      Thread.new do
        Thread.current.abort_on_exception = true
        completed = []

        begin
          loop do
            # A non-blocking deq raises ThreadError once the queue is empty.
            multipart_copy_task = begin
              queue.deq(true)
            rescue ThreadError
              break
            end

            part_number = multipart_copy_task[:part_number]
            copy_source = multipart_copy_task[:copy_source]

            part = multipart_upload.part(part_number)
            response = part.copy_from(copy_source: copy_source)

            completed << {
              part_number: part_number,
              etag: response.copy_part_result.etag,
            }
          end

          completed
        rescue
          # Stop the other workers from picking up more work.
          queue.clear
          raise
        end
      end
    end

    parts = threads.flat_map(&:value).sort_by { |part| part[:part_number] }

    multipart_upload.complete(multipart_upload: {parts: parts})

    delete(part_uids.flat_map { |part_uid| [object(part_uid), object("#{part_uid}.info")] })

    info.delete("multipart_id")
    info.delete("multipart_parts")

    # server requires us to return the size of the concatenated file
    object(uid).content_length
  rescue
    abort_multipart_upload(multipart_upload) if multipart_upload
    raise
  end

  # Uploads the content of `io` as the next part of the multipart upload
  # of `uid`, and records the part in `info["multipart_parts"]`. When this
  # is the last chunk (the tus info has a length and `io` fills the
  # remaining length exactly), the multipart upload is completed and the
  # multipart bookkeeping is removed from `info`.
  #
  # Raises Tus::Error if a non-final chunk is smaller than MIN_PART_SIZE
  # and Tus::NotFound if the multipart upload no longer exists.
  def patch_file(uid, io, info = {})
    tus_info = Tus::Info.new(info)
    last_chunk = (tus_info.length && io.size == tus_info.remaining_length)

    if io.size < MIN_PART_SIZE && !last_chunk
      raise Tus::Error, "Chunk size cannot be smaller than 5MB"
    end

    upload_id = info["multipart_id"]
    part_number = info["multipart_parts"].count + 1

    multipart_upload = object(uid).multipart_upload(upload_id)
    multipart_part = multipart_upload.part(part_number)
    md5 = Tus::Checksum.new("md5").generate(io)

    begin
      response = multipart_part.upload(body: io, content_md5: md5)
    rescue Aws::S3::Errors::NoSuchUpload
      raise Tus::NotFound
    end

    info["multipart_parts"] << {
      "part_number" => part_number,
      "etag" => response.etag[/"(.+)"/, 1], # strip the surrounding quotes
    }

    # finalize the multipart upload if this chunk was the last part
    if last_chunk
      multipart_upload.complete(
        multipart_upload: {
          parts: info["multipart_parts"].map do |part|
            {part_number: part["part_number"], etag: part["etag"]}
          end
        }
      )

      info.delete("multipart_id")
      info.delete("multipart_parts")
    end
  end

  # Returns the tus info hash parsed from the "<uid>.info" object.
  # Raises Tus::NotFound if that object doesn't exist.
  def read_info(uid)
    response = object("#{uid}.info").get
    JSON.parse(response.body.string)
  rescue Aws::S3::Errors::NoSuchKey
    raise Tus::NotFound
  end

  # Stores the tus info hash as JSON in the "<uid>.info" object.
  def update_info(uid, info)
    object("#{uid}.info").put(body: info.to_json)
  end

  # Returns a Response that streams the content of object `uid`. When
  # `range` is given, its begin and end are sent as a "bytes=<begin>-<end>"
  # range request.
  #
  # Each yielded string is cleared as soon as the consumer asks for the
  # next one, so consumers must copy data they want to keep.
  #
  # Raises Tus::NotFound if the object doesn't exist.
  def get_file(uid, info = {}, range: nil)
    if range
      range = "bytes=#{range.begin}-#{range.end}"
    end

    raw_chunks = Enumerator.new do |yielder|
      object(uid).get(range: range) do |chunk|
        yielder << chunk
        chunk.clear # deallocate string
      end
    end

    # Pull the first chunk eagerly so that a missing object is reported
    # here rather than in the middle of streaming the response.
    begin
      first_chunk = raw_chunks.next
    rescue Aws::S3::Errors::NoSuchKey
      raise Tus::NotFound
    rescue StopIteration
      # The object exists but produced no chunks (it is empty). Previously
      # this StopIteration escaped to the caller.
      return Response.new(chunks: [].each)
    end

    chunks = Enumerator.new do |yielder|
      yielder << first_chunk
      loop { yielder << raw_chunks.next }
    end

    Response.new(chunks: chunks)
  end

  # Deletes the upload `uid`: aborts its multipart upload when it is still
  # in progress, otherwise deletes the object; the info object is deleted
  # in both cases.
  def delete_file(uid, info = {})
    if info["multipart_id"]
      multipart_upload = object(uid).multipart_upload(info["multipart_id"])
      abort_multipart_upload(multipart_upload)

      delete([object("#{uid}.info")])
    else
      delete([object(uid), object("#{uid}.info")])
    end
  end

  # Deletes objects last modified at or before `expiration_date`, and
  # aborts multipart uploads that were initiated at or before it and whose
  # most recent part (if any) is also that old.
  #
  # When a `prefix` is configured, only keys under "<prefix>/" are
  # considered, so other data sharing the bucket is left alone.
  def expire_files(expiration_date)
    scope = prefix ? {prefix: "#{prefix}/"} : {}

    old_objects = bucket.objects(scope).select do |object|
      object.last_modified <= expiration_date
    end

    delete(old_objects)

    bucket.multipart_uploads(scope).each do |multipart_upload|
      next unless multipart_upload.initiated <= expiration_date
      most_recent_part = multipart_upload.parts.sort_by(&:last_modified).last
      if most_recent_part.nil? || most_recent_part.last_modified <= expiration_date
        abort_multipart_upload(multipart_upload)
      end
    end
  end

  private

  # Deletes the given S3 objects in batches.
  def delete(objects)
    # S3 can delete maximum of 1000 objects in a single request
    objects.each_slice(1000) do |objects_batch|
      delete_params = {objects: objects_batch.map { |object| {key: object.key} }}
      bucket.delete_objects(delete: delete_params)
    end
  end

  # In order to ensure the multipart upload was successfully aborted,
  # we need to check whether all parts have been deleted, and retry
  # the abort if the list is nonempty.
  def abort_multipart_upload(multipart_upload)
    loop do
      multipart_upload.abort
      break unless multipart_upload.parts.any?
    end
  rescue Aws::S3::Errors::NoSuchUpload
    # multipart upload was successfully aborted or doesn't exist
  end

  # Returns the bucket object for `key`, placed under `prefix` if one is set.
  def object(key)
    bucket.object([*prefix, key].join("/"))
  end

  # Streaming body returned by #get_file: `each` yields the chunks.
  class Response
    def initialize(chunks:)
      @chunks = chunks
    end

    def each(&block)
      @chunks.each(&block)
    end
  end
end
|
241
|
+
end
|
242
|
+
end
|