tus-server 0.2.0 → 0.9.0
This diff reflects the changes between publicly available package versions as they appear in their respective public registries, and is provided for informational purposes only.
- checksums.yaml +4 -4
- data/README.md +205 -52
- data/lib/tus/checksum.rb +30 -17
- data/lib/tus/errors.rb +4 -0
- data/lib/tus/info.rb +16 -3
- data/lib/tus/input.rb +31 -0
- data/lib/tus/server.rb +96 -77
- data/lib/tus/storage/filesystem.rb +82 -28
- data/lib/tus/storage/gridfs.rb +161 -35
- data/lib/tus/storage/s3.rb +242 -0
- data/tus-server.gemspec +4 -2
- metadata +35 -4
- data/lib/tus/expirator.rb +0 -58
data/lib/tus/storage/gridfs.rb
CHANGED
@@ -1,72 +1,198 @@
 require "mongo"
-
-require "
+
+require "tus/info"
+require "tus/errors"
+
+require "digest"
 
 module Tus
   module Storage
     class Gridfs
-      attr_reader :client, :prefix, :bucket
+      attr_reader :client, :prefix, :bucket, :chunk_size
 
-      def initialize(client:, prefix: "fs")
+      def initialize(client:, prefix: "fs", chunk_size: nil)
         @client = client
         @prefix = prefix
         @bucket = @client.database.fs(bucket_name: @prefix)
         @bucket.send(:ensure_indexes!)
+        @chunk_size = chunk_size
       end
 
-      def create_file(uid,
-
+      def create_file(uid, info = {})
+        tus_info = Tus::Info.new(info)
+        content_type = tus_info.metadata["content_type"]
+
+        file = Mongo::Grid::File.new("",
+          filename: uid,
+          metadata: {},
+          chunk_size: chunk_size,
+          content_type: content_type,
+        )
+
         bucket.insert_one(file)
       end
 
-      def
-
-
+      def concatenate(uid, part_uids, info = {})
+        file_infos = bucket.files_collection.find(filename: {"$in" => part_uids}).to_a
+        file_infos.sort_by! { |file_info| part_uids.index(file_info[:filename]) }
+
+        if file_infos.count != part_uids.count
+          raise Tus::Error, "some parts for concatenation are missing"
+        end
+
+        chunk_sizes = file_infos.map { |file_info| file_info[:chunkSize] }
+        if chunk_sizes[0..-2].uniq.count > 1
+          raise Tus::Error, "some parts have different chunk sizes, so they cannot be concatenated"
+        end
+
+        if chunk_sizes.uniq != [chunk_sizes.last] && bucket.chunks_collection.find(files_id: file_infos.last[:_id]).count > 1
+          raise Tus::Error, "last part has different chunk size and is composed of more than one chunk"
+        end
+
+        length = file_infos.inject(0) { |sum, file_info| sum + file_info[:length] }
+        chunk_size = file_infos.first[:chunkSize]
+        tus_info = Tus::Info.new(info)
+        content_type = tus_info.metadata["content_type"]
+
+        file = Mongo::Grid::File.new("",
+          filename: uid,
+          metadata: {},
+          chunk_size: chunk_size,
+          length: length,
+          content_type: content_type,
+        )
+
+        bucket.insert_one(file)
+
+        file_infos.inject(0) do |offset, file_info|
+          result = bucket.chunks_collection
+            .find(files_id: file_info[:_id])
+            .update_many("$set" => {files_id: file.id}, "$inc" => {n: offset})
+
+          offset += result.modified_count
+        end
 
-
-
-        file
+        bucket.files_collection.delete_many(filename: {"$in" => part_uids})
+
+        # server requires us to return the size of the concatenated file
+        length
       end
 
-      def patch_file(uid,
+      def patch_file(uid, io, info = {})
         file_info = bucket.files_collection.find(filename: uid).first
-
-
+        raise Tus::NotFound if file_info.nil?
+
+        file_info[:md5] = Digest::MD5.new # hack for `Chunk.split` updating MD5
+        file_info[:chunkSize] ||= io.size
+        file_info = Mongo::Grid::File::Info.new(Mongo::Options::Mapper.transform(file_info, Mongo::Grid::File::Info::MAPPINGS.invert))
+
+        tus_info = Tus::Info.new(info)
+        last_chunk = (tus_info.length && io.size == tus_info.remaining_length)
+
+        if io.size % file_info.chunk_size != 0 && !last_chunk
+          raise Tus::Error,
+            "Input has length #{io.size} but expected it to be a multiple of " \
+            "chunk size #{file_info.chunk_size} or for it to be the last chunk"
+        end
+
         offset = bucket.chunks_collection.find(files_id: file_info.id).count
-        chunks = Mongo::Grid::File::Chunk.split(
+        chunks = Mongo::Grid::File::Chunk.split(io, file_info, offset)
+
         bucket.chunks_collection.insert_many(chunks)
-
+        chunks.each { |chunk| chunk.data.data.clear } # deallocate strings
 
-
-
-
-
-
+        bucket.files_collection.find(filename: uid).update_one("$set" => {
+          length: file_info.length + io.size,
+          uploadDate: Time.now.utc,
+          chunkSize: file_info.chunk_size,
+        })
       end
 
-      def
+      def read_info(uid)
         file_info = bucket.files_collection.find(filename: uid).first
-
-        end
+        raise Tus::NotFound if file_info.nil?
 
-
-        info = bucket.files_collection.find(filename: uid).first
-        info.fetch("metadata")
+        file_info.fetch("metadata")
       end
 
       def update_info(uid, info)
-        bucket.files_collection.find(filename: uid)
+        bucket.files_collection.find(filename: uid)
+          .update_one("$set" => {metadata: info})
       end
 
-      def
-
-
+      def get_file(uid, info = {}, range: nil)
+        file_info = bucket.files_collection.find(filename: uid).first
+        raise Tus::NotFound if file_info.nil?
+
+        filter = {files_id: file_info[:_id]}
+
+        if range
+          chunk_start = range.begin / file_info[:chunkSize] if range.begin
+          chunk_stop = range.end / file_info[:chunkSize] if range.end
+
+          filter[:n] = {}
+          filter[:n].update("$gte" => chunk_start) if chunk_start
+          filter[:n].update("$lte" => chunk_stop) if chunk_stop
+        end
+
+        chunks_view = bucket.chunks_collection.find(filter).read(bucket.read_preference).sort(n: 1)
+
+        chunks = Enumerator.new do |yielder|
+          chunks_view.each do |document|
+            data = document[:data].data
+
+            if document[:n] == chunk_start && document[:n] == chunk_stop
+              byte_start = range.begin % file_info[:chunkSize]
+              byte_stop = range.end % file_info[:chunkSize]
+            elsif document[:n] == chunk_start
+              byte_start = range.begin % file_info[:chunkSize]
+              byte_stop = file_info[:chunkSize] - 1
+            elsif document[:n] == chunk_stop
+              byte_start = 0
+              byte_stop = range.end % file_info[:chunkSize]
+            end
+
+            if byte_start && byte_stop
+              partial_data = data[byte_start..byte_stop]
+              yielder << partial_data
+              partial_data.clear # deallocate chunk string
+            else
+              yielder << data
+            end
+
+            data.clear # deallocate chunk string
+          end
+        end
+
+        Response.new(chunks: chunks, close: ->{chunks_view.close_query})
+      end
+
+      def delete_file(uid, info = {})
+        file_info = bucket.files_collection.find(filename: uid).first
+        bucket.delete(file_info.fetch("_id")) if file_info
       end
 
-
+      def expire_files(expiration_date)
+        file_infos = bucket.files_collection.find(uploadDate: {"$lte" => expiration_date}).to_a
+        file_info_ids = file_infos.map { |info| info[:_id] }
+
+        bucket.files_collection.delete_many(_id: {"$in" => file_info_ids})
+        bucket.chunks_collection.delete_many(files_id: {"$in" => file_info_ids})
+      end
+
+      class Response
+        def initialize(chunks:, close:)
+          @chunks = chunks
+          @close = close
+        end
+
+        def each(&block)
+          @chunks.each(&block)
+        end
 
-
-
+        def close
+          @close.call
+        end
+      end
     end
   end
 end
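The constructor change above adds an optional chunk_size: option, which Gridfs passes to Mongo::Grid::File when files are created. A minimal configuration sketch (the MongoDB URI and the 256 KB chunk size are illustrative placeholders, not values from this diff; see data/README.md in this release for how the storage object is wired into Tus::Server):

    require "tus/storage/gridfs"
    require "mongo"

    client = Mongo::Client.new("mongodb://127.0.0.1:27017/mydb")

    # chunk_size sets the GridFS chunk size for stored files; per patch_file
    # above, every uploaded chunk except the last must be a multiple of it.
    storage = Tus::Storage::Gridfs.new(client: client, prefix: "fs", chunk_size: 256 * 1024)

With that 256 KB (262,144-byte) chunk size, a ranged download such as get_file(uid, range: 300_000..600_000) selects chunks n = 1 through 2 (300_000 / 262_144 = 1, 600_000 / 262_144 = 2) and trims the first and last chunk using the modulo arithmetic shown above.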
data/lib/tus/storage/s3.rb
ADDED
@@ -0,0 +1,242 @@
+require "aws-sdk"
+
+require "tus/info"
+require "tus/checksum"
+require "tus/errors"
+
+require "json"
+require "cgi/util"
+
+Aws.eager_autoload!(services: ["S3"])
+
+module Tus
+  module Storage
+    class S3
+      MIN_PART_SIZE = 5 * 1024 * 1024
+
+      attr_reader :client, :bucket, :prefix, :upload_options
+
+      def initialize(bucket:, prefix: nil, upload_options: {}, **client_options)
+        resource = Aws::S3::Resource.new(**client_options)
+
+        @client = resource.client
+        @bucket = resource.bucket(bucket)
+        @prefix = prefix
+        @upload_options = upload_options
+      end
+
+      def create_file(uid, info = {})
+        tus_info = Tus::Info.new(info)
+
+        options = upload_options.dup
+        options[:content_type] = tus_info.metadata["content_type"]
+
+        if filename = tus_info.metadata["filename"]
+          options[:content_disposition] ||= "inline"
+          options[:content_disposition] += "; filename=\"#{CGI.escape(filename).gsub("+", " ")}\""
+        end
+
+        multipart_upload = object(uid).initiate_multipart_upload(options)
+
+        info["multipart_id"] = multipart_upload.id
+        info["multipart_parts"] = []
+      end
+
+      def concatenate(uid, part_uids, info = {})
+        create_file(uid, info)
+
+        multipart_upload = object(uid).multipart_upload(info["multipart_id"])
+
+        queue = Queue.new
+        part_uids.each_with_index do |part_uid, idx|
+          queue << {
+            copy_source: [bucket.name, object(part_uid).key].join("/"),
+            part_number: idx + 1
+          }
+        end
+
+        threads = 10.times.map do
+          Thread.new do
+            Thread.current.abort_on_exception = true
+            completed = []
+
+            begin
+              loop do
+                multipart_copy_task = queue.deq(true) rescue break
+
+                part_number = multipart_copy_task[:part_number]
+                copy_source = multipart_copy_task[:copy_source]
+
+                part = multipart_upload.part(part_number)
+                response = part.copy_from(copy_source: copy_source)
+
+                completed << {
+                  part_number: part_number,
+                  etag: response.copy_part_result.etag,
+                }
+              end
+
+              completed
+            rescue
+              queue.clear
+              raise
+            end
+          end
+        end
+
+        parts = threads.flat_map(&:value).sort_by { |part| part[:part_number] }
+
+        multipart_upload.complete(multipart_upload: {parts: parts})
+
+        delete(part_uids.flat_map { |part_uid| [object(part_uid), object("#{part_uid}.info")] })
+
+        info.delete("multipart_id")
+        info.delete("multipart_parts")
+      rescue
+        abort_multipart_upload(multipart_upload) if multipart_upload
+        raise
+      end
+
+      def patch_file(uid, io, info = {})
+        tus_info = Tus::Info.new(info)
+        last_chunk = (tus_info.length && io.size == tus_info.remaining_length)
+
+        if io.size < MIN_PART_SIZE && !last_chunk
+          raise Tus::Error, "Chunk size cannot be smaller than 5MB"
+        end
+
+        upload_id = info["multipart_id"]
+        part_number = info["multipart_parts"].count + 1
+
+        multipart_upload = object(uid).multipart_upload(upload_id)
+        multipart_part = multipart_upload.part(part_number)
+        md5 = Tus::Checksum.new("md5").generate(io)
+
+        begin
+          response = multipart_part.upload(body: io, content_md5: md5)
+        rescue Aws::S3::Errors::NoSuchUpload
+          raise Tus::NotFound
+        end
+
+        info["multipart_parts"] << {
+          "part_number" => part_number,
+          "etag" => response.etag[/"(.+)"/, 1],
+        }
+
+        # finalize the multipart upload if this chunk was the last part
+        if last_chunk
+          multipart_upload.complete(
+            multipart_upload: {
+              parts: info["multipart_parts"].map do |part|
+                {part_number: part["part_number"], etag: part["etag"]}
+              end
+            }
+          )
+
+          info.delete("multipart_id")
+          info.delete("multipart_parts")
+        end
+      end
+
+      def read_info(uid)
+        response = object("#{uid}.info").get
+        JSON.parse(response.body.string)
+      rescue Aws::S3::Errors::NoSuchKey
+        raise Tus::NotFound
+      end
+
+      def update_info(uid, info)
+        object("#{uid}.info").put(body: info.to_json)
+      end
+
+      def get_file(uid, info = {}, range: nil)
+        if range
+          range = "bytes=#{range.begin}-#{range.end}"
+        end
+
+        raw_chunks = Enumerator.new do |yielder|
+          object(uid).get(range: range) do |chunk|
+            yielder << chunk
+            chunk.clear # deallocate string
+          end
+        end
+
+        begin
+          first_chunk = raw_chunks.next
+        rescue Aws::S3::Errors::NoSuchKey
+          raise Tus::NotFound
+        end
+
+        chunks = Enumerator.new do |yielder|
+          yielder << first_chunk
+          loop { yielder << raw_chunks.next }
+        end
+
+        Response.new(chunks: chunks)
+      end
+
+      def delete_file(uid, info = {})
+        if info["multipart_id"]
+          multipart_upload = object(uid).multipart_upload(info["multipart_id"])
+          abort_multipart_upload(multipart_upload)
+
+          delete [object("#{uid}.info")]
+        else
+          delete [object(uid), object("#{uid}.info")]
+        end
+      end
+
+      def expire_files(expiration_date)
+        old_objects = bucket.objects.select do |object|
+          object.last_modified <= expiration_date
+        end
+
+        delete(old_objects)
+
+        bucket.multipart_uploads.each do |multipart_upload|
+          next unless multipart_upload.initiated <= expiration_date
+          most_recent_part = multipart_upload.parts.sort_by(&:last_modified).last
+          if most_recent_part.nil? || most_recent_part.last_modified <= expiration_date
+            abort_multipart_upload(multipart_upload)
+          end
+        end
+      end
+
+      private
+
+      def delete(objects)
+        # S3 can delete maximum of 1000 objects in a single request
+        objects.each_slice(1000) do |objects_batch|
+          delete_params = {objects: objects_batch.map { |object| {key: object.key} }}
+          bucket.delete_objects(delete: delete_params)
+        end
+      end
+
+      # In order to ensure the multipart upload was successfully aborted,
+      # we need to check whether all parts have been deleted, and retry
+      # the abort if the list is nonempty.
+      def abort_multipart_upload(multipart_upload)
+        loop do
+          multipart_upload.abort
+          break unless multipart_upload.parts.any?
+        end
+      rescue Aws::S3::Errors::NoSuchUpload
+        # multipart upload was successfully aborted or doesn't exist
+      end
+
+      def object(key)
+        bucket.object([*prefix, key].join("/"))
+      end
+
+      class Response
+        def initialize(chunks:)
+          @chunks = chunks
+        end
+
+        def each(&block)
+          @chunks.each(&block)
+        end
+      end
+    end
+  end
+end
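Tus::Storage::S3 takes the bucket name plus any options accepted by Aws::S3::Resource.new; upload_options is duplicated and forwarded to initiate_multipart_upload in create_file. A minimal sketch, with placeholder bucket, prefix, and credentials:

    require "tus/storage/s3"

    storage = Tus::Storage::S3.new(
      bucket:            "my-tus-uploads",                    # required
      prefix:            "tus",                               # optional key prefix for stored objects
      upload_options:    {server_side_encryption: "AES256"},  # merged into each initiate_multipart_upload
      access_key_id:     "abc",                               # passed through to Aws::S3::Resource.new
      secret_access_key: "xyz",
      region:            "eu-west-1",
    )

Because patch_file uploads each PATCH body as a single multipart part, every chunk except the last must be at least MIN_PART_SIZE (5 MB), matching S3's minimum part size for multipart uploads.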