tus-server 0.2.0 → 0.9.0

@@ -1,72 +1,198 @@
  require "mongo"
- require "stringio"
- require "tempfile"
+
+ require "tus/info"
+ require "tus/errors"
+
+ require "digest"
 
  module Tus
    module Storage
      class Gridfs
-       attr_reader :client, :prefix, :bucket
+       attr_reader :client, :prefix, :bucket, :chunk_size
 
-       def initialize(client:, prefix: "fs")
+       def initialize(client:, prefix: "fs", chunk_size: nil)
          @client = client
          @prefix = prefix
          @bucket = @client.database.fs(bucket_name: @prefix)
          @bucket.send(:ensure_indexes!)
+         @chunk_size = chunk_size
        end
 
-       def create_file(uid, metadata = {})
-         file = Mongo::Grid::File.new("", filename: uid, metadata: metadata)
+       def create_file(uid, info = {})
+         tus_info = Tus::Info.new(info)
+         content_type = tus_info.metadata["content_type"]
+
+         file = Mongo::Grid::File.new("",
+           filename: uid,
+           metadata: {},
+           chunk_size: chunk_size,
+           content_type: content_type,
+         )
+
          bucket.insert_one(file)
        end
 
-       def file_exists?(uid)
-         !!bucket.files_collection.find(filename: uid).first
-       end
+       def concatenate(uid, part_uids, info = {})
+         file_infos = bucket.files_collection.find(filename: {"$in" => part_uids}).to_a
+         file_infos.sort_by! { |file_info| part_uids.index(file_info[:filename]) }
+
+         if file_infos.count != part_uids.count
+           raise Tus::Error, "some parts for concatenation are missing"
+         end
+
+         chunk_sizes = file_infos.map { |file_info| file_info[:chunkSize] }
+         if chunk_sizes[0..-2].uniq.count > 1
+           raise Tus::Error, "some parts have different chunk sizes, so they cannot be concatenated"
+         end
+
+         if chunk_sizes.uniq != [chunk_sizes.last] && bucket.chunks_collection.find(files_id: file_infos.last[:_id]).count > 1
+           raise Tus::Error, "last part has different chunk size and is composed of more than one chunk"
+         end
+
+         length = file_infos.inject(0) { |sum, file_info| sum + file_info[:length] }
+         chunk_size = file_infos.first[:chunkSize]
+         tus_info = Tus::Info.new(info)
+         content_type = tus_info.metadata["content_type"]
+
+         file = Mongo::Grid::File.new("",
+           filename: uid,
+           metadata: {},
+           chunk_size: chunk_size,
+           length: length,
+           content_type: content_type,
+         )
+
+         bucket.insert_one(file)
+
+         file_infos.inject(0) do |offset, file_info|
+           result = bucket.chunks_collection
+             .find(files_id: file_info[:_id])
+             .update_many("$set" => {files_id: file.id}, "$inc" => {n: offset})
+
+           offset += result.modified_count
+         end
 
-       def read_file(uid)
-         file = bucket.find_one(filename: uid)
-         file.data
+         bucket.files_collection.delete_many(filename: {"$in" => part_uids})
+
+         # server requires us to return the size of the concatenated file
+         length
        end
 
-       def patch_file(uid, content)
+       def patch_file(uid, io, info = {})
          file_info = bucket.files_collection.find(filename: uid).first
-         file_info["md5"] = Digest::MD5.new # hack around not able to update digest
-         file_info = Mongo::Grid::File::Info.new(file_info)
+         raise Tus::NotFound if file_info.nil?
+
+         file_info[:md5] = Digest::MD5.new # hack for `Chunk.split` updating MD5
+         file_info[:chunkSize] ||= io.size
+         file_info = Mongo::Grid::File::Info.new(Mongo::Options::Mapper.transform(file_info, Mongo::Grid::File::Info::MAPPINGS.invert))
+
+         tus_info = Tus::Info.new(info)
+         last_chunk = (tus_info.length && io.size == tus_info.remaining_length)
+
+         if io.size % file_info.chunk_size != 0 && !last_chunk
+           raise Tus::Error,
+             "Input has length #{io.size} but expected it to be a multiple of " \
+             "chunk size #{file_info.chunk_size} or for it to be the last chunk"
+         end
+
          offset = bucket.chunks_collection.find(files_id: file_info.id).count
-         chunks = Mongo::Grid::File::Chunk.split(content, file_info, offset)
+         chunks = Mongo::Grid::File::Chunk.split(io, file_info, offset)
+
          bucket.chunks_collection.insert_many(chunks)
-       end
+         chunks.each { |chunk| chunk.data.data.clear } # deallocate strings
 
-       def download_file(uid)
-         tempfile = Tempfile.new("tus", binmode: true)
-         tempfile.sync = true
-         bucket.download_to_stream_by_name(uid, tempfile)
-         tempfile.path
+         bucket.files_collection.find(filename: uid).update_one("$set" => {
+           length: file_info.length + io.size,
+           uploadDate: Time.now.utc,
+           chunkSize: file_info.chunk_size,
+         })
        end
 
-       def delete_file(uid)
+       def read_info(uid)
          file_info = bucket.files_collection.find(filename: uid).first
-         bucket.delete(file_info.fetch("_id")) if file_info
-       end
+         raise Tus::NotFound if file_info.nil?
 
-       def read_info(uid)
-         info = bucket.files_collection.find(filename: uid).first
-         info.fetch("metadata")
+         file_info.fetch("metadata")
        end
 
        def update_info(uid, info)
-         bucket.files_collection.find(filename: uid).update_one("$set" => {metadata: info})
+         bucket.files_collection.find(filename: uid)
+           .update_one("$set" => {metadata: info})
        end
 
-       def list_files
-         infos = bucket.files_collection.find.to_a
-         infos.map { |info| info.fetch("filename") }
+       def get_file(uid, info = {}, range: nil)
+         file_info = bucket.files_collection.find(filename: uid).first
+         raise Tus::NotFound if file_info.nil?
+
+         filter = {files_id: file_info[:_id]}
+
+         if range
+           chunk_start = range.begin / file_info[:chunkSize] if range.begin
+           chunk_stop = range.end / file_info[:chunkSize] if range.end
+
+           filter[:n] = {}
+           filter[:n].update("$gte" => chunk_start) if chunk_start
+           filter[:n].update("$lte" => chunk_stop) if chunk_stop
+         end
+
+         chunks_view = bucket.chunks_collection.find(filter).read(bucket.read_preference).sort(n: 1)
+
+         chunks = Enumerator.new do |yielder|
+           chunks_view.each do |document|
+             data = document[:data].data
+
+             if document[:n] == chunk_start && document[:n] == chunk_stop
+               byte_start = range.begin % file_info[:chunkSize]
+               byte_stop = range.end % file_info[:chunkSize]
+             elsif document[:n] == chunk_start
+               byte_start = range.begin % file_info[:chunkSize]
+               byte_stop = file_info[:chunkSize] - 1
+             elsif document[:n] == chunk_stop
+               byte_start = 0
+               byte_stop = range.end % file_info[:chunkSize]
+             end
+
+             if byte_start && byte_stop
+               partial_data = data[byte_start..byte_stop]
+               yielder << partial_data
+               partial_data.clear # deallocate chunk string
+             else
+               yielder << data
+             end
+
+             data.clear # deallocate chunk string
+           end
+         end
+
+         Response.new(chunks: chunks, close: ->{chunks_view.close_query})
+       end
+
+       def delete_file(uid, info = {})
+         file_info = bucket.files_collection.find(filename: uid).first
+         bucket.delete(file_info.fetch("_id")) if file_info
        end
 
-       private
+       def expire_files(expiration_date)
+         file_infos = bucket.files_collection.find(uploadDate: {"$lte" => expiration_date}).to_a
+         file_info_ids = file_infos.map { |info| info[:_id] }
+
+         bucket.files_collection.delete_many(_id: {"$in" => file_info_ids})
+         bucket.chunks_collection.delete_many(files_id: {"$in" => file_info_ids})
+       end
+
+       class Response
+         def initialize(chunks:, close:)
+           @chunks = chunks
+           @close = close
+         end
+
+         def each(&block)
+           @chunks.each(&block)
+         end
 
-       def bson_id(uid)
-         BSON::ObjectId(uid)
+         def close
+           @close.call
+         end
        end
      end
    end
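
For reference, a minimal sketch of wiring the reworked Tus::Storage::Gridfs backend into the server. The Mongo connection URL and the Tus::Server.opts[:storage] assignment are illustrative, not part of this diff; the chunk_size: argument is the new option introduced above.

    # illustrative setup, e.g. in config.ru
    require "tus/server"
    require "tus/storage/gridfs"
    require "mongo"

    client = Mongo::Client.new("mongodb://127.0.0.1:27017/mydb") # placeholder URL

    # chunk_size is optional; when set it becomes the GridFS chunk size, and
    # patch_file above requires every PATCH body except the last to be a
    # multiple of it.
    Tus::Server.opts[:storage] = Tus::Storage::Gridfs.new(
      client:     client,
      chunk_size: 256 * 1024,
    )
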
@@ -0,0 +1,242 @@
+ require "aws-sdk"
+
+ require "tus/info"
+ require "tus/checksum"
+ require "tus/errors"
+
+ require "json"
+ require "cgi/util"
+
+ Aws.eager_autoload!(services: ["S3"])
+
+ module Tus
+   module Storage
+     class S3
+       MIN_PART_SIZE = 5 * 1024 * 1024
+
+       attr_reader :client, :bucket, :prefix, :upload_options
+
+       def initialize(bucket:, prefix: nil, upload_options: {}, **client_options)
+         resource = Aws::S3::Resource.new(**client_options)
+
+         @client = resource.client
+         @bucket = resource.bucket(bucket)
+         @prefix = prefix
+         @upload_options = upload_options
+       end
+
+       def create_file(uid, info = {})
+         tus_info = Tus::Info.new(info)
+
+         options = upload_options.dup
+         options[:content_type] = tus_info.metadata["content_type"]
+
+         if filename = tus_info.metadata["filename"]
+           options[:content_disposition] ||= "inline"
+           options[:content_disposition] += "; filename=\"#{CGI.escape(filename).gsub("+", " ")}\""
+         end
+
+         multipart_upload = object(uid).initiate_multipart_upload(options)
+
+         info["multipart_id"] = multipart_upload.id
+         info["multipart_parts"] = []
+       end
+
+       def concatenate(uid, part_uids, info = {})
+         create_file(uid, info)
+
+         multipart_upload = object(uid).multipart_upload(info["multipart_id"])
+
+         queue = Queue.new
+         part_uids.each_with_index do |part_uid, idx|
+           queue << {
+             copy_source: [bucket.name, object(part_uid).key].join("/"),
+             part_number: idx + 1
+           }
+         end
+
+         threads = 10.times.map do
+           Thread.new do
+             Thread.current.abort_on_exception = true
+             completed = []
+
+             begin
+               loop do
+                 multipart_copy_task = queue.deq(true) rescue break
+
+                 part_number = multipart_copy_task[:part_number]
+                 copy_source = multipart_copy_task[:copy_source]
+
+                 part = multipart_upload.part(part_number)
+                 response = part.copy_from(copy_source: copy_source)
+
+                 completed << {
+                   part_number: part_number,
+                   etag: response.copy_part_result.etag,
+                 }
+               end
+
+               completed
+             rescue
+               queue.clear
+               raise
+             end
+           end
+         end
+
+         parts = threads.flat_map(&:value).sort_by { |part| part[:part_number] }
+
+         multipart_upload.complete(multipart_upload: {parts: parts})
+
+         delete(part_uids.flat_map { |part_uid| [object(part_uid), object("#{part_uid}.info")] })
+
+         info.delete("multipart_id")
+         info.delete("multipart_parts")
+       rescue
+         abort_multipart_upload(multipart_upload) if multipart_upload
+         raise
+       end
+
+       def patch_file(uid, io, info = {})
+         tus_info = Tus::Info.new(info)
+         last_chunk = (tus_info.length && io.size == tus_info.remaining_length)
+
+         if io.size < MIN_PART_SIZE && !last_chunk
+           raise Tus::Error, "Chunk size cannot be smaller than 5MB"
+         end
+
+         upload_id = info["multipart_id"]
+         part_number = info["multipart_parts"].count + 1
+
+         multipart_upload = object(uid).multipart_upload(upload_id)
+         multipart_part = multipart_upload.part(part_number)
+         md5 = Tus::Checksum.new("md5").generate(io)
+
+         begin
+           response = multipart_part.upload(body: io, content_md5: md5)
+         rescue Aws::S3::Errors::NoSuchUpload
+           raise Tus::NotFound
+         end
+
+         info["multipart_parts"] << {
+           "part_number" => part_number,
+           "etag" => response.etag[/"(.+)"/, 1],
+         }
+
+         # finalize the multipart upload if this chunk was the last part
+         if last_chunk
+           multipart_upload.complete(
+             multipart_upload: {
+               parts: info["multipart_parts"].map do |part|
+                 {part_number: part["part_number"], etag: part["etag"]}
+               end
+             }
+           )
+
+           info.delete("multipart_id")
+           info.delete("multipart_parts")
+         end
+       end
+
+       def read_info(uid)
+         response = object("#{uid}.info").get
+         JSON.parse(response.body.string)
+       rescue Aws::S3::Errors::NoSuchKey
+         raise Tus::NotFound
+       end
+
+       def update_info(uid, info)
+         object("#{uid}.info").put(body: info.to_json)
+       end
+
+       def get_file(uid, info = {}, range: nil)
+         if range
+           range = "bytes=#{range.begin}-#{range.end}"
+         end
+
+         raw_chunks = Enumerator.new do |yielder|
+           object(uid).get(range: range) do |chunk|
+             yielder << chunk
+             chunk.clear # deallocate string
+           end
+         end
+
+         begin
+           first_chunk = raw_chunks.next
+         rescue Aws::S3::Errors::NoSuchKey
+           raise Tus::NotFound
+         end
+
+         chunks = Enumerator.new do |yielder|
+           yielder << first_chunk
+           loop { yielder << raw_chunks.next }
+         end
+
+         Response.new(chunks: chunks)
+       end
+
+       def delete_file(uid, info = {})
+         if info["multipart_id"]
+           multipart_upload = object(uid).multipart_upload(info["multipart_id"])
+           abort_multipart_upload(multipart_upload)
+
+           delete [object("#{uid}.info")]
+         else
+           delete [object(uid), object("#{uid}.info")]
+         end
+       end
+
+       def expire_files(expiration_date)
+         old_objects = bucket.objects.select do |object|
+           object.last_modified <= expiration_date
+         end
+
+         delete(old_objects)
+
+         bucket.multipart_uploads.each do |multipart_upload|
+           next unless multipart_upload.initiated <= expiration_date
+           most_recent_part = multipart_upload.parts.sort_by(&:last_modified).last
+           if most_recent_part.nil? || most_recent_part.last_modified <= expiration_date
+             abort_multipart_upload(multipart_upload)
+           end
+         end
+       end
+
+       private
+
+       def delete(objects)
+         # S3 can delete maximum of 1000 objects in a single request
+         objects.each_slice(1000) do |objects_batch|
+           delete_params = {objects: objects_batch.map { |object| {key: object.key} }}
+           bucket.delete_objects(delete: delete_params)
+         end
+       end
+
+       # In order to ensure the multipart upload was successfully aborted,
+       # we need to check whether all parts have been deleted, and retry
+       # the abort if the list is nonempty.
+       def abort_multipart_upload(multipart_upload)
+         loop do
+           multipart_upload.abort
+           break unless multipart_upload.parts.any?
+         end
+       rescue Aws::S3::Errors::NoSuchUpload
+         # multipart upload was successfully aborted or doesn't exist
+       end
+
+       def object(key)
+         bucket.object([*prefix, key].join("/"))
+       end
+
+       class Response
+         def initialize(chunks:)
+           @chunks = chunks
+         end
+
+         def each(&block)
+           @chunks.each(&block)
+         end
+       end
+     end
+   end
+ end
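
Similarly, a minimal sketch of configuring the new Tus::Storage::S3 backend. The bucket name, region, and credentials are placeholders, and the Tus::Server.opts[:storage] assignment is illustrative; as the initializer above shows, everything besides bucket:, prefix:, and upload_options: is forwarded to Aws::S3::Resource.new.

    # illustrative setup, e.g. in config.ru
    require "tus/server"
    require "tus/storage/s3"

    Tus::Server.opts[:storage] = Tus::Storage::S3.new(
      bucket:            "my-tus-bucket",  # placeholder bucket name
      prefix:            "cache",          # optional key prefix
      access_key_id:     "<access key>",   # forwarded to Aws::S3::Resource.new
      secret_access_key: "<secret key>",   # forwarded to Aws::S3::Resource.new
      region:            "eu-west-1",      # placeholder region
    )

Because uploads go through S3's multipart API, patch_file above rejects any chunk smaller than MIN_PART_SIZE (5 MB) unless it is the final chunk of the upload.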