tus-server 0.2.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,72 +1,198 @@
 require "mongo"
-require "stringio"
-require "tempfile"
+
+require "tus/info"
+require "tus/errors"
+
+require "digest"
 
 module Tus
   module Storage
     class Gridfs
-      attr_reader :client, :prefix, :bucket
+      attr_reader :client, :prefix, :bucket, :chunk_size
 
-      def initialize(client:, prefix: "fs")
+      def initialize(client:, prefix: "fs", chunk_size: nil)
         @client = client
         @prefix = prefix
         @bucket = @client.database.fs(bucket_name: @prefix)
         @bucket.send(:ensure_indexes!)
+        @chunk_size = chunk_size
       end
 
-      def create_file(uid, metadata = {})
-        file = Mongo::Grid::File.new("", filename: uid, metadata: metadata)
+      def create_file(uid, info = {})
+        tus_info = Tus::Info.new(info)
+        content_type = tus_info.metadata["content_type"]
+
+        file = Mongo::Grid::File.new("",
+          filename: uid,
+          metadata: {},
+          chunk_size: chunk_size,
+          content_type: content_type,
+        )
+
         bucket.insert_one(file)
       end
 
-      def file_exists?(uid)
-        !!bucket.files_collection.find(filename: uid).first
-      end
+      def concatenate(uid, part_uids, info = {})
+        file_infos = bucket.files_collection.find(filename: {"$in" => part_uids}).to_a
+        file_infos.sort_by! { |file_info| part_uids.index(file_info[:filename]) }
+
+        if file_infos.count != part_uids.count
+          raise Tus::Error, "some parts for concatenation are missing"
+        end
+
+        chunk_sizes = file_infos.map { |file_info| file_info[:chunkSize] }
+        if chunk_sizes[0..-2].uniq.count > 1
+          raise Tus::Error, "some parts have different chunk sizes, so they cannot be concatenated"
+        end
+
+        if chunk_sizes.uniq != [chunk_sizes.last] && bucket.chunks_collection.find(files_id: file_infos.last[:_id]).count > 1
+          raise Tus::Error, "last part has different chunk size and is composed of more than one chunk"
+        end
+
+        length = file_infos.inject(0) { |sum, file_info| sum + file_info[:length] }
+        chunk_size = file_infos.first[:chunkSize]
+        tus_info = Tus::Info.new(info)
+        content_type = tus_info.metadata["content_type"]
+
+        file = Mongo::Grid::File.new("",
+          filename: uid,
+          metadata: {},
+          chunk_size: chunk_size,
+          length: length,
+          content_type: content_type,
+        )
+
+        bucket.insert_one(file)
+
+        file_infos.inject(0) do |offset, file_info|
+          result = bucket.chunks_collection
+            .find(files_id: file_info[:_id])
+            .update_many("$set" => {files_id: file.id}, "$inc" => {n: offset})
+
+          offset += result.modified_count
+        end
 
-      def read_file(uid)
-        file = bucket.find_one(filename: uid)
-        file.data
+        bucket.files_collection.delete_many(filename: {"$in" => part_uids})
+
+        # server requires us to return the size of the concatenated file
+        length
       end
 
-      def patch_file(uid, content)
+      def patch_file(uid, io, info = {})
         file_info = bucket.files_collection.find(filename: uid).first
-        file_info["md5"] = Digest::MD5.new # hack around not able to update digest
-        file_info = Mongo::Grid::File::Info.new(file_info)
+        raise Tus::NotFound if file_info.nil?
+
+        file_info[:md5] = Digest::MD5.new # hack for `Chunk.split` updating MD5
+        file_info[:chunkSize] ||= io.size
+        file_info = Mongo::Grid::File::Info.new(Mongo::Options::Mapper.transform(file_info, Mongo::Grid::File::Info::MAPPINGS.invert))
+
+        tus_info = Tus::Info.new(info)
+        last_chunk = (tus_info.length && io.size == tus_info.remaining_length)
+
+        if io.size % file_info.chunk_size != 0 && !last_chunk
+          raise Tus::Error,
+            "Input has length #{io.size} but expected it to be a multiple of " \
+            "chunk size #{file_info.chunk_size} or for it to be the last chunk"
+        end
+
         offset = bucket.chunks_collection.find(files_id: file_info.id).count
-        chunks = Mongo::Grid::File::Chunk.split(content, file_info, offset)
+        chunks = Mongo::Grid::File::Chunk.split(io, file_info, offset)
+
         bucket.chunks_collection.insert_many(chunks)
-      end
+        chunks.each { |chunk| chunk.data.data.clear } # deallocate strings
 
-      def download_file(uid)
-        tempfile = Tempfile.new("tus", binmode: true)
-        tempfile.sync = true
-        bucket.download_to_stream_by_name(uid, tempfile)
-        tempfile.path
+        bucket.files_collection.find(filename: uid).update_one("$set" => {
+          length: file_info.length + io.size,
+          uploadDate: Time.now.utc,
+          chunkSize: file_info.chunk_size,
+        })
       end
 
-      def delete_file(uid)
+      def read_info(uid)
         file_info = bucket.files_collection.find(filename: uid).first
-        bucket.delete(file_info.fetch("_id")) if file_info
-      end
+        raise Tus::NotFound if file_info.nil?
 
-      def read_info(uid)
-        info = bucket.files_collection.find(filename: uid).first
-        info.fetch("metadata")
+        file_info.fetch("metadata")
       end
 
       def update_info(uid, info)
-        bucket.files_collection.find(filename: uid).update_one("$set" => {metadata: info})
+        bucket.files_collection.find(filename: uid)
+          .update_one("$set" => {metadata: info})
       end
 
-      def list_files
-        infos = bucket.files_collection.find.to_a
-        infos.map { |info| info.fetch("filename") }
+      def get_file(uid, info = {}, range: nil)
+        file_info = bucket.files_collection.find(filename: uid).first
+        raise Tus::NotFound if file_info.nil?
+
+        filter = {files_id: file_info[:_id]}
+
+        if range
+          chunk_start = range.begin / file_info[:chunkSize] if range.begin
+          chunk_stop = range.end / file_info[:chunkSize] if range.end
+
+          filter[:n] = {}
+          filter[:n].update("$gte" => chunk_start) if chunk_start
+          filter[:n].update("$lte" => chunk_stop) if chunk_stop
+        end
+
+        chunks_view = bucket.chunks_collection.find(filter).read(bucket.read_preference).sort(n: 1)
+
+        chunks = Enumerator.new do |yielder|
+          chunks_view.each do |document|
+            data = document[:data].data
+
+            if document[:n] == chunk_start && document[:n] == chunk_stop
+              byte_start = range.begin % file_info[:chunkSize]
+              byte_stop = range.end % file_info[:chunkSize]
+            elsif document[:n] == chunk_start
+              byte_start = range.begin % file_info[:chunkSize]
+              byte_stop = file_info[:chunkSize] - 1
+            elsif document[:n] == chunk_stop
+              byte_start = 0
+              byte_stop = range.end % file_info[:chunkSize]
+            end
+
+            if byte_start && byte_stop
+              partial_data = data[byte_start..byte_stop]
+              yielder << partial_data
+              partial_data.clear # deallocate chunk string
+            else
+              yielder << data
+            end
+
+            data.clear # deallocate chunk string
+          end
+        end
+
+        Response.new(chunks: chunks, close: ->{chunks_view.close_query})
+      end
+
+      def delete_file(uid, info = {})
+        file_info = bucket.files_collection.find(filename: uid).first
+        bucket.delete(file_info.fetch("_id")) if file_info
       end
 
-      private
+      def expire_files(expiration_date)
+        file_infos = bucket.files_collection.find(uploadDate: {"$lte" => expiration_date}).to_a
+        file_info_ids = file_infos.map { |info| info[:_id] }
+
+        bucket.files_collection.delete_many(_id: {"$in" => file_info_ids})
+        bucket.chunks_collection.delete_many(files_id: {"$in" => file_info_ids})
+      end
+
+      class Response
+        def initialize(chunks:, close:)
+          @chunks = chunks
+          @close = close
+        end
+
+        def each(&block)
+          @chunks.each(&block)
+        end
 
-      def bson_id(uid)
-        BSON::ObjectId(uid)
+        def close
+          @close.call
+        end
       end
     end
   end
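
The hunk above rewrites the GridFS backend (Tus::Storage::Gridfs): create_file and patch_file now work with tus info hashes and IO-like chunks, concatenation, ranged reads, and expiration are supported, and the GridFS chunk size is configurable. A minimal configuration sketch (not part of the diff), assuming the class is loaded from tus/storage/gridfs and a local MongoDB instance is reachable; the connection URI and the 256 KiB chunk size are made-up values:

    require "mongo"
    require "tus/storage/gridfs"

    # Hypothetical setup: use a local MongoDB database as the tus storage
    # backend, storing uploads in 256 KiB GridFS chunks.
    client  = Mongo::Client.new("mongodb://127.0.0.1:27017/mydb")
    storage = Tus::Storage::Gridfs.new(client: client, prefix: "fs", chunk_size: 256 * 1024)

Note that patch_file above records the size of the first received chunk when no chunk size has been set (file_info[:chunkSize] ||= io.size) and rejects any non-final chunk whose length is not a multiple of the recorded chunk size.
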
@@ -0,0 +1,242 @@
+require "aws-sdk"
+
+require "tus/info"
+require "tus/checksum"
+require "tus/errors"
+
+require "json"
+require "cgi/util"
+
+Aws.eager_autoload!(services: ["S3"])
+
+module Tus
+  module Storage
+    class S3
+      MIN_PART_SIZE = 5 * 1024 * 1024
+
+      attr_reader :client, :bucket, :prefix, :upload_options
+
+      def initialize(bucket:, prefix: nil, upload_options: {}, **client_options)
+        resource = Aws::S3::Resource.new(**client_options)
+
+        @client = resource.client
+        @bucket = resource.bucket(bucket)
+        @prefix = prefix
+        @upload_options = upload_options
+      end
+
+      def create_file(uid, info = {})
+        tus_info = Tus::Info.new(info)
+
+        options = upload_options.dup
+        options[:content_type] = tus_info.metadata["content_type"]
+
+        if filename = tus_info.metadata["filename"]
+          options[:content_disposition] ||= "inline"
+          options[:content_disposition] += "; filename=\"#{CGI.escape(filename).gsub("+", " ")}\""
+        end
+
+        multipart_upload = object(uid).initiate_multipart_upload(options)
+
+        info["multipart_id"] = multipart_upload.id
+        info["multipart_parts"] = []
+      end
+
+      def concatenate(uid, part_uids, info = {})
+        create_file(uid, info)
+
+        multipart_upload = object(uid).multipart_upload(info["multipart_id"])
+
+        queue = Queue.new
+        part_uids.each_with_index do |part_uid, idx|
+          queue << {
+            copy_source: [bucket.name, object(part_uid).key].join("/"),
+            part_number: idx + 1
+          }
+        end
+
+        threads = 10.times.map do
+          Thread.new do
+            Thread.current.abort_on_exception = true
+            completed = []
+
+            begin
+              loop do
+                multipart_copy_task = queue.deq(true) rescue break
+
+                part_number = multipart_copy_task[:part_number]
+                copy_source = multipart_copy_task[:copy_source]
+
+                part = multipart_upload.part(part_number)
+                response = part.copy_from(copy_source: copy_source)
+
+                completed << {
+                  part_number: part_number,
+                  etag: response.copy_part_result.etag,
+                }
+              end
+
+              completed
+            rescue
+              queue.clear
+              raise
+            end
+          end
+        end
+
+        parts = threads.flat_map(&:value).sort_by { |part| part[:part_number] }
+
+        multipart_upload.complete(multipart_upload: {parts: parts})
+
+        delete(part_uids.flat_map { |part_uid| [object(part_uid), object("#{part_uid}.info")] })
+
+        info.delete("multipart_id")
+        info.delete("multipart_parts")
+      rescue
+        abort_multipart_upload(multipart_upload) if multipart_upload
+        raise
+      end
+
+      def patch_file(uid, io, info = {})
+        tus_info = Tus::Info.new(info)
+        last_chunk = (tus_info.length && io.size == tus_info.remaining_length)
+
+        if io.size < MIN_PART_SIZE && !last_chunk
+          raise Tus::Error, "Chunk size cannot be smaller than 5MB"
+        end
+
+        upload_id = info["multipart_id"]
+        part_number = info["multipart_parts"].count + 1
+
+        multipart_upload = object(uid).multipart_upload(upload_id)
+        multipart_part = multipart_upload.part(part_number)
+        md5 = Tus::Checksum.new("md5").generate(io)
+
+        begin
+          response = multipart_part.upload(body: io, content_md5: md5)
+        rescue Aws::S3::Errors::NoSuchUpload
+          raise Tus::NotFound
+        end
+
+        info["multipart_parts"] << {
+          "part_number" => part_number,
+          "etag" => response.etag[/"(.+)"/, 1],
+        }
+
+        # finalize the multipart upload if this chunk was the last part
+        if last_chunk
+          multipart_upload.complete(
+            multipart_upload: {
+              parts: info["multipart_parts"].map do |part|
+                {part_number: part["part_number"], etag: part["etag"]}
+              end
+            }
+          )
+
+          info.delete("multipart_id")
+          info.delete("multipart_parts")
+        end
+      end
+
+      def read_info(uid)
+        response = object("#{uid}.info").get
+        JSON.parse(response.body.string)
+      rescue Aws::S3::Errors::NoSuchKey
+        raise Tus::NotFound
+      end
+
+      def update_info(uid, info)
+        object("#{uid}.info").put(body: info.to_json)
+      end
+
+      def get_file(uid, info = {}, range: nil)
+        if range
+          range = "bytes=#{range.begin}-#{range.end}"
+        end
+
+        raw_chunks = Enumerator.new do |yielder|
+          object(uid).get(range: range) do |chunk|
+            yielder << chunk
+            chunk.clear # deallocate string
+          end
+        end
+
+        begin
+          first_chunk = raw_chunks.next
+        rescue Aws::S3::Errors::NoSuchKey
+          raise Tus::NotFound
+        end
+
+        chunks = Enumerator.new do |yielder|
+          yielder << first_chunk
+          loop { yielder << raw_chunks.next }
+        end
+
+        Response.new(chunks: chunks)
+      end
+
+      def delete_file(uid, info = {})
+        if info["multipart_id"]
+          multipart_upload = object(uid).multipart_upload(info["multipart_id"])
+          abort_multipart_upload(multipart_upload)
+
+          delete [object("#{uid}.info")]
+        else
+          delete [object(uid), object("#{uid}.info")]
+        end
+      end
+
+      def expire_files(expiration_date)
+        old_objects = bucket.objects.select do |object|
+          object.last_modified <= expiration_date
+        end
+
+        delete(old_objects)
+
+        bucket.multipart_uploads.each do |multipart_upload|
+          next unless multipart_upload.initiated <= expiration_date
+          most_recent_part = multipart_upload.parts.sort_by(&:last_modified).last
+          if most_recent_part.nil? || most_recent_part.last_modified <= expiration_date
+            abort_multipart_upload(multipart_upload)
+          end
+        end
+      end
+
+      private
+
+      def delete(objects)
+        # S3 can delete maximum of 1000 objects in a single request
+        objects.each_slice(1000) do |objects_batch|
+          delete_params = {objects: objects_batch.map { |object| {key: object.key} }}
+          bucket.delete_objects(delete: delete_params)
+        end
+      end
+
+      # In order to ensure the multipart upload was successfully aborted,
+      # we need to check whether all parts have been deleted, and retry
+      # the abort if the list is nonempty.
+      def abort_multipart_upload(multipart_upload)
+        loop do
+          multipart_upload.abort
+          break unless multipart_upload.parts.any?
+        end
+      rescue Aws::S3::Errors::NoSuchUpload
+        # multipart upload was successfully aborted or doesn't exist
+      end
+
+      def object(key)
+        bucket.object([*prefix, key].join("/"))
+      end
+
+      class Response
+        def initialize(chunks:)
+          @chunks = chunks
+        end
+
+        def each(&block)
+          @chunks.each(&block)
+        end
+      end
+    end
+  end
+end
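
This second hunk adds a new S3 backend (Tus::Storage::S3) that stores each upload as an S3 multipart upload: every tus chunk becomes a multipart part (hence the 5 MB minimum for all but the last chunk), upload metadata lives in a companion "#{uid}.info" object, and expire_files both deletes expired objects and aborts stale multipart uploads. A minimal configuration sketch (not part of the diff), assuming the class is loaded from tus/storage/s3; the bucket name, prefix, upload option, and credential environment variables are placeholders:

    require "tus/storage/s3"

    # Hypothetical setup: credentials and region are forwarded to
    # Aws::S3::Resource.new via **client_options, while upload_options is
    # merged into each initiate_multipart_upload call.
    storage = Tus::Storage::S3.new(
      bucket:            "my-tus-uploads",
      prefix:            "tus",
      upload_options:    {server_side_encryption: "AES256"},
      access_key_id:     ENV["AWS_ACCESS_KEY_ID"],
      secret_access_key: ENV["AWS_SECRET_ACCESS_KEY"],
      region:            ENV["AWS_REGION"],
    )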