cos 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +12 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +13 -2
  5. data/Gemfile +4 -1
  6. data/LICENSE +191 -0
  7. data/README.md +2014 -17
  8. data/Rakefile +23 -6
  9. data/bin/cos +325 -0
  10. data/bin/setup +1 -3
  11. data/cos.gemspec +24 -13
  12. data/lib/cos.rb +41 -4
  13. data/lib/cos/api.rb +289 -0
  14. data/lib/cos/bucket.rb +731 -0
  15. data/lib/cos/checkpoint.rb +62 -0
  16. data/lib/cos/client.rb +58 -0
  17. data/lib/cos/config.rb +102 -0
  18. data/lib/cos/dir.rb +301 -0
  19. data/lib/cos/download.rb +252 -0
  20. data/lib/cos/exception.rb +62 -0
  21. data/lib/cos/file.rb +152 -0
  22. data/lib/cos/http.rb +95 -0
  23. data/lib/cos/logging.rb +47 -0
  24. data/lib/cos/resource.rb +201 -0
  25. data/lib/cos/signature.rb +119 -0
  26. data/lib/cos/slice.rb +292 -0
  27. data/lib/cos/struct.rb +49 -0
  28. data/lib/cos/tree.rb +165 -0
  29. data/lib/cos/util.rb +82 -0
  30. data/lib/cos/version.rb +2 -2
  31. data/spec/cos/bucket_spec.rb +562 -0
  32. data/spec/cos/client_spec.rb +77 -0
  33. data/spec/cos/dir_spec.rb +195 -0
  34. data/spec/cos/download_spec.rb +105 -0
  35. data/spec/cos/http_spec.rb +70 -0
  36. data/spec/cos/signature_spec.rb +83 -0
  37. data/spec/cos/slice_spec.rb +302 -0
  38. data/spec/cos/struct_spec.rb +38 -0
  39. data/spec/cos/tree_spec.rb +322 -0
  40. data/spec/cos/util_spec.rb +106 -0
  41. data/test/download_test.rb +44 -0
  42. data/test/list_test.rb +43 -0
  43. data/test/upload_test.rb +48 -0
  44. metadata +132 -21
  45. data/.idea/.name +0 -1
  46. data/.idea/cos.iml +0 -49
  47. data/.idea/encodings.xml +0 -6
  48. data/.idea/misc.xml +0 -14
  49. data/.idea/modules.xml +0 -8
  50. data/.idea/workspace.xml +0 -465
  51. data/bin/console +0 -14
@@ -0,0 +1,252 @@
1
+ # coding: utf-8
2
+
3
+ module COS
4
+
5
+ # Chunked download of large files; supports resuming from a checkpoint and multi-threaded transfer
6
+ # Range Headers support in HTTP1.1(rfc2616)
7
+ class Download < Checkpoint
8
+
9
+ include Logging
10
+
11
+ # Default part size (5 MB)
12
+ PART_SIZE = 5 * 1024 * 1024
13
+
14
+ # Default read buffer size (16 KB)
15
+ READ_SIZE = 16 * 1024
16
+
17
+ required_attrs :bucket, :cos_file, :file_store, :options
18
+ optional_attrs :progress
19
+
20
+ attr_accessor :cpt_file, :session
21
+
22
# Builds a download task.
#
# Attribute handling is delegated to the superclass initializer. The
# checkpoint file is options[:cpt_file] when given; otherwise it defaults
# to the expanded file_store path with a ".cpt" suffix.
#
# @param opts [Hash] attributes, forwarded unchanged to the superclass
def initialize(opts = {})
  super(opts)

  @cpt_file = options[:cpt_file]
  @cpt_file ||= "#{File.expand_path(file_store)}.cpt"
end
27
+
28
# Runs the whole download: restores or creates the session, splits the
# file into parts when none are known yet, fetches the unfinished parts on
# @num_threads worker threads, merges them and verifies the result.
#
# @raise [DownloadError] when the merged file fails the SHA-1 check; the
#   local file is deleted before raising
def download
  logger.info("Begin download, file: #{file_store}, threads: #{@num_threads}")

  # Restore state from a checkpoint, or start a fresh session
  rebuild

  # Split into parts only when the restored state has none
  divide_parts if @parts.empty?

  # Parts that still have to be fetched
  @todo_parts = @parts.reject { |part| part[:done] }

  workers = 1.upto(@num_threads).map do
    logger.debug("#{@num_threads} Threads Downloads")

    Thread.new do
      logger.debug("Create Thread #{Thread.current.object_id}")

      # Keep taking pending parts until the shared queue is empty
      while (part = sync_get_todo_part)
        download_part(part)
      end
    end
  end
  workers.each(&:join)

  # Merge the part files into the target file
  complete

  return if finish?

  File.delete(file_store) if File.exist?(file_store)
  raise DownloadError, 'File downloaded sha1 not match, deleted!'
end
67
+
68
# Records the current download state and reports progress.
#
# Shape of a checkpoint record:
#
# @example
#   states = {
#     :session => 'session',
#     :file => 'file',
#     :file_meta => {
#       :sha1 => 'file sha1',
#       :size => 10000,
#     },
#     :parts => [
#       {:number => 1, :range => [0, 100], :done => false},
#       {:number => 2, :range => [100, 200], :done => true}
#     ],
#     :sha1 => 'checkpoint file sha1'
#   }
#
# The :sha1 entry is not set here; presumably it is added when the record is
# written (confirm in write_checkpoint). The progress callback, when present,
# receives the fraction of finished parts as a Float capped at 1. Nothing is
# written when options[:disable_cpt] is set.
def checkpoint
  logger.debug("Make checkpoint, options[:disable_cpt]: #{options[:disable_cpt] == true}")

  snapshot = sync_get_all_parts
  states = {
    :session   => session,
    :file      => file_store,
    :file_meta => @file_meta,
    :parts     => snapshot
  }

  finished = snapshot.count { |part| part[:done] }

  # Progress callback
  if progress
    ratio =
      if finished == 0 || snapshot.count == 0
        0.to_f
      else
        fraction = finished.to_f / snapshot.size
        fraction > 1 ? 1.to_f : fraction
      end
    progress.call(ratio)
  end

  write_checkpoint(states, cpt_file) unless options[:disable_cpt]

  logger.debug("Download Parts #{finished}/#{snapshot.size}")
end
110
+
111
+ private
112
+
113
# Checks that the merged local file matches the remote file's SHA-1.
#
# Both digests are lower-cased before comparing, so the result does not
# depend on which letter case either side uses for its hex digits (this
# mirrors COSFile#sha1_match?, which also normalises both sides).
#
# @return [Boolean] true when the two digests are equal
def finish?
  expected = @file_meta[:sha1].downcase
  actual   = Util.file_sha1(@file_store).downcase

  expected == actual
end
117
+
118
# Merges the downloaded part files into the target file and cleans up.
#
# Parts are concatenated in ascending :number order. The target is opened
# in binary mode and the parts are copied as raw bytes, so the content is
# reproduced verbatim on every platform (text mode may translate line
# endings and corrupt binary data). Afterwards the checkpoint file (unless
# checkpoints are disabled) and all part files are deleted.
def complete
  # Report 100% progress
  progress.call(1.to_f) if progress

  parts = sync_get_all_parts

  File.open(@file_store, 'wb') do |target|
    parts.sort_by { |part| part[:number] }.each do |part|
      IO.copy_stream(get_part_file(part), target)
    end
  end

  # Download finished: drop the checkpoint, tolerating one that is already gone
  File.delete(cpt_file) if !options[:disable_cpt] && File.exist?(cpt_file)

  # Remove the part files
  parts.each { |part| File.delete(get_part_file(part)) }

  logger.info("Done download, file: #{@file_store}")
end
142
+
143
# Restores the download session from the checkpoint file, or starts a new
# session when checkpoints are disabled or no checkpoint file exists.
def rebuild
  logger.info("Begin rebuild session, checkpoint: #{cpt_file}")

  # Resume only when checkpoints are enabled and a record is on disk
  resumable = !options[:disable_cpt] && File.exist?(cpt_file)

  if resumable
    states = load_checkpoint(cpt_file)

    @session   = states[:session]
    @file_meta = states[:file_meta]
    @parts     = states[:parts]
  else
    initiate
  end

  logger.info("Done rebuild session, Parts: #{@parts.count}")
end
162
+
163
# Starts a brand-new download session.
#
# The session id is "<bucket name>-<remote path>-<current Unix time>". The
# remote file's SHA-1 and stored size are captured in @file_meta, then the
# first checkpoint is recorded.
def initiate
  logger.info('Begin initiate session')

  bucket_name = cos_file.bucket.bucket_name
  remote_path = cos_file.path
  started_at  = Time.now.to_i
  @session    = "#{bucket_name}-#{remote_path}-#{started_at}"

  @file_meta = { :sha1 => cos_file.sha, :size => cos_file.filesize }

  # Save the initial checkpoint
  checkpoint

  logger.info("Done initiate session: #{@session}")
end
178
+
179
# Fetches one part into its part file with an HTTP Range request, marks the
# part as done and records a checkpoint.
#
# @param p [Hash] part descriptor; :range holds [first byte, end byte]. The
#   end byte is reduced by one for the header, e.g. [0, 12] is requested as
#   "Range: bytes=0-11"
def download_part(p)
  logger.debug("Begin download slice: #{p}")

  target = get_part_file(p)
  source = cos_file.url
  range  = p[:range]
  header = { Range: "bytes=#{range.at(0)}-#{range.at(1) - 1}" }

  bucket.client.api.download(source, target, headers: header, bucket: bucket.bucket_name)

  sync_update_part(p.merge(done: true))

  checkpoint

  logger.debug("Done download part: #{p}")
end
202
+
203
# Splits the remote file into consecutive parts and records a checkpoint.
#
# Each part is a Hash with a 1-based :number, a :range of
# [start offset, end offset] where the end is capped at the file size, and
# :done set to false. The part size is @options[:part_size], falling back
# to PART_SIZE.
def divide_parts
  logger.info("Begin divide parts, file: #{file_store}")

  total     = @file_meta[:size]
  part_size = @options[:part_size] || PART_SIZE
  count     = (total - 1) / part_size + 1

  @parts = 1.upto(count).map do |number|
    offset = (number - 1) * part_size
    { :number => number, :range => [offset, [offset + part_size, total].min], :done => false }
  end

  checkpoint

  logger.info("Done divide parts, parts: #{@parts.size}")
end
223
+
224
# Takes the next pending part off @todo_parts while holding @todo_mutex.
#
# @return [Hash, nil] the next part, or nil when none are left
def sync_get_todo_part
  @todo_mutex.synchronize do
    @todo_parts.shift
  end
end
230
+
231
# Stores an updated part descriptor in @parts while holding @all_mutex.
#
# The part is written at index (:number - 1), so part numbers are used as
# 1-based positions in @parts.
#
# @param p [Hash] part descriptor with a :number entry
# @return [Hash] the stored part
def sync_update_part(p)
  slot = p[:number] - 1

  @all_mutex.synchronize do
    @parts[slot] = p
  end
end
237
+
238
# Returns a shallow copy (Array#dup) of @parts taken while holding
# @all_mutex.
#
# @return [Array<Hash>] copy of the parts list
def sync_get_all_parts
  @all_mutex.synchronize do
    @parts.dup
  end
end
244
+
245
# Builds the path of the temporary file that holds one part:
# "<file_store>.part.<number>".
#
# @param p [Hash] part descriptor with a :number entry
# @return [String] part file path
def get_part_file(p)
  format('%s.part.%s', @file_store, p[:number])
end
249
+
250
+ end
251
+
252
+ end
@@ -0,0 +1,62 @@
1
+ # coding: utf-8
2
+
3
+ require 'json'
4
+
5
+ module COS
6
+
7
+ # Base error class; the other error classes in this file inherit from it
8
+ class Exception < RuntimeError; end
9
+
10
# Error reported by the server.
#
# Sample responses:
#   Code: -166,  Message: index does not exist,             HttpCode: 400
#   Code: -173,  Message: directory is not empty,           HttpCode: 400
#   Code: -180,  Message: illegal path,                     HttpCode: 400
#   Code: -288,  Message: process packaging failed,         HttpCode: 400
#   Code: -4018, Message: the same file was already uploaded, HttpCode: 400
#   Code: -5997, Message: backend network error,            HttpCode: 400
#   Code: -5999, Message: invalid parameter,                HttpCode: 400
class ServerError < Exception

  attr_reader :response, :http_code, :error_code

  # @param response [#body, #code] HTTP response; the body is expected to
  #   be a JSON object with 'code' and 'message' entries
  def initialize(response)
    @response  = response
    @http_code = response.code

    payload     = parse_body(response.body)
    @error_code = payload['code']
    @message    = payload['message']
  end

  # @return [String] the server message, or "UnknownError[<http code>]."
  #   when the response carried none
  def message
    @message || "UnknownError[#{http_code}]."
  end

  # @return [String] one-line summary with error code, message and HTTP code
  def to_s
    "ServerError Code: #{error_code}, Message: #{message}, HttpCode: #{http_code}"
  end

  private

  # Parses the response body, falling back to an empty Hash when the body is
  # missing, is not valid JSON, or is not a JSON object. Without this, a
  # non-JSON error page would raise a parser error from the constructor and
  # hide the server error being reported.
  def parse_body(body)
    parsed = JSON.parse(body)
    parsed.is_a?(Hash) ? parsed : {}
  rescue JSON::ParserError, TypeError
    {}
  end

end
40
+
41
# Attribute / parameter error
class AttrError < Exception
end

# Client-side error
class ClientError < Exception
end

# File content is inconsistent
class FileInconsistentError < Exception
end

# Checkpoint record is broken
class CheckpointBrokenError < Exception
end

# Download error
class DownloadError < Exception
end

# File upload is not complete
class FileUploadNotComplete < Exception
end

# Local directory does not exist
class LocalPathNotExist < Exception
end
61
+
62
+ end
@@ -0,0 +1,152 @@
1
+ # coding: utf-8
2
+
3
+ module COS
4
+
5
+ # COS file resource
6
+ class COSFile < ResourceOperator
7
+
8
+ STORAGE_UNITS = %w[B KB MB GB]
9
+ STORAGE_BASE = 1024
10
+
11
# Creates a COS file resource.
#
# @param [Hash] attrs attributes
# @option attrs [Bucket] :bucket COS::Bucket object
# @option attrs [String] :path storage path
# @option attrs [String] :name file name
# @option attrs [String] :ctime creation time (Unix timestamp)
# @option attrs [String] :mtime modification time (Unix timestamp)
# @option attrs [String] :biz_attr business attribute
# @option attrs [String] :filesize stored size of the file
# @option attrs [String] :filelen size of the file
# @option attrs [String] :sha SHA-1 of the file
# @option attrs [String] :access_url access URL of the file
#
# @raise [AttrError] when a required attribute is missing
#
# @return [COS::COSFile]
def initialize(attrs = {})
  # Bare super forwards attrs unchanged to the superclass initializer
  super
  @type = 'file'
end
32
+
33
# Stored size of the file, converted to a number.
#
# @return [Integer] @filesize converted with #to_i
def filesize
  @filesize.to_i
end
39
+
40
# Size of the file, converted to a number.
#
# @return [Integer] @filelen converted with #to_i
def filelen
  @filelen.to_i
end
46
+
47
# Tells whether a local file has the same SHA-1 as this COS file. The
# comparison ignores letter case.
#
# @param file [String] local file path (expanded before use)
# @return [Boolean] false when the local file does not exist or the
#   digests differ
def sha1_match?(file)
  local_path = File.expand_path(file)
  return false unless File.exist?(local_path)

  sha.upcase == Util.file_sha1(local_path).upcase
end
54
+
55
# File size; same value as #filesize.
#
# @return [Integer]
def size
  filesize.to_i
end
61
+
62
# Human-readable file size.
#
# Sizes below STORAGE_BASE bytes are printed as whole bytes; larger sizes
# are scaled by #human_rep and printed with two decimals.
#
# @example
#   512B 1.00KB 1.10MB 1.12GB
#
# @return [String]
def format_size
  bytes = filesize.to_i
  return filesize.to_s + STORAGE_UNITS[0] if bytes < STORAGE_BASE

  scaled = human_rep(bytes)
  format('%.2f%s', scaled[:size].round(2), scaled[:unit])
end
79
+
80
# Whether the file is complete, i.e. fully uploaded: it has an access URL
# and its length equals its stored size.
#
# @return [Boolean]
def complete?
  !access_url.nil? && filelen == filesize
end
86
+
87
# Returns the URL of this file; supports cname and https.
#
# @note For private-read buckets a signed URL is generated automatically.
#
# @param options [Hash] advanced options
# @option options [String] :cname cname domain configured in the COS console
# @option options [Boolean] :https whether to generate an https URL
# @option options [Integer] :expire_seconds signature lifetime in seconds
#   (needed for private-read buckets)
#
# @raise [ServerError] when the server returns an error
#
# @return [String] access URL of the file
def url(options = {})
  # The bucket builds the URL for this file
  bucket.url(self, options)
end
102
+
103
# Downloads this file; supports resuming from a checkpoint and
# multi-threaded transfer.
#
# @param file_store [String] local path to store the file at
# @param options [Hash] advanced options
# @option options [Integer] :min_slice_size minimum file size for a plain
#   full download; larger files use the sliced, multi-threaded, resumable
#   download
# @option options [Integer] :download_retry number of download retries,
#   default 10
# @option options [Boolean] :disable_cpt disables the checkpoint feature.
#   When true, no checkpoint file is written during the download, so a
#   failed download cannot be resumed and the whole file must be downloaded
#   again. When true, :cpt_file is ignored.
# @option options [Integer] :threads number of download threads, default 10
# @option options [Integer] :slice_size size of each slice for sliced
#   downloads, default 5 MB
# @option options [String] :cpt_file checkpoint file. If the given file does
#   not exist, a default one named file.cpt (file being the name of the
#   downloaded file) is created next to the target file. It is updated
#   throughout the download and deleted on success. If the given file
#   already exists, the download resumes from the point it records.
#
# @yield [Float] download progress callback; the value is a fraction
#   between 0 and 1
#
# @raise [ServerError] when the server returns an error
#
# @return [String]
#
# @see Bucket#download
def download(file_store, options = {}, &block)
  # The bucket performs the download for this file
  bucket.download(self, file_store, options, &block)
end
133
+
134
+ private
135
+
136
# Scales a byte count to the largest fitting unit in STORAGE_UNITS.
#
# The unit is found by repeated division instead of a floating-point
# logarithm: a log ratio can land just below an exact power of STORAGE_BASE
# and select the next smaller unit, and it is undefined for 0. Values
# beyond the last unit stay expressed in that last unit.
#
# @param bytes [Numeric] size in bytes
# @return [Hash] { size: Float, unit: String }
def human_rep(bytes)
  number  = Float(bytes)
  max_exp = STORAGE_UNITS.size - 1

  exponent = 0
  while number >= STORAGE_BASE && exponent < max_exp
    number /= STORAGE_BASE
    exponent += 1
  end

  { size: number, unit: STORAGE_UNITS[exponent] }
end
149
+
150
+ end
151
+
152
+ end