backup-backblaze 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 705a6fd53f7173dc6f8209bd8dd889ad49098f43
4
- data.tar.gz: f633db6bde19d0cba7c95cd84b49225f0fb74647
3
+ metadata.gz: ca99ce6c6832000797f8f2aafc05268d7fafc4cd
4
+ data.tar.gz: 225aa2e2d4cccbdfda851e48b3c2b59693e98ff2
5
5
  SHA512:
6
- metadata.gz: 6a945e4dc4597f54f5b15056f6cbb5edd6e0f4494f3412951d47dee96aab2c0d173a97b36d1992d36d94302cd26b8c51504845ac97b12ed01588219cbf0f02b6
7
- data.tar.gz: a6864d6aa9d249b432c4e0ed61f778434ae61ae3f4e6eca2b01319a81b3390c9e7473ebbe63fc6c64020d3c123690fe7aebba9eb17e0756513af85ae3b799906
6
+ metadata.gz: f0eef28fb429db54d39d02c1a382a1d19adb73467566525c77457140f01d6a8605c34d29ceead07aee0ccd3401a5ebca2936514517477a338473fe2382362425
7
+ data.tar.gz: 243ad8802135d8a102698608d1f5fffb822c99f6aa44c3291935a38d0b09c5030bd4be4fd3780447f63e567d645a491a72061febcf145de959da74ba485777b9
data/README.md CHANGED
@@ -47,6 +47,8 @@ After checking out the repo, run `bin/setup` to install dependencies. Then, run
47
47
 
48
48
  To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
49
49
 
50
+ If you need to change the HTTP API retry sequences, you'll need to install SWI-Prolog (version 7.4.2 or later) from http://www.swi-prolog.org/Download.html
51
+
50
52
  ## Contributing
51
53
 
52
54
  Bug reports and pull requests are welcome on GitHub at https://github.com/djellemah/backup-backblaze.
data/Rakefile CHANGED
@@ -3,4 +3,28 @@ require "rspec/core/rake_task"
3
3
 
4
4
  RSpec::Core::RakeTask.new(:spec)
5
5
 
6
- task :default => :spec
6
+ task :default => [:spec, :generate_retry]
7
+
8
+ task :spec => :generate_retry
9
+ task :build => :generate_retry
10
+
11
+ desc 'Generate the retry_lookup.rb file from prolog source'
12
+ task :generate_retry => FileList['lib/backup/backblaze/retry_lookup.rb']
13
+
14
# Name of the SWI-Prolog executable, looked up on PATH.
SWIPL = 'swipl'
# Oldest SWI-Prolog release accepted for regenerating retry_lookup.rb.
SWIPL_VERSION = '7.4.2'

# Verify that SWI-Prolog >= SWIPL_VERSION is available on PATH.
#
# Runs `swipl --version`, pulls the dotted version number out of the banner
# and compares it component by component against SWIPL_VERSION. A plain
# String comparison is not good enough here: it would rank "7.10.0" below
# "7.4.2", and it cannot cope with newer major versions at all.
#
# Returns normally when the installed version is acceptable. Otherwise (the
# executable is missing, the banner is unrecognised, or the version is too
# old) prints installation instructions and exits with status 1.
def chk_swipl_version
  banner = `#{SWIPL} --version`
  found = banner[/SWI-Prolog version (\d+(?:\.\d+)+)/, 1]
  raise "could not determine #{SWIPL} version" unless found
  raise "#{SWIPL} #{found} is too old" unless Gem::Version.new(found) >= Gem::Version.new(SWIPL_VERSION)
rescue Errno::ENOENT, RuntimeError
  puts "#{SWIPL} >= #{SWIPL_VERSION} not found on PATH. Install SWI-Prolog version >= #{SWIPL_VERSION} from http://www.swi-prolog.org/Download.html"
  exit 1
end
25
+
26
+ file 'lib/backup/backblaze/retry_lookup.rb' => %w[src/retry_lookup.erb src/retry.pl] do |task|
27
+ puts "building #{task} from #{task.source} prolog"
28
+ chk_swipl_version
29
+ sh "erb -T- #{task.source} >#{task}"
30
+ end
@@ -2,6 +2,9 @@ require 'backup'
2
2
 
3
3
  module Backup
4
4
  module Backblaze
5
+ TEST_HEADERS = {}
6
+ # uncomment for testing
7
+ # TEST_HEADERS = {'X-Bz-Test-Mode' => ['fail_some_uploads', 'expire_some_account_authorization_tokens', 'force_cap_exceeded']}
5
8
  end
6
9
  end
7
10
 
@@ -1,8 +1,5 @@
1
- require 'base64'
2
- require 'excon'
3
- require 'json'
4
-
5
1
  require_relative 'hash_wrap'
2
+ require_relative 'api_importer'
6
3
 
7
4
  module Backup
8
5
  module Backblaze
@@ -13,78 +10,64 @@ module Backup
13
10
  auth!
14
11
  end
15
12
 
16
- attr_reader :account_id, :app_key, :body
13
+ attr_reader :account_id, :app_key, :body_wrap
17
14
 
18
15
  class NotFound < RuntimeError; end
19
16
 
17
+ extend ApiImporter
18
+
19
+ import_endpoint :b2_authorize_account do |fn|
20
+ # @body_wrap will be a Hashwrap
21
+ # have to set this here for retry-sequence to work properly
22
+ @body_wrap = fn[account_id, app_key]
23
+ end
24
+
25
+ # This can be called by retry paths for various api calls. So it might end
26
+ # up needing synchronisation of some kind.
20
27
  def auth!
21
28
  # first call b2_authorize_account to get an account_auth_token
22
- encoded = Base64.strict_encode64 "#{account_id}:#{app_key}"
23
- rsp = Excon.get \
24
- 'https://api.backblazeb2.com/b2api/v1/b2_authorize_account',
25
- headers: {'Authorization' => "Basic #{encoded}"},
26
- expects: 200
27
-
28
29
  # this has to stick around because it has various important data
29
- @body = HashWrap.from_json rsp.body
30
+ b2_authorize_account
30
31
 
31
- unless body.allowed.capabilities.include? 'writeFiles'
32
+ unless body_wrap.allowed.capabilities.include? 'writeFiles'
32
33
  raise "app_key #{app_key} does not have write access to account #{account_id}"
33
34
  end
34
35
  end
35
36
 
36
37
  def auth_headers
37
- {headers: {'Authorization' => authorization_token}}
38
+ Hash headers: {
39
+ 'Authorization' => authorization_token,
40
+ }.merge(TEST_HEADERS)
38
41
  end
39
42
 
40
43
  def api_url
41
- body.apiUrl or raise NotFound, 'apiUrl'
44
+ body_wrap.apiUrl or raise NotFound, 'apiUrl'
42
45
  end
43
46
 
44
47
  def authorization_token
45
- body.authorizationToken or raise NotFound, 'authorizationToken'
48
+ body_wrap.authorizationToken or raise NotFound, 'authorizationToken'
46
49
  end
47
50
 
48
51
  def minimum_part_size
49
52
  # why b2 has this as well as minimumPartSize ¯\_(ツ)_/¯
50
- body.absoluteMinimumPartSize
53
+ body_wrap.absoluteMinimumPartSize
51
54
  end
52
55
 
53
56
  def recommended_part_size
54
- body.recommendedPartSize
57
+ body_wrap.recommendedPartSize
55
58
  end
56
59
 
57
60
  # The following is leaning towards Bucket.new account, bucket_id/bucket_name
58
-
59
- # returns [upload_url, auth_token]
60
- # Several files can be uploaded to one url.
61
- # But uploading files in parallel requires one upload url per thread.
62
- def upload_url bucket_id:
63
- # get the upload url for a specific bucket id. Buckets can be named.
64
- body = {bucketId: bucket_id }
65
- rsp = Excon.post \
66
- "#{api_url}/b2api/v1/b2_get_upload_url",
67
- **auth_headers,
68
- body: body.to_json,
69
- expects: 200
70
-
71
- hw = HashWrap.from_json rsp.body
72
- return hw.uploadUrl, hw.authorizationToken
61
+ # body is a hash of string => string
62
+ import_endpoint :b2_list_buckets do |fn, body|
63
+ body_wrap = fn[api_url, auth_headers, body]
73
64
  end
74
65
 
75
66
  # return id for given name, or raise NotFound if no such named bucket
76
67
  def bucket_id bucket_name:
77
- rsp = Excon.post \
78
- "#{api_url}/b2api/v1/b2_list_buckets",
79
- **auth_headers,
80
- body: {bucketName: bucket_name, accountId: account_id}.to_json,
81
- expects: 200
82
-
83
- buckets = (JSON.parse rsp.body)['buckets']
84
- found = buckets.find do |ha|
85
- ha['bucketName'] == bucket_name
86
- end
87
- found&.dig 'bucketId' or raise NotFound, "no bucket named #{bucket_name}"
68
+ buckets = b2_list_buckets(bucketName: bucket_name, accountId: account_id).buckets
69
+ found = buckets.find{|hw| hw.bucketName == bucket_name}
70
+ found&.bucketId or raise NotFound, "no bucket named #{bucket_name}"
88
71
  end
89
72
 
90
73
  # Hurhur
@@ -92,68 +75,41 @@ module Backup
92
75
  b2_list_buckets bucketId: bucket_id, accountId: account_id
93
76
  end
94
77
 
95
- def b2_list_buckets body
96
- rsp = Excon.post \
97
- "#{api_url}/b2api/v1/b2_list_buckets",
98
- **auth_headers,
99
- body: body.select{|_,v|v}.to_json,
100
- expects: 200
101
-
102
- HashWrap.from_json rsp
78
+ import_endpoint :b2_list_file_names do |fn, body|
79
+ fn[api_url, auth_headers, body]
103
80
  end
104
81
 
105
82
  # This might be dangerous because large number of file names might come back.
106
83
  # But I'm not worrying about that now. Maybe later. Anyway, that's what
107
84
  # nextFile and startFile are for.
108
85
  def files bucket_name
109
- rsp = Excon.post \
110
- "#{api_url}/b2api/v1/b2_list_file_names",
111
- **auth_headers,
112
- body: {bucketId: (bucket_id bucket_name: bucket_name)}.to_json,
113
- expects: 200
114
-
86
+ body_wrap = b2_list_file_names bucketId: (bucket_id bucket_name: bucket_name)
115
87
  # ignoring the top-level {files:, nextFileName:} structure
116
- files_hash = (JSON.parse rsp.body)['files']
117
-
118
- # ignoring the top-level {files:, nextFileName:} structure
119
- files_hash.map do |file_info_hash|
120
- HashWrap.new file_info_hash
121
- end
88
+ body_wrap.files
122
89
  end
123
90
 
124
91
  # This is mostly used to get a fileId for a given fileName
125
92
  def file_info bucket_name, filename
126
- # It's too much of a PITA to make this Excon call in only one place
127
- rsp = Excon.post \
128
- "#{api_url}/b2api/v1/b2_list_file_names",
129
- **auth_headers,
130
- body: {bucketId: (bucket_id bucket_name: bucket_name), maxFileCount: 1, startFileName: filename}.to_json,
131
- expects: 200
132
-
133
- files_hash = (JSON.parse rsp.body)['files']
134
-
93
+ body_wrap = b2_list_file_names bucketId: (bucket_id bucket_name: bucket_name), maxFileCount: 1, startFileName: filename
94
+ files_hash = body_wrap.files
135
95
  raise NotFound, "#{filename} not found" unless files_hash.size == 1
136
-
137
- HashWrap.new files_hash.first
96
+ files_hash.first
138
97
  end
139
98
 
140
99
  # delete the named file in the named bucket
141
- # https://www.backblaze.com/b2/docs/b2_delete_file_version.html
100
+ import_endpoint :b2_delete_file_version do |fn, body|
101
+ fn[api_url, auth_headers, body]
102
+ end
103
+
142
104
  def delete_file bucket_name, filename
143
105
  # lookup fileId from given filename
144
106
  info = file_info bucket_name, filename
145
-
146
- # delete the fileId
147
- Excon.post \
148
- "#{api_url}/b2api/v1/b2_delete_file_version",
149
- **auth_headers,
150
- body: {fileName: filename, fileId: info.fileId}.to_json,
151
- expects: 200
107
+ body_wrap = b2_delete_file_version fileId: info.fileId, fileName: filename
152
108
 
153
109
  # ignore 400 with body containing "code": "file_not_present"
154
110
  rescue Excon::Errors::BadRequest => ex
155
- hw = HashWrap.from_json ex.response.body
156
- raise unless hw.code == 'file_not_present'
111
+ body_wrap = HashWrap.from_json ex.response.body
112
+ raise unless body_wrap.code == 'file_not_present'
157
113
  end
158
114
  end
159
115
  end
@@ -0,0 +1,93 @@
1
+ require 'excon'
2
+ require 'base64'
3
+
4
+ require_relative 'hash_wrap'
5
+ require_relative 'retry_lookup'
6
+ require_relative 'retry'
7
+ require_relative 'http'
8
+
9
module Backup
  module Backblaze
    # This is quite complicated and needs some explanation. API retry rules as
    # defined by Backblaze are not simple. See RetryLookup.retry_sequence for a
    # cross-product of all the rules :-O
    #
    # Some failures require a call to another api endpoint to retry. Some can
    # back up by two or more calls to other api endpoints. So we can't just use,
    # say, Excon's retry facility. Also, backblaze sends back a Retry-After
    # value in some cases, which we ought to respect. Excon's built-in retry
    # can't do that.
    #
    # So to handle that, any class that wants to use the retries must define
    # methods with the same names as the symbols in retry_sequence.
    #
    # import_endpoint is an easifying method to help with that. Parameters will
    # be unchanged between retries, but whatever happens in the body of an
    # import_endpoint declaration will be re-evaluated on each retry.
    #
    # Also note that, the upload_xxx calls do not actually exist - they use urls
    # that are returned by calls to get_upload_xxx. For example, there isn't an
    # actual api endpoint b2_upload_file. We just kinda pretend there is to make
    # the retry_sequence lookup work.
    module ApiImporter
      # Define an instance method on the extending class that hooks into our
      # call retry logic.
      #
      # - callable is either a Method, or a symbol for a method in Http
      # - bind_blk is instance_exec'd on the receiver with the callable followed
      #   by the arguments of the call; it must invoke the callable itself and
      #   its value is the value of the generated method.
      #
      # The generated method accepts the caller's positional and keyword
      # arguments plus two bookkeeping keywords, retries: and backoff:, which
      # are handed to Retry.call and are not passed on to bind_blk.
      #
      # Raises RuntimeError when callable is neither a Symbol nor a Method.
      def import_endpoint callable, &bind_blk
        callable_thing, callable_name =
          case callable
          when Symbol then [(Http.method callable), callable]
          when Method then [callable, callable.name]
          else raise "dunno what to do with #{callable.inspect}"
          end

        # Warn about missing endpoint dependencies. Code paths with retry are
        # not very likely to be executed. So a warning that they might not work
        # is useful.
        #
        # The reported location is the first frame after the last frame that
        # belongs to this method. The file name is escaped because paths can
        # contain regex metacharacters.
        importer_frame = /#{Regexp.escape __FILE__}.*#{__method__}/
        chunks = caller.chunk_while{|line| line !~ importer_frame}.to_a
        caller_location = chunks.last&.first

        Backup::Backblaze::RetryLookup.retry_dependencies[callable_name].each do |dependency_method|
          begin
            m = instance_method dependency_method
            if m.arity == 0
              warn "#{caller_location} #{self.name}##{dependency_method} required by #{callable} must have at least one argument (retries)"
            end
          rescue NameError
            warn "#{caller_location} #{self.name}##{dependency_method} required by #{callable} but it was not found"
          end
        end

        # Define the api method on the class, mainly so we end with an instance
        # method we can call using the symbols in the retry_sequence.
        define_method callable_name do |*args, retries: 0, backoff: nil, **kwargs|
          begin
            # initiate retries
            Retry.call retries, backoff, callable_name do
              # Execute bind_blk in the context of self, and pass it the
              # callable_thing along with the args. bind_blk must then call
              # callable_thing with whatever arguments it needs.
              # bind_blk can also deal with the return values from callable_thing
              instance_exec callable_thing, *args, **kwargs, &bind_blk
            end
          rescue Retry::RetrySequence => retry_sequence
            # Walk the recovery sequence; the value of the last step is the
            # value of the whole call.
            retry_sequence.reduce nil do |_rv, method_name|
              if method_name == callable_name
                # we assume that methods with the same name as the original can
                # receive the same set of arguments as specified in the original
                # call. That includes keyword arguments: dropping them here
                # would retry the endpoint without its request body.
                send method_name, *args, retries: retries + 1, backoff: retry_sequence.backoff, **kwargs
              else
                send method_name, retries: retries + 1, backoff: retry_sequence.backoff
              end
            end
          end
        end
      end
    end
  end
end
@@ -1,12 +1,8 @@
1
- require 'excon'
2
- require 'base64'
3
- require 'json'
4
1
  require 'pathname'
5
2
 
6
3
  require_relative 'upload_file.rb'
7
4
  require_relative 'upload_large_file.rb'
8
5
  require_relative 'account.rb'
9
- require_relative 'retry.rb'
10
6
 
11
7
  require 'backup/storage/base'
12
8
 
@@ -67,26 +63,18 @@ module Backup
67
63
 
68
64
  upload =
69
65
  if src_pathname.size > working_part_size * 2.5 || src_pathname.size > 5 * 10**9
70
- Logger.info "Storing Large '#{dst}'"
71
- ::Backup::Backblaze::UploadLargeFile.new \
66
+ Backblaze::UploadLargeFile.new \
72
67
  src: src_pathname,
73
68
  dst: dst,
74
- authorization_token: account.authorization_token,
75
- url: account.api_url,
69
+ account: account,
76
70
  part_size: working_part_size,
77
71
  bucket_id: bucket_id
78
72
  else
79
- Logger.info "Storing '#{dst}'"
80
-
81
- # TODO could upload several files in parallel with several of these token_provider
82
- token_provider = ::Backup::Backblaze::Retry::TokenProvider.new do
83
- account.upload_url bucket_id: bucket_id
84
- end
85
-
86
- ::Backup::Backblaze::UploadFile.new \
73
+ Backblaze::UploadFile.new \
87
74
  src: src_pathname.to_s,
88
75
  dst: dst,
89
- token_provider: token_provider
76
+ account: account,
77
+ bucket_id: bucket_id
90
78
  end
91
79
 
92
80
  hash_wrap = upload.call
@@ -116,7 +104,7 @@ module Backup
116
104
  @account ||= begin
117
105
  account_deets = {account_id: account_id}
118
106
  Logger.info "Account login for #{account_deets.inspect}"
119
- ::Backup::Backblaze::Account.new account_id: account_id, app_key: app_key
107
+ Backblaze::Account.new account_id: account_id, app_key: app_key
120
108
  end
121
109
  end
122
110
  end
@@ -1,3 +1,5 @@
1
+ require 'json'
2
+
1
3
  module Backup
2
4
  module Backblaze
3
5
  # Intended as a quick-n-dirty way to deep-wrap json objects.
@@ -0,0 +1,132 @@
1
module Backup
  module Backblaze
    # Module functions that each perform one Backblaze B2 HTTP request via
    # Excon and return the JSON response body wrapped by HashWrap.from_json.
    # Every request is made with expects: 200. Account rescues
    # Excon::Errors::BadRequest around one of these calls, so non-200 responses
    # are expected to surface as Excon errors.
    #
    # Parameters named auth_headers that are splatted (**auth_headers) must be
    # a Hash with Symbol keys of Excon request options, e.g. {headers: {...}}.
    module Http
      # Authorise with the account id / application key pair using HTTP Basic
      # auth and return the wrapped response body.
      module_function def b2_authorize_account account_id, app_key
        encoded = Base64.strict_encode64 "#{account_id}:#{app_key}"
        rsp = Excon.get \
          'https://api.backblazeb2.com/b2api/v1/b2_authorize_account',
          headers: {'Authorization' => "Basic #{encoded}"},
          expects: 200
        HashWrap.from_json rsp.body
      end

      # Ask for an upload url for the bucket with the given id.
      #
      # NOTE(review): unlike the other endpoints here, auth_headers is passed
      # as the value of headers: rather than splatted, so this one expects the
      # bare header Hash and not a {headers: {...}} wrapper - confirm against
      # the callers.
      module_function def b2_get_upload_url api_url, auth_headers, bucket_id
        rsp = Excon.post \
          "#{api_url}/b2api/v1/b2_get_upload_url",
          headers: auth_headers,
          body: {bucketId: bucket_id}.to_json,
          expects: 200
        HashWrap.from_json rsp.body
      end

      # Upload the file at path src to url_token.url, authorised by
      # url_token.auth, which is merged into headers as 'Authorization'.
      #
      # The file is read in binary mode: the payload is arbitrary bytes, so no
      # newline translation or text encoding must be applied to it.
      #
      # upload with incorrect sha1 responds with
      #
      # {"code"=>"bad_request", "message"=>"Sha1 did not match data received", "status"=>400}
      #
      # Normal response
      #
      #{"accountId"=>"d765e276730e",
      # "action"=>"upload",
      # "bucketId"=>"dd8786b5eef2c7d66743001e",
      # "contentLength"=>6144,
      # "contentSha1"=>"5ba6cf1b3b3a088d73941052f60e78baf05d91fd",
      # "contentType"=>"application/octet-stream",
      # "fileId"=>"4_zdd8786b5eef2c7d66743001e_f1096f3027e0b1927_d20180725_m115148_c002_v0001095_t0047",
      # "fileInfo"=>{"src_last_modified_millis"=>"1532503455580"},
      # "fileName"=>"test_file",
      # "uploadTimestamp"=>1532519508000}
      module_function def b2_upload_file src, headers, url_token
        rsp = Excon.post \
          url_token.url,
          headers: (headers.merge 'Authorization' => url_token.auth),
          body: (File.binread src),
          expects: 200
        HashWrap.from_json rsp.body
      end

      # List buckets; body is serialised to JSON as the request body.
      module_function def b2_list_buckets api_url, auth_headers, body
        rsp = Excon.post \
          "#{api_url}/b2api/v1/b2_list_buckets",
          **auth_headers,
          body: body.to_json,
          expects: 200
        HashWrap.from_json rsp.body
      end

      # List file names; body is serialised to JSON as the request body.
      module_function def b2_list_file_names api_url, auth_headers, body
        rsp = Excon.post \
          "#{api_url}/b2api/v1/b2_list_file_names",
          **auth_headers,
          body: body.to_json,
          expects: 200
        HashWrap.from_json rsp.body
      end

      # delete the fileId
      module_function def b2_delete_file_version api_url, auth_headers, body
        rsp = Excon.post \
          "#{api_url}/b2api/v1/b2_delete_file_version",
          **auth_headers,
          body: body.to_json,
          expects: 200
        HashWrap.from_json rsp.body
      end

      # Start a large file; body is serialised to JSON as the request body.
      module_function def b2_start_large_file api_url, auth_headers, body
        rsp = Excon.post \
          "#{api_url}/b2api/v1/b2_start_large_file",
          **auth_headers,
          body: body.to_json,
          expects: 200

        HashWrap.from_json rsp.body
      end

      # Ask for an upload url for parts of the large file with the given id.
      module_function def b2_get_upload_part_url api_url, auth_headers, file_id
        rsp = Excon.post \
          "#{api_url}/b2api/v1/b2_get_upload_part_url",
          **auth_headers,
          body: {fileId: file_id}.to_json,
          expects: 200

        # Manual fault injection, for exercising the retry paths by hand:
        # hash = JSON.parse rsp.body
        # hash['code'] = 'emergency error'
        # rsp.body = hash.to_json
        # rsp.status = 503
        # raise (Excon::Errors::ServiceUnavailable.new "yer died", nil, rsp)
        HashWrap.from_json rsp.body
      end

      # Upload one part (bytes) of a large file to upload_url.
      #
      # NOTE Is there a way to stream this instead of loading multiple 100M chunks
      # into memory? No, backblaze does not allow parts to use chunked encoding.
      module_function def b2_upload_part upload_url, headers, bytes
        # Yes, this is a different pattern to the other Excon.post calls ¯\_(ツ)_/¯
        # Manual fault injection: Thread.new{sleep 5; exit!}
        rsp = Excon.post \
          upload_url,
          headers: headers,
          body: bytes,
          expects: 200

        # 200 response will be
        # fileId The unique ID for this file.
        # partNumber Which part this is.
        # contentLength The number of bytes stored in the part.
        # contentSha1 The SHA1 of the bytes stored in the part.

        HashWrap.from_json rsp.body
      end

      # Finish the large file with the given id; shas is sent as partSha1Array.
      module_function def b2_finish_large_file api_url, auth_headers, file_id, shas
        rsp = Excon.post \
          "#{api_url}/b2api/v1/b2_finish_large_file",
          **auth_headers,
          body: {fileId: file_id, partSha1Array: shas }.to_json,
          expects: 200

        HashWrap.from_json rsp.body
      end
    end
  end
end