backup-backblaze 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 705a6fd53f7173dc6f8209bd8dd889ad49098f43
-  data.tar.gz: f633db6bde19d0cba7c95cd84b49225f0fb74647
+  metadata.gz: ca99ce6c6832000797f8f2aafc05268d7fafc4cd
+  data.tar.gz: 225aa2e2d4cccbdfda851e48b3c2b59693e98ff2
 SHA512:
-  metadata.gz: 6a945e4dc4597f54f5b15056f6cbb5edd6e0f4494f3412951d47dee96aab2c0d173a97b36d1992d36d94302cd26b8c51504845ac97b12ed01588219cbf0f02b6
-  data.tar.gz: a6864d6aa9d249b432c4e0ed61f778434ae61ae3f4e6eca2b01319a81b3390c9e7473ebbe63fc6c64020d3c123690fe7aebba9eb17e0756513af85ae3b799906
+  metadata.gz: f0eef28fb429db54d39d02c1a382a1d19adb73467566525c77457140f01d6a8605c34d29ceead07aee0ccd3401a5ebca2936514517477a338473fe2382362425
+  data.tar.gz: 243ad8802135d8a102698608d1f5fffb822c99f6aa44c3291935a38d0b09c5030bd4be4fd3780447f63e567d645a491a72061febcf145de959da74ba485777b9
data/README.md CHANGED
@@ -47,6 +47,8 @@ After checking out the repo, run `bin/setup` to install dependencies. Then, run
 
 To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
 
+If you need to change the http api retry sequences, you'll need to install SWI-Prolog from http://www.swi-prolog.org/Download.html
+
 ## Contributing
 
 Bug reports and pull requests are welcome on GitHub at https://github.com/djellemah/backup-backblaze.
data/Rakefile CHANGED
@@ -3,4 +3,28 @@ require "rspec/core/rake_task"
 
 RSpec::Core::RakeTask.new(:spec)
 
-task :default => :spec
+task :default => [:spec, :generate_retry]
+
+task :spec => :generate_retry
+task :build => :generate_retry
+
+desc 'Generate the retry_lookup.rb file from prolog source'
+task :generate_retry => FileList['lib/backup/backblaze/retry_lookup.rb']
+
+SWIPL = 'swipl'
+SWIPL_VERSION = '7.4.2'
+
+def chk_swipl_version
+  version = `#{SWIPL} --version`
+  version =~ /SWI-Prolog version (7.\d+.\d+)/
+  raise unless $1 >= SWIPL_VERSION
+rescue Errno::ENOENT, RuntimeError
+  puts "#{SWIPL} >= #{SWIPL_VERSION} not found on PATH. Install SWI-Prolog version >= #{SWIPL_VERSION} from http://www.swi-prolog.org/Download.html"
+  exit 1
+end
+
+file 'lib/backup/backblaze/retry_lookup.rb' => %w[src/retry_lookup.erb src/retry.pl] do |task|
+  puts "building #{task} from #{task.source} prolog"
+  chk_swipl_version
+  sh "erb -T- #{task.source} >#{task}"
+end
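A note on chk_swipl_version above: `$1 >= SWIPL_VERSION` compares version strings lexically, so a hypothetical SWI-Prolog 7.10.x would sort before 7.4.2 and fail the check. Below is a minimal sketch of a numeric comparison using Gem::Version (ships with RubyGems), assuming the same `swipl --version` output format. Illustrative only, not part of the gem:

    # Sketch only (not part of the gem): numeric version comparison instead of
    # the lexical string comparison used in chk_swipl_version above.
    # Assumes `swipl --version` prints e.g. "SWI-Prolog version 7.4.2 for x86_64-linux".
    SWIPL = 'swipl'
    SWIPL_VERSION = '7.4.2'

    def chk_swipl_version
      found = `#{SWIPL} --version`[/SWI-Prolog version (\d+(?:\.\d+)+)/, 1]
      raise "cannot parse swipl version" if found.nil?
      # Gem::Version compares segment by segment, so 7.10.0 > 7.4.2
      raise unless Gem::Version.new(found) >= Gem::Version.new(SWIPL_VERSION)
    rescue Errno::ENOENT, RuntimeError
      abort "#{SWIPL} >= #{SWIPL_VERSION} not found on PATH"
    end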
@@ -2,6 +2,9 @@ require 'backup'
 
 module Backup
   module Backblaze
+    TEST_HEADERS = {}
+    # uncomment for testing
+    # TEST_HEADERS = {'X-Bz-Test-Mode' => ['fail_some_uploads', 'expire_some_account_authorization_tokens', 'force_cap_exceeded']}
   end
 end
 
@@ -1,8 +1,5 @@
-require 'base64'
-require 'excon'
-require 'json'
-
 require_relative 'hash_wrap'
+require_relative 'api_importer'
 
 module Backup
   module Backblaze
@@ -13,78 +10,64 @@ module Backup
         auth!
       end
 
-      attr_reader :account_id, :app_key, :body
+      attr_reader :account_id, :app_key, :body_wrap
 
       class NotFound < RuntimeError; end
 
+      extend ApiImporter
+
+      import_endpoint :b2_authorize_account do |fn|
+        # @body_wrap will be a Hashwrap
+        # have to set this here for retry-sequence to work properly
+        @body_wrap = fn[account_id, app_key]
+      end
+
+      # This can be called by retry paths for various api calls. So it might end
+      # up needing synchronisation of some kind.
       def auth!
         # first call b2_authorize_account to get an account_auth_token
-        encoded = Base64.strict_encode64 "#{account_id}:#{app_key}"
-        rsp = Excon.get \
-          'https://api.backblazeb2.com/b2api/v1/b2_authorize_account',
-          headers: {'Authorization' => "Basic #{encoded}"},
-          expects: 200
-
         # this has to stick around because it has various important data
-        @body = HashWrap.from_json rsp.body
+        b2_authorize_account
 
-        unless body.allowed.capabilities.include? 'writeFiles'
+        unless body_wrap.allowed.capabilities.include? 'writeFiles'
           raise "app_key #{app_key} does not have write access to account #{account_id}"
         end
       end
 
       def auth_headers
-        {headers: {'Authorization' => authorization_token}}
+        Hash headers: {
+          'Authorization' => authorization_token,
+        }.merge(TEST_HEADERS)
       end
 
       def api_url
-        body.apiUrl or raise NotFound, 'apiUrl'
+        body_wrap.apiUrl or raise NotFound, 'apiUrl'
      end
 
      def authorization_token
-        body.authorizationToken or raise NotFound, 'authorizationToken'
+        body_wrap.authorizationToken or raise NotFound, 'authorizationToken'
      end
 
      def minimum_part_size
        # why b2 has this as well as minimumPartSize ¯\_(ツ)_/¯
-        body.absoluteMinimumPartSize
+        body_wrap.absoluteMinimumPartSize
      end
 
      def recommended_part_size
-        body.recommendedPartSize
+        body_wrap.recommendedPartSize
      end
 
      # The following is leaning towards Bucket.new account, bucket_id/bucket_name
-
-      # returns [upload_url, auth_token]
-      # Several files can be uploaded to one url.
-      # But uploading files in parallel requires one upload url per thread.
-      def upload_url bucket_id:
-        # get the upload url for a specific bucket id. Buckets can be named.
-        body = {bucketId: bucket_id }
-        rsp = Excon.post \
-          "#{api_url}/b2api/v1/b2_get_upload_url",
-          **auth_headers,
-          body: body.to_json,
-          expects: 200
-
-        hw = HashWrap.from_json rsp.body
-        return hw.uploadUrl, hw.authorizationToken
+      # body is a hash of string => string
+      import_endpoint :b2_list_buckets do |fn, body|
+        body_wrap = fn[api_url, auth_headers, body]
      end
 
      # return id for given name, or nil if no such named bucket
      def bucket_id bucket_name:
-        rsp = Excon.post \
-          "#{api_url}/b2api/v1/b2_list_buckets",
-          **auth_headers,
-          body: {bucketName: bucket_name, accountId: account_id}.to_json,
-          expects: 200
-
-        buckets = (JSON.parse rsp.body)['buckets']
-        found = buckets.find do |ha|
-          ha['bucketName'] == bucket_name
-        end
-        found&.dig 'bucketId' or raise NotFound, "no bucket named #{bucket_name}"
+        buckets = b2_list_buckets(bucketName: bucket_name, accountId: account_id).buckets
+        found = buckets.find{|hw| hw.bucketName == bucket_name}
+        found&.bucketId or raise NotFound, "no bucket named #{bucket_name}"
      end
 
      # Hurhur
@@ -92,68 +75,41 @@ module Backup
        b2_list_buckets bucketId: bucket_id, accountId: account_id
      end
 
-      def b2_list_buckets body
-        rsp = Excon.post \
-          "#{api_url}/b2api/v1/b2_list_buckets",
-          **auth_headers,
-          body: body.select{|_,v|v}.to_json,
-          expects: 200
-
-        HashWrap.from_json rsp
+      import_endpoint :b2_list_file_names do |fn, body|
+        fn[api_url, auth_headers, body]
      end
 
      # This might be dangerous because large number of file names might come back.
      # But I'm not worrying about that now. Maybe later. Anyway, that's what
      # nextFile and startFile are for.
      def files bucket_name
-        rsp = Excon.post \
-          "#{api_url}/b2api/v1/b2_list_file_names",
-          **auth_headers,
-          body: {bucketId: (bucket_id bucket_name: bucket_name)}.to_json,
-          expects: 200
-
+        body_wrap = b2_list_file_names bucketId: (bucket_id bucket_name: bucket_name)
        # ignoring the top-level {files:, nextFileName:} structure
-        files_hash = (JSON.parse rsp.body)['files']
-
-        # ignoring the top-level {files:, nextFileName:} structure
-        files_hash.map do |file_info_hash|
-          HashWrap.new file_info_hash
-        end
+        body_wrap.files
      end
 
      # This is mostly used to get a fileId for a given fileName
      def file_info bucket_name, filename
-        # It's too much of a PITA to make this Excon call in only one place
-        rsp = Excon.post \
-          "#{api_url}/b2api/v1/b2_list_file_names",
-          **auth_headers,
-          body: {bucketId: (bucket_id bucket_name: bucket_name), maxFileCount: 1, startFileName: filename}.to_json,
-          expects: 200
-
-        files_hash = (JSON.parse rsp.body)['files']
-
+        body_wrap = b2_list_file_names bucketId: (bucket_id bucket_name: bucket_name), maxFileCount: 1, startFileName: filename
+        files_hash = body_wrap.files
        raise NotFound, "#{filename} not found" unless files_hash.size == 1
-
-        HashWrap.new files_hash.first
+        files_hash.first
      end
 
      # delete the named file in the named bucket
-      # https://www.backblaze.com/b2/docs/b2_delete_file_version.html
+      import_endpoint :b2_delete_file_version do |fn, body|
+        fn[api_url, auth_headers, body]
+      end
+
      def delete_file bucket_name, filename
        # lookup fileId from given filename
        info = file_info bucket_name, filename
-
-        # delete the fileId
-        Excon.post \
-          "#{api_url}/b2api/v1/b2_delete_file_version",
-          **auth_headers,
-          body: {fileName: filename, fileId: info.fileId}.to_json,
-          expects: 200
+        body_wrap = b2_delete_file_version fileId: info.fileId, fileName: filename
 
      # ignore 400 with body containing "code": "file_not_present"
      rescue Excon::Errors::BadRequest => ex
-        hw = HashWrap.from_json ex.response.body
-        raise unless hw.code == 'file_not_present'
+        body_wrap = HashWrap.from_json ex.response.body
+        raise unless body_wrap.code == 'file_not_present'
      end
    end
  end
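Taken together, the reworked Account keeps the same high-level surface as 0.1.2 but routes every call through the imported endpoints. A rough usage sketch (placeholder credentials and bucket name, not taken from the gem's documentation):

    # Sketch only; credentials and bucket name are placeholders.
    account = Backup::Backblaze::Account.new account_id: 'account_id', app_key: 'app_key'
    account.api_url                                      # from the b2_authorize_account response
    bucket_id = account.bucket_id bucket_name: 'my-backups'
    account.files('my-backups').each {|hw| puts hw.fileName}
    account.delete_file 'my-backups', 'stale_backup.tar'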
@@ -0,0 +1,93 @@
+require 'excon'
+require 'base64'
+
+require_relative 'hash_wrap'
+require_relative 'retry_lookup'
+require_relative 'retry'
+require_relative 'http'
+
+module Backup
+  module Backblaze
+    # This is quite complicated and needs some explanation. API retry rules as
+    # defined by Backblaze are not simple. See RetryLookup.retry_sequence for a
+    # cross-product of all the rules :-O
+    #
+    # Some failures require a call to another api endpoint to retry. Some can
+    # backup by two or more calls to other api endpoints. So we can't just use,
+    # say, Excon's retry facility. Also, backblaze sends back a Retry-After
+    # value in some cases, which we ought to respect. Excon's built-in retry
+    # can't do that.
+    #
+    # So to handle that, any class that wants to use the retries must define
+    # methods with the same names as the symbols in retry_sequence.
+    #
+    # import_endpoint is an easifying method to help with that. Parameters will
+    # be unchanged between retries, but whatever happens in the body of an
+    # import_endpoint declaration will be re-evaluated on each retry.
+    #
+    # Also note that, the upload_xxx calls do not actually exist - they use urls
+    # that are returned by calls to get_upload_xxx. For example, there isn't an
+    # actual api endpoint b2_upload_file. We just kinda pretend there is to make
+    # the retry_sequence lookup work.
+    module ApiImporter
+      # define a method on the calling instance that hooks into our
+      # call retry logic.
+      #
+      # - callable is either a Method, or a symbol for a method in Http
+      def import_endpoint callable, &bind_blk
+        callable_thing, callable_name = case callable
+        when Symbol
+          [(Http.method callable), callable]
+        when Method
+          [callable, callable.name]
+        else
+          raise "dunno what to do with #{callable.inspect}"
+        end
+
+        # Warn about missing endpoint dependencies. Code paths with retry are
+        # not very likely to be executed. So a warning that they might not work
+        # is useful.
+        chunks = caller.chunk_while{|l| l !~ /#{__FILE__}.*#{__method__}/}.to_a
+        caller_location = chunks.last.first
+
+        Backup::Backblaze::RetryLookup.retry_dependencies[callable_name].each do |dependency_method|
+          begin
+            m = instance_method dependency_method
+            if m.arity == 0
+              warn "#{caller_location} #{self.name}##{dependency_method} required by #{callable} must have at least one argument (retries)"
+            end
+          rescue NameError
+            warn "#{caller_location} #{self.name}##{dependency_method} required by #{callable} but it was not found"
+          end
+        end
+
+        # Define the api method on the class, mainly so we end with an instance
+        # method we can call using the symbols in the retry_sequence.
+        # define_method callable_name do |*args, retries: 0, backoff: nil|
+        define_method callable_name do |*args, retries: 0, backoff: nil, **kwargs|
+          begin
+            # initiate retries
+            Retry.call retries, backoff, callable_name do
+              # Execute bind_blk in the context of self, and pass it the
+              # callable_thing along with the args. bind_blk must then call
+              # callable_thing with whatever arguments it needs.
+              # bind_blk can also deal with the return values from callable_thing
+              instance_exec callable_thing, *args, **kwargs, &bind_blk
+            end
+          rescue Retry::RetrySequence => retry_sequence
+            retry_sequence.reduce nil do |_rv, method_name|
+              if method_name == callable_name
+                # we assume that methods with the same name as the original can
+                # receive the same set of arguments as specified in the original
+                # call.
+                send method_name, *args, retries: retries + 1, backoff: retry_sequence.backoff
+              else
+                send method_name, retries: retries + 1, backoff: retry_sequence.backoff
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+end
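For orientation, this is the shape of the consuming pattern visible in the Account class hunks earlier in this diff: a class extends ApiImporter and declares, per endpoint, how its own state is handed to the Http function. A condensed sketch, not the full class:

    # Condensed sketch of the consuming pattern shown in the Account hunks above.
    class Client
      extend Backup::Backblaze::ApiImporter

      # Defines an instance method #b2_list_buckets that calls
      # Http.b2_list_buckets through Retry.call; the block runs again on
      # every retry, so api_url and auth_headers are re-read each time.
      import_endpoint :b2_list_buckets do |fn, body|
        fn[api_url, auth_headers, body]
      end
    end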
@@ -1,12 +1,8 @@
-require 'excon'
-require 'base64'
-require 'json'
 require 'pathname'
 
 require_relative 'upload_file.rb'
 require_relative 'upload_large_file.rb'
 require_relative 'account.rb'
-require_relative 'retry.rb'
 
 require 'backup/storage/base'
 
@@ -67,26 +63,18 @@ module Backup
 
         upload =
           if src_pathname.size > working_part_size * 2.5 || src_pathname.size > 5 * 10**9
-            Logger.info "Storing Large '#{dst}'"
-            ::Backup::Backblaze::UploadLargeFile.new \
+            Backblaze::UploadLargeFile.new \
              src: src_pathname,
              dst: dst,
-              authorization_token: account.authorization_token,
-              url: account.api_url,
+              account: account,
              part_size: working_part_size,
              bucket_id: bucket_id
          else
-            Logger.info "Storing '#{dst}'"
-
-            # TODO could upload several files in parallel with several of these token_provider
-            token_provider = ::Backup::Backblaze::Retry::TokenProvider.new do
-              account.upload_url bucket_id: bucket_id
-            end
-
-            ::Backup::Backblaze::UploadFile.new \
+            Backblaze::UploadFile.new \
              src: src_pathname.to_s,
              dst: dst,
-              token_provider: token_provider
+              account: account,
+              bucket_id: bucket_id
          end
 
        hash_wrap = upload.call
@@ -116,7 +104,7 @@ module Backup
        @account ||= begin
          account_deets = {account_id: account_id}
          Logger.info "Account login for #{account_deets.inspect}"
-          ::Backup::Backblaze::Account.new account_id: account_id, app_key: app_key
+          Backblaze::Account.new account_id: account_id, app_key: app_key
        end
      end
    end
@@ -1,3 +1,5 @@
+require 'json'
+
 module Backup
   module Backblaze
     # Intended as a quick-n-dirty way to deep-wrap json objects.
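HashWrap itself only gains the `require 'json'` here, but its role is clear from the call sites above: it wraps parsed JSON so nested values read as method calls rather than string keys. A sketch of that observed behaviour (inferred from usage in this diff, not the actual implementation):

    # Sketch of observed behaviour, inferred from the call sites in this diff.
    json = '{"apiUrl":"https://api002.backblazeb2.com","allowed":{"capabilities":["writeFiles"]}}'
    body_wrap = Backup::Backblaze::HashWrap.from_json json
    body_wrap.apiUrl               # => "https://api002.backblazeb2.com"
    body_wrap.allowed.capabilities # => ["writeFiles"]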
@@ -0,0 +1,132 @@
+module Backup
+  module Backblaze
+    module Http
+      module_function def b2_authorize_account account_id, app_key
+        encoded = Base64.strict_encode64 "#{account_id}:#{app_key}"
+        rsp = Excon.get \
+          'https://api.backblazeb2.com/b2api/v1/b2_authorize_account',
+          headers: {'Authorization' => "Basic #{encoded}"},
+          expects: 200
+        HashWrap.from_json rsp.body
+      end
+
+      module_function def b2_get_upload_url api_url, auth_headers, bucket_id
+        rsp = Excon.post \
+          "#{api_url}/b2api/v1/b2_get_upload_url",
+          headers: auth_headers,
+          body: {bucketId: bucket_id}.to_json,
+          expects: 200
+        HashWrap.from_json rsp.body
+      end
+
+      # upload with incorrect sha1 responds with
+      #
+      # {"code"=>"bad_request", "message"=>"Sha1 did not match data received", "status"=>400}
+      #
+      # Normal response
+      #
+      #{"accountId"=>"d765e276730e",
+      # "action"=>"upload",
+      # "bucketId"=>"dd8786b5eef2c7d66743001e",
+      # "contentLength"=>6144,
+      # "contentSha1"=>"5ba6cf1b3b3a088d73941052f60e78baf05d91fd",
+      # "contentType"=>"application/octet-stream",
+      # "fileId"=>"4_zdd8786b5eef2c7d66743001e_f1096f3027e0b1927_d20180725_m115148_c002_v0001095_t0047",
+      # "fileInfo"=>{"src_last_modified_millis"=>"1532503455580"},
+      # "fileName"=>"test_file",
+      # "uploadTimestamp"=>1532519508000}
+      module_function def b2_upload_file src, headers, url_token
+        rsp = Excon.post \
+          url_token.url,
+          headers: (headers.merge 'Authorization' => url_token.auth),
+          body: (File.read src),
+          expects: 200
+        HashWrap.from_json rsp.body
+      end
+
+      module_function def b2_list_buckets api_url, auth_headers, body
+        rsp = Excon.post \
+          "#{api_url}/b2api/v1/b2_list_buckets",
+          **auth_headers,
+          body: body.to_json,
+          expects: 200
+        HashWrap.from_json rsp.body
+      end
+
+      module_function def b2_list_file_names api_url, auth_headers, body
+        rsp = Excon.post \
+          "#{api_url}/b2api/v1/b2_list_file_names",
+          **auth_headers,
+          body: body.to_json,
+          expects: 200
+        HashWrap.from_json rsp.body
+      end
+
+      # delete the fileId
+      module_function def b2_delete_file_version api_url, auth_headers, body
+        rsp = Excon.post \
+          "#{api_url}/b2api/v1/b2_delete_file_version",
+          **auth_headers,
+          body: body.to_json,
+          expects: 200
+        HashWrap.from_json rsp.body
+      end
+
+      module_function def b2_start_large_file api_url, auth_headers, body
+        rsp = Excon.post \
+          "#{api_url}/b2api/v1/b2_start_large_file",
+          **auth_headers,
+          body: body.to_json,
+          expects: 200
+
+        HashWrap.from_json rsp.body
+      end
+
+      module_function def b2_get_upload_part_url api_url, auth_headers, file_id
+        rsp = Excon.post \
+          "#{api_url}/b2api/v1/b2_get_upload_part_url",
+          **auth_headers,
+          body: {fileId: file_id}.to_json,
+          expects: 200
+
+        # hash = JSON.parse rsp.body
+        # hash['code'] = 'emergency error'
+        # rsp.body = hash.to_json
+        # rsp.status = 503
+        # raise (Excon::Errors::ServiceUnavailable.new "yer died", nil, rsp)
+        HashWrap.from_json rsp.body
+      end
+
+      # NOTE Is there a way to stream this instead of loading multiple 100M chunks
+      # into memory? No, backblaze does not allow parts to use chunked encoding.
+      module_function def b2_upload_part upload_url, headers, bytes
+        # Yes, this is a different pattern to the other Excon.post calls ¯\_(ツ)_/¯
+        # Thread.new{sleep 5; exit!}
+        rsp = Excon.post \
+          upload_url,
+          headers: headers,
+          body: bytes,
+          expects: 200
+
+        # 200 response will be
+        # fileId The unique ID for this file.
+        # partNumber Which part this is.
+        # contentLength The number of bytes stored in the part.
+        # contentSha1 The SHA1 of the bytes stored in the part.
+
+
+        HashWrap.from_json rsp.body
+      end
+
+      module_function def b2_finish_large_file api_url, auth_headers, file_id, shas
+        rsp = Excon.post \
+          "#{api_url}/b2api/v1/b2_finish_large_file",
+          **auth_headers,
+          body: {fileId: file_id, partSha1Array: shas }.to_json,
+          expects: 200
+
+        HashWrap.from_json rsp.body
+      end
+    end
+  end
+end
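Since each wrapper is a module_function returning a HashWrap, the endpoints can also be exercised directly, outside the ApiImporter/Retry machinery. A sketch with placeholder credentials (not taken from the gem's tests):

    # Sketch with placeholder credentials; not taken from the gem's tests.
    auth = Backup::Backblaze::Http.b2_authorize_account 'account_id', 'app_key'
    auth_headers = {headers: {'Authorization' => auth.authorizationToken}}
    listing = Backup::Backblaze::Http.b2_list_buckets auth.apiUrl, auth_headers, {accountId: 'account_id'}
    listing.buckets.each {|b| puts b.bucketName}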