RubyGems - backup-backblaze - Versions diffs - 0.1.2 → 0.2.0 - Mend

backup-backblaze 0.1.2 → 0.2.0

Files changed (18) hide show

checksums.yaml +4 -4
data/README.md +2 -0
data/Rakefile +25 -1
data/lib/backup/backblaze.rb +3 -0
data/lib/backup/backblaze/account.rb +41 -85
data/lib/backup/backblaze/api_importer.rb +93 -0
data/lib/backup/backblaze/back_blaze.rb +6 -18
data/lib/backup/backblaze/hash_wrap.rb +2 -0
data/lib/backup/backblaze/http.rb +132 -0
data/lib/backup/backblaze/retry.rb +56 -52
data/lib/backup/backblaze/retry_lookup.rb +112 -0
data/lib/backup/backblaze/upload_file.rb +38 -49
data/lib/backup/backblaze/upload_large_file.rb +61 -83
data/lib/backup/backblaze/url_token.rb +11 -0
data/lib/backup/backblaze/version.rb +1 -1
data/src/retry.pl +157 -0
data/src/retry_lookup.erb +42 -0
metadata +8 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 705a6fd53f7173dc6f8209bd8dd889ad49098f43
-  data.tar.gz: f633db6bde19d0cba7c95cd84b49225f0fb74647
+  metadata.gz: ca99ce6c6832000797f8f2aafc05268d7fafc4cd
+  data.tar.gz: 225aa2e2d4cccbdfda851e48b3c2b59693e98ff2
 SHA512:
-  metadata.gz: 6a945e4dc4597f54f5b15056f6cbb5edd6e0f4494f3412951d47dee96aab2c0d173a97b36d1992d36d94302cd26b8c51504845ac97b12ed01588219cbf0f02b6
-  data.tar.gz: a6864d6aa9d249b432c4e0ed61f778434ae61ae3f4e6eca2b01319a81b3390c9e7473ebbe63fc6c64020d3c123690fe7aebba9eb17e0756513af85ae3b799906
+  metadata.gz: f0eef28fb429db54d39d02c1a382a1d19adb73467566525c77457140f01d6a8605c34d29ceead07aee0ccd3401a5ebca2936514517477a338473fe2382362425
+  data.tar.gz: 243ad8802135d8a102698608d1f5fffb822c99f6aa44c3291935a38d0b09c5030bd4be4fd3780447f63e567d645a491a72061febcf145de959da74ba485777b9

data/README.md CHANGED

@@ -47,6 +47,8 @@ After checking out the repo, run `bin/setup` to install dependencies. Then, run
 To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
+If you need to change the HTTP API retry sequences, you'll need to install SWI-Prolog from http://www.swi-prolog.org/Download.html, because `retry_lookup.rb` is generated from the Prolog source by a rake task.
 ## Contributing
 Bug reports and pull requests are welcome on GitHub at https://github.com/djellemah/backup-backblaze.

data/Rakefile CHANGED

@@ -3,4 +3,28 @@ require "rspec/core/rake_task"
 RSpec::Core::RakeTask.new(:spec)
-task :default => :spec
+task :default => [:spec, :generate_retry]
+task :spec => :generate_retry
+task :build => :generate_retry
+desc 'Generate the retry_lookup.rb file from prolog source'
+task :generate_retry => FileList['lib/backup/backblaze/retry_lookup.rb']
+SWIPL = 'swipl'
+SWIPL_VERSION = '7.4.2'
+def chk_swipl_version
+  version = `#{SWIPL} --version`
+  version =~ /SWI-Prolog version (7.\d+.\d+)/
+  raise unless $1 >= SWIPL_VERSION
+rescue Errno::ENOENT, RuntimeError
+  puts "#{SWIPL} >= #{SWIPL_VERSION} not found on PATH. Install SWI-Prolog version >= #{SWIPL_VERSION} from http://www.swi-prolog.org/Download.html"
+  exit 1
+end
+# Rebuild retry_lookup.rb whenever the erb template or the prolog rules it is
+# generated from are newer than the generated file.
+file 'lib/backup/backblaze/retry_lookup.rb' => %w[src/retry_lookup.erb src/retry.pl] do |task|
+  puts "building #{task} from #{task.source} prolog"
+  # Stops the build with install instructions when swipl is missing or too old.
+  chk_swipl_version
+  # task.source is the first listed prerequisite (the erb template); the
+  # rendered output is redirected into the target file.
+  sh "erb -T- #{task.source} >#{task}"
+end

data/lib/backup/backblaze.rb CHANGED

@@ -2,6 +2,9 @@ require 'backup'
 module Backup
   module Backblaze
+    TEST_HEADERS = {}
+    # uncomment for testing
+    # TEST_HEADERS = {'X-Bz-Test-Mode' => ['fail_some_uploads', 'expire_some_account_authorization_tokens', 'force_cap_exceeded']}
   end
 end

data/lib/backup/backblaze/account.rb CHANGED

@@ -1,8 +1,5 @@
-require 'base64'
-require 'excon'
-require 'json'
 require_relative 'hash_wrap'
+require_relative 'api_importer'
 module Backup
   module Backblaze
@@ -13,78 +10,64 @@ module Backup
         auth!
       end
-      attr_reader :account_id, :app_key, :body
+      attr_reader :account_id, :app_key, :body_wrap
       class NotFound < RuntimeError; end
+      extend ApiImporter
+      import_endpoint :b2_authorize_account do |fn|
+        # @body_wrap will be a Hashwrap
+        # have to set this here for retry-sequence to work properly
+        @body_wrap = fn[account_id, app_key]
+      end
+      # This can be called by retry paths for various api calls. So it might end
+      # up needing synchronisation of some kind.
       def auth!
         # first call b2_authorize_account to get an account_auth_token
-        encoded = Base64.strict_encode64 "#{account_id}:#{app_key}"
-        rsp = Excon.get \
-          'https://api.backblazeb2.com/b2api/v1/b2_authorize_account',
-          headers: {'Authorization' => "Basic #{encoded}"},
-          expects: 200
         # this has to stick around because it has various important data
-        @body = HashWrap.from_json rsp.body
+        b2_authorize_account
-        unless body.allowed.capabilities.include? 'writeFiles'
+        unless body_wrap.allowed.capabilities.include? 'writeFiles'
           raise "app_key #{app_key} does not have write access to account #{account_id}"
         end
       end
       def auth_headers
-        {headers: {'Authorization' => authorization_token}}
+        Hash headers: {
+          'Authorization' => authorization_token,
+        }.merge(TEST_HEADERS)
       end
       def api_url
-        body.apiUrl or raise NotFound, 'apiUrl'
+        body_wrap.apiUrl or raise NotFound, 'apiUrl'
       end
       def authorization_token
-        body.authorizationToken or raise NotFound, 'authorizationToken'
+        body_wrap.authorizationToken or raise NotFound, 'authorizationToken'
       end
       def minimum_part_size
         # why b2 has this as well as minimumPartSize ¯\_(ツ)_/¯
-        body.absoluteMinimumPartSize
+        body_wrap.absoluteMinimumPartSize
       end
       def recommended_part_size
-        body.recommendedPartSize
+        body_wrap.recommendedPartSize
       end
       # The following is leaning towards Bucket.new account, bucket_id/bucket_name
-      # returns [upload_url, auth_token]
-      # Several files can be uploaded to one url.
-      # But uploading files in parallel requires one upload url per thread.
-      def upload_url bucket_id:
-        # get the upload url for a specific bucket id. Buckets can be named.
-        body = {bucketId: bucket_id }
-        rsp = Excon.post \
-          "#{api_url}/b2api/v1/b2_get_upload_url",
-          **auth_headers,
-          body: body.to_json,
-          expects: 200
-        hw = HashWrap.from_json rsp.body
-        return hw.uploadUrl, hw.authorizationToken
+      # body is a hash of string => string
+      import_endpoint :b2_list_buckets do |fn, body|
+        body_wrap = fn[api_url, auth_headers, body]
       end
       # return id for given name, or nil if no such named bucket
       def bucket_id bucket_name:
-        rsp = Excon.post \
-          "#{api_url}/b2api/v1/b2_list_buckets",
-          **auth_headers,
-          body: {bucketName: bucket_name, accountId: account_id}.to_json,
-          expects: 200
-        buckets = (JSON.parse rsp.body)['buckets']
-        found = buckets.find do |ha|
-          ha['bucketName'] == bucket_name
-        end
-        found&.dig 'bucketId' or raise NotFound, "no bucket named #{bucket_name}"
+        buckets = b2_list_buckets(bucketName: bucket_name, accountId: account_id).buckets
+        found = buckets.find{|hw| hw.bucketName == bucket_name}
+        found&.bucketId or raise NotFound, "no bucket named #{bucket_name}"
       end
       # Hurhur
@@ -92,68 +75,41 @@ module Backup
         b2_list_buckets bucketId: bucket_id, accountId: account_id
       end
-      def b2_list_buckets body
-        rsp = Excon.post \
-          "#{api_url}/b2api/v1/b2_list_buckets",
-          **auth_headers,
-          body: body.select{|_,v|v}.to_json,
-          expects: 200
-        HashWrap.from_json rsp
+      import_endpoint :b2_list_file_names do |fn, body|
+        fn[api_url, auth_headers, body]
       end
       # This might be dangerous because large number of file names might come back.
       # But I'm not worrying about that now. Maybe later. Anyway, that's what
       # nextFile and startFile are for.
       def files bucket_name
-        rsp = Excon.post \
-          "#{api_url}/b2api/v1/b2_list_file_names",
-          **auth_headers,
-          body: {bucketId: (bucket_id bucket_name: bucket_name)}.to_json,
-          expects: 200
+        body_wrap = b2_list_file_names bucketId: (bucket_id bucket_name: bucket_name)
         # ignoring the top-level {files:, nextFileName:} structure
-        files_hash = (JSON.parse rsp.body)['files']
-        # ignoring the top-level {files:, nextFileName:} structure
-        files_hash.map do |file_info_hash|
-          HashWrap.new file_info_hash
-        end
+        body_wrap.files
       end
       # This is mostly used to get a fileId for a given fileName
       def file_info bucket_name, filename
-        # It's too much of a PITA to make this Excon call in only one place
-        rsp = Excon.post \
-          "#{api_url}/b2api/v1/b2_list_file_names",
-          **auth_headers,
-          body: {bucketId: (bucket_id bucket_name: bucket_name), maxFileCount: 1, startFileName: filename}.to_json,
-          expects: 200
-        files_hash = (JSON.parse rsp.body)['files']
+        body_wrap = b2_list_file_names bucketId: (bucket_id bucket_name: bucket_name), maxFileCount: 1, startFileName: filename
+        files_hash = body_wrap.files
         raise NotFound, "#{filename} not found" unless files_hash.size == 1
-        HashWrap.new files_hash.first
+        files_hash.first
       end
       # delete the named file in the named bucket
-      # https://www.backblaze.com/b2/docs/b2_delete_file_version.html
+      import_endpoint :b2_delete_file_version do |fn, body|
+        fn[api_url, auth_headers, body]
+      end
       def delete_file bucket_name, filename
         # lookup fileId from given filename
         info = file_info bucket_name, filename
-        # delete the fileId
-        Excon.post \
-          "#{api_url}/b2api/v1/b2_delete_file_version",
-          **auth_headers,
-          body: {fileName: filename, fileId: info.fileId}.to_json,
-          expects: 200
+        body_wrap = b2_delete_file_version fileId: info.fileId, fileName: filename
       # ignore 400 with body containing "code": "file_not_present"
       rescue Excon::Errors::BadRequest => ex
-        hw = HashWrap.from_json ex.response.body
-        raise unless hw.code == 'file_not_present'
+        body_wrap = HashWrap.from_json ex.response.body
+        raise unless body_wrap.code == 'file_not_present'
       end
     end
   end

data/lib/backup/backblaze/api_importer.rb ADDED

@@ -0,0 +1,93 @@
+require 'excon'
+require 'base64'
+require_relative 'hash_wrap'
+require_relative 'retry_lookup'
+require_relative 'retry'
+require_relative 'http'
+module Backup
+  module Backblaze
+    # This is quite complicated and needs some explanation. API retry rules as
+    # defined by Backblaze are not simple. See RetryLookup.retry_sequence for a
+    # cross-product of all the rules :-O
+    #
+    # Some failures require a call to another api endpoint to retry. Some can
+    # back up by two or more calls to other api endpoints. So we can't just use,
+    # say, Excon's retry facility. Also, backblaze sends back a Retry-After
+    # value in some cases, which we ought to respect. Excon's built-in retry
+    # can't do that.
+    #
+    # So to handle that, any class that wants to use the retries must define
+    # methods with the same names as the symbols in retry_sequence.
+    #
+    # import_endpoint is an easifying method to help with that. Parameters will
+    # be unchanged between retries, but whatever happens in the body of an
+    # import_endpoint declaration will be re-evaluated on each retry.
+    #
+    # Also note that, the upload_xxx calls do not actually exist - they use urls
+    # that are returned by calls to get_upload_xxx. For example, there isn't an
+    # actual api endpoint b2_upload_file. We just kinda pretend there is to make
+    # the retry_sequence lookup work.
+    module ApiImporter
+      # define a method on the calling instance that hooks into our
+      # call retry logic.
+      #
+      #  - callable is either a Method, or a symbol for a method in Http
+      def import_endpoint callable, &bind_blk
+        callable_thing, callable_name = case callable
+        when Symbol
+          [(Http.method callable), callable]
+        when Method
+          [callable, callable.name]
+        else
+          raise "dunno what to do with #{callable.inspect}"
+        end
+        # Warn about missing endpoint dependencies. Code paths with retry are
+        # not very likely to be executed. So a warning that they might not work
+        # is useful.
+        chunks = caller.chunk_while{|l| l !~ /#{__FILE__}.*#{__method__}/}.to_a
+        caller_location = chunks.last.first
+        Backup::Backblaze::RetryLookup.retry_dependencies[callable_name].each do |dependency_method|
+          begin
+            m = instance_method dependency_method
+            if m.arity == 0
+              warn "#{caller_location} #{self.name}##{dependency_method} required by #{callable} must have at least one argument (retries)"
+            end
+          rescue NameError
+            warn "#{caller_location} #{self.name}##{dependency_method} required by #{callable} but it was not found"
+          end
+        end
+        # Define the api method on the class, mainly so we end with an instance
+        # method we can call using the symbols in the retry_sequence.
+        # define_method callable_name do |*args, retries: 0, backoff: nil|
+        define_method callable_name do |*args, retries: 0, backoff: nil, **kwargs|
+          begin
+            # initiate retries
+            Retry.call retries, backoff, callable_name do
+              # Execute bind_blk in the context of self, and pass it the
+              # callable_thing along with the args. bind_blk must then call
+              # callable_thing with whatever arguments it needs.
+              # bind_blk can also deal with the return values from callable_thing
+              instance_exec callable_thing, *args, **kwargs, &bind_blk
+            end
+          rescue Retry::RetrySequence => retry_sequence
+            retry_sequence.reduce nil do |_rv, method_name|
+              if method_name == callable_name
+                # we assume that methods with the same name as the original can
+                # receive the same set of arguments as specified in the original
+                # call.
+                send method_name, *args, retries: retries + 1, backoff: retry_sequence.backoff
+              else
+                send method_name, retries: retries + 1, backoff: retry_sequence.backoff
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+end

data/lib/backup/backblaze/back_blaze.rb CHANGED

@@ -1,12 +1,8 @@
-require 'excon'
-require 'base64'
-require 'json'
 require 'pathname'
 require_relative 'upload_file.rb'
 require_relative 'upload_large_file.rb'
 require_relative 'account.rb'
-require_relative 'retry.rb'
 require 'backup/storage/base'
@@ -67,26 +63,18 @@ module Backup
           upload =
           if src_pathname.size > working_part_size * 2.5 || src_pathname.size > 5 * 10**9
-            Logger.info "Storing Large '#{dst}'"
-            ::Backup::Backblaze::UploadLargeFile.new \
+            Backblaze::UploadLargeFile.new \
               src: src_pathname,
               dst: dst,
-              authorization_token: account.authorization_token,
-              url: account.api_url,
+              account: account,
               part_size: working_part_size,
               bucket_id: bucket_id
           else
-            Logger.info "Storing '#{dst}'"
-            # TODO could upload several files in parallel with several of these token_provider
-            token_provider = ::Backup::Backblaze::Retry::TokenProvider.new do
-              account.upload_url bucket_id: bucket_id
-            end
-            ::Backup::Backblaze::UploadFile.new \
+            Backblaze::UploadFile.new \
               src: src_pathname.to_s,
               dst: dst,
-              token_provider: token_provider
+              account: account,
+              bucket_id: bucket_id
           end
           hash_wrap = upload.call
@@ -116,7 +104,7 @@ module Backup
         @account ||= begin
           account_deets = {account_id: account_id}
           Logger.info "Account login for #{account_deets.inspect}"
-          ::Backup::Backblaze::Account.new account_id: account_id, app_key: app_key
+          Backblaze::Account.new account_id: account_id, app_key: app_key
         end
       end
     end

data/lib/backup/backblaze/hash_wrap.rb CHANGED

@@ -1,3 +1,5 @@
+require 'json'
 module Backup
   module Backblaze
     # Intended as a quick-n-dirty way to deep-wrap json objects.

data/lib/backup/backblaze/http.rb ADDED

@@ -0,0 +1,132 @@
+module Backup
+  module Backblaze
+    # Module functions that each perform one http request with Excon.
+    # Every function asks Excon to expect a 200 status and returns the json
+    # response body wrapped by HashWrap.from_json.
+    module Http
+      # GET b2_authorize_account using http basic auth built from
+      # account_id and app_key.
+      module_function def b2_authorize_account account_id, app_key
+        encoded = Base64.strict_encode64 "#{account_id}:#{app_key}"
+        rsp = Excon.get \
+          'https://api.backblazeb2.com/b2api/v1/b2_authorize_account',
+          headers: {'Authorization' => "Basic #{encoded}"},
+          expects: 200
+        HashWrap.from_json rsp.body
+      end
+      # POST b2_get_upload_url for the given bucket id.
+      #
+      # NOTE(review): unlike the other endpoints in this module, auth_headers
+      # is passed as the value of headers: rather than splatted with **, so
+      # this presumably expects the bare header hash and not a
+      # {headers: {...}} wrapper - confirm against the caller.
+      module_function def b2_get_upload_url api_url, auth_headers, bucket_id
+        rsp = Excon.post \
+          "#{api_url}/b2api/v1/b2_get_upload_url",
+          headers: auth_headers,
+          body: {bucketId: bucket_id}.to_json,
+          expects: 200
+        HashWrap.from_json rsp.body
+      end
+      # POST the contents of the file at src to url_token.url. The given
+      # headers are sent with 'Authorization' set to url_token.auth. The whole
+      # file is read into memory with File.read.
+      #
+      # upload with incorrect sha1 responds with
+      #
+      # {"code"=>"bad_request", "message"=>"Sha1 did not match data received", "status"=>400}
+      #
+      # Normal response
+      #
+      #{"accountId"=>"d765e276730e",
+      # "action"=>"upload",
+      # "bucketId"=>"dd8786b5eef2c7d66743001e",
+      # "contentLength"=>6144,
+      # "contentSha1"=>"5ba6cf1b3b3a088d73941052f60e78baf05d91fd",
+      # "contentType"=>"application/octet-stream",
+      # "fileId"=>"4_zdd8786b5eef2c7d66743001e_f1096f3027e0b1927_d20180725_m115148_c002_v0001095_t0047",
+      # "fileInfo"=>{"src_last_modified_millis"=>"1532503455580"},
+      # "fileName"=>"test_file",
+      # "uploadTimestamp"=>1532519508000}
+      module_function def b2_upload_file src, headers, url_token
+        rsp = Excon.post \
+          url_token.url,
+          headers: (headers.merge 'Authorization' => url_token.auth),
+          body: (File.read src),
+          expects: 200
+        HashWrap.from_json rsp.body
+      end
+      # POST b2_list_buckets. auth_headers is splatted into the Excon options;
+      # body is serialised with to_json.
+      module_function def b2_list_buckets api_url, auth_headers, body
+        rsp = Excon.post \
+          "#{api_url}/b2api/v1/b2_list_buckets",
+          **auth_headers,
+          body: body.to_json,
+          expects: 200
+        HashWrap.from_json rsp.body
+      end
+      # POST b2_list_file_names. auth_headers is splatted into the Excon
+      # options; body is serialised with to_json.
+      module_function def b2_list_file_names api_url, auth_headers, body
+        rsp = Excon.post \
+          "#{api_url}/b2api/v1/b2_list_file_names",
+          **auth_headers,
+          body: body.to_json,
+          expects: 200
+        HashWrap.from_json rsp.body
+      end
+      # delete the fileId
+      #
+      # POST b2_delete_file_version. auth_headers is splatted into the Excon
+      # options; body is serialised with to_json.
+      module_function def b2_delete_file_version api_url, auth_headers, body
+        rsp = Excon.post \
+          "#{api_url}/b2api/v1/b2_delete_file_version",
+          **auth_headers,
+          body: body.to_json,
+          expects: 200
+        HashWrap.from_json rsp.body
+      end
+      # POST b2_start_large_file. auth_headers is splatted into the Excon
+      # options; body is serialised with to_json.
+      module_function def b2_start_large_file api_url, auth_headers, body
+        rsp = Excon.post \
+          "#{api_url}/b2api/v1/b2_start_large_file",
+          **auth_headers,
+          body: body.to_json,
+          expects: 200
+        HashWrap.from_json rsp.body
+      end
+      # POST b2_get_upload_part_url for the given file id.
+      module_function def b2_get_upload_part_url api_url, auth_headers, file_id
+        rsp = Excon.post \
+          "#{api_url}/b2api/v1/b2_get_upload_part_url",
+          **auth_headers,
+          body: {fileId: file_id}.to_json,
+          expects: 200
+        # The commented-out lines below fake a 503 failure, apparently kept
+        # around for exercising the retry paths by hand.
+        # hash = JSON.parse rsp.body
+        # hash['code'] = 'emergency error'
+        # rsp.body = hash.to_json
+        # rsp.status = 503
+        # raise (Excon::Errors::ServiceUnavailable.new "yer died", nil, rsp)
+        HashWrap.from_json rsp.body
+      end
+      # POST bytes as the request body to upload_url, sending headers as given.
+      #
+      # NOTE Is there a way to stream this instead of loading multiple 100M chunks
+      # into memory? No, backblaze does not allow parts to use chunked encoding.
+      module_function def b2_upload_part upload_url, headers, bytes
+        # Yes, this is a different pattern to the other Excon.post calls ¯\_(ツ)_/¯
+        # Thread.new{sleep 5; exit!}
+        rsp = Excon.post \
+          upload_url,
+          headers: headers,
+          body: bytes,
+          expects: 200
+        # 200 response will be
+        # fileId The unique ID for this file.
+        # partNumber Which part this is.
+        # contentLength The number of bytes stored in the part.
+        # contentSha1 The SHA1 of the bytes stored in the part.
+        HashWrap.from_json rsp.body
+      end
+      # POST b2_finish_large_file with the file id and shas, which is sent as
+      # partSha1Array.
+      module_function def b2_finish_large_file api_url, auth_headers, file_id, shas
+        rsp = Excon.post \
+          "#{api_url}/b2api/v1/b2_finish_large_file",
+          **auth_headers,
+          body: {fileId: file_id, partSha1Array: shas }.to_json,
+          expects: 200
+        HashWrap.from_json rsp.body
+      end
+    end
+  end
+end