backup-backblaze 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 705a6fd53f7173dc6f8209bd8dd889ad49098f43
-  data.tar.gz: f633db6bde19d0cba7c95cd84b49225f0fb74647
+  metadata.gz: ca99ce6c6832000797f8f2aafc05268d7fafc4cd
+  data.tar.gz: 225aa2e2d4cccbdfda851e48b3c2b59693e98ff2
 SHA512:
-  metadata.gz: 6a945e4dc4597f54f5b15056f6cbb5edd6e0f4494f3412951d47dee96aab2c0d173a97b36d1992d36d94302cd26b8c51504845ac97b12ed01588219cbf0f02b6
-  data.tar.gz: a6864d6aa9d249b432c4e0ed61f778434ae61ae3f4e6eca2b01319a81b3390c9e7473ebbe63fc6c64020d3c123690fe7aebba9eb17e0756513af85ae3b799906
+  metadata.gz: f0eef28fb429db54d39d02c1a382a1d19adb73467566525c77457140f01d6a8605c34d29ceead07aee0ccd3401a5ebca2936514517477a338473fe2382362425
+  data.tar.gz: 243ad8802135d8a102698608d1f5fffb822c99f6aa44c3291935a38d0b09c5030bd4be4fd3780447f63e567d645a491a72061febcf145de959da74ba485777b9
data/README.md CHANGED
@@ -47,6 +47,8 @@ After checking out the repo, run `bin/setup` to install dependencies. Then, run
 
 To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
 
+If you need to change the http api retry sequences, you'll need to install SWI-Prolog from http://www.swi-prolog.org/Download.html
+
 ## Contributing
 
 Bug reports and pull requests are welcome on GitHub at https://github.com/djellemah/backup-backblaze.
data/Rakefile CHANGED
@@ -3,4 +3,28 @@ require "rspec/core/rake_task"
 
 RSpec::Core::RakeTask.new(:spec)
 
-task :default => :spec
+task :default => [:spec, :generate_retry]
+
+task :spec => :generate_retry
+task :build => :generate_retry
+
+desc 'Generate the retry_lookup.rb file from prolog source'
+task :generate_retry => FileList['lib/backup/backblaze/retry_lookup.rb']
+
+SWIPL = 'swipl'
+SWIPL_VERSION = '7.4.2'
+
+def chk_swipl_version
+  version = `#{SWIPL} --version`
+  version =~ /SWI-Prolog version (7.\d+.\d+)/
+  raise unless $1 >= SWIPL_VERSION
+rescue Errno::ENOENT, RuntimeError
+  puts "#{SWIPL} >= #{SWIPL_VERSION} not found on PATH. Install SWI-Prolog version >= #{SWIPL_VERSION} from http://www.swi-prolog.org/Download.html"
+  exit 1
+end
+
+file 'lib/backup/backblaze/retry_lookup.rb' => %w[src/retry_lookup.erb src/retry.pl] do |task|
+  puts "building #{task} from #{task.source} prolog"
+  chk_swipl_version
+  sh "erb -T- #{task.source} >#{task}"
+end
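A note on chk_swipl_version above: `$1 >= SWIPL_VERSION` compares version strings lexically, so a hypothetical SWI-Prolog 7.10.x would sort before 7.4.2 and fail the check. Below is a minimal sketch of a numeric comparison using Gem::Version (ships with RubyGems), assuming the same `swipl --version` output format. Illustrative only, not part of the gem:

    # Sketch only (not part of the gem): numeric version comparison instead of
    # the lexical string comparison used in chk_swipl_version above.
    # Assumes `swipl --version` prints e.g. "SWI-Prolog version 7.4.2 for x86_64-linux".
    SWIPL = 'swipl'
    SWIPL_VERSION = '7.4.2'

    def chk_swipl_version
      found = `#{SWIPL} --version`[/SWI-Prolog version (\d+(?:\.\d+)+)/, 1]
      raise "cannot parse swipl version" if found.nil?
      # Gem::Version compares segment by segment, so 7.10.0 > 7.4.2
      raise unless Gem::Version.new(found) >= Gem::Version.new(SWIPL_VERSION)
    rescue Errno::ENOENT, RuntimeError
      abort "#{SWIPL} >= #{SWIPL_VERSION} not found on PATH"
    end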
@@ -2,6 +2,9 @@ require 'backup'
 
 module Backup
   module Backblaze
+    TEST_HEADERS = {}
+    # uncomment for testing
+    # TEST_HEADERS = {'X-Bz-Test-Mode' => ['fail_some_uploads', 'expire_some_account_authorization_tokens', 'force_cap_exceeded']}
   end
 end
 
@@ -1,8 +1,5 @@
-require 'base64'
-require 'excon'
-require 'json'
-
 require_relative 'hash_wrap'
+require_relative 'api_importer'
 
 module Backup
   module Backblaze
@@ -13,78 +10,64 @@ module Backup
         auth!
       end
 
-      attr_reader :account_id, :app_key, :body
+      attr_reader :account_id, :app_key, :body_wrap
 
       class NotFound < RuntimeError; end
 
+      extend ApiImporter
+
+      import_endpoint :b2_authorize_account do |fn|
+        # @body_wrap will be a Hashwrap
+        # have to set this here for retry-sequence to work properly
+        @body_wrap = fn[account_id, app_key]
+      end
+
+      # This can be called by retry paths for various api calls. So it might end
+      # up needing synchronisation of some kind.
       def auth!
         # first call b2_authorize_account to get an account_auth_token
-        encoded = Base64.strict_encode64 "#{account_id}:#{app_key}"
-        rsp = Excon.get \
-          'https://api.backblazeb2.com/b2api/v1/b2_authorize_account',
-          headers: {'Authorization' => "Basic #{encoded}"},
-          expects: 200
-
         # this has to stick around because it has various important data
-        @body = HashWrap.from_json rsp.body
+        b2_authorize_account
 
-        unless body.allowed.capabilities.include? 'writeFiles'
+        unless body_wrap.allowed.capabilities.include? 'writeFiles'
           raise "app_key #{app_key} does not have write access to account #{account_id}"
         end
       end
 
       def auth_headers
-        {headers: {'Authorization' => authorization_token}}
+        Hash headers: {
+          'Authorization' => authorization_token,
+        }.merge(TEST_HEADERS)
       end
 
       def api_url
-        body.apiUrl or raise NotFound, 'apiUrl'
+        body_wrap.apiUrl or raise NotFound, 'apiUrl'
      end
 
      def authorization_token
-        body.authorizationToken or raise NotFound, 'authorizationToken'
+        body_wrap.authorizationToken or raise NotFound, 'authorizationToken'
      end
 
      def minimum_part_size
        # why b2 has this as well as minimumPartSize ¯\_(ツ)_/¯
-        body.absoluteMinimumPartSize
+        body_wrap.absoluteMinimumPartSize
      end
 
      def recommended_part_size
-        body.recommendedPartSize
+        body_wrap.recommendedPartSize
      end
 
      # The following is leaning towards Bucket.new account, bucket_id/bucket_name
-
-      # returns [upload_url, auth_token]
-      # Several files can be uploaded to one url.
-      # But uploading files in parallel requires one upload url per thread.
-      def upload_url bucket_id:
-        # get the upload url for a specific bucket id. Buckets can be named.
-        body = {bucketId: bucket_id }
-        rsp = Excon.post \
-          "#{api_url}/b2api/v1/b2_get_upload_url",
-          **auth_headers,
-          body: body.to_json,
-          expects: 200
-
-        hw = HashWrap.from_json rsp.body
-        return hw.uploadUrl, hw.authorizationToken
+      # body is a hash of string => string
+      import_endpoint :b2_list_buckets do |fn, body|
+        body_wrap = fn[api_url, auth_headers, body]
      end
 
      # return id for given name, or nil if no such named bucket
      def bucket_id bucket_name:
-        rsp = Excon.post \
-          "#{api_url}/b2api/v1/b2_list_buckets",
-          **auth_headers,
-          body: {bucketName: bucket_name, accountId: account_id}.to_json,
-          expects: 200
-
-        buckets = (JSON.parse rsp.body)['buckets']
-        found = buckets.find do |ha|
-          ha['bucketName'] == bucket_name
-        end
-        found&.dig 'bucketId' or raise NotFound, "no bucket named #{bucket_name}"
+        buckets = b2_list_buckets(bucketName: bucket_name, accountId: account_id).buckets
+        found = buckets.find{|hw| hw.bucketName == bucket_name}
+        found&.bucketId or raise NotFound, "no bucket named #{bucket_name}"
      end
 
      # Hurhur
@@ -92,68 +75,41 @@ module Backup
        b2_list_buckets bucketId: bucket_id, accountId: account_id
      end
 
-      def b2_list_buckets body
-        rsp = Excon.post \
-          "#{api_url}/b2api/v1/b2_list_buckets",
-          **auth_headers,
-          body: body.select{|_,v|v}.to_json,
-          expects: 200
-
-        HashWrap.from_json rsp
+      import_endpoint :b2_list_file_names do |fn, body|
+        fn[api_url, auth_headers, body]
      end
 
      # This might be dangerous because large number of file names might come back.
      # But I'm not worrying about that now. Maybe later. Anyway, that's what
      # nextFile and startFile are for.
      def files bucket_name
-        rsp = Excon.post \
-          "#{api_url}/b2api/v1/b2_list_file_names",
-          **auth_headers,
-          body: {bucketId: (bucket_id bucket_name: bucket_name)}.to_json,
-          expects: 200
-
+        body_wrap = b2_list_file_names bucketId: (bucket_id bucket_name: bucket_name)
        # ignoring the top-level {files:, nextFileName:} structure
-        files_hash = (JSON.parse rsp.body)['files']
-
-        # ignoring the top-level {files:, nextFileName:} structure
-        files_hash.map do |file_info_hash|
-          HashWrap.new file_info_hash
-        end
+        body_wrap.files
      end
 
      # This is mostly used to get a fileId for a given fileName
      def file_info bucket_name, filename
-        # It's too much of a PITA to make this Excon call in only one place
-        rsp = Excon.post \
-          "#{api_url}/b2api/v1/b2_list_file_names",
-          **auth_headers,
-          body: {bucketId: (bucket_id bucket_name: bucket_name), maxFileCount: 1, startFileName: filename}.to_json,
-          expects: 200
-
-        files_hash = (JSON.parse rsp.body)['files']
-
+        body_wrap = b2_list_file_names bucketId: (bucket_id bucket_name: bucket_name), maxFileCount: 1, startFileName: filename
+        files_hash = body_wrap.files
        raise NotFound, "#{filename} not found" unless files_hash.size == 1
-
-        HashWrap.new files_hash.first
+        files_hash.first
      end
 
      # delete the named file in the named bucket
-      # https://www.backblaze.com/b2/docs/b2_delete_file_version.html
+      import_endpoint :b2_delete_file_version do |fn, body|
+        fn[api_url, auth_headers, body]
+      end
+
      def delete_file bucket_name, filename
        # lookup fileId from given filename
        info = file_info bucket_name, filename
-
-        # delete the fileId
-        Excon.post \
-          "#{api_url}/b2api/v1/b2_delete_file_version",
-          **auth_headers,
-          body: {fileName: filename, fileId: info.fileId}.to_json,
-          expects: 200
+        body_wrap = b2_delete_file_version fileId: info.fileId, fileName: filename
 
      # ignore 400 with body containing "code": "file_not_present"
      rescue Excon::Errors::BadRequest => ex
-        hw = HashWrap.from_json ex.response.body
-        raise unless hw.code == 'file_not_present'
+        body_wrap = HashWrap.from_json ex.response.body
+        raise unless body_wrap.code == 'file_not_present'
      end
    end
  end
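Taken together, the reworked Account keeps the same high-level surface as 0.1.2 but routes every call through the imported endpoints. A rough usage sketch (placeholder credentials and bucket name, not taken from the gem's documentation):

    # Sketch only; credentials and bucket name are placeholders.
    account = Backup::Backblaze::Account.new account_id: 'account_id', app_key: 'app_key'
    account.api_url                                      # from the b2_authorize_account response
    bucket_id = account.bucket_id bucket_name: 'my-backups'
    account.files('my-backups').each {|hw| puts hw.fileName}
    account.delete_file 'my-backups', 'stale_backup.tar'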
@@ -0,0 +1,93 @@
+require 'excon'
+require 'base64'
+
+require_relative 'hash_wrap'
+require_relative 'retry_lookup'
+require_relative 'retry'
+require_relative 'http'
+
+module Backup
+  module Backblaze
+    # This is quite complicated and needs some explanation. API retry rules as
+    # defined by Backblaze are not simple. See RetryLookup.retry_sequence for a
+    # cross-product of all the rules :-O
+    #
+    # Some failures require a call to another api endpoint to retry. Some can
+    # backup by two or more calls to other api endpoints. So we can't just use,
+    # say, Excon's retry facility. Also, backblaze sends back a Retry-After
+    # value in some cases, which we ought to respect. Excon's built-in retry
+    # can't do that.
+    #
+    # So to handle that, any class that wants to use the retries must define
+    # methods with the same names as the symbols in retry_sequence.
+    #
+    # import_endpoint is an easifying method to help with that. Parameters will
+    # be unchanged between retries, but whatever happens in the body of an
+    # import_endpoint declaration will be re-evaluated on each retry.
+    #
+    # Also note that, the upload_xxx calls do not actually exist - they use urls
+    # that are returned by calls to get_upload_xxx. For example, there isn't an
+    # actual api endpoint b2_upload_file. We just kinda pretend there is to make
+    # the retry_sequence lookup work.
+    module ApiImporter
+      # define a method on the calling instance that hooks into our
+      # call retry logic.
+      #
+      # - callable is either a Method, or a symbol for a method in Http
+      def import_endpoint callable, &bind_blk
+        callable_thing, callable_name = case callable
+        when Symbol
+          [(Http.method callable), callable]
+        when Method
+          [callable, callable.name]
+        else
+          raise "dunno what to do with #{callable.inspect}"
+        end
+
+        # Warn about missing endpoint dependencies. Code paths with retry are
+        # not very likely to be executed. So a warning that they might not work
+        # is useful.
+        chunks = caller.chunk_while{|l| l !~ /#{__FILE__}.*#{__method__}/}.to_a
+        caller_location = chunks.last.first
+
+        Backup::Backblaze::RetryLookup.retry_dependencies[callable_name].each do |dependency_method|
+          begin
+            m = instance_method dependency_method
+            if m.arity == 0
+              warn "#{caller_location} #{self.name}##{dependency_method} required by #{callable} must have at least one argument (retries)"
+            end
+          rescue NameError
+            warn "#{caller_location} #{self.name}##{dependency_method} required by #{callable} but it was not found"
+          end
+        end
+
+        # Define the api method on the class, mainly so we end with an instance
+        # method we can call using the symbols in the retry_sequence.
+        # define_method callable_name do |*args, retries: 0, backoff: nil|
+        define_method callable_name do |*args, retries: 0, backoff: nil, **kwargs|
+          begin
+            # initiate retries
+            Retry.call retries, backoff, callable_name do
+              # Execute bind_blk in the context of self, and pass it the
+              # callable_thing along with the args. bind_blk must then call
+              # callable_thing with whatever arguments it needs.
+              # bind_blk can also deal with the return values from callable_thing
+              instance_exec callable_thing, *args, **kwargs, &bind_blk
+            end
+          rescue Retry::RetrySequence => retry_sequence
+            retry_sequence.reduce nil do |_rv, method_name|
+              if method_name == callable_name
+                # we assume that methods with the same name as the original can
+                # receive the same set of arguments as specified in the original
+                # call.
+                send method_name, *args, retries: retries + 1, backoff: retry_sequence.backoff
+              else
+                send method_name, retries: retries + 1, backoff: retry_sequence.backoff
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+end
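For orientation, this is the shape of the consuming pattern visible in the Account class hunks earlier in this diff: a class extends ApiImporter and declares, per endpoint, how its own state is handed to the Http function. A condensed sketch, not the full class:

    # Condensed sketch of the consuming pattern shown in the Account hunks above.
    class Client
      extend Backup::Backblaze::ApiImporter

      # Defines an instance method #b2_list_buckets that calls
      # Http.b2_list_buckets through Retry.call; the block runs again on
      # every retry, so api_url and auth_headers are re-read each time.
      import_endpoint :b2_list_buckets do |fn, body|
        fn[api_url, auth_headers, body]
      end
    end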
@@ -1,12 +1,8 @@
-require 'excon'
-require 'base64'
-require 'json'
 require 'pathname'
 
 require_relative 'upload_file.rb'
 require_relative 'upload_large_file.rb'
 require_relative 'account.rb'
-require_relative 'retry.rb'
 
 require 'backup/storage/base'
 
@@ -67,26 +63,18 @@ module Backup
 
         upload =
           if src_pathname.size > working_part_size * 2.5 || src_pathname.size > 5 * 10**9
-            Logger.info "Storing Large '#{dst}'"
-            ::Backup::Backblaze::UploadLargeFile.new \
+            Backblaze::UploadLargeFile.new \
              src: src_pathname,
              dst: dst,
-              authorization_token: account.authorization_token,
-              url: account.api_url,
+              account: account,
              part_size: working_part_size,
              bucket_id: bucket_id
          else
-            Logger.info "Storing '#{dst}'"
-
-            # TODO could upload several files in parallel with several of these token_provider
-            token_provider = ::Backup::Backblaze::Retry::TokenProvider.new do
-              account.upload_url bucket_id: bucket_id
-            end
-
-            ::Backup::Backblaze::UploadFile.new \
+            Backblaze::UploadFile.new \
              src: src_pathname.to_s,
              dst: dst,
-              token_provider: token_provider
+              account: account,
+              bucket_id: bucket_id
          end
 
        hash_wrap = upload.call
@@ -116,7 +104,7 @@ module Backup
        @account ||= begin
          account_deets = {account_id: account_id}
          Logger.info "Account login for #{account_deets.inspect}"
-          ::Backup::Backblaze::Account.new account_id: account_id, app_key: app_key
+          Backblaze::Account.new account_id: account_id, app_key: app_key
        end
      end
    end
@@ -1,3 +1,5 @@
+require 'json'
+
 module Backup
   module Backblaze
     # Intended as a quick-n-dirty way to deep-wrap json objects.
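HashWrap itself only gains the `require 'json'` here, but its role is clear from the call sites above: it wraps parsed JSON so nested values read as method calls rather than string keys. A sketch of that observed behaviour (inferred from usage in this diff, not the actual implementation):

    # Sketch of observed behaviour, inferred from the call sites in this diff.
    json = '{"apiUrl":"https://api002.backblazeb2.com","allowed":{"capabilities":["writeFiles"]}}'
    body_wrap = Backup::Backblaze::HashWrap.from_json json
    body_wrap.apiUrl               # => "https://api002.backblazeb2.com"
    body_wrap.allowed.capabilities # => ["writeFiles"]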
@@ -0,0 +1,132 @@
+module Backup
+  module Backblaze
+    module Http
+      module_function def b2_authorize_account account_id, app_key
+        encoded = Base64.strict_encode64 "#{account_id}:#{app_key}"
+        rsp = Excon.get \
+          'https://api.backblazeb2.com/b2api/v1/b2_authorize_account',
+          headers: {'Authorization' => "Basic #{encoded}"},
+          expects: 200
+        HashWrap.from_json rsp.body
+      end
+
+      module_function def b2_get_upload_url api_url, auth_headers, bucket_id
+        rsp = Excon.post \
+          "#{api_url}/b2api/v1/b2_get_upload_url",
+          headers: auth_headers,
+          body: {bucketId: bucket_id}.to_json,
+          expects: 200
+        HashWrap.from_json rsp.body
+      end
+
+      # upload with incorrect sha1 responds with
+      #
+      # {"code"=>"bad_request", "message"=>"Sha1 did not match data received", "status"=>400}
+      #
+      # Normal response
+      #
+      #{"accountId"=>"d765e276730e",
+      # "action"=>"upload",
+      # "bucketId"=>"dd8786b5eef2c7d66743001e",
+      # "contentLength"=>6144,
+      # "contentSha1"=>"5ba6cf1b3b3a088d73941052f60e78baf05d91fd",
+      # "contentType"=>"application/octet-stream",
+      # "fileId"=>"4_zdd8786b5eef2c7d66743001e_f1096f3027e0b1927_d20180725_m115148_c002_v0001095_t0047",
+      # "fileInfo"=>{"src_last_modified_millis"=>"1532503455580"},
+      # "fileName"=>"test_file",
+      # "uploadTimestamp"=>1532519508000}
+      module_function def b2_upload_file src, headers, url_token
+        rsp = Excon.post \
+          url_token.url,
+          headers: (headers.merge 'Authorization' => url_token.auth),
+          body: (File.read src),
+          expects: 200
+        HashWrap.from_json rsp.body
+      end
+
+      module_function def b2_list_buckets api_url, auth_headers, body
+        rsp = Excon.post \
+          "#{api_url}/b2api/v1/b2_list_buckets",
+          **auth_headers,
+          body: body.to_json,
+          expects: 200
+        HashWrap.from_json rsp.body
+      end
+
+      module_function def b2_list_file_names api_url, auth_headers, body
+        rsp = Excon.post \
+          "#{api_url}/b2api/v1/b2_list_file_names",
+          **auth_headers,
+          body: body.to_json,
+          expects: 200
+        HashWrap.from_json rsp.body
+      end
+
+      # delete the fileId
+      module_function def b2_delete_file_version api_url, auth_headers, body
+        rsp = Excon.post \
+          "#{api_url}/b2api/v1/b2_delete_file_version",
+          **auth_headers,
+          body: body.to_json,
+          expects: 200
+        HashWrap.from_json rsp.body
+      end
+
+      module_function def b2_start_large_file api_url, auth_headers, body
+        rsp = Excon.post \
+          "#{api_url}/b2api/v1/b2_start_large_file",
+          **auth_headers,
+          body: body.to_json,
+          expects: 200
+
+        HashWrap.from_json rsp.body
+      end
+
+      module_function def b2_get_upload_part_url api_url, auth_headers, file_id
+        rsp = Excon.post \
+          "#{api_url}/b2api/v1/b2_get_upload_part_url",
+          **auth_headers,
+          body: {fileId: file_id}.to_json,
+          expects: 200
+
+        # hash = JSON.parse rsp.body
+        # hash['code'] = 'emergency error'
+        # rsp.body = hash.to_json
+        # rsp.status = 503
+        # raise (Excon::Errors::ServiceUnavailable.new "yer died", nil, rsp)
+        HashWrap.from_json rsp.body
+      end
+
+      # NOTE Is there a way to stream this instead of loading multiple 100M chunks
+      # into memory? No, backblaze does not allow parts to use chunked encoding.
+      module_function def b2_upload_part upload_url, headers, bytes
+        # Yes, this is a different pattern to the other Excon.post calls ¯\_(ツ)_/¯
+        # Thread.new{sleep 5; exit!}
+        rsp = Excon.post \
+          upload_url,
+          headers: headers,
+          body: bytes,
+          expects: 200
+
+        # 200 response will be
+        # fileId The unique ID for this file.
+        # partNumber Which part this is.
+        # contentLength The number of bytes stored in the part.
+        # contentSha1 The SHA1 of the bytes stored in the part.
+
+
+        HashWrap.from_json rsp.body
+      end
+
+      module_function def b2_finish_large_file api_url, auth_headers, file_id, shas
+        rsp = Excon.post \
+          "#{api_url}/b2api/v1/b2_finish_large_file",
+          **auth_headers,
+          body: {fileId: file_id, partSha1Array: shas }.to_json,
+          expects: 200
+
+        HashWrap.from_json rsp.body
+      end
+    end
+  end
+end
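Since each wrapper is a module_function returning a HashWrap, the endpoints can also be exercised directly, outside the ApiImporter/Retry machinery. A sketch with placeholder credentials (not taken from the gem's tests):

    # Sketch with placeholder credentials; not taken from the gem's tests.
    auth = Backup::Backblaze::Http.b2_authorize_account 'account_id', 'app_key'
    auth_headers = {headers: {'Authorization' => auth.authorizationToken}}
    listing = Backup::Backblaze::Http.b2_list_buckets auth.apiUrl, auth_headers, {accountId: 'account_id'}
    listing.buckets.each {|b| puts b.bucketName}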