backup-backblaze 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 705a6fd53f7173dc6f8209bd8dd889ad49098f43
4
+ data.tar.gz: f633db6bde19d0cba7c95cd84b49225f0fb74647
5
+ SHA512:
6
+ metadata.gz: 6a945e4dc4597f54f5b15056f6cbb5edd6e0f4494f3412951d47dee96aab2c0d173a97b36d1992d36d94302cd26b8c51504845ac97b12ed01588219cbf0f02b6
7
+ data.tar.gz: a6864d6aa9d249b432c4e0ed61f778434ae61ae3f4e6eca2b01319a81b3390c9e7473ebbe63fc6c64020d3c123690fe7aebba9eb17e0756513af85ae3b799906
@@ -0,0 +1,13 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+
11
+ # rspec failure tracking
12
+ .rspec_status
13
+ Gemfile.lock
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.rvmrc ADDED
@@ -0,0 +1,47 @@
1
+ #!/usr/bin/env bash
2
+
3
+ # This is an RVM Project .rvmrc file, used to automatically load the ruby
4
+ # development environment upon cd'ing into the directory
5
+
6
+ # First we specify our desired <ruby>[@<gemset>], the @gemset name is optional,
7
+ # Only full ruby name is supported here, for short names use:
8
+ # echo "rvm use 2.5.0@b2" > .rvmrc
9
+ #environment_id="ruby-2.5.0@b2"
10
+ environment_id="ruby-2.3.6@backup-backblaze"
11
+
12
+ # Uncomment the following lines if you want to verify rvm version per project
13
+ # rvmrc_rvm_version="1.29.3 (master)" # 1.10.1 seems like a safe start
14
+ # eval "$(echo ${rvm_version}.${rvmrc_rvm_version} | __rvm_awk -F. '{print "[[ "$1*65536+$2*256+$3" -ge "$4*65536+$5*256+$6" ]]"}' )" || {
15
+ # echo "This .rvmrc file requires at least RVM ${rvmrc_rvm_version}, aborting loading."
16
+ # return 1
17
+ # }
18
+
19
+ # First we attempt to load the desired environment directly from the environment
20
+ # file. This is very fast and efficient compared to running through the entire
21
+ # CLI and selector. If you want feedback on which environment was used then
22
+ # insert the word 'use' after --create as this triggers verbose mode.
23
+ if [[ -d "${rvm_path:-$HOME/.rvm}/environments"
24
+ && -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]]
25
+ then
26
+ \. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
27
+ for __hook in "${rvm_path:-$HOME/.rvm}/hooks/after_use"*
28
+ do
29
+ if [[ -f "${__hook}" && -x "${__hook}" && -s "${__hook}" ]]
30
+ then \. "${__hook}" || true
31
+ fi
32
+ done
33
+ unset __hook
34
+ if (( ${rvm_use_flag:=1} >= 2 )) # display only when forced
35
+ then
36
+ if [[ $- == *i* ]] # check for interactive shells
37
+ then printf "%b" "Using: $(tput setaf 2 2>/dev/null)$GEM_HOME$(tput sgr0 2>/dev/null)\n" # show the user the ruby and gemset they are using in green
38
+ else printf "%b" "Using: $GEM_HOME\n" # don't use colors in non-interactive shells
39
+ fi
40
+ fi
41
+ else
42
+ # If the environment file has not yet been created, use the RVM CLI to select.
43
+ rvm --create "$environment_id" || {
44
+ echo "Failed to create RVM environment '${environment_id}'."
45
+ return 1
46
+ }
47
+ fi
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.5.0
5
+ before_install: gem install bundler -v 1.15.1
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in backup-backblaze.gemspec
4
+ gemspec
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2018 John Anderson
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,60 @@
1
+ # Backup::Backblaze
2
+
3
+ Plugin for the [Backup](https://github.com/backup/backup) gem to use [Backblaze](https://www.backblaze.com/) as storage.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'backup-backblaze'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install backup-backblaze
20
+
21
+ ## Usage
22
+
23
+ Add a storage block something like this to your usual Backup DSL file:
24
+
25
+ # BackBlaze must be a string here, not a class name, because it's defined outside the Backup gem.
26
+ store_with 'BackBlaze' do |server|
27
+ # from backblaze ui
28
+ server.account_id = 'deadbeefdead'
29
+ server.app_key = 'c27111357f682232c9943f6e63e98f916722c975e4'
30
+
31
+ # bucket name must be globally unique (yes, really).
32
+ # create buckets on the backblaze website. app_key must have access.
33
+ server.bucket = 'your_globally_unique_bucket_name'
34
+
35
+ # path defaults to '/'
36
+ server.path = '/whatever/you_like'
37
+ server.keep = 3
38
+
39
+ # minimum is 5mb, default is 100mb. Leave at default unless you have a good reason.
40
+ # server.part_size = 5000000
41
+ end
42
+
43
+
44
+ ## Development
45
+
46
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
47
+
48
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
49
+
50
+ ## Contributing
51
+
52
+ Bug reports and pull requests are welcome on GitHub at https://github.com/djellemah/backup-backblaze.
53
+
54
+ ## Acknowledgements
55
+
56
+ Thanks to [NETSTOCK](https://www.netstock.co/) for funding development.
57
+
58
+ ## License
59
+
60
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -0,0 +1,39 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'backup/backblaze/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = 'backup-backblaze'
8
+ spec.version = Backup::Backblaze::VERSION
9
+ spec.authors = ['John Anderson']
10
+ spec.email = ['panic@semiosix.com']
11
+
12
+ spec.summary = %q{Backup plugin for BackBlaze}
13
+ spec.description = %q{BackBlaze provides cloud storage. This makes it available to the Backup gem.}
14
+ spec.homepage = 'http://github.com/djellemah/backup-backblaze'
15
+ spec.license = 'MIT'
16
+
17
+ # Restrict pushes of this gem to RubyGems.org only. To push to a different single host, change
18
+ # 'allowed_push_host'; delete this section to allow pushing to any host.
19
+ if spec.respond_to?(:metadata)
20
+ spec.metadata['allowed_push_host'] = 'https://rubygems.org'
21
+ else
22
+ raise 'RubyGems 2.0 or newer is required to protect against public gem pushes.'
23
+ end
24
+
25
+ spec.files = `git ls-files -z`.split("\x0").reject do |f|
26
+ f.match(%r{^(test|spec|features)/})
27
+ end
28
+ spec.bindir = 'exe'
29
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
30
+ spec.require_paths = ['lib']
31
+
32
+ spec.add_development_dependency 'bundler', '~> 1.15'
33
+ spec.add_development_dependency 'rake', '~> 10.0'
34
+ spec.add_development_dependency "rspec", "~> 3.0"
35
+ spec.add_development_dependency 'pry'
36
+
37
+ spec.add_dependency 'backup'
38
+ spec.add_dependency 'excon'
39
+ end
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "backup/backblaze"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting with your gem easier.
7
+ require "pry"
8
+ Pry.start
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,9 @@
1
+ require 'backup'
2
+
3
+ module Backup
4
+ module Backblaze
5
+ end
6
+ end
7
+
8
+ require_relative 'backblaze/version'
9
+ require_relative 'backblaze/back_blaze'
@@ -0,0 +1,160 @@
1
+ require 'base64'
2
+ require 'excon'
3
+ require 'json'
4
+
5
+ require_relative 'hash_wrap'
6
+
7
+ module Backup
8
+ module Backblaze
9
+ class Account
10
+ def initialize account_id:, app_key:
11
+ @account_id = account_id
12
+ @app_key = app_key
13
+ auth!
14
+ end
15
+
16
+ attr_reader :account_id, :app_key, :body
17
+
18
+ class NotFound < RuntimeError; end
19
+
20
+ def auth!
21
+ # first call b2_authorize_account to get an account_auth_token
22
+ encoded = Base64.strict_encode64 "#{account_id}:#{app_key}"
23
+ rsp = Excon.get \
24
+ 'https://api.backblazeb2.com/b2api/v1/b2_authorize_account',
25
+ headers: {'Authorization' => "Basic #{encoded}"},
26
+ expects: 200
27
+
28
+ # this has to stick around because it has various important data
29
+ @body = HashWrap.from_json rsp.body
30
+
31
+ unless body.allowed.capabilities.include? 'writeFiles'
32
+ raise "app_key #{app_key} does not have write access to account #{account_id}"
33
+ end
34
+ end
35
+
36
+ def auth_headers
37
+ {headers: {'Authorization' => authorization_token}}
38
+ end
39
+
40
+ def api_url
41
+ body.apiUrl or raise NotFound, 'apiUrl'
42
+ end
43
+
44
+ def authorization_token
45
+ body.authorizationToken or raise NotFound, 'authorizationToken'
46
+ end
47
+
48
+ def minimum_part_size
49
+ # why b2 has this as well as minimumPartSize ¯\_(ツ)_/¯
50
+ body.absoluteMinimumPartSize
51
+ end
52
+
53
+ def recommended_part_size
54
+ body.recommendedPartSize
55
+ end
56
+
57
+ # The following is leaning towards Bucket.new account, bucket_id/bucket_name
58
+
59
+ # returns [upload_url, auth_token]
60
+ # Several files can be uploaded to one url.
61
+ # But uploading files in parallel requires one upload url per thread.
62
+ def upload_url bucket_id:
63
+ # get the upload url for a specific bucket id. Buckets can be named.
64
+ body = {bucketId: bucket_id }
65
+ rsp = Excon.post \
66
+ "#{api_url}/b2api/v1/b2_get_upload_url",
67
+ **auth_headers,
68
+ body: body.to_json,
69
+ expects: 200
70
+
71
+ hw = HashWrap.from_json rsp.body
72
+ return hw.uploadUrl, hw.authorizationToken
73
+ end
74
+
75
+ # return id for given name, or raise NotFound if no such named bucket
76
+ def bucket_id bucket_name:
77
+ rsp = Excon.post \
78
+ "#{api_url}/b2api/v1/b2_list_buckets",
79
+ **auth_headers,
80
+ body: {bucketName: bucket_name, accountId: account_id}.to_json,
81
+ expects: 200
82
+
83
+ buckets = (JSON.parse rsp.body)['buckets']
84
+ found = buckets.find do |ha|
85
+ ha['bucketName'] == bucket_name
86
+ end
87
+ found&.dig 'bucketId' or raise NotFound, "no bucket named #{bucket_name}"
88
+ end
89
+
90
+ # Hurhur
91
+ def bucket_list bucket_id: nil
92
+ b2_list_buckets bucketId: bucket_id, accountId: account_id
93
+ end
94
+
95
+ def b2_list_buckets body
96
+ rsp = Excon.post \
97
+ "#{api_url}/b2api/v1/b2_list_buckets",
98
+ **auth_headers,
99
+ body: body.select{|_,v|v}.to_json,
100
+ expects: 200
101
+
102
+ HashWrap.from_json rsp
103
+ end
104
+
105
+ # This might be dangerous because large number of file names might come back.
106
+ # But I'm not worrying about that now. Maybe later. Anyway, that's what
107
+ # nextFile and startFile are for.
108
+ def files bucket_name
109
+ rsp = Excon.post \
110
+ "#{api_url}/b2api/v1/b2_list_file_names",
111
+ **auth_headers,
112
+ body: {bucketId: (bucket_id bucket_name: bucket_name)}.to_json,
113
+ expects: 200
114
+
115
+ # ignoring the top-level {files:, nextFileName:} structure
116
+ files_hash = (JSON.parse rsp.body)['files']
117
+
118
+ # ignoring the top-level {files:, nextFileName:} structure
119
+ files_hash.map do |file_info_hash|
120
+ HashWrap.new file_info_hash
121
+ end
122
+ end
123
+
124
+ # This is mostly used to get a fileId for a given fileName
125
+ def file_info bucket_name, filename
126
+ # It's too much of a PITA to make this Excon call in only one place
127
+ rsp = Excon.post \
128
+ "#{api_url}/b2api/v1/b2_list_file_names",
129
+ **auth_headers,
130
+ body: {bucketId: (bucket_id bucket_name: bucket_name), maxFileCount: 1, startFileName: filename}.to_json,
131
+ expects: 200
132
+
133
+ files_hash = (JSON.parse rsp.body)['files']
134
+
135
+ raise NotFound, "#{filename} not found" unless files_hash.size == 1
136
+
137
+ HashWrap.new files_hash.first
138
+ end
139
+
140
+ # delete the named file in the named bucket
141
+ # https://www.backblaze.com/b2/docs/b2_delete_file_version.html
142
+ def delete_file bucket_name, filename
143
+ # lookup fileId from given filename
144
+ info = file_info bucket_name, filename
145
+
146
+ # delete the fileId
147
+ Excon.post \
148
+ "#{api_url}/b2api/v1/b2_delete_file_version",
149
+ **auth_headers,
150
+ body: {fileName: filename, fileId: info.fileId}.to_json,
151
+ expects: 200
152
+
153
+ # ignore 400 with body containing "code": "file_not_present"
154
+ rescue Excon::Errors::BadRequest => ex
155
+ hw = HashWrap.from_json ex.response.body
156
+ raise unless hw.code == 'file_not_present'
157
+ end
158
+ end
159
+ end
160
+ end
@@ -0,0 +1,124 @@
1
+ require 'excon'
2
+ require 'base64'
3
+ require 'json'
4
+ require 'pathname'
5
+
6
+ require_relative 'upload_file.rb'
7
+ require_relative 'upload_large_file.rb'
8
+ require_relative 'account.rb'
9
+ require_relative 'retry.rb'
10
+
11
+ require 'backup/storage/base'
12
+
13
+ # module naming like this is required by Backup to find the storage
14
+ module Backup
15
+ module Storage
16
+ # Different naming to module
17
+ class BackBlaze < Base
18
+ include Backup::Storage::Cycler
19
+
20
+ class ConfigurationError < Backup::Error; end
21
+
22
+ # Values specified in Model DSL:
23
+ # - API credentials
24
+ # - bucket name
25
+ REQUIRED_ATTRS = %i[account_id app_key bucket]
26
+ attr_accessor *REQUIRED_ATTRS
27
+
28
+ # - part size for large files
29
+ attr_accessor :part_size
30
+
31
+ def initialize(model, storage_id = nil)
32
+ super
33
+ @path ||= '/'
34
+ check_configuration
35
+ end
36
+
37
+ protected
38
+
39
+ def check_configuration
40
+ not_specified = REQUIRED_ATTRS.reject{|name| send name}
41
+ if not_specified.any?
42
+ raise ConfigurationError, "#{not_specified.join(", ")} required"
43
+ end
44
+
45
+ if part_size && part_size < account.minimum_part_size
46
+ raise ConfigurationError, "part_size must be > #{account.minimum_part_size}"
47
+ end
48
+ end
49
+
50
+ def remote_relative_pathname
51
+ @remote_relative_pathname ||= Pathname.new(remote_path).relative_path_from(root)
52
+ end
53
+
54
+ def root; @root ||= Pathname.new '/'; end
55
+ def tmp_dir; @tmp_dir ||= Pathname.new Config.tmp_path; end
56
+
57
+ def working_part_size
58
+ @working_part_size ||= part_size || account.recommended_part_size
59
+ end
60
+
61
+ def transfer!
62
+ bucket_id = account.bucket_id bucket_name: bucket
63
+
64
+ package.filenames.each do |filename|
65
+ dst = (remote_relative_pathname + filename).to_s
66
+ src_pathname = tmp_dir + filename
67
+
68
+ upload =
69
+ if src_pathname.size > working_part_size * 2.5 || src_pathname.size > 5 * 10**9
70
+ Logger.info "Storing Large '#{dst}'"
71
+ ::Backup::Backblaze::UploadLargeFile.new \
72
+ src: src_pathname,
73
+ dst: dst,
74
+ authorization_token: account.authorization_token,
75
+ url: account.api_url,
76
+ part_size: working_part_size,
77
+ bucket_id: bucket_id
78
+ else
79
+ Logger.info "Storing '#{dst}'"
80
+
81
+ # TODO could upload several files in parallel with several of these token_provider
82
+ token_provider = ::Backup::Backblaze::Retry::TokenProvider.new do
83
+ account.upload_url bucket_id: bucket_id
84
+ end
85
+
86
+ ::Backup::Backblaze::UploadFile.new \
87
+ src: src_pathname.to_s,
88
+ dst: dst,
89
+ token_provider: token_provider
90
+ end
91
+
92
+ hash_wrap = upload.call
93
+
94
+ Logger.info "'#{dst}' stored at #{hash_wrap.fileName}"
95
+ end
96
+ end
97
+
98
+ # Called by the Cycler.
99
+ # Any error raised will be logged as a warning.
100
+ def remove!(package)
101
+ Logger.info "Removing backup package dated #{package.time}"
102
+
103
+ # workaround for stoopid design in Backup
104
+ package_remote_relative_pathname = Pathname.new(remote_path(package)).relative_path_from(root)
105
+
106
+ package.filenames.each do |filename|
107
+ dst = (package_remote_relative_pathname + filename).to_s
108
+ Logger.info "Removing file #{dst}"
109
+ account.delete_file bucket, dst
110
+ end
111
+ end
112
+
113
+ protected
114
+
115
+ def account
116
+ @account ||= begin
117
+ account_deets = {account_id: account_id}
118
+ Logger.info "Account login for #{account_deets.inspect}"
119
+ ::Backup::Backblaze::Account.new account_id: account_id, app_key: app_key
120
+ end
121
+ end
122
+ end
123
+ end
124
+ end
@@ -0,0 +1,43 @@
1
+ module Backup
2
+ module Backblaze
3
+ # Intended as a quick-n-dirty way to deep-wrap json objects.
4
+ # If it doesn't work for you, rather than scope-creeping this consider: Hash, OpenStruct, a class, etc.
5
+ class HashWrap
6
+ def initialize( hash )
7
+ @hash = hash
8
+ end
9
+
10
+ def method_missing(meth, *args, &blk)
11
+ value = @hash.fetch meth.to_s do |_key|
12
+ @hash.fetch meth do |_key|
13
+ super
14
+ end
15
+ end
16
+ __wrap value
17
+ end
18
+
19
+ private def __wrap value
20
+ case value
21
+ when Hash
22
+ self.class.new value
23
+ when Array
24
+ value.map do |item|
25
+ __wrap item
26
+ end
27
+ else
28
+ value
29
+ end
30
+ end
31
+
32
+ def to_h
33
+ # hand out a shallow copy, so callers can't mess with the wrapped hash itself
34
+ @hash.dup
35
+ end
36
+
37
+ # really a convenience method
38
+ def self.from_json json
39
+ new JSON.parse json
40
+ end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,78 @@
1
+ module Backup
2
+ module Backblaze
3
+ module Retry
4
+ MAX_RETRIES = 3
5
+
6
+ # Use the url and token returned by the next_url_token block, until we get
7
+ # a reset indicating that we need a new url and token.
8
+ class TokenProvider
9
+ def initialize &next_url_token
10
+ @next_url_token = next_url_token
11
+ reset
12
+ end
13
+
14
+ attr_reader :upload_url, :file_auth_token
15
+
16
+ def reset
17
+ @upload_url, @file_auth_token = @next_url_token.call
18
+ self
19
+ end
20
+ end
21
+
22
+ class TooManyRetries < RuntimeError; end
23
+
24
+ # Try up to MAX_RETRIES times to call the upload_blk. Recursive.
25
+ #
26
+ # Various errors (passed through from Excon) coming out of upload_blk will
27
+ # be caught. When an error is caught, :reset method called on
28
+ # token_provider.
29
+ #
30
+ # Return whatever upload_blk returns
31
+ def retry_upload retries, token_provider, &upload_blk
32
+ raise TooManyRetries, "max retries is #{MAX_RETRIES}" unless retries < MAX_RETRIES
33
+ sleep retries ** 2 # exponential backoff for retries > 0
34
+
35
+ # Called by all the rescue blocks that want to retry.
36
+ # Mainly so we don't make stoopid errors - like leaving out the +1 for one of the calls :-|
37
+ retry_lambda = lambda do
38
+ retry_upload retries + 1, token_provider.reset, &upload_blk
39
+ end
40
+
41
+ begin
42
+ upload_blk.call token_provider, retries
43
+ rescue Excon::Errors::Error => ex
44
+ # The most convenient place to log this
45
+ Backup::Logger.info ex.message
46
+ raise
47
+ end
48
+
49
+ # Recoverable errors details sourced from:
50
+ # https://www.backblaze.com/b2/docs/integration_checklist.html
51
+ # https://www.backblaze.com/b2/docs/uploading.html
52
+
53
+ # socket-related, 408, and 429
54
+ rescue Excon::Errors::SocketError, Excon::Errors::Timeout, Excon::Errors::RequestTimeout, Excon::Errors::TooManyRequests
55
+ retry_lambda.call
56
+
57
+ # some 401
58
+ rescue Excon::Errors::Unauthorized => ex
59
+ hw = HashWrap.from_json ex.response.body
60
+ case hw.code
61
+ when 'bad_auth_token', 'expired_auth_token'
62
+ retry_lambda.call
63
+ else
64
+ raise
65
+ end
66
+
67
+ # 500-599 where the BackBlaze "code" doesn't matter
68
+ rescue Excon::Errors::HTTPStatusError => ex
69
+ if (500..599) === ex.response.status
70
+ retry_lambda.call
71
+ else
72
+ raise
73
+ end
74
+
75
+ end
76
+ end
77
+ end
78
+ end
@@ -0,0 +1,111 @@
1
+ require 'digest'
2
+
3
+ require_relative 'retry.rb'
4
+
5
+ module Backup
6
+ module Backblaze
7
+ # calculates sha1 and uploads file
8
+ # Of course, this entire class is an atomic failure, because the underlying file could change at any point.
9
+ #
10
+ # dst can contain / for namespaces
11
+ class UploadFile
12
+ def initialize src:, dst:, token_provider:, content_type: nil
13
+ @src = src
14
+ @dst = dst
15
+ @content_type = content_type
16
+ @token_provider = token_provider
17
+ end
18
+
19
+ attr_reader :src, :dst, :token_provider, :content_type
20
+
21
+ def headers
22
+ # headers all have to be strings, otherwise excon & Net::HTTP choke :-|
23
+ {
24
+ 'X-Bz-File-Name' => (URI.encode dst.encode 'UTF-8'),
25
+ 'X-Bz-Content-Sha1' => sha1_digest,
26
+ 'Content-Length' => content_length.to_s,
27
+ 'Content-Type' => content_type,
28
+
29
+ # optional
30
+ 'X-Bz-Info-src_last_modified_millis' => last_modified_millis.to_s,
31
+ 'X-Bz-Info-b2-content-disposition' => content_disposition,
32
+ }.select{|k,v| v}
33
+ end
34
+
35
+ def content_type
36
+ @content_type || 'b2/x-auto'
37
+ end
38
+
39
+ # No idea what has to be in here
40
+ def content_disposition
41
+ end
42
+
43
+ def content_length
44
+ File.size src
45
+ end
46
+
47
+ def sha1
48
+ @sha1 = Digest::SHA1.file src
49
+ end
50
+
51
+ def sha1_digest
52
+ @sha1_digest = sha1.hexdigest
53
+ end
54
+
55
+ def last_modified_millis
56
+ @last_modified_millis ||= begin
57
+ time = File.lstat(src).mtime
58
+ time.tv_sec * 1000 + time.tv_usec / 1000
59
+ end
60
+ end
61
+
62
+ include Retry
63
+
64
+ # upload with incorrect sha1 responds with
65
+ #
66
+ # {"code"=>"bad_request", "message"=>"Sha1 did not match data received", "status"=>400}
67
+ #
68
+ # Normal response
69
+ #
70
+ #{"accountId"=>"d765e276730e",
71
+ # "action"=>"upload",
72
+ # "bucketId"=>"dd8786b5eef2c7d66743001e",
73
+ # "contentLength"=>6144,
74
+ # "contentSha1"=>"5ba6cf1b3b3a088d73941052f60e78baf05d91fd",
75
+ # "contentType"=>"application/octet-stream",
76
+ # "fileId"=>"4_zdd8786b5eef2c7d66743001e_f1096f3027e0b1927_d20180725_m115148_c002_v0001095_t0047",
77
+ # "fileInfo"=>{"src_last_modified_millis"=>"1532503455580"},
78
+ # "fileName"=>"test_file",
79
+ # "uploadTimestamp"=>1532519508000}
80
+ def call
81
+ retry_upload 0, token_provider do |token_provider, retries|
82
+ Backup::Logger.info "#{src} retry #{retries}"
83
+ rsp = Excon.post \
84
+ token_provider.upload_url,
85
+ headers: (headers.merge 'Authorization' => token_provider.file_auth_token),
86
+ body: (File.read src),
87
+ expects: 200
88
+
89
+ HashWrap.from_json rsp.body
90
+ end
91
+ end
92
+
93
+ # Seems this doesn't work. Fails with
94
+ #
95
+ # 400 Missing header: Content-Length
96
+ #
97
+ # Probably because chunked encoding doesn't send an initial Content-Length
98
+ private def excon_stream_upload( upload )
99
+ File.open src do |io|
100
+ chunker = lambda do
101
+ # Excon.defaults[:chunk_size] defaults to 1048576, ie 1MB
102
+ # to_s will convert the nil received after everything is read to the final empty chunk
103
+ io.read(Excon.defaults[:chunk_size]).to_s
104
+ end
105
+
106
+ Excon.post url, headers: headers, :request_block => chunker, debug_request: true, debug_response: true, instrumentor: Excon::StandardInstrumentor
107
+ end
108
+ end
109
+ end
110
+ end
111
+ end
@@ -0,0 +1,173 @@
1
+ require 'digest'
2
+ require_relative 'hash_wrap'
3
+ require_relative 'retry'
4
+
5
+ module Backup
6
+ module Backblaze
7
+ # Upload a large file in several parts.
8
+ class UploadLargeFile
9
+ # src is a Pathname
10
+ # dst is a String
11
+ def initialize src:, dst:, authorization_token:, content_type: nil, url:, part_size:, bucket_id:
12
+ @src = src
13
+ @dst = dst
14
+ @authorization_token = authorization_token
15
+ @content_type = content_type
16
+ @url = url
17
+ @part_size = part_size
18
+ @bucket_id = bucket_id
19
+ end
20
+
21
+ attr_reader :src, :dst, :authorization_token, :url, :content_type, :part_size, :bucket_id
22
+
23
+ # same as account
24
+ def auth_headers
25
+ # only cos the double {{}} is quite ugly :-p
26
+ Hash headers: {'Authorization' => authorization_token}
27
+ end
28
+
29
+ def content_type
30
+ @content_type || 'b2/x-auto'
31
+ end
32
+
33
+ # No idea what has to be in here
34
+ def content_disposition
35
+ end
36
+
37
+ def content_length
38
+ src.size
39
+ end
40
+
41
+ def last_modified_millis
42
+ @last_modified_millis ||= begin
43
+ time = File.lstat(src).mtime
44
+ time.tv_sec * 1000 + time.tv_usec / 1000
45
+ end
46
+ end
47
+
48
+ # https://www.backblaze.com/b2/docs/b2_start_large_file.html
49
+ # definitely need fileInfo back from this. Maybe also uploadTimestamp not sure yet.
50
+ def b2_start_large_file
51
+ # Unlike in UploadFile, it's OK to use symbols here cos to_json converts them to strings
52
+ body = {
53
+ bucketId: bucket_id,
54
+ fileName: dst,
55
+ contentType: content_type,
56
+ fileInfo: {
57
+ src_last_modified_millis: last_modified_millis.to_s,
58
+ 'b2-content-disposition': content_disposition
59
+ # this seems to be optional, and is hard to calculate for large file up
60
+ # front. So don't send it.
61
+ # large_file_sha1: sha1_digest,
62
+ }.select{|k,v| v}
63
+ }
64
+
65
+ rsp = Excon.post \
66
+ "#{url}/b2api/v1/b2_start_large_file",
67
+ **auth_headers,
68
+ body: body.to_json,
69
+ expects: 200
70
+
71
+ HashWrap.from_json rsp.body
72
+ end
73
+
74
+ def file_id
75
+ @file_id ||= b2_start_large_file.fileId
76
+ end
77
+
78
+ def b2_get_upload_part_url
79
+ rsp = Excon.post \
80
+ "#{url}/b2api/v1/b2_get_upload_part_url",
81
+ **auth_headers,
82
+ body: {fileId: file_id}.to_json,
83
+ expects: 200
84
+
85
+ hash = JSON.parse rsp.body
86
+ return hash.values_at 'uploadUrl', 'authorizationToken'
87
+ end
88
+
89
+ # NOTE Is there a way to stream this instead of loading multiple 100M chunks
90
+ # into memory? No, backblaze does not allow parts to use chunked encoding.
91
+ def b2_upload_part sequence, upload_url, file_auth_token, &log_block
92
+ # read length, offset
93
+ bytes = src.read part_size, part_size * sequence
94
+
95
+ # return nil if the read comes back as a nil, ie no bytes read
96
+ return if bytes.nil? || bytes.empty?
97
+
98
+ # This is a bit weird. But not so weird that it needs fixing.
99
+ log_block.call
100
+
101
+ headers = {
102
+ # not the same as the auth_headers value
103
+ 'Authorization' => file_auth_token,
104
+ # cos backblaze wants 1-based, but we want 0-based for reading file
105
+ 'X-Bz-Part-Number' => sequence + 1,
106
+ 'Content-Length' => bytes.length,
107
+ 'X-Bz-Content-Sha1' => (sha = Digest::SHA1.hexdigest bytes),
108
+ }
109
+
110
+ # Yes, this is a different pattern to the other Excon.post calls ¯\_(ツ)_/¯
111
+ rsp = Excon.post \
112
+ upload_url,
113
+ headers: headers,
114
+ body: bytes,
115
+ expects: 200
116
+
117
+ # 200 response will be
118
+ # fileId The unique ID for this file.
119
+ # partNumber Which part this is.
120
+ # contentLength The number of bytes stored in the part.
121
+ # contentSha1 The SHA1 of the bytes stored in the part.
122
+
123
+ # return for the sha collection
124
+ sha
125
+ end
126
+
127
+ def b2_finish_large_file shas
128
+ rsp = Excon.post \
129
+ "#{url}/b2api/v1/b2_finish_large_file",
130
+ **auth_headers,
131
+ body: {fileId: file_id, partSha1Array: shas }.to_json,
132
+ expects: 200
133
+
134
+ HashWrap.from_json rsp.body
135
+ end
136
+
137
+ # 10000 is backblaze specified max number of parts
138
+ MAX_PARTS = 10000
139
+
140
+ include Retry
141
+
142
+ def call
143
+ if src.size > part_size * MAX_PARTS
144
+ raise Error, "File #{src.to_s} has size #{src.size} which is larger than part_size * MAX_PARTS #{part_size * MAX_PARTS}. Try increasing part_size in model."
145
+ end
146
+
147
+ # TODO could have multiple threads here, each would need a separate token_provider
148
+ token_provider = TokenProvider.new &method(:b2_get_upload_part_url)
149
+ shas = (0...MAX_PARTS).each_with_object [] do |sequence, shas|
150
+ sha = retry_upload 0, token_provider do |token_provider, retries|
151
+ # return sha
152
+ b2_upload_part sequence, token_provider.upload_url, token_provider.file_auth_token do
153
+ Backup::Logger.info "#{src} trying part #{sequence + 1} of #{(src.size / part_size.to_r).ceil} retry #{retries}"
154
+ end
155
+ end
156
+
157
+ # sha will come back as nil once the file is done.
158
+ if sha
159
+ shas << sha
160
+ Backup::Logger.info "#{src} stored part #{sequence + 1} with #{sha}"
161
+ else
162
+ break shas
163
+ end
164
+ end
165
+
166
+ # finish up, log and return the response
167
+ hash_wrap = b2_finish_large_file shas
168
+ Backup::Logger.info "#{src} finished"
169
+ hash_wrap
170
+ end
171
+ end
172
+ end
173
+ end
@@ -0,0 +1,5 @@
1
+ module Backup
2
+ module Backblaze
3
+ VERSION = '0.1.2'
4
+ end
5
+ end
metadata ADDED
@@ -0,0 +1,149 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: backup-backblaze
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.2
5
+ platform: ruby
6
+ authors:
7
+ - John Anderson
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2018-08-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.15'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.15'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pry
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: backup
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: excon
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: BackBlaze provides cloud storage. This makes it available to the Backup
98
+ gem.
99
+ email:
100
+ - panic@semiosix.com
101
+ executables: []
102
+ extensions: []
103
+ extra_rdoc_files: []
104
+ files:
105
+ - ".gitignore"
106
+ - ".rspec"
107
+ - ".rvmrc"
108
+ - ".travis.yml"
109
+ - Gemfile
110
+ - LICENSE.txt
111
+ - README.md
112
+ - Rakefile
113
+ - backup-backblaze.gemspec
114
+ - bin/console
115
+ - bin/setup
116
+ - lib/backup/backblaze.rb
117
+ - lib/backup/backblaze/account.rb
118
+ - lib/backup/backblaze/back_blaze.rb
119
+ - lib/backup/backblaze/hash_wrap.rb
120
+ - lib/backup/backblaze/retry.rb
121
+ - lib/backup/backblaze/upload_file.rb
122
+ - lib/backup/backblaze/upload_large_file.rb
123
+ - lib/backup/backblaze/version.rb
124
+ homepage: http://github.com/djellemah/backup-backblaze
125
+ licenses:
126
+ - MIT
127
+ metadata:
128
+ allowed_push_host: https://rubygems.org
129
+ post_install_message:
130
+ rdoc_options: []
131
+ require_paths:
132
+ - lib
133
+ required_ruby_version: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - ">="
136
+ - !ruby/object:Gem::Version
137
+ version: '0'
138
+ required_rubygems_version: !ruby/object:Gem::Requirement
139
+ requirements:
140
+ - - ">="
141
+ - !ruby/object:Gem::Version
142
+ version: '0'
143
+ requirements: []
144
+ rubyforge_project:
145
+ rubygems_version: 2.6.14
146
+ signing_key:
147
+ specification_version: 4
148
+ summary: Backup plugin for BackBlaze
149
+ test_files: []