smart_s3_sync 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 04cc107694aecec62f525ff7afd6f5b51610f39d
4
+ data.tar.gz: 31fbf52ab4201430ab60e81827dafca121bf468e
5
+ SHA512:
6
+ metadata.gz: 14ffb04946ac2294b21ea0641d4f67c6e490b4137760f9f332ad8984ac6d686256b5eb0861f96a55a3f212ee3831ef713816914cfb67f549182e56685f5995c3
7
+ data.tar.gz: feebb0f4f47a06cbc869588cc6a4c46226513c19f4eeb6d3e70994a4acfa870e72177d233a990e8a7d1e4b9086f57d65ceb50ef540373ed1eb5e74067b0866f2
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in smart_s3_sync.gemspec
4
+ gemspec
@@ -0,0 +1,52 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ smart_s3_sync (0.0.1)
5
+ fog
6
+ sqlite3
7
+
8
+ GEM
9
+ remote: https://rubygems.org/
10
+ specs:
11
+ builder (3.2.2)
12
+ excon (0.37.0)
13
+ fog (1.22.1)
14
+ fog-brightbox
15
+ fog-core (~> 1.22)
16
+ fog-json
17
+ ipaddress (~> 0.5)
18
+ nokogiri (~> 1.5, >= 1.5.11)
19
+ fog-brightbox (0.1.1)
20
+ fog-core (~> 1.22)
21
+ fog-json
22
+ inflecto
23
+ fog-core (1.22.0)
24
+ builder
25
+ excon (~> 0.33)
26
+ formatador (~> 0.2)
27
+ mime-types
28
+ net-scp (~> 1.1)
29
+ net-ssh (>= 2.1.3)
30
+ fog-json (1.0.0)
31
+ multi_json (~> 1.0)
32
+ formatador (0.2.5)
33
+ inflecto (0.0.2)
34
+ ipaddress (0.8.0)
35
+ mime-types (2.3)
36
+ mini_portile (0.6.0)
37
+ multi_json (1.10.1)
38
+ net-scp (1.2.1)
39
+ net-ssh (>= 2.6.5)
40
+ net-ssh (2.9.1)
41
+ nokogiri (1.6.2.1)
42
+ mini_portile (= 0.6.0)
43
+ rake (10.3.2)
44
+ sqlite3 (1.3.9)
45
+
46
+ PLATFORMS
47
+ ruby
48
+
49
+ DEPENDENCIES
50
+ bundler (~> 1.3)
51
+ rake
52
+ smart_s3_sync!
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Chris Rhoden
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,29 @@
1
+ # SmartS3Sync
2
+
3
+ Intelligent syncing from Cloud providers when duplicate content abounds.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'smart_s3_sync'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install smart_s3_sync
18
+
19
+ ## Usage
20
+
21
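+ Run `s5 s3://BUCKET/PREFIX LOCAL_DIR` to mirror the bucket contents under PREFIX into LOCAL_DIR. Credentials are read from the `S3_ACCESS_KEY_ID` and `S3_SECRET_ACCESS_KEY` environment variables unless they are embedded in the URI (`s3://KEY_ID:SECRET@BUCKET/PREFIX`).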
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/bin/s5 ADDED
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'smart_s3_sync'
4
+ require 'uri'
5
+
6
# Print +message+ on standard error and terminate the process with
# exit status 1.
#
# @param message [String] text shown to the user before exiting
# @return [void] never returns; raises SystemExit
def quit(message)
  STDERR.puts(message)
  exit(1)
end
10
+
11
# Decode the %XX escapes of a URI userinfo component.
#
# URI.decode was removed in Ruby 3.0. URI.decode_www_form_component is the
# stdlib replacement, but it also turns "+" into a space, so literal plus
# signs (legal in AWS secret keys) are protected before decoding.
#
# @param value [String] percent-encoded text
# @return [String] decoded text
def percent_decode(value)
  URI.decode_www_form_component(value.gsub('+', '%2B'))
end

if ARGV.length != 2
  quit "Usage: #{File.basename($0)} REMOTE_URI LOCAL_DIR"
end

remote_uri, local_dir = ARGV

begin
  remote_uri = URI.parse(remote_uri)
rescue URI::InvalidURIError => e
  # Report a malformed URI as a usage problem instead of a stack trace.
  quit "Invalid REMOTE_URI: #{e.message}"
end

# scheme is nil for scheme-less input such as "bucket/prefix"; to_s keeps
# that case on the friendly error path instead of raising NoMethodError.
quit "Only s3 is currently supported" if remote_uri.scheme.to_s.downcase != 's3'

bucket = remote_uri.host
quit "REMOTE_URI must name a bucket (s3://BUCKET/PREFIX)" if bucket.nil? || bucket.empty?

# Drop the leading slash of the URI path; the remainder is the key prefix.
prefix = remote_uri.path.sub('/', '')

if remote_uri.userinfo
  # Either part may be absent (e.g. "s3://key@bucket"); leave it nil so the
  # credential check below reports the problem.
  access_key_id     = remote_uri.user && percent_decode(remote_uri.user)
  access_key_secret = remote_uri.password && percent_decode(remote_uri.password)
else
  access_key_id     = ENV['S3_ACCESS_KEY_ID']
  access_key_secret = ENV['S3_SECRET_ACCESS_KEY']
end

unless access_key_id && access_key_secret
  quit "S3_ACCESS_KEY_ID and S3_SECRET_ACCESS_KEY environment variables are required"
end

# NOTE(review): the endpoint is plain HTTP, so object data travels
# unencrypted — confirm whether an https endpoint can be used instead.
SmartS3Sync.sync(local_dir, bucket, {
  provider: 'AWS',
  aws_access_key_id: access_key_id,
  aws_secret_access_key: access_key_secret,
  endpoint: 'http://s3.amazonaws.com'
}, prefix)
@@ -0,0 +1,35 @@
1
+ require 'smart_s3_sync/version'
2
+ require 'smart_s3_sync/file_table'
3
+ require 'fog'
4
+
5
module SmartS3Sync

  # Mirror a remote (fog) directory into a local directory.
  #
  # Every remote file is registered in a FileTable, the table copies the
  # content into place, and afterwards local files the table does not want
  # to keep, as well as directories left empty, are removed.
  #
  # @param dir [String] local destination directory (absolute or relative)
  # @param remote_dir [String] name of the remote directory / bucket
  # @param connection_options [Hash] options passed to Fog::Storage.new
  # @param remote_prefix [String, nil] only keys under this prefix are synced
  # @raise [ArgumentError] when the remote directory cannot be found
  def self.sync(dir, remote_dir, connection_options, remote_prefix=nil)
    # The table stores absolute paths, so the sweep below has to glob with an
    # absolute root too; with a relative +dir+ no globbed path would ever
    # match and every local file would be deleted.
    root  = File.expand_path(dir)
    table = FileTable.new(root, remote_prefix)

    bucket = Fog::Storage.new(connection_options).directories.
      get(remote_dir, {:prefix => remote_prefix})
    raise ArgumentError, "Remote directory #{remote_dir} could not be found" if bucket.nil?

    # Add all files in the cloud to our map.
    bucket.files.each { |file| table.push(file) }

    # And copy them to the right places
    table.copy!(bucket)

    local_paths = Dir[File.join(root, '**/*')]

    # sweep through and remove all files not in the cloud
    local_paths.each do |path|
      next if File.directory?(path)
      File.unlink(path) unless table.keep?(path)
    end

    # and then all empty directories. Reverse-sorted order visits children
    # before their parents, so a chain of nested empty directories is fully
    # removed. Dir.entries always lists "." and "..", hence the subtraction.
    local_paths.sort.reverse_each do |path|
      # Dir.rmdir cannot remove a symlink that points at a directory.
      next unless File.directory?(path) && !File.symlink?(path)
      Dir.rmdir(path) if (Dir.entries(path) - %w[. ..]).empty?
    end
  end

end
@@ -0,0 +1,40 @@
1
+ require 'smart_s3_sync/file_target'
2
+
3
module SmartS3Sync
  # Index of everything that should exist locally after a sync.
  #
  # Remote files are grouped by content digest (one FileTarget per digest),
  # and every local destination path is remembered so callers can ask
  # whether a local file should be kept.
  class FileTable
    # @param root [String] local directory the remote keys are mapped into
    # @param prefix [String, nil] remote key prefix stripped from every key;
    #   a leading or trailing slash is ignored
    def initialize(root, prefix=nil)
      @map    = {}                      # digest => FileTarget
      @files  = {}                      # absolute local path => true (files to keep)
      @root   = File.expand_path(root)  # root file destination
      @prefix = prefix ? prefix.gsub(%r{\A/|/\z}, '') : ''
      # The prefix is literal text, so it is escaped before being embedded in
      # a regexp; otherwise "a.b" would also strip "aXb/".
      @prefix_pattern = /\A#{Regexp.escape(@prefix)}\/?/
    end

    # Register a remote file.
    #
    # @param fog_file [#key, #content_md5, #etag] remote file description
    # @return whatever FileTarget#add_destination returns
    def push(fog_file)
      digest = hash_key(fog_file) # pull cloud calculated hex digest from file
      target = (@map[digest] ||= FileTarget.new(digest, fog_file.key)) # grab or create target
      destination = File.expand_path(strip_prefix(fog_file.key), @root) # calculate local path
      @files[destination] = true          # remember the local path as a file to keep
      target.add_destination(destination) # and add local path to the target
    end

    # Ask every target to bring its destinations up to date.
    #
    # @param fog_dir remote directory handed through to FileTarget#copy!
    def copy!(fog_dir)
      @map.each_value { |target| target.copy!(fog_dir) }
    end

    # @param filename [String] local path; relative paths are expanded
    #   against the current working directory before the lookup
    # @return [Boolean] true when the path was registered through #push
    def keep?(filename)
      @files.key?(File.expand_path(filename))
    end

    private

    # Key used to group identical content: the file's content_md5 when
    # present, otherwise its etag, as a Symbol.
    def hash_key(fog_file)
      (fog_file.content_md5 || fog_file.etag).intern # these should be equivalent
    end

    # Remove the configured prefix (and one following slash) from the start
    # of a remote key.
    def strip_prefix(key)
      key.sub(@prefix_pattern, '')
    end
  end
end
@@ -0,0 +1,110 @@
1
+ require 'tempfile'
2
+
3
module SmartS3Sync
  # One piece of remote content (identified by its MD5 digest) together with
  # every local path that should end up holding that content.
  #
  # The content is downloaded at most once; further copies are produced
  # locally with hard links.
  class FileTarget
    MAX_RETRIES    = 5           # retries after the first failed download attempt
    PROGRESS_WIDTH = 50          # number of "#" marks for a complete download
    BYTES_PER_MB   = 1_048_576.0

    attr_reader :digest, :remote_filename, :local_source, :destinations

    # @param digest [#to_s] hex MD5 digest of the remote content
    # @param remote_filename [String] key used to fetch the content
    def initialize(digest, remote_filename)
      @digest          = digest
      @remote_filename = remote_filename
      @local_source    = nil
      @destinations    = []
    end

    # Register a local path that must hold this content.
    #
    # If we already have a local file with the right checksum, it is not
    # added to the list of destinations and is instead marked as the local
    # source for the remaining copies.
    #
    # @param file [String] local path
    def add_destination(file)
      return if destinations.include?(file)

      # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
      if File.exist?(file) && file_hash(file) == digest.to_s
        puts "#{file} is up to date"
        @local_source = file
      else
        destinations.push(file)
      end
    end

    # Bring every pending destination up to date.
    #
    # If every copy in the cloud is already present, the number of
    # destinations is 0 and there is no work left to do.
    #
    # @param fog_dir [#files] remote directory; only used when no local copy exists
    def copy!(fog_dir)
      return if destinations.empty?

      if local_source.nil? # we prefer to not have to download
        copy_from_fog(fog_dir)
      else
        copy_from_local(local_source)
      end
    end

    private

    # Download the content, verify its checksum and place it at every
    # destination. Failed attempts are retried up to MAX_RETRIES times; the
    # last error is re-raised once the retries are used up.
    def copy_from_fog(fog_dir)
      puts "Downloading #{remote_filename}."
      retries = 0
      begin
        file = download(fog_dir) # basically, just try.
        begin
          if file_hash(file.path) != digest.to_s
            raise "Hash mismatch downloading #{remote_filename}"
          end

          copy_from_local(file.path)         # with a copy locally, the job is the same
          @local_source = destinations.shift # we now have a local copy!
        ensure
          file.close(true) # remove the tempfile of this attempt, even before a retry
        end
      rescue StandardError => e
        raise if retries >= MAX_RETRIES

        retries += 1
        puts e
        puts "retrying"
        retry
      end
    end

    # Place the content of +source+ at every destination, hard-linking when
    # possible.
    def copy_from_local(source)
      destinations.each do |dest|
        FileUtils.mkdir_p(File.dirname(dest))
        begin
          FileUtils.ln(source, dest, :force => true)
        rescue Errno::EXDEV
          # Hard links cannot cross filesystems (e.g. tempfile directory vs.
          # sync target); fall back to a real copy.
          FileUtils.cp(source, dest)
        end
      end
    end

    # @return [String] hex MD5 digest of the file at +path+
    def file_hash(path)
      Digest::MD5.file(path).hexdigest
    end

    # Stream the remote file into a Tempfile while printing a progress bar
    # and the transfer rate.
    #
    # @return [Tempfile] closed (but not unlinked) tempfile holding the data
    def download(fog_dir)
      file = Tempfile.new(digest.to_s)
      file.binmode # chunks are raw bytes
      ticks   = 0
      size    = 0
      started = Time.now.to_i

      fog_dir.files.get(remote_filename) do |chunk, left, total|
        if chunk.bytesize + left == total # fog might restart in the middle
          file.rewind
          file.truncate(0)
          if ticks != 0
            puts " ERROR ... retrying"
            ticks = 0
          end
        end

        file.write chunk
        size = total

        # Guard against a zero-byte total, which would otherwise produce NaN.
        fraction = total > 0 ? 1 - (left.to_f / total) : 1.0
        ((fraction * PROGRESS_WIDTH).to_i - ticks).times do
          ticks += 1
          print "#"
        end
      end

      elapsed = [Time.now.to_i - started, 0.5].max
      puts " #{((size / BYTES_PER_MB / elapsed) * 100).to_i / 100.0} MB/s"
      file.close
      file
    rescue StandardError
      file.close(true) if file # do not leave a partial tempfile behind
      raise
    end
  end
end
@@ -0,0 +1,3 @@
1
module SmartS3Sync
  # Version string of the gem; the gemspec reads it as SmartS3Sync::VERSION.
  VERSION = '0.0.1'
end
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'smart_s3_sync/version'
5
+
6
# Packaging manifest for the smart_s3_sync gem.
Gem::Specification.new do |gem|
  # Identity
  gem.name     = "smart_s3_sync"
  gem.version  = SmartS3Sync::VERSION
  gem.authors  = ["Chris Rhoden"]
  gem.email    = ["chris@prx.org"]
  gem.homepage = ""
  gem.license  = "MIT"

  gem.description = 'Intelligent syncing from Cloud providers when duplicate content abounds.'
  gem.summary     = 'Intelligent syncing from Cloud providers when duplicate content abounds.'

  # Package exactly what git tracks; anything under bin/ becomes an executable.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Runtime dependencies
  gem.add_dependency "fog"
  # NOTE(review): nothing in the visible library code requires sqlite3 —
  # confirm this dependency is still needed.
  gem.add_dependency "sqlite3"

  # Development dependencies
  gem.add_development_dependency "bundler", "~> 1.3"
  gem.add_development_dependency "rake"
end
metadata ADDED
@@ -0,0 +1,112 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: smart_s3_sync
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Chris Rhoden
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-06-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: fog
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: sqlite3
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '1.3'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '1.3'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: Intelligent syncing from Cloud providers when duplicate content abounds.
70
+ email:
71
+ - chris@prx.org
72
+ executables:
73
+ - s5
74
+ extensions: []
75
+ extra_rdoc_files: []
76
+ files:
77
+ - Gemfile
78
+ - Gemfile.lock
79
+ - LICENSE.txt
80
+ - README.md
81
+ - Rakefile
82
+ - bin/s5
83
+ - lib/smart_s3_sync.rb
84
+ - lib/smart_s3_sync/file_table.rb
85
+ - lib/smart_s3_sync/file_target.rb
86
+ - lib/smart_s3_sync/version.rb
87
+ - smart_s3_sync.gemspec
88
+ homepage: ''
89
+ licenses:
90
+ - MIT
91
+ metadata: {}
92
+ post_install_message:
93
+ rdoc_options: []
94
+ require_paths:
95
+ - lib
96
+ required_ruby_version: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - '>='
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ required_rubygems_version: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - '>='
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ requirements: []
107
+ rubyforge_project:
108
+ rubygems_version: 2.1.11
109
+ signing_key:
110
+ specification_version: 4
111
+ summary: Intelligent syncing from Cloud providers when duplicate content abounds.
112
+ test_files: []