yum_s3_sync 0.0.3 → 0.0.13

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 23ccfe9d584a3f0764dc5af307715ac8695aa64c
4
- data.tar.gz: ca3c0fdbb706a4efd312644b00928c00db287a77
3
+ metadata.gz: eaae8aaa8330a034cd7997a9d5dee0c4034b305c
4
+ data.tar.gz: 1212ab5d4e1688fe2cd8f3bfb168abfb5f2e2c99
5
5
  SHA512:
6
- metadata.gz: 0e56e5c4af3f7f23beed8560af4aa849e0197752bd2251f1f3ea67b62eeb349dc31d57c8048a191785bd2a71d2eb0f497385d3e0b3cd3fef011802b34839e64c
7
- data.tar.gz: bcb588bba36f39bf2eba56e0b240ef32c2f97b000bec75e3b77948ba13cf557c8a920ffdd859ece47aa4f289ef87e8cd78e451f6bbac997428f8cdec68dcf89a
6
+ metadata.gz: 4f9c878f67a70767968e6de7bc76326b359cd33a30dbd036390e4cb1caa2e381aebfa1f3ad25fcec82c10d92b9bbc24c32fce6a8272522112b20f8334d8758a6
7
+ data.tar.gz: c57dc78761bd06c09ada734014e3e3aadfa483dbf13a9eabf75d17352dc6699b4be814b22ea43214ae147ef1e869284474b211a1fbd9abc3f7cd381440ee5fba
data/README.md CHANGED
@@ -4,8 +4,8 @@ Synchronize a Yum repository over HTTP to S3. This can be useful when you want t
4
4
 
5
5
  There are two command line tools supplied:
6
6
 
7
- yums3sync - Synchronizes a single Yum repository to a bucket:/prefix
8
- yums3syncdir - Scan a directory of links for repositories then synchronize all of them to bucket:/prefix
7
+ * ``yums3sync`` - Synchronizes a single Yum repository to a bucket:/prefix
8
+ * ``yums3syncdir`` - Scan a directory of links for repositories then synchronize all of them to bucket:/prefix
9
9
 
10
10
  YumS3sync is smart enough to copy only the files that changed between runs, and does almost nothing if nothing changed on the source end.
11
11
 
@@ -15,6 +15,12 @@ After the syncronization you can use the repository using Yum with an S3 plugin
15
15
 
16
16
  ## Installation
17
17
 
18
+ ### Gem
19
+ ```
20
+ gem install yum_s3_sync
21
+ ```
22
+
23
+ ### Manual
18
24
  Clone this git repository and build a Gem. The program should work without additional dependencies on RHEL/CentOS 6 with EPEL enabled. It is tested on Ruby 1.8.7 and 2.1.
19
25
 
20
26
  ## Usage
@@ -26,6 +32,8 @@ Usage: yums3sync [options]
26
32
  -s, --source SOURCE HTTP source URL
27
33
  -b, --bucket BUCKET Target bucket name
28
34
  -p, --prefix PREFIX Target bucket prefix
35
+ -k, --keep Never overwrite exitant files
36
+ -n, --dry-run Don't make any changes
29
37
  ```
30
38
 
31
39
  Example usage:
@@ -44,6 +52,8 @@ Usage: yums3syncdir [options]
44
52
  -b, --bucket BUCKET Target bucket name
45
53
  -p, --prefix PREFIX Target bucket prefix
46
54
  -x, --exclude prefix1,prefix2 Exclude prefixes
55
+ -k, --keep Never overwrite exitant files
56
+ -n, --dry-run Don't make any changes
47
57
  ```
48
58
 
49
59
  Example usage:
@@ -16,6 +16,12 @@ opt_parser = OptionParser.new do |opts|
16
16
  opts.on('-p', '--prefix PREFIX', 'Target bucket prefix') do |p|
17
17
  options[:target_base] = p
18
18
  end
19
+ opts.on('-k', '--keep', 'Never overwrite exitant files') do |k|
20
+ options[:keep] = true
21
+ end
22
+ opts.on('-n', '--dry-run', 'Don\'t make any changes') do |n|
23
+ options[:dry_run] = true
24
+ end
19
25
  end
20
26
 
21
27
  opt_parser.parse!
@@ -25,7 +31,7 @@ if !options[:source_base] || !options[:target_bucket] || !options[:target_base]
25
31
  exit 1
26
32
  end
27
33
 
28
- repo_syncer = YumS3Sync::RepoSyncer.new(options[:source_base], options[:target_bucket], options[:target_base])
34
+ repo_syncer = YumS3Sync::RepoSyncer.new(options[:source_base], options[:target_bucket], options[:target_base], options[:keep], options[:dry_run])
29
35
 
30
36
  begin
31
37
  repo_syncer.sync
@@ -14,18 +14,21 @@ opt_parser = OptionParser.new do |opts|
14
14
  opts.on('-s', '--source SOURCEDIR', 'HTTP source base URL') do |s|
15
15
  options[:source_base] = s
16
16
  end
17
-
18
17
  opts.on('-b', '--bucket BUCKET', 'Target bucket name') do |b|
19
18
  options[:target_bucket] = b
20
19
  end
21
-
22
20
  opts.on('-p', '--prefix PREFIX', 'Target bucket prefix') do |p|
23
21
  options[:target_base] = p
24
22
  end
25
-
26
23
  opts.on('-x', '--exclude prefix1,prefix2', Array, 'Exclude prefixes') do |x|
27
24
  options[:exclude] = x
28
25
  end
26
+ opts.on('-k', '--keep', 'Never overwrite exitant files') do |k|
27
+ options[:keep] = true
28
+ end
29
+ opts.on('-n', '--dry-run', 'Don\'t make any changes') do |n|
30
+ options[:dry_run] = true
31
+ end
29
32
  end
30
33
 
31
34
  opt_parser.parse!
@@ -77,7 +80,7 @@ Net::HTTP.start(uri.host) do |http|
77
80
  end
78
81
 
79
82
  repositories.each do |repository|
80
- repo_syncer = YumS3Sync::RepoSyncer.new(options[:source_base] + '/' + repository, options[:target_bucket], options[:target_base] + '/' + repository)
83
+ repo_syncer = YumS3Sync::RepoSyncer.new(options[:source_base] + '/' + repository, options[:target_bucket], options[:target_base] + '/' + repository, options[:keep], options[:dry_run])
81
84
 
82
85
  begin
83
86
  repo_syncer.sync
@@ -1,4 +1,5 @@
1
1
  require 'open-uri'
2
+ require 'socket'
2
3
 
3
4
  module YumS3Sync
4
5
  class HTTPDownloader
@@ -1,13 +1,16 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'yum_s3_sync'
4
+ require 'parallel'
4
5
 
5
6
  module YumS3Sync
6
7
  class RepoSyncer
7
- def initialize(source_base, target_bucket, target_base)
8
+ def initialize(source_base, target_bucket, target_base, keep = false, dry_run = false)
8
9
  @source_base = source_base
9
10
  @target_bucket = target_bucket
10
11
  @target_base = target_base
12
+ @keep = keep
13
+ @dry_run = dry_run
11
14
  end
12
15
 
13
16
  def sync
@@ -17,34 +20,49 @@ module YumS3Sync
17
20
  s3_downloader = YumS3Sync::S3Downloader.new(@target_bucket, @target_base)
18
21
  dest_repository = YumS3Sync::YumRepository.new(s3_downloader)
19
22
 
23
+ s3_uploader = YumS3Sync::S3Uploader.new(@target_bucket, @target_base, http_downloader, @dry_run)
24
+
25
+ s3_file_lister = YumS3Sync::S3FileLister.new(@target_bucket, @target_base)
26
+ s3_deleter = YumS3Sync::S3Deleter.new(@target_bucket, @target_base, @dry_run)
27
+
20
28
  new_packages = source_repository.compare(dest_repository)
21
- s3_uploader = YumS3Sync::S3Uploader.new(@target_bucket, @target_base, http_downloader)
22
29
 
23
30
  metadata = []
24
- source_repository.metadata.each do |_type, file|
31
+ new_metadata = false
32
+ source_repository.metadata.each do |type, file|
33
+ if !dest_repository.metadata[type] || dest_repository.metadata[type][:checksum] != file[:checksum]
34
+ new_metadata = true
35
+ end
36
+
25
37
  metadata.push file[:href]
26
38
  end
27
39
 
28
40
  new_packages.each do |package|
29
- s3_uploader.upload(package)
41
+ s3_uploader.upload(package, @keep)
30
42
  end
31
43
 
32
- if !dest_repository.exists? || !new_packages.empty?
44
+ if !dest_repository.exists? || !new_packages.empty? || new_metadata
33
45
  metadata.each do |file|
34
- s3_uploader.upload(file)
46
+ s3_uploader.upload(file, true)
35
47
  end
36
48
  end
37
49
 
38
- unless new_packages.empty?
39
- s3_file_lister = YumS3Sync::S3FileLister.new(@target_bucket, @target_base)
40
- s3_deleter = YumS3Sync::S3Deleter.new(@target_bucket, @target_base)
50
+ file_names = s3_file_lister.list
41
51
 
42
- s3_file_lister.list.each do |file|
43
- if !source_repository.packages[file] && !metadata.include?(file)
44
- s3_deleter.delete(file)
45
- end
52
+ puts "Locating removed files"
53
+ file_names.each do |filename|
54
+ if !source_repository.packages[filename] && !metadata.include?(filename)
55
+ s3_deleter.delete(filename)
46
56
  end
47
57
  end
58
+
59
+ puts "Locating missing files"
60
+ source_repository.packages.each do |package, data|
61
+ unless file_names.include? package
62
+ s3_uploader.upload(package, true)
63
+ end
64
+ end
65
+
48
66
  end
49
67
  end
50
68
  end
@@ -2,9 +2,10 @@ require 'aws-sdk'
2
2
 
3
3
  module YumS3Sync
4
4
  class S3Deleter
5
- def initialize(bucket, prefix)
5
+ def initialize(bucket, prefix, dry_run = false)
6
6
  @bucket = bucket
7
7
  @prefix = prefix
8
+ @dry_run = dry_run
8
9
  end
9
10
 
10
11
  def delete(file)
@@ -16,8 +17,12 @@ module YumS3Sync
16
17
  dest_obj = s3.buckets[@bucket].objects[target]
17
18
 
18
19
  if dest_obj.exists?
19
- puts "Deleting #{@bucket}::#{target}"
20
- dest_obj.delete
20
+ if @dry_run
21
+ puts "Dry-run: Deleting #{@bucket}::#{target}"
22
+ else
23
+ puts "Deleting #{@bucket}::#{target}"
24
+ dest_obj.delete
25
+ end
21
26
  end
22
27
  end
23
28
  end
@@ -1,4 +1,5 @@
1
1
  require 'aws-sdk'
2
+ require 'parallel'
2
3
 
3
4
  module YumS3Sync
4
5
  class S3FileLister
@@ -8,16 +9,15 @@ module YumS3Sync
8
9
  end
9
10
 
10
11
  def list
11
- files = []
12
-
13
12
  s3 = AWS::S3.new
14
- s3.buckets[@bucket].objects.with_prefix(@prefix).each do |file|
15
- basename = file.key.sub(/#{@prefix}\/*/, '')
16
13
 
17
- files.push basename
18
- end
14
+ puts "Listing all files in #{@bucket}:#{@prefix}"
15
+ s3_objects = s3.buckets[@bucket].objects.with_prefix(@prefix)
19
16
 
20
- files
17
+ s3_objects.map do |file|
18
+ basename = file.key.sub(/#{@prefix}\/*/, '')
19
+ basename
20
+ end
21
21
  end
22
22
  end
23
23
  end
@@ -2,25 +2,36 @@ require 'aws-sdk'
2
2
 
3
3
  module YumS3Sync
4
4
  class S3Uploader
5
- def initialize(bucket, prefix, downloader)
5
+ def initialize(bucket, prefix, downloader, dry_run = false)
6
6
  @bucket = bucket
7
7
  @prefix = prefix
8
8
  @downloader = downloader
9
+ @dry_run = dry_run
9
10
  end
10
11
 
11
- def upload(file)
12
+ def upload(file, overwrite = false)
12
13
  retries = 0
13
14
  s3 = AWS::S3.new
14
15
 
15
- source_file = @downloader.download(file)
16
- target = "#{@prefix}/#{file}"
17
- target.gsub!(/\/+/, '/')
18
- dest_obj = s3.buckets[@bucket].objects[target]
19
- dest_obj.delete if dest_obj.exists?
20
-
21
- puts "Uploading #{@bucket}::#{target}"
22
16
  begin
23
- dest_obj.write(:file => source_file)
17
+ target = "#{@prefix}/#{file}"
18
+ target.gsub!(/\/+/, '/')
19
+ dest_obj = s3.buckets[@bucket].objects[target]
20
+
21
+ if dest_obj.exists? && ! overwrite
22
+ puts "Already exists: skipping #{@bucket}::#{target}"
23
+ return
24
+ end
25
+
26
+ source_file = @downloader.download(file)
27
+
28
+ if @dry_run
29
+ puts "Dry-run: Uploading #{@bucket}::#{target}"
30
+ else
31
+ puts "Uploading #{@bucket}::#{target}"
32
+ dest_obj.delete if dest_obj.exists?
33
+ dest_obj.write(:file => source_file)
34
+ end
24
35
  rescue StandardError => e
25
36
  if retries < 10
26
37
  retries += 1
@@ -1,3 +1,3 @@
1
1
  module YumS3Sync
2
- VERSION = '0.0.3'
2
+ VERSION = '0.0.13'
3
3
  end
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'zlib'
4
+ require 'nokogiri'
4
5
  require 'rexml/document'
5
6
  require 'rexml/streamlistener'
6
7
 
@@ -9,13 +10,19 @@ module YumS3Sync
9
10
  attr_accessor :metadata
10
11
 
11
12
  def initialize(downloader)
13
+ @metadata = {}
12
14
  @downloader = downloader
13
15
 
14
- repomd_parser = RepModListener.new
15
16
  repomd_file = @downloader.download('repodata/repomd.xml')
16
17
  if repomd_file
17
- REXML::Document.parse_stream(repomd_file, repomd_parser)
18
- @metadata = repomd_parser.metadata
18
+ doc = Nokogiri::XML(repomd_file)
19
+ doc.xpath("//xmlns:data").each do |file|
20
+ metadata[file['type']] = {
21
+ :href => file.xpath('xmlns:location')[0]['href'],
22
+ :checksum => file.xpath('xmlns:checksum')[0].child.to_s
23
+ }
24
+ end
25
+
19
26
  @metadata['repomd'] = { :href => 'repodata/repomd.xml' }
20
27
  else
21
28
  @metadata = { 'primary' => nil }
@@ -28,11 +35,16 @@ module YumS3Sync
28
35
  primary_file = @downloader.download(@metadata['primary'][:href])
29
36
  return {} unless primary_file
30
37
 
38
+ puts "Parsing #{@metadata['primary'][:href]}"
31
39
  gzstream = Zlib::GzipReader.new(primary_file)
32
- package_parser = PackageListener.new
33
40
 
34
- REXML::Document.parse_stream(gzstream, package_parser)
35
- package_parser.packages
41
+ doc = Nokogiri::XML(gzstream)
42
+ packages = {}
43
+ doc.xpath("//xmlns:package").each do |package|
44
+ packages[package.xpath("xmlns:location")[0]['href']] = package.xpath("xmlns:checksum")[0].child.to_s
45
+ end
46
+
47
+ packages
36
48
  end
37
49
 
38
50
  def packages
@@ -58,82 +70,4 @@ module YumS3Sync
58
70
  end
59
71
  end
60
72
 
61
- class PackageListener
62
- attr_accessor :packages
63
- include REXML::StreamListener
64
-
65
- def initialize
66
- self.packages = {}
67
- end
68
-
69
- def tag_start(name, *attrs)
70
- @current_tag = name
71
- case name
72
- when 'metadata'
73
- puts "Parsing #{attrs[0]['packages']} packages"
74
- when 'package'
75
- @package = {}
76
- when 'location'
77
- @package['href'] = attrs[0]['href']
78
- end
79
- end
80
-
81
- def tag_end(name)
82
- case name
83
- when 'package'
84
- if @package
85
- packages[@package['href']] = @package['checksum']
86
- @package = nil
87
- else
88
- fail 'Unmatched <package> tag'
89
- end
90
- end
91
- end
92
-
93
- def text(data)
94
- return if data =~ /^\s+$/
95
- if @current_tag == 'checksum'
96
- @package['checksum'] = data
97
- end
98
- end
99
- end
100
-
101
- class RepModListener
102
- attr_accessor :metadata
103
- include REXML::StreamListener
104
-
105
- def initialize
106
- self.metadata = {}
107
- end
108
-
109
- def tag_start(name, *attrs)
110
- @current_tag = name
111
- case name
112
- when 'data'
113
- @data = {}
114
- @data['type'] = attrs[0]['type']
115
- when 'location'
116
- @data['location'] = attrs[0]['href']
117
- end
118
- end
119
-
120
- def tag_end(name)
121
- case name
122
- when 'data'
123
- if @data
124
- metadata[@data['type']] = { :href => @data['location'], :checksum => @data['checksum'] }
125
- @data = nil
126
- else
127
- fail 'Unmatched <data> tag'
128
- end
129
- end
130
- end
131
-
132
- def text(data)
133
- return if data =~ /^\s+$/
134
- if @current_tag == 'checksum'
135
- @data['checksum'] = data
136
- end
137
- end
138
- end
139
73
  end
@@ -21,5 +21,6 @@ Gem::Specification.new do |spec|
21
21
  spec.add_development_dependency 'rake', '~> 10.0'
22
22
 
23
23
  spec.add_dependency 'nokogiri', '>= 1.4.3'
24
+ spec.add_dependency 'parallel'
24
25
  spec.add_dependency 'aws-sdk'
25
26
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yum_s3_sync
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hein-Pieter van Braam
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-14 00:00:00.000000000 Z
11
+ date: 2015-11-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: 1.4.3
55
+ - !ruby/object:Gem::Dependency
56
+ name: parallel
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: aws-sdk
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -112,7 +126,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
112
126
  version: '0'
113
127
  requirements: []
114
128
  rubyforge_project:
115
- rubygems_version: 2.2.2
129
+ rubygems_version: 2.4.8
116
130
  signing_key:
117
131
  specification_version: 4
118
132
  summary: Simple program to synchronize Yum repositories with S3 buckets