yum_s3_sync 0.0.3 → 0.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 23ccfe9d584a3f0764dc5af307715ac8695aa64c
4
- data.tar.gz: ca3c0fdbb706a4efd312644b00928c00db287a77
3
+ metadata.gz: eaae8aaa8330a034cd7997a9d5dee0c4034b305c
4
+ data.tar.gz: 1212ab5d4e1688fe2cd8f3bfb168abfb5f2e2c99
5
5
  SHA512:
6
- metadata.gz: 0e56e5c4af3f7f23beed8560af4aa849e0197752bd2251f1f3ea67b62eeb349dc31d57c8048a191785bd2a71d2eb0f497385d3e0b3cd3fef011802b34839e64c
7
- data.tar.gz: bcb588bba36f39bf2eba56e0b240ef32c2f97b000bec75e3b77948ba13cf557c8a920ffdd859ece47aa4f289ef87e8cd78e451f6bbac997428f8cdec68dcf89a
6
+ metadata.gz: 4f9c878f67a70767968e6de7bc76326b359cd33a30dbd036390e4cb1caa2e381aebfa1f3ad25fcec82c10d92b9bbc24c32fce6a8272522112b20f8334d8758a6
7
+ data.tar.gz: c57dc78761bd06c09ada734014e3e3aadfa483dbf13a9eabf75d17352dc6699b4be814b22ea43214ae147ef1e869284474b211a1fbd9abc3f7cd381440ee5fba
data/README.md CHANGED
@@ -4,8 +4,8 @@ Synchronize a Yum repository over HTTP to S3. This can be useful when you want t
4
4
 
5
5
  There are two command line tools supplied:
6
6
 
7
- yums3sync - Synchronizes a single Yum repository to a bucket:/prefix
8
- yums3syncdir - Scan a directory of links for repositories then synchronize all of them to bucket:/prefix
7
+ * ``yums3sync`` - Synchronizes a single Yum repository to a bucket:/prefix
8
+ * ``yums3syncdir`` - Scan a directory of links for repositories then synchronize all of them to bucket:/prefix
9
9
 
10
10
  YumS3sync is smart enough to copy only the files that changed between runs, and does almost nothing if nothing changed on the source end.
11
11
 
@@ -15,6 +15,12 @@ After the syncronization you can use the repository using Yum with an S3 plugin
15
15
 
16
16
  ## Installation
17
17
 
18
+ ### Gem
19
+ ```
20
+ gem install yum_s3_sync
21
+ ```
22
+
23
+ ### Manual
18
24
  Clone this git repository and build a Gem. The program should work without additional dependencies on RHEL/CentOS 6 with EPEL enabled. It is tested on Ruby 1.8.7 and 2.1.
19
25
 
20
26
  ## Usage
@@ -26,6 +32,8 @@ Usage: yums3sync [options]
26
32
  -s, --source SOURCE HTTP source URL
27
33
  -b, --bucket BUCKET Target bucket name
28
34
  -p, --prefix PREFIX Target bucket prefix
35
+ -k, --keep Never overwrite exitant files
36
+ -n, --dry-run Don't make any changes
29
37
  ```
30
38
 
31
39
  Example usage:
@@ -44,6 +52,8 @@ Usage: yums3syncdir [options]
44
52
  -b, --bucket BUCKET Target bucket name
45
53
  -p, --prefix PREFIX Target bucket prefix
46
54
  -x, --exclude prefix1,prefix2 Exclude prefixes
55
+ -k, --keep Never overwrite exitant files
56
+ -n, --dry-run Don't make any changes
47
57
  ```
48
58
 
49
59
  Example usage:
@@ -16,6 +16,12 @@ opt_parser = OptionParser.new do |opts|
16
16
  opts.on('-p', '--prefix PREFIX', 'Target bucket prefix') do |p|
17
17
  options[:target_base] = p
18
18
  end
19
+ opts.on('-k', '--keep', 'Never overwrite exitant files') do |k|
20
+ options[:keep] = true
21
+ end
22
+ opts.on('-n', '--dry-run', 'Don\'t make any changes') do |n|
23
+ options[:dry_run] = true
24
+ end
19
25
  end
20
26
 
21
27
  opt_parser.parse!
@@ -25,7 +31,7 @@ if !options[:source_base] || !options[:target_bucket] || !options[:target_base]
25
31
  exit 1
26
32
  end
27
33
 
28
- repo_syncer = YumS3Sync::RepoSyncer.new(options[:source_base], options[:target_bucket], options[:target_base])
34
+ repo_syncer = YumS3Sync::RepoSyncer.new(options[:source_base], options[:target_bucket], options[:target_base], options[:keep], options[:dry_run])
29
35
 
30
36
  begin
31
37
  repo_syncer.sync
@@ -14,18 +14,21 @@ opt_parser = OptionParser.new do |opts|
14
14
  opts.on('-s', '--source SOURCEDIR', 'HTTP source base URL') do |s|
15
15
  options[:source_base] = s
16
16
  end
17
-
18
17
  opts.on('-b', '--bucket BUCKET', 'Target bucket name') do |b|
19
18
  options[:target_bucket] = b
20
19
  end
21
-
22
20
  opts.on('-p', '--prefix PREFIX', 'Target bucket prefix') do |p|
23
21
  options[:target_base] = p
24
22
  end
25
-
26
23
  opts.on('-x', '--exclude prefix1,prefix2', Array, 'Exclude prefixes') do |x|
27
24
  options[:exclude] = x
28
25
  end
26
+ opts.on('-k', '--keep', 'Never overwrite exitant files') do |k|
27
+ options[:keep] = true
28
+ end
29
+ opts.on('-n', '--dry-run', 'Don\'t make any changes') do |n|
30
+ options[:dry_run] = true
31
+ end
29
32
  end
30
33
 
31
34
  opt_parser.parse!
@@ -77,7 +80,7 @@ Net::HTTP.start(uri.host) do |http|
77
80
  end
78
81
 
79
82
  repositories.each do |repository|
80
- repo_syncer = YumS3Sync::RepoSyncer.new(options[:source_base] + '/' + repository, options[:target_bucket], options[:target_base] + '/' + repository)
83
+ repo_syncer = YumS3Sync::RepoSyncer.new(options[:source_base] + '/' + repository, options[:target_bucket], options[:target_base] + '/' + repository, options[:keep], options[:dry_run])
81
84
 
82
85
  begin
83
86
  repo_syncer.sync
@@ -1,4 +1,5 @@
1
1
  require 'open-uri'
2
+ require 'socket'
2
3
 
3
4
  module YumS3Sync
4
5
  class HTTPDownloader
@@ -1,13 +1,16 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'yum_s3_sync'
4
+ require 'parallel'
4
5
 
5
6
  module YumS3Sync
6
7
  class RepoSyncer
7
- def initialize(source_base, target_bucket, target_base)
8
+ def initialize(source_base, target_bucket, target_base, keep = false, dry_run = false)
8
9
  @source_base = source_base
9
10
  @target_bucket = target_bucket
10
11
  @target_base = target_base
12
+ @keep = keep
13
+ @dry_run = dry_run
11
14
  end
12
15
 
13
16
  def sync
@@ -17,34 +20,49 @@ module YumS3Sync
17
20
  s3_downloader = YumS3Sync::S3Downloader.new(@target_bucket, @target_base)
18
21
  dest_repository = YumS3Sync::YumRepository.new(s3_downloader)
19
22
 
23
+ s3_uploader = YumS3Sync::S3Uploader.new(@target_bucket, @target_base, http_downloader, @dry_run)
24
+
25
+ s3_file_lister = YumS3Sync::S3FileLister.new(@target_bucket, @target_base)
26
+ s3_deleter = YumS3Sync::S3Deleter.new(@target_bucket, @target_base, @dry_run)
27
+
20
28
  new_packages = source_repository.compare(dest_repository)
21
- s3_uploader = YumS3Sync::S3Uploader.new(@target_bucket, @target_base, http_downloader)
22
29
 
23
30
  metadata = []
24
- source_repository.metadata.each do |_type, file|
31
+ new_metadata = false
32
+ source_repository.metadata.each do |type, file|
33
+ if !dest_repository.metadata[type] || dest_repository.metadata[type][:checksum] != file[:checksum]
34
+ new_metadata = true
35
+ end
36
+
25
37
  metadata.push file[:href]
26
38
  end
27
39
 
28
40
  new_packages.each do |package|
29
- s3_uploader.upload(package)
41
+ s3_uploader.upload(package, @keep)
30
42
  end
31
43
 
32
- if !dest_repository.exists? || !new_packages.empty?
44
+ if !dest_repository.exists? || !new_packages.empty? || new_metadata
33
45
  metadata.each do |file|
34
- s3_uploader.upload(file)
46
+ s3_uploader.upload(file, true)
35
47
  end
36
48
  end
37
49
 
38
- unless new_packages.empty?
39
- s3_file_lister = YumS3Sync::S3FileLister.new(@target_bucket, @target_base)
40
- s3_deleter = YumS3Sync::S3Deleter.new(@target_bucket, @target_base)
50
+ file_names = s3_file_lister.list
41
51
 
42
- s3_file_lister.list.each do |file|
43
- if !source_repository.packages[file] && !metadata.include?(file)
44
- s3_deleter.delete(file)
45
- end
52
+ puts "Locating removed files"
53
+ file_names.each do |filename|
54
+ if !source_repository.packages[filename] && !metadata.include?(filename)
55
+ s3_deleter.delete(filename)
46
56
  end
47
57
  end
58
+
59
+ puts "Locating missing files"
60
+ source_repository.packages.each do |package, data|
61
+ unless file_names.include? package
62
+ s3_uploader.upload(package, true)
63
+ end
64
+ end
65
+
48
66
  end
49
67
  end
50
68
  end
@@ -2,9 +2,10 @@ require 'aws-sdk'
2
2
 
3
3
  module YumS3Sync
4
4
  class S3Deleter
5
- def initialize(bucket, prefix)
5
+ def initialize(bucket, prefix, dry_run = false)
6
6
  @bucket = bucket
7
7
  @prefix = prefix
8
+ @dry_run = dry_run
8
9
  end
9
10
 
10
11
  def delete(file)
@@ -16,8 +17,12 @@ module YumS3Sync
16
17
  dest_obj = s3.buckets[@bucket].objects[target]
17
18
 
18
19
  if dest_obj.exists?
19
- puts "Deleting #{@bucket}::#{target}"
20
- dest_obj.delete
20
+ if @dry_run
21
+ puts "Dry-run: Deleting #{@bucket}::#{target}"
22
+ else
23
+ puts "Deleting #{@bucket}::#{target}"
24
+ dest_obj.delete
25
+ end
21
26
  end
22
27
  end
23
28
  end
@@ -1,4 +1,5 @@
1
1
  require 'aws-sdk'
2
+ require 'parallel'
2
3
 
3
4
  module YumS3Sync
4
5
  class S3FileLister
@@ -8,16 +9,15 @@ module YumS3Sync
8
9
  end
9
10
 
10
11
  def list
11
- files = []
12
-
13
12
  s3 = AWS::S3.new
14
- s3.buckets[@bucket].objects.with_prefix(@prefix).each do |file|
15
- basename = file.key.sub(/#{@prefix}\/*/, '')
16
13
 
17
- files.push basename
18
- end
14
+ puts "Listing all files in #{@bucket}:#{@prefix}"
15
+ s3_objects = s3.buckets[@bucket].objects.with_prefix(@prefix)
19
16
 
20
- files
17
+ s3_objects.map do |file|
18
+ basename = file.key.sub(/#{@prefix}\/*/, '')
19
+ basename
20
+ end
21
21
  end
22
22
  end
23
23
  end
@@ -2,25 +2,36 @@ require 'aws-sdk'
2
2
 
3
3
  module YumS3Sync
4
4
  class S3Uploader
5
- def initialize(bucket, prefix, downloader)
5
+ def initialize(bucket, prefix, downloader, dry_run = false)
6
6
  @bucket = bucket
7
7
  @prefix = prefix
8
8
  @downloader = downloader
9
+ @dry_run = dry_run
9
10
  end
10
11
 
11
- def upload(file)
12
+ def upload(file, overwrite = false)
12
13
  retries = 0
13
14
  s3 = AWS::S3.new
14
15
 
15
- source_file = @downloader.download(file)
16
- target = "#{@prefix}/#{file}"
17
- target.gsub!(/\/+/, '/')
18
- dest_obj = s3.buckets[@bucket].objects[target]
19
- dest_obj.delete if dest_obj.exists?
20
-
21
- puts "Uploading #{@bucket}::#{target}"
22
16
  begin
23
- dest_obj.write(:file => source_file)
17
+ target = "#{@prefix}/#{file}"
18
+ target.gsub!(/\/+/, '/')
19
+ dest_obj = s3.buckets[@bucket].objects[target]
20
+
21
+ if dest_obj.exists? && ! overwrite
22
+ puts "Already exists: skipping #{@bucket}::#{target}"
23
+ return
24
+ end
25
+
26
+ source_file = @downloader.download(file)
27
+
28
+ if @dry_run
29
+ puts "Dry-run: Uploading #{@bucket}::#{target}"
30
+ else
31
+ puts "Uploading #{@bucket}::#{target}"
32
+ dest_obj.delete if dest_obj.exists?
33
+ dest_obj.write(:file => source_file)
34
+ end
24
35
  rescue StandardError => e
25
36
  if retries < 10
26
37
  retries += 1
@@ -1,3 +1,3 @@
1
1
  module YumS3Sync
2
- VERSION = '0.0.3'
2
+ VERSION = '0.0.13'
3
3
  end
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'zlib'
4
+ require 'nokogiri'
4
5
  require 'rexml/document'
5
6
  require 'rexml/streamlistener'
6
7
 
@@ -9,13 +10,19 @@ module YumS3Sync
9
10
  attr_accessor :metadata
10
11
 
11
12
  def initialize(downloader)
13
+ @metadata = {}
12
14
  @downloader = downloader
13
15
 
14
- repomd_parser = RepModListener.new
15
16
  repomd_file = @downloader.download('repodata/repomd.xml')
16
17
  if repomd_file
17
- REXML::Document.parse_stream(repomd_file, repomd_parser)
18
- @metadata = repomd_parser.metadata
18
+ doc = Nokogiri::XML(repomd_file)
19
+ doc.xpath("//xmlns:data").each do |file|
20
+ metadata[file['type']] = {
21
+ :href => file.xpath('xmlns:location')[0]['href'],
22
+ :checksum => file.xpath('xmlns:checksum')[0].child.to_s
23
+ }
24
+ end
25
+
19
26
  @metadata['repomd'] = { :href => 'repodata/repomd.xml' }
20
27
  else
21
28
  @metadata = { 'primary' => nil }
@@ -28,11 +35,16 @@ module YumS3Sync
28
35
  primary_file = @downloader.download(@metadata['primary'][:href])
29
36
  return {} unless primary_file
30
37
 
38
+ puts "Parsing #{@metadata['primary'][:href]}"
31
39
  gzstream = Zlib::GzipReader.new(primary_file)
32
- package_parser = PackageListener.new
33
40
 
34
- REXML::Document.parse_stream(gzstream, package_parser)
35
- package_parser.packages
41
+ doc = Nokogiri::XML(gzstream)
42
+ packages = {}
43
+ doc.xpath("//xmlns:package").each do |package|
44
+ packages[package.xpath("xmlns:location")[0]['href']] = package.xpath("xmlns:checksum")[0].child.to_s
45
+ end
46
+
47
+ packages
36
48
  end
37
49
 
38
50
  def packages
@@ -58,82 +70,4 @@ module YumS3Sync
58
70
  end
59
71
  end
60
72
 
61
- class PackageListener
62
- attr_accessor :packages
63
- include REXML::StreamListener
64
-
65
- def initialize
66
- self.packages = {}
67
- end
68
-
69
- def tag_start(name, *attrs)
70
- @current_tag = name
71
- case name
72
- when 'metadata'
73
- puts "Parsing #{attrs[0]['packages']} packages"
74
- when 'package'
75
- @package = {}
76
- when 'location'
77
- @package['href'] = attrs[0]['href']
78
- end
79
- end
80
-
81
- def tag_end(name)
82
- case name
83
- when 'package'
84
- if @package
85
- packages[@package['href']] = @package['checksum']
86
- @package = nil
87
- else
88
- fail 'Unmatched <package> tag'
89
- end
90
- end
91
- end
92
-
93
- def text(data)
94
- return if data =~ /^\s+$/
95
- if @current_tag == 'checksum'
96
- @package['checksum'] = data
97
- end
98
- end
99
- end
100
-
101
- class RepModListener
102
- attr_accessor :metadata
103
- include REXML::StreamListener
104
-
105
- def initialize
106
- self.metadata = {}
107
- end
108
-
109
- def tag_start(name, *attrs)
110
- @current_tag = name
111
- case name
112
- when 'data'
113
- @data = {}
114
- @data['type'] = attrs[0]['type']
115
- when 'location'
116
- @data['location'] = attrs[0]['href']
117
- end
118
- end
119
-
120
- def tag_end(name)
121
- case name
122
- when 'data'
123
- if @data
124
- metadata[@data['type']] = { :href => @data['location'], :checksum => @data['checksum'] }
125
- @data = nil
126
- else
127
- fail 'Unmatched <data> tag'
128
- end
129
- end
130
- end
131
-
132
- def text(data)
133
- return if data =~ /^\s+$/
134
- if @current_tag == 'checksum'
135
- @data['checksum'] = data
136
- end
137
- end
138
- end
139
73
  end
@@ -21,5 +21,6 @@ Gem::Specification.new do |spec|
21
21
  spec.add_development_dependency 'rake', '~> 10.0'
22
22
 
23
23
  spec.add_dependency 'nokogiri', '>= 1.4.3'
24
+ spec.add_dependency 'parallel'
24
25
  spec.add_dependency 'aws-sdk'
25
26
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yum_s3_sync
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hein-Pieter van Braam
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-14 00:00:00.000000000 Z
11
+ date: 2015-11-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: 1.4.3
55
+ - !ruby/object:Gem::Dependency
56
+ name: parallel
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: aws-sdk
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -112,7 +126,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
112
126
  version: '0'
113
127
  requirements: []
114
128
  rubyforge_project:
115
- rubygems_version: 2.2.2
129
+ rubygems_version: 2.4.8
116
130
  signing_key:
117
131
  specification_version: 4
118
132
  summary: Simple program to synchronize Yum repositories with S3 buckets