redsnapper 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (5)
  1. checksums.yaml +4 -4
  2. data/VERSION +1 -1
  3. data/lib/redsnapper.rb +40 -22
  4. data/redsnapper.gemspec +3 -3
  5. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ad704b383d106faf8371c582c8ac01674268e8b3
4
- data.tar.gz: ff80041ec11718c48302a367a8a7a48bc59ca654
3
+ metadata.gz: 20921b02baf5c314923959df7f3eeb540cfdcb2d
4
+ data.tar.gz: 59c37e1602ade44ad3745f6ff42e0cac358f2a91
5
5
  SHA512:
6
- metadata.gz: f21f67d23ddd54adeabcca421dff2ccbaf031544605a6d336cc903fc5e170a0831abe7e076532317b65831017835ad8e2de84a6b5d3413b40b0432accf60690d
7
- data.tar.gz: 067eb80b79bd5baa06229802cfde89ef0f6b3382ac83005746929af890a47536147ab05ea25182be5d9642242355f35aed12f2fdede4f262d4be8b03cb5eca0e
6
+ metadata.gz: f6b52020b660064c763be81cea95dc917619f5c9d7be7792d62a8c0f4fc3f383ea1febf78f08ef959ddf0f1375049319c81ddc80943ffa258904a254a7d57689
7
+ data.tar.gz: 13217548cdaf7c211052feb77c8849ab5c365040b0ffd0be5e9a60cb271b754853a3d6f683ec061c9e158ba1634980c167ba3dba15430934441b9cf53374721e
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.1
1
+ 0.4.0
data/lib/redsnapper.rb CHANGED
@@ -1,11 +1,14 @@
1
1
  require 'thread/pool'
2
2
  require 'open3'
3
3
  require 'set'
4
+ require 'date'
4
5
 
5
6
  class RedSnapper
6
7
  TARSNAP = 'tarsnap'
7
8
  THREAD_POOL_DEFAULT_SIZE = 10
9
+
8
10
  EXIT_ERROR = "tarsnap: Error exit delayed from previous errors.\n"
11
+ NOT_OLDER_ERROR = "File on disk is not older; skipping.\n"
9
12
 
10
13
  @@output_mutex = Mutex.new
11
14
 
@@ -31,51 +34,65 @@ class RedSnapper
31
34
  @error = false
32
35
  end
33
36
 
34
- def file_sizes
35
- return @sizes if @sizes
36
-
37
+ def files
38
+ return @files if @files
39
+
37
40
  command = [ TARSNAP, '-tvf', @archive, *@options[:tarsnap_options] ]
38
41
  command.push(@options[:directory]) if @options[:directory]
39
42
 
40
- @sizes = {}
41
- dirs = Set.new
43
+ @files = {}
42
44
 
43
45
  Open3.popen3(*command) do |_, out, _|
44
46
  out.gets(nil).split("\n").each do |entry|
45
- (_, _, _, _, size, _, _, _, name) = entry.split(/\s+/, 9)
46
- if name.end_with?('/')
47
- dirs.add(name)
48
- else
49
- @sizes[name] = size.to_i
50
- end
47
+ (_, _, _, _, size, month, day, year_or_time, name) = entry.split(/\s+/, 9)
48
+
49
+ date = DateTime.parse("#{month} #{day}, #{year_or_time}")
50
+ date = date.prev_year if date < DateTime.now
51
+
52
+ @files[name] = {
53
+ :size => size.to_i,
54
+ :date => date
55
+ }
51
56
  end
52
57
  end
53
58
 
54
- empty_dirs = dirs.clone
55
- @sizes.each { |f, _| empty_dirs.delete(File.dirname(f) + '/') }
59
+ @files
60
+ end
56
61
 
62
+ def empty_dirs(files, dirs)
63
+ empty_dirs = dirs.clone
64
+ files.each { |f| empty_dirs.delete(File.dirname(f) + '/') }
57
65
  dirs.each do |dir|
58
66
  components = dir.split('/')[0..-2]
59
67
  components.each_with_index do |_, i|
60
68
  empty_dirs.delete(components[0, i + 1].join('/') + '/')
61
69
  end
62
70
  end
63
-
64
- empty_dirs.each { |dir| @sizes[dir] = 0 }
65
-
66
- @sizes
71
+ empty_dirs
67
72
  end
68
73
 
69
- def files
70
- @files ||= file_sizes.keys
74
+ def files_to_extract
75
+ files_to_extract, dirs = files.partition { |f| !f.first.end_with?('/') }.map(&:to_h)
76
+ empty_dirs(files_to_extract.keys, dirs.keys).each do |dir|
77
+ files_to_extract[dir] = { :size => 0 }
78
+ end
79
+ files_to_extract
71
80
  end
72
81
 
73
82
  def file_groups
74
83
  groups = (1..@thread_pool.max).map { Group.new }
75
- file_sizes.sort { |a, b| b.last <=> a.last }.each do |file|
76
- groups.sort.last.add(*file)
84
+ files_to_extract.sort { |a, b| b.last[:size] <=> a.last[:size] }.each do |name, props|
85
+
86
+ # If the previous batch of files had an entry with the same size and date,
87
+ # assume that this is a duplicate and assign it zero weight. There may be
88
+ # some false positives here since the granularity of the data we have from
89
+ # tarsnap is only "same day". However, a false positive just affects the
90
+ # queuing scheme, not which files get queued.
91
+
92
+ size = (@options[:previous] && @options[:previous][name] == props) ? 0 : props[:size]
93
+ groups.sort.last.add(name, size)
77
94
  end
78
- groups.map(&:files)
95
+ groups.map(&:files).reject(&:empty?)
79
96
  end
80
97
 
81
98
  def run
@@ -84,6 +101,7 @@ class RedSnapper
84
101
  command = [ TARSNAP, '-xvf', @archive, *(@options[:tarsnap_options] + chunk) ]
85
102
  Open3.popen3(*command) do |_, _, err|
86
103
  while line = err.gets
104
+ next if line.end_with?(NOT_OLDER_ERROR)
87
105
  if line == EXIT_ERROR
88
106
  @error = true
89
107
  next
data/redsnapper.gemspec CHANGED
@@ -2,16 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: redsnapper 0.3.1 ruby lib
5
+ # stub: redsnapper 0.4.0 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "redsnapper"
9
- s.version = "0.3.1"
9
+ s.version = "0.4.0"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib"]
13
13
  s.authors = ["Scott Wheeler"]
14
- s.date = "2016-12-31"
14
+ s.date = "2017-01-05"
15
15
  s.description = "Faster extraction of large tarsnap archives using a pool of parallel tarsnap clients"
16
16
  s.email = "scott@directededge.com"
17
17
  s.executables = ["redsnapper"]
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: redsnapper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Scott Wheeler
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-12-31 00:00:00.000000000 Z
11
+ date: 2017-01-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thread