redsnapper 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5) hide show
  1. checksums.yaml +4 -4
  2. data/VERSION +1 -1
  3. data/lib/redsnapper.rb +40 -22
  4. data/redsnapper.gemspec +3 -3
  5. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ad704b383d106faf8371c582c8ac01674268e8b3
4
- data.tar.gz: ff80041ec11718c48302a367a8a7a48bc59ca654
3
+ metadata.gz: 20921b02baf5c314923959df7f3eeb540cfdcb2d
4
+ data.tar.gz: 59c37e1602ade44ad3745f6ff42e0cac358f2a91
5
5
  SHA512:
6
- metadata.gz: f21f67d23ddd54adeabcca421dff2ccbaf031544605a6d336cc903fc5e170a0831abe7e076532317b65831017835ad8e2de84a6b5d3413b40b0432accf60690d
7
- data.tar.gz: 067eb80b79bd5baa06229802cfde89ef0f6b3382ac83005746929af890a47536147ab05ea25182be5d9642242355f35aed12f2fdede4f262d4be8b03cb5eca0e
6
+ metadata.gz: f6b52020b660064c763be81cea95dc917619f5c9d7be7792d62a8c0f4fc3f383ea1febf78f08ef959ddf0f1375049319c81ddc80943ffa258904a254a7d57689
7
+ data.tar.gz: 13217548cdaf7c211052feb77c8849ab5c365040b0ffd0be5e9a60cb271b754853a3d6f683ec061c9e158ba1634980c167ba3dba15430934441b9cf53374721e
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.1
1
+ 0.4.0
data/lib/redsnapper.rb CHANGED
@@ -1,11 +1,14 @@
1
1
  require 'thread/pool'
2
2
  require 'open3'
3
3
  require 'set'
4
+ require 'date'
4
5
 
5
6
  class RedSnapper
6
7
  TARSNAP = 'tarsnap'
7
8
  THREAD_POOL_DEFAULT_SIZE = 10
9
+
8
10
  EXIT_ERROR = "tarsnap: Error exit delayed from previous errors.\n"
11
+ NOT_OLDER_ERROR = "File on disk is not older; skipping.\n"
9
12
 
10
13
  @@output_mutex = Mutex.new
11
14
 
@@ -31,51 +34,65 @@ class RedSnapper
31
34
  @error = false
32
35
  end
33
36
 
34
- def file_sizes
35
- return @sizes if @sizes
36
-
37
+ def files
38
+ return @files if @files
39
+
37
40
  command = [ TARSNAP, '-tvf', @archive, *@options[:tarsnap_options] ]
38
41
  command.push(@options[:directory]) if @options[:directory]
39
42
 
40
- @sizes = {}
41
- dirs = Set.new
43
+ @files = {}
42
44
 
43
45
  Open3.popen3(*command) do |_, out, _|
44
46
  out.gets(nil).split("\n").each do |entry|
45
- (_, _, _, _, size, _, _, _, name) = entry.split(/\s+/, 9)
46
- if name.end_with?('/')
47
- dirs.add(name)
48
- else
49
- @sizes[name] = size.to_i
50
- end
47
+ (_, _, _, _, size, month, day, year_or_time, name) = entry.split(/\s+/, 9)
48
+
49
+ date = DateTime.parse("#{month} #{day}, #{year_or_time}")
50
+ date = date.prev_year if date < DateTime.now
51
+
52
+ @files[name] = {
53
+ :size => size.to_i,
54
+ :date => date
55
+ }
51
56
  end
52
57
  end
53
58
 
54
- empty_dirs = dirs.clone
55
- @sizes.each { |f, _| empty_dirs.delete(File.dirname(f) + '/') }
59
+ @files
60
+ end
56
61
 
62
+ def empty_dirs(files, dirs)
63
+ empty_dirs = dirs.clone
64
+ files.each { |f| empty_dirs.delete(File.dirname(f) + '/') }
57
65
  dirs.each do |dir|
58
66
  components = dir.split('/')[0..-2]
59
67
  components.each_with_index do |_, i|
60
68
  empty_dirs.delete(components[0, i + 1].join('/') + '/')
61
69
  end
62
70
  end
63
-
64
- empty_dirs.each { |dir| @sizes[dir] = 0 }
65
-
66
- @sizes
71
+ empty_dirs
67
72
  end
68
73
 
69
- def files
70
- @files ||= file_sizes.keys
74
+ def files_to_extract
75
+ files_to_extract, dirs = files.partition { |f| !f.first.end_with?('/') }.map(&:to_h)
76
+ empty_dirs(files_to_extract.keys, dirs.keys).each do |dir|
77
+ files_to_extract[dir] = { :size => 0 }
78
+ end
79
+ files_to_extract
71
80
  end
72
81
 
73
82
  def file_groups
74
83
  groups = (1..@thread_pool.max).map { Group.new }
75
- file_sizes.sort { |a, b| b.last <=> a.last }.each do |file|
76
- groups.sort.last.add(*file)
84
+ files_to_extract.sort { |a, b| b.last[:size] <=> a.last[:size] }.each do |name, props|
85
+
86
+ # If the previous batch of files had an entry with the same size and date,
87
+ # assume that this is a duplicate and assign it zero weight. There may be
88
+ # some false positives here since the granularity of the data we have from
89
+ # tarsnap is only "same day". However, a false positive just affects the
90
+ # queueing scheme, not which files get queued.
91
+
92
+ size = (@options[:previous] && @options[:previous][name] == props) ? 0 : props[:size]
93
+ groups.sort.last.add(name, size)
77
94
  end
78
- groups.map(&:files)
95
+ groups.map(&:files).reject(&:empty?)
79
96
  end
80
97
 
81
98
  def run
@@ -84,6 +101,7 @@ class RedSnapper
84
101
  command = [ TARSNAP, '-xvf', @archive, *(@options[:tarsnap_options] + chunk) ]
85
102
  Open3.popen3(*command) do |_, _, err|
86
103
  while line = err.gets
104
+ next if line.end_with?(NOT_OLDER_ERROR)
87
105
  if line == EXIT_ERROR
88
106
  @error = true
89
107
  next
data/redsnapper.gemspec CHANGED
@@ -2,16 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: redsnapper 0.3.1 ruby lib
5
+ # stub: redsnapper 0.4.0 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "redsnapper"
9
- s.version = "0.3.1"
9
+ s.version = "0.4.0"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib"]
13
13
  s.authors = ["Scott Wheeler"]
14
- s.date = "2016-12-31"
14
+ s.date = "2017-01-05"
15
15
  s.description = "Faster extraction of large tarsnap archives using a pool of parallel tarsnap clients"
16
16
  s.email = "scott@directededge.com"
17
17
  s.executables = ["redsnapper"]
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: redsnapper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Scott Wheeler
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-12-31 00:00:00.000000000 Z
11
+ date: 2017-01-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thread