outatime 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/outatime/cli.rb +1 -1
- data/lib/outatime/fetcher.rb +87 -55
- data/lib/outatime/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ed16328dea0d2e300283d1982a891c17a23341bb
|
4
|
+
data.tar.gz: b2bd09379a2fe1ef252ab34894f88fbcb33d468a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 30b90838d71c9d13f5dde1b1e1b512c324b7c5e9b349b6af99593d8e943834a84ebc1d13bb570dd549f21e0698bf8e9209d00cc12860c49d3a05fb1228e5955c
|
7
|
+
data.tar.gz: bb2152657dd2c26d0501b9814b97e5242f433aae73ea955a96e43218f0ecd1b6063b4deb1e3a3c2695ccf134fe1daf4ed34b11e47eafc53651c68c17d04ad2c1
|
data/lib/outatime/cli.rb
CHANGED
@@ -22,7 +22,7 @@ module Outatime
|
|
22
22
|
def run
|
23
23
|
fetcher = Outatime::Fetcher.new(options)
|
24
24
|
|
25
|
-
pb = ProgressBar.create(total:
|
25
|
+
pb = ProgressBar.create(total: nil,
|
26
26
|
format: "%t: |%B| %f %c/%C %R MB/sec",
|
27
27
|
rate_scale: lambda { |rate| rate / 1024 / 1024 },
|
28
28
|
throttle_rate: 0.5)
|
data/lib/outatime/fetcher.rb
CHANGED
@@ -1,3 +1,45 @@
|
|
1
|
+
require 'thread/pool'
|
2
|
+
|
3
|
+
# Outatime::Fetcher is responsible for finding the exact revision for each file
|
4
|
+
# for a given time.
|
5
|
+
#
|
6
|
+
# AWS S3 API lists all file revisions in a very particular order and this
|
7
|
+
# class takes advantage of that to quickly parse and find the revision.
|
8
|
+
#
|
9
|
+
# The 'GET Bucket Object Versions'
|
10
|
+
# (http://docs.aws.amazon.com/AmazonS3/latest/API/RESTBucketGETVersion.html)
|
11
|
+
# returns all file revisions ordered by key (path + filename) and last
|
12
|
+
# modified time.
|
13
|
+
#
|
14
|
+
# For example:
|
15
|
+
#
|
16
|
+
# id: 12456, key: "src/file1", last_modified: 7 Feb 11:38
|
17
|
+
# id: 12357, key: "src/file1", last_modified: 7 Feb 11:37
|
18
|
+
# id: 12357, key: "src/file1", last_modified: 7 Feb 11:00
|
19
|
+
# id: 22222, key: "src/file2", last_modified: 7 Feb 11:39
|
20
|
+
# ...
|
21
|
+
#
|
22
|
+
# Keep in mind that this response is paginated, where the max amount of
|
23
|
+
# revisions per page is 1000.
|
24
|
+
#
|
25
|
+
# When a versioned bucket contains a huge amount of files and revisions,
|
26
|
+
# fetching all data may take a long time. So Outatime::Fetcher starts
|
27
|
+
# downloading the correct revision even when not all file revisions are
|
28
|
+
# already fetched, i.e. when it is still downloading the revisions and their
|
29
|
+
# data. That accelerates the process.
|
30
|
+
#
|
31
|
+
# So how do we know when we have all the information we need before starting to download
|
32
|
+
# a specific file revision? As the response is ordered by key (filename) and
|
33
|
+
# timestamps, fetcher can start downloading if the current response page
|
34
|
+
# contains all possible revisions for a file, i.e. its revisions aren't
|
35
|
+
# paginated.
|
36
|
+
#
|
37
|
+
# When the response for a given key (filename) is paginated, the file is not
|
38
|
+
# downloaded until all revisions are fetched in the next response page.
|
39
|
+
#
|
40
|
+
# This algorithm removes the need to fetch all file revisions (which may take
|
41
|
+
# several requests) for a versioned bucket before starting to download its files,
|
42
|
+
# but acts on each individual file instead.
|
1
43
|
module Outatime
|
2
44
|
class Fetcher
|
3
45
|
attr_accessor :options
|
@@ -15,10 +57,10 @@ module Outatime
|
|
15
57
|
#
|
16
58
|
def initialize(options = {})
|
17
59
|
@options = options
|
18
|
-
@files_mutex = Mutex.new
|
19
60
|
@fetch_block_mutex = Mutex.new
|
20
61
|
@s3_client = options[:s3_client] if options[:s3_client]
|
21
|
-
@from
|
62
|
+
@from = ::Chronic.parse(@options[:from]) if @options[:from]
|
63
|
+
@pool = Thread.pool(@options.fetch(:threads, 20))
|
22
64
|
|
23
65
|
# raise if the date/time was not parsed
|
24
66
|
raise ArgumentError, "The from time was not parseable." if @from.nil?
|
@@ -31,7 +73,11 @@ module Outatime
|
|
31
73
|
#
|
32
74
|
# Returns nothing.
|
33
75
|
def fetch!(&block)
|
34
|
-
|
76
|
+
object_versions do |object_version|
|
77
|
+
fetch_object(object_version, &block)
|
78
|
+
end
|
79
|
+
|
80
|
+
@pool.wait(:done)
|
35
81
|
end
|
36
82
|
|
37
83
|
# Public: Returns the objects total size.
|
@@ -45,34 +91,29 @@ module Outatime
|
|
45
91
|
#
|
46
92
|
# Returns an Array of Aws::S3::Types::ObjectVersion.
|
47
93
|
def object_versions
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
delete_marker_keys = delete_markers.map { |dm| dm.key }
|
66
|
-
|
67
|
-
# check versions to see if we have newer delete_markers
|
68
|
-
# if so, delete those versions
|
69
|
-
versions.delete_if do |version|
|
70
|
-
if dm_index = delete_marker_keys.index(version.key)
|
71
|
-
if version.last_modified <= delete_markers[dm_index].last_modified
|
72
|
-
true
|
94
|
+
remaining_versions = []
|
95
|
+
remaining_delete_markers = []
|
96
|
+
|
97
|
+
s3_client.list_object_versions(bucket: @options[:bucket],
|
98
|
+
prefix: @options[:prefix]).each do |response|
|
99
|
+
|
100
|
+
versions = remaining_versions.concat(response.versions)
|
101
|
+
versions_by_key = versions.group_by {|v| v.key }
|
102
|
+
delete_markers = remaining_delete_markers.concat(response.delete_markers)
|
103
|
+
delete_markers_by_key = delete_markers.group_by {|v| v.key }
|
104
|
+
|
105
|
+
versions_by_key.each do |key, versions|
|
106
|
+
next if response.next_key_marker == key
|
107
|
+
filter_items(versions).each do |version|
|
108
|
+
dl_marker = filter_items(Array(delete_markers_by_key[version.key])).first
|
109
|
+
if dl_marker.nil? || (version.last_modified > dl_marker.last_modified)
|
110
|
+
yield version
|
73
111
|
end
|
74
112
|
end
|
75
113
|
end
|
114
|
+
|
115
|
+
remaining_versions = Array(versions_by_key[response.next_key_marker])
|
116
|
+
remaining_delete_markers = Array(delete_markers_by_key[response.next_key_marker])
|
76
117
|
end
|
77
118
|
end
|
78
119
|
|
@@ -90,33 +131,24 @@ module Outatime
|
|
90
131
|
# file - an Aws::S3::Types::ObjectVersion.
|
91
132
|
#
|
92
133
|
# Returns nothing.
|
93
|
-
def
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
puts "Copying from s3 #{file.key} - #{Time.now}" if verbose?
|
108
|
-
s3_client.get_object(response_target: "#{dest}",
|
109
|
-
bucket: @options[:bucket],
|
110
|
-
key: file.key,
|
111
|
-
version_id: file.version_id)
|
112
|
-
end
|
113
|
-
|
114
|
-
@fetch_block_mutex.synchronize { yield file } if block_given?
|
115
|
-
end
|
134
|
+
def fetch_object(file)
|
135
|
+
@pool.process do
|
136
|
+
dest = Pathname.new("#{@options[:destination]}/#{file.key}")
|
137
|
+
if file.key.end_with?("/")
|
138
|
+
puts "Creating s3 subdirectory #{file.key} - #{Time.now}" if verbose?
|
139
|
+
dest.mkpath
|
140
|
+
else
|
141
|
+
dest.dirname.mkpath
|
142
|
+
|
143
|
+
puts "Copying from s3 #{file.key} - #{Time.now}" if verbose?
|
144
|
+
s3_client.get_object(response_target: "#{dest}",
|
145
|
+
bucket: @options[:bucket],
|
146
|
+
key: file.key,
|
147
|
+
version_id: file.version_id)
|
116
148
|
end
|
117
|
-
end
|
118
149
|
|
119
|
-
|
150
|
+
@fetch_block_mutex.synchronize { yield file } if block_given?
|
151
|
+
end
|
120
152
|
end
|
121
153
|
|
122
154
|
# Private: Creates the S3 client instance.
|
@@ -132,8 +164,8 @@ module Outatime
|
|
132
164
|
# items - An Array of objects. Object must respond to #last_modified.
|
133
165
|
#
|
134
166
|
# Returns Array.
|
135
|
-
def
|
136
|
-
items.keep_if { |obj| obj.last_modified <= @from }
|
167
|
+
def filter_items(items)
|
168
|
+
items.keep_if { |obj| obj.last_modified <= @from }.uniq {|obj| obj.key }
|
137
169
|
end
|
138
170
|
end
|
139
171
|
end
|
data/lib/outatime/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: outatime
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Justin Mazzi
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2017-02-
|
13
|
+
date: 2017-02-07 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: aws-sdk
|