outatime 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/outatime/cli.rb +1 -1
- data/lib/outatime/fetcher.rb +87 -55
- data/lib/outatime/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ed16328dea0d2e300283d1982a891c17a23341bb
|
4
|
+
data.tar.gz: b2bd09379a2fe1ef252ab34894f88fbcb33d468a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 30b90838d71c9d13f5dde1b1e1b512c324b7c5e9b349b6af99593d8e943834a84ebc1d13bb570dd549f21e0698bf8e9209d00cc12860c49d3a05fb1228e5955c
|
7
|
+
data.tar.gz: bb2152657dd2c26d0501b9814b97e5242f433aae73ea955a96e43218f0ecd1b6063b4deb1e3a3c2695ccf134fe1daf4ed34b11e47eafc53651c68c17d04ad2c1
|
data/lib/outatime/cli.rb
CHANGED
@@ -22,7 +22,7 @@ module Outatime
|
|
22
22
|
def run
|
23
23
|
fetcher = Outatime::Fetcher.new(options)
|
24
24
|
|
25
|
-
pb = ProgressBar.create(total:
|
25
|
+
pb = ProgressBar.create(total: nil,
|
26
26
|
format: "%t: |%B| %f %c/%C %R MB/sec",
|
27
27
|
rate_scale: lambda { |rate| rate / 1024 / 1024 },
|
28
28
|
throttle_rate: 0.5)
|
data/lib/outatime/fetcher.rb
CHANGED
@@ -1,3 +1,45 @@
|
|
1
|
+
require 'thread/pool'
|
2
|
+
|
3
|
+
# Outatime::Fetcher is responsible for finding the exact revision for each file
|
4
|
+
# for a given time.
|
5
|
+
#
|
6
|
+
# AWS S3 API lists all file revisions in a very particular order and this
|
7
|
+
# class takes advantage of that to quickly parse and find the revision.
|
8
|
+
#
|
9
|
+
# The 'GET Bucket Object Versions'
|
10
|
+
# (http://docs.aws.amazon.com/AmazonS3/latest/API/RESTBucketGETVersion.html)
|
11
|
+
# returns all file revisions ordered by key (path + filename) and last
|
12
|
+
# modified time.
|
13
|
+
#
|
14
|
+
# For example:
|
15
|
+
#
|
16
|
+
# id: 12456, key: "src/file1", last_modified: 7 Feb 11:38
|
17
|
+
# id: 12357, key: "src/file1", last_modified: 7 Feb 11:37
|
18
|
+
# id: 12357, key: "src/file1", last_modified: 7 Feb 11:00
|
19
|
+
# id: 22222, key: "src/file2", last_modified: 7 Feb 11:39
|
20
|
+
# ...
|
21
|
+
#
|
22
|
+
# Keep in mind that this response is paginated, where the maximum number of
|
23
|
+
# revisions per page is 1000.
|
24
|
+
#
|
25
|
+
# When a versioned bucket contains a huge number of files and revisions,
|
26
|
+
# fetching all data may take a long time. So Outatime::Fetcher starts
|
27
|
+
# downloading the correct revision even when not all file revisions are
|
28
|
+
# already fetched, i.e. when it is still downloading the revisions and their
|
29
|
+
# data. That accelerates the process.
|
30
|
+
#
|
31
|
+
# So how do we know when we have all the information we need before we start downloading
|
32
|
+
# a specific file revision? As the response is ordered by key (filename) and
|
33
|
+
# timestamps, fetcher can start downloading if the current response page
|
34
|
+
# contains all possible revisions for a file, i.e. its revisions aren't
|
35
|
+
# paginated.
|
36
|
+
#
|
37
|
+
# When the response for a given key (filename) is paginated, the file is not
|
38
|
+
# downloaded until all revisions are fetched in the next response page.
|
39
|
+
#
|
40
|
+
# This algorithm removes the need to fetch all file revisions (which may take
|
41
|
+
# several requests) for a versioned bucket before starting to download its files,
|
42
|
+
# but acts on each individual file instead.
|
1
43
|
module Outatime
|
2
44
|
class Fetcher
|
3
45
|
attr_accessor :options
|
@@ -15,10 +57,10 @@ module Outatime
|
|
15
57
|
#
|
16
58
|
def initialize(options = {})
|
17
59
|
@options = options
|
18
|
-
@files_mutex = Mutex.new
|
19
60
|
@fetch_block_mutex = Mutex.new
|
20
61
|
@s3_client = options[:s3_client] if options[:s3_client]
|
21
|
-
@from
|
62
|
+
@from = ::Chronic.parse(@options[:from]) if @options[:from]
|
63
|
+
@pool = Thread.pool(@options.fetch(:threads, 20))
|
22
64
|
|
23
65
|
# raise if the date/time was not parsed
|
24
66
|
raise ArgumentError, "The from time was not parseable." if @from.nil?
|
@@ -31,7 +73,11 @@ module Outatime
|
|
31
73
|
#
|
32
74
|
# Returns nothing.
|
33
75
|
def fetch!(&block)
|
34
|
-
|
76
|
+
object_versions do |object_version|
|
77
|
+
fetch_object(object_version, &block)
|
78
|
+
end
|
79
|
+
|
80
|
+
@pool.wait(:done)
|
35
81
|
end
|
36
82
|
|
37
83
|
# Public: Returns the objects total size.
|
@@ -45,34 +91,29 @@ module Outatime
|
|
45
91
|
#
|
46
92
|
# Returns an Array of Aws::S3::Types::ObjectVersion.
|
47
93
|
def object_versions
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
delete_marker_keys = delete_markers.map { |dm| dm.key }
|
66
|
-
|
67
|
-
# check versions to see if we have newer delete_markers
|
68
|
-
# if so, delete those versions
|
69
|
-
versions.delete_if do |version|
|
70
|
-
if dm_index = delete_marker_keys.index(version.key)
|
71
|
-
if version.last_modified <= delete_markers[dm_index].last_modified
|
72
|
-
true
|
94
|
+
remaining_versions = []
|
95
|
+
remaining_delete_markers = []
|
96
|
+
|
97
|
+
s3_client.list_object_versions(bucket: @options[:bucket],
|
98
|
+
prefix: @options[:prefix]).each do |response|
|
99
|
+
|
100
|
+
versions = remaining_versions.concat(response.versions)
|
101
|
+
versions_by_key = versions.group_by {|v| v.key }
|
102
|
+
delete_markers = remaining_delete_markers.concat(response.delete_markers)
|
103
|
+
delete_markers_by_key = delete_markers.group_by {|v| v.key }
|
104
|
+
|
105
|
+
versions_by_key.each do |key, versions|
|
106
|
+
next if response.next_key_marker == key
|
107
|
+
filter_items(versions).each do |version|
|
108
|
+
dl_marker = filter_items(Array(delete_markers_by_key[version.key])).first
|
109
|
+
if dl_marker.nil? || (version.last_modified > dl_marker.last_modified)
|
110
|
+
yield version
|
73
111
|
end
|
74
112
|
end
|
75
113
|
end
|
114
|
+
|
115
|
+
remaining_versions = Array(versions_by_key[response.next_key_marker])
|
116
|
+
remaining_delete_markers = Array(delete_markers_by_key[response.next_key_marker])
|
76
117
|
end
|
77
118
|
end
|
78
119
|
|
@@ -90,33 +131,24 @@ module Outatime
|
|
90
131
|
# files - an Array of Aws::S3::Types::ObjectVersion.
|
91
132
|
#
|
92
133
|
# Returns nothing.
|
93
|
-
def
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
puts "Copying from s3 #{file.key} - #{Time.now}" if verbose?
|
108
|
-
s3_client.get_object(response_target: "#{dest}",
|
109
|
-
bucket: @options[:bucket],
|
110
|
-
key: file.key,
|
111
|
-
version_id: file.version_id)
|
112
|
-
end
|
113
|
-
|
114
|
-
@fetch_block_mutex.synchronize { yield file } if block_given?
|
115
|
-
end
|
134
|
+
def fetch_object(file)
|
135
|
+
@pool.process do
|
136
|
+
dest = Pathname.new("#{@options[:destination]}/#{file.key}")
|
137
|
+
if file.key.end_with?("/")
|
138
|
+
puts "Creating s3 subdirectory #{file.key} - #{Time.now}" if verbose?
|
139
|
+
dest.mkpath
|
140
|
+
else
|
141
|
+
dest.dirname.mkpath
|
142
|
+
|
143
|
+
puts "Copying from s3 #{file.key} - #{Time.now}" if verbose?
|
144
|
+
s3_client.get_object(response_target: "#{dest}",
|
145
|
+
bucket: @options[:bucket],
|
146
|
+
key: file.key,
|
147
|
+
version_id: file.version_id)
|
116
148
|
end
|
117
|
-
end
|
118
149
|
|
119
|
-
|
150
|
+
@fetch_block_mutex.synchronize { yield file } if block_given?
|
151
|
+
end
|
120
152
|
end
|
121
153
|
|
122
154
|
# Private: Creates the S3 client instance.
|
@@ -132,8 +164,8 @@ module Outatime
|
|
132
164
|
# items - An Array of objects. Object must respond to #last_modified.
|
133
165
|
#
|
134
166
|
# Returns Array.
|
135
|
-
def
|
136
|
-
items.keep_if { |obj| obj.last_modified <= @from }
|
167
|
+
def filter_items(items)
|
168
|
+
items.keep_if { |obj| obj.last_modified <= @from }.uniq {|obj| obj.key }
|
137
169
|
end
|
138
170
|
end
|
139
171
|
end
|
data/lib/outatime/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: outatime
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Justin Mazzi
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2017-02-
|
13
|
+
date: 2017-02-07 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: aws-sdk
|