s3_download_by_date 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/s3download.rb +6 -0
- data/lib/s3download/cli.rb +106 -0
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bb808c568d4ae76a89af1cb9cee48822cae37732
|
4
|
+
data.tar.gz: cecc15bb7626a11de2f3f1a4ff2af0845d164d55
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 300c9352d0d1fe0f6d00c7963be58b3a20ab146eb1031c9ceed1f67e239ff5b000d20a656f358c80bd068154f7c5e741865e03438fba833e24bc4f6354c04513
|
7
|
+
data.tar.gz: 6783cdbae30906fbf1e66ac810c65c898d34d3fa32a667d7e9c3433ab97ef7ab7bdfe291b436c0a91bbd0618060de8638af4ba0e7de15812989f5df90ee70045
|
data/lib/s3download/cli.rb
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: UTF-8
|
3
|
+
require 'aws-sdk'
|
4
|
+
require 'thor'
|
5
|
+
require 'chronic'
|
6
|
+
require 'progressbar'
|
7
|
+
require 'active_support/time'
|
8
|
+
require 'fileutils'
|
9
|
+
require 'json'
|
10
|
+
|
11
|
+
# Thor command-line interface that downloads every object under an S3
# bucket/prefix whose last-modified time falls inside a date range.
#
# AWS credentials are read from the AWS_ACCESS_KEY and AWS_SECRET_KEY
# environment variables; the region comes from REGION and falls back to
# 'us-east-1'.
class S3download::Cli < Thor
  default_task :fetch

  # Internal state shared between the helpers below. Declared inside
  # no_tasks so Thor does not try to register the generated public
  # accessors as commands.
  no_tasks do
    attr_accessor :s3, :debug, :files_found, :range, :target, :bucket, :from, :to
  end

  desc 'fetch', 'Download S3 files by range (date)'
  # Thor reads short switches from the :aliases key.
  class_option :bucket, :aliases => '-b', :banner => 'S3 Bucket Name'
  class_option :prefix, :aliases => '-f', :banner => 'Folder inside the specified bucket'
  # The defaults stay natural-language strings and are parsed in #init, so
  # they are resolved when the command runs rather than when the class loads.
  class_option :from, :default => 'today at 00:00:00', :desc => 'From in a natural language date/time like yesterday, \'last week\', etc...'
  class_option :to, :default => 'today at 23:59:59', :desc => 'To in a natural language date/time like yesterday, \'last week\', etc...'
  class_option :save_to, :banner => 'Location of downloaded files'
  class_option :timezone, :default => 'UTC', :desc => 'timezone to filter files by date [UTC] ex: "Eastern Time (US & Canada)"'
  class_option :debug, :default => false, :type => :boolean
  class_option :verbose, :default => false, :type => :boolean
  # Walks every object under the bucket/prefix and downloads the ones whose
  # last-modified time (converted to --timezone) is covered by the
  # --from..--to range, then reports how many objects matched.
  #
  # Raises Thor::RequiredArgumentMissingError when --bucket, --prefix or
  # --save_to is missing.
  def fetch
    raise Thor::RequiredArgumentMissingError, 'You must supply an S3 bucket name' if options[:bucket].nil?
    raise Thor::RequiredArgumentMissingError, 'You must supply a prefix (folder within the s3 bucket to filter by)' if options[:prefix].nil?
    raise Thor::RequiredArgumentMissingError, 'You must supply a location where to save the downloaded files to' if options[:save_to].nil?
    init

    ProgressBar.new("Filter Files", bucket.count) do |pbar|
      bucket.each do |object|
        # Convert once and reuse for logging and for the range check.
        modified_at = object.last_modified.in_time_zone(options[:timezone])
        in_range = range.cover?(modified_at)

        if options[:verbose]
          say "timezone: #{options[:timezone]}"
          say "object last modified: #{object.last_modified}"
          say "object last modified in timezone: #{modified_at}"
          say "object falls in range: #{range}? => #{in_range}"
        end

        if in_range
          self.files_found += 1
          say("Downloading #{object.key} #{modified_at}\n", :white) if debug
          download(object)
        end
        pbar.inc
      end
    end
    say "Total files downloaded from S3: #{files_found}", :yellow
  end

  desc 'list_timezones', 'list timezones'
  # Prints ActiveSupport's time zone name mapping as pretty-printed JSON.
  def list_timezones
    say JSON.pretty_generate(ActiveSupport::TimeZone::MAPPING), :white
  end

  map ["-v", "--version"] => :version
  desc "version", "version"
  # Prints the S3download::ABOUT text.
  def version
    say S3download::ABOUT
  end

  private

  # Prepares everything #fetch needs: the S3 client, the prefixed object
  # collection, the parsed time range and the target directory. Also writes
  # a download_info.txt marker into the target and prints a summary banner.
  def init
    aws_init
    # --verbose implies --debug; the rest of the class reads this accessor.
    self.debug = options[:debug] || options[:verbose]
    self.bucket = s3.buckets[options[:bucket]].objects.with_prefix(options[:prefix])
    self.from = parse_time(:from)
    self.to = parse_time(:to)
    self.range = from..to
    self.target = File.expand_path(options[:save_to])
    self.files_found = 0

    FileUtils.mkdir_p(target)

    File.open("#{target}/download_info.txt", "w") do |f|
      f.write("#{Time.now} - Downloaded bucket #{options[:bucket]}/#{options[:prefix]} from: #{from} - to #{to}")
    end

    say "S3 Search & Download", :white
    say "--------------------\n", :white
    say "Bucket: #{bucket}", :cyan
    say "TimeZone: #{options[:timezone]}", :cyan
    say "From: #{from}", :cyan
    say "To: #{to}", :cyan
    say("Download target: #{options[:save_to]}/#{options[:prefix]}", :cyan) if debug
    say("Range: #{range}", :green) if debug
  end

  # Parses the natural-language value of the given option (:from or :to)
  # with Chronic and converts it to the --timezone zone.
  #
  # Raises Thor::Error when Chronic cannot understand the value, instead of
  # failing later with a NoMethodError on nil.
  def parse_time(name)
    parsed = Chronic.parse(options[name].to_s)
    raise Thor::Error, "Unable to parse --#{name} value: #{options[name].inspect}" if parsed.nil?

    parsed.in_time_zone(options[:timezone])
  end

  # Saves a single S3 object under the target directory, mirroring the
  # object's key as the relative path. Failures while writing are reported
  # and skipped so one bad object does not abort the whole run.
  def download(object)
    # Everything before the last "/" of the key; nil when the key has no
    # directory part, in which case the file goes straight into the target.
    subdir = object.key[%r{(.+)/}, 1]
    FileUtils.mkdir_p(subdir ? "#{target}/#{subdir}" : target)

    begin
      # Binary mode: object bodies are arbitrary bytes, not text.
      File.open("#{target}/#{object.key}", "wb") do |f|
        f.write(object.read)
      end
    rescue StandardError => e
      # StandardError only, so Ctrl-C / exit still stop the program.
      say "Unable to save file: #{e}", :red
    end
  end

  # Configures the AWS SDK from the environment and creates the S3 client.
  def aws_init
    AWS.config({
      :access_key_id => ENV['AWS_ACCESS_KEY'],
      :secret_access_key => ENV['AWS_SECRET_KEY'],
      :region => ENV['REGION'] || 'us-east-1'
    })

    self.s3 = AWS::S3.new
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: s3_download_by_date
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.3
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ami Mahloof
|
@@ -123,6 +123,8 @@ files:
|
|
123
123
|
- LICENSE
|
124
124
|
- README.md
|
125
125
|
- bin/s3download
|
126
|
+
- lib/s3download.rb
|
127
|
+
- lib/s3download/cli.rb
|
126
128
|
- s3_download_by_date.gemspec
|
127
129
|
homepage: https://github.com/innovia/s3_download_by_date
|
128
130
|
licenses:
|