s3_download_by_date 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +15 -0
  2. data/bin/s3download +102 -0
  3. metadata +125 -0
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ NzZiOGY0NDdmMGE1NjI3MWUwZmVjMWMxNjgwNGEzNmQ2NDg2YzBmOA==
5
+ data.tar.gz: !binary |-
6
+ OWZlNThlM2ZiYTJiOTcwOGRiMjMyNzY1NGNjYTgxOTEyZGEyZTc4Nw==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ZmFjNWMxYjA4ZjU0OTNmYzYzMTlkZTc2NGMxZTQxZGEyYjQ0MDc3ODA4ZDc3
10
+ Mzg3NjFmYTFiZjZiYmU2YjM5YjI3NjliYzNmMjk5ZjViNzllNWU0OWE1YWJi
11
+ Y2YyYjdlNGU5ZTQwNDc3Njc3NWIzNDkxZGZlYjE0ZTUxY2RmMGU=
12
+ data.tar.gz: !binary |-
13
+ ZjIwZjU4MDUzZTM0MzU5NjhlZTY4OWNhNWQwOTBhOWRlN2UzM2QxZGUwZTg0
14
+ YjI3MDQ0MjM1NzVmMmFhZGU4NTMwOWUxMTY2MWZmYWIzNTgzOWEzOTgwYTE4
15
+ NTg5YzdmMTAxZDU4ODkzYzhlZjA2NmRiZDE2NDMxZGJhNzUxZWI=
data/bin/s3download ADDED
@@ -0,0 +1,102 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ require 'aws-sdk'
4
+ require 'thor'
5
+ require 'chronic'
6
+ require 'progressbar'
7
+ require 'active_support/time'
8
+ require 'fileutils'
9
+ require 'json'
10
+
11
# Command-line tool (built on Thor) that downloads the objects under an S3
# bucket prefix whose last-modified time falls inside a date/time window.
#
# Commands:
#   range          - download the objects modified between --from and --to
#   list_timezones - print the time zone names accepted by --timezone
#   show_help      - print the full usage text (default command)
class S3Downloader < Thor
  default_task :show_help

  # Credentials and region are read from the environment when the class loads.
  AWS.config({
    :access_key_id => ENV['AWS_ACCESS_KEY'],
    :secret_access_key => ENV['AWS_SECRET_KEY'],
    :region => ENV['REGION'] || 'us-east-1'
  })

  desc 'list_timezones', 'list timezones'
  # Prints ActiveSupport's mapping of friendly zone names to TZ identifiers
  # as pretty-printed JSON.
  def list_timezones
    puts JSON.pretty_generate(ActiveSupport::TimeZone::MAPPING)
  end

  desc 'show_help', 'show full help'
  # Prints the hand-written usage text for the tool.
  def show_help
    # The gsub strips the source indentation so the text prints flush left
    # (equivalent to a squiggly heredoc, but works on older Rubies too).
    puts <<-HELP.gsub(/^ {6}/, '')
      Download S3 files by range (date)

      Usage:
        s3download range --bucket=S3 Bucket Name --prefix=folder inside the specified bucket --save-to=Location of downloaded files

      Options:
        -b, --bucket=S3 Bucket Name                    # S3 Bucket Name
        -f, --prefix=folder inside the specified bucket # Folder inside the specified bucket
        [--from=From in a natural language date/time like yesterday, last week, etc...]
                                                       # Default: today at 00:00:00
        [--to=To in a natural language date/time like yesterday, last week, etc...]
                                                       # Default: today at 23:59:59
        --save-to=Location of downloaded files         # The target directory where downloaded files will be stored
        [--timezone=Time zone name]                    # Default: UTC, ex: "Eastern Time (US & Canada)"
        --debug, [--no-debug]                          # Print verbose progress information

      Utility:
        s3download list_timezones
        {
          "International Date Line West": "Pacific/Midway",
          "Midway Island": "Pacific/Midway",
          "American Samoa": "Pacific/Pago_Pago",
          "Hawaii": "Pacific/Honolulu",
          "Alaska": "America/Juneau",
          "Pacific Time (US & Canada)": "America/Los_Angeles"
          .
          .
          .
        }
    HELP
  end

  desc 'range', 'Download S3 files by range (date)'
  # Thor reads short flags from :aliases (the original :alias key was ignored).
  method_option :bucket, :required => true, :aliases => '-b', :desc => 'S3 Bucket Name', :banner => 'S3 Bucket Name'
  method_option :prefix, :required => true, :aliases => '-f', :desc => 'Folder inside the specified bucket', :banner => 'folder inside the specified bucket'
  # Defaults are kept as natural-language strings and parsed when the command
  # runs, instead of Time objects computed once at class-load time.
  method_option :from, :default => 'today at 00:00:00', :banner => 'From in a natural language date/time like yesterday, \'last week\', etc...'
  method_option :to, :default => 'today at 23:59:59', :banner => 'To in a natural language date/time like yesterday, \'last week\', etc...'
  method_option :save_to, :required => true, :desc => 'The target directory where downloaded files will be stored', :banner => 'Location of downloaded files'
  method_option :debug, :default => false, :type => :boolean, :desc => 'Print verbose progress information'
  method_option :timezone, :default => 'UTC', :banner => 'timezone to filter files by date [UTC] ex: "Eastern Time (US & Canada)"'
  # Downloads every object under --prefix in --bucket whose last-modified time
  # lies between --from and --to (inclusive). Files are written to
  # <save-to>/<from..to>/<object key>.
  #
  # Raises Thor::Error when --timezone is unknown, when --from/--to cannot be
  # parsed, or when --from is later than --to. A failure on a single object is
  # reported and the run continues with the next object.
  def range
    timezone = options[:timezone]
    unless ActiveSupport::TimeZone[timezone]
      raise Thor::Error, "Unknown timezone #{timezone.inspect}; run `s3download list_timezones` for valid names"
    end
    puts "TimeZone: #{timezone}"

    objects = s3.buckets[options[:bucket]].objects.with_prefix(options[:prefix])
    from = parse_time(:from, timezone)
    to = parse_time(:to, timezone)
    raise Thor::Error, "--from (#{from}) must not be later than --to (#{to})" if from > to
    puts "From: #{from}"
    puts "To: #{to}"

    window = from..to
    target = File.expand_path(options[:save_to])
    # Build the per-range directory name once so the directory that is created
    # and the path that is written always agree. The original called gsub on
    # the Range itself, which raised and was silently swallowed.
    range_dir = File.join(target, window.to_s.gsub('/', ':'))

    say("download target: #{options[:save_to]}/#{options[:prefix]}", :cyan) if options[:debug]
    say("Range: #{window}", :green) if options[:debug]

    ProgressBar.new('Filter Files', objects.count) do |pbar|
      objects.each do |object|
        modified = object.last_modified.in_time_zone(timezone)
        download_object(object, modified, range_dir) if window.cover?(modified)
        pbar.inc
      end
    end
  end

  private

  # Lazily built S3 client; private so Thor does not expose it as a command.
  def s3
    @s3 ||= AWS::S3.new
  end

  # Parses the natural-language value of the given option (:from or :to) with
  # Chronic and returns it converted to +timezone+.
  #
  # NOTE(review): Chronic appears to interpret the text in the machine's local
  # zone; the conversion only changes how the instant is displayed. Confirm
  # whether the text should instead be interpreted in +timezone+.
  #
  # Raises Thor::Error when Chronic cannot understand the text.
  def parse_time(option_name, timezone)
    text = options[option_name].to_s
    parsed = Chronic.parse(text)
    raise Thor::Error, "Could not understand --#{option_name} value: #{text.inspect}" if parsed.nil?

    parsed.in_time_zone(timezone)
  end

  # Writes one S3 object below +range_dir+, recreating the key's directory
  # structure. Errors are reported rather than raised so one bad object does
  # not abort the whole run.
  def download_object(object, modified, range_dir)
    key = object.key
    # Keys ending in "/" are folder placeholders with no file to write.
    return if key.end_with?('/')

    say("Downloading #{key} #{modified}\n", :white) if options[:debug]
    path = File.join(range_dir, key)
    # File.dirname also copes with keys that contain no "/" at all.
    FileUtils.mkdir_p(File.dirname(path))
    # Binary mode: object contents are arbitrary bytes, not text.
    File.open(path, 'wb') { |file| file.write(object.read) }
  rescue StandardError => e
    say("Failed to download #{object.key}: #{e.message}", :red)
  end
end
101
+
102
# Entry point: hand the command line to Thor, which dispatches to the matching
# S3Downloader command (show_help is declared as the default task).
+ S3Downloader.start
metadata ADDED
@@ -0,0 +1,125 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: s3_download_by_date
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Ami Mahloof
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-08-05 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: thor
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '0.19'
20
+ - - ! '>='
21
+ - !ruby/object:Gem::Version
22
+ version: 0.19.1
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '0.19'
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: 0.19.1
33
+ - !ruby/object:Gem::Dependency
34
+ name: chronic
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ~>
38
+ - !ruby/object:Gem::Version
39
+ version: '0.10'
40
+ - - ! '>='
41
+ - !ruby/object:Gem::Version
42
+ version: 0.10.2
43
+ type: :runtime
44
+ prerelease: false
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ~>
48
+ - !ruby/object:Gem::Version
49
+ version: '0.10'
50
+ - - ! '>='
51
+ - !ruby/object:Gem::Version
52
+ version: 0.10.2
53
+ - !ruby/object:Gem::Dependency
54
+ name: progressbar
55
+ requirement: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ~>
58
+ - !ruby/object:Gem::Version
59
+ version: '0.21'
60
+ - - ! '>='
61
+ - !ruby/object:Gem::Version
62
+ version: 0.21.0
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: '0.21'
70
+ - - ! '>='
71
+ - !ruby/object:Gem::Version
72
+ version: 0.21.0
73
+ - !ruby/object:Gem::Dependency
74
+ name: aws-sdk
75
+ requirement: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ~>
78
+ - !ruby/object:Gem::Version
79
+ version: '1.40'
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: 1.40.0
83
+ type: :runtime
84
+ prerelease: false
85
+ version_requirements: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ~>
88
+ - !ruby/object:Gem::Version
89
+ version: '1.40'
90
+ - - ! '>='
91
+ - !ruby/object:Gem::Version
92
+ version: 1.40.0
93
+ description: Download files by modified date range
94
+ email: ami.mahloof@gmail.com
95
+ executables:
96
+ - s3download
97
+ extensions: []
98
+ extra_rdoc_files: []
99
+ files:
100
+ - bin/s3download
101
+ homepage: https://github.com/innovia
102
+ licenses:
103
+ - MIT
104
+ metadata: {}
105
+ post_install_message:
106
+ rdoc_options: []
107
+ require_paths:
108
+ - lib
109
+ required_ruby_version: !ruby/object:Gem::Requirement
110
+ requirements:
111
+ - - ! '>='
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
114
+ required_rubygems_version: !ruby/object:Gem::Requirement
115
+ requirements:
116
+ - - ! '>='
117
+ - !ruby/object:Gem::Version
118
+ version: 1.3.6
119
+ requirements: []
120
+ rubyforge_project:
121
+ rubygems_version: 2.2.2
122
+ signing_key:
123
+ specification_version: 4
124
+ summary: Download from S3 by date range
125
+ test_files: []