schiphol 0.9
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +16 -0
- data/README.md +45 -0
- data/lib/schiphol/mimes.rb +3 -0
- data/lib/schiphol.rb +241 -0
- metadata +92 -0
data/LICENSE
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
Schiphol - A file downloader script for Ruby.
|
2
|
+
|
3
|
+
This program is free software: you can redistribute it and/or modify
|
4
|
+
it under the terms of the GNU General Public License as published by
|
5
|
+
the Free Software Foundation, either version 3 of the License, or
|
6
|
+
(at your option) any later version.
|
7
|
+
|
8
|
+
This program is distributed in the hope that it will be useful,
|
9
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
GNU General Public License for more details.
|
12
|
+
|
13
|
+
You should have received a copy of the GNU General Public License
|
14
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
Author: Louis-Antoine Mullie (louis.mullie@gmail.com). Copyright 2011-12.
|
data/README.md
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
## Schiphol
|
2
|
+
|
3
|
+
... is a smart downloader script for Ruby, with
|
4
|
+
|
5
|
+
- Automatic file type resolution w/ MIMEs.
|
6
|
+
- Progress bar for downloads, with ETA.
|
7
|
+
- Downloads and extracts ZIP archives.
|
8
|
+
- Automatically retry downloads N times.
|
9
|
+
|
10
|
+
## Install
|
11
|
+
|
12
|
+
gem install schiphol
|
13
|
+
|
14
|
+
## Usage
|
15
|
+
|
16
|
+
**Basic**
|
17
|
+
|
18
|
+
It can't get simpler.
|
19
|
+
|
20
|
+
```ruby
|
21
|
+
require 'schiphol'
|
22
|
+
|
23
|
+
Schiphol.download('http://www.url.com/path/to/file.html')
|
24
|
+
```
|
25
|
+
|
26
|
+
**Advanced**
|
27
|
+
|
28
|
+
The options shown are the default values.
|
29
|
+
|
30
|
+
```ruby
|
31
|
+
require 'schiphol'
|
32
|
+
|
33
|
+
Schiphol.download(
|
34
|
+
'http://www.url.com/path/to/file.html',
|
35
|
+
:download_folder => './downloads',
|
36
|
+
:target_directory => '',
|
37
|
+
:show_progress => true,
|
38
|
+
:rectify_extensions => false,
|
39
|
+
:max_tries => 3
|
40
|
+
)
|
41
|
+
```
|
42
|
+
|
43
|
+
## License
|
44
|
+
|
45
|
+
This software is released under the GPL.
|
@@ -0,0 +1,3 @@
|
|
1
|
+
class Schiphol
  # Maps a MIME type (String) to the file extension (Symbol) used when
  # rectifying the extension of a downloaded file.
  MIMETypes = {
    # PDF documents.
    "application/pdf" => :pdf,
    "application/x-pdf" => :pdf,
    "application/acrobat" => :pdf,
    "applications/vnd.pdf" => :pdf,
    "text/pdf" => :pdf,
    "text/x-pdf" => :pdf,
    # Word documents. "application/msword" and "application/doc" used to be
    # fused into one unusable key ("application/msword,\n application/doc").
    "application/msword" => :doc,
    "application/doc" => :doc,
    "appl/text" => :doc,
    "application/vnd.msword" => :doc,
    "application/vnd.ms-word" => :doc,
    "application/winword" => :doc,
    "application/word" => :doc,
    "application/x-msw6" => :doc,
    "application/x-msword" => :doc,
    # Plain text and markup.
    "text/plain" => :txt,
    "text/html" => :html,
    "application/xhtml+xml" => :html,
    "text/xml" => :xml,
    "application/xml" => :xml,
    "application/x-xml" => :xml,
    "application/abiword" => :abw,
    # Images.
    "image/gif" => :gif,
    "image/x-xbitmap" => :gif,
    "image/gi_" => :gif,
    "image/jpeg" => :jpeg,
    "image/jpg" => :jpeg,
    "image/jpe_" => :jpeg,
    "image/pjpeg" => :jpeg,
    "image/vnd.swiftview-jpeg" => :jpeg,
    "image/png" => :png,
    "application/png" => :png,
    "application/x-png" => :png
  }
end
|
data/lib/schiphol.rb
ADDED
@@ -0,0 +1,241 @@
|
|
1
|
+
# **Schiphol: downloader script for Ruby.**
|
2
|
+
|
3
|
+
# - Automatic file type resolution w/ MIMEs.
|
4
|
+
# - Progress bar for downloads, with ETA.
|
5
|
+
# - Downloads and extracts ZIP archives.
|
6
|
+
# - Automatically retry downloads N times.
|
7
|
+
class Schiphol
|
8
|
+
|
9
|
+
VERSION = '0.9'
|
10
|
+
|
11
|
+
#* External dependencies *#
|
12
|
+
|
13
|
+
# Require net/http to download files.
|
14
|
+
require 'net/http'
|
15
|
+
# Require fileutils to move files around.
|
16
|
+
require 'fileutils'
|
17
|
+
# Require rubyzip to unzip packages.
|
18
|
+
require 'zip/zip'
|
19
|
+
# Require progressbar to track progress.
|
20
|
+
require 'progressbar'
|
21
|
+
|
22
|
+
#* Internal dependencies *#
|
23
|
+
require 'schiphol/mimes'
|
24
|
+
|
25
|
+
#* Default runtime options *#
|
26
|
+
|
27
|
+
DefaultOptions = {
|
28
|
+
# The main folder for downloaded files.
|
29
|
+
:download_folder => './downloads',
|
30
|
+
# A directory inside :download_folder in
|
31
|
+
# which the file will be downloaded.
|
32
|
+
# When empty, files will be downloaded
|
33
|
+
# directly into :download_folder.
|
34
|
+
:target_directory => '',
|
35
|
+
# Whether to show a progress bar or not.
|
36
|
+
:show_progress => true,
|
37
|
+
# If set to true, downloaded files bear
|
38
|
+
# the appropriate extension for their
|
39
|
+
# MIME type rather than the original
|
40
|
+
# web file's extension.
|
41
|
+
:rectify_extensions => false,
|
42
|
+
# Maximal number of times to try.
|
43
|
+
:max_tries => 3
|
44
|
+
}
|
45
|
+
|
46
|
+
#* Public static methods *#
|
47
|
+
|
48
|
+
# Download the file at url into the download folder and return the path
# to the downloaded file. If the URL's path carries no file name (for
# example 'http://host/'), the file is saved under the name 'top'.
#
# url     - The String URL to fetch (http or https).
# options - A Hash overriding DefaultOptions (:download_folder,
#           :target_directory, :show_progress, :rectify_extensions,
#           :max_tries).
#
# Returns the String path of the downloaded file.
# Raises RuntimeError once :max_tries attempts have failed.
def self.download(url, options = {})

  # Get the default options that aren't set.
  options = DefaultOptions.merge(options)

  # Get the folder we're downloading to.
  path = get_or_create_path(options)

  # Get the parsed URI from the URL.
  uri = ::URI.parse(url)

  # Get the filename, falling back to 'top' when the URL has none.
  _dname, fname = get_dname_fname(uri)
  fname = 'top' if fname.nil? || fname.empty?

  # Number of failed attempts so far for this download.
  tries = 0

  # Declared here so the rescue/ensure clauses can see the handle.
  file = nil

  begin

    # Open an HTTP session. SSL has to be requested when the session is
    # opened; it cannot be switched on once the session has started.
    Net::HTTP.start(uri.host, uri.port,
                    :use_ssl => uri.scheme == 'https') do |http|

      # request_uri keeps the query string and is '/' for an empty path.
      http.request_get(uri.request_uri) do |response|

        # Check response code was OK before touching the disk.
        check_response_code(response.code)

        # Work on a copy so a retry starts from the original filename.
        target = fname
        if options[:rectify_extensions]
          target = rectify_extensions(fname, response.content_type)
        end

        # Setup progress bar.
        bar = nil
        if options[:show_progress]
          bar = create_bar(url, response.content_length)
        end

        # Open the file in binary mode so the bytes are written verbatim.
        file = File.open("#{path}/#{target}", 'wb')

        # Write the downloaded file segment by segment.
        response.read_body do |segment|
          bar.inc(segment.length) if bar
          file.write(segment)
        end

        # Terminate the progress bar.
        bar.finish if bar

      end

    end

    # Return the path to the download.
    file.path.to_s

  # Only rescue StandardError so that interrupts and exits still propagate.
  rescue StandardError => error

    # Close and delete the partial file left by the failed attempt.
    unless file.nil?
      file.close unless file.closed?
      File.delete(file.path) if File.exist?(file.path)
      file = nil
    end

    # Retry while fewer than :max_tries attempts have been made.
    retry if (tries += 1) < options[:max_tries]

    # Raise exception if can't retry.
    raise "Couldn't download #{url} " +
          "(Max number of attempts reached). " +
          "Error: (#{error.message})"

  # Ensure the file handler is closed.
  ensure
    file.close unless file.nil? || file.closed?
  end

end
|
137
|
+
|
138
|
+
#* Private methods *#
|
139
|
+
|
140
|
+
private
|
141
|
+
|
142
|
+
# Build the progress bar shown while downloading.
#
# url    - Passed to ProgressBar.new as its first argument.
# length - Total size passed to ProgressBar.new; when nil/false a warning
#          is printed and 10000 is used in its place.
#
# Returns the new ProgressBar.
def self.create_bar(url, length)
  return ProgressBar.new(url, length) if length

  warn 'Unknown file size; ETR unknown.'
  ProgressBar.new(url, 10000)
end
|
153
|
+
|
154
|
+
# Get or create the download folder.
#
# options - A Hash providing :download_folder and :target_directory.
#
# Returns the String path made by joining the download folder and the
# target directory.
def self.get_or_create_path(options)

  # Path is download folder [+ directory].
  path = File.join(
    options[:download_folder],
    options[:target_directory]
  )

  # Create the path if non-existent. mkdir_p also creates any missing
  # parent directory, which plain mkdir refuses to do.
  FileUtils.mkdir_p(path) unless File.directory?(path)

  path

end
|
171
|
+
|
172
|
+
# Split the path of a URI into its directory part and its file name.
#
# uri - An object whose #path returns a String (e.g. a parsed URI).
#
# Returns a two-element Array [directory, filename]. A path made of a
# single segment yields '/' as its directory.
def self.get_dname_fname(uri)
  segments = uri.path.split('/')

  return ['/', segments[0]] if segments.size == 1

  [File.join(*segments[0..-2]), segments[-1]]
end
|
186
|
+
|
187
|
+
# Rectify a file's extension based on its MIME type.
#
# file - The String path or name of the file; only its base name, minus
#        any existing extension, is kept.
# t    - The MIME type String, looked up in MIMETypes.
#
# Returns the String file name carrying the extension mapped to t.
# Raises RuntimeError when t has no entry in MIMETypes.
def self.rectify_extensions(file, t)

  # Look the type up before any string conversion: nil.to_s is '' (which
  # is truthy), so converting first would hide an unknown MIME type.
  ext = MIMETypes[t]

  unless ext
    raise "Don't know how to handle MIME type #{t}."
  end

  "#{File.basename(file, '.*')}.#{ext}"

end
|
201
|
+
|
202
|
+
# Check that the response code is OK.
#
# code - The HTTP status code, as a String ('200') or an Integer (200).
#
# Returns nil when the code is 200.
# Raises RuntimeError for any other code.
def self.check_response_code(code)
  # Compare as strings so an Integer status is accepted as well.
  return if code.to_s == '200'

  raise "Response code was not 200 , but #{code}."
end
|
208
|
+
|
209
|
+
#* Unimplemented methods *#
|
210
|
+
|
211
|
+
# Download a ZIP archive and extract it. Not implemented yet.
#
# Defined on the class itself because the helpers it is meant to call
# (download, unzip) are class-level methods.
#
# url     - The String URL of the archive.
# options - A Hash of download options; :destination is handed to unzip.
#
# Raises RuntimeError ('Not implemented yet.') unconditionally.
def self.download_and_extract(url, options = {})
  raise 'Not implemented yet.'
  # Intended implementation; unreachable until the guard above is removed.
  unzip(download(url, options),
        options[:destination])
end
|
216
|
+
|
217
|
+
# Decompress a ZIP archive. Not implemented yet: the method raises before
# doing any work.
#
# file    - The String path of the ZIP archive.
# options - The value passed as second argument by download_and_extract,
#           i.e. a destination directory or nil.
#           NOTE(review): the parameter name suggests a Hash, but the only
#           caller passes options[:destination] - confirm before enabling.
#
# Raises RuntimeError ('Not implemented yet.') unconditionally.
def self.unzip(file, options)

  raise 'Not implemented yet.'

  # Everything below is unreachable until the guard above is removed.
  # When no destination is given, extract next to the archive, into a
  # directory bearing the archive's name.
  destination = options ||
    File.join(File.dirname(file), File.basename(file, '.*'))

  # NOTE(review): entry names come from the archive and are not checked;
  # reject names that escape destination (e.g. '../') before enabling.
  Zip::ZipFile.open(file) do |zip_file|
    zip_file.each do |f|
      f_path = File.join(destination, f.name)
      FileUtils.mkdir_p(File.absolute_path(File.dirname(f_path)))
      zip_file.extract(f, f_path) unless File.exist?(f_path)
    end
  end

  # Drop the resource-fork folder found in archives created on Mac OS X.
  mac_remove = File.join(destination, '__MACOSX')
  if File.readable?(mac_remove)
    FileUtils.rm_rf(mac_remove)
  end

end
|
240
|
+
|
241
|
+
end
|
metadata
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: schiphol
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.9'
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Louis Mullie
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-06-12 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rubyzip
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 0.9.6.1
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 0.9.6.1
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: progressbar
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: 0.10.0
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 0.10.0
|
46
|
+
description: ! ' Schiphol is a smart file downloader for Ruby, with automatic file
|
47
|
+
type resolution by MIME header, progress bar with ETR, extraction of downloaded
|
48
|
+
ZIP archives, and auto-retries for a preset number of times. '
|
49
|
+
email:
|
50
|
+
- louis.mullie@gmail.com
|
51
|
+
executables: []
|
52
|
+
extensions: []
|
53
|
+
extra_rdoc_files: []
|
54
|
+
files:
|
55
|
+
- lib/schiphol/mimes.rb
|
56
|
+
- lib/schiphol.rb
|
57
|
+
- README.md
|
58
|
+
- LICENSE
|
59
|
+
homepage: https://github.com/louismullie/schiphol
|
60
|
+
licenses: []
|
61
|
+
post_install_message: ! '********************************************************************************
|
62
|
+
|
63
|
+
|
64
|
+
Thank you for installing Schiphol!
|
65
|
+
|
66
|
+
|
67
|
+
********************************************************************************
|
68
|
+
|
69
|
+
|
70
|
+
'
|
71
|
+
rdoc_options: []
|
72
|
+
require_paths:
|
73
|
+
- lib
|
74
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
75
|
+
none: false
|
76
|
+
requirements:
|
77
|
+
- - ! '>='
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
80
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
requirements: []
|
87
|
+
rubyforge_project:
|
88
|
+
rubygems_version: 1.8.24
|
89
|
+
signing_key:
|
90
|
+
specification_version: 3
|
91
|
+
summary: ! 'Schiphol: a smart file downloader for Ruby.'
|
92
|
+
test_files: []
|