schiphol 0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +16 -0
- data/README.md +45 -0
- data/lib/schiphol/mimes.rb +3 -0
- data/lib/schiphol.rb +241 -0
- metadata +92 -0
data/LICENSE
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
Schiphol - A file downloader script for Ruby.
|
2
|
+
|
3
|
+
This program is free software: you can redistribute it and/or modify
|
4
|
+
it under the terms of the GNU General Public License as published by
|
5
|
+
the Free Software Foundation, either version 3 of the License, or
|
6
|
+
(at your option) any later version.
|
7
|
+
|
8
|
+
This program is distributed in the hope that it will be useful,
|
9
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
GNU General Public License for more details.
|
12
|
+
|
13
|
+
You should have received a copy of the GNU General Public License
|
14
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
Author: Louis-Antoine Mullie (louis.mullie@gmail.com). Copyright 2011-12.
|
data/README.md
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
## Schiphol
|
2
|
+
|
3
|
+
... is a smart downloader script for Ruby, with
|
4
|
+
|
5
|
+
- Automatic file type resolution w/ MIMEs.
|
6
|
+
- Progress bar for downloads, with ETA.
|
7
|
+
- Downloads and extracts ZIP archives (planned; not yet implemented in 0.9).
|
8
|
+
- Automatically retry downloads N times.
|
9
|
+
|
10
|
+
## Install
|
11
|
+
|
12
|
+
gem install schiphol
|
13
|
+
|
14
|
+
## Usage
|
15
|
+
|
16
|
+
**Basic**
|
17
|
+
|
18
|
+
It can't get simpler.
|
19
|
+
|
20
|
+
```ruby
|
21
|
+
require 'schiphol'
|
22
|
+
|
23
|
+
Schiphol.download('http://www.url.com/path/to/file.html')
|
24
|
+
```
|
25
|
+
|
26
|
+
**Advanced**
|
27
|
+
|
28
|
+
The options shown are the default values, except for `:download_folder`, whose default is `./downloads`.
|
29
|
+
|
30
|
+
```ruby
|
31
|
+
require 'schiphol'
|
32
|
+
|
33
|
+
Schiphol.download(
|
34
|
+
'http://www.url.com/path/to/file.html',
|
35
|
+
:download_folder => './my_downloads',
|
36
|
+
:target_directory => '',
|
37
|
+
:show_progress => true,
|
38
|
+
:rectify_extensions => false,
|
39
|
+
:max_tries => 3
|
40
|
+
)
|
41
|
+
```
|
42
|
+
|
43
|
+
## License
|
44
|
+
|
45
|
+
This software is released under the GPL.
|
@@ -0,0 +1,3 @@
|
|
1
|
+
class Schiphol
  # Maps a MIME content type (String, as reported in the HTTP
  # Content-Type header) to the file extension (Symbol) that
  # downloaded files of that type should carry.
  MIMETypes = {
    # PDF documents.
    "application/pdf"          => :pdf,
    "application/x-pdf"        => :pdf,
    "application/acrobat"      => :pdf,
    "applications/vnd.pdf"     => :pdf,
    "text/pdf"                 => :pdf,
    "text/x-pdf"               => :pdf,
    # Word documents. "application/msword" and "application/doc"
    # were previously fused into a single, never-matching key.
    "application/msword"       => :doc,
    "application/doc"          => :doc,
    "appl/text"                => :doc,
    "application/vnd.msword"   => :doc,
    "application/vnd.ms-word"  => :doc,
    "application/winword"      => :doc,
    "application/word"         => :doc,
    "application/x-msw6"       => :doc,
    "application/x-msword"     => :doc,
    # Plain text and markup.
    "text/plain"               => :txt,
    "text/html"                => :html,
    "application/xhtml+xml"    => :html,
    "text/xml"                 => :xml,
    "application/xml"          => :xml,
    "application/x-xml"        => :xml,
    "application/abiword"      => :abw,
    # Images.
    "image/gif"                => :gif,
    "image/x-xbitmap"          => :gif,
    "image/gi_"                => :gif,
    "image/jpeg"               => :jpeg,
    "image/jpg"                => :jpeg,
    "image/jpe_"               => :jpeg,
    "image/pjpeg"              => :jpeg,
    "image/vnd.swiftview-jpeg" => :jpeg,
    "image/png"                => :png,
    "application/png"          => :png,
    "application/x-png"        => :png
  }
end
|
data/lib/schiphol.rb
ADDED
@@ -0,0 +1,241 @@
|
|
1
|
+
# **Schiphol: downloader script for Ruby.**
|
2
|
+
|
3
|
+
# - Automatic file type resolution w/ MIMEs.
|
4
|
+
# - Progress bar for downloads, with ETA.
|
5
|
+
# - Downloads and extracts ZIP archives.
|
6
|
+
# - Automatically retry downloads N times.
|
7
|
+
class Schiphol
|
8
|
+
|
9
|
+
VERSION = '0.9'
|
10
|
+
|
11
|
+
#* External dependencies *#
|
12
|
+
|
13
|
+
# Require net/http to download files.
|
14
|
+
require 'net/http'
|
15
|
+
# Require fileutils to move files around.
|
16
|
+
require 'fileutils'
|
17
|
+
# Require rubyzip to unzip packages.
|
18
|
+
require 'zip/zip'
|
19
|
+
# Require progressbar to track progress.
|
20
|
+
require 'progressbar'
|
21
|
+
|
22
|
+
#* Internal dependencies *#
|
23
|
+
require 'schiphol/mimes'
|
24
|
+
|
25
|
+
#* Default runtime options *#
|
26
|
+
|
27
|
+
DefaultOptions = {
|
28
|
+
# The main folder for downloaded files.
|
29
|
+
:download_folder => './downloads',
|
30
|
+
# A directory inside :download_folder in
|
31
|
+
# which the file will be downloaded.
|
32
|
+
# When empty, files will be downloaded
|
33
|
+
# directly into :download_folder.
|
34
|
+
:target_directory => '',
|
35
|
+
# Whether to show a progress bar or not.
|
36
|
+
:show_progress => true,
|
37
|
+
# If set to true, downloaded files bear
|
38
|
+
# the appropriate extension for their
|
39
|
+
# MIME type rather than the original
|
40
|
+
# web file's extension.
|
41
|
+
:rectify_extensions => false,
|
42
|
+
# Maximum number of download attempts.
|
43
|
+
:max_tries => 3
|
44
|
+
}
|
45
|
+
|
46
|
+
#* Public static methods *#
|
47
|
+
|
48
|
+
# Download a file into destination, and return
|
49
|
+
# the path to the downloaded file. If the filename
|
50
|
+
# is nil, it will set the default filename to 'top'.
|
51
|
+
# Download the file at +url+ and return the path it was saved to.
#
# url     - String HTTP or HTTPS URL of the file to fetch.
# options - Hash overriding DefaultOptions (:download_folder,
#           :target_directory, :show_progress, :rectify_extensions,
#           :max_tries).
#
# When the URL has no filename component (e.g. "http://host/"), the
# file is saved as 'top'.
#
# Returns the String path of the downloaded file.
# Raises RuntimeError once :max_tries attempts have failed; the
# message includes the last underlying error.
def self.download(url, options = {})

  # Get the default options that aren't set.
  options = DefaultOptions.merge(options)

  # Get the folder we're downloading to.
  path = get_or_create_path(options)

  # Get the parsed URI from the URL.
  uri = ::URI.parse(url)

  # Get the filename, falling back to 'top' when the URL has none.
  _dname, fname = get_dname_fname(uri)
  fname = 'top' if fname.nil? || fname.empty?

  # Number of failed attempts so far for this download.
  tries = 0

  # Declared here so the rescue/ensure clauses can see the handle.
  file = nil

  begin

    # SSL has to be configured before the session is opened, so it
    # is passed to start rather than set on the live connection.
    # The port is passed too so non-default ports are honoured.
    Net::HTTP.start(uri.host, uri.port,
      :use_ssl => (uri.scheme == 'https')) do |http|

      # request_uri keeps the query string and yields "/" when
      # the URL has an empty path.
      http.request_get(uri.request_uri) do |response|

        # Check response code was OK before touching the disk.
        check_response_code(response.code)

        # Get filename and rectify extension.
        name = fname
        if options[:rectify_extensions]
          name = rectify_extensions(
          fname, response.content_type)
        end

        # Setup progress bar.
        bar = nil
        if options[:show_progress]
          bar = create_bar(url,
          response.content_length)
        end

        # Open a file to write to; binary mode keeps the bytes
        # intact on platforms that translate newlines.
        file = File.open(File.join(path, name), 'wb')

        # Write the downloaded file.
        response.read_body do |segment|
          # Increment the progress bar.
          bar.inc(segment.length) if bar
          # Write the read segment.
          file.write(segment)
        end

        # Terminate the progress bar.
        bar.finish if bar

      end

    end

    # Return the path to the download.
    file.path.to_s

  # Only StandardError is rescued so that Interrupt, SystemExit
  # and the like still propagate to the caller untouched.
  rescue StandardError => error

    # Discard the partial file left behind by the failed attempt.
    unless file.nil?
      file.close unless file.closed?
      File.delete(file.path) if File.exist?(file.path)
      file = nil
    end

    # Retry if more tries available.
    retry if (tries += 1) < options[:max_tries]

    # Raise exception if can't retry.
    raise "Couldn't download #{url} " +
    "(Max number of attempts reached). " +
    "Error: (#{error.message})"

  # Ensure the file handler is closed.
  ensure
    file.close unless file.nil? || file.closed?
  end

end
|
137
|
+
|
138
|
+
#* Private methods *#
|
139
|
+
|
140
|
+
private
|
141
|
+
|
142
|
+
# Create a progress bar w/ length.
|
143
|
+
# Build the progress bar used while downloading +url+.
#
# url    - String used as the title of the bar.
# length - Total size passed to the bar, or nil/false when the
#          size is unknown.
#
# When the size is unknown a warning is printed and a placeholder
# total of 10000 is used instead.
#
# Returns the new ProgressBar.
def self.create_bar(url, length)
  # Known size: nothing to patch up.
  return ProgressBar.new(url, length) if length

  warn 'Unknown file size; ETR unknown.'
  ProgressBar.new(url, 10000)
end
|
153
|
+
|
154
|
+
# Get or create download folder.
|
155
|
+
# Get the download directory, creating it when it does not exist.
#
# options - Hash with :download_folder (String) and, optionally,
#           :target_directory (String sub-directory; nil is treated
#           like the empty string).
#
# Returns the String path: download folder joined with the target
# directory.
def self.get_or_create_path(options)

  # Path is download folder [+ directory].
  path = File.join(
    options[:download_folder],
    options[:target_directory].to_s
  )

  # Create path if non-existent. mkdir_p also creates any missing
  # parent directories, e.g. the download folder itself on first
  # use with a target directory.
  FileUtils.mkdir_p(path) unless File.directory?(path)

  path

end
|
171
|
+
|
172
|
+
# Parse the directory and filename
|
173
|
+
# out of the path.
|
174
|
+
# Parse the directory and filename out of the path of +uri+.
#
# uri - Object responding to #path (e.g. a parsed URI).
#
# Returns a two-element Array [directory, filename]. When the path
# has no filename component (empty path or "/"), the directory is
# '/' and the filename defaults to 'top'.
def self.get_dname_fname(uri)

  # to_s guards against URIs whose path is nil.
  segments = uri.path.to_s.split('/')

  # No segments at all: there is no filename to extract.
  return ['/', 'top'] if segments.empty?

  # A single segment is a bare filename with no directory part.
  return ['/', segments[0]] if segments.size == 1

  [File.join(*segments[0..-2]), segments[-1]]

end
|
186
|
+
|
187
|
+
# Rectify extension based on MIME type.
|
188
|
+
# Rectify extension based on MIME type.
#
# file - String path or filename; only its basename, stripped of
#        its current extension, is kept.
# t    - String MIME type to look up in MIMETypes.
#
# Returns the String filename carrying the extension mapped to +t+.
# Raises RuntimeError when +t+ is not a known MIME type.
def self.rectify_extensions(file, t)

  fn = File.basename(file, '.*')

  # Look the type up before converting to a String: nil.to_s is
  # "", which would hide unknown types and yield names like "file.".
  ext = MIMETypes[t]

  unless ext
    raise "Don't know how to handle MIME type #{t}."
  end

  fn + '.' + ext.to_s

end
|
201
|
+
|
202
|
+
# Check that response code is OK.
|
203
|
+
# Check that response code is OK.
#
# code - HTTP status code, either as the String that Net::HTTP
#        reports (e.g. '200') or as an Integer.
#
# Returns nil when the code is 200.
# Raises RuntimeError for any other code.
def self.check_response_code(code)
  # to_s lets Integer codes compare equal to the String form.
  unless code.to_s == '200'
    raise "Response code was not 200 , but #{code}."
  end
end
|
208
|
+
|
209
|
+
#* Unimplemented methods *#
|
210
|
+
|
211
|
+
# Download a ZIP archive and extract it. NOT IMPLEMENTED: this
# method always raises.
#
# url     - String URL of the archive.
# options - Hash of download options; :destination is meant to
#           name the directory to extract into.
#
# Raises RuntimeError ('Not implemented yet.') unconditionally.
def download_and_extract(url, options = {})
  raise 'Not implemented yet.'

  # Disabled draft, kept for whoever finishes the feature. The
  # helpers are class methods, so they are reached through
  # self.class, and unzip is given the directory to extract into.
  self.class.unzip(self.class.download(url, options),
  options[:destination])
end
|
216
|
+
|
217
|
+
# Decompress a ZIP archive; result will
|
218
|
+
# be stored in same folder as downloaded
|
219
|
+
# ZIP file, under a directory bearing the
|
220
|
+
# same name as the ZIP archive.
|
221
|
+
# Decompress a ZIP archive. NOT IMPLEMENTED: this method always
# raises; the code after the raise is a disabled draft.
#
# file    - String path to the ZIP archive.
# options - Either a Hash with a :destination key or the
#           destination directory itself.
#
# Raises RuntimeError ('Not implemented yet.') unconditionally.
def self.unzip(file, options)

  raise 'Not implemented yet.'

  # Disabled draft below. The directory to extract into is taken
  # from the second argument.
  destination = options.is_a?(Hash) ? options[:destination] : options

  Zip::ZipFile.open(file) do |zip_file|
    zip_file.each do |f|
      # NOTE(review): f.name comes from the archive and may contain
      # '../' components; validate that f_path stays inside
      # destination before enabling this code.
      f_path = File.join(destination, f.name)
      FileUtils.mkdir_p(File.absolute_path(File.dirname(f_path)))
      zip_file.extract(f, f_path) unless File.exist?(f_path)
    end
  end

  # Drop the resource-fork directory found in some archives.
  mac_remove = File.join(destination, '__MACOSX')
  if File.readable?(mac_remove)
    FileUtils.rm_rf(mac_remove)
  end

end
|
240
|
+
|
241
|
+
end
|
metadata
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: schiphol
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.9'
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Louis Mullie
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-06-12 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rubyzip
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 0.9.6.1
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 0.9.6.1
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: progressbar
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: 0.10.0
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 0.10.0
|
46
|
+
description: ! ' Schiphol is a smart file downloader for Ruby, with automatic file
|
47
|
+
type resolution by MIME header, progress bar with ETR, extraction of downloaded
|
48
|
+
ZIP archives, and auto-retries for a preset number of times. '
|
49
|
+
email:
|
50
|
+
- louis.mullie@gmail.com
|
51
|
+
executables: []
|
52
|
+
extensions: []
|
53
|
+
extra_rdoc_files: []
|
54
|
+
files:
|
55
|
+
- lib/schiphol/mimes.rb
|
56
|
+
- lib/schiphol.rb
|
57
|
+
- README.md
|
58
|
+
- LICENSE
|
59
|
+
homepage: https://github.com/louismullie/schiphol
|
60
|
+
licenses: []
|
61
|
+
post_install_message: ! '********************************************************************************
|
62
|
+
|
63
|
+
|
64
|
+
Thank you for installing Schiphol!
|
65
|
+
|
66
|
+
|
67
|
+
********************************************************************************
|
68
|
+
|
69
|
+
|
70
|
+
'
|
71
|
+
rdoc_options: []
|
72
|
+
require_paths:
|
73
|
+
- lib
|
74
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
75
|
+
none: false
|
76
|
+
requirements:
|
77
|
+
- - ! '>='
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
80
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
requirements: []
|
87
|
+
rubyforge_project:
|
88
|
+
rubygems_version: 1.8.24
|
89
|
+
signing_key:
|
90
|
+
specification_version: 3
|
91
|
+
summary: ! 'Schiphol: a smart file downloader for Ruby.'
|
92
|
+
test_files: []
|