imw 0.2.7 → 0.2.8
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +23 -0
- data/Gemfile.lock +47 -0
- data/LICENSE +20 -674
- data/README.rdoc +3 -4
- data/VERSION +1 -1
- data/lib/imw.rb +64 -35
- data/lib/imw/dataset.rb +12 -2
- data/lib/imw/formats.rb +4 -2
- data/lib/imw/formats/delimited.rb +96 -36
- data/lib/imw/formats/excel.rb +69 -101
- data/lib/imw/formats/json.rb +3 -5
- data/lib/imw/formats/pdf.rb +71 -0
- data/lib/imw/formats/yaml.rb +3 -5
- data/lib/imw/metadata.rb +66 -0
- data/lib/imw/metadata/contains_metadata.rb +44 -0
- data/lib/imw/metadata/dsl.rb +111 -0
- data/lib/imw/metadata/field.rb +65 -0
- data/lib/imw/metadata/schema.rb +227 -0
- data/lib/imw/metadata/schematized.rb +27 -0
- data/lib/imw/parsers.rb +1 -0
- data/lib/imw/parsers/flat.rb +44 -0
- data/lib/imw/resource.rb +36 -224
- data/lib/imw/schemes.rb +3 -1
- data/lib/imw/schemes/hdfs.rb +12 -1
- data/lib/imw/schemes/http.rb +1 -2
- data/lib/imw/schemes/local.rb +139 -16
- data/lib/imw/schemes/remote.rb +14 -9
- data/lib/imw/schemes/s3.rb +12 -0
- data/lib/imw/schemes/sql.rb +117 -0
- data/lib/imw/tools.rb +5 -3
- data/lib/imw/tools/downloader.rb +63 -0
- data/lib/imw/tools/summarizer.rb +21 -10
- data/lib/imw/utils.rb +10 -0
- data/lib/imw/utils/dynamically_extendable.rb +137 -0
- data/lib/imw/utils/error.rb +3 -0
- data/lib/imw/utils/extensions.rb +0 -4
- data/lib/imw/utils/extensions/array.rb +6 -7
- data/lib/imw/utils/extensions/hash.rb +3 -5
- data/lib/imw/utils/extensions/string.rb +3 -3
- data/lib/imw/utils/has_uri.rb +114 -0
- data/spec/data/{sample.csv → formats/delimited/sample.csv} +1 -1
- data/spec/data/{sample.tsv → formats/delimited/sample.tsv} +0 -0
- data/spec/data/formats/delimited/with_schema/ace-hardware-locations.tsv +11 -0
- data/spec/data/formats/delimited/with_schema/all-countries-ip-address-to-geolocation-data.tsv +16 -0
- data/spec/data/formats/delimited/with_schema/complete-list-of-starbucks-locations.tsv +11 -0
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +22 -0
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +22 -0
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-counts.tsv +12 -0
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +13 -0
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +22 -0
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +22 -0
- data/spec/data/formats/delimited/without_schema/ace-hardware-locations.tsv +10 -0
- data/spec/data/formats/delimited/without_schema/all-countries-ip-address-to-geolocation-data.tsv +15 -0
- data/spec/data/formats/delimited/without_schema/complete-list-of-starbucks-locations.tsv +10 -0
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +21 -0
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +21 -0
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-counts.tsv +11 -0
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +12 -0
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +21 -0
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +21 -0
- data/spec/data/formats/excel/sample.xls +0 -0
- data/spec/data/formats/json/sample.json +1 -0
- data/spec/data/formats/none/sample +650 -0
- data/spec/data/formats/sgml/sample.xml +617 -0
- data/spec/data/formats/text/sample.txt +650 -0
- data/spec/data/formats/yaml/sample.yaml +410 -0
- data/spec/data/schema-tabular.yaml +11 -0
- data/spec/imw/formats/delimited_spec.rb +34 -2
- data/spec/imw/formats/excel_spec.rb +55 -0
- data/spec/imw/formats/json_spec.rb +3 -3
- data/spec/imw/formats/sgml_spec.rb +4 -4
- data/spec/imw/formats/yaml_spec.rb +3 -3
- data/spec/imw/metadata/field_spec.rb +26 -0
- data/spec/imw/metadata/schema_spec.rb +27 -0
- data/spec/imw/metadata_spec.rb +39 -0
- data/spec/imw/parsers/line_parser_spec.rb +1 -1
- data/spec/imw/resource_spec.rb +0 -100
- data/spec/imw/schemes/hdfs_spec.rb +19 -13
- data/spec/imw/schemes/local_spec.rb +59 -3
- data/spec/imw/schemes/s3_spec.rb +4 -0
- data/spec/imw/utils/dynamically_extendable_spec.rb +69 -0
- data/spec/imw/utils/has_uri_spec.rb +55 -0
- data/spec/spec_helper.rb +1 -2
- data/spec/support/random.rb +4 -4
- metadata +58 -17
- data/CHANGELOG +0 -0
- data/TODO +0 -18
- data/spec/data/sample.json +0 -782
- data/spec/data/sample.txt +0 -131
- data/spec/data/sample.xml +0 -653
- data/spec/data/sample.yaml +0 -651
- data/spec/spec.opts +0 -4
- data/spec/support/extensions.rb +0 -18
data/lib/imw/schemes/http.rb
CHANGED
@@ -28,7 +28,6 @@ module IMW
|
|
28
28
|
IMW::Tools::Transferer.new(:cp, self, new_uri).transfer!
|
29
29
|
end
|
30
30
|
|
31
|
-
|
32
31
|
# Return the basename of the URI or <tt>_index</tt> if it's
|
33
32
|
# blank, as in the case of <tt>http://www.google.com</tt>.
|
34
33
|
#
|
@@ -79,7 +78,7 @@ module IMW
|
|
79
78
|
# @raise [RestClient::NotModified, RestClient::Unauthorized, RestClient::ResourceNotFound, RestClient::RequestFailed] error from RestClient on non-2xx response codes
|
80
79
|
def post payload, headers={}, &block
|
81
80
|
make_restclient_request do
|
82
|
-
RestClient.post(uri.to_s, payload,
|
81
|
+
RestClient.post(uri.to_s, payload, &block)
|
83
82
|
end
|
84
83
|
end
|
85
84
|
|
data/lib/imw/schemes/local.rb
CHANGED
@@ -65,7 +65,7 @@ module IMW
|
|
65
65
|
def dir
|
66
66
|
IMW.open(dirname)
|
67
67
|
end
|
68
|
-
|
68
|
+
|
69
69
|
end
|
70
70
|
|
71
71
|
# Defines methods appropriate for a local file.
|
@@ -93,6 +93,14 @@ module IMW
|
|
93
93
|
@io ||= open(path, mode)
|
94
94
|
end
|
95
95
|
|
96
|
+
# Close this resource's file handle if it exists.
|
97
|
+
def close
|
98
|
+
# explicitly check the @io instance variable b/c self.io
|
99
|
+
# will open up a new handle by default
|
100
|
+
io.close if @io
|
101
|
+
super()
|
102
|
+
end
|
103
|
+
|
96
104
|
# Read from this file.
|
97
105
|
#
|
98
106
|
# @param [Fixnum] length bytes to read
|
@@ -101,6 +109,13 @@ module IMW
|
|
101
109
|
io.read(length)
|
102
110
|
end
|
103
111
|
|
112
|
+
# Read a line from this file.
|
113
|
+
#
|
114
|
+
# @return [String]
|
115
|
+
def readline
|
116
|
+
io.readline
|
117
|
+
end
|
118
|
+
|
104
119
|
# Write to this file
|
105
120
|
#
|
106
121
|
# @param [String, #to_s] text text to write
|
@@ -109,6 +124,14 @@ module IMW
|
|
109
124
|
io.write text
|
110
125
|
end
|
111
126
|
|
127
|
+
# Write the text with a trailing newline to this resource.
|
128
|
+
#
|
129
|
+
# @param [String, #to_s] text
|
130
|
+
def puts text
|
131
|
+
io.write text.to_s + "\n"
|
132
|
+
end
|
133
|
+
alias_method :<<, :puts
|
134
|
+
|
112
135
|
# Return the lines in this file.
|
113
136
|
#
|
114
137
|
# If passed a block, yield each line of the file to the block.
|
@@ -132,15 +155,50 @@ module IMW
|
|
132
155
|
io.map(&block)
|
133
156
|
end
|
134
157
|
|
135
|
-
#
|
158
|
+
# Emit +data+ into this file.
|
136
159
|
#
|
137
|
-
# @param [String, Array, #each] data object to
|
138
|
-
|
139
|
-
def dump data, options={}
|
160
|
+
# @param [String, Array, #each] data object to emit
|
161
|
+
def emit data, options={}
|
140
162
|
data.each do |element| # works if data is an Array or a String
|
141
163
|
io.puts(element.to_s)
|
142
164
|
end
|
143
|
-
|
165
|
+
end
|
166
|
+
|
167
|
+
# Return a snippet of text from this resource.
|
168
|
+
#
|
169
|
+
# Will read the first 1024 bytes and strip non-printable and non-ASCII
|
170
|
+
# characters from them. For more control, redefine this
|
171
|
+
# method in another module.
|
172
|
+
#
|
173
|
+
# @return [String]
|
174
|
+
def snippet
|
175
|
+
returning([]) do |snip|
|
176
|
+
io.read(1024).bytes.each do |byte|
|
177
|
+
# CR LF SPACE ~
|
178
|
+
snip << byte.chr if byte == 13 || byte == 10 || byte >= 32 && byte <= 126
|
179
|
+
end
|
180
|
+
end.join
|
181
|
+
end
|
182
|
+
|
183
|
+
# Return the number of lines in this file.
|
184
|
+
#
|
185
|
+
# @return [Integer]
|
186
|
+
def num_lines
|
187
|
+
wc[0]
|
188
|
+
end
|
189
|
+
|
190
|
+
# Return the number of words in this file.
|
191
|
+
#
|
192
|
+
# @return [Integer]
|
193
|
+
def num_words
|
194
|
+
wc[1]
|
195
|
+
end
|
196
|
+
|
197
|
+
# Return the number of characters in this file.
|
198
|
+
#
|
199
|
+
# @return [Integer]
|
200
|
+
def num_chars
|
201
|
+
wc[2]
|
144
202
|
end
|
145
203
|
|
146
204
|
# Return a summary of properties of this local file.
|
@@ -154,20 +212,40 @@ module IMW
|
|
154
212
|
data = {
|
155
213
|
:basename => basename,
|
156
214
|
:size => size,
|
157
|
-
:extension => extension
|
215
|
+
:extension => extension,
|
216
|
+
:num_lines => num_lines
|
158
217
|
}
|
159
|
-
if respond_to?(:snippet)
|
160
|
-
|
161
|
-
end
|
218
|
+
data[:snippet] = snippet if respond_to?(:snippet)
|
219
|
+
data[:schema] = schema if respond_to?(:schema)
|
162
220
|
data
|
163
221
|
end
|
164
222
|
|
223
|
+
protected
|
224
|
+
|
225
|
+
# Return a triple of line, word, and character counts for this
|
226
|
+
# resource.
|
227
|
+
#
|
228
|
+
# Relies on the Unix utility +wc+.
|
229
|
+
#
|
230
|
+
# @return [Array<Integer>]
|
231
|
+
def wc
|
232
|
+
@wc ||= begin
|
233
|
+
`wc #{path}`.chomp.strip.split.map(&:to_i)
|
234
|
+
rescue
|
235
|
+
[0,0,0] # FIXME
|
236
|
+
end
|
237
|
+
end
|
238
|
+
|
165
239
|
end
|
166
240
|
|
167
241
|
# Defines methods for manipulating the contents of a local
|
168
242
|
# directory.
|
169
243
|
module LocalDirectory
|
170
244
|
|
245
|
+
# Lets local directories contain a special metadata file which
|
246
|
+
# describes their contents.
|
247
|
+
include IMW::Metadata::ContainsMetadata
|
248
|
+
|
171
249
|
# Is this resource a directory?
|
172
250
|
#
|
173
251
|
# @return [true, false]
|
@@ -207,11 +285,11 @@ module IMW
|
|
207
285
|
# @param [String, IMW::Resource] obj
|
208
286
|
# @return [true, false]
|
209
287
|
def contains? obj
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
288
|
+
obj = IMW.open(obj)
|
289
|
+
return false unless obj.is_local?
|
290
|
+
return true if obj.path == path
|
291
|
+
return false unless obj.path.starts_with?(path)
|
292
|
+
return true if self[obj.path[path.length..-1]].size > 0
|
215
293
|
false
|
216
294
|
end
|
217
295
|
|
@@ -277,6 +355,31 @@ module IMW
|
|
277
355
|
self
|
278
356
|
end
|
279
357
|
|
358
|
+
# Return the resource at the base path of this resource joined
|
359
|
+
# to +paths+.
|
360
|
+
#
|
361
|
+
# IMW.open('/path/to/dir').join('subdir')
|
362
|
+
# #=> IMW::Resource at '/path/to/dir/subdir'
|
363
|
+
#
|
364
|
+
# @param [Array<String>] paths
|
365
|
+
# @return [IMW::Resource]
|
366
|
+
def join *paths
|
367
|
+
IMW.open(File.join(stripped_uri.to_s, *paths))
|
368
|
+
end
|
369
|
+
|
370
|
+
# Recursively walk down this directory
|
371
|
+
def walk(options={}, &block)
|
372
|
+
require 'find'
|
373
|
+
Find.find(path) do |path|
|
374
|
+
if options[:only]
|
375
|
+
next if options[:only] == :files && !File.file?(path)
|
376
|
+
next if options[:only] == :directories && !File.directory?(path)
|
377
|
+
next if options[:only] == :symlinks && !File.symlink?(path)
|
378
|
+
end
|
379
|
+
yield path
|
380
|
+
end
|
381
|
+
end
|
382
|
+
|
280
383
|
# Return a hash summarizing this directory with a key
|
281
384
|
# <tt>:contents</tt> containing an array of hashes summarizing
|
282
385
|
# this directory's contents.
|
@@ -293,10 +396,30 @@ module IMW
|
|
293
396
|
:basename => basename,
|
294
397
|
:size => size,
|
295
398
|
:num_files => contents.length,
|
296
|
-
:contents => resources.map
|
399
|
+
:contents => resources.map do |resource|
|
400
|
+
resource.guess_schema! if guess_schema? && resource.respond_to?(:guess_schema!)
|
401
|
+
resource_summary = resource.summary
|
402
|
+
resource_summary[:schema] = metadata[resource] if metadata && metadata.describe?(resource) # this should be handled by 'resources' method above
|
403
|
+
resource_summary
|
404
|
+
end
|
297
405
|
}
|
298
406
|
end
|
299
407
|
|
408
|
+
# Whether or not to have this directory's resources guess
|
409
|
+
# their schemas when none is provided.
|
410
|
+
#
|
411
|
+
# @return [true, false]
|
412
|
+
def guess_schema?
|
413
|
+
(!! @guess_schema)
|
414
|
+
end
|
415
|
+
|
416
|
+
# Force this directory's resources to guess at their schema.
|
417
|
+
#
|
418
|
+
# @return [true]
|
419
|
+
def guess_schema!
|
420
|
+
@guess_schema = true
|
421
|
+
end
|
422
|
+
|
300
423
|
end
|
301
424
|
end
|
302
425
|
end
|
data/lib/imw/schemes/remote.rb
CHANGED
@@ -39,14 +39,6 @@ module IMW
|
|
39
39
|
@query_string ||= uri.query
|
40
40
|
end
|
41
41
|
|
42
|
-
# Return the fragment part of this resource's URI. Will likely be
|
43
|
-
# +nil+ for local resources.
|
44
|
-
#
|
45
|
-
# @return [String]
|
46
|
-
def fragment
|
47
|
-
@fragment ||= uri.fragment
|
48
|
-
end
|
49
|
-
|
50
42
|
# Return the path part of this resource's URI. Will _not_
|
51
43
|
# include the +query_string+ or +fragment+.
|
52
44
|
#
|
@@ -103,8 +95,21 @@ module IMW
|
|
103
95
|
|
104
96
|
module RemoteDirectory
|
105
97
|
|
98
|
+
# Return the resource at the base path of this resource joined
|
99
|
+
# to +paths+.
|
100
|
+
#
|
101
|
+
# IMW.open('http://example.com/path/to/dir').join('subdir')
|
102
|
+
# #=> IMW::Resource at 'http://example.com/path/to/dir/subdir'
|
103
|
+
#
|
104
|
+
# @param [Array<String>] paths
|
105
|
+
# @return [IMW::Resource]
|
106
|
+
def join *paths
|
107
|
+
IMW.open(File.join(stripped_uri.to_s, *paths))
|
108
|
+
end
|
109
|
+
|
106
110
|
#
|
107
|
-
# TODO -- bloody everything
|
111
|
+
# TODO -- bloody everything. what's the best way to tell if
|
112
|
+
# the remote URL is a directory?
|
108
113
|
#
|
109
114
|
|
110
115
|
|
data/lib/imw/schemes/s3.rb
CHANGED
@@ -116,6 +116,18 @@ module IMW
|
|
116
116
|
destination
|
117
117
|
end
|
118
118
|
|
119
|
+
# Return the resource at the base path of this resource joined
|
120
|
+
# to +paths+.
|
121
|
+
#
|
122
|
+
# IMW.open('s3://bucket/path/to/dir').join('subdir')
|
123
|
+
# #=> IMW::Resource at 's3://bucket/path/to/dir/subdir'
|
124
|
+
#
|
125
|
+
# @param [Array<String>] paths
|
126
|
+
# @return [IMW::Resource]
|
127
|
+
def join *paths
|
128
|
+
IMW.open(File.join(stripped_uri.to_s, *paths))
|
129
|
+
end
|
130
|
+
|
119
131
|
protected
|
120
132
|
# Make an S3 connection.
|
121
133
|
#
|
@@ -0,0 +1,117 @@
|
|
1
|
+
require 'dbi'
|
2
|
+
|
3
|
+
module IMW
|
4
|
+
module Schemes
|
5
|
+
|
6
|
+
# Encapsulates a connection to a relational database.
|
7
|
+
#
|
8
|
+
# Calling
|
9
|
+
#
|
10
|
+
# IMW.open('sql://host:port/database_name')
|
11
|
+
#
|
12
|
+
# should create a connection to a database at the given +port+ on
|
13
|
+
# the given +host+ using the given +database_name+.
|
14
|
+
module SQL
|
15
|
+
|
16
|
+
# A base implementation of a connection to a relational
|
17
|
+
# database.
|
18
|
+
#
|
19
|
+
# The Base#extended method will examine the +scheme+ of an
|
20
|
+
# object extended with this module and choose a more specific
|
21
|
+
# database adaptor module to extend with as well.
|
22
|
+
module Base
|
23
|
+
|
24
|
+
# When an IMW::Resource is extended use URI's scheme to choose
|
25
|
+
# which other module inside IMW::Schemes::SQL to extend with.
|
26
|
+
def self.extended obj
|
27
|
+
case obj.scheme
|
28
|
+
when 'mysql' then obj.extend(IMW::Schemes::SQL::MySQL)
|
29
|
+
when 'postgresql' then obj.extend(IMW::Schemes::SQL::PostgreSQL)
|
30
|
+
else raise IMW::ArgumentError.new("Unknown database type: #{obj.scheme}")
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
# For an SQL connection the database will be the same as the
|
35
|
+
# path.
|
36
|
+
#
|
37
|
+
# @return [String]
|
38
|
+
def database
|
39
|
+
@database ||= path.tr('/','')
|
40
|
+
end
|
41
|
+
|
42
|
+
# Redefine each method inappropriate for databases to return nil.
|
43
|
+
[:dirname, :basename, :extname, :extension, :name].each do |method|
|
44
|
+
define_method(method) do
|
45
|
+
nil
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
# The (cached) database connection for this resource.
|
50
|
+
#
|
51
|
+
# @return [DBI::DatabaseHandle]
|
52
|
+
def connection
|
53
|
+
@connection ||= DBI.connect("#{dbi_module}:#{database}:#{host}", user, password)
|
54
|
+
end
|
55
|
+
|
56
|
+
# Return the password associated with user's account on the
|
57
|
+
# given database.
|
58
|
+
#
|
59
|
+
# @return [String]
|
60
|
+
def password
|
61
|
+
@password ||= resource_options[:password]
|
62
|
+
end
|
63
|
+
|
64
|
+
# Return an array of the table names in the current database.
|
65
|
+
#
|
66
|
+
# @return [Array<String>]
|
67
|
+
def tables
|
68
|
+
returning([]) do |table_names|
|
69
|
+
execute("SHOW TABLES") do |row|
|
70
|
+
table_names << row.first
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
# Execute the (joined) +query_string_parts+ using this
|
76
|
+
# resource's cached connection.
|
77
|
+
#
|
78
|
+
# If passed a block, yield each row of the result set to the
|
79
|
+
# block.
|
80
|
+
#
|
81
|
+
# @param [Array<String>] query_string_parts
|
82
|
+
# @yield [DBI::Row]
|
83
|
+
# @return [DBI::StatementHandle]
|
84
|
+
def execute *query_string_parts, &block
|
85
|
+
query = query_string_parts.join(' ')
|
86
|
+
IMW.announce_if_verbose "Querying #{self}: #{query}"
|
87
|
+
statement = connection.execute(query)
|
88
|
+
block_given? ? statement.fetch(&block) : statement
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
# Module for MySQL databases.
|
93
|
+
module MySQL
|
94
|
+
|
95
|
+
# Return the name of the DBI module used to connect to MySQL.
|
96
|
+
#
|
97
|
+
# @return [String]
|
98
|
+
def dbi_module
|
99
|
+
"DBI:Mysql"
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
# Module for PostgreSQL databases.
|
104
|
+
module PostgreSQL
|
105
|
+
|
106
|
+
# Return the name of the DBI module used to connect to PostgreSQL.
|
107
|
+
#
|
108
|
+
# @return [String]
|
109
|
+
def dbi_module
|
110
|
+
"DBI:Pg"
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
data/lib/imw/tools.rb
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
module IMW
|
2
2
|
module Tools
|
3
|
-
autoload :Archiver,
|
4
|
-
autoload :Transferer,
|
5
|
-
autoload :Summarizer,
|
3
|
+
autoload :Archiver, 'imw/tools/archiver'
|
4
|
+
autoload :Transferer, 'imw/tools/transferer'
|
5
|
+
autoload :Summarizer, 'imw/tools/summarizer'
|
6
|
+
autoload :ExtensionAnalyzer, 'imw/tools/extension_analyzer'
|
7
|
+
autoload :Downloader, 'imw/tools/downloader'
|
6
8
|
end
|
7
9
|
end
|
8
10
|
|
@@ -0,0 +1,63 @@
|
|
1
|
+
module IMW
|
2
|
+
module Tools
|
3
|
+
|
4
|
+
# A class to download a collection of resources to a shared
|
5
|
+
# directory.
|
6
|
+
class Downloader
|
7
|
+
|
8
|
+
def initialize dir, *inputs
|
9
|
+
self.dir = dir
|
10
|
+
self.inputs = inputs unless inputs.blank?
|
11
|
+
end
|
12
|
+
|
13
|
+
def self.dir= new_dir
|
14
|
+
@dir = IMW.open(new_dir)
|
15
|
+
raise IMW::PathError.new("#{@dir} must be a local directory") unless @dir.is_local? && @dir.is_directory?
|
16
|
+
@dir
|
17
|
+
end
|
18
|
+
attr_reader :dir
|
19
|
+
|
20
|
+
def inputs= new_inputs
|
21
|
+
@inputs = new_inputs.flatten.compact.map { |raw_input| IMW.open(raw_input) }
|
22
|
+
end
|
23
|
+
attr_reader :inputs
|
24
|
+
|
25
|
+
def downloaded_path_for input
|
26
|
+
dir.join(input.respond_to?(:effective_basename) ? input.effective_basename : input.basename)
|
27
|
+
end
|
28
|
+
|
29
|
+
def download!
|
30
|
+
before_download
|
31
|
+
inputs.each do |input|
|
32
|
+
downloaded_path = downloaded_path_for(input)
|
33
|
+
IMW.log_if_verbose "Downloading #{input} to #{downloaded_path}"
|
34
|
+
input.cp(downloaded_path)
|
35
|
+
end
|
36
|
+
after_download
|
37
|
+
end
|
38
|
+
|
39
|
+
def downloaded?
|
40
|
+
downloaded_resources.all? { |resource| resource.exist? }
|
41
|
+
end
|
42
|
+
|
43
|
+
def downloaded_resources
|
44
|
+
inputs.map do |input|
|
45
|
+
IMW.open(downloaded_path_for(input))
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
def clean!
|
50
|
+
IMW.log_if_verbose("Deleting downloader directory #{dir}")
|
51
|
+
dir.rm_rf!
|
52
|
+
end
|
53
|
+
|
54
|
+
def before_download
|
55
|
+
end
|
56
|
+
|
57
|
+
def after_download
|
58
|
+
end
|
59
|
+
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|