imw 0.2.7 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +23 -0
- data/Gemfile.lock +47 -0
- data/LICENSE +20 -674
- data/README.rdoc +3 -4
- data/VERSION +1 -1
- data/lib/imw.rb +64 -35
- data/lib/imw/dataset.rb +12 -2
- data/lib/imw/formats.rb +4 -2
- data/lib/imw/formats/delimited.rb +96 -36
- data/lib/imw/formats/excel.rb +69 -101
- data/lib/imw/formats/json.rb +3 -5
- data/lib/imw/formats/pdf.rb +71 -0
- data/lib/imw/formats/yaml.rb +3 -5
- data/lib/imw/metadata.rb +66 -0
- data/lib/imw/metadata/contains_metadata.rb +44 -0
- data/lib/imw/metadata/dsl.rb +111 -0
- data/lib/imw/metadata/field.rb +65 -0
- data/lib/imw/metadata/schema.rb +227 -0
- data/lib/imw/metadata/schematized.rb +27 -0
- data/lib/imw/parsers.rb +1 -0
- data/lib/imw/parsers/flat.rb +44 -0
- data/lib/imw/resource.rb +36 -224
- data/lib/imw/schemes.rb +3 -1
- data/lib/imw/schemes/hdfs.rb +12 -1
- data/lib/imw/schemes/http.rb +1 -2
- data/lib/imw/schemes/local.rb +139 -16
- data/lib/imw/schemes/remote.rb +14 -9
- data/lib/imw/schemes/s3.rb +12 -0
- data/lib/imw/schemes/sql.rb +117 -0
- data/lib/imw/tools.rb +5 -3
- data/lib/imw/tools/downloader.rb +63 -0
- data/lib/imw/tools/summarizer.rb +21 -10
- data/lib/imw/utils.rb +10 -0
- data/lib/imw/utils/dynamically_extendable.rb +137 -0
- data/lib/imw/utils/error.rb +3 -0
- data/lib/imw/utils/extensions.rb +0 -4
- data/lib/imw/utils/extensions/array.rb +6 -7
- data/lib/imw/utils/extensions/hash.rb +3 -5
- data/lib/imw/utils/extensions/string.rb +3 -3
- data/lib/imw/utils/has_uri.rb +114 -0
- data/spec/data/{sample.csv → formats/delimited/sample.csv} +1 -1
- data/spec/data/{sample.tsv → formats/delimited/sample.tsv} +0 -0
- data/spec/data/formats/delimited/with_schema/ace-hardware-locations.tsv +11 -0
- data/spec/data/formats/delimited/with_schema/all-countries-ip-address-to-geolocation-data.tsv +16 -0
- data/spec/data/formats/delimited/with_schema/complete-list-of-starbucks-locations.tsv +11 -0
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +22 -0
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +22 -0
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-counts.tsv +12 -0
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +13 -0
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +22 -0
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +22 -0
- data/spec/data/formats/delimited/without_schema/ace-hardware-locations.tsv +10 -0
- data/spec/data/formats/delimited/without_schema/all-countries-ip-address-to-geolocation-data.tsv +15 -0
- data/spec/data/formats/delimited/without_schema/complete-list-of-starbucks-locations.tsv +10 -0
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +21 -0
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +21 -0
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-counts.tsv +11 -0
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +12 -0
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +21 -0
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +21 -0
- data/spec/data/formats/excel/sample.xls +0 -0
- data/spec/data/formats/json/sample.json +1 -0
- data/spec/data/formats/none/sample +650 -0
- data/spec/data/formats/sgml/sample.xml +617 -0
- data/spec/data/formats/text/sample.txt +650 -0
- data/spec/data/formats/yaml/sample.yaml +410 -0
- data/spec/data/schema-tabular.yaml +11 -0
- data/spec/imw/formats/delimited_spec.rb +34 -2
- data/spec/imw/formats/excel_spec.rb +55 -0
- data/spec/imw/formats/json_spec.rb +3 -3
- data/spec/imw/formats/sgml_spec.rb +4 -4
- data/spec/imw/formats/yaml_spec.rb +3 -3
- data/spec/imw/metadata/field_spec.rb +26 -0
- data/spec/imw/metadata/schema_spec.rb +27 -0
- data/spec/imw/metadata_spec.rb +39 -0
- data/spec/imw/parsers/line_parser_spec.rb +1 -1
- data/spec/imw/resource_spec.rb +0 -100
- data/spec/imw/schemes/hdfs_spec.rb +19 -13
- data/spec/imw/schemes/local_spec.rb +59 -3
- data/spec/imw/schemes/s3_spec.rb +4 -0
- data/spec/imw/utils/dynamically_extendable_spec.rb +69 -0
- data/spec/imw/utils/has_uri_spec.rb +55 -0
- data/spec/spec_helper.rb +1 -2
- data/spec/support/random.rb +4 -4
- metadata +58 -17
- data/CHANGELOG +0 -0
- data/TODO +0 -18
- data/spec/data/sample.json +0 -782
- data/spec/data/sample.txt +0 -131
- data/spec/data/sample.xml +0 -653
- data/spec/data/sample.yaml +0 -651
- data/spec/spec.opts +0 -4
- data/spec/support/extensions.rb +0 -18
data/lib/imw/schemes/http.rb
CHANGED
|
@@ -28,7 +28,6 @@ module IMW
|
|
|
28
28
|
IMW::Tools::Transferer.new(:cp, self, new_uri).transfer!
|
|
29
29
|
end
|
|
30
30
|
|
|
31
|
-
|
|
32
31
|
# Return the basename of the URI or <tt>_index</tt> if it's
|
|
33
32
|
# blank, as in the case of <tt>http://www.google.com</tt>.
|
|
34
33
|
#
|
|
@@ -79,7 +78,7 @@ module IMW
|
|
|
79
78
|
# @raise [RestClient::NotModified, RestClient::Unauthorized, RestClient::ResourceNotFound, RestClient::RequestFailed] error from RestClient on non-2xx response codes
|
|
80
79
|
def post payload, headers={}, &block
|
|
81
80
|
make_restclient_request do
|
|
82
|
-
RestClient.post(uri.to_s, payload,
|
|
81
|
+
RestClient.post(uri.to_s, payload, &block)
|
|
83
82
|
end
|
|
84
83
|
end
|
|
85
84
|
|
data/lib/imw/schemes/local.rb
CHANGED
|
@@ -65,7 +65,7 @@ module IMW
|
|
|
65
65
|
def dir
|
|
66
66
|
IMW.open(dirname)
|
|
67
67
|
end
|
|
68
|
-
|
|
68
|
+
|
|
69
69
|
end
|
|
70
70
|
|
|
71
71
|
# Defines methods appropriate for a local file.
|
|
@@ -93,6 +93,14 @@ module IMW
|
|
|
93
93
|
@io ||= open(path, mode)
|
|
94
94
|
end
|
|
95
95
|
|
|
96
|
+
# Close this resource's file handle if it exists.
|
|
97
|
+
def close
|
|
98
|
+
# explicitly check the @io instance variable b/c self.io
|
|
99
|
+
# will open up a new handle by default
|
|
100
|
+
io.close if @io
|
|
101
|
+
super()
|
|
102
|
+
end
|
|
103
|
+
|
|
96
104
|
# Read from this file.
|
|
97
105
|
#
|
|
98
106
|
# @param [Fixnum] length bytes to read
|
|
@@ -101,6 +109,13 @@ module IMW
|
|
|
101
109
|
io.read(length)
|
|
102
110
|
end
|
|
103
111
|
|
|
112
|
+
# Read a line from this file.
|
|
113
|
+
#
|
|
114
|
+
# @return [String]
|
|
115
|
+
def readline
|
|
116
|
+
io.readline
|
|
117
|
+
end
|
|
118
|
+
|
|
104
119
|
# Write to this file
|
|
105
120
|
#
|
|
106
121
|
# @param [String, #to_s] text text to write
|
|
@@ -109,6 +124,14 @@ module IMW
|
|
|
109
124
|
io.write text
|
|
110
125
|
end
|
|
111
126
|
|
|
127
|
+
# Write the text with a trailing newline to this resource.
|
|
128
|
+
#
|
|
129
|
+
# @param [String, #to_s] text
|
|
130
|
+
def puts text
|
|
131
|
+
io.write text.to_s + "\n"
|
|
132
|
+
end
|
|
133
|
+
alias_method :<<, :puts
|
|
134
|
+
|
|
112
135
|
# Return the lines in this file.
|
|
113
136
|
#
|
|
114
137
|
# If passed a block, yield each line of the file to the block.
|
|
@@ -132,15 +155,50 @@ module IMW
|
|
|
132
155
|
io.map(&block)
|
|
133
156
|
end
|
|
134
157
|
|
|
135
|
-
#
|
|
158
|
+
# Emit +data+ into this file.
|
|
136
159
|
#
|
|
137
|
-
# @param [String, Array, #each] data object to
|
|
138
|
-
|
|
139
|
-
def dump data, options={}
|
|
160
|
+
# @param [String, Array, #each] data object to emit
|
|
161
|
+
def emit data, options={}
|
|
140
162
|
data.each do |element| # works if data is an Array or a String
|
|
141
163
|
io.puts(element.to_s)
|
|
142
164
|
end
|
|
143
|
-
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
# Return a snippet of text from this resource.
|
|
168
|
+
#
|
|
169
|
+
# Will read the first 1024 bytes and strip non-ASCII
|
|
170
|
+
# characters from them. For more control, redefine this
|
|
171
|
+
# method in another module.
|
|
172
|
+
#
|
|
173
|
+
# @return [String]
|
|
174
|
+
def snippet
|
|
175
|
+
returning([]) do |snip|
|
|
176
|
+
io.read(1024).bytes.each do |byte|
|
|
177
|
+
# CR LF SPACE ~
|
|
178
|
+
snip << byte.chr if byte == 13 || byte == 10 || byte >= 32 && byte <= 126
|
|
179
|
+
end
|
|
180
|
+
end.join
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
# Return the number of lines in this file.
|
|
184
|
+
#
|
|
185
|
+
# @return [Integer]
|
|
186
|
+
def num_lines
|
|
187
|
+
wc[0]
|
|
188
|
+
end
|
|
189
|
+
|
|
190
|
+
# Return the number of words in this file.
|
|
191
|
+
#
|
|
192
|
+
# @return [Integer]
|
|
193
|
+
def num_words
|
|
194
|
+
wc[1]
|
|
195
|
+
end
|
|
196
|
+
|
|
197
|
+
# Return the number of characters in this file.
|
|
198
|
+
#
|
|
199
|
+
# @return [Integer]
|
|
200
|
+
def num_chars
|
|
201
|
+
wc[2]
|
|
144
202
|
end
|
|
145
203
|
|
|
146
204
|
# Return a summary of properties of this local file.
|
|
@@ -154,20 +212,40 @@ module IMW
|
|
|
154
212
|
data = {
|
|
155
213
|
:basename => basename,
|
|
156
214
|
:size => size,
|
|
157
|
-
:extension => extension
|
|
215
|
+
:extension => extension,
|
|
216
|
+
:num_lines => num_lines
|
|
158
217
|
}
|
|
159
|
-
if respond_to?(:snippet)
|
|
160
|
-
|
|
161
|
-
end
|
|
218
|
+
data[:snippet] = snippet if respond_to?(:snippet)
|
|
219
|
+
data[:schema] = schema if respond_to?(:schema)
|
|
162
220
|
data
|
|
163
221
|
end
|
|
164
222
|
|
|
223
|
+
protected
|
|
224
|
+
|
|
225
|
+
# Return a triple of line, word, and character counts for this
|
|
226
|
+
# resource.
|
|
227
|
+
#
|
|
228
|
+
# Relies on the Unix utility +wc+.
|
|
229
|
+
#
|
|
230
|
+
# @return [Array<Integer>]
|
|
231
|
+
def wc
|
|
232
|
+
@wc ||= begin
|
|
233
|
+
`wc #{path}`.chomp.strip.split.map(&:to_i)
|
|
234
|
+
rescue
|
|
235
|
+
[0,0,0] # FIXME
|
|
236
|
+
end
|
|
237
|
+
end
|
|
238
|
+
|
|
165
239
|
end
|
|
166
240
|
|
|
167
241
|
# Defines methods for manipulating the contents of a local
|
|
168
242
|
# directory.
|
|
169
243
|
module LocalDirectory
|
|
170
244
|
|
|
245
|
+
# Lets local directories contain a special metadata file which
|
|
246
|
+
# describes their contents.
|
|
247
|
+
include IMW::Metadata::ContainsMetadata
|
|
248
|
+
|
|
171
249
|
# Is this resource a directory?
|
|
172
250
|
#
|
|
173
251
|
# @return [true, false]
|
|
@@ -207,11 +285,11 @@ module IMW
|
|
|
207
285
|
# @param [String, IMW::Resource] obj
|
|
208
286
|
# @return [true, false]
|
|
209
287
|
def contains? obj
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
288
|
+
obj = IMW.open(obj)
|
|
289
|
+
return false unless obj.is_local?
|
|
290
|
+
return true if obj.path == path
|
|
291
|
+
return false unless obj.path.starts_with?(path)
|
|
292
|
+
return true if self[obj.path[path.length..-1]].size > 0
|
|
215
293
|
false
|
|
216
294
|
end
|
|
217
295
|
|
|
@@ -277,6 +355,31 @@ module IMW
|
|
|
277
355
|
self
|
|
278
356
|
end
|
|
279
357
|
|
|
358
|
+
# Return the resource at the base path of this resource joined
|
|
359
|
+
# to +path+.
|
|
360
|
+
#
|
|
361
|
+
# IMW.open('/path/to/dir').join('subdir')
|
|
362
|
+
# #=> IMW::Resource at '/path/to/dir/subdir'
|
|
363
|
+
#
|
|
364
|
+
# @param [Array<String>] paths
|
|
365
|
+
# @return [IMW::Resource]
|
|
366
|
+
def join *paths
|
|
367
|
+
IMW.open(File.join(stripped_uri.to_s, *paths))
|
|
368
|
+
end
|
|
369
|
+
|
|
370
|
+
# Recursively walk down this directory
|
|
371
|
+
def walk(options={}, &block)
|
|
372
|
+
require 'find'
|
|
373
|
+
Find.find(path) do |path|
|
|
374
|
+
if options[:only]
|
|
375
|
+
next if options[:only] == :files && !File.file?(path)
|
|
376
|
+
next if options[:only] == :directories && !File.directory?(path)
|
|
377
|
+
next if options[:only] == :symlinks && !File.symlink?(path)
|
|
378
|
+
end
|
|
379
|
+
yield path
|
|
380
|
+
end
|
|
381
|
+
end
|
|
382
|
+
|
|
280
383
|
# Return a hash summarizing this directory with a key
|
|
281
384
|
# <tt>:contents</tt> containing an array of hashes summarizing
|
|
282
385
|
# this directory's contents.
|
|
@@ -293,10 +396,30 @@ module IMW
|
|
|
293
396
|
:basename => basename,
|
|
294
397
|
:size => size,
|
|
295
398
|
:num_files => contents.length,
|
|
296
|
-
:contents => resources.map
|
|
399
|
+
:contents => resources.map do |resource|
|
|
400
|
+
resource.guess_schema! if guess_schema? && resource.respond_to?(:guess_schema!)
|
|
401
|
+
resource_summary = resource.summary
|
|
402
|
+
resource_summary[:schema] = metadata[resource] if metadata && metadata.describe?(resource) # this should be handled by 'resources' method above
|
|
403
|
+
resource_summary
|
|
404
|
+
end
|
|
297
405
|
}
|
|
298
406
|
end
|
|
299
407
|
|
|
408
|
+
# Whether or not to have this directory's resources guess
|
|
409
|
+
# their schemas when none is provided.
|
|
410
|
+
#
|
|
411
|
+
# @return [true, false]
|
|
412
|
+
def guess_schema?
|
|
413
|
+
(!! @guess_schema)
|
|
414
|
+
end
|
|
415
|
+
|
|
416
|
+
# Force this directory's resources to guess at their schema.
|
|
417
|
+
#
|
|
418
|
+
# @return [true]
|
|
419
|
+
def guess_schema!
|
|
420
|
+
@guess_schema = true
|
|
421
|
+
end
|
|
422
|
+
|
|
300
423
|
end
|
|
301
424
|
end
|
|
302
425
|
end
|
data/lib/imw/schemes/remote.rb
CHANGED
|
@@ -39,14 +39,6 @@ module IMW
|
|
|
39
39
|
@query_string ||= uri.query
|
|
40
40
|
end
|
|
41
41
|
|
|
42
|
-
# Return the fragment part of this resource's URI. Will likely be
|
|
43
|
-
# +nil+ for local resources.
|
|
44
|
-
#
|
|
45
|
-
# @return [String]
|
|
46
|
-
def fragment
|
|
47
|
-
@fragment ||= uri.fragment
|
|
48
|
-
end
|
|
49
|
-
|
|
50
42
|
# Return the path part of this resource's URI. Will _not_
|
|
51
43
|
# include the +query_string+ or +fragment+.
|
|
52
44
|
#
|
|
@@ -103,8 +95,21 @@ module IMW
|
|
|
103
95
|
|
|
104
96
|
module RemoteDirectory
|
|
105
97
|
|
|
98
|
+
# Return the resource at the base path of this resource joined
|
|
99
|
+
# to +path+.
|
|
100
|
+
#
|
|
101
|
+
# IMW.open('http://example.com/path/to/dir').join('subdir')
|
|
102
|
+
# #=> IMW::Resource at 'http://example.com/path/to/dir/subdir'
|
|
103
|
+
#
|
|
104
|
+
# @param [Array<String>] paths
|
|
105
|
+
# @return [IMW::Resource]
|
|
106
|
+
def join *paths
|
|
107
|
+
IMW.open(File.join(stripped_uri.to_s, *paths))
|
|
108
|
+
end
|
|
109
|
+
|
|
106
110
|
#
|
|
107
|
-
# TODO -- bloody everything
|
|
111
|
+
# TODO -- bloody everything. what's the best way to tell if
|
|
112
|
+
# the remote URL is a directory?
|
|
108
113
|
#
|
|
109
114
|
|
|
110
115
|
|
data/lib/imw/schemes/s3.rb
CHANGED
|
@@ -116,6 +116,18 @@ module IMW
|
|
|
116
116
|
destination
|
|
117
117
|
end
|
|
118
118
|
|
|
119
|
+
# Return the resource at the base path of this resource joined
|
|
120
|
+
# to +path+.
|
|
121
|
+
#
|
|
122
|
+
# IMW.open('s3://bucket/path/to/dir').join('subdir')
|
|
123
|
+
# #=> IMW::Resource at 's3://bucket/path/to/dir/subdir'
|
|
124
|
+
#
|
|
125
|
+
# @param [Array<String>] paths
|
|
126
|
+
# @return [IMW::Resource]
|
|
127
|
+
def join *paths
|
|
128
|
+
IMW.open(File.join(stripped_uri.to_s, *paths))
|
|
129
|
+
end
|
|
130
|
+
|
|
119
131
|
protected
|
|
120
132
|
# Make an S3 connection.
|
|
121
133
|
#
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
require 'dbi'
|
|
2
|
+
|
|
3
|
+
module IMW
|
|
4
|
+
module Schemes
|
|
5
|
+
|
|
6
|
+
# Encapsulates a connection to a relational database.
|
|
7
|
+
#
|
|
8
|
+
# Calling
|
|
9
|
+
#
|
|
10
|
+
# IMW.open('sql://host:port/database_name')
|
|
11
|
+
#
|
|
12
|
+
# should create a connection to a database at the given +port+ on
|
|
13
|
+
# the given +host+ using the given +database_name+.
|
|
14
|
+
module SQL
|
|
15
|
+
|
|
16
|
+
# A base implementation of a connection to a relational
|
|
17
|
+
# database.
|
|
18
|
+
#
|
|
19
|
+
# The Base#extended method will examine the +scheme+ of an
|
|
20
|
+
# object extended with this module and choose a more specific
|
|
21
|
+
# database adaptor module to extend with as well.
|
|
22
|
+
module Base
|
|
23
|
+
|
|
24
|
+
# When an IMW::Resource is extended use URI's scheme to choose
|
|
25
|
+
# which other module inside IMW::Schemes::SQL to extend with.
|
|
26
|
+
def self.extended obj
|
|
27
|
+
case obj.scheme
|
|
28
|
+
when 'mysql' then obj.extend(IMW::Schemes::SQL::MySQL)
|
|
29
|
+
when 'postgresql' then obj.extend(IMW::Schemes::SQL::PostgreSQL)
|
|
30
|
+
else raise IMW::ArgumentError.new("Unknown database type: #{obj.scheme}")
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
# For an SQL connection the database will be the same as the
|
|
35
|
+
# path.
|
|
36
|
+
#
|
|
37
|
+
# @return [String]
|
|
38
|
+
def database
|
|
39
|
+
@database ||= path.tr('/','')
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
# Redefine each method inappropriate for databases.
|
|
43
|
+
[:dirname, :basename, :extname, :extension, :name].each do |method|
|
|
44
|
+
define_method(method) do
|
|
45
|
+
nil
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
# The (cached) database connection for this resource.
|
|
50
|
+
#
|
|
51
|
+
# @return [DBI::DatabaseHandle]
|
|
52
|
+
def connection
|
|
53
|
+
@connection ||= DBI.connect("#{dbi_module}:#{database}:#{host}", user, password)
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
# Return the password associated with user's account on the
|
|
57
|
+
# given database.
|
|
58
|
+
#
|
|
59
|
+
# @return [String]
|
|
60
|
+
def password
|
|
61
|
+
@password ||= resource_options[:password]
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
# Return an array of the table names in the current database.
|
|
65
|
+
#
|
|
66
|
+
# @return [Array<String>]
|
|
67
|
+
def tables
|
|
68
|
+
returning([]) do |table_names|
|
|
69
|
+
execute("SHOW TABLES") do |row|
|
|
70
|
+
table_names << row.first
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
# Execute the (joined) +query_string_parts+ using this
|
|
76
|
+
# resource's cached connection.
|
|
77
|
+
#
|
|
78
|
+
# If passed a block, yield each row of the result set to the
|
|
79
|
+
# block.
|
|
80
|
+
#
|
|
81
|
+
# @param [Array<String>] query_string_parts
|
|
82
|
+
# @yield [DBI::Row]
|
|
83
|
+
# @return [DBI::StatementHandle]
|
|
84
|
+
def execute *query_string_parts, &block
|
|
85
|
+
query = query_string_parts.join(' ')
|
|
86
|
+
IMW.announce_if_verbose "Querying #{self}: #{query}"
|
|
87
|
+
statement = connection.execute(query)
|
|
88
|
+
block_given? ? statement.fetch(&block) : statement
|
|
89
|
+
end
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
# Module for MySQL databases.
|
|
93
|
+
module MySQL
|
|
94
|
+
|
|
95
|
+
# Return the name of the DBI module used to connect to MySQL.
|
|
96
|
+
#
|
|
97
|
+
# @return [String]
|
|
98
|
+
def dbi_module
|
|
99
|
+
"DBI:Mysql"
|
|
100
|
+
end
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
# Module for PostgreSQL databases.
|
|
104
|
+
module PostgreSQL
|
|
105
|
+
|
|
106
|
+
# Return the name of the DBI module used to connect to PostgreSQL.
|
|
107
|
+
#
|
|
108
|
+
# @return [String]
|
|
109
|
+
def dbi_module
|
|
110
|
+
"DBI:Pg"
|
|
111
|
+
end
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
end
|
|
115
|
+
end
|
|
116
|
+
end
|
|
117
|
+
|
data/lib/imw/tools.rb
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
module IMW
|
|
2
2
|
module Tools
|
|
3
|
-
autoload :Archiver,
|
|
4
|
-
autoload :Transferer,
|
|
5
|
-
autoload :Summarizer,
|
|
3
|
+
autoload :Archiver, 'imw/tools/archiver'
|
|
4
|
+
autoload :Transferer, 'imw/tools/transferer'
|
|
5
|
+
autoload :Summarizer, 'imw/tools/summarizer'
|
|
6
|
+
autoload :ExtensionAnalyzer, 'imw/tools/extension_analyzer'
|
|
7
|
+
autoload :Downloader, 'imw/tools/downloader'
|
|
6
8
|
end
|
|
7
9
|
end
|
|
8
10
|
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
module IMW
|
|
2
|
+
module Tools
|
|
3
|
+
|
|
4
|
+
# A class to download a collection of resources to a shared
|
|
5
|
+
# directory.
|
|
6
|
+
class Downloader
|
|
7
|
+
|
|
8
|
+
def initialize dir, *inputs
|
|
9
|
+
self.dir = dir
|
|
10
|
+
self.inputs = inputs unless inputs.blank?
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def self.dir= new_dir
|
|
14
|
+
@dir = IMW.open(new_dir)
|
|
15
|
+
raise IMW::PathError.new("#{@dir} must be a local directory") unless @dir.is_local? && @dir.is_directory?
|
|
16
|
+
@dir
|
|
17
|
+
end
|
|
18
|
+
attr_reader :dir
|
|
19
|
+
|
|
20
|
+
def inputs= new_inputs
|
|
21
|
+
@inputs = new_inputs.flatten.compact.map { |raw_input| IMW.open(raw_input) }
|
|
22
|
+
end
|
|
23
|
+
attr_reader :inputs
|
|
24
|
+
|
|
25
|
+
def downloaded_path_for input
|
|
26
|
+
dir.join(input.respond_to?(:effective_basename) ? input.effective_basename : input.basename)
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def download!
|
|
30
|
+
before_download
|
|
31
|
+
inputs.each do |input|
|
|
32
|
+
downloaded_path = downloaded_path_for(input)
|
|
33
|
+
IMW.log_if_verbose "Downloading #{input} to #{downloaded_path}"
|
|
34
|
+
input.cp(downloaded_path)
|
|
35
|
+
end
|
|
36
|
+
after_download
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def downloaded?
|
|
40
|
+
downloaded_resources.all? { |resource| resource.exist? }
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
def downloaded_resources
|
|
44
|
+
inputs.map do |input|
|
|
45
|
+
IMW.open(downloaded_path_for(input))
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def clean!
|
|
50
|
+
IMW.log_if_verbose("Deleting downloader directory #{dir}")
|
|
51
|
+
dir.rm_rf!
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
def before_download
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
def after_download
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
|