imw 0.2.9 → 0.2.10
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/examples/metadata.yml +10 -0
- data/lib/imw/tools/aggregator.rb +148 -0
- data/lib/imw/tools.rb +1 -0
- data/lib/imw/utils/error.rb +7 -1
- data/spec/imw/tools/aggregator_spec.rb +71 -0
- metadata +8 -4
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.9
|
1
|
+
0.2.10
|
@@ -0,0 +1,148 @@
|
|
1
|
+
require 'imw/resource'
|
2
|
+
|
3
|
+
module IMW
|
4
|
+
module Tools
|
5
|
+
|
6
|
+
# Aggregates resources into a single local directory.
|
7
|
+
#
|
8
|
+
# The directory should already exist.
|
9
|
+
#
|
10
|
+
# Any local resources will be copied into the directory.
|
11
|
+
#
|
12
|
+
# Any remote resources will be downloaded into the directory.
|
13
|
+
#
|
14
|
+
# If any of the resources are (local and) archives, they will first be
|
15
|
+
# extracted, with only their contents winding up in the final
|
16
|
+
# directory (the file hierarchy of the archive will be preserved).
|
17
|
+
#
|
18
|
+
# If any of the resources are (local and) compressed, they will
|
19
|
+
# first be uncompressed before being added to the directory.
|
20
|
+
#
|
21
|
+
# As an example:
|
22
|
+
#
|
23
|
+
# aggregator = IMW::Tools::Aggregator.new '/path/to/agg_dir'
|
24
|
+
# aggregator.aggregate '/path/to/my/regular_file.tsv', '/path/to/an/archive.tar.bz2', '/path/to/my_compressed_file.gz', 'http://mywebsite.com/index.html'
|
25
|
+
#
|
26
|
+
# This will populate the directory at <tt>/path/to/agg_dir</tt> so that it
|
27
|
+
# looks like
|
28
|
+
#
|
29
|
+
# path_to_agg_dir
|
30
|
+
# |-- regular_file.tsv
|
31
|
+
# |-- archive
|
32
|
+
# | |-- internal_archive_file_1
|
33
|
+
# | |-- internal_archive_file_2
|
34
|
+
# | ...
|
35
|
+
# | `-- internal_archive_file_N
|
36
|
+
# |-- my_compressed_file
|
37
|
+
# `-- index.html
|
38
|
+
#
|
39
|
+
# Notice that
|
40
|
+
#
|
41
|
+
# - the local file was copied over
|
42
|
+
#
|
43
|
+
# - the remote file was downloaded and copied over
|
44
|
+
#
|
45
|
+
# - the tar archive was first extracted
|
46
|
+
#
|
47
|
+
# - the compressed file was first decompressed
|
48
|
+
#
|
49
|
+
# This process can take a while when the constituent files are
|
50
|
+
# large.
|
51
|
+
class Aggregator

  # The directory (as an opened resource) that inputs are aggregated into.
  attr_reader :dir

  # Create an Aggregator which collects resources into +dir+.
  #
  # Validation is delegated to #dir=, so this raises under the same
  # conditions.
  #
  # @param [String, IMW::Resource] dir an existing, local directory
  def initialize(dir)
    self.dir = dir
  end

  # Set the directory for this Aggregator.
  #
  # Will raise unless +new_dir+ is an existing, local directory.  The
  # current directory is only replaced once +new_dir+ has passed every
  # check, so a rejected assignment leaves the Aggregator untouched.
  #
  # @param [String, IMW::Resource] new_dir
  # @return [IMW::Resource]
  # @raise [IMW::SchemeError] if +new_dir+ is not local
  # @raise [IMW::PathError] if +new_dir+ is not a directory
  def dir=(new_dir)
    candidate = IMW.open(new_dir)
    raise IMW::SchemeError, "Aggregator requires a local directory, not #{candidate}" unless candidate.is_local?
    candidate.should_exist! "Aggregator requires the aggregation directory to already exist"
    raise IMW::PathError, "Aggregator requires a directory, not #{candidate}" unless candidate.is_directory?
    @dir = candidate
  end

  # Return a list of error messages for this Aggregator.
  #
  # The list is reset at the start of every call to #aggregate.
  #
  # @return [Array] the error messages
  def errors
    @errors ||= []
  end

  # Was this aggregator successful (did it not have any errors)?
  #
  # @return [true, false]
  def success?
    errors.empty?
  end

  # Aggregate the given inputs into this Aggregator's +dir+.
  #
  # Local inputs are handed to #aggregate_local_input.  Remote inputs
  # are downloaded into +dir+; when the download turns out to be
  # compressed or an archive it is then processed like a local input
  # and the downloaded file itself is removed.
  #
  # Exceptions raised while processing an input are not rescued here;
  # they propagate to the caller.
  #
  # @param [Array<IMW::Resource,String>] paths_or_inputs
  # @return [IMW::Tools::Aggregator] this Aggregator, to allow chaining
  def aggregate(*paths_or_inputs)
    @errors = []
    paths_or_inputs.each do |path_or_input|
      input = IMW.open(path_or_input)
      if input.is_local?
        aggregate_local_input(input)
        next
      end

      download = download_remote_input(input)
      next unless download.is_compressed? || download.is_archive?

      # NOTE(review): the download already sits inside +dir+ and
      # aggregate_local_input builds its destination inside +dir+ too --
      # confirm that cp/decompress! behave when source and destination
      # coincide.
      aggregate_local_input(download)
      download.rm!
    end
    self
  end

  protected

  # Aggregate a local input.
  #
  # Will extract archives, decompress compressed files, and copy
  # regular files and directories (but will not recurse into
  # directories to find archives or compressed files).
  #
  # @param [IMW::Resource] input
  def aggregate_local_input(input)
    new_path = File.join(dir.path, input.basename)
    if input.is_archive?
      IMW.announce_if_verbose("Aggregating and extracting #{input} to #{dir}...")
      # Extraction happens from inside the aggregation directory.
      FileUtils.cd(dir.path) do
        input.extract
      end
    elsif input.is_compressed?
      IMW.announce_if_verbose("Decompressing #{input}...")
      # Copy first so that the original input is left alone, then
      # decompress the copy.
      input.cp(new_path).decompress!
    else
      IMW.announce_if_verbose("Copying #{input}...")
      input.cp(new_path)
    end
  end

  # Download a remote input to this Aggregator's +dir+.
  #
  # @param [IMW::Resource] input
  # @return the result of copying +input+ into +dir+ (used by
  #   #aggregate as the downloaded resource)
  def download_remote_input(input)
    IMW.announce_if_verbose("Downloading #{input}...")
    input.cp(File.join(dir.path, input.effective_basename))
  end

  # Log +error+ as a warning and remember it in #errors.
  #
  # NOTE(review): no method in this class calls this, so #errors stays
  # empty and #success? stays true -- confirm whether #aggregate was
  # meant to rescue per-input failures and record them here.
  def add_processing_error(error) # :nodoc:
    IMW.logger.warn error
    errors << error
  end

end
|
147
|
+
end
|
148
|
+
end
|
data/lib/imw/tools.rb
CHANGED
data/lib/imw/utils/error.rb
CHANGED
@@ -22,7 +22,13 @@ module IMW
|
|
22
22
|
# Error communicating with a remote entity.
|
23
23
|
NetworkError = Class.new(Error)
|
24
24
|
|
25
|
-
#
|
25
|
+
# Raised when a resource is of the wrong scheme.
|
26
|
+
SchemeError = Class.new(Error)
|
27
|
+
|
28
|
+
# Raised when a resource is of the wrong (or malformed) format.
|
29
|
+
FormatError = Class.new(Error)
|
30
|
+
|
31
|
+
# Bad argument.
|
26
32
|
ArgumentError = Class.new(Error)
|
27
33
|
|
28
34
|
# Error in defining or matching a schema.
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../spec_helper"
|
2
|
+
|
3
|
+
describe IMW::Tools::Aggregator do
  # Builds a fresh aggregation directory plus one randomly generated
  # fixture file per local path before every example.
  before do
    @dir = 'agg_dir'
    FileUtils.mkdir_p(@dir)

    # Remote resources.
    @homepage = "http://www.google.com"
    @website  = "http://www.google.com/support/"
    @remote_files = [@homepage, @website]

    # Plain files that should simply be copied.
    @csv  = "foobar-csv.csv"
    @xml  = "foobar-xml.xml"
    @txt  = "foobar-txt.txt"
    @blah = "foobar"
    @files = [@csv, @xml, @txt, @blah]

    # Compressed files that should be decompressed.
    @bz2 = "foobar-bz2.bz2"
    @gz  = "foobar-gz.gz"
    @compressed_files = [@bz2, @gz]

    # Archives whose contents should be extracted.
    @zip    = "foobar-zip.zip"
    @tarbz2 = "foobar-tarbz2.tar.bz2"
    @targz  = "foobar-targz.tar.gz"
    @tar    = "foobar-tar.tar"
    @rar    = "foobar-rar.rar"
    @archives = [@zip, @tarbz2, @targz, @rar, @tar]

    @local_files = [*@files, *@compressed_files, *@archives]
    @all_files   = [*@remote_files, *@local_files]

    @local_files.each { |path| IMWTest::Random.file(path) }

    @aggregator = IMW::Tools::Aggregator.new(@dir)
  end

  it "should copy regular files to its directory" do
    @aggregator.aggregate(*@files)
    @aggregator.dir.path.should contain(*@files)
    # The originals must still be present: aggregation copies, it does not move.
    @files.each do |path|
      IMW.open(path).exist?.should be_true
    end
  end

  it "should copy remote files to its archive directory" do
    @aggregator.aggregate(*@remote_files)
    # _index from Http#effective_basename on http://www.google.com
    @aggregator.dir.path.should contain('_index', 'support')
  end

  it "should uncompress compressed files to its directory" do
    @aggregator.aggregate(*@compressed_files)
    @aggregator.dir.path.should contain('foobar-bz2', 'foobar-gz')
    @aggregator.dir.path.should_not contain(*@compressed_files)
  end

  it "should copy the content of archive files to its archive directory (but not the actual archives)" do
    @aggregator.aggregate(*@archives)
    @archives.each do |archive|
      @aggregator.dir.path.should_not contain(archive)
      @aggregator.dir.path.should contain(*IMW.open(archive).contents)
    end
  end

end
|
70
|
+
|
71
|
+
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: imw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 3
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 2
|
9
|
-
- 9
|
10
|
-
version: 0.2.9
|
9
|
+
- 10
|
10
|
+
version: 0.2.10
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Dhruv Bansal
|
@@ -16,7 +16,7 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2010-10-
|
19
|
+
date: 2010-10-29 00:00:00 -05:00
|
20
20
|
default_executable: imw
|
21
21
|
dependencies: []
|
22
22
|
|
@@ -40,6 +40,7 @@ files:
|
|
40
40
|
- bin/imw
|
41
41
|
- etc/imwrc.rb
|
42
42
|
- examples/dataset.rb
|
43
|
+
- examples/metadata.yml
|
43
44
|
- lib/imw.rb
|
44
45
|
- lib/imw/archives.rb
|
45
46
|
- lib/imw/archives/rar.rb
|
@@ -85,6 +86,7 @@ files:
|
|
85
86
|
- lib/imw/schemes/s3.rb
|
86
87
|
- lib/imw/schemes/sql.rb
|
87
88
|
- lib/imw/tools.rb
|
89
|
+
- lib/imw/tools/aggregator.rb
|
88
90
|
- lib/imw/tools/archiver.rb
|
89
91
|
- lib/imw/tools/downloader.rb
|
90
92
|
- lib/imw/tools/extension_analyzer.rb
|
@@ -164,6 +166,7 @@ files:
|
|
164
166
|
- spec/imw/schemes/remote_spec.rb
|
165
167
|
- spec/imw/schemes/s3_spec.rb
|
166
168
|
- spec/imw/schemes/sql_spec.rb
|
169
|
+
- spec/imw/tools/aggregator_spec.rb
|
167
170
|
- spec/imw/tools/archiver_spec.rb
|
168
171
|
- spec/imw/tools/summarizer_spec.rb
|
169
172
|
- spec/imw/tools/transferer_spec.rb
|
@@ -222,6 +225,7 @@ test_files:
|
|
222
225
|
- spec/imw/tools/archiver_spec.rb
|
223
226
|
- spec/imw/tools/summarizer_spec.rb
|
224
227
|
- spec/imw/tools/transferer_spec.rb
|
228
|
+
- spec/imw/tools/aggregator_spec.rb
|
225
229
|
- spec/imw/compressed_files/compressible_spec.rb
|
226
230
|
- spec/imw/compressed_files/bz2_spec.rb
|
227
231
|
- spec/imw/compressed_files/gz_spec.rb
|