imw 0.2.9 → 0.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/examples/metadata.yml +10 -0
- data/lib/imw/tools/aggregator.rb +148 -0
- data/lib/imw/tools.rb +1 -0
- data/lib/imw/utils/error.rb +7 -1
- data/spec/imw/tools/aggregator_spec.rb +71 -0
- metadata +8 -4
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.9
|
1
|
+
0.2.10
|
@@ -0,0 +1,148 @@
|
|
1
|
+
require 'imw/resource'
|
2
|
+
|
3
|
+
module IMW
  module Tools

    # Aggregates resources into a single local directory.
    #
    # The directory must already exist.
    #
    # Any local resources will be copied into the directory.
    #
    # Any remote resources will be downloaded into the directory.
    #
    # Archives (local, or just downloaded) are extracted from inside
    # the directory, so only their contents wind up there (the file
    # hierarchy of the archive is whatever the archive itself holds).
    #
    # Compressed files (local, or just downloaded) are copied into the
    # directory and decompressed there.
    #
    # As an example:
    #
    #   aggregator = IMW::Tools::Aggregator.new '/path/to/agg_dir'
    #   aggregator.aggregate '/path/to/my/regular_file.tsv', '/path/to/an/archive.tar.bz2', '/path/to/my_compressed_file.gz', 'http://mywebsite.com/index.html'
    #
    # This will fill the (already existing) directory at
    # <tt>/path/to/agg_dir</tt> so that it looks like the following
    # (assuming the archive's own top-level entry is <tt>archive/</tt>):
    #
    #   /path/to/agg_dir
    #   |-- regular_file.tsv
    #   |-- archive
    #   |   |-- internal_archive_file_1
    #   |   |-- internal_archive_file_2
    #   |   ...
    #   |   `-- internal_archive_file_N
    #   |-- my_compressed_file
    #   `-- index.html
    #
    # Notice that
    #
    # - the local file was copied over
    #
    # - the remote file was downloaded into the directory
    #
    # - the tar archive was extracted
    #
    # - the compressed file was decompressed
    #
    # This process can take a while when the constituent files are
    # large.
    class Aggregator

      # The aggregation directory, as opened by IMW.open.
      attr_reader :dir

      # Create an Aggregator which collects resources into +dir+.
      #
      # Validation is delegated to #dir=, which opens +dir+ itself.
      #
      # @param [String, IMW::Resource] dir an existing, local directory
      def initialize dir
        self.dir = dir
      end

      # Set the directory for this Aggregator.
      #
      # Will raise unless +new_dir+ is an existing, local directory.
      # The current directory is left untouched when validation fails.
      #
      # @param [String, IMW::Resource] new_dir
      # @return [IMW::Resource]
      # @raise [IMW::SchemeError] if +new_dir+ is not local
      # @raise [IMW::PathError] if +new_dir+ is not a directory
      def dir= new_dir
        candidate = IMW.open(new_dir)
        raise IMW::SchemeError, "Aggregator requires a local directory, not #{candidate}" unless candidate.is_local?
        candidate.should_exist! "Aggregator requires the aggregation directory to already exist"
        raise IMW::PathError, "Aggregator requires a directory, not #{candidate}" unless candidate.is_directory?
        # Only commit once every check has passed.
        @dir = candidate
      end

      # Return a list of error messages for this Aggregator.
      #
      # The list is reset at the start of every call to #aggregate.
      #
      # @return [Array] the error messages
      def errors
        @errors ||= []
      end

      # Was this aggregator successful (did it not have any errors)?
      #
      # @return [true, false]
      def success?
        errors.empty?
      end

      # Aggregate the given inputs into this Aggregator's +dir+.
      #
      # Local inputs are handed to #aggregate_local_input.  Remote
      # inputs are downloaded into +dir+; a download that turns out to
      # be an archive or a compressed file is then processed like a
      # local input and the downloaded file itself is removed.
      #
      # @param [Array<IMW::Resource,String>] paths_or_inputs
      # @return [IMW::Tools::Aggregator] this aggregator
      def aggregate *paths_or_inputs
        @errors = []
        paths_or_inputs.each do |path_or_input|
          input = IMW.open(path_or_input)
          if input.is_local?
            aggregate_local_input(input)
          else
            download = download_remote_input(input)
            if download.is_compressed? || download.is_archive?
              aggregate_local_input(download)
              download.rm!
            end
          end
        end
        self
      end

      protected

      # Aggregate a local input.
      #
      # Will extract archives, decompress compressed files, and copy
      # regular files and directories (but will not recurse into
      # directories to find archives or compressed files).
      #
      # @param [IMW::Resource] input
      def aggregate_local_input input
        new_path = File.join(dir.path, input.basename)
        if input.is_archive?
          IMW.announce_if_verbose("Aggregating and extracting #{input} to #{dir}...")
          # Extract from inside the aggregation directory.
          FileUtils.cd(dir.path) do
            input.extract
          end
        elsif input.is_compressed?
          IMW.announce_if_verbose("Decompressing #{input}...")
          # Decompress the copy, not the caller's original file.
          input.cp(new_path).decompress!
        else
          IMW.announce_if_verbose("Copying #{input}...")
          input.cp(new_path)
        end
      end

      # Download a remote input to this Aggregator's +dir+.
      #
      # The target file name is the input's +effective_basename+.
      #
      # @param [IMW::Resource] input
      # @return the result of +input.cp+ (used by #aggregate as the downloaded resource)
      def download_remote_input input
        IMW.announce_if_verbose("Downloading #{input}...")
        input.cp(File.join(dir.path, input.effective_basename))
      end

      # Log +error+ as a warning and remember it in #errors.
      #
      # NOTE(review): nothing in this class calls this method, so
      # #errors stays empty unless other code adds to it.
      def add_processing_error error # :nodoc:
        IMW.logger.warn error
        errors << error
      end

    end
  end
end
|
data/lib/imw/tools.rb
CHANGED
data/lib/imw/utils/error.rb
CHANGED
@@ -22,7 +22,13 @@ module IMW
|
|
22
22
|
# Error communicating with a remote entity.
|
23
23
|
NetworkError = Class.new(Error)
|
24
24
|
|
25
|
-
#
|
25
|
+
# Raised when a resource is of the wrong scheme.
|
26
|
+
SchemeError = Class.new(Error)
|
27
|
+
|
28
|
+
# Raised when a resource is of the wrong (or malformed) format.
|
29
|
+
FormatError = Class.new(Error)
|
30
|
+
|
31
|
+
# Bad argument.
|
26
32
|
ArgumentError = Class.new(Error)
|
27
33
|
|
28
34
|
# Error in defining or matching a schema.
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../spec_helper"
|
2
|
+
|
3
|
+
describe IMW::Tools::Aggregator do
  # Builds the aggregation directory, a set of random local fixture
  # files (regular, compressed and archive) and the aggregator itself.
  before do
    @dir = 'agg_dir'
    FileUtils.mkdir_p(@dir)

    # Remote resources.
    @homepage     = "http://www.google.com"
    @website      = "http://www.google.com/support/"
    @remote_files = [@homepage, @website]

    # Plain files.
    @csv   = "foobar-csv.csv"
    @xml   = "foobar-xml.xml"
    @txt   = "foobar-txt.txt"
    @blah  = "foobar"
    @files = [@csv, @xml, @txt, @blah]

    # Compressed files.
    @bz2              = "foobar-bz2.bz2"
    @gz               = "foobar-gz.gz"
    @compressed_files = [@bz2, @gz]

    # Archives.
    @zip      = "foobar-zip.zip"
    @tarbz2   = "foobar-tarbz2.tar.bz2"
    @targz    = "foobar-targz.tar.gz"
    @tar      = "foobar-tar.tar"
    @rar      = "foobar-rar.rar"
    @archives = [@zip, @tarbz2, @targz, @rar, @tar]

    @local_files = @files + @compressed_files + @archives
    @all_files   = @remote_files + @local_files

    # Create every local fixture on disk.
    @local_files.each { |fixture| IMWTest::Random.file(fixture) }

    @aggregator = IMW::Tools::Aggregator.new(@dir)
  end

  it "should copy regular files to its directory" do
    @aggregator.aggregate(*@files)
    @aggregator.dir.path.should contain(*@files)
    # The originals must still be there: files are copied, not moved.
    @files.each do |original|
      IMW.open(original).exist?.should be_true
    end
  end

  it "should copy remote files to its archive directory" do
    @aggregator.aggregate(*@remote_files)
    # _index from Http#effective_basename on http://www.google.com
    @aggregator.dir.path.should contain('_index', 'support')
  end

  it "should uncompress compressed files to its directory" do
    @aggregator.aggregate(*@compressed_files)
    aggregated = @aggregator.dir.path
    aggregated.should contain('foobar-bz2', 'foobar-gz')
    aggregated.should_not contain(*@compressed_files)
  end

  it "should copy the content of archive files to its archive directory (but not the actual archives)" do
    @aggregator.aggregate(*@archives)
    @archives.each do |archive|
      @aggregator.dir.path.should_not contain(archive)
      @aggregator.dir.path.should contain(*IMW.open(archive).contents)
    end
  end

end
|
70
|
+
|
71
|
+
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: imw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 3
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 2
|
9
|
-
- 9
|
10
|
-
version: 0.2.9
|
9
|
+
- 10
|
10
|
+
version: 0.2.10
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Dhruv Bansal
|
@@ -16,7 +16,7 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2010-10-
|
19
|
+
date: 2010-10-29 00:00:00 -05:00
|
20
20
|
default_executable: imw
|
21
21
|
dependencies: []
|
22
22
|
|
@@ -40,6 +40,7 @@ files:
|
|
40
40
|
- bin/imw
|
41
41
|
- etc/imwrc.rb
|
42
42
|
- examples/dataset.rb
|
43
|
+
- examples/metadata.yml
|
43
44
|
- lib/imw.rb
|
44
45
|
- lib/imw/archives.rb
|
45
46
|
- lib/imw/archives/rar.rb
|
@@ -85,6 +86,7 @@ files:
|
|
85
86
|
- lib/imw/schemes/s3.rb
|
86
87
|
- lib/imw/schemes/sql.rb
|
87
88
|
- lib/imw/tools.rb
|
89
|
+
- lib/imw/tools/aggregator.rb
|
88
90
|
- lib/imw/tools/archiver.rb
|
89
91
|
- lib/imw/tools/downloader.rb
|
90
92
|
- lib/imw/tools/extension_analyzer.rb
|
@@ -164,6 +166,7 @@ files:
|
|
164
166
|
- spec/imw/schemes/remote_spec.rb
|
165
167
|
- spec/imw/schemes/s3_spec.rb
|
166
168
|
- spec/imw/schemes/sql_spec.rb
|
169
|
+
- spec/imw/tools/aggregator_spec.rb
|
167
170
|
- spec/imw/tools/archiver_spec.rb
|
168
171
|
- spec/imw/tools/summarizer_spec.rb
|
169
172
|
- spec/imw/tools/transferer_spec.rb
|
@@ -222,6 +225,7 @@ test_files:
|
|
222
225
|
- spec/imw/tools/archiver_spec.rb
|
223
226
|
- spec/imw/tools/summarizer_spec.rb
|
224
227
|
- spec/imw/tools/transferer_spec.rb
|
228
|
+
- spec/imw/tools/aggregator_spec.rb
|
225
229
|
- spec/imw/compressed_files/compressible_spec.rb
|
226
230
|
- spec/imw/compressed_files/bz2_spec.rb
|
227
231
|
- spec/imw/compressed_files/gz_spec.rb
|