imw 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -1
- data/Rakefile +10 -0
- data/TODO +18 -0
- data/VERSION +1 -1
- data/bin/imw +1 -1
- data/etc/imwrc.rb +0 -50
- data/examples/dataset.rb +12 -0
- data/lib/imw/boot.rb +55 -9
- data/lib/imw/dataset/paths.rb +15 -24
- data/lib/imw/dataset/workflow.rb +131 -72
- data/lib/imw/dataset.rb +94 -186
- data/lib/imw/parsers/html_parser.rb +1 -1
- data/lib/imw/parsers.rb +1 -1
- data/lib/imw/repository.rb +3 -27
- data/lib/imw/resource.rb +190 -0
- data/lib/imw/resources/archive.rb +97 -0
- data/lib/imw/resources/archives_and_compressed/bz2.rb +18 -0
- data/lib/imw/resources/archives_and_compressed/gz.rb +18 -0
- data/lib/imw/resources/archives_and_compressed/rar.rb +23 -0
- data/lib/imw/resources/archives_and_compressed/tar.rb +23 -0
- data/lib/imw/resources/archives_and_compressed/tarbz2.rb +78 -0
- data/lib/imw/resources/archives_and_compressed/targz.rb +78 -0
- data/lib/imw/resources/archives_and_compressed/zip.rb +57 -0
- data/lib/imw/resources/archives_and_compressed.rb +32 -0
- data/lib/imw/resources/compressed_file.rb +89 -0
- data/lib/imw/resources/compressible.rb +77 -0
- data/lib/imw/resources/formats/delimited.rb +92 -0
- data/lib/imw/resources/formats/excel.rb +125 -0
- data/lib/imw/resources/formats/json.rb +53 -0
- data/lib/imw/resources/formats/sgml.rb +72 -0
- data/lib/imw/resources/formats/yaml.rb +53 -0
- data/lib/imw/resources/formats.rb +32 -0
- data/lib/imw/resources/local.rb +198 -0
- data/lib/imw/resources/remote.rb +110 -0
- data/lib/imw/resources/schemes/hdfs.rb +242 -0
- data/lib/imw/resources/schemes/http.rb +161 -0
- data/lib/imw/resources/schemes/s3.rb +137 -0
- data/lib/imw/resources/schemes.rb +19 -0
- data/lib/imw/resources.rb +118 -0
- data/lib/imw/runner.rb +5 -4
- data/lib/imw/transforms/archiver.rb +215 -0
- data/lib/imw/transforms/transferer.rb +103 -0
- data/lib/imw/transforms.rb +8 -0
- data/lib/imw/utils/error.rb +26 -30
- data/lib/imw/utils/extensions/array.rb +5 -15
- data/lib/imw/utils/extensions/hash.rb +6 -16
- data/lib/imw/utils/extensions/hpricot.rb +0 -14
- data/lib/imw/utils/extensions/string.rb +5 -15
- data/lib/imw/utils/extensions/symbol.rb +0 -13
- data/lib/imw/utils/extensions.rb +65 -0
- data/lib/imw/utils/log.rb +14 -13
- data/lib/imw/utils/misc.rb +0 -6
- data/lib/imw/utils/paths.rb +101 -42
- data/lib/imw/utils/version.rb +8 -9
- data/lib/imw/utils.rb +2 -18
- data/lib/imw.rb +92 -17
- data/spec/data/sample.csv +1 -1
- data/spec/data/sample.json +1 -0
- data/spec/data/sample.tsv +1 -1
- data/spec/data/sample.txt +1 -1
- data/spec/data/sample.xml +1 -1
- data/spec/data/sample.yaml +1 -1
- data/spec/imw/dataset/paths_spec.rb +32 -0
- data/spec/imw/dataset/workflow_spec.rb +41 -0
- data/spec/imw/resource_spec.rb +79 -0
- data/spec/imw/resources/archive_spec.rb +69 -0
- data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +15 -0
- data/spec/imw/resources/archives_and_compressed/gz_spec.rb +15 -0
- data/spec/imw/resources/archives_and_compressed/rar_spec.rb +16 -0
- data/spec/imw/resources/archives_and_compressed/tar_spec.rb +16 -0
- data/spec/imw/resources/archives_and_compressed/tarbz2_spec.rb +24 -0
- data/spec/imw/resources/archives_and_compressed/targz_spec.rb +21 -0
- data/spec/imw/resources/archives_and_compressed/zip_spec.rb +16 -0
- data/spec/imw/resources/compressed_file_spec.rb +48 -0
- data/spec/imw/resources/compressible_spec.rb +36 -0
- data/spec/imw/resources/formats/delimited_spec.rb +33 -0
- data/spec/imw/resources/formats/json_spec.rb +32 -0
- data/spec/imw/resources/formats/sgml_spec.rb +24 -0
- data/spec/imw/resources/formats/yaml_spec.rb +41 -0
- data/spec/imw/resources/local_spec.rb +98 -0
- data/spec/imw/resources/remote_spec.rb +35 -0
- data/spec/imw/resources/schemes/hdfs_spec.rb +61 -0
- data/spec/imw/resources/schemes/http_spec.rb +19 -0
- data/spec/imw/resources/schemes/s3_spec.rb +19 -0
- data/spec/imw/transforms/archiver_spec.rb +120 -0
- data/spec/imw/transforms/transferer_spec.rb +113 -0
- data/spec/imw/utils/paths_spec.rb +5 -33
- data/spec/imw/utils/shared_paths_spec.rb +29 -0
- data/spec/spec_helper.rb +5 -5
- data/spec/support/paths_matcher.rb +67 -0
- data/spec/support/random.rb +39 -36
- metadata +88 -75
- data/lib/imw/dataset/task.rb +0 -41
- data/lib/imw/files/archive.rb +0 -113
- data/lib/imw/files/basicfile.rb +0 -122
- data/lib/imw/files/binary.rb +0 -28
- data/lib/imw/files/compressed_file.rb +0 -93
- data/lib/imw/files/compressed_files_and_archives.rb +0 -334
- data/lib/imw/files/compressible.rb +0 -103
- data/lib/imw/files/csv.rb +0 -113
- data/lib/imw/files/directory.rb +0 -62
- data/lib/imw/files/excel.rb +0 -84
- data/lib/imw/files/json.rb +0 -41
- data/lib/imw/files/sgml.rb +0 -46
- data/lib/imw/files/text.rb +0 -68
- data/lib/imw/files/yaml.rb +0 -46
- data/lib/imw/files.rb +0 -125
- data/lib/imw/packagers/archiver.rb +0 -126
- data/lib/imw/packagers/s3_mover.rb +0 -36
- data/lib/imw/packagers.rb +0 -8
- data/lib/imw/utils/components.rb +0 -61
- data/lib/imw/utils/config.rb +0 -46
- data/lib/imw/utils/extensions/class/attribute_accessors.rb +0 -8
- data/lib/imw/utils/extensions/core.rb +0 -27
- data/lib/imw/utils/extensions/dir.rb +0 -24
- data/lib/imw/utils/extensions/file_core.rb +0 -64
- data/lib/imw/utils/extensions/typed_struct.rb +0 -22
- data/lib/imw/utils/extensions/uri.rb +0 -59
- data/lib/imw/utils/view/dump_csv.rb +0 -112
- data/lib/imw/utils/view/dump_csv_older.rb +0 -117
- data/lib/imw/utils/view.rb +0 -113
- data/spec/imw/dataset/datamapper/uri_spec.rb +0 -43
- data/spec/imw/dataset/datamapper_spec_helper.rb +0 -11
- data/spec/imw/files/archive_spec.rb +0 -118
- data/spec/imw/files/basicfile_spec.rb +0 -121
- data/spec/imw/files/bz2_spec.rb +0 -32
- data/spec/imw/files/compressed_file_spec.rb +0 -96
- data/spec/imw/files/compressible_spec.rb +0 -100
- data/spec/imw/files/file_spec.rb +0 -144
- data/spec/imw/files/gz_spec.rb +0 -32
- data/spec/imw/files/rar_spec.rb +0 -33
- data/spec/imw/files/tar_spec.rb +0 -31
- data/spec/imw/files/text_spec.rb +0 -23
- data/spec/imw/files/zip_spec.rb +0 -31
- data/spec/imw/files_spec.rb +0 -38
- data/spec/imw/packagers/archiver_spec.rb +0 -125
- data/spec/imw/packagers/s3_mover_spec.rb +0 -7
- data/spec/imw/utils/extensions/file_core_spec.rb +0 -72
- data/spec/imw/utils/extensions/find_spec.rb +0 -113
- data/spec/imw/workflow/rip/local_spec.rb +0 -89
- data/spec/imw/workflow/rip_spec.rb +0 -27
- data/spec/support/archive_contents_matcher.rb +0 -94
- data/spec/support/directory_contents_matcher.rb +0 -61
data/lib/imw/runner.rb
CHANGED
|
@@ -18,7 +18,7 @@ module IMW
|
|
|
18
18
|
def initialize *args
|
|
19
19
|
@args = args
|
|
20
20
|
@options = DEFAULT_OPTIONS.dup
|
|
21
|
-
parser.parse!(args)
|
|
21
|
+
parser.parse!(args)
|
|
22
22
|
end
|
|
23
23
|
|
|
24
24
|
def parser
|
|
@@ -67,11 +67,12 @@ EOF
|
|
|
67
67
|
end
|
|
68
68
|
|
|
69
69
|
def handles
|
|
70
|
+
require 'set'
|
|
70
71
|
matched_handles = Set.new
|
|
71
72
|
if options[:selectors].blank?
|
|
72
|
-
matched_handles += IMW
|
|
73
|
+
matched_handles += IMW.repository.keys
|
|
73
74
|
else
|
|
74
|
-
keys = IMW
|
|
75
|
+
keys = IMW.repository.keys
|
|
75
76
|
unless keys.empty?
|
|
76
77
|
options[:selectors].each do |selector|
|
|
77
78
|
matched_handles += keys.find_all { |key| key =~ Regexp.new(selector) }
|
|
@@ -82,7 +83,7 @@ EOF
|
|
|
82
83
|
end
|
|
83
84
|
|
|
84
85
|
def datasets
|
|
85
|
-
handles.map { |handle| IMW
|
|
86
|
+
handles.map { |handle| IMW.repository[handle] }
|
|
86
87
|
end
|
|
87
88
|
|
|
88
89
|
def list!
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
require 'imw/resource'
|
|
2
|
+
|
|
3
|
+
module IMW
|
|
4
|
+
module Transforms
|
|
5
|
+
|
|
6
|
+
# Packages an Array of input files into a single output archive.
|
|
7
|
+
# When the archive is extracted, all the input files given will be
|
|
8
|
+
# in a single directory with a chosen name. The path to the output
|
|
9
|
+
# archive determines both the name of the archive and its type (tar,
|
|
10
|
+
# tar.bz2, zip, &c.).
|
|
11
|
+
#
|
|
12
|
+
# If any of the input files are themselves archives, they will first
|
|
13
|
+
# be extracted, with only their contents winding up in the final
|
|
14
|
+
# directory (the file hierarchy of the archive will be preserved).
|
|
15
|
+
# If any of the input files are compressed, they will first be
|
|
16
|
+
# uncompressed before being added to the directory.
|
|
17
|
+
#
|
|
18
|
+
# Both local and remote files can be archived. An example:
|
|
19
|
+
#
|
|
20
|
+
# archiver = IMW::Transforms::Archiver.new 'my_archive', ['/path/to/my/regular_file.tsv', '/path/to/an/archive.tar.bz2', '/path/to/my_compressed_file.gz', 'http://mywebsite.com/index.html']
|
|
21
|
+
# archiver.package! '/path/to/my_archive.zip'
|
|
22
|
+
#
|
|
23
|
+
# This will create a ZIP archive at
|
|
24
|
+
# <tt>/path/to/my_archive.zip</tt>. When the ZIP archive is
|
|
25
|
+
# extracted its contents will look like
|
|
26
|
+
#
|
|
27
|
+
# my_archive
|
|
28
|
+
# |-- regular_file.tsv
|
|
29
|
+
# |-- archive_file1
|
|
30
|
+
# |-- archive_dir
|
|
31
|
+
# | |-- archive_file2
|
|
32
|
+
# | `-- archive_file3
|
|
33
|
+
# |-- archive_file3
|
|
34
|
+
# |-- my_compressed_file
|
|
35
|
+
# `-- index.html
|
|
36
|
+
#
|
|
37
|
+
# Notice that
|
|
38
|
+
#
|
|
39
|
+
# - the name of the extracted directory is given by the first
|
|
40
|
+
# argument to the Archiver when it was instantiated.
|
|
41
|
+
#
|
|
42
|
+
# - all files wind up in the top-level of this extracted directory
|
|
43
|
+
# when possible (<tt>regular_file.tsv</tt>, <tt>index.html</tt>)
|
|
44
|
+
#
|
|
45
|
+
# - /path/to/archive.tar.bz2 was not directly included, but its
|
|
46
|
+
# contents (<tt>archive_file1</tt>,
|
|
47
|
+
# <tt>archive_dir/archive_file2</tt>,
|
|
48
|
+
# <tt>archive_dir/archive_file3</tt>) were included instead.
|
|
49
|
+
#
|
|
50
|
+
# - /path/to/my_compressed_file.gz was first uncompressed before
|
|
51
|
+
# being added to the archive.
|
|
52
|
+
#
|
|
53
|
+
# - the remote file <tt>http://mywebsite.com/index.html</tt> was
|
|
54
|
+
# downloaded and included
|
|
55
|
+
#
|
|
56
|
+
# This process can take a while when the constituent files are
|
|
57
|
+
# large because there is quite a lot of preparation done to the
|
|
58
|
+
# files to make this nice output structure in the final archive.
|
|
59
|
+
# Further calls to <tt>package!</tt> on the same instance of
|
|
60
|
+
# Archiver will skip the preparation step (the intermediate
|
|
61
|
+
# results of which are sitting in IMW's temporary directory) and
|
|
62
|
+
# directly create the package, saving time when attempting to
|
|
63
|
+
# create multiple package formats from the same input data.
|
|
64
|
+
class Archiver

  attr_accessor :name, :local_inputs, :remote_inputs

  # Build an archiver that will package +raw_inputs+ beneath a
  # directory named +name+.
  #
  # @param [String, Symbol] name the name of the directory the inputs are gathered into
  # @param [Array<String, IMW::Resource>] raw_inputs the inputs to archive, local or remote
  def initialize name, raw_inputs
    @name = name
    self.inputs = raw_inputs
  end

  # Set the inputs for this archiver, sorting them into
  # +local_inputs+ and +remote_inputs+.
  #
  # @param [Array<String, IMW::Resource>] new_inputs the inputs to archive, local or remote
  def inputs= new_inputs
    @local_inputs, @remote_inputs = [], []
    new_inputs.each do |obj|
      # Accept either paths/URIs or already-opened IMW::Resource objects.
      input = obj.is_a?(IMW::Resource) ? obj : IMW.open(obj)
      if input.is_local?
        # A directory contributes its +resources+ instead of itself.
        @local_inputs << (input.directory? ? input.resources : input)
      else
        @remote_inputs << input
      end
    end
    # Directories were pushed as nested collections; flatten them out.
    @local_inputs.flatten!
  end

  # Return a list of error messages for this archiver.
  #
  # @return [Array] the error messages
  def errors
    @errors ||= []
  end

  # Was this archiver successful (did it not have any errors)?
  #
  # @return [true, false]
  def success?
    errors.empty?
  end

  # A temporary directory to work in.  Its contents will
  # ultimately consist of a directory named for the package
  # containing all the input files.
  #
  # The name is built from the current Unix timestamp and the
  # process id and is memoized on first use.
  #
  # NOTE(review): two archivers created by the same process within
  # the same second would compute the same path -- confirm this is
  # acceptable.
  #
  # @return [String]
  def tmp_dir
    @tmp_dir ||= File.join(IMW.path_to(:tmp_root, 'packager'), (Time.now.to_i.to_s + "-" + $$.to_s))
  end

  # A directory which will contain all the content being packaged,
  # including the contents of any archives that were included in
  # the list of files to process.
  #
  # @return [String]
  def dir
    @dir ||= File.join(tmp_dir, name.to_s)
  end

  # Remove the +tmp_dir+ entirely, getting rid of all temporary
  # files.
  def clean!
    FileUtils.rm_rf(tmp_dir)
  end

  # Copy, decompress, or extract the input paths to the temporary
  # directory, readying them for packaging.
  def prepare!
    FileUtils.mkdir_p dir unless File.exist?(dir)

    local_inputs.each do |existing_file|
      new_path = File.join(dir, existing_file.basename)
      case
      when existing_file.is_archive?
        # Extract from inside +dir+ so the archive's contents land there.
        FileUtils.cd(dir) do
          existing_file.extract
        end
      when existing_file.is_compressed?
        # Work on a copy so the original compressed input is left alone.
        existing_file.cp(new_path).decompress!
      else
        existing_file.cp(new_path)
      end
    end

    remote_inputs.each do |remote_input|
      remote_input.cp(File.join(dir, remote_input.effective_basename))
    end
  end

  # Checks to see if all expected files exist in the temporary
  # directory for this packager.
  #
  # @return [true, false]
  def prepared?
    local_inputs.each do |existing_file|
      case
      when existing_file.is_archive?
        existing_file.contents.each do |archived_file_path|
          return false unless File.exist?(File.join(dir, archived_file_path))
        end
      when existing_file.is_compressed?
        return false unless File.exist?(File.join(dir, existing_file.decompressed_basename))
      else
        return false unless File.exist?(File.join(dir, existing_file.basename))
      end
    end

    remote_inputs.each do |remote_input|
      return false unless File.exist?(File.join(dir, remote_input.effective_basename))
    end

    true
  end

  # Package the contents of the temporary directory to an archive
  # at +output+ but return exceptions instead of raising them.
  #
  # @param [String, IMW::Resource] output the path to the output package
  # @param [Hash] options forwarded unchanged to <tt>package!</tt>
  # @return [StandardError, IMW::Resource] either the completed package or the error which was raised
  def package output, options={}
    # Forward the caller's options as given (previously they were
    # overwritten with an empty Hash at the call site).
    package! output, options
  rescue => e
    e
  end

  # Package the contents of the temporary directory to an archive
  # at +output+.  The extension of +output+ determines the kind of
  # archive.
  #
  # Runs <tt>prepare!</tt> first unless <tt>prepared?</tt> is already
  # true, and removes any existing resource at +output+ before
  # creating the new one.
  #
  # @param [String, IMW::Resource] output the path to the output package
  # @param [Hash] options currently not read by this method
  # @return [IMW::Resource] the completed package
  def package! output, options={}
    prepare! unless prepared?
    output = IMW.open(output)
    FileUtils.mkdir_p(output.dirname) unless File.exist?(output.dirname)
    output.rm! if output.exist?
    # Build the archive inside +tmp_dir+ so its entries are relative
    # to the package directory, then move it to its final location.
    FileUtils.cd(tmp_dir) { IMW.open(output.basename).create(*Dir["#{name}/**/*"]).mv(output.path) }
    add_processing_error "Archiver: couldn't create archive #{output.path}" unless output.exists?
    output
  end

  protected
  # Log +error+ as a warning and remember it in +errors+.
  def add_processing_error error # :nodoc:
    IMW.logger.warn error
    errors << error
  end

end
|
|
214
|
+
end
|
|
215
|
+
end
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
module IMW
|
|
2
|
+
module Transforms
|
|
3
|
+
# Copies or moves a resource from a source URI to a destination URI,
# choosing a strategy from the pair of schemes involved (local file,
# HTTP, S3, HDFS).
class Transferer

  attr_accessor :action, :source, :destination

  # @param [String, Symbol] action one of cp, copy, mv, move, mv!
  # @param [String, IMW::Resource] source anything IMW.open accepts
  # @param [String, IMW::Resource] destination anything IMW.open accepts
  # @raise [IMW::ArgumentError] if +action+ is not recognized
  # @raise [IMW::PathError] if source and destination have the same URI
  def initialize action, source, destination
    @action      = normalize_action(action)
    @source      = IMW.open(source)
    @destination = IMW.open(destination)
    raise IMW::PathError.new("Source and destination have the same URI: #{@source.uri}") if @source.uri.to_s == @destination.uri.to_s
  end

  # Perform the transfer by dispatching to the
  # <tt>SOURCESCHEME_to_DESTINATIONSCHEME</tt> method of this class.
  #
  # @raise [IMW::NoMethodError] if no handler exists for the scheme pair
  # @return the result of <tt>destination.reopen</tt>
  def transfer!
    if source.is_local?
      source.should_exist!("Cannot copy") # don't bother checking for remote resources
      source_scheme = 'file'              # make sure it isn't blank
    else
      source_scheme = source.scheme
    end
    destination_scheme = destination.is_local? ? 'file' : destination.scheme
    method             = "#{source_scheme}_to_#{destination_scheme}"
    # The handlers below are protected, so respond_to? must be told
    # to include non-public methods or it never finds them.
    if respond_to?(method, true)
      send(method)
    else
      raise IMW::NoMethodError.new("Do not know how to #{action} #{source.uri} => #{destination.uri} (#{source_scheme.inspect} => #{destination_scheme.inspect})")
    end
    destination.reopen
  end

  protected

  # Map the accepted spellings of an action onto :cp or :mv.
  def normalize_action action # :nodoc:
    case action.to_sym
    when :cp, :copy      then :cp
    when :mv, :move, :mv! then :mv
    else raise IMW::ArgumentError.new("action (#{action}) must be one of `cp' (or `copy') or `mv' (or `move' or `mv!'")
    end
  end

  #
  # Purely local file
  #

  # Delegates to FileUtils.cp or FileUtils.mv depending on +action+.
  def file_to_file
    FileUtils.send(action, source.path, destination.path)
  end

  #
  # HTTP
  #

  # Reads the whole source and writes it to the destination path.
  def http_to_file
    File.open(destination.path, 'w') { |f| f.write(source.read) }
  end

  #
  # S3
  #

  def file_to_s3
    IMW::Resources::Schemes::S3.put(source, destination)
  end

  def http_to_s3
    IMW::Resources::Schemes::S3.put(source, destination)
  end

  def s3_to_file
    IMW::Resources::Schemes::S3.get(source, destination)
  end

  def s3_to_s3
    IMW::Resources::Schemes::S3.copy(source, destination)
  end

  #
  # HDFS
  #

  def hdfs_to_hdfs
    IMW::Resources::Schemes::HDFS.fs(action, source.path, destination.path)
  end

  def file_to_hdfs
    IMW::Resources::Schemes::HDFS.fs(:put, source.path, destination.path)
  end

  def hdfs_to_file
    IMW::Resources::Schemes::HDFS.fs(:get, source.path, destination.path)
  end

  def s3_to_hdfs
    IMW::Resources::Schemes::HDFS.fs(action, source.s3n_url, destination.path)
  end

  def hdfs_to_s3
    IMW::Resources::Schemes::HDFS.fs(action, source.path, destination.s3n_url)
  end

end
|
|
102
|
+
end
|
|
103
|
+
end
|
data/lib/imw/utils/error.rb
CHANGED
|
@@ -1,54 +1,50 @@
|
|
|
1
|
-
#
|
|
2
|
-
# h2. lib/imw/utils/error -- errors
|
|
3
|
-
#
|
|
4
|
-
# == About
|
|
5
|
-
#
|
|
6
|
-
# Error objects for IMW.
|
|
7
|
-
#
|
|
8
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
9
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
10
|
-
# License:: GPL 3.0
|
|
11
|
-
# Website:: http://infinitemonkeywrench.org/
|
|
12
|
-
#
|
|
13
|
-
|
|
14
1
|
module IMW
|
|
15
2
|
|
|
16
|
-
#
|
|
17
|
-
|
|
18
|
-
end
|
|
3
|
+
# Base error class which all IMW errors subclass.
|
|
4
|
+
Error = Class.new(StandardError)
|
|
19
5
|
|
|
20
|
-
|
|
21
|
-
|
|
6
|
+
# Method undefined.
|
|
7
|
+
NoMethodError = Class.new(Error)
|
|
22
8
|
|
|
23
|
-
|
|
24
|
-
|
|
9
|
+
# Type error.
|
|
10
|
+
TypeError = Class.new(Error)
|
|
25
11
|
|
|
26
|
-
|
|
27
|
-
|
|
12
|
+
# Not implemented (typically because user needs to define a method
|
|
13
|
+
# when subclassing a base class).
|
|
14
|
+
NotImplementedError = Class.new(Error)
|
|
28
15
|
|
|
29
|
-
|
|
30
|
-
|
|
16
|
+
# Error during parsing.
|
|
17
|
+
ParseError = Class.new(Error)
|
|
18
|
+
|
|
19
|
+
# Error with a non-existing, invalid, or inaccessible path.
|
|
20
|
+
PathError = Class.new(Error)
|
|
21
|
+
|
|
22
|
+
# Error communicating with a remote entity.
|
|
23
|
+
NetworkError = Class.new(Error)
|
|
24
|
+
|
|
25
|
+
# Invalid or inappropriate argument.
|
|
26
|
+
ArgumentError = Class.new(Error)
|
|
31
27
|
|
|
32
28
|
# An error meant to be used when a system call goes awry. It will
|
|
33
29
|
# report exit status and the process id of the offending call.
|
|
34
30
|
class SystemCallError < IMW::Error
|
|
35
31
|
|
|
36
|
-
|
|
32
|
+
attr_reader :status, :message
|
|
33
|
+
|
|
34
|
+
def initialize(status, message)
|
|
35
|
+
@status = status
|
|
37
36
|
@message = message
|
|
38
37
|
end
|
|
39
38
|
|
|
40
39
|
def display
|
|
41
|
-
"(error code: #{
|
|
40
|
+
"(error code: #{status.exitstatus}, pid: #{status.pid}) #{message}"
|
|
42
41
|
end
|
|
43
42
|
|
|
44
43
|
def to_s
|
|
45
|
-
"(error code: #{
|
|
44
|
+
"(error code: #{status.exitstatus}, pid: #{status.pid}) #{message}"
|
|
46
45
|
end
|
|
47
46
|
|
|
48
47
|
end
|
|
49
48
|
|
|
50
|
-
# A error for improperly specified, inappropriate, or broken paths.
|
|
51
|
-
class PathError < IMW::Error
|
|
52
|
-
end
|
|
53
49
|
|
|
54
50
|
end
|
|
@@ -1,15 +1,3 @@
|
|
|
1
|
-
#
|
|
2
|
-
# h2. lib/imw/utils/extensions/array.rb -- array extensions
|
|
3
|
-
#
|
|
4
|
-
# == About
|
|
5
|
-
#
|
|
6
|
-
# Extensions to the +Array+ class.
|
|
7
|
-
#
|
|
8
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
9
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
10
|
-
# License:: GPL 3.0
|
|
11
|
-
# Website:: http://infinitemonkeywrench.org/
|
|
12
|
-
#
|
|
13
1
|
require 'active_support/core_ext/array/extract_options'
|
|
14
2
|
class Array #:nodoc:
|
|
15
3
|
include ActiveSupport::CoreExtensions::Array::ExtractOptions
|
|
@@ -118,8 +106,10 @@ class Array
|
|
|
118
106
|
terminals.map! {|terminal| yield terminal } if block
|
|
119
107
|
terminals
|
|
120
108
|
end
|
|
121
|
-
|
|
122
109
|
|
|
123
|
-
|
|
110
|
+
# Dump the data in this array to the resource at the given +uri+.
|
|
111
|
+
def dump uri
  # Open the target for writing, then hand this array to it.
  destination = IMW.open(uri, :mode => 'w')
  destination.dump(self)
end
|
|
124
114
|
|
|
125
|
-
|
|
115
|
+
end
|
|
@@ -1,16 +1,3 @@
|
|
|
1
|
-
#
|
|
2
|
-
# h2. lib/imw/utils/extensions/hash.rb -- hash extensions
|
|
3
|
-
#
|
|
4
|
-
# == About
|
|
5
|
-
#
|
|
6
|
-
# Extensions to the built-in +Hash+ class.
|
|
7
|
-
#
|
|
8
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
9
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
10
|
-
# License:: GPL 3.0
|
|
11
|
-
# Website:: http://infinitemonkeywrench.org/
|
|
12
|
-
#
|
|
13
|
-
|
|
14
1
|
require 'active_support/core_ext/hash/reverse_merge'
|
|
15
2
|
|
|
16
3
|
class Hash
|
|
@@ -169,6 +156,7 @@ class Hash
|
|
|
169
156
|
# search(options.slice(:mass, :velocity, :time))
|
|
170
157
|
# Returns a new hash with only the given keys.
|
|
171
158
|
def slice(*keys)
  require 'set'
  # Normalize the requested keys first when the receiver knows how to.
  wanted  = respond_to?(:convert_key) ? keys.map { |key| convert_key(key) } : keys
  allowed = Set.new(wanted)
  reject { |key, _value| !allowed.include?(key) }
end
|
|
@@ -212,7 +200,9 @@ class Hash
|
|
|
212
200
|
terminals.map! {|terminal| yield terminal } if block
|
|
213
201
|
terminals
|
|
214
202
|
end
|
|
215
|
-
|
|
216
|
-
end
|
|
217
203
|
|
|
218
|
-
#
|
|
204
|
+
# Dump the data from this Hash into the given +uri+.
|
|
205
|
+
def dump uri
  # Open the target, then hand this hash to it.
  destination = IMW.open(uri)
  destination.dump(self)
end
|
|
208
|
+
end
|
|
@@ -1,17 +1,3 @@
|
|
|
1
|
-
#
|
|
2
|
-
# h2. lib/imw/utils/extensions/hpricot.rb -- extensions to hpricot
|
|
3
|
-
#
|
|
4
|
-
# == About
|
|
5
|
-
#
|
|
6
|
-
# Some IMW extensions for Why's Hpricot library.
|
|
7
|
-
#
|
|
8
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
9
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
10
|
-
# License:: GPL 3.0
|
|
11
|
-
# Website:: http://infinitemonkeywrench.org/
|
|
12
|
-
#
|
|
13
|
-
# puts "#{File.basename(__FILE__)}: Something clever" # at bottom
|
|
14
|
-
|
|
15
1
|
require 'hpricot'
|
|
16
2
|
|
|
17
3
|
module Hpricot::IMWExtensions
|
|
@@ -1,16 +1,3 @@
|
|
|
1
|
-
#
|
|
2
|
-
# h2. lib/imw/utils/extensions/string.rb -- string extensions
|
|
3
|
-
#
|
|
4
|
-
# == About
|
|
5
|
-
#
|
|
6
|
-
# Implements some useful extensions to the +String+ class.
|
|
7
|
-
#
|
|
8
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
9
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
10
|
-
# License:: GPL 3.0
|
|
11
|
-
# Website:: http://infinitemonkeywrench.org/
|
|
12
|
-
#
|
|
13
|
-
|
|
14
1
|
class String
|
|
15
2
|
|
|
16
3
|
# Does the string end with the specified +suffix+ (stolen from
|
|
@@ -44,6 +31,9 @@ class String
|
|
|
44
31
|
self.downcase.underscore.to_sym
|
|
45
32
|
end
|
|
46
33
|
|
|
47
|
-
|
|
34
|
+
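# Dump this string into the given +uri+. NOTE(review): this replaces Ruby's built-in String#dump (no arguments, returns an escaped copy of the string) -- confirm nothing loaded alongside IMW relies on the core method.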
|
|
35
|
+
def dump uri
|
|
36
|
+
IMW.open(uri).dump(self)
|
|
37
|
+
end
|
|
48
38
|
|
|
49
|
-
|
|
39
|
+
end
|
|
@@ -1,14 +1,3 @@
|
|
|
1
|
-
#
|
|
2
|
-
# h2. lib/imw/utils/extensions/symbol.rb -- extensions to symbol class
|
|
3
|
-
#
|
|
4
|
-
# == About
|
|
5
|
-
#
|
|
6
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
7
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
8
|
-
# License:: GPL 3.0
|
|
9
|
-
# Website:: http://infinitemonkeywrench.org/
|
|
10
|
-
#
|
|
11
|
-
|
|
12
1
|
class Symbol
|
|
13
2
|
|
|
14
3
|
# Turn the symbol into a simple proc (stolen from
|
|
@@ -24,5 +13,3 @@ class Symbol
|
|
|
24
13
|
end
|
|
25
14
|
|
|
26
15
|
end
|
|
27
|
-
|
|
28
|
-
# puts "#{File.basename(__FILE__)}: You whisper a word of power and smile as the the Ruby Palace thunders with the sound of falling blocks." # at bottom
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
require 'imw/utils/extensions/string'
|
|
2
|
+
require 'imw/utils/extensions/array'
|
|
3
|
+
require 'imw/utils/extensions/hash'
|
|
4
|
+
require 'imw/utils/extensions/struct'
|
|
5
|
+
require 'imw/utils/extensions/symbol'
|
|
6
|
+
|
|
7
|
+
require 'active_support/core_ext/object/blank'
|
|
8
|
+
require 'active_support/core_ext/object/misc'
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
module IMW
|
|
12
|
+
# A replacement for the standard system call which raises an
|
|
13
|
+
# IMW::SystemCallError if the command fails which prints better
|
|
14
|
+
# debugging info.
|
|
15
|
+
#
|
|
16
|
+
# This function relies upon Kernel.system and obeys the same rules:
|
|
17
|
+
#
|
|
18
|
+
# - if +commands+ has only a single element then no shell
|
|
19
|
+
# characters or spaces are escaped -- you have to do it yourself
|
|
20
|
+
# or you get to use shell characters, depending on your
|
|
21
|
+
# perspective.
|
|
22
|
+
#
|
|
23
|
+
# - if +commands+ is a list of elements then the second and further
|
|
24
|
+
# elements in the list have their shell characters and spaces
|
|
25
|
+
# escaped
|
|
26
|
+
#
|
|
27
|
+
# But it also has its own rules:
|
|
28
|
+
#
|
|
29
|
+
# - When one of the +commands+ is an empty or blank string,
|
|
30
|
+
# Kernel.system honors it and escapes it properly and sends it
|
|
31
|
+
# along for evaluation. This can be a problem for some programs
|
|
32
|
+
# and so IMW.system excludes blank (as in <tt>blank?</tt>)
|
|
33
|
+
# elements of +commands+.
|
|
34
|
+
#
|
|
35
|
+
# - +commands+ will be flattened (see the gotcha below)
|
|
36
|
+
#
|
|
37
|
+
# Calling out to the shell like this is often brittle. Imagine
|
|
38
|
+
# defining
|
|
39
|
+
#
|
|
40
|
+
# prog = 'some_prog'
|
|
41
|
+
# flags = '-v -f'
|
|
42
|
+
# args = 'file.txt'
|
|
43
|
+
#
|
|
44
|
+
# and later calling
|
|
45
|
+
#
|
|
46
|
+
# IMW.system prog, flags, args
|
|
47
|
+
#
|
|
48
|
+
# The space in the second argument ('-v -f') will be escaped and
|
|
49
|
+
# will therefore not be properly parsed by +some_prog+. Instead try
|
|
50
|
+
#
|
|
51
|
+
# prog = 'some_prog'
|
|
52
|
+
# flags = ['-v', '-f']
|
|
53
|
+
# args = ['file.txt']
|
|
54
|
+
#
|
|
55
|
+
# IMW.system prog, flags, *args
|
|
56
|
+
#
|
|
57
|
+
# which will work fine since +flags+ will automatically be flattened.
|
|
58
|
+
def self.system *commands
|
|
59
|
+
stripped_commands = commands.flatten.map { |command| command.to_s unless command.blank? }.compact
|
|
60
|
+
Kernel.system(*stripped_commands)
|
|
61
|
+
raise IMW::SystemCallError.new($?.dup, commands.join(' ')) unless $?.success?
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
|