imw 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +34 -14
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/lib/imw.rb +9 -6
- data/lib/imw/{resources/archive.rb → archives.rb} +20 -10
- data/lib/imw/archives/rar.rb +19 -0
- data/lib/imw/archives/tar.rb +19 -0
- data/lib/imw/archives/tarbz2.rb +73 -0
- data/lib/imw/archives/targz.rb +73 -0
- data/lib/imw/archives/zip.rb +51 -0
- data/lib/imw/{resources/compressed_file.rb → compressed_files.rb} +16 -11
- data/lib/imw/compressed_files/bz2.rb +16 -0
- data/lib/imw/{resources → compressed_files}/compressible.rb +2 -4
- data/lib/imw/compressed_files/gz.rb +16 -0
- data/lib/imw/formats.rb +31 -0
- data/lib/imw/formats/delimited.rb +90 -0
- data/lib/imw/formats/excel.rb +125 -0
- data/lib/imw/formats/json.rb +51 -0
- data/lib/imw/formats/sgml.rb +69 -0
- data/lib/imw/formats/yaml.rb +51 -0
- data/lib/imw/resource.rb +108 -10
- data/lib/imw/schemes.rb +21 -0
- data/lib/imw/schemes/hdfs.rb +240 -0
- data/lib/imw/schemes/http.rb +166 -0
- data/lib/imw/schemes/local.rb +219 -0
- data/lib/imw/schemes/remote.rb +114 -0
- data/lib/imw/schemes/s3.rb +135 -0
- data/lib/imw/tools.rb +8 -0
- data/lib/imw/{transforms → tools}/archiver.rb +1 -1
- data/lib/imw/{transforms → tools}/transferer.rb +10 -10
- data/spec/imw/{resources/archive_spec.rb → archive_spec.rb} +3 -3
- data/spec/imw/{resources/archives_and_compressed → archives}/rar_spec.rb +2 -2
- data/spec/imw/{resources/archives_and_compressed → archives}/tar_spec.rb +2 -2
- data/spec/imw/{resources/archives_and_compressed → archives}/tarbz2_spec.rb +4 -4
- data/spec/imw/{resources/archives_and_compressed → archives}/targz_spec.rb +4 -4
- data/spec/imw/{resources/archives_and_compressed → archives}/zip_spec.rb +2 -2
- data/spec/imw/compressed_files/bz2_spec.rb +15 -0
- data/spec/imw/{resources → compressed_files}/compressible_spec.rb +1 -1
- data/spec/imw/compressed_files/gz_spec.rb +15 -0
- data/spec/imw/{resources/compressed_file_spec.rb → compressed_files_spec.rb} +3 -3
- data/spec/imw/{resources/formats → formats}/delimited_spec.rb +2 -2
- data/spec/imw/{resources/formats → formats}/json_spec.rb +2 -2
- data/spec/imw/{resources/formats → formats}/sgml_spec.rb +2 -2
- data/spec/imw/{resources/formats → formats}/yaml_spec.rb +2 -2
- data/spec/imw/resource_spec.rb +4 -4
- data/spec/imw/{resources/schemes → schemes}/hdfs_spec.rb +7 -7
- data/spec/imw/{resources/schemes → schemes}/http_spec.rb +2 -2
- data/spec/imw/{resources → schemes}/local_spec.rb +5 -5
- data/spec/imw/{resources → schemes}/remote_spec.rb +7 -3
- data/spec/imw/{resources/schemes → schemes}/s3_spec.rb +2 -2
- data/spec/imw/{transforms → tools}/archiver_spec.rb +2 -2
- data/spec/imw/tools/transferer_spec.rb +113 -0
- metadata +69 -71
- data/lib/imw/resources.rb +0 -118
- data/lib/imw/resources/archives_and_compressed.rb +0 -32
- data/lib/imw/resources/archives_and_compressed/bz2.rb +0 -18
- data/lib/imw/resources/archives_and_compressed/gz.rb +0 -18
- data/lib/imw/resources/archives_and_compressed/rar.rb +0 -23
- data/lib/imw/resources/archives_and_compressed/tar.rb +0 -23
- data/lib/imw/resources/archives_and_compressed/tarbz2.rb +0 -78
- data/lib/imw/resources/archives_and_compressed/targz.rb +0 -78
- data/lib/imw/resources/archives_and_compressed/zip.rb +0 -57
- data/lib/imw/resources/formats.rb +0 -32
- data/lib/imw/resources/formats/delimited.rb +0 -92
- data/lib/imw/resources/formats/excel.rb +0 -125
- data/lib/imw/resources/formats/json.rb +0 -53
- data/lib/imw/resources/formats/sgml.rb +0 -72
- data/lib/imw/resources/formats/yaml.rb +0 -53
- data/lib/imw/resources/local.rb +0 -198
- data/lib/imw/resources/remote.rb +0 -110
- data/lib/imw/resources/schemes.rb +0 -19
- data/lib/imw/resources/schemes/hdfs.rb +0 -242
- data/lib/imw/resources/schemes/http.rb +0 -161
- data/lib/imw/resources/schemes/s3.rb +0 -137
- data/lib/imw/transforms.rb +0 -8
- data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +0 -15
- data/spec/imw/resources/archives_and_compressed/gz_spec.rb +0 -15
- data/spec/imw/transforms/transferer_spec.rb +0 -113
@@ -1,53 +0,0 @@
|
|
1
|
-
module IMW
|
2
|
-
module Resources
|
3
|
-
module Formats
|
4
|
-
|
5
|
-
# Defines methods for reading and writing JSON data.
|
6
|
-
module Json
|
7
|
-
|
8
|
-
# Return the content of this resource.
|
9
|
-
#
|
10
|
-
# Will try to be smart about iterating over the data when
|
11
|
-
# passed a block.
|
12
|
-
#
|
13
|
-
# - if the outermost JSON data structure is an array, then
|
14
|
-
# yield each element
|
15
|
-
#
|
16
|
-
# - if the outermost JSON data structure is a mapping, then
|
17
|
-
# yield each key, value pair
|
18
|
-
#
|
19
|
-
# - otherwise just yield the structure
|
20
|
-
#
|
21
|
-
# @return [Hash, Array, String, Fixnum] whatever the JSON contained
|
22
|
-
def load &block
|
23
|
-
require 'json'
|
24
|
-
json = JSON.parse(read)
|
25
|
-
if block_given?
|
26
|
-
case json
|
27
|
-
when Array
|
28
|
-
json.each { |obj| yield obj }
|
29
|
-
when Hash
|
30
|
-
json.each_pair { |key, value| yield key, value }
|
31
|
-
else
|
32
|
-
yield json
|
33
|
-
end
|
34
|
-
else
|
35
|
-
json
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
# Dump the +data+ into this resource. It must be opened for
|
40
|
-
# writing.
|
41
|
-
#
|
42
|
-
# @param [Hash, String, Array, Fixnum] data the Ruby object to dump
|
43
|
-
# @option options [true, false] :persist (false) Don't close the IO object after writing
|
44
|
-
def dump data, options={}
|
45
|
-
require 'json'
|
46
|
-
write(data.to_json)
|
47
|
-
io.close unless options[:persist]
|
48
|
-
self
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
53
|
-
end
|
@@ -1,72 +0,0 @@
|
|
1
|
-
module IMW
|
2
|
-
module Resources
|
3
|
-
module Formats
|
4
|
-
|
5
|
-
# Defines methods to parse SGML-derived data formats (XML, HTML,
|
6
|
-
# &c.). This module isn't directly used to extend resources.
|
7
|
-
# Instead, more specific modules (e.g. -
|
8
|
-
# IMW::Resources::Formats::Xml) are used.
|
9
|
-
module Sgml
|
10
|
-
|
11
|
-
# Parse this resource using Hpricot and return (or yield if
|
12
|
-
# given a block) the resulting Hpricot::Doc.
|
13
|
-
#
|
14
|
-
# @return [Hpricot::Doc]
|
15
|
-
# @yield [Hpricot::Doc]
|
16
|
-
def load &block
|
17
|
-
require 'hpricot'
|
18
|
-
sgml = Hpricot(io)
|
19
|
-
if block_given?
|
20
|
-
yield sgml
|
21
|
-
else
|
22
|
-
sgml
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
# Parse the Hpricot::Doc of this resource with the given
|
27
|
-
# +parser+.
|
28
|
-
#
|
29
|
-
# The parser can either be an IMW::Parsers::HtmlParser or a
|
30
|
-
# hash which will be used to build such a parser. See the
|
31
|
-
# documentation for IMW::Parsers::HtmlParser for more
|
32
|
-
# information.
|
33
|
-
#
|
34
|
-
# @param [Hash, IMW::Parsers::HtmlParser] parser
|
35
|
-
# @return [Hash] the parser's output
|
36
|
-
def parse parser
|
37
|
-
if parser.is_a?(IMW::Parsers::HtmlParser)
|
38
|
-
parser.parse(load)
|
39
|
-
else
|
40
|
-
IMW::Parsers::HtmlParser.new(parser).parse(load)
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
# Defines methods for XML data.
|
46
|
-
module Xml
|
47
|
-
include Sgml
|
48
|
-
end
|
49
|
-
|
50
|
-
# Defines methods for XSL data.
|
51
|
-
module Xsl
|
52
|
-
include Sgml
|
53
|
-
end
|
54
|
-
|
55
|
-
# Defines methods for XHTML data.
|
56
|
-
module Xhtml
|
57
|
-
include Sgml
|
58
|
-
end
|
59
|
-
|
60
|
-
# Defines methods for HTML data.
|
61
|
-
module Html
|
62
|
-
include Sgml
|
63
|
-
end
|
64
|
-
|
65
|
-
# Defines methods for RDF data.
|
66
|
-
module Rdf
|
67
|
-
include Sgml
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
@@ -1,53 +0,0 @@
|
|
1
|
-
module IMW
|
2
|
-
module Resources
|
3
|
-
module Formats
|
4
|
-
|
5
|
-
# Provides methods for reading and writing YAML data.
|
6
|
-
module Yaml
|
7
|
-
|
8
|
-
# Return the content of this resource.
|
9
|
-
#
|
10
|
-
# Will try to be smart about iterating over the data when
|
11
|
-
# passed a block.
|
12
|
-
#
|
13
|
-
# - if the outermost YAML data structure is an array, then
|
14
|
-
# yield each element
|
15
|
-
#
|
16
|
-
# - if the outermost YAML data structure is a mapping, then
|
17
|
-
# yield each key, value pair
|
18
|
-
#
|
19
|
-
# - otherwise just yield the structure
|
20
|
-
#
|
21
|
-
# @return [Hash, Array, String, Fixnum] whatever the YAML contained
|
22
|
-
def load &block
|
23
|
-
require 'yaml'
|
24
|
-
yaml = YAML.load(read)
|
25
|
-
if block_given?
|
26
|
-
case yaml
|
27
|
-
when Array
|
28
|
-
yaml.each { |obj| yield obj }
|
29
|
-
when Hash
|
30
|
-
yaml.each_pair { |key, value| yield key, value }
|
31
|
-
else
|
32
|
-
yield yaml
|
33
|
-
end
|
34
|
-
else
|
35
|
-
yaml
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
# Dump the +data+ into this resource. It must be opened for
|
40
|
-
# writing.
|
41
|
-
#
|
42
|
-
# @param [Hash, String, Array, Fixnum] data the Ruby object to dump
|
43
|
-
# @option options [true, false] :persist (false) Don't close the IO object after writing
|
44
|
-
def dump data, options={}
|
45
|
-
require 'yaml'
|
46
|
-
write(data.to_yaml)
|
47
|
-
io.close unless options[:persist]
|
48
|
-
self
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
53
|
-
end
|
data/lib/imw/resources/local.rb
DELETED
@@ -1,198 +0,0 @@
|
|
1
|
-
module IMW
|
2
|
-
module Resources
|
3
|
-
|
4
|
-
# Defines methods appropriate for any file (or directory) on the
|
5
|
-
# local machine. Includes methods from the File class like
|
6
|
-
# File#exist?, File#size, &c.
|
7
|
-
#
|
8
|
-
# When extending with this module, it will automatically also
|
9
|
-
# extend with either IMW::Resources::LocalDirectory or
|
10
|
-
# IMW::Resources::LocalFile, as appropriate.
|
11
|
-
module LocalObj
|
12
|
-
|
13
|
-
def self.extended obj
|
14
|
-
# also extend with file or directory as appropriate
|
15
|
-
obj.extend(obj.directory? ? LocalDirectory : LocalFile)
|
16
|
-
end
|
17
|
-
|
18
|
-
# Steal a bunch of class methods from File which only take a
|
19
|
-
# path as a first argument.
|
20
|
-
[:executable?, :executable_real?, :exist?, :file?, :directory?, :ftype, :owned?, :pipe?, :readable?, :readable_real?, :setgid?, :setuid?, :size, :size?, :socket?, :split, :stat, :sticky?, :writable?, :writable_real?, :zero?].each do |class_method|
|
21
|
-
define_method class_method do
|
22
|
-
File.send(class_method, path)
|
23
|
-
end
|
24
|
-
end
|
25
|
-
alias_method :exists?, :exist?
|
26
|
-
|
27
|
-
# Return the path to this local object.
|
28
|
-
#
|
29
|
-
# @return [String]
|
30
|
-
def path
|
31
|
-
@path ||= File.expand_path(@encoded_uri ? Addressable::URI.decode(uri.to_s) : uri.to_s)
|
32
|
-
end
|
33
|
-
|
34
|
-
# Is this file on the local machine?
|
35
|
-
#
|
36
|
-
# @return [true, false]
|
37
|
-
def is_local?
|
38
|
-
true
|
39
|
-
end
|
40
|
-
|
41
|
-
# Copy this resource to the +new_uri+.
|
42
|
-
#
|
43
|
-
# @param [String, IMW::Resource] new_uri
|
44
|
-
# @return [IMW::Resource] the new resource
|
45
|
-
def cp new_uri
|
46
|
-
IMW::Transforms::Transferer.new(:cp, self, new_uri).transfer!
|
47
|
-
end
|
48
|
-
|
49
|
-
# Move this resource to the +new_uri+.
|
50
|
-
#
|
51
|
-
# @param [String, IMW::Resource] new_uri
|
52
|
-
# @return [IMW::Resource] the new resource
|
53
|
-
def mv new_uri
|
54
|
-
IMW::Transforms::Transferer.new(:mv, self, new_uri).transfer!
|
55
|
-
end
|
56
|
-
|
57
|
-
end
|
58
|
-
|
59
|
-
# Defines methods for appropriate for a local file.
|
60
|
-
module LocalFile
|
61
|
-
|
62
|
-
# Delete this resource.
|
63
|
-
def rm
|
64
|
-
should_exist!("Cannot delete")
|
65
|
-
FileUtils.rm path
|
66
|
-
self
|
67
|
-
end
|
68
|
-
alias_method :rm!, :rm
|
69
|
-
|
70
|
-
# Return the IO object at this path.
|
71
|
-
#
|
72
|
-
# @return [File]
|
73
|
-
def io
|
74
|
-
@io ||= open(path, mode)
|
75
|
-
end
|
76
|
-
|
77
|
-
# Read from this file.
|
78
|
-
#
|
79
|
-
# @param [Fixnum] length bytes to read
|
80
|
-
# @return [String]
|
81
|
-
def read length=nil
|
82
|
-
io.read(length)
|
83
|
-
end
|
84
|
-
|
85
|
-
# Write to this file
|
86
|
-
#
|
87
|
-
# @param [String, #to_s] text text to write
|
88
|
-
# @return [Fixnum] bytes written
|
89
|
-
def write text
|
90
|
-
io.write text
|
91
|
-
end
|
92
|
-
|
93
|
-
# Return the lines in this file.
|
94
|
-
#
|
95
|
-
# If passed a block, yield each line of the file to the block.
|
96
|
-
#
|
97
|
-
# @yield [String] each line of the file
|
98
|
-
# @return [Array] the lines in the file
|
99
|
-
def load &block
|
100
|
-
if block_given?
|
101
|
-
io.each do |line|
|
102
|
-
yield line
|
103
|
-
end
|
104
|
-
else
|
105
|
-
read.split("\n")
|
106
|
-
end
|
107
|
-
end
|
108
|
-
|
109
|
-
# Map over the lines in this file.
|
110
|
-
#
|
111
|
-
# @yield [String] each line of the file
|
112
|
-
def map &block
|
113
|
-
io.map(&block)
|
114
|
-
end
|
115
|
-
|
116
|
-
# Dump +data+ into this file.
|
117
|
-
#
|
118
|
-
# @param [String, Array, #each] data object to dump
|
119
|
-
# @option options [true, false] :persist (false) Don't close the file after writing
|
120
|
-
def dump data, options={}
|
121
|
-
data.each do |element| # works if data is an Array or a String
|
122
|
-
io.puts(element.to_s)
|
123
|
-
end
|
124
|
-
io.close unless options[:persist]
|
125
|
-
end
|
126
|
-
|
127
|
-
end
|
128
|
-
|
129
|
-
|
130
|
-
module LocalDirectory
|
131
|
-
|
132
|
-
# Delete this directory.
|
133
|
-
#
|
134
|
-
# @return [IMW::Resource] the deleted directory
|
135
|
-
def rmdir
|
136
|
-
FileUtils.rmdir path
|
137
|
-
self
|
138
|
-
end
|
139
|
-
|
140
|
-
# Delete this directory recursively.
|
141
|
-
#
|
142
|
-
# @return [IMW::Resource] the deleted directory
|
143
|
-
def rm_rf
|
144
|
-
FileUtils.rm_rf path
|
145
|
-
self
|
146
|
-
end
|
147
|
-
|
148
|
-
# Return a list of paths relative to this directory which match
|
149
|
-
# the +selector+. Works just like Dir[].
|
150
|
-
#
|
151
|
-
# @param [String] selector
|
152
|
-
# @return [Array] the matched paths
|
153
|
-
def [] selector='*'
|
154
|
-
Dir[File.join(path, selector)]
|
155
|
-
end
|
156
|
-
|
157
|
-
# Return a list of all paths directly within this directory.
|
158
|
-
#
|
159
|
-
# @return [Array]
|
160
|
-
def contents
|
161
|
-
self['*']
|
162
|
-
end
|
163
|
-
|
164
|
-
# Does this directory contain +obj+?
|
165
|
-
#
|
166
|
-
# @param [String, IMW::Resource] obj
|
167
|
-
# @return [true, false]
|
168
|
-
def contains? obj
|
169
|
-
require 'find'
|
170
|
-
obj_path = obj.is_a?(String) ? obj : obj.path
|
171
|
-
Find.find(path) do |sub_path|
|
172
|
-
return true if sub_path.ends_with?(obj_path)
|
173
|
-
end
|
174
|
-
false
|
175
|
-
end
|
176
|
-
|
177
|
-
# Return all paths within this directory, recursively.
|
178
|
-
#
|
179
|
-
# @return [Array<String>]
|
180
|
-
def all_contents
|
181
|
-
self['**/*']
|
182
|
-
end
|
183
|
-
|
184
|
-
# Return all resources within this directory, i.e. - all paths
|
185
|
-
# converted to IMW::Resource objects.
|
186
|
-
#
|
187
|
-
# @return [Array<IMW::Resource>]
|
188
|
-
def resources
|
189
|
-
all_contents.map do |path|
|
190
|
-
IMW.open(path) unless File.directory?(path)
|
191
|
-
end.compact
|
192
|
-
end
|
193
|
-
|
194
|
-
end
|
195
|
-
end
|
196
|
-
end
|
197
|
-
|
198
|
-
|
data/lib/imw/resources/remote.rb
DELETED
@@ -1,110 +0,0 @@
|
|
1
|
-
module IMW
|
2
|
-
module Resources
|
3
|
-
|
4
|
-
# Defines methods appropriate for accessing a remote resource, no
|
5
|
-
# matter what the protocol.
|
6
|
-
module RemoteObj
|
7
|
-
|
8
|
-
#
|
9
|
-
# TODO -- self.extended should extend by RemoteDirectory when appropriate
|
10
|
-
#
|
11
|
-
|
12
|
-
def self.extended obj
|
13
|
-
obj.extend(RemoteFile)
|
14
|
-
end
|
15
|
-
|
16
|
-
# Is this resource on a remote host?
|
17
|
-
#
|
18
|
-
# @return [true,false]
|
19
|
-
def is_remote?
|
20
|
-
true
|
21
|
-
end
|
22
|
-
|
23
|
-
# The host of this resource.
|
24
|
-
#
|
25
|
-
# @return [String]
|
26
|
-
def host
|
27
|
-
@host ||= uri.host
|
28
|
-
end
|
29
|
-
|
30
|
-
# Return the query string part of this resource's URI. Will
|
31
|
-
# likely be +nil+ for local resources.
|
32
|
-
#
|
33
|
-
# @return [String]
|
34
|
-
def query_string
|
35
|
-
@query_string ||= uri.query
|
36
|
-
end
|
37
|
-
|
38
|
-
# Return the fragment part of this resource's URI. Will likely be
|
39
|
-
# +nil+ for local resources.
|
40
|
-
#
|
41
|
-
# @return [String]
|
42
|
-
def fragment
|
43
|
-
@fragment ||= uri.fragment
|
44
|
-
end
|
45
|
-
|
46
|
-
# Return the path part of this resource's URI. Will _not_
|
47
|
-
# include the +query_string+ or +fragment+.
|
48
|
-
#
|
49
|
-
# @return [String]
|
50
|
-
def path
|
51
|
-
@path ||= uri.path
|
52
|
-
end
|
53
|
-
|
54
|
-
end
|
55
|
-
|
56
|
-
module RemoteFile
|
57
|
-
|
58
|
-
# Return the IO object for this remote file.
|
59
|
-
#
|
60
|
-
# The mode of this resource is ignored.
|
61
|
-
#
|
62
|
-
# @return [StringIO]
|
63
|
-
def io
|
64
|
-
require 'open-uri'
|
65
|
-
@io ||= open(uri.to_s) # ignore mode
|
66
|
-
end
|
67
|
-
|
68
|
-
# Read the contents of this remote file.
|
69
|
-
#
|
70
|
-
# @return [String]
|
71
|
-
def read
|
72
|
-
io.read
|
73
|
-
end
|
74
|
-
|
75
|
-
# Return the lines of this remote file.
|
76
|
-
#
|
77
|
-
# If passed a block then yield each line to the block.
|
78
|
-
#
|
79
|
-
# @return [Array] the lines of this remote file
|
80
|
-
# @yield [String] each line of this remote file
|
81
|
-
def load &block
|
82
|
-
if block_given?
|
83
|
-
io.each do |line|
|
84
|
-
yield line
|
85
|
-
end
|
86
|
-
else
|
87
|
-
read.split("\n")
|
88
|
-
end
|
89
|
-
end
|
90
|
-
|
91
|
-
# Map over the lines in this remote file.
|
92
|
-
#
|
93
|
-
# @yield [String] each line of the file
|
94
|
-
def map &block
|
95
|
-
io.map(&block)
|
96
|
-
end
|
97
|
-
end
|
98
|
-
|
99
|
-
|
100
|
-
module RemoteDirectory
|
101
|
-
|
102
|
-
#
|
103
|
-
# TODO -- bloody everything
|
104
|
-
#
|
105
|
-
|
106
|
-
|
107
|
-
end
|
108
|
-
end
|
109
|
-
end
|
110
|
-
|