imw 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +34 -14
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/lib/imw.rb +9 -6
- data/lib/imw/{resources/archive.rb → archives.rb} +20 -10
- data/lib/imw/archives/rar.rb +19 -0
- data/lib/imw/archives/tar.rb +19 -0
- data/lib/imw/archives/tarbz2.rb +73 -0
- data/lib/imw/archives/targz.rb +73 -0
- data/lib/imw/archives/zip.rb +51 -0
- data/lib/imw/{resources/compressed_file.rb → compressed_files.rb} +16 -11
- data/lib/imw/compressed_files/bz2.rb +16 -0
- data/lib/imw/{resources → compressed_files}/compressible.rb +2 -4
- data/lib/imw/compressed_files/gz.rb +16 -0
- data/lib/imw/formats.rb +31 -0
- data/lib/imw/formats/delimited.rb +90 -0
- data/lib/imw/formats/excel.rb +125 -0
- data/lib/imw/formats/json.rb +51 -0
- data/lib/imw/formats/sgml.rb +69 -0
- data/lib/imw/formats/yaml.rb +51 -0
- data/lib/imw/resource.rb +108 -10
- data/lib/imw/schemes.rb +21 -0
- data/lib/imw/schemes/hdfs.rb +240 -0
- data/lib/imw/schemes/http.rb +166 -0
- data/lib/imw/schemes/local.rb +219 -0
- data/lib/imw/schemes/remote.rb +114 -0
- data/lib/imw/schemes/s3.rb +135 -0
- data/lib/imw/tools.rb +8 -0
- data/lib/imw/{transforms → tools}/archiver.rb +1 -1
- data/lib/imw/{transforms → tools}/transferer.rb +10 -10
- data/spec/imw/{resources/archive_spec.rb → archive_spec.rb} +3 -3
- data/spec/imw/{resources/archives_and_compressed → archives}/rar_spec.rb +2 -2
- data/spec/imw/{resources/archives_and_compressed → archives}/tar_spec.rb +2 -2
- data/spec/imw/{resources/archives_and_compressed → archives}/tarbz2_spec.rb +4 -4
- data/spec/imw/{resources/archives_and_compressed → archives}/targz_spec.rb +4 -4
- data/spec/imw/{resources/archives_and_compressed → archives}/zip_spec.rb +2 -2
- data/spec/imw/compressed_files/bz2_spec.rb +15 -0
- data/spec/imw/{resources → compressed_files}/compressible_spec.rb +1 -1
- data/spec/imw/compressed_files/gz_spec.rb +15 -0
- data/spec/imw/{resources/compressed_file_spec.rb → compressed_files_spec.rb} +3 -3
- data/spec/imw/{resources/formats → formats}/delimited_spec.rb +2 -2
- data/spec/imw/{resources/formats → formats}/json_spec.rb +2 -2
- data/spec/imw/{resources/formats → formats}/sgml_spec.rb +2 -2
- data/spec/imw/{resources/formats → formats}/yaml_spec.rb +2 -2
- data/spec/imw/resource_spec.rb +4 -4
- data/spec/imw/{resources/schemes → schemes}/hdfs_spec.rb +7 -7
- data/spec/imw/{resources/schemes → schemes}/http_spec.rb +2 -2
- data/spec/imw/{resources → schemes}/local_spec.rb +5 -5
- data/spec/imw/{resources → schemes}/remote_spec.rb +7 -3
- data/spec/imw/{resources/schemes → schemes}/s3_spec.rb +2 -2
- data/spec/imw/{transforms → tools}/archiver_spec.rb +2 -2
- data/spec/imw/tools/transferer_spec.rb +113 -0
- metadata +69 -71
- data/lib/imw/resources.rb +0 -118
- data/lib/imw/resources/archives_and_compressed.rb +0 -32
- data/lib/imw/resources/archives_and_compressed/bz2.rb +0 -18
- data/lib/imw/resources/archives_and_compressed/gz.rb +0 -18
- data/lib/imw/resources/archives_and_compressed/rar.rb +0 -23
- data/lib/imw/resources/archives_and_compressed/tar.rb +0 -23
- data/lib/imw/resources/archives_and_compressed/tarbz2.rb +0 -78
- data/lib/imw/resources/archives_and_compressed/targz.rb +0 -78
- data/lib/imw/resources/archives_and_compressed/zip.rb +0 -57
- data/lib/imw/resources/formats.rb +0 -32
- data/lib/imw/resources/formats/delimited.rb +0 -92
- data/lib/imw/resources/formats/excel.rb +0 -125
- data/lib/imw/resources/formats/json.rb +0 -53
- data/lib/imw/resources/formats/sgml.rb +0 -72
- data/lib/imw/resources/formats/yaml.rb +0 -53
- data/lib/imw/resources/local.rb +0 -198
- data/lib/imw/resources/remote.rb +0 -110
- data/lib/imw/resources/schemes.rb +0 -19
- data/lib/imw/resources/schemes/hdfs.rb +0 -242
- data/lib/imw/resources/schemes/http.rb +0 -161
- data/lib/imw/resources/schemes/s3.rb +0 -137
- data/lib/imw/transforms.rb +0 -8
- data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +0 -15
- data/spec/imw/resources/archives_and_compressed/gz_spec.rb +0 -15
- data/spec/imw/transforms/transferer_spec.rb +0 -113
@@ -0,0 +1,219 @@
|
|
1
|
+
module IMW
|
2
|
+
module Schemes
|
3
|
+
module Local
|
4
|
+
|
5
|
+
# Defines methods appropriate for any file (or directory) on the
|
6
|
+
# local machine. Includes methods from the File class like
|
7
|
+
# File#exist?, File#size, &c.
|
8
|
+
#
|
9
|
+
# When extending with this module, it will automatically also
|
10
|
+
# extend with either IMW::Schemes::Local::LocalDirectory or
|
11
|
+
# IMW::Schemes::Local::LocalFile, as appropriate.
|
12
|
+
module Base

  # Hook run when an object is extended with this module: the object
  # is additionally extended with LocalDirectory when its path is a
  # directory, and with LocalFile otherwise.
  #
  # @param [Object] obj the object being extended
  def self.extended(obj)
    flavor = obj.directory? ? LocalDirectory : LocalFile
    obj.extend(flavor)
  end

  # Re-expose File class methods whose only argument is a path as
  # instance methods that operate on this resource's own path.
  %w[
    executable? executable_real? exist? file? directory? ftype owned?
    pipe? readable? readable_real? setgid? setuid? size size? socket?
    split stat sticky? writable? writable_real? zero?
  ].each do |file_method|
    define_method(file_method.to_sym) { File.send(file_method.to_sym, path) }
  end
  alias_method :exists?, :exist?

  # The expanded local path of this object, computed once and cached.
  # The URI string is decoded with Addressable first when
  # @encoded_uri is set.
  #
  # @return [String]
  def path
    return @path if @path
    raw = uri.to_s
    raw = Addressable::URI.decode(raw) if @encoded_uri
    @path = File.expand_path(raw)
  end

  # Local resources always report themselves as local.
  #
  # @return [true]
  def is_local?
    true
  end

  # Copy this resource to +new_uri+ via IMW::Tools::Transferer.
  #
  # @param [String, IMW::Resource] new_uri
  # @return [IMW::Resource] the new resource
  def cp(new_uri)
    transfer = IMW::Tools::Transferer.new(:cp, self, new_uri)
    transfer.transfer!
  end

  # Move this resource to +new_uri+ via IMW::Tools::Transferer.
  #
  # @param [String, IMW::Resource] new_uri
  # @return [IMW::Resource] the new resource
  def mv(new_uri)
    transfer = IMW::Tools::Transferer.new(:mv, self, new_uri)
    transfer.transfer!
  end

end
|
63
|
+
|
64
|
+
# Defines methods appropriate for a local file.
|
65
|
+
module LocalFile

  # Is this resource a regular file?
  #
  # @return [true, false]
  def is_file?
    true
  end

  # Delete this resource.
  #
  # Calls should_exist! (defined outside this module) before removing
  # the file at +path+.
  #
  # @return [IMW::Resource] this resource
  def rm
    should_exist!("Cannot delete")
    FileUtils.rm path
    self
  end
  alias_method :rm!, :rm

  # Return the IO object at this path, opened with this resource's
  # +mode+ and cached for subsequent calls.
  #
  # File.open is used rather than Kernel#open so that a path starting
  # with "|" can never be interpreted as a command to run.
  #
  # @return [File]
  def io
    @io ||= File.open(path, mode)
  end

  # Read from this file.
  #
  # @param [Fixnum] length bytes to read (everything when +nil+)
  # @return [String]
  def read(length=nil)
    io.read(length)
  end

  # Write to this file.
  #
  # @param [String, #to_s] text text to write
  # @return [Fixnum] bytes written
  def write(text)
    io.write text
  end

  # Return the lines in this file.
  #
  # If passed a block, yield each line of the file to the block
  # (lines keep their trailing newline in that case); otherwise the
  # whole file is read and split on "\n".
  #
  # @yield [String] each line of the file
  # @return [Array] the lines in the file
  def load(&block)
    if block_given?
      io.each do |line|
        yield line
      end
    else
      read.split("\n")
    end
  end

  # Map over the lines in this file.
  #
  # @yield [String] each line of the file
  def map(&block)
    io.map(&block)
  end

  # Dump +data+ into this file, one element per line.
  #
  # Strings stopped responding to #each in Ruby 1.9, so anything
  # without #each is iterated line by line with #each_line instead.
  # Objects supporting neither still raise NoMethodError.
  #
  # @param [String, Array, #each] data object to dump
  # @option options [true, false] :persist (false) Don't close the file after writing
  def dump(data, options={})
    entries = data.respond_to?(:each) ? data : data.each_line
    entries.each do |element|
      io.puts(element.to_s)
    end
    io.close unless options[:persist]
  end
end
|
139
|
+
|
140
|
+
# Defines methods for manipulating the contents of a local
|
141
|
+
# directory.
|
142
|
+
module LocalDirectory

  # Is this resource a directory?
  #
  # @return [true, false]
  def is_directory?
    true
  end

  # Delete this directory.
  #
  # @return [IMW::Resource] the deleted directory
  def rmdir
    FileUtils.rmdir path
    self
  end

  # Delete this directory recursively.
  #
  # @return [IMW::Resource] the deleted directory
  def rm_rf
    FileUtils.rm_rf path
    self
  end

  # Return the paths which match the +selector+ glob once it is
  # joined onto this directory's path. Works just like Dir[].
  #
  # @param [String] selector
  # @return [Array] the matched paths
  def [](selector='*')
    Dir[File.join(path, selector)]
  end

  # Return a list of all paths directly within this directory.
  #
  # @return [Array]
  def contents
    self['*']
  end

  # Does this directory contain +obj+?
  #
  # Walks this directory recursively and reports whether any visited
  # path ends with +obj+ (when a String) or with +obj.path+.
  #
  # @param [String, IMW::Resource] obj
  # @return [true, false]
  def contains?(obj)
    require 'find'
    obj_path = obj.is_a?(String) ? obj : obj.path
    Find.find(path) do |sub_path|
      # String#end_with? is core Ruby; #ends_with? only exists when
      # ActiveSupport's core extensions are loaded.
      return true if sub_path.end_with?(obj_path)
    end
    false
  end

  # Return all paths within this directory, recursively.
  #
  # @return [Array<String>]
  def all_contents
    self['**/*']
  end

  # Return all resources within this directory, i.e. - all
  # non-directory paths opened with IMW.open.
  #
  # @return [Array<IMW::Resource>]
  def resources
    # The block parameter is deliberately not called +path+ so that
    # it does not shadow this resource's own #path method.
    all_contents.map do |sub_path|
      IMW.open(sub_path) unless File.directory?(sub_path)
    end.compact
  end

end
|
214
|
+
end
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
218
|
+
|
219
|
+
|
@@ -0,0 +1,114 @@
|
|
1
|
+
module IMW
|
2
|
+
module Schemes
|
3
|
+
|
4
|
+
# Contains modules which define methods appropriate for remote
|
5
|
+
# resources, no matter the protocol.
|
6
|
+
module Remote
|
7
|
+
|
8
|
+
# Defines methods appropriate for accessing a remote resource,
|
9
|
+
# no matter the protocol.
|
10
|
+
module Base

  #
  # TODO -- self.extended should extend by RemoteDirectory when appropriate
  #

  # Hook run when an object is extended with this module: every such
  # object is also extended with RemoteFile.
  #
  # @param [Object] obj the object being extended
  def self.extended(obj)
    obj.extend(RemoteFile)
  end

  # Remote resources always report themselves as remote.
  #
  # @return [true]
  def is_remote?
    true
  end

  # The host component of this resource's URI, cached after the
  # first truthy lookup.
  #
  # @return [String]
  def host
    @host || (@host = uri.host)
  end

  # The query component of this resource's URI, cached after the
  # first truthy lookup.
  #
  # @return [String, nil]
  def query_string
    @query_string || (@query_string = uri.query)
  end

  # The fragment component of this resource's URI, cached after the
  # first truthy lookup.
  #
  # @return [String, nil]
  def fragment
    @fragment || (@fragment = uri.fragment)
  end

  # The path component of this resource's URI, as reported by
  # +uri.path+ (so without +query_string+ or +fragment+), cached
  # after the first truthy lookup.
  #
  # @return [String]
  def path
    @path || (@path = uri.path)
  end

end
|
59
|
+
|
60
|
+
module RemoteFile

  # Return the IO object for this remote file, fetched on first use
  # and cached afterwards.
  #
  # The mode of this resource is ignored.
  #
  # Kernel#open stopped accepting URLs in Ruby 3.0, so URI.open is
  # used whenever open-uri provides it; older Rubies, where open-uri
  # only patches Kernel#open, keep the original call.
  #
  # @return [StringIO]
  def io
    require 'open-uri'
    @io ||= URI.respond_to?(:open) ? URI.open(uri.to_s) : open(uri.to_s)
  end

  # Read the contents of this remote file.
  #
  # @return [String]
  def read
    io.read
  end

  # Return the lines of this remote file.
  #
  # If passed a block then yield each line to the block (lines keep
  # their trailing newline in that case); otherwise the whole body is
  # read and split on "\n".
  #
  # @return [Array] the lines of this remote file
  # @yield [String] each line of this remote file
  def load(&block)
    if block_given?
      io.each do |line|
        yield line
      end
    else
      read.split("\n")
    end
  end

  # Map over the lines in this remote file.
  #
  # @yield [String] each line of the file
  def map(&block)
    io.map(&block)
  end
end
|
102
|
+
|
103
|
+
|
104
|
+
module RemoteDirectory
|
105
|
+
|
106
|
+
#
|
107
|
+
# TODO -- not implemented yet: this module currently defines no methods.
|
108
|
+
#
|
109
|
+
|
110
|
+
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
@@ -0,0 +1,135 @@
|
|
1
|
+
module IMW
|
2
|
+
module Schemes
|
3
|
+
|
4
|
+
# Defines methods for reading and writing data to {Amazon
|
5
|
+
# S3}[http://aws.amazon.com/s3] buckets.
|
6
|
+
#
|
7
|
+
# IMW.open('s3://my_bucket/path/to/some/file.csv')
|
8
|
+
#
|
9
|
+
# Learn more about {Amazon Web Services}[http://aws.amazon.com].
|
10
|
+
module S3

  # For an S3 resource, the bucket is just the hostname.
  #
  # @return [String]
  def bucket
    host
  end

  # Is this resource an S3 resource?
  #
  # @return [true, false]
  def on_s3?
    true
  end
  alias_method :is_s3?, :on_s3?

  # Copy this resource to the +new_uri+.
  #
  # @param [String, IMW::Resource] new_uri
  # @return [IMW::Resource] the new resource
  def cp(new_uri)
    IMW::Tools::Transferer.new(:cp, self, new_uri).transfer!
  end

  # The AWS::S3::S3Object corresponding to this resource.
  #
  # make_connection! is defined on this module itself, so the
  # resource's own class is only asked to connect when it actually
  # responds to it; otherwise the module-level method is used.
  def s3_object
    connector = self.class.respond_to?(:make_connection!) ? self.class : S3
    connector.make_connection!
    # NOTE(review): confirm the aws-s3 S3Object constructor accepts
    # (key, bucket) positional arguments.
    @s3_object ||= AWS::S3::S3Object.new(path, bucket)
  end

  # Does this resource exist on S3?
  #
  # @return [true, false]
  def exist?
    s3_object.exists?
  end
  alias_method :exists?, :exist?

  # Remove this resource from S3.
  #
  # @return whatever +s3_object.delete+ returns
  def rm
    s3_object.delete
  end
  alias_method :rm!, :rm

  # Return the S3N URL for this S3 object
  #
  #   resource = IMW.open('s3://my_bucket/path/to/some/obj')
  #   resource.s3n_url
  #   => 's3n://my_bucket/path/to/some/obj'
  #
  # @return [String]
  def s3n_url
    # Only the scheme at the very start of the URI is rewritten.
    uri.to_s.sub(/\As3:/, 's3n:')
  end

  # Return the contents of this S3 object.
  #
  # @return [String]
  def read
    s3_object.value
  end

  # Store +source+ into +destination+.
  #
  # @param [String, IMW::Resource, #io] source
  # @param [String, IMW::Resource, #path, #bucket] destination
  # @return [IMW::Resource] the new S3 object
  # @raise [IMW::ArgumentError] if +destination+ is not on S3
  def self.put(source, destination)
    source      = IMW.open(source)
    destination = IMW.open(destination)
    raise IMW::ArgumentError.new("destination must be on S3 -- #{destination.uri} given") unless destination.on_s3?
    make_connection!
    AWS::S3::S3Object.store(destination.path, source.io, destination.bucket)
    destination
  end

  # Download +source+ from S3 into +destination+.
  #
  # The object is streamed chunk by chunk into +destination+, which
  # is then closed and reopened.
  #
  # @param [String, IMW::Resource, #path, #bucket] source
  # @param [String, IMW::Resource, #write] destination
  # @return the result of +destination.reopen+
  def self.get(source, destination)
    source      = IMW.open(source)
    destination = IMW.open(destination)
    make_connection!
    AWS::S3::S3Object.stream(source.path, source.bucket) do |chunk|
      destination.write(chunk)
    end
    destination.close
    destination.reopen
  end

  # Copy S3 resource +source+ to +destination+.
  #
  # @param [String, IMW::Resource, #path, #bucket] source
  # @param [String, IMW::Resource, #path, #bucket] destination
  # @return [IMW::Resource] the new resource
  # @raise [IMW::PathError] if either bucket is blank or they differ
  def self.copy(source, destination)
    source      = IMW.open(source)
    destination = IMW.open(destination)
    raise IMW::PathError.new("Bucket names must be non-blank and match to 'copy'") unless source.bucket.present? && destination.bucket.present? && source.bucket == destination.bucket
    make_connection!
    AWS::S3::S3Object.copy(source.path, destination.path, destination.bucket)
    destination
  end

  # Make an S3 connection, reusing the cached one when present.
  #
  # Uses settings defined in IMW::AWS_CREDENTIALS.
  #
  # A bare +protected+ used to precede this method, but visibility
  # keywords do not apply to +def self.+ methods, so it stays public.
  #
  # @return the result of AWS::S3::Base.establish_connection!
  # @raise [IMW::Error] if IMW::AWS_CREDENTIALS is not defined
  def self.make_connection!
    return @connection if @connection
    raise IMW::Error.new("Must define a constant IMW::AWS_CREDENTIALS with an :access_key_id and a :secret_access_key before using S3 resources") unless defined?(IMW::AWS_CREDENTIALS)
    require 'aws/s3'
    @connection = AWS::S3::Base.establish_connection!(IMW::AWS_CREDENTIALS)
  end

end
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|