imw 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78) hide show
  1. data/README.rdoc +34 -14
  2. data/Rakefile +1 -1
  3. data/VERSION +1 -1
  4. data/lib/imw.rb +9 -6
  5. data/lib/imw/{resources/archive.rb → archives.rb} +20 -10
  6. data/lib/imw/archives/rar.rb +19 -0
  7. data/lib/imw/archives/tar.rb +19 -0
  8. data/lib/imw/archives/tarbz2.rb +73 -0
  9. data/lib/imw/archives/targz.rb +73 -0
  10. data/lib/imw/archives/zip.rb +51 -0
  11. data/lib/imw/{resources/compressed_file.rb → compressed_files.rb} +16 -11
  12. data/lib/imw/compressed_files/bz2.rb +16 -0
  13. data/lib/imw/{resources → compressed_files}/compressible.rb +2 -4
  14. data/lib/imw/compressed_files/gz.rb +16 -0
  15. data/lib/imw/formats.rb +31 -0
  16. data/lib/imw/formats/delimited.rb +90 -0
  17. data/lib/imw/formats/excel.rb +125 -0
  18. data/lib/imw/formats/json.rb +51 -0
  19. data/lib/imw/formats/sgml.rb +69 -0
  20. data/lib/imw/formats/yaml.rb +51 -0
  21. data/lib/imw/resource.rb +108 -10
  22. data/lib/imw/schemes.rb +21 -0
  23. data/lib/imw/schemes/hdfs.rb +240 -0
  24. data/lib/imw/schemes/http.rb +166 -0
  25. data/lib/imw/schemes/local.rb +219 -0
  26. data/lib/imw/schemes/remote.rb +114 -0
  27. data/lib/imw/schemes/s3.rb +135 -0
  28. data/lib/imw/tools.rb +8 -0
  29. data/lib/imw/{transforms → tools}/archiver.rb +1 -1
  30. data/lib/imw/{transforms → tools}/transferer.rb +10 -10
  31. data/spec/imw/{resources/archive_spec.rb → archive_spec.rb} +3 -3
  32. data/spec/imw/{resources/archives_and_compressed → archives}/rar_spec.rb +2 -2
  33. data/spec/imw/{resources/archives_and_compressed → archives}/tar_spec.rb +2 -2
  34. data/spec/imw/{resources/archives_and_compressed → archives}/tarbz2_spec.rb +4 -4
  35. data/spec/imw/{resources/archives_and_compressed → archives}/targz_spec.rb +4 -4
  36. data/spec/imw/{resources/archives_and_compressed → archives}/zip_spec.rb +2 -2
  37. data/spec/imw/compressed_files/bz2_spec.rb +15 -0
  38. data/spec/imw/{resources → compressed_files}/compressible_spec.rb +1 -1
  39. data/spec/imw/compressed_files/gz_spec.rb +15 -0
  40. data/spec/imw/{resources/compressed_file_spec.rb → compressed_files_spec.rb} +3 -3
  41. data/spec/imw/{resources/formats → formats}/delimited_spec.rb +2 -2
  42. data/spec/imw/{resources/formats → formats}/json_spec.rb +2 -2
  43. data/spec/imw/{resources/formats → formats}/sgml_spec.rb +2 -2
  44. data/spec/imw/{resources/formats → formats}/yaml_spec.rb +2 -2
  45. data/spec/imw/resource_spec.rb +4 -4
  46. data/spec/imw/{resources/schemes → schemes}/hdfs_spec.rb +7 -7
  47. data/spec/imw/{resources/schemes → schemes}/http_spec.rb +2 -2
  48. data/spec/imw/{resources → schemes}/local_spec.rb +5 -5
  49. data/spec/imw/{resources → schemes}/remote_spec.rb +7 -3
  50. data/spec/imw/{resources/schemes → schemes}/s3_spec.rb +2 -2
  51. data/spec/imw/{transforms → tools}/archiver_spec.rb +2 -2
  52. data/spec/imw/tools/transferer_spec.rb +113 -0
  53. metadata +69 -71
  54. data/lib/imw/resources.rb +0 -118
  55. data/lib/imw/resources/archives_and_compressed.rb +0 -32
  56. data/lib/imw/resources/archives_and_compressed/bz2.rb +0 -18
  57. data/lib/imw/resources/archives_and_compressed/gz.rb +0 -18
  58. data/lib/imw/resources/archives_and_compressed/rar.rb +0 -23
  59. data/lib/imw/resources/archives_and_compressed/tar.rb +0 -23
  60. data/lib/imw/resources/archives_and_compressed/tarbz2.rb +0 -78
  61. data/lib/imw/resources/archives_and_compressed/targz.rb +0 -78
  62. data/lib/imw/resources/archives_and_compressed/zip.rb +0 -57
  63. data/lib/imw/resources/formats.rb +0 -32
  64. data/lib/imw/resources/formats/delimited.rb +0 -92
  65. data/lib/imw/resources/formats/excel.rb +0 -125
  66. data/lib/imw/resources/formats/json.rb +0 -53
  67. data/lib/imw/resources/formats/sgml.rb +0 -72
  68. data/lib/imw/resources/formats/yaml.rb +0 -53
  69. data/lib/imw/resources/local.rb +0 -198
  70. data/lib/imw/resources/remote.rb +0 -110
  71. data/lib/imw/resources/schemes.rb +0 -19
  72. data/lib/imw/resources/schemes/hdfs.rb +0 -242
  73. data/lib/imw/resources/schemes/http.rb +0 -161
  74. data/lib/imw/resources/schemes/s3.rb +0 -137
  75. data/lib/imw/transforms.rb +0 -8
  76. data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +0 -15
  77. data/spec/imw/resources/archives_and_compressed/gz_spec.rb +0 -15
  78. data/spec/imw/transforms/transferer_spec.rb +0 -113
@@ -0,0 +1,219 @@
1
+ module IMW
2
+ module Schemes
3
+ module Local
4
+
5
+ # Defines methods appropriate for any file (or directory) on the
6
+ # local machine. Includes methods from the File class like
7
+ # File#exist?, File#size, &c.
8
+ #
9
+ # When extending with this module, it will automatically also
10
+ # extend with either IMW::Schemes::Local::LocalDirectory or
11
+ # IMW::Schemes::Local::LocalFile, as appropriate.
12
+ module Base
13
+
14
+ def self.extended obj
15
+ # Ruby calls this hook when obj is extended with Base; also mix in LocalDirectory or LocalFile, chosen by obj.directory?
16
+ if obj.directory?
17
+ obj.extend(LocalDirectory)
18
+ else
19
+ obj.extend(LocalFile)
20
+ end
21
+ end
22
+
23
+ # Define an instance method for each of these File class methods; every
24
+ # one calls the File method with this resource's +path+ as its only argument.
25
+ [:executable?, :executable_real?, :exist?, :file?, :directory?, :ftype, :owned?, :pipe?, :readable?, :readable_real?, :setgid?, :setuid?, :size, :size?, :socket?, :split, :stat, :sticky?, :writable?, :writable_real?, :zero?].each do |class_method|
26
+ define_method class_method do
27
+ File.send(class_method, path)
28
+ end
29
+ end
30
+ alias_method :exists?, :exist?
31
+
32
+ # Return the path to this local object.
33
+ # Memoized in @path; expanded with File.expand_path, and URI-decoded first when @encoded_uri is set.
34
+ # @return [String]
35
+ def path
36
+ @path ||= File.expand_path(@encoded_uri ? Addressable::URI.decode(uri.to_s) : uri.to_s)
37
+ end
38
+
39
+ # Is this file on the local machine?
40
+ # Always true for objects extended with this module.
41
+ # @return [true, false]
42
+ def is_local?
43
+ true
44
+ end
45
+
46
+ # Copy this resource to the +new_uri+.
47
+ # Delegates to IMW::Tools::Transferer with the :cp action.
48
+ # @param [String, IMW::Resource] new_uri
49
+ # @return [IMW::Resource] the new resource (whatever Transferer#transfer! returns)
50
+ def cp new_uri
51
+ IMW::Tools::Transferer.new(:cp, self, new_uri).transfer!
52
+ end
53
+
54
+ # Move this resource to the +new_uri+.
55
+ # Delegates to IMW::Tools::Transferer with the :mv action.
56
+ # @param [String, IMW::Resource] new_uri
57
+ # @return [IMW::Resource] the new resource (whatever Transferer#transfer! returns)
58
+ def mv new_uri
59
+ IMW::Tools::Transferer.new(:mv, self, new_uri).transfer!
60
+ end
61
+
62
+ end
63
+
64
+ # Defines methods for appropriate for a local file.
65
+ module LocalFile
66
+
67
+ # Is this resource a regular file?
68
+ #
69
+ # @return [true, false]
70
+ def is_file?
71
+ true
72
+ end
73
+
74
+ # Delete this resource.
75
+ def rm
76
+ should_exist!("Cannot delete")
77
+ FileUtils.rm path
78
+ self
79
+ end
80
+ alias_method :rm!, :rm
81
+
82
+ # Return the IO object at this path.
83
+ #
84
+ # @return [File]
85
+ def io
86
+ @io ||= open(path, mode)
87
+ end
88
+
89
+ # Read from this file.
90
+ #
91
+ # @param [Fixnum] length bytes to read
92
+ # @return [String]
93
+ def read length=nil
94
+ io.read(length)
95
+ end
96
+
97
+ # Write to this file
98
+ #
99
+ # @param [String, #to_s] text text to write
100
+ # @return [Fixnum] bytes written
101
+ def write text
102
+ io.write text
103
+ end
104
+
105
+ # Return the lines in this file.
106
+ #
107
+ # If passed a block, yield each line of the file to the block.
108
+ #
109
+ # @yield [String] each line of the file
110
+ # @return [Array] the lines in the file
111
+ def load &block
112
+ if block_given?
113
+ io.each do |line|
114
+ yield line
115
+ end
116
+ else
117
+ read.split("\n")
118
+ end
119
+ end
120
+
121
+ # Map over the lines in this file.
122
+ #
123
+ # @yield [String] each line of the file
124
+ def map &block
125
+ io.map(&block)
126
+ end
127
+
128
+ # Dump +data+ into this file.
129
+ #
130
+ # @param [String, Array, #each] data object to dump
131
+ # @option options [true, false] :persist (false) Don't close the file after writing
132
+ def dump data, options={}
133
+ data.each do |element| # works if data is an Array or a String
134
+ io.puts(element.to_s)
135
+ end
136
+ io.close unless options[:persist]
137
+ end
138
+ end
139
+
140
+ # Defines methods for manipulating the contents of a local
141
+ # directory.
142
+ module LocalDirectory
143
+
144
+ # Is this resource a directory?
145
+ # Always true for objects extended with this module.
146
+ # @return [true, false]
147
+ def is_directory?
148
+ true
149
+ end
150
+
151
+ # Delete this directory.
152
+ # Uses FileUtils.rmdir, so the directory is expected to be empty.
153
+ # @return [IMW::Resource] the deleted directory
154
+ def rmdir
155
+ FileUtils.rmdir path
156
+ self
157
+ end
158
+
159
+ # Delete this directory recursively.
160
+ # Uses FileUtils.rm_rf.
161
+ # @return [IMW::Resource] the deleted directory
162
+ def rm_rf
163
+ FileUtils.rm_rf path
164
+ self
165
+ end
166
+
167
+ # Return the paths matching the +selector+, which is interpreted relative
168
+ # to this directory (it is joined onto +path+ before globbing). Works just like Dir[].
169
+ # The returned paths therefore include this directory's +path+ as a prefix.
170
+ # @param [String] selector
171
+ # @return [Array] the matched paths
172
+ def [] selector='*'
173
+ Dir[File.join(path, selector)]
174
+ end
175
+
176
+ # Return a list of all paths directly within this directory.
177
+ # Equivalent to self['*'].
178
+ # @return [Array]
179
+ def contents
180
+ self['*']
181
+ end
182
+
183
+ # Does this directory contain +obj+?
184
+ # Walks the tree with Find and returns true as soon as any path under this directory ends with +obj+'s path.
185
+ # @param [String, IMW::Resource] obj
186
+ # @return [true, false]
187
+ def contains? obj
188
+ require 'find'
189
+ obj_path = obj.is_a?(String) ? obj : obj.path
190
+ Find.find(path) do |sub_path|
191
+ return true if sub_path.ends_with?(obj_path) # NOTE(review): suffix match, not a path-segment match; ends_with? is not core Ruby (presumably ActiveSupport) -- confirm
192
+ end
193
+ false
194
+ end
195
+
196
+ # Return all paths within this directory, recursively.
197
+ # Equivalent to self['**/*'].
198
+ # @return [Array<String>]
199
+ def all_contents
200
+ self['**/*']
201
+ end
202
+
203
+ # Return all resources within this directory, i.e. - every path from
204
+ # all_contents that is not a directory, opened with IMW.open.
205
+ #
206
+ # @return [Array<IMW::Resource>]
207
+ def resources
208
+ all_contents.map do |path|
209
+ IMW.open(path) unless File.directory?(path)
210
+ end.compact
211
+ end
212
+
213
+ end
214
+ end
215
+ end
216
+ end
217
+
218
+
219
+
@@ -0,0 +1,114 @@
1
+ module IMW
2
+ module Schemes
3
+
4
+ # Contains modules which define methods appropriate for remote
5
+ # resources, no matter the protocol.
6
+ module Remote
7
+
8
+ # Defines methods appropriate for accessing a remote resource,
9
+ # no matter the protocol.
10
+ module Base
11
+
12
+ # Hook run when an object is extended with this module: it always adds RemoteFile.
13
+ # TODO -- self.extended should extend by RemoteDirectory when appropriate
14
+ #
15
+
16
+ def self.extended obj
17
+ obj.extend(RemoteFile)
18
+ end
19
+
20
+ # Is this resource on a remote host?
21
+ # Always true for objects extended with this module.
22
+ # @return [true,false]
23
+ def is_remote?
24
+ true
25
+ end
26
+
27
+ # The host of this resource.
28
+ # Memoized from uri.host.
29
+ # @return [String]
30
+ def host
31
+ @host ||= uri.host
32
+ end
33
+
34
+ # Return the query string part of this resource's URI. Will
35
+ # likely be +nil+ for local resources.
36
+ #
37
+ # @return [String, nil]
38
+ def query_string
39
+ @query_string ||= uri.query
40
+ end
41
+
42
+ # Return the fragment part of this resource's URI. Will likely be
43
+ # +nil+ for local resources.
44
+ #
45
+ # @return [String, nil]
46
+ def fragment
47
+ @fragment ||= uri.fragment
48
+ end
49
+
50
+ # Return the path part of this resource's URI. Will _not_
51
+ # include the +query_string+ or +fragment+.
52
+ # Memoized from uri.path.
53
+ # @return [String]
54
+ def path
55
+ @path ||= uri.path
56
+ end
57
+
58
+ end
59
+
60
+ module RemoteFile
61
+
62
+ # Return the IO object for this remote file.
63
+ #
64
+ # The mode of this resource is ignored.
65
+ #
66
+ # @return [StringIO]
67
+ def io
68
+ require 'open-uri'
69
+ @io ||= open(uri.to_s) # ignore mode
70
+ end
71
+
72
+ # Read the contents of this remote file.
73
+ #
74
+ # @return [String]
75
+ def read
76
+ io.read
77
+ end
78
+
79
+ # Return the lines of this remote file.
80
+ #
81
+ # If passed a block then yield each line to the block.
82
+ #
83
+ # @return [Array] the lines of this remote file
84
+ # @yield [String] each line of this remote file
85
+ def load &block
86
+ if block_given?
87
+ io.each do |line|
88
+ yield line
89
+ end
90
+ else
91
+ read.split("\n")
92
+ end
93
+ end
94
+
95
+ # Map over the lines in this remote file.
96
+ #
97
+ # @yield [String] each line of the file
98
+ def map &block
99
+ io.map(&block)
100
+ end
101
+ end
102
+
103
+
104
+ module RemoteDirectory
105
+
106
+ # Placeholder for methods on remote directories; it defines nothing yet.
107
+ # TODO -- implement everything; Remote::Base.extended currently always extends with RemoteFile
108
+ #
109
+
110
+
111
+ end
112
+ end
113
+ end
114
+ end
@@ -0,0 +1,135 @@
1
+ module IMW
2
+ module Schemes
3
+
4
+ # Defines methods for reading and writing data to {Amazon
5
+ # S3}[http://aws.amazon.com/s3] buckets.
6
+ #
7
+ # IMW.open('s3://my_bucket/path/to/some/file.csv')
8
+ #
9
+ # Learn more about {Amazon Web Services}[http://aws.amazon.com].
10
+ module S3
11
+
12
+ # For an S3 resource, the bucket is just the hostname.
13
+ # (+host+ is not defined in this module; presumably it comes from Remote::Base -- confirm.)
14
+ # @return [String]
15
+ def bucket
16
+ host
17
+ end
18
+
19
+ # Is this resource an S3 resource?
20
+ # Always true for objects extended with this module.
21
+ # @return [true, false]
22
+ def on_s3?
23
+ true
24
+ end
25
+ alias_method :is_s3?, :on_s3?
26
+
27
+ # Copy this resource to the +new_uri+.
28
+ # Delegates to IMW::Tools::Transferer with the :cp action.
29
+ # @param [String, IMW::Resource] new_uri
30
+ # @return [IMW::Resource] the new resource (whatever Transferer#transfer! returns)
31
+ def cp new_uri
32
+ IMW::Tools::Transferer.new(:cp, self, new_uri).transfer!
33
+ end
34
+
35
+ # The AWS::S3::S3Object corresponding to this resource (a connection is made first; memoized in @s3_object).
36
+ def s3_object
37
+ IMW::Schemes::S3.make_connection! # defined on this module, not on the extended object's class
38
+ @s3_object ||= AWS::S3::S3Object.new(path, bucket)
39
+ end
40
+
41
+ # Does this resource exist on S3?
42
+ #
43
+ # @return [true, false]
44
+ def exist?
45
+ s3_object.exists?
46
+ end
47
+ alias_method :exists?, :exist?
48
+
49
+ # Remove this resource from S3.
50
+ #
51
+ # @return the result of AWS::S3::S3Object#delete (NOTE(review): not the IMW::Resource itself, unlike LocalFile#rm)
52
+ def rm
53
+ s3_object.delete
54
+ end
55
+ alias_method :rm!, :rm
56
+
57
+ # Return the S3N URL for this S3 object
58
+ #
59
+ # resource = IMW.open('s3://my_bucket/path/to/some/obj')
60
+ # resource.s3n_url
61
+ # => 's3n://my_bucket/path/to/some/obj'
62
+ #
63
+ # @return [String]
64
+ def s3n_url
65
+ uri.to_s.sub(/\As3:/, 's3n:')
66
+ end
67
+
68
+ # Return the contents of this S3 object.
69
+ #
70
+ # @return [String]
71
+ def read
72
+ s3_object.value
73
+ end
74
+
75
+ # Store +source+ into +destination+.
76
+ # Both arguments are passed through IMW.open first, so Strings are accepted.
77
+ # @param [String, IMW::Resource, #io] source
78
+ # @param [String, IMW::Resource, #path, #bucket] destination
79
+ # @return [IMW::Resource] the new S3 object
80
+ def self.put source, destination
81
+ source = IMW.open(source)
82
+ destination = IMW.open(destination)
83
+ raise IMW::ArgumentError.new("destination must be on S3 -- #{destination.uri} given") unless destination.on_s3?
84
+ make_connection!
85
+ AWS::S3::S3Object.store(destination.path, source.io, destination.bucket)
86
+ destination
87
+ end
88
+
89
+ # Download +source+ from S3 into +destination+.
90
+ # The object is streamed chunk by chunk into +destination+, which is then closed and reopened.
91
+ # @param [String, IMW::Resource, #path, #bucket] source
92
+ # @param [String, IMW::Resource, #write] destination
93
+ # @return [IMW::Resource] the new resource (whatever destination.reopen returns)
94
+ def self.get source, destination
95
+ source = IMW.open(source)
96
+ destination = IMW.open(destination)
97
+ make_connection!
98
+ AWS::S3::S3Object.stream(source.path, source.bucket) do |chunk|
99
+ destination.write(chunk)
100
+ end
101
+ destination.close
102
+ destination.reopen
103
+ end
104
+
105
+ # Copy S3 resource +source+ to +destination+.
106
+ # Both must be in the same, non-blank bucket; otherwise IMW::PathError is raised.
107
+ # @param [String, IMW::Resource, #path, #bucket] source
108
+ # @param [String, IMW::Resource, #path, #bucket] destination
109
+ # @return [IMW::Resource] the new resource
110
+ def self.copy source, destination
111
+ source = IMW.open(source)
112
+ destination = IMW.open(destination)
113
+ raise IMW::PathError.new("Bucket names must be non-blank and match to 'copy'") unless source.bucket.present? && destination.bucket.present? && source.bucket == destination.bucket
114
+ make_connection!
115
+ AWS::S3::S3Object.copy(source.path, destination.path, destination.bucket)
116
+ destination
117
+ end
118
+
119
+ protected # NOTE: has no effect on the `def self.` method below, which stays public
120
+ # Make an S3 connection.
121
+ #
122
+ # Uses settings defined in IMW::AWS_CREDENTIALS.
123
+ # Raises IMW::Error when that constant is not defined; the connection is memoized in @connection.
124
+ # @return [AWS::S3::Connection] the connection returned by establish_connection!
125
+ def self.make_connection!
126
+ return @connection if @connection
127
+ raise IMW::Error.new("Must define a constant IMW::AWS_CREDENTIALS with an :access_key_id and a :secret_access_key before using S3 resources") unless defined?(IMW::AWS_CREDENTIALS)
128
+ require 'aws/s3'
129
+ @connection = AWS::S3::Base.establish_connection!(IMW::AWS_CREDENTIALS)
130
+ end
131
+
132
+ end
133
+ end
134
+ end
135
+