imw 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. data/README.rdoc +34 -14
  2. data/Rakefile +1 -1
  3. data/VERSION +1 -1
  4. data/lib/imw.rb +9 -6
  5. data/lib/imw/{resources/archive.rb → archives.rb} +20 -10
  6. data/lib/imw/archives/rar.rb +19 -0
  7. data/lib/imw/archives/tar.rb +19 -0
  8. data/lib/imw/archives/tarbz2.rb +73 -0
  9. data/lib/imw/archives/targz.rb +73 -0
  10. data/lib/imw/archives/zip.rb +51 -0
  11. data/lib/imw/{resources/compressed_file.rb → compressed_files.rb} +16 -11
  12. data/lib/imw/compressed_files/bz2.rb +16 -0
  13. data/lib/imw/{resources → compressed_files}/compressible.rb +2 -4
  14. data/lib/imw/compressed_files/gz.rb +16 -0
  15. data/lib/imw/formats.rb +31 -0
  16. data/lib/imw/formats/delimited.rb +90 -0
  17. data/lib/imw/formats/excel.rb +125 -0
  18. data/lib/imw/formats/json.rb +51 -0
  19. data/lib/imw/formats/sgml.rb +69 -0
  20. data/lib/imw/formats/yaml.rb +51 -0
  21. data/lib/imw/resource.rb +108 -10
  22. data/lib/imw/schemes.rb +21 -0
  23. data/lib/imw/schemes/hdfs.rb +240 -0
  24. data/lib/imw/schemes/http.rb +166 -0
  25. data/lib/imw/schemes/local.rb +219 -0
  26. data/lib/imw/schemes/remote.rb +114 -0
  27. data/lib/imw/schemes/s3.rb +135 -0
  28. data/lib/imw/tools.rb +8 -0
  29. data/lib/imw/{transforms → tools}/archiver.rb +1 -1
  30. data/lib/imw/{transforms → tools}/transferer.rb +10 -10
  31. data/spec/imw/{resources/archive_spec.rb → archive_spec.rb} +3 -3
  32. data/spec/imw/{resources/archives_and_compressed → archives}/rar_spec.rb +2 -2
  33. data/spec/imw/{resources/archives_and_compressed → archives}/tar_spec.rb +2 -2
  34. data/spec/imw/{resources/archives_and_compressed → archives}/tarbz2_spec.rb +4 -4
  35. data/spec/imw/{resources/archives_and_compressed → archives}/targz_spec.rb +4 -4
  36. data/spec/imw/{resources/archives_and_compressed → archives}/zip_spec.rb +2 -2
  37. data/spec/imw/compressed_files/bz2_spec.rb +15 -0
  38. data/spec/imw/{resources → compressed_files}/compressible_spec.rb +1 -1
  39. data/spec/imw/compressed_files/gz_spec.rb +15 -0
  40. data/spec/imw/{resources/compressed_file_spec.rb → compressed_files_spec.rb} +3 -3
  41. data/spec/imw/{resources/formats → formats}/delimited_spec.rb +2 -2
  42. data/spec/imw/{resources/formats → formats}/json_spec.rb +2 -2
  43. data/spec/imw/{resources/formats → formats}/sgml_spec.rb +2 -2
  44. data/spec/imw/{resources/formats → formats}/yaml_spec.rb +2 -2
  45. data/spec/imw/resource_spec.rb +4 -4
  46. data/spec/imw/{resources/schemes → schemes}/hdfs_spec.rb +7 -7
  47. data/spec/imw/{resources/schemes → schemes}/http_spec.rb +2 -2
  48. data/spec/imw/{resources → schemes}/local_spec.rb +5 -5
  49. data/spec/imw/{resources → schemes}/remote_spec.rb +7 -3
  50. data/spec/imw/{resources/schemes → schemes}/s3_spec.rb +2 -2
  51. data/spec/imw/{transforms → tools}/archiver_spec.rb +2 -2
  52. data/spec/imw/tools/transferer_spec.rb +113 -0
  53. metadata +69 -71
  54. data/lib/imw/resources.rb +0 -118
  55. data/lib/imw/resources/archives_and_compressed.rb +0 -32
  56. data/lib/imw/resources/archives_and_compressed/bz2.rb +0 -18
  57. data/lib/imw/resources/archives_and_compressed/gz.rb +0 -18
  58. data/lib/imw/resources/archives_and_compressed/rar.rb +0 -23
  59. data/lib/imw/resources/archives_and_compressed/tar.rb +0 -23
  60. data/lib/imw/resources/archives_and_compressed/tarbz2.rb +0 -78
  61. data/lib/imw/resources/archives_and_compressed/targz.rb +0 -78
  62. data/lib/imw/resources/archives_and_compressed/zip.rb +0 -57
  63. data/lib/imw/resources/formats.rb +0 -32
  64. data/lib/imw/resources/formats/delimited.rb +0 -92
  65. data/lib/imw/resources/formats/excel.rb +0 -125
  66. data/lib/imw/resources/formats/json.rb +0 -53
  67. data/lib/imw/resources/formats/sgml.rb +0 -72
  68. data/lib/imw/resources/formats/yaml.rb +0 -53
  69. data/lib/imw/resources/local.rb +0 -198
  70. data/lib/imw/resources/remote.rb +0 -110
  71. data/lib/imw/resources/schemes.rb +0 -19
  72. data/lib/imw/resources/schemes/hdfs.rb +0 -242
  73. data/lib/imw/resources/schemes/http.rb +0 -161
  74. data/lib/imw/resources/schemes/s3.rb +0 -137
  75. data/lib/imw/transforms.rb +0 -8
  76. data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +0 -15
  77. data/spec/imw/resources/archives_and_compressed/gz_spec.rb +0 -15
  78. data/spec/imw/transforms/transferer_spec.rb +0 -113
@@ -0,0 +1,219 @@
module IMW
  module Schemes
    module Local

      # Defines methods appropriate for any file (or directory) on the
      # local machine.  Includes methods borrowed from the File class
      # like File.exist?, File.size, &c., each applied to #path.
      #
      # When an object is extended with this module it is automatically
      # also extended with either IMW::Schemes::Local::LocalDirectory or
      # IMW::Schemes::Local::LocalFile, depending on whether its path is
      # a directory at the moment of extension.
      #
      # The extended object must respond to +uri+.
      module Base

        # Hook run by Object#extend: pick the file or directory flavour.
        #
        # @param [Object] obj the object being extended
        def self.extended(obj)
          if obj.directory?
            obj.extend(LocalDirectory)
          else
            obj.extend(LocalFile)
          end
        end

        # Steal a bunch of class methods from File which only take a
        # path as a first argument; each one becomes an instance method
        # that passes this resource's #path to File.
        [
          :executable?, :executable_real?, :exist?, :file?, :directory?,
          :ftype, :owned?, :pipe?, :readable?, :readable_real?, :setgid?,
          :setuid?, :size, :size?, :socket?, :split, :stat, :sticky?,
          :writable?, :writable_real?, :zero?
        ].each do |class_method|
          define_method class_method do
            File.send(class_method, path)
          end
        end
        alias_method :exists?, :exist?

        # Return the (expanded, memoized) path to this local object.
        #
        # When @encoded_uri is set the URI is decoded with
        # Addressable::URI.decode before being expanded.
        #
        # @return [String]
        def path
          @path ||= File.expand_path(@encoded_uri ? Addressable::URI.decode(uri.to_s) : uri.to_s)
        end

        # Is this file on the local machine?
        #
        # @return [true, false]
        def is_local?
          true
        end

        # Copy this resource to the +new_uri+.
        #
        # @param [String, IMW::Resource] new_uri
        # @return [IMW::Resource] the new resource
        def cp(new_uri)
          IMW::Tools::Transferer.new(:cp, self, new_uri).transfer!
        end

        # Move this resource to the +new_uri+.
        #
        # @param [String, IMW::Resource] new_uri
        # @return [IMW::Resource] the new resource
        def mv(new_uri)
          IMW::Tools::Transferer.new(:mv, self, new_uri).transfer!
        end

      end

      # Defines methods appropriate for a local file.
      #
      # The extended object must respond to +mode+ (used when opening
      # the underlying IO) in addition to the methods required by Base.
      module LocalFile

        # Is this resource a regular file?
        #
        # @return [true, false]
        def is_file?
          true
        end

        # Delete this resource.
        #
        # Calls +should_exist!+ (defined outside this module) before
        # deleting.
        #
        # @return [IMW::Resource] this resource
        def rm
          should_exist!("Cannot delete")
          FileUtils.rm path
          self
        end
        alias_method :rm!, :rm

        # Return the IO object at this path, opening it with this
        # resource's +mode+ on first use and memoizing it afterwards.
        #
        # @return [File]
        def io
          @io ||= open(path, mode)
        end

        # Read from this file.
        #
        # @param [Fixnum] length bytes to read (everything when +nil+)
        # @return [String]
        def read(length = nil)
          io.read(length)
        end

        # Write to this file.
        #
        # @param [String, #to_s] text text to write
        # @return [Fixnum] bytes written
        def write(text)
          io.write text
        end

        # Return the lines in this file.
        #
        # If passed a block, yield each line of the file to the block
        # instead of building an Array.
        #
        # @yield [String] each line of the file
        # @return [Array] the lines in the file (when no block is given)
        def load(&block)
          if block_given?
            io.each do |line|
              yield line
            end
          else
            read.split("\n")
          end
        end

        # Map over the lines in this file.
        #
        # @yield [String] each line of the file
        # @return [Array] the block's results
        def map(&block)
          io.map(&block)
        end

        # Dump +data+ into this file, one element per line.
        #
        # A String is written line by line; any other object is
        # iterated with +each+.
        #
        # Unless <tt>:persist</tt> is given the IO is closed afterwards;
        # note that the closed handle stays memoized by #io.
        #
        # @param [String, Array, #each] data object to dump
        # @option options [true, false] :persist (false) Don't close the file after writing
        def dump(data, options = {})
          # String#each disappeared in Ruby 1.9, so strings have to be
          # walked with each_line to keep the documented behaviour.
          iterator = data.is_a?(String) ? :each_line : :each
          data.send(iterator) do |element|
            io.puts(element.to_s)
          end
          io.close unless options[:persist]
        end
      end

      # Defines methods for manipulating the contents of a local
      # directory.
      module LocalDirectory

        # Is this resource a directory?
        #
        # @return [true, false]
        def is_directory?
          true
        end

        # Delete this directory.
        #
        # @return [IMW::Resource] the deleted directory
        def rmdir
          FileUtils.rmdir path
          self
        end

        # Delete this directory recursively.
        #
        # @return [IMW::Resource] the deleted directory
        def rm_rf
          FileUtils.rm_rf path
          self
        end

        # Return a list of paths within this directory which match the
        # +selector+.  The selector is joined onto this directory's
        # path and handed to Dir[].
        #
        # @param [String] selector
        # @return [Array] the matched paths
        def [](selector = '*')
          Dir[File.join(path, selector)]
        end

        # Return a list of all paths directly within this directory.
        #
        # @return [Array]
        def contents
          self['*']
        end

        # Does this directory contain +obj+?
        #
        # Walks the directory tree and answers true as soon as a path
        # ending with +obj+'s path is found.
        #
        # @param [String, IMW::Resource] obj
        # @return [true, false]
        def contains?(obj)
          require 'find' # loaded lazily, only needed here
          obj_path = obj.is_a?(String) ? obj : obj.path
          Find.find(path) do |sub_path|
            # core String#end_with? -- no ActiveSupport alias required
            return true if sub_path.end_with?(obj_path)
          end
          false
        end

        # Return all paths within this directory, recursively.
        #
        # @return [Array<String>]
        def all_contents
          self['**/*']
        end

        # Return all resources within this directory, i.e. - all
        # non-directory paths converted to IMW::Resource objects.
        #
        # @return [Array<IMW::Resource>]
        def resources
          all_contents.map do |sub_path|
            IMW.open(sub_path) unless File.directory?(sub_path)
          end.compact
        end

      end
    end
  end
end
@@ -0,0 +1,114 @@
module IMW
  module Schemes

    # Contains modules which define methods appropriate for remote
    # resources, no matter the protocol.
    module Remote

      # Defines methods appropriate for accessing a remote resource,
      # no matter the protocol.
      #
      # The extended object must respond to +uri+; the accessors below
      # simply memoize the corresponding parts of that URI.
      module Base

        #
        # TODO -- self.extended should extend by RemoteDirectory when appropriate
        #

        # Hook run by Object#extend: every remote resource is currently
        # treated as a file.
        #
        # @param [Object] obj the object being extended
        def self.extended(obj)
          obj.extend(RemoteFile)
        end

        # Is this resource on a remote host?
        #
        # @return [true,false]
        def is_remote?
          true
        end

        # The host of this resource.
        #
        # @return [String]
        def host
          @host ||= uri.host
        end

        # Return the query string part of this resource's URI.  Will
        # likely be +nil+ for local resources.
        #
        # @return [String]
        def query_string
          @query_string ||= uri.query
        end

        # Return the fragment part of this resource's URI.  Will likely be
        # +nil+ for local resources.
        #
        # @return [String]
        def fragment
          @fragment ||= uri.fragment
        end

        # Return the path part of this resource's URI.  Will _not_
        # include the +query_string+ or +fragment+.
        #
        # @return [String]
        def path
          @path ||= uri.path
        end

      end

      # Defines methods for reading a remote file through open-uri.
      module RemoteFile

        # Return the IO object for this remote file, opened on first
        # use and memoized afterwards.
        #
        # The mode of this resource is ignored.
        #
        # @return [StringIO, Tempfile] whatever open-uri returns for the URI
        def io
          require 'open-uri'
          # Kernel#open stopped handling URLs in Ruby 3.0; URI.open is
          # the supported entry point wherever it is available, and the
          # plain open call is kept for older Rubies that lack it.
          @io ||= URI.respond_to?(:open) ? URI.open(uri.to_s) : open(uri.to_s)
        end

        # Read the contents of this remote file.
        #
        # @return [String]
        def read
          io.read
        end

        # Return the lines of this remote file.
        #
        # If passed a block then yield each line to the block instead
        # of building an Array.
        #
        # @return [Array] the lines of this remote file (when no block is given)
        # @yield [String] each line of this remote file
        def load(&block)
          if block_given?
            io.each do |line|
              yield line
            end
          else
            read.split("\n")
          end
        end

        # Map over the lines in this remote file.
        #
        # @yield [String] each line of the file
        # @return [Array] the block's results
        def map(&block)
          io.map(&block)
        end
      end


      # Placeholder for remote directories; nothing is implemented yet.
      module RemoteDirectory

        #
        # TODO -- bloody everything
        #


      end
    end
  end
end
@@ -0,0 +1,135 @@
module IMW
  module Schemes

    # Defines methods for reading and writing data to {Amazon
    # S3}[http://aws.amazon.com/s3] buckets.
    #
    #   IMW.open('s3://my_bucket/path/to/some/file.csv')
    #
    # The extended object must respond to +uri+, +host+ and +path+.
    #
    # Learn more about {Amazon Web Services}[http://aws.amazon.com].
    module S3

      # For an S3 resource, the bucket is just the hostname.
      #
      # @return [String]
      def bucket
        host
      end

      # Is this resource an S3 resource?
      #
      # @return [true, false]
      def on_s3?
        true
      end
      alias_method :is_s3?, :on_s3?

      # Copy this resource to the +new_uri+.
      #
      # @param [String, IMW::Resource] new_uri
      # @return [IMW::Resource] the new resource
      def cp(new_uri)
        IMW::Tools::Transferer.new(:cp, self, new_uri).transfer!
      end

      # The AWS::S3::S3Object corresponding to this resource (memoized).
      #
      # Ensures an S3 connection exists first.  The connection helper
      # lives on the IMW::Schemes::S3 module itself, not on the class of
      # the extended object, so it is called through the module.
      #
      # NOTE(review): confirm that AWS::S3::S3Object.new accepts
      # (key, bucket) in the aws-s3 version in use.
      def s3_object
        IMW::Schemes::S3.make_connection!
        @s3_object ||= AWS::S3::S3Object.new(path, bucket)
      end

      # Does this resource exist on S3?
      #
      # @return [true, false]
      def exist?
        s3_object.exists?
      end
      alias_method :exists?, :exist?

      # Remove this resource from S3.
      #
      # @return [Object] whatever the underlying S3 object's +delete+ returns
      def rm
        s3_object.delete
      end
      alias_method :rm!, :rm

      # Return the S3N URL for this S3 object
      #
      #   resource = IMW.open('s3://my_bucket/path/to/some/obj')
      #   resource.s3n_url
      #   => 's3n://my_bucket/path/to/some/obj'
      #
      # @return [String]
      def s3n_url
        uri.to_s.gsub(/^s3:/, 's3n:')
      end

      # Return the contents of this S3 object.
      #
      # @return [String]
      def read
        s3_object.value
      end

      # Store +source+ into +destination+.
      #
      # Both arguments are passed through IMW.open, so plain URI
      # strings are accepted as well as resources.
      #
      # @param [String, IMW::Resource, #io] source
      # @param [String, IMW::Resource, #path, #bucket] destination
      # @return [IMW::Resource] the new S3 object
      # @raise [IMW::ArgumentError] if +destination+ is not on S3
      def self.put(source, destination)
        source      = IMW.open(source)
        destination = IMW.open(destination)
        raise IMW::ArgumentError.new("destination must be on S3 -- #{destination.uri} given") unless destination.on_s3?
        make_connection!
        AWS::S3::S3Object.store(destination.path, source.io, destination.bucket)
        destination
      end

      # Download +source+ from S3 into +destination+.
      #
      # The object is streamed chunk by chunk into +destination+, which
      # is then closed and reopened.
      #
      # @param [String, IMW::Resource, #path, #bucket] source
      # @param [String, IMW::Resource, #write] destination
      # @return [IMW::Resource] the new resource (the result of +destination.reopen+)
      def self.get(source, destination)
        source      = IMW.open(source)
        destination = IMW.open(destination)
        make_connection!
        AWS::S3::S3Object.stream(source.path, source.bucket) do |chunk|
          destination.write(chunk)
        end
        destination.close
        destination.reopen
      end

      # Copy S3 resource +source+ to +destination+.
      #
      # Both resources must live in the same, non-blank bucket.
      #
      # @param [String, IMW::Resource, #path, #bucket] source
      # @param [String, IMW::Resource, #path, #bucket] destination
      # @return [IMW::Resource] the new resource
      # @raise [IMW::PathError] if the buckets are blank or differ
      def self.copy(source, destination)
        source      = IMW.open(source)
        destination = IMW.open(destination)
        raise IMW::PathError.new("Bucket names must be non-blank and match to 'copy'") unless source.bucket.present? && destination.bucket.present? && source.bucket == destination.bucket
        make_connection!
        AWS::S3::S3Object.copy(source.path, destination.path, destination.bucket)
        destination
      end

      # Make an S3 connection (once; the result is memoized on this
      # module).
      #
      # Uses settings defined in IMW::AWS_CREDENTIALS and loads the
      # 'aws/s3' library lazily so it is only required when S3 is used.
      #
      # This is a module-level method and therefore public: the bare
      # +protected+ keyword that used to precede it does not apply to
      # methods defined with <tt>def self.</tt>.
      #
      # @return [Object] the value returned by AWS::S3::Base.establish_connection!
      # @raise [IMW::Error] if IMW::AWS_CREDENTIALS is not defined
      def self.make_connection!
        return @connection if @connection
        raise IMW::Error.new("Must define a constant IMW::AWS_CREDENTIALS with an :access_key_id and a :secret_access_key before using S3 resources") unless defined?(IMW::AWS_CREDENTIALS)
        require 'aws/s3'
        @connection = AWS::S3::Base.establish_connection!(IMW::AWS_CREDENTIALS)
      end

    end
  end
end