imw 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78)
  1. data/README.rdoc +34 -14
  2. data/Rakefile +1 -1
  3. data/VERSION +1 -1
  4. data/lib/imw.rb +9 -6
  5. data/lib/imw/{resources/archive.rb → archives.rb} +20 -10
  6. data/lib/imw/archives/rar.rb +19 -0
  7. data/lib/imw/archives/tar.rb +19 -0
  8. data/lib/imw/archives/tarbz2.rb +73 -0
  9. data/lib/imw/archives/targz.rb +73 -0
  10. data/lib/imw/archives/zip.rb +51 -0
  11. data/lib/imw/{resources/compressed_file.rb → compressed_files.rb} +16 -11
  12. data/lib/imw/compressed_files/bz2.rb +16 -0
  13. data/lib/imw/{resources → compressed_files}/compressible.rb +2 -4
  14. data/lib/imw/compressed_files/gz.rb +16 -0
  15. data/lib/imw/formats.rb +31 -0
  16. data/lib/imw/formats/delimited.rb +90 -0
  17. data/lib/imw/formats/excel.rb +125 -0
  18. data/lib/imw/formats/json.rb +51 -0
  19. data/lib/imw/formats/sgml.rb +69 -0
  20. data/lib/imw/formats/yaml.rb +51 -0
  21. data/lib/imw/resource.rb +108 -10
  22. data/lib/imw/schemes.rb +21 -0
  23. data/lib/imw/schemes/hdfs.rb +240 -0
  24. data/lib/imw/schemes/http.rb +166 -0
  25. data/lib/imw/schemes/local.rb +219 -0
  26. data/lib/imw/schemes/remote.rb +114 -0
  27. data/lib/imw/schemes/s3.rb +135 -0
  28. data/lib/imw/tools.rb +8 -0
  29. data/lib/imw/{transforms → tools}/archiver.rb +1 -1
  30. data/lib/imw/{transforms → tools}/transferer.rb +10 -10
  31. data/spec/imw/{resources/archive_spec.rb → archive_spec.rb} +3 -3
  32. data/spec/imw/{resources/archives_and_compressed → archives}/rar_spec.rb +2 -2
  33. data/spec/imw/{resources/archives_and_compressed → archives}/tar_spec.rb +2 -2
  34. data/spec/imw/{resources/archives_and_compressed → archives}/tarbz2_spec.rb +4 -4
  35. data/spec/imw/{resources/archives_and_compressed → archives}/targz_spec.rb +4 -4
  36. data/spec/imw/{resources/archives_and_compressed → archives}/zip_spec.rb +2 -2
  37. data/spec/imw/compressed_files/bz2_spec.rb +15 -0
  38. data/spec/imw/{resources → compressed_files}/compressible_spec.rb +1 -1
  39. data/spec/imw/compressed_files/gz_spec.rb +15 -0
  40. data/spec/imw/{resources/compressed_file_spec.rb → compressed_files_spec.rb} +3 -3
  41. data/spec/imw/{resources/formats → formats}/delimited_spec.rb +2 -2
  42. data/spec/imw/{resources/formats → formats}/json_spec.rb +2 -2
  43. data/spec/imw/{resources/formats → formats}/sgml_spec.rb +2 -2
  44. data/spec/imw/{resources/formats → formats}/yaml_spec.rb +2 -2
  45. data/spec/imw/resource_spec.rb +4 -4
  46. data/spec/imw/{resources/schemes → schemes}/hdfs_spec.rb +7 -7
  47. data/spec/imw/{resources/schemes → schemes}/http_spec.rb +2 -2
  48. data/spec/imw/{resources → schemes}/local_spec.rb +5 -5
  49. data/spec/imw/{resources → schemes}/remote_spec.rb +7 -3
  50. data/spec/imw/{resources/schemes → schemes}/s3_spec.rb +2 -2
  51. data/spec/imw/{transforms → tools}/archiver_spec.rb +2 -2
  52. data/spec/imw/tools/transferer_spec.rb +113 -0
  53. metadata +69 -71
  54. data/lib/imw/resources.rb +0 -118
  55. data/lib/imw/resources/archives_and_compressed.rb +0 -32
  56. data/lib/imw/resources/archives_and_compressed/bz2.rb +0 -18
  57. data/lib/imw/resources/archives_and_compressed/gz.rb +0 -18
  58. data/lib/imw/resources/archives_and_compressed/rar.rb +0 -23
  59. data/lib/imw/resources/archives_and_compressed/tar.rb +0 -23
  60. data/lib/imw/resources/archives_and_compressed/tarbz2.rb +0 -78
  61. data/lib/imw/resources/archives_and_compressed/targz.rb +0 -78
  62. data/lib/imw/resources/archives_and_compressed/zip.rb +0 -57
  63. data/lib/imw/resources/formats.rb +0 -32
  64. data/lib/imw/resources/formats/delimited.rb +0 -92
  65. data/lib/imw/resources/formats/excel.rb +0 -125
  66. data/lib/imw/resources/formats/json.rb +0 -53
  67. data/lib/imw/resources/formats/sgml.rb +0 -72
  68. data/lib/imw/resources/formats/yaml.rb +0 -53
  69. data/lib/imw/resources/local.rb +0 -198
  70. data/lib/imw/resources/remote.rb +0 -110
  71. data/lib/imw/resources/schemes.rb +0 -19
  72. data/lib/imw/resources/schemes/hdfs.rb +0 -242
  73. data/lib/imw/resources/schemes/http.rb +0 -161
  74. data/lib/imw/resources/schemes/s3.rb +0 -137
  75. data/lib/imw/transforms.rb +0 -8
  76. data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +0 -15
  77. data/spec/imw/resources/archives_and_compressed/gz_spec.rb +0 -15
  78. data/spec/imw/transforms/transferer_spec.rb +0 -113
@@ -1,53 +0,0 @@
1
- module IMW
2
- module Resources
3
- module Formats
4
-
5
- # Defines methods for reading and writing JSON data.
6
- module Json
7
-
8
- # Return the content of this resource.
9
- #
10
- # Will try to be smart about iterating over the data when
11
- # passed a block.
12
- #
13
- # - if the outermost JSON data structure is an array, then
14
- # yield each element
15
- #
16
- # - if the outermost JSON data structure is a mapping, then
17
- # yield each key, value pair
18
- #
19
- # - otherwise just yield the structure
20
- #
21
- # @return [Hash, Array, String, Fixnum] whatever the JSON contained
22
- def load &block
23
- require 'json'
24
- json = JSON.parse(read)
25
- if block_given?
26
- case json
27
- when Array
28
- json.each { |obj| yield obj }
29
- when Hash
30
- json.each_pair { |key, value| yield key, value }
31
- else
32
- yield json
33
- end
34
- else
35
- json
36
- end
37
- end
38
-
39
- # Dump the +data+ into this resource. It must be opened for
40
- # writing.
41
- #
42
- # @param [Hash, String, Array, Fixnum] data the Ruby object to dump
43
- # @option options [true, false] :persist (false) Don't close the IO object after writing
44
- def dump data, options={}
45
- require 'json'
46
- write(data.to_json)
47
- io.close unless options[:persist]
48
- self
49
- end
50
- end
51
- end
52
- end
53
- end
@@ -1,72 +0,0 @@
1
- module IMW
2
- module Resources
3
- module Formats
4
-
5
- # Defines methods to parse SGML-derived data formats (XML, HTML,
6
- # &c.). This module isn't directly used to extend resources.
7
- # Instead, more specific modules (e.g. -
8
- # IMW::Resources::Formats::Xml) are used.
9
- module Sgml
10
-
11
- # Parse this resource using Hpricot and return (or yield if
12
- # given a block) the resulting Hpricot::Doc.
13
- #
14
- # @return [Hpricot::Doc]
15
- # @yield [Hpricot::Doc]
16
- def load &block
17
- require 'hpricot'
18
- sgml = Hpricot(io)
19
- if block_given?
20
- yield sgml
21
- else
22
- sgml
23
- end
24
- end
25
-
26
- # Parse the Hpricot::Doc of this resource with the given
27
- # +parser+.
28
- #
29
- # The parser can either be an IMW::Parsers::HtmlParser or a
30
- # hash which will be used to build such a parser. See the
31
- # documentation for IMW::Parsers::HtmlParser for more
32
- # information.
33
- #
34
- # @param [Hash, IMW::Parsers::HtmlParser] parser
35
- # @return [Hash] the parser's output
36
- def parse parser
37
- if parser.is_a?(IMW::Parsers::HtmlParser)
38
- parser.parse(load)
39
- else
40
- IMW::Parsers::HtmlParser.new(parser).parse(load)
41
- end
42
- end
43
- end
44
-
45
- # Defines methods for XML data.
46
- module Xml
47
- include Sgml
48
- end
49
-
50
- # Defines methods for XSL data.
51
- module Xsl
52
- include Sgml
53
- end
54
-
55
- # Defines methods for XHTML data.
56
- module Xhtml
57
- include Sgml
58
- end
59
-
60
- # Defines methods for HTML data.
61
- module Html
62
- include Sgml
63
- end
64
-
65
- # Defines methods for RDF data.
66
- module Rdf
67
- include Sgml
68
- end
69
- end
70
- end
71
- end
72
-
@@ -1,53 +0,0 @@
1
- module IMW
2
- module Resources
3
- module Formats
4
-
5
- # Provides methods for reading and writing YAML data.
6
- module Yaml
7
-
8
- # Return the content of this resource.
9
- #
10
- # Will try to be smart about iterating over the data when
11
- # passed a block.
12
- #
13
- # - if the outermost YAML data structure is an array, then
14
- # yield each element
15
- #
16
- # - if the outermost YAML data structure is a mapping, then
17
- # yield each key, value pair
18
- #
19
- # - otherwise just yield the structure
20
- #
21
- # @return [Hash, Array, String, Fixnum] whatever the YAML contained
22
- def load &block
23
- require 'yaml'
24
- yaml = YAML.load(read)
25
- if block_given?
26
- case yaml
27
- when Array
28
- yaml.each { |obj| yield obj }
29
- when Hash
30
- yaml.each_pair { |key, value| yield key, value }
31
- else
32
- yield yaml
33
- end
34
- else
35
- yaml
36
- end
37
- end
38
-
39
- # Dump the +data+ into this resource. It must be opened for
40
- # writing.
41
- #
42
- # @param [Hash, String, Array, Fixnum] data the Ruby object to dump
43
- # @option options [true, false] :persist (false) Don't close the IO object after writing
44
- def dump data, options={}
45
- require 'yaml'
46
- write(data.to_yaml)
47
- io.close unless options[:persist]
48
- self
49
- end
50
- end
51
- end
52
- end
53
- end
@@ -1,198 +0,0 @@
1
- module IMW
2
- module Resources
3
-
4
- # Defines methods appropriate for any file (or directory) on the
5
- # local machine. Includes methods from the File class like
6
- # File#exist?, File#size, &c.
7
- #
8
- # When extending with this module, it will automatically also
9
- # extend with either IMW::Resources::LocalDirectory or
10
- # IMW::Resources::LocalFile, as appropriate.
11
- module LocalObj
12
-
13
- def self.extended obj
14
- # also extend with file or directory as appropriate
15
- obj.extend(obj.directory? ? LocalDirectory : LocalFile)
16
- end
17
-
18
- # Steal a bunch of class methods from File which only take a
19
- # path as a first argument.
20
- [:executable?, :executable_real?, :exist?, :file?, :directory?, :ftype, :owned?, :pipe?, :readable?, :readable_real?, :setgid?, :setuid?, :size, :size?, :socket?, :split, :stat, :sticky?, :writable?, :writable_real?, :zero?].each do |class_method|
21
- define_method class_method do
22
- File.send(class_method, path)
23
- end
24
- end
25
- alias_method :exists?, :exist?
26
-
27
- # Return the path to this local object.
28
- #
29
- # @return [String]
30
- def path
31
- @path ||= File.expand_path(@encoded_uri ? Addressable::URI.decode(uri.to_s) : uri.to_s)
32
- end
33
-
34
- # Is this file on the local machine?
35
- #
36
- # @return [true, false]
37
- def is_local?
38
- true
39
- end
40
-
41
- # Copy this resource to the +new_uri+.
42
- #
43
- # @param [String, IMW::Resource] new_uri
44
- # @return [IMW::Resource] the new resource
45
- def cp new_uri
46
- IMW::Transforms::Transferer.new(:cp, self, new_uri).transfer!
47
- end
48
-
49
- # Move this resource to the +new_uri+.
50
- #
51
- # @param [String, IMW::Resource] new_uri
52
- # @return [IMW::Resource] the new resource
53
- def mv new_uri
54
- IMW::Transforms::Transferer.new(:mv, self, new_uri).transfer!
55
- end
56
-
57
- end
58
-
59
- # Defines methods appropriate for a local file.
60
- module LocalFile
61
-
62
- # Delete this resource.
63
- def rm
64
- should_exist!("Cannot delete")
65
- FileUtils.rm path
66
- self
67
- end
68
- alias_method :rm!, :rm
69
-
70
- # Return the IO object at this path.
71
- #
72
- # @return [File]
73
- def io
74
- @io ||= open(path, mode)
75
- end
76
-
77
- # Read from this file.
78
- #
79
- # @param [Fixnum] length bytes to read
80
- # @return [String]
81
- def read length=nil
82
- io.read(length)
83
- end
84
-
85
- # Write to this file
86
- #
87
- # @param [String, #to_s] text text to write
88
- # @return [Fixnum] bytes written
89
- def write text
90
- io.write text
91
- end
92
-
93
- # Return the lines in this file.
94
- #
95
- # If passed a block, yield each line of the file to the block.
96
- #
97
- # @yield [String] each line of the file
98
- # @return [Array] the lines in the file
99
- def load &block
100
- if block_given?
101
- io.each do |line|
102
- yield line
103
- end
104
- else
105
- read.split("\n")
106
- end
107
- end
108
-
109
- # Map over the lines in this file.
110
- #
111
- # @yield [String] each line of the file
112
- def map &block
113
- io.map(&block)
114
- end
115
-
116
- # Dump +data+ into this file.
117
- #
118
- # @param [String, Array, #each] data object to dump
119
- # @option options [true, false] :persist (false) Don't close the file after writing
120
- def dump data, options={}
121
- data.each do |element| # works if data is an Array or a String
122
- io.puts(element.to_s)
123
- end
124
- io.close unless options[:persist]
125
- end
126
-
127
- end
128
-
129
-
130
- module LocalDirectory
131
-
132
- # Delete this directory.
133
- #
134
- # @return [IMW::Resource] the deleted directory
135
- def rmdir
136
- FileUtils.rmdir path
137
- self
138
- end
139
-
140
- # Delete this directory recursively.
141
- #
142
- # @return [IMW::Resource] the deleted directory
143
- def rm_rf
144
- FileUtils.rm_rf path
145
- self
146
- end
147
-
148
- # Return a list of paths inside this directory which match
149
- # the +selector+. Works just like Dir[].
150
- #
151
- # @param [String] selector
152
- # @return [Array] the matched paths
153
- def [] selector='*'
154
- Dir[File.join(path, selector)]
155
- end
156
-
157
- # Return a list of all paths directly within this directory.
158
- #
159
- # @return [Array]
160
- def contents
161
- self['*']
162
- end
163
-
164
- # Does this directory contain +obj+?
165
- #
166
- # @param [String, IMW::Resource] obj
167
- # @return [true, false]
168
- def contains? obj
169
- require 'find'
170
- obj_path = obj.is_a?(String) ? obj : obj.path
171
- Find.find(path) do |sub_path|
172
- return true if sub_path.ends_with?(obj_path)
173
- end
174
- false
175
- end
176
-
177
- # Return all paths within this directory, recursively.
178
- #
179
- # @return [Array<String>]
180
- def all_contents
181
- self['**/*']
182
- end
183
-
184
- # Return all resources within this directory, i.e. - all paths
185
- # converted to IMW::Resource objects.
186
- #
187
- # @return [Array<IMW::Resource>]
188
- def resources
189
- all_contents.map do |path|
190
- IMW.open(path) unless File.directory?(path)
191
- end.compact
192
- end
193
-
194
- end
195
- end
196
- end
197
-
198
-
@@ -1,110 +0,0 @@
1
- module IMW
2
- module Resources
3
-
4
- # Defines methods appropriate for accessing a remote resource, no
5
- # matter what the protocol.
6
- module RemoteObj
7
-
8
- #
9
- # TODO -- self.extended should extend by RemoteDirectory when appropriate
10
- #
11
-
12
- def self.extended obj
13
- obj.extend(RemoteFile)
14
- end
15
-
16
- # Is this resource on a remote host?
17
- #
18
- # @return [true,false]
19
- def is_remote?
20
- true
21
- end
22
-
23
- # The host of this resource.
24
- #
25
- # @return [String]
26
- def host
27
- @host ||= uri.host
28
- end
29
-
30
- # Return the query string part of this resource's URI. Will
31
- # likely be +nil+ for local resources.
32
- #
33
- # @return [String]
34
- def query_string
35
- @query_string ||= uri.query
36
- end
37
-
38
- # Return the fragment part of this resource's URI. Will likely be
39
- # +nil+ for local resources.
40
- #
41
- # @return [String]
42
- def fragment
43
- @fragment ||= uri.fragment
44
- end
45
-
46
- # Return the path part of this resource's URI. Will _not_
47
- # include the +query_string+ or +fragment+.
48
- #
49
- # @return [String]
50
- def path
51
- @path ||= uri.path
52
- end
53
-
54
- end
55
-
56
- module RemoteFile
57
-
58
- # Return the IO object for this remote file.
59
- #
60
- # The mode of this resource is ignored.
61
- #
62
- # @return [StringIO]
63
- def io
64
- require 'open-uri'
65
- @io ||= open(uri.to_s) # ignore mode
66
- end
67
-
68
- # Read the contents of this remote file.
69
- #
70
- # @return [String]
71
- def read
72
- io.read
73
- end
74
-
75
- # Return the lines of this remote file.
76
- #
77
- # If passed a block then yield each line to the block.
78
- #
79
- # @return [Array] the lines of this remote file
80
- # @yield [String] each line of this remote file
81
- def load &block
82
- if block_given?
83
- io.each do |line|
84
- yield line
85
- end
86
- else
87
- read.split("\n")
88
- end
89
- end
90
-
91
- # Map over the lines in this remote file.
92
- #
93
- # @yield [String] each line of the file
94
- def map &block
95
- io.map(&block)
96
- end
97
- end
98
-
99
-
100
- module RemoteDirectory
101
-
102
- #
103
- # TODO -- bloody everything
104
- #
105
-
106
-
107
- end
108
- end
109
- end
110
-