imw 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. data/README.rdoc +34 -14
  2. data/Rakefile +1 -1
  3. data/VERSION +1 -1
  4. data/lib/imw.rb +9 -6
  5. data/lib/imw/{resources/archive.rb → archives.rb} +20 -10
  6. data/lib/imw/archives/rar.rb +19 -0
  7. data/lib/imw/archives/tar.rb +19 -0
  8. data/lib/imw/archives/tarbz2.rb +73 -0
  9. data/lib/imw/archives/targz.rb +73 -0
  10. data/lib/imw/archives/zip.rb +51 -0
  11. data/lib/imw/{resources/compressed_file.rb → compressed_files.rb} +16 -11
  12. data/lib/imw/compressed_files/bz2.rb +16 -0
  13. data/lib/imw/{resources → compressed_files}/compressible.rb +2 -4
  14. data/lib/imw/compressed_files/gz.rb +16 -0
  15. data/lib/imw/formats.rb +31 -0
  16. data/lib/imw/formats/delimited.rb +90 -0
  17. data/lib/imw/formats/excel.rb +125 -0
  18. data/lib/imw/formats/json.rb +51 -0
  19. data/lib/imw/formats/sgml.rb +69 -0
  20. data/lib/imw/formats/yaml.rb +51 -0
  21. data/lib/imw/resource.rb +108 -10
  22. data/lib/imw/schemes.rb +21 -0
  23. data/lib/imw/schemes/hdfs.rb +240 -0
  24. data/lib/imw/schemes/http.rb +166 -0
  25. data/lib/imw/schemes/local.rb +219 -0
  26. data/lib/imw/schemes/remote.rb +114 -0
  27. data/lib/imw/schemes/s3.rb +135 -0
  28. data/lib/imw/tools.rb +8 -0
  29. data/lib/imw/{transforms → tools}/archiver.rb +1 -1
  30. data/lib/imw/{transforms → tools}/transferer.rb +10 -10
  31. data/spec/imw/{resources/archive_spec.rb → archive_spec.rb} +3 -3
  32. data/spec/imw/{resources/archives_and_compressed → archives}/rar_spec.rb +2 -2
  33. data/spec/imw/{resources/archives_and_compressed → archives}/tar_spec.rb +2 -2
  34. data/spec/imw/{resources/archives_and_compressed → archives}/tarbz2_spec.rb +4 -4
  35. data/spec/imw/{resources/archives_and_compressed → archives}/targz_spec.rb +4 -4
  36. data/spec/imw/{resources/archives_and_compressed → archives}/zip_spec.rb +2 -2
  37. data/spec/imw/compressed_files/bz2_spec.rb +15 -0
  38. data/spec/imw/{resources → compressed_files}/compressible_spec.rb +1 -1
  39. data/spec/imw/compressed_files/gz_spec.rb +15 -0
  40. data/spec/imw/{resources/compressed_file_spec.rb → compressed_files_spec.rb} +3 -3
  41. data/spec/imw/{resources/formats → formats}/delimited_spec.rb +2 -2
  42. data/spec/imw/{resources/formats → formats}/json_spec.rb +2 -2
  43. data/spec/imw/{resources/formats → formats}/sgml_spec.rb +2 -2
  44. data/spec/imw/{resources/formats → formats}/yaml_spec.rb +2 -2
  45. data/spec/imw/resource_spec.rb +4 -4
  46. data/spec/imw/{resources/schemes → schemes}/hdfs_spec.rb +7 -7
  47. data/spec/imw/{resources/schemes → schemes}/http_spec.rb +2 -2
  48. data/spec/imw/{resources → schemes}/local_spec.rb +5 -5
  49. data/spec/imw/{resources → schemes}/remote_spec.rb +7 -3
  50. data/spec/imw/{resources/schemes → schemes}/s3_spec.rb +2 -2
  51. data/spec/imw/{transforms → tools}/archiver_spec.rb +2 -2
  52. data/spec/imw/tools/transferer_spec.rb +113 -0
  53. metadata +69 -71
  54. data/lib/imw/resources.rb +0 -118
  55. data/lib/imw/resources/archives_and_compressed.rb +0 -32
  56. data/lib/imw/resources/archives_and_compressed/bz2.rb +0 -18
  57. data/lib/imw/resources/archives_and_compressed/gz.rb +0 -18
  58. data/lib/imw/resources/archives_and_compressed/rar.rb +0 -23
  59. data/lib/imw/resources/archives_and_compressed/tar.rb +0 -23
  60. data/lib/imw/resources/archives_and_compressed/tarbz2.rb +0 -78
  61. data/lib/imw/resources/archives_and_compressed/targz.rb +0 -78
  62. data/lib/imw/resources/archives_and_compressed/zip.rb +0 -57
  63. data/lib/imw/resources/formats.rb +0 -32
  64. data/lib/imw/resources/formats/delimited.rb +0 -92
  65. data/lib/imw/resources/formats/excel.rb +0 -125
  66. data/lib/imw/resources/formats/json.rb +0 -53
  67. data/lib/imw/resources/formats/sgml.rb +0 -72
  68. data/lib/imw/resources/formats/yaml.rb +0 -53
  69. data/lib/imw/resources/local.rb +0 -198
  70. data/lib/imw/resources/remote.rb +0 -110
  71. data/lib/imw/resources/schemes.rb +0 -19
  72. data/lib/imw/resources/schemes/hdfs.rb +0 -242
  73. data/lib/imw/resources/schemes/http.rb +0 -161
  74. data/lib/imw/resources/schemes/s3.rb +0 -137
  75. data/lib/imw/transforms.rb +0 -8
  76. data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +0 -15
  77. data/spec/imw/resources/archives_and_compressed/gz_spec.rb +0 -15
  78. data/spec/imw/transforms/transferer_spec.rb +0 -113
@@ -1,53 +0,0 @@
1
- module IMW
2
- module Resources
3
- module Formats
4
-
5
- # Defines methods for reading and writing JSON data.
6
- module Json
7
-
8
- # Return the content of this resource.
9
- #
10
- # Will try to be smart about iterating over the data when
11
- # passed a block.
12
- #
13
- # - if the outermost JSON data structure is an array, then
14
- # yield each element
15
- #
16
- # - if the outermost JSON data structure is a mapping, then
17
- # yield each key, value pair
18
- #
19
- # - otherwise just yield the structure
20
- #
21
- # @return [Hash, Array, String, Fixnum] whatever the JSON contained
22
- def load &block
23
- require 'json'
24
- json = JSON.parse(read)
25
- if block_given?
26
- case json
27
- when Array
28
- json.each { |obj| yield obj }
29
- when Hash
30
- json.each_pair { |key, value| yield key, value }
31
- else
32
- yield json
33
- end
34
- else
35
- json
36
- end
37
- end
38
-
39
- # Dump the +data+ into this resource. It must be opened for
40
- # writing.
41
- #
42
- # @param [Hash, String, Array, Fixnum] data the Ruby object to dump
43
- # @option options [true, false] :persist (false) Don't close the IO object after writing
44
- def dump data, options={}
45
- require 'json'
46
- write(data.to_json)
47
- io.close unless options[:persist]
48
- self
49
- end
50
- end
51
- end
52
- end
53
- end
@@ -1,72 +0,0 @@
1
- module IMW
2
- module Resources
3
- module Formats
4
-
5
- # Defines methods to parse SGML-derived data formats (XML, HTML,
6
- # &c.). This module isn't directly used to extend resources.
7
- # Instead, more specific modules (e.g. -
8
- # IMW::Resources::Formats::Xml) are used.
9
- module Sgml
10
-
11
- # Parse this resource using Hpricot and return (or yield if
12
- # given a block) the resulting Hpricot::Doc.
13
- #
14
- # @return [Hpricot::Doc]
15
- # @yield [Hpricot::Doc]
16
- def load &block
17
- require 'hpricot'
18
- sgml = Hpricot(io)
19
- if block_given?
20
- yield sgml
21
- else
22
- sgml
23
- end
24
- end
25
-
26
- # Parse the Hpricot::Doc of this resource with the given
27
- # +parser+.
28
- #
29
- # The parser can either be an IMW::Parsers::HtmlParser or a
30
- # hash which will be used to build such a parser. See the
31
- # documentation for IMW::Parsers::HtmlParser for more
32
- # information.
33
- #
34
- # @param [Hash, IMW::Parsers::HtmlParser] parser
35
- # @return [Hash] the parser's output
36
- def parse parser
37
- if parser.is_a?(IMW::Parsers::HtmlParser)
38
- parser.parse(load)
39
- else
40
- IMW::Parsers::HtmlParser.new(parser).parse(load)
41
- end
42
- end
43
- end
44
-
45
- # Defines methods for XML data.
46
- module Xml
47
- include Sgml
48
- end
49
-
50
- # Defines methods for XSL data.
51
- module Xsl
52
- include Sgml
53
- end
54
-
55
- # Defines methods for XHTML data.
56
- module Xhtml
57
- include Sgml
58
- end
59
-
60
- # Defines methods for HTML data.
61
- module Html
62
- include Sgml
63
- end
64
-
65
- # Defines methods for RDF data.
66
- module Rdf
67
- include Sgml
68
- end
69
- end
70
- end
71
- end
72
-
@@ -1,53 +0,0 @@
1
- module IMW
2
- module Resources
3
- module Formats
4
-
5
- # Provides methods for reading and writing YAML data.
6
- module Yaml
7
-
8
- # Return the content of this resource.
9
- #
10
- # Will try to be smart about iterating over the data when
11
- # passed a block.
12
- #
13
- # - if the outermost YAML data structure is an array, then
14
- # yield each element
15
- #
16
- # - if the outermost YAML data structure is a mapping, then
17
- # yield each key, value pair
18
- #
19
- # - otherwise just yield the structure
20
- #
21
- # @return [Hash, Array, String, Fixnum] whatever the YAML contained
22
- def load &block
23
- require 'yaml'
24
- yaml = YAML.load(read)
25
- if block_given?
26
- case yaml
27
- when Array
28
- yaml.each { |obj| yield obj }
29
- when Hash
30
- yaml.each_pair { |key, value| yield key, value }
31
- else
32
- yield yaml
33
- end
34
- else
35
- yaml
36
- end
37
- end
38
-
39
- # Dump the +data+ into this resource. It must be opened for
40
- # writing.
41
- #
42
- # @param [Hash, String, Array, Fixnum] data the Ruby object to dump
43
- # @option options [true, false] :persist (false) Don't close the IO object after writing
44
- def dump data, options={}
45
- require 'yaml'
46
- write(data.to_yaml)
47
- io.close unless options[:persist]
48
- self
49
- end
50
- end
51
- end
52
- end
53
- end
@@ -1,198 +0,0 @@
1
- module IMW
2
- module Resources
3
-
4
- # Defines methods appropriate for any file (or directory) on the
5
- # local machine. Includes methods from the File class like
6
- # File#exist?, File#size, &c.
7
- #
8
- # When extending with this module, it will automatically also
9
- # extend with either IMW::Resources::LocalDirectory or
10
- # IMW::Resources::LocalFile, as appropriate.
11
- module LocalObj
12
-
13
- def self.extended obj
14
- # also extend with file or directory as appropriate
15
- obj.extend(obj.directory? ? LocalDirectory : LocalFile)
16
- end
17
-
18
- # Steal a bunch of class methods from File which only take a
19
- # path as a first argument.
20
- [:executable?, :executable_real?, :exist?, :file?, :directory?, :ftype, :owned?, :pipe?, :readable?, :readable_real?, :setgid?, :setuid?, :size, :size?, :socket?, :split, :stat, :sticky?, :writable?, :writable_real?, :zero?].each do |class_method|
21
- define_method class_method do
22
- File.send(class_method, path)
23
- end
24
- end
25
- alias_method :exists?, :exist?
26
-
27
- # Return the path to this local object.
28
- #
29
- # @return [String]
30
- def path
31
- @path ||= File.expand_path(@encoded_uri ? Addressable::URI.decode(uri.to_s) : uri.to_s)
32
- end
33
-
34
- # Is this file on the local machine?
35
- #
36
- # @return [true, false]
37
- def is_local?
38
- true
39
- end
40
-
41
- # Copy this resource to the +new_uri+.
42
- #
43
- # @param [String, IMW::Resource] new_uri
44
- # @return [IMW::Resource] the new resource
45
- def cp new_uri
46
- IMW::Transforms::Transferer.new(:cp, self, new_uri).transfer!
47
- end
48
-
49
- # Move this resource to the +new_uri+.
50
- #
51
- # @param [String, IMW::Resource] new_uri
52
- # @return [IMW::Resource] the new resource
53
- def mv new_uri
54
- IMW::Transforms::Transferer.new(:mv, self, new_uri).transfer!
55
- end
56
-
57
- end
58
-
59
- # Defines methods for appropriate for a local file.
60
- module LocalFile
61
-
62
- # Delete this resource.
63
- def rm
64
- should_exist!("Cannot delete")
65
- FileUtils.rm path
66
- self
67
- end
68
- alias_method :rm!, :rm
69
-
70
- # Return the IO object at this path.
71
- #
72
- # @return [File]
73
- def io
74
- @io ||= open(path, mode)
75
- end
76
-
77
- # Read from this file.
78
- #
79
- # @param [Fixnum] length bytes to read
80
- # @return [String]
81
- def read length=nil
82
- io.read(length)
83
- end
84
-
85
- # Write to this file
86
- #
87
- # @param [String, #to_s] text text to write
88
- # @return [Fixnum] bytes written
89
- def write text
90
- io.write text
91
- end
92
-
93
- # Return the lines in this file.
94
- #
95
- # If passed a block, yield each line of the file to the block.
96
- #
97
- # @yield [String] each line of the file
98
- # @return [Array] the lines in the file
99
- def load &block
100
- if block_given?
101
- io.each do |line|
102
- yield line
103
- end
104
- else
105
- read.split("\n")
106
- end
107
- end
108
-
109
- # Map over the lines in this file.
110
- #
111
- # @yield [String] each line of the file
112
- def map &block
113
- io.map(&block)
114
- end
115
-
116
- # Dump +data+ into this file.
117
- #
118
- # @param [String, Array, #each] data object to dump
119
- # @option options [true, false] :persist (false) Don't close the file after writing
120
- def dump data, options={}
121
- data.each do |element| # works if data is an Array or a String
122
- io.puts(element.to_s)
123
- end
124
- io.close unless options[:persist]
125
- end
126
-
127
- end
128
-
129
-
130
- module LocalDirectory
131
-
132
- # Delete this directory.
133
- #
134
- # @return [IMW::Resource] the deleted directory
135
- def rmdir
136
- FileUtils.rmdir path
137
- self
138
- end
139
-
140
- # Delete this directory recursively.
141
- #
142
- # @return [IMW::Resource] the deleted directory
143
- def rm_rf
144
- FileUtils.rm_rf path
145
- self
146
- end
147
-
148
- # Return a list of paths relative to this directory which match
149
- # the +selector+. Works just like Dir[].
150
- #
151
- # @param [String] selector
152
- # @return [Array] the matched paths
153
- def [] selector='*'
154
- Dir[File.join(path, selector)]
155
- end
156
-
157
- # Return a list of all paths directly within this directory.
158
- #
159
- # @return [Array]
160
- def contents
161
- self['*']
162
- end
163
-
164
- # Does this directory contain +obj+?
165
- #
166
- # @param [String, IMW::Resource] obj
167
- # @return [true, false]
168
- def contains? obj
169
- require 'find'
170
- obj_path = obj.is_a?(String) ? obj : obj.path
171
- Find.find(path) do |sub_path|
172
- return true if sub_path.ends_with?(obj_path)
173
- end
174
- false
175
- end
176
-
177
- # Return all paths within this directory, recursively.
178
- #
179
- # @return [Array<String>]
180
- def all_contents
181
- self['**/*']
182
- end
183
-
184
- # Return all resources within this directory, i.e. - all paths
185
- # converted to IMW::Resource objects.
186
- #
187
- # @return [Array<IMW::Resource>]
188
- def resources
189
- all_contents.map do |path|
190
- IMW.open(path) unless File.directory?(path)
191
- end.compact
192
- end
193
-
194
- end
195
- end
196
- end
197
-
198
-
@@ -1,110 +0,0 @@
1
- module IMW
2
- module Resources
3
-
4
- # Defines methods appropriate for accessing a remote resource, no
5
- # matter what the protocol.
6
- module RemoteObj
7
-
8
- #
9
- # TODO -- self.extended should extend by RemoteDirectory when appropriate
10
- #
11
-
12
- def self.extended obj
13
- obj.extend(RemoteFile)
14
- end
15
-
16
- # Is this resource on a remote host?
17
- #
18
- # @return [true,false]
19
- def is_remote?
20
- true
21
- end
22
-
23
- # The host of this resource.
24
- #
25
- # @return [String]
26
- def host
27
- @host ||= uri.host
28
- end
29
-
30
- # Return the query string part of this resource's URI. Will
31
- # likely be +nil+ for local resources.
32
- #
33
- # @return [String]
34
- def query_string
35
- @query_string ||= uri.query
36
- end
37
-
38
- # Return the fragment part of this resource's URI. Will likely be
39
- # +nil+ for local resources.
40
- #
41
- # @return [String]
42
- def fragment
43
- @fragment ||= uri.fragment
44
- end
45
-
46
- # Return the path part of this resource's URI. Will _not_
47
- # include the +query_string+ or +fragment+.
48
- #
49
- # @return [String]
50
- def path
51
- @path ||= uri.path
52
- end
53
-
54
- end
55
-
56
- module RemoteFile
57
-
58
- # Return the IO object for this remote file.
59
- #
60
- # The mode of this resource is ignored.
61
- #
62
- # @return [StringIO]
63
- def io
64
- require 'open-uri'
65
- @io ||= open(uri.to_s) # ignore mode
66
- end
67
-
68
- # Read the contents of this remote file.
69
- #
70
- # @return [String]
71
- def read
72
- io.read
73
- end
74
-
75
- # Return the lines of this remote file.
76
- #
77
- # If passed a block then yield each line to the block.
78
- #
79
- # @return [Array] the lines of this remote file
80
- # @yield [String] each line of this remote file
81
- def load &block
82
- if block_given?
83
- io.each do |line|
84
- yield line
85
- end
86
- else
87
- read.split("\n")
88
- end
89
- end
90
-
91
- # Map over the lines in this remote file.
92
- #
93
- # @yield [String] each line of the file
94
- def map &block
95
- io.map(&block)
96
- end
97
- end
98
-
99
-
100
- module RemoteDirectory
101
-
102
- #
103
- # TODO -- bloody everything
104
- #
105
-
106
-
107
- end
108
- end
109
- end
110
-