imw 0.2.18 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (172) hide show
  1. data/Gemfile +7 -26
  2. data/Gemfile.lock +13 -38
  3. data/{LICENSE → LICENSE.txt} +1 -1
  4. data/README.textile +35 -0
  5. data/Rakefile +45 -22
  6. data/VERSION +1 -1
  7. data/examples/foo.rb +19 -0
  8. data/examples/html_selector.rb +22 -0
  9. data/examples/nes_game_list.csv +625 -0
  10. data/examples/nes_gamespot.csv +1371 -0
  11. data/examples/nes_nintendo.csv +624 -0
  12. data/examples/nes_unlicensed.csv +89 -0
  13. data/examples/nes_wikipedia.csv +710 -0
  14. data/examples/nibbler_test.rb +24 -0
  15. data/examples/script.rb +19 -0
  16. data/lib/imw.rb +28 -140
  17. data/lib/imw/error.rb +9 -0
  18. data/lib/imw/recordizer.rb +8 -0
  19. data/lib/imw/recordizer/html_selector_recordizer.rb +86 -0
  20. data/lib/imw/recordizer/string_slice_recordizer.rb +39 -0
  21. data/lib/imw/resource.rb +3 -119
  22. data/lib/imw/serializer.rb +7 -0
  23. data/lib/imw/serializer/json_serializer.rb +17 -0
  24. data/lib/imw/uri.rb +41 -0
  25. data/spec/resource_spec.rb +78 -0
  26. data/spec/uri_spec.rb +55 -0
  27. metadata +81 -232
  28. data/README.rdoc +0 -371
  29. data/bin/imw +0 -5
  30. data/bin/tsv_to_json.rb +0 -29
  31. data/etc/imwrc.rb +0 -26
  32. data/examples/dataset.rb +0 -12
  33. data/examples/metadata.yml +0 -10
  34. data/lib/imw/archives.rb +0 -120
  35. data/lib/imw/archives/rar.rb +0 -19
  36. data/lib/imw/archives/tar.rb +0 -19
  37. data/lib/imw/archives/tarbz2.rb +0 -73
  38. data/lib/imw/archives/targz.rb +0 -73
  39. data/lib/imw/archives/zip.rb +0 -51
  40. data/lib/imw/boot.rb +0 -87
  41. data/lib/imw/compressed_files.rb +0 -94
  42. data/lib/imw/compressed_files/bz2.rb +0 -16
  43. data/lib/imw/compressed_files/compressible.rb +0 -75
  44. data/lib/imw/compressed_files/gz.rb +0 -16
  45. data/lib/imw/dataset.rb +0 -125
  46. data/lib/imw/dataset/paths.rb +0 -29
  47. data/lib/imw/dataset/workflow.rb +0 -195
  48. data/lib/imw/formats.rb +0 -33
  49. data/lib/imw/formats/delimited.rb +0 -170
  50. data/lib/imw/formats/excel.rb +0 -100
  51. data/lib/imw/formats/json.rb +0 -41
  52. data/lib/imw/formats/pdf.rb +0 -71
  53. data/lib/imw/formats/sgml.rb +0 -69
  54. data/lib/imw/formats/yaml.rb +0 -41
  55. data/lib/imw/metadata.rb +0 -83
  56. data/lib/imw/metadata/contains_metadata.rb +0 -54
  57. data/lib/imw/metadata/dsl.rb +0 -111
  58. data/lib/imw/metadata/field.rb +0 -37
  59. data/lib/imw/metadata/has_metadata.rb +0 -98
  60. data/lib/imw/metadata/has_summary.rb +0 -57
  61. data/lib/imw/metadata/schema.rb +0 -17
  62. data/lib/imw/parsers.rb +0 -8
  63. data/lib/imw/parsers/flat.rb +0 -44
  64. data/lib/imw/parsers/html_parser.rb +0 -387
  65. data/lib/imw/parsers/html_parser/matchers.rb +0 -289
  66. data/lib/imw/parsers/line_parser.rb +0 -87
  67. data/lib/imw/parsers/regexp_parser.rb +0 -72
  68. data/lib/imw/repository.rb +0 -12
  69. data/lib/imw/runner.rb +0 -118
  70. data/lib/imw/schemes.rb +0 -23
  71. data/lib/imw/schemes/ftp.rb +0 -142
  72. data/lib/imw/schemes/hdfs.rb +0 -251
  73. data/lib/imw/schemes/http.rb +0 -165
  74. data/lib/imw/schemes/local.rb +0 -409
  75. data/lib/imw/schemes/remote.rb +0 -119
  76. data/lib/imw/schemes/s3.rb +0 -143
  77. data/lib/imw/schemes/sql.rb +0 -129
  78. data/lib/imw/tools.rb +0 -12
  79. data/lib/imw/tools/aggregator.rb +0 -148
  80. data/lib/imw/tools/archiver.rb +0 -220
  81. data/lib/imw/tools/downloader.rb +0 -63
  82. data/lib/imw/tools/extension_analyzer.rb +0 -114
  83. data/lib/imw/tools/summarizer.rb +0 -83
  84. data/lib/imw/tools/transferer.rb +0 -167
  85. data/lib/imw/utils.rb +0 -74
  86. data/lib/imw/utils/dynamically_extendable.rb +0 -137
  87. data/lib/imw/utils/error.rb +0 -59
  88. data/lib/imw/utils/extensions/hpricot.rb +0 -34
  89. data/lib/imw/utils/has_uri.rb +0 -131
  90. data/lib/imw/utils/log.rb +0 -92
  91. data/lib/imw/utils/misc.rb +0 -57
  92. data/lib/imw/utils/paths.rb +0 -146
  93. data/lib/imw/utils/uri.rb +0 -59
  94. data/lib/imw/utils/uuid.rb +0 -33
  95. data/lib/imw/utils/validate.rb +0 -38
  96. data/lib/imw/utils/version.rb +0 -11
  97. data/spec/data/formats/delimited/sample.csv +0 -131
  98. data/spec/data/formats/delimited/sample.tsv +0 -131
  99. data/spec/data/formats/delimited/with_schema/ace-hardware-locations.tsv +0 -11
  100. data/spec/data/formats/delimited/with_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -16
  101. data/spec/data/formats/delimited/with_schema/complete-list-of-starbucks-locations.tsv +0 -11
  102. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -22
  103. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -22
  104. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -12
  105. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -13
  106. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -22
  107. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -22
  108. data/spec/data/formats/delimited/without_schema/ace-hardware-locations.tsv +0 -10
  109. data/spec/data/formats/delimited/without_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -15
  110. data/spec/data/formats/delimited/without_schema/complete-list-of-starbucks-locations.tsv +0 -10
  111. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -21
  112. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -21
  113. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -11
  114. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -12
  115. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -21
  116. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -21
  117. data/spec/data/formats/excel/sample.xls +0 -0
  118. data/spec/data/formats/json/sample.json +0 -1
  119. data/spec/data/formats/none/sample +0 -650
  120. data/spec/data/formats/sgml/sample.xml +0 -617
  121. data/spec/data/formats/text/sample.txt +0 -650
  122. data/spec/data/formats/yaml/sample.yaml +0 -410
  123. data/spec/data/schema-tabular.yaml +0 -11
  124. data/spec/imw/archives/rar_spec.rb +0 -16
  125. data/spec/imw/archives/tar_spec.rb +0 -16
  126. data/spec/imw/archives/tarbz2_spec.rb +0 -24
  127. data/spec/imw/archives/targz_spec.rb +0 -21
  128. data/spec/imw/archives/zip_spec.rb +0 -16
  129. data/spec/imw/archives_spec.rb +0 -77
  130. data/spec/imw/compressed_files/bz2_spec.rb +0 -15
  131. data/spec/imw/compressed_files/compressible_spec.rb +0 -36
  132. data/spec/imw/compressed_files/gz_spec.rb +0 -15
  133. data/spec/imw/compressed_files_spec.rb +0 -47
  134. data/spec/imw/dataset/paths_spec.rb +0 -32
  135. data/spec/imw/dataset/workflow_spec.rb +0 -41
  136. data/spec/imw/formats/delimited_spec.rb +0 -44
  137. data/spec/imw/formats/excel_spec.rb +0 -55
  138. data/spec/imw/formats/json_spec.rb +0 -18
  139. data/spec/imw/formats/sgml_spec.rb +0 -24
  140. data/spec/imw/formats/yaml_spec.rb +0 -19
  141. data/spec/imw/metadata/contains_metadata_spec.rb +0 -56
  142. data/spec/imw/metadata/field_spec.rb +0 -25
  143. data/spec/imw/metadata/has_metadata_spec.rb +0 -58
  144. data/spec/imw/metadata/has_summary_spec.rb +0 -32
  145. data/spec/imw/metadata/schema_spec.rb +0 -24
  146. data/spec/imw/metadata_spec.rb +0 -86
  147. data/spec/imw/parsers/line_parser_spec.rb +0 -96
  148. data/spec/imw/parsers/regexp_parser_spec.rb +0 -42
  149. data/spec/imw/resource_spec.rb +0 -32
  150. data/spec/imw/schemes/hdfs_spec.rb +0 -67
  151. data/spec/imw/schemes/http_spec.rb +0 -19
  152. data/spec/imw/schemes/local_spec.rb +0 -165
  153. data/spec/imw/schemes/remote_spec.rb +0 -38
  154. data/spec/imw/schemes/s3_spec.rb +0 -31
  155. data/spec/imw/schemes/sql_spec.rb +0 -3
  156. data/spec/imw/tools/aggregator_spec.rb +0 -71
  157. data/spec/imw/tools/archiver_spec.rb +0 -120
  158. data/spec/imw/tools/extension_analyzer_spec.rb +0 -153
  159. data/spec/imw/tools/summarizer_spec.rb +0 -8
  160. data/spec/imw/tools/transferer_spec.rb +0 -195
  161. data/spec/imw/utils/dynamically_extendable_spec.rb +0 -69
  162. data/spec/imw/utils/has_uri_spec.rb +0 -61
  163. data/spec/imw/utils/paths_spec.rb +0 -10
  164. data/spec/imw/utils/shared_paths_spec.rb +0 -29
  165. data/spec/imw_spec.rb +0 -14
  166. data/spec/rcov.opts +0 -1
  167. data/spec/spec_helper.rb +0 -31
  168. data/spec/support/custom_matchers.rb +0 -28
  169. data/spec/support/file_contents_matcher.rb +0 -30
  170. data/spec/support/paths_matcher.rb +0 -66
  171. data/spec/support/random.rb +0 -213
  172. data/spec/support/without_regard_to_order_matcher.rb +0 -41
@@ -1,165 +0,0 @@
1
- module IMW
2
- module Schemes
3
-
4
- # Defines methods for accessing a resource over HTTP. Uses
5
- # RestClient to implement the basic HTTP verbs (GET, POST, PUT,
6
- # DELETE, HEAD).
7
- module HTTP
8
-
9
- # Many websites have HTML content without an <tt>.html</tt>
10
- # extension so automatically extend +obj+ with
11
- # IMW::Formats::Html in this case.
12
- def self.extended obj
13
- obj.extend(IMW::Formats::Html) if obj.extension.blank?
14
- end
15
-
16
- # Is this resource being accessed via HTTP?
17
- #
18
- # @return [true, false]
19
- def via_http?
20
- true
21
- end
22
-
23
- # Copy this resource to the +new_uri+.
24
- #
25
- # @param [String, IMW::Resource] new_uri
26
- # @return [IMW::Resource] the new resource
27
- def cp new_uri
28
- IMW::Tools::Transferer.new(:cp, self, new_uri).transfer!
29
- end
30
-
31
- # Return the basename of the URI or <tt>_index</tt> if it's
32
- # blank, as in the case of <tt>http://www.google.com</tt>.
33
- #
34
- # @return [String]
35
- def effective_basename
36
- (basename.blank? || basename =~ %r{^/*$}) ? "_index" : basename
37
- end
38
-
39
- # Send a GET request to this resource's URI.
40
- #
41
- # If the response doesn't have HTTP code 2xx, a RestClient
42
- # error will be raised.
43
- #
44
- # If a block is given then the response will be passed to the
45
- # block, even in case of a non-2xx code.
46
- #
47
- # See the documentation for
48
- # RestClient[http://rdoc.info/projects/archiloque/rest-client]
49
- # for more information.
50
- #
51
- # @param [Hash] headers the headers to include in the request
52
- # @yield [RestClient::Response] the response from the server
53
- # @return [RestClient::Response] the response from the server
54
- # @raise [RestClient::NotModified, RestClient::Unauthorized, RestClient::ResourceNotFound, RestClient::RequestFailed] error from RestClient on non-2xx response codes
55
- def get headers={}, &block
56
- make_restclient_request do
57
- RestClient.get(uri.to_s, headers, &block)
58
- end
59
- end
60
-
61
- # Send a POST request to this resource's URI with data
62
- # +payload+.
63
- #
64
- # If the response doesn't have HTTP code 2xx, a RestClient
65
- # error will be raised.
66
- #
67
- # If a block is given then the response will be passed to the
68
- # block, even in case of a non-2xx code.
69
- #
70
- # See the documentation for
71
- # RestClient[http://rdoc.info/projects/archiloque/rest-client]
72
- # for more information.
73
- #
74
- # @param [Hash, String] payload the data to send
75
- # @param [Hash] headers the headers to include in the request
76
- # @yield [RestClient::Response] the response from the server
77
- # @return [RestClient::Response] the response from the server
78
- # @raise [RestClient::NotModified, RestClient::Unauthorized, RestClient::ResourceNotFound, RestClient::RequestFailed] error from RestClient on non-2xx response codes
79
- def post payload, headers={}, &block
80
- make_restclient_request do
81
- RestClient.post(uri.to_s, payload, headers, &block)
82
- end
83
- end
84
-
85
- # Send a PUT request to this resource's URI with data
86
- # +payload+.
87
- #
88
- # If the response doesn't have HTTP code 2xx, a RestClient
89
- # error will be raised.
90
- #
91
- # If a block is given then the response will be passed to the
92
- # block, even in case of a non-2xx code.
93
- #
94
- # See the documentation for
95
- # RestClient[http://rdoc.info/projects/archiloque/rest-client]
96
- # for more information.
97
- #
98
- # @param [Hash, String] payload the data to send
99
- # @param [Hash] headers the headers to include in the request
100
- # @yield [RestClient::Response] the response from the server
101
- # @return [RestClient::Response] the response from the server
102
- # @raise [RestClient::NotModified, RestClient::Unauthorized, RestClient::ResourceNotFound, RestClient::RequestFailed] error from RestClient on non-2xx response codes
103
- def put payload, headers={}, &block
104
- make_restclient_request do
105
- RestClient.put(uri.to_s, payload, headers, &block)
106
- end
107
- end
108
-
109
- # Send a DELETE request to this resource's URI.
110
- #
111
- # If the response doesn't have HTTP code 2xx, a RestClient
112
- # error will be raised.
113
- #
114
- # If a block is given then the response will be passed to the
115
- # block, even in case of a non-2xx code.
116
- #
117
- # See the documentation for
118
- # RestClient[http://rdoc.info/projects/archiloque/rest-client]
119
- # for more information.
120
- #
121
- # @param [Hash] headers the headers to include in the request
122
- # @yield [RestClient::Response] the response from the server
123
- # @return [RestClient::Response] the response from the server
124
- # @raise [RestClient::NotModified, RestClient::Unauthorized, RestClient::ResourceNotFound, RestClient::RequestFailed] error from RestClient on non-2xx response codes
125
- def delete headers={}, &block
126
- make_restclient_request do
127
- RestClient.delete(uri.to_s, headers, &block)
128
- end
129
- end
130
-
131
- # Send a HEAD request to this resource's URI.
132
- #
133
- # If the response doesn't have HTTP code 2xx, a RestClient
134
- # error will be raised.
135
- #
136
- # If a block is given then the response will be passed to the
137
- # block, even in case of a non-2xx code.
138
- #
139
- # See the documentation for
140
- # RestClient[http://rdoc.info/projects/archiloque/rest-client]
141
- # for more information.
142
- #
143
- # @param [Hash] headers the headers to include in the request
144
- # @yield [RestClient::Response] the response from the server
145
- # @return [RestClient::Response] the response from the server
146
- # @raise [RestClient::NotModified, RestClient::Unauthorized, RestClient::ResourceNotFound, RestClient::RequestFailed] error from RestClient on non-2xx response codes
147
- def head headers={}, &block
148
- make_restclient_request do
149
- RestClient.head(uri.to_s, headers, &block)
150
- end
151
- end
152
-
153
- protected
154
- def make_restclient_request &block # :nodoc
155
- require 'restclient'
156
- begin
157
- yield
158
- rescue RestClient::NotModified, RestClient::Unauthorized, RestClient::ResourceNotFound, RestClient::RequestFailed => e
159
- raise IMW::NetworkError.new("#{e.class} -- #{e.message}")
160
- end
161
- end
162
- end
163
- end
164
- end
165
-
@@ -1,409 +0,0 @@
1
- module IMW
2
- module Schemes
3
- module Local
4
-
5
- # Defines methods appropriate for any file (or directory) on the
6
- # local machine. Includes methods from the File class like
7
- # File#exist?, File#size, &c.
8
- #
9
- # When extending with this module, it will automatically also
10
- # extend with either IMW::Schemes::Local::LocalDirectory or
11
- # IMW::Schemes::Local::LocalFile, as appropriate.
12
- module Base
13
-
14
- def self.extended obj
15
- # also extend with file or directory as appropriate
16
- if obj.directory?
17
- obj.extend(LocalDirectory)
18
- else
19
- obj.extend(LocalFile)
20
- end
21
- end
22
-
23
- # Steal a bunch of class methods from File which only take a
24
- # path as a first argument.
25
- [:executable?, :executable_real?, :exist?, :file?, :directory?, :ftype, :owned?, :pipe?, :readable?, :readable_real?, :setgid?, :setuid?, :size, :size?, :socket?, :split, :stat, :sticky?, :writable?, :writable_real?, :zero?].each do |class_method|
26
- define_method class_method do
27
- File.send(class_method, path)
28
- end
29
- end
30
- alias_method :exists?, :exist?
31
-
32
- # Return the path to this local object.
33
- #
34
- # @return [String]
35
- def path
36
- @path ||= File.expand_path(@encoded_uri ? Addressable::URI.decode(uri.to_s) : uri.to_s)
37
- end
38
-
39
- # Is this file on the local machine?
40
- #
41
- # @return [true, false]
42
- def is_local?
43
- true
44
- end
45
-
46
- # Copy this resource to the +new_uri+.
47
- #
48
- # @param [String, IMW::Resource] new_uri
49
- # @return [IMW::Resource] the new resource
50
- def cp new_uri
51
- IMW::Tools::Transferer.new(:cp, self, new_uri).transfer!
52
- end
53
-
54
- # Move this resource to the +new_uri+.
55
- #
56
- # @param [String, IMW::Resource] new_uri
57
- # @return [IMW::Resource] the new resource
58
- def mv new_uri
59
- IMW::Tools::Transferer.new(:mv, self, new_uri).transfer!
60
- end
61
-
62
- # Return the directory of this resource.
63
- #
64
- # @return [IMW::Resource]
65
- def dir
66
- IMW.open(dirname)
67
- end
68
-
69
- end
70
-
71
- # Defines methods appropriate for a local file.
72
- module LocalFile
73
-
74
- include IMW::Metadata::HasMetadata
75
-
76
- # Is this resource a regular file?
77
- #
78
- # @return [true, false]
79
- def is_file?
80
- true
81
- end
82
-
83
- # Delete this resource.
84
- def rm
85
- should_exist!("Cannot delete")
86
- FileUtils.rm path
87
- self
88
- end
89
- alias_method :rm!, :rm
90
-
91
- # Return the IO object at this path.
92
- #
93
- # @return [File]
94
- def io
95
- @io ||= open(path, mode)
96
- end
97
-
98
- # Close this resource's file handle if it exists.
99
- def close
100
- # explicitly check the @io instance variable b/c self.io
101
- # will open up a new handle by default
102
- io.close if @io
103
- super()
104
- end
105
-
106
- # Read from this file.
107
- #
108
- # @param [Fixnum] length bytes to read
109
- # @return [String]
110
- def read length=nil
111
- io.read(length)
112
- end
113
-
114
- # Read a line from this file.
115
- #
116
- # @return [String]
117
- def readline
118
- io.readline
119
- end
120
-
121
- # Write to this file
122
- #
123
- # @param [String, #to_s] text text to write
124
- # @return [Fixnum] bytes written
125
- def write text
126
- io.write text
127
- end
128
-
129
- # Write the text with a trailing newline to this resource.
130
- #
131
- # @param [String, #to_s] text
132
- def << text
133
- io.write text.to_s + "\n"
134
- end
135
-
136
- # Return the lines in this file.
137
- #
138
- # If passed a block, yield each line of the file to the block.
139
- #
140
- # @yield [String] each line of the file
141
- # @return [Array] the lines in the file
142
- def load &block
143
- if block_given?
144
- io.each do |line|
145
- yield line
146
- end
147
- else
148
- read.split("\n")
149
- end
150
- end
151
-
152
- # Map over the lines in this file.
153
- #
154
- # @yield [String] each line of the file
155
- def map &block
156
- io.map(&block)
157
- end
158
-
159
- # Emit +data+ into this file.
160
- #
161
- # @param [String, Array, #each] data object to emit
162
- def emit data, options={}
163
- data.each do |element| # works if data is an Array or a String
164
- io << (element.to_s)
165
- end
166
- end
167
-
168
- # Return a snippet of text from this resource.
169
- #
170
- # Will read the first 1024 bytes and strip non-ASCII
171
- # characters from them. For more control, redefine this
172
- # method in another module.
173
- #
174
- # @return [String]
175
- def snippet
176
- [].tap do |snip|
177
- (io.read(1024) || '').bytes.each do |byte|
178
- # CR LF SPACE ~
179
- snip << byte.chr if byte == 13 || byte == 10 || byte >= 32 && byte <= 126
180
- end
181
- end.join
182
- end
183
-
184
- # Return the number of lines in this file.
185
- #
186
- # @return [Integer]
187
- def num_lines
188
- wc[0]
189
- end
190
-
191
- # Return the number of words in this file.
192
- #
193
- # @return [Integer]
194
- def num_words
195
- wc[1]
196
- end
197
-
198
- # Return the number of characters in this file.
199
- #
200
- # @return [Integer]
201
- def num_chars
202
- wc[2]
203
- end
204
-
205
- # Return a summary of properties of this local file.
206
- #
207
- # Returned properties include
208
- # - basename
209
- # - size
210
- # - extension
211
- # - num_lines
212
- def external_summary
213
- super().merge({
214
- :size => size,
215
- :num_lines => num_lines
216
- })
217
- end
218
-
219
-
220
-
221
-
222
- protected
223
-
224
- # Return a triple of line, word, and character counts for this
225
- # resource.
226
- #
227
- # Relies on the Unix utility +wc+.
228
- #
229
- # @return [Array<Integer>]
230
- def wc
231
- @wc ||= begin
232
- `wc #{path}`.chomp.strip.split.map(&:to_i)
233
- rescue
234
- [nil,nil,nil] # FIXME
235
- end
236
- end
237
-
238
- end
239
-
240
- # Defines methods for manipulating the contents of a local
241
- # directory.
242
- module LocalDirectory
243
-
244
- # Lets local directories contain a special metadata file which
245
- # describes their contents.
246
- include IMW::Metadata::ContainsMetadata
247
-
248
- # Is this resource a directory?
249
- #
250
- # @return [true, false]
251
- def is_directory?
252
- true
253
- end
254
-
255
- # Delete this directory.
256
- #
257
- # @return [IMW::Resource] the deleted directory
258
- def rmdir
259
- FileUtils.rmdir path
260
- self
261
- end
262
- alias_method :rmdir!, :rmdir
263
-
264
- # Delete this directory recursively.
265
- #
266
- # @return [IMW::Resource] the deleted directory
267
- def rm_rf
268
- FileUtils.rm_rf path
269
- self
270
- end
271
- alias_method :rm_rf!, :rm_rf
272
-
273
- # Return a list of paths relative to this directory which match
274
- # the +selector+. Works just like Dir[].
275
- #
276
- # @param [String] selector
277
- # @return [Array] the matched paths
278
- def [] selector='*'
279
- Dir[File.join(path, selector)]
280
- end
281
-
282
- # Does this directory contain +obj+?
283
- #
284
- # @param [String, IMW::Resource] obj
285
- # @return [true, false]
286
- def contains? obj
287
- obj = IMW.open(obj)
288
- return false unless obj.is_local?
289
- return true if obj.path == path
290
- return false unless obj.path.starts_with?(path)
291
- return true if self[obj.path[path.length..-1]].size > 0
292
- false
293
- end
294
-
295
- # Return a list of all paths directly within this directory.
296
- #
297
- # @return [Array<String>]
298
- def contents
299
- self['*']
300
- end
301
-
302
- # Return all paths within this directory, recursively.
303
- #
304
- # @return [Array<String>]
305
- def all_contents
306
- self['**/*']
307
- end
308
-
309
- # Return all resources directly within this directory.
310
- #
311
- # @return [Array<IMW::Resource>]
312
- def resources
313
- contents.map { |path| IMW.open(path) }
314
- end
315
-
316
- # Return all resources within this directory, recursively.
317
- #
318
- # @return [Array<IMW::Resource>]
319
- def all_resources
320
- all_contents.map do |path|
321
- IMW.open(path) unless File.directory?(path)
322
- end.compact
323
- end
324
-
325
- # Package the contents of this directory to an archive at
326
- # +package_path+.
327
- #
328
- # @param [String, IMW::Resource] package_path
329
- # @return [IMW::Resource] the new package
330
- def package package_path
331
- temp_package = IMW.open(File.join(dirname, File.basename(package_path)))
332
- FileUtils.cd(dirname) { temp_package.create(basename) }
333
- temp_package.path == File.expand_path(package_path) ? temp_package : temp_package.mv(package_path)
334
- end
335
- alias_method :package!, :package
336
-
337
- # Change the working directory to this local directory.
338
- #
339
- # If passed a block, execute the block in this directory and
340
- # then change back to the initial directory.
341
- #
342
- # This method works the same as FileUtils.cd.
343
- def cd &block
344
- FileUtils.cd(path, &block)
345
- end
346
-
347
- # Create this directory.
348
- #
349
- # No error if the directory already exists.
350
- #
351
- # @return [IMW::Resource] this directory
352
- def create
353
- FileUtils.mkdir_p(path) unless exist?
354
- self
355
- end
356
-
357
- # Return the resource at the base path of this resource joined
358
- # to +path+.
359
- #
360
- # IMW.open('/path/to/dir').join('subdir')
361
- # #=> IMW::Resource at '/path/to/dir/subdir'
362
- #
363
- # @param [Array<String>] paths
364
- # @return [IMW::Resource]
365
- def join *paths
366
- IMW.open(File.join(stripped_uri.to_s, *paths))
367
- end
368
-
369
- # Open (and create if necessary) a subdirectory beneath this
370
- # directory.
371
- #
372
- # @param [Array<String>] paths
373
- # @return [IMW::Resource]
374
- def subdir! *paths
375
- IMW.dir!(File.join(stripped_uri.to_s, *paths))
376
- end
377
-
378
- # Recursively walk down this directory
379
- def walk(options={}, &block)
380
- require 'find'
381
- Find.find(path) do |path|
382
- if options[:only]
383
- next if options[:only] == :files && !File.file?(path)
384
- next if options[:only] == :directories && !File.directory?(path)
385
- next if options[:only] == :symlinks && !File.symlink?(path)
386
- end
387
- yield path
388
- end
389
- end
390
-
391
- # The directory summary includes the following information
392
- # - size
393
- # - num_files
394
- #
395
- # @return [Hash]
396
- def external_summary
397
- super().merge({
398
- :size => size,
399
- :num_files => contents.length,
400
- })
401
- end
402
-
403
- end
404
- end
405
- end
406
- end
407
-
408
-
409
-