imw 0.2.18 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +7 -26
- data/Gemfile.lock +13 -38
- data/{LICENSE → LICENSE.txt} +1 -1
- data/README.textile +35 -0
- data/Rakefile +45 -22
- data/VERSION +1 -1
- data/examples/foo.rb +19 -0
- data/examples/html_selector.rb +22 -0
- data/examples/nes_game_list.csv +625 -0
- data/examples/nes_gamespot.csv +1371 -0
- data/examples/nes_nintendo.csv +624 -0
- data/examples/nes_unlicensed.csv +89 -0
- data/examples/nes_wikipedia.csv +710 -0
- data/examples/nibbler_test.rb +24 -0
- data/examples/script.rb +19 -0
- data/lib/imw.rb +28 -140
- data/lib/imw/error.rb +9 -0
- data/lib/imw/recordizer.rb +8 -0
- data/lib/imw/recordizer/html_selector_recordizer.rb +86 -0
- data/lib/imw/recordizer/string_slice_recordizer.rb +39 -0
- data/lib/imw/resource.rb +3 -119
- data/lib/imw/serializer.rb +7 -0
- data/lib/imw/serializer/json_serializer.rb +17 -0
- data/lib/imw/uri.rb +41 -0
- data/spec/resource_spec.rb +78 -0
- data/spec/uri_spec.rb +55 -0
- metadata +81 -232
- data/README.rdoc +0 -371
- data/bin/imw +0 -5
- data/bin/tsv_to_json.rb +0 -29
- data/etc/imwrc.rb +0 -26
- data/examples/dataset.rb +0 -12
- data/examples/metadata.yml +0 -10
- data/lib/imw/archives.rb +0 -120
- data/lib/imw/archives/rar.rb +0 -19
- data/lib/imw/archives/tar.rb +0 -19
- data/lib/imw/archives/tarbz2.rb +0 -73
- data/lib/imw/archives/targz.rb +0 -73
- data/lib/imw/archives/zip.rb +0 -51
- data/lib/imw/boot.rb +0 -87
- data/lib/imw/compressed_files.rb +0 -94
- data/lib/imw/compressed_files/bz2.rb +0 -16
- data/lib/imw/compressed_files/compressible.rb +0 -75
- data/lib/imw/compressed_files/gz.rb +0 -16
- data/lib/imw/dataset.rb +0 -125
- data/lib/imw/dataset/paths.rb +0 -29
- data/lib/imw/dataset/workflow.rb +0 -195
- data/lib/imw/formats.rb +0 -33
- data/lib/imw/formats/delimited.rb +0 -170
- data/lib/imw/formats/excel.rb +0 -100
- data/lib/imw/formats/json.rb +0 -41
- data/lib/imw/formats/pdf.rb +0 -71
- data/lib/imw/formats/sgml.rb +0 -69
- data/lib/imw/formats/yaml.rb +0 -41
- data/lib/imw/metadata.rb +0 -83
- data/lib/imw/metadata/contains_metadata.rb +0 -54
- data/lib/imw/metadata/dsl.rb +0 -111
- data/lib/imw/metadata/field.rb +0 -37
- data/lib/imw/metadata/has_metadata.rb +0 -98
- data/lib/imw/metadata/has_summary.rb +0 -57
- data/lib/imw/metadata/schema.rb +0 -17
- data/lib/imw/parsers.rb +0 -8
- data/lib/imw/parsers/flat.rb +0 -44
- data/lib/imw/parsers/html_parser.rb +0 -387
- data/lib/imw/parsers/html_parser/matchers.rb +0 -289
- data/lib/imw/parsers/line_parser.rb +0 -87
- data/lib/imw/parsers/regexp_parser.rb +0 -72
- data/lib/imw/repository.rb +0 -12
- data/lib/imw/runner.rb +0 -118
- data/lib/imw/schemes.rb +0 -23
- data/lib/imw/schemes/ftp.rb +0 -142
- data/lib/imw/schemes/hdfs.rb +0 -251
- data/lib/imw/schemes/http.rb +0 -165
- data/lib/imw/schemes/local.rb +0 -409
- data/lib/imw/schemes/remote.rb +0 -119
- data/lib/imw/schemes/s3.rb +0 -143
- data/lib/imw/schemes/sql.rb +0 -129
- data/lib/imw/tools.rb +0 -12
- data/lib/imw/tools/aggregator.rb +0 -148
- data/lib/imw/tools/archiver.rb +0 -220
- data/lib/imw/tools/downloader.rb +0 -63
- data/lib/imw/tools/extension_analyzer.rb +0 -114
- data/lib/imw/tools/summarizer.rb +0 -83
- data/lib/imw/tools/transferer.rb +0 -167
- data/lib/imw/utils.rb +0 -74
- data/lib/imw/utils/dynamically_extendable.rb +0 -137
- data/lib/imw/utils/error.rb +0 -59
- data/lib/imw/utils/extensions/hpricot.rb +0 -34
- data/lib/imw/utils/has_uri.rb +0 -131
- data/lib/imw/utils/log.rb +0 -92
- data/lib/imw/utils/misc.rb +0 -57
- data/lib/imw/utils/paths.rb +0 -146
- data/lib/imw/utils/uri.rb +0 -59
- data/lib/imw/utils/uuid.rb +0 -33
- data/lib/imw/utils/validate.rb +0 -38
- data/lib/imw/utils/version.rb +0 -11
- data/spec/data/formats/delimited/sample.csv +0 -131
- data/spec/data/formats/delimited/sample.tsv +0 -131
- data/spec/data/formats/delimited/with_schema/ace-hardware-locations.tsv +0 -11
- data/spec/data/formats/delimited/with_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -16
- data/spec/data/formats/delimited/with_schema/complete-list-of-starbucks-locations.tsv +0 -11
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -12
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -13
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -22
- data/spec/data/formats/delimited/without_schema/ace-hardware-locations.tsv +0 -10
- data/spec/data/formats/delimited/without_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -15
- data/spec/data/formats/delimited/without_schema/complete-list-of-starbucks-locations.tsv +0 -10
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -11
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -12
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -21
- data/spec/data/formats/excel/sample.xls +0 -0
- data/spec/data/formats/json/sample.json +0 -1
- data/spec/data/formats/none/sample +0 -650
- data/spec/data/formats/sgml/sample.xml +0 -617
- data/spec/data/formats/text/sample.txt +0 -650
- data/spec/data/formats/yaml/sample.yaml +0 -410
- data/spec/data/schema-tabular.yaml +0 -11
- data/spec/imw/archives/rar_spec.rb +0 -16
- data/spec/imw/archives/tar_spec.rb +0 -16
- data/spec/imw/archives/tarbz2_spec.rb +0 -24
- data/spec/imw/archives/targz_spec.rb +0 -21
- data/spec/imw/archives/zip_spec.rb +0 -16
- data/spec/imw/archives_spec.rb +0 -77
- data/spec/imw/compressed_files/bz2_spec.rb +0 -15
- data/spec/imw/compressed_files/compressible_spec.rb +0 -36
- data/spec/imw/compressed_files/gz_spec.rb +0 -15
- data/spec/imw/compressed_files_spec.rb +0 -47
- data/spec/imw/dataset/paths_spec.rb +0 -32
- data/spec/imw/dataset/workflow_spec.rb +0 -41
- data/spec/imw/formats/delimited_spec.rb +0 -44
- data/spec/imw/formats/excel_spec.rb +0 -55
- data/spec/imw/formats/json_spec.rb +0 -18
- data/spec/imw/formats/sgml_spec.rb +0 -24
- data/spec/imw/formats/yaml_spec.rb +0 -19
- data/spec/imw/metadata/contains_metadata_spec.rb +0 -56
- data/spec/imw/metadata/field_spec.rb +0 -25
- data/spec/imw/metadata/has_metadata_spec.rb +0 -58
- data/spec/imw/metadata/has_summary_spec.rb +0 -32
- data/spec/imw/metadata/schema_spec.rb +0 -24
- data/spec/imw/metadata_spec.rb +0 -86
- data/spec/imw/parsers/line_parser_spec.rb +0 -96
- data/spec/imw/parsers/regexp_parser_spec.rb +0 -42
- data/spec/imw/resource_spec.rb +0 -32
- data/spec/imw/schemes/hdfs_spec.rb +0 -67
- data/spec/imw/schemes/http_spec.rb +0 -19
- data/spec/imw/schemes/local_spec.rb +0 -165
- data/spec/imw/schemes/remote_spec.rb +0 -38
- data/spec/imw/schemes/s3_spec.rb +0 -31
- data/spec/imw/schemes/sql_spec.rb +0 -3
- data/spec/imw/tools/aggregator_spec.rb +0 -71
- data/spec/imw/tools/archiver_spec.rb +0 -120
- data/spec/imw/tools/extension_analyzer_spec.rb +0 -153
- data/spec/imw/tools/summarizer_spec.rb +0 -8
- data/spec/imw/tools/transferer_spec.rb +0 -195
- data/spec/imw/utils/dynamically_extendable_spec.rb +0 -69
- data/spec/imw/utils/has_uri_spec.rb +0 -61
- data/spec/imw/utils/paths_spec.rb +0 -10
- data/spec/imw/utils/shared_paths_spec.rb +0 -29
- data/spec/imw_spec.rb +0 -14
- data/spec/rcov.opts +0 -1
- data/spec/spec_helper.rb +0 -31
- data/spec/support/custom_matchers.rb +0 -28
- data/spec/support/file_contents_matcher.rb +0 -30
- data/spec/support/paths_matcher.rb +0 -66
- data/spec/support/random.rb +0 -213
- data/spec/support/without_regard_to_order_matcher.rb +0 -41
data/lib/imw/schemes/http.rb
DELETED
|
@@ -1,165 +0,0 @@
|
|
|
1
|
-
module IMW
|
|
2
|
-
module Schemes
|
|
3
|
-
|
|
4
|
-
# Defines methods for accessing a resource over HTTP. Uses
|
|
5
|
-
# RestClient to implement the basic HTTP verbs (GET, POST, PUT,
|
|
6
|
-
# DELETE, HEAD).
|
|
7
|
-
module HTTP
|
|
8
|
-
|
|
9
|
-
# Many websites have HTML content without an <tt>.html</tt>
|
|
10
|
-
# extension so automatically extend +obj+ with
|
|
11
|
-
# IMW::Resources::Formats::HTML in this case.
|
|
12
|
-
def self.extended obj
|
|
13
|
-
obj.extend(IMW::Formats::Html) if obj.extension.blank?
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
# Is this resource being accessed via HTTP?
|
|
17
|
-
#
|
|
18
|
-
# @return [true, false]
|
|
19
|
-
def via_http?
|
|
20
|
-
true
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
# Copy this resource to the +new_uri+.
|
|
24
|
-
#
|
|
25
|
-
# @param [String, IMW::Resource] new_uri
|
|
26
|
-
# @return [IMW::Resource] the new resource
|
|
27
|
-
def cp new_uri
|
|
28
|
-
IMW::Tools::Transferer.new(:cp, self, new_uri).transfer!
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
# Return the basename of the URI or <tt>_index</tt> if it's
|
|
32
|
-
# blank, as in the case of <tt>http://www.google.com</tt>.
|
|
33
|
-
#
|
|
34
|
-
# @return [String]
|
|
35
|
-
def effective_basename
|
|
36
|
-
(basename.blank? || basename =~ %r{^/*$}) ? "_index" : basename
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
# Send a GET request to this resource's URI.
|
|
40
|
-
#
|
|
41
|
-
# If the response doesn't have HTTP code 2xx, a RestClient
|
|
42
|
-
# error will be raised.
|
|
43
|
-
#
|
|
44
|
-
# If a block is given then the response will be passed to the
|
|
45
|
-
# block, even in case of a non-2xx code.
|
|
46
|
-
#
|
|
47
|
-
# See the documentation for
|
|
48
|
-
# RestClient[http://rdoc.info/projects/archiloque/rest-client]
|
|
49
|
-
# for more information.
|
|
50
|
-
#
|
|
51
|
-
# @param [Hash] headers the headers to include in the request
|
|
52
|
-
# @yield [RestClient::Response] the response from the server
|
|
53
|
-
# @return [RestClient::Response] the response from the server
|
|
54
|
-
# @raise [RestClient::NotModified, RestClient::Unauthorized, RestClient::ResourceNotFound, RestClient::RequestFailed] error from RestClient on non-2xx response codes
|
|
55
|
-
def get headers={}, &block
|
|
56
|
-
make_restclient_request do
|
|
57
|
-
RestClient.get(uri.to_s, headers, &block)
|
|
58
|
-
end
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
# Send a POST request to this resource's URI with data
|
|
62
|
-
# +payload+.
|
|
63
|
-
#
|
|
64
|
-
# If the response doesn't have HTTP code 2xx, a RestClient
|
|
65
|
-
# error will be raised.
|
|
66
|
-
#
|
|
67
|
-
# If a block is given then the response will be passed to the
|
|
68
|
-
# block, even in case of a non-2xx code.
|
|
69
|
-
#
|
|
70
|
-
# See the documentation for
|
|
71
|
-
# RestClient[http://rdoc.info/projects/archiloque/rest-client]
|
|
72
|
-
# for more information.
|
|
73
|
-
#
|
|
74
|
-
# @param [Hash, String] payload the data to send
|
|
75
|
-
# @param [Hash] headers the headers to include in the request
|
|
76
|
-
# @yield [RestClient::Response] the response from the server
|
|
77
|
-
# @return [RestClient::Response] the response from the server
|
|
78
|
-
# @raise [RestClient::NotModified, RestClient::Unauthorized, RestClient::ResourceNotFound, RestClient::RequestFailed] error from RestClient on non-2xx response codes
|
|
79
|
-
def post payload, headers={}, &block
|
|
80
|
-
make_restclient_request do
|
|
81
|
-
RestClient.post(uri.to_s, payload, headers, &block)
|
|
82
|
-
end
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
# Send a PUT request to this resource's URI with data
|
|
86
|
-
# +payload+.
|
|
87
|
-
#
|
|
88
|
-
# If the response doesn't have HTTP code 2xx, a RestClient
|
|
89
|
-
# error will be raised.
|
|
90
|
-
#
|
|
91
|
-
# If a block is given then the response will be passed to the
|
|
92
|
-
# block, even in case of a non-2xx code.
|
|
93
|
-
#
|
|
94
|
-
# See the documentation for
|
|
95
|
-
# RestClient[http://rdoc.info/projects/archiloque/rest-client]
|
|
96
|
-
# for more information.
|
|
97
|
-
#
|
|
98
|
-
# @param [Hash, String] payload the data to send
|
|
99
|
-
# @param [Hash] headers the headers to include in the request
|
|
100
|
-
# @yield [RestClient::Response] the response from the server
|
|
101
|
-
# @return [RestClient::Response] the response from the server
|
|
102
|
-
# @raise [RestClient::NotModified, RestClient::Unauthorized, RestClient::ResourceNotFound, RestClient::RequestFailed] error from RestClient on non-2xx response codes
|
|
103
|
-
def put payload, headers={}, &block
|
|
104
|
-
make_restclient_request do
|
|
105
|
-
RestClient.put(uri.to_s, payload, headers, &block)
|
|
106
|
-
end
|
|
107
|
-
end
|
|
108
|
-
|
|
109
|
-
# Send a DELETE request to this resource's URI.
|
|
110
|
-
#
|
|
111
|
-
# If the response doesn't have HTTP code 2xx, a RestClient
|
|
112
|
-
# error will be raised.
|
|
113
|
-
#
|
|
114
|
-
# If a block is given then the response will be passed to the
|
|
115
|
-
# block, even in case of a non-2xx code.
|
|
116
|
-
#
|
|
117
|
-
# See the documentation for
|
|
118
|
-
# RestClient[http://rdoc.info/projects/archiloque/rest-client]
|
|
119
|
-
# for more information.
|
|
120
|
-
#
|
|
121
|
-
# @param [Hash] headers the headers to include in the request
|
|
122
|
-
# @yield [RestClient::Response] the response from the server
|
|
123
|
-
# @return [RestClient::Response] the response from the server
|
|
124
|
-
# @raise [RestClient::NotModified, RestClient::Unauthorized, RestClient::ResourceNotFound, RestClient::RequestFailed] error from RestClient on non-2xx response codes
|
|
125
|
-
def delete headers={}, &block
|
|
126
|
-
make_restclient_request do
|
|
127
|
-
RestClient.delete(uri.to_s, headers, &block)
|
|
128
|
-
end
|
|
129
|
-
end
|
|
130
|
-
|
|
131
|
-
# Send a HEAD request to this resource's URI.
|
|
132
|
-
#
|
|
133
|
-
# If the response doesn't have HTTP code 2xx, a RestClient
|
|
134
|
-
# error will be raised.
|
|
135
|
-
#
|
|
136
|
-
# If a block is given then the response will be passed to the
|
|
137
|
-
# block, even in case of a non-2xx code.
|
|
138
|
-
#
|
|
139
|
-
# See the documentation for
|
|
140
|
-
# RestClient[http://rdoc.info/projects/archiloque/rest-client]
|
|
141
|
-
# for more information.
|
|
142
|
-
#
|
|
143
|
-
# @param [Hash] headers the headers to include in the request
|
|
144
|
-
# @yield [RestClient::Response] the response from the server
|
|
145
|
-
# @return [RestClient::Response] the response from the server
|
|
146
|
-
# @raise [RestClient::NotModified, RestClient::Unauthorized, RestClient::ResourceNotFound, RestClient::RequestFailed] error from RestClient on non-2xx response codes
|
|
147
|
-
def head headers={}, &block
|
|
148
|
-
make_restclient_request do
|
|
149
|
-
RestClient.head(uri.to_s, headers, &block)
|
|
150
|
-
end
|
|
151
|
-
end
|
|
152
|
-
|
|
153
|
-
protected
|
|
154
|
-
def make_restclient_request &block # :nodoc
|
|
155
|
-
require 'restclient'
|
|
156
|
-
begin
|
|
157
|
-
yield
|
|
158
|
-
rescue RestClient::NotModified, RestClient::Unauthorized, RestClient::ResourceNotFound, RestClient::RequestFailed => e
|
|
159
|
-
raise IMW::NetworkError.new("#{e.class} -- #{e.message}")
|
|
160
|
-
end
|
|
161
|
-
end
|
|
162
|
-
end
|
|
163
|
-
end
|
|
164
|
-
end
|
|
165
|
-
|
data/lib/imw/schemes/local.rb
DELETED
|
@@ -1,409 +0,0 @@
|
|
|
1
|
-
module IMW
|
|
2
|
-
module Schemes
|
|
3
|
-
module Local
|
|
4
|
-
|
|
5
|
-
# Defines methods appropriate for any file (or directory) on the
|
|
6
|
-
# local machine. Includes methods from the File class like
|
|
7
|
-
# File#exist?, File#size, &c.
|
|
8
|
-
#
|
|
9
|
-
# When extending with this module, it will automatically also
|
|
10
|
-
# extend with either IMW::Schemes::Local::LocalDirectory or
|
|
11
|
-
# IMW::Schemes::Local::LocalFile, as appropriate.
|
|
12
|
-
module Base
|
|
13
|
-
|
|
14
|
-
def self.extended obj
|
|
15
|
-
# also extend with file or directory as appropriate
|
|
16
|
-
if obj.directory?
|
|
17
|
-
obj.extend(LocalDirectory)
|
|
18
|
-
else
|
|
19
|
-
obj.extend(LocalFile)
|
|
20
|
-
end
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
# Steal a bunch of class methods from File which only take a
|
|
24
|
-
# path as a first argument.
|
|
25
|
-
[:executable?, :executable_real?, :exist?, :file?, :directory?, :ftype, :owned?, :pipe?, :readable?, :readable_real?, :setgid?, :setuid?, :size, :size?, :socket?, :split, :stat, :sticky?, :writable?, :writable_real?, :zero?].each do |class_method|
|
|
26
|
-
define_method class_method do
|
|
27
|
-
File.send(class_method, path)
|
|
28
|
-
end
|
|
29
|
-
end
|
|
30
|
-
alias_method :exists?, :exist?
|
|
31
|
-
|
|
32
|
-
# Return the path to this local object.
|
|
33
|
-
#
|
|
34
|
-
# @return [String]
|
|
35
|
-
def path
|
|
36
|
-
@path ||= File.expand_path(@encoded_uri ? Addressable::URI.decode(uri.to_s) : uri.to_s)
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
# Is this file on the local machine?
|
|
40
|
-
#
|
|
41
|
-
# @return [true, false]
|
|
42
|
-
def is_local?
|
|
43
|
-
true
|
|
44
|
-
end
|
|
45
|
-
|
|
46
|
-
# Copy this resource to the +new_uri+.
|
|
47
|
-
#
|
|
48
|
-
# @param [String, IMW::Resource] new_uri
|
|
49
|
-
# @return [IMW::Resource] the new resource
|
|
50
|
-
def cp new_uri
|
|
51
|
-
IMW::Tools::Transferer.new(:cp, self, new_uri).transfer!
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
# Move this resource to the +new_uri+.
|
|
55
|
-
#
|
|
56
|
-
# @param [String, IMW::Resource] new_uri
|
|
57
|
-
# @return [IMW::Resource] the new resource
|
|
58
|
-
def mv new_uri
|
|
59
|
-
IMW::Tools::Transferer.new(:mv, self, new_uri).transfer!
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
# Return the directory of this resource.
|
|
63
|
-
#
|
|
64
|
-
# @return [IMW::Resource]
|
|
65
|
-
def dir
|
|
66
|
-
IMW.open(dirname)
|
|
67
|
-
end
|
|
68
|
-
|
|
69
|
-
end
|
|
70
|
-
|
|
71
|
-
# Defines methods for appropriate for a local file.
|
|
72
|
-
module LocalFile
|
|
73
|
-
|
|
74
|
-
include IMW::Metadata::HasMetadata
|
|
75
|
-
|
|
76
|
-
# Is this resource a regular file?
|
|
77
|
-
#
|
|
78
|
-
# @return [true, false]
|
|
79
|
-
def is_file?
|
|
80
|
-
true
|
|
81
|
-
end
|
|
82
|
-
|
|
83
|
-
# Delete this resource.
|
|
84
|
-
def rm
|
|
85
|
-
should_exist!("Cannot delete")
|
|
86
|
-
FileUtils.rm path
|
|
87
|
-
self
|
|
88
|
-
end
|
|
89
|
-
alias_method :rm!, :rm
|
|
90
|
-
|
|
91
|
-
# Return the IO object at this path.
|
|
92
|
-
#
|
|
93
|
-
# @return [File]
|
|
94
|
-
def io
|
|
95
|
-
@io ||= open(path, mode)
|
|
96
|
-
end
|
|
97
|
-
|
|
98
|
-
# Close this resource's file handle if it exists.
|
|
99
|
-
def close
|
|
100
|
-
# explicitly check the @io instance variable b/c self.io
|
|
101
|
-
# will open up a new handle by default
|
|
102
|
-
io.close if @io
|
|
103
|
-
super()
|
|
104
|
-
end
|
|
105
|
-
|
|
106
|
-
# Read from this file.
|
|
107
|
-
#
|
|
108
|
-
# @param [Fixnum] length bytes to read
|
|
109
|
-
# @return [String]
|
|
110
|
-
def read length=nil
|
|
111
|
-
io.read(length)
|
|
112
|
-
end
|
|
113
|
-
|
|
114
|
-
# Read a line from this file.
|
|
115
|
-
#
|
|
116
|
-
# @return [String]
|
|
117
|
-
def readline
|
|
118
|
-
io.readline
|
|
119
|
-
end
|
|
120
|
-
|
|
121
|
-
# Write to this file
|
|
122
|
-
#
|
|
123
|
-
# @param [String, #to_s] text text to write
|
|
124
|
-
# @return [Fixnum] bytes written
|
|
125
|
-
def write text
|
|
126
|
-
io.write text
|
|
127
|
-
end
|
|
128
|
-
|
|
129
|
-
# Write the text with a trailing newline to this resource.
|
|
130
|
-
#
|
|
131
|
-
# @param [String, #to_s] text
|
|
132
|
-
def << text
|
|
133
|
-
io.write text.to_s + "\n"
|
|
134
|
-
end
|
|
135
|
-
|
|
136
|
-
# Return the lines in this file.
|
|
137
|
-
#
|
|
138
|
-
# If passed a block, yield each line of the file to the block.
|
|
139
|
-
#
|
|
140
|
-
# @yield [String] each line of the file
|
|
141
|
-
# @return [Array] the lines in the file
|
|
142
|
-
def load &block
|
|
143
|
-
if block_given?
|
|
144
|
-
io.each do |line|
|
|
145
|
-
yield line
|
|
146
|
-
end
|
|
147
|
-
else
|
|
148
|
-
read.split("\n")
|
|
149
|
-
end
|
|
150
|
-
end
|
|
151
|
-
|
|
152
|
-
# Map over the lines in this file.
|
|
153
|
-
#
|
|
154
|
-
# @yield [String] each line of the file
|
|
155
|
-
def map &block
|
|
156
|
-
io.map(&block)
|
|
157
|
-
end
|
|
158
|
-
|
|
159
|
-
# Emit +data+ into this file.
|
|
160
|
-
#
|
|
161
|
-
# @param [String, Array, #each] data object to emit
|
|
162
|
-
def emit data, options={}
|
|
163
|
-
data.each do |element| # works if data is an Array or a String
|
|
164
|
-
io << (element.to_s)
|
|
165
|
-
end
|
|
166
|
-
end
|
|
167
|
-
|
|
168
|
-
# Return a snippet of text from this resource.
|
|
169
|
-
#
|
|
170
|
-
# Will read the first 1024 bytes and strip non-ASCII
|
|
171
|
-
# characters from them. For more control, redefine this
|
|
172
|
-
# method in another module.
|
|
173
|
-
#
|
|
174
|
-
# @return [String]
|
|
175
|
-
def snippet
|
|
176
|
-
[].tap do |snip|
|
|
177
|
-
(io.read(1024) || '').bytes.each do |byte|
|
|
178
|
-
# CR LF SPACE ~
|
|
179
|
-
snip << byte.chr if byte == 13 || byte == 10 || byte >= 32 && byte <= 126
|
|
180
|
-
end
|
|
181
|
-
end.join
|
|
182
|
-
end
|
|
183
|
-
|
|
184
|
-
# Return the number of lines in this file.
|
|
185
|
-
#
|
|
186
|
-
# @return [Integer]
|
|
187
|
-
def num_lines
|
|
188
|
-
wc[0]
|
|
189
|
-
end
|
|
190
|
-
|
|
191
|
-
# Return the number of words in this file.
|
|
192
|
-
#
|
|
193
|
-
# @return [Integer]
|
|
194
|
-
def num_words
|
|
195
|
-
wc[1]
|
|
196
|
-
end
|
|
197
|
-
|
|
198
|
-
# Return the number of characters in this file.
|
|
199
|
-
#
|
|
200
|
-
# @return [Integer]
|
|
201
|
-
def num_chars
|
|
202
|
-
wc[2]
|
|
203
|
-
end
|
|
204
|
-
|
|
205
|
-
# Return a summary of properties of this local file.
|
|
206
|
-
#
|
|
207
|
-
# Returned properties include
|
|
208
|
-
# - basename
|
|
209
|
-
# - size
|
|
210
|
-
# - extension
|
|
211
|
-
# - num_lines
|
|
212
|
-
def external_summary
|
|
213
|
-
super().merge({
|
|
214
|
-
:size => size,
|
|
215
|
-
:num_lines => num_lines
|
|
216
|
-
})
|
|
217
|
-
end
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
protected
|
|
223
|
-
|
|
224
|
-
# Return a triple of line, word, and character counts for this
|
|
225
|
-
# resource.
|
|
226
|
-
#
|
|
227
|
-
# Relies on the Unix utility +wc+.
|
|
228
|
-
#
|
|
229
|
-
# @return [Array<Integer>]
|
|
230
|
-
def wc
|
|
231
|
-
@wc ||= begin
|
|
232
|
-
`wc #{path}`.chomp.strip.split.map(&:to_i)
|
|
233
|
-
rescue
|
|
234
|
-
[nil,nil,nil] # FIXME
|
|
235
|
-
end
|
|
236
|
-
end
|
|
237
|
-
|
|
238
|
-
end
|
|
239
|
-
|
|
240
|
-
# Defines methods for manipulating the contents of a local
|
|
241
|
-
# directory.
|
|
242
|
-
module LocalDirectory
|
|
243
|
-
|
|
244
|
-
# Lets local directories contain a special metadata file which
|
|
245
|
-
# describes their contents.
|
|
246
|
-
include IMW::Metadata::ContainsMetadata
|
|
247
|
-
|
|
248
|
-
# Is this resource a directory?
|
|
249
|
-
#
|
|
250
|
-
# @return [true, false]
|
|
251
|
-
def is_directory?
|
|
252
|
-
true
|
|
253
|
-
end
|
|
254
|
-
|
|
255
|
-
# Delete this directory.
|
|
256
|
-
#
|
|
257
|
-
# @return [IMW::Resource] the deleted directory
|
|
258
|
-
def rmdir
|
|
259
|
-
FileUtils.rmdir path
|
|
260
|
-
self
|
|
261
|
-
end
|
|
262
|
-
alias_method :rmdir!, :rmdir
|
|
263
|
-
|
|
264
|
-
# Delete this directory recursively.
|
|
265
|
-
#
|
|
266
|
-
# @return [IMW::Resource] the deleted directory
|
|
267
|
-
def rm_rf
|
|
268
|
-
FileUtils.rm_rf path
|
|
269
|
-
self
|
|
270
|
-
end
|
|
271
|
-
alias_method :rm_rf!, :rm_rf
|
|
272
|
-
|
|
273
|
-
# Return a list of paths relative to this directory which match
|
|
274
|
-
# the +selector+. Works just like Dir[].
|
|
275
|
-
#
|
|
276
|
-
# @param [String] selector
|
|
277
|
-
# @return [Array] the matched paths
|
|
278
|
-
def [] selector='*'
|
|
279
|
-
Dir[File.join(path, selector)]
|
|
280
|
-
end
|
|
281
|
-
|
|
282
|
-
# Does this directory contain +obj+?
|
|
283
|
-
#
|
|
284
|
-
# @param [String, IMW::Resource] obj
|
|
285
|
-
# @return [true, false]
|
|
286
|
-
def contains? obj
|
|
287
|
-
obj = IMW.open(obj)
|
|
288
|
-
return false unless obj.is_local?
|
|
289
|
-
return true if obj.path == path
|
|
290
|
-
return false unless obj.path.starts_with?(path)
|
|
291
|
-
return true if self[obj.path[path.length..-1]].size > 0
|
|
292
|
-
false
|
|
293
|
-
end
|
|
294
|
-
|
|
295
|
-
# Return a list of all paths directly within this directory.
|
|
296
|
-
#
|
|
297
|
-
# @return [Array<String>]
|
|
298
|
-
def contents
|
|
299
|
-
self['*']
|
|
300
|
-
end
|
|
301
|
-
|
|
302
|
-
# Return all paths within this directory, recursively.
|
|
303
|
-
#
|
|
304
|
-
# @return [Array<String>]
|
|
305
|
-
def all_contents
|
|
306
|
-
self['**/*']
|
|
307
|
-
end
|
|
308
|
-
|
|
309
|
-
# Return all resources directly within this directory.
|
|
310
|
-
#
|
|
311
|
-
# @return [Array<IMW::Resource>]
|
|
312
|
-
def resources
|
|
313
|
-
contents.map { |path| IMW.open(path) }
|
|
314
|
-
end
|
|
315
|
-
|
|
316
|
-
# Return all resources within this directory, recursively.
|
|
317
|
-
#
|
|
318
|
-
# @return [Array<IMW::Resource>]
|
|
319
|
-
def all_resources
|
|
320
|
-
all_contents.map do |path|
|
|
321
|
-
IMW.open(path) unless File.directory?(path)
|
|
322
|
-
end.compact
|
|
323
|
-
end
|
|
324
|
-
|
|
325
|
-
# Package the contents of this directory to an archive at
|
|
326
|
-
# +package_path+.
|
|
327
|
-
#
|
|
328
|
-
# @param [String, IMW::Resource] package_path
|
|
329
|
-
# @return [IMW::Resource] the new package
|
|
330
|
-
def package package_path
|
|
331
|
-
temp_package = IMW.open(File.join(dirname, File.basename(package_path)))
|
|
332
|
-
FileUtils.cd(dirname) { temp_package.create(basename) }
|
|
333
|
-
temp_package.path == File.expand_path(package_path) ? temp_package : temp_package.mv(package_path)
|
|
334
|
-
end
|
|
335
|
-
alias_method :package!, :package
|
|
336
|
-
|
|
337
|
-
# Change the working directory to this local directory.
|
|
338
|
-
#
|
|
339
|
-
# If passed a black, execute the block in this directory and
|
|
340
|
-
# then change back to the initial directory.
|
|
341
|
-
#
|
|
342
|
-
# This method works the same as FileUtils.cd.
|
|
343
|
-
def cd &block
|
|
344
|
-
FileUtils.cd(path, &block)
|
|
345
|
-
end
|
|
346
|
-
|
|
347
|
-
# Create this directory.
|
|
348
|
-
#
|
|
349
|
-
# No error if the directory already exists.
|
|
350
|
-
#
|
|
351
|
-
# @return [IMW::Resource] this directory
|
|
352
|
-
def create
|
|
353
|
-
FileUtils.mkdir_p(path) unless exist?
|
|
354
|
-
self
|
|
355
|
-
end
|
|
356
|
-
|
|
357
|
-
# Return the resource at the base path of this resource joined
|
|
358
|
-
# to +path+.
|
|
359
|
-
#
|
|
360
|
-
# IMW.open('/path/to/dir').join('subdir')
|
|
361
|
-
# #=> IMW::Resource at '/path/to/dir/subdir'
|
|
362
|
-
#
|
|
363
|
-
# @param [Array<String>] paths
|
|
364
|
-
# @return [IMW::Resource]
|
|
365
|
-
def join *paths
|
|
366
|
-
IMW.open(File.join(stripped_uri.to_s, *paths))
|
|
367
|
-
end
|
|
368
|
-
|
|
369
|
-
# Open (and create if necessary) a subdirectory beneath this
|
|
370
|
-
# directory.
|
|
371
|
-
#
|
|
372
|
-
# @params [Array<String>] paths
|
|
373
|
-
# @return [IMW::Resource]
|
|
374
|
-
def subdir! *paths
|
|
375
|
-
IMW.dir!(File.join(stripped_uri.to_s, *paths))
|
|
376
|
-
end
|
|
377
|
-
|
|
378
|
-
# Recursively walk down this directory
|
|
379
|
-
def walk(options={}, &block)
|
|
380
|
-
require 'find'
|
|
381
|
-
Find.find(path) do |path|
|
|
382
|
-
if options[:only]
|
|
383
|
-
next if options[:only] == :files && !File.file?(path)
|
|
384
|
-
next if options[:only] == :directories && !File.directory?(path)
|
|
385
|
-
next if options[:only] == :symlinks && !File.symlink?(path)
|
|
386
|
-
end
|
|
387
|
-
yield path
|
|
388
|
-
end
|
|
389
|
-
end
|
|
390
|
-
|
|
391
|
-
# The directory summary includes the following information
|
|
392
|
-
# - size
|
|
393
|
-
# - num_files
|
|
394
|
-
#
|
|
395
|
-
# @return [Hash]
|
|
396
|
-
def external_summary
|
|
397
|
-
super().merge({
|
|
398
|
-
:size => size,
|
|
399
|
-
:num_files => contents.length,
|
|
400
|
-
})
|
|
401
|
-
end
|
|
402
|
-
|
|
403
|
-
end
|
|
404
|
-
end
|
|
405
|
-
end
|
|
406
|
-
end
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|