imw 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -1
- data/Rakefile +10 -0
- data/TODO +18 -0
- data/VERSION +1 -1
- data/bin/imw +1 -1
- data/etc/imwrc.rb +0 -50
- data/examples/dataset.rb +12 -0
- data/lib/imw/boot.rb +55 -9
- data/lib/imw/dataset/paths.rb +15 -24
- data/lib/imw/dataset/workflow.rb +131 -72
- data/lib/imw/dataset.rb +94 -186
- data/lib/imw/parsers/html_parser.rb +1 -1
- data/lib/imw/parsers.rb +1 -1
- data/lib/imw/repository.rb +3 -27
- data/lib/imw/resource.rb +190 -0
- data/lib/imw/resources/archive.rb +97 -0
- data/lib/imw/resources/archives_and_compressed/bz2.rb +18 -0
- data/lib/imw/resources/archives_and_compressed/gz.rb +18 -0
- data/lib/imw/resources/archives_and_compressed/rar.rb +23 -0
- data/lib/imw/resources/archives_and_compressed/tar.rb +23 -0
- data/lib/imw/resources/archives_and_compressed/tarbz2.rb +78 -0
- data/lib/imw/resources/archives_and_compressed/targz.rb +78 -0
- data/lib/imw/resources/archives_and_compressed/zip.rb +57 -0
- data/lib/imw/resources/archives_and_compressed.rb +32 -0
- data/lib/imw/resources/compressed_file.rb +89 -0
- data/lib/imw/resources/compressible.rb +77 -0
- data/lib/imw/resources/formats/delimited.rb +92 -0
- data/lib/imw/resources/formats/excel.rb +125 -0
- data/lib/imw/resources/formats/json.rb +53 -0
- data/lib/imw/resources/formats/sgml.rb +72 -0
- data/lib/imw/resources/formats/yaml.rb +53 -0
- data/lib/imw/resources/formats.rb +32 -0
- data/lib/imw/resources/local.rb +198 -0
- data/lib/imw/resources/remote.rb +110 -0
- data/lib/imw/resources/schemes/hdfs.rb +242 -0
- data/lib/imw/resources/schemes/http.rb +161 -0
- data/lib/imw/resources/schemes/s3.rb +137 -0
- data/lib/imw/resources/schemes.rb +19 -0
- data/lib/imw/resources.rb +118 -0
- data/lib/imw/runner.rb +5 -4
- data/lib/imw/transforms/archiver.rb +215 -0
- data/lib/imw/transforms/transferer.rb +103 -0
- data/lib/imw/transforms.rb +8 -0
- data/lib/imw/utils/error.rb +26 -30
- data/lib/imw/utils/extensions/array.rb +5 -15
- data/lib/imw/utils/extensions/hash.rb +6 -16
- data/lib/imw/utils/extensions/hpricot.rb +0 -14
- data/lib/imw/utils/extensions/string.rb +5 -15
- data/lib/imw/utils/extensions/symbol.rb +0 -13
- data/lib/imw/utils/extensions.rb +65 -0
- data/lib/imw/utils/log.rb +14 -13
- data/lib/imw/utils/misc.rb +0 -6
- data/lib/imw/utils/paths.rb +101 -42
- data/lib/imw/utils/version.rb +8 -9
- data/lib/imw/utils.rb +2 -18
- data/lib/imw.rb +92 -17
- data/spec/data/sample.csv +1 -1
- data/spec/data/sample.json +1 -0
- data/spec/data/sample.tsv +1 -1
- data/spec/data/sample.txt +1 -1
- data/spec/data/sample.xml +1 -1
- data/spec/data/sample.yaml +1 -1
- data/spec/imw/dataset/paths_spec.rb +32 -0
- data/spec/imw/dataset/workflow_spec.rb +41 -0
- data/spec/imw/resource_spec.rb +79 -0
- data/spec/imw/resources/archive_spec.rb +69 -0
- data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +15 -0
- data/spec/imw/resources/archives_and_compressed/gz_spec.rb +15 -0
- data/spec/imw/resources/archives_and_compressed/rar_spec.rb +16 -0
- data/spec/imw/resources/archives_and_compressed/tar_spec.rb +16 -0
- data/spec/imw/resources/archives_and_compressed/tarbz2_spec.rb +24 -0
- data/spec/imw/resources/archives_and_compressed/targz_spec.rb +21 -0
- data/spec/imw/resources/archives_and_compressed/zip_spec.rb +16 -0
- data/spec/imw/resources/compressed_file_spec.rb +48 -0
- data/spec/imw/resources/compressible_spec.rb +36 -0
- data/spec/imw/resources/formats/delimited_spec.rb +33 -0
- data/spec/imw/resources/formats/json_spec.rb +32 -0
- data/spec/imw/resources/formats/sgml_spec.rb +24 -0
- data/spec/imw/resources/formats/yaml_spec.rb +41 -0
- data/spec/imw/resources/local_spec.rb +98 -0
- data/spec/imw/resources/remote_spec.rb +35 -0
- data/spec/imw/resources/schemes/hdfs_spec.rb +61 -0
- data/spec/imw/resources/schemes/http_spec.rb +19 -0
- data/spec/imw/resources/schemes/s3_spec.rb +19 -0
- data/spec/imw/transforms/archiver_spec.rb +120 -0
- data/spec/imw/transforms/transferer_spec.rb +113 -0
- data/spec/imw/utils/paths_spec.rb +5 -33
- data/spec/imw/utils/shared_paths_spec.rb +29 -0
- data/spec/spec_helper.rb +5 -5
- data/spec/support/paths_matcher.rb +67 -0
- data/spec/support/random.rb +39 -36
- metadata +88 -75
- data/lib/imw/dataset/task.rb +0 -41
- data/lib/imw/files/archive.rb +0 -113
- data/lib/imw/files/basicfile.rb +0 -122
- data/lib/imw/files/binary.rb +0 -28
- data/lib/imw/files/compressed_file.rb +0 -93
- data/lib/imw/files/compressed_files_and_archives.rb +0 -334
- data/lib/imw/files/compressible.rb +0 -103
- data/lib/imw/files/csv.rb +0 -113
- data/lib/imw/files/directory.rb +0 -62
- data/lib/imw/files/excel.rb +0 -84
- data/lib/imw/files/json.rb +0 -41
- data/lib/imw/files/sgml.rb +0 -46
- data/lib/imw/files/text.rb +0 -68
- data/lib/imw/files/yaml.rb +0 -46
- data/lib/imw/files.rb +0 -125
- data/lib/imw/packagers/archiver.rb +0 -126
- data/lib/imw/packagers/s3_mover.rb +0 -36
- data/lib/imw/packagers.rb +0 -8
- data/lib/imw/utils/components.rb +0 -61
- data/lib/imw/utils/config.rb +0 -46
- data/lib/imw/utils/extensions/class/attribute_accessors.rb +0 -8
- data/lib/imw/utils/extensions/core.rb +0 -27
- data/lib/imw/utils/extensions/dir.rb +0 -24
- data/lib/imw/utils/extensions/file_core.rb +0 -64
- data/lib/imw/utils/extensions/typed_struct.rb +0 -22
- data/lib/imw/utils/extensions/uri.rb +0 -59
- data/lib/imw/utils/view/dump_csv.rb +0 -112
- data/lib/imw/utils/view/dump_csv_older.rb +0 -117
- data/lib/imw/utils/view.rb +0 -113
- data/spec/imw/dataset/datamapper/uri_spec.rb +0 -43
- data/spec/imw/dataset/datamapper_spec_helper.rb +0 -11
- data/spec/imw/files/archive_spec.rb +0 -118
- data/spec/imw/files/basicfile_spec.rb +0 -121
- data/spec/imw/files/bz2_spec.rb +0 -32
- data/spec/imw/files/compressed_file_spec.rb +0 -96
- data/spec/imw/files/compressible_spec.rb +0 -100
- data/spec/imw/files/file_spec.rb +0 -144
- data/spec/imw/files/gz_spec.rb +0 -32
- data/spec/imw/files/rar_spec.rb +0 -33
- data/spec/imw/files/tar_spec.rb +0 -31
- data/spec/imw/files/text_spec.rb +0 -23
- data/spec/imw/files/zip_spec.rb +0 -31
- data/spec/imw/files_spec.rb +0 -38
- data/spec/imw/packagers/archiver_spec.rb +0 -125
- data/spec/imw/packagers/s3_mover_spec.rb +0 -7
- data/spec/imw/utils/extensions/file_core_spec.rb +0 -72
- data/spec/imw/utils/extensions/find_spec.rb +0 -113
- data/spec/imw/workflow/rip/local_spec.rb +0 -89
- data/spec/imw/workflow/rip_spec.rb +0 -27
- data/spec/support/archive_contents_matcher.rb +0 -94
- data/spec/support/directory_contents_matcher.rb +0 -61
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: imw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dhruv Bansal
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2010-
|
13
|
+
date: 2010-05-12 00:00:00 -05:00
|
14
14
|
default_executable: imw
|
15
15
|
dependencies: []
|
16
16
|
|
@@ -23,61 +23,67 @@ extensions: []
|
|
23
23
|
extra_rdoc_files:
|
24
24
|
- LICENSE
|
25
25
|
- README.rdoc
|
26
|
+
- TODO
|
26
27
|
files:
|
27
28
|
- .gitignore
|
28
29
|
- CHANGELOG
|
29
30
|
- LICENSE
|
30
31
|
- README.rdoc
|
31
32
|
- Rakefile
|
33
|
+
- TODO
|
32
34
|
- VERSION
|
33
35
|
- bin/imw
|
34
36
|
- etc/imwrc.rb
|
37
|
+
- examples/dataset.rb
|
35
38
|
- lib/imw.rb
|
36
39
|
- lib/imw/boot.rb
|
37
40
|
- lib/imw/dataset.rb
|
38
41
|
- lib/imw/dataset/paths.rb
|
39
|
-
- lib/imw/dataset/task.rb
|
40
42
|
- lib/imw/dataset/workflow.rb
|
41
|
-
- lib/imw/files.rb
|
42
|
-
- lib/imw/files/archive.rb
|
43
|
-
- lib/imw/files/basicfile.rb
|
44
|
-
- lib/imw/files/binary.rb
|
45
|
-
- lib/imw/files/compressed_file.rb
|
46
|
-
- lib/imw/files/compressed_files_and_archives.rb
|
47
|
-
- lib/imw/files/compressible.rb
|
48
|
-
- lib/imw/files/csv.rb
|
49
|
-
- lib/imw/files/directory.rb
|
50
|
-
- lib/imw/files/excel.rb
|
51
|
-
- lib/imw/files/json.rb
|
52
|
-
- lib/imw/files/sgml.rb
|
53
|
-
- lib/imw/files/text.rb
|
54
|
-
- lib/imw/files/yaml.rb
|
55
|
-
- lib/imw/packagers.rb
|
56
|
-
- lib/imw/packagers/archiver.rb
|
57
|
-
- lib/imw/packagers/s3_mover.rb
|
58
43
|
- lib/imw/parsers.rb
|
59
44
|
- lib/imw/parsers/html_parser.rb
|
60
45
|
- lib/imw/parsers/html_parser/matchers.rb
|
61
46
|
- lib/imw/parsers/line_parser.rb
|
62
47
|
- lib/imw/parsers/regexp_parser.rb
|
63
48
|
- lib/imw/repository.rb
|
49
|
+
- lib/imw/resource.rb
|
50
|
+
- lib/imw/resources.rb
|
51
|
+
- lib/imw/resources/archive.rb
|
52
|
+
- lib/imw/resources/archives_and_compressed.rb
|
53
|
+
- lib/imw/resources/archives_and_compressed/bz2.rb
|
54
|
+
- lib/imw/resources/archives_and_compressed/gz.rb
|
55
|
+
- lib/imw/resources/archives_and_compressed/rar.rb
|
56
|
+
- lib/imw/resources/archives_and_compressed/tar.rb
|
57
|
+
- lib/imw/resources/archives_and_compressed/tarbz2.rb
|
58
|
+
- lib/imw/resources/archives_and_compressed/targz.rb
|
59
|
+
- lib/imw/resources/archives_and_compressed/zip.rb
|
60
|
+
- lib/imw/resources/compressed_file.rb
|
61
|
+
- lib/imw/resources/compressible.rb
|
62
|
+
- lib/imw/resources/formats.rb
|
63
|
+
- lib/imw/resources/formats/delimited.rb
|
64
|
+
- lib/imw/resources/formats/excel.rb
|
65
|
+
- lib/imw/resources/formats/json.rb
|
66
|
+
- lib/imw/resources/formats/sgml.rb
|
67
|
+
- lib/imw/resources/formats/yaml.rb
|
68
|
+
- lib/imw/resources/local.rb
|
69
|
+
- lib/imw/resources/remote.rb
|
70
|
+
- lib/imw/resources/schemes.rb
|
71
|
+
- lib/imw/resources/schemes/hdfs.rb
|
72
|
+
- lib/imw/resources/schemes/http.rb
|
73
|
+
- lib/imw/resources/schemes/s3.rb
|
64
74
|
- lib/imw/runner.rb
|
75
|
+
- lib/imw/transforms.rb
|
76
|
+
- lib/imw/transforms/archiver.rb
|
77
|
+
- lib/imw/transforms/transferer.rb
|
65
78
|
- lib/imw/utils.rb
|
66
|
-
- lib/imw/utils/components.rb
|
67
|
-
- lib/imw/utils/config.rb
|
68
79
|
- lib/imw/utils/error.rb
|
80
|
+
- lib/imw/utils/extensions.rb
|
69
81
|
- lib/imw/utils/extensions/array.rb
|
70
|
-
- lib/imw/utils/extensions/class/attribute_accessors.rb
|
71
|
-
- lib/imw/utils/extensions/core.rb
|
72
|
-
- lib/imw/utils/extensions/dir.rb
|
73
|
-
- lib/imw/utils/extensions/file_core.rb
|
74
82
|
- lib/imw/utils/extensions/hash.rb
|
75
83
|
- lib/imw/utils/extensions/hpricot.rb
|
76
84
|
- lib/imw/utils/extensions/string.rb
|
77
85
|
- lib/imw/utils/extensions/struct.rb
|
78
86
|
- lib/imw/utils/extensions/symbol.rb
|
79
|
-
- lib/imw/utils/extensions/typed_struct.rb
|
80
|
-
- lib/imw/utils/extensions/uri.rb
|
81
87
|
- lib/imw/utils/log.rb
|
82
88
|
- lib/imw/utils/misc.rb
|
83
89
|
- lib/imw/utils/paths.rb
|
@@ -85,45 +91,47 @@ files:
|
|
85
91
|
- lib/imw/utils/uuid.rb
|
86
92
|
- lib/imw/utils/validate.rb
|
87
93
|
- lib/imw/utils/version.rb
|
88
|
-
- lib/imw/utils/view.rb
|
89
|
-
- lib/imw/utils/view/dump_csv.rb
|
90
|
-
- lib/imw/utils/view/dump_csv_older.rb
|
91
94
|
- spec/data/sample.csv
|
95
|
+
- spec/data/sample.json
|
92
96
|
- spec/data/sample.tsv
|
93
97
|
- spec/data/sample.txt
|
94
98
|
- spec/data/sample.xml
|
95
99
|
- spec/data/sample.yaml
|
96
|
-
- spec/imw/dataset/
|
97
|
-
- spec/imw/dataset/
|
98
|
-
- spec/imw/files/archive_spec.rb
|
99
|
-
- spec/imw/files/basicfile_spec.rb
|
100
|
-
- spec/imw/files/bz2_spec.rb
|
101
|
-
- spec/imw/files/compressed_file_spec.rb
|
102
|
-
- spec/imw/files/compressible_spec.rb
|
103
|
-
- spec/imw/files/file_spec.rb
|
104
|
-
- spec/imw/files/gz_spec.rb
|
105
|
-
- spec/imw/files/rar_spec.rb
|
106
|
-
- spec/imw/files/tar_spec.rb
|
107
|
-
- spec/imw/files/text_spec.rb
|
108
|
-
- spec/imw/files/zip_spec.rb
|
109
|
-
- spec/imw/files_spec.rb
|
110
|
-
- spec/imw/packagers/archiver_spec.rb
|
111
|
-
- spec/imw/packagers/s3_mover_spec.rb
|
100
|
+
- spec/imw/dataset/paths_spec.rb
|
101
|
+
- spec/imw/dataset/workflow_spec.rb
|
112
102
|
- spec/imw/parsers/line_parser_spec.rb
|
113
103
|
- spec/imw/parsers/regexp_parser_spec.rb
|
114
|
-
- spec/imw/
|
115
|
-
- spec/imw/
|
104
|
+
- spec/imw/resource_spec.rb
|
105
|
+
- spec/imw/resources/archive_spec.rb
|
106
|
+
- spec/imw/resources/archives_and_compressed/bz2_spec.rb
|
107
|
+
- spec/imw/resources/archives_and_compressed/gz_spec.rb
|
108
|
+
- spec/imw/resources/archives_and_compressed/rar_spec.rb
|
109
|
+
- spec/imw/resources/archives_and_compressed/tar_spec.rb
|
110
|
+
- spec/imw/resources/archives_and_compressed/tarbz2_spec.rb
|
111
|
+
- spec/imw/resources/archives_and_compressed/targz_spec.rb
|
112
|
+
- spec/imw/resources/archives_and_compressed/zip_spec.rb
|
113
|
+
- spec/imw/resources/compressed_file_spec.rb
|
114
|
+
- spec/imw/resources/compressible_spec.rb
|
115
|
+
- spec/imw/resources/formats/delimited_spec.rb
|
116
|
+
- spec/imw/resources/formats/json_spec.rb
|
117
|
+
- spec/imw/resources/formats/sgml_spec.rb
|
118
|
+
- spec/imw/resources/formats/yaml_spec.rb
|
119
|
+
- spec/imw/resources/local_spec.rb
|
120
|
+
- spec/imw/resources/remote_spec.rb
|
121
|
+
- spec/imw/resources/schemes/hdfs_spec.rb
|
122
|
+
- spec/imw/resources/schemes/http_spec.rb
|
123
|
+
- spec/imw/resources/schemes/s3_spec.rb
|
124
|
+
- spec/imw/transforms/archiver_spec.rb
|
125
|
+
- spec/imw/transforms/transferer_spec.rb
|
116
126
|
- spec/imw/utils/paths_spec.rb
|
117
|
-
- spec/imw/
|
118
|
-
- spec/imw/workflow/rip_spec.rb
|
127
|
+
- spec/imw/utils/shared_paths_spec.rb
|
119
128
|
- spec/rcov.opts
|
120
129
|
- spec/spec.opts
|
121
130
|
- spec/spec_helper.rb
|
122
|
-
- spec/support/archive_contents_matcher.rb
|
123
131
|
- spec/support/custom_matchers.rb
|
124
|
-
- spec/support/directory_contents_matcher.rb
|
125
132
|
- spec/support/extensions.rb
|
126
133
|
- spec/support/file_contents_matcher.rb
|
134
|
+
- spec/support/paths_matcher.rb
|
127
135
|
- spec/support/random.rb
|
128
136
|
- spec/support/without_regard_to_order_matcher.rb
|
129
137
|
has_rdoc: true
|
@@ -155,34 +163,39 @@ signing_key:
|
|
155
163
|
specification_version: 3
|
156
164
|
summary: The Infinite Monkeywrench (IMW) makes acquiring, extracting, transforming, loading, and packaging data easy.
|
157
165
|
test_files:
|
158
|
-
- spec/imw/
|
159
|
-
- spec/imw/
|
160
|
-
- spec/imw/
|
161
|
-
- spec/imw/
|
162
|
-
- spec/imw/
|
163
|
-
- spec/imw/
|
166
|
+
- spec/imw/resources/compressed_file_spec.rb
|
167
|
+
- spec/imw/resources/archives_and_compressed/targz_spec.rb
|
168
|
+
- spec/imw/resources/archives_and_compressed/tar_spec.rb
|
169
|
+
- spec/imw/resources/archives_and_compressed/zip_spec.rb
|
170
|
+
- spec/imw/resources/archives_and_compressed/tarbz2_spec.rb
|
171
|
+
- spec/imw/resources/archives_and_compressed/bz2_spec.rb
|
172
|
+
- spec/imw/resources/archives_and_compressed/rar_spec.rb
|
173
|
+
- spec/imw/resources/archives_and_compressed/gz_spec.rb
|
174
|
+
- spec/imw/resources/archive_spec.rb
|
175
|
+
- spec/imw/resources/local_spec.rb
|
176
|
+
- spec/imw/resources/remote_spec.rb
|
177
|
+
- spec/imw/resources/compressible_spec.rb
|
178
|
+
- spec/imw/resources/formats/json_spec.rb
|
179
|
+
- spec/imw/resources/formats/yaml_spec.rb
|
180
|
+
- spec/imw/resources/formats/delimited_spec.rb
|
181
|
+
- spec/imw/resources/formats/sgml_spec.rb
|
182
|
+
- spec/imw/resources/schemes/http_spec.rb
|
183
|
+
- spec/imw/resources/schemes/hdfs_spec.rb
|
184
|
+
- spec/imw/resources/schemes/s3_spec.rb
|
185
|
+
- spec/imw/dataset/paths_spec.rb
|
186
|
+
- spec/imw/dataset/workflow_spec.rb
|
164
187
|
- spec/imw/parsers/line_parser_spec.rb
|
165
188
|
- spec/imw/parsers/regexp_parser_spec.rb
|
166
|
-
- spec/imw/
|
167
|
-
- spec/imw/
|
168
|
-
- spec/imw/
|
169
|
-
- spec/imw/files/archive_spec.rb
|
170
|
-
- spec/imw/files/compressible_spec.rb
|
171
|
-
- spec/imw/files/tar_spec.rb
|
172
|
-
- spec/imw/files/zip_spec.rb
|
173
|
-
- spec/imw/files/text_spec.rb
|
174
|
-
- spec/imw/files/bz2_spec.rb
|
175
|
-
- spec/imw/files/rar_spec.rb
|
176
|
-
- spec/imw/files/gz_spec.rb
|
177
|
-
- spec/imw/files_spec.rb
|
189
|
+
- spec/imw/resource_spec.rb
|
190
|
+
- spec/imw/transforms/archiver_spec.rb
|
191
|
+
- spec/imw/transforms/transferer_spec.rb
|
178
192
|
- spec/imw/utils/paths_spec.rb
|
179
|
-
- spec/imw/utils/
|
180
|
-
- spec/imw/utils/extensions/file_core_spec.rb
|
193
|
+
- spec/imw/utils/shared_paths_spec.rb
|
181
194
|
- spec/spec_helper.rb
|
182
195
|
- spec/support/without_regard_to_order_matcher.rb
|
183
196
|
- spec/support/extensions.rb
|
184
|
-
- spec/support/archive_contents_matcher.rb
|
185
197
|
- spec/support/custom_matchers.rb
|
186
198
|
- spec/support/random.rb
|
199
|
+
- spec/support/paths_matcher.rb
|
187
200
|
- spec/support/file_contents_matcher.rb
|
188
|
-
-
|
201
|
+
- examples/dataset.rb
|
data/lib/imw/dataset/task.rb
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
require 'rake'
|
2
|
-
|
3
|
-
module IMW
|
4
|
-
|
5
|
-
Task = Class.new(Rake::Task)
|
6
|
-
FileTask = Class.new(Rake::FileTask)
|
7
|
-
FileCreationTask = Class.new(Rake::FileCreationTask)
|
8
|
-
|
9
|
-
class Dataset
|
10
|
-
include Rake::TaskManager
|
11
|
-
|
12
|
-
# Return a new (or existing) <tt>IMW::Task</tt> with the given
|
13
|
-
# +name+. Dependencies can be declared and a block passed in just
|
14
|
-
# as in Rake.
|
15
|
-
def task name, &block
|
16
|
-
self.define_task IMW::Task, name, &block
|
17
|
-
end
|
18
|
-
|
19
|
-
# Return a new (or existing) <tt>IMW::FileTask</tt> with the given
|
20
|
-
# +name+. Dependencies can be declared and a block passed in just
|
21
|
-
# as in Rake.
|
22
|
-
def file name, &block
|
23
|
-
self.define_task IMW::FileTask, name, &block
|
24
|
-
end
|
25
|
-
|
26
|
-
# Return a new (or existing) <tt>IMW::FileCreationTask</tt> with the given
|
27
|
-
# +name+. Dependencies can be declared and a block passed in just
|
28
|
-
# as in Rake.
|
29
|
-
def file_create name, &block
|
30
|
-
self.define_task IMW::FileCreationTask, name, &block
|
31
|
-
end
|
32
|
-
|
33
|
-
# Override this method to define default tasks for a subclass of
|
34
|
-
# IMW::Dataset.
|
35
|
-
def set_tasks
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
|
41
|
-
|
data/lib/imw/files/archive.rb
DELETED
@@ -1,113 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# h2. lib/imw/files/archive.rb -- describes archives of files
|
3
|
-
#
|
4
|
-
# == About
|
5
|
-
#
|
6
|
-
# Module for describing known archive types. An including archive
|
7
|
-
# type's class must define an instance variable +archive+ which is a
|
8
|
-
# hash with the following required keys:
|
9
|
-
#
|
10
|
-
# <tt>:program</tt>:: a symbol naming the program to be used. It
|
11
|
-
# should match one of the symbols in <tt>IMW::EXTERNAL_PROGRAMS</tt>
|
12
|
-
#
|
13
|
-
# <tt>:create_flags</tt>:: a string of flags to pass to the archiving
|
14
|
-
# program when creating the archive
|
15
|
-
#
|
16
|
-
# <tt>:append_flags</tt>:: a string of flags to pass to the archiving
|
17
|
-
# program when appending files to the archive
|
18
|
-
#
|
19
|
-
# <tt>:extract_flags</tt>:: a string of flags to pass to the archiving
|
20
|
-
# program when extracting the archive
|
21
|
-
#
|
22
|
-
# <tt>:list_flags</tt>:: a string of flags to pass to the archiving
|
23
|
-
# program when listing the archive's contents
|
24
|
-
#
|
25
|
-
# The +archive+ hash may also contain the entry:
|
26
|
-
#
|
27
|
-
# <tt>:unarchiving_program</tt>:: a symbol naming the program to be
|
28
|
-
# used to list/extract the archive. Useful only if this program
|
29
|
-
# differs from the program used to create the archive in the first
|
30
|
-
# place (i.e. - zip & unzip).
|
31
|
-
#
|
32
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
33
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
34
|
-
# License:: GPL 3.0
|
35
|
-
# Website:: http://infinitemonkeywrench.org/
|
36
|
-
#
|
37
|
-
# puts "#{File.basename(__FILE__)}: Put it all in one place so that when something goes wrong you'll know it immediately. You'll regret it, but at least you'll know." # at bottom
|
38
|
-
module IMW
|
39
|
-
module Files
|
40
|
-
|
41
|
-
module BasicFile
|
42
|
-
|
43
|
-
# Is this file an archive?
|
44
|
-
def archive?
|
45
|
-
false
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
module Archive
|
50
|
-
|
51
|
-
attr_reader :archive
|
52
|
-
|
53
|
-
# Is this file an archive?
|
54
|
-
def archive?
|
55
|
-
true
|
56
|
-
end
|
57
|
-
|
58
|
-
public
|
59
|
-
# Create this archive containing the given +paths+, which can be
|
60
|
-
# either a string or list of strings to be interpreted as paths
|
61
|
-
# to files/directories by the shell.
|
62
|
-
#
|
63
|
-
# Options:
|
64
|
-
# <tt>:force</tt> (false):: overwrite any existing archive at this path.
|
65
|
-
def create paths, opts = {}
|
66
|
-
opts = opts.reverse_merge({:force => false})
|
67
|
-
raise IMW::Error.new("An archive already exists at #{@path}.") if exist? and not opts[:force]
|
68
|
-
raise IMW::Error.new("Cannot create an archive of type #{@extname}") unless @archive[:create_flags]
|
69
|
-
paths = [paths] if paths.class == String
|
70
|
-
IMW.system IMW::EXTERNAL_PROGRAMS[@archive[:program]], @archive[:create_flags], @path, *paths
|
71
|
-
self
|
72
|
-
end
|
73
|
-
|
74
|
-
# Append to this archive the given +paths+, which can be
|
75
|
-
# either a string or list of strings to be interpreted as paths
|
76
|
-
# to files/directories by the shell.
|
77
|
-
def append paths
|
78
|
-
raise IMW::Error.new("Cannot append to an archive of type #{@archive[:program]}.") unless @archive[:append_flags]
|
79
|
-
paths = [paths] if paths.class == String
|
80
|
-
IMW.system IMW::EXTERNAL_PROGRAMS[@archive[:program]], @archive[:append_flags], @path, *paths
|
81
|
-
self
|
82
|
-
end
|
83
|
-
|
84
|
-
# Extract the files from this archive to the current directory.
|
85
|
-
def extract
|
86
|
-
raise IMW::Error.new("Cannot extract, #{@path} does not exist.") unless exist?
|
87
|
-
program = (@archive[:unarchiving_program] or @archive[:program])
|
88
|
-
IMW.system IMW::EXTERNAL_PROGRAMS[program], @archive[:extract_flags], @path
|
89
|
-
end
|
90
|
-
|
91
|
-
# Return a (sorted) list of contents in this archive.
|
92
|
-
def contents
|
93
|
-
raise IMW::Error.new("Cannot list contents, #{@path} does not exist.") unless exist?
|
94
|
-
program = (@archive[:unarchiving_program] or @archive[:program])
|
95
|
-
output = ''
|
96
|
-
command = [IMW::EXTERNAL_PROGRAMS[program], @archive[:list_flags], @path].join ' '
|
97
|
-
output += `#{command}`
|
98
|
-
archive_contents_string_to_array(output)
|
99
|
-
end
|
100
|
-
|
101
|
-
# Parse and format the output from the archive program's "list"
|
102
|
-
# command into an array of filenames.
|
103
|
-
#
|
104
|
-
# An including class can customize this method to match the
|
105
|
-
# output from the archiving program of that class.
|
106
|
-
def archive_contents_string_to_array string
|
107
|
-
string.split("\n")
|
108
|
-
end
|
109
|
-
end
|
110
|
-
end
|
111
|
-
end
|
112
|
-
|
113
|
-
|
data/lib/imw/files/basicfile.rb
DELETED
@@ -1,122 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# h2. lib/imw/files/file.rb -- base class for files
|
3
|
-
#
|
4
|
-
# == About
|
5
|
-
#
|
6
|
-
# Defines a base class for classes for specific filetypes to subclass.
|
7
|
-
#
|
8
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
9
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
10
|
-
# License:: GPL 3.0
|
11
|
-
# Website:: http://infinitemonkeywrench.org/
|
12
|
-
#
|
13
|
-
# puts "#{File.basename(__FILE__)}: At the very bottom of the office building, wedged between a small boulder and a rotting log you see a weathered manilla file folder. The writing on the tab is too faded to make out." # at bottom
|
14
|
-
module IMW
|
15
|
-
module Files
|
16
|
-
module BasicFile
|
17
|
-
|
18
|
-
attr_reader :uri, :host, :path, :dirname, :basename, :extname, :name
|
19
|
-
|
20
|
-
protected
|
21
|
-
|
22
|
-
def uri= uri
|
23
|
-
@uri = uri.is_a?(URI::Generic) ? uri : URI.parse(uri)
|
24
|
-
@host = self.uri.host
|
25
|
-
@path = local? ? ::File.expand_path(self.uri.path) : self.uri.path
|
26
|
-
@dirname = ::File.dirname path
|
27
|
-
@basename = ::File.basename path
|
28
|
-
@extname = find_extname
|
29
|
-
@name = @basename[0,@basename.length - @extname.length]
|
30
|
-
end
|
31
|
-
|
32
|
-
# Some files (like <tt>.tar.gz</tt>) have an "extra" extension.
|
33
|
-
# Classes in the <tt>IMW::Files</tt> module should define a
|
34
|
-
# class method <tt>extname</tt> which returns their full
|
35
|
-
# extension.
|
36
|
-
def find_extname
|
37
|
-
self.class.respond_to?(:extname) ? self.class.extname(path) : ::File.extname(path)
|
38
|
-
end
|
39
|
-
|
40
|
-
public
|
41
|
-
|
42
|
-
# Is this file on the local machine (the host of the file's URI is nil or 'file')?
|
43
|
-
def local?
|
44
|
-
host == 'file' || host.nil?
|
45
|
-
end
|
46
|
-
|
47
|
-
# Is this file on a remote machine?
|
48
|
-
def remote?
|
49
|
-
(! local?)
|
50
|
-
end
|
51
|
-
|
52
|
-
# Steal a bunch of class methods from File which only take a
|
53
|
-
# path as a first argument.
|
54
|
-
[:executable?, :executable_real?, :file?, :directory?, :ftype, :owned?, :pipe?, :readable?, :readable_real?, :setgid?, :setuid?, :size, :size?, :socket?, :split, :stat, :sticky?, :writable?, :writable_real?, :zero?].each do |class_method|
|
55
|
-
define_method class_method do
|
56
|
-
File.send(class_method, path)
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
|
-
# Is there a real file at the path of this File? Will attempt
|
61
|
-
# to open files online too to check.
|
62
|
-
def exist?
|
63
|
-
if local?
|
64
|
-
::File.exist?(path)
|
65
|
-
else
|
66
|
-
begin
|
67
|
-
true if open(uri)
|
68
|
-
rescue SocketError
|
69
|
-
false
|
70
|
-
end
|
71
|
-
end
|
72
|
-
end
|
73
|
-
alias_method :exists?, :exist?
|
74
|
-
|
75
|
-
# Delete this file.
|
76
|
-
def rm
|
77
|
-
raise IMW::PathError.new("cannot delete remote file #{uri}") unless local?
|
78
|
-
raise IMW::PathError.new("cannot delete #{uri}, doesn't exist!") unless exist?
|
79
|
-
FileUtils.rm path
|
80
|
-
end
|
81
|
-
alias_method :rm!, :rm
|
82
|
-
|
83
|
-
# Copy this file to +new_path+.
|
84
|
-
def cp new_path
|
85
|
-
raise IMW::PathError.new("cannot copy from #{path}, doesn't exist!") unless exist?
|
86
|
-
if local?
|
87
|
-
FileUtils.cp path, new_path
|
88
|
-
else
|
89
|
-
# FIXME better way to do this?
|
90
|
-
File.open(new_path,'w') { |f| f.write(open(uri).read) }
|
91
|
-
end
|
92
|
-
self.class.new(new_path)
|
93
|
-
end
|
94
|
-
|
95
|
-
# Copy this file to +dir+.
|
96
|
-
def cp_to_dir dir
|
97
|
-
cp File.join(File.expand_path(dir),basename)
|
98
|
-
end
|
99
|
-
|
100
|
-
# Move this file to +new_path+.
|
101
|
-
def mv new_path
|
102
|
-
raise IMW::PathError.new("cannot move from #{path}, doesn't exist!") unless exist?
|
103
|
-
if local?
|
104
|
-
FileUtils.mv path, new_path
|
105
|
-
else
|
106
|
-
# FIXME better way to do this?
|
107
|
-
File.open(new_path,'w') { |f| f.write(open(uri).read) }
|
108
|
-
end
|
109
|
-
self.class.new(new_path)
|
110
|
-
end
|
111
|
-
alias_method :mv!, :mv
|
112
|
-
|
113
|
-
# Move this file to +dir+.
|
114
|
-
def mv_to_dir dir
|
115
|
-
mv File.join(File.expand_path(dir),basename)
|
116
|
-
end
|
117
|
-
alias_method :mv_to_dir!, :mv_to_dir
|
118
|
-
end
|
119
|
-
end
|
120
|
-
end
|
121
|
-
|
122
|
-
|
data/lib/imw/files/binary.rb
DELETED
@@ -1,28 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# h2. lib/imw/files/binary.rb -- binary files
|
3
|
-
#
|
4
|
-
# == About
|
5
|
-
#
|
6
|
-
# Class for handling binary data.
|
7
|
-
#
|
8
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
9
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
10
|
-
# License:: GPL 3.0
|
11
|
-
# Website:: http://infinitemonkeywrench.org/
|
12
|
-
#
|
13
|
-
# puts "#{File.basename(__FILE__)}: Something clever" # at bottom
|
14
|
-
module IMW
|
15
|
-
module Files
|
16
|
-
|
17
|
-
class Binary
|
18
|
-
|
19
|
-
include IMW::Files::BasicFile
|
20
|
-
include IMW::Files::Compressible
|
21
|
-
|
22
|
-
def initialize uri
|
23
|
-
self.uri= uri
|
24
|
-
end
|
25
|
-
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
@@ -1,93 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# h2. lib/imw/files/compressed_file.rb -- class describing compressed files
|
3
|
-
#
|
4
|
-
# == About
|
5
|
-
#
|
6
|
-
# Compression of files is handled via the
|
7
|
-
# <tt>IMW::Files::Compressible</tt> module which can be included by
|
8
|
-
# any object that has a <tt>@path</tt> attribute. The methods defined
|
9
|
-
# there compress files and return this
|
10
|
-
# <tt>IMW::Files::CompressedFile</tt> object which has methods for
|
11
|
-
# decompression.
|
12
|
-
#
|
13
|
-
# A subclass of this class must define a +compression+ instance
|
14
|
-
# variable which is a hash with the following keys:
|
15
|
-
#
|
16
|
-
# <tt>:program</tt>:: a symbol naming the program used for
|
17
|
-
# compression/decompression which must be one of the symbols in
|
18
|
-
# <tt>IMW::EXTERNAL_PROGRAMS</tt>
|
19
|
-
#
|
20
|
-
# <tt>:decompression_flags</tt>:: a string of flags to pass to the
|
21
|
-
# compression program when decompressing the file.
|
22
|
-
#
|
23
|
-
# A subclass must also define the method +decompressed_path+ which
|
24
|
-
# returns the path of the file post-decompression.
|
25
|
-
#
|
26
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
27
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
28
|
-
# License:: GPL 3.0
|
29
|
-
# Website:: http://infinitemonkeywrench.org/
|
30
|
-
#
|
31
|
-
# puts "#{File.basename(__FILE__)}: Have you ever folded up the wrapper of a soda straw into a little accordion shape and let a drop of water soak into it?" # at bottom
|
32
|
-
module IMW
|
33
|
-
module Files
|
34
|
-
|
35
|
-
module BasicFile
|
36
|
-
def compressed?
|
37
|
-
false
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
# A module which provides methods for decompressing a compressed
|
42
|
-
# file. An including class should define an instance variable
|
43
|
-
# <tt>@compression</tt> with two keys:
|
44
|
-
#
|
45
|
-
# <tt>:program</tt>:: a symbol from <tt>IMW::EXTERNAL_PROGRAMS</tt>
|
46
|
-
# <tt>:decompression_flags</tt>:: a string specifying flags to pass to the decompression program
|
47
|
-
module CompressedFile
|
48
|
-
|
49
|
-
attr_reader :compression
|
50
|
-
|
51
|
-
# Is this file compressed?
|
52
|
-
def compressed?
|
53
|
-
true
|
54
|
-
end
|
55
|
-
|
56
|
-
# Construct the command passed to the shell to decompress this
|
57
|
-
# file.
|
58
|
-
def decompression_command
|
59
|
-
[IMW::EXTERNAL_PROGRAMS[@compression[:program]],@compression[:decompression_flags],@path].join ' '
|
60
|
-
end
|
61
|
-
|
62
|
-
public
|
63
|
-
# Decompress this file in its present directory overwriting any
|
64
|
-
# existing files and without saving the original compressed
|
65
|
-
# file.
|
66
|
-
def decompress!
|
67
|
-
raise IMW::PathError.new("cannot decompress #{@path}, doesn't exist!") unless exist?
|
68
|
-
FileUtils.cd(@dirname) { IMW.system decompression_command }
|
69
|
-
IMW.open(decompressed_path)
|
70
|
-
end
|
71
|
-
|
72
|
-
# Decompress this file in its present directory, overwriting any
|
73
|
-
# existing files while keeping the original compressed file.
|
74
|
-
#
|
75
|
-
# The implementation is a little stupid, as the file is
|
76
|
-
# needlessly copied.
|
77
|
-
def decompress
|
78
|
-
raise IMW::PathError.new("cannot decompress #{@path}, doesn't exist!") unless exist?
|
79
|
-
begin
|
80
|
-
FileUtils.cp(@path,@path + 'copy')
|
81
|
-
decompress!
|
82
|
-
ensure
|
83
|
-
FileUtils.mv(@path + 'copy',@path)
|
84
|
-
end
|
85
|
-
IMW.open(decompressed_path)
|
86
|
-
end
|
87
|
-
|
88
|
-
end
|
89
|
-
end
|
90
|
-
end
|
91
|
-
|
92
|
-
|
93
|
-
|