imw 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -1
- data/Rakefile +10 -0
- data/TODO +18 -0
- data/VERSION +1 -1
- data/bin/imw +1 -1
- data/etc/imwrc.rb +0 -50
- data/examples/dataset.rb +12 -0
- data/lib/imw/boot.rb +55 -9
- data/lib/imw/dataset/paths.rb +15 -24
- data/lib/imw/dataset/workflow.rb +131 -72
- data/lib/imw/dataset.rb +94 -186
- data/lib/imw/parsers/html_parser.rb +1 -1
- data/lib/imw/parsers.rb +1 -1
- data/lib/imw/repository.rb +3 -27
- data/lib/imw/resource.rb +190 -0
- data/lib/imw/resources/archive.rb +97 -0
- data/lib/imw/resources/archives_and_compressed/bz2.rb +18 -0
- data/lib/imw/resources/archives_and_compressed/gz.rb +18 -0
- data/lib/imw/resources/archives_and_compressed/rar.rb +23 -0
- data/lib/imw/resources/archives_and_compressed/tar.rb +23 -0
- data/lib/imw/resources/archives_and_compressed/tarbz2.rb +78 -0
- data/lib/imw/resources/archives_and_compressed/targz.rb +78 -0
- data/lib/imw/resources/archives_and_compressed/zip.rb +57 -0
- data/lib/imw/resources/archives_and_compressed.rb +32 -0
- data/lib/imw/resources/compressed_file.rb +89 -0
- data/lib/imw/resources/compressible.rb +77 -0
- data/lib/imw/resources/formats/delimited.rb +92 -0
- data/lib/imw/resources/formats/excel.rb +125 -0
- data/lib/imw/resources/formats/json.rb +53 -0
- data/lib/imw/resources/formats/sgml.rb +72 -0
- data/lib/imw/resources/formats/yaml.rb +53 -0
- data/lib/imw/resources/formats.rb +32 -0
- data/lib/imw/resources/local.rb +198 -0
- data/lib/imw/resources/remote.rb +110 -0
- data/lib/imw/resources/schemes/hdfs.rb +242 -0
- data/lib/imw/resources/schemes/http.rb +161 -0
- data/lib/imw/resources/schemes/s3.rb +137 -0
- data/lib/imw/resources/schemes.rb +19 -0
- data/lib/imw/resources.rb +118 -0
- data/lib/imw/runner.rb +5 -4
- data/lib/imw/transforms/archiver.rb +215 -0
- data/lib/imw/transforms/transferer.rb +103 -0
- data/lib/imw/transforms.rb +8 -0
- data/lib/imw/utils/error.rb +26 -30
- data/lib/imw/utils/extensions/array.rb +5 -15
- data/lib/imw/utils/extensions/hash.rb +6 -16
- data/lib/imw/utils/extensions/hpricot.rb +0 -14
- data/lib/imw/utils/extensions/string.rb +5 -15
- data/lib/imw/utils/extensions/symbol.rb +0 -13
- data/lib/imw/utils/extensions.rb +65 -0
- data/lib/imw/utils/log.rb +14 -13
- data/lib/imw/utils/misc.rb +0 -6
- data/lib/imw/utils/paths.rb +101 -42
- data/lib/imw/utils/version.rb +8 -9
- data/lib/imw/utils.rb +2 -18
- data/lib/imw.rb +92 -17
- data/spec/data/sample.csv +1 -1
- data/spec/data/sample.json +1 -0
- data/spec/data/sample.tsv +1 -1
- data/spec/data/sample.txt +1 -1
- data/spec/data/sample.xml +1 -1
- data/spec/data/sample.yaml +1 -1
- data/spec/imw/dataset/paths_spec.rb +32 -0
- data/spec/imw/dataset/workflow_spec.rb +41 -0
- data/spec/imw/resource_spec.rb +79 -0
- data/spec/imw/resources/archive_spec.rb +69 -0
- data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +15 -0
- data/spec/imw/resources/archives_and_compressed/gz_spec.rb +15 -0
- data/spec/imw/resources/archives_and_compressed/rar_spec.rb +16 -0
- data/spec/imw/resources/archives_and_compressed/tar_spec.rb +16 -0
- data/spec/imw/resources/archives_and_compressed/tarbz2_spec.rb +24 -0
- data/spec/imw/resources/archives_and_compressed/targz_spec.rb +21 -0
- data/spec/imw/resources/archives_and_compressed/zip_spec.rb +16 -0
- data/spec/imw/resources/compressed_file_spec.rb +48 -0
- data/spec/imw/resources/compressible_spec.rb +36 -0
- data/spec/imw/resources/formats/delimited_spec.rb +33 -0
- data/spec/imw/resources/formats/json_spec.rb +32 -0
- data/spec/imw/resources/formats/sgml_spec.rb +24 -0
- data/spec/imw/resources/formats/yaml_spec.rb +41 -0
- data/spec/imw/resources/local_spec.rb +98 -0
- data/spec/imw/resources/remote_spec.rb +35 -0
- data/spec/imw/resources/schemes/hdfs_spec.rb +61 -0
- data/spec/imw/resources/schemes/http_spec.rb +19 -0
- data/spec/imw/resources/schemes/s3_spec.rb +19 -0
- data/spec/imw/transforms/archiver_spec.rb +120 -0
- data/spec/imw/transforms/transferer_spec.rb +113 -0
- data/spec/imw/utils/paths_spec.rb +5 -33
- data/spec/imw/utils/shared_paths_spec.rb +29 -0
- data/spec/spec_helper.rb +5 -5
- data/spec/support/paths_matcher.rb +67 -0
- data/spec/support/random.rb +39 -36
- metadata +88 -75
- data/lib/imw/dataset/task.rb +0 -41
- data/lib/imw/files/archive.rb +0 -113
- data/lib/imw/files/basicfile.rb +0 -122
- data/lib/imw/files/binary.rb +0 -28
- data/lib/imw/files/compressed_file.rb +0 -93
- data/lib/imw/files/compressed_files_and_archives.rb +0 -334
- data/lib/imw/files/compressible.rb +0 -103
- data/lib/imw/files/csv.rb +0 -113
- data/lib/imw/files/directory.rb +0 -62
- data/lib/imw/files/excel.rb +0 -84
- data/lib/imw/files/json.rb +0 -41
- data/lib/imw/files/sgml.rb +0 -46
- data/lib/imw/files/text.rb +0 -68
- data/lib/imw/files/yaml.rb +0 -46
- data/lib/imw/files.rb +0 -125
- data/lib/imw/packagers/archiver.rb +0 -126
- data/lib/imw/packagers/s3_mover.rb +0 -36
- data/lib/imw/packagers.rb +0 -8
- data/lib/imw/utils/components.rb +0 -61
- data/lib/imw/utils/config.rb +0 -46
- data/lib/imw/utils/extensions/class/attribute_accessors.rb +0 -8
- data/lib/imw/utils/extensions/core.rb +0 -27
- data/lib/imw/utils/extensions/dir.rb +0 -24
- data/lib/imw/utils/extensions/file_core.rb +0 -64
- data/lib/imw/utils/extensions/typed_struct.rb +0 -22
- data/lib/imw/utils/extensions/uri.rb +0 -59
- data/lib/imw/utils/view/dump_csv.rb +0 -112
- data/lib/imw/utils/view/dump_csv_older.rb +0 -117
- data/lib/imw/utils/view.rb +0 -113
- data/spec/imw/dataset/datamapper/uri_spec.rb +0 -43
- data/spec/imw/dataset/datamapper_spec_helper.rb +0 -11
- data/spec/imw/files/archive_spec.rb +0 -118
- data/spec/imw/files/basicfile_spec.rb +0 -121
- data/spec/imw/files/bz2_spec.rb +0 -32
- data/spec/imw/files/compressed_file_spec.rb +0 -96
- data/spec/imw/files/compressible_spec.rb +0 -100
- data/spec/imw/files/file_spec.rb +0 -144
- data/spec/imw/files/gz_spec.rb +0 -32
- data/spec/imw/files/rar_spec.rb +0 -33
- data/spec/imw/files/tar_spec.rb +0 -31
- data/spec/imw/files/text_spec.rb +0 -23
- data/spec/imw/files/zip_spec.rb +0 -31
- data/spec/imw/files_spec.rb +0 -38
- data/spec/imw/packagers/archiver_spec.rb +0 -125
- data/spec/imw/packagers/s3_mover_spec.rb +0 -7
- data/spec/imw/utils/extensions/file_core_spec.rb +0 -72
- data/spec/imw/utils/extensions/find_spec.rb +0 -113
- data/spec/imw/workflow/rip/local_spec.rb +0 -89
- data/spec/imw/workflow/rip_spec.rb +0 -27
- data/spec/support/archive_contents_matcher.rb +0 -94
- data/spec/support/directory_contents_matcher.rb +0 -61
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: imw
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.1
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Dhruv Bansal
|
|
@@ -10,7 +10,7 @@ autorequire:
|
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
12
|
|
|
13
|
-
date: 2010-
|
|
13
|
+
date: 2010-05-12 00:00:00 -05:00
|
|
14
14
|
default_executable: imw
|
|
15
15
|
dependencies: []
|
|
16
16
|
|
|
@@ -23,61 +23,67 @@ extensions: []
|
|
|
23
23
|
extra_rdoc_files:
|
|
24
24
|
- LICENSE
|
|
25
25
|
- README.rdoc
|
|
26
|
+
- TODO
|
|
26
27
|
files:
|
|
27
28
|
- .gitignore
|
|
28
29
|
- CHANGELOG
|
|
29
30
|
- LICENSE
|
|
30
31
|
- README.rdoc
|
|
31
32
|
- Rakefile
|
|
33
|
+
- TODO
|
|
32
34
|
- VERSION
|
|
33
35
|
- bin/imw
|
|
34
36
|
- etc/imwrc.rb
|
|
37
|
+
- examples/dataset.rb
|
|
35
38
|
- lib/imw.rb
|
|
36
39
|
- lib/imw/boot.rb
|
|
37
40
|
- lib/imw/dataset.rb
|
|
38
41
|
- lib/imw/dataset/paths.rb
|
|
39
|
-
- lib/imw/dataset/task.rb
|
|
40
42
|
- lib/imw/dataset/workflow.rb
|
|
41
|
-
- lib/imw/files.rb
|
|
42
|
-
- lib/imw/files/archive.rb
|
|
43
|
-
- lib/imw/files/basicfile.rb
|
|
44
|
-
- lib/imw/files/binary.rb
|
|
45
|
-
- lib/imw/files/compressed_file.rb
|
|
46
|
-
- lib/imw/files/compressed_files_and_archives.rb
|
|
47
|
-
- lib/imw/files/compressible.rb
|
|
48
|
-
- lib/imw/files/csv.rb
|
|
49
|
-
- lib/imw/files/directory.rb
|
|
50
|
-
- lib/imw/files/excel.rb
|
|
51
|
-
- lib/imw/files/json.rb
|
|
52
|
-
- lib/imw/files/sgml.rb
|
|
53
|
-
- lib/imw/files/text.rb
|
|
54
|
-
- lib/imw/files/yaml.rb
|
|
55
|
-
- lib/imw/packagers.rb
|
|
56
|
-
- lib/imw/packagers/archiver.rb
|
|
57
|
-
- lib/imw/packagers/s3_mover.rb
|
|
58
43
|
- lib/imw/parsers.rb
|
|
59
44
|
- lib/imw/parsers/html_parser.rb
|
|
60
45
|
- lib/imw/parsers/html_parser/matchers.rb
|
|
61
46
|
- lib/imw/parsers/line_parser.rb
|
|
62
47
|
- lib/imw/parsers/regexp_parser.rb
|
|
63
48
|
- lib/imw/repository.rb
|
|
49
|
+
- lib/imw/resource.rb
|
|
50
|
+
- lib/imw/resources.rb
|
|
51
|
+
- lib/imw/resources/archive.rb
|
|
52
|
+
- lib/imw/resources/archives_and_compressed.rb
|
|
53
|
+
- lib/imw/resources/archives_and_compressed/bz2.rb
|
|
54
|
+
- lib/imw/resources/archives_and_compressed/gz.rb
|
|
55
|
+
- lib/imw/resources/archives_and_compressed/rar.rb
|
|
56
|
+
- lib/imw/resources/archives_and_compressed/tar.rb
|
|
57
|
+
- lib/imw/resources/archives_and_compressed/tarbz2.rb
|
|
58
|
+
- lib/imw/resources/archives_and_compressed/targz.rb
|
|
59
|
+
- lib/imw/resources/archives_and_compressed/zip.rb
|
|
60
|
+
- lib/imw/resources/compressed_file.rb
|
|
61
|
+
- lib/imw/resources/compressible.rb
|
|
62
|
+
- lib/imw/resources/formats.rb
|
|
63
|
+
- lib/imw/resources/formats/delimited.rb
|
|
64
|
+
- lib/imw/resources/formats/excel.rb
|
|
65
|
+
- lib/imw/resources/formats/json.rb
|
|
66
|
+
- lib/imw/resources/formats/sgml.rb
|
|
67
|
+
- lib/imw/resources/formats/yaml.rb
|
|
68
|
+
- lib/imw/resources/local.rb
|
|
69
|
+
- lib/imw/resources/remote.rb
|
|
70
|
+
- lib/imw/resources/schemes.rb
|
|
71
|
+
- lib/imw/resources/schemes/hdfs.rb
|
|
72
|
+
- lib/imw/resources/schemes/http.rb
|
|
73
|
+
- lib/imw/resources/schemes/s3.rb
|
|
64
74
|
- lib/imw/runner.rb
|
|
75
|
+
- lib/imw/transforms.rb
|
|
76
|
+
- lib/imw/transforms/archiver.rb
|
|
77
|
+
- lib/imw/transforms/transferer.rb
|
|
65
78
|
- lib/imw/utils.rb
|
|
66
|
-
- lib/imw/utils/components.rb
|
|
67
|
-
- lib/imw/utils/config.rb
|
|
68
79
|
- lib/imw/utils/error.rb
|
|
80
|
+
- lib/imw/utils/extensions.rb
|
|
69
81
|
- lib/imw/utils/extensions/array.rb
|
|
70
|
-
- lib/imw/utils/extensions/class/attribute_accessors.rb
|
|
71
|
-
- lib/imw/utils/extensions/core.rb
|
|
72
|
-
- lib/imw/utils/extensions/dir.rb
|
|
73
|
-
- lib/imw/utils/extensions/file_core.rb
|
|
74
82
|
- lib/imw/utils/extensions/hash.rb
|
|
75
83
|
- lib/imw/utils/extensions/hpricot.rb
|
|
76
84
|
- lib/imw/utils/extensions/string.rb
|
|
77
85
|
- lib/imw/utils/extensions/struct.rb
|
|
78
86
|
- lib/imw/utils/extensions/symbol.rb
|
|
79
|
-
- lib/imw/utils/extensions/typed_struct.rb
|
|
80
|
-
- lib/imw/utils/extensions/uri.rb
|
|
81
87
|
- lib/imw/utils/log.rb
|
|
82
88
|
- lib/imw/utils/misc.rb
|
|
83
89
|
- lib/imw/utils/paths.rb
|
|
@@ -85,45 +91,47 @@ files:
|
|
|
85
91
|
- lib/imw/utils/uuid.rb
|
|
86
92
|
- lib/imw/utils/validate.rb
|
|
87
93
|
- lib/imw/utils/version.rb
|
|
88
|
-
- lib/imw/utils/view.rb
|
|
89
|
-
- lib/imw/utils/view/dump_csv.rb
|
|
90
|
-
- lib/imw/utils/view/dump_csv_older.rb
|
|
91
94
|
- spec/data/sample.csv
|
|
95
|
+
- spec/data/sample.json
|
|
92
96
|
- spec/data/sample.tsv
|
|
93
97
|
- spec/data/sample.txt
|
|
94
98
|
- spec/data/sample.xml
|
|
95
99
|
- spec/data/sample.yaml
|
|
96
|
-
- spec/imw/dataset/datamapper/uri_spec.rb
|
|
97
|
-
- spec/imw/dataset/datamapper_spec_helper.rb
|
|
98
|
-
- spec/imw/files/archive_spec.rb
|
|
99
|
-
- spec/imw/files/basicfile_spec.rb
|
|
100
|
-
- spec/imw/files/bz2_spec.rb
|
|
101
|
-
- spec/imw/files/compressed_file_spec.rb
|
|
102
|
-
- spec/imw/files/compressible_spec.rb
|
|
103
|
-
- spec/imw/files/file_spec.rb
|
|
104
|
-
- spec/imw/files/gz_spec.rb
|
|
105
|
-
- spec/imw/files/rar_spec.rb
|
|
106
|
-
- spec/imw/files/tar_spec.rb
|
|
107
|
-
- spec/imw/files/text_spec.rb
|
|
108
|
-
- spec/imw/files/zip_spec.rb
|
|
109
|
-
- spec/imw/files_spec.rb
|
|
110
|
-
- spec/imw/packagers/archiver_spec.rb
|
|
111
|
-
- spec/imw/packagers/s3_mover_spec.rb
|
|
100
|
+
- spec/imw/dataset/paths_spec.rb
|
|
101
|
+
- spec/imw/dataset/workflow_spec.rb
|
|
112
102
|
- spec/imw/parsers/line_parser_spec.rb
|
|
113
103
|
- spec/imw/parsers/regexp_parser_spec.rb
|
|
114
|
-
- spec/imw/
|
|
115
|
-
- spec/imw/
|
|
104
|
+
- spec/imw/resource_spec.rb
|
|
105
|
+
- spec/imw/resources/archive_spec.rb
|
|
106
|
+
- spec/imw/resources/archives_and_compressed/bz2_spec.rb
|
|
107
|
+
- spec/imw/resources/archives_and_compressed/gz_spec.rb
|
|
108
|
+
- spec/imw/resources/archives_and_compressed/rar_spec.rb
|
|
109
|
+
- spec/imw/resources/archives_and_compressed/tar_spec.rb
|
|
110
|
+
- spec/imw/resources/archives_and_compressed/tarbz2_spec.rb
|
|
111
|
+
- spec/imw/resources/archives_and_compressed/targz_spec.rb
|
|
112
|
+
- spec/imw/resources/archives_and_compressed/zip_spec.rb
|
|
113
|
+
- spec/imw/resources/compressed_file_spec.rb
|
|
114
|
+
- spec/imw/resources/compressible_spec.rb
|
|
115
|
+
- spec/imw/resources/formats/delimited_spec.rb
|
|
116
|
+
- spec/imw/resources/formats/json_spec.rb
|
|
117
|
+
- spec/imw/resources/formats/sgml_spec.rb
|
|
118
|
+
- spec/imw/resources/formats/yaml_spec.rb
|
|
119
|
+
- spec/imw/resources/local_spec.rb
|
|
120
|
+
- spec/imw/resources/remote_spec.rb
|
|
121
|
+
- spec/imw/resources/schemes/hdfs_spec.rb
|
|
122
|
+
- spec/imw/resources/schemes/http_spec.rb
|
|
123
|
+
- spec/imw/resources/schemes/s3_spec.rb
|
|
124
|
+
- spec/imw/transforms/archiver_spec.rb
|
|
125
|
+
- spec/imw/transforms/transferer_spec.rb
|
|
116
126
|
- spec/imw/utils/paths_spec.rb
|
|
117
|
-
- spec/imw/
|
|
118
|
-
- spec/imw/workflow/rip_spec.rb
|
|
127
|
+
- spec/imw/utils/shared_paths_spec.rb
|
|
119
128
|
- spec/rcov.opts
|
|
120
129
|
- spec/spec.opts
|
|
121
130
|
- spec/spec_helper.rb
|
|
122
|
-
- spec/support/archive_contents_matcher.rb
|
|
123
131
|
- spec/support/custom_matchers.rb
|
|
124
|
-
- spec/support/directory_contents_matcher.rb
|
|
125
132
|
- spec/support/extensions.rb
|
|
126
133
|
- spec/support/file_contents_matcher.rb
|
|
134
|
+
- spec/support/paths_matcher.rb
|
|
127
135
|
- spec/support/random.rb
|
|
128
136
|
- spec/support/without_regard_to_order_matcher.rb
|
|
129
137
|
has_rdoc: true
|
|
@@ -155,34 +163,39 @@ signing_key:
|
|
|
155
163
|
specification_version: 3
|
|
156
164
|
summary: The Infinite Monkeywrench (IMW) makes acquiring, extracting, transforming, loading, and packaging data easy.
|
|
157
165
|
test_files:
|
|
158
|
-
- spec/imw/
|
|
159
|
-
- spec/imw/
|
|
160
|
-
- spec/imw/
|
|
161
|
-
- spec/imw/
|
|
162
|
-
- spec/imw/
|
|
163
|
-
- spec/imw/
|
|
166
|
+
- spec/imw/resources/compressed_file_spec.rb
|
|
167
|
+
- spec/imw/resources/archives_and_compressed/targz_spec.rb
|
|
168
|
+
- spec/imw/resources/archives_and_compressed/tar_spec.rb
|
|
169
|
+
- spec/imw/resources/archives_and_compressed/zip_spec.rb
|
|
170
|
+
- spec/imw/resources/archives_and_compressed/tarbz2_spec.rb
|
|
171
|
+
- spec/imw/resources/archives_and_compressed/bz2_spec.rb
|
|
172
|
+
- spec/imw/resources/archives_and_compressed/rar_spec.rb
|
|
173
|
+
- spec/imw/resources/archives_and_compressed/gz_spec.rb
|
|
174
|
+
- spec/imw/resources/archive_spec.rb
|
|
175
|
+
- spec/imw/resources/local_spec.rb
|
|
176
|
+
- spec/imw/resources/remote_spec.rb
|
|
177
|
+
- spec/imw/resources/compressible_spec.rb
|
|
178
|
+
- spec/imw/resources/formats/json_spec.rb
|
|
179
|
+
- spec/imw/resources/formats/yaml_spec.rb
|
|
180
|
+
- spec/imw/resources/formats/delimited_spec.rb
|
|
181
|
+
- spec/imw/resources/formats/sgml_spec.rb
|
|
182
|
+
- spec/imw/resources/schemes/http_spec.rb
|
|
183
|
+
- spec/imw/resources/schemes/hdfs_spec.rb
|
|
184
|
+
- spec/imw/resources/schemes/s3_spec.rb
|
|
185
|
+
- spec/imw/dataset/paths_spec.rb
|
|
186
|
+
- spec/imw/dataset/workflow_spec.rb
|
|
164
187
|
- spec/imw/parsers/line_parser_spec.rb
|
|
165
188
|
- spec/imw/parsers/regexp_parser_spec.rb
|
|
166
|
-
- spec/imw/
|
|
167
|
-
- spec/imw/
|
|
168
|
-
- spec/imw/
|
|
169
|
-
- spec/imw/files/archive_spec.rb
|
|
170
|
-
- spec/imw/files/compressible_spec.rb
|
|
171
|
-
- spec/imw/files/tar_spec.rb
|
|
172
|
-
- spec/imw/files/zip_spec.rb
|
|
173
|
-
- spec/imw/files/text_spec.rb
|
|
174
|
-
- spec/imw/files/bz2_spec.rb
|
|
175
|
-
- spec/imw/files/rar_spec.rb
|
|
176
|
-
- spec/imw/files/gz_spec.rb
|
|
177
|
-
- spec/imw/files_spec.rb
|
|
189
|
+
- spec/imw/resource_spec.rb
|
|
190
|
+
- spec/imw/transforms/archiver_spec.rb
|
|
191
|
+
- spec/imw/transforms/transferer_spec.rb
|
|
178
192
|
- spec/imw/utils/paths_spec.rb
|
|
179
|
-
- spec/imw/utils/
|
|
180
|
-
- spec/imw/utils/extensions/file_core_spec.rb
|
|
193
|
+
- spec/imw/utils/shared_paths_spec.rb
|
|
181
194
|
- spec/spec_helper.rb
|
|
182
195
|
- spec/support/without_regard_to_order_matcher.rb
|
|
183
196
|
- spec/support/extensions.rb
|
|
184
|
-
- spec/support/archive_contents_matcher.rb
|
|
185
197
|
- spec/support/custom_matchers.rb
|
|
186
198
|
- spec/support/random.rb
|
|
199
|
+
- spec/support/paths_matcher.rb
|
|
187
200
|
- spec/support/file_contents_matcher.rb
|
|
188
|
-
-
|
|
201
|
+
- examples/dataset.rb
|
data/lib/imw/dataset/task.rb
DELETED
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
require 'rake'
|
|
2
|
-
|
|
3
|
-
module IMW
|
|
4
|
-
|
|
5
|
-
Task = Class.new(Rake::Task)
|
|
6
|
-
FileTask = Class.new(Rake::FileTask)
|
|
7
|
-
FileCreationTask = Class.new(Rake::FileCreationTask)
|
|
8
|
-
|
|
9
|
-
class Dataset
|
|
10
|
-
include Rake::TaskManager
|
|
11
|
-
|
|
12
|
-
# Return a new (or existing) <tt>IMW::Task</tt> with the given
|
|
13
|
-
# +name+. Dependencies can be declared and a block passed in just
|
|
14
|
-
# as in Rake.
|
|
15
|
-
def task name, &block
|
|
16
|
-
self.define_task IMW::Task, name, &block
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
# Return a new (or existing) <tt>IMW::FileTask</tt> with the given
|
|
20
|
-
# +name+. Dependencies can be declared and a block passed in just
|
|
21
|
-
# as in Rake.
|
|
22
|
-
def file name, &block
|
|
23
|
-
self.define_task IMW::FileTask, name, &block
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
# Return a new (or existing) <tt>IMW::FileCreationTask</tt> with the given
|
|
27
|
-
# +name+. Dependencies can be declared and a block passed in just
|
|
28
|
-
# as in Rake.
|
|
29
|
-
def file_create name, &block
|
|
30
|
-
self.define_task IMW::FileCreationTask, name, &block
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
# Override this method to define default tasks for a subclass of
|
|
34
|
-
# IMW::Dataset.
|
|
35
|
-
def set_tasks
|
|
36
|
-
end
|
|
37
|
-
end
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
data/lib/imw/files/archive.rb
DELETED
|
@@ -1,113 +0,0 @@
|
|
|
1
|
-
#
|
|
2
|
-
# h2. lib/imw/files/archive.rb -- describes archives of files
|
|
3
|
-
#
|
|
4
|
-
# == About
|
|
5
|
-
#
|
|
6
|
-
# Module for describing known archive types. An including archive
|
|
7
|
-
# type's class must define an instance variable +archive+ which is a
|
|
8
|
-
# hash with the following required keys:
|
|
9
|
-
#
|
|
10
|
-
# <tt>:program</tt>:: a symbol naming the program to be used. It
|
|
11
|
-
# should match one of the symbols in <tt>IMW::EXTERNAL_PROGRAMS</tt>
|
|
12
|
-
#
|
|
13
|
-
# <tt>:create_flags</tt>:: a string of flags to pass to the archiving
|
|
14
|
-
# program when creating the archive
|
|
15
|
-
#
|
|
16
|
-
# <tt>:append_flags</tt>:: a string of flags to pass to the archiving
|
|
17
|
-
# program when appending files to the archive
|
|
18
|
-
#
|
|
19
|
-
# <tt>:extract_flags</tt>:: a string of flags to pass to the archiving
|
|
20
|
-
# program when extracting the archive
|
|
21
|
-
#
|
|
22
|
-
# <tt>:list_flags</tt>:: a string of flags to pass to the archiving
|
|
23
|
-
# program when listing the archive's contents
|
|
24
|
-
#
|
|
25
|
-
# THe +archive+ hash may also contain the entry:
|
|
26
|
-
#
|
|
27
|
-
# <tt>:unarchiving_program</tt>:: a symbol naming the program to be
|
|
28
|
-
# used to list/extract the archive. Useful only if this program
|
|
29
|
-
# differs from the program used to create the archive in the first
|
|
30
|
-
# place (i.e. - zip & unzip).
|
|
31
|
-
#
|
|
32
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
33
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
34
|
-
# License:: GPL 3.0
|
|
35
|
-
# Website:: http://infinitemonkeywrench.org/
|
|
36
|
-
#
|
|
37
|
-
# puts "#{File.basename(__FILE__)}: Put it all in one place so that when something goes wrong you'll know it immediately. You'll regret it, but at least you'll know." # at bottom
|
|
38
|
-
module IMW
|
|
39
|
-
module Files
|
|
40
|
-
|
|
41
|
-
module BasicFile
|
|
42
|
-
|
|
43
|
-
# Is this file an archive?
|
|
44
|
-
def archive?
|
|
45
|
-
false
|
|
46
|
-
end
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
module Archive
|
|
50
|
-
|
|
51
|
-
attr_reader :archive
|
|
52
|
-
|
|
53
|
-
# Is this file an archive?
|
|
54
|
-
def archive?
|
|
55
|
-
true
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
public
|
|
59
|
-
# Create this archive containing the given +paths+, which can be
|
|
60
|
-
# either a string or list of strings to be interpreted as paths
|
|
61
|
-
# to files/directories by the shell.
|
|
62
|
-
#
|
|
63
|
-
# Options:
|
|
64
|
-
# <tt>:force</tt> (false):: overwrite any existing archive at this path.
|
|
65
|
-
def create paths, opts = {}
|
|
66
|
-
opts = opts.reverse_merge({:force => false})
|
|
67
|
-
raise IMW::Error.new("An archive already exists at #{@path}.") if exist? and not opts[:force]
|
|
68
|
-
raise IMW::Error.new("Cannot create an archive of type #{@extname}") unless @archive[:create_flags]
|
|
69
|
-
paths = [paths] if paths.class == String
|
|
70
|
-
IMW.system IMW::EXTERNAL_PROGRAMS[@archive[:program]], @archive[:create_flags], @path, *paths
|
|
71
|
-
self
|
|
72
|
-
end
|
|
73
|
-
|
|
74
|
-
# Append to this archive the given +paths+, which can be
|
|
75
|
-
# either a string or list of strings to be interpreted as paths
|
|
76
|
-
# to files/directories by the shell.
|
|
77
|
-
def append paths
|
|
78
|
-
raise IMW::Error.new("Cannot append to an archive of type #{@archive[:program]}.") unless @archive[:append_flags]
|
|
79
|
-
paths = [paths] if paths.class == String
|
|
80
|
-
IMW.system IMW::EXTERNAL_PROGRAMS[@archive[:program]], @archive[:append_flags], @path, *paths
|
|
81
|
-
self
|
|
82
|
-
end
|
|
83
|
-
|
|
84
|
-
# Extract the files from this archive to the current directory.
|
|
85
|
-
def extract
|
|
86
|
-
raise IMW::Error.new("Cannot extract, #{@path} does not exist.") unless exist?
|
|
87
|
-
program = (@archive[:unarchiving_program] or @archive[:program])
|
|
88
|
-
IMW.system IMW::EXTERNAL_PROGRAMS[program], @archive[:extract_flags], @path
|
|
89
|
-
end
|
|
90
|
-
|
|
91
|
-
# Return a (sorted) list of contents in this archive.
|
|
92
|
-
def contents
|
|
93
|
-
raise IMW::Error.new("Cannot list contents, #{@path} does not exist.") unless exist?
|
|
94
|
-
program = (@archive[:unarchiving_program] or @archive[:program])
|
|
95
|
-
output = ''
|
|
96
|
-
command = [IMW::EXTERNAL_PROGRAMS[program], @archive[:list_flags], @path].join ' '
|
|
97
|
-
output += `#{command}`
|
|
98
|
-
archive_contents_string_to_array(output)
|
|
99
|
-
end
|
|
100
|
-
|
|
101
|
-
# Parse and format the output from the archive program's "list"
|
|
102
|
-
# command into an array of filenames.
|
|
103
|
-
#
|
|
104
|
-
# An including class can customize this method to match the
|
|
105
|
-
# output from the archiving program of that class.
|
|
106
|
-
def archive_contents_string_to_array string
|
|
107
|
-
string.split("\n")
|
|
108
|
-
end
|
|
109
|
-
end
|
|
110
|
-
end
|
|
111
|
-
end
|
|
112
|
-
|
|
113
|
-
|
data/lib/imw/files/basicfile.rb
DELETED
|
@@ -1,122 +0,0 @@
|
|
|
1
|
-
#
|
|
2
|
-
# h2. lib/imw/files/file.rb -- base class for files
|
|
3
|
-
#
|
|
4
|
-
# == About
|
|
5
|
-
#
|
|
6
|
-
# Defines a base class for classes for specific filetypes to subclass.
|
|
7
|
-
#
|
|
8
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
9
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
10
|
-
# License:: GPL 3.0
|
|
11
|
-
# Website:: http://infinitemonkeywrench.org/
|
|
12
|
-
#
|
|
13
|
-
# puts "#{File.basename(__FILE__)}: At the very bottom of the office building, wedged between a small boulder and a rotting log you see a weathered manilla file folder. The writing on the tab is too faded to make out." # at bottom
|
|
14
|
-
module IMW
|
|
15
|
-
module Files
|
|
16
|
-
module BasicFile
|
|
17
|
-
|
|
18
|
-
attr_reader :uri, :host, :path, :dirname, :basename, :extname, :name
|
|
19
|
-
|
|
20
|
-
protected
|
|
21
|
-
|
|
22
|
-
def uri= uri
|
|
23
|
-
@uri = uri.is_a?(URI::Generic) ? uri : URI.parse(uri)
|
|
24
|
-
@host = self.uri.host
|
|
25
|
-
@path = local? ? ::File.expand_path(self.uri.path) : self.uri.path
|
|
26
|
-
@dirname = ::File.dirname path
|
|
27
|
-
@basename = ::File.basename path
|
|
28
|
-
@extname = find_extname
|
|
29
|
-
@name = @basename[0,@basename.length - @extname.length]
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
# Some files (like <tt>.tar.gz</tt>) have an "extra" extension.
|
|
33
|
-
# Classes in the <tt>IMW::Files</tt> module should define a
|
|
34
|
-
# class method <tt>extname</tt> which returns the their full
|
|
35
|
-
# extension.
|
|
36
|
-
def find_extname
|
|
37
|
-
self.class.respond_to?(:extname) ? self.class.extname(path) : ::File.extname(path)
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
public
|
|
41
|
-
|
|
42
|
-
# Is this file on the local machine (the scheme of the file's URI is nil or
|
|
43
|
-
def local?
|
|
44
|
-
host == 'file' || host.nil?
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
# Is this file on a remote machine?
|
|
48
|
-
def remote?
|
|
49
|
-
(! local?)
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
# Steal a bunch of class methods from File which only take a
|
|
53
|
-
# path as a first argument.
|
|
54
|
-
[:executable?, :executable_real?, :file?, :directory?, :ftype, :owned?, :pipe?, :readable?, :readable_real?, :setgid?, :setuid?, :size, :size?, :socket?, :split, :stat, :sticky?, :writable?, :writable_real?, :zero?].each do |class_method|
|
|
55
|
-
define_method class_method do
|
|
56
|
-
File.send(class_method, path)
|
|
57
|
-
end
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
# Is there a real file at the path of this File? Will attempt
|
|
61
|
-
# to open files online too to check.
|
|
62
|
-
def exist?
|
|
63
|
-
if local?
|
|
64
|
-
::File.exist?(path)
|
|
65
|
-
else
|
|
66
|
-
begin
|
|
67
|
-
true if open(uri)
|
|
68
|
-
rescue SocketError
|
|
69
|
-
false
|
|
70
|
-
end
|
|
71
|
-
end
|
|
72
|
-
end
|
|
73
|
-
alias_method :exists?, :exist?
|
|
74
|
-
|
|
75
|
-
# Delete this file.
|
|
76
|
-
def rm
|
|
77
|
-
raise IMW::PathError.new("cannot delete remote file #{uri}") unless local?
|
|
78
|
-
raise IMW::PathError.new("cannot delete #{uri}, doesn't exist!") unless exist?
|
|
79
|
-
FileUtils.rm path
|
|
80
|
-
end
|
|
81
|
-
alias_method :rm!, :rm
|
|
82
|
-
|
|
83
|
-
# Copy this file to +new_path+.
|
|
84
|
-
def cp new_path
|
|
85
|
-
raise IMW::PathError.new("cannot copy from #{path}, doesn't exist!") unless exist?
|
|
86
|
-
if local?
|
|
87
|
-
FileUtils.cp path, new_path
|
|
88
|
-
else
|
|
89
|
-
# FIXME better way to do this?
|
|
90
|
-
File.open(new_path,'w') { |f| f.write(open(uri).read) }
|
|
91
|
-
end
|
|
92
|
-
self.class.new(new_path)
|
|
93
|
-
end
|
|
94
|
-
|
|
95
|
-
# Copy this file to +dir+.
|
|
96
|
-
def cp_to_dir dir
|
|
97
|
-
cp File.join(File.expand_path(dir),basename)
|
|
98
|
-
end
|
|
99
|
-
|
|
100
|
-
# Move this file to +new_path+.
|
|
101
|
-
def mv new_path
|
|
102
|
-
raise IMW::PathError.new("cannot move from #{path}, doesn't exist!") unless exist?
|
|
103
|
-
if local?
|
|
104
|
-
FileUtils.mv path, new_path
|
|
105
|
-
else
|
|
106
|
-
# FIXME better way to do this?
|
|
107
|
-
File.open(new_path,'w') { |f| f.write(open(uri).read) }
|
|
108
|
-
end
|
|
109
|
-
self.class.new(new_path)
|
|
110
|
-
end
|
|
111
|
-
alias_method :mv!, :mv
|
|
112
|
-
|
|
113
|
-
# Move this file to +dir+.
|
|
114
|
-
def mv_to_dir dir
|
|
115
|
-
mv File.join(File.expand_path(dir),basename)
|
|
116
|
-
end
|
|
117
|
-
alias_method :mv_to_dir!, :mv_to_dir
|
|
118
|
-
end
|
|
119
|
-
end
|
|
120
|
-
end
|
|
121
|
-
|
|
122
|
-
|
data/lib/imw/files/binary.rb
DELETED
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
#
|
|
2
|
-
# h2. lib/imw/files/binary.rb -- binary files
|
|
3
|
-
#
|
|
4
|
-
# == About
|
|
5
|
-
#
|
|
6
|
-
# Class for handling binary data.
|
|
7
|
-
#
|
|
8
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
9
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
10
|
-
# License:: GPL 3.0
|
|
11
|
-
# Website:: http://infinitemonkeywrench.org/
|
|
12
|
-
#
|
|
13
|
-
# puts "#{File.basename(__FILE__)}: Something clever" # at bottom
|
|
14
|
-
module IMW
|
|
15
|
-
module Files
|
|
16
|
-
|
|
17
|
-
class Binary
|
|
18
|
-
|
|
19
|
-
include IMW::Files::BasicFile
|
|
20
|
-
include IMW::Files::Compressible
|
|
21
|
-
|
|
22
|
-
def initialize uri
|
|
23
|
-
self.uri= uri
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
end
|
|
27
|
-
end
|
|
28
|
-
end
|
data/lib/imw/files/compressed_file.rb
DELETED
|
@@ -1,93 +0,0 @@
|
|
|
1
|
-
#
|
|
2
|
-
# h2. lib/imw/files/compressed_file.rb -- class describing compressed files
|
|
3
|
-
#
|
|
4
|
-
# == About
|
|
5
|
-
#
|
|
6
|
-
# Compression of files is handled via the
|
|
7
|
-
# <tt>IMW::Files::Compressible</tt> module which can be included by
|
|
8
|
-
# any object that has a <tt>@path</tt> attribute. The methods defined
|
|
9
|
-
# there compress files and return this
|
|
10
|
-
# <tt>IMW::Files::CompressedFile</tt> object which has methods for
|
|
11
|
-
# decompression.
|
|
12
|
-
#
|
|
13
|
-
# A subclass of this class must define a +compression+ instance
|
|
14
|
-
# variable which is a hash with the following keys:
|
|
15
|
-
#
|
|
16
|
-
# <tt>:program</tt>:: a symbol naming the program used for
|
|
17
|
-
# compression/decompression which must be one of the symbols in
|
|
18
|
-
# <tt>IMW::EXTERNAL_PROGRAMS</tt>
|
|
19
|
-
#
|
|
20
|
-
# <tt>:decompression_flags</tt>:: a string of flags to pass to the
|
|
21
|
-
# compression program when decompressing the file.
|
|
22
|
-
#
|
|
23
|
-
# A subclass must also define the method +decompressed_path+ which
|
|
24
|
-
# returns the path of the file post-decompression.
|
|
25
|
-
#
|
|
26
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
27
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
28
|
-
# License:: GPL 3.0
|
|
29
|
-
# Website:: http://infinitemonkeywrench.org/
|
|
30
|
-
#
|
|
31
|
-
# puts "#{File.basename(__FILE__)}: Have you ever folded up the wrapper of a soda straw into a little accordian shape and let a drop of water soak into it?" # at bottom
|
|
32
|
-
module IMW
|
|
33
|
-
module Files
|
|
34
|
-
|
|
35
|
-
module BasicFile
|
|
36
|
-
def compressed?
|
|
37
|
-
false
|
|
38
|
-
end
|
|
39
|
-
end
|
|
40
|
-
|
|
41
|
-
# A module which provides methods for decompressing a compressed
|
|
42
|
-
# file. An including should define an instance variable
|
|
43
|
-
# <tt>@compression</tt> with two keys:
|
|
44
|
-
#
|
|
45
|
-
# <tt>:program</tt>:: a symbol from <tt>IMW::EXTERNAL_PROGRAMS</tt>
|
|
46
|
-
# <tt>:decompression_flags</tt>:: a string specifying flags to pass to the decompression program
|
|
47
|
-
module CompressedFile
|
|
48
|
-
|
|
49
|
-
attr_reader :compression
|
|
50
|
-
|
|
51
|
-
# Is this file compressed?
|
|
52
|
-
def compressed?
|
|
53
|
-
true
|
|
54
|
-
end
|
|
55
|
-
|
|
56
|
-
# Construct the command passed to the shell to decompress this
|
|
57
|
-
# file.
|
|
58
|
-
def decompression_command
|
|
59
|
-
[IMW::EXTERNAL_PROGRAMS[@compression[:program]],@compression[:decompression_flags],@path].join ' '
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
public
|
|
63
|
-
# Decompress this file in its present directory overwriting any
|
|
64
|
-
# existing files and without saving the original compressed
|
|
65
|
-
# file.
|
|
66
|
-
def decompress!
|
|
67
|
-
raise IMW::PathError.new("cannot decompress #{@path}, doesn't exist!") unless exist?
|
|
68
|
-
FileUtils.cd(@dirname) { IMW.system decompression_command }
|
|
69
|
-
IMW.open(decompressed_path)
|
|
70
|
-
end
|
|
71
|
-
|
|
72
|
-
# Decompress this file in its present directory, overwriting any
|
|
73
|
-
# existing files while keeping the original compressed file.
|
|
74
|
-
#
|
|
75
|
-
# The implementation is a little stupid, as the file is
|
|
76
|
-
# needlessly copied.
|
|
77
|
-
def decompress
|
|
78
|
-
raise IMW::PathError.new("cannot decompress #{@path}, doesn't exist!") unless exist?
|
|
79
|
-
begin
|
|
80
|
-
FileUtils.cp(@path,@path + 'copy')
|
|
81
|
-
decompress!
|
|
82
|
-
ensure
|
|
83
|
-
FileUtils.mv(@path + 'copy',@path)
|
|
84
|
-
end
|
|
85
|
-
IMW.open(decompressed_path)
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
end
|
|
89
|
-
end
|
|
90
|
-
end
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|