imw 0.2.18 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +7 -26
- data/Gemfile.lock +13 -38
- data/{LICENSE → LICENSE.txt} +1 -1
- data/README.textile +35 -0
- data/Rakefile +45 -22
- data/VERSION +1 -1
- data/examples/foo.rb +19 -0
- data/examples/html_selector.rb +22 -0
- data/examples/nes_game_list.csv +625 -0
- data/examples/nes_gamespot.csv +1371 -0
- data/examples/nes_nintendo.csv +624 -0
- data/examples/nes_unlicensed.csv +89 -0
- data/examples/nes_wikipedia.csv +710 -0
- data/examples/nibbler_test.rb +24 -0
- data/examples/script.rb +19 -0
- data/lib/imw.rb +28 -140
- data/lib/imw/error.rb +9 -0
- data/lib/imw/recordizer.rb +8 -0
- data/lib/imw/recordizer/html_selector_recordizer.rb +86 -0
- data/lib/imw/recordizer/string_slice_recordizer.rb +39 -0
- data/lib/imw/resource.rb +3 -119
- data/lib/imw/serializer.rb +7 -0
- data/lib/imw/serializer/json_serializer.rb +17 -0
- data/lib/imw/uri.rb +41 -0
- data/spec/resource_spec.rb +78 -0
- data/spec/uri_spec.rb +55 -0
- metadata +81 -232
- data/README.rdoc +0 -371
- data/bin/imw +0 -5
- data/bin/tsv_to_json.rb +0 -29
- data/etc/imwrc.rb +0 -26
- data/examples/dataset.rb +0 -12
- data/examples/metadata.yml +0 -10
- data/lib/imw/archives.rb +0 -120
- data/lib/imw/archives/rar.rb +0 -19
- data/lib/imw/archives/tar.rb +0 -19
- data/lib/imw/archives/tarbz2.rb +0 -73
- data/lib/imw/archives/targz.rb +0 -73
- data/lib/imw/archives/zip.rb +0 -51
- data/lib/imw/boot.rb +0 -87
- data/lib/imw/compressed_files.rb +0 -94
- data/lib/imw/compressed_files/bz2.rb +0 -16
- data/lib/imw/compressed_files/compressible.rb +0 -75
- data/lib/imw/compressed_files/gz.rb +0 -16
- data/lib/imw/dataset.rb +0 -125
- data/lib/imw/dataset/paths.rb +0 -29
- data/lib/imw/dataset/workflow.rb +0 -195
- data/lib/imw/formats.rb +0 -33
- data/lib/imw/formats/delimited.rb +0 -170
- data/lib/imw/formats/excel.rb +0 -100
- data/lib/imw/formats/json.rb +0 -41
- data/lib/imw/formats/pdf.rb +0 -71
- data/lib/imw/formats/sgml.rb +0 -69
- data/lib/imw/formats/yaml.rb +0 -41
- data/lib/imw/metadata.rb +0 -83
- data/lib/imw/metadata/contains_metadata.rb +0 -54
- data/lib/imw/metadata/dsl.rb +0 -111
- data/lib/imw/metadata/field.rb +0 -37
- data/lib/imw/metadata/has_metadata.rb +0 -98
- data/lib/imw/metadata/has_summary.rb +0 -57
- data/lib/imw/metadata/schema.rb +0 -17
- data/lib/imw/parsers.rb +0 -8
- data/lib/imw/parsers/flat.rb +0 -44
- data/lib/imw/parsers/html_parser.rb +0 -387
- data/lib/imw/parsers/html_parser/matchers.rb +0 -289
- data/lib/imw/parsers/line_parser.rb +0 -87
- data/lib/imw/parsers/regexp_parser.rb +0 -72
- data/lib/imw/repository.rb +0 -12
- data/lib/imw/runner.rb +0 -118
- data/lib/imw/schemes.rb +0 -23
- data/lib/imw/schemes/ftp.rb +0 -142
- data/lib/imw/schemes/hdfs.rb +0 -251
- data/lib/imw/schemes/http.rb +0 -165
- data/lib/imw/schemes/local.rb +0 -409
- data/lib/imw/schemes/remote.rb +0 -119
- data/lib/imw/schemes/s3.rb +0 -143
- data/lib/imw/schemes/sql.rb +0 -129
- data/lib/imw/tools.rb +0 -12
- data/lib/imw/tools/aggregator.rb +0 -148
- data/lib/imw/tools/archiver.rb +0 -220
- data/lib/imw/tools/downloader.rb +0 -63
- data/lib/imw/tools/extension_analyzer.rb +0 -114
- data/lib/imw/tools/summarizer.rb +0 -83
- data/lib/imw/tools/transferer.rb +0 -167
- data/lib/imw/utils.rb +0 -74
- data/lib/imw/utils/dynamically_extendable.rb +0 -137
- data/lib/imw/utils/error.rb +0 -59
- data/lib/imw/utils/extensions/hpricot.rb +0 -34
- data/lib/imw/utils/has_uri.rb +0 -131
- data/lib/imw/utils/log.rb +0 -92
- data/lib/imw/utils/misc.rb +0 -57
- data/lib/imw/utils/paths.rb +0 -146
- data/lib/imw/utils/uri.rb +0 -59
- data/lib/imw/utils/uuid.rb +0 -33
- data/lib/imw/utils/validate.rb +0 -38
- data/lib/imw/utils/version.rb +0 -11
- data/spec/data/formats/delimited/sample.csv +0 -131
- data/spec/data/formats/delimited/sample.tsv +0 -131
- data/spec/data/formats/delimited/with_schema/ace-hardware-locations.tsv +0 -11
- data/spec/data/formats/delimited/with_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -16
- data/spec/data/formats/delimited/with_schema/complete-list-of-starbucks-locations.tsv +0 -11
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -12
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -13
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -22
- data/spec/data/formats/delimited/without_schema/ace-hardware-locations.tsv +0 -10
- data/spec/data/formats/delimited/without_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -15
- data/spec/data/formats/delimited/without_schema/complete-list-of-starbucks-locations.tsv +0 -10
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -11
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -12
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -21
- data/spec/data/formats/excel/sample.xls +0 -0
- data/spec/data/formats/json/sample.json +0 -1
- data/spec/data/formats/none/sample +0 -650
- data/spec/data/formats/sgml/sample.xml +0 -617
- data/spec/data/formats/text/sample.txt +0 -650
- data/spec/data/formats/yaml/sample.yaml +0 -410
- data/spec/data/schema-tabular.yaml +0 -11
- data/spec/imw/archives/rar_spec.rb +0 -16
- data/spec/imw/archives/tar_spec.rb +0 -16
- data/spec/imw/archives/tarbz2_spec.rb +0 -24
- data/spec/imw/archives/targz_spec.rb +0 -21
- data/spec/imw/archives/zip_spec.rb +0 -16
- data/spec/imw/archives_spec.rb +0 -77
- data/spec/imw/compressed_files/bz2_spec.rb +0 -15
- data/spec/imw/compressed_files/compressible_spec.rb +0 -36
- data/spec/imw/compressed_files/gz_spec.rb +0 -15
- data/spec/imw/compressed_files_spec.rb +0 -47
- data/spec/imw/dataset/paths_spec.rb +0 -32
- data/spec/imw/dataset/workflow_spec.rb +0 -41
- data/spec/imw/formats/delimited_spec.rb +0 -44
- data/spec/imw/formats/excel_spec.rb +0 -55
- data/spec/imw/formats/json_spec.rb +0 -18
- data/spec/imw/formats/sgml_spec.rb +0 -24
- data/spec/imw/formats/yaml_spec.rb +0 -19
- data/spec/imw/metadata/contains_metadata_spec.rb +0 -56
- data/spec/imw/metadata/field_spec.rb +0 -25
- data/spec/imw/metadata/has_metadata_spec.rb +0 -58
- data/spec/imw/metadata/has_summary_spec.rb +0 -32
- data/spec/imw/metadata/schema_spec.rb +0 -24
- data/spec/imw/metadata_spec.rb +0 -86
- data/spec/imw/parsers/line_parser_spec.rb +0 -96
- data/spec/imw/parsers/regexp_parser_spec.rb +0 -42
- data/spec/imw/resource_spec.rb +0 -32
- data/spec/imw/schemes/hdfs_spec.rb +0 -67
- data/spec/imw/schemes/http_spec.rb +0 -19
- data/spec/imw/schemes/local_spec.rb +0 -165
- data/spec/imw/schemes/remote_spec.rb +0 -38
- data/spec/imw/schemes/s3_spec.rb +0 -31
- data/spec/imw/schemes/sql_spec.rb +0 -3
- data/spec/imw/tools/aggregator_spec.rb +0 -71
- data/spec/imw/tools/archiver_spec.rb +0 -120
- data/spec/imw/tools/extension_analyzer_spec.rb +0 -153
- data/spec/imw/tools/summarizer_spec.rb +0 -8
- data/spec/imw/tools/transferer_spec.rb +0 -195
- data/spec/imw/utils/dynamically_extendable_spec.rb +0 -69
- data/spec/imw/utils/has_uri_spec.rb +0 -61
- data/spec/imw/utils/paths_spec.rb +0 -10
- data/spec/imw/utils/shared_paths_spec.rb +0 -29
- data/spec/imw_spec.rb +0 -14
- data/spec/rcov.opts +0 -1
- data/spec/spec_helper.rb +0 -31
- data/spec/support/custom_matchers.rb +0 -28
- data/spec/support/file_contents_matcher.rb +0 -30
- data/spec/support/paths_matcher.rb +0 -66
- data/spec/support/random.rb +0 -213
- data/spec/support/without_regard_to_order_matcher.rb +0 -41
data/lib/imw/archives.rb
DELETED
|
@@ -1,120 +0,0 @@
|
|
|
1
|
-
require 'shellwords'

module IMW

  # Contains modules which define the behavior of archive files.
  module Archives

    # Handlers for archives.  Each entry pairs the name of an archive
    # module with a predicate taking a resource; every predicate
    # requires the resource to be local and matches its path against a
    # case-insensitive file extension.
    HANDLERS = [
      ["Archives::Tarbz2", Proc.new { |r| r.is_local? && r.path =~ /\.tar\.bz2$/i } ],
      ["Archives::Tarbz2", Proc.new { |r| r.is_local? && r.path =~ /\.tbz2$/i     } ],
      ["Archives::Targz",  Proc.new { |r| r.is_local? && r.path =~ /\.tar\.gz$/i  } ],
      ["Archives::Targz",  Proc.new { |r| r.is_local? && r.path =~ /\.tgz$/i      } ],
      ["Archives::Tar",    Proc.new { |r| r.is_local? && r.path =~ /\.tar$/i      } ],
      ["Archives::Rar",    Proc.new { |r| r.is_local? && r.path =~ /\.rar$/i      } ],
      ["Archives::Zip",    Proc.new { |r| r.is_local? && r.path =~ /\.zip$/i      } ]
    ]

    autoload :Rar,    'imw/archives/rar'
    autoload :Tar,    'imw/archives/tar'
    autoload :Tarbz2, 'imw/archives/tarbz2'
    autoload :Targz,  'imw/archives/targz'
    autoload :Zip,    'imw/archives/zip'

    # Defines methods for creating, appending to, extracting, and
    # listing an archive file.  This module isn't used to directly
    # extend an IMW::Resource -- instead, format-specific modules
    # (e.g. IMW::Archives::Tarbz2) include this module and define the
    # specific settings (command-line flags, &c.) required to make
    # things work.
    #
    # The including object is expected to respond to +path+ and
    # +should_exist!+, which are not defined here.
    module Base

      # Hash of settings read by the methods below.  Keys used are
      # :program, :unarchiving_program, :create, :append, :extract and
      # :list.
      attr_accessor :archive_settings

      # Is this file an archive?
      #
      # @return [true, false]
      def is_archive?
        true
      end

      # Create an archive of the given +input_paths+.
      #
      # The paths are flattened and handed to IMW.system as separate
      # arguments, so a shell glob cannot be used here.
      #
      # @param [String] input_paths the paths to add to this archive
      # @return [IMW::Resource] the resulting archive
      # @raise [IMW::Error] if :program or :create is not set
      def create(*input_paths)
        should_have_archive_setting!("Cannot create archive #{path}", :program, :create)
        IMW.system archive_settings[:program], archive_settings[:create], path, *input_paths.flatten
        self
      end

      # Append to this archive the given +input_paths+.
      #
      # The paths are flattened and handed to IMW.system as separate
      # arguments, so a shell glob cannot be used here.
      #
      # @param [String] input_paths the paths to add to this archive
      # @return [IMW::Resource] the resulting archive
      # @raise [IMW::Error] if :program or :append is not set
      def append(*input_paths)
        should_have_archive_setting!("Cannot append to archive #{path}", :program, :append)
        IMW.system archive_settings[:program], archive_settings[:append], path, *input_paths.flatten
        self
      end

      # Extract the files from this archive to the current directory.
      #
      # Uses :unarchiving_program when it is set and falls back to
      # :program otherwise.
      #
      # @return [IMW::Resource] this archive
      # @raise [IMW::Error] if :extract is not set or neither program
      #   setting is present
      def extract
        should_exist!("Cannot extract archive.")
        should_have_archive_setting!("Cannot extract archive #{path}", :extract, [:unarchiving_program, :program])
        program = archive_settings[:unarchiving_program] || archive_settings[:program]
        IMW.system program, archive_settings[:extract], path
        self
      end

      # Return the list of contents in this archive, in the order the
      # archive program reports them (no sorting is done here).
      #
      # @return [Array<String>] a list of paths in the archive.
      # @raise [IMW::Error] if :list is not set or neither program
      #   setting is present
      def contents
        should_exist!("Cannot list archive contents.")
        should_have_archive_setting!("Cannot list archive #{path}", :list, [:unarchiving_program, :program])
        program = archive_settings[:unarchiving_program] || archive_settings[:program]
        flags   = Array(archive_settings[:list]).join(' ')
        # The command goes through the shell, so escape the whole path
        # (quotes, spaces, `$`, ...) rather than just its spaces.
        command = [program, flags, Shellwords.escape(path)].join(' ')
        archive_contents_string_to_array(`#{command}`)
      end

      protected

      # Raise unless every one of +settings+ is present in
      # +archive_settings+.  A setting given as an Array is satisfied
      # when at least one of its members is present.
      #
      # @param [String, nil] message prefix for the error message
      # @param [Symbol, Array<Symbol>] settings the required keys
      # @raise [IMW::Error] naming the first missing setting
      def should_have_archive_setting!(message = nil, *settings) # :nodoc:
        # Treat unset settings as empty so the caller gets an IMW::Error
        # instead of a NoMethodError on nil.
        current = archive_settings || {}
        settings.each do |setting|
          if setting.is_a?(Array)
            next if setting.any? { |optional_setting| current[optional_setting] }
            requirement = "Must define one of #{setting.join(', ')} in archive_settings"
          else
            next if current[setting]
            requirement = "Must define #{setting} in archive_settings"
          end
          raise IMW::Error, [message, requirement].compact.join(', ')
        end
      end

      # Parse and format the output from the archive program's "list"
      # command into an array of filenames.
      #
      # An including class can override this method to match the
      # output from the archiving program of that class.
      #
      # @param [String] string the raw output from the archive program's "list" command
      # @return [Array] a list of paths in the archive
      def archive_contents_string_to_array(string)
        string.split("\n")
      end
    end
  end
end
|
|
120
|
-
|
data/lib/imw/archives/rar.rb
DELETED
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
module IMW
  module Archives
    # Archive behavior for .rar files: mixes in IMW::Archives::Base and
    # supplies the settings for the +rar+ program.
    module Rar

      include IMW::Archives::Base

      # Settings for the +rar+ program, built on first use and then
      # cached in @archive_settings.
      #
      # @return [Hash] the program name and its flags per operation
      def archive_settings
        return @archive_settings if @archive_settings

        rar_settings = {}
        rar_settings[:program] = :rar
        rar_settings[:create]  = ['a', '-o+', '-inul']
        rar_settings[:append]  = ['a', '-o+', '-inul']
        rar_settings[:list]    = "vb"
        rar_settings[:extract] = ['x', '-o+', '-inul']
        @archive_settings = rar_settings
      end
    end
  end
end
|
|
19
|
-
|
data/lib/imw/archives/tar.rb
DELETED
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
module IMW
  module Archives
    # Archive behavior for plain .tar files: mixes in
    # IMW::Archives::Base and supplies the settings for the +tar+
    # program.
    module Tar

      include IMW::Archives::Base

      # Settings for the +tar+ program, built on first use and then
      # cached in @archive_settings.
      #
      # @return [Hash] the program name and its flag per operation
      def archive_settings
        return @archive_settings if @archive_settings

        tar_settings = {}
        tar_settings[:create]  = "-cf"
        tar_settings[:append]  = "-rf"
        tar_settings[:list]    = "-tf"
        tar_settings[:extract] = "-xf"
        tar_settings[:program] = :tar
        @archive_settings = tar_settings
      end
    end
  end
end
|
|
19
|
-
|
data/lib/imw/archives/tarbz2.rb
DELETED
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
module IMW
  module Archives
    # Behavior for .tar.bz2 / .tbz2 files, which are both a compressed
    # file and an archive.
    module Tarbz2

      # The compressed-file half ...
      include IMW::CompressedFiles::Base
      # ... and the archive half.
      include IMW::Archives::Base

      # Settings for the bzip2 side, built on first use and cached.
      #
      # @return [Hash]
      def compression_settings
        return @compression_settings if @compression_settings

        bzip2_settings = {}
        bzip2_settings[:program]               = :bzip2
        bzip2_settings[:decompression_program] = :bunzip2
        bzip2_settings[:decompress]            = ''
        bzip2_settings[:extension]             = 'bz2'
        @compression_settings = bzip2_settings
      end

      # Settings for the tar side, built on first use and cached.  No
      # :append flag is defined here.
      #
      # @return [Hash]
      def archive_settings
        return @archive_settings if @archive_settings

        tar_settings = {}
        tar_settings[:program] = :tar
        tar_settings[:create]  = '-cf'
        tar_settings[:list]    = "-tjf"
        tar_settings[:extract] = "-xjf"
        @archive_settings = tar_settings
      end

      # Replaces the +create+ from IMW::Archives::Base: first writes an
      # intermediate .tar file, then opens it with IMW.open and calls
      # +compress!+ on it.
      #
      # NOTE(review): unlike the Targz variant, no compression settings
      # are handed to the opened tar resource -- confirm this is
      # intentional.
      #
      # @param [String] input_paths the paths to add to this archive
      # @return the result of +compress!+
      def create(*input_paths)
        program = archive_settings[:program]
        flags   = archive_settings[:create]
        IMW.system(program, flags, path_between_archive_and_compression, *input_paths.flatten)
        uncompressed = IMW.open(path_between_archive_and_compression)
        uncompressed.compress!
      end

      # The basename this file has once the bzip2 layer is removed.
      #
      # @return [String]
      def decompressed_basename
        extension = extname
        if extension == '.tar.bz2'
          basename[0..-5] # .tar.bz2 => .tar
        elsif extension == '.tbz2'
          basename.gsub(/tbz2$/, 'tar') # .tbz2 => .tar
        else
          basename[0..-(extension.size + 1)]
        end
      end

      # The extension of this file, treating the compound forms
      # '.tar.bz2' and '.tbz2' as single extensions.
      #
      # @return [String]
      def extname
        compound_extensions = [[/\.tar\.bz2$/, '.tar.bz2'], [/\.tbz2$/, '.tbz2']]
        compound_extensions.each do |pattern, extension|
          return extension if pattern === path
        end
        File.extname(path)
      end

      protected

      # Path of the intermediate .tar file written by +create+.
      def path_between_archive_and_compression
        File.join(dirname, name + '.tar')
      end

    end
  end
end
|
|
73
|
-
|
data/lib/imw/archives/targz.rb
DELETED
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
module IMW
  module Archives
    # Behavior for .tar.gz / .tgz files, which are both a compressed
    # file and an archive.
    module Targz

      # The compressed-file half ...
      include IMW::CompressedFiles::Base
      # ... and the archive half.
      include IMW::Archives::Base

      # Settings for the gzip side, built on first use and cached.
      #
      # @return [Hash]
      def compression_settings
        return @compression_settings if @compression_settings

        gzip_settings = {}
        gzip_settings[:program]               = :gzip
        gzip_settings[:decompression_program] = :gunzip
        gzip_settings[:decompress]            = ''
        gzip_settings[:extension]             = 'gz'
        @compression_settings = gzip_settings
      end

      # Settings for the tar side, built on first use and cached.  No
      # :append flag is defined here.
      #
      # @return [Hash]
      def archive_settings
        return @archive_settings if @archive_settings

        tar_settings = {}
        tar_settings[:program] = :tar
        tar_settings[:list]    = "-tzf"
        tar_settings[:create]  = '-cf'
        tar_settings[:extract] = "-xzf"
        @archive_settings = tar_settings
      end

      # Replaces the +create+ from IMW::Archives::Base: first writes an
      # intermediate .tar file, then opens it with IMW.open, gives it
      # this object's compression settings and calls +compress!+ on it.
      #
      # @param [String] input_paths the paths to add to this archive
      # @return the result of +compress!+
      def create(*input_paths)
        program = archive_settings[:program]
        flags   = archive_settings[:create].split
        IMW.system(program, flags, path_between_archive_and_compression, *input_paths.flatten)
        uncompressed = IMW.open(path_between_archive_and_compression)
        uncompressed.compression_settings = compression_settings
        uncompressed.compress!
      end

      # The basename this file has once the gzip layer is removed.
      #
      # @return [String]
      def decompressed_basename
        extension = extname
        if extension == '.tar.gz'
          basename[0..-4] # .tar.gz => .tar
        elsif extension == '.tgz'
          basename.gsub(/tgz$/, 'tar') # .tgz => .tar
        else
          basename[0..-(extension.size + 1)]
        end
      end

      # The extension of this file, treating the compound forms
      # '.tar.gz' and '.tgz' as single extensions.
      #
      # @return [String]
      def extname
        compound_extensions = [[/\.tar\.gz$/, '.tar.gz'], [/\.tgz$/, '.tgz']]
        compound_extensions.each do |pattern, extension|
          return extension if pattern === path
        end
        File.extname(path)
      end

      protected

      # Path of the intermediate .tar file written by +create+.
      def path_between_archive_and_compression
        File.join(dirname, name + '.tar')
      end

    end
  end
end
|
|
73
|
-
|
data/lib/imw/archives/zip.rb
DELETED
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
module IMW
  module Archives
    # Archive behavior for .zip files: mixes in IMW::Archives::Base,
    # supplies settings for +zip+ / +unzip+ and parses the listing
    # printed by +unzip+.
    module Zip

      include IMW::Archives::Base

      # Settings for the +zip+ and +unzip+ programs, built on first use
      # and then cached in @archive_settings.
      #
      # @return [Hash] program names and their flag per operation
      def archive_settings
        @archive_settings ||= {
          :program             => :zip,
          :create              => "-qqr",
          :append              => "-qqg",
          :list                => "-l",
          :extract             => "-qqo",
          :unarchiving_program => :unzip
        }
      end

      protected

      # The `unzip' program outputs data in a very annoying format:
      #
      #     Archive:  data.zip
      #       Length     Date   Time    Name
      #      --------    ----   ----    ----
      #         18510  07-28-08 15:58   data/4d7Qrgz7.csv
      #          3418  07-28-08 15:41   data/7S.csv
      #         23353  07-28-08 15:41   data/g.csv
      #           711  07-28-08 15:58   data/g.xml
      #          1095  07-28-08 15:41   data/L.xml
      #          2399  07-28-08 15:58   data/mTAu9H3.xml
      #           152  07-28-08 15:58   data/vaHBS2t5R.dat
      #      --------                   -------
      #         49638                   7 files
      #
      # which is parsed by this method.
      #
      # @param [String] string the raw output of the "list" command
      # @return [Array<String>] the file names, one per listing row;
      #   empty when the output has no file rows
      def archive_contents_string_to_array(string)
        rows = string.split("\n")
        # Ignore the first 3 lines of the output and discard the last 2.
        # The slice is nil when there are fewer than 3 lines (e.g. the
        # program printed nothing), so fall back to an empty list.
        file_rows = rows[3...-2] || []
        file_rows.map do |row|
          # The file name is the fourth column.  Limiting the split to 4
          # fields keeps whitespace inside the name intact; a row with
          # fewer columns yields nil and is dropped by compact.
          row.strip.split(' ', 4)[3]
        end.compact
      end
    end
  end
end
|
data/lib/imw/boot.rb
DELETED
|
@@ -1,87 +0,0 @@
|
|
|
1
|
-
module IMW

  # IMW looks for configuration settings in the following places, in
  # order of increasing precedence:
  #
  # 1. The default configuration file <tt>etc/imwrc.rb</tt> in the IMW
  #    root directory (see <tt>IMW::Config.default_config_file</tt>).
  #
  # 2. The site-wide file named by
  #    <tt>IMW::Config::SITE_CONFIG_FILE</tt>, if it exists.
  #
  # 3. The user's file, if it exists: the path held in the environment
  #    variable named by <tt>IMW::Config::ENV_CONFIG_FILE</tt> when that
  #    variable is set, otherwise <tt>IMW::Config::USER_CONFIG_FILE</tt>
  #    (<tt>.imwrc</tt> in the user's home directory).
  #
  # *Note:* configuration files are plain Ruby code that will be directly
  # evaluated with +load+, in the order above, so a file of higher
  # precedence is evaluated after the files of lesser precedence.
  #
  # Relevant settings include
  #
  # * interfaces with external programs (+tar+, +wget+, &c.)
  # * paths to directories where IMW reads/writes files
  # * correspondences between file extensions and IMW file classes
  #
  # For more detailed information, see the default configuration file,
  # <tt>etc/imwrc.rb</tt>.
  module Config

    # Root of the IMW source base: two directories above this file.
    #
    # @return [String] an absolute path
    def self.imw_root
      File.expand_path File.join(File.dirname(__FILE__), '../..')
    end

    #
    # User configuration file
    #
    # By default, the file ~/.imwrc (.imwrc, in your home directory --
    # note no .rb extension) is sourced at top level.  If the $IMWRC
    # environment variable is set, that file will be sourced instead.
    #
    # Any code within this file will override settings in
    # /etc/imwrc.rb which itself overrides IMW_ROOT/etc/imwrc.rb
    #
    USER_CONFIG_FILE = File.join(ENV['HOME'] || '', '.imwrc')

    # Environment variable to override user configuration file location.
    ENV_CONFIG_FILE  = "IMWRC"

    # @return [String] expanded path of the user configuration file
    def self.user_config_file # :nodoc:
      File.expand_path(ENV[ENV_CONFIG_FILE] || USER_CONFIG_FILE)
    end

    # Path to site-wide config file (overwrites IMW defaults but
    # overridden by user defaults).
    SITE_CONFIG_FILE = "/etc/imwrc.rb"

    # @return [String] path of the site-wide configuration file
    def self.site_config_file # :nodoc:
      SITE_CONFIG_FILE
    end

    # @return [String] path of the default configuration file under
    #   the IMW root
    def self.default_config_file # :nodoc:
      File.join(imw_root, "etc/imwrc.rb")
    end

    # Source the config files, lowest precedence first, so that the
    # site file can override the defaults and the user file can
    # override both.  The default file is always loaded; the site and
    # user files are loaded only when they exist.
    #
    # @return [true]
    # @raise [LoadError] if the default configuration file is missing
    def self.load_config
      load default_config_file
      load site_config_file if File.exist?(site_config_file)
      load user_config_file if File.exist?(user_config_file)
      true
    end
  end
end
|
|
82
|
-
|
|
83
|
-
#
|
|
84
|
-
# Load the config files
|
|
85
|
-
#
|
|
86
|
-
IMW::Config.load_config
|
|
87
|
-
|