imw 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -1
- data/Rakefile +10 -0
- data/TODO +18 -0
- data/VERSION +1 -1
- data/bin/imw +1 -1
- data/etc/imwrc.rb +0 -50
- data/examples/dataset.rb +12 -0
- data/lib/imw/boot.rb +55 -9
- data/lib/imw/dataset/paths.rb +15 -24
- data/lib/imw/dataset/workflow.rb +131 -72
- data/lib/imw/dataset.rb +94 -186
- data/lib/imw/parsers/html_parser.rb +1 -1
- data/lib/imw/parsers.rb +1 -1
- data/lib/imw/repository.rb +3 -27
- data/lib/imw/resource.rb +190 -0
- data/lib/imw/resources/archive.rb +97 -0
- data/lib/imw/resources/archives_and_compressed/bz2.rb +18 -0
- data/lib/imw/resources/archives_and_compressed/gz.rb +18 -0
- data/lib/imw/resources/archives_and_compressed/rar.rb +23 -0
- data/lib/imw/resources/archives_and_compressed/tar.rb +23 -0
- data/lib/imw/resources/archives_and_compressed/tarbz2.rb +78 -0
- data/lib/imw/resources/archives_and_compressed/targz.rb +78 -0
- data/lib/imw/resources/archives_and_compressed/zip.rb +57 -0
- data/lib/imw/resources/archives_and_compressed.rb +32 -0
- data/lib/imw/resources/compressed_file.rb +89 -0
- data/lib/imw/resources/compressible.rb +77 -0
- data/lib/imw/resources/formats/delimited.rb +92 -0
- data/lib/imw/resources/formats/excel.rb +125 -0
- data/lib/imw/resources/formats/json.rb +53 -0
- data/lib/imw/resources/formats/sgml.rb +72 -0
- data/lib/imw/resources/formats/yaml.rb +53 -0
- data/lib/imw/resources/formats.rb +32 -0
- data/lib/imw/resources/local.rb +198 -0
- data/lib/imw/resources/remote.rb +110 -0
- data/lib/imw/resources/schemes/hdfs.rb +242 -0
- data/lib/imw/resources/schemes/http.rb +161 -0
- data/lib/imw/resources/schemes/s3.rb +137 -0
- data/lib/imw/resources/schemes.rb +19 -0
- data/lib/imw/resources.rb +118 -0
- data/lib/imw/runner.rb +5 -4
- data/lib/imw/transforms/archiver.rb +215 -0
- data/lib/imw/transforms/transferer.rb +103 -0
- data/lib/imw/transforms.rb +8 -0
- data/lib/imw/utils/error.rb +26 -30
- data/lib/imw/utils/extensions/array.rb +5 -15
- data/lib/imw/utils/extensions/hash.rb +6 -16
- data/lib/imw/utils/extensions/hpricot.rb +0 -14
- data/lib/imw/utils/extensions/string.rb +5 -15
- data/lib/imw/utils/extensions/symbol.rb +0 -13
- data/lib/imw/utils/extensions.rb +65 -0
- data/lib/imw/utils/log.rb +14 -13
- data/lib/imw/utils/misc.rb +0 -6
- data/lib/imw/utils/paths.rb +101 -42
- data/lib/imw/utils/version.rb +8 -9
- data/lib/imw/utils.rb +2 -18
- data/lib/imw.rb +92 -17
- data/spec/data/sample.csv +1 -1
- data/spec/data/sample.json +1 -0
- data/spec/data/sample.tsv +1 -1
- data/spec/data/sample.txt +1 -1
- data/spec/data/sample.xml +1 -1
- data/spec/data/sample.yaml +1 -1
- data/spec/imw/dataset/paths_spec.rb +32 -0
- data/spec/imw/dataset/workflow_spec.rb +41 -0
- data/spec/imw/resource_spec.rb +79 -0
- data/spec/imw/resources/archive_spec.rb +69 -0
- data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +15 -0
- data/spec/imw/resources/archives_and_compressed/gz_spec.rb +15 -0
- data/spec/imw/resources/archives_and_compressed/rar_spec.rb +16 -0
- data/spec/imw/resources/archives_and_compressed/tar_spec.rb +16 -0
- data/spec/imw/resources/archives_and_compressed/tarbz2_spec.rb +24 -0
- data/spec/imw/resources/archives_and_compressed/targz_spec.rb +21 -0
- data/spec/imw/resources/archives_and_compressed/zip_spec.rb +16 -0
- data/spec/imw/resources/compressed_file_spec.rb +48 -0
- data/spec/imw/resources/compressible_spec.rb +36 -0
- data/spec/imw/resources/formats/delimited_spec.rb +33 -0
- data/spec/imw/resources/formats/json_spec.rb +32 -0
- data/spec/imw/resources/formats/sgml_spec.rb +24 -0
- data/spec/imw/resources/formats/yaml_spec.rb +41 -0
- data/spec/imw/resources/local_spec.rb +98 -0
- data/spec/imw/resources/remote_spec.rb +35 -0
- data/spec/imw/resources/schemes/hdfs_spec.rb +61 -0
- data/spec/imw/resources/schemes/http_spec.rb +19 -0
- data/spec/imw/resources/schemes/s3_spec.rb +19 -0
- data/spec/imw/transforms/archiver_spec.rb +120 -0
- data/spec/imw/transforms/transferer_spec.rb +113 -0
- data/spec/imw/utils/paths_spec.rb +5 -33
- data/spec/imw/utils/shared_paths_spec.rb +29 -0
- data/spec/spec_helper.rb +5 -5
- data/spec/support/paths_matcher.rb +67 -0
- data/spec/support/random.rb +39 -36
- metadata +88 -75
- data/lib/imw/dataset/task.rb +0 -41
- data/lib/imw/files/archive.rb +0 -113
- data/lib/imw/files/basicfile.rb +0 -122
- data/lib/imw/files/binary.rb +0 -28
- data/lib/imw/files/compressed_file.rb +0 -93
- data/lib/imw/files/compressed_files_and_archives.rb +0 -334
- data/lib/imw/files/compressible.rb +0 -103
- data/lib/imw/files/csv.rb +0 -113
- data/lib/imw/files/directory.rb +0 -62
- data/lib/imw/files/excel.rb +0 -84
- data/lib/imw/files/json.rb +0 -41
- data/lib/imw/files/sgml.rb +0 -46
- data/lib/imw/files/text.rb +0 -68
- data/lib/imw/files/yaml.rb +0 -46
- data/lib/imw/files.rb +0 -125
- data/lib/imw/packagers/archiver.rb +0 -126
- data/lib/imw/packagers/s3_mover.rb +0 -36
- data/lib/imw/packagers.rb +0 -8
- data/lib/imw/utils/components.rb +0 -61
- data/lib/imw/utils/config.rb +0 -46
- data/lib/imw/utils/extensions/class/attribute_accessors.rb +0 -8
- data/lib/imw/utils/extensions/core.rb +0 -27
- data/lib/imw/utils/extensions/dir.rb +0 -24
- data/lib/imw/utils/extensions/file_core.rb +0 -64
- data/lib/imw/utils/extensions/typed_struct.rb +0 -22
- data/lib/imw/utils/extensions/uri.rb +0 -59
- data/lib/imw/utils/view/dump_csv.rb +0 -112
- data/lib/imw/utils/view/dump_csv_older.rb +0 -117
- data/lib/imw/utils/view.rb +0 -113
- data/spec/imw/dataset/datamapper/uri_spec.rb +0 -43
- data/spec/imw/dataset/datamapper_spec_helper.rb +0 -11
- data/spec/imw/files/archive_spec.rb +0 -118
- data/spec/imw/files/basicfile_spec.rb +0 -121
- data/spec/imw/files/bz2_spec.rb +0 -32
- data/spec/imw/files/compressed_file_spec.rb +0 -96
- data/spec/imw/files/compressible_spec.rb +0 -100
- data/spec/imw/files/file_spec.rb +0 -144
- data/spec/imw/files/gz_spec.rb +0 -32
- data/spec/imw/files/rar_spec.rb +0 -33
- data/spec/imw/files/tar_spec.rb +0 -31
- data/spec/imw/files/text_spec.rb +0 -23
- data/spec/imw/files/zip_spec.rb +0 -31
- data/spec/imw/files_spec.rb +0 -38
- data/spec/imw/packagers/archiver_spec.rb +0 -125
- data/spec/imw/packagers/s3_mover_spec.rb +0 -7
- data/spec/imw/utils/extensions/file_core_spec.rb +0 -72
- data/spec/imw/utils/extensions/find_spec.rb +0 -113
- data/spec/imw/workflow/rip/local_spec.rb +0 -89
- data/spec/imw/workflow/rip_spec.rb +0 -27
- data/spec/support/archive_contents_matcher.rb +0 -94
- data/spec/support/directory_contents_matcher.rb +0 -61
@@ -0,0 +1,18 @@
|
|
1
|
+
module IMW
  module Resources
    module CompressedFiles

      # Resource behaviour for gzip-compressed files.  The generic
      # decompression methods come from the included
      # IMW::Resources::CompressedFile; this module only supplies the
      # settings they read.
      module Gz

        include IMW::Resources::CompressedFile

        # Settings describing how to decompress this resource:
        # <tt>:decompression_program</tt> names the command-line
        # program and <tt>:decompress</tt> gives the flags handed to it.
        #
        # The hash is built on first use and memoized.
        #
        # @return [Hash]
        def compression_settings
          return @compression_settings if @compression_settings
          @compression_settings = {
            :decompression_program => :gunzip,
            :decompress            => '-fd'
          }
        end

      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'imw/resources/archive'
|
2
|
+
|
3
|
+
module IMW
  module Resources
    module Archives

      # Resource behaviour for RAR archives.  The archiving methods
      # themselves come from the included IMW::Resources::Archive; this
      # module only supplies the program name and the arguments to use
      # for each operation.
      module Rar

        include IMW::Resources::Archive

        # Settings for driving the +rar+ command-line program.  Each of
        # <tt>:create</tt>, <tt>:append</tt>, <tt>:list</tt> and
        # <tt>:extract</tt> holds the argument(s) for that operation.
        #
        # The hash is built on first use and memoized.
        #
        # @return [Hash]
        def archive_settings
          return @archive_settings if @archive_settings
          @archive_settings = {
            :program => :rar,
            :create  => ['a', '-o+', '-inul'],
            :append  => ['a', '-o+', '-inul'],
            :list    => "vb",
            :extract => ['x', '-o+', '-inul']
          }
        end

      end
    end
  end
end
|
23
|
+
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'imw/resources/archive'
|
2
|
+
|
3
|
+
module IMW
  module Resources
    module Archives

      # Resource behaviour for plain (uncompressed) tar archives.  The
      # archiving methods themselves come from the included
      # IMW::Resources::Archive; this module only supplies the program
      # name and the flags to use for each operation.
      module Tar

        include IMW::Resources::Archive

        # Settings for driving the +tar+ command-line program.  Each of
        # <tt>:create</tt>, <tt>:append</tt>, <tt>:list</tt> and
        # <tt>:extract</tt> holds the flags for that operation.
        #
        # The hash is built on first use and memoized.
        #
        # @return [Hash]
        def archive_settings
          return @archive_settings if @archive_settings
          @archive_settings = {
            :create  => "-cf",
            :append  => "-rf",
            :list    => "-tf",
            :extract => "-xf",
            :program => :tar
          }
        end

      end
    end
  end
end
|
23
|
+
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'imw/resources/archive'
|
2
|
+
require 'imw/resources/compressed_file'
|
3
|
+
|
4
|
+
module IMW
  module Resources
    module Archives

      # A tar archive compressed with bzip2 (<tt>.tar.bz2</tt> or
      # <tt>.tbz2</tt>).  Such a resource is both a compressed file and
      # an archive, so this module mixes in both behaviours and supplies
      # the settings each of them reads.
      module Tarbz2

        #
        # It's a compressed file
        #

        include IMW::Resources::CompressedFile

        # Settings used when compressing and decompressing this
        # resource.  Built on first use and memoized.
        #
        # @return [Hash]
        def compression_settings
          @compression_settings ||= {
            :program               => :bzip2,
            :decompression_program => :bunzip2,
            :decompress            => '',
            :extension             => 'bz2'
          }
        end

        #
        # But it's also an archive
        #

        include IMW::Resources::Archive

        # Settings for driving +tar+.  Listing and extracting use the
        # bzip2-aware flags; creation writes a plain tar file which
        # #create then compresses.  Built on first use and memoized.
        #
        # @return [Hash]
        def archive_settings
          @archive_settings ||= {
            :program => :tar,
            :create  => '-cf',
            :list    => "-tjf",
            :extract => "-xjf"
          }
        end

        # Overrides the default behavior of
        # IMW::Resources::Archive#create to compress the archive after
        # creating it.
        #
        # A plain tar file is written to
        # +path_between_archive_and_compression+ and then compressed in
        # place.  The intermediate tar is handed this resource's own
        # bzip2 settings explicitly (as Targz#create does with gzip)
        # instead of relying on the global IMW::COMPRESSION_SETTINGS
        # default, which may have been redefined to another program.
        #
        # @param [Array<String>] input_paths paths to archive (nested arrays are flattened)
        # @return [IMW::Resource] the compressed archive
        def create *input_paths
          intermediate_path = path_between_archive_and_compression
          IMW.system(archive_settings[:program], archive_settings[:create], intermediate_path, *input_paths.flatten)
          tar = IMW.open(intermediate_path)
          # Keep the empty :compress flag the default settings supply so
          # the command line is unchanged in the default configuration.
          tar.compression_settings = { :compress => '' }.merge(compression_settings)
          tar.compress!
        end

        # The basename of this resource once the bzip2 layer is
        # removed, i.e. the name of the underlying tar file.
        #
        #   foo.tar.bz2 => foo.tar
        #   foo.tbz2    => foo.tar
        #
        # @return [String]
        def decompressed_basename
          case extname
          when '.tar.bz2' then basename[0..-5]                   # drop the trailing ".bz2"
          when '.tbz2'    then basename.sub(/tbz2$/, 'tar')      # .tbz2 => .tar
          else                 basename[0..-(extname.size + 1)]  # drop whatever extension there is
          end
        end

        protected

        # Path of the uncompressed tar file that exists between
        # archiving and compression.
        #
        # @return [String]
        def path_between_archive_and_compression
          File.join(dirname, name + '.tar')
        end

        public

        #
        # It's a compressed file AND an archive!
        #

        # The extension of this resource, treating <tt>.tar.bz2</tt> as
        # a single extension rather than just <tt>.bz2</tt>.
        #
        # @return [String]
        def extname
          case path
          when /\.tar\.bz2$/ then '.tar.bz2'
          when /\.tbz2$/     then '.tbz2'
          else                    File.extname(path)
          end
        end

      end
    end
  end
end
|
78
|
+
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'imw/resources/archive'
|
2
|
+
require 'imw/resources/compressed_file'
|
3
|
+
|
4
|
+
module IMW
  module Resources
    module Archives

      # A tar archive compressed with gzip (<tt>.tar.gz</tt> or
      # <tt>.tgz</tt>).  Such a resource is both a compressed file and
      # an archive, so both behaviours are mixed in here together with
      # the settings each of them reads.
      module Targz

        #
        # It's a compressed file
        #

        include IMW::Resources::CompressedFile

        #
        # But it's also an archive
        #

        include IMW::Resources::Archive

        # Settings used when compressing and decompressing this
        # resource.  Built on first use and memoized.
        #
        # @return [Hash]
        def compression_settings
          return @compression_settings if @compression_settings
          @compression_settings = {
            :program               => :gzip,
            :decompression_program => :gunzip,
            :decompress            => '',
            :extension             => 'gz'
          }
        end

        # Settings for driving +tar+.  Built on first use and memoized.
        #
        # @return [Hash]
        def archive_settings
          return @archive_settings if @archive_settings
          @archive_settings = {
            :program => :tar,
            :list    => "-tzf",
            :create  => '-cf',
            :extract => "-xzf"
          }
        end

        # Create this archive from the given paths: write a plain tar
        # file first, then compress it using this resource's own
        # compression settings.
        #
        # @param [Array<String>] input_paths paths to archive (nested arrays are flattened)
        # @return [IMW::Resource] the compressed archive
        def create *input_paths
          tar_program  = archive_settings[:program]
          create_flags = archive_settings[:create].split
          IMW.system(tar_program, create_flags, path_between_archive_and_compression, *input_paths.flatten)

          intermediate_tar = IMW.open(path_between_archive_and_compression)
          intermediate_tar.compression_settings = compression_settings
          intermediate_tar.compress!
        end

        # The basename of this resource once the gzip layer is removed,
        # i.e. the name of the underlying tar file.
        #
        #   foo.tar.gz => foo.tar
        #   foo.tgz    => foo.tar
        #
        # @return [String]
        def decompressed_basename
          extension = extname
          if extension == '.tar.gz'
            basename[0..-4]                      # drop the trailing ".gz"
          elsif extension == '.tgz'
            basename.gsub(/tgz$/, 'tar')         # .tgz => .tar
          else
            basename[0..-(extname.size + 1)]     # drop whatever extension there is
          end
        end

        #
        # It's both an archive and a compressed file!
        #

        # The extension of this resource, treating <tt>.tar.gz</tt> as a
        # single extension rather than just <tt>.gz</tt>.
        #
        # @return [String]
        def extname
          case path
          when /\.tar\.gz$/
            '.tar.gz'
          when /\.tgz$/
            '.tgz'
          else
            File.extname(path)
          end
        end

        protected

        # Path of the uncompressed tar file that exists between
        # archiving and compression.
        #
        # @return [String]
        def path_between_archive_and_compression
          File.join(dirname, name + '.tar')
        end

      end
    end
  end
end
|
78
|
+
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'imw/resources/archive'
|
2
|
+
|
3
|
+
module IMW
|
4
|
+
module Resources
|
5
|
+
module Archives
|
6
|
+
module Zip
|
7
|
+
|
8
|
+
include IMW::Resources::Archive
|
9
|
+
|
10
|
+
# Settings for working with zip archives.  Each of <tt>:create</tt>,
# <tt>:append</tt>, <tt>:list</tt> and <tt>:extract</tt> holds the
# flags for that operation.
#
# NOTE(review): <tt>:unarchiving_program</tt> is presumably the program
# IMW::Resources::Archive runs for listing/extracting instead of
# <tt>:program</tt> -- confirm against that module.
#
# The hash is built on first use and memoized.
#
# @return [Hash]
def archive_settings
  return @archive_settings if @archive_settings
  @archive_settings = {
    :program             => :zip,
    :create              => "-qqr",
    :append              => "-qqg",
    :list                => "-l",
    :extract             => "-qqo",
    :unarchiving_program => :unzip
  }
end
|
20
|
+
|
21
|
+
protected
|
22
|
+
|
23
|
+
# The `unzip' program outputs data in a very annoying format:
#
#   Archive:  data.zip
#     Length     Date   Time    Name
#    --------    ----   ----    ----
#       18510  07-28-08 15:58   data/4d7Qrgz7.csv
#        3418  07-28-08 15:41   data/7S.csv
#       23353  07-28-08 15:41   data/g.csv
#         711  07-28-08 15:58   data/g.xml
#        1095  07-28-08 15:41   data/L.xml
#        2399  07-28-08 15:58   data/mTAu9H3.xml
#         152  07-28-08 15:58   data/vaHBS2t5R.dat
#    --------                   -------
#       49638                   7 files
#
# which is parsed by this method.
#
# The first 3 lines and the last 2 lines are discarded; from every
# remaining row the fourth column onwards is taken as the file name.
# Output too short to contain any file rows (e.g. an empty string or
# a warning instead of a listing) yields an empty array rather than
# an error, and rows without a fourth column are skipped.
#
# @param [String] string the raw listing output
# @return [Array<String>] the file names listed in the output
def archive_contents_string_to_array string
  rows = string.split("\n")

  # 3 header lines + 2 footer lines: anything this short lists no files.
  return [] if rows.length <= 5

  rows[3...-2].map do |row|
    # Limit the split to 4 fields so that runs of whitespace inside
    # the file name (the last field) survive untouched.
    columns = row.strip.split(' ', 4)
    columns[3]
  end.compact
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module IMW
  module Resources
    autoload :Compressible,    'imw/resources/compressible'
    autoload :CompressedFile,  'imw/resources/compressed_file'
    autoload :Archive,         'imw/resources/archive'
    autoload :Archives,        'imw/resources/archive'
    autoload :CompressedFiles, 'imw/resources/compressed_file'

    # Builds a test that accepts a local resource whose path matches
    # +pattern+.
    local_path_matching = lambda do |pattern|
      Proc.new { |r| r.is_local? && r.path =~ pattern }
    end

    # Handlers which augment the resource with methods for archiving,
    # extracting, compressing, decompressing...
    #
    # Each entry pairs the name of a module (relative to
    # IMW::Resources) with a Proc that is given a resource and returns
    # a true value when the module applies to it.
    ARCHIVE_AND_COMPRESSED_HANDLERS = [

      # try compressible first -- compressed files below will override it
      ["Compressible", Proc.new { |r| r.is_local? }],

      # order is important! -- tar.bz2 must come before .bz2, &c.
      ["Archives::Tarbz2", local_path_matching.call(/\.tar\.bz2$/)],
      ["Archives::Tarbz2", local_path_matching.call(/\.tbz2$/)],
      ["CompressedFiles::Bz2", Proc.new { |r|
        r.is_local? && r.path =~ /\.bz2$/ && r.path !~ /\.tar\.bz2$/ && r.path !~ /\.tbz2$/
      }],
      ["Archives::Targz", local_path_matching.call(/\.tar\.gz$/)],
      ["Archives::Targz", local_path_matching.call(/\.tgz$/)],
      ["CompressedFiles::Gz", Proc.new { |r|
        r.is_local? && r.path =~ /\.gz$/ && r.path !~ /\.tar\.gz$/ && r.path !~ /\.tgz$/
      }],
      ["Archives::Tar", local_path_matching.call(/\.tar$/)],
      ["Archives::Rar", local_path_matching.call(/\.rar$/)],
      ["Archives::Zip", local_path_matching.call(/\.zip$/)]

    ]

  end
end
|
32
|
+
|
@@ -0,0 +1,89 @@
|
|
1
|
+
module IMW
  module Resources

    # Namespace for the format-specific compressed file modules, each
    # loaded on first reference.
    module CompressedFiles
      autoload :Bz2, 'imw/resources/archives_and_compressed/bz2'
      autoload :Gz,  'imw/resources/archives_and_compressed/gz'
    end

    # Methods for decompressing a compressed file.
    #
    # A resource is not extended with this module directly.  Format
    # specific modules (e.g. - IMW::Resources::CompressedFiles::Bz2)
    # include it and provide the +compression_settings+ (program name,
    # command-line flags, &c.) that the methods here read.
    module CompressedFile

      attr_accessor :compression_settings

      # Is this file compressed?
      #
      # @return [true, false]
      def is_compressed?
        true
      end

      # Can this file be compressed?
      #
      # @return [true, false]
      def is_compressible?
        false
      end

      # The basename of this resource after it is decompressed, i.e.
      # its basename with the extension removed.
      #
      #   IMW::Resource.new('/path/to/my_file.txt.bz2').decompressed_basename
      #   => 'my_file.txt'
      #
      # @return [String] the decompressed basename
      def decompressed_basename
        compressed_name = basename
        compressed_name[0..-(extname.size + 1)]
      end

      # The path of this resource after it is decompressed: the
      # decompressed basename inside this resource's directory.
      #
      #   IMW::Resource.new('/path/to/my_file.txt.bz2').decompressed_path
      #   => '/path/to/my_file.txt'
      #
      # @return [String] the decompressed path
      def decompressed_path
        directory = dirname
        File.join(directory, decompressed_basename)
      end

      # Decompress this file in its present directory overwriting any
      # existing files and without saving the original compressed
      # file.
      #
      # Runs <tt>:decompression_program</tt> from the compression
      # settings (falling back to <tt>:program</tt>) with the
      # <tt>:decompress</tt> flags, from inside this resource's
      # directory.
      #
      # @return [IMW::Resource] the decompressed resource
      def decompress!
        should_exist!("Cannot decompress.")
        settings = compression_settings
        program  = settings[:decompression_program] || settings[:program]
        FileUtils.cd(dirname) do
          IMW.system(program, settings[:decompress], path)
        end
        IMW.open(decompressed_path)
      end

      # Decompress this file in its present directory, overwriting any
      # existing files while keeping the original compressed file.
      #
      # FIXME The implementation is a little stupid as the file is
      # needlessly copied.
      #
      # @return [IMW::Resource] the decompressed resource
      def decompress
        should_exist!("Cannot decompress.")
        copy         = cp(path + '.imw_copy')
        regular_file = decompress!
        copy.mv(path)
        regular_file
      ensure
        # Put the compressed original back if the copy is still lying
        # around (i.e. something failed before it was moved back).
        copy.mv(path) if copy && copy.exist?
      end

    end
  end
end
|
87
|
+
|
88
|
+
|
89
|
+
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module IMW

  # Default settings used when compressing files.  <tt>:program</tt>
  # defines the name of the command-line program to use,
  # <tt>:compress</tt> gives the flags to use when compressing, and
  # <tt>:extension</tt> gives the extension (_without_ the `.') added
  # by the program after compressing.
  #
  # Only defined here if it has not already been defined.
  COMPRESSION_SETTINGS = {
    :program   => 'bzip2',
    :compress  => '',
    :extension => 'bz2'
  } unless defined?(COMPRESSION_SETTINGS)

  module Resources

    # Defines methods for compressing a file.  The default compression
    # program is defined in IMW::COMPRESSION_SETTINGS though a
    # particular resource can change the values in its
    # +compression_settings+ hash.
    module Compressible

      # Compression settings.
      attr_accessor :compression_settings

      # Is this file compressible?
      #
      # @return [true]
      def is_compressible?
        true
      end

      # Defines the compression settings used for this
      # resource.  <tt>:program</tt> defines the name of the
      # command-line program to use, <tt>:compress</tt> gives the
      # flags to use when compressing, and <tt>:extension</tt> gives
      # the extension (_without_ the `.') added by the program after
      # compressing.
      #
      # Defaults to a private copy of IMW::COMPRESSION_SETTINGS, so
      # changing a value for one resource does not alter the defaults
      # seen by every other resource.
      #
      # @return [Hash]
      def compression_settings
        @compression_settings ||= COMPRESSION_SETTINGS.dup
      end

      # Compress this resource in place, overwriting it.
      #
      # This resource's +compression_settings+ method is used to
      # determine the method of compression.
      #
      # @return [IMW::Resource] the compressed file
      def compress!
        should_exist!("Cannot compress.")
        IMW.system(*[compression_settings[:program], compression_settings[:compress], path])
        IMW.open(File.join(dirname, basename + "." + compression_settings[:extension]))
      end

      # Compress this resource without overwriting it.
      #
      # FIXME The implementation is a little stupid as the file is
      # needlessly copied.
      #
      # @param [Hash] options currently unused
      # @return [IMW::Resource] the compressed file
      def compress options={}
        should_exist!("Cannot compress.")
        begin
          copy            = cp(path + '.imw_copy')
          compressed_file = compress!
          copy.mv(path)
          compressed_file
        ensure
          # +copy+ is still nil if the copy itself failed; without the
          # guard the resulting NoMethodError would hide the real error.
          copy.mv(path) if copy && copy.exist?
        end
      end

    end
  end
end
|
77
|
+
|
@@ -0,0 +1,92 @@
|
|
1
|
+
module IMW
  module Resources
    module Formats

      # Defines methods used for parsing and writing delimited data
      # formats (CSV, TSV, &c.) with the FasterCSV library.  This
      # module is not used to directly extend a resource.  Instead,
      # more specific modules (e.g. - IMW::Resources::Formats::Csv)
      # include this one and also define +delimited_options+ which is
      # actually what's passed to FasterCSV.
      #
      # @abstract
      module Delimited

        # Kept for backward compatibility; nothing in this module
        # reads it.
        attr_accessor :delimited_settings

        # Lets a resource replace the options handed to FasterCSV.
        # The reader is +delimited_options+, defined by the including
        # module, which returns this value once it has been set.
        attr_writer :delimited_options

        # Return the data in this delimited resource as an array of
        # arrays.
        #
        # Yield each outer array (row) if passed a block.
        #
        # @return [Array] the full data matrix
        # @yield [Array] each row of the data
        def load &block
          require 'fastercsv'
          FasterCSV.parse(read, delimited_options, &block)
        end

        # Map each row in this delimited resource.
        #
        # @yield [Array] each row of the data
        def map &block
          load.map(&block)
        end

        # Dump an array of arrays into this resource, one delimited
        # line per inner array.
        #
        # @param [Array] data array of arrays to dump
        # @param [Hash] options
        # @option options [true, false] :persist Keep this resource's IO object open after dumping
        # @return [IMW::Resource] this resource
        def dump data, options={}
          require 'fastercsv'
          data.each do |row|
            write(FasterCSV.generate_line(row, delimited_options))
          end
          io.close unless options[:persist]
          self
        end
      end

      # Comma-separated values.
      module Csv
        include Delimited

        # Default options to be passed to
        # FasterCSV[http://fastercsv.rubyforge.org/]; see its
        # documentation for more information.
        #
        # @return [Hash]
        def delimited_options
          @delimited_options ||= {
            :col_sep        => ',',
            :headers        => false,
            :return_headers => false,
            :write_headers  => true,
            :skip_blanks    => false,
            :force_quotes   => false
          }
        end
      end

      # Tab-separated values.
      module Tsv
        include Delimited

        # Default options to be passed to
        # FasterCSV[http://fastercsv.rubyforge.org/]; see its
        # documentation for more information.
        #
        # @return [Hash]
        def delimited_options
          @delimited_options ||= {
            :col_sep        => "\t",
            :headers        => false,
            :return_headers => false,
            :write_headers  => true,
            :skip_blanks    => false,
            :force_quotes   => false
          }
        end
      end
    end
  end
end
|
@@ -0,0 +1,125 @@
|
|
1
|
+
module IMW
|
2
|
+
module Resources
|
3
|
+
module Formats
|
4
|
+
|
5
|
+
# Defines methods for reading and writing Microsoft Excel data.
#
# The module works both when a resource is extended with it and when
# it is mixed into a class that calls #initialize: the workbook, the
# worksheet and the row counters are all set up lazily on first use.
#
# NOTE(review): the Spreadsheet library is not required anywhere in
# this module -- presumably it is loaded elsewhere; confirm.
module Excel

  # Row limit used when none was given to #initialize.
  DEFAULT_MAX_LINES = 65000 unless defined?(DEFAULT_MAX_LINES)

  # Readers are defined below so that they can load lazily.
  attr_writer :book, :sheet

  # When an existing resource is extended with this module, open its
  # workbook and select the first worksheet on that resource.
  #
  # @param obj the resource being extended
  def self.extended obj
    return unless obj.exist?
    obj.book  = Spreadsheet.open(obj.path)
    obj.sheet = obj.book.worksheet(0)
  end

  # The workbook for this resource: opened from +path+ if the resource
  # exists, otherwise a new, empty workbook.  Memoized.
  def book
    @book ||= (exist? ? Spreadsheet.open(path) : Spreadsheet::Workbook.new)
  end

  # The current worksheet: the workbook's first worksheet, or a newly
  # created one when the workbook has none.  Memoized, so reading it
  # repeatedly no longer adds a worksheet each time.
  def sheet
    @sheet ||= (book.worksheet(0) || book.create_worksheet)
  end

  # If an Excel file exists at the location specified by uri then
  # it is opened and can be read out with a subsequent call to
  # load().  Otherwise, a new workbook is created and can be written
  # to with the dump() method.
  #
  # @param uri location of the Excel file
  # @param mode currently unused
  # @param [Hash] options
  # @option options [Integer] :max_lines maximum number of rows per sheet
  def initialize uri, mode='r', options={}
    self.uri   = uri
    @max_lines = options[:max_lines] || DEFAULT_MAX_LINES
    @idx       = 0
    @book_idx  = 0
    @sht_idx   = 0
    if exist?
      get_existing_book
    else
      make_new_book
      make_new_sheet
    end
  end

  # Returns the data in an existing workbook as an
  # array of arrays.  Only capable of reading a single sheet.
  #
  # @return [Array<Array>]
  def load
    sheet.map { |row| row.to_a }
  end

  # Dumps data, which is assumed to be an array of arrays, to a
  # newly created Excel workbook.  Attempting to dump to a book
  # that already exists will typically result in file corruption.
  # Raises a 'too many lines' error if the number of lines
  # of data exceeds max_lines.
  #
  # @param [Array<Array>] data rows to write
  def dump data
    data.each do |line|
      raise "too many lines" if too_many?
      self << line
    end
    save unless no_data?
  end

  # Processes a single line of data and updates internal variables.
  # You shouldn't need to call this directly.
  #
  # @param [Array] line the cells of one row
  def << line
    sheet.row(excel_current_row).concat(line)
    @sht_row = excel_current_row + 1
    @idx     = (@idx || 0) + 1
  end

  # Instantiates a new Excel workbook in memory.  You shouldn't
  # need to call this directly.
  def make_new_book
    @book     = Spreadsheet::Workbook.new
    @book_idx = (@book_idx || 0) + 1
  end

  # Makes a new worksheet for a pre-existing Excel workbook.
  # This should be called after recovering from the
  # 'too many lines' error.
  def make_new_sheet
    @sheet   = book.create_worksheet
    @sht_idx = (@sht_idx || 0) + 1
    @sht_row = 0 # always start at row 0 in a new sheet
  end

  # Opens an existing Excel workbook.  You shouldn't need to
  # call this directly.
  def get_existing_book
    @sht_row = sheet.row_count # would like to be able to dump new data, doesn't work
    @sht_idx = (@sht_idx || 0) + 1
  end

  # Increments the current sheet to the next one in
  # an open book.  Not necessary at the moment.
  def incr_sheet
    @sheet = book.worksheet(@sht_idx || 0)
  end

  # There are too many lines if the number of rows attempting
  # to be written exceeds max_lines.
  def too_many?
    excel_current_row >= excel_max_lines
  end

  # There is no data if the number of rows attempting to be written
  # is zero.
  def no_data?
    excel_current_row == 0
  end

  # Saves the workbook.
  def save
    book.write path
  end

  protected

  # Index of the next row to write; starts at the current sheet's row
  # count when it has not been set yet.
  def excel_current_row
    @sht_row ||= sheet.row_count
  end

  # Row limit per sheet, defaulting to DEFAULT_MAX_LINES.
  def excel_max_lines
    @max_lines ||= DEFAULT_MAX_LINES
  end
end
|
124
|
+
end
|
125
|
+
end
|