imw 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -1
- data/Rakefile +10 -0
- data/TODO +18 -0
- data/VERSION +1 -1
- data/bin/imw +1 -1
- data/etc/imwrc.rb +0 -50
- data/examples/dataset.rb +12 -0
- data/lib/imw/boot.rb +55 -9
- data/lib/imw/dataset/paths.rb +15 -24
- data/lib/imw/dataset/workflow.rb +131 -72
- data/lib/imw/dataset.rb +94 -186
- data/lib/imw/parsers/html_parser.rb +1 -1
- data/lib/imw/parsers.rb +1 -1
- data/lib/imw/repository.rb +3 -27
- data/lib/imw/resource.rb +190 -0
- data/lib/imw/resources/archive.rb +97 -0
- data/lib/imw/resources/archives_and_compressed/bz2.rb +18 -0
- data/lib/imw/resources/archives_and_compressed/gz.rb +18 -0
- data/lib/imw/resources/archives_and_compressed/rar.rb +23 -0
- data/lib/imw/resources/archives_and_compressed/tar.rb +23 -0
- data/lib/imw/resources/archives_and_compressed/tarbz2.rb +78 -0
- data/lib/imw/resources/archives_and_compressed/targz.rb +78 -0
- data/lib/imw/resources/archives_and_compressed/zip.rb +57 -0
- data/lib/imw/resources/archives_and_compressed.rb +32 -0
- data/lib/imw/resources/compressed_file.rb +89 -0
- data/lib/imw/resources/compressible.rb +77 -0
- data/lib/imw/resources/formats/delimited.rb +92 -0
- data/lib/imw/resources/formats/excel.rb +125 -0
- data/lib/imw/resources/formats/json.rb +53 -0
- data/lib/imw/resources/formats/sgml.rb +72 -0
- data/lib/imw/resources/formats/yaml.rb +53 -0
- data/lib/imw/resources/formats.rb +32 -0
- data/lib/imw/resources/local.rb +198 -0
- data/lib/imw/resources/remote.rb +110 -0
- data/lib/imw/resources/schemes/hdfs.rb +242 -0
- data/lib/imw/resources/schemes/http.rb +161 -0
- data/lib/imw/resources/schemes/s3.rb +137 -0
- data/lib/imw/resources/schemes.rb +19 -0
- data/lib/imw/resources.rb +118 -0
- data/lib/imw/runner.rb +5 -4
- data/lib/imw/transforms/archiver.rb +215 -0
- data/lib/imw/transforms/transferer.rb +103 -0
- data/lib/imw/transforms.rb +8 -0
- data/lib/imw/utils/error.rb +26 -30
- data/lib/imw/utils/extensions/array.rb +5 -15
- data/lib/imw/utils/extensions/hash.rb +6 -16
- data/lib/imw/utils/extensions/hpricot.rb +0 -14
- data/lib/imw/utils/extensions/string.rb +5 -15
- data/lib/imw/utils/extensions/symbol.rb +0 -13
- data/lib/imw/utils/extensions.rb +65 -0
- data/lib/imw/utils/log.rb +14 -13
- data/lib/imw/utils/misc.rb +0 -6
- data/lib/imw/utils/paths.rb +101 -42
- data/lib/imw/utils/version.rb +8 -9
- data/lib/imw/utils.rb +2 -18
- data/lib/imw.rb +92 -17
- data/spec/data/sample.csv +1 -1
- data/spec/data/sample.json +1 -0
- data/spec/data/sample.tsv +1 -1
- data/spec/data/sample.txt +1 -1
- data/spec/data/sample.xml +1 -1
- data/spec/data/sample.yaml +1 -1
- data/spec/imw/dataset/paths_spec.rb +32 -0
- data/spec/imw/dataset/workflow_spec.rb +41 -0
- data/spec/imw/resource_spec.rb +79 -0
- data/spec/imw/resources/archive_spec.rb +69 -0
- data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +15 -0
- data/spec/imw/resources/archives_and_compressed/gz_spec.rb +15 -0
- data/spec/imw/resources/archives_and_compressed/rar_spec.rb +16 -0
- data/spec/imw/resources/archives_and_compressed/tar_spec.rb +16 -0
- data/spec/imw/resources/archives_and_compressed/tarbz2_spec.rb +24 -0
- data/spec/imw/resources/archives_and_compressed/targz_spec.rb +21 -0
- data/spec/imw/resources/archives_and_compressed/zip_spec.rb +16 -0
- data/spec/imw/resources/compressed_file_spec.rb +48 -0
- data/spec/imw/resources/compressible_spec.rb +36 -0
- data/spec/imw/resources/formats/delimited_spec.rb +33 -0
- data/spec/imw/resources/formats/json_spec.rb +32 -0
- data/spec/imw/resources/formats/sgml_spec.rb +24 -0
- data/spec/imw/resources/formats/yaml_spec.rb +41 -0
- data/spec/imw/resources/local_spec.rb +98 -0
- data/spec/imw/resources/remote_spec.rb +35 -0
- data/spec/imw/resources/schemes/hdfs_spec.rb +61 -0
- data/spec/imw/resources/schemes/http_spec.rb +19 -0
- data/spec/imw/resources/schemes/s3_spec.rb +19 -0
- data/spec/imw/transforms/archiver_spec.rb +120 -0
- data/spec/imw/transforms/transferer_spec.rb +113 -0
- data/spec/imw/utils/paths_spec.rb +5 -33
- data/spec/imw/utils/shared_paths_spec.rb +29 -0
- data/spec/spec_helper.rb +5 -5
- data/spec/support/paths_matcher.rb +67 -0
- data/spec/support/random.rb +39 -36
- metadata +88 -75
- data/lib/imw/dataset/task.rb +0 -41
- data/lib/imw/files/archive.rb +0 -113
- data/lib/imw/files/basicfile.rb +0 -122
- data/lib/imw/files/binary.rb +0 -28
- data/lib/imw/files/compressed_file.rb +0 -93
- data/lib/imw/files/compressed_files_and_archives.rb +0 -334
- data/lib/imw/files/compressible.rb +0 -103
- data/lib/imw/files/csv.rb +0 -113
- data/lib/imw/files/directory.rb +0 -62
- data/lib/imw/files/excel.rb +0 -84
- data/lib/imw/files/json.rb +0 -41
- data/lib/imw/files/sgml.rb +0 -46
- data/lib/imw/files/text.rb +0 -68
- data/lib/imw/files/yaml.rb +0 -46
- data/lib/imw/files.rb +0 -125
- data/lib/imw/packagers/archiver.rb +0 -126
- data/lib/imw/packagers/s3_mover.rb +0 -36
- data/lib/imw/packagers.rb +0 -8
- data/lib/imw/utils/components.rb +0 -61
- data/lib/imw/utils/config.rb +0 -46
- data/lib/imw/utils/extensions/class/attribute_accessors.rb +0 -8
- data/lib/imw/utils/extensions/core.rb +0 -27
- data/lib/imw/utils/extensions/dir.rb +0 -24
- data/lib/imw/utils/extensions/file_core.rb +0 -64
- data/lib/imw/utils/extensions/typed_struct.rb +0 -22
- data/lib/imw/utils/extensions/uri.rb +0 -59
- data/lib/imw/utils/view/dump_csv.rb +0 -112
- data/lib/imw/utils/view/dump_csv_older.rb +0 -117
- data/lib/imw/utils/view.rb +0 -113
- data/spec/imw/dataset/datamapper/uri_spec.rb +0 -43
- data/spec/imw/dataset/datamapper_spec_helper.rb +0 -11
- data/spec/imw/files/archive_spec.rb +0 -118
- data/spec/imw/files/basicfile_spec.rb +0 -121
- data/spec/imw/files/bz2_spec.rb +0 -32
- data/spec/imw/files/compressed_file_spec.rb +0 -96
- data/spec/imw/files/compressible_spec.rb +0 -100
- data/spec/imw/files/file_spec.rb +0 -144
- data/spec/imw/files/gz_spec.rb +0 -32
- data/spec/imw/files/rar_spec.rb +0 -33
- data/spec/imw/files/tar_spec.rb +0 -31
- data/spec/imw/files/text_spec.rb +0 -23
- data/spec/imw/files/zip_spec.rb +0 -31
- data/spec/imw/files_spec.rb +0 -38
- data/spec/imw/packagers/archiver_spec.rb +0 -125
- data/spec/imw/packagers/s3_mover_spec.rb +0 -7
- data/spec/imw/utils/extensions/file_core_spec.rb +0 -72
- data/spec/imw/utils/extensions/find_spec.rb +0 -113
- data/spec/imw/workflow/rip/local_spec.rb +0 -89
- data/spec/imw/workflow/rip_spec.rb +0 -27
- data/spec/support/archive_contents_matcher.rb +0 -94
- data/spec/support/directory_contents_matcher.rb +0 -61
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
module IMW
  module Resources
    module CompressedFiles

      # Resource handler for gzip-compressed (<tt>.gz</tt>) files.
      # Mixes in IMW::Resources::CompressedFile and supplies the
      # settings that module reads when decompressing.
      module Gz

        include IMW::Resources::CompressedFile

        # Settings used when decompressing this resource:
        # <tt>:decompression_program</tt> names the command-line
        # program and <tt>:decompress</tt> gives the flags handed to
        # it.  The hash is built on first access and reused afterwards.
        #
        # @return [Hash]
        def compression_settings
          return @compression_settings if @compression_settings
          @compression_settings = {
            :decompression_program => :gunzip,
            :decompress            => '-fd'
          }
        end

      end
    end
  end
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
require 'imw/resources/archive'
|
|
2
|
+
|
|
3
|
+
module IMW
  module Resources
    module Archives

      # Resource handler for RAR archives.  Mixes in
      # IMW::Resources::Archive and supplies the command-line program
      # and per-operation arguments it should use.
      module Rar

        include IMW::Resources::Archive

        # Settings describing how to drive the +rar+ program:
        # <tt>:program</tt> names the executable and the remaining
        # keys give the arguments for creating, appending to, listing
        # and extracting an archive.  Built on first access and reused
        # afterwards.
        #
        # @return [Hash]
        def archive_settings
          return @archive_settings if @archive_settings
          # switches shared by every operation that writes files
          shared_switches = ['-o+', '-inul']
          @archive_settings = {
            :program => :rar,
            :create  => ['a'] + shared_switches,
            :append  => ['a'] + shared_switches,
            :list    => "vb",
            :extract => ['x'] + shared_switches
          }
        end
      end
    end
  end
end
|
|
23
|
+
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
require 'imw/resources/archive'
|
|
2
|
+
|
|
3
|
+
module IMW
  module Resources
    module Archives

      # Resource handler for plain (uncompressed) tar archives.  Mixes
      # in IMW::Resources::Archive and supplies the command-line
      # program and per-operation flags it should use.
      module Tar

        include IMW::Resources::Archive

        # Settings describing how to drive the +tar+ program:
        # <tt>:program</tt> names the executable and the remaining
        # keys give the flags for creating, appending to, listing and
        # extracting an archive.  Built on first access and reused
        # afterwards.
        #
        # @return [Hash]
        def archive_settings
          return @archive_settings if @archive_settings
          @archive_settings = {
            :create  => '-cf',
            :append  => '-rf',
            :list    => '-tf',
            :extract => '-xf',
            :program => :tar
          }
        end
      end
    end
  end
end
|
|
23
|
+
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
require 'imw/resources/archive'
|
|
2
|
+
require 'imw/resources/compressed_file'
|
|
3
|
+
|
|
4
|
+
module IMW
  module Resources
    module Archives

      # Resource handler for bzip2-compressed tar archives
      # (<tt>.tar.bz2</tt> and <tt>.tbz2</tt>).  Such a file is both a
      # compressed file and an archive, so this module mixes in
      # IMW::Resources::CompressedFile as well as
      # IMW::Resources::Archive and provides settings for each.
      module Tarbz2

        #
        # It's a compressed file
        #

        include IMW::Resources::CompressedFile

        # Settings for the compression half of this resource:
        # <tt>:program</tt> / <tt>:decompression_program</tt> name the
        # command-line programs, <tt>:decompress</tt> gives the flags
        # used when decompressing and <tt>:extension</tt> is the
        # extension (without the `.') added after compressing.
        #
        # @return [Hash]
        def compression_settings
          @compression_settings ||= {
            :program               => :bzip2,
            :decompression_program => :bunzip2,
            :decompress            => '',
            :extension             => 'bz2'
          }
        end

        #
        # But it's also an archive
        #

        include IMW::Resources::Archive

        # Settings for the archive half of this resource:
        # <tt>:program</tt> names the executable and the remaining
        # keys give the flags for each operation.  Note there is no
        # <tt>:append</tt> entry.
        #
        # @return [Hash]
        def archive_settings
          @archive_settings ||= {
            :program => :tar,
            :create  => '-cf',
            :list    => "-tjf",
            :extract => "-xjf"
          }
        end

        # Overrides the default behavior of
        # IMW::Resources::Archive#create: first builds a plain tarball
        # at +path_between_archive_and_compression+ and then compresses
        # it.
        #
        # @param [Array<String>] input_paths paths to archive (nested arrays are flattened)
        # @return whatever <tt>compress!</tt> returns for the intermediate tarball
        def create *input_paths
          IMW.system(archive_settings[:program], archive_settings[:create], path_between_archive_and_compression, *input_paths.flatten)
          tar = IMW.open(path_between_archive_and_compression)
          # Hand our own settings to the intermediate tarball (as
          # Targz#create does) so that it is compressed with bzip2 no
          # matter what default the tarball resource would otherwise use.
          tar.compression_settings = compression_settings
          tar.compress!
        end

        # The basename of this resource once the bzip2 layer has been
        # removed, i.e. the name of the underlying tarball.
        #
        # @return [String]
        def decompressed_basename
          case extname
          when '.tar.bz2' then basename[0..-5]                    # .tar.bz2 => .tar
          when '.tbz2'    then basename.gsub(/tbz2$/, 'tar')      # .tbz2    => .tar
          else                 basename[0..-(extname.size + 1)]
          end
        end


        protected
        # Path of the intermediate, uncompressed tarball that +create+
        # writes before compressing it.
        def path_between_archive_and_compression
          File.join(dirname,name + '.tar')
        end

        public

        #
        # It's a compressed file AND an archive!
        #

        # The extension of this resource, treating the compound
        # <tt>.tar.bz2</tt> as a single extension.
        #
        # @return [String]
        def extname
          case path
          when /\.tar\.bz2$/ then '.tar.bz2'
          when /\.tbz2$/     then '.tbz2'
          else File.extname(path)
          end
        end

      end
    end
  end
end
|
|
78
|
+
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
require 'imw/resources/archive'
|
|
2
|
+
require 'imw/resources/compressed_file'
|
|
3
|
+
|
|
4
|
+
module IMW
  module Resources
    module Archives

      # Resource handler for gzip-compressed tar archives
      # (<tt>.tar.gz</tt> and <tt>.tgz</tt>).  Such a file is both a
      # compressed file and an archive, so this module mixes in
      # IMW::Resources::CompressedFile as well as
      # IMW::Resources::Archive and provides settings for each.
      module Targz

        # -- the compressed-file half --

        include IMW::Resources::CompressedFile

        # Settings for the compression half of this resource:
        # <tt>:program</tt> / <tt>:decompression_program</tt> name the
        # command-line programs, <tt>:decompress</tt> gives the flags
        # used when decompressing and <tt>:extension</tt> is the
        # extension (without the `.') added after compressing.
        #
        # @return [Hash]
        def compression_settings
          return @compression_settings if @compression_settings
          @compression_settings = {
            :program               => :gzip,
            :decompression_program => :gunzip,
            :decompress            => '',
            :extension             => 'gz'
          }
        end

        # -- the archive half --

        include IMW::Resources::Archive

        # Settings for the archive half of this resource:
        # <tt>:program</tt> names the executable and the remaining
        # keys give the flags for each operation.
        #
        # @return [Hash]
        def archive_settings
          return @archive_settings if @archive_settings
          @archive_settings = {
            :program => :tar,
            :list    => '-tzf',
            :create  => '-cf',
            :extract => '-xzf'
          }
        end

        # Replaces the default archive creation: a plain tarball is
        # written to +path_between_archive_and_compression+ and then
        # compressed using this resource's own +compression_settings+.
        #
        # @param [Array<String>] input_paths paths to archive (nested arrays are flattened)
        # @return whatever <tt>compress!</tt> returns for the intermediate tarball
        def create *input_paths
          tar_program  = archive_settings[:program]
          create_flags = archive_settings[:create].split
          IMW.system(tar_program, create_flags, path_between_archive_and_compression, *input_paths.flatten)
          tarball = IMW.open(path_between_archive_and_compression)
          tarball.compression_settings = compression_settings
          tarball.compress!
        end

        # The basename of this resource once the gzip layer has been
        # removed, i.e. the name of the underlying tarball.
        #
        # @return [String]
        def decompressed_basename
          extension = extname
          return basename[0..-4]                if extension == '.tar.gz' # .tar.gz => .tar
          return basename.gsub(/tgz$/, 'tar')   if extension == '.tgz'    # .tgz    => .tar
          basename[0..-(extension.size + 1)]
        end

        # Path of the intermediate, uncompressed tarball that +create+
        # writes before compressing it.
        def path_between_archive_and_compression
          File.join(dirname, name + '.tar')
        end
        protected :path_between_archive_and_compression

        # The extension of this resource, treating the compound
        # <tt>.tar.gz</tt> as a single extension.
        #
        # @return [String]
        def extname
          location = path
          compound = [['.tar.gz', /\.tar\.gz$/], ['.tgz', /\.tgz$/]].find do |_, pattern|
            pattern === location
          end
          compound ? compound.first : File.extname(location)
        end

      end
    end
  end
end
|
|
78
|
+
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
require 'imw/resources/archive'
|
|
2
|
+
|
|
3
|
+
module IMW
  module Resources
    module Archives

      # Resource handler for ZIP archives.  Mixes in
      # IMW::Resources::Archive and supplies the command-line programs
      # and flags it should use, plus a parser for the listing format
      # printed by `unzip'.
      module Zip

        include IMW::Resources::Archive

        # Settings describing how to drive the zip tools:
        # <tt>:program</tt> and <tt>:unarchiving_program</tt> name the
        # executables and the remaining keys give the flags for each
        # operation.  Built on first access and reused afterwards.
        #
        # @return [Hash]
        def archive_settings
          @archive_settings ||= {
            :program             => :zip,
            :create              => "-qqr",
            :append              => "-qqg",
            :list                => "-l",
            :extract             => "-qqo",
            :unarchiving_program => :unzip
          }
        end

        protected

        # The `unzip' program outputs data in a very annoying format:
        #
        #     Archive:  data.zip
        #       Length     Date   Time    Name
        #      --------    ----   ----    ----
        #         18510  07-28-08 15:58   data/4d7Qrgz7.csv
        #          3418  07-28-08 15:41   data/7S.csv
        #         23353  07-28-08 15:41   data/g.csv
        #           711  07-28-08 15:58   data/g.xml
        #          1095  07-28-08 15:41   data/L.xml
        #          2399  07-28-08 15:58   data/mTAu9H3.xml
        #           152  07-28-08 15:58   data/vaHBS2t5R.dat
        #      --------                   -------
        #         49638                   7 files
        #
        # which is parsed by this method.
        #
        # Listings too short to contain any file rows (including +nil+
        # and the empty string) yield an empty array, and rows that do
        # not have all four columns are skipped.
        #
        # @param [String] string the raw listing
        # @return [Array<String>] the file names, in listing order
        def archive_contents_string_to_array string
          rows = string.to_s.split("\n")
          # ignore the first 3 lines of the output and also discard the
          # last 2; the slice is nil when the listing is that short
          file_rows = rows[3...-2] || []
          file_rows.map do |row|
            # limit the split to 4 fields so that the file name (the
            # fourth column) keeps any whitespace it contains verbatim
            columns = row.strip.split(' ', 4)
            columns[3] if columns.size == 4
          end.compact
        end
      end
    end
  end
end
|
|
56
|
+
|
|
57
|
+
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
module IMW
  module Resources
    autoload :Archive,         'imw/resources/archive'
    autoload :Archives,        'imw/resources/archive'
    autoload :Compressible,    'imw/resources/compressible'
    autoload :CompressedFile,  'imw/resources/compressed_file'
    autoload :CompressedFiles, 'imw/resources/compressed_file'

    # Builds the test used by most handlers below: the resource must
    # be local and its path must match +pattern+.
    local_with_path = lambda do |pattern|
      Proc.new { |r| r.is_local? && r.path =~ pattern }
    end

    # Handlers which augment the resource with methods for archiving,
    # extracting, compressing, decompressing...
    #
    # Each entry pairs the name of a module (relative to
    # IMW::Resources) with a Proc that is handed a resource and
    # answers whether that module applies to it.
    ARCHIVE_AND_COMPRESSED_HANDLERS = [

      # try compressible first -- compressed files below will override it
      ["Compressible", Proc.new { |r| r.is_local? }],

      # order is important! -- tar.bz2 must come before .bz2, &c.
      ["Archives::Tarbz2", local_with_path.call(/\.tar\.bz2$/)],
      ["Archives::Tarbz2", local_with_path.call(/\.tbz2$/)],
      ["CompressedFiles::Bz2",
       Proc.new { |r| r.is_local? && r.path =~ /\.bz2$/ && !(r.path =~ /\.tar\.bz2$/ || r.path =~ /\.tbz2$/) }],
      ["Archives::Targz", local_with_path.call(/\.tar\.gz$/)],
      ["Archives::Targz", local_with_path.call(/\.tgz$/)],
      ["CompressedFiles::Gz",
       Proc.new { |r| r.is_local? && r.path =~ /\.gz$/ && !(r.path =~ /\.tar\.gz$/ || r.path =~ /\.tgz$/) }],
      ["Archives::Tar", local_with_path.call(/\.tar$/)],
      ["Archives::Rar", local_with_path.call(/\.rar$/)],
      ["Archives::Zip", local_with_path.call(/\.zip$/)]

    ]

  end
end
|
|
32
|
+
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
module IMW
  module Resources

    # Format-specific handlers for compressed files, loaded on demand.
    module CompressedFiles
      autoload :Gz,  'imw/resources/archives_and_compressed/gz'
      autoload :Bz2, 'imw/resources/archives_and_compressed/bz2'
    end

    # Methods for decompressing a compressed file.  A resource is not
    # extended with this module directly; format-specific modules
    # (e.g. IMW::Resources::CompressedFiles::Bz2) include it and
    # provide the +compression_settings+ (program names, command-line
    # flags, &c.) that the methods here read.
    module CompressedFile

      attr_accessor :compression_settings

      # Is this file compressed?  Always so for resources using this
      # module.
      #
      # @return [true]
      def is_compressed?
        true
      end

      # Can this file be compressed?  Never, as it already is.
      #
      # @return [false]
      def is_compressible?
        false
      end

      # The basename of this resource after it is decompressed, i.e.
      # its basename with the extension removed.
      #
      #   IMW::Resource.new('/path/to/my_file.txt.bz2').decompressed_basename
      #   => 'my_file.txt'
      #
      # @return [String] the decompressed basename
      def decompressed_basename
        suffix_length = extname.size
        basename.slice(0..-(suffix_length + 1))
      end

      # The path of this resource after it is decompressed: the
      # decompressed basename inside this resource's directory.
      #
      #   IMW::Resource.new('/path/to/my_file.txt.bz2').decompressed_path
      #   => '/path/to/my_file.txt'
      #
      # @return [String] the decompressed path
      def decompressed_path
        File.join(dirname, decompressed_basename)
      end

      # Decompress this file in its present directory, overwriting any
      # existing files and without saving the original compressed
      # file.
      #
      # The program run is <tt>:decompression_program</tt> from
      # +compression_settings+, falling back to <tt>:program</tt>.
      #
      # @return [IMW::Resource] the decompressed resource
      def decompress!
        should_exist!("Cannot decompress.")
        settings     = compression_settings
        decompressor = settings[:decompression_program] || settings[:program]
        FileUtils.cd(dirname) do
          IMW.system(decompressor, compression_settings[:decompress], path)
        end
        IMW.open(decompressed_path)
      end

      # Decompress this file in its present directory, overwriting any
      # existing files while keeping the original compressed file.
      #
      # FIXME The implementation is a little stupid as the file is
      # needlessly copied: a copy is set aside, the original is
      # decompressed in place, and the copy is then moved back.
      #
      # @return [IMW::Resource] the decompressed resource
      def decompress
        should_exist!("Cannot decompress.")
        backup       = cp(path + '.imw_copy')
        decompressed = decompress!
        backup.mv(path)
        decompressed
      ensure
        # put the original back if anything above failed part-way
        backup.mv(path) if backup && backup.exist?
      end

    end
  end
end
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
module IMW

  # Default settings used when compressing files.  <tt>:program</tt>
  # defines the name of the command-line program to use,
  # <tt>:compress</tt> gives the flags to use when compressing, and
  # <tt>:extension</tt> gives the extension (_without_ the `.') added
  # by the program after compressing.
  #
  # Left untouched if the constant has already been defined.
  COMPRESSION_SETTINGS = {
    :program   => 'bzip2',
    :compress  => '',
    :extension => 'bz2'
  } unless defined?(COMPRESSION_SETTINGS)

  module Resources

    # Defines methods for compressing a file.  The default compression
    # program is defined in IMW::COMPRESSION_SETTINGS though a
    # particular resource can change the values in its
    # +compression_settings+ hash.
    module Compressible

      # Compression settings.
      attr_writer :compression_settings

      # Is this file compressible?
      #
      # @return [true]
      def is_compressible?
        true
      end

      # Defines the compression settings used for this
      # resource.  <tt>:program</tt> defines the name of the
      # command-line program to use, <tt>:compress</tt> gives the
      # flags to use when compressing, and <tt>:extension</tt> gives
      # the extension (_without_ the `.') added by the program after
      # compressing.
      #
      # Unless settings were assigned explicitly, each resource gets
      # its own copy of IMW::COMPRESSION_SETTINGS, so changing a value
      # here does not alter the defaults seen by other resources.
      #
      # @return [Hash]
      def compression_settings
        @compression_settings ||= COMPRESSION_SETTINGS.dup
      end

      # Compress this resource in place, overwriting it.
      #
      # This resource's +compression_settings+ method is used to
      # determine the method of compression.
      #
      # @return [IMW::Resource] the compressed file
      def compress!
        should_exist!("Cannot compress.")
        IMW.system(compression_settings[:program], compression_settings[:compress], path)
        IMW.open(File.join(dirname, basename + "." + compression_settings[:extension]))
      end

      # Compress this resource without overwriting it.
      #
      # FIXME The implementation is a little stupid as the file is
      # needlessly copied: a copy is set aside, the original is
      # compressed in place, and the copy is then moved back.
      #
      # @param [Hash] options currently unused
      # @return [IMW::Resource] the compressed file
      def compress options={}
        should_exist!("Cannot compress.")
        begin
          copy = cp(path + '.imw_copy')
          compressed_file = compress!
          copy.mv(path)
          compressed_file
        ensure
          # +copy+ is nil when the copy itself failed; guard so that the
          # original error propagates instead of a NoMethodError
          copy.mv(path) if copy && copy.exist?
        end
      end

    end
  end
end
|
|
77
|
+
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
module IMW
  module Resources
    module Formats

      # Defines methods used for parsing and writing delimited data
      # formats (CSV, TSV, &c.) with the FasterCSV library.  This
      # module is not used to directly extend a resource.  Instead,
      # more specific modules (e.g. - IMW::Resources::Formats::Csv)
      # include this one and also define +delimited_options+ which is
      # actually what's passed to FasterCSV.
      #
      # @abstract
      module Delimited

        # Kept for backward compatibility; nothing in this module reads
        # it.  Use +delimited_options+ instead.
        attr_accessor :delimited_settings

        # Replace the options handed to FasterCSV.  The reader is
        # supplied by the including module (Csv, Tsv, ...) and returns
        # whatever was assigned here in preference to its defaults.
        attr_writer :delimited_options

        # Return the data in this delimited resource as an array of
        # arrays.
        #
        # Yield each outer array (row) if passed a block.
        #
        # @return [Array] the full data matrix
        # @yield [Array] each row of the data
        def load &block
          require 'fastercsv'
          FasterCSV.parse(read, delimited_options, &block)
        end

        # Map each row in this delimited resource.
        #
        # @yield [Array] each row of the data
        # @return [Array] the block's result for each row
        def map &block
          load.map(&block)
        end

        # Dump an array of arrays into this resource, one generated
        # line per inner array.
        #
        # @param [Array] data array of arrays to dump
        # @param [Hash] options
        # @option options [true, false] :persist Keep this resource's IO object open after dumping
        # @return [self]
        def dump data, options={}
          require 'fastercsv'
          data.each do |row|
            write(FasterCSV.generate_line(row, delimited_options))
          end
          io.close unless options[:persist]
          self
        end
      end

      # Comma-separated values.
      module Csv
        include Delimited

        # Default options to be passed to
        # FasterCSV[http://fastercsv.rubyforge.org/]; see its
        # documentation for more information.
        #
        # @return [Hash]
        def delimited_options
          @delimited_options ||= {
            :col_sep        => ',',
            :headers        => false,
            :return_headers => false,
            :write_headers  => true,
            :skip_blanks    => false,
            :force_quotes   => false
          }
        end
      end

      # Tab-separated values.
      module Tsv
        include Delimited

        # Default options to be passed to
        # FasterCSV[http://fastercsv.rubyforge.org/]; see its
        # documentation for more information.
        #
        # @return [Hash]
        def delimited_options
          @delimited_options ||= {
            :col_sep        => "\t",
            :headers        => false,
            :return_headers => false,
            :write_headers  => true,
            :skip_blanks    => false,
            :force_quotes   => false
          }
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
module IMW
|
|
2
|
+
module Resources
|
|
3
|
+
module Formats
|
|
4
|
+
|
|
5
|
+
# Defines methods for reading and writing Microsoft Excel data.
|
|
6
|
+
module Excel

  # Number of rows #dump will write to one sheet before raising,
  # unless a different <tt>:max_lines</tt> is given to #initialize.
  DEFAULT_MAX_LINES = 65000

  attr_writer :book, :sheet

  # Hook run when an object is extended with this module.  Prepares
  # the bookkeeping counters on +obj+ and, when the resource already
  # exists, opens its workbook so it can be read with #load.
  def self.extended obj
    obj.instance_eval do
      reset_excel_state
      get_existing_book if exist?
    end
  end

  # The workbook behind this resource.  Opened from +path+ on first
  # access when the resource exists; otherwise a new, empty workbook
  # is created in memory.
  #
  # @return [Spreadsheet::Workbook]
  def book
    return @book if @book
    require 'spreadsheet' unless defined?(::Spreadsheet)
    if exist?
      @book = Spreadsheet.open(path)
    else
      make_new_book
    end
    @book
  end

  # The worksheet currently being read or written.  Defaults to the
  # first worksheet of #book, creating one if the book has none.
  # Repeated calls return the same worksheet.
  def sheet
    return @sheet if @sheet
    @sheet = book.worksheet(0)
    make_new_sheet unless @sheet
    @sheet
  end

  #If an Excel file exists at the location specified by uri then
  #it is opened and can be read out with a subsequent call to
  #load().  Otherwise, a new workbook is created and can be written
  #to with the dump() method.
  #
  #Only relevant when this module is included in a class; objects
  #that are extended with it are set up by Excel.extended instead.
  def initialize uri, mode='r', options={}
    self.uri = uri
    reset_excel_state(options)
    if exist?
      get_existing_book
    else
      make_new_book
      make_new_sheet
    end
  end

  #Returns the data in an existing workbook as an
  #array of arrays.  Only capable of reading a single sheet.
  def load
    sheet.map { |row| row.to_a }
  end

  #Dumps data, which is assumed to be an array of arrays, to a
  #newly created Excel workbook.  Attempting to dump to a book
  #that already exists will typically result in file corruption.
  #Raises a 'too many lines' error if the number of lines
  #of data exceeds max_lines.
  def dump data
    data.each do |line|
      raise "too many lines" if too_many?
      self << line
    end
    save unless no_data?
  end

  #Processes a single line of data and updates internal variables.
  #You shouldn't need to call this directly.
  def << line
    target = sheet # resolve first: creating a sheet resets the row counter
    target.row(@sht_row.to_i).concat( line )
    @sht_row = @sht_row.to_i + 1
    @idx     = @idx.to_i + 1
  end

  #Instantiates a new Excel workbook in memory.  You shouldn't
  #need to call this directly.
  def make_new_book
    require 'spreadsheet' unless defined?(::Spreadsheet)
    @book = Spreadsheet::Workbook.new
    @book_idx = @book_idx.to_i + 1
  end

  #Makes a new worksheet for a pre-existing Excel workbook.
  #This should be called after recovering from the
  #'too many lines' error.
  def make_new_sheet
    @sheet = book.create_worksheet
    @sht_idx = @sht_idx.to_i + 1
    @sht_row = 0 #always start at row 0 in a new sheet
  end

  #Opens an existing Excel workbook and positions the row counter
  #after its last row.  You shouldn't need to call this directly.
  #
  #NOTE(review): the original author remarked that dumping new data
  #into an existing book "doesn't work" -- confirm before relying
  #on appends being saved correctly.
  def get_existing_book
    require 'spreadsheet' unless defined?(::Spreadsheet)
    @book    = Spreadsheet.open(path)
    @sheet   = @book.worksheet(0)
    @sht_row = @sheet ? @sheet.row_count : 0
    @sht_idx = @sht_idx.to_i + 1
  end

  #Increments the current sheet to the next one in
  #an open book.  Not necessary at the moment.
  def incr_sheet
    @sheet = book.worksheet @sht_idx
  end

  #There are too many lines if the number of rows attempting
  #to be written exceeds max_lines.
  def too_many?
    @sht_row.to_i >= (@max_lines || DEFAULT_MAX_LINES)
  end

  #There is no data if the number of rows attempting to be written
  #is zero.
  def no_data?
    @sht_row.to_i == 0
  end

  #Saves the workbook.
  def save
    book.write path
  end

  private

  # Puts the bookkeeping counters back to their starting values.
  # +options+ may carry <tt>:max_lines</tt>.
  def reset_excel_state options={}
    @max_lines = options[:max_lines] || DEFAULT_MAX_LINES
    @idx       = 0
    @book_idx  = 0
    @sht_idx   = 0
    @sht_row   = 0
  end
end
|
|
124
|
+
end
|
|
125
|
+
end
|