imw 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +15 -0
- data/CHANGELOG +0 -0
- data/LICENSE +674 -0
- data/README.rdoc +101 -0
- data/Rakefile +20 -0
- data/VERSION +1 -0
- data/etc/imwrc.rb +76 -0
- data/lib/imw.rb +42 -0
- data/lib/imw/boot.rb +58 -0
- data/lib/imw/dataset.rb +233 -0
- data/lib/imw/dataset/datamapper.rb +66 -0
- data/lib/imw/dataset/datamapper/time_and_user_stamps.rb +37 -0
- data/lib/imw/dataset/loaddump.rb +50 -0
- data/lib/imw/dataset/old/file_collection.rb +88 -0
- data/lib/imw/dataset/old/file_collection_utils.rb +71 -0
- data/lib/imw/dataset/scaffold.rb +132 -0
- data/lib/imw/dataset/scraped_uri.rb +305 -0
- data/lib/imw/dataset/scrub/old_working_scrubber.rb +87 -0
- data/lib/imw/dataset/scrub/scrub.rb +147 -0
- data/lib/imw/dataset/scrub/scrub_simple_url.rb +38 -0
- data/lib/imw/dataset/scrub/scrub_test.rb +60 -0
- data/lib/imw/dataset/scrub/slug.rb +101 -0
- data/lib/imw/dataset/stats.rb +73 -0
- data/lib/imw/dataset/stats/counter.rb +23 -0
- data/lib/imw/dataset/task.rb +38 -0
- data/lib/imw/dataset/workflow.rb +81 -0
- data/lib/imw/files.rb +110 -0
- data/lib/imw/files/archive.rb +113 -0
- data/lib/imw/files/basicfile.rb +122 -0
- data/lib/imw/files/binary.rb +28 -0
- data/lib/imw/files/compressed_file.rb +93 -0
- data/lib/imw/files/compressed_files_and_archives.rb +348 -0
- data/lib/imw/files/compressible.rb +103 -0
- data/lib/imw/files/csv.rb +112 -0
- data/lib/imw/files/json.rb +41 -0
- data/lib/imw/files/sgml.rb +65 -0
- data/lib/imw/files/text.rb +68 -0
- data/lib/imw/files/yaml.rb +46 -0
- data/lib/imw/packagers.rb +8 -0
- data/lib/imw/packagers/archiver.rb +108 -0
- data/lib/imw/packagers/s3_mover.rb +28 -0
- data/lib/imw/parsers.rb +7 -0
- data/lib/imw/parsers/html_parser.rb +382 -0
- data/lib/imw/parsers/html_parser/matchers.rb +306 -0
- data/lib/imw/parsers/line_parser.rb +87 -0
- data/lib/imw/parsers/regexp_parser.rb +72 -0
- data/lib/imw/utils.rb +24 -0
- data/lib/imw/utils/components.rb +61 -0
- data/lib/imw/utils/config.rb +46 -0
- data/lib/imw/utils/error.rb +54 -0
- data/lib/imw/utils/extensions/array.rb +125 -0
- data/lib/imw/utils/extensions/class/attribute_accessors.rb +8 -0
- data/lib/imw/utils/extensions/core.rb +43 -0
- data/lib/imw/utils/extensions/dir.rb +24 -0
- data/lib/imw/utils/extensions/file_core.rb +64 -0
- data/lib/imw/utils/extensions/hash.rb +218 -0
- data/lib/imw/utils/extensions/hpricot.rb +48 -0
- data/lib/imw/utils/extensions/string.rb +49 -0
- data/lib/imw/utils/extensions/struct.rb +42 -0
- data/lib/imw/utils/extensions/symbol.rb +28 -0
- data/lib/imw/utils/extensions/typed_struct.rb +22 -0
- data/lib/imw/utils/extensions/uri.rb +59 -0
- data/lib/imw/utils/log.rb +67 -0
- data/lib/imw/utils/misc.rb +63 -0
- data/lib/imw/utils/paths.rb +115 -0
- data/lib/imw/utils/uri.rb +59 -0
- data/lib/imw/utils/uuid.rb +33 -0
- data/lib/imw/utils/validate.rb +38 -0
- data/lib/imw/utils/version.rb +12 -0
- data/lib/imw/utils/view.rb +113 -0
- data/lib/imw/utils/view/dump_csv.rb +112 -0
- data/lib/imw/utils/view/dump_csv_older.rb +117 -0
- data/spec/data/sample.csv +131 -0
- data/spec/data/sample.tsv +131 -0
- data/spec/data/sample.txt +131 -0
- data/spec/data/sample.xml +653 -0
- data/spec/data/sample.yaml +652 -0
- data/spec/imw/dataset/datamapper/uri_spec.rb +43 -0
- data/spec/imw/dataset/datamapper_spec_helper.rb +11 -0
- data/spec/imw/files/archive_spec.rb +118 -0
- data/spec/imw/files/basicfile_spec.rb +121 -0
- data/spec/imw/files/bz2_spec.rb +32 -0
- data/spec/imw/files/compressed_file_spec.rb +96 -0
- data/spec/imw/files/compressible_spec.rb +100 -0
- data/spec/imw/files/file_spec.rb +144 -0
- data/spec/imw/files/gz_spec.rb +32 -0
- data/spec/imw/files/rar_spec.rb +33 -0
- data/spec/imw/files/tar_spec.rb +31 -0
- data/spec/imw/files/text_spec.rb +23 -0
- data/spec/imw/files/zip_spec.rb +31 -0
- data/spec/imw/files_spec.rb +38 -0
- data/spec/imw/packagers/archiver_spec.rb +125 -0
- data/spec/imw/packagers/s3_mover_spec.rb +7 -0
- data/spec/imw/parsers/line_parser_spec.rb +96 -0
- data/spec/imw/parsers/regexp_parser_spec.rb +42 -0
- data/spec/imw/utils/extensions/file_core_spec.rb +72 -0
- data/spec/imw/utils/extensions/find_spec.rb +113 -0
- data/spec/imw/utils/paths_spec.rb +38 -0
- data/spec/imw/workflow/rip/local_spec.rb +89 -0
- data/spec/imw/workflow/rip_spec.rb +27 -0
- data/spec/rcov.opts +1 -0
- data/spec/spec.opts +4 -0
- data/spec/spec_helper.rb +32 -0
- data/spec/support/archive_contents_matcher.rb +94 -0
- data/spec/support/custom_matchers.rb +21 -0
- data/spec/support/directory_contents_matcher.rb +61 -0
- data/spec/support/extensions.rb +18 -0
- data/spec/support/file_contents_matcher.rb +50 -0
- data/spec/support/random.rb +210 -0
- data/spec/support/without_regard_to_order_matcher.rb +58 -0
- metadata +196 -0
@@ -0,0 +1,113 @@
|
|
1
|
+
#
|
2
|
+
# h2. lib/imw/files/archive.rb -- describes archives of files
|
3
|
+
#
|
4
|
+
# == About
|
5
|
+
#
|
6
|
+
# Module for describing known archive types. An including archive
|
7
|
+
# type's class must define an instance variable +archive+ which is a
|
8
|
+
# hash with the following required keys:
|
9
|
+
#
|
10
|
+
# <tt>:program</tt>:: a symbol naming the program to be used. It
|
11
|
+
# should match one of the symbols in <tt>IMW::EXTERNAL_PROGRAMS</tt>
|
12
|
+
#
|
13
|
+
# <tt>:create_flags</tt>:: a string of flags to pass to the archiving
|
14
|
+
# program when creating the archive
|
15
|
+
#
|
16
|
+
# <tt>:append_flags</tt>:: a string of flags to pass to the archiving
|
17
|
+
# program when appending files to the archive
|
18
|
+
#
|
19
|
+
# <tt>:extract_flags</tt>:: a string of flags to pass to the archiving
|
20
|
+
# program when extracting the archive
|
21
|
+
#
|
22
|
+
# <tt>:list_flags</tt>:: a string of flags to pass to the archiving
|
23
|
+
# program when listing the archive's contents
|
24
|
+
#
|
25
|
+
# The +archive+ hash may also contain the entry:
|
26
|
+
#
|
27
|
+
# <tt>:unarchiving_program</tt>:: a symbol naming the program to be
|
28
|
+
# used to list/extract the archive. Useful only if this program
|
29
|
+
# differs from the program used to create the archive in the first
|
30
|
+
# place (i.e. - zip & unzip).
|
31
|
+
#
|
32
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
33
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
34
|
+
# License:: GPL 3.0
|
35
|
+
# Website:: http://infinitemonkeywrench.org/
|
36
|
+
#
|
37
|
+
# puts "#{File.basename(__FILE__)}: Put it all in one place so that when something goes wrong you'll know it immediately. You'll regret it, but at least you'll know." # at bottom
|
38
|
+
module IMW
|
39
|
+
module Files
|
40
|
+
|
41
|
+
module BasicFile

  # Archive predicate for plain files: always answers +false+.  The
  # IMW::Files::Archive module defines the same method answering
  # +true+.
  def archive?
    false
  end

end
|
48
|
+
|
49
|
+
module Archive

  # The hash describing how to drive the external archiving program:
  # <tt>:program</tt>, <tt>:create_flags</tt>, <tt>:append_flags</tt>,
  # <tt>:extract_flags</tt>, <tt>:list_flags</tt> and, optionally,
  # <tt>:unarchiving_program</tt>.
  attr_reader :archive

  # Is this file an archive?
  def archive?
    true
  end

  # Create this archive containing the given +paths+, which can be
  # either a string or list of strings to be interpreted as paths
  # to files/directories by the shell.
  #
  # Options:
  # <tt>:force</tt> (false):: overwrite any existing archive at this path.
  #
  # Raises IMW::Error if something already exists at this archive's
  # path (and <tt>:force</tt> was not given) or if the +archive+ hash
  # has no <tt>:create_flags</tt>.  Returns +self+.
  def create paths, opts = {}
    # Plain Hash#merge with the defaults on the left: caller-supplied
    # keys win, no Hash extension required.
    opts = {:force => false}.merge(opts)
    raise IMW::Error.new("An archive already exists at #{@path}.") if exist? && !opts[:force]
    raise IMW::Error.new("Cannot create an archive of type #{@extname}") unless @archive[:create_flags]
    paths = [paths] if paths.is_a?(String)
    IMW.system IMW::EXTERNAL_PROGRAMS[@archive[:program]], @archive[:create_flags], @path, *paths
    self
  end

  # Append to this archive the given +paths+, which can be
  # either a string or list of strings to be interpreted as paths
  # to files/directories by the shell.
  #
  # Raises IMW::Error if the +archive+ hash has no
  # <tt>:append_flags</tt>.  Returns +self+.
  def append paths
    raise IMW::Error.new("Cannot append to an archive of type #{@archive[:program]}.") unless @archive[:append_flags]
    paths = [paths] if paths.is_a?(String)
    IMW.system IMW::EXTERNAL_PROGRAMS[@archive[:program]], @archive[:append_flags], @path, *paths
    self
  end

  # Extract the files from this archive to the current directory.
  #
  # Uses <tt>:unarchiving_program</tt> when the +archive+ hash names
  # one, otherwise <tt>:program</tt>.  Raises IMW::Error if the
  # archive does not exist.
  def extract
    raise IMW::Error.new("Cannot extract, #{@path} does not exist.") unless exist?
    program = @archive[:unarchiving_program] || @archive[:program]
    IMW.system IMW::EXTERNAL_PROGRAMS[program], @archive[:extract_flags], @path
  end

  # Return a list of the contents of this archive.  No sorting is
  # applied here: the entries come back in whatever order
  # +archive_contents_string_to_array+ produces from the program's
  # output.
  #
  # Raises IMW::Error if the archive does not exist.
  def contents
    raise IMW::Error.new("Cannot list contents, #{@path} does not exist.") unless exist?
    program = @archive[:unarchiving_program] || @archive[:program]
    # NOTE(review): the command line is handed to the shell verbatim,
    # so a path containing spaces or shell metacharacters will be
    # mis-parsed -- confirm callers only pass safe paths.
    command = [IMW::EXTERNAL_PROGRAMS[program], @archive[:list_flags], @path].join(' ')
    archive_contents_string_to_array(`#{command}`)
  end

  # Parse and format the output from the archive program's "list"
  # command into an array of filenames.
  #
  # An including class can customize this method to match the
  # output from the archiving program of that class.
  def archive_contents_string_to_array string
    string.split("\n")
  end

end
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
|
@@ -0,0 +1,122 @@
|
|
1
|
+
#
|
2
|
+
# h2. lib/imw/files/basicfile.rb -- base module for files
|
3
|
+
#
|
4
|
+
# == About
|
5
|
+
#
|
6
|
+
# Defines a base module for classes describing specific filetypes to include.
|
7
|
+
#
|
8
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
9
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
10
|
+
# License:: GPL 3.0
|
11
|
+
# Website:: http://infinitemonkeywrench.org/
|
12
|
+
#
|
13
|
+
# puts "#{File.basename(__FILE__)}: At the very bottom of the office building, wedged between a small boulder and a rotting log you see a weathered manilla file folder. The writing on the tab is too faded to make out." # at bottom
|
14
|
+
module IMW
|
15
|
+
module Files
|
16
|
+
module BasicFile

  attr_reader :uri, :host, :path, :dirname, :basename, :extname, :name

  protected

  # Point this object at +uri+, which may be a String or an already
  # parsed URI object, and derive +host+, +path+, +dirname+,
  # +basename+, +extname+ and +name+ from it.  Local paths are
  # expanded to absolute paths; remote paths are kept as given.
  def uri= uri
    @uri      = uri.is_a?(String) ? URI.parse(uri) : uri
    @host     = self.uri.host
    @path     = local? ? ::File.expand_path(self.uri.path) : self.uri.path
    @dirname  = ::File.dirname path
    @basename = ::File.basename path
    @extname  = find_extname
    # +name+ is the basename with the (possibly compound) extension removed
    @name     = @basename[0, @basename.length - @extname.length]
  end

  # Some files (like <tt>.tar.gz</tt>) have an "extra" extension.
  # Classes in the <tt>IMW::Files</tt> module should define a
  # class method <tt>extname</tt> which returns their full
  # extension.
  #
  # Falls back to <tt>File.extname</tt> when the class has no such
  # method or when it returns +nil+ for this path.
  def find_extname
    custom = self.class.respond_to?(:extname) ? self.class.extname(path) : nil
    custom || ::File.extname(path)
  end

  # Download the remote resource at +uri+ into the local file
  # +new_path+.  Relies on open-uri having been loaded so that the
  # URI object responds to +open+; the remote handle is closed when
  # the block returns.
  def copy_remote_to new_path
    # FIXME better way to do this?
    ::File.open(new_path, 'w') { |f| f.write(uri.open { |remote| remote.read }) }
  end

  public

  # Is this file on the local machine?  True when the URI has no host
  # (or an empty one, as for <tt>file:///...</tt> URIs) or when the
  # host is literally 'file'.
  def local?
    host.nil? || host.empty? || host == 'file'
  end

  # Is this file on a remote machine?
  def remote?
    !local?
  end

  # Steal a bunch of class methods from File which only take a
  # path as a first argument.  Each returns +nil+ for remote files.
  [:executable?, :executable_real?, :file?, :directory?, :ftype, :owned?, :pipe?, :readable?, :readable_real?, :setgid?, :setuid?, :size, :size?, :socket?, :split, :stat, :sticky?, :writable?, :writable_real?, :zero?].each do |class_method|
    define_method class_method do
      ::File.send(class_method, path) if local?
    end
  end

  # Is there a real file at the path of this File?  Will attempt
  # to open files online too to check.
  #
  # For remote files only SocketError is translated into +false+;
  # any other error raised while opening propagates to the caller.
  def exist?
    if local?
      ::File.exist?(path) ? true : false
    else
      begin
        uri.open { true }
      rescue SocketError
        false
      end
    end
  end
  alias_method :exists?, :exist?

  # Delete this file.  Raises IMW::PathError for remote or
  # non-existent files.
  def rm
    raise IMW::PathError.new("cannot delete remote file #{uri}") unless local?
    raise IMW::PathError.new("cannot delete #{uri}, doesn't exist!") unless exist?
    FileUtils.rm path
  end
  alias_method :rm!, :rm

  # Copy this file to +new_path+ and return a new object of the same
  # class wrapping the copy.  Remote files are downloaded.
  def cp new_path
    raise IMW::PathError.new("cannot copy from #{path}, doesn't exist!") unless exist?
    if local?
      FileUtils.cp path, new_path
    else
      copy_remote_to new_path
    end
    self.class.new(new_path)
  end

  # Copy this file to +dir+.
  def cp_to_dir dir
    cp ::File.join(::File.expand_path(dir), basename)
  end

  # Move this file to +new_path+ and return a new object of the same
  # class wrapping the new location.  A remote file is only
  # downloaded; the remote original is left in place.
  def mv new_path
    raise IMW::PathError.new("cannot move from #{path}, doesn't exist!") unless exist?
    if local?
      FileUtils.mv path, new_path
    else
      copy_remote_to new_path
    end
    self.class.new(new_path)
  end
  alias_method :mv!, :mv

  # Move this file to +dir+.
  def mv_to_dir dir
    mv ::File.join(::File.expand_path(dir), basename)
  end
  alias_method :mv_to_dir!, :mv_to_dir

end
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
|
@@ -0,0 +1,28 @@
|
|
1
|
+
#
|
2
|
+
# h2. lib/imw/files/binary.rb -- binary files
|
3
|
+
#
|
4
|
+
# == About
|
5
|
+
#
|
6
|
+
# Class for handling binary data.
|
7
|
+
#
|
8
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
9
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
10
|
+
# License:: GPL 3.0
|
11
|
+
# Website:: http://infinitemonkeywrench.org/
|
12
|
+
#
|
13
|
+
# puts "#{File.basename(__FILE__)}: Something clever" # at bottom
|
14
|
+
module IMW
|
15
|
+
module Files
|
16
|
+
|
17
|
+
class Binary

  include IMW::Files::BasicFile
  include IMW::Files::Compressible

  # Wrap the binary file found at +uri+; all path bookkeeping is
  # delegated to the +uri=+ writer.
  def initialize(uri)
    self.uri = uri
  end

end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
#
|
2
|
+
# h2. lib/imw/files/compressed_file.rb -- class describing compressed files
|
3
|
+
#
|
4
|
+
# == About
|
5
|
+
#
|
6
|
+
# Compression of files is handled via the
|
7
|
+
# <tt>IMW::Files::Compressible</tt> module which can be included by
|
8
|
+
# any object that has a <tt>@path</tt> attribute. The methods defined
|
9
|
+
# there compress files and return this
|
10
|
+
# <tt>IMW::Files::CompressedFile</tt> object which has methods for
|
11
|
+
# decompression.
|
12
|
+
#
|
13
|
+
# A subclass of this class must define a +compression+ instance
|
14
|
+
# variable which is a hash with the following keys:
|
15
|
+
#
|
16
|
+
# <tt>:program</tt>:: a symbol naming the program used for
|
17
|
+
# compression/decompression which must be one of the symbols in
|
18
|
+
# <tt>IMW::EXTERNAL_PROGRAMS</tt>
|
19
|
+
#
|
20
|
+
# <tt>:decompression_flags</tt>:: a string of flags to pass to the
|
21
|
+
# compression program when decompressing the file.
|
22
|
+
#
|
23
|
+
# A subclass must also define the method +decompressed_path+ which
|
24
|
+
# returns the path of the file post-decompression.
|
25
|
+
#
|
26
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
27
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
28
|
+
# License:: GPL 3.0
|
29
|
+
# Website:: http://infinitemonkeywrench.org/
|
30
|
+
#
|
31
|
+
# puts "#{File.basename(__FILE__)}: Have you ever folded up the wrapper of a soda straw into a little accordian shape and let a drop of water soak into it?" # at bottom
|
32
|
+
module IMW
|
33
|
+
module Files
|
34
|
+
|
35
|
+
module BasicFile

  # Compression predicate for plain files: always answers +false+.
  # The IMW::Files::CompressedFile module defines the same method
  # answering +true+.
  def compressed?
    false
  end

end
|
40
|
+
|
41
|
+
# A module which provides methods for decompressing a compressed
|
42
|
+
# file. An including class should define an instance variable
|
43
|
+
# <tt>@compression</tt> with two keys:
|
44
|
+
#
|
45
|
+
# <tt>:program</tt>:: a symbol from <tt>IMW::EXTERNAL_PROGRAMS</tt>
|
46
|
+
# <tt>:decompression_flags</tt>:: a string specifying flags to pass to the decompression program
|
47
|
+
module CompressedFile

  # The hash describing how to decompress this file
  # (<tt>:program</tt> and <tt>:decompression_flags</tt>).
  attr_reader :compression

  # Is this file compressed?
  def compressed?
    true
  end

  # Construct the command passed to the shell to decompress this
  # file: program, flags and path joined by single spaces.
  def decompression_command
    [IMW::EXTERNAL_PROGRAMS[@compression[:program]], @compression[:decompression_flags], @path].join ' '
  end

  # Decompress this file in its present directory overwriting any
  # existing files and without saving the original compressed
  # file.
  #
  # Raises IMW::PathError if the file does not exist.  Returns
  # <tt>IMW.open(decompressed_path)</tt>.
  def decompress!
    raise IMW::PathError.new("cannot decompress #{@path}, doesn't exist!") unless exist?
    FileUtils.cd(@dirname) { IMW.system decompression_command }
    IMW.open(decompressed_path)
  end

  # Decompress this file in its present directory, overwriting any
  # existing files while keeping the original compressed file.
  #
  # The implementation is a little stupid, as the file is
  # needlessly copied.
  #
  # Raises IMW::PathError if the file does not exist.  Returns
  # <tt>IMW.open(decompressed_path)</tt>.
  def decompress
    raise IMW::PathError.new("cannot decompress #{@path}, doesn't exist!") unless exist?
    backup = @path + 'copy'
    # Take the backup before entering the protected region: if the
    # copy itself fails there is nothing to restore, and its error
    # must reach the caller instead of being replaced by a failing mv.
    FileUtils.cp(@path, backup)
    begin
      decompress!
    ensure
      FileUtils.mv(backup, @path)
    end
    IMW.open(decompressed_path)
  end

end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
|
93
|
+
|
@@ -0,0 +1,348 @@
|
|
1
|
+
#
|
2
|
+
# h2. lib/imw/files/compressed_files_and_archives.rb -- require farm
|
3
|
+
#
|
4
|
+
# == About
|
5
|
+
#
|
6
|
+
# Defines all the archive and compressed formats (+tar+, +bz2+,
|
7
|
+
# &c.)
|
8
|
+
#
|
9
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
10
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
11
|
+
# License:: GPL 3.0
|
12
|
+
# Website:: http://infinitemonkeywrench.org/
|
13
|
+
#
|
14
|
+
# puts "#{File.basename(__FILE__)}: Something clever" # at bottom
|
15
|
+
module IMW
|
16
|
+
module Files
|
17
|
+
|
18
|
+
# A class to wrap a +tar+ archive.
|
19
|
+
#
|
20
|
+
# Creation, appending, listing, and extraction flags are stored in
|
21
|
+
# <tt>IMW::Files::Tar::DEFAULT_FLAGS</tt> and all are passed to
|
22
|
+
# the <tt>:tar</tt> program in <tt>IMW::EXTERNAL_PROGRAMS</tt>.
|
23
|
+
class Tar

  include IMW::Files::BasicFile
  include IMW::Files::Archive
  include IMW::Files::Compressible

  # Flags given to the archiving program for each operation on a tar
  # archive, together with the symbol naming that program.
  DEFAULT_FLAGS = {
    :create => "-cf",
    :append => "-rf",
    :list => "-tf",
    :extract => "-xf",
    :program => :tar
  }

  # Wrap the tar archive at +uri+ and fill in the +archive+ hash from
  # DEFAULT_FLAGS.  Any further arguments are accepted and ignored.
  def initialize(uri, *args)
    self.uri = uri
    @archive = { :program => DEFAULT_FLAGS[:program] }
    # every operation maps onto the "<operation>_flags" entry
    [:create, :append, :list, :extract].each do |operation|
      @archive[:"#{operation}_flags"] = DEFAULT_FLAGS[operation]
    end
  end

end # Tar
|
50
|
+
|
51
|
+
# A class to wrap a <tt>tar.gz</tt> archive.
|
52
|
+
#
|
53
|
+
# Creation, appending, listing, and extraction flags are stored in
|
54
|
+
# <tt>IMW::Files::TarGz::DEFAULT_FLAGS</tt> and all are passed to
|
55
|
+
# the <tt>:tar</tt> program in <tt>IMW::EXTERNAL_PROGRAMS</tt>.
|
56
|
+
class TarGz

  include IMW::Files::BasicFile
  include IMW::Files::Archive
  include IMW::Files::CompressedFile

  # Programs and flags used to decompress, list and extract a
  # <tt>tar.gz</tt> archive.  No creation or appending flags are
  # defined for this type.
  DEFAULT_FLAGS = {
    :decompression_program => :gzip,
    :decompression_flags => '-fd',
    :archive_program => :tar,
    :archive_list_flags => "-tf",
    :archive_extract_flags => "-xzf"
  }

  # Wrap the archive at +uri+ and fill in both the +compression+ and
  # the +archive+ hashes from DEFAULT_FLAGS.  Any further arguments
  # are accepted and ignored.
  def initialize(uri, *args)
    self.uri = uri
    defaults = DEFAULT_FLAGS
    @compression = {
      :program => defaults[:decompression_program],
      :decompression_flags => defaults[:decompression_flags]
    }
    @archive = {
      :program => defaults[:archive_program],
      :list_flags => defaults[:archive_list_flags],
      :extract_flags => defaults[:archive_extract_flags]
    }
  end

  # Returns the path of the file after decompression: the
  # <tt>.tar.gz</tt> or <tt>.tgz</tt> ending is replaced by
  # <tt>.tar</tt>.  Answers +nil+ for any other ending.
  def decompressed_path
    ending = [/\.tar\.gz$/, /\.tgz$/].find { |pattern| pattern.match(@path) }
    ending && @path.gsub(ending, ".tar")
  end

  # The full extension of +path+ (<tt>.tar.gz</tt> or <tt>.tgz</tt>),
  # or +nil+ when +path+ has neither ending.
  def self.extname(path)
    known = [[/\.tar\.gz$/, ".tar.gz"], [/\.tgz$/, ".tgz"]]
    hit = known.find { |pattern, _| pattern.match(path) }
    hit && hit.last
  end

end # TarGz
|
103
|
+
|
104
|
+
# A class to wrap a <tt>tar.bz2</tt> archive.
|
105
|
+
#
|
106
|
+
# Creation, appending, listing, and extraction flags are stored in
|
107
|
+
# <tt>IMW::Files::TarBz2::DEFAULT_FLAGS</tt> and all are passed to
|
108
|
+
# the <tt>:tar</tt> program in <tt>IMW::EXTERNAL_PROGRAMS</tt>.
|
109
|
+
class TarBz2

  include IMW::Files::BasicFile
  include IMW::Files::Archive
  include IMW::Files::CompressedFile

  # The default programs and flags used for decompressing, creating,
  # listing, and extracting a <tt>tar.bz2</tt> archive.
  DEFAULT_FLAGS = {
    :decompression_program => :bzip2,
    :decompression_flags => '-fd',
    :archive_program => :tar,
    :archive_create_flags => '-cf',
    :archive_list_flags => "-tf",
    :archive_extract_flags => "-xjf"
  }

  # The full extension of +path+ (<tt>.tar.bz2</tt> or
  # <tt>.tbz2</tt>), or +nil+ when +path+ has neither ending.
  def self.extname path
    if /\.tar\.bz2$/.match path then
      ".tar.bz2"
    elsif /\.tbz2$/.match path then
      ".tbz2"
    end
  end

  # Wrap the archive at +uri+ and fill in the +compression+ and
  # +archive+ hashes from DEFAULT_FLAGS.  Any further arguments are
  # accepted and ignored.
  def initialize uri, *args
    self.uri= uri
    @compression = {
      :program => DEFAULT_FLAGS[:decompression_program],
      # the key in DEFAULT_FLAGS is :decompression_flags; looking up
      # :decompression silently produced nil flags
      :decompression_flags => DEFAULT_FLAGS[:decompression_flags]
    }
    @archive = {
      :program => DEFAULT_FLAGS[:archive_program],
      :list_flags => DEFAULT_FLAGS[:archive_list_flags],
      :extract_flags => DEFAULT_FLAGS[:archive_extract_flags],
      :create_flags => DEFAULT_FLAGS[:archive_create_flags]
    }
  end

  # Returns the path of the file after decompression: the
  # <tt>.tar.bz2</tt> or <tt>.tbz2</tt> ending is replaced by
  # <tt>.tar</tt>.  Answers +nil+ for any other ending.
  def decompressed_path
    if /\.tar\.bz2$/.match @path then
      @path.gsub(/\.tar\.bz2$/, ".tar")
    elsif /\.tbz2$/.match @path then
      @path.gsub(/\.tbz2$/, ".tar")
    end
  end

  # Overrides default behavior of IMW::Files::Archive#create to
  # compress files after creating them: a plain <tt>.tar</tt> is
  # written next to this archive first and then compressed with
  # bzip2.
  #
  # Options:
  # <tt>:force</tt> (false):: overwrite any existing archive at this path.
  #
  # Raises IMW::Error if an archive already exists and
  # <tt>:force</tt> was not given.  Returns whatever
  # <tt>compress!(:bzip2)</tt> returns for the intermediate tar file.
  def create paths, opts={}
    opts = {:force => false}.merge(opts)
    raise IMW::Error.new("An archive already exists at #{@path}.") if exist? && !opts[:force]
    paths = [paths] if paths.is_a?(String)
    IMW.system IMW::EXTERNAL_PROGRAMS[@archive[:program]], @archive[:create_flags], path_between_archive_and_compression, *paths
    IMW.open(path_between_archive_and_compression).compress!(:bzip2)
  end

  protected

  # Path of the intermediate, uncompressed <tt>.tar</tt> file that
  # +create+ writes before compressing it.
  def path_between_archive_and_compression
    File.join(dirname, name + '.tar')
  end

end # TarBz2
|
173
|
+
|
174
|
+
# A class to wrap a +rar+ archive.
|
175
|
+
#
|
176
|
+
# Creation, appending, listing, and extraction flags are stored in
|
177
|
+
# <tt>IMW::Files::Rar::DEFAULT_FLAGS</tt> and all are passed to
|
178
|
+
# the <tt>:rar</tt> program in <tt>IMW::EXTERNAL_PROGRAMS</tt>.
|
179
|
+
class Rar

  include IMW::Files::BasicFile
  include IMW::Files::Archive

  # Flags given to the archiving program for each operation on a rar
  # archive.
  DEFAULT_FLAGS = {
    :create => "a -r -o+ -inul",
    :append => "a -r -o+ -inul",
    :list => "vb",
    :extract => "x -o+ -inul"
  }

  # Wrap the rar archive at +uri+ and fill in the +archive+ hash: the
  # program is always <tt>:rar</tt>, the flags come from
  # DEFAULT_FLAGS.  Any further arguments are accepted and ignored.
  def initialize(uri, *args)
    self.uri = uri
    @archive = { :program => :rar }
    # every operation maps onto the "<operation>_flags" entry
    [:create, :append, :list, :extract].each do |operation|
      @archive[:"#{operation}_flags"] = DEFAULT_FLAGS[operation]
    end
  end

end # Rar
|
204
|
+
|
205
|
+
# A class to wrap a +zip+ archive.
|
206
|
+
#
|
207
|
+
# Creation, appending, listing, and extraction flags are stored in
|
208
|
+
# <tt>IMW::Files::Zip::DEFAULT_FLAGS</tt> and all are passed to
|
209
|
+
# the <tt>:zip</tt> and <tt>:unzip</tt> programs in
|
210
|
+
# <tt>IMW::EXTERNAL_PROGRAMS</tt>.
|
211
|
+
class Zip

  include IMW::Files::BasicFile
  include IMW::Files::Archive

  # The default flags used creating, appending to, listing, and
  # extracting a zip archive, plus the program used for
  # listing/extracting.
  DEFAULT_FLAGS = {
    :create => "-q -r",
    :append => "-q -g",
    :list => "-l",
    :extract => "-q -o",
    :unarchiving_program => :unzip
  }

  # Wrap the zip archive at +uri+ and fill in the +archive+ hash: the
  # creating program is always <tt>:zip</tt>, everything else comes
  # from DEFAULT_FLAGS.  Any further arguments are accepted and
  # ignored.
  def initialize uri, *args
    self.uri= uri
    @archive = {
      :program => :zip,
      :create_flags => DEFAULT_FLAGS[:create],
      :append_flags => DEFAULT_FLAGS[:append],
      :list_flags => DEFAULT_FLAGS[:list],
      :extract_flags => DEFAULT_FLAGS[:extract],
      :unarchiving_program => DEFAULT_FLAGS[:unarchiving_program]
    }
  end

  # The `unzip' program outputs data in a very annoying format:
  #
  # Archive: data.zip
  # Length Date Time Name
  # -------- ---- ---- ----
  # 18510 07-28-08 15:58 data/4d7Qrgz7.csv
  # 3418 07-28-08 15:41 data/7S.csv
  # 23353 07-28-08 15:41 data/g.csv
  # 711 07-28-08 15:58 data/g.xml
  # 1095 07-28-08 15:41 data/L.xml
  # 2399 07-28-08 15:58 data/mTAu9H3.xml
  # 152 07-28-08 15:58 data/vaHBS2t5R.dat
  # -------- -------
  # 49638 7 files
  #
  # which is parsed by this method.
  #
  # Returns an array with one filename per listed entry; output too
  # short to contain any entries yields an empty array.
  def archive_contents_string_to_array string
    rows = string.split("\n")
    # ignore the first 3 lines of the output and also discard the
    # last 2; the slice is nil when there are fewer than 3 lines
    file_rows = rows[3...-2] || []
    file_rows.map do |row|
      # Strip surrounding whitespace with the non-destructive +strip+:
      # the bang variants return nil when nothing changes, so chaining
      # them blows up on rows without leading whitespace.  Limiting
      # the split to 4 fields keeps everything after the third column
      # as the filename, internal whitespace included.
      row.strip.split(' ', 4)[3].to_s
    end
  end

end # Zip
|
270
|
+
|
271
|
+
# A class to wrap a <tt>gz</tt> compressed file.
|
272
|
+
#
|
273
|
+
# The decompressing flags are stored in
|
274
|
+
# <tt>IMW::Files::Gz::DEFAULT_FLAGS</tt> and all are passed to the
|
275
|
+
# <tt>:gzip</tt> program in <tt>IMW::EXTERNAL_PROGRAMS</tt>.
|
276
|
+
class Gz

  include IMW::Files::BasicFile
  include IMW::Files::CompressedFile

  # Program and flags used to decompress a <tt>gz</tt> file.
  DEFAULT_FLAGS = {
    :program => :gzip,
    :decompression => '-fd'
  }

  # Wrap the compressed file at +uri+ and fill in the +compression+
  # hash from DEFAULT_FLAGS.  Any further arguments are accepted and
  # ignored.
  def initialize(uri, *args)
    self.uri = uri
    defaults = DEFAULT_FLAGS
    @compression = {
      :program => defaults[:program],
      :decompression_flags => defaults[:decompression]
    }
  end

  # Returns the path of the file after decompression, i.e. the path
  # with its <tt>.gz</tt> ending removed.
  def decompressed_path
    @path.gsub(/\.gz$/, "")
  end

end # Gz
|
299
|
+
|
300
|
+
# A class to wrap a <tt>bz2</tt> compressed file.
|
301
|
+
#
|
302
|
+
# The decompressing flags are stored in
|
303
|
+
# <tt>IMW::Files::Bz2::DEFAULT_FLAGS</tt> and all are passed to
|
304
|
+
# the <tt>:bzip2</tt> program in <tt>IMW::EXTERNAL_PROGRAMS</tt>.
|
305
|
+
class Bz2

  include IMW::Files::BasicFile
  include IMW::Files::CompressedFile

  # Program and flags used to decompress a <tt>bz2</tt> file.
  DEFAULT_FLAGS = {
    :program => :bzip2,
    :decompression => '-fd'
  }

  # Wrap the compressed file at +uri+ and fill in the +compression+
  # hash from DEFAULT_FLAGS.  Any further arguments are accepted and
  # ignored.
  #
  # Raises IMW::Error unless the extension is exactly <tt>.bz2</tt>.
  def initialize(uri, *args)
    self.uri = uri
    if @extname != '.bz2'
      raise IMW::Error.new("#{@extname} is not a valid extension for a bzip2 compressed file.")
    end
    defaults = DEFAULT_FLAGS
    @compression = {
      :program => defaults[:program],
      :decompression_flags => defaults[:decompression]
    }
  end

  # Returns the path of the file after decompression, i.e. the path
  # with its <tt>.bz2</tt> ending removed.
  def decompressed_path
    @path.gsub(/\.bz2$/, "")
  end

end # Bz2
|
330
|
+
|
331
|
+
|
332
|
+
# Register each extension pattern with the class that wraps it.
# The entries are appended in the order listed: make sure that
# tar.bz2 precedes bz2 and so on...
[
  [/\.tar\.bz2$/, IMW::Files::TarBz2],
  [/\.tbz2$/, IMW::Files::TarBz2],

  [/\.tar\.gz$/, IMW::Files::TarGz],
  [/\.tgz$/, IMW::Files::TarGz],

  [/\.tar$/, IMW::Files::Tar],
  [/\.bz2$/, IMW::Files::Bz2],
  [/\.gz$/, IMW::Files::Gz],
  [/\.rar$/, IMW::Files::Rar],
  [/\.zip$/, IMW::Files::Zip]
].each { |registration| FILE_REGEXPS << registration }
|
344
|
+
|
345
|
+
end # Files
|
346
|
+
end # IMW
|
347
|
+
|
348
|
+
|