imw 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (111) hide show
  1. data/.gitignore +15 -0
  2. data/CHANGELOG +0 -0
  3. data/LICENSE +674 -0
  4. data/README.rdoc +101 -0
  5. data/Rakefile +20 -0
  6. data/VERSION +1 -0
  7. data/etc/imwrc.rb +76 -0
  8. data/lib/imw.rb +42 -0
  9. data/lib/imw/boot.rb +58 -0
  10. data/lib/imw/dataset.rb +233 -0
  11. data/lib/imw/dataset/datamapper.rb +66 -0
  12. data/lib/imw/dataset/datamapper/time_and_user_stamps.rb +37 -0
  13. data/lib/imw/dataset/loaddump.rb +50 -0
  14. data/lib/imw/dataset/old/file_collection.rb +88 -0
  15. data/lib/imw/dataset/old/file_collection_utils.rb +71 -0
  16. data/lib/imw/dataset/scaffold.rb +132 -0
  17. data/lib/imw/dataset/scraped_uri.rb +305 -0
  18. data/lib/imw/dataset/scrub/old_working_scrubber.rb +87 -0
  19. data/lib/imw/dataset/scrub/scrub.rb +147 -0
  20. data/lib/imw/dataset/scrub/scrub_simple_url.rb +38 -0
  21. data/lib/imw/dataset/scrub/scrub_test.rb +60 -0
  22. data/lib/imw/dataset/scrub/slug.rb +101 -0
  23. data/lib/imw/dataset/stats.rb +73 -0
  24. data/lib/imw/dataset/stats/counter.rb +23 -0
  25. data/lib/imw/dataset/task.rb +38 -0
  26. data/lib/imw/dataset/workflow.rb +81 -0
  27. data/lib/imw/files.rb +110 -0
  28. data/lib/imw/files/archive.rb +113 -0
  29. data/lib/imw/files/basicfile.rb +122 -0
  30. data/lib/imw/files/binary.rb +28 -0
  31. data/lib/imw/files/compressed_file.rb +93 -0
  32. data/lib/imw/files/compressed_files_and_archives.rb +348 -0
  33. data/lib/imw/files/compressible.rb +103 -0
  34. data/lib/imw/files/csv.rb +112 -0
  35. data/lib/imw/files/json.rb +41 -0
  36. data/lib/imw/files/sgml.rb +65 -0
  37. data/lib/imw/files/text.rb +68 -0
  38. data/lib/imw/files/yaml.rb +46 -0
  39. data/lib/imw/packagers.rb +8 -0
  40. data/lib/imw/packagers/archiver.rb +108 -0
  41. data/lib/imw/packagers/s3_mover.rb +28 -0
  42. data/lib/imw/parsers.rb +7 -0
  43. data/lib/imw/parsers/html_parser.rb +382 -0
  44. data/lib/imw/parsers/html_parser/matchers.rb +306 -0
  45. data/lib/imw/parsers/line_parser.rb +87 -0
  46. data/lib/imw/parsers/regexp_parser.rb +72 -0
  47. data/lib/imw/utils.rb +24 -0
  48. data/lib/imw/utils/components.rb +61 -0
  49. data/lib/imw/utils/config.rb +46 -0
  50. data/lib/imw/utils/error.rb +54 -0
  51. data/lib/imw/utils/extensions/array.rb +125 -0
  52. data/lib/imw/utils/extensions/class/attribute_accessors.rb +8 -0
  53. data/lib/imw/utils/extensions/core.rb +43 -0
  54. data/lib/imw/utils/extensions/dir.rb +24 -0
  55. data/lib/imw/utils/extensions/file_core.rb +64 -0
  56. data/lib/imw/utils/extensions/hash.rb +218 -0
  57. data/lib/imw/utils/extensions/hpricot.rb +48 -0
  58. data/lib/imw/utils/extensions/string.rb +49 -0
  59. data/lib/imw/utils/extensions/struct.rb +42 -0
  60. data/lib/imw/utils/extensions/symbol.rb +28 -0
  61. data/lib/imw/utils/extensions/typed_struct.rb +22 -0
  62. data/lib/imw/utils/extensions/uri.rb +59 -0
  63. data/lib/imw/utils/log.rb +67 -0
  64. data/lib/imw/utils/misc.rb +63 -0
  65. data/lib/imw/utils/paths.rb +115 -0
  66. data/lib/imw/utils/uri.rb +59 -0
  67. data/lib/imw/utils/uuid.rb +33 -0
  68. data/lib/imw/utils/validate.rb +38 -0
  69. data/lib/imw/utils/version.rb +12 -0
  70. data/lib/imw/utils/view.rb +113 -0
  71. data/lib/imw/utils/view/dump_csv.rb +112 -0
  72. data/lib/imw/utils/view/dump_csv_older.rb +117 -0
  73. data/spec/data/sample.csv +131 -0
  74. data/spec/data/sample.tsv +131 -0
  75. data/spec/data/sample.txt +131 -0
  76. data/spec/data/sample.xml +653 -0
  77. data/spec/data/sample.yaml +652 -0
  78. data/spec/imw/dataset/datamapper/uri_spec.rb +43 -0
  79. data/spec/imw/dataset/datamapper_spec_helper.rb +11 -0
  80. data/spec/imw/files/archive_spec.rb +118 -0
  81. data/spec/imw/files/basicfile_spec.rb +121 -0
  82. data/spec/imw/files/bz2_spec.rb +32 -0
  83. data/spec/imw/files/compressed_file_spec.rb +96 -0
  84. data/spec/imw/files/compressible_spec.rb +100 -0
  85. data/spec/imw/files/file_spec.rb +144 -0
  86. data/spec/imw/files/gz_spec.rb +32 -0
  87. data/spec/imw/files/rar_spec.rb +33 -0
  88. data/spec/imw/files/tar_spec.rb +31 -0
  89. data/spec/imw/files/text_spec.rb +23 -0
  90. data/spec/imw/files/zip_spec.rb +31 -0
  91. data/spec/imw/files_spec.rb +38 -0
  92. data/spec/imw/packagers/archiver_spec.rb +125 -0
  93. data/spec/imw/packagers/s3_mover_spec.rb +7 -0
  94. data/spec/imw/parsers/line_parser_spec.rb +96 -0
  95. data/spec/imw/parsers/regexp_parser_spec.rb +42 -0
  96. data/spec/imw/utils/extensions/file_core_spec.rb +72 -0
  97. data/spec/imw/utils/extensions/find_spec.rb +113 -0
  98. data/spec/imw/utils/paths_spec.rb +38 -0
  99. data/spec/imw/workflow/rip/local_spec.rb +89 -0
  100. data/spec/imw/workflow/rip_spec.rb +27 -0
  101. data/spec/rcov.opts +1 -0
  102. data/spec/spec.opts +4 -0
  103. data/spec/spec_helper.rb +32 -0
  104. data/spec/support/archive_contents_matcher.rb +94 -0
  105. data/spec/support/custom_matchers.rb +21 -0
  106. data/spec/support/directory_contents_matcher.rb +61 -0
  107. data/spec/support/extensions.rb +18 -0
  108. data/spec/support/file_contents_matcher.rb +50 -0
  109. data/spec/support/random.rb +210 -0
  110. data/spec/support/without_regard_to_order_matcher.rb +58 -0
  111. metadata +196 -0
@@ -0,0 +1,113 @@
1
+ #
2
+ # h2. lib/imw/files/archive.rb -- describes archives of files
3
+ #
4
+ # == About
5
+ #
6
+ # Module for describing known archive types. An including archive
7
+ # type's class must define an instance variable +archive+ which is a
8
+ # hash with the following required keys:
9
+ #
10
+ # <tt>:program</tt>:: a symbol naming the program to be used. It
11
+ # should match one of the symbols in <tt>IMW::EXTERNAL_PROGRAMS</tt>
12
+ #
13
+ # <tt>:create_flags</tt>:: a string of flags to pass to the archiving
14
+ # program when creating the archive
15
+ #
16
+ # <tt>:append_flags</tt>:: a string of flags to pass to the archiving
17
+ # program when appending files to the archive
18
+ #
19
+ # <tt>:extract_flags</tt>:: a string of flags to pass to the archiving
20
+ # program when extracting the archive
21
+ #
22
+ # <tt>:list_flags</tt>:: a string of flags to pass to the archiving
23
+ # program when listing the archive's contents
24
+ #
25
+ # The +archive+ hash may also contain the entry:
26
+ #
27
+ # <tt>:unarchiving_program</tt>:: a symbol naming the program to be
28
+ # used to list/extract the archive. Useful only if this program
29
+ # differs from the program used to create the archive in the first
30
+ # place (e.g. zip & unzip).
31
+ #
32
+ # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
33
+ # Copyright:: Copyright (c) 2008 infochimps.org
34
+ # License:: GPL 3.0
35
+ # Website:: http://infinitemonkeywrench.org/
36
+ #
37
+ # puts "#{File.basename(__FILE__)}: Put it all in one place so that when something goes wrong you'll know it immediately. You'll regret it, but at least you'll know." # at bottom
38
+ module IMW
39
+ module Files
40
+
41
+ module BasicFile
42
+
43
+ # Is this file an archive?
44
+ def archive?
45
+ false
46
+ end
47
+ end
48
+
49
+ module Archive
50
+
51
+ attr_reader :archive
52
+
53
+ # Is this file an archive?
54
+ def archive?
55
+ true
56
+ end
57
+
58
+ public
59
+ # Create this archive containing the given +paths+, which can be
60
+ # either a string or list of strings to be interpreted as paths
61
+ # to files/directories by the shell.
62
+ #
63
+ # Options:
64
+ # <tt>:force</tt> (false):: overwrite any existing archive at this path.
65
+ def create paths, opts = {}
66
+ opts = opts.reverse_merge({:force => false})
67
+ raise IMW::Error.new("An archive already exists at #{@path}.") if exist? and not opts[:force]
68
+ raise IMW::Error.new("Cannot create an archive of type #{@extname}") unless @archive[:create_flags]
69
+ paths = [paths] if paths.class == String
70
+ IMW.system IMW::EXTERNAL_PROGRAMS[@archive[:program]], @archive[:create_flags], @path, *paths
71
+ self
72
+ end
73
+
74
+ # Append to this archive the given +paths+, which can be
75
+ # either a string or list of strings to be interpreted as paths
76
+ # to files/directories by the shell.
77
+ def append paths
78
+ raise IMW::Error.new("Cannot append to an archive of type #{@archive[:program]}.") unless @archive[:append_flags]
79
+ paths = [paths] if paths.class == String
80
+ IMW.system IMW::EXTERNAL_PROGRAMS[@archive[:program]], @archive[:append_flags], @path, *paths
81
+ self
82
+ end
83
+
84
+ # Extract the files from this archive to the current directory.
85
+ def extract
86
+ raise IMW::Error.new("Cannot extract, #{@path} does not exist.") unless exist?
87
+ program = (@archive[:unarchiving_program] or @archive[:program])
88
+ IMW.system IMW::EXTERNAL_PROGRAMS[program], @archive[:extract_flags], @path
89
+ end
90
+
91
+ # Return a (sorted) list of contents in this archive.
92
+ def contents
93
+ raise IMW::Error.new("Cannot list contents, #{@path} does not exist.") unless exist?
94
+ program = (@archive[:unarchiving_program] or @archive[:program])
95
+ output = ''
96
+ command = [IMW::EXTERNAL_PROGRAMS[program], @archive[:list_flags], @path].join ' '
97
+ output += `#{command}`
98
+ archive_contents_string_to_array(output)
99
+ end
100
+
101
+ # Parse and format the output from the archive program's "list"
102
+ # command into an array of filenames.
103
+ #
104
+ # An including class can customize this method to match the
105
+ # output from the archiving program of that class.
106
+ def archive_contents_string_to_array string
107
+ string.split("\n")
108
+ end
109
+ end
110
+ end
111
+ end
112
+
113
+
@@ -0,0 +1,122 @@
1
+ #
2
+ # h2. lib/imw/files/basicfile.rb -- base module for files
3
+ #
4
+ # == About
5
+ #
6
+ # Defines a base module for classes for specific filetypes to include.
7
+ #
8
+ # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
9
+ # Copyright:: Copyright (c) 2008 infochimps.org
10
+ # License:: GPL 3.0
11
+ # Website:: http://infinitemonkeywrench.org/
12
+ #
13
+ # puts "#{File.basename(__FILE__)}: At the very bottom of the office building, wedged between a small boulder and a rotting log you see a weathered manilla file folder. The writing on the tab is too faded to make out." # at bottom
14
+ module IMW
15
+ module Files
16
+ module BasicFile
17
+
18
+ attr_reader :uri, :host, :path, :dirname, :basename, :extname, :name
19
+
20
+ protected
21
+
22
+ def uri= uri
23
+ @uri = URI.parse(uri) if uri.is_a?(String)
24
+ @host = self.uri.host
25
+ @path = local? ? ::File.expand_path(self.uri.path) : self.uri.path
26
+ @dirname = ::File.dirname path
27
+ @basename = ::File.basename path
28
+ @extname = find_extname
29
+ @name = @basename[0,@basename.length - @extname.length]
30
+ end
31
+
32
+ # Some files (like <tt>.tar.gz</tt>) have an "extra" extension.
33
+ # Classes in the <tt>IMW::Files</tt> module should define a
34
+ # class method <tt>extname</tt> which returns their full
35
+ # extension.
36
+ def find_extname
37
+ self.class.respond_to?(:extname) ? self.class.extname(path) : ::File.extname(path)
38
+ end
39
+
40
+ public
41
+
42
+ # Is this file on the local machine (i.e. the host of the file's URI is nil or 'file')?
43
+ def local?
44
+ host == 'file' || host.nil?
45
+ end
46
+
47
+ # Is this file on a remote machine?
48
+ def remote?
49
+ (! local?)
50
+ end
51
+
52
+ # Steal a bunch of class methods from File which only take a
53
+ # path as a first argument.
54
+ [:executable?, :executable_real?, :file?, :directory?, :ftype, :owned?, :pipe?, :readable?, :readable_real?, :setgid?, :setuid?, :size, :size?, :socket?, :split, :stat, :sticky?, :writable?, :writable_real?, :zero?].each do |class_method|
55
+ define_method class_method do
56
+ File.send(class_method, path) if local?
57
+ end
58
+ end
59
+
60
+ # Is there a real file at the path of this File? Will attempt
61
+ # to open files online too to check.
62
+ def exist?
63
+ if local?
64
+ ::File.exist?(path) ? true : false
65
+ else
66
+ begin
67
+ true if open(uri)
68
+ rescue SocketError
69
+ false
70
+ end
71
+ end
72
+ end
73
+ alias_method :exists?, :exist?
74
+
75
+ # Delete this file.
76
+ def rm
77
+ raise IMW::PathError.new("cannot delete remote file #{uri}") unless local?
78
+ raise IMW::PathError.new("cannot delete #{uri}, doesn't exist!") unless exist?
79
+ FileUtils.rm path
80
+ end
81
+ alias_method :rm!, :rm
82
+
83
+ # Copy this file to +new_path+.
84
+ def cp new_path
85
+ raise IMW::PathError.new("cannot copy from #{path}, doesn't exist!") unless exist?
86
+ if local?
87
+ FileUtils.cp path, new_path
88
+ else
89
+ # FIXME better way to do this?
90
+ File.open(new_path,'w') { |f| f.write(open(uri).read) }
91
+ end
92
+ self.class.new(new_path)
93
+ end
94
+
95
+ # Copy this file to +dir+.
96
+ def cp_to_dir dir
97
+ cp File.join(File.expand_path(dir),basename)
98
+ end
99
+
100
+ # Move this file to +new_path+.
101
+ def mv new_path
102
+ raise IMW::PathError.new("cannot move from #{path}, doesn't exist!") unless exist?
103
+ if local?
104
+ FileUtils.mv path, new_path
105
+ else
106
+ # FIXME better way to do this?
107
+ File.open(new_path,'w') { |f| f.write(open(uri).read) }
108
+ end
109
+ self.class.new(new_path)
110
+ end
111
+ alias_method :mv!, :mv
112
+
113
+ # Move this file to +dir+.
114
+ def mv_to_dir dir
115
+ mv File.join(File.expand_path(dir),basename)
116
+ end
117
+ alias_method :mv_to_dir!, :mv_to_dir
118
+ end
119
+ end
120
+ end
121
+
122
+
@@ -0,0 +1,28 @@
1
+ #
2
+ # h2. lib/imw/files/binary.rb -- binary files
3
+ #
4
+ # == About
5
+ #
6
+ # Class for handling binary data.
7
+ #
8
+ # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
9
+ # Copyright:: Copyright (c) 2008 infochimps.org
10
+ # License:: GPL 3.0
11
+ # Website:: http://infinitemonkeywrench.org/
12
+ #
13
+ # puts "#{File.basename(__FILE__)}: Something clever" # at bottom
14
+ module IMW
15
+ module Files
16
+
17
+ class Binary
18
+
19
+ include IMW::Files::BasicFile
20
+ include IMW::Files::Compressible
21
+
22
+ def initialize uri
23
+ self.uri= uri
24
+ end
25
+
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,93 @@
1
+ #
2
+ # h2. lib/imw/files/compressed_file.rb -- class describing compressed files
3
+ #
4
+ # == About
5
+ #
6
+ # Compression of files is handled via the
7
+ # <tt>IMW::Files::Compressible</tt> module which can be included by
8
+ # any object that has a <tt>@path</tt> attribute. The methods defined
9
+ # there compress files and return this
10
+ # <tt>IMW::Files::CompressedFile</tt> object which has methods for
11
+ # decompression.
12
+ #
13
+ # A subclass of this class must define a +compression+ instance
14
+ # variable which is a hash with the following keys:
15
+ #
16
+ # <tt>:program</tt>:: a symbol naming the program used for
17
+ # compression/decompression which must be one of the symbols in
18
+ # <tt>IMW::EXTERNAL_PROGRAMS</tt>
19
+ #
20
+ # <tt>:decompression_flags</tt>:: a string of flags to pass to the
21
+ # compression program when decompressing the file.
22
+ #
23
+ # A subclass must also define the method +decompressed_path+ which
24
+ # returns the path of the file post-decompression.
25
+ #
26
+ # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
27
+ # Copyright:: Copyright (c) 2008 infochimps.org
28
+ # License:: GPL 3.0
29
+ # Website:: http://infinitemonkeywrench.org/
30
+ #
31
+ # puts "#{File.basename(__FILE__)}: Have you ever folded up the wrapper of a soda straw into a little accordion shape and let a drop of water soak into it?" # at bottom
32
+ module IMW
33
+ module Files
34
+
35
+ module BasicFile
36
+ def compressed?
37
+ false
38
+ end
39
+ end
40
+
41
+ # A module which provides methods for decompressing a compressed
42
+ # file. An including class should define an instance variable
43
+ # <tt>@compression</tt> with two keys:
44
+ #
45
+ # <tt>:program</tt>:: a symbol from <tt>IMW::EXTERNAL_PROGRAMS</tt>
46
+ # <tt>:decompression_flags</tt>:: a string specifying flags to pass to the decompression program
47
+ module CompressedFile
48
+
49
+ attr_reader :compression
50
+
51
+ # Is this file compressed?
52
+ def compressed?
53
+ true
54
+ end
55
+
56
+ # Construct the command passed to the shell to decompress this
57
+ # file.
58
+ def decompression_command
59
+ [IMW::EXTERNAL_PROGRAMS[@compression[:program]],@compression[:decompression_flags],@path].join ' '
60
+ end
61
+
62
+ public
63
+ # Decompress this file in its present directory overwriting any
64
+ # existing files and without saving the original compressed
65
+ # file.
66
+ def decompress!
67
+ raise IMW::PathError.new("cannot decompress #{@path}, doesn't exist!") unless exist?
68
+ FileUtils.cd(@dirname) { IMW.system decompression_command }
69
+ IMW.open(decompressed_path)
70
+ end
71
+
72
+ # Decompress this file in its present directory, overwriting any
73
+ # existing files while keeping the original compressed file.
74
+ #
75
+ # The implementation is a little stupid, as the file is
76
+ # needlessly copied.
77
+ def decompress
78
+ raise IMW::PathError.new("cannot decompress #{@path}, doesn't exist!") unless exist?
79
+ begin
80
+ FileUtils.cp(@path,@path + 'copy')
81
+ decompress!
82
+ ensure
83
+ FileUtils.mv(@path + 'copy',@path)
84
+ end
85
+ IMW.open(decompressed_path)
86
+ end
87
+
88
+ end
89
+ end
90
+ end
91
+
92
+
93
+
@@ -0,0 +1,348 @@
1
+ #
2
+ # h2. lib/imw/files/compressed_files_and_archives.rb -- require farm
3
+ #
4
+ # == About
5
+ #
6
+ # Just required all the archive and compressed formats (+tar+, +bz2+,
7
+ # &c.)
8
+ #
9
+ # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
10
+ # Copyright:: Copyright (c) 2008 infochimps.org
11
+ # License:: GPL 3.0
12
+ # Website:: http://infinitemonkeywrench.org/
13
+ #
14
+ # puts "#{File.basename(__FILE__)}: Something clever" # at bottom
15
+ module IMW
16
+ module Files
17
+
18
+ # A class to wrap a +tar+ archive.
19
+ #
20
+ # Creation, appending, listing, and extraction flags are stored in
21
+ # <tt>IMW::Files::Tar::DEFAULT_FLAGS</tt> and all are passed to
22
+ # the <tt>:tar</tt> program in <tt>IMW::EXTERNAL_PROGRAMS</tt>.
23
+ class Tar
24
+
25
+ include IMW::Files::BasicFile
26
+ include IMW::Files::Archive
27
+ include IMW::Files::Compressible
28
+
29
+ # The default flags used creating, appending to, listing, and
30
+ # extracting a tar archive.
31
+ DEFAULT_FLAGS = {
32
+ :create => "-cf",
33
+ :append => "-rf",
34
+ :list => "-tf",
35
+ :extract => "-xf",
36
+ :program => :tar
37
+ }
38
+
39
+ def initialize uri, *args
40
+ self.uri= uri
41
+ @archive = {
42
+ :program => DEFAULT_FLAGS[:program],
43
+ :create_flags => DEFAULT_FLAGS[:create],
44
+ :append_flags => DEFAULT_FLAGS[:append],
45
+ :list_flags => DEFAULT_FLAGS[:list],
46
+ :extract_flags => DEFAULT_FLAGS[:extract]
47
+ }
48
+ end
49
+ end # Tar
50
+
51
+ # A class to wrap a <tt>tar.gz</tt> archive.
52
+ #
53
+ # Creation, appending, listing, and extraction flags are stored in
54
+ # <tt>IMW::Files::TarGz::DEFAULT_FLAGS</tt> and all are passed to
55
+ # the <tt>:tar</tt> program in <tt>IMW::EXTERNAL_PROGRAMS</tt>.
56
+ class TarGz
57
+
58
+ include IMW::Files::BasicFile
59
+ include IMW::Files::Archive
60
+ include IMW::Files::CompressedFile
61
+
62
+ # The default flags used creating, appending to, listing, and
63
+ # extracting a <tt>tar.gz</tt> archive.
64
+ DEFAULT_FLAGS = {
65
+ :decompression_program => :gzip,
66
+ :decompression_flags => '-fd',
67
+ :archive_program => :tar,
68
+ :archive_list_flags => "-tf",
69
+ :archive_extract_flags => "-xzf"
70
+ }
71
+
72
+ def initialize uri, *args
73
+ self.uri= uri
74
+ @compression = {
75
+ :program => DEFAULT_FLAGS[:decompression_program],
76
+ :decompression_flags => DEFAULT_FLAGS[:decompression_flags]
77
+ }
78
+ @archive = {
79
+ :program => DEFAULT_FLAGS[:archive_program],
80
+ :list_flags => DEFAULT_FLAGS[:archive_list_flags],
81
+ :extract_flags => DEFAULT_FLAGS[:archive_extract_flags]
82
+ }
83
+ end
84
+
85
+ # Returns the path of the file after decompression.
86
+ def decompressed_path
87
+ if /\.tar\.gz$/.match @path then
88
+ @path.gsub /\.tar\.gz$/, ".tar"
89
+ elsif /\.tgz$/.match @path then
90
+ @path.gsub /\.tgz$/, ".tar"
91
+ end
92
+ end
93
+
94
+ def self.extname path
95
+ if /\.tar\.gz$/.match path then
96
+ ".tar.gz"
97
+ elsif /\.tgz$/.match path then
98
+ ".tgz"
99
+ end
100
+ end
101
+
102
+ end # TarGz
103
+
104
+ # A class to wrap a <tt>tar.bz2</tt> archive.
105
+ #
106
+ # Creation, appending, listing, and extraction flags are stored in
107
+ # <tt>IMW::Files::TarBz2::DEFAULT_FLAGS</tt> and all are passed to
108
+ # the <tt>:tar</tt> program in <tt>IMW::EXTERNAL_PROGRAMS</tt>.
109
+ class TarBz2
110
+
111
+ include IMW::Files::BasicFile
112
+ include IMW::Files::Archive
113
+ include IMW::Files::CompressedFile
114
+
115
+ # The default flags used creating, appending to, listing, and
116
+ # extracting a <tt>tar.bz2</tt> archive.
117
+ DEFAULT_FLAGS = {
118
+ :decompression_program => :bzip2,
119
+ :decompression_flags => '-fd',
120
+ :archive_program => :tar,
121
+ :archive_create_flags => '-cf',
122
+ :archive_list_flags => "-tf",
123
+ :archive_extract_flags => "-xjf"
124
+ }
125
+
126
+ def self.extname path
127
+ if /\.tar\.bz2$/.match path then
128
+ ".tar.bz2"
129
+ elsif /\.tbz2$/.match path then
130
+ ".tbz2"
131
+ end
132
+ end
133
+
134
+ def initialize uri, *args
135
+ self.uri= uri
136
+ @compression = {
137
+ :program => DEFAULT_FLAGS[:decompression_program],
138
+ :decompression_flags => DEFAULT_FLAGS[:decompression]
139
+ }
140
+ @archive = {
141
+ :program => DEFAULT_FLAGS[:archive_program],
142
+ :list_flags => DEFAULT_FLAGS[:archive_list_flags],
143
+ :extract_flags => DEFAULT_FLAGS[:archive_extract_flags],
144
+ :create_flags => DEFAULT_FLAGS[:archive_create_flags]
145
+ }
146
+ end
147
+
148
+ # Returns the path of the file after decompression.
149
+ def decompressed_path
150
+ if /\.tar\.bz2$/.match @path then
151
+ @path.gsub /\.tar\.bz2$/, ".tar"
152
+ elsif /\.tbz2$/.match @path then
153
+ @path.gsub /\.tbz2$/, ".tar"
154
+ end
155
+ end
156
+
157
+ # Overrides default behavior of IMW::Files::Archive#create to
158
+ # compress files after creating them.
159
+ def create paths, opts={}
160
+ opts = opts.reverse_merge({:force => false})
161
+ raise IMW::Error.new("An archive already exists at #{@path}.") if exist? and not opts[:force]
162
+ paths = [paths] if paths.class == String
163
+ IMW.system IMW::EXTERNAL_PROGRAMS[@archive[:program]], @archive[:create_flags], path_between_archive_and_compression, *paths
164
+ IMW.open(path_between_archive_and_compression).compress!(:bzip2)
165
+ end
166
+
167
+ protected
168
+ def path_between_archive_and_compression
169
+ File.join(dirname,name + '.tar')
170
+ end
171
+
172
+ end # TarBz2
173
+
174
+ # A class to wrap a +rar+ archive.
175
+ #
176
+ # Creation, appending, listing, and extraction flags are stored in
177
+ # <tt>IMW::Files::Rar::DEFAULT_FLAGS</tt> and all are passed to
178
+ # the <tt>:rar</tt> program in <tt>IMW::EXTERNAL_PROGRAMS</tt>.
179
+ class Rar
180
+
181
+ include IMW::Files::BasicFile
182
+ include IMW::Files::Archive
183
+
184
+ # The default flags used creating, appending to, listing, and
185
+ # extracting a rar archive.
186
+ DEFAULT_FLAGS = {
187
+ :create => "a -r -o+ -inul",
188
+ :append => "a -r -o+ -inul",
189
+ :list => "vb",
190
+ :extract => "x -o+ -inul"
191
+ }
192
+
193
+ def initialize uri, *args
194
+ self.uri= uri
195
+ @archive = {
196
+ :program => :rar,
197
+ :create_flags => DEFAULT_FLAGS[:create],
198
+ :append_flags => DEFAULT_FLAGS[:append],
199
+ :list_flags => DEFAULT_FLAGS[:list],
200
+ :extract_flags => DEFAULT_FLAGS[:extract]
201
+ }
202
+ end
203
+ end # Rar
204
+
205
+ # A class to wrap a +zip+ archive.
206
+ #
207
+ # Creation, appending, listing, and extraction flags are stored in
208
+ # <tt>IMW::Files::Zip::DEFAULT_FLAGS</tt> and all are passed to
209
+ # the <tt>:zip</tt> and <tt>:unzip</tt> programs in
210
+ # <tt>IMW::EXTERNAL_PROGRAMS</tt>.
211
+ class Zip
212
+
213
+ include IMW::Files::BasicFile
214
+ include IMW::Files::Archive
215
+
216
+ # The default flags used creating, appending to, listing, and
217
+ # extracting a zip archive.
218
+ DEFAULT_FLAGS = {
219
+ :create => "-q -r",
220
+ :append => "-q -g",
221
+ :list => "-l",
222
+ :extract => "-q -o",
223
+ :unarchiving_program => :unzip
224
+ }
225
+
226
+ def initialize uri, *args
227
+ self.uri= uri
228
+ @archive = {
229
+ :program => :zip,
230
+ :create_flags => DEFAULT_FLAGS[:create],
231
+ :append_flags => DEFAULT_FLAGS[:append],
232
+ :list_flags => DEFAULT_FLAGS[:list],
233
+ :extract_flags => DEFAULT_FLAGS[:extract],
234
+ :unarchiving_program => DEFAULT_FLAGS[:unarchiving_program]
235
+ }
236
+ end
237
+
238
+ # The `unzip' program outputs data in a very annoying format:
239
+ #
240
+ # Archive: data.zip
241
+ # Length Date Time Name
242
+ # -------- ---- ---- ----
243
+ # 18510 07-28-08 15:58 data/4d7Qrgz7.csv
244
+ # 3418 07-28-08 15:41 data/7S.csv
245
+ # 23353 07-28-08 15:41 data/g.csv
246
+ # 711 07-28-08 15:58 data/g.xml
247
+ # 1095 07-28-08 15:41 data/L.xml
248
+ # 2399 07-28-08 15:58 data/mTAu9H3.xml
249
+ # 152 07-28-08 15:58 data/vaHBS2t5R.dat
250
+ # -------- -------
251
+ # 49638 7 files
252
+ #
253
+ # which is parsed by this method.
254
+ def archive_contents_string_to_array string
255
+ rows = string.split("\n")
256
+ # ignore the first 3 lines of the output and also discard the
257
+ # last 2 (5 = 2 + 3)
258
+ file_rows = rows[3,(rows.length - 5)]
259
+ file_rows.map! do |row|
260
+ # discard extra whitespace before and after main text
261
+ row.lstrip!.rstrip!
262
+ # split the remaining text at spaces...columns beyond the
263
+ # third are part of the filename and should be joined with a
264
+ # space again in case of a filename with a space
265
+ row.split(' ')[3,row.size].join(' ')
266
+ end
267
+ file_rows
268
+ end
269
+ end # Zip
270
+
271
+ # A class to wrap a <tt>gz</tt> compressed file.
272
+ #
273
+ # The decompressing flags are stored in
274
+ # <tt>IMW::Files::Gz::DEFAULT_FLAGS</tt> and all are passed to the
275
+ # <tt>:gzip</tt> program in <tt>IMW::EXTERNAL_PROGRAMS</tt>.
276
+ class Gz
277
+
278
+ include IMW::Files::BasicFile
279
+ include IMW::Files::CompressedFile
280
+
281
+ # The default flags used in extracting a <tt>gz</tt> file.
282
+ DEFAULT_FLAGS = {
283
+ :program => :gzip,
284
+ :decompression => '-fd'
285
+ }
286
+
287
+ def initialize uri, *args
288
+ self.uri= uri
289
+ @compression = {
290
+ :program => DEFAULT_FLAGS[:program],
291
+ :decompression_flags => DEFAULT_FLAGS[:decompression]
292
+ }
293
+ end
294
+
295
+ def decompressed_path
296
+ @path.gsub /\.gz$/, ""
297
+ end
298
+ end # Gz
299
+
300
+ # A class to wrap a <tt>bz2</tt> compressed file.
301
+ #
302
+ # The decompressing flags are stored in
303
+ # <tt>IMW::Files::Bz2::DEFAULT_FLAGS</tt> and all are passed to
304
+ # the <tt>:bzip2</tt> program in <tt>IMW::EXTERNAL_PROGRAMS</tt>.
305
+ class Bz2
306
+
307
+ include IMW::Files::BasicFile
308
+ include IMW::Files::CompressedFile
309
+
310
+ # The default flags used in extracting a <tt>bz2</tt> file.
311
+ DEFAULT_FLAGS = {
312
+ :program => :bzip2,
313
+ :decompression => '-fd'
314
+ }
315
+
316
+ def initialize uri, *args
317
+ self.uri= uri
318
+ raise IMW::Error.new("#{@extname} is not a valid extension for a bzip2 compressed file.") unless @extname == '.bz2'
319
+ @compression = {
320
+ :program => DEFAULT_FLAGS[:program],
321
+ :decompression_flags => DEFAULT_FLAGS[:decompression]
322
+ }
323
+ end
324
+
325
+ # Returns the path of the file after decompression.
326
+ def decompressed_path
327
+ @path.gsub /\.bz2$/, ""
328
+ end
329
+ end # Bz2
330
+
331
+
332
+ # make sure that tar.bz2 precedes bz2 and so on...
333
+ FILE_REGEXPS << [/\.tar\.bz2$/, IMW::Files::TarBz2]
334
+ FILE_REGEXPS << [/\.tbz2$/, IMW::Files::TarBz2]
335
+
336
+ FILE_REGEXPS << [/\.tar\.gz$/, IMW::Files::TarGz]
337
+ FILE_REGEXPS << [/\.tgz$/, IMW::Files::TarGz]
338
+
339
+ FILE_REGEXPS << [/\.tar$/, IMW::Files::Tar]
340
+ FILE_REGEXPS << [/\.bz2$/, IMW::Files::Bz2]
341
+ FILE_REGEXPS << [/\.gz$/, IMW::Files::Gz]
342
+ FILE_REGEXPS << [/\.rar$/, IMW::Files::Rar]
343
+ FILE_REGEXPS << [/\.zip$/, IMW::Files::Zip]
344
+
345
+ end # Files
346
+ end # IMW
347
+
348
+