imw 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. data/.gitignore +15 -0
  2. data/CHANGELOG +0 -0
  3. data/LICENSE +674 -0
  4. data/README.rdoc +101 -0
  5. data/Rakefile +20 -0
  6. data/VERSION +1 -0
  7. data/etc/imwrc.rb +76 -0
  8. data/lib/imw.rb +42 -0
  9. data/lib/imw/boot.rb +58 -0
  10. data/lib/imw/dataset.rb +233 -0
  11. data/lib/imw/dataset/datamapper.rb +66 -0
  12. data/lib/imw/dataset/datamapper/time_and_user_stamps.rb +37 -0
  13. data/lib/imw/dataset/loaddump.rb +50 -0
  14. data/lib/imw/dataset/old/file_collection.rb +88 -0
  15. data/lib/imw/dataset/old/file_collection_utils.rb +71 -0
  16. data/lib/imw/dataset/scaffold.rb +132 -0
  17. data/lib/imw/dataset/scraped_uri.rb +305 -0
  18. data/lib/imw/dataset/scrub/old_working_scrubber.rb +87 -0
  19. data/lib/imw/dataset/scrub/scrub.rb +147 -0
  20. data/lib/imw/dataset/scrub/scrub_simple_url.rb +38 -0
  21. data/lib/imw/dataset/scrub/scrub_test.rb +60 -0
  22. data/lib/imw/dataset/scrub/slug.rb +101 -0
  23. data/lib/imw/dataset/stats.rb +73 -0
  24. data/lib/imw/dataset/stats/counter.rb +23 -0
  25. data/lib/imw/dataset/task.rb +38 -0
  26. data/lib/imw/dataset/workflow.rb +81 -0
  27. data/lib/imw/files.rb +110 -0
  28. data/lib/imw/files/archive.rb +113 -0
  29. data/lib/imw/files/basicfile.rb +122 -0
  30. data/lib/imw/files/binary.rb +28 -0
  31. data/lib/imw/files/compressed_file.rb +93 -0
  32. data/lib/imw/files/compressed_files_and_archives.rb +348 -0
  33. data/lib/imw/files/compressible.rb +103 -0
  34. data/lib/imw/files/csv.rb +112 -0
  35. data/lib/imw/files/json.rb +41 -0
  36. data/lib/imw/files/sgml.rb +65 -0
  37. data/lib/imw/files/text.rb +68 -0
  38. data/lib/imw/files/yaml.rb +46 -0
  39. data/lib/imw/packagers.rb +8 -0
  40. data/lib/imw/packagers/archiver.rb +108 -0
  41. data/lib/imw/packagers/s3_mover.rb +28 -0
  42. data/lib/imw/parsers.rb +7 -0
  43. data/lib/imw/parsers/html_parser.rb +382 -0
  44. data/lib/imw/parsers/html_parser/matchers.rb +306 -0
  45. data/lib/imw/parsers/line_parser.rb +87 -0
  46. data/lib/imw/parsers/regexp_parser.rb +72 -0
  47. data/lib/imw/utils.rb +24 -0
  48. data/lib/imw/utils/components.rb +61 -0
  49. data/lib/imw/utils/config.rb +46 -0
  50. data/lib/imw/utils/error.rb +54 -0
  51. data/lib/imw/utils/extensions/array.rb +125 -0
  52. data/lib/imw/utils/extensions/class/attribute_accessors.rb +8 -0
  53. data/lib/imw/utils/extensions/core.rb +43 -0
  54. data/lib/imw/utils/extensions/dir.rb +24 -0
  55. data/lib/imw/utils/extensions/file_core.rb +64 -0
  56. data/lib/imw/utils/extensions/hash.rb +218 -0
  57. data/lib/imw/utils/extensions/hpricot.rb +48 -0
  58. data/lib/imw/utils/extensions/string.rb +49 -0
  59. data/lib/imw/utils/extensions/struct.rb +42 -0
  60. data/lib/imw/utils/extensions/symbol.rb +28 -0
  61. data/lib/imw/utils/extensions/typed_struct.rb +22 -0
  62. data/lib/imw/utils/extensions/uri.rb +59 -0
  63. data/lib/imw/utils/log.rb +67 -0
  64. data/lib/imw/utils/misc.rb +63 -0
  65. data/lib/imw/utils/paths.rb +115 -0
  66. data/lib/imw/utils/uri.rb +59 -0
  67. data/lib/imw/utils/uuid.rb +33 -0
  68. data/lib/imw/utils/validate.rb +38 -0
  69. data/lib/imw/utils/version.rb +12 -0
  70. data/lib/imw/utils/view.rb +113 -0
  71. data/lib/imw/utils/view/dump_csv.rb +112 -0
  72. data/lib/imw/utils/view/dump_csv_older.rb +117 -0
  73. data/spec/data/sample.csv +131 -0
  74. data/spec/data/sample.tsv +131 -0
  75. data/spec/data/sample.txt +131 -0
  76. data/spec/data/sample.xml +653 -0
  77. data/spec/data/sample.yaml +652 -0
  78. data/spec/imw/dataset/datamapper/uri_spec.rb +43 -0
  79. data/spec/imw/dataset/datamapper_spec_helper.rb +11 -0
  80. data/spec/imw/files/archive_spec.rb +118 -0
  81. data/spec/imw/files/basicfile_spec.rb +121 -0
  82. data/spec/imw/files/bz2_spec.rb +32 -0
  83. data/spec/imw/files/compressed_file_spec.rb +96 -0
  84. data/spec/imw/files/compressible_spec.rb +100 -0
  85. data/spec/imw/files/file_spec.rb +144 -0
  86. data/spec/imw/files/gz_spec.rb +32 -0
  87. data/spec/imw/files/rar_spec.rb +33 -0
  88. data/spec/imw/files/tar_spec.rb +31 -0
  89. data/spec/imw/files/text_spec.rb +23 -0
  90. data/spec/imw/files/zip_spec.rb +31 -0
  91. data/spec/imw/files_spec.rb +38 -0
  92. data/spec/imw/packagers/archiver_spec.rb +125 -0
  93. data/spec/imw/packagers/s3_mover_spec.rb +7 -0
  94. data/spec/imw/parsers/line_parser_spec.rb +96 -0
  95. data/spec/imw/parsers/regexp_parser_spec.rb +42 -0
  96. data/spec/imw/utils/extensions/file_core_spec.rb +72 -0
  97. data/spec/imw/utils/extensions/find_spec.rb +113 -0
  98. data/spec/imw/utils/paths_spec.rb +38 -0
  99. data/spec/imw/workflow/rip/local_spec.rb +89 -0
  100. data/spec/imw/workflow/rip_spec.rb +27 -0
  101. data/spec/rcov.opts +1 -0
  102. data/spec/spec.opts +4 -0
  103. data/spec/spec_helper.rb +32 -0
  104. data/spec/support/archive_contents_matcher.rb +94 -0
  105. data/spec/support/custom_matchers.rb +21 -0
  106. data/spec/support/directory_contents_matcher.rb +61 -0
  107. data/spec/support/extensions.rb +18 -0
  108. data/spec/support/file_contents_matcher.rb +50 -0
  109. data/spec/support/random.rb +210 -0
  110. data/spec/support/without_regard_to_order_matcher.rb +58 -0
  111. metadata +196 -0
@@ -0,0 +1,113 @@
1
+ #
2
+ # h2. lib/imw/files/archive.rb -- describes archives of files
3
+ #
4
+ # == About
5
+ #
6
+ # Module for describing known archive types. An including archive
7
+ # type's class must define an instance variable +archive+ which is a
8
+ # hash with the following required keys:
9
+ #
10
+ # <tt>:program</tt>:: a symbol naming the program to be used. It
11
+ # should match one of the symbols in <tt>IMW::EXTERNAL_PROGRAMS</tt>
12
+ #
13
+ # <tt>:create_flags</tt>:: a string of flags to pass to the archiving
14
+ # program when creating the archive
15
+ #
16
+ # <tt>:append_flags</tt>:: a string of flags to pass to the archiving
17
+ # program when appending files to the archive
18
+ #
19
+ # <tt>:extract_flags</tt>:: a string of flags to pass to the archiving
20
+ # program when extracting the archive
21
+ #
22
+ # <tt>:list_flags</tt>:: a string of flags to pass to the archiving
23
+ # program when listing the archive's contents
24
+ #
25
+ # The +archive+ hash may also contain the entry:
26
+ #
27
+ # <tt>:unarchiving_program</tt>:: a symbol naming the program to be
28
+ # used to list/extract the archive. Useful only if this program
29
+ # differs from the program used to create the archive in the first
30
+ # place (i.e. - zip & unzip).
31
+ #
32
+ # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
33
+ # Copyright:: Copyright (c) 2008 infochimps.org
34
+ # License:: GPL 3.0
35
+ # Website:: http://infinitemonkeywrench.org/
36
+ #
37
+ # puts "#{File.basename(__FILE__)}: Put it all in one place so that when something goes wrong you'll know it immediately. You'll regret it, but at least you'll know." # at bottom
38
require 'shellwords'

module IMW
  module Files

    module BasicFile

      # Is this file an archive?  Plain files answer +false+; classes
      # that also include IMW::Files::Archive answer +true+.
      def archive?
        false
      end
    end

    # Mixin that drives an external archiving program.  The including
    # class must set <tt>@path</tt>, <tt>@archive</tt> (the hash of
    # program and flags) and provide <tt>exist?</tt>.
    module Archive

      attr_reader :archive

      # Is this file an archive?
      def archive?
        true
      end

      # Create this archive containing the given +paths+, which can be
      # either a string or a list of strings naming files/directories.
      #
      # Options:
      # <tt>:force</tt> (false):: overwrite any existing archive at this path.
      #
      # Raises IMW::Error if an archive already exists (and
      # <tt>:force</tt> is not set) or if this archive type has no
      # <tt>:create_flags</tt>.  Returns +self+.
      def create paths, opts = {}
        # Plain Hash#merge gives the same result as reverse_merge without
        # depending on a Hash extension.
        opts = { :force => false }.merge(opts)
        raise IMW::Error.new("An archive already exists at #{@path}.") if exist? && !opts[:force]
        raise IMW::Error.new("Cannot create an archive of type #{@extname}") unless @archive[:create_flags]
        paths = [paths] if paths.is_a?(String)
        IMW.system IMW::EXTERNAL_PROGRAMS[@archive[:program]], @archive[:create_flags], @path, *paths
        self
      end

      # Append to this archive the given +paths+, which can be either a
      # string or a list of strings naming files/directories.
      #
      # Raises IMW::Error if this archive type has no
      # <tt>:append_flags</tt>.  Returns +self+.
      def append paths
        raise IMW::Error.new("Cannot append to an archive of type #{@archive[:program]}.") unless @archive[:append_flags]
        paths = [paths] if paths.is_a?(String)
        IMW.system IMW::EXTERNAL_PROGRAMS[@archive[:program]], @archive[:append_flags], @path, *paths
        self
      end

      # Extract the files from this archive, using the
      # <tt>:unarchiving_program</tt> when one is configured and the
      # archiving program otherwise.
      #
      # Raises IMW::Error if there is nothing at <tt>@path</tt>.
      def extract
        raise IMW::Error.new("Cannot extract, #{@path} does not exist.") unless exist?
        program = @archive[:unarchiving_program] || @archive[:program]
        IMW.system IMW::EXTERNAL_PROGRAMS[program], @archive[:extract_flags], @path
      end

      # Return the list of entries in this archive, in the order the
      # listing program printed them (no sorting is applied here).
      #
      # Raises IMW::Error if there is nothing at <tt>@path</tt>.
      def contents
        raise IMW::Error.new("Cannot list contents, #{@path} does not exist.") unless exist?
        program = @archive[:unarchiving_program] || @archive[:program]
        # The command goes through the shell, so the path must be escaped:
        # otherwise a space or metacharacter in it splits or alters the
        # command line.
        command = [IMW::EXTERNAL_PROGRAMS[program], @archive[:list_flags], Shellwords.escape(@path.to_s)].join(' ')
        archive_contents_string_to_array(`#{command}`)
      end

      # Parse the output of the archive program's "list" command into
      # an array of filenames (one per output line).
      #
      # An including class can override this method to match the
      # output format of its own archiving program.
      def archive_contents_string_to_array string
        string.split("\n")
      end
    end
  end
end
112
+
113
+
@@ -0,0 +1,122 @@
1
+ #
2
+ # h2. lib/imw/files/file.rb -- base class for files
3
+ #
4
+ # == About
5
+ #
6
+ # Defines a base class for classes for specific filetypes to subclass.
7
+ #
8
+ # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
9
+ # Copyright:: Copyright (c) 2008 infochimps.org
10
+ # License:: GPL 3.0
11
+ # Website:: http://infinitemonkeywrench.org/
12
+ #
13
+ # puts "#{File.basename(__FILE__)}: At the very bottom of the office building, wedged between a small boulder and a rotting log you see a weathered manilla file folder. The writing on the tab is too faded to make out." # at bottom
14
module IMW
  module Files
    # Behaviour shared by every file wrapper: URI/path bookkeeping plus
    # a handful of filesystem operations.  An including class is expected
    # to call <tt>self.uri=</tt> from its +initialize+.
    module BasicFile

      attr_reader :uri, :host, :path, :dirname, :basename, :extname, :name

      protected

      # Set the location of this file and derive +host+, +path+,
      # +dirname+, +basename+, +extname+ and +name+ from it.
      #
      # +uri+ may be a String (parsed with <tt>URI.parse</tt>) or an
      # object that already answers +host+, +path+ and +scheme+.
      def uri= uri
        # Previously @uri was only assigned for Strings, so passing a
        # parsed URI left it nil and the next line blew up.
        @uri      = uri.is_a?(String) ? URI.parse(uri) : uri
        @host     = self.uri.host
        @path     = local? ? ::File.expand_path(self.uri.path) : self.uri.path
        @dirname  = ::File.dirname path
        @basename = ::File.basename path
        @extname  = find_extname
        @name     = @basename[0, @basename.length - @extname.length]
      end

      # Some files (like <tt>.tar.gz</tt>) have an "extra" extension.
      # Classes in the <tt>IMW::Files</tt> module may define a class
      # method <tt>extname</tt> which returns their full extension; when
      # it is missing, or returns nil for this path, fall back to
      # <tt>File.extname</tt>.
      def find_extname
        declared = self.class.extname(path) if self.class.respond_to?(:extname)
        declared || ::File.extname(path)
      end

      public

      # Is this file on the local machine?  True when the URI has no
      # host (nil or empty) or uses the +file+ scheme.  A host literally
      # named 'file' is still accepted, as before.
      def local?
        host.nil? || host.empty? || host == 'file' || uri.scheme == 'file'
      end

      # Is this file on a remote machine?
      def remote?
        !local?
      end

      # Steal a bunch of class methods from File which only take a
      # path as a first argument.  Each returns nil for remote files.
      [:executable?, :executable_real?, :file?, :directory?, :ftype, :owned?, :pipe?, :readable?, :readable_real?, :setgid?, :setuid?, :size, :size?, :socket?, :split, :stat, :sticky?, :writable?, :writable_real?, :zero?].each do |class_method|
        define_method class_method do
          ::File.send(class_method, path) if local?
        end
      end

      # Is there a real file at the path of this File?  Will attempt
      # to open files online too to check.
      #
      # NOTE(review): the remote branch relies on Kernel#open accepting
      # a URI (open-uri style) -- confirm that is loaded by the library.
      def exist?
        return ::File.exist?(path) if local?
        begin
          true if open(uri)
        rescue SocketError
          false
        end
      end
      alias_method :exists?, :exist?

      # Delete this file.  Raises IMW::PathError for remote or missing
      # files.
      def rm
        raise IMW::PathError.new("cannot delete remote file #{uri}") unless local?
        raise IMW::PathError.new("cannot delete #{uri}, doesn't exist!") unless exist?
        FileUtils.rm path
      end
      alias_method :rm!, :rm

      # Copy this file to +new_path+ and return a new object of the
      # same class wrapping the copy.  Remote files are downloaded.
      def cp new_path
        raise IMW::PathError.new("cannot copy from #{path}, doesn't exist!") unless exist?
        if local?
          FileUtils.cp path, new_path
        else
          # FIXME better way to do this?
          ::File.open(new_path, 'w') { |f| f.write(open(uri).read) }
        end
        self.class.new(new_path)
      end

      # Copy this file to +dir+, keeping its basename.
      def cp_to_dir dir
        cp ::File.join(::File.expand_path(dir), basename)
      end

      # Move this file to +new_path+ and return a new object of the
      # same class wrapping it.  A remote file is downloaded instead
      # (the remote original is left untouched).
      def mv new_path
        raise IMW::PathError.new("cannot move from #{path}, doesn't exist!") unless exist?
        if local?
          FileUtils.mv path, new_path
        else
          # FIXME better way to do this?
          ::File.open(new_path, 'w') { |f| f.write(open(uri).read) }
        end
        self.class.new(new_path)
      end
      alias_method :mv!, :mv

      # Move this file to +dir+, keeping its basename.
      def mv_to_dir dir
        mv ::File.join(::File.expand_path(dir), basename)
      end
      alias_method :mv_to_dir!, :mv_to_dir
    end
  end
end
121
+
122
+
@@ -0,0 +1,28 @@
1
+ #
2
+ # h2. lib/imw/files/binary.rb -- binary files
3
+ #
4
+ # == About
5
+ #
6
+ # Class for handling binary data.
7
+ #
8
+ # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
9
+ # Copyright:: Copyright (c) 2008 infochimps.org
10
+ # License:: GPL 3.0
11
+ # Website:: http://infinitemonkeywrench.org/
12
+ #
13
+ # puts "#{File.basename(__FILE__)}: Something clever" # at bottom
14
module IMW
  module Files

    # Wrapper for a file treated as opaque binary data.  It carries the
    # generic file behaviour and can be compressed.
    class Binary

      include IMW::Files::BasicFile
      include IMW::Files::Compressible

      # Wrap the file found at +uri+.
      def initialize(uri)
        self.uri = uri
      end

    end
  end
end
@@ -0,0 +1,93 @@
1
+ #
2
+ # h2. lib/imw/files/compressed_file.rb -- class describing compressed files
3
+ #
4
+ # == About
5
+ #
6
+ # Compression of files is handled via the
7
+ # <tt>IMW::Files::Compressible</tt> module which can be included by
8
+ # any object that has a <tt>@path</tt> attribute. The methods defined
9
+ # there compress files and return this
10
+ # <tt>IMW::Files::CompressedFile</tt> object which has methods for
11
+ # decompression.
12
+ #
13
+ # A subclass of this class must define a +compression+ instance
14
+ # variable which is a hash with the following keys:
15
+ #
16
+ # <tt>:program</tt>:: a symbol naming the program used for
17
+ # compression/decompression which must be one of the symbols in
18
+ # <tt>IMW::EXTERNAL_PROGRAMS</tt>
19
+ #
20
+ # <tt>:decompression_flags</tt>:: a string of flags to pass to the
21
+ # compression program when decompressing the file.
22
+ #
23
+ # A subclass must also define the method +decompressed_path+ which
24
+ # returns the path of the file post-decompression.
25
+ #
26
+ # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
27
+ # Copyright:: Copyright (c) 2008 infochimps.org
28
+ # License:: GPL 3.0
29
+ # Website:: http://infinitemonkeywrench.org/
30
+ #
31
+ # puts "#{File.basename(__FILE__)}: Have you ever folded up the wrapper of a soda straw into a little accordian shape and let a drop of water soak into it?" # at bottom
32
module IMW
  module Files

    module BasicFile
      # Is this file compressed?  Plain files answer +false+; classes
      # that also include IMW::Files::CompressedFile answer +true+.
      def compressed?
        false
      end
    end

    # A module which provides methods for decompressing a compressed
    # file.  An including class should define the method
    # +decompressed_path+ and an instance variable
    # <tt>@compression</tt> with two keys:
    #
    # <tt>:program</tt>:: a symbol from <tt>IMW::EXTERNAL_PROGRAMS</tt>
    # <tt>:decompression_flags</tt>:: a string specifying flags to pass to the decompression program
    module CompressedFile

      attr_reader :compression

      # Is this file compressed?
      def compressed?
        true
      end

      # Construct the command passed to the shell to decompress this
      # file: program, flags and path joined by single spaces.
      def decompression_command
        [IMW::EXTERNAL_PROGRAMS[@compression[:program]], @compression[:decompression_flags], @path].join ' '
      end

      public
      # Decompress this file in its present directory, overwriting any
      # existing files and without saving the original compressed
      # file.  Returns whatever <tt>IMW.open</tt> gives for the
      # decompressed path.
      #
      # Raises IMW::PathError if the file does not exist.
      def decompress!
        raise IMW::PathError.new("cannot decompress #{@path}, doesn't exist!") unless exist?
        FileUtils.cd(@dirname) { IMW.system decompression_command }
        IMW.open(decompressed_path)
      end

      # Decompress this file in its present directory, overwriting any
      # existing files while keeping the original compressed file.
      #
      # The implementation is a little stupid, as the file is
      # needlessly copied to <tt>@path + 'copy'</tt> and moved back
      # afterwards.
      #
      # Raises IMW::PathError if the file does not exist.
      def decompress
        raise IMW::PathError.new("cannot decompress #{@path}, doesn't exist!") unless exist?
        backup = @path + 'copy'
        # Take the backup *before* entering the protected region: if the
        # copy itself fails there is nothing to move back, and trying to
        # would replace the real error with a misleading one.
        FileUtils.cp(@path, backup)
        begin
          decompress!
        ensure
          FileUtils.mv(backup, @path)
        end
        IMW.open(decompressed_path)
      end

    end
  end
end
91
+
92
+
93
+
@@ -0,0 +1,348 @@
1
+ #
2
+ # h2. lib/imw/files/compressed_files_and_archives.rb -- require farm
3
+ #
4
+ # == About
5
+ #
6
+ # Defines the classes for all the archive and compressed formats (+tar+, +bz2+,
7
+ # &c.)
8
+ #
9
+ # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
10
+ # Copyright:: Copyright (c) 2008 infochimps.org
11
+ # License:: GPL 3.0
12
+ # Website:: http://infinitemonkeywrench.org/
13
+ #
14
+ # puts "#{File.basename(__FILE__)}: Something clever" # at bottom
15
+ module IMW
16
+ module Files
17
+
18
# Wraps a +tar+ archive.
#
# The flags for creating, appending to, listing and extracting are
# kept in <tt>IMW::Files::Tar::DEFAULT_FLAGS</tt>, together with the
# key of the program to look up in <tt>IMW::EXTERNAL_PROGRAMS</tt>.
class Tar

  include IMW::Files::BasicFile
  include IMW::Files::Archive
  include IMW::Files::Compressible

  # Default program and per-action flags for a tar archive.
  DEFAULT_FLAGS = {
    :create  => "-cf",
    :append  => "-rf",
    :list    => "-tf",
    :extract => "-xf",
    :program => :tar
  }

  # Wrap the tar archive at +uri+; extra arguments are accepted and
  # ignored.
  def initialize(uri, *args)
    self.uri = uri
    @archive = { :program => DEFAULT_FLAGS[:program] }
    [:create, :append, :list, :extract].each do |action|
      @archive[:"#{action}_flags"] = DEFAULT_FLAGS[action]
    end
  end
end # Tar
50
+
51
# Wraps a <tt>tar.gz</tt> (or <tt>.tgz</tt>) archive.
#
# Listing and extraction flags, and the flags used to gunzip the
# file, are kept in <tt>IMW::Files::TarGz::DEFAULT_FLAGS</tt>; the
# programs are looked up in <tt>IMW::EXTERNAL_PROGRAMS</tt>.
class TarGz

  include IMW::Files::BasicFile
  include IMW::Files::Archive
  include IMW::Files::CompressedFile

  # Default programs and flags for decompressing, listing and
  # extracting a <tt>tar.gz</tt> archive.
  DEFAULT_FLAGS = {
    :decompression_program => :gzip,
    :decompression_flags   => '-fd',
    :archive_program       => :tar,
    :archive_list_flags    => "-tf",
    :archive_extract_flags => "-xzf"
  }

  # Wrap the archive at +uri+; extra arguments are accepted and
  # ignored.
  def initialize(uri, *args)
    self.uri = uri
    defaults = DEFAULT_FLAGS
    @compression = {
      :program             => defaults[:decompression_program],
      :decompression_flags => defaults[:decompression_flags]
    }
    @archive = {
      :program       => defaults[:archive_program],
      :list_flags    => defaults[:archive_list_flags],
      :extract_flags => defaults[:archive_extract_flags]
    }
  end

  # Returns the path of the file after decompression: the same path
  # ending in <tt>.tar</tt>.  Returns nil when the path has neither
  # recognised ending.
  def decompressed_path
    case @path
    when /\.tar\.gz$/ then @path.gsub(/\.tar\.gz$/, ".tar")
    when /\.tgz$/     then @path.gsub(/\.tgz$/, ".tar")
    end
  end

  # The full extension of +path+ (<tt>.tar.gz</tt> or <tt>.tgz</tt>),
  # or nil when it has neither.
  def self.extname(path)
    case path
    when /\.tar\.gz$/ then ".tar.gz"
    when /\.tgz$/     then ".tgz"
    end
  end

end # TarGz
103
+
104
# A class to wrap a <tt>tar.bz2</tt> (or <tt>.tbz2</tt>) archive.
#
# Creation, listing, and extraction flags, and the flags used to
# bunzip the file, are stored in
# <tt>IMW::Files::TarBz2::DEFAULT_FLAGS</tt>; the programs are looked
# up in <tt>IMW::EXTERNAL_PROGRAMS</tt>.
class TarBz2

  include IMW::Files::BasicFile
  include IMW::Files::Archive
  include IMW::Files::CompressedFile

  # The default programs and flags used for decompressing, creating,
  # listing, and extracting a <tt>tar.bz2</tt> archive.
  DEFAULT_FLAGS = {
    :decompression_program => :bzip2,
    :decompression_flags   => '-fd',
    :archive_program       => :tar,
    :archive_create_flags  => '-cf',
    :archive_list_flags    => "-tf",
    :archive_extract_flags => "-xjf"
  }

  # The full extension of +path+ (<tt>.tar.bz2</tt> or
  # <tt>.tbz2</tt>), or nil when it has neither.
  def self.extname path
    if /\.tar\.bz2$/.match path then
      ".tar.bz2"
    elsif /\.tbz2$/.match path then
      ".tbz2"
    end
  end

  # Wrap the archive at +uri+; extra arguments are accepted and
  # ignored.
  def initialize uri, *args
    self.uri = uri
    @compression = {
      :program             => DEFAULT_FLAGS[:decompression_program],
      # This used to read DEFAULT_FLAGS[:decompression], a key the table
      # above does not have, so the flags were silently nil.
      :decompression_flags => DEFAULT_FLAGS[:decompression_flags]
    }
    @archive = {
      :program       => DEFAULT_FLAGS[:archive_program],
      :list_flags    => DEFAULT_FLAGS[:archive_list_flags],
      :extract_flags => DEFAULT_FLAGS[:archive_extract_flags],
      :create_flags  => DEFAULT_FLAGS[:archive_create_flags]
    }
  end

  # Returns the path of the file after decompression: the same path
  # ending in <tt>.tar</tt>.  Returns nil when the path has neither
  # recognised ending.
  def decompressed_path
    if /\.tar\.bz2$/.match @path then
      @path.gsub(/\.tar\.bz2$/, ".tar")
    elsif /\.tbz2$/.match @path then
      @path.gsub(/\.tbz2$/, ".tar")
    end
  end

  # Overrides the default behavior of IMW::Files::Archive#create: the
  # plain tar file is written next to this archive first and then
  # compressed with bzip2.  Returns the result of that compression.
  #
  # Options:
  # <tt>:force</tt> (false):: overwrite any existing archive at this path.
  def create paths, opts={}
    opts = { :force => false }.merge(opts)
    raise IMW::Error.new("An archive already exists at #{@path}.") if exist? && !opts[:force]
    paths = [paths] if paths.is_a?(String)
    IMW.system IMW::EXTERNAL_PROGRAMS[@archive[:program]], @archive[:create_flags], path_between_archive_and_compression, *paths
    IMW.open(path_between_archive_and_compression).compress!(:bzip2)
  end

  protected
  # Path of the intermediate, uncompressed tar file.
  def path_between_archive_and_compression
    ::File.join(dirname, name + '.tar')
  end

end # TarBz2
173
+
174
# Wraps a +rar+ archive.
#
# The flags for creating, appending to, listing and extracting are
# kept in <tt>IMW::Files::Rar::DEFAULT_FLAGS</tt>; the <tt>:rar</tt>
# program is looked up in <tt>IMW::EXTERNAL_PROGRAMS</tt>.
class Rar

  include IMW::Files::BasicFile
  include IMW::Files::Archive

  # Default per-action flags for a rar archive.
  DEFAULT_FLAGS = {
    :create  => "a -r -o+ -inul",
    :append  => "a -r -o+ -inul",
    :list    => "vb",
    :extract => "x -o+ -inul"
  }

  # Wrap the rar archive at +uri+; extra arguments are accepted and
  # ignored.
  def initialize(uri, *args)
    self.uri = uri
    @archive = { :program => :rar }
    [:create, :append, :list, :extract].each do |action|
      @archive[:"#{action}_flags"] = DEFAULT_FLAGS[action]
    end
  end
end # Rar
204
+
205
+ # A class to wrap a +zip+ archive.
206
+ #
207
+ # Creation, appending, listing, and extraction flags are stored in
208
+ # <tt>IMW::Files::Zip::DEFAULT_FLAGS</tt> and all are passed to
209
+ # the <tt>:zip</tt> and <tt>:unzip</tt> programs in
210
+ # <tt>IMW::EXTERNAL_PROGRAMS</tt>.
211
+ class Zip
212
+
213
+ include IMW::Files::BasicFile
214
+ include IMW::Files::Archive
215
+
216
# Default per-action flags for a zip archive, plus the key of the
# separate program used to list and extract it.
DEFAULT_FLAGS = {
  :create              => "-q -r",
  :append              => "-q -g",
  :list                => "-l",
  :extract             => "-q -o",
  :unarchiving_program => :unzip
}
225
+
226
# Wrap the zip archive at +uri+; extra arguments are accepted and
# ignored.  Archives are written with <tt>:zip</tt> and read with the
# program named by <tt>DEFAULT_FLAGS[:unarchiving_program]</tt>.
def initialize(uri, *args)
  self.uri = uri
  @archive = { :program => :zip }
  [:create, :append, :list, :extract].each do |action|
    @archive[:"#{action}_flags"] = DEFAULT_FLAGS[action]
  end
  @archive[:unarchiving_program] = DEFAULT_FLAGS[:unarchiving_program]
end
237
+
238
# The `unzip' program outputs data in a very annoying format:
#
#   Archive:  data.zip
#     Length     Date   Time    Name
#    --------    ----   ----    ----
#       18510  07-28-08 15:58   data/4d7Qrgz7.csv
#        3418  07-28-08 15:41   data/7S.csv
#       23353  07-28-08 15:41   data/g.csv
#    --------                   -------
#       49638                   3 files
#
# which is parsed by this method into an array of the entry names.
# Output too short to contain any entry rows yields an empty array.
def archive_contents_string_to_array string
  rows = string.split("\n")
  # Ignore the first 3 lines of the output and the last 2.  The range
  # form yields [] or nil (rather than raising) for short output.
  file_rows = rows[3...-2] || []
  file_rows.map do |row|
    # The first three whitespace-separated columns are length, date and
    # time; everything after them is the filename, which may itself
    # contain spaces.  The non-bang strip is used because the bang
    # versions return nil when there is nothing to strip.
    row.strip.split(' ', 4)[3].to_s
  end
end
269
+ end # Zip
270
+
271
# Wraps a <tt>gz</tt> compressed file.
#
# The decompression flags are kept in
# <tt>IMW::Files::Gz::DEFAULT_FLAGS</tt>; the <tt>:gzip</tt> program
# is looked up in <tt>IMW::EXTERNAL_PROGRAMS</tt>.
class Gz

  include IMW::Files::BasicFile
  include IMW::Files::CompressedFile

  # Default program and flags for decompressing a <tt>gz</tt> file.
  DEFAULT_FLAGS = {
    :program       => :gzip,
    :decompression => '-fd'
  }

  # Wrap the file at +uri+; extra arguments are accepted and ignored.
  def initialize(uri, *args)
    self.uri = uri
    program, flags = DEFAULT_FLAGS.values_at(:program, :decompression)
    @compression = {
      :program             => program,
      :decompression_flags => flags
    }
  end

  # Returns the path of the file after decompression (the path with
  # its trailing <tt>.gz</tt> removed).
  def decompressed_path
    @path.gsub(/\.gz$/, "")
  end
end # Gz
299
+
300
# Wraps a <tt>bz2</tt> compressed file.
#
# The decompression flags are kept in
# <tt>IMW::Files::Bz2::DEFAULT_FLAGS</tt>; the <tt>:bzip2</tt> program
# is looked up in <tt>IMW::EXTERNAL_PROGRAMS</tt>.
class Bz2

  include IMW::Files::BasicFile
  include IMW::Files::CompressedFile

  # Default program and flags for decompressing a <tt>bz2</tt> file.
  DEFAULT_FLAGS = {
    :program       => :bzip2,
    :decompression => '-fd'
  }

  # Wrap the file at +uri+; extra arguments are accepted and ignored.
  #
  # Raises IMW::Error unless the file's extension is <tt>.bz2</tt>.
  def initialize(uri, *args)
    self.uri = uri
    if @extname != '.bz2'
      raise IMW::Error.new("#{@extname} is not a valid extension for a bzip2 compressed file.")
    end
    program, flags = DEFAULT_FLAGS.values_at(:program, :decompression)
    @compression = {
      :program             => program,
      :decompression_flags => flags
    }
  end

  # Returns the path of the file after decompression (the path with
  # its trailing <tt>.bz2</tt> removed).
  def decompressed_path
    @path.gsub(/\.bz2$/, "")
  end
end # Bz2
330
+
331
+
332
# Register each filename pattern with its wrapper class.  Order
# matters: the compound extensions (tar.bz2, tar.gz, ...) are listed
# before the plain ones (bz2, gz) that they also end in.
[
  [/\.tar\.bz2$/, IMW::Files::TarBz2],
  [/\.tbz2$/,     IMW::Files::TarBz2],

  [/\.tar\.gz$/,  IMW::Files::TarGz],
  [/\.tgz$/,      IMW::Files::TarGz],

  [/\.tar$/,      IMW::Files::Tar],
  [/\.bz2$/,      IMW::Files::Bz2],
  [/\.gz$/,       IMW::Files::Gz],
  [/\.rar$/,      IMW::Files::Rar],
  [/\.zip$/,      IMW::Files::Zip]
].each { |pattern_and_class| FILE_REGEXPS << pattern_and_class }
344
+
345
+ end # Files
346
+ end # IMW
347
+
348
+