imw 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (111) hide show
  1. data/.gitignore +15 -0
  2. data/CHANGELOG +0 -0
  3. data/LICENSE +674 -0
  4. data/README.rdoc +101 -0
  5. data/Rakefile +20 -0
  6. data/VERSION +1 -0
  7. data/etc/imwrc.rb +76 -0
  8. data/lib/imw.rb +42 -0
  9. data/lib/imw/boot.rb +58 -0
  10. data/lib/imw/dataset.rb +233 -0
  11. data/lib/imw/dataset/datamapper.rb +66 -0
  12. data/lib/imw/dataset/datamapper/time_and_user_stamps.rb +37 -0
  13. data/lib/imw/dataset/loaddump.rb +50 -0
  14. data/lib/imw/dataset/old/file_collection.rb +88 -0
  15. data/lib/imw/dataset/old/file_collection_utils.rb +71 -0
  16. data/lib/imw/dataset/scaffold.rb +132 -0
  17. data/lib/imw/dataset/scraped_uri.rb +305 -0
  18. data/lib/imw/dataset/scrub/old_working_scrubber.rb +87 -0
  19. data/lib/imw/dataset/scrub/scrub.rb +147 -0
  20. data/lib/imw/dataset/scrub/scrub_simple_url.rb +38 -0
  21. data/lib/imw/dataset/scrub/scrub_test.rb +60 -0
  22. data/lib/imw/dataset/scrub/slug.rb +101 -0
  23. data/lib/imw/dataset/stats.rb +73 -0
  24. data/lib/imw/dataset/stats/counter.rb +23 -0
  25. data/lib/imw/dataset/task.rb +38 -0
  26. data/lib/imw/dataset/workflow.rb +81 -0
  27. data/lib/imw/files.rb +110 -0
  28. data/lib/imw/files/archive.rb +113 -0
  29. data/lib/imw/files/basicfile.rb +122 -0
  30. data/lib/imw/files/binary.rb +28 -0
  31. data/lib/imw/files/compressed_file.rb +93 -0
  32. data/lib/imw/files/compressed_files_and_archives.rb +348 -0
  33. data/lib/imw/files/compressible.rb +103 -0
  34. data/lib/imw/files/csv.rb +112 -0
  35. data/lib/imw/files/json.rb +41 -0
  36. data/lib/imw/files/sgml.rb +65 -0
  37. data/lib/imw/files/text.rb +68 -0
  38. data/lib/imw/files/yaml.rb +46 -0
  39. data/lib/imw/packagers.rb +8 -0
  40. data/lib/imw/packagers/archiver.rb +108 -0
  41. data/lib/imw/packagers/s3_mover.rb +28 -0
  42. data/lib/imw/parsers.rb +7 -0
  43. data/lib/imw/parsers/html_parser.rb +382 -0
  44. data/lib/imw/parsers/html_parser/matchers.rb +306 -0
  45. data/lib/imw/parsers/line_parser.rb +87 -0
  46. data/lib/imw/parsers/regexp_parser.rb +72 -0
  47. data/lib/imw/utils.rb +24 -0
  48. data/lib/imw/utils/components.rb +61 -0
  49. data/lib/imw/utils/config.rb +46 -0
  50. data/lib/imw/utils/error.rb +54 -0
  51. data/lib/imw/utils/extensions/array.rb +125 -0
  52. data/lib/imw/utils/extensions/class/attribute_accessors.rb +8 -0
  53. data/lib/imw/utils/extensions/core.rb +43 -0
  54. data/lib/imw/utils/extensions/dir.rb +24 -0
  55. data/lib/imw/utils/extensions/file_core.rb +64 -0
  56. data/lib/imw/utils/extensions/hash.rb +218 -0
  57. data/lib/imw/utils/extensions/hpricot.rb +48 -0
  58. data/lib/imw/utils/extensions/string.rb +49 -0
  59. data/lib/imw/utils/extensions/struct.rb +42 -0
  60. data/lib/imw/utils/extensions/symbol.rb +28 -0
  61. data/lib/imw/utils/extensions/typed_struct.rb +22 -0
  62. data/lib/imw/utils/extensions/uri.rb +59 -0
  63. data/lib/imw/utils/log.rb +67 -0
  64. data/lib/imw/utils/misc.rb +63 -0
  65. data/lib/imw/utils/paths.rb +115 -0
  66. data/lib/imw/utils/uri.rb +59 -0
  67. data/lib/imw/utils/uuid.rb +33 -0
  68. data/lib/imw/utils/validate.rb +38 -0
  69. data/lib/imw/utils/version.rb +12 -0
  70. data/lib/imw/utils/view.rb +113 -0
  71. data/lib/imw/utils/view/dump_csv.rb +112 -0
  72. data/lib/imw/utils/view/dump_csv_older.rb +117 -0
  73. data/spec/data/sample.csv +131 -0
  74. data/spec/data/sample.tsv +131 -0
  75. data/spec/data/sample.txt +131 -0
  76. data/spec/data/sample.xml +653 -0
  77. data/spec/data/sample.yaml +652 -0
  78. data/spec/imw/dataset/datamapper/uri_spec.rb +43 -0
  79. data/spec/imw/dataset/datamapper_spec_helper.rb +11 -0
  80. data/spec/imw/files/archive_spec.rb +118 -0
  81. data/spec/imw/files/basicfile_spec.rb +121 -0
  82. data/spec/imw/files/bz2_spec.rb +32 -0
  83. data/spec/imw/files/compressed_file_spec.rb +96 -0
  84. data/spec/imw/files/compressible_spec.rb +100 -0
  85. data/spec/imw/files/file_spec.rb +144 -0
  86. data/spec/imw/files/gz_spec.rb +32 -0
  87. data/spec/imw/files/rar_spec.rb +33 -0
  88. data/spec/imw/files/tar_spec.rb +31 -0
  89. data/spec/imw/files/text_spec.rb +23 -0
  90. data/spec/imw/files/zip_spec.rb +31 -0
  91. data/spec/imw/files_spec.rb +38 -0
  92. data/spec/imw/packagers/archiver_spec.rb +125 -0
  93. data/spec/imw/packagers/s3_mover_spec.rb +7 -0
  94. data/spec/imw/parsers/line_parser_spec.rb +96 -0
  95. data/spec/imw/parsers/regexp_parser_spec.rb +42 -0
  96. data/spec/imw/utils/extensions/file_core_spec.rb +72 -0
  97. data/spec/imw/utils/extensions/find_spec.rb +113 -0
  98. data/spec/imw/utils/paths_spec.rb +38 -0
  99. data/spec/imw/workflow/rip/local_spec.rb +89 -0
  100. data/spec/imw/workflow/rip_spec.rb +27 -0
  101. data/spec/rcov.opts +1 -0
  102. data/spec/spec.opts +4 -0
  103. data/spec/spec_helper.rb +32 -0
  104. data/spec/support/archive_contents_matcher.rb +94 -0
  105. data/spec/support/custom_matchers.rb +21 -0
  106. data/spec/support/directory_contents_matcher.rb +61 -0
  107. data/spec/support/extensions.rb +18 -0
  108. data/spec/support/file_contents_matcher.rb +50 -0
  109. data/spec/support/random.rb +210 -0
  110. data/spec/support/without_regard_to_order_matcher.rb +58 -0
  111. metadata +196 -0
@@ -0,0 +1,27 @@
1
+ #
2
+ # h2. spec/imw/workflow/rip_spec.rb -- spec for rip.rb
3
+ #
4
+ # == About
5
+ #
6
+ # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
7
+ # Copyright:: Copyright (c) 2008 infochimps.org
8
+ # License:: GPL 3.0
9
+ # Website:: http://infinitemonkeywrench.org/
10
+ #
11
+ require File.join(File.dirname(__FILE__),'../../spec_helper')
12
+
13
+ # require 'imw/workflow/rip'
14
+ #
15
+ # describe Source do
16
+ #
17
+ # before(:all) do
18
+ # @source = IMW::Source.new(:fake_source)
19
+ # @source.stub("returns path to ripd directory",:path_to => IMW::DIRECTORIES[:dump] + "/source_rip_spec")
20
+ # end
21
+ #
22
+ # it "should raise an error when asked to rip in an unknown way" do
23
+ # @source.rip_from :silly_way
24
+ # end
25
+ # end
26
+
27
+ # puts "#{File.basename(__FILE__)}: Bending over, you hear a thunderous RRRRRRRIIIIIP and then scuttle off to check your pants..." # at bottom
@@ -0,0 +1 @@
1
+ --exclude "bin/*,etc/*,meta/*,gems/*,old/*,spec/*"
@@ -0,0 +1,4 @@
1
+ --colour
2
+ --format progress
3
+ --loadby mtime
4
+ --reverse
@@ -0,0 +1,32 @@
1
+ IMW_ROOT_DIR = File.join(File.expand_path(File.dirname(__FILE__)), '..') unless defined? IMW_ROOT_DIR
2
+ IMW_SPEC_DIR = File.join(IMW_ROOT_DIR, 'spec') unless defined? IMW_SPEC_DIR
3
+ IMW_LIB_DIR = File.join(IMW_ROOT_DIR, 'lib') unless defined? IMW_LIB_DIR
4
+ $: << IMW_LIB_DIR
5
+
6
+ require 'rubygems'
7
+ require 'spec'
8
+ require 'fileutils'
9
+ require 'imw'
10
+
11
+ Dir[File.dirname(__FILE__) + "/support/**/*.rb"].each { |path| require path }
12
+
13
+ module IMWTest
14
+ TMP_DIR = "/tmp/imwtest"
15
+ end
16
+
17
# Global RSpec configuration: mixes in the custom matchers and gives
# every example a scratch directory (IMWTest::TMP_DIR) to work in.
Spec::Runner.configure do |config|

  # Directory the suite was started from.  Captured once, when this
  # file is loaded, so the +after+ hook has somewhere valid to return to.
  launch_dir = Dir.pwd

  config.include CustomMatchers

  # Create the scratch directory and make it the working directory.
  config.before do
    FileUtils.mkdir_p IMWTest::TMP_DIR
    FileUtils.cd IMWTest::TMP_DIR
  end

  # Leave the scratch directory *before* deleting it; otherwise the
  # process is left sitting in a directory which no longer exists and
  # anything resolving a relative path (Dir.pwd, File.expand_path)
  # fails until the next example changes directory again.
  config.after do
    FileUtils.cd launch_dir
    FileUtils.rm_rf IMWTest::TMP_DIR
  end
end
30
+
31
+
32
+
@@ -0,0 +1,94 @@
1
+ #
2
+ # h2. spec/support/archive_contents_matcher.rb -- matches contents of archive to disk
3
+ #
4
+ # == About
5
+ #
6
+ # An RSpec matcher which tests that an archive of files has the same
7
+ # contents as various paths on disk.
8
+ #
9
+ # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
10
+ # Copyright:: Copyright (c) 2008 infochimps.org
11
+ # License:: GPL 3.0
12
+ # Website:: http://infinitemonkeywrench.org/
13
+ #
14
+
15
+ require 'find'
16
+
17
module Spec
  module Matchers
    module IMW

      # Match the contents of an archive against the files found at
      # (or beneath) +paths+ on disk.
      #
      # The object handed to +matches?+ must respond to +contents+
      # (an enumerable of the paths stored in the archive) and to
      # +path+ (used only in the failure messages).
      #
      # Options include:
      #
      # <tt>:relative_to</tt>:: a leading path which will be stripped
      #                         from every file found under +paths+
      #                         before comparison with the contents of
      #                         the archive.  When omitted, the
      #                         expanded absolute paths are compared
      #                         unchanged.
      class ArchiveContentsMatchPaths

        # +paths+ is either a single String or an Array of Strings.
        def initialize(paths, opts = {})
          # Merge into a fresh hash so the caller's +opts+ is not modified.
          opts         = { :relative_to => nil }.merge(opts)
          @paths       = paths.is_a?(String) ? [paths] : paths
          @relative_to = opts[:relative_to] && File.expand_path(opts[:relative_to])
          find_paths_contents
        end

        # True when the set of paths in +archive+ equals the set of
        # files found on disk.
        def matches?(archive)
          @archive          = archive
          @archive_contents = @archive.contents.to_set
          @archive_contents == @paths_contents
        end

        def failure_message
          missing_from_archive = "missing from archive:\n\t#{pretty_print(@paths_contents - @archive_contents)}\n"
          missing_from_paths   = "missing from paths:\n\t#{pretty_print(@archive_contents - @paths_contents)}\n"
          common               = "common to both:\n\t#{pretty_print(@archive_contents & @paths_contents)}\n"
          "expected contents of archive (#{@archive.path}) and paths (#{@paths.join(", ")}) to be identical.\n#{missing_from_archive}\n#{missing_from_paths}\n#{common}"
        end

        def negative_failure_message
          "expected contents of archive (#{@archive.path}) and paths (#{@paths.join(", ")}) to differ."
        end

        private

        # Collect every file named by, or found beneath, +@paths+ and
        # store them (with the leading path stripped) in
        # +@paths_contents+.  Paths which are neither a file nor a
        # directory are silently ignored.
        def find_paths_contents
          contents = []
          @paths.each do |path|
            path = File.expand_path(path)
            if File.file?(path)
              contents << path
            elsif File.directory?(path)
              # NOTE(review): Find.files_in_directory is defined
              # elsewhere in the project; presumably it returns paths
              # beneath +path+ -- confirm they are absolute.
              contents += Find.files_in_directory(path)
            end
          end

          @paths_contents = contents.map { |path| strip_leading_path(path) }.to_set
        end

        # Remove the <tt>:relative_to</tt> prefix (and the separator
        # after it) from +path+.  The path is returned untouched when no
        # prefix was given or when it does not start with the prefix.
        def strip_leading_path(path)
          return path if @relative_to.nil?
          prefix = File.join(@relative_to, '') # guarantees exactly one trailing separator
          path[0, prefix.length] == prefix ? path[prefix.length..-1] : path
        end

        # Format a set for inclusion in a failure message.
        def pretty_print(set)
          set.to_a.join("\n\t")
        end

      end

      # Invokes the matcher
      # <tt>Spec::Matchers::IMW::ArchiveContentsMatchPaths</tt>.
      def contain_paths_like(paths, opts = {})
        ArchiveContentsMatchPaths.new(paths, opts)
      end

    end
  end
end
93
+
94
+ # puts "#{File.basename(__FILE__)}: An archive is something that is bigger on the inside than it is on the outside." # at bottom
@@ -0,0 +1,21 @@
1
# Small matchers built on RSpec's +simple_matcher+ helper, which must
# be available wherever this module is included.
module CustomMatchers

  # Passes when the directory being tested contains every one of
  # +paths+.  +paths+ may be given as separate arguments or as (nested)
  # arrays and are compared against entries relative to the directory.
  def contain(*paths)
    paths = paths.flatten
    simple_matcher("contain #{paths.inspect}") do |given, matcher|
      # the +1 drops the separator so the entries are relative paths
      given_contents = Dir[given + "/**/*"].map do |abs_path|
        abs_path[(given.length + 1)..-1]
      end
      matcher.failure_message          = "expected #{given} to contain #{paths.inspect}, instead it contained #{given_contents.inspect}"
      matcher.negative_failure_message = "expected #{given} not to contain #{paths.inspect}"
      paths.all? { |path| given_contents.include?(path) }
    end
  end

  # Passes when the path being tested exists on disk.
  def exist
    simple_matcher("exist") do |given, matcher|
      matcher.failure_message          = "expected #{given} to exist on disk"
      # This was previously assigned to +failure_message+ as well, which
      # overwrote the positive message and left the negative one unset.
      matcher.negative_failure_message = "expected #{given} not to exist on disk"
      File.exist?(given)
    end
  end
end
@@ -0,0 +1,61 @@
1
+ #
2
+ # h2. spec/support/directory_contents_matcher.rb -- matches files between directories
3
+ #
4
+ # == About
5
+ #
6
+ # An RSpec matcher which tests that two directories share the same set
7
+ # of files.
8
+ #
9
+ # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
10
+ # Copyright:: Copyright (c) 2008 infochimps.org
11
+ # License:: GPL 3.0
12
+ # Website:: http://infinitemonkeywrench.org/
13
+ #
14
+
15
+ require 'set'
16
+ require 'find'
17
+
18
module Spec
  module Matchers
    module IMW

      # Compares the set of files beneath one directory with the set of
      # files beneath another.  Listing is delegated to
      # Find.files_relative_to_directory, which is defined elsewhere in
      # the project.
      class DirectoryContentsMatcher

        # +dir+ is the reference directory; its listing is taken once,
        # when the matcher is built.
        def initialize(dir)
          @dir       = File.expand_path(dir)
          @dir_files = Find.files_relative_to_directory(@dir).to_set
        end

        # True when +target+ holds the same set of files as the
        # reference directory.
        def matches?(target)
          @target       = target
          @target_files = Find.files_relative_to_directory(@target).to_set
          @target_files == @dir_files
        end

        def failure_message
          only_in_target = format_files_for_printing(@target_files - @dir_files)
          only_in_dir    = format_files_for_printing(@dir_files - @target_files)
          shared         = format_files_for_printing(@dir_files & @target_files)
          "expected files in #{@dir} and #{@target} to be identical.\n\nfiles missing from #{@dir}:\n\t#{only_in_target}\n\nfiles missing from #{@target}:\n\t#{only_in_dir}\n\nfiles in common:\n\t#{shared}"
        end

        def negative_failure_message
          "expected files in #{@dir} and #{@target} to be different"
        end

        private

        # One file per line, indented with a tab, for failure messages.
        def format_files_for_printing(files)
          files.to_a.join("\n\t")
        end
      end

      # Builds a matcher checking that the files in the directory under
      # test are the same as those in +dir+.
      def contain_files_matching_directory(dir)
        DirectoryContentsMatcher.new(dir)
      end
    end
  end
end
60
+
61
+ # puts "#{File.basename(__FILE__)}: From far away, the two filing cabinets appear to be identical. Upon closer inspection, one of them is actually a Maine lobster. Delicious!" # at bottom
@@ -0,0 +1,18 @@
1
class Array
  # Return one element of the array chosen at random.
  def random
    at(rand(size))
  end
end
6
+
7
class Hash
  # Merge so that the receiver's values win: +other_hash+ only supplies
  # values for keys the receiver lacks.  Returns a new hash.
  # Borrowed from ActiveSupport::CoreExtensions::Hash::ReverseMerge.
  def reverse_merge(other_hash)
    other_hash.dup.update(self)
  end

  # In-place variant of +reverse_merge+: the receiver's own contents are
  # replaced with the merged result.
  # Borrowed from ActiveSupport::CoreExtensions::Hash::ReverseMerge.
  def reverse_merge!(other_hash)
    replace(other_hash.merge(self))
  end
end
18
+
@@ -0,0 +1,50 @@
1
+ #
2
+ # h2. spec/support/file_contents_matcher.rb -- matches contents of two files
3
+ #
4
+ # == About
5
+ #
6
+ # An RSpec matcher which tests that two files have the same contents
7
+ # on disk.
8
+ #
9
+ # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
10
+ # Copyright:: Copyright (c) 2008 infochimps.org
11
+ # License:: GPL 3.0
12
+ # Website:: http://infinitemonkeywrench.org/
13
+ #
14
+
15
+ require 'ftools'
16
+
17
# FileUtils.compare_file replaces File.compare, which came from the
# +ftools+ library that is no longer shipped with Ruby 1.9 and later.
require 'fileutils'

module Spec
  module Matchers
    module IMW

      # Matcher which passes when two files on disk have the same
      # contents.
      class FileContentsMatcher
        # +orig+ is the path of the reference file.
        def initialize(orig)
          @orig = File.expand_path(orig)
        end

        # True when the file at +copy+ has the same contents as the
        # reference file.
        def matches?(copy)
          @copy = File.expand_path(copy)
          FileUtils.compare_file(@orig, @copy)
        end

        def failure_message
          "files #{@orig} and #{@copy} are different"
        end

        def negative_failure_message
          "expected files #{@orig} and #{@copy} to differ"
        end
      end

      # Matches the contents of one file against another using
      # FileUtils.compare_file.
      def have_contents_matching_those_of(path)
        FileContentsMatcher.new(path)
      end

    end
  end
end
49
+
50
+ # puts "#{File.basename(__FILE__)}: From far away, the folders appear the same; from up close, they are different." # at bottom
@@ -0,0 +1,210 @@
1
+ require 'fileutils'
2
+
3
module IMWTest
  # Generators of random fixture data for the specs: random names,
  # random text, and files, archives and directory trees filled with
  # random content.
  #
  # Relies on Array#random and Hash#reverse_merge, which are defined
  # outside this module.
  #
  # Every generator is a public module method.  (The +private+ and
  # +public+ keywords which used to sit between the definitions had no
  # effect on methods defined with <tt>def self.</tt> and were removed.)
  module Random

    STRING_CHARS        = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + [' ',' ',' ',' ',' ']
    TEXT_CHARS          = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + [' ',' ',' ',' ',' ',"\n"]
    FILENAME_CHARS      = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + ["-","_"]
    FILENAME_MAX_LENGTH = 9
    TEXT_MAX_LENGTH     = 1024

    # Maps a filename pattern to the generator used by +file+.
    EXTENSIONS = {
      /\.csv$/       => :csv_file,
      /\.xml$/       => :xml_file,
      /\.html$/      => :html_file,
      /\.tar$/       => :tar_file,
      /\.tar\.gz$/   => :targz_file,
      /\.tar\.bz2$/  => :tarbz2_file,
      /\.rar$/       => :rar_file,
      /\.zip$/       => :zip_file
    }

    # Use IMW's table of external programs when it is loaded, otherwise
    # fall back to the bare command names.
    EXTERNAL_PROGRAMS = if defined?(IMW) && defined?(IMW::EXTERNAL_PROGRAMS)
                          IMW::EXTERNAL_PROGRAMS
                        else
                          {
                            :tar   => "tar",
                            :rar   => "rar",
                            :zip   => "zip",
                            :unzip => "unzip",
                            :gzip  => "gzip",
                            :bzip2 => "bzip2",
                            :wget  => "wget"
                          }
                        end

    # Return a random filename.  The optional <tt>:length</tt> sets the
    # number of characters in the name (default FILENAME_MAX_LENGTH).
    def self.basename options = {}
      length   = (options[:length] or FILENAME_MAX_LENGTH)
      filename = (1..length).map { FILENAME_CHARS.random }.join

      # filenames beginning with hyphens suck
      while (filename[0,1] == '-') do
        filename[0] = FILENAME_CHARS.random
      end
      filename
    end

    # Return a random string of text.  Control the number of characters
    # with <tt>:length</tt> (default TEXT_MAX_LENGTH) and allow newlines
    # in the text with <tt>:newlines</tt>.
    def self.text options = {}
      length    = (options[:length] or TEXT_MAX_LENGTH)
      char_pool = options[:newlines] ? TEXT_CHARS : STRING_CHARS
      (1..length).map { char_pool.random }.join
    end

    # Create a random file by matching the extension of the given
    # +filename+ against EXTENSIONS, or a text file if no match is found.
    def self.file filename
      match = EXTENSIONS.find { |regex,func| regex.match filename }
      match ? self.send(match.last,filename) : self.text_file(filename)
    end

    # Create a random text file (newlines included) at +filename+.  The
    # optional <tt>:length</tt> sets the number of characters written;
    # it was previously accepted but ignored.
    def self.text_file filename, options = {}
      File.open(filename,'w') { |f| f.write text(:newlines => true, :length => options[:length]) }
    end

    # Create a comma-separated value file containing random text at
    # +filename+.  The number of rows is random and less than
    # +num_rows+; every row holds +num_columns+ + 1 entries of
    # +entry_length+ characters each.
    def self.csv_file(filename,num_rows = 500, num_columns = 9, entry_length = 9)
      # rand(0) returns a Float, which has no #times
      row_count = num_rows > 0 ? rand(num_rows) : 0
      # block form closes the file even when a write raises
      File.open(filename,'w') do |f|
        row_count.times do
          num_columns.times do
            f.write(text(:length => entry_length))
            f.write ','
          end
          f.write(text(:length => entry_length)) # last entry
          f.write("\n")
        end
      end
      nil
    end

    # Create an XML file at +filename+.
    #
    # At the present moment, this file contains random text in a very
    # boring single-element XML tree.  Randomizing the tree has not
    # been implemented, so +options+ is accepted but not used.
    def self.xml_file filename, options = {}
      File.open(filename,'w') do |file|
        file.write "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
        file.write "<xml>" + text + "</xml>"
      end
      nil
    end

    # Create an HTML file at +filename+ with a title of +title_length+
    # random characters and a body of +body_length+ random characters.
    #
    # At the present moment, this file contains random text in a very
    # boring bare-bones HTML with a single element body.  Randomizing
    # the tree has not been implemented.
    def self.html_file(filename, title_length = 100, body_length = 5000)
      # This used to call an undefined +string+ helper and so always raised.
      File.open(filename,'w') do |f|
        f.write "<html><head><title>" + text(:length => title_length) + "</title></head><body>" + text(:length => body_length) + "</body></html>"
      end
      nil
    end

    # Create a tar archive at the given +filename+ containing random
    # files.  The files are generated in a temporary <tt>dir</tt>
    # directory beside +filename+, which is removed afterwards.
    def self.tar_file filename
      tmpd = File.dirname(filename) + '/dir'
      directory_with_files(tmpd)
      FileUtils.cd(tmpd) {|dir| system("#{EXTERNAL_PROGRAMS[:tar]} -cf file.tar *") }
      FileUtils.cp(tmpd + "/file.tar",filename)
      FileUtils.rm_rf(tmpd)
    end

    # Create a tar.gz archive at the given +filename+ containing
    # random files.
    def self.targz_file filename
      tar   = File.dirname(filename) + "/file.tar"
      targz = tar + ".gz"
      tar_file tar
      system("#{EXTERNAL_PROGRAMS[:gzip]} #{tar}")
      FileUtils.cp(targz,filename)
      FileUtils.rm(targz)
    end

    # Create a tar.bz2 archive at the given +filename+ containing
    # random files.
    def self.tarbz2_file filename
      tar    = File.dirname(filename) + "/file.tar"
      tarbz2 = tar + ".bz2"
      tar_file tar
      system("#{EXTERNAL_PROGRAMS[:bzip2]} #{tar}")
      FileUtils.cp(tarbz2,filename)
      FileUtils.rm(tarbz2)
    end

    # Create a compressed rar archive at the given +filename+
    # containing random files.
    def self.rar_file filename
      tmpd = File.dirname(filename) + '/dir'
      directory_with_files(tmpd)
      FileUtils.cd(tmpd) {|dir| system("#{EXTERNAL_PROGRAMS[:rar]} a -r -o+ file.rar *") }
      FileUtils.cp(tmpd + "/file.rar",filename)
      FileUtils.rm_rf(tmpd)
    end

    # Create a compressed zip archive at the given +filename+
    # containing random files.
    def self.zip_file filename
      tmpd = File.dirname(filename) + '/dir'
      directory_with_files(tmpd)
      FileUtils.cd(tmpd) {|dir| system("#{EXTERNAL_PROGRAMS[:zip]} -r file.zip *") }
      FileUtils.cp(tmpd + "/file.zip",filename)
      FileUtils.rm_rf(tmpd)
    end

    # Creates +directory+ and fills it with random files containing
    # random data.  Raises a RuntimeError if +directory+ already exists
    # and <tt>:force</tt> is not set.
    #
    # Options (with their default values in parentheses) include:
    #
    # <tt>:extensions</tt> (<tt>[txt,csv,dat]</tt>):: extensions to use.  If an extension is known (see <tt>IMWTest::Random::EXTENSIONS</tt>) then appropriately formatted random data will be used.  If an extension is not known, it will be treated as text.  The extension +dir+ will create a directory which will itself be filled with random files in the same way as its parent.
    # <tt>:max_depth</tt> (3):: maximum depth to nest directories
    # <tt>:starting_depth</tt> (1):: the default depth the parent directory is assumed to have
    # <tt>:num_files</tt> (3):: between 2 and <tt>:num_files</tt> + 1 entries are created per directory
    # <tt>:force</tt> (false):: force overwriting of existing directories
    def self.directory_with_files(directory,options = {})
      directory = File.expand_path(directory)
      options = options.reverse_merge({:extensions => ['txt','csv','dat'],:max_depth => 3,:force => false,:starting_depth => 1, :num_files => 3})
      depth = options[:starting_depth]

      if File.exist?(directory) then
        if options[:force] then
          FileUtils.rm_rf(directory)
        else
          raise "#{directory} already exists"
        end
      end
      FileUtils.mkdir_p(directory)

      (rand(options[:num_files]) + 2).times do
        ext  = options[:extensions].random
        name = self.basename
        if ext == 'dir' then
          # too deep already: skip this entry
          next unless depth <= options[:max_depth]
          # The recursive call creates the sub-directory itself.  Creating
          # it here first (as was done before) made the recursive call
          # raise "already exists" unless :force was set.
          directory_with_files(directory + '/' + name,options.merge({:starting_depth => (depth + 1)}))
        else
          file(directory + '/' + name + '.' + ext)
        end
      end
    end

  end
end
208
+
209
+
210
+