imw 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. data/.gitignore +15 -0
  2. data/CHANGELOG +0 -0
  3. data/LICENSE +674 -0
  4. data/README.rdoc +101 -0
  5. data/Rakefile +20 -0
  6. data/VERSION +1 -0
  7. data/etc/imwrc.rb +76 -0
  8. data/lib/imw.rb +42 -0
  9. data/lib/imw/boot.rb +58 -0
  10. data/lib/imw/dataset.rb +233 -0
  11. data/lib/imw/dataset/datamapper.rb +66 -0
  12. data/lib/imw/dataset/datamapper/time_and_user_stamps.rb +37 -0
  13. data/lib/imw/dataset/loaddump.rb +50 -0
  14. data/lib/imw/dataset/old/file_collection.rb +88 -0
  15. data/lib/imw/dataset/old/file_collection_utils.rb +71 -0
  16. data/lib/imw/dataset/scaffold.rb +132 -0
  17. data/lib/imw/dataset/scraped_uri.rb +305 -0
  18. data/lib/imw/dataset/scrub/old_working_scrubber.rb +87 -0
  19. data/lib/imw/dataset/scrub/scrub.rb +147 -0
  20. data/lib/imw/dataset/scrub/scrub_simple_url.rb +38 -0
  21. data/lib/imw/dataset/scrub/scrub_test.rb +60 -0
  22. data/lib/imw/dataset/scrub/slug.rb +101 -0
  23. data/lib/imw/dataset/stats.rb +73 -0
  24. data/lib/imw/dataset/stats/counter.rb +23 -0
  25. data/lib/imw/dataset/task.rb +38 -0
  26. data/lib/imw/dataset/workflow.rb +81 -0
  27. data/lib/imw/files.rb +110 -0
  28. data/lib/imw/files/archive.rb +113 -0
  29. data/lib/imw/files/basicfile.rb +122 -0
  30. data/lib/imw/files/binary.rb +28 -0
  31. data/lib/imw/files/compressed_file.rb +93 -0
  32. data/lib/imw/files/compressed_files_and_archives.rb +348 -0
  33. data/lib/imw/files/compressible.rb +103 -0
  34. data/lib/imw/files/csv.rb +112 -0
  35. data/lib/imw/files/json.rb +41 -0
  36. data/lib/imw/files/sgml.rb +65 -0
  37. data/lib/imw/files/text.rb +68 -0
  38. data/lib/imw/files/yaml.rb +46 -0
  39. data/lib/imw/packagers.rb +8 -0
  40. data/lib/imw/packagers/archiver.rb +108 -0
  41. data/lib/imw/packagers/s3_mover.rb +28 -0
  42. data/lib/imw/parsers.rb +7 -0
  43. data/lib/imw/parsers/html_parser.rb +382 -0
  44. data/lib/imw/parsers/html_parser/matchers.rb +306 -0
  45. data/lib/imw/parsers/line_parser.rb +87 -0
  46. data/lib/imw/parsers/regexp_parser.rb +72 -0
  47. data/lib/imw/utils.rb +24 -0
  48. data/lib/imw/utils/components.rb +61 -0
  49. data/lib/imw/utils/config.rb +46 -0
  50. data/lib/imw/utils/error.rb +54 -0
  51. data/lib/imw/utils/extensions/array.rb +125 -0
  52. data/lib/imw/utils/extensions/class/attribute_accessors.rb +8 -0
  53. data/lib/imw/utils/extensions/core.rb +43 -0
  54. data/lib/imw/utils/extensions/dir.rb +24 -0
  55. data/lib/imw/utils/extensions/file_core.rb +64 -0
  56. data/lib/imw/utils/extensions/hash.rb +218 -0
  57. data/lib/imw/utils/extensions/hpricot.rb +48 -0
  58. data/lib/imw/utils/extensions/string.rb +49 -0
  59. data/lib/imw/utils/extensions/struct.rb +42 -0
  60. data/lib/imw/utils/extensions/symbol.rb +28 -0
  61. data/lib/imw/utils/extensions/typed_struct.rb +22 -0
  62. data/lib/imw/utils/extensions/uri.rb +59 -0
  63. data/lib/imw/utils/log.rb +67 -0
  64. data/lib/imw/utils/misc.rb +63 -0
  65. data/lib/imw/utils/paths.rb +115 -0
  66. data/lib/imw/utils/uri.rb +59 -0
  67. data/lib/imw/utils/uuid.rb +33 -0
  68. data/lib/imw/utils/validate.rb +38 -0
  69. data/lib/imw/utils/version.rb +12 -0
  70. data/lib/imw/utils/view.rb +113 -0
  71. data/lib/imw/utils/view/dump_csv.rb +112 -0
  72. data/lib/imw/utils/view/dump_csv_older.rb +117 -0
  73. data/spec/data/sample.csv +131 -0
  74. data/spec/data/sample.tsv +131 -0
  75. data/spec/data/sample.txt +131 -0
  76. data/spec/data/sample.xml +653 -0
  77. data/spec/data/sample.yaml +652 -0
  78. data/spec/imw/dataset/datamapper/uri_spec.rb +43 -0
  79. data/spec/imw/dataset/datamapper_spec_helper.rb +11 -0
  80. data/spec/imw/files/archive_spec.rb +118 -0
  81. data/spec/imw/files/basicfile_spec.rb +121 -0
  82. data/spec/imw/files/bz2_spec.rb +32 -0
  83. data/spec/imw/files/compressed_file_spec.rb +96 -0
  84. data/spec/imw/files/compressible_spec.rb +100 -0
  85. data/spec/imw/files/file_spec.rb +144 -0
  86. data/spec/imw/files/gz_spec.rb +32 -0
  87. data/spec/imw/files/rar_spec.rb +33 -0
  88. data/spec/imw/files/tar_spec.rb +31 -0
  89. data/spec/imw/files/text_spec.rb +23 -0
  90. data/spec/imw/files/zip_spec.rb +31 -0
  91. data/spec/imw/files_spec.rb +38 -0
  92. data/spec/imw/packagers/archiver_spec.rb +125 -0
  93. data/spec/imw/packagers/s3_mover_spec.rb +7 -0
  94. data/spec/imw/parsers/line_parser_spec.rb +96 -0
  95. data/spec/imw/parsers/regexp_parser_spec.rb +42 -0
  96. data/spec/imw/utils/extensions/file_core_spec.rb +72 -0
  97. data/spec/imw/utils/extensions/find_spec.rb +113 -0
  98. data/spec/imw/utils/paths_spec.rb +38 -0
  99. data/spec/imw/workflow/rip/local_spec.rb +89 -0
  100. data/spec/imw/workflow/rip_spec.rb +27 -0
  101. data/spec/rcov.opts +1 -0
  102. data/spec/spec.opts +4 -0
  103. data/spec/spec_helper.rb +32 -0
  104. data/spec/support/archive_contents_matcher.rb +94 -0
  105. data/spec/support/custom_matchers.rb +21 -0
  106. data/spec/support/directory_contents_matcher.rb +61 -0
  107. data/spec/support/extensions.rb +18 -0
  108. data/spec/support/file_contents_matcher.rb +50 -0
  109. data/spec/support/random.rb +210 -0
  110. data/spec/support/without_regard_to_order_matcher.rb +58 -0
  111. metadata +196 -0
@@ -0,0 +1,27 @@
1
+ #
2
+ # h2. spec/imw/workflow/rip_spec.rb -- spec for rip.rb
3
+ #
4
+ # == About
5
+ #
6
+ # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
7
+ # Copyright:: Copyright (c) 2008 infochimps.org
8
+ # License:: GPL 3.0
9
+ # Website:: http://infinitemonkeywrench.org/
10
+ #
11
+ require File.join(File.dirname(__FILE__),'../../spec_helper')
12
+
13
+ # require 'imw/workflow/rip'
14
+ #
15
+ # describe Source do
16
+ #
17
+ # before(:all) do
18
+ # @source = IMW::Source.new(:fake_source)
19
+ # @source.stub("returns path to ripd directory",:path_to => IMW::DIRECTORIES[:dump] + "/source_rip_spec")
20
+ # end
21
+ #
22
+ # it "should raise an error when asked to rip in an unknown way" do
23
+ # @source.rip_from :silly_way
24
+ # end
25
+ # end
26
+
27
+ # puts "#{File.basename(__FILE__)}: Bending over, you hear a thunderous RRRRRRRIIIIIP and then scuttle off to check your pants..." # at bottom
@@ -0,0 +1 @@
1
+ --exclude "bin/*,etc/*,meta/*,gems/*,old/*,spec/*"
@@ -0,0 +1,4 @@
1
+ --colour
2
+ --format progress
3
+ --loadby mtime
4
+ --reverse
@@ -0,0 +1,32 @@
1
# Locate the project root, spec and lib directories relative to this
# file.  Each constant is only assigned when nothing has defined it yet.
unless defined? IMW_ROOT_DIR
  IMW_ROOT_DIR = File.join(File.expand_path(File.dirname(__FILE__)), '..')
end
unless defined? IMW_SPEC_DIR
  IMW_SPEC_DIR = File.join(IMW_ROOT_DIR, 'spec')
end
unless defined? IMW_LIB_DIR
  IMW_LIB_DIR = File.join(IMW_ROOT_DIR, 'lib')
end

# Make the library under test requirable.
$LOAD_PATH.push(IMW_LIB_DIR)
5
+
6
+ require 'rubygems'
7
+ require 'spec'
8
+ require 'fileutils'
9
+ require 'imw'
10
+
11
+ Dir[File.dirname(__FILE__) + "/support/**/*.rb"].each { |path| require path }
12
+
13
# Namespace for constants and helpers shared by the specs.
module IMWTest
  # Scratch directory created before, and removed after, every example.
  TMP_DIR = "/tmp/imwtest"
end

Spec::Runner.configure do |config|

  config.include CustomMatchers

  # Run every example from inside a fresh scratch directory.
  config.before do
    FileUtils.mkdir_p IMWTest::TMP_DIR
    FileUtils.cd IMWTest::TMP_DIR
  end

  # Step out of the scratch directory before deleting it, so the
  # process is never left with a working directory that no longer
  # exists (Dir.pwd and relative path expansion fail in that state).
  config.after do
    FileUtils.cd File.dirname(IMWTest::TMP_DIR)
    FileUtils.rm_rf IMWTest::TMP_DIR
  end
end
30
+
31
+
32
+
@@ -0,0 +1,94 @@
1
+ #
2
+ # h2. spec/matchers/archive_contents_matcher.rb -- matches contents of archive to disk
3
+ #
4
+ # == About
5
+ #
6
+ # An RSpec matcher which tests that an archive of files has the same
7
+ # contents as various paths on disk.
8
+ #
9
+ # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
10
+ # Copyright:: Copyright (c) 2008 infochimps.org
11
+ # License:: GPL 3.0
12
+ # Website:: http://infinitemonkeywrench.org/
13
+ #
14
+
15
+ require 'find'
16
+
17
require 'set'

module Spec
  module Matchers
    module IMW

      # Match the contents of the archive against files or directories
      # in +paths+.
      #
      # Options include:
      #
      # <tt>:relative_to</tt>:: a leading path which will be stripped
      # from all +paths+ before comparison with the contents of the
      # archive.  When omitted, the expanded (absolute) paths are
      # compared as they are.
      class ArchiveContentsMatchPaths

        # +paths+ is a single path (String) or a collection of paths;
        # +opts+ is read but never modified.
        def initialize paths, opts = {}
          @paths = paths.is_a?(String) ? [paths] : paths
          relative_to = opts[:relative_to]
          # Expanded so that it lines up with the expanded +paths+
          # (this also drops any trailing slash).
          @relative_to = relative_to && File.expand_path(relative_to)
          find_paths_contents
        end

        # True when the set of entries reported by <tt>archive.contents</tt>
        # equals the set of files found under +paths+.
        def matches? archive
          @archive = archive
          @archive_contents = @archive.contents.to_set
          @archive_contents == @paths_contents
        end

        def failure_message
          missing_from_archive = "missing from archive:\n\t#{pretty_print(@paths_contents - @archive_contents)}\n"
          missing_from_paths = "missing from paths:\n\t#{pretty_print(@archive_contents - @paths_contents)}\n"
          common = "common to both:\n\t#{pretty_print(@archive_contents & @paths_contents)}\n"
          "expected contents of archive (#{@archive.path}) and paths (#{@paths.join(", ")}) to be identical.\n#{missing_from_archive}\n#{missing_from_paths}\n#{common}"
        end

        def negative_failure_message
          "expected contents of archive (#{@archive.path}) and paths (#{@paths.join(", ")}) to differ."
        end

        private

        # Collect every file named by, or found beneath, +@paths+ and
        # store the (optionally relativized) result in +@paths_contents+.
        # Paths that are neither a file nor a directory are ignored.
        def find_paths_contents
          contents = []
          @paths.each do |path|
            path = File.expand_path path
            if File.file? path
              contents << path
            elsif File.directory? path
              # NOTE(review): Find.files_in_directory is not part of the
              # standard library; presumably a project extension that
              # returns absolute file paths -- confirm.
              contents += Find.files_in_directory(path)
            end
          end

          @paths_contents = contents.map { |path| strip_leading_path(path) }.to_set
        end

        # Remove the +@relative_to+ prefix (and the separator after it)
        # from +path+.  Paths outside +@relative_to+, and every path when
        # no prefix was given, are returned unchanged.
        def strip_leading_path path
          return path unless @relative_to
          prefix = @relative_to.end_with?('/') ? @relative_to : @relative_to + '/'
          path.start_with?(prefix) ? path[prefix.length..-1] : path
        end

        def pretty_print set
          set.to_a.join("\n\t")
        end

      end

      # Invokes the matcher <tt>Spec::Matchers::IMW::ArchiveContentsMatchPaths</tt>.
      def contain_paths_like paths, opts = {}
        ArchiveContentsMatchPaths.new(paths,opts)
      end

    end
  end
end
93
+
94
+ # puts "#{File.basename(__FILE__)}: An archive is something that is bigger on the inside than it is on the outside." # at bottom
@@ -0,0 +1,21 @@
1
# Small matchers built with +simple_matcher+, which must be supplied by
# whatever this module is mixed into (RSpec's example groups do so).
module CustomMatchers
  # Passes when the directory under test contains every one of +paths+,
  # each given relative to that directory.  +paths+ may be passed as
  # separate arguments or as (nested) arrays.
  def contain *paths
    paths = paths.flatten
    simple_matcher("contain #{paths.inspect}") do |given, matcher|
      # Everything below +given+, with the leading "given/" removed.
      given_contents = Dir[given + "/**/*"].map do |abs_path|
        abs_path[(given.length + 1)..-1]
      end
      matcher.failure_message = "expected #{given} to contain #{paths.inspect}, instead it contained #{given_contents.inspect}"
      matcher.negative_failure_message = "expected #{given} not to contain #{paths.inspect}"
      paths.all? { |path| given_contents.include?(path) }
    end
  end

  # Passes when the path under test exists on disk.
  def exist
    simple_matcher("exist") do |given, matcher|
      matcher.failure_message = "expected #{given} to exist on disk"
      matcher.negative_failure_message = "expected #{given} not to exist on disk"
      File.exist?(given)
    end
  end
end
@@ -0,0 +1,61 @@
1
+ #
2
+ # h2. spec/matchers/directory_contents_matcher.rb -- matches files between directories
3
+ #
4
+ # == About
5
+ #
6
+ # An RSpec matcher which tests that two directories share the same set
7
+ # of files.
8
+ #
9
+ # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
10
+ # Copyright:: Copyright (c) 2008 infochimps.org
11
+ # License:: GPL 3.0
12
+ # Website:: http://infinitemonkeywrench.org/
13
+ #
14
+
15
+ require 'set'
16
+ require 'find'
17
+
18
module Spec
  module Matchers
    module IMW

      # Compares the set of files below a reference directory with the
      # set of files below the directory under test.
      class DirectoryContentsMatcher

        # Record the listing of the target and compare it, as a set,
        # with the listing taken from the reference directory.
        def matches? target
          @target = target
          @target_files = Find.files_relative_to_directory(@target).to_set
          @dir_files == @target_files
        end

        def failure_message
          only_in_target = format_files_for_printing(@target_files - @dir_files)
          only_in_dir = format_files_for_printing(@dir_files - @target_files)
          shared = format_files_for_printing(@dir_files & @target_files)
          [
            "expected files in #{@dir} and #{@target} to be identical.",
            "files missing from #{@dir}:\n\t#{only_in_target}",
            "files missing from #{@target}:\n\t#{only_in_dir}",
            "files in common:\n\t#{shared}"
          ].join("\n\n")
        end

        def negative_failure_message
          "expected files in #{@dir} and #{@target} to be different"
        end

        private

        # The reference directory is expanded and listed once, up front.
        def initialize dir
          @dir = File.expand_path(dir)
          @dir_files = Find.files_relative_to_directory(@dir).to_set
        end

        # One file per line, tab-indented after the first.
        def format_files_for_printing files
          files.to_a.join("\n\t")
        end
      end

      # Checks that files in one directory match those in another.
      def contain_files_matching_directory dir
        DirectoryContentsMatcher.new(dir)
      end
    end
  end
end
60
+
61
+ # puts "#{File.basename(__FILE__)}: From far away, the two filing cabinets appear to be identical. Upon closer inspection, one of them is actually a Maine lobster. Delicious!" # at bottom
@@ -0,0 +1,18 @@
1
class Array
  # Return one element chosen at random (+nil+ for an empty array).
  def random
    at(rand(size))
  end
end
6
+
7
class Hash
  # Borrowed from ActiveSupport::CoreExtensions::Hash::ReverseMerge.
  # Returns a new hash holding +other_hash+'s entries, overridden by
  # the receiver's wherever both define a key.
  def reverse_merge(other_hash)
    defaults = other_hash
    defaults.merge(self)
  end

  # Borrowed from ActiveSupport::CoreExtensions::Hash::ReverseMerge.
  # In-place variant: the receiver's contents become the merged result.
  def reverse_merge!(other_hash)
    merged = reverse_merge(other_hash)
    replace(merged)
  end
end
18
+
@@ -0,0 +1,50 @@
1
+ #
2
+ # h2. spec/imw/matchers/file_contents_matcher.rb -- matches contents of two files
3
+ #
4
+ # == About
5
+ #
6
+ # An RSpec matcher which tests that two files have the same contents
7
+ # on disk.
8
+ #
9
+ # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
10
+ # Copyright:: Copyright (c) 2008 infochimps.org
11
+ # License:: GPL 3.0
12
+ # Website:: http://infinitemonkeywrench.org/
13
+ #
14
+
15
# FileUtils.compare_file replaces File.compare from 'ftools', a library
# that is no longer shipped as of Ruby 1.9.
require 'fileutils'

module Spec
  module Matchers
    module IMW

      # Compares the content of a reference file with the content of
      # the file under test.
      class FileContentsMatcher
        def initialize orig
          @orig = File.expand_path orig
        end

        # True when +copy+ has exactly the same content as the
        # reference file.
        def matches? copy
          @copy = File.expand_path copy
          FileUtils.compare_file(@orig,@copy)
        end

        def failure_message
          "files #{@orig} and #{@copy} are different"
        end

        def negative_failure_message
          "expected files #{@orig} and #{@copy} to differ"
        end
      end

      # Matches the contents of one file against another using
      # FileUtils.compare_file.
      def have_contents_matching_those_of path
        FileContentsMatcher.new(path)
      end

    end
  end
end
49
+
50
+ # puts "#{File.basename(__FILE__)}: From far away, the folders appear the same; from up close, they are different." # at bottom
@@ -0,0 +1,210 @@
1
+ require 'fileutils'
2
+
3
require 'shellwords'

module IMWTest
  # Generators for random strings, files, archives and directory trees.
  # Every generator is a module-level method, e.g.
  # <tt>IMWTest::Random.file("data.csv")</tt>.
  module Random

    STRING_CHARS = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + [' ',' ',' ',' ',' ']
    TEXT_CHARS = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + [' ',' ',' ',' ',' ',"\n"]
    FILENAME_CHARS = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + ["-","_"]
    FILENAME_MAX_LENGTH = 9
    TEXT_MAX_LENGTH = 1024
    # Maps a filename pattern to the generator used by +file+.
    EXTENSIONS = {
      /\.csv$/ => :csv_file,
      /\.xml$/ => :xml_file,
      /\.html$/ => :html_file,
      /\.tar$/ => :tar_file,
      /\.tar\.gz$/ => :targz_file,
      /\.tar\.bz2$/ => :tarbz2_file,
      /\.rar$/ => :rar_file,
      /\.zip$/ => :zip_file
    }
    # Command names of the external archivers; taken from
    # IMW::EXTERNAL_PROGRAMS when that constant is already defined.
    EXTERNAL_PROGRAMS = if defined?(IMW) && defined?(IMW::EXTERNAL_PROGRAMS)
                          IMW::EXTERNAL_PROGRAMS
                        else
                          {
                            :tar => "tar",
                            :rar => "rar",
                            :zip => "zip",
                            :unzip => "unzip",
                            :gzip => "gzip",
                            :bzip2 => "bzip2",
                            :wget => "wget"
                          }
                        end

    # Return a random filename of exactly <tt>options[:length]</tt>
    # characters (default +FILENAME_MAX_LENGTH+) which never starts
    # with a hyphen.
    def self.basename options = {}
      length = options[:length] || FILENAME_MAX_LENGTH
      filename = (1..length).map { random_element(FILENAME_CHARS) }.join

      # filenames beginning with hyphens suck
      filename[0] = random_element(FILENAME_CHARS) while filename[0,1] == '-'
      filename
    end

    # Return a random string of exactly <tt>options[:length]</tt>
    # characters (default +TEXT_MAX_LENGTH+).  Newlines are only
    # included when <tt>options[:newlines]</tt> is set.
    def self.text options = {}
      length = options[:length] || TEXT_MAX_LENGTH
      char_pool = options[:newlines] ? TEXT_CHARS : STRING_CHARS
      (1..length).map { random_element(char_pool) }.join
    end

    # Create a random file by matching the extension of the given
    # +filename+ or a text file if no match is found.
    def self.file filename
      match = EXTENSIONS.find { |regex,func| regex.match filename }
      match ? self.send(match.last,filename) : self.text_file(filename)
    end

    # Create a random text file at +filename+.  +options+ are passed on
    # to +text+ (so <tt>:length</tt> sets the size); newlines are
    # included unless <tt>:newlines => false</tt> is given.
    def self.text_file filename, options = {}
      File.open(filename,'w') { |f| f.write text({:newlines => true}.merge(options)) }
    end

    # Create a comma-separated value file containing random text at
    # +filename+ with fewer than +num_rows+ rows, exactly +num_columns+
    # entries per row, each entry +entry_length+ characters long.
    def self.csv_file(filename,num_rows = 500, num_columns = 9, entry_length = 9)
      File.open(filename,'w') do |f|
        rand(num_rows).times do
          row = (1..num_columns).map { text(:length => entry_length) }
          f.write(row.join(',') + "\n")
        end
      end
    end

    # Create an XML file at +filename+.
    #
    # At the present moment, this file contains random text in a very
    # boring single-element XML tree. Randomizing the tree has not
    # been implemented, so +options+ is accepted but not used.
    def self.xml_file filename, options = {}
      File.open(filename,'w') do |file|
        file.write "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
        file.write "<xml>" + text + "</xml>"
      end
    end

    # Create an HTML file at +filename+ whose title and body hold
    # +title_length+ and +body_length+ random characters.
    #
    # At the present moment, this file contains random text in a very
    # boring bare-bones HTML with a single element body. Randomizing
    # the tree has not been implemented.
    def self.html_file(filename, title_length = 100, body_length = 5000)
      File.open(filename,'w') do |f|
        f.write "<html><head><title>" + text(:length => title_length) + "</title></head><body>" + text(:length => body_length) + "</body></html>"
      end
    end

    # Create a tar archive at the given +filename+ containing random
    # files.
    def self.tar_file filename
      archive_of_random_files(filename, "file.tar", "#{EXTERNAL_PROGRAMS[:tar]} -cf file.tar *")
    end

    # Create a tar.gz archive at the given +filename+ containing
    # random files.
    def self.targz_file filename
      compressed_tar_file(filename, :gzip, ".gz")
    end

    # Create a tar.bz2 archive at the given +filename+ containing
    # random files.
    def self.tarbz2_file filename
      compressed_tar_file(filename, :bzip2, ".bz2")
    end

    # Create a compressed rar archive at the given +filename+
    # containing random files.
    def self.rar_file filename
      archive_of_random_files(filename, "file.rar", "#{EXTERNAL_PROGRAMS[:rar]} a -r -o+ file.rar *")
    end

    # Create a compressed zip archive at the given +filename+
    # containing random files.
    def self.zip_file filename
      archive_of_random_files(filename, "file.zip", "#{EXTERNAL_PROGRAMS[:zip]} -r file.zip *")
    end

    # Creates +directory+ and fills it with random files containing
    # random data.  Raises a RuntimeError when +directory+ already
    # exists and <tt>:force</tt> is not set.
    #
    # Options (with their default values in parentheses) include:
    #
    # <tt>:extensions</tt> (<tt>[txt,csv,dat]</tt>):: extensions to use. If an extension is known (see <tt>IMWTest::Random::EXTENSIONS</tt>) then appropriately formatted random data will be used. If an extension is not known, it will be treated as text. The extension +dir+ will create a directory which will itself be filled with random files in the same way as its parent.
    # <tt>:max_depth</tt> (3):: maximum depth to nest directories
    # <tt>:starting_depth</tt> (1):: the default depth the parent directory is assumed to have
    # <tt>:num_files</tt> (3):: between 2 and <tt>:num_files</tt> + 1 entries are attempted per directory
    # <tt>:force</tt> (false):: force overwriting of existing directories
    def self.directory_with_files(directory,options = {})
      directory = File.expand_path(directory)
      options = {:extensions => ['txt','csv','dat'],:max_depth => 3,:force => false,:starting_depth => 1, :num_files => 3}.merge(options)
      depth = options[:starting_depth]

      if File.exist?(directory)
        raise "#{directory} already exists" unless options[:force]
        FileUtils.rm_rf(directory)
      end
      FileUtils.mkdir_p(directory)

      (rand(options[:num_files]) + 2).times do
        ext = random_element(options[:extensions])
        name = basename
        if ext == 'dir'
          next if depth > options[:max_depth]
          # The recursive call creates the subdirectory itself; creating
          # it here first would trip its "already exists" check.
          directory_with_files(directory + '/' + name,options.merge({:starting_depth => (depth + 1)}))
        else
          file(directory + '/' + name + '.' + ext)
        end
      end
    end

    # Return a random element of +pool+ (+nil+ when +pool+ is empty).
    def self.random_element pool
      pool[rand(pool.length)]
    end
    private_class_method :random_element

    # Fill a scratch directory next to +filename+ with random files, run
    # the shell +command+ inside it (the command must write
    # +archive_name+ there), copy the result to +filename+ and remove
    # the scratch directory even when a step fails.
    def self.archive_of_random_files filename, archive_name, command
      tmpd = File.dirname(filename) + '/dir'
      # Done outside the begin/ensure: when this raises because the
      # directory already exists, it is not ours to delete.
      directory_with_files(tmpd)
      begin
        FileUtils.cd(tmpd) {|dir| system(command) }
        FileUtils.cp(tmpd + "/" + archive_name,filename)
      ensure
        FileUtils.rm_rf(tmpd)
      end
    end
    private_class_method :archive_of_random_files

    # Build <tt>file.tar</tt> next to +filename+, compress it with
    # <tt>EXTERNAL_PROGRAMS[program]</tt> (which is expected to replace
    # it by <tt>file.tar</tt> + +extension+) and move the result to
    # +filename+.
    def self.compressed_tar_file filename, program, extension
      tar = File.dirname(filename) + "/file.tar"
      compressed = tar + extension
      tar_file tar
      system("#{EXTERNAL_PROGRAMS[program]} #{Shellwords.escape(tar)}")
      # Nothing to move when the requested name is the intermediate name.
      return if File.expand_path(compressed) == File.expand_path(filename)
      FileUtils.cp(compressed,filename)
      FileUtils.rm(compressed)
    end
    private_class_method :compressed_tar_file

  end
end
208
+
209
+
210
+