imw 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +15 -0
- data/CHANGELOG +0 -0
- data/LICENSE +674 -0
- data/README.rdoc +101 -0
- data/Rakefile +20 -0
- data/VERSION +1 -0
- data/etc/imwrc.rb +76 -0
- data/lib/imw.rb +42 -0
- data/lib/imw/boot.rb +58 -0
- data/lib/imw/dataset.rb +233 -0
- data/lib/imw/dataset/datamapper.rb +66 -0
- data/lib/imw/dataset/datamapper/time_and_user_stamps.rb +37 -0
- data/lib/imw/dataset/loaddump.rb +50 -0
- data/lib/imw/dataset/old/file_collection.rb +88 -0
- data/lib/imw/dataset/old/file_collection_utils.rb +71 -0
- data/lib/imw/dataset/scaffold.rb +132 -0
- data/lib/imw/dataset/scraped_uri.rb +305 -0
- data/lib/imw/dataset/scrub/old_working_scrubber.rb +87 -0
- data/lib/imw/dataset/scrub/scrub.rb +147 -0
- data/lib/imw/dataset/scrub/scrub_simple_url.rb +38 -0
- data/lib/imw/dataset/scrub/scrub_test.rb +60 -0
- data/lib/imw/dataset/scrub/slug.rb +101 -0
- data/lib/imw/dataset/stats.rb +73 -0
- data/lib/imw/dataset/stats/counter.rb +23 -0
- data/lib/imw/dataset/task.rb +38 -0
- data/lib/imw/dataset/workflow.rb +81 -0
- data/lib/imw/files.rb +110 -0
- data/lib/imw/files/archive.rb +113 -0
- data/lib/imw/files/basicfile.rb +122 -0
- data/lib/imw/files/binary.rb +28 -0
- data/lib/imw/files/compressed_file.rb +93 -0
- data/lib/imw/files/compressed_files_and_archives.rb +348 -0
- data/lib/imw/files/compressible.rb +103 -0
- data/lib/imw/files/csv.rb +112 -0
- data/lib/imw/files/json.rb +41 -0
- data/lib/imw/files/sgml.rb +65 -0
- data/lib/imw/files/text.rb +68 -0
- data/lib/imw/files/yaml.rb +46 -0
- data/lib/imw/packagers.rb +8 -0
- data/lib/imw/packagers/archiver.rb +108 -0
- data/lib/imw/packagers/s3_mover.rb +28 -0
- data/lib/imw/parsers.rb +7 -0
- data/lib/imw/parsers/html_parser.rb +382 -0
- data/lib/imw/parsers/html_parser/matchers.rb +306 -0
- data/lib/imw/parsers/line_parser.rb +87 -0
- data/lib/imw/parsers/regexp_parser.rb +72 -0
- data/lib/imw/utils.rb +24 -0
- data/lib/imw/utils/components.rb +61 -0
- data/lib/imw/utils/config.rb +46 -0
- data/lib/imw/utils/error.rb +54 -0
- data/lib/imw/utils/extensions/array.rb +125 -0
- data/lib/imw/utils/extensions/class/attribute_accessors.rb +8 -0
- data/lib/imw/utils/extensions/core.rb +43 -0
- data/lib/imw/utils/extensions/dir.rb +24 -0
- data/lib/imw/utils/extensions/file_core.rb +64 -0
- data/lib/imw/utils/extensions/hash.rb +218 -0
- data/lib/imw/utils/extensions/hpricot.rb +48 -0
- data/lib/imw/utils/extensions/string.rb +49 -0
- data/lib/imw/utils/extensions/struct.rb +42 -0
- data/lib/imw/utils/extensions/symbol.rb +28 -0
- data/lib/imw/utils/extensions/typed_struct.rb +22 -0
- data/lib/imw/utils/extensions/uri.rb +59 -0
- data/lib/imw/utils/log.rb +67 -0
- data/lib/imw/utils/misc.rb +63 -0
- data/lib/imw/utils/paths.rb +115 -0
- data/lib/imw/utils/uri.rb +59 -0
- data/lib/imw/utils/uuid.rb +33 -0
- data/lib/imw/utils/validate.rb +38 -0
- data/lib/imw/utils/version.rb +12 -0
- data/lib/imw/utils/view.rb +113 -0
- data/lib/imw/utils/view/dump_csv.rb +112 -0
- data/lib/imw/utils/view/dump_csv_older.rb +117 -0
- data/spec/data/sample.csv +131 -0
- data/spec/data/sample.tsv +131 -0
- data/spec/data/sample.txt +131 -0
- data/spec/data/sample.xml +653 -0
- data/spec/data/sample.yaml +652 -0
- data/spec/imw/dataset/datamapper/uri_spec.rb +43 -0
- data/spec/imw/dataset/datamapper_spec_helper.rb +11 -0
- data/spec/imw/files/archive_spec.rb +118 -0
- data/spec/imw/files/basicfile_spec.rb +121 -0
- data/spec/imw/files/bz2_spec.rb +32 -0
- data/spec/imw/files/compressed_file_spec.rb +96 -0
- data/spec/imw/files/compressible_spec.rb +100 -0
- data/spec/imw/files/file_spec.rb +144 -0
- data/spec/imw/files/gz_spec.rb +32 -0
- data/spec/imw/files/rar_spec.rb +33 -0
- data/spec/imw/files/tar_spec.rb +31 -0
- data/spec/imw/files/text_spec.rb +23 -0
- data/spec/imw/files/zip_spec.rb +31 -0
- data/spec/imw/files_spec.rb +38 -0
- data/spec/imw/packagers/archiver_spec.rb +125 -0
- data/spec/imw/packagers/s3_mover_spec.rb +7 -0
- data/spec/imw/parsers/line_parser_spec.rb +96 -0
- data/spec/imw/parsers/regexp_parser_spec.rb +42 -0
- data/spec/imw/utils/extensions/file_core_spec.rb +72 -0
- data/spec/imw/utils/extensions/find_spec.rb +113 -0
- data/spec/imw/utils/paths_spec.rb +38 -0
- data/spec/imw/workflow/rip/local_spec.rb +89 -0
- data/spec/imw/workflow/rip_spec.rb +27 -0
- data/spec/rcov.opts +1 -0
- data/spec/spec.opts +4 -0
- data/spec/spec_helper.rb +32 -0
- data/spec/support/archive_contents_matcher.rb +94 -0
- data/spec/support/custom_matchers.rb +21 -0
- data/spec/support/directory_contents_matcher.rb +61 -0
- data/spec/support/extensions.rb +18 -0
- data/spec/support/file_contents_matcher.rb +50 -0
- data/spec/support/random.rb +210 -0
- data/spec/support/without_regard_to_order_matcher.rb +58 -0
- metadata +196 -0
@@ -0,0 +1,27 @@
|
|
1
|
+
#
|
2
|
+
# h2. spec/imw/workflow/rip_spec.rb -- spec for rip.rb
|
3
|
+
#
|
4
|
+
# == About
|
5
|
+
#
|
6
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
7
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
8
|
+
# License:: GPL 3.0
|
9
|
+
# Website:: http://infinitemonkeywrench.org/
|
10
|
+
#
|
11
|
+
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
12
|
+
|
13
|
+
# require 'imw/workflow/rip'
|
14
|
+
#
|
15
|
+
# describe Source do
|
16
|
+
#
|
17
|
+
# before(:all) do
|
18
|
+
# @source = IMW::Source.new(:fake_source)
|
19
|
+
# @source.stub("returns path to ripd directory",:path_to => IMW::DIRECTORIES[:dump] + "/source_rip_spec")
|
20
|
+
# end
|
21
|
+
#
|
22
|
+
# it "should raise an error when asked to rip in an unknown way" do
|
23
|
+
# @source.rip_from :silly_way
|
24
|
+
# end
|
25
|
+
# end
|
26
|
+
|
27
|
+
# puts "#{File.basename(__FILE__)}: Bending over, you hear a thunderous RRRRRRRIIIIIP and then scuttle off to check your pants..." # at bottom
|
data/spec/rcov.opts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--exclude "bin/*,etc/*,meta/*,gems/*,old/*,spec/*"
|
data/spec/spec.opts
ADDED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# Locations used throughout the specs.  Each constant is assigned only when
# it has not been defined already.

# Parent of the directory that holds this file.
unless defined?(IMW_ROOT_DIR)
  IMW_ROOT_DIR = File.join(File.expand_path(File.dirname(__FILE__)), '..')
end

# Directory holding the specs.
unless defined?(IMW_SPEC_DIR)
  IMW_SPEC_DIR = File.join(IMW_ROOT_DIR, 'spec')
end

# Directory holding the library code.
unless defined?(IMW_LIB_DIR)
  IMW_LIB_DIR = File.join(IMW_ROOT_DIR, 'lib')
end

# Append the library directory to the load path.
$LOAD_PATH << IMW_LIB_DIR
|
5
|
+
|
6
|
+
require 'rubygems'
|
7
|
+
require 'spec'
|
8
|
+
require 'fileutils'
|
9
|
+
require 'imw'
|
10
|
+
|
11
|
+
Dir[File.dirname(__FILE__) + "/support/**/*.rb"].each { |path| require path }
|
12
|
+
|
13
|
+
# Namespace for settings and helpers shared by the specs.
module IMWTest
  # Scratch directory the specs create, work inside and delete again.
  TMP_DIR = '/tmp/imwtest'
end
|
16
|
+
|
17
|
+
# Global spec configuration: mixes in the custom matchers and gives every
# example a scratch directory (IMWTest::TMP_DIR) to run in.
Spec::Runner.configure do |config|

  config.include CustomMatchers

  # Create the scratch directory and make it the working directory.
  config.before do
    FileUtils.mkdir_p IMWTest::TMP_DIR
    FileUtils.cd IMWTest::TMP_DIR
  end

  # Step out of the scratch directory before deleting it.  Removing the
  # current working directory leaves the process without a valid cwd, so
  # any relative-path lookup performed before the next +before+ hook
  # (Dir.pwd, File.expand_path on a relative path) raises Errno::ENOENT.
  config.after do
    FileUtils.cd IMW_ROOT_DIR
    FileUtils.rm_rf IMWTest::TMP_DIR
  end

end
|
30
|
+
|
31
|
+
|
32
|
+
|
@@ -0,0 +1,94 @@
|
|
1
|
+
#
|
2
|
+
# h2. spec/support/archive_contents_matcher.rb -- matches contents of archive to disk
|
3
|
+
#
|
4
|
+
# == About
|
5
|
+
#
|
6
|
+
# An RSpec matcher which tests that an archive of files has the same
|
7
|
+
# contents as various paths on disk.
|
8
|
+
#
|
9
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
10
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
11
|
+
# License:: GPL 3.0
|
12
|
+
# Website:: http://infinitemonkeywrench.org/
|
13
|
+
#
|
14
|
+
|
15
|
+
require 'find'
|
16
|
+
|
17
|
+
module Spec
  module Matchers
    module IMW

      # Match the contents of an archive against files or directories
      # in +paths+.
      #
      # Options include:
      #
      # <tt>:relative_to</tt>:: a leading path which will be stripped
      #   from all +paths+ before comparison with the contents of the
      #   archive.  When omitted (or +nil+) the expanded paths are
      #   compared as they are.
      class ArchiveContentsMatchPaths

        # +paths+ is a single path (String) or a collection of paths; see
        # the class documentation for +opts+.  The caller's +opts+ hash is
        # only read, never modified.
        def initialize paths, opts = {}
          @paths       = paths.is_a?(String) ? [paths] : paths
          @relative_to = opts[:relative_to]
          find_paths_contents
        end

        # True when the set of entries reported by <tt>archive.contents</tt>
        # equals the set of files found under +paths+.
        def matches? archive
          @archive          = archive
          @archive_contents = @archive.contents.to_set
          @archive_contents == @paths_contents
        end

        def failure_message
          missing_from_archive = "missing from archive:\n\t#{pretty_print(@paths_contents - @archive_contents)}\n"
          missing_from_paths   = "missing from paths:\n\t#{pretty_print(@archive_contents - @paths_contents)}\n"
          common               = "common to both:\n\t#{pretty_print(@archive_contents & @paths_contents)}\n"
          "expected contents of archive (#{@archive.path}) and paths (#{@paths.join(", ")}) to be identical.\n#{missing_from_archive}\n#{missing_from_paths}\n#{common}"
        end

        def negative_failure_message
          "expected contents of archive (#{@archive.path}) and paths (#{@paths.join(", ")}) to differ."
        end

        private

        # Collect every file named by, or found beneath, +@paths+ and store
        # the result (with the leading path stripped) in +@paths_contents+.
        # Paths that are neither a file nor a directory are ignored.
        def find_paths_contents
          contents = []
          @paths.each do |path|
            path = File.expand_path path
            if File.file? path
              contents << path
            elsif File.directory? path
              # NOTE(review): Find.files_in_directory is a project extension;
              # presumably it returns paths beginning with +path+ -- confirm.
              contents += Find.files_in_directory(path)
            end
          end

          @paths_contents = contents.map { |path| strip_leading_path(path) }.to_set
        end

        # Return +path+ relative to +@relative_to+.  The path is returned
        # unchanged when no +:relative_to+ was given (previously this raised
        # NoMethodError on nil) or when it does not lie beneath it.
        def strip_leading_path path
          return path if @relative_to.nil?
          # expand_path normalises a trailing slash, which used to make the
          # strip eat the first character of the relative path.
          prefix = File.expand_path(@relative_to)
          prefix += '/' unless prefix[-1, 1] == '/'
          path[0, prefix.length] == prefix ? path[prefix.length..-1] : path
        end

        def pretty_print set
          set.to_a.join("\n\t")
        end

      end

      # Invokes the matcher <tt>Spec::Matchers::IMW::ArchiveContentsMatchPaths</tt>.
      def contain_paths_like paths, opts = {}
        ArchiveContentsMatchPaths.new(paths, opts)
      end

    end
  end
end
|
93
|
+
|
94
|
+
# puts "#{File.basename(__FILE__)}: An archive is something that is bigger on the inside than it is on the outside." # at bottom
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# Small matchers built on +simple_matcher+ and mixed into every example by
# the spec configuration.
module CustomMatchers

  # Passes when the directory +given+ contains every one of +paths+.
  # +paths+ may be passed as separate arguments or as (nested) arrays and
  # are compared against entries expressed relative to +given+.
  def contain *paths
    paths = paths.flatten
    simple_matcher("contain #{paths.inspect}") do |given, matcher|
      # everything beneath +given+, with the "+given+/" prefix removed
      given_contents = Dir[given + "/**/*"].map do |abs_path|
        abs_path[(given.length + 1)..-1]
      end
      matcher.failure_message          = "expected #{given} to contain #{paths.inspect}, instead it contained #{given_contents.inspect}"
      matcher.negative_failure_message = "expected #{given} not to contain #{paths.inspect}"
      paths.all? { |path| given_contents.include?(path) }
    end
  end

  # Passes when the path +given+ exists on disk.
  def exist
    simple_matcher("exist") do |given, matcher|
      matcher.failure_message          = "expected #{given} to exist on disk"
      # This used to assign +failure_message+ a second time, which replaced
      # the message above and left the negative message unset.
      matcher.negative_failure_message = "expected #{given} not to exist on disk"
      File.exist?(given)
    end
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
#
|
2
|
+
# h2. spec/support/directory_contents_matcher.rb -- matches files between directories
|
3
|
+
#
|
4
|
+
# == About
|
5
|
+
#
|
6
|
+
# An RSpec matcher which tests that two directories share the same set
|
7
|
+
# of files.
|
8
|
+
#
|
9
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
10
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
11
|
+
# License:: GPL 3.0
|
12
|
+
# Website:: http://infinitemonkeywrench.org/
|
13
|
+
#
|
14
|
+
|
15
|
+
require 'set'
|
16
|
+
require 'find'
|
17
|
+
|
18
|
+
module Spec
  module Matchers
    module IMW

      # Matcher comparing the set of files found in one directory with the
      # set found in another.
      class DirectoryContentsMatcher

        # +dir+ is the reference directory; its file listing is taken once,
        # here.
        def initialize dir
          @dir       = File.expand_path(dir)
          @dir_files = relative_files_in(@dir)
        end

        # True when +target+ holds the same set of files as the reference
        # directory.
        def matches? target
          @target       = target
          @target_files = relative_files_in(@target)
          @target_files == @dir_files
        end

        def failure_message
          missing_from_dir    = format_files_for_printing(@target_files - @dir_files)
          missing_from_target = format_files_for_printing(@dir_files - @target_files)
          shared              = format_files_for_printing(@dir_files & @target_files)
          [
            "expected files in #{@dir} and #{@target} to be identical.",
            "files missing from #{@dir}:\n\t#{missing_from_dir}",
            "files missing from #{@target}:\n\t#{missing_from_target}",
            "files in common:\n\t#{shared}"
          ].join("\n\n")
        end

        def negative_failure_message
          "expected files in #{@dir} and #{@target} to be different"
        end

        private

        # The listing of +directory+ as a set.
        def relative_files_in directory
          Find.files_relative_to_directory(directory).to_set
        end

        # One file per line, continuation lines indented with a tab.
        def format_files_for_printing files
          files.to_a.join("\n\t")
        end
      end

      # Builds a matcher checking that a directory holds the same files as
      # +dir+.
      def contain_files_matching_directory dir
        DirectoryContentsMatcher.new(dir)
      end
    end
  end
end
|
60
|
+
|
61
|
+
# puts "#{File.basename(__FILE__)}: From far away, the two filing cabinets appear to be identical. Upon closer inspection, one of them is actually a Maine lobster. Delicious!" # at bottom
|
@@ -0,0 +1,18 @@
|
|
1
|
+
class Array
  # Returns an element chosen at a random position (+nil+ for an empty
  # array).
  def random
    position = rand(length)
    self[position]
  end
end
|
6
|
+
|
7
|
+
class Hash
  # Returns a new hash made of +other_hash+ with the receiver's entries
  # merged over it, so +other_hash+ only supplies keys the receiver lacks.
  # Borrowed from ActiveSupport::CoreExtensions::Hash::ReverseMerge.
  def reverse_merge(other_hash)
    defaults = other_hash
    defaults.merge(self)
  end

  # In-place form of +reverse_merge+: the receiver's contents are replaced
  # by the merged result.
  # Borrowed from ActiveSupport::CoreExtensions::Hash::ReverseMerge.
  def reverse_merge!(other_hash)
    merged = reverse_merge(other_hash)
    replace(merged)
  end
end
|
18
|
+
|
@@ -0,0 +1,50 @@
|
|
1
|
+
#
|
2
|
+
# h2. spec/support/file_contents_matcher.rb -- matches contents of two files
|
3
|
+
#
|
4
|
+
# == About
|
5
|
+
#
|
6
|
+
# An RSpec matcher which tests that two files have the same contents
|
7
|
+
# on disk.
|
8
|
+
#
|
9
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
10
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
11
|
+
# License:: GPL 3.0
|
12
|
+
# Website:: http://infinitemonkeywrench.org/
|
13
|
+
#
|
14
|
+
|
15
|
+
require 'fileutils'
require 'ftools'
|
16
|
+
|
17
|
+
module Spec
  module Matchers
    module IMW

      # Matcher testing that two files have the same contents on disk.
      class FileContentsMatcher

        # +orig+ is the path of the reference file.
        def initialize orig
          @orig = File.expand_path orig
        end

        # True when the file at +copy+ has the same contents as the
        # reference file.
        #
        # Uses FileUtils.compare_file rather than File.compare: the latter
        # comes from the +ftools+ library, which is no longer shipped with
        # Ruby 1.9 and later, whereas FileUtils provides the comparison on
        # old and new versions alike.
        def matches? copy
          @copy = File.expand_path copy
          FileUtils.compare_file(@orig, @copy)
        end

        def failure_message
          "files #{@orig} and #{@copy} are different"
        end

        def negative_failure_message
          "expected files #{@orig} and #{@copy} to differ"
        end
      end

      # Matches the contents of one file against another using
      # FileUtils.compare_file.
      def have_contents_matching_those_of path
        FileContentsMatcher.new(path)
      end

    end
  end
end
|
49
|
+
|
50
|
+
# puts "#{File.basename(__FILE__)}: From far away, the folders appear the same; from up close, they are different." # at bottom
|
@@ -0,0 +1,210 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
|
3
|
+
module IMWTest
  # Generators of random names, text, files and directory trees for use
  # as spec fixtures.
  #
  # Relies on Array#random, which is defined elsewhere in the spec support
  # code.  All methods are module-level and publicly callable (the
  # +private+/+public+ markers that used to sit between them had no effect
  # on methods defined with <tt>def self.</tt>).
  module Random

    STRING_CHARS        = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + [' ',' ',' ',' ',' ']
    TEXT_CHARS          = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + [' ',' ',' ',' ',' ',"\n"]
    FILENAME_CHARS      = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + ["-","_"]
    FILENAME_MAX_LENGTH = 9
    TEXT_MAX_LENGTH     = 1024

    # Maps a filename pattern to the generator used by +file+.
    EXTENSIONS = {
      /\.csv$/      => :csv_file,
      /\.xml$/      => :xml_file,
      /\.html$/     => :html_file,
      /\.tar$/      => :tar_file,
      /\.tar\.gz$/  => :targz_file,
      /\.tar\.bz2$/ => :tarbz2_file,
      /\.rar$/      => :rar_file,
      /\.zip$/      => :zip_file
    }

    # Command names of the external programs; taken from
    # IMW::EXTERNAL_PROGRAMS when that is defined.
    EXTERNAL_PROGRAMS = if defined?(IMW) && defined?(IMW::EXTERNAL_PROGRAMS)
                          IMW::EXTERNAL_PROGRAMS
                        else
                          {
                            :tar   => "tar",
                            :rar   => "rar",
                            :zip   => "zip",
                            :unzip => "unzip",
                            :gzip  => "gzip",
                            :bzip2 => "bzip2",
                            :wget  => "wget"
                          }
                        end

    # Return a random filename of <tt>options[:length]</tt> characters
    # (FILENAME_MAX_LENGTH when not given).
    def self.basename options = {}
      length   = options[:length] || FILENAME_MAX_LENGTH
      filename = (1..length).map { |i| FILENAME_CHARS.random }.join

      # filenames beginning with hyphens suck
      while filename[0,1] == '-' do
        filename[0] = FILENAME_CHARS.random
      end
      filename
    end

    # Return a random string of <tt>options[:length]</tt> characters
    # (TEXT_MAX_LENGTH when not given).  Newlines only appear when
    # <tt>options[:newlines]</tt> is true.
    def self.text options = {}
      length    = options[:length] || TEXT_MAX_LENGTH
      char_pool = options[:newlines] ? TEXT_CHARS : STRING_CHARS
      (1..length).map { |i| char_pool.random }.join
    end

    # Create a random file by matching the extension of the given
    # +filename+ against EXTENSIONS, or a text file if no match is found.
    def self.file filename
      match = EXTENSIONS.find { |regex,func| regex.match filename }
      match ? self.send(match.last,filename) : self.text_file(filename)
    end

    # Create a random text file (newlines included) at +filename+.
    # <tt>options[:length]</tt> sets the number of characters written;
    # it used to be ignored.
    def self.text_file filename, options = {}
      File.open(filename,'w') { |f| f.write text(:newlines => true, :length => options[:length]) }
    end

    # Create a comma-separated value file containing random text at
    # +filename+ with fewer than +num_rows+ rows, exactly +num_columns+
    # entries per row, each entry +entry_length+ characters long.
    # Returns +nil+.
    def self.csv_file(filename,num_rows = 500, num_columns = 9, entry_length = 9)
      File.open(filename,'w') do |f|
        # to_i: rand(0) yields a Float, which has no #times
        rand(num_rows).to_i.times do
          # One entry per column; the previous version wrote an extra,
          # (num_columns + 1)th entry on every row.
          row = (1..num_columns).map { |i| text(:length => entry_length) }
          f.write(row.join(','))
          f.write("\n")
        end
      end
      nil
    end

    # Create an XML file at +filename+.  Returns +nil+.
    #
    # At the present moment, this file contains random text in a very
    # boring single-element XML tree.  Randomizing the tree has not
    # been implemented, so +options+ is accepted but not used.
    def self.xml_file filename, options = {}
      File.open(filename,'w') do |file|
        file.write "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
        file.write "<xml>" + text + "</xml>"
      end
      nil
    end

    # Create an HTML file at +filename+ whose title and body hold
    # +title_length+ and +body_length+ characters of random text.
    # Returns +nil+.
    #
    # At the present moment, this file contains random text in a very
    # boring bare-bones HTML with a single element body.  Randomizing
    # the tree has not been implemented.
    def self.html_file(filename, title_length = 100, body_length = 5000)
      # The text used to come from a +string+ helper that this module never
      # defined, so every call raised NoMethodError.
      title = text(:length => title_length)
      body  = text(:length => body_length)
      File.open(filename,'w') do |f|
        f.write "<html><head><title>" + title + "</title></head><body>" + body + "</body></html>"
      end
      nil
    end

    # Create a tar archive at the given +filename+ containing random
    # files.  The files are generated in a temporary +dir+ directory next
    # to +filename+, which is removed afterwards.
    def self.tar_file filename
      tmpd = File.dirname(filename) + '/dir'
      directory_with_files(tmpd)
      FileUtils.cd(tmpd) {|dir| system("#{EXTERNAL_PROGRAMS[:tar]} -cf file.tar *") }
      FileUtils.cp(tmpd + "/file.tar",filename)
      FileUtils.rm_rf(tmpd)
    end

    # Create a tar.gz archive at the given +filename+ containing
    # random files.
    def self.targz_file filename
      tar   = File.dirname(filename) + "/file.tar"
      targz = tar + ".gz"
      tar_file tar
      system("#{EXTERNAL_PROGRAMS[:gzip]} #{tar}")
      FileUtils.cp(targz,filename)
      FileUtils.rm(targz)
    end

    # Create a tar.bz2 archive at the given +filename+ containing
    # random files.
    def self.tarbz2_file filename
      tar    = File.dirname(filename) + "/file.tar"
      tarbz2 = tar + ".bz2"
      tar_file tar
      system("#{EXTERNAL_PROGRAMS[:bzip2]} #{tar}")
      FileUtils.cp(tarbz2,filename)
      FileUtils.rm(tarbz2)
    end

    # Create a compressed rar archive at the given +filename+
    # containing random files.
    def self.rar_file filename
      tmpd = File.dirname(filename) + '/dir'
      directory_with_files(tmpd)
      FileUtils.cd(tmpd) {|dir| system("#{EXTERNAL_PROGRAMS[:rar]} a -r -o+ file.rar *") }
      FileUtils.cp(tmpd + "/file.rar",filename)
      FileUtils.rm_rf(tmpd)
    end

    # Create a compressed zip archive at the given +filename+
    # containing random files.
    def self.zip_file filename
      tmpd = File.dirname(filename) + '/dir'
      directory_with_files(tmpd)
      FileUtils.cd(tmpd) {|dir| system("#{EXTERNAL_PROGRAMS[:zip]} -r file.zip *") }
      FileUtils.cp(tmpd + "/file.zip",filename)
      FileUtils.rm_rf(tmpd)
    end

    # Creates +directory+ and fills it with random files containing
    # random data.  Raises a RuntimeError when +directory+ already exists
    # and <tt>:force</tt> is not set.
    #
    # Options (with their default values in parentheses) include:
    #
    # <tt>:extensions</tt> (<tt>[txt,csv,dat]</tt>):: extensions to use.  If an extension is known (see <tt>IMWTest::Random::EXTENSIONS</tt>) then appropriately formatted random data will be used.  If an extension is not known, it will be treated as text.  The extension +dir+ will create a directory which will itself be filled with random files in the same way as its parent.
    # <tt>:max_depth</tt> (3):: maximum depth to nest directories
    # <tt>:starting_depth</tt> (1):: the default depth the parent directory is assumed to have
    # <tt>:num_files</tt> (3):: controls the number of entries attempted per directory: between 2 and <tt>:num_files</tt> + 1
    # <tt>:force</tt> (false):: force overwriting of existing directories
    def self.directory_with_files(directory,options = {})
      directory = File.expand_path(directory)
      defaults  = {:extensions => ['txt','csv','dat'],:max_depth => 3,:force => false,:starting_depth => 1, :num_files => 3}
      options   = defaults.merge(options)
      depth     = options[:starting_depth]

      if File.exist?(directory) then
        if options[:force] then
          FileUtils.rm_rf(directory)
        else
          raise "#{directory} already exists"
        end
      end
      FileUtils.mkdir_p(directory)

      # to_i: rand(0) yields a Float, which has no #times
      (rand(options[:num_files]).to_i + 2).times do
        ext  = options[:extensions].random
        name = self.basename
        if ext == 'dir' then
          # once the maximum depth is reached, 'dir' picks are skipped
          next unless depth <= options[:max_depth]
          # The recursive call creates the subdirectory itself.  Creating
          # it here first, as the code used to, made that call fail with
          # "already exists" whenever :force was not set.
          directory_with_files(directory + '/' + name,options.merge({:starting_depth => (depth + 1)}))
        else
          file(directory + '/' + name + '.' + ext)
        end
      end
    end

  end
end
|
208
|
+
|
209
|
+
|
210
|
+
|