imw 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +15 -0
- data/CHANGELOG +0 -0
- data/LICENSE +674 -0
- data/README.rdoc +101 -0
- data/Rakefile +20 -0
- data/VERSION +1 -0
- data/etc/imwrc.rb +76 -0
- data/lib/imw.rb +42 -0
- data/lib/imw/boot.rb +58 -0
- data/lib/imw/dataset.rb +233 -0
- data/lib/imw/dataset/datamapper.rb +66 -0
- data/lib/imw/dataset/datamapper/time_and_user_stamps.rb +37 -0
- data/lib/imw/dataset/loaddump.rb +50 -0
- data/lib/imw/dataset/old/file_collection.rb +88 -0
- data/lib/imw/dataset/old/file_collection_utils.rb +71 -0
- data/lib/imw/dataset/scaffold.rb +132 -0
- data/lib/imw/dataset/scraped_uri.rb +305 -0
- data/lib/imw/dataset/scrub/old_working_scrubber.rb +87 -0
- data/lib/imw/dataset/scrub/scrub.rb +147 -0
- data/lib/imw/dataset/scrub/scrub_simple_url.rb +38 -0
- data/lib/imw/dataset/scrub/scrub_test.rb +60 -0
- data/lib/imw/dataset/scrub/slug.rb +101 -0
- data/lib/imw/dataset/stats.rb +73 -0
- data/lib/imw/dataset/stats/counter.rb +23 -0
- data/lib/imw/dataset/task.rb +38 -0
- data/lib/imw/dataset/workflow.rb +81 -0
- data/lib/imw/files.rb +110 -0
- data/lib/imw/files/archive.rb +113 -0
- data/lib/imw/files/basicfile.rb +122 -0
- data/lib/imw/files/binary.rb +28 -0
- data/lib/imw/files/compressed_file.rb +93 -0
- data/lib/imw/files/compressed_files_and_archives.rb +348 -0
- data/lib/imw/files/compressible.rb +103 -0
- data/lib/imw/files/csv.rb +112 -0
- data/lib/imw/files/json.rb +41 -0
- data/lib/imw/files/sgml.rb +65 -0
- data/lib/imw/files/text.rb +68 -0
- data/lib/imw/files/yaml.rb +46 -0
- data/lib/imw/packagers.rb +8 -0
- data/lib/imw/packagers/archiver.rb +108 -0
- data/lib/imw/packagers/s3_mover.rb +28 -0
- data/lib/imw/parsers.rb +7 -0
- data/lib/imw/parsers/html_parser.rb +382 -0
- data/lib/imw/parsers/html_parser/matchers.rb +306 -0
- data/lib/imw/parsers/line_parser.rb +87 -0
- data/lib/imw/parsers/regexp_parser.rb +72 -0
- data/lib/imw/utils.rb +24 -0
- data/lib/imw/utils/components.rb +61 -0
- data/lib/imw/utils/config.rb +46 -0
- data/lib/imw/utils/error.rb +54 -0
- data/lib/imw/utils/extensions/array.rb +125 -0
- data/lib/imw/utils/extensions/class/attribute_accessors.rb +8 -0
- data/lib/imw/utils/extensions/core.rb +43 -0
- data/lib/imw/utils/extensions/dir.rb +24 -0
- data/lib/imw/utils/extensions/file_core.rb +64 -0
- data/lib/imw/utils/extensions/hash.rb +218 -0
- data/lib/imw/utils/extensions/hpricot.rb +48 -0
- data/lib/imw/utils/extensions/string.rb +49 -0
- data/lib/imw/utils/extensions/struct.rb +42 -0
- data/lib/imw/utils/extensions/symbol.rb +28 -0
- data/lib/imw/utils/extensions/typed_struct.rb +22 -0
- data/lib/imw/utils/extensions/uri.rb +59 -0
- data/lib/imw/utils/log.rb +67 -0
- data/lib/imw/utils/misc.rb +63 -0
- data/lib/imw/utils/paths.rb +115 -0
- data/lib/imw/utils/uri.rb +59 -0
- data/lib/imw/utils/uuid.rb +33 -0
- data/lib/imw/utils/validate.rb +38 -0
- data/lib/imw/utils/version.rb +12 -0
- data/lib/imw/utils/view.rb +113 -0
- data/lib/imw/utils/view/dump_csv.rb +112 -0
- data/lib/imw/utils/view/dump_csv_older.rb +117 -0
- data/spec/data/sample.csv +131 -0
- data/spec/data/sample.tsv +131 -0
- data/spec/data/sample.txt +131 -0
- data/spec/data/sample.xml +653 -0
- data/spec/data/sample.yaml +652 -0
- data/spec/imw/dataset/datamapper/uri_spec.rb +43 -0
- data/spec/imw/dataset/datamapper_spec_helper.rb +11 -0
- data/spec/imw/files/archive_spec.rb +118 -0
- data/spec/imw/files/basicfile_spec.rb +121 -0
- data/spec/imw/files/bz2_spec.rb +32 -0
- data/spec/imw/files/compressed_file_spec.rb +96 -0
- data/spec/imw/files/compressible_spec.rb +100 -0
- data/spec/imw/files/file_spec.rb +144 -0
- data/spec/imw/files/gz_spec.rb +32 -0
- data/spec/imw/files/rar_spec.rb +33 -0
- data/spec/imw/files/tar_spec.rb +31 -0
- data/spec/imw/files/text_spec.rb +23 -0
- data/spec/imw/files/zip_spec.rb +31 -0
- data/spec/imw/files_spec.rb +38 -0
- data/spec/imw/packagers/archiver_spec.rb +125 -0
- data/spec/imw/packagers/s3_mover_spec.rb +7 -0
- data/spec/imw/parsers/line_parser_spec.rb +96 -0
- data/spec/imw/parsers/regexp_parser_spec.rb +42 -0
- data/spec/imw/utils/extensions/file_core_spec.rb +72 -0
- data/spec/imw/utils/extensions/find_spec.rb +113 -0
- data/spec/imw/utils/paths_spec.rb +38 -0
- data/spec/imw/workflow/rip/local_spec.rb +89 -0
- data/spec/imw/workflow/rip_spec.rb +27 -0
- data/spec/rcov.opts +1 -0
- data/spec/spec.opts +4 -0
- data/spec/spec_helper.rb +32 -0
- data/spec/support/archive_contents_matcher.rb +94 -0
- data/spec/support/custom_matchers.rb +21 -0
- data/spec/support/directory_contents_matcher.rb +61 -0
- data/spec/support/extensions.rb +18 -0
- data/spec/support/file_contents_matcher.rb +50 -0
- data/spec/support/random.rb +210 -0
- data/spec/support/without_regard_to_order_matcher.rb +58 -0
- metadata +196 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
#
|
|
2
|
+
# h2. spec/imw/workflow/rip_spec.rb -- spec for rip.rb
|
|
3
|
+
#
|
|
4
|
+
# == About
|
|
5
|
+
#
|
|
6
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
7
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
8
|
+
# License:: GPL 3.0
|
|
9
|
+
# Website:: http://infinitemonkeywrench.org/
|
|
10
|
+
#
|
|
11
|
+
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
|
12
|
+
|
|
13
|
+
# require 'imw/workflow/rip'
|
|
14
|
+
#
|
|
15
|
+
# describe Source do
|
|
16
|
+
#
|
|
17
|
+
# before(:all) do
|
|
18
|
+
# @source = IMW::Source.new(:fake_source)
|
|
19
|
+
# @source.stub("returns path to ripd directory",:path_to => IMW::DIRECTORIES[:dump] + "/source_rip_spec")
|
|
20
|
+
# end
|
|
21
|
+
#
|
|
22
|
+
# it "should raise an error when asked to rip in an unknown way" do
|
|
23
|
+
# @source.rip_from :silly_way
|
|
24
|
+
# end
|
|
25
|
+
# end
|
|
26
|
+
|
|
27
|
+
# puts "#{File.basename(__FILE__)}: Bending over, you hear a thunderous RRRRRRRIIIIIP and then scuttle off to check your pants..." # at bottom
|
data/spec/rcov.opts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
--exclude "bin/*,etc/*,meta/*,gems/*,old/*,spec/*"
|
data/spec/spec.opts
ADDED
data/spec/spec_helper.rb
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Shared bootstrap for the spec suite: defines the project directories,
# puts lib/ on the load path, loads RSpec, IMW and every file under
# spec/support, and gives each example a scratch directory to work in.
IMW_ROOT_DIR = File.join(File.expand_path(File.dirname(__FILE__)), '..') unless defined? IMW_ROOT_DIR
IMW_SPEC_DIR = File.join(IMW_ROOT_DIR, 'spec') unless defined? IMW_SPEC_DIR
IMW_LIB_DIR  = File.join(IMW_ROOT_DIR, 'lib') unless defined? IMW_LIB_DIR

# Do not append the same entry again if this helper is evaluated more
# than once.
$: << IMW_LIB_DIR unless $:.include?(IMW_LIB_DIR)

require 'rubygems'
require 'spec'
require 'fileutils'
require 'imw'

# Dir[] gives no ordering guarantee on older Rubies; sort so the
# support files are loaded in the same order on every machine.
Dir[File.dirname(__FILE__) + "/support/**/*.rb"].sort.each { |path| require path }

module IMWTest
  # Scratch directory created before and removed after every example.
  TMP_DIR = "/tmp/imwtest"
end

Spec::Runner.configure do |config|

  config.include CustomMatchers

  # Every example runs inside a freshly created scratch directory.
  config.before do
    FileUtils.mkdir_p IMWTest::TMP_DIR
    FileUtils.cd IMWTest::TMP_DIR
  end

  config.after do
    # Step out of the scratch directory before deleting it so the
    # process is never left with a working directory that no longer
    # exists (anything resolving relative paths afterwards would fail).
    FileUtils.cd File.dirname(IMWTest::TMP_DIR)
    FileUtils.rm_rf IMWTest::TMP_DIR
  end
end
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
#
|
|
2
|
+
# h2. spec/support/archive_contents_matcher.rb -- matches contents of archive to disk
|
|
3
|
+
#
|
|
4
|
+
# == About
|
|
5
|
+
#
|
|
6
|
+
# An RSpec matcher which tests that an archive of files has the same
|
|
7
|
+
# contents as various paths on disk.
|
|
8
|
+
#
|
|
9
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
10
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
11
|
+
# License:: GPL 3.0
|
|
12
|
+
# Website:: http://infinitemonkeywrench.org/
|
|
13
|
+
#
|
|
14
|
+
|
|
15
|
+
require 'find'
require 'set' # Enumerable#to_set is used below

module Spec
  module Matchers
    module IMW

      # Match the contents of the archive against files or directories
      # in +paths+.
      #
      # The object handed to <tt>matches?</tt> must respond to
      # +contents+ (a collection of path strings) and to +path+ (used
      # in the failure messages).
      #
      # Options include:
      #
      # <tt>:relative_to</tt>:: a leading path which will be stripped
      # from all +paths+ before comparison with the contents of the
      # archive.  When omitted, the expanded paths are compared as
      # they are.
      class ArchiveContentsMatchPaths

        # +paths+ may be a single path or a collection of paths; each
        # may name a file or a directory.  The files they cover are
        # collected immediately, at construction time.
        def initialize paths, opts = {}
          @paths = paths.is_a?(String) ? [paths] : paths
          # Normalize the prefix the same way the paths themselves are
          # normalized so that trailing slashes or relative prefixes
          # do not throw the stripping off.
          relative_to  = opts[:relative_to]
          @relative_to = relative_to && File.expand_path(relative_to)
          find_paths_contents
        end

        # True when the set of paths reported by +archive+ equals the
        # set of files found under the paths given at construction.
        def matches? archive
          @archive          = archive
          @archive_contents = @archive.contents.to_set
          @archive_contents == @paths_contents
        end

        def failure_message
          missing_from_archive = "missing from archive:\n\t#{pretty_print(@paths_contents - @archive_contents)}\n"
          missing_from_paths   = "missing from paths:\n\t#{pretty_print(@archive_contents - @paths_contents)}\n"
          common               = "common to both:\n\t#{pretty_print(@archive_contents & @paths_contents)}\n"
          "expected contents of archive (#{@archive.path}) and paths (#{@paths.join(", ")}) to be identical.\n#{missing_from_archive}\n#{missing_from_paths}\n#{common}"
        end

        def negative_failure_message
          "expected contents of archive (#{@archive.path}) and paths (#{@paths.join(", ")}) to differ."
        end

        private

        # Collect every file named by, or found beneath, <tt>@paths</tt>
        # and store the (optionally relativized) result as a set in
        # <tt>@paths_contents</tt>.  Paths that are neither a file nor
        # a directory are ignored.
        def find_paths_contents
          contents = []
          @paths.each do |path|
            path = File.expand_path path
            if File.file? path
              contents << path
            elsif File.directory? path
              # Find.files_in_directory is not part of Ruby's own Find
              # module; it is expected to be provided by the project.
              contents += Find.files_in_directory(path)
            end
          end
          @paths_contents = contents.map { |path| strip_leading_path(path) }.to_set
        end

        # Remove the <tt>:relative_to</tt> prefix (and the separator
        # following it) from +path+.  Paths are returned untouched when
        # no prefix was given or when they do not lie beneath it.
        def strip_leading_path path
          return path unless @relative_to
          prefix = @relative_to[-1, 1] == '/' ? @relative_to : @relative_to + '/'
          path[0, prefix.length] == prefix ? path[prefix.length..-1] : path
        end

        # Format a set of paths one per line for the failure messages.
        def pretty_print set
          set.to_a.join("\n\t")
        end

      end

      # Invokes the matcher <tt>Spec::Matchers::IMW::ArchiveContentsMatchPaths</tt>.
      def contain_paths_like paths, opts = {}
        ArchiveContentsMatchPaths.new(paths,opts)
      end

    end
  end
end
|
|
93
|
+
|
|
94
|
+
# puts "#{File.basename(__FILE__)}: An archive is something that is bigger on the inside than it is on the outside." # at bottom
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# File-system matchers built on +simple_matcher+, which must be
# available wherever this module is mixed in.
module CustomMatchers

  # Passes when the directory +given+ contains every one of +paths+.
  #
  # +paths+ may be passed as separate arguments, as arrays, or as a mix
  # of both; each is compared against the entries found anywhere beneath
  # +given+, expressed relative to +given+.
  def contain *paths
    paths = paths.flatten
    simple_matcher("contain #{paths.inspect}") do |given, matcher|
      # The +1 skips the separator that follows the directory name.
      given_contents = Dir[given + "/**/*"].map do |abs_path|
        abs_path[(given.length + 1)..-1]
      end
      matcher.failure_message          = "expected #{given} to contain #{paths.inspect}, instead it contained #{given_contents.inspect}"
      matcher.negative_failure_message = "expected #{given} not to contain #{paths.inspect}"
      paths.all? { |path| given_contents.include?(path) }
    end
  end

  # Passes when the path +given+ exists on disk.
  def exist
    simple_matcher("exist") do |given, matcher|
      matcher.failure_message          = "expected #{given} to exist on disk"
      # This used to be assigned to failure_message as well, which
      # clobbered the positive message and left the negative one unset.
      matcher.negative_failure_message = "expected #{given} not to exist on disk"
      File.exist?(given)
    end
  end
end
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
#
|
|
2
|
+
# h2. spec/support/directory_contents_matcher.rb -- matches files between directories
|
|
3
|
+
#
|
|
4
|
+
# == About
|
|
5
|
+
#
|
|
6
|
+
# An RSpec matcher which tests that two directories share the same set
|
|
7
|
+
# of files.
|
|
8
|
+
#
|
|
9
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
10
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
11
|
+
# License:: GPL 3.0
|
|
12
|
+
# Website:: http://infinitemonkeywrench.org/
|
|
13
|
+
#
|
|
14
|
+
|
|
15
|
+
require 'set'
|
|
16
|
+
require 'find'
|
|
17
|
+
|
|
18
|
+
module Spec
  module Matchers
    module IMW

      # Matcher which passes when two directories hold the same set of
      # files, as reported by Find.files_relative_to_directory (a
      # helper which is not part of Ruby's own Find module and is
      # expected to be provided by the project).
      class DirectoryContentsMatcher

        # +dir+ is the reference directory; its files are collected
        # immediately, at construction time.
        def initialize dir
          @dir       = File.expand_path(dir)
          @dir_files = Find.files_relative_to_directory(@dir).to_set
        end

        # True when +target+ holds exactly the same set of files as the
        # reference directory.
        def matches? target
          # Normalize the target exactly like the reference directory
          # so both are listed, and reported, as absolute paths.
          @target       = File.expand_path(target)
          @target_files = Find.files_relative_to_directory(@target).to_set
          @target_files == @dir_files
        end

        def failure_message
          files_missing_from_dir    = format_files_for_printing(@target_files - @dir_files)
          files_missing_from_target = format_files_for_printing(@dir_files - @target_files)
          files_in_common           = format_files_for_printing(@dir_files & @target_files)
          "expected files in #{@dir} and #{@target} to be identical.\n\nfiles missing from #{@dir}:\n\t#{files_missing_from_dir}\n\nfiles missing from #{@target}:\n\t#{files_missing_from_target}\n\nfiles in common:\n\t#{files_in_common}"
        end

        def negative_failure_message
          "expected files in #{@dir} and #{@target} to be different"
        end

        private

        # Pretty print a set of files, one per line.
        def format_files_for_printing files
          files.to_a.join("\n\t")
        end
      end

      # Checks that files in one directory match those in another.
      def contain_files_matching_directory dir
        DirectoryContentsMatcher.new(dir)
      end
    end
  end
end
|
|
60
|
+
|
|
61
|
+
# puts "#{File.basename(__FILE__)}: From far away, the two filing cabinets appear to be identical. Upon closer inspection, one of them is actually a Maine lobster. Delicious!" # at bottom
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
class Array
  # Pick one element of the array at a randomly chosen index.  An
  # empty array yields +nil+.
  def random
    at(rand(size))
  end
end
|
|
6
|
+
|
|
7
|
+
class Hash
  # Modelled on ActiveSupport::CoreExtensions::Hash::ReverseMerge.
  #
  # Returns a new hash made of +other_hash+'s entries overlaid with the
  # receiver's own, so the receiver wins wherever both define a key.
  # Neither hash is modified.
  def reverse_merge(other_hash)
    with_defaults = other_hash.merge(self)
    with_defaults
  end

  # Modelled on ActiveSupport::CoreExtensions::Hash::ReverseMerge.
  #
  # In-place version of reverse_merge: the receiver's contents are
  # replaced by the merged result and the receiver itself is returned.
  def reverse_merge!(other_hash)
    with_defaults = reverse_merge(other_hash)
    replace(with_defaults)
  end
end
|
|
18
|
+
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
#
|
|
2
|
+
# h2. spec/support/file_contents_matcher.rb -- matches contents of two files
|
|
3
|
+
#
|
|
4
|
+
# == About
|
|
5
|
+
#
|
|
6
|
+
# An RSpec matcher which tests that two files have the same contents
|
|
7
|
+
# on disk.
|
|
8
|
+
#
|
|
9
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
10
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
11
|
+
# License:: GPL 3.0
|
|
12
|
+
# Website:: http://infinitemonkeywrench.org/
|
|
13
|
+
#
|
|
14
|
+
|
|
15
|
+
# 'ftools' (and the File.compare it provided) was dropped from the
# standard library in Ruby 1.9; FileUtils.compare_file does the same
# job and is available on old and new Rubies alike.
require 'fileutils'

module Spec
  module Matchers
    module IMW

      # Matcher which passes when two files have identical contents.
      class FileContentsMatcher

        # +orig+ is the path of the reference file.
        def initialize orig
          @orig = File.expand_path orig
        end

        # True when the file at +copy+ has the same contents as the
        # reference file.
        def matches? copy
          @copy = File.expand_path copy
          FileUtils.compare_file(@orig, @copy)
        end

        def failure_message
          "files #{@orig} and #{@copy} are different"
        end

        def negative_failure_message
          "expected files #{@orig} and #{@copy} to differ"
        end
      end

      # Matches the contents of one file against another using
      # FileUtils.compare_file.
      def have_contents_matching_those_of path
        FileContentsMatcher.new(path)
      end

    end
  end
end
|
|
49
|
+
|
|
50
|
+
# puts "#{File.basename(__FILE__)}: From far away, the folders appear the same; from up close, they are different." # at bottom
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
require 'fileutils'
|
|
2
|
+
|
|
3
|
+
module IMWTest
  # Generators for random fixture data: strings, file names, files of
  # various formats, archives and whole directory trees.
  #
  # Every method is a module-level method
  # (<tt>IMWTest::Random.basename</tt> and so on).  The generators call
  # <tt>Array#random</tt>, which is not core Ruby and must be defined
  # before any of them is used.
  module Random

    # Character pools.  The repeated space makes blanks more likely
    # than any other single character.
    STRING_CHARS        = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + [' ',' ',' ',' ',' ']
    TEXT_CHARS          = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + [' ',' ',' ',' ',' ',"\n"]
    FILENAME_CHARS      = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + ["-","_"]
    FILENAME_MAX_LENGTH = 9
    TEXT_MAX_LENGTH     = 1024

    # Maps a filename pattern to the generator used by +file+.
    EXTENSIONS = {
      /\.csv$/       => :csv_file,
      /\.xml$/       => :xml_file,
      /\.html$/      => :html_file,
      /\.tar$/       => :tar_file,
      /\.tar\.gz$/   => :targz_file,
      /\.tar\.bz2$/  => :tarbz2_file,
      /\.rar$/       => :rar_file,
      /\.zip$/       => :zip_file
    }

    # Command names of the external programs; taken from IMW when it
    # defines them, otherwise looked up on the PATH under their usual
    # names.
    EXTERNAL_PROGRAMS = if defined?(IMW) && defined?(IMW::EXTERNAL_PROGRAMS)
                          IMW::EXTERNAL_PROGRAMS
                        else
                          {
                            :tar   => "tar",
                            :rar   => "rar",
                            :zip   => "zip",
                            :unzip => "unzip",
                            :gzip  => "gzip",
                            :bzip2 => "bzip2",
                            :wget  => "wget"
                          }
                        end

    # Return a random filename (without extension) which never starts
    # with a hyphen.  The optional <tt>:length</tt> sets the number of
    # characters (default FILENAME_MAX_LENGTH).
    def self.basename options = {}
      length   = options[:length] || FILENAME_MAX_LENGTH
      filename = (1..length).map { FILENAME_CHARS.random }.join

      # filenames beginning with hyphens suck
      filename[0, 1] = FILENAME_CHARS.random while filename[0, 1] == '-'
      filename
    end

    # Return a random string of text.  The optional <tt>:length</tt>
    # sets the number of characters (default TEXT_MAX_LENGTH) and a
    # true <tt>:newlines</tt> allows newlines among them.
    def self.text options = {}
      length    = options[:length] || TEXT_MAX_LENGTH
      char_pool = options[:newlines] ? TEXT_CHARS : STRING_CHARS
      (1..length).map { char_pool.random }.join
    end

    # Create a random file by matching the extension of the given
    # +filename+ (see EXTENSIONS) or a text file if no match is found.
    def self.file filename
      match = EXTENSIONS.find { |regex, func| regex.match filename }
      match ? send(match.last, filename) : text_file(filename)
    end

    # Create a random text file (newlines included) at +filename+.
    # The optional <tt>:length</tt> sets the number of characters
    # (default TEXT_MAX_LENGTH).
    def self.text_file filename, options = {}
      File.open(filename, 'w') { |f| f.write text(:newlines => true, :length => options[:length]) }
    end

    # Create a comma-separated value file containing random text at
    # +filename+.  The file has fewer than +num_rows+ rows (possibly
    # none), each made of exactly +num_columns+ entries of
    # +entry_length+ characters.
    def self.csv_file(filename, num_rows = 500, num_columns = 9, entry_length = 9)
      # rand(0) would return a float, so treat a non-positive limit as "no rows".
      row_count = num_rows > 0 ? rand(num_rows) : 0
      # The block form closes the file even when writing fails.
      File.open(filename, 'w') do |f|
        row_count.times do
          row = (1..num_columns).map { text(:length => entry_length) }
          f.write(row.join(','))
          f.write("\n")
        end
      end
      nil
    end

    # Create an XML file at +filename+.
    #
    # At the present moment, this file contains random text in a very
    # boring single-element XML tree.  Randomizing the tree has not
    # been implemented, so +options+ is accepted but not used yet.
    def self.xml_file filename, options = {}
      File.open(filename, 'w') do |file|
        file.write "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
        file.write "<xml>" + text + "</xml>"
      end
      nil
    end

    # Create an HTML file at +filename+ whose title holds
    # +title_length+ and whose body holds +body_length+ random
    # characters.
    #
    # At the present moment, this file contains random text in a very
    # boring bare-bones HTML with a single element body.  Randomizing
    # the tree has not been implemented.
    def self.html_file(filename, title_length = 100, body_length = 5000)
      title = text(:length => title_length)
      body  = text(:length => body_length)
      File.open(filename, 'w') do |f|
        f.write "<html><head><title>" + title + "</title></head><body>" + body + "</body></html>"
      end
      nil
    end

    # Create a tar archive at the given +filename+ containing random
    # files.
    def self.tar_file filename
      archive_random_directory(filename, "file.tar", "#{EXTERNAL_PROGRAMS[:tar]} -cf file.tar *")
    end

    # Create a tar.gz archive at the given +filename+ containing
    # random files.
    def self.targz_file filename
      compress_random_tar(filename, :gzip, ".gz")
    end

    # Create a tar.bz2 archive at the given +filename+ containing
    # random files.
    def self.tarbz2_file filename
      compress_random_tar(filename, :bzip2, ".bz2")
    end

    # Create a compressed rar archive at the given +filename+
    # containing random files.
    def self.rar_file filename
      archive_random_directory(filename, "file.rar", "#{EXTERNAL_PROGRAMS[:rar]} a -r -o+ file.rar *")
    end

    # Create a compressed zip archive at the given +filename+
    # containing random files.
    def self.zip_file filename
      archive_random_directory(filename, "file.zip", "#{EXTERNAL_PROGRAMS[:zip]} -r file.zip *")
    end

    # Creates +directory+ and fills it with random files containing
    # random data.  Raises a RuntimeError when +directory+ already
    # exists and <tt>:force</tt> is not set.
    #
    # Options (with their default values in parentheses) include:
    #
    # <tt>:extensions</tt> (<tt>[txt,csv,dat]</tt>):: extensions to use.  If an extension is known (see <tt>IMWTest::Random::EXTENSIONS</tt>) then appropriately formatted random data will be used.  If an extension is not known, it will be treated as text.  The extension +dir+ will create a directory which will itself be filled with random files in the same way as its parent.
    # <tt>:max_depth</tt> (3):: maximum depth to nest directories
    # <tt>:starting_depth</tt> (1):: the default depth the parent directory is assumed to have
    # <tt>:num_files</tt> (3):: between 2 and <tt>:num_files</tt> + 1 entries are attempted per directory
    # <tt>:force</tt> (false):: force overwriting of existing directories
    def self.directory_with_files(directory, options = {})
      directory = File.expand_path(directory)
      defaults  = {:extensions => ['txt','csv','dat'], :max_depth => 3, :force => false, :starting_depth => 1, :num_files => 3}
      options   = defaults.merge(options)
      depth     = options[:starting_depth]

      if File.exist?(directory)
        raise "#{directory} already exists" unless options[:force]
        FileUtils.rm_rf(directory)
      end
      FileUtils.mkdir_p(directory)

      # to_i keeps this working when :num_files is 0 (rand(0) is a float).
      (rand(options[:num_files]) + 2).to_i.times do
        ext  = options[:extensions].random
        name = basename
        if ext == 'dir'
          next unless depth <= options[:max_depth]
          # The recursive call creates the subdirectory itself; making
          # it here first would trip the "already exists" check above.
          directory_with_files(directory + '/' + name, options.merge({:starting_depth => (depth + 1)}))
        else
          file(directory + '/' + name + '.' + ext)
        end
      end
    end

    # Fill a scratch directory ("dir", next to +filename+) with random
    # files, run +command+ inside it to produce +archive_name+, and
    # copy that archive to +filename+.  The scratch directory is
    # removed afterwards, even when archiving fails.
    def self.archive_random_directory(filename, archive_name, command)
      tmpd = File.dirname(filename) + '/dir'
      directory_with_files(tmpd)
      begin
        FileUtils.cd(tmpd) { system(command) }
        FileUtils.cp(tmpd + "/" + archive_name, filename)
      ensure
        FileUtils.rm_rf(tmpd)
      end
    end
    private_class_method :archive_random_directory

    # Build "file.tar" next to +filename+, compress it with the
    # external program registered under +program+ (which is expected
    # to replace it with "file.tar" + +suffix+) and move the result to
    # +filename+.
    def self.compress_random_tar(filename, program, suffix)
      dir        = File.dirname(filename)
      tar        = dir + "/file.tar"
      compressed = tar + suffix
      tar_file tar
      # Run inside the directory so that only the fixed name
      # "file.tar" -- never an arbitrary path -- reaches the shell.
      FileUtils.cd(dir) { system("#{EXTERNAL_PROGRAMS[program]} file.tar") }
      # Nothing left to do when the compressed file already sits at
      # the requested location (copying a file onto itself raises).
      return if File.expand_path(compressed) == File.expand_path(filename)
      FileUtils.cp(compressed, filename)
      FileUtils.rm(compressed)
    end
    private_class_method :compress_random_tar

  end
end
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
|