imw 0.2.18 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +7 -26
- data/Gemfile.lock +13 -38
- data/{LICENSE → LICENSE.txt} +1 -1
- data/README.textile +35 -0
- data/Rakefile +45 -22
- data/VERSION +1 -1
- data/examples/foo.rb +19 -0
- data/examples/html_selector.rb +22 -0
- data/examples/nes_game_list.csv +625 -0
- data/examples/nes_gamespot.csv +1371 -0
- data/examples/nes_nintendo.csv +624 -0
- data/examples/nes_unlicensed.csv +89 -0
- data/examples/nes_wikipedia.csv +710 -0
- data/examples/nibbler_test.rb +24 -0
- data/examples/script.rb +19 -0
- data/lib/imw.rb +28 -140
- data/lib/imw/error.rb +9 -0
- data/lib/imw/recordizer.rb +8 -0
- data/lib/imw/recordizer/html_selector_recordizer.rb +86 -0
- data/lib/imw/recordizer/string_slice_recordizer.rb +39 -0
- data/lib/imw/resource.rb +3 -119
- data/lib/imw/serializer.rb +7 -0
- data/lib/imw/serializer/json_serializer.rb +17 -0
- data/lib/imw/uri.rb +41 -0
- data/spec/resource_spec.rb +78 -0
- data/spec/uri_spec.rb +55 -0
- metadata +81 -232
- data/README.rdoc +0 -371
- data/bin/imw +0 -5
- data/bin/tsv_to_json.rb +0 -29
- data/etc/imwrc.rb +0 -26
- data/examples/dataset.rb +0 -12
- data/examples/metadata.yml +0 -10
- data/lib/imw/archives.rb +0 -120
- data/lib/imw/archives/rar.rb +0 -19
- data/lib/imw/archives/tar.rb +0 -19
- data/lib/imw/archives/tarbz2.rb +0 -73
- data/lib/imw/archives/targz.rb +0 -73
- data/lib/imw/archives/zip.rb +0 -51
- data/lib/imw/boot.rb +0 -87
- data/lib/imw/compressed_files.rb +0 -94
- data/lib/imw/compressed_files/bz2.rb +0 -16
- data/lib/imw/compressed_files/compressible.rb +0 -75
- data/lib/imw/compressed_files/gz.rb +0 -16
- data/lib/imw/dataset.rb +0 -125
- data/lib/imw/dataset/paths.rb +0 -29
- data/lib/imw/dataset/workflow.rb +0 -195
- data/lib/imw/formats.rb +0 -33
- data/lib/imw/formats/delimited.rb +0 -170
- data/lib/imw/formats/excel.rb +0 -100
- data/lib/imw/formats/json.rb +0 -41
- data/lib/imw/formats/pdf.rb +0 -71
- data/lib/imw/formats/sgml.rb +0 -69
- data/lib/imw/formats/yaml.rb +0 -41
- data/lib/imw/metadata.rb +0 -83
- data/lib/imw/metadata/contains_metadata.rb +0 -54
- data/lib/imw/metadata/dsl.rb +0 -111
- data/lib/imw/metadata/field.rb +0 -37
- data/lib/imw/metadata/has_metadata.rb +0 -98
- data/lib/imw/metadata/has_summary.rb +0 -57
- data/lib/imw/metadata/schema.rb +0 -17
- data/lib/imw/parsers.rb +0 -8
- data/lib/imw/parsers/flat.rb +0 -44
- data/lib/imw/parsers/html_parser.rb +0 -387
- data/lib/imw/parsers/html_parser/matchers.rb +0 -289
- data/lib/imw/parsers/line_parser.rb +0 -87
- data/lib/imw/parsers/regexp_parser.rb +0 -72
- data/lib/imw/repository.rb +0 -12
- data/lib/imw/runner.rb +0 -118
- data/lib/imw/schemes.rb +0 -23
- data/lib/imw/schemes/ftp.rb +0 -142
- data/lib/imw/schemes/hdfs.rb +0 -251
- data/lib/imw/schemes/http.rb +0 -165
- data/lib/imw/schemes/local.rb +0 -409
- data/lib/imw/schemes/remote.rb +0 -119
- data/lib/imw/schemes/s3.rb +0 -143
- data/lib/imw/schemes/sql.rb +0 -129
- data/lib/imw/tools.rb +0 -12
- data/lib/imw/tools/aggregator.rb +0 -148
- data/lib/imw/tools/archiver.rb +0 -220
- data/lib/imw/tools/downloader.rb +0 -63
- data/lib/imw/tools/extension_analyzer.rb +0 -114
- data/lib/imw/tools/summarizer.rb +0 -83
- data/lib/imw/tools/transferer.rb +0 -167
- data/lib/imw/utils.rb +0 -74
- data/lib/imw/utils/dynamically_extendable.rb +0 -137
- data/lib/imw/utils/error.rb +0 -59
- data/lib/imw/utils/extensions/hpricot.rb +0 -34
- data/lib/imw/utils/has_uri.rb +0 -131
- data/lib/imw/utils/log.rb +0 -92
- data/lib/imw/utils/misc.rb +0 -57
- data/lib/imw/utils/paths.rb +0 -146
- data/lib/imw/utils/uri.rb +0 -59
- data/lib/imw/utils/uuid.rb +0 -33
- data/lib/imw/utils/validate.rb +0 -38
- data/lib/imw/utils/version.rb +0 -11
- data/spec/data/formats/delimited/sample.csv +0 -131
- data/spec/data/formats/delimited/sample.tsv +0 -131
- data/spec/data/formats/delimited/with_schema/ace-hardware-locations.tsv +0 -11
- data/spec/data/formats/delimited/with_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -16
- data/spec/data/formats/delimited/with_schema/complete-list-of-starbucks-locations.tsv +0 -11
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -12
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -13
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -22
- data/spec/data/formats/delimited/without_schema/ace-hardware-locations.tsv +0 -10
- data/spec/data/formats/delimited/without_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -15
- data/spec/data/formats/delimited/without_schema/complete-list-of-starbucks-locations.tsv +0 -10
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -11
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -12
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -21
- data/spec/data/formats/excel/sample.xls +0 -0
- data/spec/data/formats/json/sample.json +0 -1
- data/spec/data/formats/none/sample +0 -650
- data/spec/data/formats/sgml/sample.xml +0 -617
- data/spec/data/formats/text/sample.txt +0 -650
- data/spec/data/formats/yaml/sample.yaml +0 -410
- data/spec/data/schema-tabular.yaml +0 -11
- data/spec/imw/archives/rar_spec.rb +0 -16
- data/spec/imw/archives/tar_spec.rb +0 -16
- data/spec/imw/archives/tarbz2_spec.rb +0 -24
- data/spec/imw/archives/targz_spec.rb +0 -21
- data/spec/imw/archives/zip_spec.rb +0 -16
- data/spec/imw/archives_spec.rb +0 -77
- data/spec/imw/compressed_files/bz2_spec.rb +0 -15
- data/spec/imw/compressed_files/compressible_spec.rb +0 -36
- data/spec/imw/compressed_files/gz_spec.rb +0 -15
- data/spec/imw/compressed_files_spec.rb +0 -47
- data/spec/imw/dataset/paths_spec.rb +0 -32
- data/spec/imw/dataset/workflow_spec.rb +0 -41
- data/spec/imw/formats/delimited_spec.rb +0 -44
- data/spec/imw/formats/excel_spec.rb +0 -55
- data/spec/imw/formats/json_spec.rb +0 -18
- data/spec/imw/formats/sgml_spec.rb +0 -24
- data/spec/imw/formats/yaml_spec.rb +0 -19
- data/spec/imw/metadata/contains_metadata_spec.rb +0 -56
- data/spec/imw/metadata/field_spec.rb +0 -25
- data/spec/imw/metadata/has_metadata_spec.rb +0 -58
- data/spec/imw/metadata/has_summary_spec.rb +0 -32
- data/spec/imw/metadata/schema_spec.rb +0 -24
- data/spec/imw/metadata_spec.rb +0 -86
- data/spec/imw/parsers/line_parser_spec.rb +0 -96
- data/spec/imw/parsers/regexp_parser_spec.rb +0 -42
- data/spec/imw/resource_spec.rb +0 -32
- data/spec/imw/schemes/hdfs_spec.rb +0 -67
- data/spec/imw/schemes/http_spec.rb +0 -19
- data/spec/imw/schemes/local_spec.rb +0 -165
- data/spec/imw/schemes/remote_spec.rb +0 -38
- data/spec/imw/schemes/s3_spec.rb +0 -31
- data/spec/imw/schemes/sql_spec.rb +0 -3
- data/spec/imw/tools/aggregator_spec.rb +0 -71
- data/spec/imw/tools/archiver_spec.rb +0 -120
- data/spec/imw/tools/extension_analyzer_spec.rb +0 -153
- data/spec/imw/tools/summarizer_spec.rb +0 -8
- data/spec/imw/tools/transferer_spec.rb +0 -195
- data/spec/imw/utils/dynamically_extendable_spec.rb +0 -69
- data/spec/imw/utils/has_uri_spec.rb +0 -61
- data/spec/imw/utils/paths_spec.rb +0 -10
- data/spec/imw/utils/shared_paths_spec.rb +0 -29
- data/spec/imw_spec.rb +0 -14
- data/spec/rcov.opts +0 -1
- data/spec/spec_helper.rb +0 -31
- data/spec/support/custom_matchers.rb +0 -28
- data/spec/support/file_contents_matcher.rb +0 -30
- data/spec/support/paths_matcher.rb +0 -66
- data/spec/support/random.rb +0 -213
- data/spec/support/without_regard_to_order_matcher.rb +0 -41
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
|
2
|
-
|
|
3
|
-
share_examples_for "an object that manages paths" do
|
|
4
|
-
before do
|
|
5
|
-
@path_manager.add_path :testing, '/testing'
|
|
6
|
-
@path_manager.add_path :first, '/1'
|
|
7
|
-
end
|
|
8
|
-
|
|
9
|
-
it 'returns a string when given a string' do
|
|
10
|
-
@path_manager.path_to('hi').should == 'hi'
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
it 'returns a path when given a registered symbol' do
|
|
14
|
-
@path_manager.path_to(:testing).should == '/testing'
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
it 'raises an error when given a unregistered symbol' do
|
|
18
|
-
lambda { @path_manager.path_to(:foobar) }.should raise_error(IMW::PathError)
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
it 'returns a constructed path when passed a mixture of symbols, strings, and arrays ' do
|
|
22
|
-
@path_manager.path_to( [:testing, 'hi'], [[['there']]]).should == '/testing/hi/there'
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
it 'will correctly expand paths themselves defined via symbols' do
|
|
26
|
-
@path_manager.add_path(:first, :testing, '1')
|
|
27
|
-
@path_manager.path_to(:first).should == '/testing/1'
|
|
28
|
-
end
|
|
29
|
-
end
|
data/spec/imw_spec.rb
DELETED
data/spec/rcov.opts
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
--exclude "bin/*,etc/*,meta/*,gems/*,old/*,spec/*"
|
data/spec/spec_helper.rb
DELETED
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
IMW_ROOT_DIR = File.join(File.expand_path(File.dirname(__FILE__)), '..') unless defined? IMW_ROOT_DIR
|
|
2
|
-
IMW_SPEC_DIR = File.join(IMW_ROOT_DIR, 'spec') unless defined? IMW_SPEC_DIR
|
|
3
|
-
IMW_LIB_DIR = File.join(IMW_ROOT_DIR, 'lib') unless defined? IMW_LIB_DIR
|
|
4
|
-
$: << IMW_LIB_DIR
|
|
5
|
-
|
|
6
|
-
require 'imw'
|
|
7
|
-
require 'spec'
|
|
8
|
-
|
|
9
|
-
Dir[File.dirname(__FILE__) + "/support/**/*.rb"].each { |path| require path }
|
|
10
|
-
|
|
11
|
-
module IMWTest
|
|
12
|
-
TMP_DIR = "/tmp/imwtest" unless defined?(TMP_DIR)
|
|
13
|
-
DATA_DIR = File.join(IMW_SPEC_DIR, 'data') unless defined?(DATA_DIR)
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
Spec::Runner.configure do |config|
|
|
17
|
-
|
|
18
|
-
config.include IMWTest::CustomMatchers
|
|
19
|
-
|
|
20
|
-
config.before do
|
|
21
|
-
FileUtils.mkdir_p IMWTest::TMP_DIR
|
|
22
|
-
FileUtils.cd IMWTest::TMP_DIR
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
config.after do
|
|
26
|
-
FileUtils.rm_rf IMWTest::TMP_DIR
|
|
27
|
-
end
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
module IMWTest
|
|
2
|
-
module CustomMatchers
|
|
3
|
-
|
|
4
|
-
# Check to see whether the given directory (a String) contains the
|
|
5
|
-
# given +paths+
|
|
6
|
-
#
|
|
7
|
-
# @param [Array<String>] paths
|
|
8
|
-
def contain *paths
|
|
9
|
-
paths = paths.flatten
|
|
10
|
-
simple_matcher("contain #{paths.inspect}") do |given, matcher|
|
|
11
|
-
given_contents = Dir[given + "/**/*"].map do |abs_path|
|
|
12
|
-
abs_path[(given.length + 1)..-1]
|
|
13
|
-
end
|
|
14
|
-
matcher.failure_message = "expected #{given} to contain #{paths.inspect}, instead it contained #{given_contents.inspect}"
|
|
15
|
-
matcher.negative_failure_message = "expected #{given} not to contain #{paths.inspect}"
|
|
16
|
-
paths.all? { |path| given_contents.include?(path.gsub(/\/+$/,'')) }
|
|
17
|
-
end
|
|
18
|
-
end
|
|
19
|
-
|
|
20
|
-
def exist
|
|
21
|
-
simple_matcher("exist") do |given, matcher|
|
|
22
|
-
matcher.failure_message = "expected #{given} to exist on disk"
|
|
23
|
-
matcher.failure_message = "expected #{given} not to exist on disk"
|
|
24
|
-
File.exist?(given)
|
|
25
|
-
end
|
|
26
|
-
end
|
|
27
|
-
end
|
|
28
|
-
end
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
module IMWTest
|
|
2
|
-
module CustomMatchers
|
|
3
|
-
|
|
4
|
-
class FileContentsMatcher
|
|
5
|
-
def initialize orig
|
|
6
|
-
@orig = File.expand_path orig
|
|
7
|
-
end
|
|
8
|
-
|
|
9
|
-
def matches? copy
|
|
10
|
-
@copy = File.expand_path copy
|
|
11
|
-
File.compare(@orig,@copy)
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
def failure_message
|
|
15
|
-
"files #{@orig} and #{@copy} are different"
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
def negative_failure_message
|
|
19
|
-
"expected files #{@orig} and #{@copy} to differ"
|
|
20
|
-
end
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
# Matches the contents of one file against another using
|
|
24
|
-
# File.compare.
|
|
25
|
-
def have_contents_matching_those_of path
|
|
26
|
-
FileContentsMatcher.new(path)
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
end
|
|
30
|
-
end
|
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
require 'set'
|
|
2
|
-
|
|
3
|
-
module IMWTest
|
|
4
|
-
|
|
5
|
-
module CustomMatchers
|
|
6
|
-
|
|
7
|
-
class PathsMatcher
|
|
8
|
-
|
|
9
|
-
attr_accessor :given, :given_contents, :given_base, :to_match, :to_match_contents, :to_match_base
|
|
10
|
-
|
|
11
|
-
def initialize given, options={}
|
|
12
|
-
@given_base = options[:given_base] || options[:relative_to]
|
|
13
|
-
@to_match_base = options[:to_match_base]
|
|
14
|
-
@given = given
|
|
15
|
-
@given_contents = get_contents(given, given_base)
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
def matches? to_match
|
|
19
|
-
@to_match = to_match
|
|
20
|
-
@to_match_contents = get_contents(to_match, to_match_base)
|
|
21
|
-
to_match_contents == given_contents
|
|
22
|
-
end
|
|
23
|
-
|
|
24
|
-
def failure_message
|
|
25
|
-
given_string = given_contents.to_a.join("\n\t")
|
|
26
|
-
to_match_string = to_match_contents.to_a.join("\n\t")
|
|
27
|
-
"expected contents to be identical.\n\ngiven #{given.inspect}:\n\t#{given_string}\n\nto match #{to_match}:\n\t#{to_match_string}"
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
def negative_failure_message
|
|
31
|
-
"expected contents of #{given} and #{to_match} to be different"
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
protected
|
|
35
|
-
def get_contents obj, base=nil
|
|
36
|
-
if obj.is_a?(String) || obj.is_a?(Array)
|
|
37
|
-
contents = [obj].flatten.map do |raw_path|
|
|
38
|
-
path = File.expand_path(raw_path)
|
|
39
|
-
if File.directory?(path)
|
|
40
|
-
Dir[path + "/**/*"]
|
|
41
|
-
else
|
|
42
|
-
path
|
|
43
|
-
end
|
|
44
|
-
end.flatten
|
|
45
|
-
else
|
|
46
|
-
# obj is an IMW obj (archive or directory) so it has a
|
|
47
|
-
# contents method
|
|
48
|
-
contents = obj.send(obj.respond_to?(:all_contents) ? :all_contents : :contents)
|
|
49
|
-
end
|
|
50
|
-
if base
|
|
51
|
-
contents.map do |path|
|
|
52
|
-
new_path = path[base.length + 1..-1]
|
|
53
|
-
new_path = nil if !new_path.nil? && new_path.size == 0
|
|
54
|
-
new_path
|
|
55
|
-
end.compact.to_set
|
|
56
|
-
else
|
|
57
|
-
contents.to_set
|
|
58
|
-
end
|
|
59
|
-
end
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
def contain_paths_like given, options={}
|
|
63
|
-
PathsMatcher.new(given, options)
|
|
64
|
-
end
|
|
65
|
-
end
|
|
66
|
-
end
|
data/spec/support/random.rb
DELETED
|
@@ -1,213 +0,0 @@
|
|
|
1
|
-
require 'fileutils'
|
|
2
|
-
|
|
3
|
-
module IMWTest
|
|
4
|
-
module Random
|
|
5
|
-
|
|
6
|
-
STRING_CHARS = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + [' ',' ',' ',' ',' '] unless defined?(STRING_CHARS)
|
|
7
|
-
TEXT_CHARS = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + [' ',' ',' ',' ',' ',"\n"] unless defined?(TEXT_CHARS)
|
|
8
|
-
FILENAME_CHARS = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + ["-","_",' '] unless defined?(FILENAME_CHARS)
|
|
9
|
-
FILENAME_MAX_LENGTH = 9 unless defined?(FILENAME_MAX_LENGTH)
|
|
10
|
-
TEXT_MAX_LENGTH = 1024 unless defined?(TEXT_MAX_LENGTH)
|
|
11
|
-
EXTENSIONS = [
|
|
12
|
-
[/\.csv$/ , :csv_file],
|
|
13
|
-
[/\.xml$/ , :xml_file],
|
|
14
|
-
[/\.html$/ , :html_file],
|
|
15
|
-
[/\.tar\.gz$/ , :targz_file],
|
|
16
|
-
[/\.tar\.bz2$/ , :tarbz2_file],
|
|
17
|
-
[/\.bz2$/ , :bz2_file],
|
|
18
|
-
[/\.gz$/ , :gz_file],
|
|
19
|
-
[/\.tar$/ , :tar_file],
|
|
20
|
-
[/\.rar$/ , :rar_file],
|
|
21
|
-
[/\.zip$/ , :zip_file]
|
|
22
|
-
] unless defined?(EXTENSIONS)
|
|
23
|
-
# Return a random filename. Optional +length+ to set the maximum
|
|
24
|
-
# length of the filename returned.
|
|
25
|
-
def self.basename options = {}
|
|
26
|
-
length = (options[:length] or FILENAME_MAX_LENGTH)
|
|
27
|
-
filename = (1..length).map { |i| FILENAME_CHARS.choice }.join
|
|
28
|
-
|
|
29
|
-
# filenames beginning with hyphens suck
|
|
30
|
-
while (filename[0,1] == '-') do
|
|
31
|
-
filename[0] = FILENAME_CHARS.choice
|
|
32
|
-
end
|
|
33
|
-
filename
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
# Return a random string of text up. Control the length with
|
|
37
|
-
# optional +length+ and also the presence of +newlines+.
|
|
38
|
-
def self.text options = {}
|
|
39
|
-
length = (options[:length] or TEXT_MAX_LENGTH)
|
|
40
|
-
char_pool = options[:newlines] ? TEXT_CHARS : STRING_CHARS
|
|
41
|
-
(1..length).map { |i| char_pool.choice }.join
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
# Create a random file by matching the extension of the given
|
|
45
|
-
# +filename+ or a text file if no match is found.
|
|
46
|
-
def self.file filename
|
|
47
|
-
match = EXTENSIONS.find { |regex,func| regex.match filename }
|
|
48
|
-
match ? self.send(match.last,filename) : self.text_file(filename)
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
# Create a random text file at +filename+ containing a maximum of
|
|
52
|
-
# +length+ characters.
|
|
53
|
-
def self.text_file filename, options = {}
|
|
54
|
-
File.open(filename,'w') { |f| f.write text(:newlines => true) }
|
|
55
|
-
end
|
|
56
|
-
|
|
57
|
-
# Create a comma-separated value file containing random text at
|
|
58
|
-
# +filename+ with the maximum +num_rows+, the given +num_columns+,
|
|
59
|
-
# and the maximum +entry_length+.
|
|
60
|
-
def self.csv_file(filename,num_rows = 500, num_columns = 9, entry_length = 9)
|
|
61
|
-
f = File.open(filename,'w')
|
|
62
|
-
rand(num_rows).times do # rows
|
|
63
|
-
num_columns.times do # columns
|
|
64
|
-
f.write(text(:length => entry_length)) # entry
|
|
65
|
-
f.write ','
|
|
66
|
-
end
|
|
67
|
-
f.write(text(:length => entry_length)) # last entry
|
|
68
|
-
f.write("\n")
|
|
69
|
-
end
|
|
70
|
-
f.close
|
|
71
|
-
end
|
|
72
|
-
|
|
73
|
-
# Create an XML file at +filename+ of the maximum +length+.
|
|
74
|
-
#
|
|
75
|
-
# At the present moment, this file contains random text in a very
|
|
76
|
-
# boring single-element XML tree. Randomizing the tree has not
|
|
77
|
-
# been implemented.
|
|
78
|
-
def self.xml_file filename, options = {}
|
|
79
|
-
options = options.reverse_merge({:max_depth => 5, :starting_depth => 1, :depth => nil, :pretty_print => true})
|
|
80
|
-
File.open(filename,'w') do |file|
|
|
81
|
-
file.write "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
|
|
82
|
-
file.write "<xml>" + text + "</xml>"
|
|
83
|
-
file.close
|
|
84
|
-
end
|
|
85
|
-
end
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
# Create an HTML file at +filename+ of the maximum +length+.
|
|
89
|
-
#
|
|
90
|
-
# At the present moment, this file contains random text in a very
|
|
91
|
-
# boring bare-bones HTML with a single element body. Randomizing
|
|
92
|
-
# the tree has not been implemented.
|
|
93
|
-
def self.html_file(filename, title_length = 100, body_length = 5000)
|
|
94
|
-
f = File.open(filename,'w')
|
|
95
|
-
f.write "<html><head><title>" + string(title_length) + "</title></head><body>" + string(body_length) + "</body></html>"
|
|
96
|
-
f.close
|
|
97
|
-
end
|
|
98
|
-
|
|
99
|
-
# Create a tar archive at the given +filename+ containing random
|
|
100
|
-
# files.
|
|
101
|
-
def self.tar_file filename
|
|
102
|
-
tmpd = File.dirname(filename) + '/dir'
|
|
103
|
-
directory_with_files(tmpd)
|
|
104
|
-
FileUtils.cd(tmpd) {|dir| system("tar -cf file.tar *") }
|
|
105
|
-
FileUtils.cp(tmpd + "/file.tar",filename)
|
|
106
|
-
FileUtils.rm_rf(tmpd)
|
|
107
|
-
end
|
|
108
|
-
|
|
109
|
-
# Create a tar.gz archive at the given +filename+ containing
|
|
110
|
-
# random files.
|
|
111
|
-
def self.targz_file filename
|
|
112
|
-
tar = File.dirname(filename) + "/file.tar"
|
|
113
|
-
targz = tar + ".gz"
|
|
114
|
-
tar_file tar
|
|
115
|
-
system("gzip #{tar}")
|
|
116
|
-
FileUtils.cp(targz,filename)
|
|
117
|
-
FileUtils.rm(targz)
|
|
118
|
-
end
|
|
119
|
-
|
|
120
|
-
# Create a tar.bz2 archive at the given +filename+ containing
|
|
121
|
-
# random files.
|
|
122
|
-
def self.tarbz2_file filename
|
|
123
|
-
tar = File.dirname(filename) + "/file.tar"
|
|
124
|
-
tarbz2 = tar + ".bz2"
|
|
125
|
-
tar_file tar
|
|
126
|
-
system("bzip2 #{tar}")
|
|
127
|
-
FileUtils.cp(tarbz2,filename)
|
|
128
|
-
FileUtils.rm(tarbz2)
|
|
129
|
-
end
|
|
130
|
-
|
|
131
|
-
# Create a .bz2 file at the given +filename+.
|
|
132
|
-
def self.bz2_file filename
|
|
133
|
-
text_path = File.dirname(filename) + "/fake_file"
|
|
134
|
-
text_file(text_path)
|
|
135
|
-
system("bzip2 #{text_path}")
|
|
136
|
-
FileUtils.mv(text_path + ".bz2", filename)
|
|
137
|
-
end
|
|
138
|
-
|
|
139
|
-
# Create a .gz file at the given +filename+.
|
|
140
|
-
def self.gz_file filename
|
|
141
|
-
text_path = File.dirname(filename) + "/fake_file"
|
|
142
|
-
text_file(text_path)
|
|
143
|
-
system("gzip #{text_path}")
|
|
144
|
-
FileUtils.mv(text_path + ".gz", filename)
|
|
145
|
-
end
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
# Create a compressed rar archive at the given +filename+
|
|
149
|
-
# containing random files.
|
|
150
|
-
def self.rar_file filename
|
|
151
|
-
tmpd = File.dirname(filename) + '/dir'
|
|
152
|
-
directory_with_files(tmpd)
|
|
153
|
-
FileUtils.cd(tmpd) {|dir| system("rar a -o+ -inul file.rar *") }
|
|
154
|
-
FileUtils.cp(tmpd + "/file.rar",filename)
|
|
155
|
-
FileUtils.rm_rf(tmpd)
|
|
156
|
-
end
|
|
157
|
-
|
|
158
|
-
# Create a compressed zip archive at the given +filename+
|
|
159
|
-
# containing random files.
|
|
160
|
-
def self.zip_file filename
|
|
161
|
-
tmpd = File.dirname(filename) + '/dir'
|
|
162
|
-
directory_with_files(tmpd)
|
|
163
|
-
FileUtils.cd(tmpd) {|dir| system("zip -qqr file.zip *") }
|
|
164
|
-
FileUtils.cp(tmpd + "/file.zip",filename)
|
|
165
|
-
FileUtils.rm_rf(tmpd)
|
|
166
|
-
end
|
|
167
|
-
|
|
168
|
-
# Creates +directory+ and fills it with random files containing
|
|
169
|
-
# random data.
|
|
170
|
-
#
|
|
171
|
-
# Options (with their default values in parentheses) include:
|
|
172
|
-
#
|
|
173
|
-
# <tt>:extensions</tt> (<tt>[txt,csv,dat,xml]</tt>):: extensions to use. If an extension is known (see <tt>IMWTest::Random::EXTENSIONS</tt>) then appropriately formatted random data will be used If an extension is not known, it will be treated as text. The extension +dir+ will create a directory which will itself be filled with random files in the same way as its parent.
|
|
174
|
-
# <tt>:max_depth</tt> (3):: maximum depth to nest directories
|
|
175
|
-
# <tt>:starting_depth</tt> (1):: the default depth the parent directory is assumed to have
|
|
176
|
-
# <tt>:num_files</tt> (10):: the maximum number of files per directory
|
|
177
|
-
# <tt>:force</tt> (false):: force overwriting of existing directories
|
|
178
|
-
def self.directory_with_files(directory,options = {})
|
|
179
|
-
directory = File.expand_path(directory)
|
|
180
|
-
options = options.reverse_merge({:extensions => ['txt','csv','dat'],:max_depth => 3,:force => false,:starting_depth => 1, :num_files => 3})
|
|
181
|
-
depth = options[:starting_depth]
|
|
182
|
-
|
|
183
|
-
if File.exist?(directory) then
|
|
184
|
-
if options[:force] then
|
|
185
|
-
FileUtils.rm_rf(directory)
|
|
186
|
-
else
|
|
187
|
-
raise "#{directory} already exists"
|
|
188
|
-
end
|
|
189
|
-
end
|
|
190
|
-
FileUtils.mkdir_p(directory)
|
|
191
|
-
|
|
192
|
-
(rand(options[:num_files]) + 2).times do
|
|
193
|
-
ext = options[:extensions].choice
|
|
194
|
-
name = self.basename
|
|
195
|
-
if ext == 'dir' then
|
|
196
|
-
if depth <= options[:max_depth] then
|
|
197
|
-
newd = directory + '/' + name
|
|
198
|
-
FileUtils.mkdir(newd)
|
|
199
|
-
directory_with_files(newd,options.merge({:starting_depth => (depth + 1)}))
|
|
200
|
-
else
|
|
201
|
-
next
|
|
202
|
-
end
|
|
203
|
-
else
|
|
204
|
-
file(directory + '/' + name + '.' + ext)
|
|
205
|
-
end
|
|
206
|
-
end
|
|
207
|
-
end
|
|
208
|
-
|
|
209
|
-
end
|
|
210
|
-
end
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
require 'set'
|
|
2
|
-
require 'imw/utils'
|
|
3
|
-
|
|
4
|
-
module IMWTest
|
|
5
|
-
module CustomMatchers
|
|
6
|
-
|
|
7
|
-
# Match the contents of two arrays without regard to the order
|
|
8
|
-
# of their elements by treating each as a set.
|
|
9
|
-
class WithoutRegardToOrder
|
|
10
|
-
|
|
11
|
-
private
|
|
12
|
-
def initialize known_array
|
|
13
|
-
@known_array = known_array.to_set
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
public
|
|
17
|
-
def matches? array_to_test
|
|
18
|
-
@array_to_test = array_to_test.to_set
|
|
19
|
-
@array_to_test == @known_array
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
def failure_message
|
|
23
|
-
missing_from_array_to_test = "missing from array to test: #{(@known_array - @array_to_test).to_a.quote_items_with "and"}\n"
|
|
24
|
-
missing_from_known_array = "missing from known array: #{(@array_to_test - @known_array).to_a.quote_items_with "and"}\n"
|
|
25
|
-
common_to_both = "common to both: #{(@array_to_test & @known_array).to_a.quote_items_with "and"}\n"
|
|
26
|
-
"expected contents of the arrays to be identical:\n\n#{missing_from_array_to_test}\n#{missing_from_known_array}\n#{common_to_both}"
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
def negative_failure_message
|
|
30
|
-
"expected contents of the arrays to differ."
|
|
31
|
-
end
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
# Check that the contents of one array match another without
|
|
35
|
-
# regard to ordering.
|
|
36
|
-
def match_without_regard_to_order known_array
|
|
37
|
-
WithoutRegardToOrder.new(known_array)
|
|
38
|
-
end
|
|
39
|
-
end
|
|
40
|
-
end
|
|
41
|
-
|