imw 0.2.18 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +7 -26
- data/Gemfile.lock +13 -38
- data/{LICENSE → LICENSE.txt} +1 -1
- data/README.textile +35 -0
- data/Rakefile +45 -22
- data/VERSION +1 -1
- data/examples/foo.rb +19 -0
- data/examples/html_selector.rb +22 -0
- data/examples/nes_game_list.csv +625 -0
- data/examples/nes_gamespot.csv +1371 -0
- data/examples/nes_nintendo.csv +624 -0
- data/examples/nes_unlicensed.csv +89 -0
- data/examples/nes_wikipedia.csv +710 -0
- data/examples/nibbler_test.rb +24 -0
- data/examples/script.rb +19 -0
- data/lib/imw.rb +28 -140
- data/lib/imw/error.rb +9 -0
- data/lib/imw/recordizer.rb +8 -0
- data/lib/imw/recordizer/html_selector_recordizer.rb +86 -0
- data/lib/imw/recordizer/string_slice_recordizer.rb +39 -0
- data/lib/imw/resource.rb +3 -119
- data/lib/imw/serializer.rb +7 -0
- data/lib/imw/serializer/json_serializer.rb +17 -0
- data/lib/imw/uri.rb +41 -0
- data/spec/resource_spec.rb +78 -0
- data/spec/uri_spec.rb +55 -0
- metadata +81 -232
- data/README.rdoc +0 -371
- data/bin/imw +0 -5
- data/bin/tsv_to_json.rb +0 -29
- data/etc/imwrc.rb +0 -26
- data/examples/dataset.rb +0 -12
- data/examples/metadata.yml +0 -10
- data/lib/imw/archives.rb +0 -120
- data/lib/imw/archives/rar.rb +0 -19
- data/lib/imw/archives/tar.rb +0 -19
- data/lib/imw/archives/tarbz2.rb +0 -73
- data/lib/imw/archives/targz.rb +0 -73
- data/lib/imw/archives/zip.rb +0 -51
- data/lib/imw/boot.rb +0 -87
- data/lib/imw/compressed_files.rb +0 -94
- data/lib/imw/compressed_files/bz2.rb +0 -16
- data/lib/imw/compressed_files/compressible.rb +0 -75
- data/lib/imw/compressed_files/gz.rb +0 -16
- data/lib/imw/dataset.rb +0 -125
- data/lib/imw/dataset/paths.rb +0 -29
- data/lib/imw/dataset/workflow.rb +0 -195
- data/lib/imw/formats.rb +0 -33
- data/lib/imw/formats/delimited.rb +0 -170
- data/lib/imw/formats/excel.rb +0 -100
- data/lib/imw/formats/json.rb +0 -41
- data/lib/imw/formats/pdf.rb +0 -71
- data/lib/imw/formats/sgml.rb +0 -69
- data/lib/imw/formats/yaml.rb +0 -41
- data/lib/imw/metadata.rb +0 -83
- data/lib/imw/metadata/contains_metadata.rb +0 -54
- data/lib/imw/metadata/dsl.rb +0 -111
- data/lib/imw/metadata/field.rb +0 -37
- data/lib/imw/metadata/has_metadata.rb +0 -98
- data/lib/imw/metadata/has_summary.rb +0 -57
- data/lib/imw/metadata/schema.rb +0 -17
- data/lib/imw/parsers.rb +0 -8
- data/lib/imw/parsers/flat.rb +0 -44
- data/lib/imw/parsers/html_parser.rb +0 -387
- data/lib/imw/parsers/html_parser/matchers.rb +0 -289
- data/lib/imw/parsers/line_parser.rb +0 -87
- data/lib/imw/parsers/regexp_parser.rb +0 -72
- data/lib/imw/repository.rb +0 -12
- data/lib/imw/runner.rb +0 -118
- data/lib/imw/schemes.rb +0 -23
- data/lib/imw/schemes/ftp.rb +0 -142
- data/lib/imw/schemes/hdfs.rb +0 -251
- data/lib/imw/schemes/http.rb +0 -165
- data/lib/imw/schemes/local.rb +0 -409
- data/lib/imw/schemes/remote.rb +0 -119
- data/lib/imw/schemes/s3.rb +0 -143
- data/lib/imw/schemes/sql.rb +0 -129
- data/lib/imw/tools.rb +0 -12
- data/lib/imw/tools/aggregator.rb +0 -148
- data/lib/imw/tools/archiver.rb +0 -220
- data/lib/imw/tools/downloader.rb +0 -63
- data/lib/imw/tools/extension_analyzer.rb +0 -114
- data/lib/imw/tools/summarizer.rb +0 -83
- data/lib/imw/tools/transferer.rb +0 -167
- data/lib/imw/utils.rb +0 -74
- data/lib/imw/utils/dynamically_extendable.rb +0 -137
- data/lib/imw/utils/error.rb +0 -59
- data/lib/imw/utils/extensions/hpricot.rb +0 -34
- data/lib/imw/utils/has_uri.rb +0 -131
- data/lib/imw/utils/log.rb +0 -92
- data/lib/imw/utils/misc.rb +0 -57
- data/lib/imw/utils/paths.rb +0 -146
- data/lib/imw/utils/uri.rb +0 -59
- data/lib/imw/utils/uuid.rb +0 -33
- data/lib/imw/utils/validate.rb +0 -38
- data/lib/imw/utils/version.rb +0 -11
- data/spec/data/formats/delimited/sample.csv +0 -131
- data/spec/data/formats/delimited/sample.tsv +0 -131
- data/spec/data/formats/delimited/with_schema/ace-hardware-locations.tsv +0 -11
- data/spec/data/formats/delimited/with_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -16
- data/spec/data/formats/delimited/with_schema/complete-list-of-starbucks-locations.tsv +0 -11
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -12
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -13
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -22
- data/spec/data/formats/delimited/without_schema/ace-hardware-locations.tsv +0 -10
- data/spec/data/formats/delimited/without_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -15
- data/spec/data/formats/delimited/without_schema/complete-list-of-starbucks-locations.tsv +0 -10
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -11
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -12
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -21
- data/spec/data/formats/excel/sample.xls +0 -0
- data/spec/data/formats/json/sample.json +0 -1
- data/spec/data/formats/none/sample +0 -650
- data/spec/data/formats/sgml/sample.xml +0 -617
- data/spec/data/formats/text/sample.txt +0 -650
- data/spec/data/formats/yaml/sample.yaml +0 -410
- data/spec/data/schema-tabular.yaml +0 -11
- data/spec/imw/archives/rar_spec.rb +0 -16
- data/spec/imw/archives/tar_spec.rb +0 -16
- data/spec/imw/archives/tarbz2_spec.rb +0 -24
- data/spec/imw/archives/targz_spec.rb +0 -21
- data/spec/imw/archives/zip_spec.rb +0 -16
- data/spec/imw/archives_spec.rb +0 -77
- data/spec/imw/compressed_files/bz2_spec.rb +0 -15
- data/spec/imw/compressed_files/compressible_spec.rb +0 -36
- data/spec/imw/compressed_files/gz_spec.rb +0 -15
- data/spec/imw/compressed_files_spec.rb +0 -47
- data/spec/imw/dataset/paths_spec.rb +0 -32
- data/spec/imw/dataset/workflow_spec.rb +0 -41
- data/spec/imw/formats/delimited_spec.rb +0 -44
- data/spec/imw/formats/excel_spec.rb +0 -55
- data/spec/imw/formats/json_spec.rb +0 -18
- data/spec/imw/formats/sgml_spec.rb +0 -24
- data/spec/imw/formats/yaml_spec.rb +0 -19
- data/spec/imw/metadata/contains_metadata_spec.rb +0 -56
- data/spec/imw/metadata/field_spec.rb +0 -25
- data/spec/imw/metadata/has_metadata_spec.rb +0 -58
- data/spec/imw/metadata/has_summary_spec.rb +0 -32
- data/spec/imw/metadata/schema_spec.rb +0 -24
- data/spec/imw/metadata_spec.rb +0 -86
- data/spec/imw/parsers/line_parser_spec.rb +0 -96
- data/spec/imw/parsers/regexp_parser_spec.rb +0 -42
- data/spec/imw/resource_spec.rb +0 -32
- data/spec/imw/schemes/hdfs_spec.rb +0 -67
- data/spec/imw/schemes/http_spec.rb +0 -19
- data/spec/imw/schemes/local_spec.rb +0 -165
- data/spec/imw/schemes/remote_spec.rb +0 -38
- data/spec/imw/schemes/s3_spec.rb +0 -31
- data/spec/imw/schemes/sql_spec.rb +0 -3
- data/spec/imw/tools/aggregator_spec.rb +0 -71
- data/spec/imw/tools/archiver_spec.rb +0 -120
- data/spec/imw/tools/extension_analyzer_spec.rb +0 -153
- data/spec/imw/tools/summarizer_spec.rb +0 -8
- data/spec/imw/tools/transferer_spec.rb +0 -195
- data/spec/imw/utils/dynamically_extendable_spec.rb +0 -69
- data/spec/imw/utils/has_uri_spec.rb +0 -61
- data/spec/imw/utils/paths_spec.rb +0 -10
- data/spec/imw/utils/shared_paths_spec.rb +0 -29
- data/spec/imw_spec.rb +0 -14
- data/spec/rcov.opts +0 -1
- data/spec/spec_helper.rb +0 -31
- data/spec/support/custom_matchers.rb +0 -28
- data/spec/support/file_contents_matcher.rb +0 -30
- data/spec/support/paths_matcher.rb +0 -66
- data/spec/support/random.rb +0 -213
- data/spec/support/without_regard_to_order_matcher.rb +0 -41
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
-
require File.dirname(__FILE__) + "/../archives_spec"
|
|
3
|
-
require File.dirname(__FILE__) + "/../compressed_files_spec"
|
|
4
|
-
|
|
5
|
-
describe IMW::Archives::Tarbz2 do
|
|
6
|
-
@cannot_append = true
|
|
7
|
-
before do
|
|
8
|
-
@extension = 'tar.bz2'
|
|
9
|
-
end
|
|
10
|
-
|
|
11
|
-
it_should_behave_like 'an archive of files'
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
describe IMW::Archives::Tarbz2 do
|
|
15
|
-
before do
|
|
16
|
-
@extension = 'tar.bz2'
|
|
17
|
-
end
|
|
18
|
-
it_should_behave_like 'a compressed file'
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
-
require File.dirname(__FILE__) + "/../archives_spec"
|
|
3
|
-
require File.dirname(__FILE__) + "/../compressed_files_spec"
|
|
4
|
-
|
|
5
|
-
describe IMW::Archives::Targz do
|
|
6
|
-
@cannot_append = true
|
|
7
|
-
before do
|
|
8
|
-
@extension = 'tar.gz'
|
|
9
|
-
end
|
|
10
|
-
|
|
11
|
-
it_should_behave_like 'an archive of files'
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
describe IMW::Archives::Targz do
|
|
16
|
-
before do
|
|
17
|
-
@extension = 'tar.gz'
|
|
18
|
-
end
|
|
19
|
-
|
|
20
|
-
it_should_behave_like 'a compressed file'
|
|
21
|
-
end
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
-
require File.dirname(__FILE__) + "/../archives_spec"
|
|
3
|
-
|
|
4
|
-
describe IMW::Archives::Zip do
|
|
5
|
-
|
|
6
|
-
before do
|
|
7
|
-
@extension = 'zip'
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
it_should_behave_like 'an archive of files'
|
|
11
|
-
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
data/spec/imw/archives_spec.rb
DELETED
|
@@ -1,77 +0,0 @@
|
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../spec_helper')
|
|
2
|
-
|
|
3
|
-
# To use this shared example group define instance variables
|
|
4
|
-
# <tt>@extension</tt> and <tt>@cannot_append</tt> in your tests:
|
|
5
|
-
#
|
|
6
|
-
# @cannot_append = true
|
|
7
|
-
# before do
|
|
8
|
-
# # Notice that there is NO leading '.'
|
|
9
|
-
# @extension = 'tar.bz2'
|
|
10
|
-
# end
|
|
11
|
-
#
|
|
12
|
-
# it_should_behave_like "an archive of files"
|
|
13
|
-
#
|
|
14
|
-
# The <tt>@extension</tt> should correspond to an IMW module with a
|
|
15
|
-
# registered handler.
|
|
16
|
-
#
|
|
17
|
-
# If <tt>@cannot_append</tt> evaluates to true then the specs for
|
|
18
|
-
# appending to files will check for an error (this is because one
|
|
19
|
-
# typically cannot append to compressed archives). This instance
|
|
20
|
-
# variable should be defined OUTSIDE a before block.
|
|
21
|
-
|
|
22
|
-
share_examples_for "an archive of files" do
|
|
23
|
-
|
|
24
|
-
before do
|
|
25
|
-
@root = File.join(IMWTest::TMP_DIR, 'an_archive_of_files_shared_example_group')
|
|
26
|
-
@initial_directory = 'initial'
|
|
27
|
-
@appending_directory = 'appending'
|
|
28
|
-
@extraction_directory = 'extraction'
|
|
29
|
-
FileUtils.mkdir_p(@root)
|
|
30
|
-
FileUtils.cd(@root)
|
|
31
|
-
IMWTest::Random.directory_with_files(@initial_directory)
|
|
32
|
-
IMWTest::Random.directory_with_files(@appending_directory)
|
|
33
|
-
FileUtils.mkdir(@extraction_directory)
|
|
34
|
-
@archive = IMW::Resource.new("archive.#{@extension}") # define @extension in another spec
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
it "can create an archive" do
|
|
38
|
-
@archive.create(*Dir[@initial_directory + '/**/*'])
|
|
39
|
-
@archive.should contain_paths_like(@initial_directory, :relative_to => @root)
|
|
40
|
-
end
|
|
41
|
-
|
|
42
|
-
it "returns an IMW resource when creating" do
|
|
43
|
-
@archive.create(*Dir[@initial_directory + '/**/*']).class.should == IMW::Resource
|
|
44
|
-
end
|
|
45
|
-
|
|
46
|
-
if @cannot_append
|
|
47
|
-
it "cannot append to an archive which already exists" do
|
|
48
|
-
@archive.create(*Dir[@initial_directory + "/**/*"])
|
|
49
|
-
lambda { @archive.append(*Dir[@appending_directory + "/**/*"]) }.should raise_error(IMW::Error)
|
|
50
|
-
end
|
|
51
|
-
else
|
|
52
|
-
it "can append to an archive which already exists" do
|
|
53
|
-
@archive.create(*Dir[@initial_directory + "/**/*"])
|
|
54
|
-
@archive.append(*Dir[@appending_directory + "/**/*"])
|
|
55
|
-
@archive.should contain_paths_like([@initial_directory,@appending_directory], :relative_to => @root)
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
it "can append to an archive which doesn't already exist" do
|
|
59
|
-
@archive.append(*Dir[@appending_directory + "/**/*"])
|
|
60
|
-
@archive.should contain_paths_like(@appending_directory, :relative_to => @root)
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
it "returns an IMW resource when appending" do
|
|
64
|
-
@archive.append(*Dir[@appending_directory + "/**/*"]).class.should == IMW::Resource
|
|
65
|
-
end
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
it "can extract files which match the original ones it archived" do
|
|
70
|
-
@archive.create(*Dir[@initial_directory + "/**/*"])
|
|
71
|
-
FileUtils.cd @extraction_directory do
|
|
72
|
-
@archive.extract
|
|
73
|
-
end
|
|
74
|
-
@initial_directory.should contain_paths_like(@extraction_directory, :given_base => File.join(@root, @extraction_directory, @initial_directory), :to_match_base => File.join(@root, @initial_directory))
|
|
75
|
-
end
|
|
76
|
-
|
|
77
|
-
end
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
-
require File.dirname(__FILE__) + "/../compressed_files_spec"
|
|
3
|
-
|
|
4
|
-
describe IMW::CompressedFiles::Bz2 do
|
|
5
|
-
|
|
6
|
-
before do
|
|
7
|
-
@extension = 'bz2'
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
it_should_behave_like 'a compressed file'
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
|
2
|
-
|
|
3
|
-
describe IMW::CompressedFiles::Compressible do
|
|
4
|
-
|
|
5
|
-
before do
|
|
6
|
-
IMWTest::Random.file('foobar.txt')
|
|
7
|
-
@resource = IMW::Resource.new('foobar.txt')
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
it "should extend a local resource " do
|
|
11
|
-
@resource.is_compressible?.should be_true
|
|
12
|
-
@resource.is_compressed?.should be_false
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
it "can compress a resource in place" do
|
|
16
|
-
compressed_file = @resource.compress!
|
|
17
|
-
|
|
18
|
-
# only the compressed file should now exist
|
|
19
|
-
compressed_file.exist?.should be_true
|
|
20
|
-
@resource.exist?.should be_false
|
|
21
|
-
|
|
22
|
-
compressed_file.is_compressed?.should be_true
|
|
23
|
-
compressed_file.is_compressible?.should be_false
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
it "can compress a resource without overwriting the original file" do
|
|
27
|
-
compressed_file = @resource.compress
|
|
28
|
-
|
|
29
|
-
# both files should now exist
|
|
30
|
-
compressed_file.exist?.should be_true
|
|
31
|
-
@resource.exist?.should be_true
|
|
32
|
-
|
|
33
|
-
compressed_file.is_compressed?.should be_true
|
|
34
|
-
compressed_file.is_compressible?.should be_false
|
|
35
|
-
end
|
|
36
|
-
end
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
-
require File.dirname(__FILE__) + "/../compressed_files_spec"
|
|
3
|
-
|
|
4
|
-
describe IMW::CompressedFiles::Gz do
|
|
5
|
-
|
|
6
|
-
before do
|
|
7
|
-
@extension = 'gz'
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
it_should_behave_like 'a compressed file'
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../spec_helper')
|
|
2
|
-
|
|
3
|
-
# To use this shared example group define an instance variable
|
|
4
|
-
# <tt>@extension</tt> in your tests:
|
|
5
|
-
#
|
|
6
|
-
# before do
|
|
7
|
-
# # Notice that there is NO leading '.'
|
|
8
|
-
# @extension = 'gz'
|
|
9
|
-
# end
|
|
10
|
-
#
|
|
11
|
-
# it_should_behave_like "a compressed file"
|
|
12
|
-
#
|
|
13
|
-
# The <tt>@extension</tt> should correspond to an IMW module with a
|
|
14
|
-
# registered handler.
|
|
15
|
-
|
|
16
|
-
share_examples_for "a compressed file" do
|
|
17
|
-
|
|
18
|
-
before do
|
|
19
|
-
@root = File.join(IMWTest::TMP_DIR, 'a_compressed_file_shared_example_group')
|
|
20
|
-
FileUtils.mkdir_p(@root)
|
|
21
|
-
FileUtils.cd(@root)
|
|
22
|
-
IMWTest::Random.file("compressed_file.#{@extension}") # define @extension in another spec
|
|
23
|
-
@compressed_file = IMW::Resource.new("compressed_file.#{@extension}")
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
it "should know that it is compressed" do
|
|
27
|
-
@compressed_file.is_compressed?.should be_true
|
|
28
|
-
@compressed_file.is_compressible?.should be_false
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
it "can decompress the file in place" do
|
|
32
|
-
uncompressed_file = @compressed_file.decompress!
|
|
33
|
-
@compressed_file.exist?.should be_false
|
|
34
|
-
uncompressed_file.exist?.should be_true
|
|
35
|
-
uncompressed_file.is_compressed?.should be_false
|
|
36
|
-
uncompressed_file.is_compressible?.should be_true
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
it "can decompress the file without deleting the original file" do
|
|
40
|
-
uncompressed_file = @compressed_file.decompress
|
|
41
|
-
@compressed_file.exist?.should be_true
|
|
42
|
-
uncompressed_file.exist?.should be_true
|
|
43
|
-
uncompressed_file.is_compressed?.should be_false
|
|
44
|
-
uncompressed_file.is_compressible?.should be_true
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
end
|
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
-
require File.dirname(__FILE__) + "/../utils/shared_paths_spec"
|
|
3
|
-
|
|
4
|
-
describe IMW::Dataset do
|
|
5
|
-
|
|
6
|
-
describe 'setting default paths' do
|
|
7
|
-
|
|
8
|
-
before do
|
|
9
|
-
@dataset = IMW::Dataset.new(:testing, :root => IMWTest::TMP_DIR)
|
|
10
|
-
end
|
|
11
|
-
|
|
12
|
-
it "should set its root path to the value given" do
|
|
13
|
-
@dataset.path_to(:root).should == IMWTest::TMP_DIR
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
it "should set paths for each workflow dir" do
|
|
17
|
-
@dataset.workflow_dirs.each do |dir|
|
|
18
|
-
@dataset.path_to(dir).should == File.join(IMWTest::TMP_DIR, dir.to_s)
|
|
19
|
-
end
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
before do
|
|
23
|
-
@path_manager = @dataset
|
|
24
|
-
end
|
|
25
|
-
it_should_behave_like "an object that manages paths"
|
|
26
|
-
|
|
27
|
-
end
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
-
require 'imw/dataset/workflow'
|
|
3
|
-
describe IMW::Workflow do
|
|
4
|
-
|
|
5
|
-
before do
|
|
6
|
-
@dataset = IMW::Dataset.new :testing
|
|
7
|
-
end
|
|
8
|
-
|
|
9
|
-
it "should dynamically define methods for each workflow step" do
|
|
10
|
-
@dataset.workflow_steps.each do |step|
|
|
11
|
-
@dataset.respond_to?(step).should be_true
|
|
12
|
-
end
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
describe "initializing workflow" do
|
|
16
|
-
it "should not make any directories if no tasks are invoked" do
|
|
17
|
-
@dataset.path_to(:root).should_not contain(*@dataset.workflow_dirs.map(&:to_s))
|
|
18
|
-
end
|
|
19
|
-
|
|
20
|
-
it "should only make directories once a task is invoked" do
|
|
21
|
-
@dataset[:initialize].invoke
|
|
22
|
-
@dataset.path_to(:root).should contain(*@dataset.workflow_dirs.map(&:to_s))
|
|
23
|
-
end
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
describe "cleaning workflow directories" do
|
|
27
|
-
it "should clean without error even if there's nothing to clean" do
|
|
28
|
-
@dataset[:clean].invoke
|
|
29
|
-
@dataset.path_to(:root).should_not contain(*@dataset.workflow_dirs.map(&:to_s))
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
it "should remove workflow directories when invoked" do
|
|
33
|
-
@dataset[:initialize].invoke
|
|
34
|
-
IMWTest::Random.file(@dataset.path_to(:ripd, 'foobar.txt')) # put a file in
|
|
35
|
-
@dataset[:clean].invoke
|
|
36
|
-
@dataset.path_to(:root).should_not contain(*@dataset.workflow_dirs.map(&:to_s))
|
|
37
|
-
end
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
end
|
|
41
|
-
|
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
|
2
|
-
|
|
3
|
-
describe IMW::Formats::Csv do
|
|
4
|
-
# we don't test Tsv as the differences from Csv are trivial and
|
|
5
|
-
# effect only code within the FasterCSV library
|
|
6
|
-
|
|
7
|
-
before do
|
|
8
|
-
@sample = IMW.open(File.join(IMWTest::DATA_DIR, 'formats/delimited/sample.csv'))
|
|
9
|
-
end
|
|
10
|
-
|
|
11
|
-
it "should be able to parse the CSV" do
|
|
12
|
-
@sample.load[1].last.should == 'lemurinus'
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
it "should be able to write CSV" do
|
|
16
|
-
data = [['foobar', 1, 2], ['bazbooz', 3, 4]]
|
|
17
|
-
IMW.open!('test.csv') { |f| f << data }
|
|
18
|
-
IMW.open('test.csv').load[1].last.should == "4"
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
describe "guessing a schema" do
|
|
22
|
-
|
|
23
|
-
Dir[File.join(IMWTest::DATA_DIR, 'formats/delimited/with_schema/*')].each do |path|
|
|
24
|
-
it "should correctly guess that with_schema/#{File.basename(path)} has headers in its first row" do
|
|
25
|
-
IMW.open(path).fields_in_first_line?.should == true
|
|
26
|
-
end
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
Dir[File.join(IMWTest::DATA_DIR, 'formats/delimited/without_schema/*')].each do |path|
|
|
30
|
-
it "should correctly guess that without_schema/#{File.basename(path)} does not have headers in its first row" do
|
|
31
|
-
IMW.open(path).fields_in_first_line?.should == false
|
|
32
|
-
end
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
it "should automatically set the headers on a source with guessed headers" do
|
|
36
|
-
resource = IMW.open(Dir[File.join(IMWTest::DATA_DIR, 'formats/delimited/with_schema/*')].first)
|
|
37
|
-
resource.guess_fields!
|
|
38
|
-
resource.delimited_options[:headers].class.should == Array
|
|
39
|
-
resource.schema.should_not be_empty
|
|
40
|
-
end
|
|
41
|
-
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
end
|
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
|
2
|
-
|
|
3
|
-
describe IMW::Formats::Excel do
|
|
4
|
-
|
|
5
|
-
before do
|
|
6
|
-
@sample = IMW.open(File.join(IMWTest::DATA_DIR, 'formats/excel/sample.xls'))
|
|
7
|
-
end
|
|
8
|
-
|
|
9
|
-
it "should be able to parse the Excel document" do
|
|
10
|
-
@sample.load[1].last.should == 'lemurinus'
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
it "should be able to create a snippet" do
|
|
14
|
-
@sample.snippet[1].last.should == 'lemurinus'
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
# it "should be able to write CSV" do
|
|
18
|
-
# data = [['foobar', 1, 2], ['bazbooz', 3, 4]]
|
|
19
|
-
# IMW.open!('test.csv').emit(data)
|
|
20
|
-
# IMW.open('test.csv').load[1].last.should == "4"
|
|
21
|
-
# end
|
|
22
|
-
|
|
23
|
-
# it "should raise an error on an invalid schema" do
|
|
24
|
-
# lambda { @sample.schema = [{:name => :foobar, :has_many => {:associations => [:foo, :bar]}}] }.should raise_error(IMW::SchemaError)
|
|
25
|
-
# end
|
|
26
|
-
|
|
27
|
-
# it "should accept a valid schema" do
|
|
28
|
-
# @sample.schema = [:foo, :bar, :baz]
|
|
29
|
-
# @sample.schema.should == [{:name => 'foo'}, {:name => 'bar'}, {:name => 'baz'}]
|
|
30
|
-
# end
|
|
31
|
-
|
|
32
|
-
# describe "guessing a schema" do
|
|
33
|
-
|
|
34
|
-
# Dir[File.join(IMWTest::DATA_DIR, 'formats/delimited/with_schema/*')].each do |path|
|
|
35
|
-
# it "should correctly guess that with_schema/#{File.basename(path)} has headers in its first row" do
|
|
36
|
-
# IMW.open(path).headers_in_first_line?.should == true
|
|
37
|
-
# end
|
|
38
|
-
# end
|
|
39
|
-
|
|
40
|
-
# Dir[File.join(IMWTest::DATA_DIR, 'formats/delimited/without_schema/*')].each do |path|
|
|
41
|
-
# it "should correctly guess that without_schema/#{File.basename(path)} does not have headers in its first row" do
|
|
42
|
-
# IMW.open(path).headers_in_first_line?.should == false
|
|
43
|
-
# end
|
|
44
|
-
# end
|
|
45
|
-
|
|
46
|
-
# it "should automatically set the headers on a source with guessed headers" do
|
|
47
|
-
# resource = IMW.open(Dir[File.join(IMWTest::DATA_DIR, 'formats/delimited/with_schema/*')].first)
|
|
48
|
-
# resource.guess_schema!
|
|
49
|
-
# resource.delimited_options[:headers].class.should == Array
|
|
50
|
-
# resource.schema.should_not be_empty
|
|
51
|
-
# end
|
|
52
|
-
|
|
53
|
-
# end
|
|
54
|
-
|
|
55
|
-
end
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
|
2
|
-
|
|
3
|
-
describe IMW::Formats::Json do
|
|
4
|
-
|
|
5
|
-
before do
|
|
6
|
-
@sample = IMW.open(File.join(IMWTest::DATA_DIR, 'formats/json/sample.json'))
|
|
7
|
-
end
|
|
8
|
-
|
|
9
|
-
it "should be able to parse the JSON" do
|
|
10
|
-
@sample.load["Aotus"].first['id'].should == 1
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
it "should be able to write JSON" do
|
|
14
|
-
IMW.open!('test.json') { |f| f.emit({ 'foobar' => 3, 'bazbooz' => 4 }) }
|
|
15
|
-
IMW.open('test.json').load['foobar'].should == 3
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
end
|