imw 0.2.18 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +7 -26
- data/Gemfile.lock +13 -38
- data/{LICENSE → LICENSE.txt} +1 -1
- data/README.textile +35 -0
- data/Rakefile +45 -22
- data/VERSION +1 -1
- data/examples/foo.rb +19 -0
- data/examples/html_selector.rb +22 -0
- data/examples/nes_game_list.csv +625 -0
- data/examples/nes_gamespot.csv +1371 -0
- data/examples/nes_nintendo.csv +624 -0
- data/examples/nes_unlicensed.csv +89 -0
- data/examples/nes_wikipedia.csv +710 -0
- data/examples/nibbler_test.rb +24 -0
- data/examples/script.rb +19 -0
- data/lib/imw.rb +28 -140
- data/lib/imw/error.rb +9 -0
- data/lib/imw/recordizer.rb +8 -0
- data/lib/imw/recordizer/html_selector_recordizer.rb +86 -0
- data/lib/imw/recordizer/string_slice_recordizer.rb +39 -0
- data/lib/imw/resource.rb +3 -119
- data/lib/imw/serializer.rb +7 -0
- data/lib/imw/serializer/json_serializer.rb +17 -0
- data/lib/imw/uri.rb +41 -0
- data/spec/resource_spec.rb +78 -0
- data/spec/uri_spec.rb +55 -0
- metadata +81 -232
- data/README.rdoc +0 -371
- data/bin/imw +0 -5
- data/bin/tsv_to_json.rb +0 -29
- data/etc/imwrc.rb +0 -26
- data/examples/dataset.rb +0 -12
- data/examples/metadata.yml +0 -10
- data/lib/imw/archives.rb +0 -120
- data/lib/imw/archives/rar.rb +0 -19
- data/lib/imw/archives/tar.rb +0 -19
- data/lib/imw/archives/tarbz2.rb +0 -73
- data/lib/imw/archives/targz.rb +0 -73
- data/lib/imw/archives/zip.rb +0 -51
- data/lib/imw/boot.rb +0 -87
- data/lib/imw/compressed_files.rb +0 -94
- data/lib/imw/compressed_files/bz2.rb +0 -16
- data/lib/imw/compressed_files/compressible.rb +0 -75
- data/lib/imw/compressed_files/gz.rb +0 -16
- data/lib/imw/dataset.rb +0 -125
- data/lib/imw/dataset/paths.rb +0 -29
- data/lib/imw/dataset/workflow.rb +0 -195
- data/lib/imw/formats.rb +0 -33
- data/lib/imw/formats/delimited.rb +0 -170
- data/lib/imw/formats/excel.rb +0 -100
- data/lib/imw/formats/json.rb +0 -41
- data/lib/imw/formats/pdf.rb +0 -71
- data/lib/imw/formats/sgml.rb +0 -69
- data/lib/imw/formats/yaml.rb +0 -41
- data/lib/imw/metadata.rb +0 -83
- data/lib/imw/metadata/contains_metadata.rb +0 -54
- data/lib/imw/metadata/dsl.rb +0 -111
- data/lib/imw/metadata/field.rb +0 -37
- data/lib/imw/metadata/has_metadata.rb +0 -98
- data/lib/imw/metadata/has_summary.rb +0 -57
- data/lib/imw/metadata/schema.rb +0 -17
- data/lib/imw/parsers.rb +0 -8
- data/lib/imw/parsers/flat.rb +0 -44
- data/lib/imw/parsers/html_parser.rb +0 -387
- data/lib/imw/parsers/html_parser/matchers.rb +0 -289
- data/lib/imw/parsers/line_parser.rb +0 -87
- data/lib/imw/parsers/regexp_parser.rb +0 -72
- data/lib/imw/repository.rb +0 -12
- data/lib/imw/runner.rb +0 -118
- data/lib/imw/schemes.rb +0 -23
- data/lib/imw/schemes/ftp.rb +0 -142
- data/lib/imw/schemes/hdfs.rb +0 -251
- data/lib/imw/schemes/http.rb +0 -165
- data/lib/imw/schemes/local.rb +0 -409
- data/lib/imw/schemes/remote.rb +0 -119
- data/lib/imw/schemes/s3.rb +0 -143
- data/lib/imw/schemes/sql.rb +0 -129
- data/lib/imw/tools.rb +0 -12
- data/lib/imw/tools/aggregator.rb +0 -148
- data/lib/imw/tools/archiver.rb +0 -220
- data/lib/imw/tools/downloader.rb +0 -63
- data/lib/imw/tools/extension_analyzer.rb +0 -114
- data/lib/imw/tools/summarizer.rb +0 -83
- data/lib/imw/tools/transferer.rb +0 -167
- data/lib/imw/utils.rb +0 -74
- data/lib/imw/utils/dynamically_extendable.rb +0 -137
- data/lib/imw/utils/error.rb +0 -59
- data/lib/imw/utils/extensions/hpricot.rb +0 -34
- data/lib/imw/utils/has_uri.rb +0 -131
- data/lib/imw/utils/log.rb +0 -92
- data/lib/imw/utils/misc.rb +0 -57
- data/lib/imw/utils/paths.rb +0 -146
- data/lib/imw/utils/uri.rb +0 -59
- data/lib/imw/utils/uuid.rb +0 -33
- data/lib/imw/utils/validate.rb +0 -38
- data/lib/imw/utils/version.rb +0 -11
- data/spec/data/formats/delimited/sample.csv +0 -131
- data/spec/data/formats/delimited/sample.tsv +0 -131
- data/spec/data/formats/delimited/with_schema/ace-hardware-locations.tsv +0 -11
- data/spec/data/formats/delimited/with_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -16
- data/spec/data/formats/delimited/with_schema/complete-list-of-starbucks-locations.tsv +0 -11
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -12
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -13
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -22
- data/spec/data/formats/delimited/without_schema/ace-hardware-locations.tsv +0 -10
- data/spec/data/formats/delimited/without_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -15
- data/spec/data/formats/delimited/without_schema/complete-list-of-starbucks-locations.tsv +0 -10
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -11
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -12
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -21
- data/spec/data/formats/excel/sample.xls +0 -0
- data/spec/data/formats/json/sample.json +0 -1
- data/spec/data/formats/none/sample +0 -650
- data/spec/data/formats/sgml/sample.xml +0 -617
- data/spec/data/formats/text/sample.txt +0 -650
- data/spec/data/formats/yaml/sample.yaml +0 -410
- data/spec/data/schema-tabular.yaml +0 -11
- data/spec/imw/archives/rar_spec.rb +0 -16
- data/spec/imw/archives/tar_spec.rb +0 -16
- data/spec/imw/archives/tarbz2_spec.rb +0 -24
- data/spec/imw/archives/targz_spec.rb +0 -21
- data/spec/imw/archives/zip_spec.rb +0 -16
- data/spec/imw/archives_spec.rb +0 -77
- data/spec/imw/compressed_files/bz2_spec.rb +0 -15
- data/spec/imw/compressed_files/compressible_spec.rb +0 -36
- data/spec/imw/compressed_files/gz_spec.rb +0 -15
- data/spec/imw/compressed_files_spec.rb +0 -47
- data/spec/imw/dataset/paths_spec.rb +0 -32
- data/spec/imw/dataset/workflow_spec.rb +0 -41
- data/spec/imw/formats/delimited_spec.rb +0 -44
- data/spec/imw/formats/excel_spec.rb +0 -55
- data/spec/imw/formats/json_spec.rb +0 -18
- data/spec/imw/formats/sgml_spec.rb +0 -24
- data/spec/imw/formats/yaml_spec.rb +0 -19
- data/spec/imw/metadata/contains_metadata_spec.rb +0 -56
- data/spec/imw/metadata/field_spec.rb +0 -25
- data/spec/imw/metadata/has_metadata_spec.rb +0 -58
- data/spec/imw/metadata/has_summary_spec.rb +0 -32
- data/spec/imw/metadata/schema_spec.rb +0 -24
- data/spec/imw/metadata_spec.rb +0 -86
- data/spec/imw/parsers/line_parser_spec.rb +0 -96
- data/spec/imw/parsers/regexp_parser_spec.rb +0 -42
- data/spec/imw/resource_spec.rb +0 -32
- data/spec/imw/schemes/hdfs_spec.rb +0 -67
- data/spec/imw/schemes/http_spec.rb +0 -19
- data/spec/imw/schemes/local_spec.rb +0 -165
- data/spec/imw/schemes/remote_spec.rb +0 -38
- data/spec/imw/schemes/s3_spec.rb +0 -31
- data/spec/imw/schemes/sql_spec.rb +0 -3
- data/spec/imw/tools/aggregator_spec.rb +0 -71
- data/spec/imw/tools/archiver_spec.rb +0 -120
- data/spec/imw/tools/extension_analyzer_spec.rb +0 -153
- data/spec/imw/tools/summarizer_spec.rb +0 -8
- data/spec/imw/tools/transferer_spec.rb +0 -195
- data/spec/imw/utils/dynamically_extendable_spec.rb +0 -69
- data/spec/imw/utils/has_uri_spec.rb +0 -61
- data/spec/imw/utils/paths_spec.rb +0 -10
- data/spec/imw/utils/shared_paths_spec.rb +0 -29
- data/spec/imw_spec.rb +0 -14
- data/spec/rcov.opts +0 -1
- data/spec/spec_helper.rb +0 -31
- data/spec/support/custom_matchers.rb +0 -28
- data/spec/support/file_contents_matcher.rb +0 -30
- data/spec/support/paths_matcher.rb +0 -66
- data/spec/support/random.rb +0 -213
- data/spec/support/without_regard_to_order_matcher.rb +0 -41
data/spec/imw/resource_spec.rb
DELETED
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
require File.dirname(__FILE__) + "/../spec_helper"
|
|
2
|
-
|
|
3
|
-
describe IMW::Resource do
|
|
4
|
-
|
|
5
|
-
describe "handling missing methods" do
|
|
6
|
-
before do
|
|
7
|
-
@resource = IMW::Resource.new('/home/foof.txt', :no_modules => true)
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
it "should return false when querying with a method that isn't defined" do
|
|
11
|
-
@resource.is_remote?.should be_false
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
it "should raise an IMW::NoMethodError in any other case" do
|
|
15
|
-
lambda { @resource.do_seomthing }.should raise_error(IMW::NoMethodError)
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
it "should print the modules it's been extended by when raising an IMW::NoMethodError" do
|
|
19
|
-
begin
|
|
20
|
-
@resource.extend(IMW::Schemes::Local::Base)
|
|
21
|
-
@resource.do_something
|
|
22
|
-
rescue IMW::NoMethodError => e
|
|
23
|
-
e.message.should match(/extended by IMW::Schemes::Local::Base/)
|
|
24
|
-
end
|
|
25
|
-
end
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
data/spec/imw/schemes/hdfs_spec.rb
DELETED
|
@@ -1,67 +0,0 @@
|
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
|
2
|
-
|
|
3
|
-
describe IMW::Schemes::HDFS do
|
|
4
|
-
before do
|
|
5
|
-
def fake_hdfs_resource path, options={}
|
|
6
|
-
if options == false
|
|
7
|
-
response = ""
|
|
8
|
-
else
|
|
9
|
-
response = " #{options[:dirs] || 0} #{options[:files] || 1} #{options[:size] || 1000} hdfs://localhost#{path}"
|
|
10
|
-
end
|
|
11
|
-
IMW::Schemes::HDFS.should_receive(:fs).with(:count, path).at_least(:once).and_return(response)
|
|
12
|
-
IMW.open("hdfs://#{path}")
|
|
13
|
-
end
|
|
14
|
-
@path = '/path/to/myfile'
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
describe "refreshing its properties" do
|
|
18
|
-
it "should correctly get properties for a resource which exists" do
|
|
19
|
-
resource = fake_hdfs_resource(@path, :dirs => 2, :files => 3, :size => 1000)
|
|
20
|
-
resource.exist?.should be_true
|
|
21
|
-
resource.num_dirs.should == 2
|
|
22
|
-
resource.num_files.should == 3
|
|
23
|
-
resource.size.should == 1000
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
it "should gracefully handle a resource which doesn't exist" do
|
|
27
|
-
resource = fake_hdfs_resource(@path, false)
|
|
28
|
-
resource.exist?.should be_false
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
it "should execute the correct command to delete the path" do
|
|
32
|
-
resource = fake_hdfs_resource(@path)
|
|
33
|
-
IMW::Schemes::HDFS.should_receive(:fs).with(:rm, resource.path)
|
|
34
|
-
resource.rm
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
it "should execute the correct command to delete the path when skipping the trash" do
|
|
38
|
-
resource = fake_hdfs_resource(@path)
|
|
39
|
-
IMW::Schemes::HDFS.should_receive(:fs).with(:rm, '-skipTrash', resource.path)
|
|
40
|
-
resource.rm :skip_trash => true
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
it "should recognize a file and extend it properly" do
|
|
44
|
-
resource = fake_hdfs_resource(@path)
|
|
45
|
-
resource.num_dirs.should == 0
|
|
46
|
-
resource.num_files.should == 1
|
|
47
|
-
resource.exist?.should be_true
|
|
48
|
-
resource.is_directory?.should be_false
|
|
49
|
-
resource.modules.should include(IMW::Schemes::HDFSFile)
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
it "should recognize a directory and extend it properly" do
|
|
53
|
-
resource = fake_hdfs_resource(@path, :dirs => 2, :files => 1)
|
|
54
|
-
resource.num_dirs.should == 2
|
|
55
|
-
resource.num_files.should == 1
|
|
56
|
-
resource.exist?.should be_true
|
|
57
|
-
resource.is_directory?.should be_true
|
|
58
|
-
resource.modules.should include(IMW::Schemes::HDFSDirectory)
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
it "should be able to join path segments to a directory" do
|
|
62
|
-
resource = fake_hdfs_resource(@path, :dirs => 2)
|
|
63
|
-
sub_resource = fake_hdfs_resource("#{@path}/a/b/c")
|
|
64
|
-
resource.join('a', 'b/c').to_s.should == sub_resource.to_s
|
|
65
|
-
end
|
|
66
|
-
end
|
|
67
|
-
end
|
data/spec/imw/schemes/http_spec.rb
DELETED
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
|
2
|
-
|
|
3
|
-
describe IMW::Schemes::HTTP do
|
|
4
|
-
|
|
5
|
-
describe "finding its effective basename" do
|
|
6
|
-
it "should use the real basename when present" do
|
|
7
|
-
IMW.open('http://www.google.com/foobar').effective_basename.should == 'foobar'
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
it "should use '_index' when at the root (without a slash)" do
|
|
11
|
-
IMW.open('http://www.google.com').effective_basename.should == '_index'
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
it "should use '_index' when at the root (even when a slash is given)" do
|
|
15
|
-
IMW.open('http://www.google.com/').effective_basename.should == '_index'
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
end
|
|
19
|
-
end
|
data/spec/imw/schemes/local_spec.rb
DELETED
|
@@ -1,165 +0,0 @@
|
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
|
2
|
-
|
|
3
|
-
describe IMW::Schemes::Local::Base do
|
|
4
|
-
|
|
5
|
-
it "should not extend a local file with LocalDirectory" do
|
|
6
|
-
@file = IMW::Resource.new('foo.txt', :no_modules => true)
|
|
7
|
-
@file.should_not_receive(:extend).with(IMW::Schemes::Local::LocalDirectory)
|
|
8
|
-
IMW::Resource.extend_instance!(@file)
|
|
9
|
-
end
|
|
10
|
-
|
|
11
|
-
it "should not extend a local directory with LocalFile" do
|
|
12
|
-
@dir = IMW::Resource.new(IMWTest::TMP_DIR, :no_modules => true)
|
|
13
|
-
@dir.should_not_receive(:extend).with(IMW::Schemes::Local::LocalFile)
|
|
14
|
-
IMW::Resource.extend_instance!(@dir)
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
it "should correctly resolve relative paths" do
|
|
18
|
-
IMW.open('foobar').dirname.should == IMWTest::TMP_DIR
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
it "should be able to return its directory as an IMW object" do
|
|
22
|
-
IMW.open('/path/to/file').dir.path.should == '/path/to'
|
|
23
|
-
IMW.open('/').dir.path.should == '/'
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
describe IMW::Schemes::Local::LocalFile do
|
|
30
|
-
before do
|
|
31
|
-
IMWTest::Random.file('original.txt')
|
|
32
|
-
@file = IMW::Resource.new('original.txt')
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
it "can delete the file" do
|
|
36
|
-
@file.rm
|
|
37
|
-
@file.exist?.should be_false
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
it "can read a file" do
|
|
41
|
-
@file.read.size.should > 0
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
it "can load the lines of a file" do
|
|
45
|
-
data = @file.load
|
|
46
|
-
data.size.should > 0
|
|
47
|
-
data.class.should == Array
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
it "can iterate over the lines of a file" do
|
|
51
|
-
@file.load do |line|
|
|
52
|
-
line.class.should == String
|
|
53
|
-
break
|
|
54
|
-
end
|
|
55
|
-
end
|
|
56
|
-
|
|
57
|
-
it "can map the lines of a file" do
|
|
58
|
-
@file.map do |line|
|
|
59
|
-
line[0..5]
|
|
60
|
-
end.class.should == Array
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
it "can produce a snippet" do
|
|
64
|
-
path = IMWTest::DATA_DIR + "/formats/none/sample"
|
|
65
|
-
# FIXME only look at the first 100 bytes b/c of subsequent non-ascii chars...
|
|
66
|
-
IMW.open(path).snippet[0..100].should == File.new(path).read(101)
|
|
67
|
-
end
|
|
68
|
-
|
|
69
|
-
it "can produce a summary with size and line count" do
|
|
70
|
-
@file.summary[:num_lines].should > 0
|
|
71
|
-
@file.summary[:size].should > 0
|
|
72
|
-
end
|
|
73
|
-
end
|
|
74
|
-
|
|
75
|
-
describe IMW::Schemes::Local::LocalDirectory do
|
|
76
|
-
before do
|
|
77
|
-
FileUtils.mkdir_p('dir')
|
|
78
|
-
FileUtils.mkdir_p('dir/subdir')
|
|
79
|
-
FileUtils.cd('dir') do
|
|
80
|
-
IMWTest::Random.file('file1.tsv')
|
|
81
|
-
IMWTest::Random.file('file2.tsv')
|
|
82
|
-
FileUtils.cd('subdir') do
|
|
83
|
-
IMWTest::Random.file('file3.csv')
|
|
84
|
-
end
|
|
85
|
-
end
|
|
86
|
-
@dir = IMW::Resource.new('dir')
|
|
87
|
-
end
|
|
88
|
-
|
|
89
|
-
it "can delete an empty directory" do
|
|
90
|
-
FileUtils.mkdir('empty')
|
|
91
|
-
dir = IMW.open('empty')
|
|
92
|
-
dir.rmdir
|
|
93
|
-
dir.exist?.should be_false
|
|
94
|
-
end
|
|
95
|
-
|
|
96
|
-
it "can recursively delete a directory" do
|
|
97
|
-
@dir.rm_rf
|
|
98
|
-
@dir.exist?.should be_false
|
|
99
|
-
end
|
|
100
|
-
|
|
101
|
-
it "can list its contents" do
|
|
102
|
-
@dir.contents.size.should == 3
|
|
103
|
-
end
|
|
104
|
-
|
|
105
|
-
it "can list its contents recursively" do
|
|
106
|
-
@dir.all_contents.size.should == 4
|
|
107
|
-
end
|
|
108
|
-
|
|
109
|
-
it "can list its contents as IMW::Resource objects" do
|
|
110
|
-
@dir.resources.map(&:class).uniq.first.should == IMW::Resource
|
|
111
|
-
end
|
|
112
|
-
|
|
113
|
-
describe "checking whether it contains other resources" do
|
|
114
|
-
|
|
115
|
-
it "should return false for remote paths" do
|
|
116
|
-
@dir.contains?("http://google.com").should be_false
|
|
117
|
-
end
|
|
118
|
-
|
|
119
|
-
it "should return true for its own path" do
|
|
120
|
-
@dir.contains?(@dir.path).should be_true
|
|
121
|
-
end
|
|
122
|
-
|
|
123
|
-
it "should return false for a path that doesn't start with its path" do
|
|
124
|
-
@dir.contains?(File.expand_path('foo')).should be_false
|
|
125
|
-
end
|
|
126
|
-
|
|
127
|
-
it "should return false for a path that starts with its path but doesn't exist" do
|
|
128
|
-
@dir.contains?(File.expand_path('dir/foo/baz')).should be_false
|
|
129
|
-
end
|
|
130
|
-
|
|
131
|
-
it "should return true for a path that starts with its path and exists" do
|
|
132
|
-
FileUtils.mkdir_p('dir/foo/baz')
|
|
133
|
-
@dir.contains?(File.expand_path('dir/foo/baz')).should be_true
|
|
134
|
-
end
|
|
135
|
-
|
|
136
|
-
end
|
|
137
|
-
|
|
138
|
-
it "can join with a path" do
|
|
139
|
-
@dir.join("a", "b/c").to_s.should == File.join(@dir.path, 'a/b/c')
|
|
140
|
-
end
|
|
141
|
-
|
|
142
|
-
it "can create a subdirectory" do
|
|
143
|
-
@dir.join("mallaco").exist?.should be_false
|
|
144
|
-
subdir = @dir.subdir!("mallaco")
|
|
145
|
-
subdir.exist?.should be_true
|
|
146
|
-
subdir.directory?.should be_true
|
|
147
|
-
end
|
|
148
|
-
|
|
149
|
-
describe 'can package itself to' do
|
|
150
|
-
['tar', 'tar.bz2', 'tar.gz', 'zip', 'rar'].each do |extension|
|
|
151
|
-
it "a #{extension} archive" do
|
|
152
|
-
@dir.package("package.#{extension}").exist?.should be_true # FIXME should explicitly check paths are correct in archive
|
|
153
|
-
end
|
|
154
|
-
end
|
|
155
|
-
end
|
|
156
|
-
|
|
157
|
-
it "can produce a summary with size and number of files" do
|
|
158
|
-
@dir.create
|
|
159
|
-
@dir.summary[:num_files].should == @dir.contents.size
|
|
160
|
-
@dir.summary[:size].should > 0
|
|
161
|
-
end
|
|
162
|
-
|
|
163
|
-
end
|
|
164
|
-
|
|
165
|
-
|
data/spec/imw/schemes/remote_spec.rb
DELETED
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
|
2
|
-
|
|
3
|
-
describe IMW::Schemes::Remote::Base do
|
|
4
|
-
end
|
|
5
|
-
|
|
6
|
-
describe IMW::Schemes::Remote::RemoteFile do
|
|
7
|
-
|
|
8
|
-
before do
|
|
9
|
-
# skip modules or else it will automatically become HTML!
|
|
10
|
-
@file = IMW.open('http://www.google.com', :skip_modules => ["Schemes::HTTP", "Formats::HTML"])
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
describe 'with the file' do
|
|
14
|
-
|
|
15
|
-
it "can read a remote file" do
|
|
16
|
-
@file.read.size.should > 0
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
it "can load the lines of a remote file" do
|
|
20
|
-
data = @file.load
|
|
21
|
-
data.size.should > 0
|
|
22
|
-
data.class.should == Array
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
it "can iterate over the lines of a remote file" do
|
|
26
|
-
@file.load do |line|
|
|
27
|
-
line.class.should == String
|
|
28
|
-
break
|
|
29
|
-
end
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
it "can map the lines of a remote file" do
|
|
33
|
-
@file.map do |line|
|
|
34
|
-
line[0..5]
|
|
35
|
-
end.class.should == Array
|
|
36
|
-
end
|
|
37
|
-
end
|
|
38
|
-
end
|
data/spec/imw/schemes/s3_spec.rb
DELETED
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
|
2
|
-
|
|
3
|
-
describe IMW::Schemes::S3 do
|
|
4
|
-
|
|
5
|
-
describe 'manipulating S3 paths' do
|
|
6
|
-
before do
|
|
7
|
-
@resource = IMW::Resource.new('s3://mybucket/foobar/foo.txt')
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
it "should set the bucket" do
|
|
11
|
-
@resource.bucket.should == 'mybucket'
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
it "can generate an S3N url" do
|
|
15
|
-
@resource.s3n_url.should == 's3n://mybucket/foobar/foo.txt'
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
it "can join path segments" do
|
|
19
|
-
@resource.join('a', 'b/c').to_s.should == File.join(@resource.to_s, 'a/b/c')
|
|
20
|
-
end
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
describe "reading S3 files" do
|
|
24
|
-
before { IMW::Schemes::S3.make_connection! }
|
|
25
|
-
['file', 'file with spaces', 'file with # fragment'].each do |f|
|
|
26
|
-
it "can read a file named '#{f}' from S3" do
|
|
27
|
-
IMW::Resource.new("s3://imw.infinitemonkeys.info/spec/schemes/s3/#{f}").read.chomp.should == 'ok'
|
|
28
|
-
end
|
|
29
|
-
end
|
|
30
|
-
end
|
|
31
|
-
end
|
data/spec/imw/tools/aggregator_spec.rb
DELETED
|
@@ -1,71 +0,0 @@
|
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
-
|
|
3
|
-
describe IMW::Tools::Aggregator do
|
|
4
|
-
before do
|
|
5
|
-
@dir = 'agg_dir'
|
|
6
|
-
FileUtils.mkdir_p(@dir)
|
|
7
|
-
|
|
8
|
-
# remote files
|
|
9
|
-
@homepage = "http://www.google.com"
|
|
10
|
-
@website = "http://www.google.com/support/"
|
|
11
|
-
@remote_files = [@homepage, @website]
|
|
12
|
-
|
|
13
|
-
# regular files
|
|
14
|
-
@csv = "foobar-csv.csv"
|
|
15
|
-
@xml = "foobar-xml.xml"
|
|
16
|
-
@txt = "foobar-txt.txt"
|
|
17
|
-
@blah = "foobar"
|
|
18
|
-
@files = [@csv, @xml, @txt, @blah]
|
|
19
|
-
|
|
20
|
-
# compressed files
|
|
21
|
-
@bz2 = "foobar-bz2.bz2"
|
|
22
|
-
@gz = "foobar-gz.gz"
|
|
23
|
-
@compressed_files = [@bz2, @gz]
|
|
24
|
-
|
|
25
|
-
# archives
|
|
26
|
-
@zip = "foobar-zip.zip"
|
|
27
|
-
@tarbz2 = "foobar-tarbz2.tar.bz2"
|
|
28
|
-
@targz = "foobar-targz.tar.gz"
|
|
29
|
-
@tar = "foobar-tar.tar"
|
|
30
|
-
@rar = "foobar-rar.rar"
|
|
31
|
-
@archives = [@zip, @tarbz2, @targz, @rar, @tar]
|
|
32
|
-
|
|
33
|
-
@local_files = @files + @compressed_files + @archives
|
|
34
|
-
|
|
35
|
-
@all_files = @remote_files + @local_files
|
|
36
|
-
|
|
37
|
-
@local_files.each do |path|
|
|
38
|
-
IMWTest::Random.file path
|
|
39
|
-
end
|
|
40
|
-
|
|
41
|
-
@aggregator = IMW::Tools::Aggregator.new @dir
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
it "should copy regular files to its directory" do
|
|
45
|
-
@aggregator.aggregate *@files
|
|
46
|
-
@aggregator.dir.path.should contain(*@files)
|
|
47
|
-
@files.each { |path| IMW.open(path).exist?.should be_true }
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
it "should copy remote files to its archive directory" do
|
|
51
|
-
@aggregator.aggregate *@remote_files
|
|
52
|
-
@aggregator.dir.path.should contain('_index', 'support') # _index from Http#effective_basename on http://www.google.com
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
it "should uncompress compressed files to its directory" do
|
|
56
|
-
@aggregator.aggregate *@compressed_files
|
|
57
|
-
@aggregator.dir.path.should contain('foobar-bz2', 'foobar-gz')
|
|
58
|
-
@aggregator.dir.path.should_not contain(*@compressed_files)
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
it "should copy the content of archive files to its archive directory (but not the actual archives)" do
|
|
62
|
-
@aggregator.aggregate *@archives
|
|
63
|
-
@archives.each do |archive|
|
|
64
|
-
@aggregator.dir.path.should_not contain(archive)
|
|
65
|
-
@aggregator.dir.path.should contain(*IMW.open(archive).contents)
|
|
66
|
-
end
|
|
67
|
-
end
|
|
68
|
-
|
|
69
|
-
end
|
|
70
|
-
|
|
71
|
-
|
data/spec/imw/tools/archiver_spec.rb
DELETED
|
@@ -1,120 +0,0 @@
|
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
-
|
|
3
|
-
describe IMW::Tools::Archiver do
|
|
4
|
-
before do
|
|
5
|
-
@name = 'foobar'
|
|
6
|
-
|
|
7
|
-
# remote files
|
|
8
|
-
@homepage = "http://www.google.com"
|
|
9
|
-
@website = "http://www.google.com/support/"
|
|
10
|
-
@remote_files = [@homepage, @website]
|
|
11
|
-
|
|
12
|
-
# regular files
|
|
13
|
-
@csv = "foobar-csv.csv"
|
|
14
|
-
@xml = "foobar-xml.xml"
|
|
15
|
-
@txt = "foobar-txt.txt"
|
|
16
|
-
@blah = "foobar"
|
|
17
|
-
@files = [@csv, @xml, @txt, @blah]
|
|
18
|
-
|
|
19
|
-
# compressed files
|
|
20
|
-
@bz2 = "foobar-bz2.bz2"
|
|
21
|
-
@gz = "foobar-gz.gz"
|
|
22
|
-
@compressed_files = [@bz2, @gz]
|
|
23
|
-
|
|
24
|
-
# archives
|
|
25
|
-
@zip = "foobar-zip.zip"
|
|
26
|
-
@tarbz2 = "foobar-tarbz2.tar.bz2"
|
|
27
|
-
@targz = "foobar-targz.tar.gz"
|
|
28
|
-
@tar = "foobar-tar.tar"
|
|
29
|
-
@rar = "foobar-rar.rar"
|
|
30
|
-
@archives = [@zip, @tarbz2, @targz, @rar, @tar]
|
|
31
|
-
|
|
32
|
-
@local_files = @files + @compressed_files + @archives
|
|
33
|
-
|
|
34
|
-
@all_files = @remote_files + @local_files
|
|
35
|
-
|
|
36
|
-
@local_files.each do |path|
|
|
37
|
-
IMWTest::Random.file path
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
@archiver = IMW::Tools::Archiver.new @name, @all_files
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
after do
|
|
44
|
-
@archiver.clean!
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
describe "preparing input files" do
|
|
48
|
-
|
|
49
|
-
describe "before preparing input files" do
|
|
50
|
-
it "should not be prepared when initialized" do
|
|
51
|
-
@archiver.prepared?.should be_false
|
|
52
|
-
end
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
describe "after preparing files" do
|
|
56
|
-
before { @archiver.prepare! }
|
|
57
|
-
|
|
58
|
-
it "should be prepared" do
|
|
59
|
-
@archiver.prepared?.should be_true
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
it "should name its archive directory properly" do
|
|
63
|
-
@archiver.tmp_dir.should contain(@name)
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
it "should copy regular files to its archive directory" do
|
|
67
|
-
@archiver.dir.should contain(*@files)
|
|
68
|
-
@local_files.each { |path| IMW.open(path).exist?.should be_true }
|
|
69
|
-
end
|
|
70
|
-
|
|
71
|
-
it "should copy remote files to its archive directory" do
|
|
72
|
-
@archiver.dir.should contain('_index', 'support') # _index from Http#effective_basename on http://www.google.com
|
|
73
|
-
end
|
|
74
|
-
|
|
75
|
-
it "should uncompress compressed files to its archive directory" do
|
|
76
|
-
@archiver.dir.should contain('foobar-bz2', 'foobar-gz')
|
|
77
|
-
@archiver.dir.should_not contain(*@compressed_files)
|
|
78
|
-
end
|
|
79
|
-
|
|
80
|
-
it "should copy the content of archive files to its archive directory (but not the actual archives)" do
|
|
81
|
-
@archives.each do |archive|
|
|
82
|
-
@archiver.dir.should_not contain(archive)
|
|
83
|
-
@archiver.dir.should contain(*IMW.open(archive).contents)
|
|
84
|
-
end
|
|
85
|
-
end
|
|
86
|
-
|
|
87
|
-
end
|
|
88
|
-
end
|
|
89
|
-
|
|
90
|
-
describe "when packaging files" do
|
|
91
|
-
@packages = ["package.tar.bz2", "package.zip", "package.tar.gz", "package.tar", "package.rar"]
|
|
92
|
-
|
|
93
|
-
@packages.each do |package|
|
|
94
|
-
it "should create a #{package} file containing all the files and return it" do
|
|
95
|
-
output = @archiver.package!(package)
|
|
96
|
-
output.basename.should == package
|
|
97
|
-
@archiver.tmp_dir.should contain(IMW.open(package).contents)
|
|
98
|
-
end
|
|
99
|
-
end
|
|
100
|
-
|
|
101
|
-
describe 'when packaging into multiple output formats' do
|
|
102
|
-
|
|
103
|
-
it "should prepare input files without being asked" do
|
|
104
|
-
@archiver.prepared?.should be_false
|
|
105
|
-
@archiver.package! 'package.tar.bz2'
|
|
106
|
-
@archiver.prepared?.should be_true
|
|
107
|
-
end
|
|
108
|
-
|
|
109
|
-
it "should not prepare input files once they've already been prepared" do
|
|
110
|
-
@archiver.prepared?.should be_false
|
|
111
|
-
@archiver.package! 'package.tar.bz2'
|
|
112
|
-
@archiver.prepared?.should be_true
|
|
113
|
-
@archiver.should_not_receive(:prepare!)
|
|
114
|
-
@archiver.package! 'package.tar.gz'
|
|
115
|
-
end
|
|
116
|
-
end
|
|
117
|
-
end
|
|
118
|
-
end
|
|
119
|
-
|
|
120
|
-
|