imw 0.2.18 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +7 -26
- data/Gemfile.lock +13 -38
- data/{LICENSE → LICENSE.txt} +1 -1
- data/README.textile +35 -0
- data/Rakefile +45 -22
- data/VERSION +1 -1
- data/examples/foo.rb +19 -0
- data/examples/html_selector.rb +22 -0
- data/examples/nes_game_list.csv +625 -0
- data/examples/nes_gamespot.csv +1371 -0
- data/examples/nes_nintendo.csv +624 -0
- data/examples/nes_unlicensed.csv +89 -0
- data/examples/nes_wikipedia.csv +710 -0
- data/examples/nibbler_test.rb +24 -0
- data/examples/script.rb +19 -0
- data/lib/imw.rb +28 -140
- data/lib/imw/error.rb +9 -0
- data/lib/imw/recordizer.rb +8 -0
- data/lib/imw/recordizer/html_selector_recordizer.rb +86 -0
- data/lib/imw/recordizer/string_slice_recordizer.rb +39 -0
- data/lib/imw/resource.rb +3 -119
- data/lib/imw/serializer.rb +7 -0
- data/lib/imw/serializer/json_serializer.rb +17 -0
- data/lib/imw/uri.rb +41 -0
- data/spec/resource_spec.rb +78 -0
- data/spec/uri_spec.rb +55 -0
- metadata +81 -232
- data/README.rdoc +0 -371
- data/bin/imw +0 -5
- data/bin/tsv_to_json.rb +0 -29
- data/etc/imwrc.rb +0 -26
- data/examples/dataset.rb +0 -12
- data/examples/metadata.yml +0 -10
- data/lib/imw/archives.rb +0 -120
- data/lib/imw/archives/rar.rb +0 -19
- data/lib/imw/archives/tar.rb +0 -19
- data/lib/imw/archives/tarbz2.rb +0 -73
- data/lib/imw/archives/targz.rb +0 -73
- data/lib/imw/archives/zip.rb +0 -51
- data/lib/imw/boot.rb +0 -87
- data/lib/imw/compressed_files.rb +0 -94
- data/lib/imw/compressed_files/bz2.rb +0 -16
- data/lib/imw/compressed_files/compressible.rb +0 -75
- data/lib/imw/compressed_files/gz.rb +0 -16
- data/lib/imw/dataset.rb +0 -125
- data/lib/imw/dataset/paths.rb +0 -29
- data/lib/imw/dataset/workflow.rb +0 -195
- data/lib/imw/formats.rb +0 -33
- data/lib/imw/formats/delimited.rb +0 -170
- data/lib/imw/formats/excel.rb +0 -100
- data/lib/imw/formats/json.rb +0 -41
- data/lib/imw/formats/pdf.rb +0 -71
- data/lib/imw/formats/sgml.rb +0 -69
- data/lib/imw/formats/yaml.rb +0 -41
- data/lib/imw/metadata.rb +0 -83
- data/lib/imw/metadata/contains_metadata.rb +0 -54
- data/lib/imw/metadata/dsl.rb +0 -111
- data/lib/imw/metadata/field.rb +0 -37
- data/lib/imw/metadata/has_metadata.rb +0 -98
- data/lib/imw/metadata/has_summary.rb +0 -57
- data/lib/imw/metadata/schema.rb +0 -17
- data/lib/imw/parsers.rb +0 -8
- data/lib/imw/parsers/flat.rb +0 -44
- data/lib/imw/parsers/html_parser.rb +0 -387
- data/lib/imw/parsers/html_parser/matchers.rb +0 -289
- data/lib/imw/parsers/line_parser.rb +0 -87
- data/lib/imw/parsers/regexp_parser.rb +0 -72
- data/lib/imw/repository.rb +0 -12
- data/lib/imw/runner.rb +0 -118
- data/lib/imw/schemes.rb +0 -23
- data/lib/imw/schemes/ftp.rb +0 -142
- data/lib/imw/schemes/hdfs.rb +0 -251
- data/lib/imw/schemes/http.rb +0 -165
- data/lib/imw/schemes/local.rb +0 -409
- data/lib/imw/schemes/remote.rb +0 -119
- data/lib/imw/schemes/s3.rb +0 -143
- data/lib/imw/schemes/sql.rb +0 -129
- data/lib/imw/tools.rb +0 -12
- data/lib/imw/tools/aggregator.rb +0 -148
- data/lib/imw/tools/archiver.rb +0 -220
- data/lib/imw/tools/downloader.rb +0 -63
- data/lib/imw/tools/extension_analyzer.rb +0 -114
- data/lib/imw/tools/summarizer.rb +0 -83
- data/lib/imw/tools/transferer.rb +0 -167
- data/lib/imw/utils.rb +0 -74
- data/lib/imw/utils/dynamically_extendable.rb +0 -137
- data/lib/imw/utils/error.rb +0 -59
- data/lib/imw/utils/extensions/hpricot.rb +0 -34
- data/lib/imw/utils/has_uri.rb +0 -131
- data/lib/imw/utils/log.rb +0 -92
- data/lib/imw/utils/misc.rb +0 -57
- data/lib/imw/utils/paths.rb +0 -146
- data/lib/imw/utils/uri.rb +0 -59
- data/lib/imw/utils/uuid.rb +0 -33
- data/lib/imw/utils/validate.rb +0 -38
- data/lib/imw/utils/version.rb +0 -11
- data/spec/data/formats/delimited/sample.csv +0 -131
- data/spec/data/formats/delimited/sample.tsv +0 -131
- data/spec/data/formats/delimited/with_schema/ace-hardware-locations.tsv +0 -11
- data/spec/data/formats/delimited/with_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -16
- data/spec/data/formats/delimited/with_schema/complete-list-of-starbucks-locations.tsv +0 -11
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -12
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -13
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -22
- data/spec/data/formats/delimited/without_schema/ace-hardware-locations.tsv +0 -10
- data/spec/data/formats/delimited/without_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -15
- data/spec/data/formats/delimited/without_schema/complete-list-of-starbucks-locations.tsv +0 -10
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -11
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -12
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -21
- data/spec/data/formats/excel/sample.xls +0 -0
- data/spec/data/formats/json/sample.json +0 -1
- data/spec/data/formats/none/sample +0 -650
- data/spec/data/formats/sgml/sample.xml +0 -617
- data/spec/data/formats/text/sample.txt +0 -650
- data/spec/data/formats/yaml/sample.yaml +0 -410
- data/spec/data/schema-tabular.yaml +0 -11
- data/spec/imw/archives/rar_spec.rb +0 -16
- data/spec/imw/archives/tar_spec.rb +0 -16
- data/spec/imw/archives/tarbz2_spec.rb +0 -24
- data/spec/imw/archives/targz_spec.rb +0 -21
- data/spec/imw/archives/zip_spec.rb +0 -16
- data/spec/imw/archives_spec.rb +0 -77
- data/spec/imw/compressed_files/bz2_spec.rb +0 -15
- data/spec/imw/compressed_files/compressible_spec.rb +0 -36
- data/spec/imw/compressed_files/gz_spec.rb +0 -15
- data/spec/imw/compressed_files_spec.rb +0 -47
- data/spec/imw/dataset/paths_spec.rb +0 -32
- data/spec/imw/dataset/workflow_spec.rb +0 -41
- data/spec/imw/formats/delimited_spec.rb +0 -44
- data/spec/imw/formats/excel_spec.rb +0 -55
- data/spec/imw/formats/json_spec.rb +0 -18
- data/spec/imw/formats/sgml_spec.rb +0 -24
- data/spec/imw/formats/yaml_spec.rb +0 -19
- data/spec/imw/metadata/contains_metadata_spec.rb +0 -56
- data/spec/imw/metadata/field_spec.rb +0 -25
- data/spec/imw/metadata/has_metadata_spec.rb +0 -58
- data/spec/imw/metadata/has_summary_spec.rb +0 -32
- data/spec/imw/metadata/schema_spec.rb +0 -24
- data/spec/imw/metadata_spec.rb +0 -86
- data/spec/imw/parsers/line_parser_spec.rb +0 -96
- data/spec/imw/parsers/regexp_parser_spec.rb +0 -42
- data/spec/imw/resource_spec.rb +0 -32
- data/spec/imw/schemes/hdfs_spec.rb +0 -67
- data/spec/imw/schemes/http_spec.rb +0 -19
- data/spec/imw/schemes/local_spec.rb +0 -165
- data/spec/imw/schemes/remote_spec.rb +0 -38
- data/spec/imw/schemes/s3_spec.rb +0 -31
- data/spec/imw/schemes/sql_spec.rb +0 -3
- data/spec/imw/tools/aggregator_spec.rb +0 -71
- data/spec/imw/tools/archiver_spec.rb +0 -120
- data/spec/imw/tools/extension_analyzer_spec.rb +0 -153
- data/spec/imw/tools/summarizer_spec.rb +0 -8
- data/spec/imw/tools/transferer_spec.rb +0 -195
- data/spec/imw/utils/dynamically_extendable_spec.rb +0 -69
- data/spec/imw/utils/has_uri_spec.rb +0 -61
- data/spec/imw/utils/paths_spec.rb +0 -10
- data/spec/imw/utils/shared_paths_spec.rb +0 -29
- data/spec/imw_spec.rb +0 -14
- data/spec/rcov.opts +0 -1
- data/spec/spec_helper.rb +0 -31
- data/spec/support/custom_matchers.rb +0 -28
- data/spec/support/file_contents_matcher.rb +0 -30
- data/spec/support/paths_matcher.rb +0 -66
- data/spec/support/random.rb +0 -213
- data/spec/support/without_regard_to_order_matcher.rb +0 -41
data/spec/imw/resource_spec.rb
DELETED
@@ -1,32 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + "/../spec_helper"
|
2
|
-
|
3
|
-
describe IMW::Resource do
|
4
|
-
|
5
|
-
describe "handling missing methods" do
|
6
|
-
before do
|
7
|
-
@resource = IMW::Resource.new('/home/foof.txt', :no_modules => true)
|
8
|
-
end
|
9
|
-
|
10
|
-
it "should return false when querying with a method that isn't defined" do
|
11
|
-
@resource.is_remote?.should be_false
|
12
|
-
end
|
13
|
-
|
14
|
-
it "should raise an IMW::NoMethodError in any other case" do
|
15
|
-
lambda { @resource.do_seomthing }.should raise_error(IMW::NoMethodError)
|
16
|
-
end
|
17
|
-
|
18
|
-
it "should print the modules it's been extended by when raising an IMW::NoMethodError" do
|
19
|
-
begin
|
20
|
-
@resource.extend(IMW::Schemes::Local::Base)
|
21
|
-
@resource.do_something
|
22
|
-
rescue IMW::NoMethodError => e
|
23
|
-
e.message.should match(/extended by IMW::Schemes::Local::Base/)
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
end
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
data/spec/imw/schemes/hdfs_spec.rb
DELETED
@@ -1,67 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
2
|
-
|
3
|
-
describe IMW::Schemes::HDFS do
|
4
|
-
before do
|
5
|
-
def fake_hdfs_resource path, options={}
|
6
|
-
if options == false
|
7
|
-
response = ""
|
8
|
-
else
|
9
|
-
response = " #{options[:dirs] || 0} #{options[:files] || 1} #{options[:size] || 1000} hdfs://localhost#{path}"
|
10
|
-
end
|
11
|
-
IMW::Schemes::HDFS.should_receive(:fs).with(:count, path).at_least(:once).and_return(response)
|
12
|
-
IMW.open("hdfs://#{path}")
|
13
|
-
end
|
14
|
-
@path = '/path/to/myfile'
|
15
|
-
end
|
16
|
-
|
17
|
-
describe "refreshing its properties" do
|
18
|
-
it "should correctly get properties for a resource which exists" do
|
19
|
-
resource = fake_hdfs_resource(@path, :dirs => 2, :files => 3, :size => 1000)
|
20
|
-
resource.exist?.should be_true
|
21
|
-
resource.num_dirs.should == 2
|
22
|
-
resource.num_files.should == 3
|
23
|
-
resource.size.should == 1000
|
24
|
-
end
|
25
|
-
|
26
|
-
it "should gracefully handle a resource which doesn't exist" do
|
27
|
-
resource = fake_hdfs_resource(@path, false)
|
28
|
-
resource.exist?.should be_false
|
29
|
-
end
|
30
|
-
|
31
|
-
it "should execute the correct command to delete the path" do
|
32
|
-
resource = fake_hdfs_resource(@path)
|
33
|
-
IMW::Schemes::HDFS.should_receive(:fs).with(:rm, resource.path)
|
34
|
-
resource.rm
|
35
|
-
end
|
36
|
-
|
37
|
-
it "should execute the correct command to delete the path when skipping the trash" do
|
38
|
-
resource = fake_hdfs_resource(@path)
|
39
|
-
IMW::Schemes::HDFS.should_receive(:fs).with(:rm, '-skipTrash', resource.path)
|
40
|
-
resource.rm :skip_trash => true
|
41
|
-
end
|
42
|
-
|
43
|
-
it "should recognize a file and extend it properly" do
|
44
|
-
resource = fake_hdfs_resource(@path)
|
45
|
-
resource.num_dirs.should == 0
|
46
|
-
resource.num_files.should == 1
|
47
|
-
resource.exist?.should be_true
|
48
|
-
resource.is_directory?.should be_false
|
49
|
-
resource.modules.should include(IMW::Schemes::HDFSFile)
|
50
|
-
end
|
51
|
-
|
52
|
-
it "should recognize a directory and extend it properly" do
|
53
|
-
resource = fake_hdfs_resource(@path, :dirs => 2, :files => 1)
|
54
|
-
resource.num_dirs.should == 2
|
55
|
-
resource.num_files.should == 1
|
56
|
-
resource.exist?.should be_true
|
57
|
-
resource.is_directory?.should be_true
|
58
|
-
resource.modules.should include(IMW::Schemes::HDFSDirectory)
|
59
|
-
end
|
60
|
-
|
61
|
-
it "should be able to join path segments to a directory" do
|
62
|
-
resource = fake_hdfs_resource(@path, :dirs => 2)
|
63
|
-
sub_resource = fake_hdfs_resource("#{@path}/a/b/c")
|
64
|
-
resource.join('a', 'b/c').to_s.should == sub_resource.to_s
|
65
|
-
end
|
66
|
-
end
|
67
|
-
end
|
data/spec/imw/schemes/http_spec.rb
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
2
|
-
|
3
|
-
describe IMW::Schemes::HTTP do
|
4
|
-
|
5
|
-
describe "finding its effective basename" do
|
6
|
-
it "should use the real basename when present" do
|
7
|
-
IMW.open('http://www.google.com/foobar').effective_basename.should == 'foobar'
|
8
|
-
end
|
9
|
-
|
10
|
-
it "should use '_index' when at the root (without a slash)" do
|
11
|
-
IMW.open('http://www.google.com').effective_basename.should == '_index'
|
12
|
-
end
|
13
|
-
|
14
|
-
it "should use '_index' when at the root (even when a slash is given)" do
|
15
|
-
IMW.open('http://www.google.com/').effective_basename.should == '_index'
|
16
|
-
end
|
17
|
-
|
18
|
-
end
|
19
|
-
end
|
data/spec/imw/schemes/local_spec.rb
DELETED
@@ -1,165 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
2
|
-
|
3
|
-
describe IMW::Schemes::Local::Base do
|
4
|
-
|
5
|
-
it "should not extend a local file with LocalDirectory" do
|
6
|
-
@file = IMW::Resource.new('foo.txt', :no_modules => true)
|
7
|
-
@file.should_not_receive(:extend).with(IMW::Schemes::Local::LocalDirectory)
|
8
|
-
IMW::Resource.extend_instance!(@file)
|
9
|
-
end
|
10
|
-
|
11
|
-
it "should not extend a local directory with LocalFile" do
|
12
|
-
@dir = IMW::Resource.new(IMWTest::TMP_DIR, :no_modules => true)
|
13
|
-
@dir.should_not_receive(:extend).with(IMW::Schemes::Local::LocalFile)
|
14
|
-
IMW::Resource.extend_instance!(@dir)
|
15
|
-
end
|
16
|
-
|
17
|
-
it "should correctly resolve relative paths" do
|
18
|
-
IMW.open('foobar').dirname.should == IMWTest::TMP_DIR
|
19
|
-
end
|
20
|
-
|
21
|
-
it "should be able to return its directory as an IMW object" do
|
22
|
-
IMW.open('/path/to/file').dir.path.should == '/path/to'
|
23
|
-
IMW.open('/').dir.path.should == '/'
|
24
|
-
end
|
25
|
-
|
26
|
-
|
27
|
-
end
|
28
|
-
|
29
|
-
describe IMW::Schemes::Local::LocalFile do
|
30
|
-
before do
|
31
|
-
IMWTest::Random.file('original.txt')
|
32
|
-
@file = IMW::Resource.new('original.txt')
|
33
|
-
end
|
34
|
-
|
35
|
-
it "can delete the file" do
|
36
|
-
@file.rm
|
37
|
-
@file.exist?.should be_false
|
38
|
-
end
|
39
|
-
|
40
|
-
it "can read a file" do
|
41
|
-
@file.read.size.should > 0
|
42
|
-
end
|
43
|
-
|
44
|
-
it "can load the lines of a file" do
|
45
|
-
data = @file.load
|
46
|
-
data.size.should > 0
|
47
|
-
data.class.should == Array
|
48
|
-
end
|
49
|
-
|
50
|
-
it "can iterate over the lines of a file" do
|
51
|
-
@file.load do |line|
|
52
|
-
line.class.should == String
|
53
|
-
break
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
it "can map the lines of a file" do
|
58
|
-
@file.map do |line|
|
59
|
-
line[0..5]
|
60
|
-
end.class.should == Array
|
61
|
-
end
|
62
|
-
|
63
|
-
it "can produce a snippet" do
|
64
|
-
path = IMWTest::DATA_DIR + "/formats/none/sample"
|
65
|
-
# FIXME only look at the first 100 bytes b/c of subsequent non-ascii chars...
|
66
|
-
IMW.open(path).snippet[0..100].should == File.new(path).read(101)
|
67
|
-
end
|
68
|
-
|
69
|
-
it "can produce a summary with size and line count" do
|
70
|
-
@file.summary[:num_lines].should > 0
|
71
|
-
@file.summary[:size].should > 0
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
describe IMW::Schemes::Local::LocalDirectory do
|
76
|
-
before do
|
77
|
-
FileUtils.mkdir_p('dir')
|
78
|
-
FileUtils.mkdir_p('dir/subdir')
|
79
|
-
FileUtils.cd('dir') do
|
80
|
-
IMWTest::Random.file('file1.tsv')
|
81
|
-
IMWTest::Random.file('file2.tsv')
|
82
|
-
FileUtils.cd('subdir') do
|
83
|
-
IMWTest::Random.file('file3.csv')
|
84
|
-
end
|
85
|
-
end
|
86
|
-
@dir = IMW::Resource.new('dir')
|
87
|
-
end
|
88
|
-
|
89
|
-
it "can delete an empty directory" do
|
90
|
-
FileUtils.mkdir('empty')
|
91
|
-
dir = IMW.open('empty')
|
92
|
-
dir.rmdir
|
93
|
-
dir.exist?.should be_false
|
94
|
-
end
|
95
|
-
|
96
|
-
it "can recursively delete a directory" do
|
97
|
-
@dir.rm_rf
|
98
|
-
@dir.exist?.should be_false
|
99
|
-
end
|
100
|
-
|
101
|
-
it "can list its contents" do
|
102
|
-
@dir.contents.size.should == 3
|
103
|
-
end
|
104
|
-
|
105
|
-
it "can list its contents recursively" do
|
106
|
-
@dir.all_contents.size.should == 4
|
107
|
-
end
|
108
|
-
|
109
|
-
it "can list its contents as IMW::Resource objects" do
|
110
|
-
@dir.resources.map(&:class).uniq.first.should == IMW::Resource
|
111
|
-
end
|
112
|
-
|
113
|
-
describe "checking whether it contains other resources" do
|
114
|
-
|
115
|
-
it "should return false for remote paths" do
|
116
|
-
@dir.contains?("http://google.com").should be_false
|
117
|
-
end
|
118
|
-
|
119
|
-
it "should return true for its own path" do
|
120
|
-
@dir.contains?(@dir.path).should be_true
|
121
|
-
end
|
122
|
-
|
123
|
-
it "should return false for a path that doesn't start with its path" do
|
124
|
-
@dir.contains?(File.expand_path('foo')).should be_false
|
125
|
-
end
|
126
|
-
|
127
|
-
it "should return false for a path that starts with its path but doesn't exist" do
|
128
|
-
@dir.contains?(File.expand_path('dir/foo/baz')).should be_false
|
129
|
-
end
|
130
|
-
|
131
|
-
it "should return true for a path that starts with its path and exists" do
|
132
|
-
FileUtils.mkdir_p('dir/foo/baz')
|
133
|
-
@dir.contains?(File.expand_path('dir/foo/baz')).should be_true
|
134
|
-
end
|
135
|
-
|
136
|
-
end
|
137
|
-
|
138
|
-
it "can join with a path" do
|
139
|
-
@dir.join("a", "b/c").to_s.should == File.join(@dir.path, 'a/b/c')
|
140
|
-
end
|
141
|
-
|
142
|
-
it "can create a subdirectory" do
|
143
|
-
@dir.join("mallaco").exist?.should be_false
|
144
|
-
subdir = @dir.subdir!("mallaco")
|
145
|
-
subdir.exist?.should be_true
|
146
|
-
subdir.directory?.should be_true
|
147
|
-
end
|
148
|
-
|
149
|
-
describe 'can package itself to' do
|
150
|
-
['tar', 'tar.bz2', 'tar.gz', 'zip', 'rar'].each do |extension|
|
151
|
-
it "a #{extension} archive" do
|
152
|
-
@dir.package("package.#{extension}").exist?.should be_true # FIXME should explicitly check paths are correct in archive
|
153
|
-
end
|
154
|
-
end
|
155
|
-
end
|
156
|
-
|
157
|
-
it "can produce a summary with size and number of files" do
|
158
|
-
@dir.create
|
159
|
-
@dir.summary[:num_files].should == @dir.contents.size
|
160
|
-
@dir.summary[:size].should > 0
|
161
|
-
end
|
162
|
-
|
163
|
-
end
|
164
|
-
|
165
|
-
|
data/spec/imw/schemes/remote_spec.rb
DELETED
@@ -1,38 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
2
|
-
|
3
|
-
describe IMW::Schemes::Remote::Base do
|
4
|
-
end
|
5
|
-
|
6
|
-
describe IMW::Schemes::Remote::RemoteFile do
|
7
|
-
|
8
|
-
before do
|
9
|
-
# skip modules or else it will automatically become HTML!
|
10
|
-
@file = IMW.open('http://www.google.com', :skip_modules => ["Schemes::HTTP", "Formats::HTML"])
|
11
|
-
end
|
12
|
-
|
13
|
-
describe 'with the file' do
|
14
|
-
|
15
|
-
it "can read a remote file" do
|
16
|
-
@file.read.size.should > 0
|
17
|
-
end
|
18
|
-
|
19
|
-
it "can load the lines of a remote file" do
|
20
|
-
data = @file.load
|
21
|
-
data.size.should > 0
|
22
|
-
data.class.should == Array
|
23
|
-
end
|
24
|
-
|
25
|
-
it "can iterate over the lines of a remote file" do
|
26
|
-
@file.load do |line|
|
27
|
-
line.class.should == String
|
28
|
-
break
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
it "can map the lines of a remote file" do
|
33
|
-
@file.map do |line|
|
34
|
-
line[0..5]
|
35
|
-
end.class.should == Array
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
data/spec/imw/schemes/s3_spec.rb
DELETED
@@ -1,31 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
2
|
-
|
3
|
-
describe IMW::Schemes::S3 do
|
4
|
-
|
5
|
-
describe 'manipulating S3 paths' do
|
6
|
-
before do
|
7
|
-
@resource = IMW::Resource.new('s3://mybucket/foobar/foo.txt')
|
8
|
-
end
|
9
|
-
|
10
|
-
it "should set the bucket" do
|
11
|
-
@resource.bucket.should == 'mybucket'
|
12
|
-
end
|
13
|
-
|
14
|
-
it "can generate an S3N url" do
|
15
|
-
@resource.s3n_url.should == 's3n://mybucket/foobar/foo.txt'
|
16
|
-
end
|
17
|
-
|
18
|
-
it "can join path segments" do
|
19
|
-
@resource.join('a', 'b/c').to_s.should == File.join(@resource.to_s, 'a/b/c')
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
describe "reading S3 files" do
|
24
|
-
before { IMW::Schemes::S3.make_connection! }
|
25
|
-
['file', 'file with spaces', 'file with # fragment'].each do |f|
|
26
|
-
it "can read a file named '#{f}' from S3" do
|
27
|
-
IMW::Resource.new("s3://imw.infinitemonkeys.info/spec/schemes/s3/#{f}").read.chomp.should == 'ok'
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
31
|
-
end
|
data/spec/imw/tools/aggregator_spec.rb
DELETED
@@ -1,71 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
2
|
-
|
3
|
-
describe IMW::Tools::Aggregator do
|
4
|
-
before do
|
5
|
-
@dir = 'agg_dir'
|
6
|
-
FileUtils.mkdir_p(@dir)
|
7
|
-
|
8
|
-
# remote files
|
9
|
-
@homepage = "http://www.google.com"
|
10
|
-
@website = "http://www.google.com/support/"
|
11
|
-
@remote_files = [@homepage, @website]
|
12
|
-
|
13
|
-
# regular files
|
14
|
-
@csv = "foobar-csv.csv"
|
15
|
-
@xml = "foobar-xml.xml"
|
16
|
-
@txt = "foobar-txt.txt"
|
17
|
-
@blah = "foobar"
|
18
|
-
@files = [@csv, @xml, @txt, @blah]
|
19
|
-
|
20
|
-
# compressed files
|
21
|
-
@bz2 = "foobar-bz2.bz2"
|
22
|
-
@gz = "foobar-gz.gz"
|
23
|
-
@compressed_files = [@bz2, @gz]
|
24
|
-
|
25
|
-
# archives
|
26
|
-
@zip = "foobar-zip.zip"
|
27
|
-
@tarbz2 = "foobar-tarbz2.tar.bz2"
|
28
|
-
@targz = "foobar-targz.tar.gz"
|
29
|
-
@tar = "foobar-tar.tar"
|
30
|
-
@rar = "foobar-rar.rar"
|
31
|
-
@archives = [@zip, @tarbz2, @targz, @rar, @tar]
|
32
|
-
|
33
|
-
@local_files = @files + @compressed_files + @archives
|
34
|
-
|
35
|
-
@all_files = @remote_files + @local_files
|
36
|
-
|
37
|
-
@local_files.each do |path|
|
38
|
-
IMWTest::Random.file path
|
39
|
-
end
|
40
|
-
|
41
|
-
@aggregator = IMW::Tools::Aggregator.new @dir
|
42
|
-
end
|
43
|
-
|
44
|
-
it "should copy regular files to its directory" do
|
45
|
-
@aggregator.aggregate *@files
|
46
|
-
@aggregator.dir.path.should contain(*@files)
|
47
|
-
@files.each { |path| IMW.open(path).exist?.should be_true }
|
48
|
-
end
|
49
|
-
|
50
|
-
it "should copy remote files to its archive directory" do
|
51
|
-
@aggregator.aggregate *@remote_files
|
52
|
-
@aggregator.dir.path.should contain('_index', 'support') # _index from Http#effective_basename on http://www.google.com
|
53
|
-
end
|
54
|
-
|
55
|
-
it "should uncompress compressed files to its directory" do
|
56
|
-
@aggregator.aggregate *@compressed_files
|
57
|
-
@aggregator.dir.path.should contain('foobar-bz2', 'foobar-gz')
|
58
|
-
@aggregator.dir.path.should_not contain(*@compressed_files)
|
59
|
-
end
|
60
|
-
|
61
|
-
it "should copy the content of archive files to its archive directory (but not the actual archives)" do
|
62
|
-
@aggregator.aggregate *@archives
|
63
|
-
@archives.each do |archive|
|
64
|
-
@aggregator.dir.path.should_not contain(archive)
|
65
|
-
@aggregator.dir.path.should contain(*IMW.open(archive).contents)
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
end
|
70
|
-
|
71
|
-
|
data/spec/imw/tools/archiver_spec.rb
DELETED
@@ -1,120 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
2
|
-
|
3
|
-
describe IMW::Tools::Archiver do
|
4
|
-
before do
|
5
|
-
@name = 'foobar'
|
6
|
-
|
7
|
-
# remote files
|
8
|
-
@homepage = "http://www.google.com"
|
9
|
-
@website = "http://www.google.com/support/"
|
10
|
-
@remote_files = [@homepage, @website]
|
11
|
-
|
12
|
-
# regular files
|
13
|
-
@csv = "foobar-csv.csv"
|
14
|
-
@xml = "foobar-xml.xml"
|
15
|
-
@txt = "foobar-txt.txt"
|
16
|
-
@blah = "foobar"
|
17
|
-
@files = [@csv, @xml, @txt, @blah]
|
18
|
-
|
19
|
-
# compressed files
|
20
|
-
@bz2 = "foobar-bz2.bz2"
|
21
|
-
@gz = "foobar-gz.gz"
|
22
|
-
@compressed_files = [@bz2, @gz]
|
23
|
-
|
24
|
-
# archives
|
25
|
-
@zip = "foobar-zip.zip"
|
26
|
-
@tarbz2 = "foobar-tarbz2.tar.bz2"
|
27
|
-
@targz = "foobar-targz.tar.gz"
|
28
|
-
@tar = "foobar-tar.tar"
|
29
|
-
@rar = "foobar-rar.rar"
|
30
|
-
@archives = [@zip, @tarbz2, @targz, @rar, @tar]
|
31
|
-
|
32
|
-
@local_files = @files + @compressed_files + @archives
|
33
|
-
|
34
|
-
@all_files = @remote_files + @local_files
|
35
|
-
|
36
|
-
@local_files.each do |path|
|
37
|
-
IMWTest::Random.file path
|
38
|
-
end
|
39
|
-
|
40
|
-
@archiver = IMW::Tools::Archiver.new @name, @all_files
|
41
|
-
end
|
42
|
-
|
43
|
-
after do
|
44
|
-
@archiver.clean!
|
45
|
-
end
|
46
|
-
|
47
|
-
describe "preparing input files" do
|
48
|
-
|
49
|
-
describe "before preparing input files" do
|
50
|
-
it "should not be prepared when initialized" do
|
51
|
-
@archiver.prepared?.should be_false
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
describe "after preparing files" do
|
56
|
-
before { @archiver.prepare! }
|
57
|
-
|
58
|
-
it "should be prepared" do
|
59
|
-
@archiver.prepared?.should be_true
|
60
|
-
end
|
61
|
-
|
62
|
-
it "should name its archive directory properly" do
|
63
|
-
@archiver.tmp_dir.should contain(@name)
|
64
|
-
end
|
65
|
-
|
66
|
-
it "should copy regular files to its archive directory" do
|
67
|
-
@archiver.dir.should contain(*@files)
|
68
|
-
@local_files.each { |path| IMW.open(path).exist?.should be_true }
|
69
|
-
end
|
70
|
-
|
71
|
-
it "should copy remote files to its archive directory" do
|
72
|
-
@archiver.dir.should contain('_index', 'support') # _index from Http#effective_basename on http://www.google.com
|
73
|
-
end
|
74
|
-
|
75
|
-
it "should uncompress compressed files to its archive directory" do
|
76
|
-
@archiver.dir.should contain('foobar-bz2', 'foobar-gz')
|
77
|
-
@archiver.dir.should_not contain(*@compressed_files)
|
78
|
-
end
|
79
|
-
|
80
|
-
it "should copy the content of archive files to its archive directory (but not the actual archives)" do
|
81
|
-
@archives.each do |archive|
|
82
|
-
@archiver.dir.should_not contain(archive)
|
83
|
-
@archiver.dir.should contain(*IMW.open(archive).contents)
|
84
|
-
end
|
85
|
-
end
|
86
|
-
|
87
|
-
end
|
88
|
-
end
|
89
|
-
|
90
|
-
describe "when packaging files" do
|
91
|
-
@packages = ["package.tar.bz2", "package.zip", "package.tar.gz", "package.tar", "package.rar"]
|
92
|
-
|
93
|
-
@packages.each do |package|
|
94
|
-
it "should create a #{package} file containing all the files and return it" do
|
95
|
-
output = @archiver.package!(package)
|
96
|
-
output.basename.should == package
|
97
|
-
@archiver.tmp_dir.should contain(IMW.open(package).contents)
|
98
|
-
end
|
99
|
-
end
|
100
|
-
|
101
|
-
describe 'when packaging into multiple output formats' do
|
102
|
-
|
103
|
-
it "should prepare input files without being asked" do
|
104
|
-
@archiver.prepared?.should be_false
|
105
|
-
@archiver.package! 'package.tar.bz2'
|
106
|
-
@archiver.prepared?.should be_true
|
107
|
-
end
|
108
|
-
|
109
|
-
it "should not prepare input files once they've already been prepared" do
|
110
|
-
@archiver.prepared?.should be_false
|
111
|
-
@archiver.package! 'package.tar.bz2'
|
112
|
-
@archiver.prepared?.should be_true
|
113
|
-
@archiver.should_not_receive(:prepare!)
|
114
|
-
@archiver.package! 'package.tar.gz'
|
115
|
-
end
|
116
|
-
end
|
117
|
-
end
|
118
|
-
end
|
119
|
-
|
120
|
-
|