imw 0.2.7 → 0.2.8
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- data/Gemfile +23 -0
- data/Gemfile.lock +47 -0
- data/LICENSE +20 -674
- data/README.rdoc +3 -4
- data/VERSION +1 -1
- data/lib/imw.rb +64 -35
- data/lib/imw/dataset.rb +12 -2
- data/lib/imw/formats.rb +4 -2
- data/lib/imw/formats/delimited.rb +96 -36
- data/lib/imw/formats/excel.rb +69 -101
- data/lib/imw/formats/json.rb +3 -5
- data/lib/imw/formats/pdf.rb +71 -0
- data/lib/imw/formats/yaml.rb +3 -5
- data/lib/imw/metadata.rb +66 -0
- data/lib/imw/metadata/contains_metadata.rb +44 -0
- data/lib/imw/metadata/dsl.rb +111 -0
- data/lib/imw/metadata/field.rb +65 -0
- data/lib/imw/metadata/schema.rb +227 -0
- data/lib/imw/metadata/schematized.rb +27 -0
- data/lib/imw/parsers.rb +1 -0
- data/lib/imw/parsers/flat.rb +44 -0
- data/lib/imw/resource.rb +36 -224
- data/lib/imw/schemes.rb +3 -1
- data/lib/imw/schemes/hdfs.rb +12 -1
- data/lib/imw/schemes/http.rb +1 -2
- data/lib/imw/schemes/local.rb +139 -16
- data/lib/imw/schemes/remote.rb +14 -9
- data/lib/imw/schemes/s3.rb +12 -0
- data/lib/imw/schemes/sql.rb +117 -0
- data/lib/imw/tools.rb +5 -3
- data/lib/imw/tools/downloader.rb +63 -0
- data/lib/imw/tools/summarizer.rb +21 -10
- data/lib/imw/utils.rb +10 -0
- data/lib/imw/utils/dynamically_extendable.rb +137 -0
- data/lib/imw/utils/error.rb +3 -0
- data/lib/imw/utils/extensions.rb +0 -4
- data/lib/imw/utils/extensions/array.rb +6 -7
- data/lib/imw/utils/extensions/hash.rb +3 -5
- data/lib/imw/utils/extensions/string.rb +3 -3
- data/lib/imw/utils/has_uri.rb +114 -0
- data/spec/data/{sample.csv → formats/delimited/sample.csv} +1 -1
- data/spec/data/{sample.tsv → formats/delimited/sample.tsv} +0 -0
- data/spec/data/formats/delimited/with_schema/ace-hardware-locations.tsv +11 -0
- data/spec/data/formats/delimited/with_schema/all-countries-ip-address-to-geolocation-data.tsv +16 -0
- data/spec/data/formats/delimited/with_schema/complete-list-of-starbucks-locations.tsv +11 -0
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +22 -0
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +22 -0
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-counts.tsv +12 -0
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +13 -0
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +22 -0
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +22 -0
- data/spec/data/formats/delimited/without_schema/ace-hardware-locations.tsv +10 -0
- data/spec/data/formats/delimited/without_schema/all-countries-ip-address-to-geolocation-data.tsv +15 -0
- data/spec/data/formats/delimited/without_schema/complete-list-of-starbucks-locations.tsv +10 -0
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +21 -0
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +21 -0
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-counts.tsv +11 -0
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +12 -0
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +21 -0
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +21 -0
- data/spec/data/formats/excel/sample.xls +0 -0
- data/spec/data/formats/json/sample.json +1 -0
- data/spec/data/formats/none/sample +650 -0
- data/spec/data/formats/sgml/sample.xml +617 -0
- data/spec/data/formats/text/sample.txt +650 -0
- data/spec/data/formats/yaml/sample.yaml +410 -0
- data/spec/data/schema-tabular.yaml +11 -0
- data/spec/imw/formats/delimited_spec.rb +34 -2
- data/spec/imw/formats/excel_spec.rb +55 -0
- data/spec/imw/formats/json_spec.rb +3 -3
- data/spec/imw/formats/sgml_spec.rb +4 -4
- data/spec/imw/formats/yaml_spec.rb +3 -3
- data/spec/imw/metadata/field_spec.rb +26 -0
- data/spec/imw/metadata/schema_spec.rb +27 -0
- data/spec/imw/metadata_spec.rb +39 -0
- data/spec/imw/parsers/line_parser_spec.rb +1 -1
- data/spec/imw/resource_spec.rb +0 -100
- data/spec/imw/schemes/hdfs_spec.rb +19 -13
- data/spec/imw/schemes/local_spec.rb +59 -3
- data/spec/imw/schemes/s3_spec.rb +4 -0
- data/spec/imw/utils/dynamically_extendable_spec.rb +69 -0
- data/spec/imw/utils/has_uri_spec.rb +55 -0
- data/spec/spec_helper.rb +1 -2
- data/spec/support/random.rb +4 -4
- metadata +58 -17
- data/CHANGELOG +0 -0
- data/TODO +0 -18
- data/spec/data/sample.json +0 -782
- data/spec/data/sample.txt +0 -131
- data/spec/data/sample.xml +0 -653
- data/spec/data/sample.yaml +0 -651
- data/spec/spec.opts +0 -4
- data/spec/support/extensions.rb +0 -18
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
|
2
|
+
|
|
3
|
+
describe IMW::Formats::Excel do
|
|
4
|
+
|
|
5
|
+
before do
|
|
6
|
+
@sample = IMW.open(File.join(IMWTest::DATA_DIR, 'formats/excel/sample.xls'))
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
it "should be able to parse the Excel document" do
|
|
10
|
+
@sample.load[1].last.should == 'lemurinus'
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
it "should be able to create a snippet" do
|
|
14
|
+
@sample.snippet[1].last.should == 'lemurinus'
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
# it "should be able to write CSV" do
|
|
18
|
+
# data = [['foobar', 1, 2], ['bazbooz', 3, 4]]
|
|
19
|
+
# IMW.open!('test.csv').emit(data)
|
|
20
|
+
# IMW.open('test.csv').load[1].last.should == "4"
|
|
21
|
+
# end
|
|
22
|
+
|
|
23
|
+
# it "should raise an error on an invalid schema" do
|
|
24
|
+
# lambda { @sample.schema = [{:name => :foobar, :has_many => {:associations => [:foo, :bar]}}] }.should raise_error(IMW::SchemaError)
|
|
25
|
+
# end
|
|
26
|
+
|
|
27
|
+
# it "should accept a valid schema" do
|
|
28
|
+
# @sample.schema = [:foo, :bar, :baz]
|
|
29
|
+
# @sample.schema.should == [{:name => 'foo'}, {:name => 'bar'}, {:name => 'baz'}]
|
|
30
|
+
# end
|
|
31
|
+
|
|
32
|
+
# describe "guessing a schema" do
|
|
33
|
+
|
|
34
|
+
# Dir[File.join(IMWTest::DATA_DIR, 'formats/delimited/with_schema/*')].each do |path|
|
|
35
|
+
# it "should correctly guess that with_schema/#{File.basename(path)} has headers in its first row" do
|
|
36
|
+
# IMW.open(path).headers_in_first_line?.should == true
|
|
37
|
+
# end
|
|
38
|
+
# end
|
|
39
|
+
|
|
40
|
+
# Dir[File.join(IMWTest::DATA_DIR, 'formats/delimited/without_schema/*')].each do |path|
|
|
41
|
+
# it "should correctly guess that without_schema/#{File.basename(path)} does not have headers in its first row" do
|
|
42
|
+
# IMW.open(path).headers_in_first_line?.should == false
|
|
43
|
+
# end
|
|
44
|
+
# end
|
|
45
|
+
|
|
46
|
+
# it "should automatically set the headers on a source with guessed headers" do
|
|
47
|
+
# resource = IMW.open(Dir[File.join(IMWTest::DATA_DIR, 'formats/delimited/with_schema/*')].first)
|
|
48
|
+
# resource.guess_schema!
|
|
49
|
+
# resource.delimited_options[:headers].class.should == Array
|
|
50
|
+
# resource.schema.should_not be_empty
|
|
51
|
+
# end
|
|
52
|
+
|
|
53
|
+
# end
|
|
54
|
+
|
|
55
|
+
end
|
|
@@ -3,15 +3,15 @@ require File.join(File.dirname(__FILE__),'../../spec_helper')
|
|
|
3
3
|
describe IMW::Formats::Json do
|
|
4
4
|
|
|
5
5
|
before do
|
|
6
|
-
@sample = IMW.open(File.join(IMWTest::DATA_DIR, 'sample.json'))
|
|
6
|
+
@sample = IMW.open(File.join(IMWTest::DATA_DIR, 'formats/json/sample.json'))
|
|
7
7
|
end
|
|
8
8
|
|
|
9
9
|
it "should be able to parse the JSON" do
|
|
10
|
-
@sample.load.first['id'].should == 1
|
|
10
|
+
@sample.load["Aotus"].first['id'].should == 1
|
|
11
11
|
end
|
|
12
12
|
|
|
13
13
|
it "should be able to write JSON" do
|
|
14
|
-
IMW.open!('test.json').
|
|
14
|
+
IMW.open!('test.json') { |f| f.emit({ 'foobar' => 3, 'bazbooz' => 4 }) }
|
|
15
15
|
IMW.open('test.json').load['foobar'].should == 3
|
|
16
16
|
end
|
|
17
17
|
|
|
@@ -4,21 +4,21 @@ describe IMW::Formats::Xml do
|
|
|
4
4
|
# just spec Xml now as the others are identical
|
|
5
5
|
|
|
6
6
|
before do
|
|
7
|
-
@sample = IMW.open(File.join(IMWTest::DATA_DIR, 'sample.xml'))
|
|
7
|
+
@sample = IMW.open(File.join(IMWTest::DATA_DIR, 'formats/sgml/sample.xml'))
|
|
8
8
|
end
|
|
9
9
|
|
|
10
10
|
it "should be able to load the XML" do
|
|
11
|
-
((@sample.load/"
|
|
11
|
+
((@sample.load/"genus").first/"name").first.inner_text.should == 'Mandrillus'
|
|
12
12
|
end
|
|
13
13
|
|
|
14
14
|
it "should yield the XML when load is given a block" do
|
|
15
15
|
@sample.load do |xml|
|
|
16
|
-
((xml/"
|
|
16
|
+
((xml/"genus").first/"name").first.inner_text.should == 'Mandrillus'
|
|
17
17
|
end
|
|
18
18
|
end
|
|
19
19
|
|
|
20
20
|
it "should parse the XML" do
|
|
21
|
-
@sample.parse(:
|
|
21
|
+
@sample.parse(:species => ['species[@id]'])[:species].size.should == 130
|
|
22
22
|
end
|
|
23
23
|
end
|
|
24
24
|
|
|
@@ -3,16 +3,16 @@ require File.join(File.dirname(__FILE__),'../../spec_helper')
|
|
|
3
3
|
describe IMW::Formats::Yaml do
|
|
4
4
|
|
|
5
5
|
before do
|
|
6
|
-
@sample = IMW.open(File.join(IMWTest::DATA_DIR, 'sample.yaml'))
|
|
6
|
+
@sample = IMW.open(File.join(IMWTest::DATA_DIR, 'formats/yaml/sample.yaml')).load
|
|
7
7
|
end
|
|
8
8
|
|
|
9
9
|
it "should be able to parse the YAML" do
|
|
10
|
-
@sample.first[
|
|
10
|
+
@sample['Lophocebus'].first[:id].should == 94
|
|
11
11
|
end
|
|
12
12
|
|
|
13
13
|
it "should be able to write YAML" do
|
|
14
14
|
data = { 'foobar' => 3, 'bazbooz' => 4 }
|
|
15
|
-
IMW.open!('test.yaml').
|
|
15
|
+
IMW.open!('test.yaml') { |f| f.emit(data) }
|
|
16
16
|
IMW.open('test.yaml').load['foobar'].should == 3
|
|
17
17
|
end
|
|
18
18
|
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
+
|
|
3
|
+
describe IMW::Metadata::Field do
|
|
4
|
+
|
|
5
|
+
describe "initializing" do
|
|
6
|
+
it "should parse a symbol or string into a hash" do
|
|
7
|
+
IMW::Metadata::Field.new(:foobar).should == { :name => "foobar" }
|
|
8
|
+
IMW::Metadata::Field.new('foobar').should == { :name => 'foobar' }
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
it "should raise an error on a Hash without a :name key" do
|
|
12
|
+
lambda { IMW::Metadata::Field.new(:foo => 'bar') }.should raise_error(IMW::ArgumentError)
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
it "should accept a Hash with a :name key" do
|
|
16
|
+
data = { :name => :foobar, :title => "Bazbooz", :unit => "m" }
|
|
17
|
+
IMW::Metadata::Field.new(data).should == data
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
it "should dup a field if given one" do
|
|
21
|
+
orig_field = IMW::Metadata::Field.new('foobar')
|
|
22
|
+
IMW::Metadata::Field.new(orig_field).should == orig_field
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
+
|
|
3
|
+
describe IMW::Metadata::Schema do
|
|
4
|
+
|
|
5
|
+
describe "initializing" do
|
|
6
|
+
it "should accept an array" do
|
|
7
|
+
IMW::Metadata::Schema.new([1,2,3]).should == [{:name => '1'}, {:name => '2'}, {:name => '3'}]
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
it "should dup a Schema if given one" do
|
|
11
|
+
orig_schema = IMW::Metadata::Schema.new([1,2,3])
|
|
12
|
+
IMW::Metadata::Schema.new(orig_schema).should == orig_schema
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
describe 'loading' do
|
|
17
|
+
it "should load an Array in a resource" do
|
|
18
|
+
resource = IMW.open('some_resource')
|
|
19
|
+
resource.should_receive(:load).and_return(%w[foo bar baz])
|
|
20
|
+
IMW.should_receive(:open).and_return(resource)
|
|
21
|
+
IMW::Metadata::Schema.load(resource.to_s).map { |field| field[:name] }.should == %w[foo bar baz]
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
end
|
|
27
|
+
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
require File.dirname(__FILE__) + "/../spec_helper"
|
|
2
|
+
|
|
3
|
+
describe IMW::Metadata do
|
|
4
|
+
|
|
5
|
+
describe "initalizing" do
|
|
6
|
+
|
|
7
|
+
it "should accept a hash" do
|
|
8
|
+
IMW::Metadata.new('a' => ['a', 'b']).should == { 'a' => [{:name => 'a'}, {:name => 'b'}] }
|
|
9
|
+
end
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
describe 'loading' do
|
|
13
|
+
|
|
14
|
+
it "should accept a Hash in a resource" do
|
|
15
|
+
data = {'a' => ['a', 'b']}
|
|
16
|
+
resource = IMW.open('some_resource')
|
|
17
|
+
IMW.should_receive(:open).with(resource).and_return(resource)
|
|
18
|
+
resource.should_receive(:load).and_return(data)
|
|
19
|
+
IMW::Metadata.load(resource).should == { 'a' => [{:name => 'a'}, {:name => 'b'}] }
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
describe "constructing absolute URIs" do
|
|
24
|
+
|
|
25
|
+
before { @metadata = IMW::Metadata.new }
|
|
26
|
+
|
|
27
|
+
it "should return the resource given without a base" do
|
|
28
|
+
@metadata.send(:absolute_uri, 'path/to/something').should == 'path/to/something'
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
it "should return the absolute URI with a base" do
|
|
32
|
+
path = File.join(IMWTest::TMP_DIR, 'metadata.yaml')
|
|
33
|
+
FileUtils.mkdir_p(path)
|
|
34
|
+
@metadata.base = path
|
|
35
|
+
@metadata.send(:absolute_uri, 'path/to/something').should == File.join(IMWTest::TMP_DIR, '/path/to/something')
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
end
|
|
@@ -4,7 +4,7 @@ require 'ostruct'
|
|
|
4
4
|
describe IMW::Parsers::LineParser do
|
|
5
5
|
|
|
6
6
|
before do
|
|
7
|
-
@path = File.
|
|
7
|
+
@path = File.join(IMWTest::DATA_DIR, 'formats/delimited/sample.csv')
|
|
8
8
|
@file = File.new(@path)
|
|
9
9
|
@fields = [:id, :name, :genus, :species]
|
|
10
10
|
end
|
data/spec/imw/resource_spec.rb
CHANGED
|
@@ -25,106 +25,6 @@ describe IMW::Resource do
|
|
|
25
25
|
end
|
|
26
26
|
end
|
|
27
27
|
|
|
28
|
-
describe "parsing various and sundry URIs should correctly parse a" do
|
|
29
|
-
|
|
30
|
-
before do
|
|
31
|
-
IMW::Resource.should_receive(:extend_resource!).with(an_instance_of(IMW::Resource), {})
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
it "local file path" do
|
|
35
|
-
resource = IMW::Resource.new("/home/foo.txt")
|
|
36
|
-
resource.stub!(:path).and_return("/home/foo.txt")
|
|
37
|
-
|
|
38
|
-
resource.scheme.should be_nil
|
|
39
|
-
resource.dirname.should == '/home'
|
|
40
|
-
resource.basename.should == 'foo.txt'
|
|
41
|
-
resource.extname.should == '.txt'
|
|
42
|
-
resource.extension.should == 'txt'
|
|
43
|
-
resource.name.should == 'foo'
|
|
44
|
-
end
|
|
45
|
-
|
|
46
|
-
it "local file path with spaces in the name" do
|
|
47
|
-
resource = IMW::Resource.new("/home/foo bar.txt")
|
|
48
|
-
resource.stub!(:path).and_return("/home/foo bar.txt")
|
|
49
|
-
resource.name.should == 'foo bar'
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
it "local file path with an explicit file:// scheme" do
|
|
53
|
-
resource = IMW::Resource.new("file:///home/foo.txt")
|
|
54
|
-
resource.scheme.should == 'file'
|
|
55
|
-
end
|
|
56
|
-
|
|
57
|
-
it "web URL with query and fragment" do
|
|
58
|
-
resource = IMW::Resource.new("http://mysite.com/some/page?param=value#frag")
|
|
59
|
-
resource.stub!(:path).and_return("/some/page")
|
|
60
|
-
resource.scheme.should == 'http'
|
|
61
|
-
resource.dirname.should == '/some'
|
|
62
|
-
resource.basename.should == 'page'
|
|
63
|
-
resource.extname.should == ''
|
|
64
|
-
resource.extension.should == ''
|
|
65
|
-
resource.name.should == 'page'
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
it "should open a URI without attempting to extend with modules if so asked" do
|
|
71
|
-
IMW::Resource.should_not_receive(:extend_resource!)
|
|
72
|
-
IMW::Resource.new("/path/to/some/file.txt", :no_modules => true)
|
|
73
|
-
end
|
|
74
|
-
|
|
75
|
-
describe "extending resources with specific modules" do
|
|
76
|
-
before do
|
|
77
|
-
@resource = IMW::Resource.new('http://www.infochimps.com/data', :no_modules => true)
|
|
78
|
-
end
|
|
79
|
-
|
|
80
|
-
it "should use a specific module when asked with a string" do
|
|
81
|
-
IMW::Resource.extend_resource!(@resource, :use_modules => ["Formats::Csv"])
|
|
82
|
-
@resource.resource_modules.should include(IMW::Formats::Csv)
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
it "should use a specific module when asked with a module" do
|
|
86
|
-
IMW::Resource.extend_resource!(@resource, :use_modules => [IMW::Formats::Csv])
|
|
87
|
-
@resource.resource_modules.should include(IMW::Formats::Csv)
|
|
88
|
-
end
|
|
89
|
-
|
|
90
|
-
it "should not use a specific module when asked with a string" do
|
|
91
|
-
IMW::Resource.extend_resource!(@resource, :skip_modules => ["Schemes::HTTP"])
|
|
92
|
-
@resource.resource_modules.should_not include(IMW::Schemes::HTTP)
|
|
93
|
-
end
|
|
94
|
-
|
|
95
|
-
it "should not use a specific module when asked with a module" do
|
|
96
|
-
IMW::Resource.extend_resource!(@resource, :skip_modules => [IMW::Schemes::HTTP])
|
|
97
|
-
@resource.resource_modules.should_not include(IMW::Schemes::HTTP)
|
|
98
|
-
end
|
|
99
|
-
|
|
100
|
-
end
|
|
101
|
-
|
|
102
|
-
describe "registering a new handler" do
|
|
103
|
-
after do
|
|
104
|
-
IMW::USER_DEFINED_HANDLERS.delete_if { true }
|
|
105
|
-
end
|
|
106
|
-
|
|
107
|
-
it "should raise an error if the module given isn't a module or string" do
|
|
108
|
-
lambda { IMW.register_handler 3, // }.should raise_error(IMW::ArgumentError)
|
|
109
|
-
lambda { IMW.register_handler "IMW", // }.should_not raise_error(IMW::ArgumentError)
|
|
110
|
-
lambda { IMW.register_handler IMW, // }.should_not raise_error(IMW::ArgumentError)
|
|
111
|
-
end
|
|
112
|
-
|
|
113
|
-
it "should raise an error if the handler given isn't a Regexp, Proc, or true" do
|
|
114
|
-
lambda { IMW.register_handler IMW, 3 }.should raise_error(IMW::ArgumentError)
|
|
115
|
-
lambda { IMW.register_handler IMW, /foo/ }.should_not raise_error(IMW::ArgumentError)
|
|
116
|
-
lambda { IMW.register_handler IMW, Proc.new { |r| true } }.should_not raise_error(IMW::ArgumentError)
|
|
117
|
-
lambda { IMW.register_handler IMW, true }.should_not raise_error(IMW::ArgumentError)
|
|
118
|
-
end
|
|
119
|
-
|
|
120
|
-
it "should use a valid handler when appropriate" do
|
|
121
|
-
NewModule = Module.new
|
|
122
|
-
IMW.register_handler NewModule, /\.foo$/
|
|
123
|
-
IMW.open('/path/to/something.foo').resource_modules.should include(NewModule)
|
|
124
|
-
end
|
|
125
|
-
|
|
126
|
-
end
|
|
127
|
-
|
|
128
28
|
end
|
|
129
29
|
|
|
130
30
|
|
|
@@ -2,13 +2,13 @@ require File.join(File.dirname(__FILE__),'../../spec_helper')
|
|
|
2
2
|
|
|
3
3
|
describe IMW::Schemes::HDFS do
|
|
4
4
|
before do
|
|
5
|
-
def fake_hdfs_resource path,
|
|
6
|
-
if
|
|
7
|
-
response = " #{num_dirs} #{num_files} #{size} hdfs://localhost#{path}"
|
|
8
|
-
else
|
|
5
|
+
def fake_hdfs_resource path, options={}
|
|
6
|
+
if options == false
|
|
9
7
|
response = ""
|
|
8
|
+
else
|
|
9
|
+
response = " #{options[:dirs] || 0} #{options[:files] || 1} #{options[:size] || 1000} hdfs://localhost#{path}"
|
|
10
10
|
end
|
|
11
|
-
IMW::Schemes::HDFS.should_receive(:fs).with(:count, path).and_return(response)
|
|
11
|
+
IMW::Schemes::HDFS.should_receive(:fs).with(:count, path).at_least(:once).and_return(response)
|
|
12
12
|
IMW.open("hdfs://#{path}")
|
|
13
13
|
end
|
|
14
14
|
@path = '/path/to/myfile'
|
|
@@ -16,7 +16,7 @@ describe IMW::Schemes::HDFS do
|
|
|
16
16
|
|
|
17
17
|
describe "refreshing its properties" do
|
|
18
18
|
it "should correctly get properties for a resource which exists" do
|
|
19
|
-
resource = fake_hdfs_resource(@path, 2, 3, 1000)
|
|
19
|
+
resource = fake_hdfs_resource(@path, :dirs => 2, :files => 3, :size => 1000)
|
|
20
20
|
resource.exist?.should be_true
|
|
21
21
|
resource.num_dirs.should == 2
|
|
22
22
|
resource.num_files.should == 3
|
|
@@ -24,38 +24,44 @@ describe IMW::Schemes::HDFS do
|
|
|
24
24
|
end
|
|
25
25
|
|
|
26
26
|
it "should gracefully handle a resource which doesn't exist" do
|
|
27
|
-
resource = fake_hdfs_resource(@path)
|
|
27
|
+
resource = fake_hdfs_resource(@path, false)
|
|
28
28
|
resource.exist?.should be_false
|
|
29
29
|
end
|
|
30
30
|
|
|
31
31
|
it "should execute the correct command to delete the path" do
|
|
32
|
-
resource = fake_hdfs_resource(@path
|
|
32
|
+
resource = fake_hdfs_resource(@path)
|
|
33
33
|
IMW::Schemes::HDFS.should_receive(:fs).with(:rm, resource.path)
|
|
34
34
|
resource.rm
|
|
35
35
|
end
|
|
36
36
|
|
|
37
37
|
it "should execute the correct command to delete the path when skipping the trash" do
|
|
38
|
-
resource = fake_hdfs_resource(@path
|
|
38
|
+
resource = fake_hdfs_resource(@path)
|
|
39
39
|
IMW::Schemes::HDFS.should_receive(:fs).with(:rm, '-skipTrash', resource.path)
|
|
40
40
|
resource.rm :skip_trash => true
|
|
41
41
|
end
|
|
42
42
|
|
|
43
43
|
it "should recognize a file and extend it properly" do
|
|
44
|
-
resource = fake_hdfs_resource(@path
|
|
44
|
+
resource = fake_hdfs_resource(@path)
|
|
45
45
|
resource.num_dirs.should == 0
|
|
46
46
|
resource.num_files.should == 1
|
|
47
47
|
resource.exist?.should be_true
|
|
48
48
|
resource.is_directory?.should be_false
|
|
49
|
-
resource.
|
|
49
|
+
resource.modules.should include(IMW::Schemes::HDFSFile)
|
|
50
50
|
end
|
|
51
51
|
|
|
52
52
|
it "should recognize a directory and extend it properly" do
|
|
53
|
-
resource = fake_hdfs_resource(@path, 2, 1
|
|
53
|
+
resource = fake_hdfs_resource(@path, :dirs => 2, :files => 1)
|
|
54
54
|
resource.num_dirs.should == 2
|
|
55
55
|
resource.num_files.should == 1
|
|
56
56
|
resource.exist?.should be_true
|
|
57
57
|
resource.is_directory?.should be_true
|
|
58
|
-
resource.
|
|
58
|
+
resource.modules.should include(IMW::Schemes::HDFSDirectory)
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
it "should be able to join path segments to a directory" do
|
|
62
|
+
resource = fake_hdfs_resource(@path, :dirs => 2)
|
|
63
|
+
sub_resource = fake_hdfs_resource("#{@path}/a/b/c")
|
|
64
|
+
resource.join('a', 'b/c').to_s.should == sub_resource.to_s
|
|
59
65
|
end
|
|
60
66
|
end
|
|
61
67
|
end
|
|
@@ -5,13 +5,13 @@ describe IMW::Schemes::Local::Base do
|
|
|
5
5
|
it "should not extend a local file with LocalDirectory" do
|
|
6
6
|
@file = IMW::Resource.new('foo.txt', :no_modules => true)
|
|
7
7
|
@file.should_not_receive(:extend).with(IMW::Schemes::Local::LocalDirectory)
|
|
8
|
-
@file
|
|
8
|
+
IMW::Resource.extend_instance!(@file)
|
|
9
9
|
end
|
|
10
10
|
|
|
11
11
|
it "should not extend a local directory with LocalFile" do
|
|
12
12
|
@dir = IMW::Resource.new(IMWTest::TMP_DIR, :no_modules => true)
|
|
13
13
|
@dir.should_not_receive(:extend).with(IMW::Schemes::Local::LocalFile)
|
|
14
|
-
@dir
|
|
14
|
+
IMW::Resource.extend_instance!(@dir)
|
|
15
15
|
end
|
|
16
16
|
|
|
17
17
|
it "should correctly resolve relative paths" do
|
|
@@ -37,7 +37,6 @@ describe IMW::Schemes::Local::LocalFile do
|
|
|
37
37
|
@file.exist?.should be_false
|
|
38
38
|
end
|
|
39
39
|
|
|
40
|
-
|
|
41
40
|
it "can read a file" do
|
|
42
41
|
@file.read.size.should > 0
|
|
43
42
|
end
|
|
@@ -60,6 +59,12 @@ describe IMW::Schemes::Local::LocalFile do
|
|
|
60
59
|
line[0..5]
|
|
61
60
|
end.class.should == Array
|
|
62
61
|
end
|
|
62
|
+
|
|
63
|
+
it "can produce a snippet" do
|
|
64
|
+
path = IMWTest::DATA_DIR + "/formats/none/sample"
|
|
65
|
+
# FIXME only look at the first 100 bytes b/c of subsequent non-ascii chars...
|
|
66
|
+
IMW.open(path).snippet[0..100].should == File.new(path).read(101)
|
|
67
|
+
end
|
|
63
68
|
end
|
|
64
69
|
|
|
65
70
|
describe IMW::Schemes::Local::LocalDirectory do
|
|
@@ -100,6 +105,57 @@ describe IMW::Schemes::Local::LocalDirectory do
|
|
|
100
105
|
@dir.resources.map(&:class).uniq.first.should == IMW::Resource
|
|
101
106
|
end
|
|
102
107
|
|
|
108
|
+
describe "checking whether it contains other resources" do
|
|
109
|
+
|
|
110
|
+
it "should return false for remote paths" do
|
|
111
|
+
@dir.contains?("http://google.com").should be_false
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
it "should return true for its own path" do
|
|
115
|
+
@dir.contains?(@dir.path).should be_true
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
it "should return false for a path that doesn't start with its path" do
|
|
119
|
+
@dir.contains?(File.expand_path('foo')).should be_false
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
it "should return false for a path that starts with its path but doesn't exist" do
|
|
123
|
+
@dir.contains?(File.expand_path('dir/foo/baz')).should be_false
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
it "should return true for a path that starts with its path and exists" do
|
|
127
|
+
FileUtils.mkdir_p('dir/foo/baz')
|
|
128
|
+
@dir.contains?(File.expand_path('dir/foo/baz')).should be_true
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
describe "handling schemata" do
|
|
134
|
+
|
|
135
|
+
it "should recognize a YAML schema file" do
|
|
136
|
+
schemata_path = File.join(@dir.path, 'schema.yaml')
|
|
137
|
+
IMWTest::Random.file(schemata_path)
|
|
138
|
+
@dir.schemata_path.should == schemata_path
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
it "should recognize a JSON schema file" do
|
|
142
|
+
schemata_path = File.join(@dir.path, 'schema.json')
|
|
143
|
+
IMWTest::Random.file(schemata_path)
|
|
144
|
+
@dir.schemata_path.should == schemata_path
|
|
145
|
+
end
|
|
146
|
+
|
|
147
|
+
it "should recognize a funny-named YAML schema file" do
|
|
148
|
+
schemata_path = File.join(@dir.path, 'schema-1838293.yml')
|
|
149
|
+
IMWTest::Random.file(schemata_path)
|
|
150
|
+
@dir.schemata_path.should == schemata_path
|
|
151
|
+
end
|
|
152
|
+
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
it "can join with a path" do
|
|
156
|
+
@dir.join("a", "b/c").to_s.should == File.join(@dir.path, 'a/b/c')
|
|
157
|
+
end
|
|
158
|
+
|
|
103
159
|
describe 'can package itself to' do
|
|
104
160
|
['tar', 'tar.bz2', 'tar.gz', 'zip', 'rar'].each do |extension|
|
|
105
161
|
it "a #{extension} archive" do
|