imw 0.2.7 → 0.2.8
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +23 -0
- data/Gemfile.lock +47 -0
- data/LICENSE +20 -674
- data/README.rdoc +3 -4
- data/VERSION +1 -1
- data/lib/imw.rb +64 -35
- data/lib/imw/dataset.rb +12 -2
- data/lib/imw/formats.rb +4 -2
- data/lib/imw/formats/delimited.rb +96 -36
- data/lib/imw/formats/excel.rb +69 -101
- data/lib/imw/formats/json.rb +3 -5
- data/lib/imw/formats/pdf.rb +71 -0
- data/lib/imw/formats/yaml.rb +3 -5
- data/lib/imw/metadata.rb +66 -0
- data/lib/imw/metadata/contains_metadata.rb +44 -0
- data/lib/imw/metadata/dsl.rb +111 -0
- data/lib/imw/metadata/field.rb +65 -0
- data/lib/imw/metadata/schema.rb +227 -0
- data/lib/imw/metadata/schematized.rb +27 -0
- data/lib/imw/parsers.rb +1 -0
- data/lib/imw/parsers/flat.rb +44 -0
- data/lib/imw/resource.rb +36 -224
- data/lib/imw/schemes.rb +3 -1
- data/lib/imw/schemes/hdfs.rb +12 -1
- data/lib/imw/schemes/http.rb +1 -2
- data/lib/imw/schemes/local.rb +139 -16
- data/lib/imw/schemes/remote.rb +14 -9
- data/lib/imw/schemes/s3.rb +12 -0
- data/lib/imw/schemes/sql.rb +117 -0
- data/lib/imw/tools.rb +5 -3
- data/lib/imw/tools/downloader.rb +63 -0
- data/lib/imw/tools/summarizer.rb +21 -10
- data/lib/imw/utils.rb +10 -0
- data/lib/imw/utils/dynamically_extendable.rb +137 -0
- data/lib/imw/utils/error.rb +3 -0
- data/lib/imw/utils/extensions.rb +0 -4
- data/lib/imw/utils/extensions/array.rb +6 -7
- data/lib/imw/utils/extensions/hash.rb +3 -5
- data/lib/imw/utils/extensions/string.rb +3 -3
- data/lib/imw/utils/has_uri.rb +114 -0
- data/spec/data/{sample.csv → formats/delimited/sample.csv} +1 -1
- data/spec/data/{sample.tsv → formats/delimited/sample.tsv} +0 -0
- data/spec/data/formats/delimited/with_schema/ace-hardware-locations.tsv +11 -0
- data/spec/data/formats/delimited/with_schema/all-countries-ip-address-to-geolocation-data.tsv +16 -0
- data/spec/data/formats/delimited/with_schema/complete-list-of-starbucks-locations.tsv +11 -0
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +22 -0
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +22 -0
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-counts.tsv +12 -0
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +13 -0
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +22 -0
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +22 -0
- data/spec/data/formats/delimited/without_schema/ace-hardware-locations.tsv +10 -0
- data/spec/data/formats/delimited/without_schema/all-countries-ip-address-to-geolocation-data.tsv +15 -0
- data/spec/data/formats/delimited/without_schema/complete-list-of-starbucks-locations.tsv +10 -0
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +21 -0
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +21 -0
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-counts.tsv +11 -0
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +12 -0
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +21 -0
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +21 -0
- data/spec/data/formats/excel/sample.xls +0 -0
- data/spec/data/formats/json/sample.json +1 -0
- data/spec/data/formats/none/sample +650 -0
- data/spec/data/formats/sgml/sample.xml +617 -0
- data/spec/data/formats/text/sample.txt +650 -0
- data/spec/data/formats/yaml/sample.yaml +410 -0
- data/spec/data/schema-tabular.yaml +11 -0
- data/spec/imw/formats/delimited_spec.rb +34 -2
- data/spec/imw/formats/excel_spec.rb +55 -0
- data/spec/imw/formats/json_spec.rb +3 -3
- data/spec/imw/formats/sgml_spec.rb +4 -4
- data/spec/imw/formats/yaml_spec.rb +3 -3
- data/spec/imw/metadata/field_spec.rb +26 -0
- data/spec/imw/metadata/schema_spec.rb +27 -0
- data/spec/imw/metadata_spec.rb +39 -0
- data/spec/imw/parsers/line_parser_spec.rb +1 -1
- data/spec/imw/resource_spec.rb +0 -100
- data/spec/imw/schemes/hdfs_spec.rb +19 -13
- data/spec/imw/schemes/local_spec.rb +59 -3
- data/spec/imw/schemes/s3_spec.rb +4 -0
- data/spec/imw/utils/dynamically_extendable_spec.rb +69 -0
- data/spec/imw/utils/has_uri_spec.rb +55 -0
- data/spec/spec_helper.rb +1 -2
- data/spec/support/random.rb +4 -4
- metadata +58 -17
- data/CHANGELOG +0 -0
- data/TODO +0 -18
- data/spec/data/sample.json +0 -782
- data/spec/data/sample.txt +0 -131
- data/spec/data/sample.xml +0 -653
- data/spec/data/sample.yaml +0 -651
- data/spec/spec.opts +0 -4
- data/spec/support/extensions.rb +0 -18
@@ -0,0 +1,55 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
2
|
+
|
3
|
+
describe IMW::Formats::Excel do
|
4
|
+
|
5
|
+
before do
|
6
|
+
@sample = IMW.open(File.join(IMWTest::DATA_DIR, 'formats/excel/sample.xls'))
|
7
|
+
end
|
8
|
+
|
9
|
+
it "should be able to parse the Excel document" do
|
10
|
+
@sample.load[1].last.should == 'lemurinus'
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should be able to create a snippet" do
|
14
|
+
@sample.snippet[1].last.should == 'lemurinus'
|
15
|
+
end
|
16
|
+
|
17
|
+
# it "should be able to write CSV" do
|
18
|
+
# data = [['foobar', 1, 2], ['bazbooz', 3, 4]]
|
19
|
+
# IMW.open!('test.csv').emit(data)
|
20
|
+
# IMW.open('test.csv').load[1].last.should == "4"
|
21
|
+
# end
|
22
|
+
|
23
|
+
# it "should raise an error on an invalid schema" do
|
24
|
+
# lambda { @sample.schema = [{:name => :foobar, :has_many => {:associations => [:foo, :bar]}}] }.should raise_error(IMW::SchemaError)
|
25
|
+
# end
|
26
|
+
|
27
|
+
# it "should accept a valid schema" do
|
28
|
+
# @sample.schema = [:foo, :bar, :baz]
|
29
|
+
# @sample.schema.should == [{:name => 'foo'}, {:name => 'bar'}, {:name => 'baz'}]
|
30
|
+
# end
|
31
|
+
|
32
|
+
# describe "guessing a schema" do
|
33
|
+
|
34
|
+
# Dir[File.join(IMWTest::DATA_DIR, 'formats/delimited/with_schema/*')].each do |path|
|
35
|
+
# it "should correctly guess that with_schema/#{File.basename(path)} has headers in its first row" do
|
36
|
+
# IMW.open(path).headers_in_first_line?.should == true
|
37
|
+
# end
|
38
|
+
# end
|
39
|
+
|
40
|
+
# Dir[File.join(IMWTest::DATA_DIR, 'formats/delimited/without_schema/*')].each do |path|
|
41
|
+
# it "should correctly guess that without_schema/#{File.basename(path)} does not have headers in its first row" do
|
42
|
+
# IMW.open(path).headers_in_first_line?.should == false
|
43
|
+
# end
|
44
|
+
# end
|
45
|
+
|
46
|
+
# it "should automatically set the headers on a source with guessed headers" do
|
47
|
+
# resource = IMW.open(Dir[File.join(IMWTest::DATA_DIR, 'formats/delimited/with_schema/*')].first)
|
48
|
+
# resource.guess_schema!
|
49
|
+
# resource.delimited_options[:headers].class.should == Array
|
50
|
+
# resource.schema.should_not be_empty
|
51
|
+
# end
|
52
|
+
|
53
|
+
# end
|
54
|
+
|
55
|
+
end
|
@@ -3,15 +3,15 @@ require File.join(File.dirname(__FILE__),'../../spec_helper')
|
|
3
3
|
describe IMW::Formats::Json do
|
4
4
|
|
5
5
|
before do
|
6
|
-
@sample = IMW.open(File.join(IMWTest::DATA_DIR, 'sample.json'))
|
6
|
+
@sample = IMW.open(File.join(IMWTest::DATA_DIR, 'formats/json/sample.json'))
|
7
7
|
end
|
8
8
|
|
9
9
|
it "should be able to parse the JSON" do
|
10
|
-
@sample.load.first['id'].should == 1
|
10
|
+
@sample.load["Aotus"].first['id'].should == 1
|
11
11
|
end
|
12
12
|
|
13
13
|
it "should be able to write JSON" do
|
14
|
-
IMW.open!('test.json').
|
14
|
+
IMW.open!('test.json') { |f| f.emit({ 'foobar' => 3, 'bazbooz' => 4 }) }
|
15
15
|
IMW.open('test.json').load['foobar'].should == 3
|
16
16
|
end
|
17
17
|
|
@@ -4,21 +4,21 @@ describe IMW::Formats::Xml do
|
|
4
4
|
# just spec Xml now as the others are identical
|
5
5
|
|
6
6
|
before do
|
7
|
-
@sample = IMW.open(File.join(IMWTest::DATA_DIR, 'sample.xml'))
|
7
|
+
@sample = IMW.open(File.join(IMWTest::DATA_DIR, 'formats/sgml/sample.xml'))
|
8
8
|
end
|
9
9
|
|
10
10
|
it "should be able to load the XML" do
|
11
|
-
((@sample.load/"
|
11
|
+
((@sample.load/"genus").first/"name").first.inner_text.should == 'Mandrillus'
|
12
12
|
end
|
13
13
|
|
14
14
|
it "should yield the XML when load is given a block" do
|
15
15
|
@sample.load do |xml|
|
16
|
-
((xml/"
|
16
|
+
((xml/"genus").first/"name").first.inner_text.should == 'Mandrillus'
|
17
17
|
end
|
18
18
|
end
|
19
19
|
|
20
20
|
it "should parse the XML" do
|
21
|
-
@sample.parse(:
|
21
|
+
@sample.parse(:species => ['species[@id]'])[:species].size.should == 130
|
22
22
|
end
|
23
23
|
end
|
24
24
|
|
@@ -3,16 +3,16 @@ require File.join(File.dirname(__FILE__),'../../spec_helper')
|
|
3
3
|
describe IMW::Formats::Yaml do
|
4
4
|
|
5
5
|
before do
|
6
|
-
@sample = IMW.open(File.join(IMWTest::DATA_DIR, 'sample.yaml'))
|
6
|
+
@sample = IMW.open(File.join(IMWTest::DATA_DIR, 'formats/yaml/sample.yaml')).load
|
7
7
|
end
|
8
8
|
|
9
9
|
it "should be able to parse the YAML" do
|
10
|
-
@sample.first[
|
10
|
+
@sample['Lophocebus'].first[:id].should == 94
|
11
11
|
end
|
12
12
|
|
13
13
|
it "should be able to write YAML" do
|
14
14
|
data = { 'foobar' => 3, 'bazbooz' => 4 }
|
15
|
-
IMW.open!('test.yaml').
|
15
|
+
IMW.open!('test.yaml') { |f| f.emit(data) }
|
16
16
|
IMW.open('test.yaml').load['foobar'].should == 3
|
17
17
|
end
|
18
18
|
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../spec_helper"
|
2
|
+
|
3
|
+
describe IMW::Metadata::Field do
|
4
|
+
|
5
|
+
describe "initializing" do
|
6
|
+
it "should parse a symbol or string into a hash" do
|
7
|
+
IMW::Metadata::Field.new(:foobar).should == { :name => "foobar" }
|
8
|
+
IMW::Metadata::Field.new('foobar').should == { :name => 'foobar' }
|
9
|
+
end
|
10
|
+
|
11
|
+
it "should raise an error on a Hash without a :name key" do
|
12
|
+
lambda { IMW::Metadata::Field.new(:foo => 'bar') }.should raise_error(IMW::ArgumentError)
|
13
|
+
end
|
14
|
+
|
15
|
+
it "should accept a Hash with a :name key" do
|
16
|
+
data = { :name => :foobar, :title => "Bazbooz", :unit => "m" }
|
17
|
+
IMW::Metadata::Field.new(data).should == data
|
18
|
+
end
|
19
|
+
|
20
|
+
it "should dup a field if given one" do
|
21
|
+
orig_field = IMW::Metadata::Field.new('foobar')
|
22
|
+
IMW::Metadata::Field.new(orig_field).should == orig_field
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../spec_helper"
|
2
|
+
|
3
|
+
describe IMW::Metadata::Schema do
|
4
|
+
|
5
|
+
describe "initializing" do
|
6
|
+
it "should accept an array" do
|
7
|
+
IMW::Metadata::Schema.new([1,2,3]).should == [{:name => '1'}, {:name => '2'}, {:name => '3'}]
|
8
|
+
end
|
9
|
+
|
10
|
+
it "should dup a Schema if given one" do
|
11
|
+
orig_schema = IMW::Metadata::Schema.new([1,2,3])
|
12
|
+
IMW::Metadata::Schema.new(orig_schema).should == orig_schema
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
describe 'loading' do
|
17
|
+
it "should load an Array in a resource" do
|
18
|
+
resource = IMW.open('some_resource')
|
19
|
+
resource.should_receive(:load).and_return(%w[foo bar baz])
|
20
|
+
IMW.should_receive(:open).and_return(resource)
|
21
|
+
IMW::Metadata::Schema.load(resource.to_s).map { |field| field[:name] }.should == %w[foo bar baz]
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
27
|
+
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../spec_helper"
|
2
|
+
|
3
|
+
describe IMW::Metadata do
|
4
|
+
|
5
|
+
describe "initalizing" do
|
6
|
+
|
7
|
+
it "should accept a hash" do
|
8
|
+
IMW::Metadata.new('a' => ['a', 'b']).should == { 'a' => [{:name => 'a'}, {:name => 'b'}] }
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
describe 'loading' do
|
13
|
+
|
14
|
+
it "should accept a Hash in a resource" do
|
15
|
+
data = {'a' => ['a', 'b']}
|
16
|
+
resource = IMW.open('some_resource')
|
17
|
+
IMW.should_receive(:open).with(resource).and_return(resource)
|
18
|
+
resource.should_receive(:load).and_return(data)
|
19
|
+
IMW::Metadata.load(resource).should == { 'a' => [{:name => 'a'}, {:name => 'b'}] }
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
describe "constructing absolute URIs" do
|
24
|
+
|
25
|
+
before { @metadata = IMW::Metadata.new }
|
26
|
+
|
27
|
+
it "should return the resource given without a base" do
|
28
|
+
@metadata.send(:absolute_uri, 'path/to/something').should == 'path/to/something'
|
29
|
+
end
|
30
|
+
|
31
|
+
it "should return the absolute URI with a base" do
|
32
|
+
path = File.join(IMWTest::TMP_DIR, 'metadata.yaml')
|
33
|
+
FileUtils.mkdir_p(path)
|
34
|
+
@metadata.base = path
|
35
|
+
@metadata.send(:absolute_uri, 'path/to/something').should == File.join(IMWTest::TMP_DIR, '/path/to/something')
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
end
|
@@ -4,7 +4,7 @@ require 'ostruct'
|
|
4
4
|
describe IMW::Parsers::LineParser do
|
5
5
|
|
6
6
|
before do
|
7
|
-
@path = File.
|
7
|
+
@path = File.join(IMWTest::DATA_DIR, 'formats/delimited/sample.csv')
|
8
8
|
@file = File.new(@path)
|
9
9
|
@fields = [:id, :name, :genus, :species]
|
10
10
|
end
|
data/spec/imw/resource_spec.rb
CHANGED
@@ -25,106 +25,6 @@ describe IMW::Resource do
|
|
25
25
|
end
|
26
26
|
end
|
27
27
|
|
28
|
-
describe "parsing various and sundry URIs should correctly parse a" do
|
29
|
-
|
30
|
-
before do
|
31
|
-
IMW::Resource.should_receive(:extend_resource!).with(an_instance_of(IMW::Resource), {})
|
32
|
-
end
|
33
|
-
|
34
|
-
it "local file path" do
|
35
|
-
resource = IMW::Resource.new("/home/foo.txt")
|
36
|
-
resource.stub!(:path).and_return("/home/foo.txt")
|
37
|
-
|
38
|
-
resource.scheme.should be_nil
|
39
|
-
resource.dirname.should == '/home'
|
40
|
-
resource.basename.should == 'foo.txt'
|
41
|
-
resource.extname.should == '.txt'
|
42
|
-
resource.extension.should == 'txt'
|
43
|
-
resource.name.should == 'foo'
|
44
|
-
end
|
45
|
-
|
46
|
-
it "local file path with spaces in the name" do
|
47
|
-
resource = IMW::Resource.new("/home/foo bar.txt")
|
48
|
-
resource.stub!(:path).and_return("/home/foo bar.txt")
|
49
|
-
resource.name.should == 'foo bar'
|
50
|
-
end
|
51
|
-
|
52
|
-
it "local file path with an explicit file:// scheme" do
|
53
|
-
resource = IMW::Resource.new("file:///home/foo.txt")
|
54
|
-
resource.scheme.should == 'file'
|
55
|
-
end
|
56
|
-
|
57
|
-
it "web URL with query and fragment" do
|
58
|
-
resource = IMW::Resource.new("http://mysite.com/some/page?param=value#frag")
|
59
|
-
resource.stub!(:path).and_return("/some/page")
|
60
|
-
resource.scheme.should == 'http'
|
61
|
-
resource.dirname.should == '/some'
|
62
|
-
resource.basename.should == 'page'
|
63
|
-
resource.extname.should == ''
|
64
|
-
resource.extension.should == ''
|
65
|
-
resource.name.should == 'page'
|
66
|
-
end
|
67
|
-
|
68
|
-
end
|
69
|
-
|
70
|
-
it "should open a URI without attempting to extend with modules if so asked" do
|
71
|
-
IMW::Resource.should_not_receive(:extend_resource!)
|
72
|
-
IMW::Resource.new("/path/to/some/file.txt", :no_modules => true)
|
73
|
-
end
|
74
|
-
|
75
|
-
describe "extending resources with specific modules" do
|
76
|
-
before do
|
77
|
-
@resource = IMW::Resource.new('http://www.infochimps.com/data', :no_modules => true)
|
78
|
-
end
|
79
|
-
|
80
|
-
it "should use a specific module when asked with a string" do
|
81
|
-
IMW::Resource.extend_resource!(@resource, :use_modules => ["Formats::Csv"])
|
82
|
-
@resource.resource_modules.should include(IMW::Formats::Csv)
|
83
|
-
end
|
84
|
-
|
85
|
-
it "should use a specific module when asked with a module" do
|
86
|
-
IMW::Resource.extend_resource!(@resource, :use_modules => [IMW::Formats::Csv])
|
87
|
-
@resource.resource_modules.should include(IMW::Formats::Csv)
|
88
|
-
end
|
89
|
-
|
90
|
-
it "should not use a specific module when asked with a string" do
|
91
|
-
IMW::Resource.extend_resource!(@resource, :skip_modules => ["Schemes::HTTP"])
|
92
|
-
@resource.resource_modules.should_not include(IMW::Schemes::HTTP)
|
93
|
-
end
|
94
|
-
|
95
|
-
it "should not use a specific module when asked with a module" do
|
96
|
-
IMW::Resource.extend_resource!(@resource, :skip_modules => [IMW::Schemes::HTTP])
|
97
|
-
@resource.resource_modules.should_not include(IMW::Schemes::HTTP)
|
98
|
-
end
|
99
|
-
|
100
|
-
end
|
101
|
-
|
102
|
-
describe "registering a new handler" do
|
103
|
-
after do
|
104
|
-
IMW::USER_DEFINED_HANDLERS.delete_if { true }
|
105
|
-
end
|
106
|
-
|
107
|
-
it "should raise an error if the module given isn't a module or string" do
|
108
|
-
lambda { IMW.register_handler 3, // }.should raise_error(IMW::ArgumentError)
|
109
|
-
lambda { IMW.register_handler "IMW", // }.should_not raise_error(IMW::ArgumentError)
|
110
|
-
lambda { IMW.register_handler IMW, // }.should_not raise_error(IMW::ArgumentError)
|
111
|
-
end
|
112
|
-
|
113
|
-
it "should raise an error if the handler given isn't a Regexp, Proc, or true" do
|
114
|
-
lambda { IMW.register_handler IMW, 3 }.should raise_error(IMW::ArgumentError)
|
115
|
-
lambda { IMW.register_handler IMW, /foo/ }.should_not raise_error(IMW::ArgumentError)
|
116
|
-
lambda { IMW.register_handler IMW, Proc.new { |r| true } }.should_not raise_error(IMW::ArgumentError)
|
117
|
-
lambda { IMW.register_handler IMW, true }.should_not raise_error(IMW::ArgumentError)
|
118
|
-
end
|
119
|
-
|
120
|
-
it "should use a valid handler when appropriate" do
|
121
|
-
NewModule = Module.new
|
122
|
-
IMW.register_handler NewModule, /\.foo$/
|
123
|
-
IMW.open('/path/to/something.foo').resource_modules.should include(NewModule)
|
124
|
-
end
|
125
|
-
|
126
|
-
end
|
127
|
-
|
128
28
|
end
|
129
29
|
|
130
30
|
|
@@ -2,13 +2,13 @@ require File.join(File.dirname(__FILE__),'../../spec_helper')
|
|
2
2
|
|
3
3
|
describe IMW::Schemes::HDFS do
|
4
4
|
before do
|
5
|
-
def fake_hdfs_resource path,
|
6
|
-
if
|
7
|
-
response = " #{num_dirs} #{num_files} #{size} hdfs://localhost#{path}"
|
8
|
-
else
|
5
|
+
def fake_hdfs_resource path, options={}
|
6
|
+
if options == false
|
9
7
|
response = ""
|
8
|
+
else
|
9
|
+
response = " #{options[:dirs] || 0} #{options[:files] || 1} #{options[:size] || 1000} hdfs://localhost#{path}"
|
10
10
|
end
|
11
|
-
IMW::Schemes::HDFS.should_receive(:fs).with(:count, path).and_return(response)
|
11
|
+
IMW::Schemes::HDFS.should_receive(:fs).with(:count, path).at_least(:once).and_return(response)
|
12
12
|
IMW.open("hdfs://#{path}")
|
13
13
|
end
|
14
14
|
@path = '/path/to/myfile'
|
@@ -16,7 +16,7 @@ describe IMW::Schemes::HDFS do
|
|
16
16
|
|
17
17
|
describe "refreshing its properties" do
|
18
18
|
it "should correctly get properties for a resource which exists" do
|
19
|
-
resource = fake_hdfs_resource(@path, 2, 3, 1000)
|
19
|
+
resource = fake_hdfs_resource(@path, :dirs => 2, :files => 3, :size => 1000)
|
20
20
|
resource.exist?.should be_true
|
21
21
|
resource.num_dirs.should == 2
|
22
22
|
resource.num_files.should == 3
|
@@ -24,38 +24,44 @@ describe IMW::Schemes::HDFS do
|
|
24
24
|
end
|
25
25
|
|
26
26
|
it "should gracefully handle a resource which doesn't exist" do
|
27
|
-
resource = fake_hdfs_resource(@path)
|
27
|
+
resource = fake_hdfs_resource(@path, false)
|
28
28
|
resource.exist?.should be_false
|
29
29
|
end
|
30
30
|
|
31
31
|
it "should execute the correct command to delete the path" do
|
32
|
-
resource = fake_hdfs_resource(@path
|
32
|
+
resource = fake_hdfs_resource(@path)
|
33
33
|
IMW::Schemes::HDFS.should_receive(:fs).with(:rm, resource.path)
|
34
34
|
resource.rm
|
35
35
|
end
|
36
36
|
|
37
37
|
it "should execute the correct command to delete the path when skipping the trash" do
|
38
|
-
resource = fake_hdfs_resource(@path
|
38
|
+
resource = fake_hdfs_resource(@path)
|
39
39
|
IMW::Schemes::HDFS.should_receive(:fs).with(:rm, '-skipTrash', resource.path)
|
40
40
|
resource.rm :skip_trash => true
|
41
41
|
end
|
42
42
|
|
43
43
|
it "should recognize a file and extend it properly" do
|
44
|
-
resource = fake_hdfs_resource(@path
|
44
|
+
resource = fake_hdfs_resource(@path)
|
45
45
|
resource.num_dirs.should == 0
|
46
46
|
resource.num_files.should == 1
|
47
47
|
resource.exist?.should be_true
|
48
48
|
resource.is_directory?.should be_false
|
49
|
-
resource.
|
49
|
+
resource.modules.should include(IMW::Schemes::HDFSFile)
|
50
50
|
end
|
51
51
|
|
52
52
|
it "should recognize a directory and extend it properly" do
|
53
|
-
resource = fake_hdfs_resource(@path, 2, 1
|
53
|
+
resource = fake_hdfs_resource(@path, :dirs => 2, :files => 1)
|
54
54
|
resource.num_dirs.should == 2
|
55
55
|
resource.num_files.should == 1
|
56
56
|
resource.exist?.should be_true
|
57
57
|
resource.is_directory?.should be_true
|
58
|
-
resource.
|
58
|
+
resource.modules.should include(IMW::Schemes::HDFSDirectory)
|
59
|
+
end
|
60
|
+
|
61
|
+
it "should be able to join path segments to a directory" do
|
62
|
+
resource = fake_hdfs_resource(@path, :dirs => 2)
|
63
|
+
sub_resource = fake_hdfs_resource("#{@path}/a/b/c")
|
64
|
+
resource.join('a', 'b/c').to_s.should == sub_resource.to_s
|
59
65
|
end
|
60
66
|
end
|
61
67
|
end
|
@@ -5,13 +5,13 @@ describe IMW::Schemes::Local::Base do
|
|
5
5
|
it "should not extend a local file with LocalDirectory" do
|
6
6
|
@file = IMW::Resource.new('foo.txt', :no_modules => true)
|
7
7
|
@file.should_not_receive(:extend).with(IMW::Schemes::Local::LocalDirectory)
|
8
|
-
@file
|
8
|
+
IMW::Resource.extend_instance!(@file)
|
9
9
|
end
|
10
10
|
|
11
11
|
it "should not extend a local directory with LocalFile" do
|
12
12
|
@dir = IMW::Resource.new(IMWTest::TMP_DIR, :no_modules => true)
|
13
13
|
@dir.should_not_receive(:extend).with(IMW::Schemes::Local::LocalFile)
|
14
|
-
@dir
|
14
|
+
IMW::Resource.extend_instance!(@dir)
|
15
15
|
end
|
16
16
|
|
17
17
|
it "should correctly resolve relative paths" do
|
@@ -37,7 +37,6 @@ describe IMW::Schemes::Local::LocalFile do
|
|
37
37
|
@file.exist?.should be_false
|
38
38
|
end
|
39
39
|
|
40
|
-
|
41
40
|
it "can read a file" do
|
42
41
|
@file.read.size.should > 0
|
43
42
|
end
|
@@ -60,6 +59,12 @@ describe IMW::Schemes::Local::LocalFile do
|
|
60
59
|
line[0..5]
|
61
60
|
end.class.should == Array
|
62
61
|
end
|
62
|
+
|
63
|
+
it "can produce a snippet" do
|
64
|
+
path = IMWTest::DATA_DIR + "/formats/none/sample"
|
65
|
+
# FIXME only look at the first 100 bytes b/c of subsequent non-ascii chars...
|
66
|
+
IMW.open(path).snippet[0..100].should == File.new(path).read(101)
|
67
|
+
end
|
63
68
|
end
|
64
69
|
|
65
70
|
describe IMW::Schemes::Local::LocalDirectory do
|
@@ -100,6 +105,57 @@ describe IMW::Schemes::Local::LocalDirectory do
|
|
100
105
|
@dir.resources.map(&:class).uniq.first.should == IMW::Resource
|
101
106
|
end
|
102
107
|
|
108
|
+
describe "checking whether it contains other resources" do
|
109
|
+
|
110
|
+
it "should return false for remote paths" do
|
111
|
+
@dir.contains?("http://google.com").should be_false
|
112
|
+
end
|
113
|
+
|
114
|
+
it "should return true for its own path" do
|
115
|
+
@dir.contains?(@dir.path).should be_true
|
116
|
+
end
|
117
|
+
|
118
|
+
it "should return false for a path that doesn't start with its path" do
|
119
|
+
@dir.contains?(File.expand_path('foo')).should be_false
|
120
|
+
end
|
121
|
+
|
122
|
+
it "should return false for a path that starts with its path but doesn't exist" do
|
123
|
+
@dir.contains?(File.expand_path('dir/foo/baz')).should be_false
|
124
|
+
end
|
125
|
+
|
126
|
+
it "should return true for a path that starts with its path and exists" do
|
127
|
+
FileUtils.mkdir_p('dir/foo/baz')
|
128
|
+
@dir.contains?(File.expand_path('dir/foo/baz')).should be_true
|
129
|
+
end
|
130
|
+
|
131
|
+
end
|
132
|
+
|
133
|
+
describe "handling schemata" do
|
134
|
+
|
135
|
+
it "should recognize a YAML schema file" do
|
136
|
+
schemata_path = File.join(@dir.path, 'schema.yaml')
|
137
|
+
IMWTest::Random.file(schemata_path)
|
138
|
+
@dir.schemata_path.should == schemata_path
|
139
|
+
end
|
140
|
+
|
141
|
+
it "should recognize a JSON schema file" do
|
142
|
+
schemata_path = File.join(@dir.path, 'schema.json')
|
143
|
+
IMWTest::Random.file(schemata_path)
|
144
|
+
@dir.schemata_path.should == schemata_path
|
145
|
+
end
|
146
|
+
|
147
|
+
it "should recognize a funny-named YAML schema file" do
|
148
|
+
schemata_path = File.join(@dir.path, 'schema-1838293.yml')
|
149
|
+
IMWTest::Random.file(schemata_path)
|
150
|
+
@dir.schemata_path.should == schemata_path
|
151
|
+
end
|
152
|
+
|
153
|
+
end
|
154
|
+
|
155
|
+
it "can join with a path" do
|
156
|
+
@dir.join("a", "b/c").to_s.should == File.join(@dir.path, 'a/b/c')
|
157
|
+
end
|
158
|
+
|
103
159
|
describe 'can package itself to' do
|
104
160
|
['tar', 'tar.bz2', 'tar.gz', 'zip', 'rar'].each do |extension|
|
105
161
|
it "a #{extension} archive" do
|