imw 0.2.7 → 0.2.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (93)
  1. data/Gemfile +23 -0
  2. data/Gemfile.lock +47 -0
  3. data/LICENSE +20 -674
  4. data/README.rdoc +3 -4
  5. data/VERSION +1 -1
  6. data/lib/imw.rb +64 -35
  7. data/lib/imw/dataset.rb +12 -2
  8. data/lib/imw/formats.rb +4 -2
  9. data/lib/imw/formats/delimited.rb +96 -36
  10. data/lib/imw/formats/excel.rb +69 -101
  11. data/lib/imw/formats/json.rb +3 -5
  12. data/lib/imw/formats/pdf.rb +71 -0
  13. data/lib/imw/formats/yaml.rb +3 -5
  14. data/lib/imw/metadata.rb +66 -0
  15. data/lib/imw/metadata/contains_metadata.rb +44 -0
  16. data/lib/imw/metadata/dsl.rb +111 -0
  17. data/lib/imw/metadata/field.rb +65 -0
  18. data/lib/imw/metadata/schema.rb +227 -0
  19. data/lib/imw/metadata/schematized.rb +27 -0
  20. data/lib/imw/parsers.rb +1 -0
  21. data/lib/imw/parsers/flat.rb +44 -0
  22. data/lib/imw/resource.rb +36 -224
  23. data/lib/imw/schemes.rb +3 -1
  24. data/lib/imw/schemes/hdfs.rb +12 -1
  25. data/lib/imw/schemes/http.rb +1 -2
  26. data/lib/imw/schemes/local.rb +139 -16
  27. data/lib/imw/schemes/remote.rb +14 -9
  28. data/lib/imw/schemes/s3.rb +12 -0
  29. data/lib/imw/schemes/sql.rb +117 -0
  30. data/lib/imw/tools.rb +5 -3
  31. data/lib/imw/tools/downloader.rb +63 -0
  32. data/lib/imw/tools/summarizer.rb +21 -10
  33. data/lib/imw/utils.rb +10 -0
  34. data/lib/imw/utils/dynamically_extendable.rb +137 -0
  35. data/lib/imw/utils/error.rb +3 -0
  36. data/lib/imw/utils/extensions.rb +0 -4
  37. data/lib/imw/utils/extensions/array.rb +6 -7
  38. data/lib/imw/utils/extensions/hash.rb +3 -5
  39. data/lib/imw/utils/extensions/string.rb +3 -3
  40. data/lib/imw/utils/has_uri.rb +114 -0
  41. data/spec/data/{sample.csv → formats/delimited/sample.csv} +1 -1
  42. data/spec/data/{sample.tsv → formats/delimited/sample.tsv} +0 -0
  43. data/spec/data/formats/delimited/with_schema/ace-hardware-locations.tsv +11 -0
  44. data/spec/data/formats/delimited/with_schema/all-countries-ip-address-to-geolocation-data.tsv +16 -0
  45. data/spec/data/formats/delimited/with_schema/complete-list-of-starbucks-locations.tsv +11 -0
  46. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +22 -0
  47. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +22 -0
  48. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-counts.tsv +12 -0
  49. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +13 -0
  50. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +22 -0
  51. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +22 -0
  52. data/spec/data/formats/delimited/without_schema/ace-hardware-locations.tsv +10 -0
  53. data/spec/data/formats/delimited/without_schema/all-countries-ip-address-to-geolocation-data.tsv +15 -0
  54. data/spec/data/formats/delimited/without_schema/complete-list-of-starbucks-locations.tsv +10 -0
  55. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +21 -0
  56. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +21 -0
  57. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-counts.tsv +11 -0
  58. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +12 -0
  59. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +21 -0
  60. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +21 -0
  61. data/spec/data/formats/excel/sample.xls +0 -0
  62. data/spec/data/formats/json/sample.json +1 -0
  63. data/spec/data/formats/none/sample +650 -0
  64. data/spec/data/formats/sgml/sample.xml +617 -0
  65. data/spec/data/formats/text/sample.txt +650 -0
  66. data/spec/data/formats/yaml/sample.yaml +410 -0
  67. data/spec/data/schema-tabular.yaml +11 -0
  68. data/spec/imw/formats/delimited_spec.rb +34 -2
  69. data/spec/imw/formats/excel_spec.rb +55 -0
  70. data/spec/imw/formats/json_spec.rb +3 -3
  71. data/spec/imw/formats/sgml_spec.rb +4 -4
  72. data/spec/imw/formats/yaml_spec.rb +3 -3
  73. data/spec/imw/metadata/field_spec.rb +26 -0
  74. data/spec/imw/metadata/schema_spec.rb +27 -0
  75. data/spec/imw/metadata_spec.rb +39 -0
  76. data/spec/imw/parsers/line_parser_spec.rb +1 -1
  77. data/spec/imw/resource_spec.rb +0 -100
  78. data/spec/imw/schemes/hdfs_spec.rb +19 -13
  79. data/spec/imw/schemes/local_spec.rb +59 -3
  80. data/spec/imw/schemes/s3_spec.rb +4 -0
  81. data/spec/imw/utils/dynamically_extendable_spec.rb +69 -0
  82. data/spec/imw/utils/has_uri_spec.rb +55 -0
  83. data/spec/spec_helper.rb +1 -2
  84. data/spec/support/random.rb +4 -4
  85. metadata +58 -17
  86. data/CHANGELOG +0 -0
  87. data/TODO +0 -18
  88. data/spec/data/sample.json +0 -782
  89. data/spec/data/sample.txt +0 -131
  90. data/spec/data/sample.xml +0 -653
  91. data/spec/data/sample.yaml +0 -651
  92. data/spec/spec.opts +0 -4
  93. data/spec/support/extensions.rb +0 -18
@@ -0,0 +1,55 @@
1
+ require File.join(File.dirname(__FILE__),'../../spec_helper')
2
+
3
+ describe IMW::Formats::Excel do
4
+
5
+ before do
6
+ @sample = IMW.open(File.join(IMWTest::DATA_DIR, 'formats/excel/sample.xls'))
7
+ end
8
+
9
+ it "should be able to parse the Excel document" do
10
+ @sample.load[1].last.should == 'lemurinus'
11
+ end
12
+
13
+ it "should be able to create a snippet" do
14
+ @sample.snippet[1].last.should == 'lemurinus'
15
+ end
16
+
17
+ # it "should be able to write CSV" do
18
+ # data = [['foobar', 1, 2], ['bazbooz', 3, 4]]
19
+ # IMW.open!('test.csv').emit(data)
20
+ # IMW.open('test.csv').load[1].last.should == "4"
21
+ # end
22
+
23
+ # it "should raise an error on an invalid schema" do
24
+ # lambda { @sample.schema = [{:name => :foobar, :has_many => {:associations => [:foo, :bar]}}] }.should raise_error(IMW::SchemaError)
25
+ # end
26
+
27
+ # it "should accept a valid schema" do
28
+ # @sample.schema = [:foo, :bar, :baz]
29
+ # @sample.schema.should == [{:name => 'foo'}, {:name => 'bar'}, {:name => 'baz'}]
30
+ # end
31
+
32
+ # describe "guessing a schema" do
33
+
34
+ # Dir[File.join(IMWTest::DATA_DIR, 'formats/delimited/with_schema/*')].each do |path|
35
+ # it "should correctly guess that with_schema/#{File.basename(path)} has headers in its first row" do
36
+ # IMW.open(path).headers_in_first_line?.should == true
37
+ # end
38
+ # end
39
+
40
+ # Dir[File.join(IMWTest::DATA_DIR, 'formats/delimited/without_schema/*')].each do |path|
41
+ # it "should correctly guess that without_schema/#{File.basename(path)} does not have headers in its first row" do
42
+ # IMW.open(path).headers_in_first_line?.should == false
43
+ # end
44
+ # end
45
+
46
+ # it "should automatically set the headers on a source with guessed headers" do
47
+ # resource = IMW.open(Dir[File.join(IMWTest::DATA_DIR, 'formats/delimited/with_schema/*')].first)
48
+ # resource.guess_schema!
49
+ # resource.delimited_options[:headers].class.should == Array
50
+ # resource.schema.should_not be_empty
51
+ # end
52
+
53
+ # end
54
+
55
+ end
@@ -3,15 +3,15 @@ require File.join(File.dirname(__FILE__),'../../spec_helper')
3
3
  describe IMW::Formats::Json do
4
4
 
5
5
  before do
6
- @sample = IMW.open(File.join(IMWTest::DATA_DIR, 'sample.json'))
6
+ @sample = IMW.open(File.join(IMWTest::DATA_DIR, 'formats/json/sample.json'))
7
7
  end
8
8
 
9
9
  it "should be able to parse the JSON" do
10
- @sample.load.first['id'].should == 1
10
+ @sample.load["Aotus"].first['id'].should == 1
11
11
  end
12
12
 
13
13
  it "should be able to write JSON" do
14
- IMW.open!('test.json').dump({ 'foobar' => 3, 'bazbooz' => 4 })
14
+ IMW.open!('test.json') { |f| f.emit({ 'foobar' => 3, 'bazbooz' => 4 }) }
15
15
  IMW.open('test.json').load['foobar'].should == 3
16
16
  end
17
17
 
@@ -4,21 +4,21 @@ describe IMW::Formats::Xml do
4
4
  # just spec Xml now as the others are identical
5
5
 
6
6
  before do
7
- @sample = IMW.open(File.join(IMWTest::DATA_DIR, 'sample.xml'))
7
+ @sample = IMW.open(File.join(IMWTest::DATA_DIR, 'formats/sgml/sample.xml'))
8
8
  end
9
9
 
10
10
  it "should be able to load the XML" do
11
- ((@sample.load/"monkey").first/"genus").inner_text.should == 'Aotus'
11
+ ((@sample.load/"genus").first/"name").first.inner_text.should == 'Mandrillus'
12
12
  end
13
13
 
14
14
  it "should yield the XML when load is given a block" do
15
15
  @sample.load do |xml|
16
- ((xml/"monkey").first/"genus").inner_text.should == 'Aotus'
16
+ ((xml/"genus").first/"name").first.inner_text.should == 'Mandrillus'
17
17
  end
18
18
  end
19
19
 
20
20
  it "should parse the XML" do
21
- @sample.parse(:monkeys => ['monkey'])[:monkeys].size.should == 130
21
+ @sample.parse(:species => ['species[@id]'])[:species].size.should == 130
22
22
  end
23
23
  end
24
24
 
@@ -3,16 +3,16 @@ require File.join(File.dirname(__FILE__),'../../spec_helper')
3
3
  describe IMW::Formats::Yaml do
4
4
 
5
5
  before do
6
- @sample = IMW.open(File.join(IMWTest::DATA_DIR, 'sample.yaml'))
6
+ @sample = IMW.open(File.join(IMWTest::DATA_DIR, 'formats/yaml/sample.yaml')).load
7
7
  end
8
8
 
9
9
  it "should be able to parse the YAML" do
10
- @sample.first['id'].should == 1
10
+ @sample['Lophocebus'].first[:id].should == 94
11
11
  end
12
12
 
13
13
  it "should be able to write YAML" do
14
14
  data = { 'foobar' => 3, 'bazbooz' => 4 }
15
- IMW.open!('test.yaml').dump(data)
15
+ IMW.open!('test.yaml') { |f| f.emit(data) }
16
16
  IMW.open('test.yaml').load['foobar'].should == 3
17
17
  end
18
18
 
@@ -0,0 +1,26 @@
1
+ require File.dirname(__FILE__) + "/../../spec_helper"
2
+
3
+ describe IMW::Metadata::Field do
4
+
5
+ describe "initializing" do
6
+ it "should parse a symbol or string into a hash" do
7
+ IMW::Metadata::Field.new(:foobar).should == { :name => "foobar" }
8
+ IMW::Metadata::Field.new('foobar').should == { :name => 'foobar' }
9
+ end
10
+
11
+ it "should raise an error on a Hash without a :name key" do
12
+ lambda { IMW::Metadata::Field.new(:foo => 'bar') }.should raise_error(IMW::ArgumentError)
13
+ end
14
+
15
+ it "should accept a Hash with a :name key" do
16
+ data = { :name => :foobar, :title => "Bazbooz", :unit => "m" }
17
+ IMW::Metadata::Field.new(data).should == data
18
+ end
19
+
20
+ it "should dup a field if given one" do
21
+ orig_field = IMW::Metadata::Field.new('foobar')
22
+ IMW::Metadata::Field.new(orig_field).should == orig_field
23
+ end
24
+ end
25
+
26
+ end
@@ -0,0 +1,27 @@
1
+ require File.dirname(__FILE__) + "/../../spec_helper"
2
+
3
+ describe IMW::Metadata::Schema do
4
+
5
+ describe "initializing" do
6
+ it "should accept an array" do
7
+ IMW::Metadata::Schema.new([1,2,3]).should == [{:name => '1'}, {:name => '2'}, {:name => '3'}]
8
+ end
9
+
10
+ it "should dup a Schema if given one" do
11
+ orig_schema = IMW::Metadata::Schema.new([1,2,3])
12
+ IMW::Metadata::Schema.new(orig_schema).should == orig_schema
13
+ end
14
+ end
15
+
16
+ describe 'loading' do
17
+ it "should load an Array in a resource" do
18
+ resource = IMW.open('some_resource')
19
+ resource.should_receive(:load).and_return(%w[foo bar baz])
20
+ IMW.should_receive(:open).and_return(resource)
21
+ IMW::Metadata::Schema.load(resource.to_s).map { |field| field[:name] }.should == %w[foo bar baz]
22
+ end
23
+
24
+ end
25
+
26
+ end
27
+
@@ -0,0 +1,39 @@
1
+ require File.dirname(__FILE__) + "/../spec_helper"
2
+
3
+ describe IMW::Metadata do
4
+
5
+ describe "initalizing" do
6
+
7
+ it "should accept a hash" do
8
+ IMW::Metadata.new('a' => ['a', 'b']).should == { 'a' => [{:name => 'a'}, {:name => 'b'}] }
9
+ end
10
+ end
11
+
12
+ describe 'loading' do
13
+
14
+ it "should accept a Hash in a resource" do
15
+ data = {'a' => ['a', 'b']}
16
+ resource = IMW.open('some_resource')
17
+ IMW.should_receive(:open).with(resource).and_return(resource)
18
+ resource.should_receive(:load).and_return(data)
19
+ IMW::Metadata.load(resource).should == { 'a' => [{:name => 'a'}, {:name => 'b'}] }
20
+ end
21
+ end
22
+
23
+ describe "constructing absolute URIs" do
24
+
25
+ before { @metadata = IMW::Metadata.new }
26
+
27
+ it "should return the resource given without a base" do
28
+ @metadata.send(:absolute_uri, 'path/to/something').should == 'path/to/something'
29
+ end
30
+
31
+ it "should return the absolute URI with a base" do
32
+ path = File.join(IMWTest::TMP_DIR, 'metadata.yaml')
33
+ FileUtils.mkdir_p(path)
34
+ @metadata.base = path
35
+ @metadata.send(:absolute_uri, 'path/to/something').should == File.join(IMWTest::TMP_DIR, '/path/to/something')
36
+ end
37
+ end
38
+
39
+ end
@@ -4,7 +4,7 @@ require 'ostruct'
4
4
  describe IMW::Parsers::LineParser do
5
5
 
6
6
  before do
7
- @path = File.dirname(__FILE__) + "/../../data/sample.csv"
7
+ @path = File.join(IMWTest::DATA_DIR, 'formats/delimited/sample.csv')
8
8
  @file = File.new(@path)
9
9
  @fields = [:id, :name, :genus, :species]
10
10
  end
@@ -25,106 +25,6 @@ describe IMW::Resource do
25
25
  end
26
26
  end
27
27
 
28
- describe "parsing various and sundry URIs should correctly parse a" do
29
-
30
- before do
31
- IMW::Resource.should_receive(:extend_resource!).with(an_instance_of(IMW::Resource), {})
32
- end
33
-
34
- it "local file path" do
35
- resource = IMW::Resource.new("/home/foo.txt")
36
- resource.stub!(:path).and_return("/home/foo.txt")
37
-
38
- resource.scheme.should be_nil
39
- resource.dirname.should == '/home'
40
- resource.basename.should == 'foo.txt'
41
- resource.extname.should == '.txt'
42
- resource.extension.should == 'txt'
43
- resource.name.should == 'foo'
44
- end
45
-
46
- it "local file path with spaces in the name" do
47
- resource = IMW::Resource.new("/home/foo bar.txt")
48
- resource.stub!(:path).and_return("/home/foo bar.txt")
49
- resource.name.should == 'foo bar'
50
- end
51
-
52
- it "local file path with an explicit file:// scheme" do
53
- resource = IMW::Resource.new("file:///home/foo.txt")
54
- resource.scheme.should == 'file'
55
- end
56
-
57
- it "web URL with query and fragment" do
58
- resource = IMW::Resource.new("http://mysite.com/some/page?param=value#frag")
59
- resource.stub!(:path).and_return("/some/page")
60
- resource.scheme.should == 'http'
61
- resource.dirname.should == '/some'
62
- resource.basename.should == 'page'
63
- resource.extname.should == ''
64
- resource.extension.should == ''
65
- resource.name.should == 'page'
66
- end
67
-
68
- end
69
-
70
- it "should open a URI without attempting to extend with modules if so asked" do
71
- IMW::Resource.should_not_receive(:extend_resource!)
72
- IMW::Resource.new("/path/to/some/file.txt", :no_modules => true)
73
- end
74
-
75
- describe "extending resources with specific modules" do
76
- before do
77
- @resource = IMW::Resource.new('http://www.infochimps.com/data', :no_modules => true)
78
- end
79
-
80
- it "should use a specific module when asked with a string" do
81
- IMW::Resource.extend_resource!(@resource, :use_modules => ["Formats::Csv"])
82
- @resource.resource_modules.should include(IMW::Formats::Csv)
83
- end
84
-
85
- it "should use a specific module when asked with a module" do
86
- IMW::Resource.extend_resource!(@resource, :use_modules => [IMW::Formats::Csv])
87
- @resource.resource_modules.should include(IMW::Formats::Csv)
88
- end
89
-
90
- it "should not use a specific module when asked with a string" do
91
- IMW::Resource.extend_resource!(@resource, :skip_modules => ["Schemes::HTTP"])
92
- @resource.resource_modules.should_not include(IMW::Schemes::HTTP)
93
- end
94
-
95
- it "should not use a specific module when asked with a module" do
96
- IMW::Resource.extend_resource!(@resource, :skip_modules => [IMW::Schemes::HTTP])
97
- @resource.resource_modules.should_not include(IMW::Schemes::HTTP)
98
- end
99
-
100
- end
101
-
102
- describe "registering a new handler" do
103
- after do
104
- IMW::USER_DEFINED_HANDLERS.delete_if { true }
105
- end
106
-
107
- it "should raise an error if the module given isn't a module or string" do
108
- lambda { IMW.register_handler 3, // }.should raise_error(IMW::ArgumentError)
109
- lambda { IMW.register_handler "IMW", // }.should_not raise_error(IMW::ArgumentError)
110
- lambda { IMW.register_handler IMW, // }.should_not raise_error(IMW::ArgumentError)
111
- end
112
-
113
- it "should raise an error if the handler given isn't a Regexp, Proc, or true" do
114
- lambda { IMW.register_handler IMW, 3 }.should raise_error(IMW::ArgumentError)
115
- lambda { IMW.register_handler IMW, /foo/ }.should_not raise_error(IMW::ArgumentError)
116
- lambda { IMW.register_handler IMW, Proc.new { |r| true } }.should_not raise_error(IMW::ArgumentError)
117
- lambda { IMW.register_handler IMW, true }.should_not raise_error(IMW::ArgumentError)
118
- end
119
-
120
- it "should use a valid handler when appropriate" do
121
- NewModule = Module.new
122
- IMW.register_handler NewModule, /\.foo$/
123
- IMW.open('/path/to/something.foo').resource_modules.should include(NewModule)
124
- end
125
-
126
- end
127
-
128
28
  end
129
29
 
130
30
 
@@ -2,13 +2,13 @@ require File.join(File.dirname(__FILE__),'../../spec_helper')
2
2
 
3
3
  describe IMW::Schemes::HDFS do
4
4
  before do
5
- def fake_hdfs_resource path, num_dirs=nil, num_files=nil, size=nil
6
- if num_dirs && num_files && size
7
- response = " #{num_dirs} #{num_files} #{size} hdfs://localhost#{path}"
8
- else
5
+ def fake_hdfs_resource path, options={}
6
+ if options == false
9
7
  response = ""
8
+ else
9
+ response = " #{options[:dirs] || 0} #{options[:files] || 1} #{options[:size] || 1000} hdfs://localhost#{path}"
10
10
  end
11
- IMW::Schemes::HDFS.should_receive(:fs).with(:count, path).and_return(response)
11
+ IMW::Schemes::HDFS.should_receive(:fs).with(:count, path).at_least(:once).and_return(response)
12
12
  IMW.open("hdfs://#{path}")
13
13
  end
14
14
  @path = '/path/to/myfile'
@@ -16,7 +16,7 @@ describe IMW::Schemes::HDFS do
16
16
 
17
17
  describe "refreshing its properties" do
18
18
  it "should correctly get properties for a resource which exists" do
19
- resource = fake_hdfs_resource(@path, 2, 3, 1000)
19
+ resource = fake_hdfs_resource(@path, :dirs => 2, :files => 3, :size => 1000)
20
20
  resource.exist?.should be_true
21
21
  resource.num_dirs.should == 2
22
22
  resource.num_files.should == 3
@@ -24,38 +24,44 @@ describe IMW::Schemes::HDFS do
24
24
  end
25
25
 
26
26
  it "should gracefully handle a resource which doesn't exist" do
27
- resource = fake_hdfs_resource(@path)
27
+ resource = fake_hdfs_resource(@path, false)
28
28
  resource.exist?.should be_false
29
29
  end
30
30
 
31
31
  it "should execute the correct command to delete the path" do
32
- resource = fake_hdfs_resource(@path, 2, 3, 1000)
32
+ resource = fake_hdfs_resource(@path)
33
33
  IMW::Schemes::HDFS.should_receive(:fs).with(:rm, resource.path)
34
34
  resource.rm
35
35
  end
36
36
 
37
37
  it "should execute the correct command to delete the path when skipping the trash" do
38
- resource = fake_hdfs_resource(@path, 2, 3, 1000)
38
+ resource = fake_hdfs_resource(@path)
39
39
  IMW::Schemes::HDFS.should_receive(:fs).with(:rm, '-skipTrash', resource.path)
40
40
  resource.rm :skip_trash => true
41
41
  end
42
42
 
43
43
  it "should recognize a file and extend it properly" do
44
- resource = fake_hdfs_resource(@path, 0, 1, 1000)
44
+ resource = fake_hdfs_resource(@path)
45
45
  resource.num_dirs.should == 0
46
46
  resource.num_files.should == 1
47
47
  resource.exist?.should be_true
48
48
  resource.is_directory?.should be_false
49
- resource.resource_modules.should include(IMW::Schemes::HDFSFile)
49
+ resource.modules.should include(IMW::Schemes::HDFSFile)
50
50
  end
51
51
 
52
52
  it "should recognize a directory and extend it properly" do
53
- resource = fake_hdfs_resource(@path, 2, 1, 1000)
53
+ resource = fake_hdfs_resource(@path, :dirs => 2, :files => 1)
54
54
  resource.num_dirs.should == 2
55
55
  resource.num_files.should == 1
56
56
  resource.exist?.should be_true
57
57
  resource.is_directory?.should be_true
58
- resource.resource_modules.should include(IMW::Schemes::HDFSDirectory)
58
+ resource.modules.should include(IMW::Schemes::HDFSDirectory)
59
+ end
60
+
61
+ it "should be able to join path segments to a directory" do
62
+ resource = fake_hdfs_resource(@path, :dirs => 2)
63
+ sub_resource = fake_hdfs_resource("#{@path}/a/b/c")
64
+ resource.join('a', 'b/c').to_s.should == sub_resource.to_s
59
65
  end
60
66
  end
61
67
  end
@@ -5,13 +5,13 @@ describe IMW::Schemes::Local::Base do
5
5
  it "should not extend a local file with LocalDirectory" do
6
6
  @file = IMW::Resource.new('foo.txt', :no_modules => true)
7
7
  @file.should_not_receive(:extend).with(IMW::Schemes::Local::LocalDirectory)
8
- @file.extend_appropriately!
8
+ IMW::Resource.extend_instance!(@file)
9
9
  end
10
10
 
11
11
  it "should not extend a local directory with LocalFile" do
12
12
  @dir = IMW::Resource.new(IMWTest::TMP_DIR, :no_modules => true)
13
13
  @dir.should_not_receive(:extend).with(IMW::Schemes::Local::LocalFile)
14
- @dir.extend_appropriately!
14
+ IMW::Resource.extend_instance!(@dir)
15
15
  end
16
16
 
17
17
  it "should correctly resolve relative paths" do
@@ -37,7 +37,6 @@ describe IMW::Schemes::Local::LocalFile do
37
37
  @file.exist?.should be_false
38
38
  end
39
39
 
40
-
41
40
  it "can read a file" do
42
41
  @file.read.size.should > 0
43
42
  end
@@ -60,6 +59,12 @@ describe IMW::Schemes::Local::LocalFile do
60
59
  line[0..5]
61
60
  end.class.should == Array
62
61
  end
62
+
63
+ it "can produce a snippet" do
64
+ path = IMWTest::DATA_DIR + "/formats/none/sample"
65
+ # FIXME only look at the first 100 bytes b/c of subsequent non-ascii chars...
66
+ IMW.open(path).snippet[0..100].should == File.new(path).read(101)
67
+ end
63
68
  end
64
69
 
65
70
  describe IMW::Schemes::Local::LocalDirectory do
@@ -100,6 +105,57 @@ describe IMW::Schemes::Local::LocalDirectory do
100
105
  @dir.resources.map(&:class).uniq.first.should == IMW::Resource
101
106
  end
102
107
 
108
+ describe "checking whether it contains other resources" do
109
+
110
+ it "should return false for remote paths" do
111
+ @dir.contains?("http://google.com").should be_false
112
+ end
113
+
114
+ it "should return true for its own path" do
115
+ @dir.contains?(@dir.path).should be_true
116
+ end
117
+
118
+ it "should return false for a path that doesn't start with its path" do
119
+ @dir.contains?(File.expand_path('foo')).should be_false
120
+ end
121
+
122
+ it "should return false for a path that starts with its path but doesn't exist" do
123
+ @dir.contains?(File.expand_path('dir/foo/baz')).should be_false
124
+ end
125
+
126
+ it "should return true for a path that starts with its path and exists" do
127
+ FileUtils.mkdir_p('dir/foo/baz')
128
+ @dir.contains?(File.expand_path('dir/foo/baz')).should be_true
129
+ end
130
+
131
+ end
132
+
133
+ describe "handling schemata" do
134
+
135
+ it "should recognize a YAML schema file" do
136
+ schemata_path = File.join(@dir.path, 'schema.yaml')
137
+ IMWTest::Random.file(schemata_path)
138
+ @dir.schemata_path.should == schemata_path
139
+ end
140
+
141
+ it "should recognize a JSON schema file" do
142
+ schemata_path = File.join(@dir.path, 'schema.json')
143
+ IMWTest::Random.file(schemata_path)
144
+ @dir.schemata_path.should == schemata_path
145
+ end
146
+
147
+ it "should recognize a funny-named YAML schema file" do
148
+ schemata_path = File.join(@dir.path, 'schema-1838293.yml')
149
+ IMWTest::Random.file(schemata_path)
150
+ @dir.schemata_path.should == schemata_path
151
+ end
152
+
153
+ end
154
+
155
+ it "can join with a path" do
156
+ @dir.join("a", "b/c").to_s.should == File.join(@dir.path, 'a/b/c')
157
+ end
158
+
103
159
  describe 'can package itself to' do
104
160
  ['tar', 'tar.bz2', 'tar.gz', 'zip', 'rar'].each do |extension|
105
161
  it "a #{extension} archive" do