imw 0.2.7 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. data/Gemfile +23 -0
  2. data/Gemfile.lock +47 -0
  3. data/LICENSE +20 -674
  4. data/README.rdoc +3 -4
  5. data/VERSION +1 -1
  6. data/lib/imw.rb +64 -35
  7. data/lib/imw/dataset.rb +12 -2
  8. data/lib/imw/formats.rb +4 -2
  9. data/lib/imw/formats/delimited.rb +96 -36
  10. data/lib/imw/formats/excel.rb +69 -101
  11. data/lib/imw/formats/json.rb +3 -5
  12. data/lib/imw/formats/pdf.rb +71 -0
  13. data/lib/imw/formats/yaml.rb +3 -5
  14. data/lib/imw/metadata.rb +66 -0
  15. data/lib/imw/metadata/contains_metadata.rb +44 -0
  16. data/lib/imw/metadata/dsl.rb +111 -0
  17. data/lib/imw/metadata/field.rb +65 -0
  18. data/lib/imw/metadata/schema.rb +227 -0
  19. data/lib/imw/metadata/schematized.rb +27 -0
  20. data/lib/imw/parsers.rb +1 -0
  21. data/lib/imw/parsers/flat.rb +44 -0
  22. data/lib/imw/resource.rb +36 -224
  23. data/lib/imw/schemes.rb +3 -1
  24. data/lib/imw/schemes/hdfs.rb +12 -1
  25. data/lib/imw/schemes/http.rb +1 -2
  26. data/lib/imw/schemes/local.rb +139 -16
  27. data/lib/imw/schemes/remote.rb +14 -9
  28. data/lib/imw/schemes/s3.rb +12 -0
  29. data/lib/imw/schemes/sql.rb +117 -0
  30. data/lib/imw/tools.rb +5 -3
  31. data/lib/imw/tools/downloader.rb +63 -0
  32. data/lib/imw/tools/summarizer.rb +21 -10
  33. data/lib/imw/utils.rb +10 -0
  34. data/lib/imw/utils/dynamically_extendable.rb +137 -0
  35. data/lib/imw/utils/error.rb +3 -0
  36. data/lib/imw/utils/extensions.rb +0 -4
  37. data/lib/imw/utils/extensions/array.rb +6 -7
  38. data/lib/imw/utils/extensions/hash.rb +3 -5
  39. data/lib/imw/utils/extensions/string.rb +3 -3
  40. data/lib/imw/utils/has_uri.rb +114 -0
  41. data/spec/data/{sample.csv → formats/delimited/sample.csv} +1 -1
  42. data/spec/data/{sample.tsv → formats/delimited/sample.tsv} +0 -0
  43. data/spec/data/formats/delimited/with_schema/ace-hardware-locations.tsv +11 -0
  44. data/spec/data/formats/delimited/with_schema/all-countries-ip-address-to-geolocation-data.tsv +16 -0
  45. data/spec/data/formats/delimited/with_schema/complete-list-of-starbucks-locations.tsv +11 -0
  46. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +22 -0
  47. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +22 -0
  48. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-counts.tsv +12 -0
  49. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +13 -0
  50. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +22 -0
  51. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +22 -0
  52. data/spec/data/formats/delimited/without_schema/ace-hardware-locations.tsv +10 -0
  53. data/spec/data/formats/delimited/without_schema/all-countries-ip-address-to-geolocation-data.tsv +15 -0
  54. data/spec/data/formats/delimited/without_schema/complete-list-of-starbucks-locations.tsv +10 -0
  55. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +21 -0
  56. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +21 -0
  57. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-counts.tsv +11 -0
  58. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +12 -0
  59. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +21 -0
  60. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +21 -0
  61. data/spec/data/formats/excel/sample.xls +0 -0
  62. data/spec/data/formats/json/sample.json +1 -0
  63. data/spec/data/formats/none/sample +650 -0
  64. data/spec/data/formats/sgml/sample.xml +617 -0
  65. data/spec/data/formats/text/sample.txt +650 -0
  66. data/spec/data/formats/yaml/sample.yaml +410 -0
  67. data/spec/data/schema-tabular.yaml +11 -0
  68. data/spec/imw/formats/delimited_spec.rb +34 -2
  69. data/spec/imw/formats/excel_spec.rb +55 -0
  70. data/spec/imw/formats/json_spec.rb +3 -3
  71. data/spec/imw/formats/sgml_spec.rb +4 -4
  72. data/spec/imw/formats/yaml_spec.rb +3 -3
  73. data/spec/imw/metadata/field_spec.rb +26 -0
  74. data/spec/imw/metadata/schema_spec.rb +27 -0
  75. data/spec/imw/metadata_spec.rb +39 -0
  76. data/spec/imw/parsers/line_parser_spec.rb +1 -1
  77. data/spec/imw/resource_spec.rb +0 -100
  78. data/spec/imw/schemes/hdfs_spec.rb +19 -13
  79. data/spec/imw/schemes/local_spec.rb +59 -3
  80. data/spec/imw/schemes/s3_spec.rb +4 -0
  81. data/spec/imw/utils/dynamically_extendable_spec.rb +69 -0
  82. data/spec/imw/utils/has_uri_spec.rb +55 -0
  83. data/spec/spec_helper.rb +1 -2
  84. data/spec/support/random.rb +4 -4
  85. metadata +58 -17
  86. data/CHANGELOG +0 -0
  87. data/TODO +0 -18
  88. data/spec/data/sample.json +0 -782
  89. data/spec/data/sample.txt +0 -131
  90. data/spec/data/sample.xml +0 -653
  91. data/spec/data/sample.yaml +0 -651
  92. data/spec/spec.opts +0 -4
  93. data/spec/support/extensions.rb +0 -18
@@ -0,0 +1,55 @@
1
+ require File.join(File.dirname(__FILE__),'../../spec_helper')
2
+
3
+ describe IMW::Formats::Excel do
4
+
5
+ before do
6
+ @sample = IMW.open(File.join(IMWTest::DATA_DIR, 'formats/excel/sample.xls'))
7
+ end
8
+
9
+ it "should be able to parse the Excel document" do
10
+ @sample.load[1].last.should == 'lemurinus'
11
+ end
12
+
13
+ it "should be able to create a snippet" do
14
+ @sample.snippet[1].last.should == 'lemurinus'
15
+ end
16
+
17
+ # it "should be able to write CSV" do
18
+ # data = [['foobar', 1, 2], ['bazbooz', 3, 4]]
19
+ # IMW.open!('test.csv').emit(data)
20
+ # IMW.open('test.csv').load[1].last.should == "4"
21
+ # end
22
+
23
+ # it "should raise an error on an invalid schema" do
24
+ # lambda { @sample.schema = [{:name => :foobar, :has_many => {:associations => [:foo, :bar]}}] }.should raise_error(IMW::SchemaError)
25
+ # end
26
+
27
+ # it "should accept a valid schema" do
28
+ # @sample.schema = [:foo, :bar, :baz]
29
+ # @sample.schema.should == [{:name => 'foo'}, {:name => 'bar'}, {:name => 'baz'}]
30
+ # end
31
+
32
+ # describe "guessing a schema" do
33
+
34
+ # Dir[File.join(IMWTest::DATA_DIR, 'formats/delimited/with_schema/*')].each do |path|
35
+ # it "should correctly guess that with_schema/#{File.basename(path)} has headers in its first row" do
36
+ # IMW.open(path).headers_in_first_line?.should == true
37
+ # end
38
+ # end
39
+
40
+ # Dir[File.join(IMWTest::DATA_DIR, 'formats/delimited/without_schema/*')].each do |path|
41
+ # it "should correctly guess that without_schema/#{File.basename(path)} does not have headers in its first row" do
42
+ # IMW.open(path).headers_in_first_line?.should == false
43
+ # end
44
+ # end
45
+
46
+ # it "should automatically set the headers on a source with guessed headers" do
47
+ # resource = IMW.open(Dir[File.join(IMWTest::DATA_DIR, 'formats/delimited/with_schema/*')].first)
48
+ # resource.guess_schema!
49
+ # resource.delimited_options[:headers].class.should == Array
50
+ # resource.schema.should_not be_empty
51
+ # end
52
+
53
+ # end
54
+
55
+ end
@@ -3,15 +3,15 @@ require File.join(File.dirname(__FILE__),'../../spec_helper')
3
3
  describe IMW::Formats::Json do
4
4
 
5
5
  before do
6
- @sample = IMW.open(File.join(IMWTest::DATA_DIR, 'sample.json'))
6
+ @sample = IMW.open(File.join(IMWTest::DATA_DIR, 'formats/json/sample.json'))
7
7
  end
8
8
 
9
9
  it "should be able to parse the JSON" do
10
- @sample.load.first['id'].should == 1
10
+ @sample.load["Aotus"].first['id'].should == 1
11
11
  end
12
12
 
13
13
  it "should be able to write JSON" do
14
- IMW.open!('test.json').dump({ 'foobar' => 3, 'bazbooz' => 4 })
14
+ IMW.open!('test.json') { |f| f.emit({ 'foobar' => 3, 'bazbooz' => 4 }) }
15
15
  IMW.open('test.json').load['foobar'].should == 3
16
16
  end
17
17
 
@@ -4,21 +4,21 @@ describe IMW::Formats::Xml do
4
4
  # just spec Xml now as the others are identical
5
5
 
6
6
  before do
7
- @sample = IMW.open(File.join(IMWTest::DATA_DIR, 'sample.xml'))
7
+ @sample = IMW.open(File.join(IMWTest::DATA_DIR, 'formats/sgml/sample.xml'))
8
8
  end
9
9
 
10
10
  it "should be able to load the XML" do
11
- ((@sample.load/"monkey").first/"genus").inner_text.should == 'Aotus'
11
+ ((@sample.load/"genus").first/"name").first.inner_text.should == 'Mandrillus'
12
12
  end
13
13
 
14
14
  it "should yield the XML when load is given a block" do
15
15
  @sample.load do |xml|
16
- ((xml/"monkey").first/"genus").inner_text.should == 'Aotus'
16
+ ((xml/"genus").first/"name").first.inner_text.should == 'Mandrillus'
17
17
  end
18
18
  end
19
19
 
20
20
  it "should parse the XML" do
21
- @sample.parse(:monkeys => ['monkey'])[:monkeys].size.should == 130
21
+ @sample.parse(:species => ['species[@id]'])[:species].size.should == 130
22
22
  end
23
23
  end
24
24
 
@@ -3,16 +3,16 @@ require File.join(File.dirname(__FILE__),'../../spec_helper')
3
3
  describe IMW::Formats::Yaml do
4
4
 
5
5
  before do
6
- @sample = IMW.open(File.join(IMWTest::DATA_DIR, 'sample.yaml'))
6
+ @sample = IMW.open(File.join(IMWTest::DATA_DIR, 'formats/yaml/sample.yaml')).load
7
7
  end
8
8
 
9
9
  it "should be able to parse the YAML" do
10
- @sample.first['id'].should == 1
10
+ @sample['Lophocebus'].first[:id].should == 94
11
11
  end
12
12
 
13
13
  it "should be able to write YAML" do
14
14
  data = { 'foobar' => 3, 'bazbooz' => 4 }
15
- IMW.open!('test.yaml').dump(data)
15
+ IMW.open!('test.yaml') { |f| f.emit(data) }
16
16
  IMW.open('test.yaml').load['foobar'].should == 3
17
17
  end
18
18
 
@@ -0,0 +1,26 @@
1
+ require File.dirname(__FILE__) + "/../../spec_helper"
2
+
3
+ describe IMW::Metadata::Field do
4
+
5
+ describe "initializing" do
6
+ it "should parse a symbol or string into a hash" do
7
+ IMW::Metadata::Field.new(:foobar).should == { :name => "foobar" }
8
+ IMW::Metadata::Field.new('foobar').should == { :name => 'foobar' }
9
+ end
10
+
11
+ it "should raise an error on a Hash without a :name key" do
12
+ lambda { IMW::Metadata::Field.new(:foo => 'bar') }.should raise_error(IMW::ArgumentError)
13
+ end
14
+
15
+ it "should accept a Hash with a :name key" do
16
+ data = { :name => :foobar, :title => "Bazbooz", :unit => "m" }
17
+ IMW::Metadata::Field.new(data).should == data
18
+ end
19
+
20
+ it "should dup a field if given one" do
21
+ orig_field = IMW::Metadata::Field.new('foobar')
22
+ IMW::Metadata::Field.new(orig_field).should == orig_field
23
+ end
24
+ end
25
+
26
+ end
@@ -0,0 +1,27 @@
1
+ require File.dirname(__FILE__) + "/../../spec_helper"
2
+
3
+ describe IMW::Metadata::Schema do
4
+
5
+ describe "initializing" do
6
+ it "should accept an array" do
7
+ IMW::Metadata::Schema.new([1,2,3]).should == [{:name => '1'}, {:name => '2'}, {:name => '3'}]
8
+ end
9
+
10
+ it "should dup a Schema if given one" do
11
+ orig_schema = IMW::Metadata::Schema.new([1,2,3])
12
+ IMW::Metadata::Schema.new(orig_schema).should == orig_schema
13
+ end
14
+ end
15
+
16
+ describe 'loading' do
17
+ it "should load an Array in a resource" do
18
+ resource = IMW.open('some_resource')
19
+ resource.should_receive(:load).and_return(%w[foo bar baz])
20
+ IMW.should_receive(:open).and_return(resource)
21
+ IMW::Metadata::Schema.load(resource.to_s).map { |field| field[:name] }.should == %w[foo bar baz]
22
+ end
23
+
24
+ end
25
+
26
+ end
27
+
@@ -0,0 +1,39 @@
1
+ require File.dirname(__FILE__) + "/../spec_helper"
2
+
3
+ describe IMW::Metadata do
4
+
5
+ describe "initalizing" do
6
+
7
+ it "should accept a hash" do
8
+ IMW::Metadata.new('a' => ['a', 'b']).should == { 'a' => [{:name => 'a'}, {:name => 'b'}] }
9
+ end
10
+ end
11
+
12
+ describe 'loading' do
13
+
14
+ it "should accept a Hash in a resource" do
15
+ data = {'a' => ['a', 'b']}
16
+ resource = IMW.open('some_resource')
17
+ IMW.should_receive(:open).with(resource).and_return(resource)
18
+ resource.should_receive(:load).and_return(data)
19
+ IMW::Metadata.load(resource).should == { 'a' => [{:name => 'a'}, {:name => 'b'}] }
20
+ end
21
+ end
22
+
23
+ describe "constructing absolute URIs" do
24
+
25
+ before { @metadata = IMW::Metadata.new }
26
+
27
+ it "should return the resource given without a base" do
28
+ @metadata.send(:absolute_uri, 'path/to/something').should == 'path/to/something'
29
+ end
30
+
31
+ it "should return the absolute URI with a base" do
32
+ path = File.join(IMWTest::TMP_DIR, 'metadata.yaml')
33
+ FileUtils.mkdir_p(path)
34
+ @metadata.base = path
35
+ @metadata.send(:absolute_uri, 'path/to/something').should == File.join(IMWTest::TMP_DIR, '/path/to/something')
36
+ end
37
+ end
38
+
39
+ end
@@ -4,7 +4,7 @@ require 'ostruct'
4
4
  describe IMW::Parsers::LineParser do
5
5
 
6
6
  before do
7
- @path = File.dirname(__FILE__) + "/../../data/sample.csv"
7
+ @path = File.join(IMWTest::DATA_DIR, 'formats/delimited/sample.csv')
8
8
  @file = File.new(@path)
9
9
  @fields = [:id, :name, :genus, :species]
10
10
  end
@@ -25,106 +25,6 @@ describe IMW::Resource do
25
25
  end
26
26
  end
27
27
 
28
- describe "parsing various and sundry URIs should correctly parse a" do
29
-
30
- before do
31
- IMW::Resource.should_receive(:extend_resource!).with(an_instance_of(IMW::Resource), {})
32
- end
33
-
34
- it "local file path" do
35
- resource = IMW::Resource.new("/home/foo.txt")
36
- resource.stub!(:path).and_return("/home/foo.txt")
37
-
38
- resource.scheme.should be_nil
39
- resource.dirname.should == '/home'
40
- resource.basename.should == 'foo.txt'
41
- resource.extname.should == '.txt'
42
- resource.extension.should == 'txt'
43
- resource.name.should == 'foo'
44
- end
45
-
46
- it "local file path with spaces in the name" do
47
- resource = IMW::Resource.new("/home/foo bar.txt")
48
- resource.stub!(:path).and_return("/home/foo bar.txt")
49
- resource.name.should == 'foo bar'
50
- end
51
-
52
- it "local file path with an explicit file:// scheme" do
53
- resource = IMW::Resource.new("file:///home/foo.txt")
54
- resource.scheme.should == 'file'
55
- end
56
-
57
- it "web URL with query and fragment" do
58
- resource = IMW::Resource.new("http://mysite.com/some/page?param=value#frag")
59
- resource.stub!(:path).and_return("/some/page")
60
- resource.scheme.should == 'http'
61
- resource.dirname.should == '/some'
62
- resource.basename.should == 'page'
63
- resource.extname.should == ''
64
- resource.extension.should == ''
65
- resource.name.should == 'page'
66
- end
67
-
68
- end
69
-
70
- it "should open a URI without attempting to extend with modules if so asked" do
71
- IMW::Resource.should_not_receive(:extend_resource!)
72
- IMW::Resource.new("/path/to/some/file.txt", :no_modules => true)
73
- end
74
-
75
- describe "extending resources with specific modules" do
76
- before do
77
- @resource = IMW::Resource.new('http://www.infochimps.com/data', :no_modules => true)
78
- end
79
-
80
- it "should use a specific module when asked with a string" do
81
- IMW::Resource.extend_resource!(@resource, :use_modules => ["Formats::Csv"])
82
- @resource.resource_modules.should include(IMW::Formats::Csv)
83
- end
84
-
85
- it "should use a specific module when asked with a module" do
86
- IMW::Resource.extend_resource!(@resource, :use_modules => [IMW::Formats::Csv])
87
- @resource.resource_modules.should include(IMW::Formats::Csv)
88
- end
89
-
90
- it "should not use a specific module when asked with a string" do
91
- IMW::Resource.extend_resource!(@resource, :skip_modules => ["Schemes::HTTP"])
92
- @resource.resource_modules.should_not include(IMW::Schemes::HTTP)
93
- end
94
-
95
- it "should not use a specific module when asked with a module" do
96
- IMW::Resource.extend_resource!(@resource, :skip_modules => [IMW::Schemes::HTTP])
97
- @resource.resource_modules.should_not include(IMW::Schemes::HTTP)
98
- end
99
-
100
- end
101
-
102
- describe "registering a new handler" do
103
- after do
104
- IMW::USER_DEFINED_HANDLERS.delete_if { true }
105
- end
106
-
107
- it "should raise an error if the module given isn't a module or string" do
108
- lambda { IMW.register_handler 3, // }.should raise_error(IMW::ArgumentError)
109
- lambda { IMW.register_handler "IMW", // }.should_not raise_error(IMW::ArgumentError)
110
- lambda { IMW.register_handler IMW, // }.should_not raise_error(IMW::ArgumentError)
111
- end
112
-
113
- it "should raise an error if the handler given isn't a Regexp, Proc, or true" do
114
- lambda { IMW.register_handler IMW, 3 }.should raise_error(IMW::ArgumentError)
115
- lambda { IMW.register_handler IMW, /foo/ }.should_not raise_error(IMW::ArgumentError)
116
- lambda { IMW.register_handler IMW, Proc.new { |r| true } }.should_not raise_error(IMW::ArgumentError)
117
- lambda { IMW.register_handler IMW, true }.should_not raise_error(IMW::ArgumentError)
118
- end
119
-
120
- it "should use a valid handler when appropriate" do
121
- NewModule = Module.new
122
- IMW.register_handler NewModule, /\.foo$/
123
- IMW.open('/path/to/something.foo').resource_modules.should include(NewModule)
124
- end
125
-
126
- end
127
-
128
28
  end
129
29
 
130
30
 
@@ -2,13 +2,13 @@ require File.join(File.dirname(__FILE__),'../../spec_helper')
2
2
 
3
3
  describe IMW::Schemes::HDFS do
4
4
  before do
5
- def fake_hdfs_resource path, num_dirs=nil, num_files=nil, size=nil
6
- if num_dirs && num_files && size
7
- response = " #{num_dirs} #{num_files} #{size} hdfs://localhost#{path}"
8
- else
5
+ def fake_hdfs_resource path, options={}
6
+ if options == false
9
7
  response = ""
8
+ else
9
+ response = " #{options[:dirs] || 0} #{options[:files] || 1} #{options[:size] || 1000} hdfs://localhost#{path}"
10
10
  end
11
- IMW::Schemes::HDFS.should_receive(:fs).with(:count, path).and_return(response)
11
+ IMW::Schemes::HDFS.should_receive(:fs).with(:count, path).at_least(:once).and_return(response)
12
12
  IMW.open("hdfs://#{path}")
13
13
  end
14
14
  @path = '/path/to/myfile'
@@ -16,7 +16,7 @@ describe IMW::Schemes::HDFS do
16
16
 
17
17
  describe "refreshing its properties" do
18
18
  it "should correctly get properties for a resource which exists" do
19
- resource = fake_hdfs_resource(@path, 2, 3, 1000)
19
+ resource = fake_hdfs_resource(@path, :dirs => 2, :files => 3, :size => 1000)
20
20
  resource.exist?.should be_true
21
21
  resource.num_dirs.should == 2
22
22
  resource.num_files.should == 3
@@ -24,38 +24,44 @@ describe IMW::Schemes::HDFS do
24
24
  end
25
25
 
26
26
  it "should gracefully handle a resource which doesn't exist" do
27
- resource = fake_hdfs_resource(@path)
27
+ resource = fake_hdfs_resource(@path, false)
28
28
  resource.exist?.should be_false
29
29
  end
30
30
 
31
31
  it "should execute the correct command to delete the path" do
32
- resource = fake_hdfs_resource(@path, 2, 3, 1000)
32
+ resource = fake_hdfs_resource(@path)
33
33
  IMW::Schemes::HDFS.should_receive(:fs).with(:rm, resource.path)
34
34
  resource.rm
35
35
  end
36
36
 
37
37
  it "should execute the correct command to delete the path when skipping the trash" do
38
- resource = fake_hdfs_resource(@path, 2, 3, 1000)
38
+ resource = fake_hdfs_resource(@path)
39
39
  IMW::Schemes::HDFS.should_receive(:fs).with(:rm, '-skipTrash', resource.path)
40
40
  resource.rm :skip_trash => true
41
41
  end
42
42
 
43
43
  it "should recognize a file and extend it properly" do
44
- resource = fake_hdfs_resource(@path, 0, 1, 1000)
44
+ resource = fake_hdfs_resource(@path)
45
45
  resource.num_dirs.should == 0
46
46
  resource.num_files.should == 1
47
47
  resource.exist?.should be_true
48
48
  resource.is_directory?.should be_false
49
- resource.resource_modules.should include(IMW::Schemes::HDFSFile)
49
+ resource.modules.should include(IMW::Schemes::HDFSFile)
50
50
  end
51
51
 
52
52
  it "should recognize a directory and extend it properly" do
53
- resource = fake_hdfs_resource(@path, 2, 1, 1000)
53
+ resource = fake_hdfs_resource(@path, :dirs => 2, :files => 1)
54
54
  resource.num_dirs.should == 2
55
55
  resource.num_files.should == 1
56
56
  resource.exist?.should be_true
57
57
  resource.is_directory?.should be_true
58
- resource.resource_modules.should include(IMW::Schemes::HDFSDirectory)
58
+ resource.modules.should include(IMW::Schemes::HDFSDirectory)
59
+ end
60
+
61
+ it "should be able to join path segments to a directory" do
62
+ resource = fake_hdfs_resource(@path, :dirs => 2)
63
+ sub_resource = fake_hdfs_resource("#{@path}/a/b/c")
64
+ resource.join('a', 'b/c').to_s.should == sub_resource.to_s
59
65
  end
60
66
  end
61
67
  end
@@ -5,13 +5,13 @@ describe IMW::Schemes::Local::Base do
5
5
  it "should not extend a local file with LocalDirectory" do
6
6
  @file = IMW::Resource.new('foo.txt', :no_modules => true)
7
7
  @file.should_not_receive(:extend).with(IMW::Schemes::Local::LocalDirectory)
8
- @file.extend_appropriately!
8
+ IMW::Resource.extend_instance!(@file)
9
9
  end
10
10
 
11
11
  it "should not extend a local directory with LocalFile" do
12
12
  @dir = IMW::Resource.new(IMWTest::TMP_DIR, :no_modules => true)
13
13
  @dir.should_not_receive(:extend).with(IMW::Schemes::Local::LocalFile)
14
- @dir.extend_appropriately!
14
+ IMW::Resource.extend_instance!(@dir)
15
15
  end
16
16
 
17
17
  it "should correctly resolve relative paths" do
@@ -37,7 +37,6 @@ describe IMW::Schemes::Local::LocalFile do
37
37
  @file.exist?.should be_false
38
38
  end
39
39
 
40
-
41
40
  it "can read a file" do
42
41
  @file.read.size.should > 0
43
42
  end
@@ -60,6 +59,12 @@ describe IMW::Schemes::Local::LocalFile do
60
59
  line[0..5]
61
60
  end.class.should == Array
62
61
  end
62
+
63
+ it "can produce a snippet" do
64
+ path = IMWTest::DATA_DIR + "/formats/none/sample"
65
+ # FIXME only look at the first 100 bytes b/c of subsequent non-ascii chars...
66
+ IMW.open(path).snippet[0..100].should == File.new(path).read(101)
67
+ end
63
68
  end
64
69
 
65
70
  describe IMW::Schemes::Local::LocalDirectory do
@@ -100,6 +105,57 @@ describe IMW::Schemes::Local::LocalDirectory do
100
105
  @dir.resources.map(&:class).uniq.first.should == IMW::Resource
101
106
  end
102
107
 
108
+ describe "checking whether it contains other resources" do
109
+
110
+ it "should return false for remote paths" do
111
+ @dir.contains?("http://google.com").should be_false
112
+ end
113
+
114
+ it "should return true for its own path" do
115
+ @dir.contains?(@dir.path).should be_true
116
+ end
117
+
118
+ it "should return false for a path that doesn't start with its path" do
119
+ @dir.contains?(File.expand_path('foo')).should be_false
120
+ end
121
+
122
+ it "should return false for a path that starts with its path but doesn't exist" do
123
+ @dir.contains?(File.expand_path('dir/foo/baz')).should be_false
124
+ end
125
+
126
+ it "should return true for a path that starts with its path and exists" do
127
+ FileUtils.mkdir_p('dir/foo/baz')
128
+ @dir.contains?(File.expand_path('dir/foo/baz')).should be_true
129
+ end
130
+
131
+ end
132
+
133
+ describe "handling schemata" do
134
+
135
+ it "should recognize a YAML schema file" do
136
+ schemata_path = File.join(@dir.path, 'schema.yaml')
137
+ IMWTest::Random.file(schemata_path)
138
+ @dir.schemata_path.should == schemata_path
139
+ end
140
+
141
+ it "should recognize a JSON schema file" do
142
+ schemata_path = File.join(@dir.path, 'schema.json')
143
+ IMWTest::Random.file(schemata_path)
144
+ @dir.schemata_path.should == schemata_path
145
+ end
146
+
147
+ it "should recognize a funny-named YAML schema file" do
148
+ schemata_path = File.join(@dir.path, 'schema-1838293.yml')
149
+ IMWTest::Random.file(schemata_path)
150
+ @dir.schemata_path.should == schemata_path
151
+ end
152
+
153
+ end
154
+
155
+ it "can join with a path" do
156
+ @dir.join("a", "b/c").to_s.should == File.join(@dir.path, 'a/b/c')
157
+ end
158
+
103
159
  describe 'can package itself to' do
104
160
  ['tar', 'tar.bz2', 'tar.gz', 'zip', 'rar'].each do |extension|
105
161
  it "a #{extension} archive" do