imw 0.2.16 → 0.2.17

Sign up to get free protection for your applications and to get access to all the features.
data/lib/imw/resource.rb CHANGED
@@ -73,23 +73,20 @@ module IMW
73
73
  #
74
74
  # @param [String, Addressable::URI] uri
75
75
  # @param [Hash] options
76
- # @option options [true, false] no_modules
77
76
  # @option options [String] mode the mode to open the resource in (will be ignored when inapplicable)
78
- # @option options [IMW::Metadata::Record, Array] schema the schema of this resource
79
77
  # @return [IMW::Resource]
80
78
  def initialize uri, options={}
81
79
  self.uri = uri
82
80
  self.resource_options = options
83
81
  self.mode = options[:mode] || 'r'
84
- self.schema = options[:schema] if options[:schema]
85
82
  extend_appropriately!(options)
86
83
  end
87
84
 
88
85
  # Provides resources with a wrapped Addressable::URI object.
89
86
  include IMW::Utils::HasURI
90
87
 
91
- # Provides resources with a schema.
92
- include IMW::Metadata::Schematized
88
+ # Provides resources with a summary, metadata, & schema.
89
+ include IMW::Metadata::HasSummary
93
90
 
94
91
  # Gives IMW::Resource instances the ability to dynamically
95
92
  # extend themselves with modules chosen from a set of handlers
@@ -35,7 +35,7 @@ module IMW
35
35
  def effective_basename
36
36
  (basename.blank? || basename =~ %r{^/*$}) ? "_index" : basename
37
37
  end
38
-
38
+
39
39
  # Send a GET request to this resource's URI.
40
40
  #
41
41
  # If the response doesn't have HTTP code 2xx, a RestClient
@@ -71,6 +71,8 @@ module IMW
71
71
  # Defines methods appropriate for a local file.
72
72
  module LocalFile
73
73
 
74
+ include IMW::Metadata::HasMetadata
75
+
74
76
  # Is this resource a regular file?
75
77
  #
76
78
  # @return [true, false]
@@ -173,7 +175,7 @@ module IMW
173
175
  def snippet
174
176
  returning([]) do |snip|
175
177
  (io.read(1024) || '').bytes.each do |byte|
176
- # CR LF SPACE ~
178
+ # CR LF SPACE ~
177
179
  snip << byte.chr if byte == 13 || byte == 10 || byte >= 32 && byte <= 126
178
180
  end
179
181
  end.join
@@ -206,19 +208,17 @@ module IMW
206
208
  # - basename
207
209
  # - size
208
210
  # - extension
209
- # - snippet
210
- def summary
211
- data = {
212
- :basename => basename,
213
- :size => size,
214
- :extension => extension,
215
- :num_lines => num_lines
216
- }
217
- data[:snippet] = snippet if respond_to?(:snippet)
218
- data[:schema] = schema if respond_to?(:schema)
219
- data
211
+ # - num_lines
212
+ def external_summary
213
+ super().merge({
214
+ :size => size,
215
+ :num_lines => num_lines
216
+ })
220
217
  end
221
218
 
219
+
220
+
221
+
222
222
  protected
223
223
 
224
224
  # Return a triple of line, word, and character counts for this
@@ -231,7 +231,7 @@ module IMW
231
231
  @wc ||= begin
232
232
  `wc #{path}`.chomp.strip.split.map(&:to_i)
233
233
  rescue
234
- [0,0,0] # FIXME
234
+ [nil,nil,nil] # FIXME
235
235
  end
236
236
  end
237
237
 
@@ -388,44 +388,16 @@ module IMW
388
388
  end
389
389
  end
390
390
 
391
- # Return a hash summarizing this directory with a key
392
- # <tt>:contents</tt> containing an array of hashes summarizing
393
- # this directories contents.
394
- #
395
391
  # The directory summary includes the following information
396
- # - basename
397
392
  # - size
398
393
  # - num_files
399
- # - contents
400
394
  #
401
395
  # @return [Hash]
402
- def summary
403
- {
404
- :basename => basename,
405
- :size => size,
406
- :num_files => contents.length,
407
- :contents => resources.map do |resource|
408
- resource.guess_schema! if guess_schema? && resource.respond_to?(:guess_schema!)
409
- resource_summary = resource.summary
410
- resource_summary[:schema] = metadata[resource] if metadata && metadata.describe?(resource) # this should be handled by 'resources' method above
411
- resource_summary
412
- end
413
- }
414
- end
415
-
416
- # Whether or not to have this directory's resources guess
417
- # their schemas when none is provided.
418
- #
419
- # @return [true, false]
420
- def guess_schema?
421
- (!! @guess_schema)
422
- end
423
-
424
- # Force this directory's resources to guess at their schema.
425
- #
426
- # @return [true]
427
- def guess_schema!
428
- @guess_schema = true
396
+ def external_summary
397
+ super().merge({
398
+ :size => size,
399
+ :num_files => contents.length,
400
+ })
429
401
  end
430
402
 
431
403
  end
@@ -46,6 +46,18 @@ module IMW
46
46
  end
47
47
  end
48
48
 
49
+ # Return a summary of this database.
50
+ #
51
+ # Purposefully does not call +super+.
52
+ #
53
+ # @return [Hash]
54
+ def external_summary
55
+ {
56
+ :uri => uri.to_s,
57
+ :database => database
58
+ }
59
+ end
60
+
49
61
  # The (cached) database connection for this resource.
50
62
  #
51
63
  # @return [DBI::DatabaseHandle]
@@ -43,26 +43,25 @@ module IMW
43
43
 
44
44
  # Return a summary of the +inputs+ to this Summarizer.
45
45
  #
46
- # Delegates to the +summary+ method of each constituent
47
- # IMW::Resource in +inputs+.
46
+ # Will swallow errors.
48
47
  #
49
48
  # @return [Array<Hash>]
50
49
  def summary
51
- @summary ||= inputs.map do |input|
52
- #input.guess_schema! if input.schema.nil? && input.respond_to?(:guess_schema!)
53
- (input.respond_to?(:summary) ? input.summary : {}) rescue {}
54
- end
50
+ @summary ||= summary! rescue []
55
51
  end
56
52
 
57
- # The metadata employed by this Summarizer.
58
- #
59
- # It can be set by setting <tt>options[:metadata]</tt>.
53
+ # Return a summary of the +inputs+ to this summarizer.
54
+ #
55
+ # Delegates to the +summary+ method of each constituent
56
+ # IMW::Resource in +inputs+.
60
57
  #
61
- # @return [IMW::Metadata, nil]
62
- def metadata
63
- @metadata ||= options[:metadata] && IMW::Metadata.load(options[:metadata])
58
+ # @return [Array]
59
+ def summary!
60
+ inputs.map do |input|
61
+ (input.respond_to?(:summary) ? input.summary : {})
62
+ end
64
63
  end
65
-
64
+
66
65
  protected
67
66
  # Set new inputs for this summarizer.
68
67
  #
@@ -18,32 +18,23 @@ describe IMW::Formats::Csv do
18
18
  IMW.open('test.csv').load[1].last.should == "4"
19
19
  end
20
20
 
21
- it "should raise an error on an invalid schema" do
22
- lambda { @sample.schema = [{:name => :foobar, :has_many => {:associations => [:foo, :bar]}}] }.should raise_error(IMW::SchemaError)
23
- end
24
-
25
- it "should accept a valid schema" do
26
- @sample.schema = [:foo, :bar, :baz]
27
- @sample.schema.should == [{:name => 'foo'}, {:name => 'bar'}, {:name => 'baz'}]
28
- end
29
-
30
21
  describe "guessing a schema" do
31
22
 
32
23
  Dir[File.join(IMWTest::DATA_DIR, 'formats/delimited/with_schema/*')].each do |path|
33
24
  it "should correctly guess that with_schema/#{File.basename(path)} has headers in its first row" do
34
- IMW.open(path).headers_in_first_line?.should == true
25
+ IMW.open(path).fields_in_first_line?.should == true
35
26
  end
36
27
  end
37
28
 
38
29
  Dir[File.join(IMWTest::DATA_DIR, 'formats/delimited/without_schema/*')].each do |path|
39
30
  it "should correctly guess that without_schema/#{File.basename(path)} does not have headers in its first row" do
40
- IMW.open(path).headers_in_first_line?.should == false
31
+ IMW.open(path).fields_in_first_line?.should == false
41
32
  end
42
33
  end
43
34
 
44
35
  it "should automatically set the headers on a source with guessed headers" do
45
36
  resource = IMW.open(Dir[File.join(IMWTest::DATA_DIR, 'formats/delimited/with_schema/*')].first)
46
- resource.guess_schema!
37
+ resource.guess_fields!
47
38
  resource.delimited_options[:headers].class.should == Array
48
39
  resource.schema.should_not be_empty
49
40
  end
@@ -0,0 +1,56 @@
1
+ require File.dirname(__FILE__) + "/../../spec_helper"
2
+
3
+ describe IMW::Metadata::ContainsMetadata do
4
+
5
+ before do
6
+ class Foo
7
+ attr_accessor :contents
8
+ def path ; IMWTest::TMP_DIR ; end
9
+ def basename ; File.basename(IMWTest::TMP_DIR) ; end
10
+ include IMW::Metadata::ContainsMetadata
11
+ end
12
+ @foo = Foo.new
13
+ @foo.contents = []
14
+ end
15
+
16
+ describe 'finding the default metadata URI' do
17
+ it "should return the default metadata URI when 'contents' is empty" do
18
+ @foo.default_metadata_uri.should == File.join(IMWTest::TMP_DIR, File.basename(IMWTest::TMP_DIR) + ".icss.yaml")
19
+ end
20
+
21
+ it "should return the default metadata URI when 'contents' doesn't contain any metadata files" do
22
+ @foo.contents.concat ['bar.txt', 'crazy_file.yaml', 'foo.json'].map { |p| File.join(IMWTest::TMP_DIR, p) }
23
+ @foo.default_metadata_uri.should == File.join(IMWTest::TMP_DIR, File.basename(IMWTest::TMP_DIR) + ".icss.yaml")
24
+ end
25
+
26
+ %w[my-projects.icss.yaml stupid-crazy-fool-of-a-dataset-icss.json foobar-25.metadata.buzz.yml].each do |basename|
27
+ it "should return the metadata URI when 'contents' contains a URI matching '#{basename}'" do
28
+ @foo.contents.concat ['bar.txt', 'crazy_file.yaml', 'foo.json', basename].map { |p| File.join(IMWTest::TMP_DIR, p) }
29
+ @foo.default_metadata_uri.should == File.join(IMWTest::TMP_DIR, basename)
30
+ end
31
+ end
32
+ end
33
+
34
+
35
+ describe 'returning its metadata' do
36
+ it "should return 'nil' when no metadata exists on disk" do
37
+ @foo.metadata.should be_nil
38
+ end
39
+
40
+ it "should return Metadata when metadata exists on disk" do
41
+ IMW.open!(@foo.default_metadata_uri) do |f|
42
+ f.write <<YAML
43
+ ---
44
+ foo:
45
+ description: bar
46
+ fields: baz
47
+ YAML
48
+ end
49
+ @foo.metadata.class.should == IMW::Metadata
50
+ @foo.metadata['foo']['description'].should == 'bar'
51
+ end
52
+ end
53
+
54
+
55
+
56
+ end
@@ -3,17 +3,16 @@ require File.dirname(__FILE__) + "/../../spec_helper"
3
3
  describe IMW::Metadata::Field do
4
4
 
5
5
  describe "initializing" do
6
- it "should parse a symbol or string into a hash" do
7
- IMW::Metadata::Field.new(:foobar).should == { :name => "foobar" }
8
- IMW::Metadata::Field.new('foobar').should == { :name => 'foobar' }
6
+ it "should parse a string into a hash" do
7
+ IMW::Metadata::Field.new('foobar').should == { "name" => 'foobar' }
9
8
  end
10
9
 
11
10
  it "should raise an error on a Hash without a :name key" do
12
- lambda { IMW::Metadata::Field.new(:foo => 'bar') }.should raise_error(IMW::ArgumentError)
11
+ lambda { IMW::Metadata::Field.new('foo' => 'bar') }.should raise_error(IMW::ArgumentError)
13
12
  end
14
13
 
15
14
  it "should accept a Hash with a :name key" do
16
- data = { :name => :foobar, :title => "Bazbooz", :unit => "m" }
15
+ data = { 'name' => :foobar, 'title' => "Bazbooz", 'unit' => "m" }
17
16
  IMW::Metadata::Field.new(data).should == data
18
17
  end
19
18
 
@@ -0,0 +1,58 @@
1
+ require File.dirname(__FILE__) + "/../../spec_helper"
2
+
3
+ describe IMW::Metadata::HasMetadata do
4
+
5
+ before do
6
+ class Foo
7
+ def uri ; File.join(IMWTest::TMP_DIR, 'test', 'subdir', 'foobar.csv') ; end
8
+ def basename ; File.basename(uri) ; end
9
+ def extension ; 'csv' ; end
10
+ def dir ; IMW.open(File.join(IMWTest::TMP_DIR, 'test', 'subdir')) ; end
11
+ include IMW::Metadata::HasMetadata
12
+ end
13
+ @foo = Foo.new
14
+ end
15
+
16
+ it "should be able to build a schema" do
17
+ @foo.schema.should include(:type, :namespace, :name, :doc, :fields, :non_avro)
18
+ end
19
+
20
+ describe "finding its metadata" do
21
+
22
+ before do
23
+ FileUtils.mkdir_p(@foo.dir.path)
24
+ IMWTest::Random.file(File.join(@foo.dir.path, 'foobar.csv'))
25
+ end
26
+
27
+ it "should return 'nil' when it can't find any metadata" do
28
+ @foo.metadata.should be_nil
29
+ end
30
+
31
+ it "should return 'nil' when a metadata file is found that doesn't describe it" do
32
+ IMW.open!("has_metadata_test.icss.yaml") do |f|
33
+ f.write <<YAML
34
+ ---
35
+ foobar.csv:
36
+ description: bar
37
+ fields: ["baz", "booz"]
38
+ YAML
39
+ end
40
+ @foo.metadata.should be_nil
41
+ end
42
+
43
+ # it "should return the metadata when a metadata file is found that does describe it" do
44
+ # IMW.open!("has_metadata_test.icss.yaml") do |f|
45
+ # f.write <<YAML
46
+ # ---
47
+ # #{IMWTest::TMP_DIR}/test/subdir/foobar.csv:
48
+ # description: bar
49
+ # fields: ["baz", "booz"]
50
+ # YAML
51
+ # end
52
+ # @foo.metadata.class.should == IMW::Metadata
53
+ # @foo.metadata[@foo]['description'].should == 'bar'
54
+ # end
55
+
56
+ end
57
+
58
+ end
@@ -0,0 +1,32 @@
1
+ require File.dirname(__FILE__) + "/../../spec_helper"
2
+
3
+ describe IMW::Metadata::HasSummary do
4
+
5
+ before do
6
+ class Foo
7
+ def initialize(*args) ; @args = args ; end
8
+ def uri ; File.join(IMWTest::TMP_DIR, *@args) ; end
9
+ def basename ; File.basename(uri) ; end
10
+ def extension ; File.extname(@args.last || '').gsub(/^\./,'') ; end
11
+ include IMW::Metadata::HasSummary
12
+ end
13
+ @foo = Foo.new('foo', 'bar.csv')
14
+ end
15
+
16
+ it "should build a summary from an external summary" do
17
+ @foo.summary.should include(:uri, :basename, :extension)
18
+ end
19
+
20
+ it "should build a summary from an external summary and a schema when possible" do
21
+ @foo.stub!(:schema).and_return({:foo => 'bar'})
22
+ @foo.summary[:schema].should == {:foo => 'bar'}
23
+ end
24
+
25
+ it "should be able to build an external summary describing how it's situated in the world" do
26
+ @foo.summary[:uri].should == File.join(IMWTest::TMP_DIR, 'foo', 'bar.csv')
27
+ @foo.summary[:basename].should == 'bar.csv'
28
+ @foo.summary[:extension].should == 'csv'
29
+ end
30
+
31
+ end
32
+
@@ -3,25 +3,22 @@ require File.dirname(__FILE__) + "/../../spec_helper"
3
3
  describe IMW::Metadata::Schema do
4
4
 
5
5
  describe "initializing" do
6
- it "should accept an array" do
7
- IMW::Metadata::Schema.new([1,2,3]).should == [{:name => '1'}, {:name => '2'}, {:name => '3'}]
6
+ it "should merge with a Hash" do
7
+ IMW::Metadata::Schema.new({:foo => 'foobar'}).should == { :foo => 'foobar' }
8
8
  end
9
9
 
10
- it "should dup a Schema if given one" do
11
- orig_schema = IMW::Metadata::Schema.new([1,2,3])
12
- IMW::Metadata::Schema.new(orig_schema).should == orig_schema
10
+ it "should merge with a Schema" do
11
+ IMW::Metadata::Schema.new(IMW::Metadata::Schema.new({:foo => 'foobar'})).should == { :foo => 'foobar' }
13
12
  end
14
- end
15
13
 
16
- describe 'loading' do
17
- it "should load an Array in a resource" do
18
- resource = IMW.open('some_resource')
19
- resource.should_receive(:load).and_return(%w[foo bar baz])
20
- IMW.should_receive(:open).and_return(resource)
21
- IMW::Metadata::Schema.load(resource.to_s).map { |field| field[:name] }.should == %w[foo bar baz]
14
+ it "should ignore anything else" do
15
+ IMW::Metadata::Schema.new('foobar').should == {}
22
16
  end
23
17
 
18
+ it "should accept empty args" do
19
+ IMW::Metadata::Schema.new.should == {}
20
+ end
21
+
24
22
  end
25
23
 
26
24
  end
27
-