imw 0.2.16 → 0.2.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/imw/resource.rb CHANGED
@@ -73,23 +73,20 @@ module IMW
73
73
  #
74
74
  # @param [String, Addressable::URI] uri
75
75
  # @param [Hash] options
76
- # @option options [true, false] no_modules
77
76
  # @option options [String] mode the mode to open the resource in (will be ignored when inapplicable)
78
- # @option options [IMW::Metadata::Record, Array] schema the schema of this resource
79
77
  # @return [IMW::Resource]
80
78
  def initialize uri, options={}
81
79
  self.uri = uri
82
80
  self.resource_options = options
83
81
  self.mode = options[:mode] || 'r'
84
- self.schema = options[:schema] if options[:schema]
85
82
  extend_appropriately!(options)
86
83
  end
87
84
 
88
85
  # Provides resources with a wrapped Addressable::URI object.
89
86
  include IMW::Utils::HasURI
90
87
 
91
- # Provides resources with a schema.
92
- include IMW::Metadata::Schematized
88
+ # Provides resources with a summary, metadata, & schema.
89
+ include IMW::Metadata::HasSummary
93
90
 
94
91
  # Gives IMW::Resource instances the ability to dynamically
95
92
  # extend themselves with modules chosen from a set of handlers
@@ -35,7 +35,7 @@ module IMW
35
35
  def effective_basename
36
36
  (basename.blank? || basename =~ %r{^/*$}) ? "_index" : basename
37
37
  end
38
-
38
+
39
39
  # Send a GET request to this resource's URI.
40
40
  #
41
41
  # If the response doesn't have HTTP code 2xx, a RestClient
@@ -71,6 +71,8 @@ module IMW
71
71
  # Defines methods appropriate for a local file.
72
72
  module LocalFile
73
73
 
74
+ include IMW::Metadata::HasMetadata
75
+
74
76
  # Is this resource a regular file?
75
77
  #
76
78
  # @return [true, false]
@@ -173,7 +175,7 @@ module IMW
173
175
  def snippet
174
176
  returning([]) do |snip|
175
177
  (io.read(1024) || '').bytes.each do |byte|
176
- # CR LF SPACE ~
178
+ # CR LF SPACE ~
177
179
  snip << byte.chr if byte == 13 || byte == 10 || byte >= 32 && byte <= 126
178
180
  end
179
181
  end.join
@@ -206,19 +208,17 @@ module IMW
206
208
  # - basename
207
209
  # - size
208
210
  # - extension
209
- # - snippet
210
- def summary
211
- data = {
212
- :basename => basename,
213
- :size => size,
214
- :extension => extension,
215
- :num_lines => num_lines
216
- }
217
- data[:snippet] = snippet if respond_to?(:snippet)
218
- data[:schema] = schema if respond_to?(:schema)
219
- data
211
+ # - num_lines
212
+ def external_summary
213
+ super().merge({
214
+ :size => size,
215
+ :num_lines => num_lines
216
+ })
220
217
  end
221
218
 
219
+
220
+
221
+
222
222
  protected
223
223
 
224
224
  # Return a triple of line, word, and character counts for this
@@ -231,7 +231,7 @@ module IMW
231
231
  @wc ||= begin
232
232
  `wc #{path}`.chomp.strip.split.map(&:to_i)
233
233
  rescue
234
- [0,0,0] # FIXME
234
+ [nil,nil,nil] # FIXME
235
235
  end
236
236
  end
237
237
 
@@ -388,44 +388,16 @@ module IMW
388
388
  end
389
389
  end
390
390
 
391
- # Return a hash summarizing this directory with a key
392
- # <tt>:contents</tt> containing an array of hashes summarizing
393
- # this directories contents.
394
- #
395
391
  # The directory summary includes the following information
396
- # - basename
397
392
  # - size
398
393
  # - num_files
399
- # - contents
400
394
  #
401
395
  # @return [Hash]
402
- def summary
403
- {
404
- :basename => basename,
405
- :size => size,
406
- :num_files => contents.length,
407
- :contents => resources.map do |resource|
408
- resource.guess_schema! if guess_schema? && resource.respond_to?(:guess_schema!)
409
- resource_summary = resource.summary
410
- resource_summary[:schema] = metadata[resource] if metadata && metadata.describe?(resource) # this should be handled by 'resources' method above
411
- resource_summary
412
- end
413
- }
414
- end
415
-
416
- # Whether or not to have this directory's resources guess
417
- # their schemas when none is provided.
418
- #
419
- # @return [true, false]
420
- def guess_schema?
421
- (!! @guess_schema)
422
- end
423
-
424
- # Force this directory's resources to guess at their schema.
425
- #
426
- # @return [true]
427
- def guess_schema!
428
- @guess_schema = true
396
+ def external_summary
397
+ super().merge({
398
+ :size => size,
399
+ :num_files => contents.length,
400
+ })
429
401
  end
430
402
 
431
403
  end
@@ -46,6 +46,18 @@ module IMW
46
46
  end
47
47
  end
48
48
 
49
+ # Return a summary of this database.
50
+ #
51
+ # Purposefully does not call +super+.
52
+ #
53
+ # @return [Hash]
54
+ def external_summary
55
+ {
56
+ :uri => uri.to_s,
57
+ :database => database
58
+ }
59
+ end
60
+
49
61
  # The (cached) database connection for this resource.
50
62
  #
51
63
  # @return [DBI::DatabaseHandle]
@@ -43,26 +43,25 @@ module IMW
43
43
 
44
44
  # Return a summary of the +inputs+ to this Summarizer.
45
45
  #
46
- # Delegates to the +summary+ method of each constituent
47
- # IMW::Resource in +inputs+.
46
+ # Will swallow errors.
48
47
  #
49
48
  # @return [Array<Hash>]
50
49
  def summary
51
- @summary ||= inputs.map do |input|
52
- #input.guess_schema! if input.schema.nil? && input.respond_to?(:guess_schema!)
53
- (input.respond_to?(:summary) ? input.summary : {}) rescue {}
54
- end
50
+ @summary ||= summary! rescue []
55
51
  end
56
52
 
57
- # The metadata employed by this Summarizer.
58
- #
59
- # It can be set by setting <tt>options[:metadata]</tt>.
53
+ # Return a summary of the +inputs+ to this summarizer.
54
+ #
55
+ # Delegates to the +summary+ method of each constituent
56
+ # IMW::Resource in +inputs+.
60
57
  #
61
- # @return [IMW::Metadata, nil]
62
- def metadata
63
- @metadata ||= options[:metadata] && IMW::Metadata.load(options[:metadata])
58
+ # @return [Array]
59
+ def summary!
60
+ inputs.map do |input|
61
+ (input.respond_to?(:summary) ? input.summary : {})
62
+ end
64
63
  end
65
-
64
+
66
65
  protected
67
66
  # Set new inputs for this summarizer.
68
67
  #
@@ -18,32 +18,23 @@ describe IMW::Formats::Csv do
18
18
  IMW.open('test.csv').load[1].last.should == "4"
19
19
  end
20
20
 
21
- it "should raise an error on an invalid schema" do
22
- lambda { @sample.schema = [{:name => :foobar, :has_many => {:associations => [:foo, :bar]}}] }.should raise_error(IMW::SchemaError)
23
- end
24
-
25
- it "should accept a valid schema" do
26
- @sample.schema = [:foo, :bar, :baz]
27
- @sample.schema.should == [{:name => 'foo'}, {:name => 'bar'}, {:name => 'baz'}]
28
- end
29
-
30
21
  describe "guessing a schema" do
31
22
 
32
23
  Dir[File.join(IMWTest::DATA_DIR, 'formats/delimited/with_schema/*')].each do |path|
33
24
  it "should correctly guess that with_schema/#{File.basename(path)} has headers in its first row" do
34
- IMW.open(path).headers_in_first_line?.should == true
25
+ IMW.open(path).fields_in_first_line?.should == true
35
26
  end
36
27
  end
37
28
 
38
29
  Dir[File.join(IMWTest::DATA_DIR, 'formats/delimited/without_schema/*')].each do |path|
39
30
  it "should correctly guess that without_schema/#{File.basename(path)} does not have headers in its first row" do
40
- IMW.open(path).headers_in_first_line?.should == false
31
+ IMW.open(path).fields_in_first_line?.should == false
41
32
  end
42
33
  end
43
34
 
44
35
  it "should automatically set the headers on a source with guessed headers" do
45
36
  resource = IMW.open(Dir[File.join(IMWTest::DATA_DIR, 'formats/delimited/with_schema/*')].first)
46
- resource.guess_schema!
37
+ resource.guess_fields!
47
38
  resource.delimited_options[:headers].class.should == Array
48
39
  resource.schema.should_not be_empty
49
40
  end
@@ -0,0 +1,56 @@
1
+ require File.dirname(__FILE__) + "/../../spec_helper"
2
+
3
+ describe IMW::Metadata::ContainsMetadata do
4
+
5
+ before do
6
+ class Foo
7
+ attr_accessor :contents
8
+ def path ; IMWTest::TMP_DIR ; end
9
+ def basename ; File.basename(IMWTest::TMP_DIR) ; end
10
+ include IMW::Metadata::ContainsMetadata
11
+ end
12
+ @foo = Foo.new
13
+ @foo.contents = []
14
+ end
15
+
16
+ describe 'finding the default metadata URI' do
17
+ it "should return the default metadata URI when 'contents' is empty" do
18
+ @foo.default_metadata_uri.should == File.join(IMWTest::TMP_DIR, File.basename(IMWTest::TMP_DIR) + ".icss.yaml")
19
+ end
20
+
21
+ it "should return the default metadata URI when 'contents' doesn't contain any metadata files" do
22
+ @foo.contents.concat ['bar.txt', 'crazy_file.yaml', 'foo.json'].map { |p| File.join(IMWTest::TMP_DIR, p) }
23
+ @foo.default_metadata_uri.should == File.join(IMWTest::TMP_DIR, File.basename(IMWTest::TMP_DIR) + ".icss.yaml")
24
+ end
25
+
26
+ %w[my-projects.icss.yaml stupid-crazy-fool-of-a-dataset-icss.json foobar-25.metadata.buzz.yml].each do |basename|
27
+ it "should return the metadata URI when 'contents' contains a URI matching '#{basename}'" do
28
+ @foo.contents.concat ['bar.txt', 'crazy_file.yaml', 'foo.json', basename].map { |p| File.join(IMWTest::TMP_DIR, p) }
29
+ @foo.default_metadata_uri.should == File.join(IMWTest::TMP_DIR, basename)
30
+ end
31
+ end
32
+ end
33
+
34
+
35
+ describe 'returning its metadata' do
36
+ it "should return 'nil' when no metadata exists on disk" do
37
+ @foo.metadata.should be_nil
38
+ end
39
+
40
+ it "should return Metadata when metadata exists on disk" do
41
+ IMW.open!(@foo.default_metadata_uri) do |f|
42
+ f.write <<YAML
43
+ ---
44
+ foo:
45
+ description: bar
46
+ fields: baz
47
+ YAML
48
+ end
49
+ @foo.metadata.class.should == IMW::Metadata
50
+ @foo.metadata['foo']['description'].should == 'bar'
51
+ end
52
+ end
53
+
54
+
55
+
56
+ end
@@ -3,17 +3,16 @@ require File.dirname(__FILE__) + "/../../spec_helper"
3
3
  describe IMW::Metadata::Field do
4
4
 
5
5
  describe "initializing" do
6
- it "should parse a symbol or string into a hash" do
7
- IMW::Metadata::Field.new(:foobar).should == { :name => "foobar" }
8
- IMW::Metadata::Field.new('foobar').should == { :name => 'foobar' }
6
+ it "should parse a string into a hash" do
7
+ IMW::Metadata::Field.new('foobar').should == { "name" => 'foobar' }
9
8
  end
10
9
 
11
10
  it "should raise an error on a Hash without a :name key" do
12
- lambda { IMW::Metadata::Field.new(:foo => 'bar') }.should raise_error(IMW::ArgumentError)
11
+ lambda { IMW::Metadata::Field.new('foo' => 'bar') }.should raise_error(IMW::ArgumentError)
13
12
  end
14
13
 
15
14
  it "should accept a Hash with a :name key" do
16
- data = { :name => :foobar, :title => "Bazbooz", :unit => "m" }
15
+ data = { 'name' => :foobar, 'title' => "Bazbooz", 'unit' => "m" }
17
16
  IMW::Metadata::Field.new(data).should == data
18
17
  end
19
18
 
@@ -0,0 +1,58 @@
1
+ require File.dirname(__FILE__) + "/../../spec_helper"
2
+
3
+ describe IMW::Metadata::HasMetadata do
4
+
5
+ before do
6
+ class Foo
7
+ def uri ; File.join(IMWTest::TMP_DIR, 'test', 'subdir', 'foobar.csv') ; end
8
+ def basename ; File.basename(uri) ; end
9
+ def extension ; 'csv' ; end
10
+ def dir ; IMW.open(File.join(IMWTest::TMP_DIR, 'test', 'subdir')) ; end
11
+ include IMW::Metadata::HasMetadata
12
+ end
13
+ @foo = Foo.new
14
+ end
15
+
16
+ it "should be able to build a schema" do
17
+ @foo.schema.should include(:type, :namespace, :name, :doc, :fields, :non_avro)
18
+ end
19
+
20
+ describe "finding its metadata" do
21
+
22
+ before do
23
+ FileUtils.mkdir_p(@foo.dir.path)
24
+ IMWTest::Random.file(File.join(@foo.dir.path, 'foobar.csv'))
25
+ end
26
+
27
+ it "should return 'nil' when it can't find any metadata" do
28
+ @foo.metadata.should be_nil
29
+ end
30
+
31
+ it "should return 'nil' when a metadata file is found that doesn't describe it" do
32
+ IMW.open!("has_metadata_test.icss.yaml") do |f|
33
+ f.write <<YAML
34
+ ---
35
+ foobar.csv:
36
+ description: bar
37
+ fields: ["baz", "booz"]
38
+ YAML
39
+ end
40
+ @foo.metadata.should be_nil
41
+ end
42
+
43
+ # it "should return the metadata when a metadata file is found that does describe it" do
44
+ # IMW.open!("has_metadata_test.icss.yaml") do |f|
45
+ # f.write <<YAML
46
+ # ---
47
+ # #{IMWTest::TMP_DIR}/test/subdir/foobar.csv:
48
+ # description: bar
49
+ # fields: ["baz", "booz"]
50
+ # YAML
51
+ # end
52
+ # @foo.metadata.class.should == IMW::Metadata
53
+ # @foo.metadata[@foo]['description'].should == 'bar'
54
+ # end
55
+
56
+ end
57
+
58
+ end
@@ -0,0 +1,32 @@
1
+ require File.dirname(__FILE__) + "/../../spec_helper"
2
+
3
+ describe IMW::Metadata::HasSummary do
4
+
5
+ before do
6
+ class Foo
7
+ def initialize(*args) ; @args = args ; end
8
+ def uri ; File.join(IMWTest::TMP_DIR, *@args) ; end
9
+ def basename ; File.basename(uri) ; end
10
+ def extension ; File.extname(@args.last || '').gsub(/^\./,'') ; end
11
+ include IMW::Metadata::HasSummary
12
+ end
13
+ @foo = Foo.new('foo', 'bar.csv')
14
+ end
15
+
16
+ it "should build a summary from an external summary" do
17
+ @foo.summary.should include(:uri, :basename, :extension)
18
+ end
19
+
20
+ it "should build a summary from an external summary and a schema when possible" do
21
+ @foo.stub!(:schema).and_return({:foo => 'bar'})
22
+ @foo.summary[:schema].should == {:foo => 'bar'}
23
+ end
24
+
25
+ it "should be able to build an external summary describing how it's situated in the world" do
26
+ @foo.summary[:uri].should == File.join(IMWTest::TMP_DIR, 'foo', 'bar.csv')
27
+ @foo.summary[:basename].should == 'bar.csv'
28
+ @foo.summary[:extension].should == 'csv'
29
+ end
30
+
31
+ end
32
+
@@ -3,25 +3,22 @@ require File.dirname(__FILE__) + "/../../spec_helper"
3
3
  describe IMW::Metadata::Schema do
4
4
 
5
5
  describe "initializing" do
6
- it "should accept an array" do
7
- IMW::Metadata::Schema.new([1,2,3]).should == [{:name => '1'}, {:name => '2'}, {:name => '3'}]
6
+ it "should merge with a Hash" do
7
+ IMW::Metadata::Schema.new({:foo => 'foobar'}).should == { :foo => 'foobar' }
8
8
  end
9
9
 
10
- it "should dup a Schema if given one" do
11
- orig_schema = IMW::Metadata::Schema.new([1,2,3])
12
- IMW::Metadata::Schema.new(orig_schema).should == orig_schema
10
+ it "should merge with a Schema" do
11
+ IMW::Metadata::Schema.new(IMW::Metadata::Schema.new({:foo => 'foobar'})).should == { :foo => 'foobar' }
13
12
  end
14
- end
15
13
 
16
- describe 'loading' do
17
- it "should load an Array in a resource" do
18
- resource = IMW.open('some_resource')
19
- resource.should_receive(:load).and_return(%w[foo bar baz])
20
- IMW.should_receive(:open).and_return(resource)
21
- IMW::Metadata::Schema.load(resource.to_s).map { |field| field[:name] }.should == %w[foo bar baz]
14
+ it "should ignore anything else" do
15
+ IMW::Metadata::Schema.new('foobar').should == {}
22
16
  end
23
17
 
18
+ it "should accept empty args" do
19
+ IMW::Metadata::Schema.new.should == {}
20
+ end
21
+
24
22
  end
25
23
 
26
24
  end
27
-