imw 0.2.16 → 0.2.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/imw/dataset.rb +1 -1
- data/lib/imw/formats/delimited.rb +9 -14
- data/lib/imw/metadata.rb +38 -21
- data/lib/imw/metadata/contains_metadata.rb +35 -25
- data/lib/imw/metadata/field.rb +0 -28
- data/lib/imw/metadata/has_metadata.rb +93 -0
- data/lib/imw/metadata/has_summary.rb +51 -0
- data/lib/imw/metadata/schema.rb +6 -216
- data/lib/imw/resource.rb +2 -5
- data/lib/imw/schemes/http.rb +1 -1
- data/lib/imw/schemes/local.rb +18 -46
- data/lib/imw/schemes/sql.rb +12 -0
- data/lib/imw/tools/summarizer.rb +12 -13
- data/spec/imw/formats/delimited_spec.rb +3 -12
- data/spec/imw/metadata/contains_metadata_spec.rb +56 -0
- data/spec/imw/metadata/field_spec.rb +4 -5
- data/spec/imw/metadata/has_metadata_spec.rb +58 -0
- data/spec/imw/metadata/has_summary_spec.rb +32 -0
- data/spec/imw/metadata/schema_spec.rb +10 -13
- data/spec/imw/metadata_spec.rb +68 -21
- data/spec/imw/schemes/local_spec.rb +12 -22
- data/spec/imw/schemes/s3_spec.rb +0 -1
- metadata +12 -5
- data/lib/imw/metadata/schematized.rb +0 -27
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.
|
1
|
+
0.2.17
|
data/lib/imw/dataset.rb
CHANGED
@@ -11,12 +11,6 @@ module IMW
|
|
11
11
|
# @abstract
|
12
12
|
module Delimited
|
13
13
|
|
14
|
-
# Ensure that this delimited resource is described by an
|
15
|
-
# ordered collection of flat fields.
|
16
|
-
def validate_schema!
|
17
|
-
raise IMW::SchemaError.new("#{self.class} resources must be described by an ordered set of flat fields") if schema.any?(&:nested?)
|
18
|
-
end
|
19
|
-
|
20
14
|
# Default options to be passed to
|
21
15
|
# FasterCSV[http://fastercsv.rubyforge.org/]; see its
|
22
16
|
# documentation for more information.
|
@@ -24,7 +18,7 @@ module IMW
|
|
24
18
|
# @return [Hash]
|
25
19
|
def delimited_options
|
26
20
|
@delimited_options ||= {
|
27
|
-
:headers =>
|
21
|
+
:headers => fields && fields.map { |field| field['name'] }
|
28
22
|
}.merge(resource_options_compatible_with_faster_csv)
|
29
23
|
end
|
30
24
|
|
@@ -68,7 +62,7 @@ module IMW
|
|
68
62
|
# of this delimited data is a row of headers.
|
69
63
|
#
|
70
64
|
# @return [true, false]
|
71
|
-
def
|
65
|
+
def fields_in_first_line?
|
72
66
|
# grab the header and up to 10 body rows
|
73
67
|
require 'fastercsv'
|
74
68
|
copy = FasterCSV.new(io, resource_options_compatible_with_faster_csv.merge(:headers => false))
|
@@ -93,15 +87,16 @@ module IMW
|
|
93
87
|
determinant && determinant >= 0.05
|
94
88
|
end
|
95
89
|
|
96
|
-
# If it seems like there are
|
97
|
-
# data then go ahead and use them to define
|
90
|
+
# If it seems like there are fields in the first line of this
|
91
|
+
# data then go ahead and use them to define this resource's
|
92
|
+
# fields.
|
98
93
|
#
|
99
|
-
# Will overwrite
|
100
|
-
def
|
101
|
-
return unless
|
94
|
+
# Will overwrite any fields already present for this resource.
|
95
|
+
def guess_fields!
|
96
|
+
return unless fields_in_first_line?
|
102
97
|
copy = FasterCSV.new(io, resource_options_compatible_with_faster_csv.merge(:headers => false))
|
103
98
|
names = (copy.shift || []) rescue []
|
104
|
-
self.
|
99
|
+
self.fields = names.map { |n| { 'name' => n } }
|
105
100
|
delimited_options[:headers] = names
|
106
101
|
end
|
107
102
|
|
data/lib/imw/metadata.rb
CHANGED
@@ -4,13 +4,13 @@ module IMW
|
|
4
4
|
# with a dataset's fields.
|
5
5
|
class Metadata < Hash
|
6
6
|
|
7
|
-
autoload :Field,
|
8
|
-
autoload :Schema,
|
9
|
-
autoload :Schematized, 'imw/metadata/schematized'
|
10
|
-
autoload :DSL, 'imw/metadata/dsl'
|
7
|
+
autoload :Field, 'imw/metadata/field'
|
8
|
+
autoload :Schema, 'imw/metadata/schema'
|
11
9
|
autoload :ContainsMetadata, 'imw/metadata/contains_metadata'
|
10
|
+
autoload :HasSummary, 'imw/metadata/has_summary'
|
11
|
+
autoload :HasMetadata, 'imw/metadata/has_metadata'
|
12
12
|
|
13
|
-
# The resource this
|
13
|
+
# The resource this metadata is anchored to.
|
14
14
|
#
|
15
15
|
# This attribute is useful for letting relative paths in a
|
16
16
|
# schema file refer to a common base URL.
|
@@ -18,12 +18,12 @@ module IMW
|
|
18
18
|
# @return [IMW::Resource]
|
19
19
|
attr_reader :base
|
20
20
|
|
21
|
-
# Set the resource this
|
21
|
+
# Set the base resource this metadata is anchored to.
|
22
22
|
#
|
23
23
|
# @param [IMW::Resource, String, Addressable::URI] new_base
|
24
24
|
def base= new_base
|
25
25
|
base_resource = IMW.open(new_base)
|
26
|
-
base_resource.should_exist!("
|
26
|
+
base_resource.should_exist!("Metadata base directory must exist")
|
27
27
|
raise IMW::PathError.new("Metadata base must be a directory") unless base_resource.is_directory?
|
28
28
|
@base = base_resource
|
29
29
|
end
|
@@ -31,34 +31,51 @@ module IMW
|
|
31
31
|
def initialize obj=nil, options={}
|
32
32
|
super()
|
33
33
|
self.base = options[:base] if options[:base]
|
34
|
-
|
34
|
+
if obj
|
35
|
+
obj.each_pair do |resource, metadata|
|
36
|
+
self[resource] = metadata
|
37
|
+
end
|
38
|
+
end
|
35
39
|
end
|
36
40
|
|
37
|
-
def self.load
|
38
|
-
resource = IMW.open(
|
41
|
+
def self.load obj, options={}
|
42
|
+
resource = IMW.open(obj)
|
39
43
|
new(resource.load, {:base => resource.dirname}.merge(options))
|
40
44
|
end
|
41
45
|
|
42
|
-
def []=
|
43
|
-
|
44
|
-
|
46
|
+
def []= resource, metadata
|
47
|
+
super(absolute_uri(resource), metadata)
|
48
|
+
end
|
49
|
+
|
50
|
+
def [] resource
|
51
|
+
super(absolute_uri(resource))
|
52
|
+
end
|
53
|
+
|
54
|
+
def describe? resource
|
55
|
+
self[(absolute_uri(resource))]
|
45
56
|
end
|
57
|
+
alias_method :describes?, :describe?
|
46
58
|
|
47
|
-
def
|
48
|
-
|
59
|
+
def description_for resource
|
60
|
+
return unless describes?(resource)
|
61
|
+
self[resource]['description']
|
49
62
|
end
|
50
63
|
|
51
|
-
def
|
52
|
-
|
64
|
+
def fields_for resource
|
65
|
+
return unless describes?(resource)
|
66
|
+
(self[resource]['fields'] || []).map { |f| Metadata::Field.new(f) }
|
53
67
|
end
|
54
68
|
|
55
69
|
protected
|
56
70
|
|
57
|
-
def absolute_uri
|
58
|
-
|
59
|
-
|
71
|
+
def absolute_uri resource
|
72
|
+
obj = IMW.open(resource)
|
73
|
+
if base && obj.uri.to_s !~ %r{(^/|://)} # relative path
|
74
|
+
s = base.join(obj.uri.to_s).uri.to_s
|
75
|
+
s
|
60
76
|
else
|
61
|
-
|
77
|
+
s = obj.uri.to_s
|
78
|
+
s
|
62
79
|
end
|
63
80
|
end
|
64
81
|
|
@@ -1,44 +1,54 @@
|
|
1
|
-
module IMW
|
1
|
+
module IMW
|
2
2
|
class Metadata
|
3
3
|
|
4
|
-
# A module
|
5
|
-
#
|
4
|
+
# A module for finding metadata describing the sub-resources of a
|
5
|
+
# given resource.
|
6
|
+
#
|
7
|
+
# An including class describing the parent resource must define
|
8
|
+
# the +contents+ method which must return an Array of Strings
|
9
|
+
# contained within the parent. These objects will be matched
|
10
|
+
# against possible metadata URIs and the corresponding
|
11
|
+
# IMW::Metadata class created on the fly.
|
12
|
+
#
|
13
|
+
# In case no such object is found, the class should also define
|
14
|
+
# the +basename+ and +path+ methods which will be used to generate
|
15
|
+
# a default URI where metadata about the parent's resources should
|
16
|
+
# live.
|
6
17
|
module ContainsMetadata
|
7
18
|
|
8
|
-
# The
|
19
|
+
# The URI containing the metadata for this resource and its
|
20
|
+
# contents.
|
9
21
|
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
22
|
+
# Looks for an existing JSON or YAML file containing the strings
|
23
|
+
# "icss" or "metadata" directly contained within this resource.
|
24
|
+
#
|
25
|
+
# If none are found, defaults to a URI named after this
|
26
|
+
# resource's basename with the string ".icss.yaml" appended.
|
13
27
|
#
|
14
28
|
# @return [String, nil]
|
15
|
-
def
|
16
|
-
|
29
|
+
def default_metadata_uri
|
30
|
+
contents.detect { |path| path =~ /(icss|metadata).*\.(ya?ml|json)$/i } || File.join(path, "#{basename}.icss.yaml")
|
17
31
|
end
|
18
32
|
|
19
|
-
#
|
20
|
-
attr_writer :metadata_uri
|
21
|
-
|
22
|
-
# Does this resource contain metadata for other resources it
|
23
|
-
# contains?
|
33
|
+
# Return the metadata for this resource if it exists.
|
24
34
|
#
|
25
|
-
#
|
26
|
-
def metadata?
|
27
|
-
(!! metadata_uri)
|
28
|
-
end
|
29
|
-
|
30
|
-
# Return the metadata for this resource.
|
35
|
+
# Will look for an existing resource at +default_metadata_uri+.
|
31
36
|
#
|
32
37
|
# @return [IMW::Metadata, nil]
|
33
38
|
def metadata
|
34
|
-
@metadata
|
39
|
+
return @metadata if @metadata
|
40
|
+
obj = IMW.open(default_metadata_uri)
|
41
|
+
self.metadata=(obj) if obj.exist?
|
42
|
+
@metadata
|
35
43
|
end
|
36
44
|
|
37
|
-
#
|
38
|
-
|
45
|
+
# Set the metadata for this resource to +obj+.
|
46
|
+
#
|
47
|
+
# @param [String, Addressable::URI, IMW::Resource] obj
|
48
|
+
def metadata= obj
|
49
|
+
@metadata = IMW::Metadata.load(obj)
|
50
|
+
end
|
39
51
|
|
40
52
|
end
|
41
53
|
end
|
42
54
|
end
|
43
|
-
|
44
|
-
|
data/lib/imw/metadata/field.rb
CHANGED
@@ -16,22 +16,6 @@ module IMW
|
|
16
16
|
#
|
17
17
|
# IMW::Metadata::Field.new 'name' => 'id', 'type' => :integer, 'title' => "ID", 'description' => "Auto-incremented."
|
18
18
|
# #=> { 'name' => 'id', 'type' => :integer, 'title' => "ID", 'description' => "Auto-incremented." }
|
19
|
-
#
|
20
|
-
# Some properties make a field special:
|
21
|
-
#
|
22
|
-
# <tt>has_many</tt>::
|
23
|
-
# Denotes that this record is in a "has_many" relationship with
|
24
|
-
# one or more other records. The corresponding value should be
|
25
|
-
# an array
|
26
|
-
#
|
27
|
-
# <tt>has_one</tt>::
|
28
|
-
# Denotes that this record is in a "has_one" relationship with
|
29
|
-
# one or more other records. The corresponding value should be
|
30
|
-
# an Array in which each key names the joined record and each
|
31
|
-
# value is an Array of fields describing the joined record.
|
32
|
-
#
|
33
|
-
# @see IMW::Metadata::Record for more usage of the
|
34
|
-
# <tt>:has_many</tt> and <tt>:has_one</tt> properties.
|
35
19
|
class Field < Hash
|
36
20
|
|
37
21
|
def initialize obj
|
@@ -43,23 +27,11 @@ module IMW
|
|
43
27
|
self['name'] = obj.to_s.strip
|
44
28
|
end
|
45
29
|
end
|
46
|
-
|
47
|
-
def hierarchical?
|
48
|
-
has_key?('has_many') || has_key?('has_one')
|
49
|
-
end
|
50
|
-
alias_method :nested?, :hierarchical?
|
51
|
-
|
52
|
-
def flat?
|
53
|
-
! hierarchical?
|
54
|
-
end
|
55
30
|
|
56
31
|
def titleize
|
57
32
|
self['title'] || self['name'].capitalize # FIXME we can do better than this!
|
58
33
|
end
|
59
34
|
|
60
|
-
def associations
|
61
|
-
end
|
62
|
-
|
63
35
|
end
|
64
36
|
end
|
65
37
|
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
module IMW
|
2
|
+
class Metadata
|
3
|
+
|
4
|
+
|
5
|
+
# A module which defines how a resource finds the Metadata object it
|
6
|
+
# can use to look up metadata about itself.
|
7
|
+
#
|
8
|
+
# "metadata" in this context is defined as accessors for
|
9
|
+
# +metadata+ (IMW::Metadata), +schema+ (IMW::Metadata::Schema),
|
10
|
+
# +fields+ (IMW::Metadata::Field), and +description+ (String).
|
11
|
+
#
|
12
|
+
# An including class should define a method +dir+ which should
|
13
|
+
# return an object that might contain Metadata, i.e. - that
|
14
|
+
# includes the IMW::Metadata::ContainsMetadata module.
|
15
|
+
#
|
16
|
+
# An including class can optionally define the methods +snippet+
|
17
|
+
# which returns a snippet of the resource as well as
|
18
|
+
# +record_count+ to return a count of how many records the
|
19
|
+
# resource contains.
|
20
|
+
module HasMetadata
|
21
|
+
|
22
|
+
# The schema for this object.
|
23
|
+
#
|
24
|
+
# @return [Hash]
|
25
|
+
def schema
|
26
|
+
return @schema if @schema
|
27
|
+
@schema = IMW::Metadata::Schema.new
|
28
|
+
@schema[:type] = "record"
|
29
|
+
@schema[:namespace] = "schema.imw.resource"
|
30
|
+
@schema[:name] = (basename || '')
|
31
|
+
@schema[:doc] = description
|
32
|
+
@schema[:fields] = fields
|
33
|
+
|
34
|
+
@schema[:non_avro ] = {}
|
35
|
+
@schema[:non_avro][:snippet] = snippet if respond_to?(:snippet)
|
36
|
+
@schema[:non_avro][:record_count] = record_count if respond_to?(:record_count)
|
37
|
+
@schema
|
38
|
+
end
|
39
|
+
|
40
|
+
# Return the metadata object that contains metadata for this
|
41
|
+
# resource.
|
42
|
+
#
|
43
|
+
# Will look in this resource's directory and recursively upward
|
44
|
+
# till the root directory is reached or a metadata file is
|
45
|
+
# discovered.
|
46
|
+
#
|
47
|
+
# @return [IMW::Metadata, nil]
|
48
|
+
def metadata
|
49
|
+
return @metadata if @metadata
|
50
|
+
d = dir
|
51
|
+
while d.path != '/'
|
52
|
+
break if d.metadata && d.metadata.describes?(self)
|
53
|
+
d = d.dir
|
54
|
+
end
|
55
|
+
@metadata = d.metadata
|
56
|
+
end
|
57
|
+
|
58
|
+
# The fields for this resource's data.
|
59
|
+
#
|
60
|
+
# Each field will be a Hash of information.
|
61
|
+
#
|
62
|
+
# @return [Array<Hash>]
|
63
|
+
def fields
|
64
|
+
@fields ||= metadata && metadata.fields_for(self)
|
65
|
+
end
|
66
|
+
|
67
|
+
# Set the fields for this resource.
|
68
|
+
#
|
69
|
+
# @param [Array<Hash>] new_fields
|
70
|
+
# @return [Array<Hash>]
|
71
|
+
def fields= new_fields
|
72
|
+
@fields = new_fields.map { |f| Metadata::Field.new(f) }
|
73
|
+
end
|
74
|
+
|
75
|
+
# A description for this Resource.
|
76
|
+
#
|
77
|
+
# @return [String]
|
78
|
+
def description
|
79
|
+
@description ||= metadata && metadata.description_for(self)
|
80
|
+
end
|
81
|
+
|
82
|
+
# Set the description of this Resource.
|
83
|
+
#
|
84
|
+
# @param [String] new_description
|
85
|
+
# @return [String]
|
86
|
+
def description= new_description
|
87
|
+
@description = new_description
|
88
|
+
end
|
89
|
+
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module IMW
|
2
|
+
class Metadata
|
3
|
+
|
4
|
+
# A module for generating a summary & schema of a resource.
|
5
|
+
#
|
6
|
+
# The including class should define methods +uri+, +basename+, +extension+.
|
7
|
+
module HasSummary
|
8
|
+
|
9
|
+
# Return a full summary of this Resource.
|
10
|
+
#
|
11
|
+
# The summary will include "external" information about how this
|
12
|
+
# resource appears to the world (via its URI), "internal"
|
13
|
+
# metadata about this resource (its description, &c.), as well
|
14
|
+
# as the structure of this resource's data (its schema's fields
|
15
|
+
# and a snippet).
|
16
|
+
#
|
17
|
+
# Will return a Hash, with a <tt>:schema</tt> key which maps to
|
18
|
+
# a well-formed AVRO schema for this resource.
|
19
|
+
#
|
20
|
+
# @return [Hash]
|
21
|
+
def summary
|
22
|
+
return @summary if @summary
|
23
|
+
@summary = external_summary
|
24
|
+
@summary[:schema] = schema if respond_to?(:schema)
|
25
|
+
@summary[:contents] = resources.map(&:summary) if respond_to?(:resources)
|
26
|
+
@summary
|
27
|
+
end
|
28
|
+
|
29
|
+
# Return information (usually scheme-dependent) on how this
|
30
|
+
# resource is situated in the world, i.e. - its URI, its size,
|
31
|
+
# how many lines it has, &c.
|
32
|
+
#
|
33
|
+
# Modules which override this should chain with +super+:
|
34
|
+
#
|
35
|
+
# # in my_scheme.rb
|
36
|
+
# def external_summary
|
37
|
+
# super().merge(:user => 'bob', :password => 'smith')
|
38
|
+
# end
|
39
|
+
#
|
40
|
+
# @return [Hash]
|
41
|
+
def external_summary
|
42
|
+
{
|
43
|
+
:uri => uri.to_s,
|
44
|
+
:basename => basename,
|
45
|
+
:extension => extension
|
46
|
+
}
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
end
|
51
|
+
end
|
data/lib/imw/metadata/schema.rb
CHANGED
@@ -1,227 +1,17 @@
|
|
1
1
|
module IMW
|
2
|
+
|
2
3
|
class Metadata
|
3
4
|
|
4
|
-
#
|
5
|
-
#
|
6
|
-
# A Schema is built on top of an Array because it is often
|
7
|
-
# important to have an ordering for a record's fields.
|
8
|
-
#
|
9
|
-
# For fields with no such ordering, an Array also works because
|
10
|
-
# each of its element will be a field with a +name+ that can be
|
11
|
-
# used to index the corresponding field.
|
12
|
-
#
|
13
|
-
# A Schema is instantiated with a basic Ruby data structure.
|
14
|
-
#
|
15
|
-
# == Tabular Data
|
16
|
-
#
|
17
|
-
# Tabular data formats (CSV, TSV, &c.) contain flat records
|
18
|
-
# consisting of repeating rows with the same fields in the same
|
19
|
-
# position. A sample of delimited data looks like
|
20
|
-
#
|
21
|
-
# ID,Name,Genus,Species
|
22
|
-
# 001,Gray-bellied Night Monkey,Aotus,lemurinus
|
23
|
-
# 002,Panamanian Night Monkey,Aotus,zonalis
|
24
|
-
# 003,Hernández-Camacho's Night Monkey,Aotus,jorgehernandezi
|
25
|
-
# 004,Gray-handed Night Monkey,Aotus,griseimembra
|
26
|
-
# 005,Hershkovitz's Night Monkey,Aotus,hershkovitzi
|
27
|
-
# ...
|
28
|
-
#
|
29
|
-
# The schema of these records is summarized as a Ruby data
|
30
|
-
# structure in the following way
|
31
|
-
#
|
32
|
-
# [
|
33
|
-
# { :name => :id, :type => :integer },
|
34
|
-
# { :name => :name, :type => :string, :title => "Common Name" },
|
35
|
-
# { :name => :genus, :type => :string, :title => "Genus" },
|
36
|
-
# { :name => :species, :type => :string, :title => "Species" }
|
37
|
-
# ]
|
38
|
-
#
|
39
|
-
# The outer-most Array represents each row and each Hash in the
|
40
|
-
# Array represents one of the fields in a row. A Schema
|
41
|
-
# initialized with the above Ruby code can be thought of and
|
42
|
-
# played with as an Array of Hashes even though it really is a
|
43
|
-
# Schema object of Field objects.
|
44
|
-
#
|
45
|
-
# == Hierarchical Data
|
46
|
-
#
|
47
|
-
# Hierarchical data formats (JSON, YAML, XML, &c.) can have
|
48
|
-
# arbitrarily complex records with fields within fields and so on.
|
49
|
-
# A sample of hierarchical XML data looks like
|
50
|
-
#
|
51
|
-
# <genera>
|
52
|
-
# <genus>
|
53
|
-
# <name>Mandrillus</name>
|
54
|
-
# <species>
|
55
|
-
# <species id="113">
|
56
|
-
# <name>sphinx</name>
|
57
|
-
# <common_name>Mandrill</common_name>
|
58
|
-
# </species>
|
59
|
-
# <species id="114">
|
60
|
-
# <name>leucophaeus</name>
|
61
|
-
# <common_name>Drill</common_name>
|
62
|
-
# </species>
|
63
|
-
# </species>
|
64
|
-
# </genus>
|
65
|
-
# <genus>
|
66
|
-
# <name>Rungwecebus</name>
|
67
|
-
# <species>
|
68
|
-
# <species id="100">
|
69
|
-
# <name>kipunji</name>
|
70
|
-
# <common_name>Kipunji</common_name>
|
71
|
-
# </species>
|
72
|
-
# </species>
|
73
|
-
# </genus>
|
74
|
-
#
|
75
|
-
# These records are described by the following Ruby data structure
|
76
|
-
#
|
77
|
-
# [
|
78
|
-
# { :name => :genera,
|
79
|
-
# :has_many => [
|
80
|
-
# { :name => 'name', :type => :string, :title => "Genus" },
|
81
|
-
# { :name => 'species',
|
82
|
-
# :has_many => [
|
83
|
-
# { :name => :id, :type => :integer },
|
84
|
-
# { :name => :name, :type => :string, :title => "Species" },
|
85
|
-
# { :name => :common_name, :type => :string, :title => "Common Name" }
|
86
|
-
# ]
|
87
|
-
# }
|
88
|
-
# ]
|
89
|
-
# }
|
90
|
-
# ]
|
91
|
-
#
|
92
|
-
# By IMW convention, the outer-most element of the Schema is still
|
93
|
-
# an Array describing a collection of identical records even
|
94
|
-
# though XML data must have a single root node, limiting the
|
95
|
-
# collection to a single record.
|
96
|
-
#
|
97
|
-
# The first field of the Schema is named +genera+ and it uses the
|
98
|
-
# special field property +has_many+ to denote that the field
|
99
|
-
# points to a collection of sub-records.
|
5
|
+
# Represents a schema for data.
|
100
6
|
#
|
101
|
-
#
|
102
|
-
|
103
|
-
# two fields are +name+ and +species+. +name+ is a simple String
|
104
|
-
# value while +species+ itself points at another collection of
|
105
|
-
# objects.
|
106
|
-
#
|
107
|
-
# This second-level nested record (a particular species) is itself
|
108
|
-
# composed of the three (flat) fields +id+, +name+, and
|
109
|
-
# +common_name+. Note that the Schema doesn't know (or care) that
|
110
|
-
# the +id+ field is contained in an XML attribute while the +name+
|
111
|
-
# and +common_name+ fields are contained as text within daughter
|
112
|
-
# nodes.
|
113
|
-
#
|
114
|
-
# A different way of structuring the same information, this time
|
115
|
-
# expressed in YAML:
|
116
|
-
#
|
117
|
-
# ---
|
118
|
-
# Mandrillus:
|
119
|
-
# - :species: sphinx
|
120
|
-
# :name: Mandrill
|
121
|
-
# :id: "113"
|
122
|
-
# - :species: leucophaeus
|
123
|
-
# :name: Drill
|
124
|
-
# :id: "114"
|
125
|
-
# Rungwecebus:
|
126
|
-
# - :species: kipunji
|
127
|
-
# :name: Kipunji
|
128
|
-
# :id: "100"
|
129
|
-
#
|
130
|
-
# Would lead to a different Schema
|
131
|
-
#
|
132
|
-
# [
|
133
|
-
# { :name => :genus, :title => "Genus",
|
134
|
-
# :has_many => [
|
135
|
-
# { :name => :id, :type => :integer },
|
136
|
-
# { :name => :name, :type => :string, :title => "Common Name" },
|
137
|
-
# { :name => :species, :type => :string, :title => "Species" }
|
138
|
-
# ]
|
139
|
-
# }
|
140
|
-
# ]
|
141
|
-
#
|
142
|
-
# Where the unnecessary outer wrapper field +genera+ has been
|
143
|
-
# dispensed with.
|
144
|
-
#
|
145
|
-
# In addition to "has many" relationships a record can have a
|
146
|
-
# "has_one" relationship. The above data might be expressed
|
147
|
-
#
|
148
|
-
# ---
|
149
|
-
# Mandrillus:
|
150
|
-
# - species: sphinx
|
151
|
-
# name: Mandrill
|
152
|
-
# id: "113"
|
153
|
-
# discoverer:
|
154
|
-
# name: Dr. Monkeypants
|
155
|
-
# year: 1838
|
156
|
-
# - species: leucophaeus
|
157
|
-
# name: Drill
|
158
|
-
# id: "114"
|
159
|
-
# discoverer:
|
160
|
-
# name: Ms. Cecelia Apefingers
|
161
|
-
# year: 1921
|
162
|
-
#
|
163
|
-
# would result in the following Schema:
|
164
|
-
#
|
165
|
-
# [
|
166
|
-
# { :name => :genus, :title => "Genus",
|
167
|
-
# :has_many => [
|
168
|
-
# { :name => :id, :type => :integer },
|
169
|
-
# { :name => :name, :type => :string, :title => "Common Name" },
|
170
|
-
# { :name => :species, :type => :string },
|
171
|
-
# { :name => :discoverer,
|
172
|
-
# :has_one => [
|
173
|
-
# { :name => 'name', :type => :string },
|
174
|
-
# { :name => 'year', :type => :integer }
|
175
|
-
# ]
|
176
|
-
# }
|
177
|
-
# ]
|
178
|
-
# }
|
179
|
-
# ]
|
180
|
-
#
|
181
|
-
# The +discoverer+ field is marked as +has_one+ which means the
|
182
|
-
# +name+ and +year+ fields in the corresponding Array will be
|
183
|
-
# interpreted as fields in a single attached sub-record.
|
184
|
-
#
|
185
|
-
# = Compact Schemas
|
186
|
-
#
|
187
|
-
# The internal hashes in a Schema specification are really Field
|
188
|
-
# objects and the initializer will promote Strings and Symbols to
|
189
|
-
# Field objects automatically. This means that the above Schema
|
190
|
-
# specification could be replaced by
|
191
|
-
#
|
192
|
-
# [
|
193
|
-
# { :name => :genus
|
194
|
-
# :has_many => [
|
195
|
-
# :id,
|
196
|
-
# :name,
|
197
|
-
# :species,
|
198
|
-
# { :name => :discoverer,
|
199
|
-
# :has_one => [
|
200
|
-
# :name,
|
201
|
-
# :year
|
202
|
-
# ]
|
203
|
-
# }
|
204
|
-
# ]
|
205
|
-
# }
|
206
|
-
# ]
|
207
|
-
#
|
208
|
-
# though there is an accompanying loss of metadata about each
|
209
|
-
# field.
|
210
|
-
class Schema < Array
|
7
|
+
# FIXME add methods that help couple nicely with Avro schemata.
|
8
|
+
class Schema < Hash
|
211
9
|
|
212
|
-
def initialize
|
10
|
+
def initialize obj=nil
|
213
11
|
super()
|
214
|
-
|
215
|
-
end
|
216
|
-
|
217
|
-
def self.load resource
|
218
|
-
new(IMW.open(resource).load)
|
12
|
+
merge!(obj) if obj.is_a?(Hash) || obj.is_a?(Schema)
|
219
13
|
end
|
220
14
|
|
221
|
-
def [] index
|
222
|
-
[Integer, Range].include?(index.class) ? super(index) : detect { |field| field[:name].to_s == index.to_s }
|
223
|
-
end
|
224
|
-
|
225
15
|
end
|
226
16
|
end
|
227
17
|
end
|