imw 0.2.18 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +7 -26
- data/Gemfile.lock +13 -38
- data/{LICENSE → LICENSE.txt} +1 -1
- data/README.textile +35 -0
- data/Rakefile +45 -22
- data/VERSION +1 -1
- data/examples/foo.rb +19 -0
- data/examples/html_selector.rb +22 -0
- data/examples/nes_game_list.csv +625 -0
- data/examples/nes_gamespot.csv +1371 -0
- data/examples/nes_nintendo.csv +624 -0
- data/examples/nes_unlicensed.csv +89 -0
- data/examples/nes_wikipedia.csv +710 -0
- data/examples/nibbler_test.rb +24 -0
- data/examples/script.rb +19 -0
- data/lib/imw.rb +28 -140
- data/lib/imw/error.rb +9 -0
- data/lib/imw/recordizer.rb +8 -0
- data/lib/imw/recordizer/html_selector_recordizer.rb +86 -0
- data/lib/imw/recordizer/string_slice_recordizer.rb +39 -0
- data/lib/imw/resource.rb +3 -119
- data/lib/imw/serializer.rb +7 -0
- data/lib/imw/serializer/json_serializer.rb +17 -0
- data/lib/imw/uri.rb +41 -0
- data/spec/resource_spec.rb +78 -0
- data/spec/uri_spec.rb +55 -0
- metadata +81 -232
- data/README.rdoc +0 -371
- data/bin/imw +0 -5
- data/bin/tsv_to_json.rb +0 -29
- data/etc/imwrc.rb +0 -26
- data/examples/dataset.rb +0 -12
- data/examples/metadata.yml +0 -10
- data/lib/imw/archives.rb +0 -120
- data/lib/imw/archives/rar.rb +0 -19
- data/lib/imw/archives/tar.rb +0 -19
- data/lib/imw/archives/tarbz2.rb +0 -73
- data/lib/imw/archives/targz.rb +0 -73
- data/lib/imw/archives/zip.rb +0 -51
- data/lib/imw/boot.rb +0 -87
- data/lib/imw/compressed_files.rb +0 -94
- data/lib/imw/compressed_files/bz2.rb +0 -16
- data/lib/imw/compressed_files/compressible.rb +0 -75
- data/lib/imw/compressed_files/gz.rb +0 -16
- data/lib/imw/dataset.rb +0 -125
- data/lib/imw/dataset/paths.rb +0 -29
- data/lib/imw/dataset/workflow.rb +0 -195
- data/lib/imw/formats.rb +0 -33
- data/lib/imw/formats/delimited.rb +0 -170
- data/lib/imw/formats/excel.rb +0 -100
- data/lib/imw/formats/json.rb +0 -41
- data/lib/imw/formats/pdf.rb +0 -71
- data/lib/imw/formats/sgml.rb +0 -69
- data/lib/imw/formats/yaml.rb +0 -41
- data/lib/imw/metadata.rb +0 -83
- data/lib/imw/metadata/contains_metadata.rb +0 -54
- data/lib/imw/metadata/dsl.rb +0 -111
- data/lib/imw/metadata/field.rb +0 -37
- data/lib/imw/metadata/has_metadata.rb +0 -98
- data/lib/imw/metadata/has_summary.rb +0 -57
- data/lib/imw/metadata/schema.rb +0 -17
- data/lib/imw/parsers.rb +0 -8
- data/lib/imw/parsers/flat.rb +0 -44
- data/lib/imw/parsers/html_parser.rb +0 -387
- data/lib/imw/parsers/html_parser/matchers.rb +0 -289
- data/lib/imw/parsers/line_parser.rb +0 -87
- data/lib/imw/parsers/regexp_parser.rb +0 -72
- data/lib/imw/repository.rb +0 -12
- data/lib/imw/runner.rb +0 -118
- data/lib/imw/schemes.rb +0 -23
- data/lib/imw/schemes/ftp.rb +0 -142
- data/lib/imw/schemes/hdfs.rb +0 -251
- data/lib/imw/schemes/http.rb +0 -165
- data/lib/imw/schemes/local.rb +0 -409
- data/lib/imw/schemes/remote.rb +0 -119
- data/lib/imw/schemes/s3.rb +0 -143
- data/lib/imw/schemes/sql.rb +0 -129
- data/lib/imw/tools.rb +0 -12
- data/lib/imw/tools/aggregator.rb +0 -148
- data/lib/imw/tools/archiver.rb +0 -220
- data/lib/imw/tools/downloader.rb +0 -63
- data/lib/imw/tools/extension_analyzer.rb +0 -114
- data/lib/imw/tools/summarizer.rb +0 -83
- data/lib/imw/tools/transferer.rb +0 -167
- data/lib/imw/utils.rb +0 -74
- data/lib/imw/utils/dynamically_extendable.rb +0 -137
- data/lib/imw/utils/error.rb +0 -59
- data/lib/imw/utils/extensions/hpricot.rb +0 -34
- data/lib/imw/utils/has_uri.rb +0 -131
- data/lib/imw/utils/log.rb +0 -92
- data/lib/imw/utils/misc.rb +0 -57
- data/lib/imw/utils/paths.rb +0 -146
- data/lib/imw/utils/uri.rb +0 -59
- data/lib/imw/utils/uuid.rb +0 -33
- data/lib/imw/utils/validate.rb +0 -38
- data/lib/imw/utils/version.rb +0 -11
- data/spec/data/formats/delimited/sample.csv +0 -131
- data/spec/data/formats/delimited/sample.tsv +0 -131
- data/spec/data/formats/delimited/with_schema/ace-hardware-locations.tsv +0 -11
- data/spec/data/formats/delimited/with_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -16
- data/spec/data/formats/delimited/with_schema/complete-list-of-starbucks-locations.tsv +0 -11
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -12
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -13
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -22
- data/spec/data/formats/delimited/without_schema/ace-hardware-locations.tsv +0 -10
- data/spec/data/formats/delimited/without_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -15
- data/spec/data/formats/delimited/without_schema/complete-list-of-starbucks-locations.tsv +0 -10
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -11
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -12
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -21
- data/spec/data/formats/excel/sample.xls +0 -0
- data/spec/data/formats/json/sample.json +0 -1
- data/spec/data/formats/none/sample +0 -650
- data/spec/data/formats/sgml/sample.xml +0 -617
- data/spec/data/formats/text/sample.txt +0 -650
- data/spec/data/formats/yaml/sample.yaml +0 -410
- data/spec/data/schema-tabular.yaml +0 -11
- data/spec/imw/archives/rar_spec.rb +0 -16
- data/spec/imw/archives/tar_spec.rb +0 -16
- data/spec/imw/archives/tarbz2_spec.rb +0 -24
- data/spec/imw/archives/targz_spec.rb +0 -21
- data/spec/imw/archives/zip_spec.rb +0 -16
- data/spec/imw/archives_spec.rb +0 -77
- data/spec/imw/compressed_files/bz2_spec.rb +0 -15
- data/spec/imw/compressed_files/compressible_spec.rb +0 -36
- data/spec/imw/compressed_files/gz_spec.rb +0 -15
- data/spec/imw/compressed_files_spec.rb +0 -47
- data/spec/imw/dataset/paths_spec.rb +0 -32
- data/spec/imw/dataset/workflow_spec.rb +0 -41
- data/spec/imw/formats/delimited_spec.rb +0 -44
- data/spec/imw/formats/excel_spec.rb +0 -55
- data/spec/imw/formats/json_spec.rb +0 -18
- data/spec/imw/formats/sgml_spec.rb +0 -24
- data/spec/imw/formats/yaml_spec.rb +0 -19
- data/spec/imw/metadata/contains_metadata_spec.rb +0 -56
- data/spec/imw/metadata/field_spec.rb +0 -25
- data/spec/imw/metadata/has_metadata_spec.rb +0 -58
- data/spec/imw/metadata/has_summary_spec.rb +0 -32
- data/spec/imw/metadata/schema_spec.rb +0 -24
- data/spec/imw/metadata_spec.rb +0 -86
- data/spec/imw/parsers/line_parser_spec.rb +0 -96
- data/spec/imw/parsers/regexp_parser_spec.rb +0 -42
- data/spec/imw/resource_spec.rb +0 -32
- data/spec/imw/schemes/hdfs_spec.rb +0 -67
- data/spec/imw/schemes/http_spec.rb +0 -19
- data/spec/imw/schemes/local_spec.rb +0 -165
- data/spec/imw/schemes/remote_spec.rb +0 -38
- data/spec/imw/schemes/s3_spec.rb +0 -31
- data/spec/imw/schemes/sql_spec.rb +0 -3
- data/spec/imw/tools/aggregator_spec.rb +0 -71
- data/spec/imw/tools/archiver_spec.rb +0 -120
- data/spec/imw/tools/extension_analyzer_spec.rb +0 -153
- data/spec/imw/tools/summarizer_spec.rb +0 -8
- data/spec/imw/tools/transferer_spec.rb +0 -195
- data/spec/imw/utils/dynamically_extendable_spec.rb +0 -69
- data/spec/imw/utils/has_uri_spec.rb +0 -61
- data/spec/imw/utils/paths_spec.rb +0 -10
- data/spec/imw/utils/shared_paths_spec.rb +0 -29
- data/spec/imw_spec.rb +0 -14
- data/spec/rcov.opts +0 -1
- data/spec/spec_helper.rb +0 -31
- data/spec/support/custom_matchers.rb +0 -28
- data/spec/support/file_contents_matcher.rb +0 -30
- data/spec/support/paths_matcher.rb +0 -66
- data/spec/support/random.rb +0 -213
- data/spec/support/without_regard_to_order_matcher.rb +0 -41
data/lib/imw/formats/yaml.rb
DELETED
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
module IMW
|
|
2
|
-
module Formats
|
|
3
|
-
|
|
4
|
-
# Provides methods for reading and writing YAML data.
|
|
5
|
-
module Yaml
|
|
6
|
-
|
|
7
|
-
include Enumerable
|
|
8
|
-
|
|
9
|
-
# Return the content of this resource.
|
|
10
|
-
#
|
|
11
|
-
# Will pass a block to the outermost YAML data structure's each
|
|
12
|
-
# method.
|
|
13
|
-
#
|
|
14
|
-
# @return [Hash, Array, String, Fixnum] whatever the YAML contained
|
|
15
|
-
def load &block
|
|
16
|
-
require 'yaml'
|
|
17
|
-
yaml = YAML.load(io)
|
|
18
|
-
if block_given?
|
|
19
|
-
yaml.each(&block)
|
|
20
|
-
else
|
|
21
|
-
yaml
|
|
22
|
-
end
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
# Iterate over the elements in the YAML.
|
|
26
|
-
def each &block
|
|
27
|
-
load(&block)
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
# Emit the +data+ into this resource. It must be opened for
|
|
31
|
-
# writing.
|
|
32
|
-
#
|
|
33
|
-
# @param [Hash, String, Array, Fixnum] data the Ruby object to emit
|
|
34
|
-
def emit data, options={}
|
|
35
|
-
require 'yaml'
|
|
36
|
-
write(data.to_yaml)
|
|
37
|
-
self
|
|
38
|
-
end
|
|
39
|
-
end
|
|
40
|
-
end
|
|
41
|
-
end
|
data/lib/imw/metadata.rb
DELETED
|
@@ -1,83 +0,0 @@
|
|
|
1
|
-
module IMW
|
|
2
|
-
|
|
3
|
-
# A collection of classes for describing the metadata associated
|
|
4
|
-
# with a dataset's fields.
|
|
5
|
-
class Metadata < Hash
|
|
6
|
-
|
|
7
|
-
autoload :Field, 'imw/metadata/field'
|
|
8
|
-
autoload :Schema, 'imw/metadata/schema'
|
|
9
|
-
autoload :ContainsMetadata, 'imw/metadata/contains_metadata'
|
|
10
|
-
autoload :HasSummary, 'imw/metadata/has_summary'
|
|
11
|
-
autoload :HasMetadata, 'imw/metadata/has_metadata'
|
|
12
|
-
|
|
13
|
-
# The resource this metadata is anchored to.
|
|
14
|
-
#
|
|
15
|
-
# This attribute is useful for letting relative paths in a
|
|
16
|
-
# schema file refer to a common base URL.
|
|
17
|
-
#
|
|
18
|
-
# @return [IMW::Resource]
|
|
19
|
-
attr_reader :base
|
|
20
|
-
|
|
21
|
-
# Set the base resource this metadata is anchored to.
|
|
22
|
-
#
|
|
23
|
-
# @param [IMW::Resource, String, Addressable::URI] new_base
|
|
24
|
-
def base= new_base
|
|
25
|
-
base_resource = IMW.open(new_base)
|
|
26
|
-
base_resource.should_exist!("Metadata base directory must exist")
|
|
27
|
-
raise IMW::PathError.new("Metadata base must be a directory") unless base_resource.is_directory?
|
|
28
|
-
@base = base_resource
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
def initialize obj=nil, options={}
|
|
32
|
-
super()
|
|
33
|
-
self.base = options[:base] if options[:base]
|
|
34
|
-
if obj
|
|
35
|
-
obj.each_pair do |resource, metadata|
|
|
36
|
-
self[resource] = metadata
|
|
37
|
-
end
|
|
38
|
-
end
|
|
39
|
-
end
|
|
40
|
-
|
|
41
|
-
def self.load obj, options={}
|
|
42
|
-
resource = IMW.open(obj)
|
|
43
|
-
new(resource.load, {:base => resource.dirname}.merge(options))
|
|
44
|
-
end
|
|
45
|
-
|
|
46
|
-
def []= resource, metadata
|
|
47
|
-
super(absolute_uri(resource), metadata)
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
def [] resource
|
|
51
|
-
super(absolute_uri(resource))
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
def describe? resource
|
|
55
|
-
self[(absolute_uri(resource))]
|
|
56
|
-
end
|
|
57
|
-
alias_method :describes?, :describe?
|
|
58
|
-
|
|
59
|
-
def description_for resource
|
|
60
|
-
return unless describes?(resource)
|
|
61
|
-
self[resource]['description']
|
|
62
|
-
end
|
|
63
|
-
|
|
64
|
-
def fields_for resource
|
|
65
|
-
return unless describes?(resource)
|
|
66
|
-
(self[resource]['fields'] || []).map { |f| Metadata::Field.new(f) }
|
|
67
|
-
end
|
|
68
|
-
|
|
69
|
-
protected
|
|
70
|
-
|
|
71
|
-
def absolute_uri resource
|
|
72
|
-
obj = IMW.open(resource)
|
|
73
|
-
if base && obj.uri.to_s !~ %r{(^/|://)} # relative path
|
|
74
|
-
s = base.join(obj.uri.to_s).uri.to_s
|
|
75
|
-
s
|
|
76
|
-
else
|
|
77
|
-
s = obj.uri.to_s
|
|
78
|
-
s
|
|
79
|
-
end
|
|
80
|
-
end
|
|
81
|
-
|
|
82
|
-
end
|
|
83
|
-
end
|
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
module IMW
|
|
2
|
-
class Metadata
|
|
3
|
-
|
|
4
|
-
# A module for finding metadata describing the sub-resources of a
|
|
5
|
-
# given resource.
|
|
6
|
-
#
|
|
7
|
-
# An including class describing the parent resource must define
|
|
8
|
-
# the +contents+ method which must return an Array of Strings
|
|
9
|
-
# contained within the parent. These objects will be matched
|
|
10
|
-
# against possible metadata URIs and the corresponding
|
|
11
|
-
# IMW::Metadata class created on the fly.
|
|
12
|
-
#
|
|
13
|
-
# In case no such object is found, the class should also define
|
|
14
|
-
# the +basename+ and +path+ methods which will be used to generate
|
|
15
|
-
# a default URI where metadata about the parent's resources should
|
|
16
|
-
# live.
|
|
17
|
-
module ContainsMetadata
|
|
18
|
-
|
|
19
|
-
# The URI containing the metadata for this resource and its
|
|
20
|
-
# contents.
|
|
21
|
-
#
|
|
22
|
-
# Looks for an existing JSON or YAML file whose path contains the string
|
|
23
|
-
# "metadata" directly contained within this resource.
|
|
24
|
-
#
|
|
25
|
-
# If none are found, defaults to a URI named after this
|
|
26
|
-
# resource's basename with the string ".metadata.yaml" appended.
|
|
27
|
-
#
|
|
28
|
-
# @return [String, nil]
|
|
29
|
-
def default_metadata_uri
|
|
30
|
-
contents.detect { |path| path =~ /metadata.*\.(ya?ml|json)$/i } || File.join(path, "#{basename}.metadata.yaml")
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
# Return the metadata for this resource if it exists.
|
|
34
|
-
#
|
|
35
|
-
# Will look for an existing resource at +default_metadata_uri+.
|
|
36
|
-
#
|
|
37
|
-
# @return [IMW::Metadata, nil]
|
|
38
|
-
def metadata
|
|
39
|
-
return @metadata if @metadata
|
|
40
|
-
obj = IMW.open(default_metadata_uri)
|
|
41
|
-
self.metadata=(obj) if obj.exist?
|
|
42
|
-
@metadata
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
# Set the metadata for this resource to +obj+.
|
|
46
|
-
#
|
|
47
|
-
# @param [String, Addressable::URI, IMW::Resource] obj
|
|
48
|
-
def metadata= obj
|
|
49
|
-
@metadata = IMW::Metadata.load(obj)
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
end
|
|
53
|
-
end
|
|
54
|
-
end
|
data/lib/imw/metadata/dsl.rb
DELETED
|
@@ -1,111 +0,0 @@
|
|
|
1
|
-
module IMW
|
|
2
|
-
class Metadata
|
|
3
|
-
|
|
4
|
-
# A module which defines a DSL that can be used to define metadata
|
|
5
|
-
# for an object.
|
|
6
|
-
module DSL
|
|
7
|
-
|
|
8
|
-
# Open a new resource at the given URI.
|
|
9
|
-
#
|
|
10
|
-
# If this dataset has metadata and it describes the resource
|
|
11
|
-
# then configure the resource to understand its schema.
|
|
12
|
-
#
|
|
13
|
-
# The +schema+ property passed via the options hash will
|
|
14
|
-
# override this.
|
|
15
|
-
#
|
|
16
|
-
# @param [String, Addressable::URI, IMW::Resource] uri
|
|
17
|
-
# @param [Hash] options
|
|
18
|
-
# @return [IMW::Resource]
|
|
19
|
-
# @see IMW.open
|
|
20
|
-
def open uri, options={}, &block
|
|
21
|
-
schema_options = (options[:schema].nil? && metadata && metadata.describe?(uri)) ? {:schema => metadata[uri]} : {}
|
|
22
|
-
IMW.open(uri, options.merge(schema_options), &block)
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
def open! uri, options={}, &block
|
|
26
|
-
self.open(uri, options.merge(:mode => 'w'), &block)
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
# When called without a block return this object's metadata.
|
|
30
|
-
#
|
|
31
|
-
# metadata
|
|
32
|
-
# #=> { '/path/to/file' => [...], '/path/to/other/file' => [...], ... }
|
|
33
|
-
#
|
|
34
|
-
# When called with a block, accumulate schema and fields into
|
|
35
|
-
# this object's metadata
|
|
36
|
-
#
|
|
37
|
-
# metadata do
|
|
38
|
-
#
|
|
39
|
-
# schema "/path/to/file" do
|
|
40
|
-
# # ...
|
|
41
|
-
# end
|
|
42
|
-
#
|
|
43
|
-
# schema "/path/to/other/file" do
|
|
44
|
-
# # ...
|
|
45
|
-
# end
|
|
46
|
-
# end
|
|
47
|
-
#
|
|
48
|
-
# @see [IMW::Metadata::Schema]
|
|
49
|
-
# @see [IMW::Metadata::Field]
|
|
50
|
-
# @return [IMW::Metadata]
|
|
51
|
-
def metadata arg=nil, options={}, &block
|
|
52
|
-
case arg
|
|
53
|
-
when Hash
|
|
54
|
-
@metadata ||= Metadata.new(arg, options)
|
|
55
|
-
when nil
|
|
56
|
-
@metadata ||= Metadata.new nil, options
|
|
57
|
-
else
|
|
58
|
-
@metadata ||= Metadata.load(arg, options)
|
|
59
|
-
end
|
|
60
|
-
@metadata.base = options[:base] if options[:base]
|
|
61
|
-
return @metadata unless block_given?
|
|
62
|
-
yield
|
|
63
|
-
end
|
|
64
|
-
|
|
65
|
-
def schema resource, options={}, &block
|
|
66
|
-
new_field_accumulator!
|
|
67
|
-
yield
|
|
68
|
-
metadata[resource] = Schema.new(last_field_accumulator!)
|
|
69
|
-
end
|
|
70
|
-
|
|
71
|
-
def field name, options={}
|
|
72
|
-
accumulate_field Field.new(options.merge(:name => name))
|
|
73
|
-
end
|
|
74
|
-
|
|
75
|
-
def has_one name, options={}, &block
|
|
76
|
-
new_field_accumulator!
|
|
77
|
-
yield
|
|
78
|
-
accumulate_field Field.new(options.merge(:name => name, :has_one => last_field_accumulator!))
|
|
79
|
-
end
|
|
80
|
-
|
|
81
|
-
def has_many name, options={}, &block
|
|
82
|
-
new_field_accumulator!
|
|
83
|
-
yield
|
|
84
|
-
accumulate_field Field.new(options.merge(:name => name, :has_many => last_field_accumulator!))
|
|
85
|
-
end
|
|
86
|
-
|
|
87
|
-
protected
|
|
88
|
-
|
|
89
|
-
def field_accumulators # :nodoc:
|
|
90
|
-
@field_accumulators ||= []
|
|
91
|
-
end
|
|
92
|
-
|
|
93
|
-
def new_field_accumulator! # :nodoc:
|
|
94
|
-
field_accumulators.push([])
|
|
95
|
-
end
|
|
96
|
-
|
|
97
|
-
def last_field_accumulator! # :nodoc:
|
|
98
|
-
field_accumulators.pop
|
|
99
|
-
end
|
|
100
|
-
|
|
101
|
-
def field_accumulator? # :nodoc:
|
|
102
|
-
! field_accumulators.empty?
|
|
103
|
-
end
|
|
104
|
-
|
|
105
|
-
def accumulate_field f # :nodoc:
|
|
106
|
-
# raise IMW::SchemaError.new("No record or sub-record to accumulate fields in!") unless field_accumulator?
|
|
107
|
-
field_accumulators.last << f if field_accumulator?
|
|
108
|
-
end
|
|
109
|
-
end
|
|
110
|
-
end
|
|
111
|
-
end
|
data/lib/imw/metadata/field.rb
DELETED
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
module IMW
|
|
2
|
-
|
|
3
|
-
class Metadata
|
|
4
|
-
|
|
5
|
-
# Conceptually, a field is a "slot" for which "records" can have
|
|
6
|
-
# values.
|
|
7
|
-
#
|
|
8
|
-
# An IMW::Metadata::Field is essentially a Hash that has one required
|
|
9
|
-
# property: a name.
|
|
10
|
-
#
|
|
11
|
-
# IMW::Metadata::Field.new('id')
|
|
12
|
-
# #=> { 'name' => 'id' }
|
|
13
|
-
#
|
|
14
|
-
# But you can declare as many other properties as you want (as long
|
|
15
|
-
# as you include a +name+):
|
|
16
|
-
#
|
|
17
|
-
# IMW::Metadata::Field.new 'name' => 'id', 'type' => :integer, 'title' => "ID", 'description' => "Auto-incremented."
|
|
18
|
-
# #=> { 'name' => 'id', 'type' => :integer, 'title' => "ID", 'description' => "Auto-incremented." }
|
|
19
|
-
class Field < Hash
|
|
20
|
-
|
|
21
|
-
def initialize obj
|
|
22
|
-
super()
|
|
23
|
-
if obj.is_a?(Hash) || obj.is_a?(Field)
|
|
24
|
-
merge!(obj)
|
|
25
|
-
raise IMW::ArgumentError.new("A field must have a name") if obj['name'].blank?
|
|
26
|
-
else
|
|
27
|
-
self['name'] = obj.to_s.strip
|
|
28
|
-
end
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
def titleize
|
|
32
|
-
self['title'] || self['name'].capitalize # FIXME we can do better than this!
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
end
|
|
36
|
-
end
|
|
37
|
-
end
|
|
@@ -1,98 +0,0 @@
|
|
|
1
|
-
module IMW
|
|
2
|
-
class Metadata
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
# A module which defines how a resource finds the Metadata in which it can
|
|
6
|
-
# look up metadata about itself.
|
|
7
|
-
#
|
|
8
|
-
# "metadata" in this context is defined as accessors for
|
|
9
|
-
# +metadata+ (IMW::Metadata), +schema+ (IMW::Metadata::Schema),
|
|
10
|
-
# +fields+ (IMW::Metadata::Field), and +description+ (String).
|
|
11
|
-
#
|
|
12
|
-
# An including class should define a method +dir+ which should
|
|
13
|
-
# return an object that might contain Metadata, i.e. - that
|
|
14
|
-
# includes the IMW::Metadata::ContainsMetadata module.
|
|
15
|
-
#
|
|
16
|
-
# An including class can optionally define the methods +snippet+
|
|
17
|
-
# which returns a snippet of the resource as well as
|
|
18
|
-
# +record_count+ to return a count of how many records the
|
|
19
|
-
# resource contains.
|
|
20
|
-
module HasMetadata
|
|
21
|
-
|
|
22
|
-
# The schema for this object.
|
|
23
|
-
#
|
|
24
|
-
# @return [Hash]
|
|
25
|
-
def schema
|
|
26
|
-
return @schema if @schema
|
|
27
|
-
@schema = IMW::Metadata::Schema.new
|
|
28
|
-
@schema[:type] = "record"
|
|
29
|
-
@schema[:namespace] = "schema.imw.resource"
|
|
30
|
-
@schema[:name] = (basename || '')
|
|
31
|
-
begin
|
|
32
|
-
@schema[:doc] = description
|
|
33
|
-
@schema[:fields] = fields
|
|
34
|
-
|
|
35
|
-
@schema[:non_avro ] = {}
|
|
36
|
-
@schema[:non_avro][:snippet] = snippet if respond_to?(:snippet) rescue nil
|
|
37
|
-
@schema[:non_avro][:record_count] = record_count if respond_to?(:record_count)
|
|
38
|
-
@schema
|
|
39
|
-
rescue => e
|
|
40
|
-
$stdout.puts "Error in producing schema for #{self}: #{e.class} -- #{e.message}"
|
|
41
|
-
return @schema
|
|
42
|
-
end
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
# Return the metadata object that contains metadata for this
|
|
46
|
-
# resource.
|
|
47
|
-
#
|
|
48
|
-
# Will look in this resource's directory and recursively upward
|
|
49
|
-
# till the root directory is reached or a metadata file is
|
|
50
|
-
# discovered.
|
|
51
|
-
#
|
|
52
|
-
# @return [IMW::Metadata, nil]
|
|
53
|
-
def metadata
|
|
54
|
-
return @metadata if @metadata
|
|
55
|
-
d = dir
|
|
56
|
-
while d.path != '/'
|
|
57
|
-
break if d.metadata && d.metadata.describes?(self)
|
|
58
|
-
d = d.dir
|
|
59
|
-
end
|
|
60
|
-
@metadata = d.metadata
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
# The fields for this resource's data.
|
|
64
|
-
#
|
|
65
|
-
# Each field will be a Hash of information.
|
|
66
|
-
#
|
|
67
|
-
# @return [Array<Hash>]
|
|
68
|
-
def fields
|
|
69
|
-
@fields ||= metadata && metadata.fields_for(self)
|
|
70
|
-
end
|
|
71
|
-
|
|
72
|
-
# Set the fields for this resource.
|
|
73
|
-
#
|
|
74
|
-
# @param [Array<Hash>] new_fields
|
|
75
|
-
# @return [Array<Hash>]
|
|
76
|
-
def fields= new_fields
|
|
77
|
-
@fields = new_fields.map { |f| Metadata::Field.new(f) }
|
|
78
|
-
end
|
|
79
|
-
|
|
80
|
-
# A description for this Resource.
|
|
81
|
-
#
|
|
82
|
-
# @return [String]
|
|
83
|
-
def description
|
|
84
|
-
@description ||= metadata && metadata.description_for(self)
|
|
85
|
-
end
|
|
86
|
-
|
|
87
|
-
# Set the description of this Resource.
|
|
88
|
-
#
|
|
89
|
-
# @param [String] new_description
|
|
90
|
-
# @return [String]
|
|
91
|
-
def description= new_description
|
|
92
|
-
@description = new_description
|
|
93
|
-
end
|
|
94
|
-
|
|
95
|
-
end
|
|
96
|
-
end
|
|
97
|
-
end
|
|
98
|
-
|
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
module IMW
|
|
2
|
-
class Metadata
|
|
3
|
-
|
|
4
|
-
# A module for generating a summary & schema of a resource.
|
|
5
|
-
#
|
|
6
|
-
# The including class should define methods +uri+, +basename+, +extension+.
|
|
7
|
-
module HasSummary
|
|
8
|
-
|
|
9
|
-
# Return a full summary of this Resource.
|
|
10
|
-
#
|
|
11
|
-
# The summary will include "external" information about how this
|
|
12
|
-
# resource appears to the world (via its URI), "internal"
|
|
13
|
-
# metadata about this resource (its description, &c.), as well
|
|
14
|
-
# as the structure of this resource's data (its schema's fields
|
|
15
|
-
# and a snippet).
|
|
16
|
-
#
|
|
17
|
-
# Will return a Hash, with a <tt>:schema</tt> key which maps to
|
|
18
|
-
# a well-formed AVRO schema for this resource.
|
|
19
|
-
#
|
|
20
|
-
# @return [Hash]
|
|
21
|
-
def summary
|
|
22
|
-
return @summary if @summary
|
|
23
|
-
@summary = {}
|
|
24
|
-
begin
|
|
25
|
-
@summary.merge!(external_summary)
|
|
26
|
-
@summary[:schema] = schema if respond_to?(:schema)
|
|
27
|
-
@summary[:contents] = resources.map(&:summary) if respond_to?(:resources)
|
|
28
|
-
@summary
|
|
29
|
-
rescue => e
|
|
30
|
-
# IMW.warn "Error in producing summary for #{self}: #{e.class} -- #{e.message}"
|
|
31
|
-
return @summary
|
|
32
|
-
end
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
# Return information (usually scheme-dependent) on how this
|
|
36
|
-
# resource is situated in the world, i.e. - its URI, its size,
|
|
37
|
-
# how many lines it has, &c.
|
|
38
|
-
#
|
|
39
|
-
# Modules which override this should chain with +super+:
|
|
40
|
-
#
|
|
41
|
-
# # in my_scheme.rb
|
|
42
|
-
# def external_summary
|
|
43
|
-
# super().merge(:user => 'bob', :password => 'smith')
|
|
44
|
-
# end
|
|
45
|
-
#
|
|
46
|
-
# @return [Hash]
|
|
47
|
-
def external_summary
|
|
48
|
-
{
|
|
49
|
-
:uri => uri.to_s,
|
|
50
|
-
:basename => basename,
|
|
51
|
-
:extension => extension
|
|
52
|
-
}
|
|
53
|
-
end
|
|
54
|
-
end
|
|
55
|
-
|
|
56
|
-
end
|
|
57
|
-
end
|