imw 0.2.18 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. data/Gemfile +7 -26
  2. data/Gemfile.lock +13 -38
  3. data/{LICENSE → LICENSE.txt} +1 -1
  4. data/README.textile +35 -0
  5. data/Rakefile +45 -22
  6. data/VERSION +1 -1
  7. data/examples/foo.rb +19 -0
  8. data/examples/html_selector.rb +22 -0
  9. data/examples/nes_game_list.csv +625 -0
  10. data/examples/nes_gamespot.csv +1371 -0
  11. data/examples/nes_nintendo.csv +624 -0
  12. data/examples/nes_unlicensed.csv +89 -0
  13. data/examples/nes_wikipedia.csv +710 -0
  14. data/examples/nibbler_test.rb +24 -0
  15. data/examples/script.rb +19 -0
  16. data/lib/imw.rb +28 -140
  17. data/lib/imw/error.rb +9 -0
  18. data/lib/imw/recordizer.rb +8 -0
  19. data/lib/imw/recordizer/html_selector_recordizer.rb +86 -0
  20. data/lib/imw/recordizer/string_slice_recordizer.rb +39 -0
  21. data/lib/imw/resource.rb +3 -119
  22. data/lib/imw/serializer.rb +7 -0
  23. data/lib/imw/serializer/json_serializer.rb +17 -0
  24. data/lib/imw/uri.rb +41 -0
  25. data/spec/resource_spec.rb +78 -0
  26. data/spec/uri_spec.rb +55 -0
  27. metadata +81 -232
  28. data/README.rdoc +0 -371
  29. data/bin/imw +0 -5
  30. data/bin/tsv_to_json.rb +0 -29
  31. data/etc/imwrc.rb +0 -26
  32. data/examples/dataset.rb +0 -12
  33. data/examples/metadata.yml +0 -10
  34. data/lib/imw/archives.rb +0 -120
  35. data/lib/imw/archives/rar.rb +0 -19
  36. data/lib/imw/archives/tar.rb +0 -19
  37. data/lib/imw/archives/tarbz2.rb +0 -73
  38. data/lib/imw/archives/targz.rb +0 -73
  39. data/lib/imw/archives/zip.rb +0 -51
  40. data/lib/imw/boot.rb +0 -87
  41. data/lib/imw/compressed_files.rb +0 -94
  42. data/lib/imw/compressed_files/bz2.rb +0 -16
  43. data/lib/imw/compressed_files/compressible.rb +0 -75
  44. data/lib/imw/compressed_files/gz.rb +0 -16
  45. data/lib/imw/dataset.rb +0 -125
  46. data/lib/imw/dataset/paths.rb +0 -29
  47. data/lib/imw/dataset/workflow.rb +0 -195
  48. data/lib/imw/formats.rb +0 -33
  49. data/lib/imw/formats/delimited.rb +0 -170
  50. data/lib/imw/formats/excel.rb +0 -100
  51. data/lib/imw/formats/json.rb +0 -41
  52. data/lib/imw/formats/pdf.rb +0 -71
  53. data/lib/imw/formats/sgml.rb +0 -69
  54. data/lib/imw/formats/yaml.rb +0 -41
  55. data/lib/imw/metadata.rb +0 -83
  56. data/lib/imw/metadata/contains_metadata.rb +0 -54
  57. data/lib/imw/metadata/dsl.rb +0 -111
  58. data/lib/imw/metadata/field.rb +0 -37
  59. data/lib/imw/metadata/has_metadata.rb +0 -98
  60. data/lib/imw/metadata/has_summary.rb +0 -57
  61. data/lib/imw/metadata/schema.rb +0 -17
  62. data/lib/imw/parsers.rb +0 -8
  63. data/lib/imw/parsers/flat.rb +0 -44
  64. data/lib/imw/parsers/html_parser.rb +0 -387
  65. data/lib/imw/parsers/html_parser/matchers.rb +0 -289
  66. data/lib/imw/parsers/line_parser.rb +0 -87
  67. data/lib/imw/parsers/regexp_parser.rb +0 -72
  68. data/lib/imw/repository.rb +0 -12
  69. data/lib/imw/runner.rb +0 -118
  70. data/lib/imw/schemes.rb +0 -23
  71. data/lib/imw/schemes/ftp.rb +0 -142
  72. data/lib/imw/schemes/hdfs.rb +0 -251
  73. data/lib/imw/schemes/http.rb +0 -165
  74. data/lib/imw/schemes/local.rb +0 -409
  75. data/lib/imw/schemes/remote.rb +0 -119
  76. data/lib/imw/schemes/s3.rb +0 -143
  77. data/lib/imw/schemes/sql.rb +0 -129
  78. data/lib/imw/tools.rb +0 -12
  79. data/lib/imw/tools/aggregator.rb +0 -148
  80. data/lib/imw/tools/archiver.rb +0 -220
  81. data/lib/imw/tools/downloader.rb +0 -63
  82. data/lib/imw/tools/extension_analyzer.rb +0 -114
  83. data/lib/imw/tools/summarizer.rb +0 -83
  84. data/lib/imw/tools/transferer.rb +0 -167
  85. data/lib/imw/utils.rb +0 -74
  86. data/lib/imw/utils/dynamically_extendable.rb +0 -137
  87. data/lib/imw/utils/error.rb +0 -59
  88. data/lib/imw/utils/extensions/hpricot.rb +0 -34
  89. data/lib/imw/utils/has_uri.rb +0 -131
  90. data/lib/imw/utils/log.rb +0 -92
  91. data/lib/imw/utils/misc.rb +0 -57
  92. data/lib/imw/utils/paths.rb +0 -146
  93. data/lib/imw/utils/uri.rb +0 -59
  94. data/lib/imw/utils/uuid.rb +0 -33
  95. data/lib/imw/utils/validate.rb +0 -38
  96. data/lib/imw/utils/version.rb +0 -11
  97. data/spec/data/formats/delimited/sample.csv +0 -131
  98. data/spec/data/formats/delimited/sample.tsv +0 -131
  99. data/spec/data/formats/delimited/with_schema/ace-hardware-locations.tsv +0 -11
  100. data/spec/data/formats/delimited/with_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -16
  101. data/spec/data/formats/delimited/with_schema/complete-list-of-starbucks-locations.tsv +0 -11
  102. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -22
  103. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -22
  104. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -12
  105. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -13
  106. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -22
  107. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -22
  108. data/spec/data/formats/delimited/without_schema/ace-hardware-locations.tsv +0 -10
  109. data/spec/data/formats/delimited/without_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -15
  110. data/spec/data/formats/delimited/without_schema/complete-list-of-starbucks-locations.tsv +0 -10
  111. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -21
  112. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -21
  113. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -11
  114. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -12
  115. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -21
  116. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -21
  117. data/spec/data/formats/excel/sample.xls +0 -0
  118. data/spec/data/formats/json/sample.json +0 -1
  119. data/spec/data/formats/none/sample +0 -650
  120. data/spec/data/formats/sgml/sample.xml +0 -617
  121. data/spec/data/formats/text/sample.txt +0 -650
  122. data/spec/data/formats/yaml/sample.yaml +0 -410
  123. data/spec/data/schema-tabular.yaml +0 -11
  124. data/spec/imw/archives/rar_spec.rb +0 -16
  125. data/spec/imw/archives/tar_spec.rb +0 -16
  126. data/spec/imw/archives/tarbz2_spec.rb +0 -24
  127. data/spec/imw/archives/targz_spec.rb +0 -21
  128. data/spec/imw/archives/zip_spec.rb +0 -16
  129. data/spec/imw/archives_spec.rb +0 -77
  130. data/spec/imw/compressed_files/bz2_spec.rb +0 -15
  131. data/spec/imw/compressed_files/compressible_spec.rb +0 -36
  132. data/spec/imw/compressed_files/gz_spec.rb +0 -15
  133. data/spec/imw/compressed_files_spec.rb +0 -47
  134. data/spec/imw/dataset/paths_spec.rb +0 -32
  135. data/spec/imw/dataset/workflow_spec.rb +0 -41
  136. data/spec/imw/formats/delimited_spec.rb +0 -44
  137. data/spec/imw/formats/excel_spec.rb +0 -55
  138. data/spec/imw/formats/json_spec.rb +0 -18
  139. data/spec/imw/formats/sgml_spec.rb +0 -24
  140. data/spec/imw/formats/yaml_spec.rb +0 -19
  141. data/spec/imw/metadata/contains_metadata_spec.rb +0 -56
  142. data/spec/imw/metadata/field_spec.rb +0 -25
  143. data/spec/imw/metadata/has_metadata_spec.rb +0 -58
  144. data/spec/imw/metadata/has_summary_spec.rb +0 -32
  145. data/spec/imw/metadata/schema_spec.rb +0 -24
  146. data/spec/imw/metadata_spec.rb +0 -86
  147. data/spec/imw/parsers/line_parser_spec.rb +0 -96
  148. data/spec/imw/parsers/regexp_parser_spec.rb +0 -42
  149. data/spec/imw/resource_spec.rb +0 -32
  150. data/spec/imw/schemes/hdfs_spec.rb +0 -67
  151. data/spec/imw/schemes/http_spec.rb +0 -19
  152. data/spec/imw/schemes/local_spec.rb +0 -165
  153. data/spec/imw/schemes/remote_spec.rb +0 -38
  154. data/spec/imw/schemes/s3_spec.rb +0 -31
  155. data/spec/imw/schemes/sql_spec.rb +0 -3
  156. data/spec/imw/tools/aggregator_spec.rb +0 -71
  157. data/spec/imw/tools/archiver_spec.rb +0 -120
  158. data/spec/imw/tools/extension_analyzer_spec.rb +0 -153
  159. data/spec/imw/tools/summarizer_spec.rb +0 -8
  160. data/spec/imw/tools/transferer_spec.rb +0 -195
  161. data/spec/imw/utils/dynamically_extendable_spec.rb +0 -69
  162. data/spec/imw/utils/has_uri_spec.rb +0 -61
  163. data/spec/imw/utils/paths_spec.rb +0 -10
  164. data/spec/imw/utils/shared_paths_spec.rb +0 -29
  165. data/spec/imw_spec.rb +0 -14
  166. data/spec/rcov.opts +0 -1
  167. data/spec/spec_helper.rb +0 -31
  168. data/spec/support/custom_matchers.rb +0 -28
  169. data/spec/support/file_contents_matcher.rb +0 -30
  170. data/spec/support/paths_matcher.rb +0 -66
  171. data/spec/support/random.rb +0 -213
  172. data/spec/support/without_regard_to_order_matcher.rb +0 -41
@@ -1,41 +0,0 @@
1
- module IMW
2
- module Formats
3
-
4
- # Provides methods for reading and writing YAML data.
5
- module Yaml
6
-
7
- include Enumerable
8
-
9
- # Return the content of this resource.
10
- #
11
- # Will pass a block to the outermost YAML data structure's each
12
- # method.
13
- #
14
- # @return [Hash, Array, String, Fixnum] whatever the YAML contained
15
- def load &block
16
- require 'yaml'
17
- yaml = YAML.load(io)
18
- if block_given?
19
- yaml.each(&block)
20
- else
21
- yaml
22
- end
23
- end
24
-
25
- # Iterate over the elements in the YAML.
26
- def each &block
27
- load(&block)
28
- end
29
-
30
- # Emit the +data+ into this resource. It must be opened for
31
- # writing.
32
- #
33
- # @param [Hash, String, Array, Fixnum] data the Ruby object to emit
34
- def emit data, options={}
35
- require 'yaml'
36
- write(data.to_yaml)
37
- self
38
- end
39
- end
40
- end
41
- end
@@ -1,83 +0,0 @@
1
- module IMW
2
-
3
- # A collection of classes for describing the metadata associated
4
- # with a dataset's fields.
5
- class Metadata < Hash
6
-
7
- autoload :Field, 'imw/metadata/field'
8
- autoload :Schema, 'imw/metadata/schema'
9
- autoload :ContainsMetadata, 'imw/metadata/contains_metadata'
10
- autoload :HasSummary, 'imw/metadata/has_summary'
11
- autoload :HasMetadata, 'imw/metadata/has_metadata'
12
-
13
- # The resource this metadata is anchored to.
14
- #
15
- # This attribute is useful for letting relative paths in a
16
- # schema file refer to a common base URL.
17
- #
18
- # @return [IMW::Resource]
19
- attr_reader :base
20
-
21
- # Set the base resource this metadata is anchored to.
22
- #
23
- # @param [IMW::Resource, String, Addressable::URI] new_base
24
- def base= new_base
25
- base_resource = IMW.open(new_base)
26
- base_resource.should_exist!("Metadata base directory must exist")
27
- raise IMW::PathError.new("Metadata base must be a directory") unless base_resource.is_directory?
28
- @base = base_resource
29
- end
30
-
31
- def initialize obj=nil, options={}
32
- super()
33
- self.base = options[:base] if options[:base]
34
- if obj
35
- obj.each_pair do |resource, metadata|
36
- self[resource] = metadata
37
- end
38
- end
39
- end
40
-
41
- def self.load obj, options={}
42
- resource = IMW.open(obj)
43
- new(resource.load, {:base => resource.dirname}.merge(options))
44
- end
45
-
46
- def []= resource, metadata
47
- super(absolute_uri(resource), metadata)
48
- end
49
-
50
- def [] resource
51
- super(absolute_uri(resource))
52
- end
53
-
54
- def describe? resource
55
- self[(absolute_uri(resource))]
56
- end
57
- alias_method :describes?, :describe?
58
-
59
- def description_for resource
60
- return unless describes?(resource)
61
- self[resource]['description']
62
- end
63
-
64
- def fields_for resource
65
- return unless describes?(resource)
66
- (self[resource]['fields'] || []).map { |f| Metadata::Field.new(f) }
67
- end
68
-
69
- protected
70
-
71
- def absolute_uri resource
72
- obj = IMW.open(resource)
73
- if base && obj.uri.to_s !~ %r{(^/|://)} # relative path
74
- s = base.join(obj.uri.to_s).uri.to_s
75
- s
76
- else
77
- s = obj.uri.to_s
78
- s
79
- end
80
- end
81
-
82
- end
83
- end
@@ -1,54 +0,0 @@
1
- module IMW
2
- class Metadata
3
-
4
- # A module for finding metadata describing the sub-resources of a
5
- # given resource.
6
- #
7
- # An including class describing the parent resource must define
8
- # the +contents+ method which must return an Array of Strings
9
- # contained within the parent. These objects will be matched
10
- # against possible metadata URIs and the corresponding
11
- # IMW::Metadata class created on the fly.
12
- #
13
- # In case no such object is found, the class should also define
14
- # the +basename+ and +path+ methods which will be used to generate
15
- # a default URI where metadata about the parent's resources should
16
- # live.
17
- module ContainsMetadata
18
-
19
- # The URI containing the metadata for this resource and its
20
- # contents.
21
- #
22
- # Looks for an existing JSON or YAML file whose path contains the string
22
- # "metadata" directly contained within this resource.
24
- #
25
- # If none are found, defaults to a URI named after this
26
- # resource's basename with the string ".metadata.yaml" appended.
27
- #
28
- # @return [String, nil]
29
- def default_metadata_uri
30
- contents.detect { |path| path =~ /metadata.*\.(ya?ml|json)$/i } || File.join(path, "#{basename}.metadata.yaml")
31
- end
32
-
33
- # Return the metadata for this resource if it exists.
34
- #
35
- # Will look for an existing resource at +default_metadata_uri+.
36
- #
37
- # @return [IMW::Metadata, nil]
38
- def metadata
39
- return @metadata if @metadata
40
- obj = IMW.open(default_metadata_uri)
41
- self.metadata=(obj) if obj.exist?
42
- @metadata
43
- end
44
-
45
- # Set the metadata for this resource to +obj+.
46
- #
47
- # @param [String, Addressable::URI, IMW::Resource] obj
48
- def metadata= obj
49
- @metadata = IMW::Metadata.load(obj)
50
- end
51
-
52
- end
53
- end
54
- end
@@ -1,111 +0,0 @@
1
- module IMW
2
- class Metadata
3
-
4
- # A module which defines a DSL that can be used to define metadata
5
- # for an object.
6
- module DSL
7
-
8
- # Open a new resource at the given URI.
9
- #
10
- # If this dataset has metadata and it describes the resource
11
- # then configure the resource to understand its schema.
12
- #
13
- # The +schema+ property passed via the options hash will
14
- # override this.
15
- #
16
- # @param [String, Addressable::Uri, IMW::Resource] uri
17
- # @param [Hash] options
18
- # @return [IMW::Resource]
19
- # @see IMW.open
20
- def open uri, options={}, &block
21
- schema_options = (options[:schema].nil? && metadata && metadata.describe?(uri)) ? {:schema => metadata[uri]} : {}
22
- IMW.open(uri, options.merge(schema_options), &block)
23
- end
24
-
25
- def open! uri, options={}, &block
26
- self.open(uri, options.merge(:mode => 'w'), &block)
27
- end
28
-
29
- # When called without a block return this object's metadata.
30
- #
31
- # metadata
32
- # #=> { '/path/to/file' => [...], '/path/to/other/file' => [...], ... }
33
- #
34
- # When called with a block, accumulate schema and fields into
35
- # this object's metadata
36
- #
37
- # metadata do
38
- #
39
- # schema "/path/to/file" do
40
- # # ...
41
- # end
42
- #
43
- # schema "/path/to/other/file" do
44
- # # ...
45
- # end
46
- # end
47
- #
48
- # @see [IMW::Metadata::Schema]
49
- # @see [IMW::Metadata::Field]
50
- # @return [IMW::Metadata]
51
- def metadata arg=nil, options={}, &block
52
- case arg
53
- when Hash
54
- @metadata ||= Metadata.new(arg, options)
55
- when nil
56
- @metadata ||= Metadata.new nil, options
57
- else
58
- @metadata ||= Metadata.load(arg, options)
59
- end
60
- @metadata.base = options[:base] if options[:base]
61
- return @metadata unless block_given?
62
- yield
63
- end
64
-
65
- def schema resource, options={}, &block
66
- new_field_accumulator!
67
- yield
68
- metadata[resource] = Schema.new(last_field_accumulator!)
69
- end
70
-
71
- def field name, options={}
72
- accumulate_field Field.new(options.merge(:name => name))
73
- end
74
-
75
- def has_one name, options={}, &block
76
- new_field_accumulator!
77
- yield
78
- accumulate_field Field.new(options.merge(:name => name, :has_one => last_field_accumulator!))
79
- end
80
-
81
- def has_many name, options={}, &block
82
- new_field_accumulator!
83
- yield
84
- accumulate_field Field.new(options.merge(:name => name, :has_many => last_field_accumulator!))
85
- end
86
-
87
- protected
88
-
89
- def field_accumulators # :nodoc:
90
- @field_accumulators ||= []
91
- end
92
-
93
- def new_field_accumulator! # :nodoc:
94
- field_accumulators.push([])
95
- end
96
-
97
- def last_field_accumulator! # :nodoc:
98
- field_accumulators.pop
99
- end
100
-
101
- def field_accumulator? # :nodoc:
102
- ! field_accumulators.empty?
103
- end
104
-
105
- def accumulate_field f # :nodoc:
106
- # raise IMW::SchemaError.new("No record or sub-record to accumulate fields in!") unless field_accumulator?
107
- field_accumulators.last << f if field_accumulator?
108
- end
109
- end
110
- end
111
- end
@@ -1,37 +0,0 @@
1
- module IMW
2
-
3
- class Metadata
4
-
5
- # Conceptually, a field is a "slot" for which "records" can have
6
- # values.
7
- #
8
- # An IMW::Metadata::Field is essentially a Hash that has one required
9
- # property: a name.
10
- #
11
- # IMW::Metadata::Field.new('id')
12
- # #=> { 'name' => 'id' }
13
- #
14
- # But you can declare as many other properties as you want (as long
15
- # as you include a +name+):
16
- #
17
- # IMW::Metadata::Field.new 'name' => 'id', 'type' => :integer, 'title' => "ID", 'description' => "Auto-incremented."
18
- # #=> { 'name' => 'id', 'type' => :integer, 'title' => "ID", 'description' => "Auto-incremented." }
19
- class Field < Hash
20
-
21
- def initialize obj
22
- super()
23
- if obj.is_a?(Hash) || obj.is_a?(Field)
24
- merge!(obj)
25
- raise IMW::ArgumentError.new("A field must have a name") if obj['name'].blank?
26
- else
27
- self['name'] = obj.to_s.strip
28
- end
29
- end
30
-
31
- def titleize
32
- self['title'] || self['name'].capitalize # FIXME we can do better than this!
33
- end
34
-
35
- end
36
- end
37
- end
@@ -1,98 +0,0 @@
1
- module IMW
2
- class Metadata
3
-
4
-
5
- # A module which defines how a resource finds the Metadata object in
5
- # which it can look up metadata about itself.
7
- #
8
- # "metadata" in this context is defined as accessors for
9
- # +metadata+ (IMW::Metadata), +schema+ (IMW::Metadata::Schema),
10
- # +fields+ (IMW::Metadata::Field), and +description+ (String).
11
- #
12
- # An including class should define a method +dir+ which should
13
- # return an object that might contain Metadata, i.e. - that
14
- # includes the IMW::Metadata::ContainsMetadata module.
15
- #
16
- # An including class can optionally define the methods +snippet+
17
- # which returns a snippet of the resource as well as
18
- # +record_count+ to return a count of how many records the
19
- # resource contains.
20
- module HasMetadata
21
-
22
- # The schema for this object.
23
- #
24
- # @return [Hash]
25
- def schema
26
- return @schema if @schema
27
- @schema = IMW::Metadata::Schema.new
28
- @schema[:type] = "record"
29
- @schema[:namespace] = "schema.imw.resource"
30
- @schema[:name] = (basename || '')
31
- begin
32
- @schema[:doc] = description
33
- @schema[:fields] = fields
34
-
35
- @schema[:non_avro ] = {}
36
- @schema[:non_avro][:snippet] = snippet if respond_to?(:snippet) rescue nil
37
- @schema[:non_avro][:record_count] = record_count if respond_to?(:record_count)
38
- @schema
39
- rescue => e
40
- $stdout.puts "Error in producing schema for #{self}: #{e.class} -- #{e.message}"
41
- return @schema
42
- end
43
- end
44
-
45
- # Return the metadata object that contains metadata for this
46
- # resource.
47
- #
48
- # Will look in this resource's directory and recursively upward
49
- # till the root directory is reached or a metadata file is
50
- # discovered.
51
- #
52
- # @return [IMW::Metadata, nil]
53
- def metadata
54
- return @metadata if @metadata
55
- d = dir
56
- while d.path != '/'
57
- break if d.metadata && d.metadata.describes?(self)
58
- d = d.dir
59
- end
60
- @metadata = d.metadata
61
- end
62
-
63
- # The fields for this resource's data.
64
- #
65
- # Each field will be a Hash of information.
66
- #
67
- # @return [Array<Hash>]
68
- def fields
69
- @fields ||= metadata && metadata.fields_for(self)
70
- end
71
-
72
- # Set the fields for this resource.
73
- #
74
- # @param [Array<Hash>] new_fields
75
- # @return [Array<Hash>]
76
- def fields= new_fields
77
- @fields = new_fields.map { |f| Metadata::Field.new(f) }
78
- end
79
-
80
- # A description for this Resource.
81
- #
82
- # @return [String]
83
- def description
84
- @description ||= metadata && metadata.description_for(self)
85
- end
86
-
87
- # Set the description of this Resource.
88
- #
89
- # @param [String] new_description
90
- # @return [String]
91
- def description= new_description
92
- @description = new_description
93
- end
94
-
95
- end
96
- end
97
- end
98
-
@@ -1,57 +0,0 @@
1
- module IMW
2
- class Metadata
3
-
4
- # A module for generating a summary & schema of a resource.
5
- #
6
- # The including class should define methods +uri+, +basename+, +extension+.
7
- module HasSummary
8
-
9
- # Return a full summary of this Resource.
10
- #
11
- # The summary will include "external" information about how this
12
- # resource appears to the world (via its URI), "internal"
13
- # metadata about this resource (its description, &c.), as well
14
- # as the structure of this resource's data (its schema's fields
15
- # and a snippet).
16
- #
17
- # Will return a Hash, with a <tt>:schema</tt> key which maps to
18
- # a well-formed AVRO schema for this resource.
19
- #
20
- # @return [Hash]
21
- def summary
22
- return @summary if @summary
23
- @summary = {}
24
- begin
25
- @summary.merge!(external_summary)
26
- @summary[:schema] = schema if respond_to?(:schema)
27
- @summary[:contents] = resources.map(&:summary) if respond_to?(:resources)
28
- @summary
29
- rescue => e
30
- # IMW.warn "Error in producing summary for #{self}: #{e.class} -- #{e.message}"
31
- return @summary
32
- end
33
- end
34
-
35
- # Return information (usually scheme-dependent) on how this
36
- # resource is situated in the world, i.e. - its URI, its size,
37
- # how many lines it has, &c.
38
- #
39
- # Modules which override this should chain with +super+:
40
- #
41
- # # in my_scheme.rb
42
- # def external_summary
43
- # super().merge(:user => 'bob', :password => 'smith')
44
- # end
45
- #
46
- # @return [Hash]
47
- def external_summary
48
- {
49
- :uri => uri.to_s,
50
- :basename => basename,
51
- :extension => extension
52
- }
53
- end
54
- end
55
-
56
- end
57
- end