dor-services 4.4.13 → 4.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,324 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'scanf'
4
+ require 'uri'
5
+
6
+ module Dor
7
+ # GeoMetadataDS is a Fedora datastream for geographic metadata. It uses
8
+ # the ISO 19139 metadata standard schema - a metadata standard for Geographic Information
9
+ # The datastream is packaged using RDF to identify the optional ISO 19139 feature catalog
10
+ # @see http://www.isotc211.org
11
+ # @author Darren Hardy
12
+ class GeoMetadataDS < ActiveFedora::NokogiriDatastream
13
+ include SolrDocHelper
14
+
15
+ attr_accessor :geometryType, :zipName, :purl
16
+
17
+ # namespaces
18
# XML namespace URIs used by this datastream, keyed by prefix
NS = {
  :rdf => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
  :gco => 'http://www.isotc211.org/2005/gco',
  :gmd => 'http://www.isotc211.org/2005/gmd',
  :gfc => 'http://www.isotc211.org/2005/gfc'
}

# the same namespaces as "xmlns:<prefix>" => URI pairs
XMLNS = NS.each_with_object({}) do |(prefix, uri), attrs|
  attrs["xmlns:#{prefix}"] = uri
end

# schema locations: one "<URI> <URI>/<prefix>.xsd" entry per namespace
NS_XSD = NS.map { |prefix, uri| "#{uri} #{uri}/#{prefix}.xsd" }
30
+
31
+ # [Nokogiri::XSLT::Stylesheet] for ISO 19139 to MODS
32
XSLT_GEOMODS = Nokogiri::XSLT(
  File.read(File.join(File.dirname(__FILE__), 'geo2mods.xsl'))
)

# Stylesheet applied to MODS by #to_dublin_core.
# The stylesheet is still handed to Nokogiri as an open File rather than a String
# (presumably so relative includes keep resolving against the file's location --
# TODO confirm), but the block form of File.open closes the handle once the
# stylesheet has been parsed instead of leaving it open until garbage collection.
XSLT_DC = File.open(
  File.expand_path(File.dirname(__FILE__) + '/../models/mods2dc.xslt')
) { |io| Nokogiri::XSLT(io) }
39
+
40
+ # @see http://ruby-doc.org/gems/docs/o/om-1.8.0/OM/XML/Document/ClassMethods.html#method-i-set_terminology
41
set_terminology do |t|
  # root term: the gmd:MD_Metadata element inside the RDF description
  t.root(:path => '/rdf:RDF/rdf:Description/gmd:MD_Metadata',
         'xmlns:gmd' => NS[:gmd],
         'xmlns:gco' => NS[:gco],
         'xmlns:rdf' => NS[:rdf])

  t.id_(:path => '/rdf:RDF/rdf:Description[1]/@rdf:about')

  # terms addressed relative to the root
  here = './'
  t.dataset_id(:path => here + 'gmd:dataSetURI/gco:CharacterString')
  t.file_id(:path => here + 'gmd:fileIdentifier/gco:CharacterString')
  t.metadata_dt(:path => here + 'gmd:dateStamp/gco:Date/text()') # XXX: Allow DateTime
  t.metadata_language(:path => here + 'gmd:MD_Metadata/gmd:language/gmd:LanguageCode[@codeSpace="ISO639-2"]/@codeListValue')

  # terms under the data identification section
  identification = 'gmd:identificationInfo/gmd:MD_DataIdentification/'
  t.abstract(:path => identification + 'gmd:abstract/gco:CharacterString/text()')
  t.purpose(:path => identification + 'gmd:purpose/gco:CharacterString/text()')
  t.publisher(:path => identification + 'gmd:pointOfContact/gmd:CI_ResponsibleParty[gmd:role/gmd:CI_RoleCode/@codeListValue="pointOfContact"]/gmd:organisationName/gco:CharacterString/text()')

  # terms under the citation of the data identification section
  citation = identification + 'gmd:citation/gmd:CI_Citation/'
  t.title(:path => citation + 'gmd:title/gco:CharacterString/text()')
  t.publish_dt(:path => citation + 'gmd:date/gmd:CI_Date/gmd:date/gco:Date/text()')
  t.originator(:path => citation + 'gmd:citedResponsibleParty/gmd:CI_ResponsibleParty[gmd:role/gmd:CI_RoleCode/@codeListValue="originator"]/gmd:organisationName/gco:CharacterString/text()')

  # terms under the distribution section
  distribution = 'gmd:distributionInfo/gmd:MD_Distribution/'
  data_format = distribution + 'gmd:distributionFormat/gmd:MD_Format/'
  t.format(:path => data_format + 'gmd:name/gco:CharacterString/text()') #, :index_as => [:facetable]

  online_resource = distribution + 'gmd:transferOptions/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource/'
  t.layername(:path => online_resource + 'gmd:name/gco:CharacterString/text()')

  # XXX should define projection as codeSpace + ':' + code in terminology
  reference_system = 'gmd:referenceSystemInfo/gmd:MD_ReferenceSystem/gmd:referenceSystemIdentifier/gmd:RS_Identifier/'
  t.projection(:path => reference_system + 'gmd:code/gco:CharacterString/text()')
  t.projection_code_space(:path => reference_system + 'gmd:codeSpace/gco:CharacterString/text()')
end
76
+
77
+ # @return [Nokogiri::XML::Document] with gmd:MD_Metadata as root node
78
+ # @raise [Dor::ParameterError] if MD_Metadata is missing
79
def metadata
  # look up the MD_Metadata element(s) wrapped in the RDF description
  nodes = ng_xml.xpath('/rdf:RDF/rdf:Description/gmd:MD_Metadata', XMLNS)

  # the first match is re-parsed so it becomes the root of its own document
  return Nokogiri::XML(nodes.first.to_xml) unless nodes.nil? || nodes.empty?

  raise Dor::ParameterError, "Invalid geoMetadata -- missing MD_Metadata: #{nodes}"
end
87
+
88
+ # @return [Nokogiri::XML::Document] with gfc:FC_FeatureCatalogue as root node,
89
+ # or nil if not provided
90
def feature_catalogue
  nodes = ng_xml.xpath('/rdf:RDF/rdf:Description/gfc:FC_FeatureCatalogue', XMLNS)

  # Feature catalog is optional
  return nil if nodes.nil? || nodes.empty?

  # the first match is re-parsed so it becomes the root of its own document
  Nokogiri::XML(nodes.first.to_xml)
end
98
+
99
+ # @return [Nokogiri::XML::Document] Contains skeleton geoMetadata XML
100
+ # Add your druid as the suffix to rdf:about attributes.
101
+ # Includes all possible xmlns for gmd and gfc
102
def self.xml_template
  builder = Nokogiri::XML::Builder.new do |xml|
    # the root carries every namespace declaration plus the schema locations
    xml['rdf'].RDF(XMLNS,
                   'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
                   "xsi:schemaLocation" => NS_XSD.join(' ')) do
      # one description per payload; rdf:about is passed as nil here
      xml['rdf'].Description('rdf:about' => nil) { xml['gmd'].MD_Metadata }
      xml['rdf'].Description('rdf:about' => nil) { xml['gfc'].FC_FeatureCatalogue }
    end
  end
  builder.doc
end
116
+
117
+ # Generates MODS from ISO 19139
118
+ #
119
+ # @return [Nokogiri::XML::Document] Derived MODS metadata record
120
+ # @raise [CrosswalkError] Raises if the generated MODS is empty or has no children
121
+ #
122
+ # Uses GML SimpleFeatures for the geometry type (e.g., Polygon, LineString, etc.)
123
+ # @see http://portal.opengeospatial.org/files/?artifact_id=25355
124
+ #
125
def to_mods(params = {})
  # Values are wrapped in single quotes so the stylesheet receives them as string literals.
  # These three keys take precedence over same-named entries in the caller's params.
  overrides = {
    'geometryType' => "'#{@geometryType.nil? ? 'Polygon' : @geometryType}'",
    'zipName' => "'#{@zipName.nil? ? 'data.zip' : @zipName}'",
    'purl' => "'#{@purl}'"
  }
  xslt_params = params.merge(overrides)

  mods = XSLT_GEOMODS.transform(metadata.document, xslt_params.to_a.flatten)
  raise CrosswalkError, 'to_mods produced incorrect xml' unless mods.root && mods.root.children.size > 0

  # Replace known projection codes with a display label; other values are left untouched.
  # Retrieve this mapping from config file
  projection_labels = {
    'epsg:4326' => 'World Geodetic System (WGS84)',
    'epsg::4326' => 'World Geodetic System (WGS84)',
    'urn:ogc:def:crs:epsg::4326' => 'World Geodetic System (WGS84)',
    'epsg:4269' => 'North American Datum (NAD83)',
    'epsg::4269' => 'North American Datum (NAD83)',
    'urn:ogc:def:crs:epsg::4269' => 'North American Datum (NAD83)'
  }
  mods.xpath('/mods:mods/mods:subject/mods:cartographics/mods:projection',
             'xmlns:mods' => Dor::DescMetadataDS::MODS_NS).each do |node|
    label = projection_labels[node.content.downcase]
    node.content = label if label
  end

  # Rewrite decimal-degree coordinates into the parenthesised DDMMSS form
  mods.xpath('/mods:mods/mods:subject/mods:cartographics/mods:coordinates',
             'xmlns:mods' => Dor::DescMetadataDS::MODS_NS).each do |node|
    node.content = '(' + self.class.to_coordinates_ddmmss(node.content.to_s) + ')'
  end

  mods
end
158
+
159
def to_dublin_core
  # two-step crosswalk: build MODS via #to_mods, then run it through XSLT_DC
  mods = to_mods
  XSLT_DC.transform(mods)
end
162
+
163
+ # @deprecated stub for GeoBlacklight (not Argo -- use to_solr as usual)
164
def to_solr_spatial(solr_doc = Hash.new, *args)
  # There are a whole bunch of namespace-related things that can go
  # wrong with this terminology. Until it's fixed in OM, ignore them all:
  # any StandardError raised below makes the method return solr_doc as-is.
  target = solr_doc #super solr_doc, *args
  bbox = to_bbox
  ap({:doc => target, :bb => bbox, :self => self}) if $DEBUG

  # all values are computed before the first insert, so a failure while
  # reading the metadata leaves the document untouched
  fields = {
    :id => self.id.first,
    :druid => URI(self.id.first).path.gsub(%r{^/}, ''),
    :file_id_s => self.file_id.first,
    :geo_bbox => to_solr_bbox,
    :geo_data_type_s => 'vector',
    :geo_format_s => self.format.first,
    :geo_geometry_type_s => 'Polygon',
    :geo_layername_s => File.basename(self.layername.first, '.shp'),
    :geo_ne_pt => Dor::GeoMetadataDS.to_wkt([bbox.e, bbox.n]),
    :geo_pt => to_solr_centroid,
    :geo_sw_pt => Dor::GeoMetadataDS.to_wkt([bbox.w, bbox.s]),
    :geo_proj => self.projection.first,
    :dc_coverage_t => to_dc_coverage,
    :dc_creator_t => self.originator.first,
    :dc_date_i => self.publish_dt.map {|i| i.to_s[0..3]},
    :dc_description_t => [self.abstract.first, self.purpose.first].join(";\n"),
    :dc_format_s => 'application/x-esri-shapefile',
    :dc_language_s => self.metadata_language.first,
    :dc_title_t => self.title.first,
    :text => [self.title.first, self.abstract.first, self.purpose.first].join(";\n")
  }
  fields.each do |name, value|
    ::Solrizer::Extractor.insert_solr_field_value(target, name.to_s, value)
  end

  target
rescue
  solr_doc
end
201
+
202
+ # @return [Struct] bounding box with members in w, e, n, s (minX maxX maxY minY) order
203
+ # with .w, .e, .n, .s for west, east, north, south as floats
204
def to_bbox
  ns = { 'xmlns:gmd' => NS[:gmd], 'xmlns:gco' => NS[:gco] }
  # only the first geographic bounding box found in the metadata is used
  box = metadata.xpath(
    '//gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox', ns
  ).first

  # read the four gco:Decimal bounds in the struct's member order (w, e, n, s)
  bounds = %w[westBoundLongitude eastBoundLongitude northBoundLatitude southBoundLatitude].map do |tag|
    box.xpath("gmd:#{tag}/gco:Decimal", ns).text.to_f
  end
  Struct.new(:w, :e, :n, :s).new(*bounds)
end
216
+
217
+ # @return [Array<Numeric>] (x y) coordinates of center point - assumes #to_bbox
218
+ # @see http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
219
def to_centroid
  box = to_bbox
  # midpoint of the west/east and north/south bounds, returned as [x, y]
  mid_x = (box.w + box.e)/2
  mid_y = (box.n + box.s)/2
  [mid_x, mid_y]
end
223
+
224
+ # A lat-lon rectangle can be indexed with 4 numbers in minX minY maxX maxY order:
225
+ #
226
+ # <field name="geo">-74.093 41.042 -69.347 44.558</field>
227
+ # <field name="geo">POLYGON((...))</field>
228
+ #
229
+ # @param [Symbol] either :solr3 or :solr4
230
+ # @return [String] minX minY maxX maxY for :solr3 or POLYGON((...)) for :solr4
231
+ # @see http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
232
def to_solr_bbox(format = :solr4)
  # the bounding box is read before the format is checked
  box = to_bbox

  case format
  when :solr4 then Dor::GeoMetadataDS.to_wkt [box.w, box.s], [box.e, box.n]
  when :solr3 then [box.w, box.s, box.e, box.n].join(' ')
  else raise ArgumentError, "Unsupported format #{format}"
  end
end
244
+
245
+ # @return [String] in Dublin Core Coverage format
246
def to_dc_coverage
  box = to_bbox
  # space-separated key=value pairs: x is longitude (w/e), y is latitude (s/n)
  [
    ['x.min', box.w],
    ['x.max', box.e],
    ['y.min', box.s],
    ['y.max', box.n]
  ].map { |label, value| "#{label}=#{value}" }.join(' ')
end
250
+
251
+ # A lat-lon point for the centroid of the bounding box:
252
+ #
253
+ # <field name="geo">69.4325,-78.085007</field>
254
+ # <field name="geo">POINT(-78.085007 69.4325)</field>
255
+ #
256
+ # @param [Symbol] either :solr3 or :solr4
257
+ # @return [String] "y,x" for :solr3 or POINT(x y) for :solr4
258
+ # @see http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
259
+
260
+ # @return [String] (y,x) coordinates of center point matching the LatLonType Solr type
261
+ # @see http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
262
def to_solr_centroid(format = :solr4)
  # the centroid is computed before the format is checked
  lon, lat = to_centroid

  case format
  when :solr4 then Dor::GeoMetadataDS.to_wkt [lon, lat]
  when :solr3 then [lat, lon].join(',') # for solr.LatLonType
  else raise ArgumentError, "Unsupported format #{format}"
  end
end
274
+
275
+ private
276
+
277
+ # @param [Array<Numeric>] (x,y) coordinates for point or bounding box
278
+ # @return [String] WKT for point or rectangle
279
def self.to_wkt(xy, xy2 = nil)
  # a single coordinate pair is rendered as a point
  return "POINT(#{xy[0]} #{xy[1]})" unless xy2

  # sort the two corners so the rectangle is the same whichever order they arrive in
  xs = [xy[0], xy2[0]]
  ys = [xy[1], xy2[1]]
  west = xs.min
  east = xs.max
  south = ys.min
  north = ys.max

  # closed ring: starts and ends on the south-west corner
  "POLYGON((#{west} #{south}, #{west} #{north}, #{east} #{north}, #{east} #{south}, #{west} #{south}))"
end
290
+
291
+ # Convert to MARC 255 DD into DDMMSS
292
+ # westernmost longitude, easternmost longitude, northernmost latitude, and southernmost latitude
293
+ # e.g., -109.758319 -- -88.990844/48.999336 -- 29.423028
294
# @param [String] s decimal-degree coordinates as "W -- E/N -- S"
# @return [String] the same bounds as hemisphere-prefixed DDMMSS, "W ..--E ../N ..--S .."
# @raise [ArgumentError] if s cannot be parsed into four numbers, or if a
#   latitude is outside -90..90 or a longitude is outside -180..180
def self.to_coordinates_ddmmss(s)
  # Parsed with a regexp rather than String#scanf: scanf is not bundled with
  # Ruby >= 2.7, and a partial scanf match left nils behind that surfaced as
  # NoMethodError in the range checks below instead of ArgumentError.
  number = '([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)'
  layout = /\A\s*#{number}\s*--\s*#{number}\s*\/\s*#{number}\s*--\s*#{number}/
  parsed = layout.match(s.to_s)
  raise ArgumentError, "Unparseable coordinates: #{s.inspect}" unless parsed

  west, east, north, south = parsed.captures.map { |text| text.to_f }
  unless [north, south].all? { |lat| lat >= -90 && lat <= 90 }
    raise ArgumentError, "Out of bounds latitude: #{north} #{south}"
  end
  unless [west, east].all? { |lon| lon >= -180 && lon <= 180 }
    raise ArgumentError, "Out of bounds longitude: #{west} #{east}"
  end

  # negative longitudes are West, negative latitudes are South; magnitudes are formatted unsigned
  w = "#{west < 0 ? 'W' : 'E'} #{Dor::GeoMetadataDS::dd2ddmmss_abs west}"
  e = "#{east < 0 ? 'W' : 'E'} #{Dor::GeoMetadataDS::dd2ddmmss_abs east}"
  n = "#{north < 0 ? 'S' : 'N'} #{Dor::GeoMetadataDS::dd2ddmmss_abs north}"
  south_label = "#{south < 0 ? 'S' : 'N'} #{Dor::GeoMetadataDS::dd2ddmmss_abs south}"
  "#{w}--#{e}/#{n}--#{south_label}"
end
304
+
305
# Convert decimal degrees to an unsigned degrees/minutes/seconds string.
# Seconds are rounded to whole numbers; zero minutes and zero seconds are left out.
# e.g.,
# * -109.758319 => 109°45ʹ30ʺ
# * 48.999336 => 48°59ʹ58ʺ
# * 10.5 => 10°30ʹ
E = 1 # NOTE(review): not referenced by the code visible here -- confirm before removing
QSEC = 'ʺ'
QMIN = 'ʹ'
QDEG = "\u00B0"
def self.dd2ddmmss_abs(f)
  magnitude = f.to_f.abs
  degrees = magnitude.floor
  minutes_exact = (magnitude - degrees) * 60
  minutes = minutes_exact.floor
  seconds = ((minutes_exact - minutes) * 60).round

  # rounding can yield 60 seconds; carry into minutes, and from minutes into degrees
  if seconds >= 60
    minutes += 1
    seconds = 0
  end
  if minutes >= 60
    degrees += 1
    minutes = 0
  end

  pieces = ["#{degrees}#{QDEG}"]
  pieces << "#{minutes}#{QMIN}" if minutes > 0
  pieces << "#{seconds}#{QSEC}" if seconds > 0
  pieces.join
end
323
+ end
324
+ end
@@ -78,7 +78,7 @@ module Dor
78
78
  end
79
79
 
80
80
  # @param [String] description optional text describing version change
81
- # @param [Symbol] :significance optional which part of the version tag to increment
81
+ # @param [Symbol] significance optional which part of the version tag to increment
82
82
  # :major, :minor, :admin (see VersionTag#increment)
83
83
  def increment_version(description = nil, significance = nil)
84
84
  if( find_by_terms(:version).size == 0)
@@ -107,7 +107,7 @@ module Dor
107
107
  end
108
108
  end
109
109
 
110
- # @returns [Fixnum] value of the most current versionId
110
+ # @return [Fixnum] value of the most current versionId
111
111
  def current_version_id
112
112
  current_version=current_version_node
113
113
  if current_version.nil?
@@ -151,7 +151,6 @@ module Dor
151
151
 
152
152
  # Appends contentMetadata file resources from the source objects to this object
153
153
  # @param [Array<String>] source_obj_pids ids of the secondary objects that will get their contentMetadata merged into this one
154
- # @param [Logger] logger optional logger to record warnings. Otherwise, warnings get sent to STDOUT
155
154
  def copy_file_resources source_obj_pids
156
155
  primary_cm = contentMetadata.ng_xml
157
156
  base_id = primary_cm.at_xpath('/contentMetadata/@objectId').value
@@ -253,8 +253,8 @@ module Dor
253
253
  end
254
254
  end
255
255
  def agreement
256
- if agreement_object
257
- agreement_object
256
+ if agreement_object and agreement_object.first
257
+ agreement_object.first.pid
258
258
  else
259
259
  ''
260
260
  end
@@ -0,0 +1,36 @@
1
module Dor
  # Mixin that attaches a 'geoMetadata' datastream (Dor::GeoMetadataDS) to an object
  # and knows how to populate it from Dor::MetadataService.
  module Geoable
    extend ActiveSupport::Concern
    include SolrDocHelper

    # Error class for crosswalk failures.
    # Derives from StandardError (not Exception) so that ordinary `rescue`
    # clauses can handle it; `rescue Exception` handlers still catch it.
    class CrosswalkError < StandardError; end

    included do
      has_metadata :name => 'geoMetadata',
                   :type => Dor::GeoMetadataDS,
                   :label => 'Geographic Information Metadata in ISO 19139',
                   :control_group => 'M'
    end

    # Fetches the metadata for the first of this object's identityMetadata
    # otherId values that Dor::MetadataService reports as resolvable.
    #
    # @return [String, nil] XML, or nil when none of the ids is resolvable
    def fetch_geoMetadata_datastream
      candidates = self.datastreams['identityMetadata'].otherId.collect { |oid| oid.to_s }
      metadata_id = Dor::MetadataService.resolvable(candidates).first
      return nil if metadata_id.nil?

      Dor::MetadataService.fetch(metadata_id.to_s)
    end

    # Fills the given datastream with the fetched metadata; does nothing
    # (and returns nil) when no metadata could be fetched.
    #
    # @param ds the datastream to populate (label, ng_xml and content are set)
    def build_geoMetadata_datastream(ds)
      content = fetch_geoMetadata_datastream
      return if content.nil?

      ds.dsLabel = self.label
      ds.ng_xml = Nokogiri::XML(content)
      ds.ng_xml.normalize_text!
      ds.content = ds.ng_xml.to_xml
    end
  end
end
@@ -14,6 +14,7 @@ module Dor
14
14
  include Versionable
15
15
  include Contentable
16
16
  include Discoverable
17
+ include Geoable
17
18
 
18
19
  end
19
20
 
@@ -37,6 +38,7 @@ end
37
38
  # Publishable = Transfer of metadata to discovery and access systems.
38
39
  # Shelvable = Transfer of content to digital stacks.
39
40
  # Upgradable = Remediation of existing objects when content standards change.
41
+ # Geoable = Descriptive metadata for GIS in ISO 19139/19110.
40
42
 
41
43
  # Required for all DOR objects:
42
44
  # - Identifiable
@@ -35,31 +35,21 @@ module Dor
35
35
  raise Dor::ParameterError, "Invalid subset value: #{subset}"
36
36
  end
37
37
 
38
- basename = version.nil? ? "#{DIFF_FILENAME}.#{subset}.xml" : "#{DIFF_FILENAME}.#{subset}.#{version}.xml"
39
- druid = DruidTools::Druid.new(self.pid, Dor::Config.stacks.local_workspace_root)
40
- diff_cache = File.join(druid.temp_dir, basename)
41
- # check for cached copy before contacting SDR
42
- if File.exists? diff_cache
43
- File.read(diff_cache)
44
- else
45
- # fetch content metadata inventory difference from SDR
46
- if Dor::Config.sdr.rest_client.nil?
47
- raise Dor::ParameterError, 'Missing Dor::Config.sdr.rest_client'
48
- end
49
- sdr_client = Dor::Config.sdr.rest_client
50
- current_content = self.datastreams['contentMetadata'].content
51
- if current_content.nil?
52
- raise Dor::Exception, "Missing contentMetadata datastream"
53
- end
54
- query_string = { :subset => subset.to_s }
55
- query_string[:version] = version.to_s unless version.nil?
56
- query_string = URI.encode_www_form(query_string)
57
- sdr_query = "objects/#{self.pid}/#{DIFF_QUERY}?#{query_string}"
58
- response = sdr_client[sdr_query].post(current_content, :content_type => 'application/xml')
59
- # cache response
60
- File.open(diff_cache, 'w') { |f| f << response }
61
- response
38
+ # fetch content metadata inventory difference from SDR
39
+ if Dor::Config.sdr.rest_client.nil?
40
+ raise Dor::ParameterError, 'Missing Dor::Config.sdr.rest_client'
41
+ end
42
+ sdr_client = Dor::Config.sdr.rest_client
43
+ current_content = self.datastreams['contentMetadata'].content
44
+ if current_content.nil?
45
+ raise Dor::Exception, "Missing contentMetadata datastream"
62
46
  end
47
+ query_string = { :subset => subset.to_s }
48
+ query_string[:version] = version.to_s unless version.nil?
49
+ query_string = URI.encode_www_form(query_string)
50
+ sdr_query = "objects/#{self.pid}/#{DIFF_QUERY}?#{query_string}"
51
+ response = sdr_client[sdr_query].post(current_content, :content_type => 'application/xml')
52
+ response
63
53
  end
64
54
 
65
55
  end