dor-services 4.4.13 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,324 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'scanf'
4
+ require 'uri'
5
+
6
+ module Dor
7
+ # GeoMetadataDS is a Fedora datastream for geographic metadata. It uses
8
+ # the ISO 19139 metadata standard schema - a metadata standard for Geographic Information
9
+ # The datastream is packaged using RDF to identify the optional ISO 19139 feature catalog
10
+ # @see http://www.isotc211.org
11
+ # @author Darren Hardy
12
+ class GeoMetadataDS < ActiveFedora::NokogiriDatastream
13
+ include SolrDocHelper
14
+
15
+ attr_accessor :geometryType, :zipName, :purl
16
+
17
# XML namespace URIs used in the geoMetadata datastream, keyed by prefix
NS = {
  :rdf => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
  :gco => 'http://www.isotc211.org/2005/gco',
  :gmd => 'http://www.isotc211.org/2005/gmd',
  :gfc => 'http://www.isotc211.org/2005/gfc'
}

# The same namespaces keyed as "xmlns:<prefix>", ready to be passed as
# namespace bindings to xpath calls or as attributes to an XML builder
XMLNS = NS.each_with_object({}) do |(prefix, uri), declarations|
  declarations["xmlns:#{prefix}"] = uri
end

# Schema locations: one "<namespace> <namespace>/<prefix>.xsd" entry per namespace
NS_XSD = NS.map { |prefix, uri| "#{uri} #{uri}/#{prefix}.xsd" }
30
+
31
# [Nokogiri::XSLT::Stylesheet] for ISO 19139 to MODS
XSLT_GEOMODS = Nokogiri::XSLT(
  File.read(File.join(File.dirname(__FILE__), 'geo2mods.xsl'))
)

# [Nokogiri::XSLT::Stylesheet] for MODS to Dublin Core
# The stylesheet is still handed to Nokogiri as an IO (as before), but it is
# opened in block form so the file handle is closed once parsing is done
# instead of being left open for the life of the process.
XSLT_DC = File.open(
  File.expand_path(File.dirname(__FILE__) + '/../models/mods2dc.xslt')
) do |stylesheet|
  Nokogiri::XSLT(stylesheet)
end
39
+
40
# OM terminology mapping term names onto ISO 19139 XPaths. Terms are grouped
# by the element they hang off; `base` holds the shared path prefix for each group.
# @see http://ruby-doc.org/gems/docs/o/om-1.8.0/OM/XML/Document/ClassMethods.html#method-i-set_terminology
set_terminology do |t|
  t.root :path        => '/rdf:RDF/rdf:Description/gmd:MD_Metadata',
         'xmlns:gmd'  => NS[:gmd],
         'xmlns:gco'  => NS[:gco],
         'xmlns:rdf'  => NS[:rdf]

  t.id_ :path => '/rdf:RDF/rdf:Description[1]/@rdf:about'

  # terms directly under the metadata root
  base = './'
  t.dataset_id  :path => base + 'gmd:dataSetURI/gco:CharacterString'
  t.file_id     :path => base + 'gmd:fileIdentifier/gco:CharacterString'
  t.metadata_dt :path => base + 'gmd:dateStamp/gco:Date/text()' # XXX: Allow DateTime
  # NOTE(review): unlike its siblings this path repeats gmd:MD_Metadata below
  # the root -- confirm whether that is intended
  t.metadata_language :path => base + 'gmd:MD_Metadata/gmd:language/gmd:LanguageCode[@codeSpace="ISO639-2"]/@codeListValue'

  # data identification
  base = 'gmd:identificationInfo/gmd:MD_DataIdentification/'
  t.abstract  :path => base + 'gmd:abstract/gco:CharacterString/text()'
  t.purpose   :path => base + 'gmd:purpose/gco:CharacterString/text()'
  t.publisher :path => base + 'gmd:pointOfContact/gmd:CI_ResponsibleParty[gmd:role/gmd:CI_RoleCode/@codeListValue="pointOfContact"]/gmd:organisationName/gco:CharacterString/text()'

  # citation
  base = 'gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/'
  t.title      :path => base + 'gmd:title/gco:CharacterString/text()'
  t.publish_dt :path => base + 'gmd:date/gmd:CI_Date/gmd:date/gco:Date/text()'
  t.originator :path => base + 'gmd:citedResponsibleParty/gmd:CI_ResponsibleParty[gmd:role/gmd:CI_RoleCode/@codeListValue="originator"]/gmd:organisationName/gco:CharacterString/text()'

  # distribution format
  base = 'gmd:distributionInfo/gmd:MD_Distribution/gmd:distributionFormat/gmd:MD_Format/'
  t.format :path => base + 'gmd:name/gco:CharacterString/text()'#, :index_as => [:facetable]

  # online resource
  base = 'gmd:distributionInfo/gmd:MD_Distribution/gmd:transferOptions/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource/'
  t.layername :path => base + 'gmd:name/gco:CharacterString/text()'

  # reference system
  # XXX should define projection as codeSpace + ':' + code in terminology
  base = 'gmd:referenceSystemInfo/gmd:MD_ReferenceSystem/gmd:referenceSystemIdentifier/gmd:RS_Identifier/'
  t.projection            :path => base + 'gmd:code/gco:CharacterString/text()'
  t.projection_code_space :path => base + 'gmd:codeSpace/gco:CharacterString/text()'
end
76
+
77
# Extracts the ISO 19139 metadata record into a standalone document.
# @return [Nokogiri::XML::Document] with gmd:MD_Metadata as root node
# @raise [Dor::ParameterError] if MD_Metadata is missing
def metadata
  nodes = ng_xml.xpath('/rdf:RDF/rdf:Description/gmd:MD_Metadata', XMLNS)
  unless nodes.nil? || nodes.empty?
    # re-parse the first match so it becomes the root of its own document
    return Nokogiri::XML(nodes.first.to_xml)
  end
  raise Dor::ParameterError, "Invalid geoMetadata -- missing MD_Metadata: #{nodes}"
end
87
+
88
# Extracts the (optional) feature catalogue into a standalone document.
# @return [Nokogiri::XML::Document] with gfc:FC_FeatureCatalogue as root node,
#   or nil if not provided
def feature_catalogue
  nodes = ng_xml.xpath('/rdf:RDF/rdf:Description/gfc:FC_FeatureCatalogue', XMLNS)
  # Feature catalog is optional
  return nil if nodes.nil? || nodes.empty?
  Nokogiri::XML(nodes.first.to_xml)
end
98
+
99
# Builds an empty rdf:RDF wrapper holding one rdf:Description with an empty
# gmd:MD_Metadata and one with an empty gfc:FC_FeatureCatalogue.
# Add your druid as the suffix to rdf:about attributes.
# Includes all possible xmlns for gmd and gfc
# @return [Nokogiri::XML::Document] Contains skeleton geoMetadata XML
def self.xml_template
  schema_attrs = {
    'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
    "xsi:schemaLocation" => NS_XSD.join(' ')
  }
  builder = Nokogiri::XML::Builder.new do |xml|
    xml['rdf'].RDF(XMLNS, schema_attrs) do
      xml['rdf'].Description('rdf:about' => nil) { xml['gmd'].MD_Metadata }
      xml['rdf'].Description('rdf:about' => nil) { xml['gfc'].FC_FeatureCatalogue }
    end
  end
  builder.doc
end
116
+
117
# Generates MODS from ISO 19139 by running the XSLT_GEOMODS stylesheet over
# #metadata, then rewriting well-known projection codes to display names and
# cartographic coordinates to degrees/minutes/seconds.
#
# @param [Hash] params extra stylesheet parameters; 'geometryType', 'zipName'
#   and 'purl' are always set from this datastream's attributes
# @return [Nokogiri::XML::Document] Derived MODS metadata record
# @raise [CrosswalkError] Raises if the generated MODS is empty or has no children
#
# Uses GML SimpleFeatures for the geometry type (e.g., Polygon, LineString, etc.)
# @see http://portal.opengeospatial.org/files/?artifact_id=25355
#
# NOTE(review): CrosswalkError is referenced unqualified here; confirm it
# resolves from within this class.
def to_mods(params = {})
  geometry_type = @geometryType.nil? ? 'Polygon' : @geometryType
  zip_name = @zipName.nil? ? 'data.zip' : @zipName
  # values are wrapped in single quotes so the stylesheet sees string literals
  xslt_params = params.merge(
    'geometryType' => "'#{geometry_type}'",
    'zipName' => "'#{zip_name}'",
    'purl' => "'#{@purl}'"
  )

  doc = XSLT_GEOMODS.transform(metadata.document, xslt_params.to_a.flatten)
  has_content = doc.root && doc.root.children.size > 0
  raise CrosswalkError, 'to_mods produced incorrect xml' unless has_content

  # ap doc
  cartographics = '/mods:mods/mods:subject/mods:cartographics'
  mods_ns = { 'xmlns:mods' => Dor::DescMetadataDS::MODS_NS }

  # Retrieve this mapping from config file
  wgs84 = 'World Geodetic System (WGS84)'
  nad83 = 'North American Datum (NAD83)'
  projection_names = {
    'epsg:4326' => wgs84, 'epsg::4326' => wgs84, 'urn:ogc:def:crs:epsg::4326' => wgs84,
    'epsg:4269' => nad83, 'epsg::4269' => nad83, 'urn:ogc:def:crs:epsg::4269' => nad83
  }
  doc.xpath(cartographics + '/mods:projection', mods_ns).each do |node|
    display_name = projection_names[node.content.downcase]
    node.content = display_name if display_name
  end

  doc.xpath(cartographics + '/mods:coordinates', mods_ns).each do |node|
    node.content = '(' + self.class.to_coordinates_ddmmss(node.content.to_s) + ')'
  end

  doc
end
158
+
159
# Generates Dublin Core by applying the XSLT_DC stylesheet to the MODS
# produced by #to_mods.
# @return the result of XSLT_DC.transform
def to_dublin_core
  mods = to_mods
  XSLT_DC.transform(mods)
end
162
+
163
# Adds spatial and Dublin Core style fields derived from this datastream to a
# Solr document hash.
# @deprecated stub for GeoBlacklight (not Argo -- use to_solr as usual)
# @param [Hash] solr_doc document to add the fields to
# @return [Hash] solr_doc; if anything raises a StandardError along the way
#   the error is swallowed and solr_doc is returned as it stands at that point
def to_solr_spatial(solr_doc=Hash.new, *args)
  doc = solr_doc#super solr_doc, *args
  bb = to_bbox
  ap({:doc => doc, :bb => bb, :self => self}) if $DEBUG

  fields = {
    :id => self.id.first,
    :druid => URI(self.id.first).path.gsub(%r{^/}, ''),
    :file_id_s => self.file_id.first,
    :geo_bbox => to_solr_bbox,
    :geo_data_type_s => 'vector',
    :geo_format_s => self.format.first,
    :geo_geometry_type_s => 'Polygon',
    :geo_layername_s => File.basename(self.layername.first, '.shp'),
    :geo_ne_pt => Dor::GeoMetadataDS.to_wkt([bb.e, bb.n]),
    :geo_pt => to_solr_centroid,
    :geo_sw_pt => Dor::GeoMetadataDS.to_wkt([bb.w, bb.s]),
    :geo_proj => self.projection.first,
    :dc_coverage_t => to_dc_coverage,
    :dc_creator_t => self.originator.first,
    :dc_date_i => self.publish_dt.map {|i| i.to_s[0..3]},
    :dc_description_t => [self.abstract.first, self.purpose.first].join(";\n"),
    :dc_format_s => 'application/x-esri-shapefile',
    :dc_language_s => self.metadata_language.first,
    :dc_title_t => self.title.first,
    :text => [self.title.first, self.abstract.first, self.purpose.first].join(";\n")
  }
  fields.each do |name, value|
    ::Solrizer::Extractor.insert_solr_field_value(doc, name.to_s, value)
  end

  doc
rescue
  # There are a whole bunch of namespace-related things that can go
  # wrong with this terminology. Until it's fixed in OM, ignore them all.
  solr_doc
end
201
+
202
# Value object returned by #to_bbox; defined once rather than building a new
# anonymous Struct class on every call.
BoundingBox = Struct.new(:w, :e, :n, :s)

# Reads the first gmd:EX_GeographicBoundingBox found in #metadata.
# @return [Struct] in minX minY maxX maxY order
#   with .w, .e, .n., .s for west, east, north, south as floats
# @raise [Dor::ParameterError] if the metadata has no EX_GeographicBoundingBox
def to_bbox
  ns = { 'xmlns:gmd' => NS[:gmd], 'xmlns:gco' => NS[:gco] }
  bb = metadata.xpath(
    '//gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox', ns
  ).first
  # without this guard a record lacking an extent fails with NoMethodError on nil
  if bb.nil?
    raise Dor::ParameterError, 'Invalid geoMetadata -- missing EX_GeographicBoundingBox'
  end

  edges = %w[westBoundLongitude eastBoundLongitude northBoundLatitude southBoundLatitude]
  w, e, n, s = edges.map do |edge|
    bb.xpath("gmd:#{edge}/gco:Decimal", ns).text.to_f
  end
  BoundingBox.new(w, e, n, s)
end
216
+
217
# Midpoint of the bounding box returned by #to_bbox.
# @return [Array<Numeric>] (x y) coordinates of center point - assumes #to_bbox
# @see http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
def to_centroid
  box = to_bbox
  mid_x = (box.w + box.e)/2
  mid_y = (box.n + box.s)/2
  [mid_x, mid_y]
end
223
+
224
# Bounding box from #to_bbox rendered for a Solr spatial field.
# A lat-lon rectangle can be indexed with 4 numbers in minX minY maxX maxY order:
#
#   <field name="geo">-74.093 41.042 -69.347 44.558</field>
#   <field name="geo">POLYGON((...))</field>
#
# @param [Symbol] format either :solr3 or :solr4
# @return [String] minX minY maxX maxY for :solr3 or POLYGON((...)) for :solr4
# @raise [ArgumentError] for any other format
# @see http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
def to_solr_bbox(format = :solr4)
  box = to_bbox
  sw_corner = [box.w, box.s]
  ne_corner = [box.e, box.n]

  if format == :solr3
    (sw_corner + ne_corner).join(' ')
  elsif format == :solr4
    Dor::GeoMetadataDS.to_wkt(sw_corner, ne_corner)
  else
    raise ArgumentError, "Unsupported format #{format}"
  end
end
244
+
245
# Bounding box from #to_bbox as "x.min=W x.max=E y.min=S y.max=N".
# @return [String] in Dublin Core Coverage format
def to_dc_coverage
  box = to_bbox
  limits = [
    ['x.min', box.w],
    ['x.max', box.e],
    ['y.min', box.s],
    ['y.max', box.n]
  ]
  limits.map { |label, value| "#{label}=#{value}" }.join(' ')
end
250
+
251
# A lat-lon point for the centroid of the bounding box (see #to_centroid):
#
#   <field name="geo">69.4325,-78.085007</field>
#   <field name="geo">POINT(-78.085007 69.4325)</field>
#
# @param [Symbol] format either :solr3 or :solr4
# @return [String] "y,x" for :solr3 (matching the LatLonType Solr type)
#   or POINT(x y) for :solr4
# @raise [ArgumentError] for any other format
# @see http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
def to_solr_centroid(format = :solr4)
  x, y = to_centroid

  return [y, x].join(',') if format == :solr3 # for solr.LatLonType
  return Dor::GeoMetadataDS.to_wkt([x, y]) if format == :solr4

  raise ArgumentError, "Unsupported format #{format}"
end
274
+
275
# NOTE: `private` does not apply to the `def self.` methods below; they stay
# publicly callable (and are called as Dor::GeoMetadataDS.to_wkt elsewhere).
private

# Renders a point, or the rectangle spanned by two corner points, as WKT.
# The two corners may be given in any order.
# @param [Array<Numeric>] xy (x,y) coordinates for point or bounding box
# @param [Array<Numeric>] xy2 optional (x,y) of the opposite corner
# @return [String] WKT for point or rectangle
def self.to_wkt(xy, xy2 = nil)
  return "POINT(#{xy[0]} #{xy[1]})" unless xy2

  xs = [xy[0], xy2[0]]
  ys = [xy[1], xy2[1]]
  west, east = xs.min, xs.max
  south, north = ys.min, ys.max
  # closed ring: SW, NW, NE, SE and back to SW
  ring = [[west, south], [west, north], [east, north], [east, south], [west, south]]
  "POLYGON((#{ring.map { |corner| corner.join(' ') }.join(', ')}))"
end
290
+
291
# Convert to MARC 255 DD into DDMMSS
# westernmost longitude, easternmost longitude, northernmost latitude, and southernmost latitude
# e.g., -109.758319 -- -88.990844/48.999336 -- 29.423028
#
# @param [String] s coordinates as "W -- E/N -- S" in decimal degrees
# @return [String] "<w>--<e>/<n>--<s>", each part a hemisphere letter, a space
#   and the value formatted by dd2ddmmss_abs
# @raise [ArgumentError] if s does not contain four numbers in that layout, or
#   if a latitude is outside -90..90 or a longitude outside -180..180
def self.to_coordinates_ddmmss s
  number = '[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?'
  layout = /\A\s*(#{number})\s*--\s*(#{number})\s*\/\s*(#{number})\s*--\s*(#{number})/
  parsed = layout.match(s.to_s)
  # malformed input previously surfaced as NoMethodError on nil further down
  raise ArgumentError, "Unparseable coordinates: #{s.inspect}" if parsed.nil?

  west, east, north, south = parsed.captures.map { |value| value.to_f }
  unless [north, south].all? { |lat| lat >= -90 && lat <= 90 }
    raise ArgumentError, "Out of bounds latitude: #{north} #{south}"
  end
  unless [west, east].all? { |lon| lon >= -180 && lon <= 180 }
    raise ArgumentError, "Out of bounds longitude: #{west} #{east}"
  end

  w_label = "#{west < 0 ? 'W' : 'E'} #{Dor::GeoMetadataDS.dd2ddmmss_abs(west)}"
  e_label = "#{east < 0 ? 'W' : 'E'} #{Dor::GeoMetadataDS.dd2ddmmss_abs(east)}"
  n_label = "#{north < 0 ? 'S' : 'N'} #{Dor::GeoMetadataDS.dd2ddmmss_abs(north)}"
  s_label = "#{south < 0 ? 'S' : 'N'} #{Dor::GeoMetadataDS.dd2ddmmss_abs(south)}"
  "#{w_label}--#{e_label}/#{n_label}--#{s_label}"
end
304
+
305
# Convert DD.DD to an unsigned DD MM SS string.
# The sign is dropped, seconds are rounded to the nearest whole second, and
# a minutes or seconds part equal to zero is left out.
# e.g.,
# * -109.758319 => 109°45ʹ30ʺ
# * 48.999336 => 48°59ʹ58ʺ
E = 1
QSEC = 'ʺ'
QMIN = 'ʹ'
QDEG = "\u00B0"
def self.dd2ddmmss_abs(f)
  decimal_degrees = f.to_f.abs
  degrees = decimal_degrees.floor
  decimal_minutes = ((decimal_degrees - degrees) * 60)
  minutes = decimal_minutes.floor
  seconds = ((decimal_minutes - decimal_minutes.floor) * 60).round

  # rounding can produce 60 seconds (and in turn 60 minutes): carry upwards
  if seconds >= 60
    minutes += 1
    seconds = 0
  end
  if minutes >= 60
    degrees += 1
    minutes = 0
  end

  parts = ["#{degrees}#{QDEG}"]
  parts << "#{minutes}#{QMIN}" if minutes > 0
  parts << "#{seconds}#{QSEC}" if seconds > 0
  parts.join
end
323
+ end
324
+ end
@@ -78,7 +78,7 @@ module Dor
78
78
  end
79
79
 
80
80
  # @param [String] description optional text describing version change
81
- # @param [Symbol] :significance optional which part of the version tag to increment
81
+ # @param [Symbol] significance optional which part of the version tag to increment
82
82
  # :major, :minor, :admin (see VersionTag#increment)
83
83
  def increment_version(description = nil, significance = nil)
84
84
  if( find_by_terms(:version).size == 0)
@@ -107,7 +107,7 @@ module Dor
107
107
  end
108
108
  end
109
109
 
110
- # @returns [Fixnum] value of the most current versionId
110
+ # @return [Fixnum] value of the most current versionId
111
111
  def current_version_id
112
112
  current_version=current_version_node
113
113
  if current_version.nil?
@@ -151,7 +151,6 @@ module Dor
151
151
 
152
152
  # Appends contentMetadata file resources from the source objects to this object
153
153
  # @param [Array<String>] source_obj_pids ids of the secondary objects that will get their contentMetadata merged into this one
154
- # @param [Logger] logger optional logger to record warnings. Otherwise, warnings get sent to STDOUT
155
154
  def copy_file_resources source_obj_pids
156
155
  primary_cm = contentMetadata.ng_xml
157
156
  base_id = primary_cm.at_xpath('/contentMetadata/@objectId').value
@@ -253,8 +253,8 @@ module Dor
253
253
  end
254
254
  end
255
255
  def agreement
256
- if agreement_object
257
- agreement_object
256
+ if agreement_object and agreement_object.first
257
+ agreement_object.first.pid
258
258
  else
259
259
  ''
260
260
  end
@@ -0,0 +1,36 @@
1
module Dor
  # Mixin that gives a DOR object a 'geoMetadata' datastream
  # (Dor::GeoMetadataDS) and the means to populate it from the metadata service.
  module Geoable
    extend ActiveSupport::Concern
    include SolrDocHelper

    # Raised when a metadata crosswalk produces unusable output.
    # Subclasses StandardError (not Exception) so that ordinary `rescue`
    # clauses can handle it.
    class CrosswalkError < StandardError; end

    included do
      has_metadata :name => 'geoMetadata',
                   :type => Dor::GeoMetadataDS,
                   :label => 'Geographic Information Metadata in ISO 19139',
                   :control_group => 'M'
    end

    # Looks up this object's identityMetadata otherId values in
    # Dor::MetadataService and fetches the first one that is resolvable.
    # @return [String] XML, or nil when none of the ids is resolvable
    def fetch_geoMetadata_datastream
      candidates = self.datastreams['identityMetadata'].otherId.map { |oid| oid.to_s }
      metadata_id = Dor::MetadataService.resolvable(candidates).first
      return nil if metadata_id.nil?

      Dor::MetadataService.fetch(metadata_id.to_s)
    end

    # Fills the given datastream with the fetched geoMetadata XML, with text
    # normalized. Does nothing when no metadata could be fetched.
    # @param ds the geoMetadata datastream to populate
    def build_geoMetadata_datastream(ds)
      content = fetch_geoMetadata_datastream
      return if content.nil?

      ds.dsLabel = self.label
      ds.ng_xml = Nokogiri::XML(content)
      ds.ng_xml.normalize_text!
      ds.content = ds.ng_xml.to_xml
    end
  end
end
@@ -14,6 +14,7 @@ module Dor
14
14
  include Versionable
15
15
  include Contentable
16
16
  include Discoverable
17
+ include Geoable
17
18
 
18
19
  end
19
20
 
@@ -37,6 +38,7 @@ end
37
38
  # Publishable = Transfer of metadata to discovery and access systems.
38
39
  # Shelvable = Transfer of content to digital stacks.
39
40
  # Upgradable = Remediation of existing objects when content standards change.
41
+ # Geoable = Descriptive metadata for GIS in ISO 19139/19110.
40
42
 
41
43
  # Required for all DOR objects:
42
44
  # - Identifiable
@@ -35,31 +35,21 @@ module Dor
35
35
  raise Dor::ParameterError, "Invalid subset value: #{subset}"
36
36
  end
37
37
 
38
- basename = version.nil? ? "#{DIFF_FILENAME}.#{subset}.xml" : "#{DIFF_FILENAME}.#{subset}.#{version}.xml"
39
- druid = DruidTools::Druid.new(self.pid, Dor::Config.stacks.local_workspace_root)
40
- diff_cache = File.join(druid.temp_dir, basename)
41
- # check for cached copy before contacting SDR
42
- if File.exists? diff_cache
43
- File.read(diff_cache)
44
- else
45
- # fetch content metadata inventory difference from SDR
46
- if Dor::Config.sdr.rest_client.nil?
47
- raise Dor::ParameterError, 'Missing Dor::Config.sdr.rest_client'
48
- end
49
- sdr_client = Dor::Config.sdr.rest_client
50
- current_content = self.datastreams['contentMetadata'].content
51
- if current_content.nil?
52
- raise Dor::Exception, "Missing contentMetadata datastream"
53
- end
54
- query_string = { :subset => subset.to_s }
55
- query_string[:version] = version.to_s unless version.nil?
56
- query_string = URI.encode_www_form(query_string)
57
- sdr_query = "objects/#{self.pid}/#{DIFF_QUERY}?#{query_string}"
58
- response = sdr_client[sdr_query].post(current_content, :content_type => 'application/xml')
59
- # cache response
60
- File.open(diff_cache, 'w') { |f| f << response }
61
- response
38
+ # fetch content metadata inventory difference from SDR
39
+ if Dor::Config.sdr.rest_client.nil?
40
+ raise Dor::ParameterError, 'Missing Dor::Config.sdr.rest_client'
41
+ end
42
+ sdr_client = Dor::Config.sdr.rest_client
43
+ current_content = self.datastreams['contentMetadata'].content
44
+ if current_content.nil?
45
+ raise Dor::Exception, "Missing contentMetadata datastream"
62
46
  end
47
+ query_string = { :subset => subset.to_s }
48
+ query_string[:version] = version.to_s unless version.nil?
49
+ query_string = URI.encode_www_form(query_string)
50
+ sdr_query = "objects/#{self.pid}/#{DIFF_QUERY}?#{query_string}"
51
+ response = sdr_client[sdr_query].post(current_content, :content_type => 'application/xml')
52
+ response
63
53
  end
64
54
 
65
55
  end