dor-services 2.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. data/lib/datastreams/content_metadata_ds.rb +12 -0
  2. data/lib/datastreams/embargo_metadata_ds.rb +107 -0
  3. data/lib/datastreams/events_ds.rb +58 -0
  4. data/lib/datastreams/identity_metadata_ds.rb +28 -0
  5. data/lib/datastreams/ng_tidy.rb +19 -0
  6. data/lib/datastreams/simple_dublin_core_ds.rb +23 -0
  7. data/lib/datastreams/workflow_definition_ds.rb +105 -0
  8. data/lib/datastreams/workflow_ds.rb +16 -0
  9. data/lib/dor-services.rb +19 -0
  10. data/lib/dor/admin_policy_object.rb +11 -0
  11. data/lib/dor/base.rb +81 -0
  12. data/lib/dor/cleanup_service.rb +32 -0
  13. data/lib/dor/config.rb +45 -0
  14. data/lib/dor/digital_stacks_service.rb +82 -0
  15. data/lib/dor/druid_utils.rb +41 -0
  16. data/lib/dor/embargo.rb +41 -0
  17. data/lib/dor/exceptions.rb +13 -0
  18. data/lib/dor/item.rb +141 -0
  19. data/lib/dor/metadata_handlers/catalog_handler.rb +22 -0
  20. data/lib/dor/metadata_handlers/mdtoolkit_handler.rb +42 -0
  21. data/lib/dor/metadata_service.rb +88 -0
  22. data/lib/dor/mods2dc.xslt +447 -0
  23. data/lib/dor/provenance_metadata_service.rb +65 -0
  24. data/lib/dor/registration_service.rb +87 -0
  25. data/lib/dor/rsolr.rb +27 -0
  26. data/lib/dor/sdr_ingest_service.rb +117 -0
  27. data/lib/dor/search_service.rb +86 -0
  28. data/lib/dor/suri_service.rb +37 -0
  29. data/lib/dor/tei2dc.xslt +102 -0
  30. data/lib/dor/workflow_object.rb +13 -0
  31. data/lib/dor/workflow_service.rb +111 -0
  32. data/lib/gsearch/demoFoxmlToSolr.xslt +384 -0
  33. data/lib/gsearch/schema.xml +229 -0
  34. data/lib/tasks/rdoc.rake +32 -0
  35. data/lib/xml_models/foxml.rb +261 -0
  36. data/lib/xml_models/identity_metadata/dublin_core.rb +119 -0
  37. data/lib/xml_models/identity_metadata/identity_metadata.rb +288 -0
  38. metadata +462 -0
@@ -0,0 +1,12 @@
1
# Datastream wrapper around an object's contentMetadata XML.
class ContentMetadataDS < ActiveFedora::NokogiriDatastream

  # Builds a copy of the contentMetadata document with non-public material
  # stripped out. The datastream's own ng_xml is cloned first and the pruning
  # is applied to that copy.
  #
  # Removed, in order:
  # * resources that contain no file flagged deliver="yes" or publish="yes"
  # * files that are not flagged deliver="yes" or publish="yes"
  # * the preserve/shelve/publish/deliver attributes of the remaining files
  # * checksum children of the remaining files
  #
  # @return the pruned copy of the document
  def public_xml
    public_doc = ng_xml.clone

    hidden_resources = public_doc.xpath('/contentMetadata/resource[not(file[(@deliver="yes" or @publish="yes")])]')
    hidden_resources.each { |resource| resource.remove }

    hidden_files = public_doc.xpath('/contentMetadata/resource/file[not(@deliver="yes" or @publish="yes")]')
    hidden_files.each { |file| file.remove }

    remaining_files = public_doc.xpath('/contentMetadata/resource/file')
    remaining_files.xpath('@preserve|@shelve|@publish|@deliver').each { |attribute| attribute.remove }

    public_doc.xpath('/contentMetadata/resource/file/checksum').each { |checksum| checksum.remove }

    public_doc
  end

end
@@ -0,0 +1,107 @@
1
+ require 'active_fedora'
2
+ require 'nokogiri'
3
+ require 'time'
4
+
5
# Datastream holding an object's embargo information: a status, a release
# date, the matching "twenty percent visibility" status/date pair, and a
# releaseAccess node.
class EmbargoMetadataDS < ActiveFedora::NokogiriDatastream

  set_terminology do |t|
    t.root(:path => "embargoMetadata", :xmlns => '', :namespace_prefix => nil)
    t.status(:namespace_prefix => nil)
    t.release_date(:path => "releaseDate", :namespace_prefix => nil)
    t.twenty_pct_status( :path => "twentyPctVisibilityStatus", :namespace_prefix => nil)
    t.twenty_pct_release_date(:path => "twentyPctVisibilityReleaseDate", :namespace_prefix => nil)

    t.release_access(:path => "releaseAccess", :namespace_prefix => nil)
  end

  # Default EmbargoMetadataDS xml
  # @return [Nokogiri::XML::Document] an embargoMetadata document with empty child elements
  def self.xml_template
    builder = Nokogiri::XML::Builder.new do |xml|
      xml.embargoMetadata {
        xml.status
        xml.releaseDate
        xml.releaseAccess
        xml.twentyPctVisibilityStatus
        xml.twentyPctVisibilityReleaseDate
      }
    end
    return builder.doc
  end

  # Marks the datastream as non-versionable after the normal initialization.
  def initialize(attrs=nil)
    super
    @attributes[:versionable] = false
  end

  #################################################################################
  # Convenience methods to get and set embargo properties
  # Hides complexity/verbosity of OM TermOperators for simple, non-repeating values
  #################################################################################

  # Sets the embargo status and flags the datastream as dirty
  # @param [String] new_status the new status value
  def status=(new_status)
    update_values([:status] => new_status)
    self.dirty = true
  end

  # @return the first status value
  def status
    term_values(:status).first
  end

  # Sets the release date. Converts the date to beginning-of-day, UTC to help with Solr indexing
  # @param [Time] rd A Time object representing the release date. By default, it is set to now
  def release_date=(rd=Time.now)
    update_values([:release_date] => rd.beginning_of_day.utc.xmlschema)
    self.dirty = true
  end

  # Current releaseDate value
  # @return [Time, nil] the parsed release date, or nil when no date has been set
  def release_date
    parse_time_value(term_values(:release_date).first)
  end

  # Sets the 20% visibility status and flags the datastream as dirty
  # @param [String] new_status the new status value
  def twenty_pct_status=(new_status)
    update_values([:twenty_pct_status] => new_status)
    self.dirty = true
  end

  # @return the first twentyPctVisibilityStatus value
  def twenty_pct_status
    term_values(:twenty_pct_status).first
  end

  # Sets the 20% visibility release date. Converts the date to beginning-of-day, UTC to help with Solr indexing
  # @param [Time] rd A Time object representing the release date. By default, it is set to now
  def twenty_pct_release_date=(rd=Time.now)
    update_values([:twenty_pct_release_date] => rd.beginning_of_day.utc.xmlschema)
    self.dirty = true
  end

  # Current twentyPctVisibilityReleaseDate value
  # @return [Time, nil] the parsed date, or nil when no date has been set
  def twenty_pct_release_date
    parse_time_value(term_values(:twenty_pct_release_date).first)
  end

  # @return [Nokogiri::XML::Element] The releaseAccess node
  def release_access_node
    find_by_terms(:release_access).first
  end

  # Sets the releaseAccess node
  # @param [Nokogiri::XML::Document] new_doc Document whose root will replace the existing releaseAccess node
  # @raise [RuntimeError] if the root element of new_doc is not named releaseAccess
  def release_access_node=(new_doc)
    if(new_doc.root.name != 'releaseAccess')
      raise "Trying to replace releaseAccess with a non-releaseAccess document"
    end

    term_value_delete(:select => '//embargoMetadata/releaseAccess')
    # A clone is added so the caller's document keeps its own root
    ng_xml.root.add_child(new_doc.root.clone)
    self.dirty = true
  end

  private

  # Parses a stored date string, treating a missing or blank value as "not set"
  # instead of letting Time.parse raise on it.
  # @param [String, nil] value raw term value
  # @return [Time, nil]
  def parse_time_value(value)
    return nil if value.nil? || value.to_s.strip.empty?
    Time.parse(value)
  end

end
@@ -0,0 +1,58 @@
1
+ require 'active_fedora'
2
+
3
# Datastream that keeps a flat list of <event> elements under an <events> root.
# Each event carries type, who and when attributes plus a text message.
class EventsDS < ActiveFedora::NokogiriDatastream

  set_terminology do |t|
    t.root(:path => "events", :xmlns => '', :namespace_prefix => nil)
    t.event(:namespace_prefix => nil)
  end

  # Default EventsDS xml
  # @return [Nokogiri::XML::Document] a document containing only an empty events element
  def self.xml_template
    builder = Nokogiri::XML::Builder.new do |xml|
      xml.events
    end
    return builder.doc
  end

  # Marks the datastream as non-versionable after the normal initialization.
  def initialize(attrs=nil)
    super
    @attributes[:versionable] = false
  end

  # Adds an event to the datastream
  # @param [String] type a tag used to group events together. Sets the type attribute for the event
  # @param [String] who who is responsible for this event. Sets the who attribute for the event
  # @param [String] message what happened. Sets the content of the event with this message
  def add_event(type, who, message)
    ev = ng_xml.create_element "event", message,
      :type => type, :who => who, :when => Time.now.xmlschema
    ng_xml.root.add_child(ev)
    self.dirty = true
  end

  # Finds events with the desired type attribute
  # @param [String] tag events where type == tag will be returned
  # @yield [who, timestamp, message] The values of the current event
  # @yieldparam [String] who thing responsible for creating the event. Value of the 'who' attribute
  # @yieldparam [Time] timestamp when this event was logged. Value of the 'when' attribute
  # @yieldparam [String] message what happened. Content of the event node
  def find_events_by_type(tag, &block)
    # The type is compared in Ruby rather than interpolated into an XPath
    # string literal, so a tag containing quotes can neither break the query
    # nor change what it selects.
    find_by_terms(:event).each do |node|
      next unless node['type'] == tag
      block.call(node['who'], Time.parse(node['when']), node.content)
    end
  end

  # Returns all the events in the datastream
  # @yield [type, who, timestamp, message] The values of the current event
  # @yieldparam [String] type tag for this particular event. Value of the 'type' attribute
  # @yieldparam [String] who thing responsible for creating the event. Value of the 'who' attribute
  # @yieldparam [Time] timestamp when this event was logged. Value of the 'when' attribute
  # @yieldparam [String] message what happened. Content of the event node
  def each_event(&block)
    find_by_terms(:event).each do |node|
      block.call(node['type'], node['who'], Time.parse(node['when']), node.content)
    end
  end

end
@@ -0,0 +1,28 @@
1
# Datastream describing an object's identityMetadata XML.
class IdentityMetadataDS < ActiveFedora::NokogiriDatastream

  set_terminology do |t|
    t.root(:path=>"identityMetadata", :xmlns => '')

    # Every term below shares the same indexing/type options; a fresh hash
    # (with its own index list) is built for each term, optionally extended
    # with term-specific entries.
    string_term = lambda do |extra|
      {
        :index_as => [:searchable, :displayable, :facetable, :sortable],
        :required => :true,
        :type => :string,
        :namespace_prefix => nil
      }.merge(extra)
    end

    t.objectId(string_term.call({}))
    t.objectType(string_term.call({}))
    t.objectLabel(string_term.call({}))
    t.citationCreator(string_term.call({}))
    t.sourceId(string_term.call(:attributes => {:type => "source"}))
    t.otherId(string_term.call(:attributes => {:type => "name"}))
    t.agreementId(string_term.call({}))
    t.tag(string_term.call({}))
    t.citationTitle(string_term.call({}))
    t.objectCreator(string_term.call({}))
    t.adminPolicy(string_term.call({}))
  end

  # Default identityMetadata xml: a root element holding empty citationTitle
  # and objectCreator children.
  # @return [Nokogiri::XML::Document]
  def self.xml_template
    template = Nokogiri::XML::Builder.new do |xml|
      xml.identityMetadata do
        xml.citationTitle
        xml.objectCreator
      end
    end
    template.doc
  end

end
@@ -0,0 +1,19 @@
1
class Nokogiri::XML::Text

  # Returns this node's content with every run of whitespace collapsed to a
  # single space and the ends trimmed. Content that contains no
  # non-whitespace character at all is returned as is.
  def normalize
    text = self.content
    return text unless text =~ /\S/
    text.gsub(/\s+/,' ').strip
  end

  # Replaces this node's content with its normalized form.
  def normalize!
    normalized = self.normalize
    self.content = normalized
  end

end
12
+
13
class Nokogiri::XML::Node

  # Normalizes the whitespace of every text node beneath this node, in place.
  #
  # The search is relative to the receiver ('.//text()'): called on a document
  # it covers the whole document, called on an element it covers only that
  # element's subtree. An absolute '//text()' would rewrite text across the
  # entire document regardless of the receiver.
  def normalize_text!
    self.xpath('.//text()').each { |t| t.normalize! }
  end

end
@@ -0,0 +1,23 @@
1
# Datastream for a minimal oai_dc record exposing title, creator and identifier.
class SimpleDublinCoreDs < ActiveFedora::NokogiriDatastream

  set_terminology do |t|
    t.root(
      :path => "dc",
      :xmlns => "http://www.openarchives.org/OAI/2.0/oai_dc/",
      :schema => "http://cosimo.stanford.edu/standards/oai_dc/v2/oai_dc.xsd",
      :namespace_prefix => 'oai_dc'
    )

    # All three terms live in the dc elements namespace; only the index list differs.
    dc_term = lambda do |indexes|
      { :index_as => indexes, :xmlns => "http://purl.org/dc/elements/1.1/", :namespace_prefix => 'dc' }
    end

    t.title(dc_term.call([:searchable, :displayable, :facetable, :sortable]))
    t.creator(dc_term.call([:searchable, :displayable, :facetable, :sortable]))
    t.identifier(dc_term.call([:searchable, :displayable, :sortable]))
  end

  # Default xml: a dc root declaring the oai_dc default namespace and the dc
  # prefix, holding empty dc:title, dc:creator and dc:identifier elements.
  # @return [Nokogiri::XML::Document]
  def self.xml_template
    namespaces = {
      :xmlns => "http://www.openarchives.org/OAI/2.0/oai_dc/",
      'xmlns:dc' => 'http://purl.org/dc/elements/1.1/'
    }
    template = Nokogiri::XML::Builder.new do |xml|
      xml.dc(namespaces) do
        xml['dc'].title
        xml['dc'].creator
        xml['dc'].identifier
      end
    end

    template.doc
  end

end
@@ -0,0 +1,105 @@
1
# Read-only view of a single process node inside a workflow definition.
class WorkflowProcess

  # The workflow this process belongs to. It must respond to +repository+ and
  # +name+; #prerequisites relies on this reader to decide whether a
  # prerequisite points back at the owning workflow.
  attr_reader :workflow

  # @param workflow the owning workflow definition (responds to repository and name)
  # @param node the process node; read via [], xpath and at_xpath
  def initialize(workflow, node)
    @workflow = workflow
    @node = node
  end

  # @return the value of the node's name attribute
  def name
    @node['name']
  end

  # @return the value of the node's sequence attribute
  def sequence
    @node['sequence']
  end

  # @return the value of the node's lifecycle attribute
  def lifecycle
    @node['lifecycle']
  end

  # @return [String] text of the node's label child, or "" when there is none
  def label
    @node.at_xpath('label/text()').to_s
  end

  # Lists the names of the processes this one depends on.
  #
  # A prereq that names no repository/workflow, or names exactly the owning
  # workflow, is reported by its bare name. Any other prereq is reported as
  # "repository:workflow:name", with a missing part filled in from the owning
  # workflow.
  #
  # @return [Array<String>]
  def prerequisites
    @node.xpath('prereq').collect do |p|
      repo = p['repository']
      wf = p['workflow']
      local = (repo.nil? && wf.nil?) || (repo == workflow.repository && wf == workflow.name)
      if local
        p.text.to_s
      else
        [(repo || workflow.repository), (wf || workflow.name), p.text.to_s].join(':')
      end
    end
  end

end
35
+
36
# Datastream holding a workflow definition: a workflow root (id and repository
# attributes) with one process child per step, each of which may list prereq
# children.
class WorkflowDefinitionDs < ActiveFedora::NokogiriDatastream

  # Template for one process element.
  #
  # prereqs is a list of strings, each either a bare process name or
  # "repository:workflow:name". A prereq that points at this same workflow is
  # written without repository/workflow attributes.
  #
  # NOTE(review): the label argument is accepted but never written to the
  # node, although WorkflowProcess#label reads a label child -- confirm
  # whether a label element should be emitted here.
  define_template :process do |builder,workflow,name,seq,label,lifecycle,prereqs|
    process_attrs = {:name => name}
    process_attrs[:sequence] = seq unless seq.nil?
    process_attrs[:lifecycle] = lifecycle unless lifecycle.nil?
    builder.process(process_attrs) do |node|
      # A process without prerequisites may arrive with prereqs == nil
      (prereqs || []).each do |prereq|
        (repo,wf,prereq_name) = prereq.split(/:/)
        if prereq_name.nil?
          # Bare name: the only segment landed in repo
          prereq_name = repo
          repo = nil
        end
        # Compare (not assign) the workflow name, so that only a prereq naming
        # this exact repository AND workflow is treated as local
        if (repo == workflow.repository and wf == workflow.name)
          repo = nil
          wf = nil
        end
        prereq_attrs = (repo.nil? and wf.nil?) ? {} : { :repository => repo, :workflow => wf }
        node.prereq(prereq_attrs) { node.text prereq_name }
      end
    end
  end

  # Appends a process node to the workflow root using the :process template.
  # @param name process name
  # @param seq sequence value, or nil to omit the attribute
  # @param label process label (see the note on the template)
  # @param lifecycle lifecycle value, or nil to omit the attribute
  # @param [Array<String>, nil] prereqs prerequisite names
  def add_process(name, seq, label, lifecycle, prereqs)
    add_child_node(ng_xml.at_xpath('/workflow'), :process, self, name, seq, label, lifecycle, prereqs)
  end

  # @return [Array<WorkflowProcess>] one wrapper per /workflow/process node
  def processes
    ng_xml.xpath('/workflow/process').collect do |node|
      WorkflowProcess.new(self, node)
    end
  end

  # @return [String] value of the workflow root's id attribute
  def name
    ng_xml.at_xpath('/workflow/@id').to_s
  end

  # @return [String] value of the workflow root's repository attribute
  def repository
    ng_xml.at_xpath('/workflow/@repository').to_s
  end

  # Hash form of the definition: 'repository' and 'name' entries plus one
  # entry per process name, each holding that process's 'prerequisites' list.
  # @return [Hash]
  def configuration
    result = {
      'repository' => repository,
      'name' => name
    }
    # processes is an Array of WorkflowProcess, and prerequisites already
    # yields plain name strings
    processes.each do |process|
      result[process.name] = {
        'prerequisites' => process.prerequisites
      }
    end
    result
  end

  # Rebuilds the datastream's XML from a hash shaped like the one returned by
  # #configuration. Every Hash-valued entry becomes a process, numbered in
  # iteration order.
  # @param [Hash] hash workflow configuration
  def configuration=(hash)
    # Assign through the accessor (a bare `ng_xml = ...` would only create a
    # local variable) and let the builder escape the attribute values.
    self.ng_xml = Nokogiri::XML::Builder.new { |xml|
      xml.workflow(:id => hash['name'].to_s, :repository => hash['repository'].to_s)
    }.doc
    i = 0
    hash.each_pair do |k,v|
      if v.is_a?(Hash)
        # #configuration writes 'prerequisites'; the singular key is still
        # honoured for hashes written by older callers
        prereqs = v['prerequisites'] || v['prerequisite'] || []
        add_process(k,i+=1,nil,nil,prereqs)
      end
    end
  end

  # @return [String] YAML dump of #configuration
  def to_yaml
    YAML.dump(self.configuration)
  end

end
@@ -0,0 +1,16 @@
1
# Datastream describing a workflow document: a workflow root with an id
# attribute and process children whose attributes are exposed as terms.
class WorkflowDs < ActiveFedora::NokogiriDatastream

  set_terminology do |t|
    # Builds the options for a term that maps onto an XML attribute
    attribute = lambda { |attr_name| { :path => { :attribute => attr_name } } }

    t.root(:path=>"workflow", :xmlns => '', :namespace_prefix => nil)
    t.workflowId(attribute.call("id").merge(:index_as => [:displayable, :facetable]))

    t.process(:path=>'process', :namespace_prefix => nil) do
      t._name(attribute.call("name").merge(:index_as => [:displayable, :facetable, :sortable]))
      t.status(attribute.call("status").merge(:index_as => [:displayable, :facetable, :sortable]))
      t.timestamp(attribute.call("datetime").merge(:index_as => [:searchable, :sortable]))
      t.elapsed(attribute.call("elapsed"))
      t.lifecycle(attribute.call("lifecycle").merge(:index_as => [:displayable, :facetable, :sortable]))
      t.attempts(attribute.call("attempts"))
    end
  end

end
@@ -0,0 +1,19 @@
1
+ require 'dor/config'
2
+ require 'dor/exceptions'
3
+
4
+ # ActiveFedora Classes
5
+ require 'dor/base'
6
+ require 'dor/item'
7
+ require 'dor/admin_policy_object'
8
+ require 'dor/workflow_object'
9
+
10
+ # Services
11
+ require 'dor/metadata_service'
12
+ require 'dor/registration_service'
13
+ require 'dor/suri_service'
14
+ require 'dor/workflow_service'
15
+ require 'dor/digital_stacks_service'
16
+ require 'dor/druid_utils'
17
+ require 'dor/sdr_ingest_service'
18
+ require 'dor/cleanup_service'
19
+ require 'dor/provenance_metadata_service'
@@ -0,0 +1,11 @@
1
module Dor

  # Dor object that declares three plain Nokogiri datastreams:
  # administrativeMetadata, roleMetadata and defaultObjectRights.
  class AdminPolicyObject < Base

    %w(administrativeMetadata roleMetadata defaultObjectRights).each do |datastream_name|
      has_metadata :name => datastream_name, :type => ActiveFedora::NokogiriDatastream
    end

  end

end
@@ -0,0 +1,81 @@
1
+ require 'active_fedora'
2
+ require 'datastreams/identity_metadata_ds'
3
+ require 'datastreams/simple_dublin_core_ds'
4
+ require 'datastreams/workflow_ds'
5
+ require 'dor/suri_service'
6
+
7
module Dor

  # Common base class for Dor objects. Declares the DC, RELS-EXT and
  # identityMetadata datastreams and mints a new id when none is supplied.
  class Base < ::ActiveFedora::Base

    attr_reader :workflows

    has_metadata :name => "DC", :type => SimpleDublinCoreDs
    has_metadata :name => "RELS-EXT", :type => ActiveFedora::RelsExtDatastream
    has_metadata :name => "identityMetadata", :type => IdentityMetadataDS

    # Make a random (and harmless) API-M call to get gsearch to reindex the object
    # @param pids one or more object ids
    # @return [Array] the response code of each call, in the order given
    def self.touch(*pids)
      client = Dor::Config.fedora.client
      pids.collect { |pid|
        response = client["objects/#{pid}/datastreams/DC?dsState=A&ignoreContent=true"].put('', :content_type => 'text/xml')
        response.code
      }
    end

    # Fetches the object's objectXML from the configured fedora client and parses it.
    #
    # @param pid object id
    # @param interpolate_refs when true, every foxml:contentLocation whose REF
    #   contains "/workflows/" is fetched with RestClient and replaced by a
    #   foxml:xmlContent element holding the fetched document's root (with
    #   namespaces cleared). A reference that fails for any reason is removed
    #   from the document instead.
    # @return [Nokogiri::XML::Document]
    def self.get_foxml(pid, interpolate_refs = false)
      foxml = Nokogiri::XML(Dor::Config.fedora.client["objects/#{pid}/objectXML"].get)
      if interpolate_refs
        external_refs = foxml.xpath('//foxml:contentLocation[contains(@REF,"/workflows/")]')
        external_refs.each do |ref|
          begin
            external_doc = Nokogiri::XML(RestClient.get(ref['REF']))
            external_root = external_doc.root
            ref.replace('<foxml:xmlContent/>').first.add_child(external_doc.root)
            external_root.traverse { |node| node.namespace = nil }
          rescue
            # Best effort: drop references that cannot be fetched or inlined
            ref.remove
          end
        end
      end
      return foxml
    end

    # @param [Hash] attrs object attributes. Without a :pid a new id is minted
    #   through Dor::SuriService and the object is flagged as new; with a :pid
    #   the object is flagged as new unless :new_object is exactly false.
    def initialize(attrs = {})
      unless attrs[:pid]
        # merge (not merge!) so the caller's hash is left untouched
        attrs = attrs.merge({:pid=>Dor::SuriService.mint_id})
        @new_object=true
      else
        @new_object = attrs[:new_object] == false ? false : true
      end
      @inner_object = Fedora::FedoraObject.new(attrs)
      @datastreams = {}
      @workflows = {}
      configure_defined_datastreams
    end

    # @return the IdentityMetadata parsed from the identityMetadata
    #   datastream's content, or nil when the object has no such datastream
    def identity_metadata
      if self.datastreams.has_key?('identityMetadata')
        IdentityMetadata.from_xml(self.datastreams['identityMetadata'].content)
      else
        nil
      end
    end

    # Self-aware datastream builders
    #
    # Calls build_<datastream>_datastream with the datastream and saves it,
    # but only when forced, when the datastream is new, or when its content
    # is empty.
    # @param [String] datastream datastream name
    # @param force rebuild even when the datastream already has content
    # @return the datastream
    def build_datastream(datastream, force = false)
      ds = datastreams[datastream]
      if force or ds.new_object? or (ds.content.to_s.empty?)
        builder_method = "build_#{datastream}_datastream".to_sym
        self.send(builder_method, ds)
        ds.save
      end
      return ds
    end

    # Asks Dor::SearchService to reindex this object.
    def reindex
      Dor::SearchService.reindex(self.pid)
    end

  end
end