dor-services 2.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. data/lib/datastreams/content_metadata_ds.rb +12 -0
  2. data/lib/datastreams/embargo_metadata_ds.rb +107 -0
  3. data/lib/datastreams/events_ds.rb +58 -0
  4. data/lib/datastreams/identity_metadata_ds.rb +28 -0
  5. data/lib/datastreams/ng_tidy.rb +19 -0
  6. data/lib/datastreams/simple_dublin_core_ds.rb +23 -0
  7. data/lib/datastreams/workflow_definition_ds.rb +105 -0
  8. data/lib/datastreams/workflow_ds.rb +16 -0
  9. data/lib/dor-services.rb +19 -0
  10. data/lib/dor/admin_policy_object.rb +11 -0
  11. data/lib/dor/base.rb +81 -0
  12. data/lib/dor/cleanup_service.rb +32 -0
  13. data/lib/dor/config.rb +45 -0
  14. data/lib/dor/digital_stacks_service.rb +82 -0
  15. data/lib/dor/druid_utils.rb +41 -0
  16. data/lib/dor/embargo.rb +41 -0
  17. data/lib/dor/exceptions.rb +13 -0
  18. data/lib/dor/item.rb +141 -0
  19. data/lib/dor/metadata_handlers/catalog_handler.rb +22 -0
  20. data/lib/dor/metadata_handlers/mdtoolkit_handler.rb +42 -0
  21. data/lib/dor/metadata_service.rb +88 -0
  22. data/lib/dor/mods2dc.xslt +447 -0
  23. data/lib/dor/provenance_metadata_service.rb +65 -0
  24. data/lib/dor/registration_service.rb +87 -0
  25. data/lib/dor/rsolr.rb +27 -0
  26. data/lib/dor/sdr_ingest_service.rb +117 -0
  27. data/lib/dor/search_service.rb +86 -0
  28. data/lib/dor/suri_service.rb +37 -0
  29. data/lib/dor/tei2dc.xslt +102 -0
  30. data/lib/dor/workflow_object.rb +13 -0
  31. data/lib/dor/workflow_service.rb +111 -0
  32. data/lib/gsearch/demoFoxmlToSolr.xslt +384 -0
  33. data/lib/gsearch/schema.xml +229 -0
  34. data/lib/tasks/rdoc.rake +32 -0
  35. data/lib/xml_models/foxml.rb +261 -0
  36. data/lib/xml_models/identity_metadata/dublin_core.rb +119 -0
  37. data/lib/xml_models/identity_metadata/identity_metadata.rb +288 -0
  38. metadata +462 -0
@@ -0,0 +1,12 @@
1
# Datastream wrapper around an object's contentMetadata XML.
class ContentMetadataDS < ActiveFedora::NokogiriDatastream

  # Builds the public rendition of the content metadata.
  #
  # Works on a clone of the datastream's XML and, in order:
  # 1. drops every resource that has no file flagged deliver="yes" or publish="yes",
  # 2. drops every remaining file that is not flagged deliver="yes" or publish="yes",
  # 3. strips the preserve/shelve/publish/deliver attributes from the surviving files,
  # 4. strips the checksum children of the surviving files.
  #
  # @return [Nokogiri::XML::Document] the filtered clone
  def public_xml
    public_doc = ng_xml.clone
    discard = lambda { |nodes| nodes.each { |node| node.remove } }

    discard.call(public_doc.xpath('/contentMetadata/resource[not(file[(@deliver="yes" or @publish="yes")])]'))
    discard.call(public_doc.xpath('/contentMetadata/resource/file[not(@deliver="yes" or @publish="yes")]'))

    remaining_files = public_doc.xpath('/contentMetadata/resource/file')
    discard.call(remaining_files.xpath('@preserve|@shelve|@publish|@deliver'))

    discard.call(public_doc.xpath('/contentMetadata/resource/file/checksum'))
    public_doc
  end

end
@@ -0,0 +1,107 @@
1
+ require 'active_fedora'
2
+ require 'nokogiri'
3
+ require 'time'
4
+
5
# Datastream holding an object's embargo state: overall status and release date,
# the "twenty percent visibility" status and release date, and the releaseAccess
# XML fragment.
class EmbargoMetadataDS < ActiveFedora::NokogiriDatastream

  set_terminology do |t|
    t.root(:path => "embargoMetadata", :xmlns => '', :namespace_prefix => nil)
    t.status(:namespace_prefix => nil)
    t.release_date(:path => "releaseDate", :namespace_prefix => nil)
    t.twenty_pct_status(:path => "twentyPctVisibilityStatus", :namespace_prefix => nil)
    t.twenty_pct_release_date(:path => "twentyPctVisibilityReleaseDate", :namespace_prefix => nil)

    t.release_access(:path => "releaseAccess", :namespace_prefix => nil)
  end

  # Default EmbargoMetadataDS xml
  # @return [Nokogiri::XML::Document] an embargoMetadata document with empty child elements
  def self.xml_template
    builder = Nokogiri::XML::Builder.new do |xml|
      xml.embargoMetadata {
        xml.status
        xml.releaseDate
        xml.releaseAccess
        xml.twentyPctVisibilityStatus
        xml.twentyPctVisibilityReleaseDate
      }
    end
    return builder.doc
  end

  # Marks the datastream as non-versionable after the normal initialization.
  def initialize(attrs=nil)
    super
    @attributes[:versionable] = false
  end

  #################################################################################
  # Convenience methods to get and set embargo properties
  # Hides complexity/verbosity of OM TermOperators for simple, non-repeating values
  #################################################################################

  # Sets the embargo status and flags the datastream as dirty.
  # @param [String] new_status
  def status=(new_status)
    update_values([:status] => new_status)
    self.dirty = true
  end

  # @return [String] the first status value
  def status
    term_values(:status).first
  end

  # Sets the release date. Converts the date to beginning-of-day, UTC to help with Solr indexing
  # @param [Time] rd A Time object representing the release date. By default, it is set to now
  def release_date=(rd=Time.now)
    update_values([:release_date] => rd.beginning_of_day.utc.xmlschema)
    self.dirty = true
  end

  # Current releaseDate value
  # @return [Time] the first releaseDate value run through Time.parse
  def release_date
    Time.parse(term_values(:release_date).first)
  end

  # Sets the 20% visibility status and flags the datastream as dirty.
  # @param [String] new_status
  def twenty_pct_status=(new_status)
    update_values([:twenty_pct_status] => new_status)
    self.dirty = true
  end

  # @return [String] the first twentyPctVisibilityStatus value
  def twenty_pct_status
    term_values(:twenty_pct_status).first
  end

  # Sets the 20% visibility release date. Converts the date to beginning-of-day, UTC to help with Solr indexing
  # @param [Time] rd A Time object representing the release date. By default, it is set to now
  def twenty_pct_release_date=(rd=Time.now)
    update_values([:twenty_pct_release_date] => rd.beginning_of_day.utc.xmlschema)
    self.dirty = true
  end

  # Current twentyPctVisibilityReleaseDate value
  # @return [Time] the first twentyPctVisibilityReleaseDate value run through Time.parse
  def twenty_pct_release_date
    Time.parse(term_values(:twenty_pct_release_date).first)
  end

  # (This reader used to be defined twice with identical bodies; only one
  # definition is kept.)
  # @return [Nokogiri::XML::Element] The releaseAccess node
  def release_access_node
    find_by_terms(:release_access).first
  end

  # Sets the releaseAccess node
  # @param [Nokogiri::XML::Document] new_doc Document that will replace the existing releaseAccess node
  # @raise [RuntimeError] if the root element of new_doc is not named releaseAccess
  def release_access_node=(new_doc)
    if(new_doc.root.name != 'releaseAccess')
      raise "Trying to replace releaseAccess with a non-releaseAccess document"
    end

    # Remove the current node, then append a clone of the replacement root.
    term_value_delete(:select => '//embargoMetadata/releaseAccess')
    ng_xml.root.add_child(new_doc.root.clone)
    self.dirty = true
  end

end
@@ -0,0 +1,58 @@
1
+ require 'active_fedora'
2
+
3
# Datastream that records a flat list of <event> elements under an <events> root.
class EventsDS < ActiveFedora::NokogiriDatastream

  set_terminology do |t|
    t.root(:path => "events", :xmlns => '', :namespace_prefix => nil)
    t.event(:namespace_prefix => nil)
  end

  # Default EventsDS xml
  # @return [Nokogiri::XML::Document] a document containing an empty events element
  def self.xml_template
    builder = Nokogiri::XML::Builder.new do |xml|
      xml.events
    end
    return builder.doc
  end

  # Marks the datastream as non-versionable after the normal initialization.
  def initialize(attrs=nil)
    super
    @attributes[:versionable] = false
  end

  # Adds an event to the datastream
  # @param [String] type a tag used to group events together. Sets the type attribute for the event
  # @param [String] who who is responsible for this event. Sets the who attribute for the event
  # @param [String] message what happened. Sets the content of the event with this message
  def add_event(type, who, message)
    ev = ng_xml.create_element "event", message,
      :type => type, :who => who, :when => Time.now.xmlschema
    ng_xml.root.add_child(ev)
    self.dirty = true
  end

  # Finds events with the desired type attribute
  #
  # The type is compared in Ruby instead of being interpolated into an XPath
  # expression, so tags containing quote characters are matched literally.
  # @param [String] tag events where type == tag will be returned
  # @yield [who, timestamp, message] The values of the current event
  # @yieldparam [String] who thing responsible for creating the event. Value of the 'who' attribute
  # @yieldparam [Time] timestamp when this event was logged. Value of the 'when' attribute
  # @yieldparam [String] message what happened. Content of the event node
  def find_events_by_type(tag, &block)
    wanted = tag.to_s
    matching = find_by_terms(:event).select { |node| node['type'] == wanted }
    matching.each do |node|
      block.call(node['who'], Time.parse(node['when']), node.content)
    end
  end

  # Returns all the events in the datastream
  # @yield [type, who, timestamp, message] The values of the current event
  # @yieldparam [String] type tag for this particular event. Value of the 'type' attribute
  # @yieldparam [String] who thing responsible for creating the event. Value of the 'who' attribute
  # @yieldparam [Time] timestamp when this event was logged. Value of the 'when' attribute
  # @yieldparam [String] message what happened. Content of the event node
  def each_event(&block)
    find_by_terms(:event).each do |node|
      block.call(node['type'], node['who'], Time.parse(node['when']), node.content)
    end
  end

end
@@ -0,0 +1,28 @@
1
# Datastream describing an object's identityMetadata terms.
class IdentityMetadataDS < ActiveFedora::NokogiriDatastream

  set_terminology do |t|
    t.root(:path=>"identityMetadata", :xmlns => '')

    # Every term shares the same indexing/type options; a fresh hash is built
    # per term, with any term-specific extras merged on top.
    term_options = lambda do |extras|
      {
        :index_as => [:searchable, :displayable, :facetable, :sortable],
        :required => :true,
        :type => :string,
        :namespace_prefix => nil
      }.merge(extras)
    end

    t.objectId(term_options.call({}))
    t.objectType(term_options.call({}))
    t.objectLabel(term_options.call({}))
    t.citationCreator(term_options.call({}))
    t.sourceId(term_options.call(:attributes => {:type => "source"}))
    t.otherId(term_options.call(:attributes => {:type => "name"}))
    t.agreementId(term_options.call({}))
    t.tag(term_options.call({}))
    t.citationTitle(term_options.call({}))
    t.objectCreator(term_options.call({}))
    t.adminPolicy(term_options.call({}))
  end

  # Default identityMetadata xml: a root element holding empty citationTitle
  # and objectCreator children.
  # @return [Nokogiri::XML::Document]
  def self.xml_template
    template = Nokogiri::XML::Builder.new do |xml|
      xml.identityMetadata do
        xml.citationTitle
        xml.objectCreator
      end
    end
    template.doc
  end

end
@@ -0,0 +1,19 @@
1
# Whitespace-normalization helpers added to Nokogiri text nodes.
class Nokogiri::XML::Text

  # Returns the node's content with every run of whitespace collapsed to a
  # single space and the ends stripped. Content that holds no non-whitespace
  # character is returned as-is.
  # @return [String]
  def normalize
    text = content
    return content unless text =~ /\S/
    content.gsub(/\s+/, ' ').strip
  end

  # Replaces the node's content with its normalized form.
  def normalize!
    self.content = normalize
  end

end
12
+
13
# Whitespace-normalization helper added to every Nokogiri node.
class Nokogiri::XML::Node

  # Normalizes the whitespace of every text node beneath this node.
  #
  # The path is relative ('.//text()'), so only descendants of the receiver are
  # touched. The previous absolute path ('//text()') selected every text node of
  # the whole document no matter which node the method was called on.
  def normalize_text!
    self.xpath('.//text()').each { |t| t.normalize! }
  end

end
@@ -0,0 +1,23 @@
1
# Minimal oai_dc datastream exposing the dc:title, dc:creator and dc:identifier terms.
class SimpleDublinCoreDs < ActiveFedora::NokogiriDatastream

  set_terminology do |t|
    t.root(
      :path => "dc",
      :xmlns => "http://www.openarchives.org/OAI/2.0/oai_dc/",
      :schema => "http://cosimo.stanford.edu/standards/oai_dc/v2/oai_dc.xsd",
      :namespace_prefix => 'oai_dc'
    )

    # Options for a term in the Dublin Core elements namespace; only the
    # index_as list differs between terms.
    dc_term = lambda do |index_fields|
      { :index_as => index_fields, :xmlns => "http://purl.org/dc/elements/1.1/", :namespace_prefix => 'dc' }
    end

    t.title(dc_term.call([:searchable, :displayable, :facetable, :sortable]))
    t.creator(dc_term.call([:searchable, :displayable, :facetable, :sortable]))
    t.identifier(dc_term.call([:searchable, :displayable, :sortable]))
  end

  # Default xml: a dc root declaring the oai_dc and dc namespaces, holding
  # empty dc:title, dc:creator and dc:identifier elements.
  # @return [Nokogiri::XML::Document]
  def self.xml_template
    root_attributes = {
      :xmlns => "http://www.openarchives.org/OAI/2.0/oai_dc/",
      'xmlns:dc' => 'http://purl.org/dc/elements/1.1/'
    }
    template = Nokogiri::XML::Builder.new do |xml|
      xml.dc(root_attributes) do
        xml['dc'].title
        xml['dc'].creator
        xml['dc'].identifier
      end
    end
    template.doc
  end

end
@@ -0,0 +1,105 @@
1
# Read-only view of one <process> node of a workflow definition.
class WorkflowProcess

  # @param workflow the owning workflow definition; must respond to #repository and #name
  # @param node the process node; read through #[] for attributes and #xpath/#at_xpath for children
  def initialize(workflow, node)
    @workflow = workflow
    @node = node
  end

  # @return [String, nil] value of the node's 'name' attribute
  def name
    @node['name']
  end

  # @return [String, nil] value of the node's 'sequence' attribute
  def sequence
    @node['sequence']
  end

  # @return [String, nil] value of the node's 'lifecycle' attribute
  def lifecycle
    @node['lifecycle']
  end

  # @return [String] text of the label child, or an empty string when there is none
  def label
    @node.at_xpath('label/text()').to_s
  end

  # Lists the names of the processes this process depends on.
  #
  # A prereq that carries neither a repository nor a workflow attribute, or whose
  # attributes both match the owning workflow, is reported by its bare name.
  # Any other prereq is reported as "repository:workflow:name", with a missing
  # attribute filled in from the owning workflow.
  #
  # The owning workflow is read from @workflow; the class defines no `workflow`
  # reader, so the bare `workflow` calls used previously raised NameError.
  #
  # @return [Array<String>]
  def prerequisites
    @node.xpath('prereq').collect do |p|
      repo = p['repository']
      wf = p['workflow']
      local = (repo.nil? && wf.nil?) ||
              (repo == @workflow.repository && wf == @workflow.name)
      if local
        p.text.to_s
      else
        [(repo || @workflow.repository), (wf || @workflow.name), p.text.to_s].join(':')
      end
    end
  end

end
35
+
36
# Datastream holding a workflow definition: a <workflow id=".." repository="..">
# root with one <process> child per step.
class WorkflowDefinitionDs < ActiveFedora::NokogiriDatastream

  # Template for a single <process> element.
  #
  # Each prerequisite is either a bare process name or a
  # "repository:workflow:name" triple. Qualifiers that match the owning
  # workflow are dropped so the prereq is stored by bare name.
  #
  # NOTE(review): `label` is accepted but never written to the generated node,
  # although WorkflowProcess#label reads a label child. Confirm whether a
  # label element should be emitted here.
  define_template :process do |builder,workflow,name,seq,label,lifecycle,prereqs|
    attrs = {:name => name}
    attrs[:sequence] = seq unless seq.nil?
    attrs[:lifecycle] = lifecycle unless lifecycle.nil?
    builder.process(attrs) do |node|
      # Array() tolerates a nil prerequisite list.
      Array(prereqs).each do |prereq|
        (repo,wf,prereq_name) = prereq.split(/:/)
        if prereq_name.nil?
          prereq_name = repo
          repo = nil
        end
        # Comparison, not assignment: the former `wf = workflow.name` overwrote
        # wf and treated every prereq from the same repository as local.
        if repo == workflow.repository && wf == workflow.name
          repo = nil
          wf = nil
        end
        # Only emit the qualifiers that actually have a value.
        prereq_attrs = {}
        prereq_attrs[:repository] = repo unless repo.nil?
        prereq_attrs[:workflow] = wf unless wf.nil?
        node.prereq(prereq_attrs) { node.text prereq_name }
      end
    end
  end

  # Appends a process node to the workflow root using the :process template.
  # @param [String] name process name
  # @param seq value for the sequence attribute (omitted when nil)
  # @param [String] label see the NOTE on the :process template
  # @param [String] lifecycle value for the lifecycle attribute (omitted when nil)
  # @param [Array<String>, nil] prereqs prerequisite names or "repository:workflow:name" triples
  def add_process(name, seq, label, lifecycle, prereqs)
    add_child_node(ng_xml.at_xpath('/workflow'), :process, self, name, seq, label, lifecycle, prereqs)
  end

  # @return [Array<WorkflowProcess>] one wrapper per /workflow/process node
  def processes
    ng_xml.xpath('/workflow/process').collect do |node|
      WorkflowProcess.new(self, node)
    end
  end

  # @return [String] the workflow's id attribute
  def name
    ng_xml.at_xpath('/workflow/@id').to_s
  end

  # @return [String] the workflow's repository attribute
  def repository
    ng_xml.at_xpath('/workflow/@repository').to_s
  end

  # Summarizes the definition as a plain hash: 'repository' and 'name' keys plus
  # one entry per process name, each holding that process's 'prerequisites'.
  #
  # Previously this called each_pair on the Array returned by #processes,
  # called #name on the prerequisite strings, and did not return the hash.
  # @return [Hash]
  def configuration
    result = {
      'repository' => repository,
      'name' => name
    }
    processes.each do |process|
      result[process.name] = {
        'prerequisites' => process.prerequisites
      }
    end
    result
  end

  # Rebuilds the datastream XML from a hash shaped like the one #configuration
  # returns. Every Hash-valued entry becomes a process, numbered in iteration order.
  #
  # The new document is assigned through the ng_xml writer (a bare `ng_xml = ...`
  # only created a local variable), and the root attributes are set through
  # Nokogiri so their values are escaped. Both the 'prerequisites' key written by
  # #configuration and the 'prerequisite' key read by earlier code are accepted.
  # @param [Hash] hash
  def configuration=(hash)
    doc = Nokogiri::XML('<workflow/>')
    doc.root['id'] = hash['name'].to_s
    doc.root['repository'] = hash['repository'].to_s
    self.ng_xml = doc
    i = 0
    hash.each_pair do |k,v|
      if v.is_a?(Hash)
        add_process(k, i += 1, nil, nil, v['prerequisites'] || v['prerequisite'])
      end
    end
  end

  # @return [String] the #configuration hash serialized as YAML
  def to_yaml
    YAML.dump(self.configuration)
  end

end
@@ -0,0 +1,16 @@
1
# Datastream describing a workflow and the attributes of each of its processes.
class WorkflowDs < ActiveFedora::NokogiriDatastream

  set_terminology do |t|
    # Builds the :path option for a term that maps to an XML attribute.
    attribute_path = lambda { |attr_name| { :attribute => attr_name } }

    t.root(:path => "workflow", :xmlns => '', :namespace_prefix => nil)
    t.workflowId(:path => attribute_path.call("id"), :index_as => [:displayable, :facetable])

    t.process(:path => 'process', :namespace_prefix => nil) do
      t._name(:path => attribute_path.call("name"), :index_as => [:displayable, :facetable, :sortable])
      t.status(:path => attribute_path.call("status"), :index_as => [:displayable, :facetable, :sortable])
      t.timestamp(:path => attribute_path.call("datetime"), :index_as => [:searchable, :sortable])
      t.elapsed(:path => attribute_path.call("elapsed"))
      t.lifecycle(:path => attribute_path.call("lifecycle"), :index_as => [:displayable, :facetable, :sortable])
      t.attempts(:path => attribute_path.call("attempts"))
    end
  end

end
@@ -0,0 +1,19 @@
1
+ require 'dor/config'
2
+ require 'dor/exceptions'
3
+
4
+ # ActiveFedora Classes
5
+ require 'dor/base'
6
+ require 'dor/item'
7
+ require 'dor/admin_policy_object'
8
+ require 'dor/workflow_object'
9
+
10
+ # Services
11
+ require 'dor/metadata_service'
12
+ require 'dor/registration_service'
13
+ require 'dor/suri_service'
14
+ require 'dor/workflow_service'
15
+ require 'dor/digital_stacks_service'
16
+ require 'dor/druid_utils'
17
+ require 'dor/sdr_ingest_service'
18
+ require 'dor/cleanup_service'
19
+ require 'dor/provenance_metadata_service'
@@ -0,0 +1,11 @@
1
module Dor

  # Dor object that declares the administrativeMetadata, roleMetadata and
  # defaultObjectRights datastreams, each as a plain NokogiriDatastream.
  class AdminPolicyObject < Base

    %w(administrativeMetadata roleMetadata defaultObjectRights).each do |datastream_name|
      has_metadata :name => datastream_name, :type => ActiveFedora::NokogiriDatastream
    end

  end

end
@@ -0,0 +1,81 @@
1
+ require 'active_fedora'
2
+ require 'datastreams/identity_metadata_ds'
3
+ require 'datastreams/simple_dublin_core_ds'
4
+ require 'datastreams/workflow_ds'
5
+ require 'dor/suri_service'
6
+
7
module Dor

  # Common base class for Dor objects. Declares the DC, RELS-EXT and
  # identityMetadata datastreams and provides helpers for reindexing, raw
  # FOXML retrieval and datastream building.
  class Base < ::ActiveFedora::Base

    attr_reader :workflows

    has_metadata :name => "DC", :type => SimpleDublinCoreDs
    has_metadata :name => "RELS-EXT", :type => ActiveFedora::RelsExtDatastream
    has_metadata :name => "identityMetadata", :type => IdentityMetadataDS

    # Make a random (and harmless) API-M call to get gsearch to reindex the object
    # @param [Array<String>] pids the objects to touch
    # @return [Array] the response code of each PUT, in the order the pids were given
    def self.touch(*pids)
      client = Dor::Config.fedora.client
      pids.collect do |pid|
        response = client["objects/#{pid}/datastreams/DC?dsState=A&ignoreContent=true"].put('', :content_type => 'text/xml')
        response.code
      end
    end

    # Fetches an object's FOXML.
    # @param [String] pid
    # @param [Boolean] interpolate_refs when true, every foxml:contentLocation whose
    #   REF contains "/workflows/" is replaced with a foxml:xmlContent element holding
    #   the fetched document, with namespaces stripped from the fetched nodes
    # @return [Nokogiri::XML::Document]
    def self.get_foxml(pid, interpolate_refs = false)
      foxml = Nokogiri::XML(Dor::Config.fedora.client["objects/#{pid}/objectXML"].get)
      if interpolate_refs
        external_refs = foxml.xpath('//foxml:contentLocation[contains(@REF,"/workflows/")]')
        external_refs.each do |ref|
          begin
            external_doc = Nokogiri::XML(RestClient.get(ref['REF']))
            external_root = external_doc.root
            ref.replace('<foxml:xmlContent/>').first.add_child(external_root)
            external_root.traverse { |node| node.namespace = nil }
          rescue
            # Best effort: a reference that cannot be fetched or inlined is dropped.
            ref.remove
          end
        end
      end
      return foxml
    end

    # @param [Hash] attrs object attributes. When :pid is absent a new id is minted
    #   and the object is flagged as new; otherwise the object is flagged as new
    #   unless :new_object is explicitly false.
    def initialize(attrs = {})
      if attrs[:pid]
        @new_object = (attrs[:new_object] != false)
      else
        # Non-destructive merge: the caller's hash is left untouched.
        attrs = attrs.merge(:pid => Dor::SuriService.mint_id)
        @new_object = true
      end
      @inner_object = Fedora::FedoraObject.new(attrs)
      @datastreams = {}
      @workflows = {}
      configure_defined_datastreams
    end

    # @return the result of IdentityMetadata.from_xml on the identityMetadata
    #   datastream content, or nil when the object has no such datastream
    def identity_metadata
      if self.datastreams.has_key?('identityMetadata')
        IdentityMetadata.from_xml(self.datastreams['identityMetadata'].content)
      else
        nil
      end
    end

    # Self-aware datastream builders
    #
    # Calls build_<datastream>_datastream(ds) and saves the datastream when
    # forced, when the datastream is new, or when its content is empty.
    # @param [String] datastream name of the datastream
    # @param [Boolean] force rebuild even if the datastream already has content
    # @return the datastream
    def build_datastream(datastream, force = false)
      ds = datastreams[datastream]
      if force or ds.new_object? or (ds.content.to_s.empty?)
        builder_method = "build_#{datastream}_datastream".to_sym
        self.send(builder_method, ds)
        ds.save
      end
      return ds
    end

    # Asks the search service to reindex this object.
    def reindex
      Dor::SearchService.reindex(self.pid)
    end

  end
end