dor-services 2.2.4 → 4.4.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (106) hide show
  1. checksums.yaml +15 -0
  2. data/bin/dor-indexer +108 -0
  3. data/bin/dor-indexerd +73 -0
  4. data/bin/nokogiri +19 -0
  5. data/bin/rake +19 -0
  6. data/bin/ruby_noexec_wrapper +14 -0
  7. data/bin/solrizer +19 -0
  8. data/bin/solrizerd +19 -0
  9. data/config/certs/README +1 -0
  10. data/config/config_defaults.yml +62 -0
  11. data/config/dev_console_env.rb.example +67 -0
  12. data/config/predicate_mappings.yml +55 -0
  13. data/lib/dor-services.rb +152 -19
  14. data/lib/dor/config.rb +133 -35
  15. data/lib/dor/datastreams/administrative_metadata_ds.rb +84 -0
  16. data/lib/dor/datastreams/content_metadata_ds.rb +337 -0
  17. data/lib/dor/datastreams/datastream_spec_solrizer.rb +18 -0
  18. data/lib/dor/datastreams/default_object_rights_ds.rb +52 -0
  19. data/lib/dor/datastreams/desc_metadata_ds.rb +39 -0
  20. data/lib/{datastreams → dor/datastreams}/embargo_metadata_ds.rb +25 -20
  21. data/lib/{datastreams → dor/datastreams}/events_ds.rb +14 -9
  22. data/lib/dor/datastreams/identity.xsl +8 -0
  23. data/lib/dor/datastreams/identity_metadata_ds.rb +112 -0
  24. data/lib/dor/datastreams/role_metadata_ds.rb +51 -0
  25. data/lib/dor/datastreams/simple_dublin_core_ds.rb +45 -0
  26. data/lib/dor/datastreams/version_metadata_ds.rb +214 -0
  27. data/lib/dor/datastreams/workflow_definition_ds.rb +113 -0
  28. data/lib/dor/datastreams/workflow_ds.rb +103 -0
  29. data/lib/dor/exceptions.rb +0 -1
  30. data/lib/dor/migrations/content_metadata_ds/change_content_type.rb +7 -0
  31. data/lib/dor/migrations/identifiable/assert_adminPolicy.rb +9 -0
  32. data/lib/dor/migrations/identifiable/fix_model_assertions.rb +13 -0
  33. data/lib/dor/migrations/identifiable/record_remediation.rb +18 -0
  34. data/lib/dor/migrations/identifiable/uriify_augmented_contentlocation_refs.rb +18 -0
  35. data/lib/dor/migrations/identifiable/uriify_contentlocation_refs.rb +18 -0
  36. data/lib/dor/migrations/processable/unify_workflows.rb +17 -0
  37. data/lib/dor/migrations/versionable/add_missing_version_md.rb +9 -0
  38. data/lib/dor/models/admin_policy_object.rb +16 -0
  39. data/lib/dor/models/assembleable.rb +14 -0
  40. data/lib/dor/models/collection.rb +14 -0
  41. data/lib/dor/models/contentable.rb +227 -0
  42. data/lib/dor/models/describable.rb +194 -0
  43. data/lib/dor/models/discoverable.rb +66 -0
  44. data/lib/dor/models/editable.rb +267 -0
  45. data/lib/dor/models/embargoable.rb +97 -0
  46. data/lib/dor/models/eventable.rb +12 -0
  47. data/lib/dor/models/governable.rb +162 -0
  48. data/lib/dor/models/identifiable.rb +211 -0
  49. data/lib/dor/models/item.rb +44 -0
  50. data/lib/dor/models/itemizable.rb +66 -0
  51. data/lib/dor/{mods2dc.xslt → models/mods2dc.xslt} +39 -12
  52. data/lib/dor/models/preservable.rb +50 -0
  53. data/lib/dor/models/processable.rb +229 -0
  54. data/lib/dor/models/publishable.rb +74 -0
  55. data/lib/dor/models/set.rb +12 -0
  56. data/lib/dor/models/shelvable.rb +27 -0
  57. data/lib/dor/models/upgradable.rb +74 -0
  58. data/lib/dor/models/versionable.rb +94 -0
  59. data/lib/dor/models/workflow_object.rb +54 -0
  60. data/lib/dor/services/cleanup_service.rb +47 -0
  61. data/lib/dor/services/digital_stacks_service.rb +55 -0
  62. data/lib/dor/services/merge_service.rb +96 -0
  63. data/lib/dor/{metadata_handlers → services/metadata_handlers}/catalog_handler.rb +0 -2
  64. data/lib/dor/{metadata_handlers → services/metadata_handlers}/mdtoolkit_handler.rb +0 -2
  65. data/lib/dor/{metadata_service.rb → services/metadata_service.rb} +1 -3
  66. data/lib/dor/services/registration_service.rb +181 -0
  67. data/lib/dor/services/sdr_ingest_service.rb +181 -0
  68. data/lib/dor/services/search_service.rb +131 -0
  69. data/lib/dor/services/suri_service.rb +32 -0
  70. data/lib/dor/services/technical_metadata_service.rb +226 -0
  71. data/lib/dor/{tei2dc.xslt → services/tei2dc.xslt} +0 -0
  72. data/lib/dor/utils/ng_tidy.rb +37 -0
  73. data/lib/dor/utils/predicate_patch.rb +23 -0
  74. data/lib/dor/utils/solr_doc_helper.rb +9 -0
  75. data/lib/dor/utils/utc_date_field_mapper.rb +7 -0
  76. data/lib/dor/version.rb +3 -0
  77. data/lib/dor/workflow/document.rb +131 -0
  78. data/lib/dor/workflow/graph.rb +166 -0
  79. data/lib/dor/workflow/process.rb +99 -0
  80. data/lib/gsearch/demoFoxmlToSolr.xslt +340 -122
  81. data/lib/tasks/dor.rake +39 -0
  82. metadata +494 -384
  83. data/lib/datastreams/content_metadata_ds.rb +0 -12
  84. data/lib/datastreams/identity_metadata_ds.rb +0 -28
  85. data/lib/datastreams/ng_tidy.rb +0 -19
  86. data/lib/datastreams/simple_dublin_core_ds.rb +0 -23
  87. data/lib/datastreams/workflow_definition_ds.rb +0 -105
  88. data/lib/datastreams/workflow_ds.rb +0 -16
  89. data/lib/dor/admin_policy_object.rb +0 -11
  90. data/lib/dor/base.rb +0 -81
  91. data/lib/dor/cleanup_service.rb +0 -32
  92. data/lib/dor/digital_stacks_service.rb +0 -82
  93. data/lib/dor/druid_utils.rb +0 -41
  94. data/lib/dor/embargo.rb +0 -41
  95. data/lib/dor/item.rb +0 -141
  96. data/lib/dor/provenance_metadata_service.rb +0 -65
  97. data/lib/dor/registration_service.rb +0 -87
  98. data/lib/dor/rsolr.rb +0 -27
  99. data/lib/dor/sdr_ingest_service.rb +0 -117
  100. data/lib/dor/search_service.rb +0 -86
  101. data/lib/dor/suri_service.rb +0 -37
  102. data/lib/dor/workflow_object.rb +0 -13
  103. data/lib/dor/workflow_service.rb +0 -111
  104. data/lib/xml_models/foxml.rb +0 -261
  105. data/lib/xml_models/identity_metadata/dublin_core.rb +0 -119
  106. data/lib/xml_models/identity_metadata/identity_metadata.rb +0 -288
@@ -0,0 +1,214 @@
1
+ module Dor
2
+
3
+ class VersionTag
4
+ include Comparable
5
+
6
+ attr_reader :major, :minor, :admin
7
+
8
+ def <=>(anOther)
9
+ diff = @major <=> anOther.major
10
+ return diff if diff != 0
11
+ diff = @minor <=> anOther.minor
12
+ return diff if diff != 0
13
+ @admin <=> anOther.admin
14
+ end
15
+
16
+ # @param [String] raw_tag the value of the tag attribute from a Version node
17
+ def self.parse(raw_tag)
18
+ unless(raw_tag =~ /(\d+)\.(\d+)\.(\d+)/)
19
+ return nil
20
+ end
21
+ VersionTag.new $1, $2, $3
22
+ end
23
+
24
+ def initialize(maj, min, adm)
25
+ @major = maj.to_i
26
+ @minor = min.to_i
27
+ @admin = adm.to_i
28
+ end
29
+
30
+ # @param [Symbol] sig which part of the version tag to increment
31
+ # :major, :minor, :admin
32
+ def increment(sig)
33
+ case sig
34
+ when :major
35
+ @major += 1
36
+ @minor = 0
37
+ @admin = 0
38
+ when :minor
39
+ @minor += 1
40
+ @admin = 0
41
+ when :admin
42
+ @admin += 1
43
+ end
44
+ self
45
+ end
46
+
47
+ def to_s
48
+ "#{@major.to_s}.#{@minor.to_s}.#{admin.to_s}"
49
+ end
50
+ end
51
+
52
+ class VersionMetadataDS < ActiveFedora::OmDatastream
53
+ before_create :ensure_non_versionable
54
+
55
+ set_terminology do |t|
56
+ t.root(:path => "versionMetadata")
57
+ t.version do
58
+ t.version_id :path => { :attribute => "versionID" }
59
+ t.tag :path => { :attribute => "tag" }
60
+ t.description
61
+ end
62
+ end
63
+
64
+ # Default versionMetadata xml
65
+ def self.xml_template
66
+ builder = Nokogiri::XML::Builder.new do |xml|
67
+ xml.versionMetadata {
68
+ xml.version(:versionId => '1', :tag => '1.0.0') {
69
+ xml.description 'Initial Version'
70
+ }
71
+ }
72
+ end
73
+ return builder.doc
74
+ end
75
+
76
+ def ensure_non_versionable
77
+ self.versionable = "false"
78
+ end
79
+
80
+ # @param [String] description optional text describing version change
81
+ # @param [Symbol] significance optional; which part of the version tag to increment
82
+ # :major, :minor, :admin (see VersionTag#increment)
83
+ def increment_version(description = nil, significance = nil)
84
+ if( find_by_terms(:version).size == 0)
85
+ v = ng_xml.create_element "version",
86
+ :versionId => '1', :tag => '1.0.0'
87
+ d = ng_xml.create_element "description", "Initial Version"
88
+ ng_xml.root['objectId'] = pid
89
+ ng_xml.root.add_child(v)
90
+ v.add_child d
91
+ else
92
+ current = current_version_node
93
+ current_id = current[:versionId].to_i
94
+ current_tag = VersionTag.parse(current[:tag])
95
+
96
+ v = ng_xml.create_element "version", :versionId => (current_id + 1).to_s
97
+ if(significance && current_tag)
98
+ v[:tag] = current_tag.increment(significance).to_s
99
+ end
100
+ ng_xml.root['objectId'] = pid
101
+ ng_xml.root.add_child(v)
102
+
103
+ if(description)
104
+ d = ng_xml.create_element "description", description
105
+ v.add_child d
106
+ end
107
+ end
108
+ end
109
+
110
+ # @return [String] value of the most current versionId
111
+ def current_version_id
112
+ current_version=current_version_node
113
+ if current_version.nil?
114
+ return '1'
115
+ else
116
+ current_version[:versionId].to_s
117
+ end
118
+ end
119
+
120
+ # @param [Hash] opts optional params
121
+ # @option opts [String] :description describes the version change
122
+ # @option opts [Symbol] :significance which part of the version tag to increment
123
+ # :major, :minor, :admin (see VersionTag#increment)
124
+ def update_current_version(opts = {})
125
+ ng_xml.root['objectId'] = pid
126
+ return if find_by_terms(:version).size == 1
127
+ return if opts.empty?
128
+ current = current_version_node
129
+ if(opts.include? :description)
130
+ d = current.at_xpath('description')
131
+ if(d)
132
+ d.content = opts[:description]
133
+ else
134
+ d_node = ng_xml.create_element "description", opts[:description]
135
+ current.add_child d_node
136
+ end
137
+ end
138
+ if(opts.include? :significance)
139
+ # tricky because if there is no tag, we have to find the newest
140
+ if(current[:tag].nil?)
141
+ current[:tag] = newest_tag.increment(opts[:significance]).to_s
142
+ else
143
+ # get rid of the current tag
144
+ tags = find_by_terms(:version, :tag)
145
+ sorted_tags = tags.map{|t| VersionTag.parse(t.value)}.sort
146
+ current_tag = sorted_tags[sorted_tags.length - 2] # Get the second greatest tag since we are dropping the current, greatest
147
+ current[:tag] = current_tag.increment(opts[:significance]).to_s
148
+ end
149
+
150
+ end
151
+ self.content = ng_xml.to_s
152
+ end
153
+
154
+ # @return [String] The value of the greatest versionId
155
+ def current_version_id
156
+ current_version_node[:versionId].to_s
157
+ end
158
+
159
+ # @return [Boolean] returns true if the current version has a tag and a description, false otherwise
160
+ def current_version_closeable?
161
+ current = current_version_node
162
+ if(current[:tag] && current.at_xpath('description'))
163
+ return true
164
+ else
165
+ return false
166
+ end
167
+ end
168
+
169
+ # @return [String] The tag for the newest version
170
+ def current_tag
171
+ current_version_node[:tag].to_s
172
+ end
173
+
174
+ def tag_for_version(versionId)
175
+ nodes=self.ng_xml.search('//version[@versionId=\''+versionId+'\']')
176
+ if nodes.length == 1
177
+ nodes.first['tag'].to_s
178
+ else
179
+ ''
180
+ end
181
+ end
182
+ # @return [String] The description for the specified version, or empty string if there is no description
183
+ def description_for_version(versionId)
184
+ nodes=self.ng_xml.search('//version[@versionId=\''+versionId+'\']')
185
+ if nodes.length == 1 and nodes.first.at_xpath('description')
186
+ nodes.first.at_xpath('description').content.to_s
187
+ else
188
+ ''
189
+ end
190
+ end
191
+
192
+ # @return [String] The description for the current version
193
+ def current_description
194
+ desc_node=current_version_node.at_xpath('description')
195
+ if desc_node
196
+ return desc_node.content
197
+ end
198
+ ''
199
+ end
200
+
201
+ private
202
+
203
+ # @return [Nokogiri::XML::Node] Node representing the current version
204
+ def current_version_node
205
+ versions = find_by_terms(:version)
206
+ versions.max_by {|v| v[:versionId].to_i }
207
+ end
208
+
209
+ def newest_tag
210
+ tags = find_by_terms(:version, :tag)
211
+ tags.map{|t| VersionTag.parse(t.value)}.max
212
+ end
213
+ end
214
+ end
@@ -0,0 +1,113 @@
1
+ module Dor
2
+ class WorkflowDefinitionDs < ActiveFedora::OmDatastream
3
+ include SolrDocHelper
4
+
5
+ set_terminology do |t|
6
+ t.root(:path => "workflow-def", :index_as => [:not_searchable])
7
+ t.process(:index_as => [:not_searchable])
8
+ end
9
+
10
+ define_template :process do |builder,workflow,attrs|
11
+ prereqs = attrs.delete('prerequisite')
12
+ if prereqs.is_a?(String)
13
+ prereqs = prereqs.split(/\s*,\s*/)
14
+ end
15
+ attrs.keys.each { |k| attrs[k.to_s.dasherize.to_sym] = attrs.delete(k) }
16
+ builder.process(attrs) do |node|
17
+ Array(prereqs).each do |prereq|
18
+ (repo,wf,prereq_name) = prereq.split(/:/)
19
+ if prereq_name.nil?
20
+ prereq_name = repo
21
+ repo = nil
22
+ end
23
+ if (repo == workflow.repo and wf = workflow.name)
24
+ repo = nil
25
+ wf = nil
26
+ end
27
+ attrs = (repo.nil? and wf.nil?) ? {} : { :repository => repo, :workflow => wf }
28
+ node.prereq(attrs) { node.text prereq_name }
29
+ end
30
+ end
31
+ end
32
+
33
+ def self.xml_template
34
+ Nokogiri::XML('<workflow-def/>')
35
+ end
36
+
37
+ def add_process(attributes)
38
+ add_child_node(ng_xml.at_xpath('/workflow-def'), :process, self, attributes)
39
+ end
40
+
41
+ def graph(parent = nil)
42
+ Workflow::Graph.from_processes(self.repo, self.name, self.processes, parent)
43
+ end
44
+
45
+ def processes
46
+ ng_xml.xpath('/workflow-def/process').collect do |node|
47
+ Workflow::Process.new(self.repo, self.name, node)
48
+ end.sort { |a,b| (a.sequence || 0) <=> (b.sequence || 0) }
49
+ end
50
+
51
+ def name
52
+ ng_xml.at_xpath('/workflow-def/@id').to_s
53
+ end
54
+
55
+ def repo
56
+ ng_xml.at_xpath('/workflow-def/@repository').to_s
57
+ end
58
+
59
+ def configuration
60
+ result = ActiveSupport::OrderedHash.new
61
+ result['repository'] = repo
62
+ result['name'] = name
63
+ processes.each { |process| result[process.name] = process.to_hash }
64
+ result
65
+ end
66
+
67
+ def configuration=(hash)
68
+ self.ng_xml = Nokogiri::XML(%{<workflow-def id="#{hash['name']}" repository="#{hash['repository']}"/>})
69
+ i = 0
70
+ hash.each_pair do |k,v|
71
+ if v.is_a?(Hash)
72
+ add_process(v.merge({:name => k, :sequence => i+=1}))
73
+ end
74
+ end
75
+ end
76
+
77
+ # Creates the xml used by Dor::WorkflowService.create_workflow
78
+ # @return [String] An object's initial workflow as defined by the <workflow-def> in content
79
+ def initial_workflow
80
+ doc = Nokogiri::XML("<workflow/>")
81
+ root = doc.root
82
+ root['id'] = name
83
+ processes.each { |proc|
84
+ doc.create_element 'process' do |node|
85
+ node['name'] = proc.name
86
+ if(proc.status)
87
+ node['status'] = proc.status
88
+ node['attempts'] = '1'
89
+ else
90
+ node['status'] = 'waiting'
91
+ end
92
+ node['lifecycle'] = proc.lifecycle if proc.lifecycle
93
+ root.add_child node
94
+ end
95
+ }
96
+ Nokogiri::XML(doc.to_xml) { |x| x.noblanks }.to_xml { |config| config.no_declaration }
97
+ end
98
+
99
+ def to_solr(solr_doc=Hash.new,*args)
100
+ super(solr_doc,*args)
101
+ add_solr_value(solr_doc, "workflow_name", self.name, :symbol, [:searchable])
102
+ processes.each do |p|
103
+ add_solr_value(solr_doc, "process", "#{p.name}|#{p.label}", :symbol, [:displayable])
104
+ end
105
+ solr_doc
106
+ end
107
+
108
+ def to_yaml
109
+ YAML.dump(self.configuration)
110
+ end
111
+
112
+ end
113
+ end
@@ -0,0 +1,103 @@
1
+ module Dor
2
+ class WorkflowDs < ActiveFedora::OmDatastream
3
+ include SolrDocHelper
4
+
5
+ set_terminology do |t|
6
+ t.root(:path=>"workflows")
7
+ t.workflow {
8
+ t.workflowId(:path=>{:attribute => "id"})
9
+ t.process {
10
+ t.name_(:path=>{:attribute=>"name"}, :index_as => [:displayable, :not_searchable])
11
+ t.status(:path=>{:attribute=>"status"}, :index_as => [:displayable, :not_searchable])
12
+ t.timestamp(:path=>{:attribute=>"datetime"}, :index_as => [:displayable, :not_searchable])#, :data_type => :date)
13
+ t.elapsed(:path=>{:attribute=>"elapsed"}, :index_as => [:displayable, :not_searchable])
14
+ t.lifecycle(:path=>{:attribute=>"lifecycle"}, :index_as => [:displayable, :not_searchable])
15
+ t.attempts(:path=>{:attribute=>"attempts"}, :index_as => [:displayable, :not_searchable])
16
+ }
17
+ }
18
+ end
19
+
20
+ def initialize *args
21
+ self.field_mapper = UtcDateFieldMapper.new
22
+ super
23
+ end
24
+
25
+ def get_workflow (wf,repo='dor')
26
+ xml=Dor::WorkflowService.get_workflow_xml(repo, self.pid, wf)
27
+ xml=Nokogiri::XML(xml)
28
+ if xml.xpath('workflow').length == 0
29
+ nil
30
+ else
31
+ Workflow::Document.new(xml.to_s)
32
+ end
33
+ end
34
+
35
+ def [](wf)
36
+ xml=Dor::WorkflowService.get_workflow_xml('dor', self.pid, wf)
37
+ xml=Nokogiri::XML(xml)
38
+ if xml.xpath('workflow').length == 0
39
+ nil
40
+ else
41
+ Workflow::Document.new(xml.to_s)
42
+ end
43
+ end
44
+
45
+ def ensure_xml_loaded
46
+ ng_xml
47
+ self.xml_loaded = true
48
+ end
49
+
50
+ def ng_xml
51
+ @ng_xml ||= Nokogiri::XML::Document.parse(content)
52
+ end
53
+
54
+ def content
55
+ begin
56
+ @content ||= Dor::WorkflowService.get_workflow_xml 'dor', self.pid, nil
57
+ rescue RestClient::ResourceNotFound
58
+ xml = Nokogiri::XML(%{<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n<workflows objectId="#{self.pid}"/>})
59
+ self.digital_object.datastreams.keys.each do |dsid|
60
+ if dsid =~ /WF$/
61
+ ds_content = Nokogiri::XML(Dor::WorkflowService.get_workflow_xml 'dor', self.pid, dsid)
62
+ xml.root.add_child(ds_content.root)
63
+ end
64
+ end
65
+ @content ||= xml.to_xml
66
+ end
67
+ end
68
+
69
+ def workflows
70
+ @workflows ||= self.workflow.nodeset.collect { |wf_node| Workflow::Document.new wf_node.to_xml }
71
+ end
72
+
73
+ def graph(dir=nil)
74
+ result = GraphViz.digraph(self.pid)
75
+ sg = result.add_graph('rank') { |g| g[:rank => 'same'] }
76
+ workflows.each do |wf|
77
+ wf_name = wf.workflowId.first
78
+ unless wf.nil?
79
+ g = wf.graph(result)
80
+ sg.add_node(g.root.id) unless g.nil?
81
+ end
82
+ end
83
+ result['rankdir'] = dir || 'TB'
84
+ result
85
+ end
86
+
87
+ # Finds the first workflow that is expedited, then returns the value of its priority
88
+ #
89
+ # @return [Integer] value of the priority. Defaults to 0 if none of the workflows are expedited
90
+ def current_priority
91
+ cp = workflows.detect {|wf| wf.expedited? }
92
+ return 0 if(cp.nil?)
93
+ cp.priority.to_i
94
+ end
95
+
96
+ def to_solr(solr_doc=Hash.new, *args)
97
+ # super solr_doc, *args
98
+ self.workflows.each { |wf| wf.to_solr(solr_doc, *args) }
99
+ solr_doc
100
+ end
101
+
102
+ end
103
+ end
@@ -9,5 +9,4 @@ module Dor
9
9
  @pid = pid
10
10
  end
11
11
  end
12
-
13
12
  end
@@ -0,0 +1,7 @@
1
+ Dor::ContentMetadataDS.on_upgrade '3.6.0', 'Change contentMetadata type attribute' do |ds|
2
+ translations = { 'googleScannedBook' => 'book', 'etd' => 'file', 'eem' => 'file' }
3
+ translations.any? do |old_type, new_type|
4
+ current_type = (ds.contentType.to_ary.first) rescue ds.contentType
5
+ (current_type == old_type) && (ds.contentType = new_type)
6
+ end
7
+ end
@@ -0,0 +1,9 @@
1
+ Dor::Identifiable.on_upgrade '3.5.0', 'Assert hydra:isGovernedBy' do |obj|
2
+ # Assign hydra:isGovernedBy based on identityMetadata/adminPolicy
3
+ if obj.admin_policy_object.nil?
4
+ apo_id = obj.identityMetadata.adminPolicy.first
5
+ apo_id.present? && obj.admin_policy_object = Dor.find(apo_id) unless apo_id.nil?
6
+ else
7
+ false
8
+ end
9
+ end