dor-services 2.2.4 → 4.4.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (106) hide show
  1. checksums.yaml +15 -0
  2. data/bin/dor-indexer +108 -0
  3. data/bin/dor-indexerd +73 -0
  4. data/bin/nokogiri +19 -0
  5. data/bin/rake +19 -0
  6. data/bin/ruby_noexec_wrapper +14 -0
  7. data/bin/solrizer +19 -0
  8. data/bin/solrizerd +19 -0
  9. data/config/certs/README +1 -0
  10. data/config/config_defaults.yml +62 -0
  11. data/config/dev_console_env.rb.example +67 -0
  12. data/config/predicate_mappings.yml +55 -0
  13. data/lib/dor-services.rb +152 -19
  14. data/lib/dor/config.rb +133 -35
  15. data/lib/dor/datastreams/administrative_metadata_ds.rb +84 -0
  16. data/lib/dor/datastreams/content_metadata_ds.rb +337 -0
  17. data/lib/dor/datastreams/datastream_spec_solrizer.rb +18 -0
  18. data/lib/dor/datastreams/default_object_rights_ds.rb +52 -0
  19. data/lib/dor/datastreams/desc_metadata_ds.rb +39 -0
  20. data/lib/{datastreams → dor/datastreams}/embargo_metadata_ds.rb +25 -20
  21. data/lib/{datastreams → dor/datastreams}/events_ds.rb +14 -9
  22. data/lib/dor/datastreams/identity.xsl +8 -0
  23. data/lib/dor/datastreams/identity_metadata_ds.rb +112 -0
  24. data/lib/dor/datastreams/role_metadata_ds.rb +51 -0
  25. data/lib/dor/datastreams/simple_dublin_core_ds.rb +45 -0
  26. data/lib/dor/datastreams/version_metadata_ds.rb +214 -0
  27. data/lib/dor/datastreams/workflow_definition_ds.rb +113 -0
  28. data/lib/dor/datastreams/workflow_ds.rb +103 -0
  29. data/lib/dor/exceptions.rb +0 -1
  30. data/lib/dor/migrations/content_metadata_ds/change_content_type.rb +7 -0
  31. data/lib/dor/migrations/identifiable/assert_adminPolicy.rb +9 -0
  32. data/lib/dor/migrations/identifiable/fix_model_assertions.rb +13 -0
  33. data/lib/dor/migrations/identifiable/record_remediation.rb +18 -0
  34. data/lib/dor/migrations/identifiable/uriify_augmented_contentlocation_refs.rb +18 -0
  35. data/lib/dor/migrations/identifiable/uriify_contentlocation_refs.rb +18 -0
  36. data/lib/dor/migrations/processable/unify_workflows.rb +17 -0
  37. data/lib/dor/migrations/versionable/add_missing_version_md.rb +9 -0
  38. data/lib/dor/models/admin_policy_object.rb +16 -0
  39. data/lib/dor/models/assembleable.rb +14 -0
  40. data/lib/dor/models/collection.rb +14 -0
  41. data/lib/dor/models/contentable.rb +227 -0
  42. data/lib/dor/models/describable.rb +194 -0
  43. data/lib/dor/models/discoverable.rb +66 -0
  44. data/lib/dor/models/editable.rb +267 -0
  45. data/lib/dor/models/embargoable.rb +97 -0
  46. data/lib/dor/models/eventable.rb +12 -0
  47. data/lib/dor/models/governable.rb +162 -0
  48. data/lib/dor/models/identifiable.rb +211 -0
  49. data/lib/dor/models/item.rb +44 -0
  50. data/lib/dor/models/itemizable.rb +66 -0
  51. data/lib/dor/{mods2dc.xslt → models/mods2dc.xslt} +39 -12
  52. data/lib/dor/models/preservable.rb +50 -0
  53. data/lib/dor/models/processable.rb +229 -0
  54. data/lib/dor/models/publishable.rb +74 -0
  55. data/lib/dor/models/set.rb +12 -0
  56. data/lib/dor/models/shelvable.rb +27 -0
  57. data/lib/dor/models/upgradable.rb +74 -0
  58. data/lib/dor/models/versionable.rb +94 -0
  59. data/lib/dor/models/workflow_object.rb +54 -0
  60. data/lib/dor/services/cleanup_service.rb +47 -0
  61. data/lib/dor/services/digital_stacks_service.rb +55 -0
  62. data/lib/dor/services/merge_service.rb +96 -0
  63. data/lib/dor/{metadata_handlers → services/metadata_handlers}/catalog_handler.rb +0 -2
  64. data/lib/dor/{metadata_handlers → services/metadata_handlers}/mdtoolkit_handler.rb +0 -2
  65. data/lib/dor/{metadata_service.rb → services/metadata_service.rb} +1 -3
  66. data/lib/dor/services/registration_service.rb +181 -0
  67. data/lib/dor/services/sdr_ingest_service.rb +181 -0
  68. data/lib/dor/services/search_service.rb +131 -0
  69. data/lib/dor/services/suri_service.rb +32 -0
  70. data/lib/dor/services/technical_metadata_service.rb +226 -0
  71. data/lib/dor/{tei2dc.xslt → services/tei2dc.xslt} +0 -0
  72. data/lib/dor/utils/ng_tidy.rb +37 -0
  73. data/lib/dor/utils/predicate_patch.rb +23 -0
  74. data/lib/dor/utils/solr_doc_helper.rb +9 -0
  75. data/lib/dor/utils/utc_date_field_mapper.rb +7 -0
  76. data/lib/dor/version.rb +3 -0
  77. data/lib/dor/workflow/document.rb +131 -0
  78. data/lib/dor/workflow/graph.rb +166 -0
  79. data/lib/dor/workflow/process.rb +99 -0
  80. data/lib/gsearch/demoFoxmlToSolr.xslt +340 -122
  81. data/lib/tasks/dor.rake +39 -0
  82. metadata +494 -384
  83. data/lib/datastreams/content_metadata_ds.rb +0 -12
  84. data/lib/datastreams/identity_metadata_ds.rb +0 -28
  85. data/lib/datastreams/ng_tidy.rb +0 -19
  86. data/lib/datastreams/simple_dublin_core_ds.rb +0 -23
  87. data/lib/datastreams/workflow_definition_ds.rb +0 -105
  88. data/lib/datastreams/workflow_ds.rb +0 -16
  89. data/lib/dor/admin_policy_object.rb +0 -11
  90. data/lib/dor/base.rb +0 -81
  91. data/lib/dor/cleanup_service.rb +0 -32
  92. data/lib/dor/digital_stacks_service.rb +0 -82
  93. data/lib/dor/druid_utils.rb +0 -41
  94. data/lib/dor/embargo.rb +0 -41
  95. data/lib/dor/item.rb +0 -141
  96. data/lib/dor/provenance_metadata_service.rb +0 -65
  97. data/lib/dor/registration_service.rb +0 -87
  98. data/lib/dor/rsolr.rb +0 -27
  99. data/lib/dor/sdr_ingest_service.rb +0 -117
  100. data/lib/dor/search_service.rb +0 -86
  101. data/lib/dor/suri_service.rb +0 -37
  102. data/lib/dor/workflow_object.rb +0 -13
  103. data/lib/dor/workflow_service.rb +0 -111
  104. data/lib/xml_models/foxml.rb +0 -261
  105. data/lib/xml_models/identity_metadata/dublin_core.rb +0 -119
  106. data/lib/xml_models/identity_metadata/identity_metadata.rb +0 -288
@@ -1,19 +1,152 @@
1
- require 'dor/config'
2
- require 'dor/exceptions'
3
-
4
- # ActiveFedora Classes
5
- require 'dor/base'
6
- require 'dor/item'
7
- require 'dor/admin_policy_object'
8
- require 'dor/workflow_object'
9
-
10
- # Services
11
- require 'dor/metadata_service'
12
- require 'dor/registration_service'
13
- require 'dor/suri_service'
14
- require 'dor/workflow_service'
15
- require 'dor/digital_stacks_service'
16
- require 'dor/druid_utils'
17
- require 'dor/sdr_ingest_service'
18
- require 'dor/cleanup_service'
19
- require 'dor/provenance_metadata_service'
1
+ require 'active_fedora'
2
+ require 'active_support/core_ext/module/attribute_accessors'
3
+
4
+ module Dor
5
+ @@registered_classes = {}
6
+ mattr_reader :registered_classes
7
+ INDEX_VERSION_FIELD = 'dor_services_version_facet'
8
+
9
+ class << self
10
+
11
# Convenience entry point: hands every argument and the optional block
# straight to Dor::Config.configure and returns whatever that returns.
def configure(*args, &block)
  Dor::Config.configure(*args, &block)
end
14
+
15
# Fetch an object and cast it to the class registered for the objectType
# found in its identityMetadata. Needed when the object is not indexed, or
# when the index lacks the objectType property.
# @param [String] pid The object's PID
# @return the object adapted to its registered class (Dor::Item when the
#   objectType has no registration), or nil when the object reports new_object?
def load_instance(pid)
  ensure_models_loaded!
  fedora_object = Dor::Abstract.find(pid)
  if fedora_object.new_object?
    nil
  else
    type_name = fedora_object.identityMetadata.objectType.first
    target_class = registered_classes[type_name] || Dor::Item
    fedora_object.adapt_to(target_class)
  end
end
27
+
28
# Look the object up in solr first so the right class can be instantiated
# directly, avoiding the overhead of ActiveFedora::Base#adapt_to. When the
# index returns nothing for the PID, fall back to Dor.load_instance.
# @param [String] pid The object's PID
# @param [Hash] opts options passed through to find_all
def find(pid, opts = {})
  indexed_match = find_all(%{id:"#{pid}"}, opts).first
  indexed_match || load_instance(pid)
end
36
+
37
# Run a solr query and return one loaded object per matching document.
#
# With opts[:lightweight] set, documents indexed by dor-services >= 3.1.0 are
# instantiated straight from the solr document; anything else (or any failure
# while doing so) is loaded through load_instance instead.
#
# @param [String] query the solr query
# @param [Hash] opts options handed to SearchService.query; :lightweight is
#   stripped (with a warning) when the installed ActiveFedora is too old.
#   The caller's hash is never modified.
# @return [Array] the loaded objects, in the order solr returned them
def find_all(query, opts = {})
  ensure_models_loaded!
  opts = opts.dup # work on a copy so the caller's hash is left untouched
  if opts[:lightweight] && Gem::Version.new(ActiveFedora::VERSION) < Gem::Version.new('4.0.0.rc9')
    ActiveFedora.logger.warn("Loading of lightweight objects requires ActiveFedora >= 4.0.0")
    opts.delete(:lightweight)
  end

  # Loop-invariant values, computed once rather than per document.
  lightweight_floor = Gem::Version.new('3.1.0')
  type_field = ActiveFedora::SolrService.solr_name('objectType', :string)

  resp = SearchService.query(query, opts)
  resp.docs.collect do |solr_doc|
    # Documents without the version field are treated as version 0.0.0.
    doc_version = Gem::Version.new(Array(solr_doc[INDEX_VERSION_FIELD]).first || '0.0.0')
    object_type = Array(solr_doc[type_field]).first
    object_class = registered_classes[object_type] || ActiveFedora::Base
    if opts[:lightweight] && doc_version >= lightweight_floor
      begin
        object_class.load_instance_from_solr(solr_doc['id'], solr_doc)
      rescue StandardError => e
        # Only ordinary errors trigger the fallback; signals and exits propagate.
        ActiveFedora.logger.warn("Exception: '#{e.message}' trying to load #{solr_doc['id']} from solr. Loading from Fedora")
        load_instance(solr_doc['id'])
      end
    else
      load_instance(solr_doc['id'])
    end
  end
end
63
+
64
# Reference each model constant so that its autoload (declared further down
# in this module) fires before any lookup that relies on the models.
# @return [Array] the referenced model classes
def ensure_models_loaded!
  models = [Item, Set, Collection, AdminPolicyObject, WorkflowObject]
  models
end
67
+
68
# @return [String] the directory that contains this source file
def root
  this_file = __FILE__
  File.dirname(this_file)
end
71
+ end
72
+
73
+ require 'dor/version'
74
+ require 'dor/config'
75
+ require 'dor/exceptions'
76
+
77
+ # patches, utilities and helpers
78
+ require 'dor/utils/ng_tidy'
79
+ require 'dor/utils/solr_doc_helper'
80
+ require 'dor/utils/utc_date_field_mapper'
81
+ require 'dor/utils/predicate_patch'
82
+
83
+ require 'dor/datastreams/datastream_spec_solrizer'
84
+
85
+ require 'druid-tools'
86
+
87
+ # datastreams
88
+ autoload :AdministrativeMetadataDS, 'dor/datastreams/administrative_metadata_ds'
89
+ autoload :ContentMetadataDS, 'dor/datastreams/content_metadata_ds'
90
+ autoload :DescMetadataDS, 'dor/datastreams/desc_metadata_ds'
91
+ autoload :EmbargoMetadataDS, 'dor/datastreams/embargo_metadata_ds'
92
+ autoload :EventsDS, 'dor/datastreams/events_ds'
93
+ autoload :IdentityMetadataDS, 'dor/datastreams/identity_metadata_ds'
94
+ autoload :RoleMetadataDS, 'dor/datastreams/role_metadata_ds'
95
+ autoload :WorkflowDefinitionDs, 'dor/datastreams/workflow_definition_ds'
96
+ autoload :WorkflowDs, 'dor/datastreams/workflow_ds'
97
+ autoload :VersionMetadataDS, 'dor/datastreams/version_metadata_ds'
98
+ autoload :DefaultObjectRightsDS, 'dor/datastreams/default_object_rights_ds'
99
+ ::Object.autoload :SimpleDublinCoreDs, 'dor/datastreams/simple_dublin_core_ds'
100
+
101
+ # DOR Concerns
102
+ autoload :Identifiable, 'dor/models/identifiable'
103
+ autoload :Itemizable, 'dor/models/itemizable'
104
+ autoload :Processable, 'dor/models/processable'
105
+ autoload :Governable, 'dor/models/governable'
106
+ autoload :Describable, 'dor/models/describable'
107
+ autoload :Publishable, 'dor/models/publishable'
108
+ autoload :Shelvable, 'dor/models/shelvable'
109
+ autoload :Embargoable, 'dor/models/embargoable'
110
+ autoload :Preservable, 'dor/models/preservable'
111
+ autoload :Assembleable, 'dor/models/assembleable'
112
+ autoload :Upgradable, 'dor/models/upgradable'
113
+ autoload :Eventable, 'dor/models/eventable'
114
+ autoload :Versionable, 'dor/models/versionable'
115
+ autoload :Contentable, 'dor/models/contentable'
116
+ autoload :Editable, 'dor/models/editable'
117
+ autoload :Discoverable, 'dor/models/discoverable'
118
+
119
+
120
+ # ActiveFedora Classes
121
+ autoload :Abstract, 'dor/models/item'
122
+ autoload :Item, 'dor/models/item'
123
+ autoload :Set, 'dor/models/set'
124
+ autoload :Collection, 'dor/models/collection'
125
+ autoload :AdminPolicyObject, 'dor/models/admin_policy_object'
126
+ autoload :WorkflowObject, 'dor/models/workflow_object'
127
+
128
+ # Services
129
+ autoload :SearchService, 'dor/services/search_service'
130
+ autoload :MetadataService, 'dor/services/metadata_service'
131
+ autoload :RegistrationService, 'dor/services/registration_service'
132
+ autoload :SuriService, 'dor/services/suri_service'
133
+ autoload :WorkflowService, 'dor/services/workflow_service'
134
+ autoload :DigitalStacksService, 'dor/services/digital_stacks_service'
135
+ autoload :SdrIngestService, 'dor/services/sdr_ingest_service'
136
+ autoload :CleanupService, 'dor/services/cleanup_service'
137
+ autoload :ProvenanceMetadataService, 'dor/services/provenance_metadata_service'
138
+ autoload :TechnicalMetadataService, 'dor/services/technical_metadata_service'
139
+ autoload :MergeService, 'dor/services/merge_service'
140
+
141
+ # Versioning Classes
142
+ module Versioning
143
+ autoload :FileInventoryDifference, 'dor/versioning/file_inventory_difference'
144
+ end
145
+
146
+ # Workflow Classes
147
+ module Workflow
148
+ autoload :Graph, 'dor/workflow/graph'
149
+ autoload :Process, 'dor/workflow/process'
150
+ autoload :Document, 'dor/workflow/document'
151
+ end
152
+ end
@@ -1,45 +1,143 @@
1
- require 'mod_cons'
1
+ require 'confstruct/configuration'
2
+ require 'rsolr-ext'
3
+ require 'stomp'
4
+ require 'yaml'
2
5
 
3
6
  module Dor
4
- Config = ModCons::Configuration.new(:'Dor::Config')
5
-
6
- Config.declare do
7
- fedora do
8
- url nil
9
- safeurl nil
10
- cert_file nil
11
- key_file nil
12
- key_pass ''
13
-
14
- instance_eval do
15
- def client
16
- RestClient::Resource.new(
17
- self.url,
18
- :ssl_client_cert => OpenSSL::X509::Certificate.new(File.read(self.cert_file)),
19
- :ssl_client_key => OpenSSL::PKey::RSA.new(File.read(self.key_file), self.key_pass)
20
- )
7
+ class Configuration < Confstruct::Configuration
8
+ include ActiveSupport::Callbacks
9
+ define_callbacks :initialize
10
+ define_callbacks :configure
11
+
12
# Build the configuration through the superclass constructor, then run the
# :initialize callbacks (the callback body itself is an empty block).
def initialize(*args)
  super(*args)
  run_callbacks(:initialize) {}
end
16
+
17
# Apply new settings through the superclass inside the :configure callback
# chain, then pass the workflow and dor_services URLs to Dor::WorkflowService.
# $-v is set to nil for the duration of the callbacks and restored afterwards,
# even if they raise.
# @return the value returned by the superclass configure (self if the
#   callback chain never reached it)
def configure(*args)
  configured = self
  saved_verbosity = $-v
  $-v = nil
  begin
    run_callbacks(:configure) { configured = super(*args) }
  ensure
    $-v = saved_verbosity
  end
  Dor::WorkflowService.configure(configured.workflow.url, :dor_services_url => configured.dor_services.url)
  configured
end
31
+
32
# Fetch a JSON document from +url+ and apply it as configuration.
# @param url the location to GET (requested with Accept: application/json)
# @param cert_file, key_file, key_pass SSL client credentials; default to
#   the values under Config.ssl
# @return the result of #configure
def autoconfigure(url, cert_file=Config.ssl.cert_file, key_file=Config.ssl.key_file, key_pass=Config.ssl.key_pass)
  rest_client = make_rest_client(url, cert_file, key_file, key_pass)
  response_body = rest_client.get(:accept => 'application/json')
  remote_settings = Confstruct::Configuration.symbolize_hash(JSON.parse(response_body))
  configure(remote_settings)
end
37
+
38
# Returns a duplicate of this configuration (made with #dup); nothing is
# removed or masked by this method itself.
def sanitize
  dup
end
41
+
42
# Build a RestClient::Resource for +url+, attaching an SSL client certificate
# and/or key when paths for them are supplied.
# @param url the resource URL
# @param cert path to the client certificate file (optional)
# @param key path to the RSA private key file (optional)
# @param pass passphrase used to read the key
def make_rest_client(url, cert=Config.ssl.cert_file, key=Config.ssl.key_file, pass=Config.ssl.key_pass)
  ssl_options = {}
  if cert
    ssl_options[:ssl_client_cert] = OpenSSL::X509::Certificate.new(File.read(cert))
  end
  if key
    ssl_options[:ssl_client_key] = OpenSSL::PKey::RSA.new(File.read(key), pass)
  end
  RestClient::Resource.new(url, ssl_options)
end
48
+
49
# Open an RSolr::Ext connection using Config.solrizer.opts overlaid with
# +add_opts+; the :url always comes from Config.solrizer.url.
def make_solr_connection(add_opts={})
  connection_opts = Config.solrizer.opts.merge(add_opts)
  ::RSolr::Ext.connect(connection_opts.merge(:url => Config.solrizer.url))
end
55
+
56
# After construction, merge deferred (lazily evaluated) helpers into the
# configuration: REST clients for fedora, sdr and gsearch, a credential-free
# copy of the fedora URL, and stomp connection/client objects.
set_callback :initialize, :after do |config|
  # The fedora URL with user and password blanked out; nil if it cannot be parsed.
  safe_fedora_url = Confstruct.deferred do |c|
    begin
      parsed = URI.parse(config.fedora.url)
      parsed.user = parsed.password = nil
      parsed.to_s
    rescue URI::InvalidURIError
      nil
    end
  end

  fedora_defaults = {
    :client => Confstruct.deferred { |c| config.make_rest_client c.url },
    :safeurl => safe_fedora_url
  }
  sdr_defaults = {
    :rest_client => Confstruct.deferred { |c| config.make_rest_client c.url },
  }
  gsearch_defaults = {
    :rest_client => Confstruct.deferred { |c| config.make_rest_client c.rest_url },
    :client => Confstruct.deferred { |c| config.make_rest_client c.url }
  }
  stomp_defaults = {
    :connection => Confstruct.deferred { |c| Stomp::Connection.new c.user, c.password, c.host, c.port, true, 5, { 'client-id' => c.client_id }},
    :client => Confstruct.deferred { |c| Stomp::Client.new c.user, c.password, c.host, c.port }
  }

  config.deep_merge!({
    :fedora => fedora_defaults,
    :sdr => sdr_defaults,
    :gsearch => gsearch_defaults,
    :stomp => stomp_defaults
  })
  true
end
84
+
85
# After every #configure call: default the stomp host, migrate deprecated
# fedora.* SSL settings to ssl.*, and (re)wire ActiveFedora's Fedora and solr
# connections from the current settings.
set_callback :configure, :after do |config|
  # Best effort: default the stomp host to the Fedora host. Any ordinary
  # error (e.g. an unparsable URL) simply leaves the host unset.
  begin
    config[:stomp][:host] ||= URI.parse(config.fedora.url).host
  rescue StandardError
    nil
  end

  # Backtrace for deprecation warnings, trimmed of callback/config frames so
  # the warning points at the caller. Built once, not once per key.
  deprecation_stack = caller.dup
  deprecation_stack.shift while deprecation_stack[0] =~ %r{(active_support/callbacks|dor/config|dor-services)\.rb}

  [:cert_file, :key_file, :key_pass].each do |key|
    next unless config.fedora[key].present?
    ActiveSupport::Deprecation.warn "Dor::Config -- fedora.#{key.to_s} is deprecated. Please use ssl.#{key.to_s} instead.", deprecation_stack
    # An explicitly configured ssl.* value wins over the deprecated one.
    config.ssl[key] = config.fedora[key] unless config.ssl[key].present?
    config.fedora.delete(key)
  end

  if ActiveFedora.respond_to?(:configurator)
    # ActiveFedora versions with a configurator hook: only solr needs wiring here.
    if config.solrizer.url.present?
      ActiveFedora::SolrService.register
      ActiveFedora::SolrService.instance.instance_variable_set :@conn, config.make_solr_connection
    end
  else
    ActiveFedora::RubydoraConnection.connect config.fedora_config if config.fedora.url.present?
    if config.solrizer.url.present?
      ActiveFedora::SolrService.register config.solrizer.url, config.solrizer.opts
      conn = ActiveFedora::SolrService.instance.conn.connection
      if config.ssl.cert_file.present?
        conn.use_ssl = true
        conn.cert = OpenSSL::X509::Certificate.new(File.read(config.ssl.cert_file))
        conn.key = OpenSSL::PKey::RSA.new(File.read(config.ssl.key_file),config.ssl.key_pass) if config.ssl.key_file.present?
        # NOTE(review): server certificate verification is disabled here;
        # confirm this is intended before relying on it in production.
        conn.verify_mode = OpenSSL::SSL::VERIFY_NONE
      end
    end
    ActiveFedora.init
    ActiveFedora.fedora_config_path = File.expand_path('../../../config/dummy.yml', __FILE__)
  end
end
119
+
120
+ # Act like an ActiveFedora.configurator
121
+
122
# Intentionally does nothing; accepts and ignores any arguments.
def init(*args)
end
123
+
124
# Connection options for Fedora: the credential-free URL, the user and
# password parsed out of fedora.url, and SSL client cert/key objects when the
# corresponding ssl.* file settings are present.
# @return [Hash]
def fedora_config
  credentials = URI.parse(self.fedora.url)
  opts = {
    :url => self.fedora.safeurl,
    :user => credentials.user,
    :password => credentials.password
  }
  if self.ssl.cert_file.present?
    opts[:ssl_client_cert] = OpenSSL::X509::Certificate.new(File.read(self.ssl.cert_file))
  end
  if self.ssl.key_file.present?
    opts[:ssl_client_key] = OpenSSL::PKey::RSA.new(File.read(self.ssl.key_file),self.ssl.key_pass)
  end
  opts
end
131
+
132
# @return [Hash] a single-entry hash holding the configured solrizer URL
def solr_config
  solr_url = self.solrizer.url
  { :url => solr_url }
end
135
+
136
# Parse and return config/predicate_mappings.yml, located relative to this file.
def predicate_config
  mappings_path = File.expand_path('../../../config/predicate_mappings.yml',__FILE__)
  YAML.load(File.read(mappings_path))
end
42
139
  end
43
-
44
- end
45
140
 
141
+ Config = Configuration.new(YAML.load(File.read(File.expand_path('../../../config/config_defaults.yml', __FILE__))))
142
+ ActiveFedora.configurator = Config if ActiveFedora.respond_to?(:configurator)
143
+ end
@@ -0,0 +1,84 @@
1
module Dor
  # OM datastream for the administrativeMetadata XML: declares the terminology
  # used to read/index it and the node templates used to add elements to it.
  class AdministrativeMetadataDS < ActiveFedora::OmDatastream

    set_terminology do |t|
      t.root :path => 'administrativeMetadata', :index_as => [:not_searchable]
      t.metadata_format :path => 'descMetadata/format'
      t.metadata_source :path => 'descMetadata/source'
      t.descMetadata do
        t.source
        t.format
      end
      # Placeholders for existing defined stanzas to be fleshed out as needed
      t.contact :index_as => [:not_searchable]
      t.rights :index_as => [:not_searchable]
      t.relationships :index_as => [:not_searchable]
      t.registration :index_as => [:not_searchable] do
        t.agreementId
        t.itemTag
        t.workflow_id :path => 'workflow/@id', :index_as => [:facetable]
        t.default_collection :path => 'collection/@id', :index_as => [:facetable]
      end
      t.workflow :path => 'registration/workflow'
      t.deposit :index_as => [:not_searchable]

      t.accessioning :index_as => [:not_searchable] do
        t.workflow_id :path => 'workflow/@id', :index_as => [:facetable]
      end

      t.preservation :index_as => [:not_searchable]
      t.dissemination :index_as => [:not_searchable] do
        t.harvester
        t.releaseDelayLimit
      end
    end

    define_template :agreementId do |xml|
      xml.administrativeMetadata {
        xml.registration {
          xml.agreementId
        }
      }
    end

    # NOTE(review): unlike the other templates this one is not wrapped in
    # <administrativeMetadata>; left as found -- confirm whether intended.
    define_template :metadata_format do |xml|
      xml.descMetadata {
        xml.format
      }
    end

    define_template :metadata_source do |xml|
      xml.administrativeMetadata {
        xml.descMetadata {
          xml.source
        }
      }
    end

    define_template :registration do |xml|
      xml.administrativeMetadata {
        xml.registration {
          xml.workflow(:id => '')
        }
      }
    end

    # This template used to be declared twice; an earlier declaration that
    # emitted <collection id=""> was shadowed by this one and has been
    # removed, so the template in effect is unchanged.
    # NOTE(review): the default_collection term reads collection/@id, so the
    # removed variant may have been the intended one -- confirm.
    define_template :default_collection do |xml|
      xml.administrativeMetadata {
        xml.registration {
          xml.collection
        }
      }
    end

    # @return [Nokogiri::XML::Document] a document holding only an empty
    #   administrativeMetadata root element
    def self.xml_template
      Nokogiri::XML::Builder.new do |xml|
        xml.administrativeMetadata {
        }
      end.doc
    end
  end

end
@@ -0,0 +1,337 @@
1
+ module Dor
2
+ class ContentMetadataDS < ActiveFedora::OmDatastream
3
+ include Upgradable
4
+ include SolrDocHelper
5
+
6
# OM terminology for contentMetadata: the root content type, each resource
# with its attributes and files, and a proxy exposing the ids of files whose
# shelve attribute is "yes".
set_terminology do |t|
  t.root(:path => 'contentMetadata', :index_as => [:not_searchable])
  t.contentType(:path => '/contentMetadata/@type', :index_as => [:not_searchable])
  t.resource(:index_as => [:not_searchable]) do
    t.id_(:path => { :attribute => 'id' })
    t.sequence(:path => { :attribute => 'sequence' }) #, :data_type => :integer
    t.type_(:path => { :attribute => 'type' }, :index_as => [:displayable])
    t.attribute(:path => 'attr', :index_as => [:not_searchable]) do
      t.name(:path => { :attribute => 'name' }, :index_as => [:not_searchable])
    end
    t.file(:index_as => [:not_searchable]) do
      t.id_(:path => { :attribute => 'id' })
      t.mimeType(:path => { :attribute => 'mimeType' }, :index_as => [:displayable])
      t.dataType(:path => { :attribute => 'dataType' }, :index_as => [:displayable])
      t.size(:path => { :attribute => 'size' }, :index_as => [:displayable]) #, :data_type => :long
      t.shelve(:path => { :attribute => 'shelve' }, :index_as => [:not_searchable]) #, :data_type => :boolean
      t.publish(:path => { :attribute => 'publish' }, :index_as => [:not_searchable]) #, :data_type => :boolean
      t.preserve(:path => { :attribute => 'preserve' }, :index_as => [:not_searchable]) #, :data_type => :boolean
      t.checksum do
        t.type_(:path => { :attribute => 'type' })
      end
    end
    t.shelved_file(:path => 'file', :attributes => {:shelve=>'yes'}, :index_as => [:not_searchable]) do
      t.id_(:path => { :attribute => 'id' }, :index_as => [:displayable, :searchable])
    end
  end
  t.shelved_file_id(:proxy => [:resource, :shelved_file, :id], :index_as => [:displayable, :searchable])
end
34
+
35
# Build the public view of the content metadata on a copy of the XML:
# drops resources with no deliverable/published file, drops files that are
# neither deliver="yes" nor publish="yes", strips the
# preserve/shelve/publish/deliver attributes, and removes checksum elements.
# @return the pruned copy; the datastream's own XML is not modified
def public_xml
  public_doc = ng_xml.clone
  [
    '/contentMetadata/resource[not(file[(@deliver="yes" or @publish="yes")])]',
    '/contentMetadata/resource/file[not(@deliver="yes" or @publish="yes")]'
  ].each do |unpublished|
    public_doc.xpath(unpublished).each(&:remove)
  end
  file_nodes = public_doc.xpath('/contentMetadata/resource/file')
  file_nodes.xpath('@preserve|@shelve|@publish|@deliver').each(&:remove)
  public_doc.xpath('/contentMetadata/resource/file/checksum').each(&:remove)
  public_doc
end
43
# Append a <file> element to an existing resource and save the datastream.
#
# @param file [Hash] :name (required), optional :shelve, :publish, :preserve
#   (written as '' when absent), :md5, :sha1, :size and :mime_type
# @param resource_name [String] id of the resource that receives the file
# @raise [RuntimeError] when no resource has that id
def add_file(file, resource_name)
  xml = ng_xml
  resource_node = xml.search('//resource[@id=\'' + resource_name + '\']').first
  raise 'resource doesnt exist.' if resource_node.nil?

  file_node = Nokogiri::XML::Node.new('file', xml)
  file_node['id'] = file[:name]
  [:shelve, :publish, :preserve].each do |flag|
    file_node[flag.to_s] = file[flag] || ''
  end
  resource_node.add_child(file_node)

  # One <checksum type="..."> child per digest that was supplied.
  [:md5, :sha1].each do |algorithm|
    next unless file[algorithm]
    checksum_node = Nokogiri::XML::Node.new('checksum', xml)
    checksum_node['type'] = algorithm.to_s
    checksum_node.content = file[algorithm]
    file_node.add_child(checksum_node)
  end

  file_node['size'] = file[:size] if file[:size]
  # XML attribute names are case-sensitive and the terminology reads
  # "mimeType"; the lower-case spelling used previously was never picked up.
  file_node['mimeType'] = file[:mime_type] if file[:mime_type]

  self.content = xml.to_s
  save
end
78
+
79
# Insert a new <resource> (with its files) at the given sequence position
# and save the datastream. Existing resources whose sequence is at or after
# +position+ are shifted up by one to make room.
#
# @param files [Array<Hash>] one hash per file: :name, optional :shelve,
#   :publish, :preserve (written as '' when absent), :md5, :sha1, :size,
#   :mime_type
# @param resource_name [String] id for the new resource
# @param position [Integer, String] sequence number for the new resource
# @param type [String] value of the resource's type attribute
# @raise [RuntimeError] when a resource with that id already exists
def add_resource(files, resource_name, position, type="file")
  xml = ng_xml
  if xml.search('//resource[@id=\'' + resource_name + '\']').length > 0
    raise 'resource ' + resource_name + ' already exists'
  end
  position = position.to_i

  # Make room: every resource at or after the insertion point moves up one.
  xml.search('//resource').each do |existing|
    sequence = existing['sequence'].to_i
    existing['sequence'] = (sequence + 1).to_s if sequence >= position
  end

  resource_node = Nokogiri::XML::Node.new('resource', xml)
  resource_node['sequence'] = position.to_s
  resource_node['id'] = resource_name
  resource_node['type'] = type

  files.each do |file|
    file_node = Nokogiri::XML::Node.new('file', xml)
    [:shelve, :publish, :preserve].each do |flag|
      file_node[flag.to_s] = file[flag] || ''
    end
    file_node['id'] = file[:name]
    resource_node.add_child(file_node)

    [:md5, :sha1].each do |algorithm|
      next if file[algorithm].nil?
      checksum_node = Nokogiri::XML::Node.new('checksum', xml)
      checksum_node['type'] = algorithm.to_s
      checksum_node.content = file[algorithm]
      file_node.add_child(checksum_node)
    end
    file_node['size'] = file[:size] if file[:size]
    # Same attribute spelling the terminology reads (mimeType).
    file_node['mimeType'] = file[:mime_type] if file[:mime_type]
  end

  xml.search('//contentMetadata').first.add_child(resource_node)
  self.content = xml.to_s
  save
end
133
+
134
# Remove the resource with the given id, close the gap it leaves in the
# sequence numbering, and save the datastream.
#
# @param resource_name [String] id of the resource to remove
# @raise [RuntimeError] unless exactly one resource has that id
def remove_resource resource_name
  xml = ng_xml
  matches = xml.search('//resource[@id=\'' + resource_name + '\']')
  raise 'Resource is missing or duplicated!' if matches.length != 1

  removed = matches.first
  removed_sequence = removed['sequence'].to_i
  removed.remove

  # Every resource that came after the removed one moves down by one.
  xml.search('//resource').each do |resource|
    sequence = resource['sequence'].to_i
    resource['sequence'] = (sequence - 1).to_s if sequence > removed_sequence
  end

  self.content = xml.to_s
  save
end
156
+
157
# Delete every <file> element whose id equals +file_name+, then save.
def remove_file file_name
  xml = ng_xml
  doomed = xml.search('//file[@id=\''+file_name+'\']')
  doomed.each(&:remove)
  self.content = xml.to_s
  save
end
165
# Overwrite the publish, shelve and preserve attributes of the first <file>
# whose id equals +file_name+, then save.
def update_attributes file_name, publish, shelve, preserve
  xml = ng_xml
  target = xml.search('//file[@id=\''+file_name+'\']').first
  { 'shelve' => shelve, 'publish' => publish, 'preserve' => preserve }.each do |attribute, value|
    target[attribute] = value
  end
  self.content = xml.to_s
  save
end
174
# Update the first <file> whose id equals +old_file_id+: rename it, refresh
# its md5/sha1 checksums and copy over size/shelve/preserve/publish when
# supplied, then save.
#
# @param file [Hash] :name (new id), optional :md5, :sha1, :size, :shelve,
#   :preserve, :publish
# @param old_file_id [String] the file's current id
def update_file file, old_file_id
  xml = ng_xml
  file_node = xml.search('//file[@id=\''+old_file_id+'\']').first
  file_node['id'] = file[:name]

  [:md5, :sha1].each do |algorithm|
    next if file[algorithm].nil?
    # Look the checksum up on the node itself: after the rename above, a
    # document-wide search by the old id no longer finds it, which used to
    # append a duplicate checksum on every update.
    checksum_node = file_node.xpath("checksum[@type='#{algorithm}']").first
    if checksum_node.nil?
      checksum_node = Nokogiri::XML::Node.new('checksum', xml)
      file_node.add_child(checksum_node)
    end
    checksum_node['type'] = algorithm.to_s
    checksum_node.content = file[algorithm]
  end

  [:size, :shelve, :preserve, :publish].each do |attribute|
    file_node[attribute.to_s] = file[attribute] if file[attribute]
  end

  self.content = xml.to_s
  save
end
211
# Solrizing through the terminology would be painfully slow on large
# contentMetadata streams, so the relevant elements are picked out directly.
#
# Nothing is added unless the root element has a type attribute. Otherwise
# adds: content_type, content_file_count, shelved_content_file_count,
# resource_count, preserved_size, one "<type>_resource_count" per resource
# type, and first_shelved_image (the first shelved file, in sequence order,
# whose id ends in jp2).
# @param solr_doc [Hash] document to add values to
# @return [Hash] the same solr_doc
def to_solr(solr_doc=Hash.new, *args)
  doc = ng_xml
  return solr_doc unless doc.root['type']

  file_total = 0
  shelved_total = 0
  resource_total = 0
  preserved_bytes = 0
  type_tally = {}
  first_shelved_image = nil

  add_solr_value(solr_doc, "content_type", doc.root['type'], :string, [:facetable])

  ordered_resources = doc.xpath('contentMetadata/resource').sort { |a,b| a['sequence'].to_i <=> b['sequence'].to_i }
  ordered_resources.each do |resource|
    resource_total += 1
    kind = resource['type']
    type_tally[kind] = (type_tally[kind] || 0) + 1 if kind

    resource.xpath('file').each do |file|
      file_total += 1
      if file['shelve'] == 'yes'
        shelved_total += 1
        first_shelved_image = file['id'] if first_shelved_image.nil? && file['id'].match(/jp2$/)
      end
      preserved_bytes += file['size'].to_i if file['preserve'] == 'yes'
    end
  end

  add_solr_value(solr_doc, "content_file_count", file_total.to_s, :string, [:searchable, :displayable])
  add_solr_value(solr_doc, "shelved_content_file_count", shelved_total.to_s, :string, [:searchable, :displayable])
  add_solr_value(solr_doc, "resource_count", resource_total.to_s, :string, [:searchable, :displayable])
  add_solr_value(solr_doc, "preserved_size", preserved_bytes.to_s, :string, [:searchable, :displayable])
  type_tally.each do |kind, count|
    add_solr_value(solr_doc, kind+"_resource_count", count.to_s, :string, [:searchable, :displayable])
  end
  unless first_shelved_image.nil?
    add_solr_value(solr_doc, "first_shelved_image", first_shelved_image, :string, [:displayable])
  end
  solr_doc
end
258
# Change the id of the first <file> whose id equals +old_name+, then save.
def rename_file old_name, new_name
  xml = ng_xml
  target = xml.search('//file[@id=\''+old_name+'\']').first
  target['id'] = new_name
  self.content = xml.to_s
  save
end
265
+
266
# Set the <label> of a resource, creating the element when the resource has
# none. Only the in-memory XML is changed; this method does not call save.
# @raise [RuntimeError] unless exactly one resource has the given id
def update_resource_label resource_name, new_label
  xml = ng_xml
  resource_xpath = '//resource[@id=\''+resource_name+'\']'
  resources = xml.search(resource_xpath)
  raise 'Resource not found or duplicate found.' unless resources.length == 1

  existing_labels = xml.search(resource_xpath + '/label')
  if existing_labels.length == 0
    # no label yet: build one and attach it to the resource
    fresh_label = Nokogiri::XML::Node.new('label',xml)
    fresh_label.content = new_label
    resources.first.add_child(fresh_label)
  else
    existing_labels.first.content = new_label
  end
end
282
# Change the type attribute of a resource. Only the in-memory XML is
# changed; this method does not call save.
#
# @param resource [String] id of the resource to update
# @param new_type [String] the new value for its type attribute
# @raise [RuntimeError] unless exactly one resource has that id
def update_resource_type resource, new_type
  # The lookup previously referenced an undefined name instead of the
  # +resource+ parameter, so every call failed with NameError.
  matches = ng_xml.search('//resource[@id=\'' + resource + '\']')
  raise 'Resource not found or duplicate found.' if matches.length != 1
  matches.first['type'] = new_type
end
290
+
291
# Move a resource to a new sequence position, shifting the resources between
# the old and new positions by one so the numbering stays contiguous. Only
# the in-memory XML is changed; this method does not call save.
#
# @param resource_name [String] id of the resource to move
# @param new_position [Integer, String] target sequence number
# @raise [RuntimeError] unless exactly one resource has that id
def move_resource resource_name, new_position
  xml = ng_xml
  matches = xml.search('//resource[@id=\'' + resource_name + '\']')
  raise 'Resource not found or duplicate found.' if matches.length != 1

  moving = matches.first
  old_position = moving['sequence'].to_i
  new_position = new_position.to_i

  xml.search('//resource').each do |resource|
    next if resource['id'] == resource_name # the moved resource is set last
    sequence = resource['sequence'].to_i
    if new_position > old_position
      # Moving later: everything in (old, new] slides down one slot.
      resource['sequence'] = (sequence - 1).to_s if sequence > old_position && sequence <= new_position
    elsif sequence >= new_position && sequence < old_position
      # Moving earlier: everything in [new, old) slides up one slot.
      resource['sequence'] = (sequence + 1).to_s
    end
  end
  moving['sequence'] = new_position.to_s
end
322
# Switch the content type and, when the current content type matches, the
# type of every matching resource. The serialized XML is assigned to
# content; this method does not call save.
#@param old_type [String] content type to look for on contentMetadata, ex book
#@param old_resource_type [String] resource type to be replaced
#@param new_type [String] the new content type
#@param new_resource_type [String] the new type for the matching resources
def set_content_type old_type, old_resource_type, new_type, new_resource_type
  xml = ng_xml
  resource_xpath = '//resource[@type=\''+old_resource_type+'\']'
  xml.search('/contentMetadata[@type=\''+old_type+'\']').each do |content_node|
    content_node['type'] = new_type
    xml.search(resource_xpath).each { |resource_node| resource_node['type'] = new_resource_type }
  end
  self.content = xml.to_s
end
335
+ end
336
+
337
+ end