active_fedora-datastreams 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
module ActiveFedora
  module Datastreams
    # Release version of the active_fedora-datastreams gem.
    VERSION = "0.1.0".freeze
  end
end
@@ -0,0 +1,71 @@
1
+ require "nom"
2
+
3
module ActiveFedora
  # An XML-backed file whose Nokogiri document is decorated with a Nom
  # terminology. Methods this object does not define itself are forwarded to
  # the underlying +ng_xml+ document.
  class NomDatastream < File
    include Datastreams::NokogiriDatastreams

    class << self
      # The options hash and the block most recently given to
      # +set_terminology+ (both nil until it has been called on this class).
      attr_reader :terminology_options, :terminology
    end

    # Records the terminology definition for this class.
    #
    # @param options [Hash, nil] stored as +terminology_options+; nil is stored as {}
    # @param block the terminology definition, stored as +terminology+
    def self.set_terminology(options = {}, &block)
      @terminology_options = options || {}
      @terminology = block
    end

    # Applies the stored terminology to +xml+, calls +nom!+ on it and returns it.
    #
    # @param xml the document to decorate
    # @return the same +xml+ object
    def self.decorate_ng_xml(xml)
      xml.set_terminology terminology_options, &terminology
      xml.nom!
      xml
    end

    # Copies the string form of the parsed document into +content+.
    # Does nothing when no document has been loaded into @ng_xml.
    def serialize!
      self.content = @ng_xml.to_s if @ng_xml
    end

    # Builds a solr document from every term that carries an :index option.
    #
    # @return [Hash] maps each name listed under a term's :index option to an
    #   array of that term's values; Nokogiri nodes contribute their text,
    #   any other value is stored as-is
    def to_solr
      solr_doc = {}

      indexed_terms = ng_xml.terminology.flatten.select { |term| term.options[:index] }
      indexed_terms.each do |term|
        term.values.each do |value|
          indexed_value = value.is_a?(Nokogiri::XML::Node) ? value.text : value
          Array(term.options[:index]).each do |index_as|
            (solr_doc[index_as] ||= []) << indexed_value
          end
        end
      end

      solr_doc
    end

    # Forwards any message the XML document understands to +ng_xml+;
    # everything else goes up the normal +method_missing+ chain.
    def method_missing(method, *args, &block)
      if ng_xml.respond_to? method
        ng_xml.send(method, *args, &block)
      else
        super
      end
    end

    # Mirrors +method_missing+: true when +ng_xml+ responds to the message,
    # otherwise defers to the ancestors so their dynamic methods stay visible.
    def respond_to_missing?(*args)
      ng_xml.respond_to?(*args) || super
    end

    # NOTE(review): the fallback asks the stored terminology block itself
    # whether it responds to the message -- confirm that is the intent.
    def respond_to?(*args)
      super || self.class.terminology.respond_to?(*args)
    end

    protected

    # @return [String] the mime type used for this kind of file
    def default_mime_type
      'text/xml'
    end
  end
end
@@ -0,0 +1,112 @@
1
+ require "om"
2
+
3
module ActiveFedora
  # An XML-backed file whose content is read and written through an OM
  # terminology.
  class OmDatastream < File
    # before_save do
    #   if content.blank?
    #     ActiveFedora::Base.logger.warn "Cowardly refusing to save a datastream with empty content: #{self.inspect}"
    #     false
    #   end
    # end

    include OM::XML::Document
    include OM::XML::TerminologyBasedSolrizer # this adds support for calling .to_solr
    include Datastreams::NokogiriDatastreams

    # Keep handles on the OM implementations so the overrides below can call them.
    alias om_term_values term_values unless method_defined?(:om_term_values)
    alias om_update_values update_values unless method_defined?(:om_update_values)

    # @return [String] the mime type used for this kind of file
    def default_mime_type
      'text/xml'
    end

    # Indicates that this datastream has metadata content.
    # @return true
    def metadata?
      true
    end

    # Return a hash suitable for indexing in solr. Every field name is prefixed with the
    # value returned by the +prefix+ method.
    #
    # @param [Hash] solr_doc document to merge the prefixed fields into (not modified)
    # @param [Hash] opts only +opts[:name]+ is read; it is passed to +prefix+
    # @return [Hash] +solr_doc+ merged with the prefixed fields
    def to_solr(solr_doc = {}, opts = {})
      prefix = self.prefix(opts[:name])
      prefixed_fields = super({}).each_with_object({}) do |(key, value), fields|
        fields[[prefix, key].join] = value
      end
      solr_doc.merge(prefixed_fields)
    end

    # Update field values within the current datastream using {#update_values}, which is a wrapper for {http://rdoc.info/gems/om/1.2.4/OM/XML/TermValueOperators#update_values-instance_method OM::TermValueOperators#update_values}
    # Ignores any fields from params that this datastream's Terminology doesn't recognize
    #
    # @param [Hash] params The params specifying which fields to update and their new values. The syntax of the params Hash is the same as that expected by {#update_values}.
    #   term_pointers must be valid OM Term pointers (ie. [:name]). Strings will be ignored.
    # @param [Hash] _opts This is not currently used by the datastream-level update_indexed_attributes method
    # @return the result of {#update_values}, or an empty Hash when no recognized fields remain
    # @raise [RuntimeError] if the class has no terminology
    #
    # Example:
    #   @mods_ds.update_indexed_attributes( {[{":person"=>"0"}, "role"]=>{"0"=>"role1", "1"=>"role2", "2"=>"role3"} })
    #   => {"person_0_role"=>{"0"=>"role1", "1"=>"role2", "2"=>"role3"}}
    #
    #   @mods_ds.to_xml # (the following is an approximation)
    #   <mods>
    #     <mods:name type="person">
    #     <mods:role>
    #       <mods:roleTerm>role1</mods:roleTerm>
    #     </mods:role>
    #     <mods:role>
    #       <mods:roleTerm>role2</mods:roleTerm>
    #     </mods:role>
    #     <mods:role>
    #       <mods:roleTerm>role3</mods:roleTerm>
    #     </mods:role>
    #     </mods:name>
    #   </mods>
    def update_indexed_attributes(params = {}, _opts = {})
      if self.class.terminology.nil?
        raise "No terminology is set for this OmDatastream class. Cannot perform update_indexed_attributes"
      end

      # Drop any fields this datastream doesn't recognize. +reject+ builds a new
      # hash, so the caller's params are never modified and a frozen params
      # hash is accepted.
      current_params = params.reject do |term_pointer, new_values|
        if term_pointer.is_a?(String)
          ActiveFedora::Base.logger.warn "WARNING: #{self.class.name} ignoring {#{term_pointer.inspect} => #{new_values.inspect}} because #{term_pointer.inspect} is a String (only valid OM Term Pointers will be used). Make sure your html has the correct field_selector tags in it." if ActiveFedora::Base.logger
          true
        else
          !self.class.terminology.has_term?(*OM.destringify(term_pointer))
        end
      end

      current_params.empty? ? {} : update_values(current_params)
    end

    # Looks up the values for a term pointer.
    #
    # @param field_key a term pointer; it is splatted into +term_values+
    # @param _default unused
    def get_values(field_key, _default = [])
      term_values(*field_key)
    end

    # Passes straight through to the inherited implementation.
    def find_by_terms(*termpointer)
      super
    end

    # Update values in the datastream's xml
    # This wraps {http://rdoc.info/gems/om/1.2.4/OM/XML/TermValueOperators#update_values-instance_method OM::TermValueOperators#update_values} so that it raises an error when this object is frozen, and marks ng_xml as changed before delegating.
    #
    # @example Updating multiple values with a Hash of Term pointers and values
    #   ds.update_values( {[{":person"=>"0"}, "role", "text"]=>{"0"=>"role1", "1"=>"role2", "2"=>"role3"}, [{:person=>1}, :family_name]=>"Andronicus", [{"person"=>"1"},:given_name]=>["Titus"],[{:person=>1},:role,:text]=>["otherrole1","otherrole2"] } )
    #   => {"person_0_role_text"=>{"0"=>"role1", "1"=>"role2", "2"=>"role3"}, "person_1_role_text"=>{"0"=>"otherrole1", "1"=>"otherrole2"}}
    def update_values(params = {})
      raise "can't modify frozen #{self.class}" if frozen?
      ng_xml_will_change!
      om_update_values(params)
    end

    protected

    # The string to prefix all solr fields with. Override this method if you want
    # a prefix other than the default
    def prefix(path)
      path ? "#{path.underscore}__" : ''
    end
  end
end
@@ -0,0 +1,158 @@
1
module ActiveFedora
  # This class represents a Qualified Dublin Core Datastream. A special case of ActiveFedora::OmDatastream
  # The implementation of this class defines the terms from the Qualified Dublin Core specification.
  # This implementation features customized xml generators and deserialization routines to handle the
  # Fedora Dublin Core XML datastreams structure.
  #
  # Fields can still be overridden if more specificity is desired (see ActiveFedora::File#fields method).
  class QualifiedDublinCoreDatastream < OmDatastream
    attr_accessor :fields
    class_attribute :class_fields
    self.class_fields = []

    set_terminology do |t|
      t.root(path: "dc", xmlns: "http://purl.org/dc/terms/")
    end

    define_template :creator do |xml, name|
      xml.creator do
        xml.text(name)
      end
    end

    # A frozen array of Dublincore Terms (:format is deliberately left out).
    DCTERMS = %i[
      abstract accessRights accrualMethod accrualPeriodicity accrualPolicy
      alternative audience available bibliographicCitation conformsTo
      contributor coverage created creator date
      dateAccepted dateCopyrighted dateSubmitted description educationLevel
      extent hasFormat hasPart hasVersion identifier
      instructionalMethod isFormatOf isPartOf isReferencedBy isReplacedBy
      isRequiredBy isVersionOf issued language license
      mediator medium modified provenance publisher
      references relation replaces requires rights
      rightsHolder source spatial subject tableOfContents
      temporal title type valid
    ].freeze

    # Constructor. this class will call self.field for each DCTERM. In short, all DCTERMS fields will already exist
    # when this method returns. Each term is marked as a multivalue string.
    def initialize(string_or_url = nil)
      super
      self.fields = {}
      DCTERMS.each do |el|
        field el, :string, multiple: true
      end
    end

    # This method registers a metadata field on self: it records the field in
    # +fields+, lists its name in the class-level +class_fields+ and, when the
    # class terminology has no such term yet, adds a dcterms term for it.
    #
    # 'tupe' is a datatype, currently :string, :text and :date are supported.
    #
    # opts is an options hash, which will affect the generation of the xml representation of this datastream.
    #
    # Currently supported modifiers:
    # For +QualifiedDublinCoreDatastreams+:
    #   :element_attrs =>{:foo=>:bar} -  hash of xml element attributes
    #   :xml_node => :nodename  - The xml node to be used to represent this object (in dcterms namespace)
    #   :encoding=>foo, or encodings_scheme  - causes an xsi:type attribute to be set to 'foo'
    #   :multiple=>true -  mark this field as a multivalue field (on by default)
    #
    # There is quite a good example of this class in use in spec/examples/oral_history.rb
    #
    # !! Careful: If you declare two fields that correspond to the same xml node without any qualifiers to differentiate them,
    # you will end up replicating the values in the underlying datastream, resulting in mysterious doubling, quadrupling, etc.
    # whenever you edit the field's values.
    def field(name, tupe = nil, opts = {})
      @fields[name.to_s.to_sym] = { type: tupe, values: [] }.merge(opts)
      # add term to template -- once only: class_fields is shared by every
      # instance, and this method runs for each DCTERM on every +new+, so an
      # unguarded append would grow the array without bound.
      field_name = name.to_s
      self.class.class_fields << field_name unless self.class.class_fields.include?(field_name)
      # add term to terminology
      return if self.class.terminology.has_term?(name.to_sym)
      om_term_opts = { xmlns: "http://purl.org/dc/terms/", namespace_prefix: "dcterms", path: opts[:path] }
      term = OM::XML::Term.new(name.to_sym, om_term_opts, self.class.terminology)
      self.class.terminology.add_term(term)
      term.generate_xpath_queries!
    end

    # Normalizes the keys of +params+ to term-pointer arrays, then defers to
    # the inherited implementation.
    #
    # @param [Hash] params keys that are already Arrays are kept; any other key becomes [key.to_sym]
    # @param [Hash] opts passed through to super
    def update_indexed_attributes(params = {}, opts = {})
      new_params = params.each_with_object({}) do |(key, val), normalized|
        pointer = key.is_a?(Array) ? key : [key.to_sym]
        normalized[pointer] = val
      end
      super(new_params, opts)
    end

    # @return [Nokogiri::XML::Document] an empty <dc/> document declaring the dcterms and xsi namespaces
    def self.xml_template
      Nokogiri::XML::Document.parse("<dc xmlns:dcterms='http://purl.org/dc/terms/' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'/>")
    end

    # Inserts each value of every registered field into +solr_doc+ under the
    # solr name derived from the field key and its declared type.
    def to_solr(solr_doc = {}, _opts = {}) # :nodoc:
      @fields.each do |field_key, field_info|
        things = send(field_key)
        next unless things
        field_symbol = ActiveFedora.index_field_mapper.solr_name(field_key, type: field_info[:type])
        things.val.each do |val|
          ::Solrizer::Extractor.insert_solr_field_value(solr_doc, field_symbol, val)
        end
      end
      solr_doc
    end
  end
end
@@ -0,0 +1,49 @@
1
module ActiveFedora::RDF
  # Solr-indexing behaviour for RDF datastreams: +to_solr+ adds the fields
  # produced by the class's indexing service to whatever +super+ returns.
  module DatastreamIndexing
    extend ActiveSupport::Concern

    # Merges the RDF-derived solr fields into the document returned by super.
    # +opts[:name]+, when given, is used to build the field-name prefix.
    def to_solr(solr_doc = {}, opts = {}) # :nodoc:
      super.tap do |new_doc|
        solrize_rdf_assertions(opts[:name], new_doc)
      end
    end

    module ClassMethods
      # The service class instantiated by +indexing_service+.
      def indexer
        ActiveFedora::RDF::IndexingService
      end

      # Indexing configuration for this class, created on first use.
      def index_config
        @index_config ||= ActiveFedora::Indexing::Map.new
      end
    end

    protected

    # Memoized instance of the class's +indexer+, built around self.
    def indexing_service
      @indexing_service ||= self.class.indexer.new(self)
    end

    # Serialize the datastream's RDF relationships to solr
    # @param [String] file_path used to prefix the keys in the solr document
    # @param [Hash] solr_doc @default an empty Hash; modified in place
    def solrize_rdf_assertions(file_path, solr_doc = {})
      solr_doc.merge! indexing_service.generate_solr_document(prefix_method(file_path))
    end

    # Returns a function that takes field name and returns a solr document key
    def prefix_method(file_path)
      ->(field_name) { apply_prefix(field_name, file_path) }
    end

    # Prepends the prefix for +file_path+ to +name+.
    def apply_prefix(name, file_path)
      prefix(file_path) + name.to_s
    end

    # The string to prefix all solr fields with. Override this method if you want
    # a prefix other than the default
    def prefix(path)
      path ? "#{path.underscore}__" : ''
    end
  end
end
@@ -0,0 +1,9 @@
1
+ require 'rdf/ntriples'
2
+
3
module ActiveFedora
  # An RDFDatastream whose serialization format is N-Triples.
  class NtriplesRDFDatastream < RDFDatastream
    # @return [Symbol] the format name used when reading and dumping the graph
    def serialization_format
      :ntriples
    end
  end
end
@@ -0,0 +1,164 @@
1
module ActiveFedora
  # A file whose content is an RDF graph held in an ActiveTriples resource.
  # Subclasses must define +serialization_format+.
  class RDFDatastream < File
    include ActiveTriples::NestedAttributes
    include RDF::DatastreamIndexing
    include ActiveTriples::Properties
    include ActiveTriples::Reflection

    delegate :rdf_subject, :set_value, :get_values, :attributes=, to: :resource

    class << self
      # With a block: stores it as the subject builder and returns it.
      # Without: returns the stored builder, defaulting to one that yields
      # the datastream's parent URI.
      def rdf_subject(&block)
        return @subject_block = block if block_given?

        @subject_block ||= ->(ds) { parent_uri(ds) }
      end

      # Trim the last segment off the URI to get the parents uri
      #
      # @return the text before the final "/" of +ds.uri+, or an
      #   ::RDF::URI built from nil when the uri contains no "/"
      def parent_uri(ds)
        m = /^(.*)\/[^\/]*$/.match(ds.uri)
        if m
          m[1]
        else
          ::RDF::URI.new(nil)
        end
      end

      ##
      # @param [Class] klass an object to set as the resource class, Must be a descendant of
      # ActiveTriples::Resource and include ActiveFedora::RDF::Persistence.
      #
      # @return [Class] the object resource class
      # @raise [ArgumentError] if a different resource class is already set, or
      #   +klass+ is not a subclass of ActiveTriples::Resource
      def resource_class(klass = nil)
        if klass
          raise ArgumentError, "#{self} already has a resource_class #{@resource_class}, cannot redefine it to #{klass}" if @resource_class && klass != @resource_class
          raise ArgumentError, "#{klass} must be a subclass of ActiveTriples::Resource" unless klass < ActiveTriples::Resource
        end

        # The stored class is an anonymous subclass, so including
        # RDF::Persistence does not touch +klass+ itself.
        @resource_class ||= begin
          klass = Class.new(klass || ActiveTriples::Resource)
          klass.send(:include, RDF::Persistence)
          klass
        end
      end
    end

    # Refuse to save when there is no content, halting the callback chain in
    # whichever way the running Rails version expects.
    before_save do
      if content.blank?
        ActiveFedora::Base.logger.warn "Cowardly refusing to save a datastream with empty content: #{inspect}" if ActiveFedora::Base.logger
        if ActiveSupport.respond_to?(:halt_callback_chains_on_return_false)
          # For Rails 5+
          throw :abort
        else
          # For Rails <= 4
          false
        end
      end
    end

    # Instance-level shortcut for the class method of the same name.
    def parent_uri
      self.class.parent_uri(self)
    end

    # @return true
    def metadata?
      true
    end

    # The content is always the current serialization of the resource.
    def content
      serialize
    end

    # Replaces the resource's statements with those parsed from +new_content+.
    def content=(new_content)
      resource.clear!
      resource << deserialize(new_content)
      content
    end

    # After the uri is set, gives the resource the parent URI as its subject
    # if it does not have a usable one yet.
    def uri=(uri)
      super
      resource.set_subject!(parent_uri) if empty_or_blank_subject?
    end

    # False until the resource has been built; otherwise refreshes @content
    # from the resource before deferring to super.
    def content_changed?
      return false unless instance_variable_defined? :@resource
      return true if empty_or_blank_subject? # can't be serialized because a subject hasn't been assigned yet.
      @content = serialize
      super
    end

    # True when the resource's subject is a blank node or has a blank value.
    def empty_or_blank_subject?
      resource.rdf_subject.node? || resource.rdf_subject.value.blank?
    end

    # NOTE(review): this freezes only @resource and returns it rather than
    # self -- confirm callers do not rely on the receiver being returned.
    def freeze
      @resource.freeze
    end

    ##
    # The resource is the RdfResource object that stores the graph for
    # the datastream and is the central point for its relationship to
    # other nodes.
    #
    # set_value, get_value, and property accessors are delegated to this object.
    def resource
      @resource ||= begin
        klass = self.class.resource_class
        # Declare on the resource class every property known to it or to this class.
        klass.properties.merge(self.class.properties).each do |_prop, config|
          klass.property(config.term,
                         predicate: config.predicate,
                         class_name: config.class_name)
        end
        klass.accepts_nested_attributes_for(*nested_attributes_options.keys) unless nested_attributes_options.blank?
        uri_stub = self.class.rdf_subject.call(self)

        r = klass.new(uri_stub)
        r.datastream = self
        r << deserialize
        r
      end
    end

    alias graph resource

    # Discards the memoized resource so the next access rebuilds it.
    def refresh_attributes
      @resource = nil
    end

    ##
    # This method allows for delegation.
    # This patches the fact that there's no consistent API for allowing delegation - we're matching the
    # OmDatastream implementation as our "consistency" point.
    # @TODO: We may need to enable deep RDF delegation at one point.
    def term_values(*values)
      send(values.first)
    end

    # Assigns each value through the writer of every field named in its key.
    #
    # @param [Hash] hash maps a field name, or an array of field names, to the value to assign
    # @param [Hash] _opts unused; accepted so the signature matches OmDatastream#update_indexed_attributes
    def update_indexed_attributes(hash, _opts = {})
      hash.each do |fields, value|
        Array(fields).each do |field|
          send("#{field}=", value)
        end
      end
    end

    # Dumps the resource in +serialization_format+, first giving it the
    # parent URI as subject when its subject is still a blank node.
    def serialize
      resource.set_subject!(parent_uri) if parent_uri && rdf_subject.node?
      resource.dump serialization_format
    end

    # Parses +data+ (or, when nil, the remote content) into a graph.
    #
    # @param [String, nil] data serialized RDF in +serialization_format+
    # @return [::RDF::Graph] empty for a new record with no data, or when no data is available
    def deserialize(data = nil)
      return ::RDF::Graph.new if new_record? && data.nil?
      data ||= remote_content

      # Because datastream_content can return nil, we should check that here.
      return ::RDF::Graph.new if data.nil?

      # force_encoding changes its receiver in place and raises on a frozen
      # String, so re-tag a private copy in that case.
      data = data.dup if data.frozen?
      data.force_encoding('utf-8')
      ::RDF::Graph.new << ::RDF::Reader.for(serialization_format).new(data)
    end

    # @raise [RuntimeError] always; subclasses must override
    def serialization_format
      raise "you must override the `serialization_format' method in a subclass"
    end
  end
end