active_fedora-datastreams 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
module ActiveFedora
  module Datastreams
    # Released version of the active_fedora-datastreams gem.
    VERSION = '0.1.0'.freeze
  end
end
@@ -0,0 +1,71 @@
1
+ require "nom"
2
+
3
module ActiveFedora
  # XML datastream whose Nokogiri document (+ng_xml+) is decorated with a Nom
  # terminology. The terminology is declared per class via {.set_terminology};
  # messages the datastream itself does not understand are forwarded to the
  # decorated document.
  class NomDatastream < File
    include Datastreams::NokogiriDatastreams

    class << self
      # The options hash and the block recorded by the most recent
      # {.set_terminology} call on this class. Both are stored in class-level
      # instance variables of the class they were set on.
      attr_reader :terminology_options, :terminology

      # Records the terminology definition for this class.
      #
      # @param [Hash] options options later handed to +xml.set_terminology+;
      #   +nil+ is stored as an empty hash
      # @yield the terminology definition block, stored as-is and replayed by
      #   {.decorate_ng_xml}
      def set_terminology(options = {}, &block)
        @terminology_options = options || {}
        @terminology = block
      end

      # Applies the recorded terminology to +xml+ and activates Nom on it.
      #
      # @param xml the document to decorate; must respond to +set_terminology+ and +nom!+
      # @return the same +xml+ object
      def decorate_ng_xml(xml)
        xml.set_terminology terminology_options, &terminology
        xml.nom!
        xml
      end
    end

    # Copies the current XML document into +content+. Does nothing when no
    # document has been assigned to +@ng_xml+.
    def serialize!
      self.content = @ng_xml.to_s if @ng_xml
    end

    # Builds a hash of solr field name => array of values from every term whose
    # options include +:index+. Nokogiri nodes are indexed by their text; any
    # other value is indexed unchanged.
    #
    # @return [Hash] field names (taken from each term's +:index+ option) mapped to value arrays
    def to_solr
      indexed_terms = ng_xml.terminology.flatten.select { |term| term.options[:index] }

      indexed_terms.each_with_object({}) do |term, solr_doc|
        term.values.each do |value|
          indexed_value = value.is_a?(Nokogiri::XML::Node) ? value.text : value

          # A term may be indexed under one field name or several.
          Array(term.options[:index]).each do |index_as|
            (solr_doc[index_as] ||= []) << indexed_value
          end
        end
      end
    end

    # Forwards unknown messages to the decorated XML document when it
    # responds to them; otherwise defers to the default behaviour.
    def method_missing(method, *args, &block)
      return super unless ng_xml.respond_to?(method)

      ng_xml.send(method, *args, &block)
    end

    # Companion to {#method_missing}: reports the methods the XML document can
    # handle, and falls back to +super+ so that answers provided further up
    # the ancestor chain are not lost.
    def respond_to_missing?(method, include_private = false)
      ng_xml.respond_to?(method, include_private) || super
    end

    # Also answers true for anything the class-level terminology block
    # responds to.
    #
    # NOTE(review): {#method_missing} only forwards to +ng_xml+, so a name that
    # only the terminology block responds to is reported here but cannot be
    # dispatched. Kept unchanged for backward compatibility -- confirm whether
    # callers rely on it.
    def respond_to?(*args)
      super || self.class.terminology.respond_to?(*args)
    end

    protected

      # MIME type used for this datastream's content.
      def default_mime_type
        'text/xml'
      end
  end
end
@@ -0,0 +1,112 @@
1
+ require "om"
2
+
3
module ActiveFedora
  # XML datastream driven by an OM terminology. Mixes in OM's document and
  # solrizer behaviour and wraps OM's value operators so that changes are
  # tracked on the underlying Nokogiri document.
  class OmDatastream < File
    # before_save do
    #   if content.blank?
    #     ActiveFedora::Base.logger.warn "Cowardly refusing to save a datastream with empty content: #{self.inspect}"
    #     false
    #   end
    # end

    include OM::XML::Document
    include OM::XML::TerminologyBasedSolrizer # this adds support for calling .to_solr
    include Datastreams::NokogiriDatastreams

    # Keep handles on OM's original implementations; the guards avoid
    # re-aliasing when the aliases already exist.
    alias_method :om_term_values, :term_values unless method_defined?(:om_term_values)
    alias_method :om_update_values, :update_values unless method_defined?(:om_update_values)

    # MIME type used for this datastream's content.
    def default_mime_type
      'text/xml'
    end

    # Indicates that this datastream has metadata content.
    # @return true
    def metadata?
      true
    end

    # Return a hash suitable for indexing in solr. Every field name produced by
    # the inherited solrizer is prefixed with the value returned by the
    # +prefix+ method.
    #
    # @param [Hash] solr_doc existing document; it is merged into a new hash, not modified
    # @param [Hash] opts +:name+ is handed to +prefix+
    # @return [Hash] +solr_doc+ merged with the prefixed fields
    def to_solr(solr_doc = {}, opts = {})
      field_prefix = prefix(opts[:name])
      prefixed_fields = super({}).each_with_object({}) do |(key, value), fields|
        fields[[field_prefix, key].join] = value
      end
      solr_doc.merge(prefixed_fields)
    end

    # Update field values within the current datastream using {#update_values}, which is a wrapper for
    # {http://rdoc.info/gems/om/1.2.4/OM/XML/TermValueOperators#update_values-instance_method OM::TermValueOperators#update_values}.
    # Ignores any fields from params that this datastream's Terminology doesn't recognize.
    #
    # @param [Hash] params The params specifying which fields to update and their new values. The syntax of the
    #   params Hash is the same as that expected by {#update_values}. Keys (term pointers) must be valid OM Term
    #   pointers (ie. [:name]). String keys are ignored, with a warning when a logger is available.
    # @param [Hash] _opts This is not currently used by the datastream-level update_indexed_attributes method
    # @return [Hash] the result of {#update_values}, or an empty Hash when no recognized params remain
    # @raise [RuntimeError] when the class has no terminology
    #
    # Example:
    #   @mods_ds.update_indexed_attributes( {[{":person"=>"0"}, "role"]=>{"0"=>"role1", "1"=>"role2", "2"=>"role3"} })
    #   => {"person_0_role"=>{"0"=>"role1", "1"=>"role2", "2"=>"role3"}}
    #
    #   @mods_ds.to_xml # (the following is an approximation)
    #   <mods>
    #     <mods:name type="person">
    #       <mods:role>
    #         <mods:roleTerm>role1</mods:roleTerm>
    #       </mods:role>
    #       <mods:role>
    #         <mods:roleTerm>role2</mods:roleTerm>
    #       </mods:role>
    #       <mods:role>
    #         <mods:roleTerm>role3</mods:roleTerm>
    #       </mods:role>
    #     </mods:name>
    #   </mods>
    def update_indexed_attributes(params = {}, _opts = {})
      raise "No terminology is set for this OmDatastream class. Cannot perform update_indexed_attributes" if self.class.terminology.nil?

      # Work on a copy so the caller's hash, which may be passed on to other
      # methods, is left untouched.
      recognized_params = params.clone
      recognized_params.delete_if do |term_pointer, new_values|
        next !self.class.terminology.has_term?(*OM.destringify(term_pointer)) unless term_pointer.is_a?(String)

        ActiveFedora::Base.logger.warn "WARNING: #{self.class.name} ignoring {#{term_pointer.inspect} => #{new_values.inspect}} because #{term_pointer.inspect} is a String (only valid OM Term Pointers will be used). Make sure your html has the correct field_selector tags in it." if ActiveFedora::Base.logger
        true
      end

      recognized_params.empty? ? {} : update_values(recognized_params)
    end

    # Reads the values for a term pointer.
    #
    # @param field_key a term pointer; it is splatted into +term_values+
    # @param _default unused
    def get_values(field_key, _default = [])
      term_values(*field_key)
    end

    # Pass-through to the inherited +find_by_terms+.
    def find_by_terms(*termpointer)
      super
    end

    # Update values in the datastream's xml.
    # This wraps {http://rdoc.info/gems/om/1.2.4/OM/XML/TermValueOperators#update_values-instance_method OM::TermValueOperators#update_values}
    # so that it raises an error when the datastream is frozen, and marks the
    # XML document as changed before delegating.
    #
    # @example Updating multiple values with a Hash of Term pointers and values
    #   ds.update_values( {[{":person"=>"0"}, "role", "text"]=>{"0"=>"role1", "1"=>"role2", "2"=>"role3"}, [{:person=>1}, :family_name]=>"Andronicus", [{"person"=>"1"},:given_name]=>["Titus"],[{:person=>1},:role,:text]=>["otherrole1","otherrole2"] } )
    #   => {"person_0_role_text"=>{"0"=>"role1", "1"=>"role2", "2"=>"role3"}, "person_1_role_text"=>{"0"=>"otherrole1", "1"=>"otherrole2"}}
    def update_values(params = {})
      raise "can't modify frozen #{self.class}" if frozen?

      ng_xml_will_change!
      om_update_values(params)
    end

    protected

      # The string to prefix all solr fields with. Override this method if you want
      # a prefix other than the default.
      #
      # @param path name to derive the prefix from; +nil+/+false+ yields an empty prefix
      def prefix(path)
        return '' unless path

        "#{path.underscore}__"
      end
  end
end
@@ -0,0 +1,158 @@
1
module ActiveFedora
  # This class represents a Qualified Dublin Core Datastream. A special case of ActiveFedora::OmDatastream
  # The implementation of this class defines the terms from the Qualified Dublin Core specification.
  # This implementation features customized xml generators and deserialization routines to handle the
  # Fedora Dublin Core XML datastreams structure.
  #
  # Fields can still be overridden if more specificity is desired (see ActiveFedora::File#fields method).
  class QualifiedDublinCoreDatastream < OmDatastream
    # Per-instance field definitions, keyed by field name (Symbol).
    attr_accessor :fields

    # Names (Strings) of every field declared through {#field}. The array is
    # held at class level and mutated in place by {#field}.
    class_attribute :class_fields
    self.class_fields = []

    set_terminology do |t|
      t.root(path: "dc", xmlns: "http://purl.org/dc/terms/")
    end

    define_template :creator do |xml, name|
      xml.creator do
        xml.text(name)
      end
    end

    # A frozen array of Dublincore Terms.
    DCTERMS = %i[
      abstract
      accessRights
      accrualMethod
      accrualPeriodicity
      accrualPolicy
      alternative
      audience
      available
      bibliographicCitation
      conformsTo
      contributor
      coverage
      created
      creator
      date
      dateAccepted
      dateCopyrighted
      dateSubmitted
      description
      educationLevel
      extent
      hasFormat
      hasPart
      hasVersion
      identifier
      instructionalMethod
      isFormatOf
      isPartOf
      isReferencedBy
      isReplacedBy
      isRequiredBy
      isVersionOf
      issued
      language
      license
      mediator
      medium
      modified
      provenance
      publisher
      references
      relation
      replaces
      requires
      rights
      rightsHolder
      source
      spatial
      subject
      tableOfContents
      temporal
      title
      type
      valid
    ].freeze # removed :format

    # Constructor. this class will call self.field for each DCTERM. In short, all DCTERMS fields will already exist
    # when this method returns. Each term is marked as a multivalue string.
    def initialize(string_or_url = nil)
      super
      self.fields = {}
      DCTERMS.each { |term| field term, :string, multiple: true }
    end

    # Registers a metadata field on this datastream: records it in +fields+,
    # remembers its name in the class-level +class_fields+ list and, if the
    # class terminology does not know the term yet, adds an OM term for it.
    #
    # 'tupe' is a datatype, currently :string, :text and :date are supported.
    #
    # opts is an options hash, which will affect the generation of the xml representation of this datastream.
    #
    # Currently supported modifiers:
    # For +QualifiedDublinCoreDatastreams+:
    #   :element_attrs =>{:foo=>:bar} -  hash of xml element attributes
    #   :xml_node => :nodename  - The xml node to be used to represent this object (in dcterms namespace)
    #   :encoding=>foo, or encodings_scheme  - causes an xsi:type attribute to be set to 'foo'
    #   :multiple=>true -  mark this field as a multivalue field (on by default)
    #
    # There is quite a good example of this class in use in spec/examples/oral_history.rb
    #
    # !! Careful: If you declare two fields that correspond to the same xml node without any qualifiers to differentiate them,
    # you will end up replicating the values in the underlying datastream, resulting in mysterious doubling, quadrupling, etc.
    # whenever you edit the field's values.
    #
    # @param name field name (String or Symbol)
    # @param tupe datatype of the field
    # @param [Hash] opts merged into the field definition; +:path+ is also passed to the OM term
    def field(name, tupe = nil, opts = {})
      @fields[name.to_s.to_sym] = { type: tupe, values: [] }.merge(opts)

      # add term to template -- once only. This method runs for every DCTERM
      # on every instantiation, so an unconditional append would grow the
      # shared class-level list without bound.
      field_name = name.to_s
      known_fields = self.class.class_fields
      known_fields << field_name unless known_fields.include?(field_name)

      # add term to terminology
      terminology = self.class.terminology
      return if terminology.has_term?(name.to_sym)

      om_term_opts = { xmlns: "http://purl.org/dc/terms/", namespace_prefix: "dcterms", path: opts[:path] }
      term = OM::XML::Term.new(name.to_sym, om_term_opts, terminology)
      terminology.add_term(term)
      term.generate_xpath_queries!
    end

    # Normalizes the keys of +params+ into term pointers before delegating to
    # the parent implementation: Array keys are kept, any other key is
    # symbolized and wrapped in a one-element Array.
    def update_indexed_attributes(params = {}, opts = {})
      term_pointer_params = params.each_with_object({}) do |(key, val), normalized|
        pointer = key.is_a?(Array) ? key : [key.to_sym]
        normalized[pointer] = val
      end
      super(term_pointer_params, opts)
    end

    # @return [Nokogiri::XML::Document] an empty +dc+ root element declaring the dcterms and xsi namespaces
    def self.xml_template
      Nokogiri::XML::Document.parse("<dc xmlns:dcterms='http://purl.org/dc/terms/' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'/>")
    end

    # Inserts the values of every declared field into +solr_doc+, using the
    # solr field name produced by the index field mapper for the field's type.
    #
    # @param [Hash] solr_doc document to add to (modified in place)
    # @param [Hash] _opts unused
    # @return [Hash] +solr_doc+
    def to_solr(solr_doc = {}, _opts = {}) # :nodoc:
      @fields.each do |field_key, field_info|
        things = send(field_key)
        next unless things

        field_symbol = ActiveFedora.index_field_mapper.solr_name(field_key, type: field_info[:type])
        things.val.each do |val|
          ::Solrizer::Extractor.insert_solr_field_value(solr_doc, field_symbol, val)
        end
      end
      solr_doc
    end
  end
end
@@ -0,0 +1,49 @@
1
module ActiveFedora::RDF
  # Solr indexing support for RDF datastreams: extends the including class's
  # +to_solr+ with the fields generated by its indexing service.
  module DatastreamIndexing
    extend ActiveSupport::Concern

    # Adds the RDF assertions of this datastream to the document built by the
    # parent implementation.
    #
    # @param [Hash] solr_doc passed through to +super+
    # @param [Hash] opts +:name+ is used to prefix the generated solr keys
    # @return the document returned by +super+, with the RDF fields merged in
    def to_solr(solr_doc = {}, opts = {}) # :nodoc:
      new_doc = super
      solrize_rdf_assertions(opts[:name], new_doc)
      new_doc
    end

    module ClassMethods
      # The service class instantiated by +indexing_service+.
      def indexer
        ActiveFedora::RDF::IndexingService
      end

      # Memoized indexing map for the class.
      def index_config
        @index_config ||= ActiveFedora::Indexing::Map.new
      end
    end

    protected

      # Memoized instance of the class's +indexer+, built around this datastream.
      def indexing_service
        @indexing_service ||= self.class.indexer.new(self)
      end

      # Serialize the datastream's RDF relationships to solr
      # @param [String] file_path used to prefix the keys in the solr document
      # @param [Hash] solr_doc @default an empty Hash
      def solrize_rdf_assertions(file_path, solr_doc = {})
        generated_fields = indexing_service.generate_solr_document(prefix_method(file_path))
        solr_doc.merge!(generated_fields)
      end

      # Returns a function that takes field name and returns a solr document key
      def prefix_method(file_path)
        lambda { |field_name| apply_prefix(field_name, file_path) }
      end

      # Builds a solr key from the prefix for +file_path+ followed by +name+.
      def apply_prefix(name, file_path)
        prefix(file_path) + name.to_s
      end

      # The string to prefix all solr fields with. Override this method if you want
      # a prefix other than the default
      def prefix(path)
        return '' unless path

        "#{path.underscore}__"
      end
  end
end
@@ -0,0 +1,9 @@
1
+ require 'rdf/ntriples'
2
+
3
module ActiveFedora
  # RDF datastream whose serialization format is N-Triples.
  class NtriplesRDFDatastream < RDFDatastream
    # Supplies the format symbol that RDFDatastream requires subclasses to define.
    #
    # @return [Symbol] always +:ntriples+
    def serialization_format
      :ntriples
    end
  end
end
+ end
@@ -0,0 +1,164 @@
1
module ActiveFedora
  # Base class for datastreams whose content is an RDF graph. The graph is held
  # in an ActiveTriples resource (see {#resource}); +content+ is always derived
  # by serializing that resource. Subclasses must implement
  # {#serialization_format}.
  class RDFDatastream < File
    include ActiveTriples::NestedAttributes
    include RDF::DatastreamIndexing
    include ActiveTriples::Properties
    include ActiveTriples::Reflection

    delegate :rdf_subject, :set_value, :get_values, :attributes=, to: :resource

    class << self
      # With a block: stores the block used to compute the RDF subject of a
      # datastream's resource. Without a block: returns the stored block,
      # defaulting to one that uses the datastream's parent URI.
      def rdf_subject(&block)
        return @subject_block = block if block_given?

        @subject_block ||= ->(ds) { parent_uri(ds) }
      end

      # Trim the last segment off the URI to get the parents uri
      #
      # @param ds an object responding to +uri+
      # @return the URI up to (not including) its last "/", or an RDF::URI
      #   built from +nil+ when the URI contains no "/"
      def parent_uri(ds)
        m = /^(.*)\/[^\/]*$/.match(ds.uri)
        m ? m[1] : ::RDF::URI.new(nil)
      end

      ##
      # @param [Class] klass an object to set as the resource class, Must be a descendant of
      # ActiveTriples::Resource and include ActiveFedora::RDF::Persistence.
      #
      # @return [Class] the object resource class
      # @raise [ArgumentError] when a different resource class has already been
      #   set, or when +klass+ is not a subclass of ActiveTriples::Resource
      def resource_class(klass = nil)
        if klass
          raise ArgumentError, "#{self} already has a resource_class #{@resource_class}, cannot redefine it to #{klass}" if @resource_class && klass != @resource_class
          raise ArgumentError, "#{klass} must be a subclass of ActiveTriples::Resource" unless klass < ActiveTriples::Resource
        end

        # The stored class is always a fresh anonymous subclass, so mixing in
        # persistence does not modify the class that was passed in.
        @resource_class ||= Class.new(klass || ActiveTriples::Resource).tap do |resource_klass|
          resource_klass.send(:include, RDF::Persistence)
        end
      end
    end

    # Refuse to save a datastream whose serialized content is blank.
    before_save do
      if content.blank?
        ActiveFedora::Base.logger.warn "Cowardly refusing to save a datastream with empty content: #{inspect}" if ActiveFedora::Base.logger
        # The halting protocol is chosen from the ActiveSupport major version.
        # Probing for ActiveSupport.halt_callback_chains_on_return_false is not
        # reliable: where that accessor is absent the probe picks `false`,
        # which Rails 5+ no longer treats as "halt".
        if ActiveSupport::VERSION::MAJOR >= 5
          # For Rails 5+
          throw :abort
        else
          # For Rails <= 4
          false
        end
      end
    end

    # Parent URI of this datastream (see {.parent_uri}).
    def parent_uri
      self.class.parent_uri(self)
    end

    # Indicates that this datastream has metadata content.
    # @return true
    def metadata?
      true
    end

    # The serialized form of the resource.
    def content
      serialize
    end

    # Replaces the resource's statements with those parsed from +new_content+.
    #
    # @return the re-serialized content
    def content=(new_content)
      resource.clear!
      resource << deserialize(new_content)
      content
    end

    # Sets the URI and, if the resource has no usable subject yet, assigns the
    # parent URI as its subject.
    def uri=(uri)
      super
      resource.set_subject!(parent_uri) if empty_or_blank_subject?
    end

    # Unchanged when the resource was never loaded; changed when no subject has
    # been assigned yet. Otherwise +@content+ is refreshed from the resource
    # before deferring to the parent check.
    def content_changed?
      return false unless instance_variable_defined? :@resource
      return true if empty_or_blank_subject? # can't be serialized because a subject hasn't been assigned yet.

      @content = serialize
      super
    end

    # True when the resource's subject is a blank node or has a blank value.
    def empty_or_blank_subject?
      subject = resource.rdf_subject
      subject.node? || subject.value.blank?
    end

    # Freezes the underlying resource.
    #
    # NOTE(review): this neither freezes the datastream itself nor returns
    # +self+; it returns whatever +@resource.freeze+ returns. Kept as-is --
    # confirm what callers expect before changing.
    def freeze
      @resource.freeze
    end

    ##
    # The resource is the RdfResource object that stores the graph for
    # the datastream and is the central point for its relationship to
    # other nodes.
    #
    # set_value, get_value, and property accessors are delegated to this object.
    def resource
      @resource ||= begin
        klass = self.class.resource_class
        # Declare the datastream class's properties on the resource class too.
        klass.properties.merge(self.class.properties).each do |_prop, config|
          klass.property(config.term,
                         predicate: config.predicate,
                         class_name: config.class_name)
        end
        klass.accepts_nested_attributes_for(*nested_attributes_options.keys) unless nested_attributes_options.blank?
        uri_stub = self.class.rdf_subject.call(self)

        r = klass.new(uri_stub)
        r.datastream = self
        r << deserialize
        r
      end
    end

    alias graph resource

    # Drops the memoized resource so the next {#resource} call rebuilds it.
    def refresh_attributes
      @resource = nil
    end

    ##
    # This method allows for delegation.
    # This patches the fact that there's no consistent API for allowing delegation - we're matching the
    # OmDatastream implementation as our "consistency" point.
    # @TODO: We may need to enable deep RDF delegation at one point.
    def term_values(*values)
      send(values.first)
    end

    # Assigns +value+ to every field named in each key of +hash+ by calling the
    # matching writer.
    #
    # @param [Hash] hash field name (or Array of field names) => value
    # @param [Hash] _opts unused; accepted so the signature matches
    #   OmDatastream#update_indexed_attributes
    # @return [Hash] +hash+
    def update_indexed_attributes(hash, _opts = {})
      hash.each do |fields, value|
        # Array() lets a single field name be given without wrapping it.
        Array(fields).each do |field|
          send("#{field}=", value)
        end
      end
    end

    # Dumps the resource in {#serialization_format}, first replacing a
    # blank-node subject with the parent URI when one is available.
    def serialize
      resource.set_subject!(parent_uri) if parent_uri && rdf_subject.node?
      resource.dump serialization_format
    end

    # Parses +data+ (or, when omitted, the remote content) into an RDF graph.
    #
    # @param [String, nil] data serialized RDF in {#serialization_format}
    # @return [::RDF::Graph] an empty graph for a new record without data or
    #   when there is no content to read
    def deserialize(data = nil)
      return ::RDF::Graph.new if new_record? && data.nil?

      data ||= remote_content

      # Because datastream_content can return nil, we should check that here.
      return ::RDF::Graph.new if data.nil?

      # force_encoding mutates its receiver and raises on a frozen string, so
      # work on a copy when the caller handed us frozen data.
      data = data.dup if data.frozen?
      data.force_encoding('utf-8')
      ::RDF::Graph.new << ::RDF::Reader.for(serialization_format).new(data)
    end

    # The RDF serialization format symbol; subclasses must override this.
    # @raise [RuntimeError] always, in this base class
    def serialization_format
      raise "you must override the `serialization_format' method in a subclass"
    end
  end
end