active-fedora 6.0.0.pre10 → 6.0.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
@@ -4,6 +4,14 @@ Added support for RDF lists
4
4
  RDF does not index terms by default
5
5
  count is now a scoped query.
6
6
 
7
+ 5.6.1
8
+ Fixed delegating enumerable methods (join, [], each_with_index, to_a, etc) on an RdfNode::TermProxy
9
+
10
+ 5.6.0
11
+ Upgrade to rubydora 1.3.0, which prevents unnecessary loading of datastream content.
12
+ #35 fix passing :sort argument to ActiveFedora::Base#find
13
+ Better support for ruby 2.0 (delegating inspect within RdfNode::TermProxy)
14
+
7
15
  5.5.2
8
16
  #25 When multiple has_many relationships share a predicate, the solr_query should use class_name to distinguish the sets.
9
17
 
@@ -26,7 +26,7 @@ module ActiveFedora
26
26
  def metadata?
27
27
  false
28
28
  end
29
-
29
+
30
30
  def validate_content_present
31
31
  has_content?
32
32
  end
@@ -1,405 +1,10 @@
1
- require "nokogiri"
2
- require "om"
3
- require "solrizer/xml"
4
-
5
1
  #this class represents a xml metadata datastream
6
2
  module ActiveFedora
7
- class NokogiriDatastream < Datastream
8
-
9
- before_save do
10
- if content.blank?
11
- logger.warn "Cowardly refusing to save a datastream with empty content: #{self.inspect}"
12
- false
13
- end
14
- end
15
-
16
- include OM::XML::Document
17
- include OM::XML::TerminologyBasedSolrizer # this adds support for calling .to_solr
18
-
19
- alias_method(:om_term_values, :term_values) unless method_defined?(:om_term_values)
20
- alias_method(:om_update_values, :update_values) unless method_defined?(:om_update_values)
21
-
22
- attr_accessor :internal_solr_doc
23
-
24
- def self.default_attributes
25
- super.merge(:controlGroup => 'X', :mimeType => 'text/xml')
26
- end
27
-
28
- # Create an instance of this class based on xml content
29
- # @param [String, File, Nokogiri::XML::Node] xml the xml content to build from
30
- # @param [ActiveFedora::NokogiriDatastream] tmpl the Datastream object that you are building @default a new instance of this class
31
- # Careful! If you call this from a constructor, be sure to provide something 'ie. self' as the @tmpl. Otherwise, you will get an infinite loop!
32
- def self.from_xml(xml, tmpl=nil)
33
- tmpl = self.new if tmpl.nil? ## This path is used only for unit testing (e.g. MarpaDCDatastream.from_xml(fixture("data.xml")) )
34
-
35
- if !xml.present?
36
- tmpl.ng_xml = self.xml_template
37
- elsif xml.kind_of? Nokogiri::XML::Node || xml.kind_of?(Nokogiri::XML::Document)
38
- tmpl.ng_xml = xml
39
- else
40
- tmpl.ng_xml = Nokogiri::XML::Document.parse(xml)
41
- end
42
-
43
- tmpl.ng_xml_doesnt_change!
44
-
45
- return tmpl
46
- end
47
-
48
- def self.xml_template
49
- Nokogiri::XML::Document.parse("<xml/>")
50
- end
51
-
52
- def ng_xml
53
- @ng_xml ||= begin
54
- if new?
55
- ## Load up the template
56
- self.class.xml_template
57
- else
58
- Nokogiri::XML::Document.parse(datastream_content)
59
- end
60
- end
61
- end
62
-
63
- def ng_xml=(new_xml)
64
- case new_xml
65
- when Nokogiri::XML::Document
66
- self.content=new_xml.to_xml
67
- when Nokogiri::XML::Node
68
- ## Cast a fragment to a document
69
- self.content=new_xml.to_s
70
- when String
71
- self.content=new_xml
72
- else
73
- raise TypeError, "You passed a #{new_xml.class} into the ng_xml of the #{self.dsid} datastream. NokogiriDatastream.ng_xml= only accepts Nokogiri::XML::Document, Nokogiri::XML::Element, Nokogiri::XML::Node, or raw XML (String) as inputs."
74
- end
75
- end
76
-
77
- # don't want content eagerly loaded by proxy, so implementing methods that would be implemented by define_attribute_methods
78
- def ng_xml_will_change!
79
- changed_attributes['ng_xml'] = nil
80
- end
81
-
82
- def ng_xml_doesnt_change!
83
- changed_attributes.delete('ng_xml')
84
- end
85
-
86
- # don't want content eagerly loaded by proxy, so implementing methods that would be implemented by define_attribute_methods
87
- def ng_xml_changed?
88
- changed_attributes.has_key? 'ng_xml'
89
- end
90
-
91
- # Indicates that this datastream has metadata content.
92
- # @return true
93
- def metadata?
94
- true
95
- end
96
-
97
- def content
98
- to_xml
99
- end
100
-
101
- def datastream_content
102
- @datastream_content ||= Nokogiri::XML(super).to_xml {|config| config.no_declaration}.strip
103
- end
104
-
105
- def content=(content)
106
- @ng_xml = Nokogiri::XML::Document.parse(content)
107
- super(@ng_xml.to_s)
108
- end
109
-
110
- def content_changed?
111
- return false if new? and !xml_loaded
3
+ class NokogiriDatastream < OmDatastream
4
+ def initialize(digital_object=nil, dsid=nil, options={})
5
+ Deprecation.warn("NokogiriDatastream is deprecated and will be removed in active-fedora 7.0, use OmDatastream instead", caller)
112
6
  super
113
7
  end
114
-
115
- def to_xml(xml = nil)
116
- xml = self.ng_xml if xml.nil?
117
- ng_xml = self.ng_xml
118
- if ng_xml.respond_to?(:root) && ng_xml.root.nil? && self.class.respond_to?(:root_property_ref) && !self.class.root_property_ref.nil?
119
- ng_xml = self.class.generate(self.class.root_property_ref, "")
120
- if xml.root.nil?
121
- xml = ng_xml
122
- end
123
- end
124
-
125
- unless xml == ng_xml || ng_xml.root.nil?
126
- if xml.kind_of?(Nokogiri::XML::Document)
127
- xml.root.add_child(ng_xml.root)
128
- elsif xml.kind_of?(Nokogiri::XML::Node)
129
- xml.add_child(ng_xml.root)
130
- else
131
- raise "You can only pass instances of Nokogiri::XML::Node into this method. You passed in #{xml}"
132
- end
133
- end
134
-
135
- return xml.to_xml {|config| config.no_declaration}.strip
136
- end
137
-
138
- # ** Experimental **
139
- #
140
- # This method is called by ActiveFedora::Base.load_instance_from_solr
141
- # in order to initialize a nokogiri datastream's values from a solr document.
142
- # This method merely sets the internal_solr_doc to the document passed in.
143
- # Then any calls to get_values get values from the solr document on demand
144
- # instead of directly from the xml stored in Fedora. This should be used
145
- # for read-only purposes only, and instances where you want to improve performance by
146
- # getting data from solr instead of Fedora.
147
- #
148
- # See ActiveFedora::Base.load_instance_from_solr and +get_values_from_solr+ for more information.
149
- def from_solr(solr_doc)
150
- #just initialize internal_solr_doc since any value retrieval will be done via lazy loading on this doc on-demand
151
- @internal_solr_doc = solr_doc
152
- end
153
-
154
-
155
- # ** Experimental **
156
- # This method is called by +get_values+ if this datastream has been initialized by calling from_solr method via
157
- # ActiveFedora::Base.load_instance_from_solr. This method retrieves values from a preinitialized @internal_solr_doc instead of xml.
158
- # This makes the datastream read-only and this method is not intended to be used in any other case.
159
- #
160
- # Values are retrieved from the @internal_solr_doc on-demand instead of via xml preloaded into memory.
161
- # A term_pointer is passed in and if it contains hierarchical indexes it will detect which solr field values need to be returned.
162
- #
163
- # ====Example 1 (non-hierarchical term_pointer):
164
- #
165
- # term_pointer = [:image, :title_set, :title]
166
- #
167
- # Returns value of "image_title_set_title_t" in @internal_solr_doc
168
- #
169
- # ====Example 2 (hierarchical term_pointer that contains one or more indexes):
170
- # term_pointer = [:image, {:title_set=>1}, :title]
171
- #
172
- # relevant xml:
173
- # <image>
174
- # <title_set>
175
- # <title>Title 1</title>
176
- # </title_set>
177
- # </image>
178
- # <image>
179
- # <title_set>
180
- # <title>Title 2</title>
181
- # </title_set>
182
- # <title_set>
183
- # <title>Title 3</title>
184
- # </title_set>
185
- # </image>
186
- #
187
- # Repeating element nodes are indexed and will be stored in solr as follows:
188
- # image_0_title_set_0_title_t = "Title 1"
189
- # image_1_title_set_0_title_t = "Title 2"
190
- # image_1_title_set_1_title_t = "Title 3"
191
- #
192
- # Even though no image element index is specified, only the second image element has two title_set elements so the expected return value is
193
- # ["Title 3"]
194
- #
195
- # While loading from solr the xml hierarchy is not immediately apparent so we must detect first how many image elements with a title_set element exist
196
- # and then check which of those elements have a second title_set element.
197
- #
198
- # As this nokogiri datastream is indexed in solr, a value at each level in the tree will be stored independently and therefore
199
- # if 'image_0_title_set_0_title_t' exists in solr 'image_0_title_set_t' will also exist in solr.
200
- # So, we will build up the relevant solr names incrementally for a given term_pointer. The last element in the
201
- # solr_name will not contain an index.
202
- #
203
- # It then will do the following:
204
- # Because no index is supplied for :image it will detect which indexes exist in solr
205
- # image_0_title_set_t (found key and add 'image_0_title_set' to base solr_name list)
206
- # image_1_title_set_t (found key and add 'image_1_title_set' to base solr_name list)
207
- # image_2_title_set_t (not found and stop checking indexes for image)
208
- # After iteration 1:
209
- # bases = ["image_0_title_set","image_1_title_set"]
210
- #
211
- # Two image nodes were found and next sees index of 1 supplied for title_set so just uses index of 1 building off bases found in previous iteration
212
- # image_0_title_set_1_title_t (not found remove 'image_0_title_set' from base solr_name list)
213
- # image_1_title_set_1_title_t (found and replace 'image_1_title_set' with new base 'image_1_title_set_1_title')
214
- #
215
- # After iteration 2:
216
- # bases = ["image_1_title_set_1_title"]
217
- # It always looks ahead one element so we check if any elements are after title. There are not any other elements so we are done iterating.
218
- # returns @internal_solr_doc["image_1_title_set_1_title_t"]
219
- # @param [Array] term_pointer Term pointer similar to an xpath ie. [:image, :title_set, :title]
220
- # @return [Array] If no values are found an empty Array is returned.
221
- def get_values_from_solr(*term_pointer)
222
- values = []
223
- solr_doc = @internal_solr_doc
224
- return values if solr_doc.nil?
225
- term = self.class.terminology.retrieve_term(*OM.pointers_to_flat_array(term_pointer, false))
226
- #check if hierarchical term pointer
227
- if is_hierarchical_term_pointer?(*term_pointer)
228
- # if we are hierarchical need to detect all possible node values that exist
229
- # we do this by building up the possible solr names parent by parent and/or child by child
230
- # if an index is supplied for any node in the pointer it will be used
231
- # otherwise it will include all nodes and indexes that exist in solr
232
- bases = []
233
- #add first item in term_pointer as start of bases
234
- # then iterate through possible nodes that might exist
235
- term_pointer.first.kind_of?(Hash) ? bases << term_pointer.first.keys.first : bases << term_pointer.first
236
- for i in 1..(term_pointer.length-1)
237
- #iterate in reverse so that we can modify the bases array while iterating
238
- (bases.length-1).downto(0) do |j|
239
- current_last = (term_pointer[i].kind_of?(Hash) ? term_pointer[i].keys.first : term_pointer[i])
240
- if (term_pointer[i-1].kind_of?(Hash))
241
- #just use index supplied instead of trying possibilities
242
- index = term_pointer[i-1].values.first
243
- solr_name_base = OM::XML::Terminology.term_hierarchical_name({bases[j]=>index},current_last)
244
- solr_name = generate_solr_symbol(solr_name_base, term.type)
245
- bases.delete_at(j)
246
- #insert the new solr name base if found
247
- bases.insert(j,solr_name_base) if has_solr_name?(solr_name,solr_doc)
248
- else
249
- #detect how many nodes exist
250
- index = 0
251
- current_base = bases[j]
252
- bases.delete_at(j)
253
- solr_name_base = OM::XML::Terminology.term_hierarchical_name({current_base=>index},current_last)
254
- solr_name = generate_solr_symbol(solr_name_base, term.type)
255
- #check for indexes that exist until we find all nodes
256
- while has_solr_name?(solr_name,solr_doc) do
257
- #only reinsert if it exists
258
- bases.insert(j,solr_name_base)
259
- index = index + 1
260
- solr_name_base = OM::XML::Terminology.term_hierarchical_name({current_base=>index},current_last)
261
- solr_name = generate_solr_symbol(solr_name_base, term.type)
262
- end
263
- end
264
- end
265
- end
266
-
267
- #all existing applicable solr_names have been found and we can now grab all values and build up our value array
268
- bases.each do |base|
269
- field_name = generate_solr_symbol(base.to_sym, term.type)
270
- value = (solr_doc[field_name].nil? ? solr_doc[field_name.to_s]: solr_doc[field_name])
271
- unless value.nil?
272
- value.is_a?(Array) ? values.concat(value) : values << value
273
- end
274
- end
275
- else
276
- #this is not hierarchical and we can simply look for the solr name created using the terms without any indexes
277
- generic_field_name_base = OM::XML::Terminology.term_generic_name(*term_pointer)
278
- generic_field_name = generate_solr_symbol(generic_field_name_base, term.type)
279
- value = (solr_doc[generic_field_name].nil? ? solr_doc[generic_field_name.to_s]: solr_doc[generic_field_name])
280
- unless value.nil?
281
- value.is_a?(Array) ? values.concat(value) : values << value
282
- end
283
- end
284
- values
285
- end
286
-
287
- def generate_solr_symbol(base, data_type)
288
- ActiveFedora::SolrService.solr_name(base.to_sym, type: data_type)
289
- end
290
-
291
- # ** Experimental **
292
- #@return [Boolean] true if either the key for name exists in solr or if its string value exists
293
- #@param [String] name Name of key to look for
294
- #@param [Solr::Document] solr_doc Solr doc to query
295
- def has_solr_name?(name, solr_doc=Hash.new)
296
- !solr_doc[name].nil? || !solr_doc[name.to_s].nil?
297
- end
298
-
299
- # ** Experimental **
300
- #@return true if the term_pointer contains an index
301
- # ====Example:
302
- # [:image, {:title_set=>1}, :title] return true
303
- # [:image, :title_set, :title] return false
304
- def is_hierarchical_term_pointer?(*term_pointer)
305
- if term_pointer.length>1
306
- term_pointer.each do |pointer|
307
- if pointer.kind_of?(Hash)
308
- return true
309
- end
310
- end
311
- end
312
- return false
313
- end
314
-
315
- # Update field values within the current datastream using {#update_values}, which is a wrapper for {http://rdoc.info/gems/om/1.2.4/OM/XML/TermValueOperators#update_values-instance_method OM::TermValueOperators#update_values}
316
- # Ignores any fields from params that this datastream's Terminology doesn't recognize
317
- #
318
- # @param [Hash] params The params specifying which fields to update and their new values. The syntax of the params Hash is the same as that expected by {#update_values}.
319
- # Term pointers must be valid OM Term pointers (e.g. [:name]). String keys will be ignored.
320
- # @param [Hash] opts This is not currently used by the datastream-level update_indexed_attributes method
321
- #
322
- # Example:
323
- # @mods_ds.update_indexed_attributes( {[{":person"=>"0"}, "role"]=>{"0"=>"role1", "1"=>"role2", "2"=>"role3"} })
324
- # => {"person_0_role"=>{"0"=>"role1", "1"=>"role2", "2"=>"role3"}}
325
- #
326
- # @mods_ds.to_xml # (the following is an approximation)
327
- # <mods>
328
- # <mods:name type="person">
329
- # <mods:role>
330
- # <mods:roleTerm>role1</mods:roleTerm>
331
- # </mods:role>
332
- # <mods:role>
333
- # <mods:roleTerm>role2</mods:roleTerm>
334
- # </mods:role>
335
- # <mods:role>
336
- # <mods:roleTerm>role3</mods:roleTerm>
337
- # </mods:role>
338
- # </mods:name>
339
- # </mods>
340
- def update_indexed_attributes(params={}, opts={})
341
- if self.class.terminology.nil?
342
- raise "No terminology is set for this NokogiriDatastream class. Cannot perform update_indexed_attributes"
343
- end
344
- # remove any fields from params that this datastream doesn't recognize
345
- # make sure to make a copy of params so not to modify hash that might be passed to other methods
346
- current_params = params.clone
347
- current_params.delete_if do |term_pointer,new_values|
348
- if term_pointer.kind_of?(String)
349
- logger.warn "WARNING: #{dsid} ignoring {#{term_pointer.inspect} => #{new_values.inspect}} because #{term_pointer.inspect} is a String (only valid OM Term Pointers will be used). Make sure your html has the correct field_selector tags in it."
350
- true
351
- else
352
- !self.class.terminology.has_term?(*OM.destringify(term_pointer))
353
- end
354
- end
355
-
356
- result = {}
357
- unless current_params.empty?
358
- result = update_values( current_params )
359
- end
360
-
361
- return result
362
- end
363
-
364
- def get_values(field_key,default=[])
365
- term_values(*field_key)
366
- end
367
-
368
-
369
- def find_by_terms(*termpointer)
370
- super
371
- end
372
-
373
- # Update values in the datastream's xml
374
- # This wraps {http://rdoc.info/gems/om/1.2.4/OM/XML/TermValueOperators#update_values-instance_method OM::TermValueOperators#update_values} so that it raises an error if the datastream was loaded from solr, since datastreams loaded that way should be read-only
375
- #
376
- # @example Updating multiple values with a Hash of Term pointers and values
377
- # ds.update_values( {[{":person"=>"0"}, "role", "text"]=>{"0"=>"role1", "1"=>"role2", "2"=>"role3"}, [{:person=>1}, :family_name]=>"Andronicus", [{"person"=>"1"},:given_name]=>["Titus"],[{:person=>1},:role,:text]=>["otherrole1","otherrole2"] } )
378
- # => {"person_0_role_text"=>{"0"=>"role1", "1"=>"role2", "2"=>"role3"}, "person_1_role_text"=>{"0"=>"otherrole1", "1"=>"otherrole2"}}
379
- def update_values(params={})
380
- if @internal_solr_doc
381
- raise "No update performed, this object was initialized via Solr instead of Fedora and is therefore read-only. Please utilize ActiveFedora::Base.find to first load object via Fedora instead."
382
- else
383
- ng_xml_will_change!
384
- result = om_update_values(params)
385
- return result
386
- end
387
- end
388
-
389
- #override OM::XML::term_values so can lazy load from solr if this datastream initialized using +from_solr+
390
- def term_values(*term_pointer)
391
- if @internal_solr_doc
392
- #lazy load values from solr on demand
393
- get_values_from_solr(*term_pointer)
394
- else
395
- om_term_values(*term_pointer)
396
- end
397
- end
398
-
399
- def xml_loaded
400
- instance_variable_defined? :@ng_xml
401
- end
402
-
403
8
  end
404
9
  end
405
10
 
@@ -1,7 +1,402 @@
1
1
  require "om"
2
2
 
3
3
  module ActiveFedora
4
- class OmDatastream < NokogiriDatastream
4
+ class OmDatastream < Datastream
5
5
  # OmDatastream now holds the OM/Nokogiri XML datastream implementation; NokogiriDatastream is a deprecated subclass of it
6
+
7
+ before_save do
8
+ if content.blank?
9
+ logger.warn "Cowardly refusing to save a datastream with empty content: #{self.inspect}"
10
+ false
11
+ end
12
+ end
13
+
14
+ include OM::XML::Document
15
+ include OM::XML::TerminologyBasedSolrizer # this adds support for calling .to_solr
16
+
17
+ alias_method(:om_term_values, :term_values) unless method_defined?(:om_term_values)
18
+ alias_method(:om_update_values, :update_values) unless method_defined?(:om_update_values)
19
+
20
+ attr_accessor :internal_solr_doc
21
+
22
+ def self.default_attributes
23
+ super.merge(:controlGroup => 'X', :mimeType => 'text/xml')
24
+ end
25
+
26
+ # Create an instance of this class based on xml content
27
+ # @param [String, File, Nokogiri::XML::Node] xml the xml content to build from
28
+ # @param [ActiveFedora::NokogiriDatastream] tmpl the Datastream object that you are building @default a new instance of this class
29
+ # Careful! If you call this from a constructor, be sure to provide something 'ie. self' as the @tmpl. Otherwise, you will get an infinite loop!
30
+ def self.from_xml(xml, tmpl=nil)
31
+ tmpl = self.new if tmpl.nil? ## This path is used only for unit testing (e.g. MarpaDCDatastream.from_xml(fixture("data.xml")) )
32
+
33
+ if !xml.present?
34
+ tmpl.ng_xml = self.xml_template
35
+ elsif xml.kind_of? Nokogiri::XML::Node || xml.kind_of?(Nokogiri::XML::Document)
36
+ tmpl.ng_xml = xml
37
+ else
38
+ tmpl.ng_xml = Nokogiri::XML::Document.parse(xml)
39
+ end
40
+
41
+ tmpl.ng_xml_doesnt_change!
42
+
43
+ return tmpl
44
+ end
45
+
46
+ def self.xml_template
47
+ Nokogiri::XML::Document.parse("<xml/>")
48
+ end
49
+
50
+ def ng_xml
51
+ @ng_xml ||= begin
52
+ if new?
53
+ ## Load up the template
54
+ self.class.xml_template
55
+ else
56
+ Nokogiri::XML::Document.parse(datastream_content)
57
+ end
58
+ end
59
+ end
60
+
61
+ def ng_xml=(new_xml)
62
+ case new_xml
63
+ when Nokogiri::XML::Document
64
+ self.content=new_xml.to_xml
65
+ when Nokogiri::XML::Node
66
+ ## Cast a fragment to a document
67
+ self.content=new_xml.to_s
68
+ when String
69
+ self.content=new_xml
70
+ else
71
+ raise TypeError, "You passed a #{new_xml.class} into the ng_xml of the #{self.dsid} datastream. NokogiriDatastream.ng_xml= only accepts Nokogiri::XML::Document, Nokogiri::XML::Element, Nokogiri::XML::Node, or raw XML (String) as inputs."
72
+ end
73
+ end
74
+
75
+ # don't want content eagerly loaded by proxy, so implementing methods that would be implemented by define_attribute_methods
76
+ def ng_xml_will_change!
77
+ changed_attributes['ng_xml'] = nil
78
+ end
79
+
80
+ def ng_xml_doesnt_change!
81
+ changed_attributes.delete('ng_xml')
82
+ end
83
+
84
+ # don't want content eagerly loaded by proxy, so implementing methods that would be implemented by define_attribute_methods
85
+ def ng_xml_changed?
86
+ changed_attributes.has_key? 'ng_xml'
87
+ end
88
+
89
+ # Indicates that this datastream has metadata content.
90
+ # @return true
91
+ def metadata?
92
+ true
93
+ end
94
+
95
+ def content
96
+ to_xml
97
+ end
98
+
99
+ def datastream_content
100
+ @datastream_content ||= Nokogiri::XML(super).to_xml {|config| config.no_declaration}.strip
101
+ end
102
+
103
+ def content=(content)
104
+ @ng_xml = Nokogiri::XML::Document.parse(content)
105
+ super(@ng_xml.to_s)
106
+ end
107
+
108
+ def content_changed?
109
+ return false if new? and !xml_loaded
110
+ super
111
+ end
112
+
113
+ def to_xml(xml = nil)
114
+ xml = self.ng_xml if xml.nil?
115
+ ng_xml = self.ng_xml
116
+ if ng_xml.respond_to?(:root) && ng_xml.root.nil? && self.class.respond_to?(:root_property_ref) && !self.class.root_property_ref.nil?
117
+ ng_xml = self.class.generate(self.class.root_property_ref, "")
118
+ if xml.root.nil?
119
+ xml = ng_xml
120
+ end
121
+ end
122
+
123
+ unless xml == ng_xml || ng_xml.root.nil?
124
+ if xml.kind_of?(Nokogiri::XML::Document)
125
+ xml.root.add_child(ng_xml.root)
126
+ elsif xml.kind_of?(Nokogiri::XML::Node)
127
+ xml.add_child(ng_xml.root)
128
+ else
129
+ raise "You can only pass instances of Nokogiri::XML::Node into this method. You passed in #{xml}"
130
+ end
131
+ end
132
+
133
+ return xml.to_xml {|config| config.no_declaration}.strip
134
+ end
135
+
136
+ # ** Experimental **
137
+ #
138
+ # This method is called by ActiveFedora::Base.load_instance_from_solr
139
+ # in order to initialize a nokogiri datastream's values from a solr document.
140
+ # This method merely sets the internal_solr_doc to the document passed in.
141
+ # Then any calls to get_values get values from the solr document on demand
142
+ # instead of directly from the xml stored in Fedora. This should be used
143
+ # for read-only purposes only, and instances where you want to improve performance by
144
+ # getting data from solr instead of Fedora.
145
+ #
146
+ # See ActiveFedora::Base.load_instance_from_solr and +get_values_from_solr+ for more information.
147
+ def from_solr(solr_doc)
148
+ #just initialize internal_solr_doc since any value retrieval will be done via lazy loading on this doc on-demand
149
+ @internal_solr_doc = solr_doc
150
+ end
151
+
152
+
153
+ # ** Experimental **
154
+ # This method is called by +get_values+ if this datastream has been initialized by calling from_solr method via
155
+ # ActiveFedora::Base.load_instance_from_solr. This method retrieves values from a preinitialized @internal_solr_doc instead of xml.
156
+ # This makes the datastream read-only and this method is not intended to be used in any other case.
157
+ #
158
+ # Values are retrieved from the @internal_solr_doc on-demand instead of via xml preloaded into memory.
159
+ # A term_pointer is passed in and if it contains hierarchical indexes it will detect which solr field values need to be returned.
160
+ #
161
+ # ====Example 1 (non-hierarchical term_pointer):
162
+ #
163
+ # term_pointer = [:image, :title_set, :title]
164
+ #
165
+ # Returns value of "image_title_set_title_t" in @internal_solr_doc
166
+ #
167
+ # ====Example 2 (hierarchical term_pointer that contains one or more indexes):
168
+ # term_pointer = [:image, {:title_set=>1}, :title]
169
+ #
170
+ # relevant xml:
171
+ # <image>
172
+ # <title_set>
173
+ # <title>Title 1</title>
174
+ # </title_set>
175
+ # </image>
176
+ # <image>
177
+ # <title_set>
178
+ # <title>Title 2</title>
179
+ # </title_set>
180
+ # <title_set>
181
+ # <title>Title 3</title>
182
+ # </title_set>
183
+ # </image>
184
+ #
185
+ # Repeating element nodes are indexed and will be stored in solr as follows:
186
+ # image_0_title_set_0_title_t = "Title 1"
187
+ # image_1_title_set_0_title_t = "Title 2"
188
+ # image_1_title_set_1_title_t = "Title 3"
189
+ #
190
+ # Even though no image element index is specified, only the second image element has two title_set elements so the expected return value is
191
+ # ["Title 3"]
192
+ #
193
+ # While loading from solr the xml hierarchy is not immediately apparent so we must detect first how many image elements with a title_set element exist
194
+ # and then check which of those elements have a second title_set element.
195
+ #
196
+ # As this nokogiri datastream is indexed in solr, a value at each level in the tree will be stored independently and therefore
197
+ # if 'image_0_title_set_0_title_t' exists in solr 'image_0_title_set_t' will also exist in solr.
198
+ # So, we will build up the relevant solr names incrementally for a given term_pointer. The last element in the
199
+ # solr_name will not contain an index.
200
+ #
201
+ # It then will do the following:
202
+ # Because no index is supplied for :image it will detect which indexes exist in solr
203
+ # image_0_title_set_t (found key and add 'image_0_title_set' to base solr_name list)
204
+ # image_1_title_set_t (found key and add 'image_1_title_set' to base solr_name list)
205
+ # image_2_title_set_t (not found and stop checking indexes for image)
206
+ # After iteration 1:
207
+ # bases = ["image_0_title_set","image_1_title_set"]
208
+ #
209
+ # Two image nodes were found and next sees index of 1 supplied for title_set so just uses index of 1 building off bases found in previous iteration
210
+ # image_0_title_set_1_title_t (not found remove 'image_0_title_set' from base solr_name list)
211
+ # image_1_title_set_1_title_t (found and replace 'image_1_title_set' with new base 'image_1_title_set_1_title')
212
+ #
213
+ # After iteration 2:
214
+ # bases = ["image_1_title_set_1_title"]
215
+ # It always looks ahead one element so we check if any elements are after title. There are not any other elements so we are done iterating.
216
+ # returns @internal_solr_doc["image_1_title_set_1_title_t"]
217
+ # @param [Array] term_pointer Term pointer similar to an xpath ie. [:image, :title_set, :title]
218
+ # @return [Array] If no values are found an empty Array is returned.
219
+ def get_values_from_solr(*term_pointer)
220
+ values = []
221
+ solr_doc = @internal_solr_doc
222
+ return values if solr_doc.nil?
223
+ term = self.class.terminology.retrieve_term(*OM.pointers_to_flat_array(term_pointer, false))
224
+ #check if hierarchical term pointer
225
+ if is_hierarchical_term_pointer?(*term_pointer)
226
+ # if we are hierarchical need to detect all possible node values that exist
227
+ # we do this by building up the possible solr names parent by parent and/or child by child
228
+ # if an index is supplied for any node in the pointer it will be used
229
+ # otherwise it will include all nodes and indexes that exist in solr
230
+ bases = []
231
+ #add first item in term_pointer as start of bases
232
+ # then iterate through possible nodes that might exist
233
+ term_pointer.first.kind_of?(Hash) ? bases << term_pointer.first.keys.first : bases << term_pointer.first
234
+ for i in 1..(term_pointer.length-1)
235
+ #iterate in reverse so that we can modify the bases array while iterating
236
+ (bases.length-1).downto(0) do |j|
237
+ current_last = (term_pointer[i].kind_of?(Hash) ? term_pointer[i].keys.first : term_pointer[i])
238
+ if (term_pointer[i-1].kind_of?(Hash))
239
+ #just use index supplied instead of trying possibilities
240
+ index = term_pointer[i-1].values.first
241
+ solr_name_base = OM::XML::Terminology.term_hierarchical_name({bases[j]=>index},current_last)
242
+ solr_name = generate_solr_symbol(solr_name_base, term.type)
243
+ bases.delete_at(j)
244
+ #insert the new solr name base if found
245
+ bases.insert(j,solr_name_base) if has_solr_name?(solr_name,solr_doc)
246
+ else
247
+ #detect how many nodes exist
248
+ index = 0
249
+ current_base = bases[j]
250
+ bases.delete_at(j)
251
+ solr_name_base = OM::XML::Terminology.term_hierarchical_name({current_base=>index},current_last)
252
+ solr_name = generate_solr_symbol(solr_name_base, term.type)
253
+ #check for indexes that exist until we find all nodes
254
+ while has_solr_name?(solr_name,solr_doc) do
255
+ #only reinsert if it exists
256
+ bases.insert(j,solr_name_base)
257
+ index = index + 1
258
+ solr_name_base = OM::XML::Terminology.term_hierarchical_name({current_base=>index},current_last)
259
+ solr_name = generate_solr_symbol(solr_name_base, term.type)
260
+ end
261
+ end
262
+ end
263
+ end
264
+
265
+ #all existing applicable solr_names have been found and we can now grab all values and build up our value array
266
+ bases.each do |base|
267
+ field_name = generate_solr_symbol(base.to_sym, term.type)
268
+ value = (solr_doc[field_name].nil? ? solr_doc[field_name.to_s]: solr_doc[field_name])
269
+ unless value.nil?
270
+ value.is_a?(Array) ? values.concat(value) : values << value
271
+ end
272
+ end
273
+ else
274
+ #this is not hierarchical and we can simply look for the solr name created using the terms without any indexes
275
+ generic_field_name_base = OM::XML::Terminology.term_generic_name(*term_pointer)
276
+ generic_field_name = generate_solr_symbol(generic_field_name_base, term.type)
277
+ value = (solr_doc[generic_field_name].nil? ? solr_doc[generic_field_name.to_s]: solr_doc[generic_field_name])
278
+ unless value.nil?
279
+ value.is_a?(Array) ? values.concat(value) : values << value
280
+ end
281
+ end
282
+ values
283
+ end
284
+
285
+ def generate_solr_symbol(base, data_type)
286
+ ActiveFedora::SolrService.solr_name(base.to_sym, type: data_type)
287
+ end
288
+
289
+ # ** Experimental **
290
+ #@return [Boolean] true if either the key for name exists in solr or if its string value exists
291
+ #@param [String] name Name of key to look for
292
+ #@param [Solr::Document] solr_doc Solr doc to query
293
+ def has_solr_name?(name, solr_doc=Hash.new)
294
+ !solr_doc[name].nil? || !solr_doc[name.to_s].nil?
295
+ end
296
+
297
+ # ** Experimental **
298
+ #@return true if the term_pointer contains an index
299
+ # ====Example:
300
+ # [:image, {:title_set=>1}, :title] return true
301
+ # [:image, :title_set, :title] return false
302
+ def is_hierarchical_term_pointer?(*term_pointer)
303
+ if term_pointer.length>1
304
+ term_pointer.each do |pointer|
305
+ if pointer.kind_of?(Hash)
306
+ return true
307
+ end
308
+ end
309
+ end
310
+ return false
311
+ end
312
+
313
+ # Update field values within the current datastream using {#update_values}, which is a wrapper for {http://rdoc.info/gems/om/1.2.4/OM/XML/TermValueOperators#update_values-instance_method OM::TermValueOperators#update_values}
314
+ # Ignores any fields from params that this datastream's Terminology doesn't recognize
315
+ # @raise [RuntimeError] if no terminology is set on this datastream's class
316
+ # @param [Hash] params The params specifying which fields to update and their new values. The syntax of the params Hash is the same as that expected by {#update_values}.
317
+ # Keys (term pointers) must be valid OM Term pointers (e.g. [:name]). String keys are ignored and a warning is logged.
318
+ # @param [Hash] opts This is not currently used by the datastream-level update_indexed_attributes method
319
+ # @return [Hash] the result of {#update_values}, or an empty Hash when no recognized params remain after filtering
320
+ # Example:
321
+ # @mods_ds.update_indexed_attributes( {[{":person"=>"0"}, "role"]=>{"0"=>"role1", "1"=>"role2", "2"=>"role3"} })
322
+ # => {"person_0_role"=>{"0"=>"role1", "1"=>"role2", "2"=>"role3"}}
323
+ #
324
+ # @mods_ds.to_xml # (the following is an approximation)
325
+ # <mods>
326
+ # <mods:name type="person">
327
+ # <mods:role>
328
+ # <mods:roleTerm>role1</mods:roleTerm>
329
+ # </mods:role>
330
+ # <mods:role>
331
+ # <mods:roleTerm>role2</mods:roleTerm>
332
+ # </mods:role>
333
+ # <mods:role>
334
+ # <mods:roleTerm>role3</mods:roleTerm>
335
+ # </mods:role>
336
+ # </mods:name>
337
+ # </mods>
338
+ def update_indexed_attributes(params={}, opts={})
339
+ if self.class.terminology.nil?
340
+ raise "No terminology is set for this NokogiriDatastream class. Cannot perform update_indexed_attributes"
341
+ end
342
+ # remove any fields from params that this datastream doesn't recognize
343
+ # work on a (shallow) copy of params so as not to modify a hash that might be passed to other methods
344
+ current_params = params.clone
345
+ current_params.delete_if do |term_pointer,new_values|
346
+ if term_pointer.kind_of?(String)
347
+ logger.warn "WARNING: #{dsid} ignoring {#{term_pointer.inspect} => #{new_values.inspect}} because #{term_pointer.inspect} is a String (only valid OM Term Pointers will be used). Make sure your html has the correct field_selector tags in it."
348
+ true
349
+ else
350
+ !self.class.terminology.has_term?(*OM.destringify(term_pointer)) # drop pointers the terminology does not define
351
+ end
352
+ end
353
+
354
+ result = {}
355
+ unless current_params.empty?
356
+ result = update_values( current_params )
357
+ end
358
+
359
+ return result
360
+ end
361
+
362
+ def get_values(field_key,default=[]) # NOTE: +default+ is accepted but never used in this method
362
+ term_values(*field_key) # field_key is splatted, so it may be a single term or an Array term pointer
363
+ end
365
+
366
+
367
+ def find_by_terms(*termpointer) # explicit pass-through; adds no behavior of its own
367
+ super # bare super forwards the same arguments to the inherited find_by_terms (presumably from OM::XML::Document -- TODO confirm)
368
+ end
370
+
371
+ # Update values in the datastream's xml
371
+ # This wraps {http://rdoc.info/gems/om/1.2.4/OM/XML/TermValueOperators#update_values-instance_method OM::TermValueOperators#update_values} so that it raises a RuntimeError if this datastream was loaded from Solr (@internal_solr_doc is set), since datastreams loaded that way should be read-only. Otherwise it returns the result of the wrapped om_update_values call.
372
+ #
373
+ # @example Updating multiple values with a Hash of Term pointers and values
374
+ # ds.update_values( {[{":person"=>"0"}, "role", "text"]=>{"0"=>"role1", "1"=>"role2", "2"=>"role3"}, [{:person=>1}, :family_name]=>"Andronicus", [{"person"=>"1"},:given_name]=>["Titus"],[{:person=>1},:role,:text]=>["otherrole1","otherrole2"] } )
375
+ # => {"person_0_role_text"=>{"0"=>"role1", "1"=>"role2", "2"=>"role3"}, "person_1_role_text"=>{"0"=>"otherrole1", "1"=>"otherrole2"}}
376
+ def update_values(params={})
377
+ if @internal_solr_doc
378
+ raise "No update performed, this object was initialized via Solr instead of Fedora and is therefore read-only. Please utilize ActiveFedora::Base.find to first load object via Fedora instead."
379
+ else
380
+ ng_xml_will_change!
381
+ result = om_update_values(params)
382
+ return result
383
+ end
384
+ end
386
+
387
+ # Overrides OM::XML::term_values so that values can be lazy-loaded from Solr when this datastream was initialized using +from_solr+; otherwise falls through to om_term_values
387
+ def term_values(*term_pointer)
388
+ if @internal_solr_doc
389
+ #lazy load values from solr on demand
390
+ get_values_from_solr(*term_pointer)
391
+ else
392
+ om_term_values(*term_pointer)
393
+ end
394
+ end
396
+
397
+ def xml_loaded # reports whether the @ng_xml instance variable has been set on this datastream
397
+ instance_variable_defined? :@ng_xml # true once @ng_xml has been assigned, even if it was assigned nil
398
+ end
400
+
6
401
  end
7
- end
402
+ end
@@ -4,9 +4,9 @@ module ActiveFedora
4
4
 
5
5
  attr_reader :graph, :subject, :predicate, :options
6
6
 
7
- delegate :class, :to_s, :==, :kind_of?, :each, :map, :empty?, :as_json,
8
- :is_a?, :to_ary, :inspect, :first, :last, :include?, :count,
9
- :size, :join, :[], :to => :values
7
+ delegate :class, :to_s, :==, :kind_of?, :each, :each_with_index, :map,
8
+ :empty?, :as_json, :is_a?, :to_ary, :to_a, :inspect, :first,
9
+ :last, :include?, :count, :size, :join, :[], :to => :values
10
10
 
11
11
  # @param graph RDF::Graph
12
12
  # @param subject RDF::URI
@@ -1,3 +1,3 @@
1
1
  module ActiveFedora
2
- VERSION = "6.0.0.pre10"
2
+ VERSION = "6.0.0.rc1"
3
3
  end
@@ -42,6 +42,7 @@ describe ActiveFedora::Datastream do
42
42
  @test_datastream.validate_content_present.should be_false
43
43
  @test_datastream.content = "<foo><xmlelement/></foo>"
44
44
  @test_datastream.validate_content_present.should be_true
45
+ @test_datastream.should_not be_external
45
46
  end
46
47
 
47
48
  it "should expect a dsLocation on an External (E) datastream" do
@@ -50,6 +51,7 @@ describe ActiveFedora::Datastream do
50
51
  @test_datastream.validate_content_present.should be_false
51
52
  @test_datastream.dsLocation = "http://example.com/test/content/abcd"
52
53
  @test_datastream.validate_content_present.should be_true
54
+ @test_datastream.should be_external
53
55
  end
54
56
 
55
57
  it "should expect a dsLocation on a Redirect (R) datastream" do
@@ -45,6 +45,10 @@ describe ActiveFedora::NtriplesRDFDatastream do
45
45
  @subject.title.count.should == 1
46
46
  @subject.title.size.should == 1
47
47
  @subject.title[0].should == "Title of work"
48
+ @subject.title.to_a.should == ["Title of work"]
49
+ val = []
50
+ @subject.title.each_with_index {|v, i| val << "#{i}. #{v}"}
51
+ val.should == ["0. Title of work"]
48
52
  end
49
53
 
50
54
  it "should return fields that are not TermProxies" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: active-fedora
3
3
  version: !ruby/object:Gem::Version
4
- version: 6.0.0.pre10
4
+ version: 6.0.0.rc1
5
5
  prerelease: 6
6
6
  platform: ruby
7
7
  authors:
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2013-02-04 00:00:00.000000000 Z
14
+ date: 2013-02-15 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: rsolr