active-fedora 3.0.7 → 3.1.0.pre1

Files changed (67)
  1. data/.rvmrc +1 -1
  2. data/Gemfile.lock +39 -10
  3. data/History.txt +0 -4
  4. data/active-fedora.gemspec +4 -3
  5. data/lib/active_fedora.rb +9 -9
  6. data/lib/active_fedora/base.rb +92 -163
  7. data/lib/active_fedora/datastream.rb +59 -60
  8. data/lib/active_fedora/datastream_hash.rb +18 -0
  9. data/lib/active_fedora/metadata_datastream.rb +3 -2
  10. data/lib/active_fedora/metadata_datastream_helper.rb +3 -15
  11. data/lib/active_fedora/model.rb +3 -3
  12. data/lib/active_fedora/nokogiri_datastream.rb +305 -302
  13. data/lib/active_fedora/qualified_dublin_core_datastream.rb +24 -19
  14. data/lib/active_fedora/rels_ext_datastream.rb +39 -37
  15. data/lib/active_fedora/rubydora_connection.rb +40 -0
  16. data/lib/active_fedora/semantic_node.rb +1 -1
  17. data/lib/active_fedora/solr_service.rb +1 -1
  18. data/lib/active_fedora/version.rb +1 -1
  19. data/lib/ruby-fedora.rb +0 -8
  20. data/lib/tasks/active_fedora.rake +14 -9
  21. data/lib/tasks/active_fedora_dev.rake +23 -40
  22. data/spec/integration/base_loader_spec.rb +4 -21
  23. data/spec/integration/base_spec.rb +300 -310
  24. data/spec/integration/bug_spec.rb +9 -10
  25. data/spec/integration/datastream_spec.rb +12 -12
  26. data/spec/integration/metadata_datastream_helper_spec.rb +7 -10
  27. data/spec/integration/model_spec.rb +3 -2
  28. data/spec/integration/rels_ext_datastream_spec.rb +9 -15
  29. data/spec/spec_helper.rb +2 -29
  30. data/spec/unit/active_fedora_spec.rb +5 -5
  31. data/spec/unit/base_cma_spec.rb +0 -7
  32. data/spec/unit/base_datastream_management_spec.rb +8 -67
  33. data/spec/unit/base_delegate_spec.rb +26 -9
  34. data/spec/unit/base_extra_spec.rb +5 -3
  35. data/spec/unit/base_file_management_spec.rb +10 -17
  36. data/spec/unit/base_named_datastream_spec.rb +76 -199
  37. data/spec/unit/base_spec.rb +152 -69
  38. data/spec/unit/content_model_spec.rb +1 -1
  39. data/spec/unit/datastream_concurrency_spec.rb +5 -4
  40. data/spec/unit/datastream_spec.rb +28 -48
  41. data/spec/unit/has_many_collection_spec.rb +2 -0
  42. data/spec/unit/inheritance_spec.rb +6 -6
  43. data/spec/unit/metadata_datastream_spec.rb +12 -28
  44. data/spec/unit/model_spec.rb +10 -10
  45. data/spec/unit/nokogiri_datastream_spec.rb +31 -33
  46. data/spec/unit/qualified_dublin_core_datastream_spec.rb +15 -15
  47. data/spec/unit/rels_ext_datastream_spec.rb +35 -29
  48. data/spec/unit/rubydora_connection_spec.rb +26 -0
  49. data/spec/unit/semantic_node_spec.rb +12 -17
  50. data/spec/unit/solr_config_options_spec.rb +13 -14
  51. data/spec/unit/solr_service_spec.rb +14 -17
  52. metadata +59 -55
  53. data/lib/fedora/base.rb +0 -38
  54. data/lib/fedora/connection.rb +0 -218
  55. data/lib/fedora/datastream.rb +0 -67
  56. data/lib/fedora/fedora_object.rb +0 -161
  57. data/lib/fedora/formats.rb +0 -30
  58. data/lib/fedora/generic_search.rb +0 -71
  59. data/lib/fedora/repository.rb +0 -298
  60. data/spec/integration/datastreams_crud_spec.rb +0 -208
  61. data/spec/integration/fedora_object_spec.rb +0 -77
  62. data/spec/integration/repository_spec.rb +0 -301
  63. data/spec/integration/rf_fedora_object_spec.rb +0 -95
  64. data/spec/unit/connection_spec.rb +0 -25
  65. data/spec/unit/fedora_object_spec.rb +0 -74
  66. data/spec/unit/repository_spec.rb +0 -143
  67. data/spec/unit/rf_datastream_spec.rb +0 -63
data/lib/active_fedora/datastream.rb
@@ -1,63 +1,54 @@
- require 'fedora/datastream'
  module ActiveFedora
 
  #This class represents a Fedora datastream
- class Datastream < Fedora::Datastream
+ class Datastream < Rubydora::Datastream
 
  attr_accessor :dirty, :last_modified, :fields
 
- def initialize(attrs = {})
+ def initialize(digital_object, dsid, exists_in_fedora=false )
  @fields={}
  @dirty = false
- super
+ super(digital_object, dsid)
  end
 
- #Return the xml content representing this Datastream from Fedora
- def content
- result = Fedora::Repository.instance.fetch_custom(self.attributes[:pid], "datastreams/#{self.dsid}/content")
- return result
- end
+ # #Return the xml content representing this Datastream from Fedora
+ # def content
+ # result = Fedora::Repository.instance.fetch_custom(self.attributes[:pid], "datastreams/#{self.dsid}/content")
+ # return result
+ # end
 
  #set this Datastream's content
  def content=(content)
- self.blob = content
+ super
  self.dirty = true
  end
 
- def self.delete(parent_pid, dsid)
- Fedora::Repository.instance.delete("%s/datastreams/%s"%[parent_pid, dsid])
- end
+ # def self.delete(parent_pid, dsid)
+ # Fedora::Repository.instance.delete("%s/datastreams/%s"%[parent_pid, dsid])
+ # end
 
- def delete
- self.class.delete(self.pid, self.dsid)
- end
+ # def delete
+ # self.class.delete(self.pid, self.dsid)
+ # end
 
- #get this datastreams identifier
- def pid
- self.attributes[:pid]
- end
+ # #get this datastreams identifier
+ # def pid
+ # self.attributes[:pid]
+ # end
 
- #set this datastreams parent identifier
- def pid=(pid)
- self.attributes[:pid] = pid
- end
+ # #set this datastreams parent identifier
+ # def pid=(pid)
+ # self.attributes[:pid] = pid
+ # end
 
- #set this datastreams identifier (note: sets both dsID and dsid)
- def dsid=(dsid)
- self.attributes[:dsID] = dsid
- self.attributes[:dsid] = dsid
- end
+ # #set this datastreams identifier (note: sets both dsID and dsid)
+ # def dsid=(dsid)
+ # self.attributes[:dsID] = dsid
+ # self.attributes[:dsid] = dsid
+ # end
 
  def size
- if !self.attributes.fetch(:dsSize,nil)
- if self.new_object?
- self.attributes[:dsSize]=nil
- else
- attrs = XmlSimple.xml_in(Fedora::Repository.instance.fetch_custom(self.pid,"datastreams/#{self.dsid}"))
- self.attributes[:dsSize]=attrs["dsSize"].first
- end
- end
- self.attributes[:dsSize]
+ self.profile['dsSize']
  end
 
  #compatibility method for rails' url generators. This method will
@@ -69,14 +60,19 @@ module ActiveFedora
 
  # Test whether this datastream been modified since it was last saved?
  def dirty?
- @dirty
+ @dirty || changed?
+ end
+
+ def new_object?
+ new?
  end
 
  # Save the datastream into fedora.
  # Also triggers {#before_save} and {#after_save} callbacks
  def save
  before_save
- result = Fedora::Repository.instance.save(self)
+ raise "No content #{dsid}" if @content.nil?
+ result = super
  after_save
  result
  end
@@ -86,6 +82,9 @@ module ActiveFedora
  #check_concurrency
  end
 
+ # serializes any changed data into the content field
+ def serialize!
+ end
  # Populate a Datastream object based on the "datastream" node from a FOXML file
  # @param [ActiveFedora::Datastream] tmpl the Datastream object that you are building
  # @param [Nokogiri::XML::Node] node the "foxml:datastream" node from a FOXML file
@@ -102,26 +101,26 @@ module ActiveFedora
 
  # returns a datetime in the standard W3C DateTime Format.
  # ie 2008-10-17T00:17:18.194Z
- def last_modified_in_repository
- # A hack to get around the fact that you can't call getDatastreamHistory
- # or API-M getDatasreams on Fedora 3.0 REST API
- # grabs the CREATED attribute off of the last foxml:datastreamVersion
- # within the appropriate datastream node in the objectXML
- if self.pid != nil
- object_xml = Fedora::FedoraObject.object_xml(self.pid).gsub("\n ","")
- datastream_xml = REXML::Document.new(object_xml).root.elements["foxml:datastream[@ID='#{self.dsid}']"]
-
- if datastream_xml.length > 3
- datastream_xml.elements.each do |el|
- logger.debug el.inspect
- end
- end
-
- datastream_xml.elements[datastream_xml.length - 2].attributes["CREATED"]
- else
- return nil
- end
- end
+ # def last_modified_in_repository
+ # # A hack to get around the fact that you can't call getDatastreamHistory
+ # # or API-M getDatasreams on Fedora 3.0 REST API
+ # # grabs the CREATED attribute off of the last foxml:datastreamVersion
+ # # within the appropriate datastream node in the objectXML
+ # if self.pid != nil
+ # object_xml = Fedora::FedoraObject.object_xml(self.pid).gsub("\n ","")
+ # datastream_xml = REXML::Document.new(object_xml).root.elements["foxml:datastream[@ID='#{self.dsid}']"]
+ #
+ # if datastream_xml.length > 3
+ # datastream_xml.elements.each do |el|
+ # logger.debug el.inspect
+ # end
+ # end
+ #
+ # datastream_xml.elements[datastream_xml.length - 2].attributes["CREATED"]
+ # else
+ # return nil
+ # end
+ # end
 
  def check_concurrency # :nodoc:
  return true
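Note on the Datastream hunks above: a datastream is now constructed from its owning digital object and a dsid (via the Rubydora-backed initializer) instead of an attribute hash, and save refuses to run with nil content. A minimal sketch of the new calling convention; the object and dsid below are illustrative, not taken from this diff:

    # hypothetical usage sketch
    ds = ActiveFedora::Datastream.new(obj.inner_object, 'descMetadata')  # obj: an ActiveFedora::Base instance
    ds.content = '<fields/>'   # delegates to Rubydora and flags the datastream dirty
    ds.dirty?                  # => true
    ds.save                    # raises "No content descMetadata" if @content is still nil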
data/lib/active_fedora/datastream_hash.rb
@@ -0,0 +1,18 @@
+ module ActiveFedora
+ class DatastreamHash < Hash
+
+ def initialize (obj)
+ @obj = obj
+ super()
+ end
+
+ def [] (key)
+ if key == 'DC' && !has_key?(key)
+ ds = Datastream.new(@obj.inner_object, key, true)
+ ds.content
+ self[key] = ds
+ end
+ super
+ end
+ end
+ end
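Note: DatastreamHash is new in this release. It lazily materializes the Fedora-managed 'DC' datastream the first time it is looked up and then caches it like an ordinary Hash entry. A rough sketch of the intended behaviour (the obj variable is illustrative):

    # hypothetical usage sketch
    datastreams = ActiveFedora::DatastreamHash.new(obj)
    dc = datastreams['DC']        # builds a Datastream for the existing DC stream and stores it under 'DC'
    datastreams['DC'].equal?(dc)  # => true; later lookups hit the cached entry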
data/lib/active_fedora/metadata_datastream.rb
@@ -133,8 +133,9 @@ module ActiveFedora
  # Populate a MetadataDatastream object based on the "datastream" node from a FOXML file
  # @param [ActiveFedora::Datastream] tmpl the Datastream object that you are building
  # @param [Nokogiri::XML::Node] node the "foxml:datastream" node from a FOXML file. Assumes that the content of this datastream is that of an ActiveFedora MetadataDatastream (<fields>...</fields>)
- def self.from_xml(tmpl, node) # :nodoc:
- node.xpath("./foxml:datastreamVersion[last()]/foxml:xmlContent/fields/node()").each do |f|
+ def self.from_xml(xml, tmpl) # :nodoc:
+ node = Nokogiri::XML::Document.parse(xml)
+ node.xpath("fields/node()").each do |f|
  tmpl.send("#{f.name}_append", f.text) unless f.class == Nokogiri::XML::Text
  end
  tmpl.send(:dirty=, false)
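Note: MetadataDatastream.from_xml now takes the raw XML first and the template datastream second, and parses the string itself instead of walking a foxml:datastreamVersion node. A hedged sketch of the new call; the datastream class, dsid, and field name are illustrative and assume the template defines a matching field:

    # hypothetical usage sketch
    xml  = "<fields><title>Sample title</title></fields>"
    tmpl = MyMetadataDatastream.new(obj.inner_object, 'properties')   # illustrative subclass and dsid
    ActiveFedora::MetadataDatastream.from_xml(xml, tmpl)
    # each <fields> child is appended via tmpl.title_append("Sample title"), then tmpl is marked clean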
data/lib/active_fedora/metadata_datastream_helper.rb
@@ -21,20 +21,8 @@ module ActiveFedora::MetadataDatastreamHelper
  klass.send(:include, Solrizer::FieldNameMapper)
  end
 
- #constructor, calls up to ActiveFedora::Datastream's constructor
- def initialize(attrs=nil)
- super
- @fields={}
- end
-
- # sets the blob, which in this case is the xml version of self, then calls ActiveFedora::Datastream.save
- def save
- self.set_blob_for_save
- super
- end
-
- def set_blob_for_save # :nodoc:
- self.blob = self.to_xml
+ def serialize! # :nodoc:
+ self.content = self.to_xml ##TODO only do this when the xml will have changed to avoid a load of the datastream content.
  end
 
  def to_solr(solr_doc = Hash.new) # :nodoc:
@@ -95,4 +83,4 @@ module ActiveFedora::MetadataDatastreamHelper
  return builder.to_xml
  end
 
- end
+ end
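Note: the save/set_blob_for_save pair is gone from the helper; metadata datastreams now push their XML into content through serialize!, which pairs with the no-op Datastream#serialize! hook added earlier in this diff. A minimal sketch of the effect (the field name is illustrative and assumes the datastream defines it):

    # hypothetical usage sketch
    ds.title_append("A new title")   # changing a field marks the datastream dirty
    ds.serialize!                    # equivalent to ds.content = ds.to_xml
    ds.save                          # Datastream#save then writes the content to Fedora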
data/lib/active_fedora/model.rb
@@ -50,7 +50,7 @@ module ActiveFedora
  # @example this will return an instance of Book, even if the object hydra:dataset1 asserts that it is a Dataset
  # Book.load_instance("hydra:dataset1")
  def load_instance(pid)
- Fedora::Repository.instance.find_model(pid, self)
+ RubydoraConnection.instance.find_model(pid, self)
  end
 
  # Takes :all or a pid as arguments
@@ -75,7 +75,7 @@ module ActiveFedora
  hits = SolrService.instance.conn.query(q).hits
  end
  results = hits.map do |hit|
- obj = Fedora::Repository.instance.find_model(hit[SOLR_DOCUMENT_ID], self)
+ obj = RubydoraConnection.instance.find_model(hit[SOLR_DOCUMENT_ID], self)
  #obj.inner_object.new_object = false
  #return obj
  end
@@ -208,7 +208,7 @@ module ActiveFedora
 
  def class_fields
  #create dummy object that is empty by passing in fake pid
- object = self.new({:pid=>'FAKE'})
+ object = self.new()#{:pid=>'FAKE'})
  fields = object.fields
  #reset id to nothing
  fields[:id][:values] = []
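Note: everywhere model.rb used to call Fedora::Repository.instance it now goes through the new RubydoraConnection singleton (added in data/lib/active_fedora/rubydora_connection.rb, see the file list). A rough sketch of the call path these hunks rely on, reusing the pid from the doc comment above:

    # hypothetical usage sketch
    book = ActiveFedora::RubydoraConnection.instance.find_model("hydra:dataset1", Book)
    # equivalent to Book.load_instance("hydra:dataset1") after this change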
data/lib/active_fedora/nokogiri_datastream.rb
@@ -3,345 +3,348 @@ require "om"
  require "solrizer/xml"
 
  #this class represents a MetadataDatastream, a special case of ActiveFedora::Datastream
- class ActiveFedora::NokogiriDatastream < ActiveFedora::Datastream
+ module ActiveFedora
+ class NokogiriDatastream < Datastream
+
+ include MetadataDatastreamHelper
+ include OM::XML::Document
+ include Solrizer::XML::TerminologyBasedSolrizer # this adds support for calling .to_solr
 
- include ActiveFedora::MetadataDatastreamHelper
- include OM::XML::Document
- include Solrizer::XML::TerminologyBasedSolrizer # this adds support for calling .to_solr
-
- # extend(OM::XML::Container::ClassMethods)
+ # extend(OM::XML::Container::ClassMethods)
 
- alias_method(:om_term_values, :term_values) unless method_defined?(:om_term_values)
- alias_method(:om_update_values, :update_values) unless method_defined?(:om_update_values)
-
- attr_accessor :internal_solr_doc
- attr_reader :ng_xml
-
- #constructor, calls up to ActiveFedora::Datastream's constructor
- def initialize(attrs=nil)
- super
- @fields={}
- self.class.from_xml(blob, self)
- end
+ alias_method(:om_term_values, :term_values) unless method_defined?(:om_term_values)
+ alias_method(:om_update_values, :update_values) unless method_defined?(:om_update_values)
+
+ attr_accessor :internal_solr_doc
+ attr_reader :ng_xml
 
- # Create an instance of this class based on xml content
- # @param [String, File, Nokogiri::XML::Node] xml the xml content to build from
- # @param [ActiveFedora::MetadataDatastream] tmpl the Datastream object that you are building @default a new instance of this class
- # Careful! If you call this from a constructor, be sure to provide something 'ie. self' as the @tmpl. Otherwise, you will get an infinite loop!
- def self.from_xml(xml, tmpl=self.new) # :nodoc:
- if xml.nil?
- tmpl.ng_xml = self.xml_template
- elsif xml.kind_of? Nokogiri::XML::Node || xml.kind_of?(Nokogiri::XML::Document)
- tmpl.ng_xml = xml
- else
- tmpl.ng_xml = Nokogiri::XML::Document.parse(xml)
- end
- tmpl.send(:dirty=, false)
- return tmpl
- end
-
- def self.xml_template
- Nokogiri::XML::Document.parse("<xml/>")
- end
-
- def ng_xml=(new_xml)
- case new_xml
- when Nokogiri::XML::Document, Nokogiri::XML::Element, Nokogiri::XML::Node
- @ng_xml = new_xml
- when String
- @ng_xml = Nokogiri::XML::Document.parse(new_xml)
- else
- raise TypeError, "You passed a #{new_xml.class} into the ng_xml of the #{self.dsid} datastream. NokogiriDatastream.ng_xml= only accepts Nokogiri::XML::Document, Nokogiri::XML::Element, Nokogiri::XML::Node, or raw XML (String) as inputs."
+ def initialize(digital_object, dsid, exists_in_fedora=false)
+ super
+ self.class.from_xml(nil, self)
  end
- self.dirty = true
- end
-
- def content=(content)
- super
- self.ng_xml = Nokogiri::XML::Document.parse(content)
- end
-
-
- def to_xml(xml = self.ng_xml)
- ng_xml = self.ng_xml
- if ng_xml.respond_to?(:root) && ng_xml.root.nil? && self.class.respond_to?(:root_property_ref) && !self.class.root_property_ref.nil?
- ng_xml = self.class.generate(self.class.root_property_ref, "")
- if xml.root.nil?
- xml = ng_xml
+
+
+ # Create an instance of this class based on xml content
+ # @param [String, File, Nokogiri::XML::Node] xml the xml content to build from
+ # @param [ActiveFedora::MetadataDatastream] tmpl the Datastream object that you are building @default a new instance of this class
+ # Careful! If you call this from a constructor, be sure to provide something 'ie. self' as the @tmpl. Otherwise, you will get an infinite loop!
+ def self.from_xml(xml, tmpl=nil)
+ tmpl = self.new(nil, nil) if tmpl.nil? ## This path is used only for unit testing (e.g. MarpaDCDatastream.from_xml(fixture("data.xml")) )
+
+ if xml.nil?
+ tmpl.ng_xml = self.xml_template
+ elsif xml.kind_of? Nokogiri::XML::Node || xml.kind_of?(Nokogiri::XML::Document)
+ tmpl.ng_xml = xml
+ else
+ tmpl.ng_xml = Nokogiri::XML::Document.parse(xml)
+ end
+ tmpl.send(:dirty=, false)
+ return tmpl
+ end
+
+ def self.xml_template
+ Nokogiri::XML::Document.parse("<xml/>")
+ end
+
+ def ng_xml=(new_xml)
+ case new_xml
+ when Nokogiri::XML::Document, Nokogiri::XML::Element, Nokogiri::XML::Node
+ @ng_xml = new_xml
+ when String
+ @ng_xml = Nokogiri::XML::Document.parse(new_xml)
+ else
+ raise TypeError, "You passed a #{new_xml.class} into the ng_xml of the #{self.dsid} datastream. NokogiriDatastream.ng_xml= only accepts Nokogiri::XML::Document, Nokogiri::XML::Element, Nokogiri::XML::Node, or raw XML (String) as inputs."
  end
  end
+
+ def content=(content)
+ super
+ self.ng_xml = Nokogiri::XML::Document.parse(content)
+ end
+
+
+ def to_xml(xml = self.ng_xml)
+ ng_xml = self.ng_xml
+ if ng_xml.respond_to?(:root) && ng_xml.root.nil? && self.class.respond_to?(:root_property_ref) && !self.class.root_property_ref.nil?
+ ng_xml = self.class.generate(self.class.root_property_ref, "")
+ if xml.root.nil?
+ xml = ng_xml
+ end
+ end
 
- unless xml == ng_xml || ng_xml.root.nil?
- if xml.kind_of?(Nokogiri::XML::Document)
- xml.root.add_child(ng_xml.root)
- elsif xml.kind_of?(Nokogiri::XML::Node)
- xml.add_child(ng_xml.root)
- else
- raise "You can only pass instances of Nokogiri::XML::Node into this method. You passed in #{xml}"
+ unless xml == ng_xml || ng_xml.root.nil?
+ if xml.kind_of?(Nokogiri::XML::Document)
+ xml.root.add_child(ng_xml.root)
+ elsif xml.kind_of?(Nokogiri::XML::Node)
+ xml.add_child(ng_xml.root)
+ else
+ raise "You can only pass instances of Nokogiri::XML::Node into this method. You passed in #{xml}"
+ end
  end
+
+ return xml.to_xml {|config| config.no_declaration}
  end
 
- return xml.to_xml {|config| config.no_declaration}
- end
-
- # ** Experimental **
- #
- # This method is called by ActiveFedora::Base.load_instance_from_solr
- # in order to initialize a nokogiri datastreams values from a solr document.
- # This method merely sets the internal_solr_doc to the document passed in.
- # Then any calls to get_values get values from the solr document on demand
- # instead of directly from the xml stored in Fedora. This should be used
- # for read-only purposes only, and instances where you want to improve performance by
- # getting data from solr instead of Fedora.
- #
- # See ActiveFedora::Base.load_instance_from_solr and +get_values_from_solr+ for more information.
- def from_solr(solr_doc)
- #just initialize internal_solr_doc since any value retrieval will be done via lazy loading on this doc on-demand
- @internal_solr_doc = solr_doc
- end
+ # ** Experimental **
+ #
+ # This method is called by ActiveFedora::Base.load_instance_from_solr
+ # in order to initialize a nokogiri datastreams values from a solr document.
+ # This method merely sets the internal_solr_doc to the document passed in.
+ # Then any calls to get_values get values from the solr document on demand
+ # instead of directly from the xml stored in Fedora. This should be used
+ # for read-only purposes only, and instances where you want to improve performance by
+ # getting data from solr instead of Fedora.
+ #
+ # See ActiveFedora::Base.load_instance_from_solr and +get_values_from_solr+ for more information.
+ def from_solr(solr_doc)
+ #just initialize internal_solr_doc since any value retrieval will be done via lazy loading on this doc on-demand
+ @internal_solr_doc = solr_doc
+ end
 
 
- # ** Experimental **
- # This method is called by +get_values+ if this datastream has been initialized by calling from_solr method via
- # ActiveFedora::Base.load_instance_from_solr. This method retrieves values from a preinitialized @internal_solr_doc instead of xml.
- # This makes the datastream read-only and this method is not intended to be used in any other case.
- #
- # Values are retrieved from the @internal_solr_doc on-demand instead of via xml preloaded into memory.
- # A term_pointer is passed in and if it contains hierarchical indexes it will detect which solr field values need to be returned.
- #
- # ====Example 1 (non-hierarchical term_pointer):
- #
- # term_pointer = [:image, :title_set, :title]
- #
- # Returns value of "image_title_set_title_t" in @internal_solr_doc
- #
- # ====Example 2 (hierarchical term_pointer that contains one or more indexes):
- # term_pointer = [:image, {:title_set=>1}, :title]
- #
- # relevant xml:
- # <image>
- # <title_set>
- # <title>Title 1</title>
- # </title_set>
- # </image>
- # <image>
- # <title_set>
- # <title>Title 2</title>
- # </title_set>
- # <title_set>
- # <title>Title 3</title>
- # </title_set>
- # </image>
- #
- # Repeating element nodes are indexed and will be stored in solr as follows:
- # image_0_title_set_0_title_t = "Title 1"
- # image_1_title_set_0_title_t = "Title 2"
- # image_1_title_set_1_title_t = "Title 3"
- #
- # Even though no image element index is specified, only the second image element has two title_set elements so the expected return value is
- # ["Title 3"]
- #
- # While loading from solr the xml hierarchy is not immediately apparent so we must detect first how many image elements with a title_set element exist
- # and then check which of those elements have a second title element.
- #
- # As this nokogiri datastream is indexed in solr, a value at each level in the tree will be stored independently and therefore
- # if 'image_0_title_set_0_title_t' exists in solr 'image_0_title_set_t' will also exist in solr.
- # So, we will build up the relevant solr names incrementally for a given term_pointer. The last element in the
- # solr_name will not contain an index.
- #
- # It then will do the following:
- # Because no index is supplied for :image it will detect which indexes exist in solr
- # image_0_title_set_t (found key and add 'image_0_title_set' to base solr_name list)
- # image_1_title_set_t (found key and add 'image_0_title_set' to base solr_name list)
- # image_2_title_set_t (not found and stop checking indexes for image)
- # After iteration 1:
- # bases = ["image_0_title_set","image_1_title_set"]
- #
- # Two image nodes were found and next sees index of 1 supplied for title_set so just uses index of 1 building off bases found in previous iteration
- # image_0_title_set_1_title_t (not found remove 'image_0_title_set' from base solr_name list)
- # image_1_title_set_1_title_t (found and replace 'image_1_title_set' with new base 'image_1_title_set_1_title')
- #
- # After iteration 2:
- # bases = ["image_1_title_set_1_title"]
- # It always looks ahead one element so we check if any elements are after title. There are not any other elements so we are done iterating.
- # returns @internal_solr_doc["image_1_title_set_1_title_t"]
- # @param [Array] term_pointer Term pointer similar to an xpath ie. [:image, :title_set, :title]
- # @return [Array] If no values are found an empty Array is returned.
- def get_values_from_solr(*term_pointer)
- values = []
- solr_doc = @internal_solr_doc
- return values if solr_doc.nil?
- begin
- term = self.class.terminology.retrieve_term(*OM.pointers_to_flat_array(term_pointer, false))
- #check if hierarchical term pointer
- if is_hierarchical_term_pointer?(*term_pointer)
- # if we are hierarchical need to detect all possible node values that exist
- # we do this by building up the possible solr names parent by parent and/or child by child
- # if an index is supplied for any node in the pointer it will be used
- # otherwise it will include all nodes and indexes that exist in solr
- bases = []
- #add first item in term_pointer as start of bases
- # then iterate through possible nodes that might exist
- term_pointer.first.kind_of?(Hash) ? bases << term_pointer.first.keys.first : bases << term_pointer.first
- for i in 1..(term_pointer.length-1)
- #iterate in reverse so that we can modify the bases array while iterating
- (bases.length-1).downto(0) do |j|
- current_last = (term_pointer[i].kind_of?(Hash) ? term_pointer[i].keys.first : term_pointer[i])
- if (term_pointer[i-1].kind_of?(Hash))
- #just use index supplied instead of trying possibilities
- index = term_pointer[i-1].values.first
- solr_name_base = OM::XML::Terminology.term_hierarchical_name({bases[j]=>index},current_last)
- solr_name = generate_solr_symbol(solr_name_base, term.data_type)
- bases.delete_at(j)
- #insert the new solr name base if found
- bases.insert(j,solr_name_base) if has_solr_name?(solr_name,solr_doc)
- else
- #detect how many nodes exist
- index = 0
- current_base = bases[j]
- bases.delete_at(j)
- solr_name_base = OM::XML::Terminology.term_hierarchical_name({current_base=>index},current_last)
- solr_name = generate_solr_symbol(solr_name_base, term.data_type)
- #check for indexes that exist until we find all nodes
- while has_solr_name?(solr_name,solr_doc) do
- #only reinsert if it exists
- bases.insert(j,solr_name_base)
- index = index + 1
+ # ** Experimental **
+ # This method is called by +get_values+ if this datastream has been initialized by calling from_solr method via
+ # ActiveFedora::Base.load_instance_from_solr. This method retrieves values from a preinitialized @internal_solr_doc instead of xml.
+ # This makes the datastream read-only and this method is not intended to be used in any other case.
+ #
+ # Values are retrieved from the @internal_solr_doc on-demand instead of via xml preloaded into memory.
+ # A term_pointer is passed in and if it contains hierarchical indexes it will detect which solr field values need to be returned.
+ #
+ # ====Example 1 (non-hierarchical term_pointer):
+ #
+ # term_pointer = [:image, :title_set, :title]
+ #
+ # Returns value of "image_title_set_title_t" in @internal_solr_doc
+ #
+ # ====Example 2 (hierarchical term_pointer that contains one or more indexes):
+ # term_pointer = [:image, {:title_set=>1}, :title]
+ #
+ # relevant xml:
+ # <image>
+ # <title_set>
+ # <title>Title 1</title>
+ # </title_set>
+ # </image>
+ # <image>
+ # <title_set>
+ # <title>Title 2</title>
+ # </title_set>
+ # <title_set>
+ # <title>Title 3</title>
+ # </title_set>
+ # </image>
+ #
+ # Repeating element nodes are indexed and will be stored in solr as follows:
+ # image_0_title_set_0_title_t = "Title 1"
+ # image_1_title_set_0_title_t = "Title 2"
+ # image_1_title_set_1_title_t = "Title 3"
+ #
+ # Even though no image element index is specified, only the second image element has two title_set elements so the expected return value is
+ # ["Title 3"]
+ #
+ # While loading from solr the xml hierarchy is not immediately apparent so we must detect first how many image elements with a title_set element exist
+ # and then check which of those elements have a second title element.
+ #
+ # As this nokogiri datastream is indexed in solr, a value at each level in the tree will be stored independently and therefore
+ # if 'image_0_title_set_0_title_t' exists in solr 'image_0_title_set_t' will also exist in solr.
+ # So, we will build up the relevant solr names incrementally for a given term_pointer. The last element in the
+ # solr_name will not contain an index.
+ #
+ # It then will do the following:
+ # Because no index is supplied for :image it will detect which indexes exist in solr
+ # image_0_title_set_t (found key and add 'image_0_title_set' to base solr_name list)
+ # image_1_title_set_t (found key and add 'image_0_title_set' to base solr_name list)
+ # image_2_title_set_t (not found and stop checking indexes for image)
+ # After iteration 1:
+ # bases = ["image_0_title_set","image_1_title_set"]
+ #
+ # Two image nodes were found and next sees index of 1 supplied for title_set so just uses index of 1 building off bases found in previous iteration
+ # image_0_title_set_1_title_t (not found remove 'image_0_title_set' from base solr_name list)
+ # image_1_title_set_1_title_t (found and replace 'image_1_title_set' with new base 'image_1_title_set_1_title')
+ #
+ # After iteration 2:
+ # bases = ["image_1_title_set_1_title"]
+ # It always looks ahead one element so we check if any elements are after title. There are not any other elements so we are done iterating.
+ # returns @internal_solr_doc["image_1_title_set_1_title_t"]
+ # @param [Array] term_pointer Term pointer similar to an xpath ie. [:image, :title_set, :title]
+ # @return [Array] If no values are found an empty Array is returned.
+ def get_values_from_solr(*term_pointer)
+ values = []
+ solr_doc = @internal_solr_doc
+ return values if solr_doc.nil?
+ begin
+ term = self.class.terminology.retrieve_term(*OM.pointers_to_flat_array(term_pointer, false))
+ #check if hierarchical term pointer
+ if is_hierarchical_term_pointer?(*term_pointer)
+ # if we are hierarchical need to detect all possible node values that exist
+ # we do this by building up the possible solr names parent by parent and/or child by child
+ # if an index is supplied for any node in the pointer it will be used
+ # otherwise it will include all nodes and indexes that exist in solr
+ bases = []
+ #add first item in term_pointer as start of bases
+ # then iterate through possible nodes that might exist
+ term_pointer.first.kind_of?(Hash) ? bases << term_pointer.first.keys.first : bases << term_pointer.first
+ for i in 1..(term_pointer.length-1)
+ #iterate in reverse so that we can modify the bases array while iterating
+ (bases.length-1).downto(0) do |j|
+ current_last = (term_pointer[i].kind_of?(Hash) ? term_pointer[i].keys.first : term_pointer[i])
+ if (term_pointer[i-1].kind_of?(Hash))
+ #just use index supplied instead of trying possibilities
+ index = term_pointer[i-1].values.first
+ solr_name_base = OM::XML::Terminology.term_hierarchical_name({bases[j]=>index},current_last)
+ solr_name = generate_solr_symbol(solr_name_base, term.data_type)
+ bases.delete_at(j)
+ #insert the new solr name base if found
+ bases.insert(j,solr_name_base) if has_solr_name?(solr_name,solr_doc)
+ else
+ #detect how many nodes exist
+ index = 0
+ current_base = bases[j]
+ bases.delete_at(j)
  solr_name_base = OM::XML::Terminology.term_hierarchical_name({current_base=>index},current_last)
  solr_name = generate_solr_symbol(solr_name_base, term.data_type)
+ #check for indexes that exist until we find all nodes
+ while has_solr_name?(solr_name,solr_doc) do
+ #only reinsert if it exists
+ bases.insert(j,solr_name_base)
+ index = index + 1
+ solr_name_base = OM::XML::Terminology.term_hierarchical_name({current_base=>index},current_last)
+ solr_name = generate_solr_symbol(solr_name_base, term.data_type)
+ end
  end
  end
  end
- end
 
- #all existing applicable solr_names have been found and we can now grab all values and build up our value array
- bases.each do |base|
- field_name = generate_solr_symbol(base.to_sym, term.data_type)
- value = (solr_doc[field_name].nil? ? solr_doc[field_name.to_s]: solr_doc[field_name])
+ #all existing applicable solr_names have been found and we can now grab all values and build up our value array
+ bases.each do |base|
+ field_name = generate_solr_symbol(base.to_sym, term.data_type)
+ value = (solr_doc[field_name].nil? ? solr_doc[field_name.to_s]: solr_doc[field_name])
+ unless value.nil?
+ value.is_a?(Array) ? values.concat(value) : values << value
+ end
+ end
+ else
+ #this is not hierarchical and we can simply look for the solr name created using the terms without any indexes
+ generic_field_name_base = OM::XML::Terminology.term_generic_name(*term_pointer)
+ generic_field_name = generate_solr_symbol(generic_field_name_base, term.data_type)
+ value = (solr_doc[generic_field_name].nil? ? solr_doc[generic_field_name.to_s]: solr_doc[generic_field_name])
  unless value.nil?
  value.is_a?(Array) ? values.concat(value) : values << value
  end
  end
- else
- #this is not hierarchical and we can simply look for the solr name created using the terms without any indexes
- generic_field_name_base = OM::XML::Terminology.term_generic_name(*term_pointer)
- generic_field_name = generate_solr_symbol(generic_field_name_base, term.data_type)
- value = (solr_doc[generic_field_name].nil? ? solr_doc[generic_field_name.to_s]: solr_doc[generic_field_name])
- unless value.nil?
- value.is_a?(Array) ? values.concat(value) : values << value
- end
- end
- rescue Exception => e
- #just do nothing since term does not exist and return emtpy values
- raise e
+ rescue Exception => e
+ #just do nothing since term does not exist and return emtpy values
+ raise e
+ end
+ values
  end
- values
- end
 
- def generate_solr_symbol(base, data_type)
- Solrizer::XML::TerminologyBasedSolrizer.default_field_mapper.solr_name(base.to_sym, data_type)
- end
+ def generate_solr_symbol(base, data_type)
+ Solrizer::XML::TerminologyBasedSolrizer.default_field_mapper.solr_name(base.to_sym, data_type)
+ end
 
- # ** Experimental **
- #@return [Boolean] true if either the key for name exists in solr or if its string value exists
- #@param [String] name Name of key to look for
- #@param [Solr::Document] solr_doc Solr doc to query
- def has_solr_name?(name, solr_doc=Hash.new)
- !solr_doc[name].nil? || !solr_doc[name.to_s].nil?
- end
+ # ** Experimental **
+ #@return [Boolean] true if either the key for name exists in solr or if its string value exists
+ #@param [String] name Name of key to look for
+ #@param [Solr::Document] solr_doc Solr doc to query
+ def has_solr_name?(name, solr_doc=Hash.new)
+ !solr_doc[name].nil? || !solr_doc[name.to_s].nil?
+ end
 
- # ** Experimental **
- #@return true if the term_pointer contains an index
- # ====Example:
- # [:image, {:title_set=>1}, :title] return true
- # [:image, :title_set, :title] return false
- def is_hierarchical_term_pointer?(*term_pointer)
- if term_pointer.length>1
- term_pointer.each do |pointer|
- if pointer.kind_of?(Hash)
- return true
+ # ** Experimental **
+ #@return true if the term_pointer contains an index
+ # ====Example:
+ # [:image, {:title_set=>1}, :title] return true
+ # [:image, :title_set, :title] return false
+ def is_hierarchical_term_pointer?(*term_pointer)
+ if term_pointer.length>1
+ term_pointer.each do |pointer|
+ if pointer.kind_of?(Hash)
+ return true
+ end
  end
  end
+ return false
  end
- return false
- end
 
- # Update field values within the current datastream using {#update_values}, which is a wrapper for {http://rdoc.info/gems/om/1.2.4/OM/XML/TermValueOperators#update_values-instance_method OM::TermValueOperators#update_values}
- # Ignores any fields from params that this datastream's Terminology doesn't recognize
- #
- # @param [Hash] params The params specifying which fields to update and their new values. The syntax of the params Hash is the same as that expected by
- # term_pointers must be a valid OM Term pointers (ie. [:name]). Strings will be ignored.
- # @param [Hash] opts This is not currently used by the datastream-level update_indexed_attributes method
- #
- # Example:
- # @mods_ds.update_indexed_attributes( {[{":person"=>"0"}, "role"]=>{"0"=>"role1", "1"=>"role2", "2"=>"role3"} })
- # => {"person_0_role"=>{"0"=>"role1", "1"=>"role2", "2"=>"role3"}}
- #
- # @mods_ds.to_xml # (the following is an approximation)
- # <mods>
- # <mods:name type="person">
- # <mods:role>
- # <mods:roleTerm>role1</mods:roleTerm>
- # </mods:role>
- # <mods:role>
- # <mods:roleTerm>role2</mods:roleTerm>
- # </mods:role>
- # <mods:role>
- # <mods:roleTerm>role3</mods:roleTerm>
- # </mods:role>
- # </mods:name>
- # </mods>
- def update_indexed_attributes(params={}, opts={})
- if self.class.terminology.nil?
- raise "No terminology is set for this NokogiriDatastream class. Cannot perform update_indexed_attributes"
- end
- # remove any fields from params that this datastream doesn't recognize
- # make sure to make a copy of params so not to modify hash that might be passed to other methods
- current_params = params.clone
- current_params.delete_if do |term_pointer,new_values|
- if term_pointer.kind_of?(String)
- logger.warn "WARNING: #{dsid} ignoring {#{term_pointer.inspect} => #{new_values.inspect}} because #{term_pointer.inspect} is a String (only valid OM Term Pointers will be used). Make sure your html has the correct field_selector tags in it."
- true
- else
- !self.class.terminology.has_term?(*OM.destringify(term_pointer))
+ # Update field values within the current datastream using {#update_values}, which is a wrapper for {http://rdoc.info/gems/om/1.2.4/OM/XML/TermValueOperators#update_values-instance_method OM::TermValueOperators#update_values}
+ # Ignores any fields from params that this datastream's Terminology doesn't recognize
+ #
+ # @param [Hash] params The params specifying which fields to update and their new values. The syntax of the params Hash is the same as that expected by
+ # term_pointers must be a valid OM Term pointers (ie. [:name]). Strings will be ignored.
+ # @param [Hash] opts This is not currently used by the datastream-level update_indexed_attributes method
+ #
+ # Example:
+ # @mods_ds.update_indexed_attributes( {[{":person"=>"0"}, "role"]=>{"0"=>"role1", "1"=>"role2", "2"=>"role3"} })
+ # => {"person_0_role"=>{"0"=>"role1", "1"=>"role2", "2"=>"role3"}}
+ #
+ # @mods_ds.to_xml # (the following is an approximation)
+ # <mods>
+ # <mods:name type="person">
+ # <mods:role>
+ # <mods:roleTerm>role1</mods:roleTerm>
+ # </mods:role>
+ # <mods:role>
+ # <mods:roleTerm>role2</mods:roleTerm>
+ # </mods:role>
+ # <mods:role>
+ # <mods:roleTerm>role3</mods:roleTerm>
+ # </mods:role>
+ # </mods:name>
+ # </mods>
+ def update_indexed_attributes(params={}, opts={})
+ if self.class.terminology.nil?
+ raise "No terminology is set for this NokogiriDatastream class. Cannot perform update_indexed_attributes"
+ end
+ # remove any fields from params that this datastream doesn't recognize
+ # make sure to make a copy of params so not to modify hash that might be passed to other methods
+ current_params = params.clone
+ current_params.delete_if do |term_pointer,new_values|
+ if term_pointer.kind_of?(String)
+ logger.warn "WARNING: #{dsid} ignoring {#{term_pointer.inspect} => #{new_values.inspect}} because #{term_pointer.inspect} is a String (only valid OM Term Pointers will be used). Make sure your html has the correct field_selector tags in it."
+ true
+ else
+ !self.class.terminology.has_term?(*OM.destringify(term_pointer))
+ end
  end
- end
 
- result = {}
- unless current_params.empty?
- result = update_values( current_params )
+ result = {}
+ unless current_params.empty?
+ result = update_values( current_params )
+ end
+
+ return result
  end
 
- return result
- end
-
- def get_values(field_key,default=[])
- term_values(*field_key)
- end
+ def get_values(field_key,default=[])
+ term_values(*field_key)
+ end
 
- # Update values in the datastream's xml
- # This wraps {http://rdoc.info/gems/om/1.2.4/OM/XML/TermValueOperators#update_values-instance_method OM::TermValueOperators#update_values} so that returns an error if we have loaded from solr since datastreams loaded that way should be read-only
- #
- # @example Updating multiple values with a Hash of Term pointers and values
- # ds.update_values( {[{":person"=>"0"}, "role", "text"]=>{"0"=>"role1", "1"=>"role2", "2"=>"role3"}, [{:person=>1}, :family_name]=>"Andronicus", [{"person"=>"1"},:given_name]=>["Titus"],[{:person=>1},:role,:text]=>["otherrole1","otherrole2"] } )
- # => {"person_0_role_text"=>{"0"=>"role1", "1"=>"role2", "2"=>"role3"}, "person_1_role_text"=>{"0"=>"otherrole1", "1"=>"otherrole2"}}
- def update_values(params={})
- if @internal_solr_doc
- raise "No update performed, this object was initialized via Solr instead of Fedora and is therefore read-only. Please utilize ActiveFedora::Base.load_instance to first load object via Fedora instead."
- else
- result = om_update_values(params)
- self.dirty= true
- return result
+ # Update values in the datastream's xml
+ # This wraps {http://rdoc.info/gems/om/1.2.4/OM/XML/TermValueOperators#update_values-instance_method OM::TermValueOperators#update_values} so that returns an error if we have loaded from solr since datastreams loaded that way should be read-only
+ #
+ # @example Updating multiple values with a Hash of Term pointers and values
+ # ds.update_values( {[{":person"=>"0"}, "role", "text"]=>{"0"=>"role1", "1"=>"role2", "2"=>"role3"}, [{:person=>1}, :family_name]=>"Andronicus", [{"person"=>"1"},:given_name]=>["Titus"],[{:person=>1},:role,:text]=>["otherrole1","otherrole2"] } )
+ # => {"person_0_role_text"=>{"0"=>"role1", "1"=>"role2", "2"=>"role3"}, "person_1_role_text"=>{"0"=>"otherrole1", "1"=>"otherrole2"}}
+ def update_values(params={})
+ if @internal_solr_doc
+ raise "No update performed, this object was initialized via Solr instead of Fedora and is therefore read-only. Please utilize ActiveFedora::Base.load_instance to first load object via Fedora instead."
+ else
+ result = om_update_values(params)
+ self.dirty= true
+ return result
+ end
  end
- end
 
- #override OM::XML::term_values so can lazy load from solr if this datastream initialized using +from_solr+
- def term_values(*term_pointer)
- if @internal_solr_doc
- #lazy load values from solr on demand
- get_values_from_solr(*term_pointer)
- else
- om_term_values(*term_pointer)
+ #override OM::XML::term_values so can lazy load from solr if this datastream initialized using +from_solr+
+ def term_values(*term_pointer)
+ if @internal_solr_doc
+ #lazy load values from solr on demand
+ get_values_from_solr(*term_pointer)
+ else
+ om_term_values(*term_pointer)
+ end
  end
  end
  end
+
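Note: the NokogiriDatastream hunk above is largely a re-indentation (the class is now nested inside module ActiveFedora); the functional changes are the constructor, which now takes (digital_object, dsid, exists_in_fedora) and seeds itself via from_xml(nil, self), and from_xml, whose template argument is now optional and defaults to a bare instance for unit tests. A hedged sketch of both entry points (the object, dsid, and fixture below are illustrative):

    # hypothetical usage sketch
    ds = ActiveFedora::NokogiriDatastream.new(obj.inner_object, 'descMetadata')
    ds.ng_xml.to_xml   # starts from NokogiriDatastream.xml_template, i.e. "<xml/>"

    # test-style loading without a pre-built template, as in the inline comment above
    ds2 = MarpaDCDatastream.from_xml(fixture("data.xml"))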