active-fedora 11.5.6 → 12.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +5 -5
  2. data/.rubocop.yml +4 -0
  3. data/.travis.yml +15 -0
  4. data/Gemfile +1 -3
  5. data/README.md +10 -13
  6. data/active-fedora.gemspec +7 -9
  7. data/lib/active_fedora.rb +3 -5
  8. data/lib/active_fedora/associations/collection_proxy.rb +0 -2
  9. data/lib/active_fedora/attributes/property_builder.rb +3 -1
  10. data/lib/active_fedora/caching_connection.rb +1 -1
  11. data/lib/active_fedora/errors.rb +4 -0
  12. data/lib/active_fedora/fedora.rb +5 -0
  13. data/lib/active_fedora/file.rb +3 -1
  14. data/lib/active_fedora/file/attributes.rb +5 -0
  15. data/lib/active_fedora/file_io.rb +120 -0
  16. data/lib/active_fedora/indexing.rb +6 -1
  17. data/lib/active_fedora/indexing/default_descriptors.rb +128 -0
  18. data/lib/active_fedora/indexing/descendant_fetcher.rb +22 -18
  19. data/lib/active_fedora/indexing/descriptor.rb +44 -0
  20. data/lib/active_fedora/indexing/field_mapper.rb +146 -0
  21. data/lib/active_fedora/indexing/inserter.rb +40 -0
  22. data/lib/active_fedora/indexing/suffix.rb +81 -0
  23. data/lib/active_fedora/indexing_service.rb +2 -2
  24. data/lib/active_fedora/ldp_resource.rb +1 -2
  25. data/lib/active_fedora/railtie.rb +0 -1
  26. data/lib/active_fedora/rdf/field_map_entry.rb +2 -2
  27. data/lib/active_fedora/rdf/indexing_service.rb +6 -6
  28. data/lib/active_fedora/relation.rb +0 -14
  29. data/lib/active_fedora/relation/delegation.rb +1 -2
  30. data/lib/active_fedora/relation/finder_methods.rb +19 -39
  31. data/lib/active_fedora/version.rb +1 -1
  32. data/lib/generators/active_fedora/config/fedora/templates/.fcrepo_wrapper +1 -1
  33. data/lib/generators/active_fedora/config/solr/templates/solr.yml +3 -3
  34. data/lib/generators/active_fedora/config/solr/templates/solr/config/schema.xml +34 -33
  35. data/spec/integration/base_spec.rb +39 -35
  36. data/spec/integration/indexing/descendant_fetcher_spec.rb +64 -0
  37. data/spec/integration/relation_spec.rb +1 -39
  38. data/spec/integration/scoping_spec.rb +17 -11
  39. data/spec/spec_helper.rb +1 -1
  40. data/spec/unit/active_fedora/indexing/inserter_spec.rb +30 -0
  41. data/spec/unit/attributes_spec.rb +3 -7
  42. data/spec/unit/fedora_spec.rb +12 -0
  43. data/spec/unit/file_configurator_spec.rb +0 -9
  44. data/spec/unit/file_io_spec.rb +137 -0
  45. data/spec/unit/file_spec.rb +14 -17
  46. metadata +26 -30
  47. data/.circleci/config.yml +0 -43
@@ -7,7 +7,7 @@ module ActiveFedora
7
7
  #
8
8
  # The DescendantFetcher is also capable of partitioning the URIs into "priority" URIs
9
9
  # that will be first in the returned list. These prioritized URIs belong to objects
10
- # with certain hasModel models. This feature is used in some hydra apps that need to
10
+ # with certain hasModel models. This feature is used in some samvera apps that need to
11
11
  # index 'permissions' objects before other objects to have the solr indexing work right.
12
12
  # And so by default, the prioritized class names are the ones from Hydra::AccessControls,
13
13
  # but you can alter the prioritized model name list, or set it to the empty array.
@@ -34,6 +34,7 @@ module ActiveFedora
34
34
  @exclude_self = exclude_self
35
35
  end
36
36
 
37
+ # @return [Array<String>] uris starting with priority models
37
38
  def descendant_and_self_uris
38
39
  partitioned = descendant_and_self_uris_partitioned
39
40
  partitioned[:priority] + partitioned[:other]
@@ -41,11 +42,20 @@ module ActiveFedora
41
42
 
42
43
  # returns a hash where key :priority is an array of all prioritized
43
44
  # type objects, key :other is an array of the rest.
45
+ # @return [Hash<Symbol, Array<String>>] uris sorted into :priority and :other
44
46
  def descendant_and_self_uris_partitioned
45
- resource = Ldp::Resource::RdfSource.new(ActiveFedora.fedora.connection, uri)
47
+ model_partitioned = descendant_and_self_uris_partitioned_by_model
48
+ { priority: model_partitioned.slice(*priority_models).values.flatten,
49
+ other: model_partitioned.slice(*(model_partitioned.keys - priority_models)).values.flatten }
50
+ end
51
+
52
+ # Returns a hash where keys are model names
53
+ # This is useful if you need to act on certain models and want finer granularity than priority/other
54
+ # @return [Hash<String, Array<String>>] uris sorted by model names
55
+ def descendant_and_self_uris_partitioned_by_model
46
56
  # GET could be slow if it's a big resource, we're using HEAD to avoid this problem,
47
57
  # but this causes more requests to Fedora.
48
- return partitioned_uris unless resource.head.rdf_source?
58
+ return partitioned_uris unless rdf_resource.head.rdf_source?
49
59
 
50
60
  add_self_to_partitioned_uris unless @exclude_self
51
61
 
@@ -54,9 +64,11 @@ module ActiveFedora
54
64
  self.class.new(
55
65
  descendant_uri,
56
66
  priority_models: priority_models
57
- ).descendant_and_self_uris_partitioned.tap do |descendant_partitioned|
58
- partitioned_uris[:priority].concat descendant_partitioned[:priority]
59
- partitioned_uris[:other].concat descendant_partitioned[:other]
67
+ ).descendant_and_self_uris_partitioned_by_model.tap do |descendant_partitioned|
68
+ descendant_partitioned.keys.each do |k|
69
+ partitioned_uris[k] ||= []
70
+ partitioned_uris[k].concat descendant_partitioned[k]
71
+ end
60
72
  end
61
73
  end
62
74
  partitioned_uris
@@ -73,10 +85,7 @@ module ActiveFedora
73
85
  end
74
86
 
75
87
  def partitioned_uris
76
- @partitioned_uris ||= {
77
- priority: [],
78
- other: []
79
- }
88
+ @partitioned_uris ||= {}
80
89
  end
81
90
 
82
91
  def rdf_graph_models
@@ -85,15 +94,10 @@ module ActiveFedora
85
94
  end.compact
86
95
  end
87
96
 
88
- def prioritized_object?
89
- priority_models.present? && (rdf_graph_models & priority_models).count > 0
90
- end
91
-
92
97
  def add_self_to_partitioned_uris
93
- if prioritized_object?
94
- partitioned_uris[:priority] << rdf_resource.subject
95
- else
96
- partitioned_uris[:other] << rdf_resource.subject
98
+ rdf_graph_models.each do |model|
99
+ partitioned_uris[model] ||= []
100
+ partitioned_uris[model] << rdf_resource.subject
97
101
  end
98
102
  end
99
103
  end
@@ -0,0 +1,44 @@
1
module ActiveFedora
  module Indexing
    # Describes how a field should be indexed. A descriptor holds the parts of
    # an index type (e.g. [:string, :indexed, :stored, :multivalued]) and can
    # turn a field name into a suffixed index field name, optionally paired
    # with a value converter.
    class Descriptor
      # @return [Array] the index type parts this descriptor was built with
      attr_reader :index_type

      # @param args [Array] the index type parts, optionally followed by an
      #   options Hash. Recognised options:
      #   * :converter - a callable that receives the field type; whatever it
      #     returns is handed back as the converter by #name_and_converter
      #   * :requires_type - when truthy, #name_and_converter insists on a :type
      def initialize(*args)
        opts = args.last.is_a?(Hash) ? args.pop : {}
        @converter = opts[:converter]
        @type_required = opts[:requires_type] ? true : false
        # A splat is always an Array, so no further shape check is required here.
        @index_type = args
      end

      # @param field_name [#to_s] the field name, without any suffix
      # @param args [Hash, nil] options; only :type (the field type) is read
      # @return [Array] a two element array: the suffixed field name and the
      #   converter (nil when no :converter option was configured)
      # @raise [ArgumentError] if this descriptor requires a type and none is given
      def name_and_converter(field_name, args = nil)
        args ||= {}
        field_type = args[:type]
        if type_required? && !field_type
          raise ArgumentError, "Must provide a :type argument when index_type is `#{index_type.inspect}' for #{field_name}"
        end
        [field_name.to_s + suffix(field_type), converter(field_type)]
      end

      # @return [Boolean] whether #name_and_converter needs a :type argument
      def type_required?
        @type_required
      end

      # Builds the Suffix for the given field type. When the first index type
      # part is a Proc it is called with the field type to obtain the parts;
      # otherwise a copy of the configured parts is used.
      # @param field_type [Object] the field type passed through to the Proc
      # @return [Suffix]
      def evaluate_suffix(field_type)
        head = index_type.first
        parts = head.is_a?(Proc) ? head.call(field_type) : index_type.dup
        Suffix.new(parts)
      end

      protected

        # Suffix can be overridden if you want a different method of grabbing the suffix
        def suffix(field_type)
          evaluate_suffix(field_type).to_s
        end

        # @return [Object, nil] the result of calling the configured converter
        #   with the field type, or nil when no converter was configured
        def converter(field_type)
          @converter.call(field_type) if @converter
        end
    end
  end
end
@@ -0,0 +1,146 @@
1
module ActiveFedora
  module Indexing
    # Maps field names and values onto index (Solr) field names and values by
    # looking up a Descriptor for each requested index type.
    class FieldMapper
      class_attribute :id_field, :descriptors
      # set defaults
      self.id_field = 'id'
      self.descriptors = [DefaultDescriptors]

      # @api
      # @param [Hash] doc the hash to insert the value into
      # @param [String] name the name of the field (without the suffix)
      # @param [String,Date] value the value to be inserted
      # @param [Array,Hash] indexer_args the arguments that find the indexer
      # @return [Hash] doc the document that was provided with the new field (replacing any field with the same name)
      def set_field(doc, name, value, *indexer_args)
        # adding defaults indexer
        indexer_args = [:stored_searchable] if indexer_args.empty?
        doc.merge! solr_names_and_values(name, value, indexer_args)
        doc
      end

      # @api
      # Given a field name, index_type, etc., returns the corresponding Solr name.
      # TODO field type is the input format, maybe we could just detect that?
      # @param [String] field_name the ruby (term) name which will get a suffix appended to become a Solr field name
      # @param opts - index_type is only needed if the FieldDescriptor requires it (e.g. :searchable)
      # @return [String] name of the solr field, based on the params
      def solr_name(field_name, *opts)
        index_type, args = if opts.first.is_a? Hash
                             [:stored_searchable, opts.first]
                           elsif opts.empty?
                             [:stored_searchable, { type: :text }]
                           else
                             [opts[0], opts[1] || { type: :string }]
                           end

        indexer(index_type).name_and_converter(field_name, args).first
      end

      # Given a field name-value pair, a data type, and an array of index types, returns a hash of
      # mapped names and values. The values in the hash are _arrays_ for multivalued
      # descriptors and single values otherwise.
      # @param [String] field_name the name of the field (without the suffix)
      # @param [Object, Array] field_value one value or several; nil yields an empty hash
      # @param [Array, Symbol, String, Descriptor] index_types the index types to map to.
      #   The argument is not modified.
      # @return [Hash] mapped field names to values
      def solr_names_and_values(field_name, field_value, index_types)
        return {} if field_value.nil?

        # Determine the set of index types. Work on a copy (Array#uniq, not uniq!)
        # so the caller's array is never modified by the deletions below.
        index_types = Array(index_types).uniq
        index_types.dup.each do |index_type|
          next unless index_type.to_s =~ /^not_(.*)/
          index_types.delete index_type # not_foo
          index_types.delete Regexp.last_match(1).to_sym # foo
        end

        # Map names and values

        results = {}

        # Time seems to extend enumerable, so wrap it so we don't iterate over each of its elements.
        field_value = [field_value] if field_value.is_a? Time

        index_types.each do |index_type|
          Array(field_value).each do |single_value|
            # Get mapping for field
            descriptor = indexer(index_type)
            data_type = extract_type(single_value)
            name, converter = descriptor.name_and_converter(field_name, type: data_type)
            next unless name

            # Is there a custom converter?
            # TODO instead of a custom converter, look for input data type and output data type. Create a few methods that can do that cast.

            value = if converter
                      if converter.arity == 1
                        converter.call(single_value)
                      else
                        converter.call(single_value, field_name)
                      end
                    elsif data_type == :boolean
                      single_value
                    else
                      single_value.to_s
                    end

            # Add mapped name & value, unless it's a duplicate
            if descriptor.evaluate_suffix(data_type).multivalued?
              values = (results[name] ||= [])
              values << value unless value.nil? || values.include?(value)
            else
              warn_overwrite(name, value, results[name]) if results[name]
              results[name] = value
            end
          end
        end

        results
      end

      private

        # Logs that a single-valued field is about to be overwritten.
        # Uses ActiveFedora's own logger rather than Solrizer's.
        def warn_overwrite(name, value, previous)
          logger = ActiveFedora::Base.logger
          logger.warn "Setting #{name} to `#{value}', but it already had `#{previous}'" if logger
        end

        # @param [Symbol, String, Descriptor] index_type is a Descriptor, a symbol that references a method that returns a Descriptor, or a string which will be used as the suffix.
        # @return [Descriptor]
        # @raise [InvalidIndexDescriptor] if index_type cannot be resolved to a Descriptor
        def indexer(index_type)
          index_type = case index_type
                       when Symbol
                         index_type_macro(index_type)
                       when String
                         # NOTE(review): StringDescriptor is not defined in the visible source — confirm it is loadable.
                         StringDescriptor.new(index_type)
                       when Descriptor
                         index_type
                       else
                         raise InvalidIndexDescriptor, "#{index_type.class} is not a valid indexer_type. Use a String, Symbol or Descriptor."
                       end

          raise InvalidIndexDescriptor, "index type should be an Descriptor, you passed: #{index_type.class}" unless index_type.is_a? Descriptor
          index_type
        end

        # @param index_type [Symbol]
        # search through the descriptors (class attribute) until a module is found that responds to index_type, then call it.
        # @raise [UnknownIndexMacro] if no configured descriptor module responds to index_type
        def index_type_macro(index_type)
          klass = self.class.descriptors.find { |descriptor_klass| descriptor_klass.respond_to? index_type }
          raise UnknownIndexMacro, "Unable to find `#{index_type}' in #{self.class.descriptors}" unless klass
          klass.send(index_type)
        end

        # Derives the type symbol handed to the descriptor from a value's class.
        def extract_type(value)
          case value
          when NilClass
            nil
          when Integer # In ruby < 2.4, Fixnum extends Integer
            :integer
          when DateTime
            :time
          when TrueClass, FalseClass
            :boolean
          else
            value.class.to_s.underscore.to_sym
          end
        end
    end
  end
end
@@ -0,0 +1,40 @@
1
module ActiveFedora
  module Indexing
    # Utilities for adding fields to index documents
    class Inserter
      # Inserts the value into the document once per index type.
      # @param [String] field_name_base the field name
      # @param [String] value the value to insert into the index
      # @param [Array<Symbol>] index_as the index type suffixes
      # @param [Hash] solr_doc the index doc to add to (modified in place)
      # @example
      #   solr_doc = {}
      #   create_and_insert_terms('title', 'War and Peace', [:displayable, :searchable], solr_doc)
      #   solr_doc
      #   # => {"title_ssm"=>["War and Peace"], "title_teim"=>["War and Peace"]}
      def self.create_and_insert_terms(field_name_base, value, index_as, solr_doc)
        index_as.each do |indexer|
          insert_field(solr_doc, field_name_base, value, indexer)
        end
      end

      # Maps the value through ActiveFedora.index_field_mapper and adds the
      # result to the document. Array results are appended to whatever the
      # document already holds under that key; other results replace it.
      # @param [Hash] doc the hash to insert the value into
      # @param [String] name the name of the field (without the suffix)
      # @param [String,Date,Array] value the value (or array of values) to be inserted
      # @param [Array,Hash] indexer_args the arguments that find the indexer
      # @return [Hash] doc the document that was provided with the new field inserted
      def self.insert_field(doc, name, value, *indexer_args)
        # adding defaults indexer
        indexer_args = [:stored_searchable] if indexer_args.empty?
        ActiveFedora.index_field_mapper.solr_names_and_values(name, value, indexer_args).each do |key, mapped|
          doc[key] = if mapped.is_a? Array
                       existing_values(doc[key]) + mapped
                     else
                       mapped
                     end
        end
        doc
      end

      # Normalises what the document currently holds under a key to an Array,
      # so that appending never fails when a single value was stored earlier.
      def self.existing_values(current)
        return current if current.is_a? Array
        current ? [current] : []
      end
      private_class_method :existing_values
    end
  end
end
@@ -0,0 +1,81 @@
1
+ require 'ostruct'
2
+
3
module ActiveFedora
  module Indexing
    # Builds the suffix appended to a field name from a list of index type
    # parts, e.g. [:string, :stored, :indexed, :multivalued] => "_ssim".
    class Suffix
      # @param fields [Array<Symbol>] the data type followed by any of
      #   :stored, :indexed, :multivalued; nested arrays are flattened
      def initialize(*fields)
        @fields = fields.flatten
      end

      # @return [Boolean] whether :multivalued is among the fields
      def multivalued?
        has_field? :multivalued
      end

      # @return [Boolean] whether :stored is among the fields
      def stored?
        has_field? :stored
      end

      # @return [Boolean] whether :indexed is among the fields
      def indexed?
        has_field? :indexed
      end

      # @param f [#to_sym] the field to look for
      # @return [Boolean] always true for :type, otherwise whether f was given
      def has_field?(f)
        (f.to_sym == :type) || @fields.include?(f.to_sym)
      end

      # @return [Symbol, nil] the first field, which is treated as the data type
      def data_type
        @fields.first
      end

      # @return [String] the delimiter followed by the suffix of each configured
      #   field that is present, in the order given by config.fields
      # @raise [InvalidIndexDescriptor] if no data type was given or it is unknown
      def to_s
        raise InvalidIndexDescriptor, "Missing datatype for #{@fields}" unless data_type

        parts = [config.suffix_delimiter]
        config.fields.select { |f| has_field? f }.each do |f|
          entry = config.send(:"#{f}_suffix")
          # A Proc entry computes its suffix from the full field list
          parts << (entry.is_a?(Proc) ? entry.call(@fields) : entry)
        end
        parts.join
      end

      # The class-wide default configuration, built on first use.
      # @return [OpenStruct] fields, suffix_delimiter and one *_suffix entry per field
      def self.config
        @config ||= OpenStruct.new fields: [:type, :stored, :indexed, :multivalued],
                                   suffix_delimiter: '_',
                                   type_suffix: (lambda do |fields|
                                     type = fields.first
                                     case type
                                     when :string, :symbol # TODO: `:symbol' usage ought to be deprecated
                                       's'
                                     when :text
                                       't'
                                     when :text_en
                                       'te'
                                     when :date, :time
                                       'dt'
                                     when :integer
                                       'i'
                                     when :boolean
                                       'b'
                                     when :long
                                       'lt'
                                     else
                                       raise InvalidIndexDescriptor, "Invalid datatype `#{type.inspect}'. Must be one of: :date, :time, :text, :text_en, :string, :symbol, :integer, :boolean, :long"
                                     end
                                   end),
                                   stored_suffix: 's',
                                   indexed_suffix: 'i',
                                   multivalued_suffix: 'm'
      end

      # @return [OpenStruct] this instance's copy of the class configuration
      def config
        @config ||= self.class.config.dup
      end
    end
  end
end
@@ -32,8 +32,8 @@ module ActiveFedora
32
32
  # @return [Hash] the solr document
33
33
  def generate_solr_document
34
34
  solr_doc = {}
35
- Solrizer.set_field(solr_doc, 'system_create', c_time, :stored_sortable)
36
- Solrizer.set_field(solr_doc, 'system_modified', m_time, :stored_sortable)
35
+ ActiveFedora.index_field_mapper.set_field(solr_doc, 'system_create', c_time, :stored_sortable)
36
+ ActiveFedora.index_field_mapper.set_field(solr_doc, 'system_modified', m_time, :stored_sortable)
37
37
  solr_doc[QueryResultBuilder::HAS_MODEL_SOLR_FIELD] = object.has_model
38
38
  solr_doc[ActiveFedora.id_field.to_sym] = object.id
39
39
  object.declared_attached_files.each do |name, file|
@@ -18,8 +18,7 @@ module ActiveFedora
18
18
  end
19
19
 
20
20
  def marshal_load(data)
21
- ivars = data
22
- ivars.each { |name, val| instance_variable_set(name, val) }
21
+ data.each { |name, val| instance_variable_set(name, val) }
23
22
  end
24
23
  end
25
24
  end
@@ -15,7 +15,6 @@ module ActiveFedora
15
15
  initializer "active_fedora.logger" do
16
16
  ActiveSupport.on_load(:active_fedora) do
17
17
  self.logger = ::Rails.logger if logger.is_a? NullLogger
18
- Solrizer.logger ||= logger
19
18
  end
20
19
  end
21
20
 
@@ -1,7 +1,7 @@
1
1
  module ActiveFedora::RDF
2
2
  # Transient class that represents a field that we send to solr.
3
3
  # It might be possible for two properties to share a single field map entry if they use the same solr key.
4
- # @attribute [Symbol] type the data type hint for Solrizer
4
+ # @attribute [Symbol] type the data type hint for ActiveFedora::Indexing::FieldMapper
5
5
  # @attribute [Array] behaviors the indexing hints such as :stored_searchable or :symbol
6
6
  # @!attribute [w] values the raw values
7
7
  class FieldMapEntry
@@ -15,7 +15,7 @@ module ActiveFedora::RDF
15
15
 
16
16
  # Merges any existing values for solr fields with new, incoming values and ensures that resulting values are unique.
17
17
  # @param [Symbol] type the data type for the field such as :string, :date, :integer
18
- # @param [Array] behaviors Solrizer's behaviors for indexing such as :stored_searhable, :symbol
18
+ # @param [Array] behaviors FieldMapper's behaviors for indexing such as :stored_searchable, :symbol
19
19
  # @param [Array] new_values values to append into the existing solr field
20
20
  def merge!(type, behaviors, new_values)
21
21
  self.type ||= type