active-fedora 11.5.6 → 12.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +5 -5
  2. data/.rubocop.yml +4 -0
  3. data/.travis.yml +15 -0
  4. data/Gemfile +1 -3
  5. data/README.md +10 -13
  6. data/active-fedora.gemspec +7 -9
  7. data/lib/active_fedora.rb +3 -5
  8. data/lib/active_fedora/associations/collection_proxy.rb +0 -2
  9. data/lib/active_fedora/attributes/property_builder.rb +3 -1
  10. data/lib/active_fedora/caching_connection.rb +1 -1
  11. data/lib/active_fedora/errors.rb +4 -0
  12. data/lib/active_fedora/fedora.rb +5 -0
  13. data/lib/active_fedora/file.rb +3 -1
  14. data/lib/active_fedora/file/attributes.rb +5 -0
  15. data/lib/active_fedora/file_io.rb +120 -0
  16. data/lib/active_fedora/indexing.rb +6 -1
  17. data/lib/active_fedora/indexing/default_descriptors.rb +128 -0
  18. data/lib/active_fedora/indexing/descendant_fetcher.rb +22 -18
  19. data/lib/active_fedora/indexing/descriptor.rb +44 -0
  20. data/lib/active_fedora/indexing/field_mapper.rb +146 -0
  21. data/lib/active_fedora/indexing/inserter.rb +40 -0
  22. data/lib/active_fedora/indexing/suffix.rb +81 -0
  23. data/lib/active_fedora/indexing_service.rb +2 -2
  24. data/lib/active_fedora/ldp_resource.rb +1 -2
  25. data/lib/active_fedora/railtie.rb +0 -1
  26. data/lib/active_fedora/rdf/field_map_entry.rb +2 -2
  27. data/lib/active_fedora/rdf/indexing_service.rb +6 -6
  28. data/lib/active_fedora/relation.rb +0 -14
  29. data/lib/active_fedora/relation/delegation.rb +1 -2
  30. data/lib/active_fedora/relation/finder_methods.rb +19 -39
  31. data/lib/active_fedora/version.rb +1 -1
  32. data/lib/generators/active_fedora/config/fedora/templates/.fcrepo_wrapper +1 -1
  33. data/lib/generators/active_fedora/config/solr/templates/solr.yml +3 -3
  34. data/lib/generators/active_fedora/config/solr/templates/solr/config/schema.xml +34 -33
  35. data/spec/integration/base_spec.rb +39 -35
  36. data/spec/integration/indexing/descendant_fetcher_spec.rb +64 -0
  37. data/spec/integration/relation_spec.rb +1 -39
  38. data/spec/integration/scoping_spec.rb +17 -11
  39. data/spec/spec_helper.rb +1 -1
  40. data/spec/unit/active_fedora/indexing/inserter_spec.rb +30 -0
  41. data/spec/unit/attributes_spec.rb +3 -7
  42. data/spec/unit/fedora_spec.rb +12 -0
  43. data/spec/unit/file_configurator_spec.rb +0 -9
  44. data/spec/unit/file_io_spec.rb +137 -0
  45. data/spec/unit/file_spec.rb +14 -17
  46. metadata +26 -30
  47. data/.circleci/config.yml +0 -43
@@ -7,7 +7,7 @@ module ActiveFedora
7
7
  #
8
8
  # The DescendantFetcher is also capable of partitioning the URIs into "priority" URIs
9
9
  # that will be first in the returned list. These prioritized URIs belong to objects
10
- # with certain hasModel models. This feature is used in some hydra apps that need to
10
+ # with certain hasModel models. This feature is used in some samvera apps that need to
11
11
  # index 'permissions' objects before other objects to have the solr indexing work right.
12
12
  # And so by default, the prioritized class names are the ones from Hydra::AccessControls,
13
13
  # but you can alter the prioritized model name list, or set it to the empty array.
@@ -34,6 +34,7 @@ module ActiveFedora
34
34
  @exclude_self = exclude_self
35
35
  end
36
36
 
37
+ # @return [Array<String>] uris starting with priority models
37
38
  def descendant_and_self_uris
38
39
  partitioned = descendant_and_self_uris_partitioned
39
40
  partitioned[:priority] + partitioned[:other]
@@ -41,11 +42,20 @@ module ActiveFedora
41
42
 
42
43
  # returns a hash where key :priority is an array of all prioritized
43
44
  # type objects, key :other is an array of the rest.
45
+ # @return [Hash<Symbol, Array<String>>] uris sorted into :priority and :other
44
46
  def descendant_and_self_uris_partitioned
45
- resource = Ldp::Resource::RdfSource.new(ActiveFedora.fedora.connection, uri)
47
+ model_partitioned = descendant_and_self_uris_partitioned_by_model
48
+ { priority: model_partitioned.slice(*priority_models).values.flatten,
49
+ other: model_partitioned.slice(*(model_partitioned.keys - priority_models)).values.flatten }
50
+ end
51
+
52
+ # Returns a hash where keys are model names
53
+ # This is useful if you need to act on certain models and want finer granularity than priority/other
54
+ # @return [Hash<String, Array<String>>] uris sorted by model names
55
+ def descendant_and_self_uris_partitioned_by_model
46
56
  # GET could be slow if it's a big resource, we're using HEAD to avoid this problem,
47
57
  # but this causes more requests to Fedora.
48
- return partitioned_uris unless resource.head.rdf_source?
58
+ return partitioned_uris unless rdf_resource.head.rdf_source?
49
59
 
50
60
  add_self_to_partitioned_uris unless @exclude_self
51
61
 
@@ -54,9 +64,11 @@ module ActiveFedora
54
64
  self.class.new(
55
65
  descendant_uri,
56
66
  priority_models: priority_models
57
- ).descendant_and_self_uris_partitioned.tap do |descendant_partitioned|
58
- partitioned_uris[:priority].concat descendant_partitioned[:priority]
59
- partitioned_uris[:other].concat descendant_partitioned[:other]
67
+ ).descendant_and_self_uris_partitioned_by_model.tap do |descendant_partitioned|
68
+ descendant_partitioned.keys.each do |k|
69
+ partitioned_uris[k] ||= []
70
+ partitioned_uris[k].concat descendant_partitioned[k]
71
+ end
60
72
  end
61
73
  end
62
74
  partitioned_uris
@@ -73,10 +85,7 @@ module ActiveFedora
73
85
  end
74
86
 
75
87
  def partitioned_uris
76
- @partitioned_uris ||= {
77
- priority: [],
78
- other: []
79
- }
88
+ @partitioned_uris ||= {}
80
89
  end
81
90
 
82
91
  def rdf_graph_models
@@ -85,15 +94,10 @@ module ActiveFedora
85
94
  end.compact
86
95
  end
87
96
 
88
- def prioritized_object?
89
- priority_models.present? && (rdf_graph_models & priority_models).count > 0
90
- end
91
-
92
97
  def add_self_to_partitioned_uris
93
- if prioritized_object?
94
- partitioned_uris[:priority] << rdf_resource.subject
95
- else
96
- partitioned_uris[:other] << rdf_resource.subject
98
+ rdf_graph_models.each do |model|
99
+ partitioned_uris[model] ||= []
100
+ partitioned_uris[model] << rdf_resource.subject
97
101
  end
98
102
  end
99
103
  end
@@ -0,0 +1,44 @@
1
+ module ActiveFedora
2
+ module Indexing
3
+ class Descriptor
4
+ attr_reader :index_type
5
+ def initialize(*args)
6
+ if args.last.is_a? Hash
7
+ opts = args.pop
8
+ @converter = opts[:converter]
9
+ @type_required = opts[:requires_type]
10
+ end
11
+ @index_type = args
12
+ raise Solrizer::InvalidIndexDescriptor, "Invalid index type passed to Sorizer.solr_name. It should be an array like [:string, :indexed, :stored, :multivalued]. You provided: `#{@index_type}'" unless index_type.is_a? Array
13
+ end
14
+
15
+ def name_and_converter(field_name, args = nil)
16
+ args ||= {}
17
+ field_type = args[:type]
18
+ if type_required?
19
+ raise ArgumentError, "Must provide a :type argument when index_type is `#{self}' for #{field_name}" unless field_type
20
+ end
21
+ [field_name.to_s + suffix(field_type), converter(field_type)]
22
+ end
23
+
24
+ def type_required?
25
+ @type_required
26
+ end
27
+
28
+ def evaluate_suffix(field_type)
29
+ Suffix.new(index_type.first.is_a?(Proc) ? index_type.first.call(field_type) : index_type.dup)
30
+ end
31
+
32
+ protected
33
+
34
+ # Suffix can be overridden if you want a different method of grabbing the suffix
35
+ def suffix(field_type)
36
+ evaluate_suffix(field_type).to_s
37
+ end
38
+
39
+ def converter(field_type)
40
+ @converter.call(field_type) if @converter
41
+ end
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,146 @@
1
+ module ActiveFedora
2
+ module Indexing
3
+ class FieldMapper
4
+ class_attribute :id_field, :descriptors
5
+ # set defaults
6
+ self.id_field = 'id'
7
+ self.descriptors = [DefaultDescriptors]
8
+
9
+ # @api
10
+ # @param [Hash] doc the hash to insert the value into
11
+ # @param [String] name the name of the field (without the suffix)
12
+ # @param [String,Date] value the value to be inserted
13
+ # @param [Array,Hash] indexer_args the arguments that find the indexer
14
+ # @return [Hash] doc the document that was provided with the new field (replacing any field with the same name)
15
+ def set_field(doc, name, value, *indexer_args)
16
+ # adding defaults indexer
17
+ indexer_args = [:stored_searchable] if indexer_args.empty?
18
+ doc.merge! solr_names_and_values(name, value, indexer_args)
19
+ doc
20
+ end
21
+
22
+ # @api
23
+ # Given a field name, index_type, etc., returns the corresponding Solr name.
24
+ # TODO field type is the input format, maybe we could just detect that?
25
+ # @param [String] field_name the ruby (term) name which will get a suffix appended to become a Solr field name
26
+ # @param opts - index_type is only needed if the FieldDescriptor requires it (e.g. :searchable)
27
+ # @return [String] name of the solr field, based on the params
28
+ def solr_name(field_name, *opts)
29
+ index_type, args = if opts.first.is_a? Hash
30
+ [:stored_searchable, opts.first]
31
+ elsif opts.empty?
32
+ [:stored_searchable, { type: :text }]
33
+ else
34
+ [opts[0], opts[1] || { type: :string }]
35
+ end
36
+
37
+ indexer(index_type).name_and_converter(field_name, args).first
38
+ end
39
+
40
+ # Given a field name-value pair, a data type, and an array of index types, returns a hash of
41
+ # mapped names and values. The values in the hash are _arrays_, and may contain multiple values.
42
+ def solr_names_and_values(field_name, field_value, index_types)
43
+ return {} if field_value.nil?
44
+
45
+ # Determine the set of index types
46
+ index_types = Array(index_types)
47
+ index_types.uniq!
48
+ index_types.dup.each do |index_type|
49
+ if index_type.to_s =~ /^not_(.*)/
50
+ index_types.delete index_type # not_foo
51
+ index_types.delete Regexp.last_match(1).to_sym # foo
52
+ end
53
+ end
54
+
55
+ # Map names and values
56
+
57
+ results = {}
58
+
59
+ # Time seems to extend Enumerable, so wrap it so we don't iterate over each of its elements.
60
+ field_value = [field_value] if field_value.is_a? Time
61
+
62
+ index_types.each do |index_type|
63
+ Array(field_value).each do |single_value|
64
+ # Get mapping for field
65
+ descriptor = indexer(index_type)
66
+ data_type = extract_type(single_value)
67
+ name, converter = descriptor.name_and_converter(field_name, type: data_type)
68
+ next unless name
69
+
70
+ # Is there a custom converter?
71
+ # TODO instead of a custom converter, look for input data type and output data type. Create a few methods that can do that cast.
72
+
73
+ value = if converter
74
+ if converter.arity == 1
75
+ converter.call(single_value)
76
+ else
77
+ converter.call(single_value, field_name)
78
+ end
79
+ elsif data_type == :boolean
80
+ single_value
81
+ else
82
+ single_value.to_s
83
+ end
84
+
85
+ # Add mapped name & value, unless it's a duplicate
86
+ if descriptor.evaluate_suffix(data_type).multivalued?
87
+ values = (results[name] ||= [])
88
+ values << value unless value.nil? || values.include?(value)
89
+ else
90
+ Solrizer.logger.warn "Setting #{name} to `#{value}', but it already had `#{results[name]}'" if results[name] && Solrizer.logger
91
+ results[name] = value
92
+ end
93
+ end
94
+ end
95
+
96
+ results
97
+ end
98
+
99
+ private
100
+
101
+ # @param [Symbol, String, Descriptor] index_type is a Descriptor, a symbol that references a method that returns a Descriptor, or a string which will be used as the suffix.
102
+ # @return [Descriptor]
103
+ def indexer(index_type)
104
+ index_type = case index_type
105
+ when Symbol
106
+ index_type_macro(index_type)
107
+ when String
108
+ StringDescriptor.new(index_type)
109
+ when Descriptor
110
+ index_type
111
+ else
112
+ raise Solrizer::InvalidIndexDescriptor, "#{index_type.class} is not a valid indexer_type. Use a String, Symbol or Descriptor."
113
+ end
114
+
115
+ raise InvalidIndexDescriptor, "index type should be an Descriptor, you passed: #{index_type.class}" unless index_type.is_a? Descriptor
116
+ index_type
117
+ end
118
+
119
+ # @param index_type [Symbol]
120
+ # search through the descriptors (class attribute) until a module is found that responds to index_type, then call it.
121
+ def index_type_macro(index_type)
122
+ klass = self.class.descriptors.find { |descriptor_klass| descriptor_klass.respond_to? index_type }
123
+ if klass
124
+ klass.send(index_type)
125
+ else
126
+ raise UnknownIndexMacro, "Unable to find `#{index_type}' in #{self.class.descriptors}"
127
+ end
128
+ end
129
+
130
+ def extract_type(value)
131
+ case value
132
+ when NilClass
133
+ nil
134
+ when Integer # In ruby < 2.4, Fixnum extends Integer
135
+ :integer
136
+ when DateTime
137
+ :time
138
+ when TrueClass, FalseClass
139
+ :boolean
140
+ else
141
+ value.class.to_s.underscore.to_sym
142
+ end
143
+ end
144
+ end
145
+ end
146
+ end
@@ -0,0 +1,40 @@
1
+ module ActiveFedora
2
+ module Indexing
3
+ # Utilities for adding fields to index documents
4
+ class Inserter
5
+ # @param [String] field_name_base the field name
6
+ # @param [String] value the value to insert into the index
7
+ # @param [Array<Symbol>] index_as the index type suffixes
8
+ # @param [Hash] solr_doc the index doc to add to
9
+ # @example
10
+ # solr_doc = {}
11
+ # create_and_insert_terms('title', 'War and Peace', [:displayable, :searchable], solr_doc)
12
+ # solr_doc
13
+ # # => {"title_ssm"=>["War and Peace"], "title_teim"=>["War and Peace"]}
14
+ def self.create_and_insert_terms(field_name_base, value, index_as, solr_doc)
15
+ index_as.each do |indexer|
16
+ insert_field(solr_doc, field_name_base, value, indexer)
17
+ end
18
+ end
19
+
20
+ # @param [Hash] doc the hash to insert the value into
21
+ # @param [String] name the name of the field (without the suffix)
22
+ # @param [String,Date,Array] value the value (or array of values) to be inserted
23
+ # @param [Array,Hash] indexer_args the arguments that find the indexer
24
+ # @return [Hash] doc the document that was provided with the new field inserted
25
+ def self.insert_field(doc, name, value, *indexer_args)
26
+ # adding defaults indexer
27
+ indexer_args = [:stored_searchable] if indexer_args.empty?
28
+ ActiveFedora.index_field_mapper.solr_names_and_values(name, value, indexer_args).each do |k, v|
29
+ doc[k] ||= []
30
+ if v.is_a? Array
31
+ doc[k] += v
32
+ else
33
+ doc[k] = v
34
+ end
35
+ end
36
+ doc
37
+ end
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,81 @@
1
+ require 'ostruct'
2
+
3
+ module ActiveFedora
4
+ module Indexing
5
+ class Suffix
6
+ def initialize(*fields)
7
+ @fields = fields.flatten
8
+ end
9
+
10
+ def multivalued?
11
+ has_field? :multivalued
12
+ end
13
+
14
+ def stored?
15
+ has_field? :stored
16
+ end
17
+
18
+ def indexed?
19
+ has_field? :indexed
20
+ end
21
+
22
+ def has_field?(f)
23
+ (f.to_sym == :type) || @fields.include?(f.to_sym)
24
+ end
25
+
26
+ def data_type
27
+ @fields.first
28
+ end
29
+
30
+ def to_s
31
+ raise Solrizer::InvalidIndexDescriptor, "Missing datatype for #{@fields}" unless data_type
32
+
33
+ field_suffix = [config.suffix_delimiter]
34
+
35
+ config.fields.select { |f| has_field? f }.each do |f|
36
+ key = :"#{f}_suffix"
37
+ field_suffix << if config.send(key).is_a? Proc
38
+ config.send(key).call(@fields)
39
+ else
40
+ config.send(key)
41
+ end
42
+ end
43
+
44
+ field_suffix.join
45
+ end
46
+
47
+ def self.config
48
+ @config ||= OpenStruct.new fields: [:type, :stored, :indexed, :multivalued],
49
+ suffix_delimiter: '_',
50
+ type_suffix: (lambda do |fields|
51
+ type = fields.first
52
+ case type
53
+ when :string, :symbol # TODO: `:symbol' usage ought to be deprecated
54
+ 's'
55
+ when :text
56
+ 't'
57
+ when :text_en
58
+ 'te'
59
+ when :date, :time
60
+ 'dt'
61
+ when :integer
62
+ 'i'
63
+ when :boolean
64
+ 'b'
65
+ when :long
66
+ 'lt'
67
+ else
68
+ raise Solrizer::InvalidIndexDescriptor, "Invalid datatype `#{type.inspect}'. Must be one of: :date, :time, :text, :text_en, :string, :symbol, :integer, :boolean"
69
+ end
70
+ end),
71
+ stored_suffix: 's',
72
+ indexed_suffix: 'i',
73
+ multivalued_suffix: 'm'
74
+ end
75
+
76
+ def config
77
+ @config ||= self.class.config.dup
78
+ end
79
+ end
80
+ end
81
+ end
@@ -32,8 +32,8 @@ module ActiveFedora
32
32
  # @return [Hash] the solr document
33
33
  def generate_solr_document
34
34
  solr_doc = {}
35
- Solrizer.set_field(solr_doc, 'system_create', c_time, :stored_sortable)
36
- Solrizer.set_field(solr_doc, 'system_modified', m_time, :stored_sortable)
35
+ ActiveFedora.index_field_mapper.set_field(solr_doc, 'system_create', c_time, :stored_sortable)
36
+ ActiveFedora.index_field_mapper.set_field(solr_doc, 'system_modified', m_time, :stored_sortable)
37
37
  solr_doc[QueryResultBuilder::HAS_MODEL_SOLR_FIELD] = object.has_model
38
38
  solr_doc[ActiveFedora.id_field.to_sym] = object.id
39
39
  object.declared_attached_files.each do |name, file|
@@ -18,8 +18,7 @@ module ActiveFedora
18
18
  end
19
19
 
20
20
  def marshal_load(data)
21
- ivars = data
22
- ivars.each { |name, val| instance_variable_set(name, val) }
21
+ data.each { |name, val| instance_variable_set(name, val) }
23
22
  end
24
23
  end
25
24
  end
@@ -15,7 +15,6 @@ module ActiveFedora
15
15
  initializer "active_fedora.logger" do
16
16
  ActiveSupport.on_load(:active_fedora) do
17
17
  self.logger = ::Rails.logger if logger.is_a? NullLogger
18
- Solrizer.logger ||= logger
19
18
  end
20
19
  end
21
20
 
@@ -1,7 +1,7 @@
1
1
  module ActiveFedora::RDF
2
2
  # Transient class that represents a field that we send to solr.
3
3
  # It might be possible for two properties to share a single field map entry if they use the same solr key.
4
- # @attribute [Symbol] type the data type hint for Solrizer
4
+ # @attribute [Symbol] type the data type hint for ActiveFedora::Indexing::FieldMapper
5
5
  # @attribute [Array] behaviors the indexing hints such as :stored_searchable or :symbol
6
6
  # @!attribute [w] values the raw values
7
7
  class FieldMapEntry
@@ -15,7 +15,7 @@ module ActiveFedora::RDF
15
15
 
16
16
  # Merges any existing values for solr fields with new, incoming values and ensures that resulting values are unique.
17
17
  # @param [Symbol] type the data type for the field such as :string, :date, :integer
18
- # @param [Array] behaviors Solrizer's behaviors for indexing such as :stored_searhable, :symbol
18
+ # @param [Array] behaviors FieldMapper's behaviors for indexing such as :stored_searchable, :symbol
19
19
  # @param [Array] new_values values to append into the existing solr field
20
20
  def merge!(type, behaviors, new_values)
21
21
  self.type ||= type