active-fedora 11.5.6 → 12.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.rubocop.yml +4 -0
- data/.travis.yml +15 -0
- data/Gemfile +1 -3
- data/README.md +10 -13
- data/active-fedora.gemspec +7 -9
- data/lib/active_fedora.rb +3 -5
- data/lib/active_fedora/associations/collection_proxy.rb +0 -2
- data/lib/active_fedora/attributes/property_builder.rb +3 -1
- data/lib/active_fedora/caching_connection.rb +1 -1
- data/lib/active_fedora/errors.rb +4 -0
- data/lib/active_fedora/fedora.rb +5 -0
- data/lib/active_fedora/file.rb +3 -1
- data/lib/active_fedora/file/attributes.rb +5 -0
- data/lib/active_fedora/file_io.rb +120 -0
- data/lib/active_fedora/indexing.rb +6 -1
- data/lib/active_fedora/indexing/default_descriptors.rb +128 -0
- data/lib/active_fedora/indexing/descendant_fetcher.rb +22 -18
- data/lib/active_fedora/indexing/descriptor.rb +44 -0
- data/lib/active_fedora/indexing/field_mapper.rb +146 -0
- data/lib/active_fedora/indexing/inserter.rb +40 -0
- data/lib/active_fedora/indexing/suffix.rb +81 -0
- data/lib/active_fedora/indexing_service.rb +2 -2
- data/lib/active_fedora/ldp_resource.rb +1 -2
- data/lib/active_fedora/railtie.rb +0 -1
- data/lib/active_fedora/rdf/field_map_entry.rb +2 -2
- data/lib/active_fedora/rdf/indexing_service.rb +6 -6
- data/lib/active_fedora/relation.rb +0 -14
- data/lib/active_fedora/relation/delegation.rb +1 -2
- data/lib/active_fedora/relation/finder_methods.rb +19 -39
- data/lib/active_fedora/version.rb +1 -1
- data/lib/generators/active_fedora/config/fedora/templates/.fcrepo_wrapper +1 -1
- data/lib/generators/active_fedora/config/solr/templates/solr.yml +3 -3
- data/lib/generators/active_fedora/config/solr/templates/solr/config/schema.xml +34 -33
- data/spec/integration/base_spec.rb +39 -35
- data/spec/integration/indexing/descendant_fetcher_spec.rb +64 -0
- data/spec/integration/relation_spec.rb +1 -39
- data/spec/integration/scoping_spec.rb +17 -11
- data/spec/spec_helper.rb +1 -1
- data/spec/unit/active_fedora/indexing/inserter_spec.rb +30 -0
- data/spec/unit/attributes_spec.rb +3 -7
- data/spec/unit/fedora_spec.rb +12 -0
- data/spec/unit/file_configurator_spec.rb +0 -9
- data/spec/unit/file_io_spec.rb +137 -0
- data/spec/unit/file_spec.rb +14 -17
- metadata +26 -30
- data/.circleci/config.yml +0 -43
@@ -7,7 +7,7 @@ module ActiveFedora
|
|
7
7
|
#
|
8
8
|
# The DescendantFetcher is also capable of partitioning the URIs into "priority" URIs
|
9
9
|
# that will be first in the returned list. These prioritized URIs belong to objects
|
10
|
-
# with certain hasModel models. This feature is used in some
|
10
|
+
# with certain hasModel models. This feature is used in some samvera apps that need to
|
11
11
|
# index 'permissions' objects before other objects to have the solr indexing work right.
|
12
12
|
# And so by default, the prioritized class names are the ones from Hydra::AccessControls,
|
13
13
|
# but you can alter the prioritized model name list, or set it to the empty array.
|
@@ -34,6 +34,7 @@ module ActiveFedora
|
|
34
34
|
@exclude_self = exclude_self
|
35
35
|
end
|
36
36
|
|
37
|
+
# @return [Array<String>] uris starting with priority models
|
37
38
|
def descendant_and_self_uris
|
38
39
|
partitioned = descendant_and_self_uris_partitioned
|
39
40
|
partitioned[:priority] + partitioned[:other]
|
@@ -41,11 +42,20 @@ module ActiveFedora
|
|
41
42
|
|
42
43
|
# returns a hash where key :priority is an array of all prioritized
|
43
44
|
# type objects, key :other is an array of the rest.
|
45
|
+
# @return [Hash<Symbol, Array<String>>] uris sorted into :priority and :other
|
44
46
|
def descendant_and_self_uris_partitioned
|
45
|
-
|
47
|
+
model_partitioned = descendant_and_self_uris_partitioned_by_model
|
48
|
+
{ priority: model_partitioned.slice(*priority_models).values.flatten,
|
49
|
+
other: model_partitioned.slice(*(model_partitioned.keys - priority_models)).values.flatten }
|
50
|
+
end
|
51
|
+
|
52
|
+
# Returns a hash where keys are model names
|
53
|
+
# This is useful if you need to act on certain models and want finer granularity than priority/other
|
54
|
+
# @return [Hash<String, Array<String>>] uris sorted by model names
|
55
|
+
def descendant_and_self_uris_partitioned_by_model
|
46
56
|
# GET could be slow if it's a big resource, we're using HEAD to avoid this problem,
|
47
57
|
# but this causes more requests to Fedora.
|
48
|
-
return partitioned_uris unless
|
58
|
+
return partitioned_uris unless rdf_resource.head.rdf_source?
|
49
59
|
|
50
60
|
add_self_to_partitioned_uris unless @exclude_self
|
51
61
|
|
@@ -54,9 +64,11 @@ module ActiveFedora
|
|
54
64
|
self.class.new(
|
55
65
|
descendant_uri,
|
56
66
|
priority_models: priority_models
|
57
|
-
).
|
58
|
-
|
59
|
-
|
67
|
+
).descendant_and_self_uris_partitioned_by_model.tap do |descendant_partitioned|
|
68
|
+
descendant_partitioned.keys.each do |k|
|
69
|
+
partitioned_uris[k] ||= []
|
70
|
+
partitioned_uris[k].concat descendant_partitioned[k]
|
71
|
+
end
|
60
72
|
end
|
61
73
|
end
|
62
74
|
partitioned_uris
|
@@ -73,10 +85,7 @@ module ActiveFedora
|
|
73
85
|
end
|
74
86
|
|
75
87
|
def partitioned_uris
|
76
|
-
@partitioned_uris ||= {
|
77
|
-
priority: [],
|
78
|
-
other: []
|
79
|
-
}
|
88
|
+
@partitioned_uris ||= {}
|
80
89
|
end
|
81
90
|
|
82
91
|
def rdf_graph_models
|
@@ -85,15 +94,10 @@ module ActiveFedora
|
|
85
94
|
end.compact
|
86
95
|
end
|
87
96
|
|
88
|
-
def prioritized_object?
|
89
|
-
priority_models.present? && (rdf_graph_models & priority_models).count > 0
|
90
|
-
end
|
91
|
-
|
92
97
|
def add_self_to_partitioned_uris
|
93
|
-
|
94
|
-
partitioned_uris[
|
95
|
-
|
96
|
-
partitioned_uris[:other] << rdf_resource.subject
|
98
|
+
rdf_graph_models.each do |model|
|
99
|
+
partitioned_uris[model] ||= []
|
100
|
+
partitioned_uris[model] << rdf_resource.subject
|
97
101
|
end
|
98
102
|
end
|
99
103
|
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module ActiveFedora
|
2
|
+
module Indexing
|
3
|
+
class Descriptor
|
4
|
+
attr_reader :index_type
|
5
|
+
def initialize(*args)
|
6
|
+
if args.last.is_a? Hash
|
7
|
+
opts = args.pop
|
8
|
+
@converter = opts[:converter]
|
9
|
+
@type_required = opts[:requires_type]
|
10
|
+
end
|
11
|
+
@index_type = args
|
12
|
+
raise Solrizer::InvalidIndexDescriptor, "Invalid index type passed to Sorizer.solr_name. It should be an array like [:string, :indexed, :stored, :multivalued]. You provided: `#{@index_type}'" unless index_type.is_a? Array
|
13
|
+
end
|
14
|
+
|
15
|
+
def name_and_converter(field_name, args = nil)
|
16
|
+
args ||= {}
|
17
|
+
field_type = args[:type]
|
18
|
+
if type_required?
|
19
|
+
raise ArgumentError, "Must provide a :type argument when index_type is `#{self}' for #{field_name}" unless field_type
|
20
|
+
end
|
21
|
+
[field_name.to_s + suffix(field_type), converter(field_type)]
|
22
|
+
end
|
23
|
+
|
24
|
+
def type_required?
|
25
|
+
@type_required
|
26
|
+
end
|
27
|
+
|
28
|
+
def evaluate_suffix(field_type)
|
29
|
+
Suffix.new(index_type.first.is_a?(Proc) ? index_type.first.call(field_type) : index_type.dup)
|
30
|
+
end
|
31
|
+
|
32
|
+
protected
|
33
|
+
|
34
|
+
# Suffix can be overridden if you want a different method of grabbing the suffix
|
35
|
+
def suffix(field_type)
|
36
|
+
evaluate_suffix(field_type).to_s
|
37
|
+
end
|
38
|
+
|
39
|
+
def converter(field_type)
|
40
|
+
@converter.call(field_type) if @converter
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
@@ -0,0 +1,146 @@
|
|
1
|
+
module ActiveFedora
|
2
|
+
module Indexing
|
3
|
+
class FieldMapper
|
4
|
+
class_attribute :id_field, :descriptors
|
5
|
+
# set defaults
|
6
|
+
self.id_field = 'id'
|
7
|
+
self.descriptors = [DefaultDescriptors]
|
8
|
+
|
9
|
+
# @api
|
10
|
+
# @param [Hash] doc the hash to insert the value into
|
11
|
+
# @param [String] name the name of the field (without the suffix)
|
12
|
+
# @param [String,Date] value the value to be inserted
|
13
|
+
# @param [Array,Hash] indexer_args the arguments that find the indexer
|
14
|
+
# @return [Hash] the document that was provided, with the new field set (replacing any field with the same name)
|
15
|
+
def set_field(doc, name, value, *indexer_args)
|
16
|
+
# adding defaults indexer
|
17
|
+
indexer_args = [:stored_searchable] if indexer_args.empty?
|
18
|
+
doc.merge! solr_names_and_values(name, value, indexer_args)
|
19
|
+
doc
|
20
|
+
end
|
21
|
+
|
22
|
+
# @api
|
23
|
+
# Given a field name, index_type, etc., returns the corresponding Solr name.
|
24
|
+
# TODO field type is the input format, maybe we could just detect that?
|
25
|
+
# @param [String] field_name the ruby (term) name which will get a suffix appended to become a Solr field name
|
26
|
+
# @param opts - index_type is only needed if the FieldDescriptor requires it (e.g. :searchable)
|
27
|
+
# @return [String] name of the solr field, based on the params
|
28
|
+
def solr_name(field_name, *opts)
|
29
|
+
index_type, args = if opts.first.is_a? Hash
|
30
|
+
[:stored_searchable, opts.first]
|
31
|
+
elsif opts.empty?
|
32
|
+
[:stored_searchable, { type: :text }]
|
33
|
+
else
|
34
|
+
[opts[0], opts[1] || { type: :string }]
|
35
|
+
end
|
36
|
+
|
37
|
+
indexer(index_type).name_and_converter(field_name, args).first
|
38
|
+
end
|
39
|
+
|
40
|
+
# Given a field name-value pair, a data type, and an array of index types, returns a hash of
|
41
|
+
# mapped names and values. The values in the hash are _arrays_, and may contain multiple values.
|
42
|
+
def solr_names_and_values(field_name, field_value, index_types)
|
43
|
+
return {} if field_value.nil?
|
44
|
+
|
45
|
+
# Determine the set of index types
|
46
|
+
index_types = Array(index_types)
|
47
|
+
index_types.uniq!
|
48
|
+
index_types.dup.each do |index_type|
|
49
|
+
if index_type.to_s =~ /^not_(.*)/
|
50
|
+
index_types.delete index_type # not_foo
|
51
|
+
index_types.delete Regexp.last_match(1).to_sym # foo
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
# Map names and values
|
56
|
+
|
57
|
+
results = {}
|
58
|
+
|
59
|
+
# Time seems to extend enumerable, so wrap it so we don't iterate over each of its elements.
|
60
|
+
field_value = [field_value] if field_value.is_a? Time
|
61
|
+
|
62
|
+
index_types.each do |index_type|
|
63
|
+
Array(field_value).each do |single_value|
|
64
|
+
# Get mapping for field
|
65
|
+
descriptor = indexer(index_type)
|
66
|
+
data_type = extract_type(single_value)
|
67
|
+
name, converter = descriptor.name_and_converter(field_name, type: data_type)
|
68
|
+
next unless name
|
69
|
+
|
70
|
+
# Is there a custom converter?
|
71
|
+
# TODO instead of a custom converter, look for input data type and output data type. Create a few methods that can do that cast.
|
72
|
+
|
73
|
+
value = if converter
|
74
|
+
if converter.arity == 1
|
75
|
+
converter.call(single_value)
|
76
|
+
else
|
77
|
+
converter.call(single_value, field_name)
|
78
|
+
end
|
79
|
+
elsif data_type == :boolean
|
80
|
+
single_value
|
81
|
+
else
|
82
|
+
single_value.to_s
|
83
|
+
end
|
84
|
+
|
85
|
+
# Add mapped name & value, unless it's a duplicate
|
86
|
+
if descriptor.evaluate_suffix(data_type).multivalued?
|
87
|
+
values = (results[name] ||= [])
|
88
|
+
values << value unless value.nil? || values.include?(value)
|
89
|
+
else
|
90
|
+
Solrizer.logger.warn "Setting #{name} to `#{value}', but it already had `#{results[name]}'" if results[name] && Solrizer.logger
|
91
|
+
results[name] = value
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
results
|
97
|
+
end
|
98
|
+
|
99
|
+
private
|
100
|
+
|
101
|
+
# @param [Symbol, String, Descriptor] index_type is a Descriptor, a symbol that references a method that returns a Descriptor, or a string which will be used as the suffix.
|
102
|
+
# @return [Descriptor]
|
103
|
+
def indexer(index_type)
|
104
|
+
index_type = case index_type
|
105
|
+
when Symbol
|
106
|
+
index_type_macro(index_type)
|
107
|
+
when String
|
108
|
+
StringDescriptor.new(index_type)
|
109
|
+
when Descriptor
|
110
|
+
index_type
|
111
|
+
else
|
112
|
+
raise Solrizer::InvalidIndexDescriptor, "#{index_type.class} is not a valid indexer_type. Use a String, Symbol or Descriptor."
|
113
|
+
end
|
114
|
+
|
115
|
+
raise InvalidIndexDescriptor, "index type should be an Descriptor, you passed: #{index_type.class}" unless index_type.is_a? Descriptor
|
116
|
+
index_type
|
117
|
+
end
|
118
|
+
|
119
|
+
# @param index_type [Symbol]
|
120
|
+
# search through the descriptors (class attribute) until a module is found that responds to index_type, then call it.
|
121
|
+
def index_type_macro(index_type)
|
122
|
+
klass = self.class.descriptors.find { |descriptor_klass| descriptor_klass.respond_to? index_type }
|
123
|
+
if klass
|
124
|
+
klass.send(index_type)
|
125
|
+
else
|
126
|
+
raise UnknownIndexMacro, "Unable to find `#{index_type}' in #{self.class.descriptors}"
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
def extract_type(value)
|
131
|
+
case value
|
132
|
+
when NilClass
|
133
|
+
nil
|
134
|
+
when Integer # In ruby < 2.4, Fixnum extends Integer
|
135
|
+
:integer
|
136
|
+
when DateTime
|
137
|
+
:time
|
138
|
+
when TrueClass, FalseClass
|
139
|
+
:boolean
|
140
|
+
else
|
141
|
+
value.class.to_s.underscore.to_sym
|
142
|
+
end
|
143
|
+
end
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module ActiveFedora
|
2
|
+
module Indexing
|
3
|
+
# Utilities for adding fields to index documents
|
4
|
+
class Inserter
|
5
|
+
# @param [String] field_name_base the field name
|
6
|
+
# @param [String] value the value to insert into the index
|
7
|
+
# @param [Array<Symbol>] index_as the index type suffixes
|
8
|
+
# @param [Hash] solr_doc the index doc to add to
|
9
|
+
# @example:
|
10
|
+
# solr_doc = {}
|
11
|
+
# create_and_insert_terms('title', 'War and Peace', [:displayable, :searchable], solr_doc)
|
12
|
+
# solr_doc
|
13
|
+
# # => {"title_ssm"=>["War and Peace"], "title_teim"=>["War and Peace"]}
|
14
|
+
def self.create_and_insert_terms(field_name_base, value, index_as, solr_doc)
|
15
|
+
index_as.each do |indexer|
|
16
|
+
insert_field(solr_doc, field_name_base, value, indexer)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
# @param [Hash] doc the hash to insert the value into
|
21
|
+
# @param [String] name the name of the field (without the suffix)
|
22
|
+
# @param [String,Date,Array] value the value (or array of values) to be inserted
|
23
|
+
# @param [Array,Hash] indexer_args the arguments that find the indexer
|
24
|
+
# @return [Hash] the document that was provided, with the new field inserted
|
25
|
+
def self.insert_field(doc, name, value, *indexer_args)
|
26
|
+
# adding defaults indexer
|
27
|
+
indexer_args = [:stored_searchable] if indexer_args.empty?
|
28
|
+
ActiveFedora.index_field_mapper.solr_names_and_values(name, value, indexer_args).each do |k, v|
|
29
|
+
doc[k] ||= []
|
30
|
+
if v.is_a? Array
|
31
|
+
doc[k] += v
|
32
|
+
else
|
33
|
+
doc[k] = v
|
34
|
+
end
|
35
|
+
end
|
36
|
+
doc
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
|
3
|
+
module ActiveFedora
|
4
|
+
module Indexing
|
5
|
+
class Suffix
|
6
|
+
def initialize(*fields)
|
7
|
+
@fields = fields.flatten
|
8
|
+
end
|
9
|
+
|
10
|
+
def multivalued?
|
11
|
+
has_field? :multivalued
|
12
|
+
end
|
13
|
+
|
14
|
+
def stored?
|
15
|
+
has_field? :stored
|
16
|
+
end
|
17
|
+
|
18
|
+
def indexed?
|
19
|
+
has_field? :indexed
|
20
|
+
end
|
21
|
+
|
22
|
+
def has_field?(f)
|
23
|
+
(f.to_sym == :type) || @fields.include?(f.to_sym)
|
24
|
+
end
|
25
|
+
|
26
|
+
def data_type
|
27
|
+
@fields.first
|
28
|
+
end
|
29
|
+
|
30
|
+
def to_s
|
31
|
+
raise Solrizer::InvalidIndexDescriptor, "Missing datatype for #{@fields}" unless data_type
|
32
|
+
|
33
|
+
field_suffix = [config.suffix_delimiter]
|
34
|
+
|
35
|
+
config.fields.select { |f| has_field? f }.each do |f|
|
36
|
+
key = :"#{f}_suffix"
|
37
|
+
field_suffix << if config.send(key).is_a? Proc
|
38
|
+
config.send(key).call(@fields)
|
39
|
+
else
|
40
|
+
config.send(key)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
field_suffix.join
|
45
|
+
end
|
46
|
+
|
47
|
+
def self.config
|
48
|
+
@config ||= OpenStruct.new fields: [:type, :stored, :indexed, :multivalued],
|
49
|
+
suffix_delimiter: '_',
|
50
|
+
type_suffix: (lambda do |fields|
|
51
|
+
type = fields.first
|
52
|
+
case type
|
53
|
+
when :string, :symbol # TODO: `:symbol' usage ought to be deprecated
|
54
|
+
's'
|
55
|
+
when :text
|
56
|
+
't'
|
57
|
+
when :text_en
|
58
|
+
'te'
|
59
|
+
when :date, :time
|
60
|
+
'dt'
|
61
|
+
when :integer
|
62
|
+
'i'
|
63
|
+
when :boolean
|
64
|
+
'b'
|
65
|
+
when :long
|
66
|
+
'lt'
|
67
|
+
else
|
68
|
+
raise Solrizer::InvalidIndexDescriptor, "Invalid datatype `#{type.inspect}'. Must be one of: :date, :time, :text, :text_en, :string, :symbol, :integer, :boolean"
|
69
|
+
end
|
70
|
+
end),
|
71
|
+
stored_suffix: 's',
|
72
|
+
indexed_suffix: 'i',
|
73
|
+
multivalued_suffix: 'm'
|
74
|
+
end
|
75
|
+
|
76
|
+
def config
|
77
|
+
@config ||= self.class.config.dup
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
@@ -32,8 +32,8 @@ module ActiveFedora
|
|
32
32
|
# @return [Hash] the solr document
|
33
33
|
def generate_solr_document
|
34
34
|
solr_doc = {}
|
35
|
-
|
36
|
-
|
35
|
+
ActiveFedora.index_field_mapper.set_field(solr_doc, 'system_create', c_time, :stored_sortable)
|
36
|
+
ActiveFedora.index_field_mapper.set_field(solr_doc, 'system_modified', m_time, :stored_sortable)
|
37
37
|
solr_doc[QueryResultBuilder::HAS_MODEL_SOLR_FIELD] = object.has_model
|
38
38
|
solr_doc[ActiveFedora.id_field.to_sym] = object.id
|
39
39
|
object.declared_attached_files.each do |name, file|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
module ActiveFedora::RDF
|
2
2
|
# Transient class that represents a field that we send to solr.
|
3
3
|
# It might be possible for two properties to share a single field map entry if they use the same solr key.
|
4
|
-
# @attribute [Symbol] type the data type hint for
|
4
|
+
# @attribute [Symbol] type the data type hint for ActiveFedora::Indexing::FieldMapper
|
5
5
|
# @attribute [Array] behaviors the indexing hints such as :stored_searchable or :symbol
|
6
6
|
# @!attribute [w] values the raw values
|
7
7
|
class FieldMapEntry
|
@@ -15,7 +15,7 @@ module ActiveFedora::RDF
|
|
15
15
|
|
16
16
|
# Merges any existing values for solr fields with new, incoming values and ensures that resulting values are unique.
|
17
17
|
# @param [Symbol] type the data type for the field such as :string, :date, :integer
|
18
|
-
# @param [Array] behaviors
|
18
|
+
# @param [Array] behaviors FieldMapper's behaviors for indexing such as :stored_searchable, :symbol
|
19
19
|
# @param [Array] new_values values to append into the existing solr field
|
20
20
|
def merge!(type, behaviors, new_values)
|
21
21
|
self.type ||= type
|