RubyGems - active-fedora - Versions diffs - 11.5.6 → 12.0.0 - Mend

active-fedora 11.5.6 → 12.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

checksums.yaml +5 -5
data/.rubocop.yml +4 -0
data/.travis.yml +15 -0
data/Gemfile +1 -3
data/README.md +10 -13
data/active-fedora.gemspec +7 -9
data/lib/active_fedora.rb +3 -5
data/lib/active_fedora/associations/collection_proxy.rb +0 -2
data/lib/active_fedora/attributes/property_builder.rb +3 -1
data/lib/active_fedora/caching_connection.rb +1 -1
data/lib/active_fedora/errors.rb +4 -0
data/lib/active_fedora/fedora.rb +5 -0
data/lib/active_fedora/file.rb +3 -1
data/lib/active_fedora/file/attributes.rb +5 -0
data/lib/active_fedora/file_io.rb +120 -0
data/lib/active_fedora/indexing.rb +6 -1
data/lib/active_fedora/indexing/default_descriptors.rb +128 -0
data/lib/active_fedora/indexing/descendant_fetcher.rb +22 -18
data/lib/active_fedora/indexing/descriptor.rb +44 -0
data/lib/active_fedora/indexing/field_mapper.rb +146 -0
data/lib/active_fedora/indexing/inserter.rb +40 -0
data/lib/active_fedora/indexing/suffix.rb +81 -0
data/lib/active_fedora/indexing_service.rb +2 -2
data/lib/active_fedora/ldp_resource.rb +1 -2
data/lib/active_fedora/railtie.rb +0 -1
data/lib/active_fedora/rdf/field_map_entry.rb +2 -2
data/lib/active_fedora/rdf/indexing_service.rb +6 -6
data/lib/active_fedora/relation.rb +0 -14
data/lib/active_fedora/relation/delegation.rb +1 -2
data/lib/active_fedora/relation/finder_methods.rb +19 -39
data/lib/active_fedora/version.rb +1 -1
data/lib/generators/active_fedora/config/fedora/templates/.fcrepo_wrapper +1 -1
data/lib/generators/active_fedora/config/solr/templates/solr.yml +3 -3
data/lib/generators/active_fedora/config/solr/templates/solr/config/schema.xml +34 -33
data/spec/integration/base_spec.rb +39 -35
data/spec/integration/indexing/descendant_fetcher_spec.rb +64 -0
data/spec/integration/relation_spec.rb +1 -39
data/spec/integration/scoping_spec.rb +17 -11
data/spec/spec_helper.rb +1 -1
data/spec/unit/active_fedora/indexing/inserter_spec.rb +30 -0
data/spec/unit/attributes_spec.rb +3 -7
data/spec/unit/fedora_spec.rb +12 -0
data/spec/unit/file_configurator_spec.rb +0 -9
data/spec/unit/file_io_spec.rb +137 -0
data/spec/unit/file_spec.rb +14 -17
metadata +26 -30
data/.circleci/config.yml +0 -43

data/lib/active_fedora/indexing/descendant_fetcher.rb CHANGED

@@ -7,7 +7,7 @@ module ActiveFedora
     #
     # The DescendantFetcher is also capable of partitioning the URIs into "priority" URIs
     # that will be first in the returned list. These prioritized URIs belong to objects
-    # with certain hasModel models. This feature is used in some hydra apps that need to
+    # with certain hasModel models. This feature is used in some samvera apps that need to
     # index 'permissions' objects before other objects to have the solr indexing work right.
     # And so by default, the prioritized class names are the ones from Hydra::AccessControls,
     # but you can alter the prioritized model name list, or set it to the empty array.
@@ -34,6 +34,7 @@ module ActiveFedora
         @exclude_self = exclude_self
       end
+      # @return [Array<String>] uris starting with priority models
       def descendant_and_self_uris
         partitioned = descendant_and_self_uris_partitioned
         partitioned[:priority] + partitioned[:other]
@@ -41,11 +42,20 @@ module ActiveFedora
       # returns a hash where key :priority is an array of all prioritized
       # type objects, key :other is an array of the rest.
+      # @return [Hash<Symbol, Array<String>>] uris sorted into :priority and :other
       def descendant_and_self_uris_partitioned
-        resource = Ldp::Resource::RdfSource.new(ActiveFedora.fedora.connection, uri)
+        model_partitioned = descendant_and_self_uris_partitioned_by_model
+        { priority: model_partitioned.slice(*priority_models).values.flatten,
+          other: model_partitioned.slice(*(model_partitioned.keys - priority_models)).values.flatten }
+      end
+      # Returns a hash where keys are model names
+      # This is useful if you need to act on certain models and want finer granularity than priority/other
+      # @return [Hash<String, Array<String>>] uris sorted by model names
+      def descendant_and_self_uris_partitioned_by_model
         # GET could be slow if it's a big resource, we're using HEAD to avoid this problem,
         # but this causes more requests to Fedora.
-        return partitioned_uris unless resource.head.rdf_source?
+        return partitioned_uris unless rdf_resource.head.rdf_source?
         add_self_to_partitioned_uris unless @exclude_self
@@ -54,9 +64,11 @@ module ActiveFedora
           self.class.new(
             descendant_uri,
             priority_models: priority_models
-          ).descendant_and_self_uris_partitioned.tap do |descendant_partitioned|
-            partitioned_uris[:priority].concat descendant_partitioned[:priority]
-            partitioned_uris[:other].concat descendant_partitioned[:other]
+          ).descendant_and_self_uris_partitioned_by_model.tap do |descendant_partitioned|
+            descendant_partitioned.keys.each do |k|
+              partitioned_uris[k] ||= []
+              partitioned_uris[k].concat descendant_partitioned[k]
+            end
           end
         end
         partitioned_uris
@@ -73,10 +85,7 @@ module ActiveFedora
         end
         def partitioned_uris
-          @partitioned_uris ||= {
-            priority: [],
-            other: []
-          }
+          @partitioned_uris ||= {}
         end
         def rdf_graph_models
@@ -85,15 +94,10 @@ module ActiveFedora
           end.compact
         end
-        def prioritized_object?
-          priority_models.present? && (rdf_graph_models & priority_models).count > 0
-        end
         def add_self_to_partitioned_uris
-          if prioritized_object?
-            partitioned_uris[:priority] << rdf_resource.subject
-          else
-            partitioned_uris[:other] << rdf_resource.subject
+          rdf_graph_models.each do |model|
+            partitioned_uris[model] ||= []
+            partitioned_uris[model] << rdf_resource.subject
           end
         end
     end

data/lib/active_fedora/indexing/descriptor.rb ADDED

@@ -0,0 +1,44 @@
+module ActiveFedora
+  module Indexing
+    class Descriptor
+      attr_reader :index_type
+      def initialize(*args)
+        if args.last.is_a? Hash
+          opts = args.pop
+          @converter = opts[:converter]
+          @type_required = opts[:requires_type]
+        end
+        @index_type = args
+        raise Solrizer::InvalidIndexDescriptor, "Invalid index type passed to Sorizer.solr_name.  It should be an array like [:string, :indexed, :stored, :multivalued]. You provided: `#{@index_type}'" unless index_type.is_a? Array
+      end
+      def name_and_converter(field_name, args = nil)
+        args ||= {}
+        field_type = args[:type]
+        if type_required?
+          raise ArgumentError, "Must provide a :type argument when index_type is `#{self}' for #{field_name}" unless field_type
+        end
+        [field_name.to_s + suffix(field_type), converter(field_type)]
+      end
+      def type_required?
+        @type_required
+      end
+      def evaluate_suffix(field_type)
+        Suffix.new(index_type.first.is_a?(Proc) ? index_type.first.call(field_type) : index_type.dup)
+      end
+      protected
+        # Suffix can be overridden if you want a different method of grabbing the suffix
+        def suffix(field_type)
+          evaluate_suffix(field_type).to_s
+        end
+        def converter(field_type)
+          @converter.call(field_type) if @converter
+        end
+    end
+  end
+end

data/lib/active_fedora/indexing/field_mapper.rb ADDED

@@ -0,0 +1,146 @@
+module ActiveFedora
+  module Indexing
+    class FieldMapper
+      class_attribute :id_field, :descriptors
+      # set defaults
+      self.id_field = 'id'
+      self.descriptors = [DefaultDescriptors]
+      # @api
+      # @params [Hash] doc the hash to insert the value into
+      # @params [String] name the name of the field (without the suffix)
+      # @params [String,Date] value the value to be inserted
+      # @params [Array,Hash] indexer_args the arguments that find the indexer
+      # @returns [Hash] doc the document that was provided with the new field (replacing any field with the same name)
+      def set_field(doc, name, value, *indexer_args)
+        # adding defaults indexer
+        indexer_args = [:stored_searchable] if indexer_args.empty?
+        doc.merge! solr_names_and_values(name, value, indexer_args)
+        doc
+      end
+      # @api
+      # Given a field name, index_type, etc., returns the corresponding Solr name.
+      # TODO field type is the input format, maybe we could just detect that?
+      # @param [String] field_name the ruby (term) name which will get a suffix appended to become a Solr field name
+      # @param opts - index_type is only needed if the FieldDescriptor requires it (e.g. :searcahble)
+      # @return [String] name of the solr field, based on the params
+      def solr_name(field_name, *opts)
+        index_type, args = if opts.first.is_a? Hash
+                             [:stored_searchable, opts.first]
+                           elsif opts.empty?
+                             [:stored_searchable, { type: :text }]
+                           else
+                             [opts[0], opts[1] || { type: :string }]
+                           end
+        indexer(index_type).name_and_converter(field_name, args).first
+      end
+      # Given a field name-value pair, a data type, and an array of index types, returns a hash of
+      # mapped names and values. The values in the hash are _arrays_, and may contain multiple values.
+      def solr_names_and_values(field_name, field_value, index_types)
+        return {} if field_value.nil?
+        # Determine the set of index types
+        index_types = Array(index_types)
+        index_types.uniq!
+        index_types.dup.each do |index_type|
+          if index_type.to_s =~ /^not_(.*)/
+            index_types.delete index_type # not_foo
+            index_types.delete Regexp.last_match(1).to_sym # foo
+          end
+        end
+        # Map names and values
+        results = {}
+        # Time seems to extend enumerable, so wrap it so we don't interate over each of its elements.
+        field_value = [field_value] if field_value.is_a? Time
+        index_types.each do |index_type|
+          Array(field_value).each do |single_value|
+            # Get mapping for field
+            descriptor = indexer(index_type)
+            data_type = extract_type(single_value)
+            name, converter = descriptor.name_and_converter(field_name, type: data_type)
+            next unless name
+            # Is there a custom converter?
+            # TODO instead of a custom converter, look for input data type and output data type. Create a few methods that can do that cast.
+            value = if converter
+                      if converter.arity == 1
+                        converter.call(single_value)
+                      else
+                        converter.call(single_value, field_name)
+                      end
+                    elsif data_type == :boolean
+                      single_value
+                    else
+                      single_value.to_s
+                    end
+            # Add mapped name & value, unless it's a duplicate
+            if descriptor.evaluate_suffix(data_type).multivalued?
+              values = (results[name] ||= [])
+              values << value unless value.nil? || values.include?(value)
+            else
+              Solrizer.logger.warn "Setting #{name} to `#{value}', but it already had `#{results[name]}'" if results[name] && Solrizer.logger
+              results[name] = value
+            end
+          end
+        end
+        results
+      end
+      private
+        # @param [Symbol, String, Descriptor] index_type is a Descriptor, a symbol that references a method that returns a Descriptor, or a string which will be used as the suffix.
+        # @return [Descriptor]
+        def indexer(index_type)
+          index_type = case index_type
+                       when Symbol
+                         index_type_macro(index_type)
+                       when String
+                         StringDescriptor.new(index_type)
+                       when Descriptor
+                         index_type
+                       else
+                         raise Solrizer::InvalidIndexDescriptor, "#{index_type.class} is not a valid indexer_type. Use a String, Symbol or Descriptor."
+                       end
+          raise InvalidIndexDescriptor, "index type should be an Descriptor, you passed: #{index_type.class}" unless index_type.is_a? Descriptor
+          index_type
+        end
+        # @param index_type [Symbol]
+        # search through the descriptors (class attribute) until a module is found that responds to index_type, then call it.
+        def index_type_macro(index_type)
+          klass = self.class.descriptors.find { |descriptor_klass| descriptor_klass.respond_to? index_type }
+          if klass
+            klass.send(index_type)
+          else
+            raise UnknownIndexMacro, "Unable to find `#{index_type}' in #{self.class.descriptors}"
+          end
+        end
+        def extract_type(value)
+          case value
+          when NilClass
+            nil
+          when Integer # In ruby < 2.4, Fixnum extends Integer
+            :integer
+          when DateTime
+            :time
+          when TrueClass, FalseClass
+            :boolean
+          else
+            value.class.to_s.underscore.to_sym
+          end
+        end
+    end
+  end
+end

data/lib/active_fedora/indexing/inserter.rb ADDED

@@ -0,0 +1,40 @@
+module ActiveFedora
+  module Indexing
+    # Utilities for adding fields to index documents
+    class Inserter
+      # @param [String] field_name_base the field name
+      # @param [String] value the value to insert into the index
+      # @param [Array<Symbol>] index_as the index type suffixes
+      # @param [Hash] solr_doc the index doc to add to
+      # @example:
+      #   solr_doc = {}
+      #   create_and_insert_terms('title', 'War and Peace', [:displayable, :searchable], solr_doc)
+      #   solr_doc
+      #   # => {"title_ssm"=>["War and Peace"], "title_teim"=>["War and Peace"]}
+      def self.create_and_insert_terms(field_name_base, value, index_as, solr_doc)
+        index_as.each do |indexer|
+          insert_field(solr_doc, field_name_base, value, indexer)
+        end
+      end
+      # @params [Hash] doc the hash to insert the value into
+      # @params [String] name the name of the field (without the suffix)
+      # @params [String,Date,Array] value the value (or array of values) to be inserted
+      # @params [Array,Hash] indexer_args the arguments that find the indexer
+      # @returns [Hash] doc the document that was provided with the new field inserted
+      def self.insert_field(doc, name, value, *indexer_args)
+        # adding defaults indexer
+        indexer_args = [:stored_searchable] if indexer_args.empty?
+        ActiveFedora.index_field_mapper.solr_names_and_values(name, value, indexer_args).each do |k, v|
+          doc[k] ||= []
+          if v.is_a? Array
+            doc[k] += v
+          else
+            doc[k] = v
+          end
+        end
+        doc
+      end
+    end
+  end
+end

data/lib/active_fedora/indexing/suffix.rb ADDED

@@ -0,0 +1,81 @@
+require 'ostruct'
+module ActiveFedora
+  module Indexing
+    class Suffix
+      def initialize(*fields)
+        @fields = fields.flatten
+      end
+      def multivalued?
+        has_field? :multivalued
+      end
+      def stored?
+        has_field? :stored
+      end
+      def indexed?
+        has_field? :indexed
+      end
+      def has_field?(f)
+        (f.to_sym == :type) || @fields.include?(f.to_sym)
+      end
+      def data_type
+        @fields.first
+      end
+      def to_s
+        raise Solrizer::InvalidIndexDescriptor, "Missing datatype for #{@fields}" unless data_type
+        field_suffix = [config.suffix_delimiter]
+        config.fields.select { |f| has_field? f }.each do |f|
+          key = :"#{f}_suffix"
+          field_suffix << if config.send(key).is_a? Proc
+                            config.send(key).call(@fields)
+                          else
+                            config.send(key)
+                          end
+        end
+        field_suffix.join
+      end
+      def self.config
+        @config ||= OpenStruct.new fields: [:type, :stored, :indexed, :multivalued],
+                                   suffix_delimiter: '_',
+                                   type_suffix: (lambda do |fields|
+                                                   type = fields.first
+                                                   case type
+                                                   when :string, :symbol # TODO: `:symbol' usage ought to be deprecated
+                                                     's'
+                                                   when :text
+                                                     't'
+                                                   when :text_en
+                                                     'te'
+                                                   when :date, :time
+                                                     'dt'
+                                                   when :integer
+                                                     'i'
+                                                   when :boolean
+                                                     'b'
+                                                   when :long
+                                                     'lt'
+                                                   else
+                                                     raise Solrizer::InvalidIndexDescriptor, "Invalid datatype `#{type.inspect}'. Must be one of: :date, :time, :text, :text_en, :string, :symbol, :integer, :boolean"
+                                                   end
+                                                 end),
+                                   stored_suffix: 's',
+                                   indexed_suffix: 'i',
+                                   multivalued_suffix: 'm'
+      end
+      def config
+        @config ||= self.class.config.dup
+      end
+    end
+  end
+end

data/lib/active_fedora/indexing_service.rb CHANGED

@@ -32,8 +32,8 @@ module ActiveFedora
     # @return [Hash] the solr document
     def generate_solr_document
       solr_doc = {}
-      Solrizer.set_field(solr_doc, 'system_create', c_time, :stored_sortable)
-      Solrizer.set_field(solr_doc, 'system_modified', m_time, :stored_sortable)
+      ActiveFedora.index_field_mapper.set_field(solr_doc, 'system_create', c_time, :stored_sortable)
+      ActiveFedora.index_field_mapper.set_field(solr_doc, 'system_modified', m_time, :stored_sortable)
       solr_doc[QueryResultBuilder::HAS_MODEL_SOLR_FIELD] = object.has_model
       solr_doc[ActiveFedora.id_field.to_sym] = object.id
       object.declared_attached_files.each do |name, file|

data/lib/active_fedora/ldp_resource.rb CHANGED

@@ -18,8 +18,7 @@ module ActiveFedora
     end
     def marshal_load(data)
-      ivars = data
-      ivars.each { |name, val| instance_variable_set(name, val) }
+      data.each { |name, val| instance_variable_set(name, val) }
     end
   end
 end

data/lib/active_fedora/railtie.rb CHANGED

@@ -15,7 +15,6 @@ module ActiveFedora
     initializer "active_fedora.logger" do
       ActiveSupport.on_load(:active_fedora) do
         self.logger = ::Rails.logger if logger.is_a? NullLogger
-        Solrizer.logger ||= logger
       end
     end

data/lib/active_fedora/rdf/field_map_entry.rb CHANGED

@@ -1,7 +1,7 @@
 module ActiveFedora::RDF
   # Transient class that represents a field that we send to solr.
   # It might be possible for two properties to share a single field map entry if they use the same solr key.
-  # @attribute [Symbol] type the data type hint for Solrizer
+  # @attribute [Symbol] type the data type hint for ActiveFedora::Indexing::FieldMapper
   # @attribute [Array] behaviors the indexing hints such as :stored_searchable or :symbol
   # @!attribute [w] values the raw values
   class FieldMapEntry
@@ -15,7 +15,7 @@ module ActiveFedora::RDF
     # Merges any existing values for solr fields with new, incoming values and ensures that resulting values are unique.
     # @param [Symbol] type the data type for the field such as :string, :date, :integer
-    # @param [Array] behaviors Solrizer's behaviors for indexing such as :stored_searhable, :symbol
+    # @param [Array] behaviors FieldMapper's behaviors for indexing such as :stored_searchable, :symbol
     # @param [Array] new_values values to append into the existing solr field
     def merge!(type, behaviors, new_values)
       self.type ||= type