samvera-nesting_indexer 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.rubocop.yml +53 -0
- data/.rubocop_todo.yml +12 -0
- data/.ruby-version +1 -0
- data/.travis.yml +22 -0
- data/Gemfile +4 -0
- data/Guardfile +49 -0
- data/LICENSE +14 -0
- data/README.md +102 -0
- data/Rakefile +34 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/samvera/nesting_indexer.rb +82 -0
- data/lib/samvera/nesting_indexer/adapters.rb +12 -0
- data/lib/samvera/nesting_indexer/adapters/abstract_adapter.rb +44 -0
- data/lib/samvera/nesting_indexer/adapters/in_memory_adapter.rb +157 -0
- data/lib/samvera/nesting_indexer/adapters/interface_behavior_spec.rb +53 -0
- data/lib/samvera/nesting_indexer/configuration.rb +51 -0
- data/lib/samvera/nesting_indexer/documents.rb +90 -0
- data/lib/samvera/nesting_indexer/exceptions.rb +35 -0
- data/lib/samvera/nesting_indexer/railtie.rb +12 -0
- data/lib/samvera/nesting_indexer/relationship_reindexer.rb +127 -0
- data/lib/samvera/nesting_indexer/repository_reindexer.rb +65 -0
- data/lib/samvera/nesting_indexer/version.rb +5 -0
- data/samvera-nesting_indexer.gemspec +39 -0
- metadata +294 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: d85b5e9c924b8da1ab4155a9144073696f0d8aba
|
4
|
+
data.tar.gz: a49b5e3004e1b477d878343ce702a03ce02244e4
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 2769007dd241f6eddf9be40857685f87f7e55a18ac103acb9674242df470388cb45dbc2cf7aa06a5b57bb20a278ea31c254eb3d1c06a225e5de0c79c514b60e5
|
7
|
+
data.tar.gz: bb3bbaface89adceb926133b867ccbe188ec026a1611e3db3c4180aacaddc91bc196a36805dc27c27a083c1776cc08b569107ea3412f4941497b59cf49bc2945
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.rubocop.yml
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
inherit_from: .rubocop_todo.yml
|
2
|
+
|
3
|
+
################################################################################
|
4
|
+
## Releasing the hounds in your local environment.
|
5
|
+
##
|
6
|
+
## Setup:
|
7
|
+
## $ gem install rubocop
|
8
|
+
##
|
9
|
+
## Run:
|
10
|
+
## $ rubocop ./path/to/file ./or/path/to/directory -c ./.hound.yml
|
11
|
+
##
|
12
|
+
## Generation Notes:
|
13
|
+
## This file was generated via the commitment:install generator. You are free
|
14
|
+
## and expected to change this file.
|
15
|
+
################################################################################
|
16
|
+
AllCops:
|
17
|
+
Include:
|
18
|
+
- Rakefile
|
19
|
+
Exclude:
|
20
|
+
- 'vendor/**/*'
|
21
|
+
- 'tmp/**/*'
|
22
|
+
- 'bin/**/*'
|
23
|
+
- Gemfile
|
24
|
+
- Guardfile
|
25
|
+
- samvera-nesting_indexer.gemspec
|
26
|
+
TargetRubyVersion: 2.2
|
27
|
+
LineLength:
|
28
|
+
Description: 'Limit lines to 160 characters.'
|
29
|
+
Max: 160
|
30
|
+
Enabled: true
|
31
|
+
|
32
|
+
ModuleLength:
|
33
|
+
Exclude:
|
34
|
+
- 'spec/**/*'
|
35
|
+
|
36
|
+
|
37
|
+
Style/StringLiterals:
|
38
|
+
Description: 'Checks if uses of quotes match the configured preference.'
|
39
|
+
Enabled: false
|
40
|
+
|
41
|
+
PercentLiteralDelimiters:
|
42
|
+
Description: 'Use `%`-literal delimiters consistently'
|
43
|
+
Enabled: false
|
44
|
+
|
45
|
+
Documentation:
|
46
|
+
Description: 'Document classes and non-namespace modules.'
|
47
|
+
Enabled: true
|
48
|
+
Exclude:
|
49
|
+
- spec/**/*
|
50
|
+
- test/**/*
|
51
|
+
|
52
|
+
Style/WordArray:
|
53
|
+
Enabled: false
|
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2017-03-24 15:55:07 -0400 using RuboCop version 0.47.1.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 3
|
10
|
+
# Configuration parameters: CountComments, ExcludedMethods.
|
11
|
+
Metrics/BlockLength:
|
12
|
+
Max: 188
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.4.1
|
data/.travis.yml
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
sudo: false
|
2
|
+
cache: bundler
|
3
|
+
|
4
|
+
language: ruby
|
5
|
+
rvm:
|
6
|
+
- 2.3.1
|
7
|
+
- 2.2.5
|
8
|
+
- 2.2.2
|
9
|
+
- 2.1.10
|
10
|
+
- 2.0.0
|
11
|
+
|
12
|
+
matrix:
|
13
|
+
allow_failures:
|
14
|
+
- rvm: "2.3.1"
|
15
|
+
|
16
|
+
before_install: gem install bundler -v 1.12.5
|
17
|
+
|
18
|
+
script: 'bundle exec rake'
|
19
|
+
|
20
|
+
addons:
|
21
|
+
code_climate:
|
22
|
+
repo_token: 71b80cc45ed849e84e5943bb4874393b5ce26a2356381a839552395cd9b2f71a
|
data/Gemfile
ADDED
data/Guardfile
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# A sample Guardfile
|
2
|
+
# More info at https://github.com/guard/guard#readme
|
3
|
+
|
4
|
+
## Uncomment and set this to only include directories you want to watch
|
5
|
+
# directories %w(app lib config test spec features) \
|
6
|
+
# .select{|d| Dir.exists?(d) ? d : UI.warning("Directory #{d} does not exist")}
|
7
|
+
|
8
|
+
## Note: if you are using the `directories` clause above and you are not
|
9
|
+
## watching the project directory ('.'), then you will want to move
|
10
|
+
## the Guardfile to a watched dir and symlink it back, e.g.
|
11
|
+
#
|
12
|
+
# $ mkdir config
|
13
|
+
# $ mv Guardfile config/
|
14
|
+
# $ ln -s config/Guardfile .
|
15
|
+
#
|
16
|
+
# and, you'll have to watch "config/Guardfile" instead of "Guardfile"
|
17
|
+
|
18
|
+
# Note: The cmd option is now required due to the increasing number of ways
|
19
|
+
# rspec may be run, below are examples of the most common uses.
|
20
|
+
# * bundler: 'bundle exec rspec'
|
21
|
+
# * bundler binstubs: 'bin/rspec'
|
22
|
+
# * spring: 'bin/rspec' (This will use spring if running and you have
|
23
|
+
# installed the spring binstubs per the docs)
|
24
|
+
# * zeus: 'zeus rspec' (requires the server to be started separately)
|
25
|
+
# * 'just' rspec: 'rspec'
|
26
|
+
|
27
|
+
notification :terminal_notifier, subtitle: 'Samvera::NestingIndexer', timeout: 4
|
28
|
+
|
29
|
+
guard :rspec, cmd: "bundle exec rspec" do
|
30
|
+
require "guard/rspec/dsl"
|
31
|
+
dsl = Guard::RSpec::Dsl.new(self)
|
32
|
+
|
33
|
+
# Feel free to open issues for suggestions and improvements
|
34
|
+
|
35
|
+
# RSpec files
|
36
|
+
rspec = dsl.rspec
|
37
|
+
watch(rspec.spec_helper) { rspec.spec_dir }
|
38
|
+
watch(rspec.spec_support) { rspec.spec_dir }
|
39
|
+
watch(rspec.spec_files)
|
40
|
+
|
41
|
+
# Ruby files
|
42
|
+
ruby = dsl.ruby
|
43
|
+
dsl.watch_spec_files_for(ruby.lib_files)
|
44
|
+
end
|
45
|
+
|
46
|
+
guard :rubocop do
|
47
|
+
watch(%r{.+\.rb$})
|
48
|
+
watch(%r{(?:.+/)?\.rubocop\.yml$}) { |m| File.dirname(m[0]) }
|
49
|
+
end
|
data/LICENSE
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
##########################################################################
|
2
|
+
# Copyright 2014-2015 University of Notre Dame
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
data/README.md
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
# Samvera::NestingIndexer
|
2
|
+
|
3
|
+
[![Build Status](https://travis-ci.org/ndlib/samvera-indexer.png?branch=master)](https://travis-ci.org/ndlib/samvera-indexer)
|
4
|
+
[![Test Coverage](https://codeclimate.com/github/ndlib/samvera-indexer/badges/coverage.svg)](https://codeclimate.com/github/ndlib/samvera-indexer)
|
5
|
+
[![Code Climate](https://codeclimate.com/github/ndlib/samvera-indexer.png)](https://codeclimate.com/github/ndlib/samvera-indexer)
|
6
|
+
[![Documentation Status](http://inch-ci.org/github/ndlib/samvera-indexer.svg?branch=master)](http://inch-ci.org/github/ndlib/samvera-indexer)
|
7
|
+
[![APACHE 2 License](http://img.shields.io/badge/APACHE2-license-blue.svg)](./LICENSE)
|
8
|
+
|
9
|
+
The Samvera::NestingIndexer gem is responsible for indexing the graph relationship of objects. It maps a PreservationDocument to an IndexDocument by mapping a PreservationDocument's direct parents into the paths to get from a root document to the given PreservationDocument.
|
10
|
+
|
11
|
+
* [Background](#background)
|
12
|
+
* [Concepts](#concepts)
|
13
|
+
* [Examples](#examples)
|
14
|
+
* [Adapters](#adapters)
|
15
|
+
* [Considerations](#considerations)
|
16
|
+
|
17
|
+
## Background
|
18
|
+
|
19
|
+
This is a sandbox to work through the reindexing strategy as it relates to [CurateND Collections](https://github.com/ndlib/samvera_nd/issues/420). At this point the code is separate to allow for rapid testing and prototyping (no sense spinning up SOLR and Fedora to walk an arbitrary graph).
|
20
|
+
|
21
|
+
## Concepts
|
22
|
+
|
23
|
+
As we are indexing objects, we have two types of documents:
|
24
|
+
|
25
|
+
1. [PreservationDocument](./lib/samvera/nesting_indexer/documents.rb) - a light-weight representation of a Fedora object
|
26
|
+
2. [IndexDocument](./lib/samvera/nesting_indexer/documents.rb) - a light-weight representation of a SOLR document object
|
27
|
+
|
28
|
+
We have four attributes to consider for indexing the graph:
|
29
|
+
|
30
|
+
1. id - the unique identifier for a document
|
31
|
+
2. parent_ids - the ids for all of the parents of a given document
|
32
|
+
3. pathnames - the paths to traverse from a root document to the given document
|
33
|
+
4. ancestors - the pathnames of each of the ancestors
|
34
|
+
|
35
|
+
See [Samvera::NestingIndexer::Documents::IndexDocument](./lib/samvera/nesting_indexer/documents.rb) for further discussion.
|
36
|
+
|
37
|
+
To reindex a single document, we leverage the [`Samvera::NestingIndexer.reindex_relationships`](./lib/samvera/nesting_indexer.rb) method.
|
38
|
+
|
39
|
+
## Examples
|
40
|
+
|
41
|
+
Given the following PreservationDocuments:
|
42
|
+
|
43
|
+
| PID | Parents |
|
44
|
+
|-----|---------|
|
45
|
+
| A | - |
|
46
|
+
| B | - |
|
47
|
+
| C | A |
|
48
|
+
| D | A, B |
|
49
|
+
| E | C |
|
50
|
+
|
51
|
+
If we reindex the above PreservationDocuments, we will generate the following IndexDocuments:
|
52
|
+
|
53
|
+
| PID | Parents | Pathnames | Ancestors |
|
54
|
+
|-----|---------|------------|-----------|
|
55
|
+
| A | - | [A] | [] |
|
56
|
+
| B | - | [B] | [] |
|
57
|
+
| C | A | [A/C] | [A] |
|
58
|
+
| D | A, B | [A/D, B/D] | [A, B] |
|
59
|
+
| E | C | [A/C/E] | [A/C] |
|
60
|
+
|
61
|
+
For more scenarios, look at the [Reindex PID and Descendants specs](./spec/features/reindex_id_and_descendants_spec.rb).
|
62
|
+
|
63
|
+
## Adapters
|
64
|
+
|
65
|
+
An [AbstractAdapter](./lib/samvera/nesting_indexer/adapters/abstract_adapter.rb) provides the method interface for others to build against.
|
66
|
+
|
67
|
+
The [InMemory adapter](./lib/samvera/nesting_indexer/adapters/in_memory_adapter.rb) is a reference implementation (and used to ease testing overhead).
|
68
|
+
|
69
|
+
CurateND has implemented the [following adapter](https://github.com/ndlib/samvera_nd/blob/master/lib/samvera/library_collection_indexing_adapter.rb) for its LibraryCollection indexing.
|
70
|
+
|
71
|
+
To define the adapter for your application:
|
72
|
+
|
73
|
+
```ruby
|
74
|
+
# In an application initializer (e.g. config/initializers/samvera_nesting_indexer.rb)
|
75
|
+
Samvera::NestingIndexer.configure do |config|
|
76
|
+
config.adapter = MyCustomAdapter
|
77
|
+
end
|
78
|
+
```
|
79
|
+
|
80
|
+
To best ensure you have implemented the adapter to spec:
|
81
|
+
|
82
|
+
```ruby
|
83
|
+
# In the spec for MyCustomAdapter
|
84
|
+
require 'samvera/nesting_indexer/adapters/interface_behavior_spec'
|
85
|
+
RSpec.describe MyCustomAdapter do
|
86
|
+
it_behaves_like 'a Samvera::NestingIndexer::Adapter'
|
87
|
+
end
|
88
|
+
```
|
89
|
+
|
90
|
+
|
91
|
+
|
92
|
+
|
93
|
+
[See CurateND for our adapter configuration](https://github.com/ndlib/samvera_nd/blob/6fbe79c9725c0f8b4641981044ec250c5163053b/config/initializers/samvera_config.rb#L32-L35).
|
94
|
+
|
95
|
+
## Considerations
|
96
|
+
|
97
|
+
Given a single object A, when we reindex A, we:
|
98
|
+
|
99
|
+
* Find the parent objects of A to calculate the ancestors and pathnames
|
100
|
+
* Iterate through each descendant, in a breadth-first process, to reindex it (and each descendant's descendants).
|
101
|
+
|
102
|
+
This is a potentially time-consuming process and should not be run within the request cycle.
|
data/Rakefile
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require "rspec/core/rake_task"
|
3
|
+
|
4
|
+
namespace :commitment do
|
5
|
+
require 'rubocop/rake_task'
|
6
|
+
# Why hound? Because hound-ci assumes this file, and perhaps you'll be using this
|
7
|
+
RuboCop::RakeTask.new
|
8
|
+
|
9
|
+
task :configure_test_for_code_coverage do
|
10
|
+
ENV['COVERAGE'] = 'true'
|
11
|
+
end
|
12
|
+
task :code_coverage do
|
13
|
+
require 'json'
|
14
|
+
$stdout.puts "Checking commitment:code_coverage"
|
15
|
+
coverage_percentage = JSON.parse(File.read('coverage/.last_run.json')).fetch('result').fetch('covered_percent').to_i
|
16
|
+
goal = 100
|
17
|
+
if goal > coverage_percentage
|
18
|
+
abort("Code Coverage Goal Not Met:\n\t#{goal}%\tExpected\n\t#{coverage_percentage}%\tActual")
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
task(
|
24
|
+
default: [
|
25
|
+
'commitment:rubocop',
|
26
|
+
'commitment:configure_test_for_code_coverage',
|
27
|
+
'spec',
|
28
|
+
'commitment:code_coverage'
|
29
|
+
]
|
30
|
+
)
|
31
|
+
|
32
|
+
RSpec::Core::RakeTask.new(:spec)
|
33
|
+
|
34
|
+
task default: :spec
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "samvera/nesting_indexer"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/bin/setup
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
require "samvera/nesting_indexer/version"
|
2
|
+
require 'samvera/nesting_indexer/relationship_reindexer'
|
3
|
+
require 'samvera/nesting_indexer/repository_reindexer'
|
4
|
+
require 'samvera/nesting_indexer/configuration'
|
5
|
+
require 'samvera/nesting_indexer/railtie' if defined?(Rails)
|
6
|
+
|
7
|
+
module Samvera
|
8
|
+
# Responsible for indexing an object and its related child objects.
|
9
|
+
module NestingIndexer
|
10
|
+
# @api public
|
11
|
+
# Responsible for reindexing the associated document for the given :id and the descendants of that :id.
|
12
|
+
# In a perfect world we could reindex the id as well, but that is for another test.
|
13
|
+
#
|
14
|
+
# @param id [String] - The permanent identifier of the object that will be reindexed along with its children.
|
15
|
+
# @param maximum_nesting_depth [Integer] - there to guard against cyclical graphs
|
16
|
+
# @return [Boolean] - It was successful
|
17
|
+
# @raise Samvera::Exceptions::CycleDetectionError - A potential cycle was detected
|
18
|
+
def self.reindex_relationships(id:, maximum_nesting_depth: configuration.maximum_nesting_depth)
|
19
|
+
RelationshipReindexer.call(id: id, maximum_nesting_depth: maximum_nesting_depth, adapter: adapter)
|
20
|
+
true
|
21
|
+
end
|
22
|
+
|
23
|
+
class << self
|
24
|
+
# Here because I made a previous declaration that .reindex was part of the
|
25
|
+
# public API. Then I decided I didn't want to use that method.
|
26
|
+
alias reindex reindex_relationships
|
27
|
+
end
|
28
|
+
|
29
|
+
# @api public
|
30
|
+
# Responsible for reindexing the entire preservation layer.
|
31
|
+
# @param maximum_nesting_depth [Integer] - there to guard against cyclical graphs
|
32
|
+
# @return [Boolean] - It was successful
|
33
|
+
# @raise Samvera::Exceptions::CycleDetectionError - A potential cycle was detected
|
34
|
+
def self.reindex_all!(maximum_nesting_depth: configuration.maximum_nesting_depth)
|
35
|
+
# While the RepositoryReindexer is responsible for reindexing everything, I
|
36
|
+
# want to inject the lambda that will reindex a single item.
|
37
|
+
id_reindexer = method(:reindex_relationships)
|
38
|
+
RepositoryReindexer.call(maximum_nesting_depth: maximum_nesting_depth, id_reindexer: id_reindexer, adapter: adapter)
|
39
|
+
true
|
40
|
+
end
|
41
|
+
|
42
|
+
# @api public
|
43
|
+
#
|
44
|
+
# Contains the Samvera::NestingIndexer configuration information that is referenceable from within the application.
|
45
|
+
# @see Samvera::NestingIndexer::Configuration
|
46
|
+
def self.configuration
|
47
|
+
@configuration ||= Configuration.new
|
48
|
+
end
|
49
|
+
|
50
|
+
# @api public
|
51
|
+
#
|
52
|
+
# Exposes the data adapter to use for the reindexing process.
|
53
|
+
#
|
54
|
+
# @see Samvera::NestingIndexer::Adapters::AbstractAdapter
|
55
|
+
# @return Object that implements the Samvera::NestingIndexer::Adapters::AbstractAdapter method interface
|
56
|
+
def self.adapter
|
57
|
+
configuration.adapter
|
58
|
+
end
|
59
|
+
|
60
|
+
# @api public
|
61
|
+
#
|
62
|
+
# Capture the configuration information
|
63
|
+
#
|
64
|
+
# @see Samvera::NestingIndexer::Configuration
|
65
|
+
# @see .configuration
|
66
|
+
# @see Samvera::NestingIndexer::Railtie
|
67
|
+
def self.configure(&block)
|
68
|
+
@configuration_block = block
|
69
|
+
# The Rails load sequence means that some of the configured Targets may
|
70
|
+
# not be loaded; as such, under Rails I am not calling configure! here, instead relying on
|
71
|
+
# Samvera::NestingIndexer::Railtie to handle the configure! call
|
72
|
+
configure! unless defined?(Rails)
|
73
|
+
end
|
74
|
+
|
75
|
+
# @api private
|
76
|
+
def self.configure!
|
77
|
+
return false unless @configuration_block.respond_to?(:call)
|
78
|
+
@configuration_block.call(configuration)
|
79
|
+
@configuration_block = nil
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'samvera/nesting_indexer/adapters/abstract_adapter'
|
2
|
+
require 'samvera/nesting_indexer/adapters/in_memory_adapter'
|
3
|
+
|
4
|
+
module Samvera
|
5
|
+
module NestingIndexer
|
6
|
+
# A container for the various adapter implementations.
|
7
|
+
# @see Samvera::NestingIndexer::Adapters::AbstractAdapter
|
8
|
+
# @see Samvera::NestingIndexer::Adapters::InMemoryAdapter
|
9
|
+
module Adapters
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|