samvera-nesting_indexer 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.rubocop.yml +53 -0
- data/.rubocop_todo.yml +12 -0
- data/.ruby-version +1 -0
- data/.travis.yml +22 -0
- data/Gemfile +4 -0
- data/Guardfile +49 -0
- data/LICENSE +14 -0
- data/README.md +102 -0
- data/Rakefile +34 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/samvera/nesting_indexer.rb +82 -0
- data/lib/samvera/nesting_indexer/adapters.rb +12 -0
- data/lib/samvera/nesting_indexer/adapters/abstract_adapter.rb +44 -0
- data/lib/samvera/nesting_indexer/adapters/in_memory_adapter.rb +157 -0
- data/lib/samvera/nesting_indexer/adapters/interface_behavior_spec.rb +53 -0
- data/lib/samvera/nesting_indexer/configuration.rb +51 -0
- data/lib/samvera/nesting_indexer/documents.rb +90 -0
- data/lib/samvera/nesting_indexer/exceptions.rb +35 -0
- data/lib/samvera/nesting_indexer/railtie.rb +12 -0
- data/lib/samvera/nesting_indexer/relationship_reindexer.rb +127 -0
- data/lib/samvera/nesting_indexer/repository_reindexer.rb +65 -0
- data/lib/samvera/nesting_indexer/version.rb +5 -0
- data/samvera-nesting_indexer.gemspec +39 -0
- metadata +294 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: d85b5e9c924b8da1ab4155a9144073696f0d8aba
|
4
|
+
data.tar.gz: a49b5e3004e1b477d878343ce702a03ce02244e4
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 2769007dd241f6eddf9be40857685f87f7e55a18ac103acb9674242df470388cb45dbc2cf7aa06a5b57bb20a278ea31c254eb3d1c06a225e5de0c79c514b60e5
|
7
|
+
data.tar.gz: bb3bbaface89adceb926133b867ccbe188ec026a1611e3db3c4180aacaddc91bc196a36805dc27c27a083c1776cc08b569107ea3412f4941497b59cf49bc2945
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.rubocop.yml
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
inherit_from: .rubocop_todo.yml
|
2
|
+
|
3
|
+
################################################################################
|
4
|
+
## Releasing the hounds in your local environment.
|
5
|
+
##
|
6
|
+
## Setup:
|
7
|
+
## $ gem install rubocop
|
8
|
+
##
|
9
|
+
## Run:
|
10
|
+
## $ rubocop ./path/to/file ./or/path/to/directory -c ./.rubocop.yml
|
11
|
+
##
|
12
|
+
## Generation Notes:
|
13
|
+
## This file was generated via the commitment:install generator. You are free
|
14
|
+
## and expected to change this file.
|
15
|
+
################################################################################
|
16
|
+
AllCops:
|
17
|
+
Include:
|
18
|
+
- Rakefile
|
19
|
+
Exclude:
|
20
|
+
- 'vendor/**/*'
|
21
|
+
- 'tmp/**/*'
|
22
|
+
- 'bin/**/*'
|
23
|
+
- Gemfile
|
24
|
+
- Guardfile
|
25
|
+
- samvera-nesting_indexer.gemspec
|
26
|
+
TargetRubyVersion: 2.2
|
27
|
+
LineLength:
|
28
|
+
Description: 'Limit lines to 160 characters.'
|
29
|
+
Max: 160
|
30
|
+
Enabled: true
|
31
|
+
|
32
|
+
ModuleLength:
|
33
|
+
Exclude:
|
34
|
+
- 'spec/**/*'
|
35
|
+
|
36
|
+
|
37
|
+
Style/StringLiterals:
|
38
|
+
Description: 'Checks if uses of quotes match the configured preference.'
|
39
|
+
Enabled: false
|
40
|
+
|
41
|
+
PercentLiteralDelimiters:
|
42
|
+
Description: 'Use `%`-literal delimiters consistently'
|
43
|
+
Enabled: false
|
44
|
+
|
45
|
+
Documentation:
|
46
|
+
Description: 'Document classes and non-namespace modules.'
|
47
|
+
Enabled: true
|
48
|
+
Exclude:
|
49
|
+
- spec/**/*
|
50
|
+
- test/**/*
|
51
|
+
|
52
|
+
Style/WordArray:
|
53
|
+
Enabled: false
|
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2017-03-24 15:55:07 -0400 using RuboCop version 0.47.1.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 3
|
10
|
+
# Configuration parameters: CountComments, ExcludedMethods.
|
11
|
+
Metrics/BlockLength:
|
12
|
+
Max: 188
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.4.1
|
data/.travis.yml
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
sudo: false
|
2
|
+
cache: bundler
|
3
|
+
|
4
|
+
language: ruby
|
5
|
+
rvm:
|
6
|
+
- 2.3.1
|
7
|
+
- 2.2.5
|
8
|
+
- 2.2.2
|
9
|
+
- 2.1.10
|
10
|
+
- 2.0.0
|
11
|
+
|
12
|
+
matrix:
|
13
|
+
allow_failures:
|
14
|
+
- rvm: "2.3.1"
|
15
|
+
|
16
|
+
before_install: gem install bundler -v 1.12.5
|
17
|
+
|
18
|
+
script: 'bundle exec rake'
|
19
|
+
|
20
|
+
addons:
|
21
|
+
code_climate:
|
22
|
+
repo_token: 71b80cc45ed849e84e5943bb4874393b5ce26a2356381a839552395cd9b2f71a
|
data/Gemfile
ADDED
data/Guardfile
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# A sample Guardfile
|
2
|
+
# More info at https://github.com/guard/guard#readme
|
3
|
+
|
4
|
+
## Uncomment and set this to only include directories you want to watch
|
5
|
+
# directories %w(app lib config test spec features) \
|
6
|
+
# .select{|d| Dir.exists?(d) ? d : UI.warning("Directory #{d} does not exist")}
|
7
|
+
|
8
|
+
## Note: if you are using the `directories` clause above and you are not
|
9
|
+
## watching the project directory ('.'), then you will want to move
|
10
|
+
## the Guardfile to a watched dir and symlink it back, e.g.
|
11
|
+
#
|
12
|
+
# $ mkdir config
|
13
|
+
# $ mv Guardfile config/
|
14
|
+
# $ ln -s config/Guardfile .
|
15
|
+
#
|
16
|
+
# and, you'll have to watch "config/Guardfile" instead of "Guardfile"
|
17
|
+
|
18
|
+
# Note: The cmd option is now required due to the increasing number of ways
|
19
|
+
# rspec may be run, below are examples of the most common uses.
|
20
|
+
# * bundler: 'bundle exec rspec'
|
21
|
+
# * bundler binstubs: 'bin/rspec'
|
22
|
+
# * spring: 'bin/rspec' (This will use spring if running and you have
|
23
|
+
# installed the spring binstubs per the docs)
|
24
|
+
# * zeus: 'zeus rspec' (requires the server to be started separately)
|
25
|
+
# * 'just' rspec: 'rspec'
|
26
|
+
|
27
|
+
notification :terminal_notifier, subtitle: 'Samvera::NestingIndexer', timeout: 4
|
28
|
+
|
29
|
+
guard :rspec, cmd: "bundle exec rspec" do
|
30
|
+
require "guard/rspec/dsl"
|
31
|
+
dsl = Guard::RSpec::Dsl.new(self)
|
32
|
+
|
33
|
+
# Feel free to open issues for suggestions and improvements
|
34
|
+
|
35
|
+
# RSpec files
|
36
|
+
rspec = dsl.rspec
|
37
|
+
watch(rspec.spec_helper) { rspec.spec_dir }
|
38
|
+
watch(rspec.spec_support) { rspec.spec_dir }
|
39
|
+
watch(rspec.spec_files)
|
40
|
+
|
41
|
+
# Ruby files
|
42
|
+
ruby = dsl.ruby
|
43
|
+
dsl.watch_spec_files_for(ruby.lib_files)
|
44
|
+
end
|
45
|
+
|
46
|
+
guard :rubocop do
|
47
|
+
watch(%r{.+\.rb$})
|
48
|
+
watch(%r{(?:.+/)?\.rubocop\.yml$}) { |m| File.dirname(m[0]) }
|
49
|
+
end
|
data/LICENSE
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
##########################################################################
|
2
|
+
# Copyright 2014-2015 University of Notre Dame
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
data/README.md
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
# Samvera::NestingIndexer
|
2
|
+
|
3
|
+
[](https://travis-ci.org/ndlib/samvera-indexer)
|
4
|
+
[](https://codeclimate.com/github/ndlib/samvera-indexer)
|
5
|
+
[](https://codeclimate.com/github/ndlib/samvera-indexer)
|
6
|
+
[](http://inch-ci.org/github/ndlib/samvera-indexer)
|
7
|
+
[](./LICENSE)
|
8
|
+
|
9
|
+
The Samvera::NestingIndexer gem is responsible for indexing the graph relationship of objects. It maps a PreservationDocument to an IndexDocument by mapping a PreservationDocument's direct parents into the paths to get from a root document to the given PreservationDocument.
|
10
|
+
|
11
|
+
* [Background](#background)
|
12
|
+
* [Concepts](#concepts)
|
13
|
+
* [Examples](#examples)
|
14
|
+
* [Adapters](#adapters)
|
15
|
+
* [Considerations](#considerations)
|
16
|
+
|
17
|
+
## Background
|
18
|
+
|
19
|
+
This is a sandbox to work through the reindexing strategy as it relates to [CurateND Collections](https://github.com/ndlib/samvera_nd/issues/420). At this point the code is separate to allow for rapid testing and prototyping (no sense spinning up SOLR and Fedora to walk an arbitrary graph).
|
20
|
+
|
21
|
+
## Concepts
|
22
|
+
|
23
|
+
As we are indexing objects, we have two types of documents:
|
24
|
+
|
25
|
+
1. [PreservationDocument](./lib/samvera/nesting_indexer/documents.rb) - a light-weight representation of a Fedora object
|
26
|
+
2. [IndexDocument](./lib/samvera/nesting_indexer/documents.rb) - a light-weight representation of a SOLR document object
|
27
|
+
|
28
|
+
We have four attributes to consider for indexing the graph:
|
29
|
+
|
30
|
+
1. id - the unique identifier for a document
|
31
|
+
2. parent_ids - the ids for all of the parents of a given document
|
32
|
+
3. pathnames - the paths to traverse from a root document to the given document
|
33
|
+
4. ancestors - the pathnames of each of the ancestors
|
34
|
+
|
35
|
+
See [Samvera::NestingIndexer::Documents::IndexDocument](./lib/samvera/nesting_indexer/documents.rb) for further discussion.
|
36
|
+
|
37
|
+
To reindex a single document, we leverage the [`Samvera::NestingIndexer.reindex_relationships`](./lib/samvera/nesting_indexer.rb) method.
|
38
|
+
|
39
|
+
## Examples
|
40
|
+
|
41
|
+
Given the following PreservationDocuments:
|
42
|
+
|
43
|
+
| PID | Parents |
|
44
|
+
|-----|---------|
|
45
|
+
| A | - |
|
46
|
+
| B | - |
|
47
|
+
| C | A |
|
48
|
+
| D | A, B |
|
49
|
+
| E | C |
|
50
|
+
|
51
|
+
If we were to reindex the above PreservationDocuments, we will generate the following IndexDocuments:
|
52
|
+
|
53
|
+
| PID | Parents | Pathnames | Ancestors |
|
54
|
+
|-----|---------|------------|-----------|
|
55
|
+
| A | - | [A] | [] |
|
56
|
+
| B | - | [B] | [] |
|
57
|
+
| C | A | [A/C] | [A] |
|
58
|
+
| D | A, B | [A/D, B/D] | [A, B] |
|
59
|
+
| E | C | [A/C/E] | [A/C] |
|
60
|
+
|
61
|
+
For more scenarios, look at the [Reindex PID and Descendants specs](./spec/features/reindex_id_and_descendants_spec.rb).
|
62
|
+
|
63
|
+
## Adapters
|
64
|
+
|
65
|
+
An [AbstractAdapter](./lib/samvera/nesting_indexer/adapters/abstract_adapter.rb) provides the method interface for others to build against.
|
66
|
+
|
67
|
+
The [InMemory adapter](./lib/samvera/nesting_indexer/adapters/in_memory_adapter.rb) is a reference implementation (and used to ease testing overhead).
|
68
|
+
|
69
|
+
CurateND has implemented the [following adapter](https://github.com/ndlib/samvera_nd/blob/master/lib/samvera/library_collection_indexing_adapter.rb) for its LibraryCollection indexing.
|
70
|
+
|
71
|
+
To define the adapter for your application:
|
72
|
+
|
73
|
+
```ruby
|
74
|
+
# In an application initializer (e.g. config/samvera_indexer_config.rb)
|
75
|
+
Samvera::NestingIndexer.configure do |config|
|
76
|
+
config.adapter = MyCustomAdapter
|
77
|
+
end
|
78
|
+
```
|
79
|
+
|
80
|
+
To best ensure you have implemented the adapter to spec:
|
81
|
+
|
82
|
+
```ruby
|
83
|
+
# In the spec for MyCustomAdapter
|
84
|
+
require 'samvera/nesting_indexer/adapters/interface_behavior_spec'
|
85
|
+
RSpec.describe MyCustomAdapter do
|
86
|
+
it_behaves_like 'a Samvera::NestingIndexer::Adapter'
|
87
|
+
end
|
88
|
+
```
|
89
|
+
|
90
|
+
|
91
|
+
|
92
|
+
|
93
|
+
[See CurateND for our adapter configuration](https://github.com/ndlib/samvera_nd/blob/6fbe79c9725c0f8b4641981044ec250c5163053b/config/initializers/samvera_config.rb#L32-L35).
|
94
|
+
|
95
|
+
## Considerations
|
96
|
+
|
97
|
+
Given a single object A, when we reindex A, we:
|
98
|
+
|
99
|
+
* Find the parent objects of A to calculate the ancestors and pathnames
|
100
|
+
* Iterate through each descendant, in a breadth-first process, to reindex it (and each descendant's descendants).
|
101
|
+
|
102
|
+
This is a potentially time-consuming process and should not be run within the request cycle.
|
data/Rakefile
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require "rspec/core/rake_task"
|
3
|
+
|
4
|
+
namespace :commitment do
|
5
|
+
require 'rubocop/rake_task'
|
6
|
+
# Why hound? Because hound-ci assumes this file, and perhaps you'll be using this
|
7
|
+
RuboCop::RakeTask.new
|
8
|
+
|
9
|
+
task :configure_test_for_code_coverage do
|
10
|
+
ENV['COVERAGE'] = 'true'
|
11
|
+
end
|
12
|
+
task :code_coverage do
|
13
|
+
require 'json'
|
14
|
+
$stdout.puts "Checking commitment:code_coverage"
|
15
|
+
coverage_percentage = JSON.parse(File.read('coverage/.last_run.json')).fetch('result').fetch('covered_percent').to_i
|
16
|
+
goal = 100
|
17
|
+
if goal > coverage_percentage
|
18
|
+
# Report the goal as Expected and the measured coverage as Actual.
abort("Code Coverage Goal Not Met:\n\t#{goal}%\tExpected\n\t#{coverage_percentage}%\tActual")
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
task(
|
24
|
+
default: [
|
25
|
+
'commitment:rubocop',
|
26
|
+
'commitment:configure_test_for_code_coverage',
|
27
|
+
'spec',
|
28
|
+
'commitment:code_coverage'
|
29
|
+
]
|
30
|
+
)
|
31
|
+
|
32
|
+
RSpec::Core::RakeTask.new(:spec)
|
33
|
+
|
34
|
+
task default: :spec
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "samvera/nesting_indexer"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/bin/setup
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
require "samvera/nesting_indexer/version"
|
2
|
+
require 'samvera/nesting_indexer/relationship_reindexer'
|
3
|
+
require 'samvera/nesting_indexer/repository_reindexer'
|
4
|
+
require 'samvera/nesting_indexer/configuration'
|
5
|
+
require 'samvera/nesting_indexer/railtie' if defined?(Rails)
|
6
|
+
|
7
|
+
module Samvera
|
8
|
+
# Responsible for indexing an object and its related child objects.
|
9
|
+
module NestingIndexer
|
10
|
+
# @api public
|
11
|
+
# Responsible for reindexing the associated document for the given :id and the descendants of that :id.
|
12
|
+
# In a perfect world we could reindex the id as well; but that is for another test.
|
13
|
+
#
|
14
|
+
# @param id [String] - The permanent identifier of the object that will be reindexed along with its children.
|
15
|
+
# @param maximum_nesting_depth [Integer] - there to guard against cyclical graphs; defaults to the configured maximum_nesting_depth
|
16
|
+
# @return [Boolean] - Always true when the reindexer call returns without raising
|
17
|
+
# @raise Samvera::Exceptions::CycleDetectionError - A potential cycle was detected
|
18
|
+
def self.reindex_relationships(id:, maximum_nesting_depth: configuration.maximum_nesting_depth)
|
19
|
+
RelationshipReindexer.call(id: id, maximum_nesting_depth: maximum_nesting_depth, adapter: adapter)
|
20
|
+
true
|
21
|
+
end
|
22
|
+
|
23
|
+
class << self
|
24
|
+
# Here because I made a previous declaration that .reindex was part of the
|
25
|
+
# public API. Then I decided I didn't want to use that method; it remains as an alias of .reindex_relationships.
|
26
|
+
alias reindex reindex_relationships
|
27
|
+
end
|
28
|
+
|
29
|
+
# @api public
|
30
|
+
# Responsible for reindexing the entire preservation layer.
|
31
|
+
# @param maximum_nesting_depth [Integer] - there to guard against cyclical graphs; defaults to the configured maximum_nesting_depth
|
32
|
+
# @return [Boolean] - Always true when the reindexer call returns without raising
|
33
|
+
# @raise Samvera::Exceptions::CycleDetectionError - A potential cycle was detected
|
34
|
+
def self.reindex_all!(maximum_nesting_depth: configuration.maximum_nesting_depth)
|
35
|
+
# While the RepositoryReindexer is responsible for reindexing everything, I
|
36
|
+
# want to inject the callable (a Method object for .reindex_relationships) that will reindex a single item.
|
37
|
+
id_reindexer = method(:reindex_relationships)
|
38
|
+
RepositoryReindexer.call(maximum_nesting_depth: maximum_nesting_depth, id_reindexer: id_reindexer, adapter: adapter)
|
39
|
+
true
|
40
|
+
end
|
41
|
+
|
42
|
+
# @api public
|
43
|
+
#
|
44
|
+
# Returns the Samvera::NestingIndexer configuration, building a new Configuration on first access and memoizing it.
|
45
|
+
# @see Samvera::NestingIndexer::Configuration
|
46
|
+
def self.configuration
|
47
|
+
@configuration ||= Configuration.new
|
48
|
+
end
|
49
|
+
|
50
|
+
# @api public
|
51
|
+
#
|
52
|
+
# Exposes the data adapter to use for the reindexing process.
|
53
|
+
#
|
54
|
+
# @see Samvera::NestingIndexer::Adapters::AbstractAdapter
|
55
|
+
# @return Object that implements the Samvera::NestingIndexer::Adapters::AbstractAdapter method interface
|
56
|
+
def self.adapter
|
57
|
+
configuration.adapter
|
58
|
+
end
|
59
|
+
|
60
|
+
# @api public
|
61
|
+
#
|
62
|
+
# Capture the configuration block; it is applied immediately unless Rails is defined.
|
63
|
+
#
|
64
|
+
# @see Samvera::NestingIndexer::Configuration
|
65
|
+
# @see .configuration
|
66
|
+
# @see Samvera::NestingIndexer::Railtie
|
67
|
+
def self.configure(&block)
|
68
|
+
@configuration_block = block
|
69
|
+
# The Rails load sequence means that some of the configured Targets may
|
70
|
+
# not be loaded; as such, when Rails is defined I am not calling configure! here, instead relying on
|
71
|
+
# Samvera::NestingIndexer::Railtie to handle the configure! call
|
72
|
+
configure! unless defined?(Rails)
|
73
|
+
end
|
74
|
+
|
75
|
+
# @api private
|
# Applies the stored configuration block to .configuration and then clears the stored block.
# Returns false, without calling anything, when no callable block is stored.
|
76
|
+
def self.configure!
|
77
|
+
return false unless @configuration_block.respond_to?(:call)
|
78
|
+
@configuration_block.call(configuration)
|
79
|
+
@configuration_block = nil
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'samvera/nesting_indexer/adapters/abstract_adapter'
|
2
|
+
require 'samvera/nesting_indexer/adapters/in_memory_adapter'
|
3
|
+
|
4
|
+
module Samvera
|
5
|
+
module NestingIndexer
|
6
|
+
# A namespace container for the various adapter implementations; it defines no behavior of its own.
|
7
|
+
# @see Samvera::NestingIndexer::Adapters::AbstractAdapter
|
8
|
+
# @see Samvera::NestingIndexer::Adapters::InMemoryAdapter
|
9
|
+
module Adapters
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|