curate-indexer 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e1be92053b6ce30d9e75ac935c7000861725a4c5
4
+ data.tar.gz: babb5d4f0228d9210d096688d920d5c457f0f266
5
+ SHA512:
6
+ metadata.gz: eb5f7aaecda93774887fe655bd9c9884901c65360f32575f74df1f1e15404ef5b0a3933e5992ef5a4a35b794364fddf5c0cbc63f5212214efe1d288a5f7099e3
7
+ data.tar.gz: 1aaf669fc93f8f435b6d116417da0433428cb72a0ff5871c362e8815d6348cbdb012dc2d8c7bee7b9a4ba9c17cdf315d435cc08c05a45d91223db0741aa66b7d
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.rubocop.yml ADDED
@@ -0,0 +1,48 @@
1
+ ################################################################################
2
+ ## Releasing the hounds in your local environment.
3
+ ##
4
+ ## Setup:
5
+ ## $ gem install rubocop
6
+ ##
7
+ ## Run:
8
+ ## $ rubocop ./path/to/file ./or/path/to/directory -c ./.rubocop.yml
9
+ ##
10
+ ## Generation Notes:
11
+ ## This file was generated via the commitment:install generator. You are free
12
+ ## and expected to change this file.
13
+ ################################################################################
14
+ AllCops:
15
+ Include:
16
+ - Rakefile
17
+ Exclude:
18
+ - 'vendor/**/*'
19
+ - 'tmp/**/*'
20
+ - 'bin/**/*'
21
+ TargetRubyVersion: 2.0
22
+ LineLength:
23
+ Description: 'Limit lines to 140 characters.'
24
+ Max: 140
25
+ Enabled: true
26
+
27
+ ModuleLength:
28
+ Exclude:
29
+ - 'spec/**/*'
30
+
31
+
32
+ Style/StringLiterals:
33
+ Description: 'Checks if uses of quotes match the configured preference.'
34
+ Enabled: false
35
+
36
+ PercentLiteralDelimiters:
37
+ Description: 'Use `%`-literal delimiters consistently'
38
+ Enabled: false
39
+
40
+ Documentation:
41
+ Description: 'Document classes and non-namespace modules.'
42
+ Enabled: true
43
+ Exclude:
44
+ - spec/**/*
45
+ - test/**/*
46
+
47
+ Style/WordArray:
48
+ Enabled: false
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.2.2
data/.travis.yml ADDED
@@ -0,0 +1,22 @@
1
+ sudo: false
2
+ cache: bundler
3
+
4
+ language: ruby
5
+ rvm:
6
+ - 2.3.1
7
+ - 2.2.5
8
+ - 2.2.2
9
+ - 2.1.10
10
+ - 2.0.0
11
+
12
+ matrix:
13
+ allow_failures:
14
+ - rvm: "2.3.1"
15
+
16
+ before_install: gem install bundler -v 1.12.5
17
+
18
+ script: 'bundle exec rake'
19
+
20
+ addons:
21
+ code_climate:
22
+ repo_token: 71b80cc45ed849e84e5943bb4874393b5ce26a2356381a839552395cd9b2f71a
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in curate-indexer.gemspec
4
+ gemspec
data/Guardfile ADDED
@@ -0,0 +1,49 @@
1
+ # A sample Guardfile
2
+ # More info at https://github.com/guard/guard#readme
3
+
4
+ ## Uncomment and set this to only include directories you want to watch
5
+ # directories %w(app lib config test spec features) \
6
+ # .select{|d| Dir.exist?(d) ? d : UI.warning("Directory #{d} does not exist")}
7
+
8
+ ## Note: if you are using the `directories` clause above and you are not
9
+ ## watching the project directory ('.'), then you will want to move
10
+ ## the Guardfile to a watched dir and symlink it back, e.g.
11
+ #
12
+ # $ mkdir config
13
+ # $ mv Guardfile config/
14
+ # $ ln -s config/Guardfile .
15
+ #
16
+ # and, you'll have to watch "config/Guardfile" instead of "Guardfile"
17
+
18
+ # Note: The cmd option is now required due to the increasing number of ways
19
+ # rspec may be run, below are examples of the most common uses.
20
+ # * bundler: 'bundle exec rspec'
21
+ # * bundler binstubs: 'bin/rspec'
22
+ # * spring: 'bin/rspec' (This will use spring if running and you have
23
+ # installed the spring binstubs per the docs)
24
+ # * zeus: 'zeus rspec' (requires the server to be started separately)
25
+ # * 'just' rspec: 'rspec'
26
+
27
+ notification :terminal_notifier, subtitle: 'Curate::Indexer'
28
+
29
+ guard :rspec, cmd: "bundle exec rspec" do
30
+ require "guard/rspec/dsl"
31
+ dsl = Guard::RSpec::Dsl.new(self)
32
+
33
+ # Feel free to open issues for suggestions and improvements
34
+
35
+ # RSpec files
36
+ rspec = dsl.rspec
37
+ watch(rspec.spec_helper) { rspec.spec_dir }
38
+ watch(rspec.spec_support) { rspec.spec_dir }
39
+ watch(rspec.spec_files)
40
+
41
+ # Ruby files
42
+ ruby = dsl.ruby
43
+ dsl.watch_spec_files_for(ruby.lib_files)
44
+ end
45
+
46
+ guard :rubocop do
47
+ watch(%r{.+\.rb$})
48
+ watch(%r{(?:.+/)?\.rubocop\.yml$}) { |m| File.dirname(m[0]) }
49
+ end
data/LICENSE ADDED
@@ -0,0 +1,14 @@
1
+ ##########################################################################
2
+ # Copyright 2014-2015 University of Notre Dame
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
data/README.md ADDED
@@ -0,0 +1,9 @@
1
+ # Curate::Indexer
2
+
3
+ [![Build Status](https://travis-ci.org/ndlib/curate-indexer.png?branch=master)](https://travis-ci.org/ndlib/curate-indexer)
4
+ [![Test Coverage](https://codeclimate.com/github/ndlib/curate-indexer/badges/coverage.svg)](https://codeclimate.com/github/ndlib/curate-indexer)
5
+ [![Code Climate](https://codeclimate.com/github/ndlib/curate-indexer.png)](https://codeclimate.com/github/ndlib/curate-indexer)
6
+ [![Documentation Status](http://inch-ci.org/github/ndlib/curate-indexer.svg?branch=master)](http://inch-ci.org/github/ndlib/curate-indexer)
7
+ [![APACHE 2 License](http://img.shields.io/badge/APACHE2-license-blue.svg)](./LICENSE)
8
+
9
+ This is a sandbox to work through the reindexing strategy as it relates to [CurateND Collections](https://github.com/ndlib/curate_nd/issues/420). At this point the code is separate to allow for rapid testing and prototyping (no sense spinning up SOLR and Fedora to walk an arbitrary graph).
data/Rakefile ADDED
@@ -0,0 +1,34 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
# Quality-gate tasks: style checking plus a hard code coverage threshold.
namespace :commitment do
  require 'rubocop/rake_task'
  # Why hound? Because hound-ci assumes this file, and perhaps you'll be using this
  # Defines the commitment:rubocop task.
  RuboCop::RakeTask.new

  # Sets the COVERAGE flag before the specs run.
  # NOTE(review): presumably the spec helper reads ENV['COVERAGE'] to enable
  # coverage collection; the helper is not part of this file - confirm.
  task :configure_test_for_code_coverage do
    ENV['COVERAGE'] = 'true'
  end

  # Aborts the build when the last recorded coverage is below the goal.
  # Reads coverage/.last_run.json, so it must run after the specs.
  task :code_coverage do
    require 'json'
    $stdout.puts "Checking commitment:code_coverage"
    coverage_percentage = JSON.parse(File.read('coverage/.last_run.json')).fetch('result').fetch('covered_percent').to_i
    goal = 100
    if goal > coverage_percentage
      # The goal is what we expected; the measured percentage is what we actually got.
      abort("Code Coverage Goal Not Met:\n\t#{goal}%\tExpected\n\t#{coverage_percentage}%\tActual")
    end
  end
end

RSpec::Core::RakeTask.new(:spec)

# The default task runs the full gate in order. 'spec' is already one of its
# prerequisites, so no additional `task default: :spec` declaration is needed.
task(
  default: [
    'commitment:rubocop',
    'commitment:configure_test_for_code_coverage',
    'spec',
    'commitment:code_coverage'
  ]
)
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "curate/indexer"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,39 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'curate/indexer/version'
5
+
6
# Gem specification for curate-indexer.
Gem::Specification.new do |spec|
  spec.name          = "curate-indexer"
  spec.version       = Curate::Indexer::VERSION
  spec.authors       = ["Jeremy Friesen"]
  spec.email         = ["jeremy.n.friesen@gmail.com"]

  spec.summary       = %q{A playground for CurateND collections indexing}
  spec.description   = %q{A playground for CurateND collections indexing}
  spec.homepage      = "https://github.com/ndlib/curate-indexer"
  # The packaged LICENSE file is the Apache License, Version 2.0; declare it so
  # the published gem metadata does not report an empty license list.
  spec.license       = "Apache-2.0"

  # Package everything tracked by git except the test suites.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  # Executables are collected from exe/, so bindir has to name that same
  # directory. The scripts under bin/ (console, setup) are development helpers
  # and are not installed.
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]
  spec.required_ruby_version = '~>2.0'

  spec.add_development_dependency "bundler", "~> 1.12"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
  spec.add_development_dependency "rspec-its"
  spec.add_development_dependency "guard-rspec"
  spec.add_development_dependency "guard-rubocop"
  spec.add_development_dependency "terminal-notifier-guard"
  spec.add_development_dependency "terminal-notifier"
  spec.add_development_dependency "rubocop"
  spec.add_development_dependency "simplecov"
  spec.add_development_dependency "codeclimate-test-reporter"
  spec.add_development_dependency "json"
  spec.add_development_dependency "byebug"
  spec.add_development_dependency "railties", '~> 4.0'
  # As a secondary dependency, listen is preventing bundling
  spec.add_development_dependency "listen", '~> 3.0.8'
  spec.add_dependency "dry-equalizer"
end
@@ -0,0 +1,95 @@
1
module Curate
  module Indexer
    module Adapters
      # @api public
      # A module that defines the interface of methods required to interact with Curate::Indexer operations.
      #
      # The methods are instance methods so that `extend AbstractAdapter` really
      # installs them on the extending adapter: an adapter that forgets to
      # implement one of them raises NotImplementedError instead of NoMethodError.
      module AbstractAdapter
        # Keeps the methods callable directly on AbstractAdapter itself
        # (e.g. AbstractAdapter.find_index_document_by), as before.
        extend self

        # @api public
        # @param pid [String]
        # @return [Curate::Indexer::Documents::PreservationDocument]
        # @raise [NotImplementedError] unless overridden by the adapter
        def find_preservation_document_by(*)
          raise NotImplementedError
        end

        # @api public
        # @param pid [String]
        # @return [Curate::Indexer::Documents::IndexDocument]
        # @raise [NotImplementedError] unless overridden by the adapter
        def find_index_document_by(*)
          raise NotImplementedError
        end

        # @api public
        # @yield [Curate::Indexer::Documents::PreservationDocument]
        # @raise [NotImplementedError] unless overridden by the adapter
        def each_preservation_document
          raise NotImplementedError
        end

        # @api public
        # @param pid [String]
        # @yield [Curate::Indexer::Documents::IndexDocument]
        # @raise [NotImplementedError] unless overridden by the adapter
        def each_child_document_of(*, &_block)
          raise NotImplementedError
        end

        # @api public
        # @return [Curate::Indexer::Documents::IndexDocument]
        # @raise [NotImplementedError] unless overridden by the adapter
        def write_document_attributes_to_index_layer(*)
          raise NotImplementedError
        end
      end

      # @api public
      #
      # Defines the interface for interacting with the InMemory layer. It is a reference
      # implementation that is used throughout tests.
      module InMemoryAdapter
        extend AbstractAdapter

        # @api public
        # @param pid [String]
        # @return [Curate::Indexer::Documents::PreservationDocument]
        def self.find_preservation_document_by(pid)
          Preservation.find(pid)
        end

        # @api public
        # @param pid [String]
        # @return [Curate::Indexer::Documents::IndexDocument]
        def self.find_index_document_by(pid)
          Index.find(pid)
        end

        # @api public
        # @yield [Curate::Indexer::Documents::PreservationDocument]
        def self.each_preservation_document
          Preservation.find_each { |document| yield(document) }
        end

        # @api public
        # @param pid [String]
        # @yield [Curate::Indexer::Documents::IndexDocument]
        def self.each_child_document_of(pid, &block)
          Index.each_child_document_of(pid, &block)
        end

        # @api public
        # This is not something that I envision using in the production environment;
        # It is here to keep the Preservation system isolated and accessible only through interfaces.
        # @return [Curate::Indexer::Documents::PreservationDocument]
        def self.write_document_attributes_to_preservation_layer(attributes = {})
          Preservation.write_document(attributes)
        end

        # @api public
        # @return [Curate::Indexer::Documents::IndexDocument]
        def self.write_document_attributes_to_index_layer(attributes = {})
          Index.write_document(attributes)
        end

        # @api private
        # Empties both the in-memory preservation and index stores.
        def self.clear_cache!
          Preservation.clear_cache!
          Index.clear_cache!
        end
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module Curate
  # :nodoc:
  module Indexer
    # Holds the settings for Curate::Indexer; currently just the adapter.
    class Configuration
      # TODO: Should we guard against a bad adapter?
      attr_writer :adapter

      # @return the explicitly assigned adapter, or the in-memory adapter when none was assigned
      def adapter
        return @adapter if @adapter
        default_adapter
      end

      private

      # Loads the adapters lazily so they are only required when no adapter was assigned.
      def default_adapter
        require 'curate/indexer/adapters'
        Adapters::InMemoryAdapter
      end
    end
    private_constant :Configuration
  end
end
@@ -0,0 +1,47 @@
1
+ require 'dry-equalizer'
2
+
3
module Curate
  module Indexer
    module Documents
      # @api public
      #
      # A simplified document that reflects the necessary attributes for re-indexing
      # the children of Fedora objects.
      class PreservationDocument
        attr_reader :pid, :parent_pids

        # @param keywords [Hash] must contain :pid and :parent_pids
        # @raise [KeyError] when either key is missing
        def initialize(keywords = {})
          pid = keywords.fetch(:pid)
          parent_pids = keywords.fetch(:parent_pids)
          @pid = pid.to_s
          @parent_pids = Array(parent_pids)
        end
      end

      # @api private
      #
      # A rudimentary representation of what is needed to reindex Solr documents
      class IndexDocument
        # A quick and dirty means of doing comparative logic; the collections are
        # compared in sorted form so their order does not affect equality.
        include Dry::Equalizer(:pid, :sorted_parent_pids, :sorted_pathnames, :sorted_ancestors)

        attr_reader :pid, :parent_pids, :pathnames, :ancestors

        # @param keywords [Hash] must contain :pid, :parent_pids, :pathnames and :ancestors
        # @raise [KeyError] when any of the keys is missing
        def initialize(keywords = {})
          pid = keywords.fetch(:pid)
          parent_pids = keywords.fetch(:parent_pids)
          pathnames = keywords.fetch(:pathnames)
          ancestors = keywords.fetch(:ancestors)
          @pid = pid.to_s
          @parent_pids = Array(parent_pids)
          @pathnames = Array(pathnames)
          @ancestors = Array(ancestors)
        end

        # Defines sorted_parent_pids, sorted_pathnames and sorted_ancestors, each
        # returning a sorted copy of the matching collection.
        %i[parent_pids pathnames ancestors].each do |collection|
          define_method("sorted_#{collection}") { public_send(collection).sort }
        end
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
module Curate
  module Indexer
    module Exceptions
      # Base class for the errors raised by Curate::Indexer.
      class RuntimeError < ::RuntimeError; end

      # Raised when we may have detected a cycle within the graph
      class CycleDetectionError < RuntimeError
        # @param pid [#to_s] the PID reported alongside the suspected cycle
        def initialize(pid)
          @pid = pid
          super("Possible graph cycle discovered related to PID:#{pid}.")
        end

        attr_reader :pid
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
+ require 'curate/indexer/storage_module'
2
+ require 'curate/indexer/documents'
3
+
4
module Curate
  # :nodoc:
  module Indexer
    # @api private
    #
    # An abstract representation of the underlying index service. In the case of
    # CurateND this is an abstraction of Solr.
    module Index
      # :nodoc:
      # Backing store for index documents, with a lookup by parent pid.
      module Storage
        extend StorageModule

        # @return [Array] the stored documents whose parent_pids include the given pid
        def self.find_children_of_pid(pid)
          cache.values.select { |candidate| candidate.parent_pids.include?(pid) }
        end
      end
      private_constant :Storage

      class << self
        # Empties the index store.
        def clear_cache!
          Storage.clear_cache!
        end

        # Looks up the index document stored for the given pid.
        def find(pid)
          Storage.find(pid)
        end

        # Yields each stored document that lists the given pid as a parent.
        def each_child_document_of(pid, &block)
          children = Storage.find_children_of_pid(pid)
          children.each(&block)
        end

        # Builds an IndexDocument from the attributes, stores it and returns it.
        def write_document(attributes = {})
          document = Documents::IndexDocument.new(attributes)
          Storage.write(document)
          document
        end
      end
    end
    private_constant :Index
  end
end
@@ -0,0 +1,39 @@
1
+ require 'curate/indexer/storage_module'
2
+ require 'curate/indexer/documents'
3
+
4
module Curate
  # :nodoc:
  module Indexer
    # @api private
    #
    # A module responsible for containing the "preservation interface" logic.
    # In the case of CurateND, there will need to be an adapter to get a Fedora
    # object coerced into a Curate::Indexer::Documents::PreservationDocument
    module Preservation
      # :nodoc:
      # Backing store for preservation documents.
      module MemoryStorage
        extend StorageModule
      end
      private_constant :MemoryStorage

      class << self
        # Looks up the preservation document stored for the given pid.
        # Any additional arguments are accepted and ignored.
        def find(pid, *)
          MemoryStorage.find(pid)
        end

        # Yields every stored preservation document; positional arguments are ignored.
        def find_each(*, &block)
          MemoryStorage.find_each(&block)
        end

        # Empties the preservation store.
        def clear_cache!
          MemoryStorage.clear_cache!
        end

        # Builds a PreservationDocument from the attributes, stores it and returns it.
        def write_document(attributes = {})
          document = Documents::PreservationDocument.new(attributes)
          MemoryStorage.write(document)
          document
        end
      end
    end
    private_constant :Preservation
  end
end
@@ -0,0 +1,13 @@
1
+ require 'rails/railtie'
2
+
3
module Curate
  module Indexer
    # Connect into the boot sequence of a Rails application
    class Railtie < Rails::Railtie
      # Rails calls eager_load! on every registered namespace when eager loading
      # is enabled, so only register when Curate::Indexer implements it;
      # otherwise booting with eager loading would raise NoMethodError.
      config.eager_load_namespaces << Curate::Indexer if Curate::Indexer.respond_to?(:eager_load!)

      # Apply the configuration block captured by Curate::Indexer.configure.
      config.to_prepare do
        Curate::Indexer.send(:configure!)
      end
    end
  end
end
@@ -0,0 +1,103 @@
1
+ require 'curate/indexer/exceptions'
2
+ require 'curate/indexer/index'
3
+ require 'curate/indexer/preservation'
4
+ require 'forwardable'
5
+ require 'set'
6
+
7
module Curate
  # Establishing namespace
  module Indexer
    # Responsible for reindexing the PID and its descendants.
    #
    # Documents are processed breadth-first from a queue: each processed document
    # enqueues its indexed children with a time_to_live one lower than its own.
    # @note There is cycle detection via the TIME_TO_LIVE counter
    # @api private
    class RelationshipReindexer
      # Convenience entry point; see #initialize for the options.
      # @return [RelationshipReindexer] the instance that performed the reindex
      def self.call(options = {})
        new(options).call
      end

      # @param options [Hash]
      # @option options [#to_s] :pid the PID to start reindexing from (required)
      # @option options [#to_i] :time_to_live number of levels that may be walked (required)
      # @option options [Object] :adapter object implementing the adapter interface (required)
      # @option options [#push, #shift] :queue work queue; defaults to a new Array
      # @raise [KeyError] when a required option is missing
      def initialize(options = {})
        @pid = options.fetch(:pid).to_s
        @time_to_live = options.fetch(:time_to_live).to_i
        @adapter = options.fetch(:adapter)
        @queue = options.fetch(:queue, [])
      end
      attr_reader :pid, :time_to_live, :queue, :adapter

      # Reindexes the starting PID, then every descendant found through the index layer.
      # @return [RelationshipReindexer] self
      # @raise [Curate::Indexer::Exceptions::CycleDetectionError] when a document is reached with no time to live left
      def call
        enqueue(pid, time_to_live)
        while (processing_document = dequeue)
          process_a_document(processing_document)
          adapter.each_child_document_of(processing_document.pid) do |child|
            enqueue(child.pid, processing_document.time_to_live - 1)
          end
        end
        self
      end

      private

      extend Forwardable
      def_delegator :queue, :shift, :dequeue

      # A queue entry: the PID to process and the time to live left at that depth.
      ProcessingDocument = Struct.new(:pid, :time_to_live)
      private_constant :ProcessingDocument

      def enqueue(pid, time_to_live)
        queue.push(ProcessingDocument.new(pid, time_to_live))
      end

      # Rebuilds and writes the index attributes of a single queued document.
      # The error reports the PID the reindex started from.
      def process_a_document(index_document)
        raise Exceptions::CycleDetectionError, pid if index_document.time_to_live <= 0
        preservation_document = adapter.find_preservation_document_by(index_document.pid)
        adapter.write_document_attributes_to_index_layer(parent_pids_and_path_and_ancestors_for(preservation_document))
      end

      def parent_pids_and_path_and_ancestors_for(preservation_document)
        ParentAndPathAndAncestorsBuilder.new(preservation_document, adapter).to_hash
      end

      # A small object that helps encapsulate the logic of building the hash of information regarding
      # the initialization of a Documents::IndexDocument
      class ParentAndPathAndAncestorsBuilder
        def initialize(preservation_document, adapter)
          @preservation_document = preservation_document
          @parent_pids = Set.new
          @pathnames = Set.new
          @ancestors = Set.new
          @adapter = adapter
          compile!
        end

        # @return [Hash] the :pid, :parent_pids, :pathnames and :ancestors attributes
        def to_hash
          { pid: @preservation_document.pid, parent_pids: @parent_pids.to_a, pathnames: @pathnames.to_a, ancestors: @ancestors.to_a }
        end

        private

        attr_reader :adapter

        # Collects the attributes contributed by each parent's index document.
        def compile!
          @preservation_document.parent_pids.each do |parent_pid|
            parent_index_document = adapter.find_index_document_by(parent_pid)
            compile_one!(parent_index_document)
          end
          # Ensuring that an "orphan" has a path to get to it
          @pathnames << @preservation_document.pid if @parent_pids.empty?
        end

        def compile_one!(parent_index_document)
          @parent_pids << parent_index_document.pid
          parent_index_document.pathnames.each do |pathname|
            @pathnames << File.join(pathname, @preservation_document.pid)
            # Every leading segment of the parent's path ("a", "a/b", ...) is an ancestor.
            slugs = pathname.split("/")
            slugs.each_index { |i| @ancestors << slugs[0..i].join('/') }
          end
          # Merge in place rather than allocating a new Set for every parent.
          @ancestors.merge(parent_index_document.ancestors)
        end
      end
      private_constant :ParentAndPathAndAncestorsBuilder
    end
    private_constant :RelationshipReindexer
  end
end
@@ -0,0 +1,49 @@
1
module Curate
  # Establishing namespace
  module Indexer
    # Responsible for reindexing the entire repository
    # @api private
    # @note There is cycle detection logic for walking the graph prior to attempting relationship re-indexing
    class RepositoryReindexer
      # Convenience entry point; see #initialize for the options.
      def self.call(*args)
        new(*args).call
      end

      # @param options [Hash]
      # @option options [#to_i] :time_to_live how many parent levels may be walked (required)
      # @option options [#call] :pid_reindexer called with each PID to reindex (required)
      # @option options [Object] :adapter object implementing the adapter interface (required)
      # @raise [KeyError] when a required option is missing
      def initialize(options = {})
        @max_time_to_live = options.fetch(:time_to_live).to_i
        @pid_reindexer = options.fetch(:pid_reindexer)
        @adapter = options.fetch(:adapter)
        # Hash used as a set: constant-time membership checks instead of
        # scanning an ever-growing Array for every document.
        @processed_pids = {}
      end

      # Reindexes every preservation document, parents before children.
      # @raise [Curate::Indexer::Exceptions::CycleDetectionError] when the parent walk runs out of time to live
      def call
        @adapter.each_preservation_document { |document| recursive_reindex(document, max_time_to_live) }
      end

      private

      attr_reader :max_time_to_live, :processed_pids, :pid_reindexer

      # Given that we are attempting to reindex the parents before we reindex, we can't rely on
      # the reindex time_to_live but instead must have a separate time to live.
      #
      # The reindexing process assumes that an object's parents have been indexed; Thus we need to
      # walk up the parent graph to reindex the parents before we start on the child.
      def recursive_reindex(document, time_to_live = max_time_to_live)
        return true if processed_pids.key?(document.pid)
        raise Exceptions::CycleDetectionError, document.pid if time_to_live <= 0
        document.parent_pids.each do |parent_pid|
          parent_document = @adapter.find_preservation_document_by(parent_pid)
          recursive_reindex(parent_document, time_to_live - 1)
        end
        reindex_a_pid(document.pid)
      end

      # Reindexes one PID and remembers it so it is not processed again.
      def reindex_a_pid(pid)
        pid_reindexer.call(pid)
        processed_pids[pid] = true
      end
    end
    private_constant :RepositoryReindexer
  end
end
@@ -0,0 +1,34 @@
1
module Curate
  module Indexer
    # @api private
    #
    # A module mixin to expose rudimentary read/write capabilities backed by an
    # in-memory Hash keyed by the document's pid as a String.
    #
    # @example
    #   module Foo
    #     extend Curate::Indexer::StorageModule
    #   end
    module StorageModule
      # Stores the document under its pid, replacing any document stored for that pid.
      # The key is coerced with to_s so it always matches the lookup done by #find.
      # @param doc [#pid]
      # @return the document that was written
      def write(doc)
        cache[doc.pid.to_s] = doc
      end

      # @param pid [#to_s]
      # @return the document stored for the pid
      # @raise [KeyError] when nothing is stored for the pid
      def find(pid)
        cache.fetch(pid.to_s)
      end

      # Yields every stored document.
      # @return [Enumerator] when no block is given
      def find_each
        return enum_for(:find_each) unless block_given?
        cache.each_value { |document| yield(document) }
      end

      # Discards every stored document.
      def clear_cache!
        @cache = {}
      end

      # The backing Hash, created on first use.
      def cache
        @cache ||= {}
      end
      private :cache
    end
  end
end
@@ -0,0 +1,5 @@
1
module Curate
  module Indexer
    # The released version of the gem; read by the gemspec.
    VERSION = '0.1.0'.freeze
  end
end
@@ -0,0 +1,71 @@
1
+ require "curate/indexer/version"
2
+ require 'curate/indexer/relationship_reindexer'
3
+ require 'curate/indexer/repository_reindexer'
4
+ require 'curate/indexer/configuration'
5
+ require 'curate/indexer/railtie' if defined?(Rails)
6
+
7
module Curate
  # Responsible for performing the indexing of an object and its related child objects.
  module Indexer
    # This assumes a rather deep graph
    DEFAULT_TIME_TO_LIVE = 15

    # @api public
    # Responsible for reindexing the associated document for the given :pid and the descendants of that :pid.
    # In a perfect world we could reindex the pid as well; But that is for another test.
    #
    # @param pid [String] - The permanent identifier of the object that will be reindexed along with its children.
    # @param time_to_live [Integer] - there to guard against cyclical graphs
    # @return [Boolean] - It was successful
    # @raise [Curate::Indexer::Exceptions::CycleDetectionError] - A potential cycle was detected
    def self.reindex_relationships(pid, time_to_live = DEFAULT_TIME_TO_LIVE)
      RelationshipReindexer.call(pid: pid, time_to_live: time_to_live, adapter: configuration.adapter)
      true
    end

    class << self
      # Here because I made a previous declaration that .reindex was part of the
      # public API. Then I decided I didn't want to use that method.
      alias reindex reindex_relationships
    end

    # @api public
    # Responsible for reindexing the entire preservation layer.
    # @param time_to_live [Integer] - there to guard against cyclical graphs
    # @return [Boolean] - It was successful
    # @raise [Curate::Indexer::Exceptions::CycleDetectionError] - A potential cycle was detected
    def self.reindex_all!(time_to_live = DEFAULT_TIME_TO_LIVE)
      RepositoryReindexer.call(time_to_live: time_to_live, pid_reindexer: method(:reindex_relationships), adapter: configuration.adapter)
      true
    end

    # Contains the Curate::Indexer configuration information; created on first access.
    # @see Curate::Indexer::Configuration
    def self.configuration
      @configuration ||= Configuration.new
    end

    # @api public
    # @return the adapter held by the current configuration
    def self.adapter
      configuration.adapter
    end

    # @api public
    # Captures the configuration block. Outside of Rails the block is applied
    # immediately; under Rails it is only stored (see the comment below).
    # @yieldparam configuration [Curate::Indexer::Configuration]
    # @see Curate::Indexer::Configuration
    # @see .configuration
    def self.configure(&block)
      @configuration_block = block
      # The Rails load sequence means that some of the configured Targets may
      # not be loaded; As such I am not calling configure! instead relying on
      # Curate::Indexer::Railtie to handle the configure! call
      configure! unless defined?(Rails)
    end

    # @api public
    # Applies the pending configuration block, if any, then discards it so it
    # cannot be applied a second time.
    # @return [false] when there is no pending block
    # @return [nil] after a pending block was applied
    def self.configure!
      return false unless @configuration_block.respond_to?(:call)
      @configuration_block.call(configuration)
      @configuration_block = nil
    end
  end
end
metadata ADDED
@@ -0,0 +1,293 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: curate-indexer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Jeremy Friesen
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-06-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.12'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.12'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec-its
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: guard-rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: guard-rubocop
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: terminal-notifier-guard
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: terminal-notifier
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: rubocop
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: simplecov
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: codeclimate-test-reporter
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
167
+ - !ruby/object:Gem::Dependency
168
+ name: json
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - ">="
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ version: '0'
181
+ - !ruby/object:Gem::Dependency
182
+ name: byebug
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - ">="
186
+ - !ruby/object:Gem::Version
187
+ version: '0'
188
+ type: :development
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - ">="
193
+ - !ruby/object:Gem::Version
194
+ version: '0'
195
+ - !ruby/object:Gem::Dependency
196
+ name: railties
197
+ requirement: !ruby/object:Gem::Requirement
198
+ requirements:
199
+ - - "~>"
200
+ - !ruby/object:Gem::Version
201
+ version: '4.0'
202
+ type: :development
203
+ prerelease: false
204
+ version_requirements: !ruby/object:Gem::Requirement
205
+ requirements:
206
+ - - "~>"
207
+ - !ruby/object:Gem::Version
208
+ version: '4.0'
209
+ - !ruby/object:Gem::Dependency
210
+ name: listen
211
+ requirement: !ruby/object:Gem::Requirement
212
+ requirements:
213
+ - - "~>"
214
+ - !ruby/object:Gem::Version
215
+ version: 3.0.8
216
+ type: :development
217
+ prerelease: false
218
+ version_requirements: !ruby/object:Gem::Requirement
219
+ requirements:
220
+ - - "~>"
221
+ - !ruby/object:Gem::Version
222
+ version: 3.0.8
223
+ - !ruby/object:Gem::Dependency
224
+ name: dry-equalizer
225
+ requirement: !ruby/object:Gem::Requirement
226
+ requirements:
227
+ - - ">="
228
+ - !ruby/object:Gem::Version
229
+ version: '0'
230
+ type: :runtime
231
+ prerelease: false
232
+ version_requirements: !ruby/object:Gem::Requirement
233
+ requirements:
234
+ - - ">="
235
+ - !ruby/object:Gem::Version
236
+ version: '0'
237
+ description: A playground for CurateND collections indexing
238
+ email:
239
+ - jeremy.n.friesen@gmail.com
240
+ executables: []
241
+ extensions: []
242
+ extra_rdoc_files: []
243
+ files:
244
+ - ".gitignore"
245
+ - ".rspec"
246
+ - ".rubocop.yml"
247
+ - ".ruby-version"
248
+ - ".travis.yml"
249
+ - Gemfile
250
+ - Guardfile
251
+ - LICENSE
252
+ - README.md
253
+ - Rakefile
254
+ - bin/console
255
+ - bin/setup
256
+ - curate-indexer.gemspec
257
+ - lib/curate/indexer.rb
258
+ - lib/curate/indexer/adapters.rb
259
+ - lib/curate/indexer/configuration.rb
260
+ - lib/curate/indexer/documents.rb
261
+ - lib/curate/indexer/exceptions.rb
262
+ - lib/curate/indexer/index.rb
263
+ - lib/curate/indexer/preservation.rb
264
+ - lib/curate/indexer/railtie.rb
265
+ - lib/curate/indexer/relationship_reindexer.rb
266
+ - lib/curate/indexer/repository_reindexer.rb
267
+ - lib/curate/indexer/storage_module.rb
268
+ - lib/curate/indexer/version.rb
269
+ homepage: https://github.com/ndlib/curate-indexer
270
+ licenses: []
271
+ metadata: {}
272
+ post_install_message:
273
+ rdoc_options: []
274
+ require_paths:
275
+ - lib
276
+ required_ruby_version: !ruby/object:Gem::Requirement
277
+ requirements:
278
+ - - "~>"
279
+ - !ruby/object:Gem::Version
280
+ version: '2.0'
281
+ required_rubygems_version: !ruby/object:Gem::Requirement
282
+ requirements:
283
+ - - ">="
284
+ - !ruby/object:Gem::Version
285
+ version: '0'
286
+ requirements: []
287
+ rubyforge_project:
288
+ rubygems_version: 2.4.5
289
+ signing_key:
290
+ specification_version: 4
291
+ summary: A playground for CurateND collections indexing
292
+ test_files: []
293
+ has_rdoc: