curate-indexer 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e1be92053b6ce30d9e75ac935c7000861725a4c5
4
+ data.tar.gz: babb5d4f0228d9210d096688d920d5c457f0f266
5
+ SHA512:
6
+ metadata.gz: eb5f7aaecda93774887fe655bd9c9884901c65360f32575f74df1f1e15404ef5b0a3933e5992ef5a4a35b794364fddf5c0cbc63f5212214efe1d288a5f7099e3
7
+ data.tar.gz: 1aaf669fc93f8f435b6d116417da0433428cb72a0ff5871c362e8815d6348cbdb012dc2d8c7bee7b9a4ba9c17cdf315d435cc08c05a45d91223db0741aa66b7d
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.rubocop.yml ADDED
@@ -0,0 +1,48 @@
1
+ ################################################################################
2
+ ## Releasing the hounds in your local environment.
3
+ ##
4
+ ## Setup:
5
+ ## $ gem install rubocop
6
+ ##
7
+ ## Run:
8
+ ## $ rubocop ./path/to/file ./or/path/to/directory -c ./.rubocop.yml
9
+ ##
10
+ ## Generation Notes:
11
+ ## This file was generated via the commitment:install generator. You are free
12
+ ## and expected to change this file.
13
+ ################################################################################
14
+ AllCops:
15
+ Include:
16
+ - Rakefile
17
+ Exclude:
18
+ - 'vendor/**/*'
19
+ - 'tmp/**/*'
20
+ - 'bin/**/*'
21
+ TargetRubyVersion: 2.0
22
+ LineLength:
23
+ Description: 'Limit lines to 140 characters.'
24
+ Max: 140
25
+ Enabled: true
26
+
27
+ ModuleLength:
28
+ Exclude:
29
+ - 'spec/**/*'
30
+
31
+
32
+ Style/StringLiterals:
33
+ Description: 'Checks if uses of quotes match the configured preference.'
34
+ Enabled: false
35
+
36
+ PercentLiteralDelimiters:
37
+ Description: 'Use `%`-literal delimiters consistently'
38
+ Enabled: false
39
+
40
+ Documentation:
41
+ Description: 'Document classes and non-namespace modules.'
42
+ Enabled: true
43
+ Exclude:
44
+ - spec/**/*
45
+ - test/**/*
46
+
47
+ Style/WordArray:
48
+ Enabled: false
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.2.2
data/.travis.yml ADDED
@@ -0,0 +1,22 @@
1
+ sudo: false
2
+ cache: bundler
3
+
4
+ language: ruby
5
+ rvm:
6
+ - 2.3.1
7
+ - 2.2.5
8
+ - 2.2.2
9
+ - 2.1.10
10
+ - 2.0.0
11
+
12
+ matrix:
13
+ allow_failures:
14
+ - rvm: "2.3.1"
15
+
16
+ before_install: gem install bundler -v 1.12.5
17
+
18
+ script: 'bundle exec rake'
19
+
20
+ addons:
21
+ code_climate:
22
+ repo_token: 71b80cc45ed849e84e5943bb4874393b5ce26a2356381a839552395cd9b2f71a
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in curate-indexer.gemspec
4
+ gemspec
data/Guardfile ADDED
@@ -0,0 +1,49 @@
1
+ # A sample Guardfile
2
+ # More info at https://github.com/guard/guard#readme
3
+
4
+ ## Uncomment and set this to only include directories you want to watch
5
+ # directories %w(app lib config test spec features) \
6
+ # .select{|d| Dir.exists?(d) ? d : UI.warning("Directory #{d} does not exist")}
7
+
8
+ ## Note: if you are using the `directories` clause above and you are not
9
+ ## watching the project directory ('.'), then you will want to move
10
+ ## the Guardfile to a watched dir and symlink it back, e.g.
11
+ #
12
+ # $ mkdir config
13
+ # $ mv Guardfile config/
14
+ # $ ln -s config/Guardfile .
15
+ #
16
+ # and, you'll have to watch "config/Guardfile" instead of "Guardfile"
17
+
18
+ # Note: The cmd option is now required due to the increasing number of ways
19
+ # rspec may be run, below are examples of the most common uses.
20
+ # * bundler: 'bundle exec rspec'
21
+ # * bundler binstubs: 'bin/rspec'
22
+ # * spring: 'bin/rspec' (This will use spring if running and you have
23
+ # installed the spring binstubs per the docs)
24
+ # * zeus: 'zeus rspec' (requires the server to be started separately)
25
+ # * 'just' rspec: 'rspec'
26
+
27
+ notification :terminal_notifier, subtitle: 'Curate::Indexer'
28
+
29
+ guard :rspec, cmd: "bundle exec rspec" do
30
+ require "guard/rspec/dsl"
31
+ dsl = Guard::RSpec::Dsl.new(self)
32
+
33
+ # Feel free to open issues for suggestions and improvements
34
+
35
+ # RSpec files
36
+ rspec = dsl.rspec
37
+ watch(rspec.spec_helper) { rspec.spec_dir }
38
+ watch(rspec.spec_support) { rspec.spec_dir }
39
+ watch(rspec.spec_files)
40
+
41
+ # Ruby files
42
+ ruby = dsl.ruby
43
+ dsl.watch_spec_files_for(ruby.lib_files)
44
+ end
45
+
46
+ guard :rubocop do
47
+ watch(%r{.+\.rb$})
48
+ watch(%r{(?:.+/)?\.rubocop\.yml$}) { |m| File.dirname(m[0]) }
49
+ end
data/LICENSE ADDED
@@ -0,0 +1,14 @@
1
+ ##########################################################################
2
+ # Copyright 2014-2015 University of Notre Dame
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
data/README.md ADDED
@@ -0,0 +1,9 @@
1
+ # Curate::Indexer
2
+
3
+ [![Build Status](https://travis-ci.org/ndlib/curate-indexer.png?branch=master)](https://travis-ci.org/ndlib/curate-indexer)
4
+ [![Test Coverage](https://codeclimate.com/github/ndlib/curate-indexer/badges/coverage.svg)](https://codeclimate.com/github/ndlib/curate-indexer)
5
+ [![Code Climate](https://codeclimate.com/github/ndlib/curate-indexer.png)](https://codeclimate.com/github/ndlib/curate-indexer)
6
+ [![Documentation Status](http://inch-ci.org/github/ndlib/curate-indexer.svg?branch=master)](http://inch-ci.org/github/ndlib/curate-indexer)
7
+ [![APACHE 2 License](http://img.shields.io/badge/APACHE2-license-blue.svg)](./LICENSE)
8
+
9
+ This is a sandbox to work through the reindexing strategy as it relates to [CurateND Collections](https://github.com/ndlib/curate_nd/issues/420). At this point the code is separate to allow for rapid testing and prototyping (no sense spinning up SOLR and Fedora to walk an arbitrary graph).
data/Rakefile ADDED
@@ -0,0 +1,34 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
namespace :commitment do
  require 'rubocop/rake_task'
  # Defines the commitment:rubocop task.
  # Why hound? Because hound-ci assumes this file, and perhaps you'll be using this
  RuboCop::RakeTask.new

  # Sets the COVERAGE environment variable before the specs run.
  # NOTE(review): presumably the spec helper reads COVERAGE to switch on coverage collection - confirm.
  task :configure_test_for_code_coverage do
    ENV['COVERAGE'] = 'true'
  end

  # Aborts the build when the coverage recorded in coverage/.last_run.json is below the goal.
  task :code_coverage do
    require 'json'
    $stdout.puts "Checking commitment:code_coverage"
    coverage_percentage = JSON.parse(File.read('coverage/.last_run.json')).fetch('result').fetch('covered_percent').to_i
    goal = 100
    if goal > coverage_percentage
      # The goal is what we expected; the measured percentage is what we actually got.
      abort("Code Coverage Goal Not Met:\n\t#{goal}%\tExpected\n\t#{coverage_percentage}%\tActual")
    end
  end
end

RSpec::Core::RakeTask.new(:spec)

# The default task lints, runs the specs with coverage switched on, then enforces the coverage goal.
# 'spec' is already part of this list, so no separate `task default: :spec` declaration is needed.
task(
  default: [
    'commitment:rubocop',
    'commitment:configure_test_for_code_coverage',
    'spec',
    'commitment:code_coverage'
  ]
)
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "curate/indexer"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,39 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'curate/indexer/version'
5
+
6
Gem::Specification.new do |spec|
  spec.name          = "curate-indexer"
  spec.version       = Curate::Indexer::VERSION
  spec.authors       = ["Jeremy Friesen"]
  spec.email         = ["jeremy.n.friesen@gmail.com"]

  spec.summary       = %q{A playground for CurateND collections indexing}
  spec.description   = %q{A playground for CurateND collections indexing}
  spec.homepage      = "https://github.com/ndlib/curate-indexer"
  # The gem ships an Apache License 2.0 LICENSE file; declare it so the packaged metadata is not empty.
  spec.license       = "Apache-2.0"

  # Package every file tracked by git except the test suites.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "bin"
  # Only files under exe/ are treated as executables; bin/ holds development scripts.
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]
  spec.required_ruby_version = '~>2.0'

  spec.add_development_dependency "bundler", "~> 1.12"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
  spec.add_development_dependency "rspec-its"
  spec.add_development_dependency "guard-rspec"
  spec.add_development_dependency "guard-rubocop"
  spec.add_development_dependency "terminal-notifier-guard"
  spec.add_development_dependency "terminal-notifier"
  spec.add_development_dependency "rubocop"
  spec.add_development_dependency "simplecov"
  spec.add_development_dependency "codeclimate-test-reporter"
  spec.add_development_dependency "json"
  spec.add_development_dependency "byebug"
  spec.add_development_dependency "railties", '~> 4.0'
  # As a secondary dependency, listen is preventing bundling
  spec.add_development_dependency "listen", '~> 3.0.8'
  spec.add_dependency "dry-equalizer"
end
@@ -0,0 +1,95 @@
1
module Curate
  module Indexer
    module Adapters
      # @api public
      # A module that defines the interface of methods required to interact with Curate::Indexer operations.
      #
      # The interface is written as instance methods so that `extend AbstractAdapter` really does give an
      # adapter a NotImplementedError-raising fallback for every method. `extend self` keeps each method
      # callable directly on AbstractAdapter as well.
      module AbstractAdapter
        extend self

        # @api public
        # @param pid [String]
        # @return Curate::Indexer::Documents::PreservationDocument
        # @raise [NotImplementedError] unless the adapter overrides this method
        def find_preservation_document_by(*)
          raise NotImplementedError
        end

        # @api public
        # @param pid [String]
        # @return Curate::Indexer::Documents::IndexDocument
        # @raise [NotImplementedError] unless the adapter overrides this method
        def find_index_document_by(*)
          raise NotImplementedError
        end

        # @api public
        # @yield Curate::Indexer::Documents::PreservationDocument
        # @raise [NotImplementedError] unless the adapter overrides this method
        def each_preservation_document
          raise NotImplementedError
        end

        # @api public
        # @param pid [String]
        # @yield Curate::Indexer::Documents::IndexDocument
        # @raise [NotImplementedError] unless the adapter overrides this method
        def each_child_document_of(*, &_block)
          raise NotImplementedError
        end

        # @api public
        # @return Curate::Indexer::Documents::IndexDocument
        # @raise [NotImplementedError] unless the adapter overrides this method
        def write_document_attributes_to_index_layer(*)
          raise NotImplementedError
        end
      end

      # @api public
      #
      # Defines the interface for interacting with the InMemory layer. It is a reference
      # implementation that is used throughout tests.
      module InMemoryAdapter
        extend AbstractAdapter

        # @api public
        # @param pid [String]
        # @return Curate::Indexer::Documents::PreservationDocument
        def self.find_preservation_document_by(pid)
          Preservation.find(pid)
        end

        # @api public
        # @param pid [String]
        # @return Curate::Indexer::Documents::IndexDocument
        def self.find_index_document_by(pid)
          Index.find(pid)
        end

        # @api public
        # @yield Curate::Indexer::Documents::PreservationDocument
        def self.each_preservation_document
          Preservation.find_each { |document| yield(document) }
        end

        # @api public
        # @param pid [String]
        # @yield Curate::Indexer::Documents::IndexDocument
        def self.each_child_document_of(pid, &block)
          Index.each_child_document_of(pid, &block)
        end

        # @api public
        # This is not something that I envision using in the production environment;
        # It is here to keep the Preservation system isolated and accessible only through interfaces.
        # @param attributes [Hash] passed through to Preservation.write_document
        # @return Curate::Indexer::Documents::PreservationDocument
        def self.write_document_attributes_to_preservation_layer(attributes = {})
          Preservation.write_document(attributes)
        end

        # @api public
        # @param attributes [Hash] passed through to Index.write_document
        # @return Curate::Indexer::Documents::IndexDocument
        def self.write_document_attributes_to_index_layer(attributes = {})
          Index.write_document(attributes)
        end

        # @api private
        # Empties both the in-memory preservation store and the in-memory index store.
        def self.clear_cache!
          Preservation.clear_cache!
          Index.clear_cache!
        end
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module Curate
  # :nodoc:
  module Indexer
    # Holds the settings of Curate::Indexer; the only setting is the adapter.
    class Configuration
      # TODO: Should we guard against a bad adapter?
      attr_writer :adapter

      # @return the adapter that was assigned, or the in-memory adapter when none was assigned
      def adapter
        return @adapter if @adapter
        default_adapter
      end

      private

      # The adapters file is required lazily, only when the default is actually needed.
      def default_adapter
        require 'curate/indexer/adapters'
        Adapters::InMemoryAdapter
      end
    end
    private_constant :Configuration
  end
end
@@ -0,0 +1,47 @@
1
+ require 'dry-equalizer'
2
+
3
module Curate
  module Indexer
    module Documents
      # @api public
      #
      # The minimal view of a preserved (Fedora) object that re-indexing its children needs:
      # the object's pid and the pids of its parents.
      class PreservationDocument
        attr_reader :pid, :parent_pids

        # @param keywords [Hash] must contain :pid and :parent_pids
        # @raise [KeyError] when either key is missing
        def initialize(keywords = {})
          @pid = keywords.fetch(:pid).to_s
          @parent_pids = Array(keywords.fetch(:parent_pids))
        end
      end

      # @api private
      #
      # A rudimentary representation of what is needed to reindex Solr documents
      class IndexDocument
        # Equality is order-insensitive: it is computed from the pid and the sorted collections.
        include Dry::Equalizer(:pid, :sorted_parent_pids, :sorted_pathnames, :sorted_ancestors)

        attr_reader :pid, :parent_pids, :pathnames, :ancestors

        # @param keywords [Hash] must contain :pid, :parent_pids, :pathnames and :ancestors
        # @raise [KeyError] when any of those keys is missing
        def initialize(keywords = {})
          @pid = keywords.fetch(:pid).to_s
          @parent_pids, @pathnames, @ancestors = [:parent_pids, :pathnames, :ancestors].map do |key|
            Array(keywords.fetch(key))
          end
        end

        # @return [Array] the parent pids in sorted order
        def sorted_parent_pids
          parent_pids.sort
        end

        # @return [Array] the pathnames in sorted order
        def sorted_pathnames
          pathnames.sort
        end

        # @return [Array] the ancestors in sorted order
        def sorted_ancestors
          ancestors.sort
        end
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
module Curate
  module Indexer
    module Exceptions
      # Common ancestor of the errors raised by Curate::Indexer
      RuntimeError = Class.new(::RuntimeError)

      # Raised when we may have detected a cycle within the graph
      class CycleDetectionError < RuntimeError
        # @return the pid handed to the constructor
        attr_reader :pid

        # @param pid the pid related to the suspected cycle
        def initialize(pid)
          @pid = pid
          super("Possible graph cycle discovered related to PID:#{pid}.")
        end
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
+ require 'curate/indexer/storage_module'
2
+ require 'curate/indexer/documents'
3
+
4
module Curate
  # :nodoc:
  module Indexer
    # @api private
    #
    # An abstract representation of the underlying index service. In the case of
    # CurateND this is an abstraction of Solr.
    module Index
      # :nodoc:
      module Storage
        extend StorageModule

        # @return [Array] the stored documents whose parent_pids include the given pid
        def self.find_children_of_pid(pid)
          cache.each_value.select { |document| document.parent_pids.include?(pid) }
        end
      end
      private_constant :Storage

      class << self
        # Empties the backing storage.
        def clear_cache!
          Storage.clear_cache!
        end

        # Looks up the stored document for the given pid.
        def find(pid)
          Storage.find(pid)
        end

        # Yields every stored document that lists the given pid among its parents.
        def each_child_document_of(pid, &block)
          Storage.find_children_of_pid(pid).each(&block)
        end

        # Builds an IndexDocument from the attributes, stores it, and returns it.
        def write_document(attributes = {})
          document = Documents::IndexDocument.new(attributes)
          Storage.write(document)
          document
        end
      end
    end
    private_constant :Index
  end
end
@@ -0,0 +1,39 @@
1
+ require 'curate/indexer/storage_module'
2
+ require 'curate/indexer/documents'
3
+
4
module Curate
  # :nodoc:
  module Indexer
    # @api private
    #
    # A module responsible for containing the "preservation interface" logic.
    # In the case of CurateND, there will need to be an adapter to get a Fedora
    # object coerced into a Curate::Indexer::Documents::PreservationDocument
    module Preservation
      # :nodoc:
      module MemoryStorage
        extend StorageModule
      end
      private_constant :MemoryStorage

      class << self
        # Looks up the stored document for the given pid; any extra arguments are ignored.
        def find(pid, *)
          MemoryStorage.find(pid)
        end

        # Yields each stored document; any positional arguments are ignored.
        def find_each(*, &block)
          MemoryStorage.find_each(&block)
        end

        # Empties the backing storage.
        def clear_cache!
          MemoryStorage.clear_cache!
        end

        # Builds a PreservationDocument from the attributes, stores it, and returns it.
        def write_document(attributes = {})
          document = Documents::PreservationDocument.new(attributes)
          MemoryStorage.write(document)
          document
        end
      end
    end
    private_constant :Preservation
  end
end
@@ -0,0 +1,13 @@
1
+ require 'rails/railtie'
2
+
3
module Curate
  module Indexer
    # Connect into the boot sequence of a Rails application
    class Railtie < Rails::Railtie
      # Curate::Indexer is deliberately not added to config.eager_load_namespaces: Rails calls
      # `eager_load!` on every registered namespace when eager loading is enabled, and
      # Curate::Indexer does not define that method. The library loads its files through
      # explicit `require` calls, so nothing is lost by not registering.

      # Runs the configuration block that Curate::Indexer.configure stored.
      config.to_prepare do
        Curate::Indexer.send(:configure!)
      end
    end
  end
end
@@ -0,0 +1,103 @@
1
+ require 'curate/indexer/exceptions'
2
+ require 'curate/indexer/index'
3
+ require 'curate/indexer/preservation'
4
+ require 'forwardable'
5
+ require 'set'
6
+
7
module Curate
  # Establishing namespace
  module Indexer
    # Rewrites the index entry of a PID and then of each of its descendants, working through a queue.
    # @note Each generation gets one less time_to_live; a document reached with none left is reported as a possible cycle
    # @api private
    class RelationshipReindexer
      extend Forwardable

      # A queue entry: the pid to process and the time to live it was enqueued with
      ProcessingDocument = Struct.new(:pid, :time_to_live)
      private_constant :ProcessingDocument

      attr_reader :pid, :time_to_live, :queue, :adapter

      # Convenience for `new(options).call`
      def self.call(options = {})
        new(options).call
      end

      # @param options [Hash] requires :pid, :time_to_live and :adapter; :queue defaults to an empty Array
      # @raise [KeyError] when a required option is missing
      def initialize(options = {})
        @pid = options.fetch(:pid).to_s
        @time_to_live = options.fetch(:time_to_live).to_i
        @adapter = options.fetch(:adapter)
        @queue = options.fetch(:queue, [])
      end

      # Processes the starting pid, then keeps draining the queue; every processed document
      # enqueues its children with one less time to live.
      # @return [RelationshipReindexer] self
      def call
        enqueue(pid, time_to_live)
        while (current = dequeue)
          process_a_document(current)
          remaining = current.time_to_live - 1
          adapter.each_child_document_of(current.pid) { |child| enqueue(child.pid, remaining) }
        end
        self
      end

      private

      attr_writer :document

      def_delegator :queue, :shift, :dequeue

      def enqueue(pid, time_to_live)
        queue.push(ProcessingDocument.new(pid, time_to_live))
      end

      # Rebuilds and writes the index attributes of one queued document.
      # The error carries the pid the reindexing started from.
      def process_a_document(index_document)
        raise Exceptions::CycleDetectionError, pid if index_document.time_to_live <= 0
        preservation_document = adapter.find_preservation_document_by(index_document.pid)
        attributes = parent_pids_and_path_and_ancestors_for(preservation_document)
        adapter.write_document_attributes_to_index_layer(attributes)
      end

      def parent_pids_and_path_and_ancestors_for(preservation_document)
        ParentAndPathAndAncestorsBuilder.new(preservation_document, adapter).to_hash
      end

      # Collects the parent pids, pathnames and ancestors of a preservation document from the
      # index documents of its parents, ready to be handed to the index layer as a Hash.
      class ParentAndPathAndAncestorsBuilder
        def initialize(preservation_document, adapter)
          @preservation_document = preservation_document
          @adapter = adapter
          @parent_pids = Set.new
          @pathnames = Set.new
          @ancestors = Set.new
          compile!
        end

        # @return [Hash] with the keys :pid, :parent_pids, :pathnames and :ancestors
        def to_hash
          {
            pid: @preservation_document.pid,
            parent_pids: @parent_pids.to_a,
            pathnames: @pathnames.to_a,
            ancestors: @ancestors.to_a
          }
        end

        private

        attr_reader :adapter

        def compile!
          @preservation_document.parent_pids.each do |parent_pid|
            compile_one!(adapter.find_index_document_by(parent_pid))
          end
          return unless @parent_pids.empty?
          # Ensuring that an "orphan" has a path to get to it
          @pathnames << @preservation_document.pid
        end

        # Folds one parent in: its pid, a pathname for the child under each of the parent's
        # pathnames, every leading portion of those pathnames, and the parent's own ancestors.
        def compile_one!(parent_index_document)
          @parent_pids << parent_index_document.pid
          parent_index_document.pathnames.each do |pathname|
            @pathnames << File.join(pathname, @preservation_document.pid)
            slugs = pathname.split("/")
            1.upto(slugs.size) { |depth| @ancestors << slugs.first(depth).join('/') }
          end
          @ancestors.merge(parent_index_document.ancestors)
        end
      end
      private_constant :ParentAndPathAndAncestorsBuilder
    end
    private_constant :RelationshipReindexer
  end
end
@@ -0,0 +1,49 @@
1
require 'set'

module Curate
  # Establishing namespace
  module Indexer
    # Responsible for reindexing the entire repository
    # @api private
    # @note There is cycle detection logic for walking the graph prior to attempting relationship re-indexing
    class RepositoryReindexer
      # Convenience for `new(*args).call`
      def self.call(*args)
        new(*args).call
      end

      # @param options [Hash] requires :time_to_live, :pid_reindexer (responds to #call with a pid) and :adapter
      # @raise [KeyError] when a required option is missing
      def initialize(options = {})
        @max_time_to_live = options.fetch(:time_to_live).to_i
        @pid_reindexer = options.fetch(:pid_reindexer)
        @adapter = options.fetch(:adapter)
        # A Set keeps the "already processed?" check cheap; it is consulted for every document and every parent.
        @processed_pids = Set.new
      end

      # Reindexes each preservation document the adapter yields, parents first.
      def call
        @adapter.each_preservation_document { |document| recursive_reindex(document, max_time_to_live) }
      end

      private

      attr_reader :max_time_to_live, :processed_pids, :pid_reindexer

      # Given that we are attempting to reindex the parents before we reindex, we can't rely on
      # the reindex time_to_live but instead must have a separate time to live.
      #
      # The reindexing process assumes that an object's parents have been indexed; Thus we need to
      # walk up the parent graph to reindex the parents before we start on the child.
      #
      # @raise [Exceptions::CycleDetectionError] when the walk up the parents exhausts time_to_live
      def recursive_reindex(document, time_to_live = max_time_to_live)
        return true if processed_pids.include?(document.pid)
        raise Exceptions::CycleDetectionError, document.pid if time_to_live <= 0
        document.parent_pids.each do |parent_pid|
          parent_document = @adapter.find_preservation_document_by(parent_pid)
          recursive_reindex(parent_document, time_to_live - 1)
        end
        reindex_a_pid(document.pid)
      end

      # Delegates to the injected reindexer and records the pid so it is not processed twice.
      def reindex_a_pid(pid)
        pid_reindexer.call(pid)
        processed_pids << pid
      end
    end
    private_constant :RepositoryReindexer
  end
end
@@ -0,0 +1,34 @@
1
module Curate
  module Indexer
    # @api private
    #
    # A module mixin to expose rudimentary read/write capabilities backed by an in-memory Hash
    #
    # @example
    #   module Foo
    #     extend Curate::Indexer::StorageModule
    #   end
    module StorageModule
      # Stores the document under its pid.
      # The key is coerced with #to_s because #find looks documents up by `pid.to_s`;
      # without the coercion a document with a non-String pid could never be found again.
      # @param doc [#pid]
      # @return the document that was written
      def write(doc)
        cache[doc.pid.to_s] = doc
      end

      # @param pid [#to_s]
      # @return the document stored under the pid
      # @raise [KeyError] when nothing is stored under the pid
      def find(pid)
        cache.fetch(pid.to_s)
      end

      # @yield each stored document
      def find_each
        cache.each { |_key, document| yield(document) }
      end

      # Discards every stored document.
      def clear_cache!
        @cache = {}
      end

      # The Hash of pid => document, created on first use
      def cache
        @cache ||= {}
      end
      private :cache
    end
  end
end
@@ -0,0 +1,5 @@
1
module Curate
  module Indexer
    # The released version of the curate-indexer gem
    VERSION = '0.1.0'.freeze
  end
end
@@ -0,0 +1,71 @@
1
+ require "curate/indexer/version"
2
+ require 'curate/indexer/relationship_reindexer'
3
+ require 'curate/indexer/repository_reindexer'
4
+ require 'curate/indexer/configuration'
5
+ require 'curate/indexer/railtie' if defined?(Rails)
6
+
7
module Curate
  # Responsible for performing the indexing of an object and its related child objects.
  module Indexer
    # This assumes a rather deep graph
    DEFAULT_TIME_TO_LIVE = 15

    # @api public
    # Responsible for reindexing the associated document for the given :pid and the descendants of that :pid.
    # In a perfect world we could reindex the pid as well; But that is for another test.
    #
    # @param pid [String] - The permanent identifier of the object that will be reindexed along with its children.
    # @param time_to_live [Integer] - there to guard against cyclical graphs
    # @return [Boolean] - It was successful
    # @raise Curate::Indexer::Exceptions::CycleDetectionError - A potential cycle was detected
    def self.reindex_relationships(pid, time_to_live = DEFAULT_TIME_TO_LIVE)
      RelationshipReindexer.call(pid: pid, time_to_live: time_to_live, adapter: configuration.adapter)
      true
    end

    class << self
      # Here because I made a previous declaration that .reindex was part of the
      # public API. Then I decided I didn't want to use that method.
      alias reindex reindex_relationships
    end

    # @api public
    # Responsible for reindexing the entire preservation layer.
    # @param time_to_live [Integer] - there to guard against cyclical graphs
    # @return [Boolean] - It was successful
    # @raise Curate::Indexer::Exceptions::CycleDetectionError - A potential cycle was detected
    def self.reindex_all!(time_to_live = DEFAULT_TIME_TO_LIVE)
      RepositoryReindexer.call(time_to_live: time_to_live, pid_reindexer: method(:reindex_relationships), adapter: configuration.adapter)
      true
    end

    # Contains the Curate::Indexer configuration information; created on first use.
    # @see Curate::Indexer::Configuration
    def self.configuration
      @configuration ||= Configuration.new
    end

    # @api public
    # @return the adapter of the current configuration
    def self.adapter
      configuration.adapter
    end

    # @api public
    # Stores the given block; it is later called with the configuration object.
    # @yieldparam configuration [Curate::Indexer::Configuration]
    # @see Curate::Indexer::Configuration
    # @see .configuration
    def self.configure(&block)
      @configuration_block = block
      # The Rails load sequence means that some of the configured Targets may
      # not be loaded; As such, under Rails I am not calling configure! here, instead relying on
      # Curate::Indexer::Railtie to handle the configure! call. Outside of Rails the block runs immediately.
      configure! unless defined?(Rails)
    end

    # @api public
    # Runs the stored configuration block once and then discards it.
    # @return [false] when there is no configuration block to run
    def self.configure!
      return false unless @configuration_block.respond_to?(:call)
      @configuration_block.call(configuration)
      @configuration_block = nil
    end
  end
end
metadata ADDED
@@ -0,0 +1,293 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: curate-indexer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Jeremy Friesen
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-06-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.12'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.12'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec-its
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: guard-rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: guard-rubocop
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: terminal-notifier-guard
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: terminal-notifier
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: rubocop
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: simplecov
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: codeclimate-test-reporter
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
167
+ - !ruby/object:Gem::Dependency
168
+ name: json
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - ">="
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ version: '0'
181
+ - !ruby/object:Gem::Dependency
182
+ name: byebug
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - ">="
186
+ - !ruby/object:Gem::Version
187
+ version: '0'
188
+ type: :development
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - ">="
193
+ - !ruby/object:Gem::Version
194
+ version: '0'
195
+ - !ruby/object:Gem::Dependency
196
+ name: railties
197
+ requirement: !ruby/object:Gem::Requirement
198
+ requirements:
199
+ - - "~>"
200
+ - !ruby/object:Gem::Version
201
+ version: '4.0'
202
+ type: :development
203
+ prerelease: false
204
+ version_requirements: !ruby/object:Gem::Requirement
205
+ requirements:
206
+ - - "~>"
207
+ - !ruby/object:Gem::Version
208
+ version: '4.0'
209
+ - !ruby/object:Gem::Dependency
210
+ name: listen
211
+ requirement: !ruby/object:Gem::Requirement
212
+ requirements:
213
+ - - "~>"
214
+ - !ruby/object:Gem::Version
215
+ version: 3.0.8
216
+ type: :development
217
+ prerelease: false
218
+ version_requirements: !ruby/object:Gem::Requirement
219
+ requirements:
220
+ - - "~>"
221
+ - !ruby/object:Gem::Version
222
+ version: 3.0.8
223
+ - !ruby/object:Gem::Dependency
224
+ name: dry-equalizer
225
+ requirement: !ruby/object:Gem::Requirement
226
+ requirements:
227
+ - - ">="
228
+ - !ruby/object:Gem::Version
229
+ version: '0'
230
+ type: :runtime
231
+ prerelease: false
232
+ version_requirements: !ruby/object:Gem::Requirement
233
+ requirements:
234
+ - - ">="
235
+ - !ruby/object:Gem::Version
236
+ version: '0'
237
+ description: A playground for CurateND collections indexing
238
+ email:
239
+ - jeremy.n.friesen@gmail.com
240
+ executables: []
241
+ extensions: []
242
+ extra_rdoc_files: []
243
+ files:
244
+ - ".gitignore"
245
+ - ".rspec"
246
+ - ".rubocop.yml"
247
+ - ".ruby-version"
248
+ - ".travis.yml"
249
+ - Gemfile
250
+ - Guardfile
251
+ - LICENSE
252
+ - README.md
253
+ - Rakefile
254
+ - bin/console
255
+ - bin/setup
256
+ - curate-indexer.gemspec
257
+ - lib/curate/indexer.rb
258
+ - lib/curate/indexer/adapters.rb
259
+ - lib/curate/indexer/configuration.rb
260
+ - lib/curate/indexer/documents.rb
261
+ - lib/curate/indexer/exceptions.rb
262
+ - lib/curate/indexer/index.rb
263
+ - lib/curate/indexer/preservation.rb
264
+ - lib/curate/indexer/railtie.rb
265
+ - lib/curate/indexer/relationship_reindexer.rb
266
+ - lib/curate/indexer/repository_reindexer.rb
267
+ - lib/curate/indexer/storage_module.rb
268
+ - lib/curate/indexer/version.rb
269
+ homepage: https://github.com/ndlib/curate-indexer
270
+ licenses: []
271
+ metadata: {}
272
+ post_install_message:
273
+ rdoc_options: []
274
+ require_paths:
275
+ - lib
276
+ required_ruby_version: !ruby/object:Gem::Requirement
277
+ requirements:
278
+ - - "~>"
279
+ - !ruby/object:Gem::Version
280
+ version: '2.0'
281
+ required_rubygems_version: !ruby/object:Gem::Requirement
282
+ requirements:
283
+ - - ">="
284
+ - !ruby/object:Gem::Version
285
+ version: '0'
286
+ requirements: []
287
+ rubyforge_project:
288
+ rubygems_version: 2.4.5
289
+ signing_key:
290
+ specification_version: 4
291
+ summary: A playground for CurateND collections indexing
292
+ test_files: []
293
+ has_rdoc: