base_indexer 0.6.1 → 0.6.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/config/initializers/base_indexer.rb +1 -1
- data/lib/base_indexer/{solr → config}/solr_configuration.rb +0 -0
- data/lib/base_indexer/{solr → config}/solr_configuration_from_file.rb +0 -0
- data/lib/base_indexer/main_indexer_engine.rb +2 -66
- data/lib/base_indexer/solr/client.rb +113 -0
- data/lib/base_indexer/solr/writer.rb +54 -0
- data/lib/base_indexer/version.rb +1 -1
- data/lib/base_indexer.rb +4 -3
- metadata +6 -6
- data/README.rdoc +0 -93
- data/lib/base_indexer/collection.rb +0 -46
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1fd0a69da41b249eeebbea882c38c0de1fe0bd70
|
4
|
+
data.tar.gz: 53d6ed530f7463518f0c7621ea50354bf9edb185
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 36315c92a7aa60a2b2d3414cc2162771e3eadcfb1c765560ae4756f1256407d05c43f51dd3957f593238b8645acfdb2ea4e9f4e7f998160b098deb513c86a347
|
7
|
+
data.tar.gz: 0bc53b7fa053c1b1a4c4c446451e9741cf4dcf4bb72f0e6d071ca2de241827ac0f7a1c4393d16a0df4e137241c081f64b86d63e4dcc4ce5bee100a3707d3c06a
|
@@ -5,4 +5,4 @@ BaseIndexer.indexer_class = 'BaseIndexer::MainIndexerEngine'
|
|
5
5
|
BaseIndexer.solr_configuration_class_name = 'BaseIndexer::SolrConfigurationFromFile'
|
6
6
|
# BaseIndexer.solr_configuration_class.constantize.new(Rails.configuration.solr_config_file_path)
|
7
7
|
BaseIndexer.mapper_class_name = 'DiscoveryIndexer::Mapper::GeneralMapper'
|
8
|
-
BaseIndexer.solr_writer_class_name = '
|
8
|
+
BaseIndexer.solr_writer_class_name = 'BaseIndexer::Solr::Writer'
|
File without changes
|
File without changes
|
@@ -26,27 +26,12 @@ module BaseIndexer
|
|
26
26
|
#
|
27
27
|
# @raise it will raise errors if any problem happens at any level
|
28
28
|
def index(druid, targets = nil)
|
29
|
-
# Read input mods and purl
|
30
|
-
purl_model = read_purl(druid)
|
31
|
-
mods_model = read_mods(druid)
|
32
|
-
collection_data = collection_data(purl_model.collection_druids)
|
33
|
-
|
34
29
|
# Map the input to solr_doc
|
35
|
-
solr_doc = BaseIndexer.mapper_class_name.constantize.new(druid
|
36
|
-
|
37
|
-
# Get target list
|
38
|
-
targets_hash = {}
|
39
|
-
if targets.present?
|
40
|
-
targets_hash = targets_hash_from_param(targets)
|
41
|
-
else
|
42
|
-
targets_hash = purl_model.release_tags_hash
|
43
|
-
end
|
44
|
-
|
45
|
-
targets_hash = update_targets_before_write(targets_hash, purl_model)
|
30
|
+
solr_doc = BaseIndexer.mapper_class_name.constantize.new(druid).convert_to_solr_doc
|
46
31
|
|
47
32
|
# Get SOLR configuration and write
|
48
33
|
solr_targets_configs = BaseIndexer.solr_configuration_class_name.constantize.instance.get_configuration_hash
|
49
|
-
BaseIndexer.solr_writer_class_name.constantize.new.process(druid, solr_doc,
|
34
|
+
BaseIndexer.solr_writer_class_name.constantize.new.process(druid, solr_doc, targets, solr_targets_configs)
|
50
35
|
end
|
51
36
|
|
52
37
|
# It deletes an item defined by druid from all registered solr core
|
@@ -56,54 +41,5 @@ module BaseIndexer
|
|
56
41
|
BaseIndexer.solr_writer_class_name.constantize.new.solr_delete_from_all(druid, solr_targets_configs)
|
57
42
|
end
|
58
43
|
|
59
|
-
def read_purl(druid)
|
60
|
-
DiscoveryIndexer::InputXml::Purlxml.new(druid).load
|
61
|
-
end
|
62
|
-
|
63
|
-
def read_mods(druid)
|
64
|
-
DiscoveryIndexer::InputXml::Modsxml.new(druid).load
|
65
|
-
end
|
66
|
-
|
67
|
-
# It converts targets array to targets hash
|
68
|
-
# @param targets [Array] a list of specific targets
|
69
|
-
# @return [Hash] a hash of targets with true value
|
70
|
-
# @example convert target list
|
71
|
-
# targets_hash_from_param( ["searchworks","revs"] )
|
72
|
-
# {"searchworks"=>true, "revs"=>true}
|
73
|
-
def targets_hash_from_param(targets)
|
74
|
-
targets_hash = {}
|
75
|
-
unless targets.nil?
|
76
|
-
targets.each do |target|
|
77
|
-
targets_hash[target] = true
|
78
|
-
end
|
79
|
-
end
|
80
|
-
targets_hash
|
81
|
-
end
|
82
|
-
|
83
|
-
# It allows the consumer to modify the targets list before doing the final writing
|
84
|
-
# to the solr core. Default behavior returns the targets_hash as it is
|
85
|
-
# @param targets_hash [Hash] a hash of targets with true value
|
86
|
-
# @param purl_model [DiscoveryIndexer::Reader::PurlxmlModel] represents the purlxml model
|
87
|
-
# @return [Hash] a hash of targets
|
88
|
-
def update_targets_before_write(targets_hash, _purl_model)
|
89
|
-
targets_hash
|
90
|
-
end
|
91
|
-
|
92
|
-
# It converts collection_druids list to a hash with names. If the druid doesn't
|
93
|
-
# have a collection name, it will be excluded from the hash
|
94
|
-
# @param collection_druids [Array] a list of druids
|
95
|
-
# !["ab123cd4567", "ef123gh4567"]
|
96
|
-
# @return [Hash] a hash for collection druid and its name
|
97
|
-
# !{"ab123cd4567"=>"Collection 1", "ef123gh4567"=>"Collection 2"}
|
98
|
-
def collection_data(collection_druids)
|
99
|
-
collection_data = {}
|
100
|
-
unless collection_druids.nil?
|
101
|
-
collection_druids.each do |cdruid|
|
102
|
-
cdata = BaseIndexer::Collection.new(cdruid).collection_info
|
103
|
-
collection_data[cdruid] = cdata if cdata.present?
|
104
|
-
end
|
105
|
-
end
|
106
|
-
collection_data
|
107
|
-
end
|
108
44
|
end
|
109
45
|
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
require 'retries'
|
2
|
+
require 'rsolr'
|
3
|
+
require 'rest-client'
|
4
|
+
module BaseIndexer
|
5
|
+
module Solr
|
6
|
+
# Processes adds and deletes to the solr core
|
7
|
+
class Client
|
8
|
+
include DiscoveryIndexer::Logging
|
9
|
+
|
10
|
+
# Add the document to solr, retry if an error occurs.
|
11
|
+
# See https://github.com/ooyala/retries for docs on with_retries.
|
12
|
+
# @param id [String] the document id, usually it will be druid.
|
13
|
+
# @param solr_doc [Hash] a Hash representation of the solr document
|
14
|
+
# @param solr_connector [RSolr::Client] is an open connection with the solr core
|
15
|
+
# @param max_retries [Integer] the maximum number of tries before fail
|
16
|
+
def self.add(id, solr_doc, solr_connector, max_retries = 10)
|
17
|
+
process(id, solr_doc, solr_connector, max_retries, false)
|
18
|
+
end
|
19
|
+
|
20
|
+
# Delete the document from solr, retry if an error occurs.
|
21
|
+
# See https://github.com/ooyala/retries for docs on with_retries.
|
22
|
+
# @param id [String] the document id, usually it will be druid.
|
23
|
+
# @param solr_connector [RSolr::Client] is an open connection with the solr core
|
24
|
+
# @param max_retries [Integer] the maximum number of tries before fail
|
25
|
+
def self.delete(id, solr_connector, max_retries = 10)
|
26
|
+
process(id, {}, solr_connector, max_retries, true)
|
27
|
+
end
|
28
|
+
|
29
|
+
# It's an internal method that receives all the requests and deals with the
|
30
|
+
# SOLR core. This method can call add, delete, or update
|
31
|
+
#
|
32
|
+
# @param id [String] the document id, usually it will be druid.
|
33
|
+
# @param solr_doc [Hash] is the solr doc in hash format
|
34
|
+
# @param solr_connector [RSolr::Client] is an open connection with the solr core
|
35
|
+
# @param max_retries [Integer] the maximum number of tries before fail
|
36
|
+
def self.process(id, solr_doc, solr_connector, max_retries, is_delete = false)
|
37
|
+
handler = proc do |exception, attempt_number, _total_delay|
|
38
|
+
DiscoveryIndexer::Logging.logger.debug "#{exception.class} on attempt #{attempt_number} for #{id}"
|
39
|
+
end
|
40
|
+
|
41
|
+
with_retries(max_tries: max_retries, handler: handler, base_sleep_seconds: 1, max_sleep_seconds: 5) do |attempt|
|
42
|
+
DiscoveryIndexer::Logging.logger.debug "Attempt #{attempt} for #{id}"
|
43
|
+
|
44
|
+
if is_delete
|
45
|
+
DiscoveryIndexer::Logging.logger.info "Deleting #{id} on attempt #{attempt}"
|
46
|
+
solr_connector.delete_by_id(id, :add_attributes => {:commitWithin => 10000})
|
47
|
+
elsif allow_update?(solr_connector) && doc_exists?(id, solr_connector)
|
48
|
+
DiscoveryIndexer::Logging.logger.info "Updating #{id} on attempt #{attempt}"
|
49
|
+
update_solr_doc(id, solr_doc, solr_connector)
|
50
|
+
else
|
51
|
+
DiscoveryIndexer::Logging.logger.info "Indexing #{id} on attempt #{attempt}"
|
52
|
+
solr_connector.add(solr_doc, :add_attributes => {:commitWithin => 10000})
|
53
|
+
end
|
54
|
+
#solr_connector.commit
|
55
|
+
DiscoveryIndexer::Logging.logger.info "Completing #{id} successfully on attempt #{attempt}"
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
# @param solr_connector [RSolr::Client] is an open connection with the solr core
|
60
|
+
# @return [Boolean] true if the solr core allows the update feature
|
61
|
+
def self.allow_update?(solr_connector)
|
62
|
+
solr_connector.options.include?(:allow_update) ? solr_connector.options[:allow_update] : false
|
63
|
+
end
|
64
|
+
|
65
|
+
# @param id [String] the document id, usually it will be druid.
|
66
|
+
# @param solr_connector [RSolr::Client] is an open connection with the solr core
|
67
|
+
# @return [Boolean] true if the solr doc defined by this id exists
|
68
|
+
def self.doc_exists?(id, solr_connector)
|
69
|
+
response = solr_connector.get 'select', params: { q: 'id:"' + id + '"' }
|
70
|
+
response['response']['numFound'] == 1
|
71
|
+
end
|
72
|
+
|
73
|
+
# @param solr_connector [RSolr::Client] is an open connection with the solr core
|
74
|
+
# send hard commit to solr
|
75
|
+
def self.commit(solr_connector)
|
76
|
+
RestClient.post self.solr_url(solr_connector), {},:content_type => :json, :accept=>:json
|
77
|
+
end
|
78
|
+
|
79
|
+
# It is an internal method that updates the solr doc instead of adding a new one.
|
80
|
+
# @param id [String] the document id, usually it will be druid.
|
81
|
+
# @param solr_doc [Hash] is the solr doc in hash format
|
82
|
+
# @param solr_connector [RSolr::Client] is an open connection with the solr core
|
83
|
+
def self.update_solr_doc(id, solr_doc, solr_connector)
|
84
|
+
# update_solr_doc can't use RSolr because updating a hash doc is not supported
|
85
|
+
# so we need to build the json input manually
|
86
|
+
params = "[{\"id\":\"#{id}\","
|
87
|
+
solr_doc.each do |field_name, new_values|
|
88
|
+
next if field_name == :id
|
89
|
+
params += "\"#{field_name}\":"
|
90
|
+
new_values = [new_values] unless new_values.class == Array
|
91
|
+
new_values = new_values.map { |s| s.to_s.gsub('\\', '\\\\\\').gsub('"', '\"').strip } # strip leading/trailing spaces and escape quotes for each value
|
92
|
+
params += "{\"set\":[\"#{new_values.join('","')}\"]},"
|
93
|
+
end
|
94
|
+
params.chomp!(',')
|
95
|
+
params += '}]'
|
96
|
+
RestClient.post self.solr_url(solr_connector), params, content_type: :json, accept: :json
|
97
|
+
end
|
98
|
+
|
99
|
+
# adjust the solr_url so it works with or without a trailing /
|
100
|
+
# @param solr_connector [RSolr::Client] is an open connection with the solr core
|
101
|
+
# @return [String] the solr URL
|
102
|
+
def self.solr_url(solr_connector)
|
103
|
+
solr_url = solr_connector.options[:url]
|
104
|
+
if solr_url.end_with?('/')
|
105
|
+
"#{solr_url}update?commit=true"
|
106
|
+
else
|
107
|
+
"#{solr_url}/update?commit=true"
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'retries'
|
2
|
+
require 'rsolr'
|
3
|
+
|
4
|
+
module BaseIndexer
|
5
|
+
module Solr
|
6
|
+
# Performs writes to solr client based upon true and false release flags
|
7
|
+
class Writer
|
8
|
+
attr_reader :solr_targets_configs
|
9
|
+
|
10
|
+
include DiscoveryIndexer::Logging
|
11
|
+
|
12
|
+
def process(id, index_doc, targets, targets_configs)
|
13
|
+
@solr_targets_configs = targets_configs
|
14
|
+
index_targets = targets.select { |_, b| b }.keys
|
15
|
+
delete_targets = targets.reject { |_, b| b }.keys
|
16
|
+
|
17
|
+
# get targets with true
|
18
|
+
solr_index_client(id, index_doc, index_targets) if index_targets.present?
|
19
|
+
# get targets with false
|
20
|
+
solr_delete_client(id, delete_targets) if delete_targets.present?
|
21
|
+
end
|
22
|
+
|
23
|
+
def solr_delete_from_all(id, targets_configs)
|
24
|
+
# Get a list of all registered targets
|
25
|
+
@solr_targets_configs = targets_configs
|
26
|
+
targets = solr_targets_configs.keys
|
27
|
+
solr_delete_client(id, targets)
|
28
|
+
end
|
29
|
+
|
30
|
+
def solr_index_client(id, index_doc, targets)
|
31
|
+
targets.each do |solr_target|
|
32
|
+
solr_connector = get_connector_for_target(solr_target)
|
33
|
+
Client.add(id, index_doc, solr_connector) unless solr_connector.nil?
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def solr_delete_client(id, targets)
|
38
|
+
targets.each do |solr_target|
|
39
|
+
solr_connector = get_connector_for_target(solr_target)
|
40
|
+
Client.delete(id, solr_connector) unless solr_connector.nil?
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
def get_connector_for_target(solr_target)
|
45
|
+
solr_connector = nil
|
46
|
+
if solr_targets_configs.keys.include?(solr_target)
|
47
|
+
config = solr_targets_configs[solr_target]
|
48
|
+
solr_connector = RSolr.connect(config.deep_symbolize_keys)
|
49
|
+
end
|
50
|
+
solr_connector
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
data/lib/base_indexer/version.rb
CHANGED
data/lib/base_indexer.rb
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
require 'base_indexer/engine'
|
2
2
|
|
3
3
|
require 'base_indexer/main_indexer_engine'
|
4
|
-
require 'base_indexer/
|
5
|
-
require 'base_indexer/
|
6
|
-
require 'base_indexer/
|
4
|
+
require 'base_indexer/config/solr_configuration'
|
5
|
+
require 'base_indexer/config/solr_configuration_from_file'
|
6
|
+
require 'base_indexer/solr/client'
|
7
|
+
require 'base_indexer/solr/writer'
|
7
8
|
require 'discovery-indexer'
|
8
9
|
module BaseIndexer
|
9
10
|
mattr_accessor :indexer_class
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: base_indexer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ahmed Alsum
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-
|
12
|
+
date: 2015-12-14 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rails
|
@@ -220,7 +220,6 @@ executables: []
|
|
220
220
|
extensions: []
|
221
221
|
extra_rdoc_files: []
|
222
222
|
files:
|
223
|
-
- README.rdoc
|
224
223
|
- Rakefile
|
225
224
|
- app/controllers/base_indexer/about_controller.rb
|
226
225
|
- app/controllers/base_indexer/application_controller.rb
|
@@ -235,11 +234,12 @@ files:
|
|
235
234
|
- config/initializers/is_it_working.rb
|
236
235
|
- config/routes.rb
|
237
236
|
- lib/base_indexer.rb
|
238
|
-
- lib/base_indexer/
|
237
|
+
- lib/base_indexer/config/solr_configuration.rb
|
238
|
+
- lib/base_indexer/config/solr_configuration_from_file.rb
|
239
239
|
- lib/base_indexer/engine.rb
|
240
240
|
- lib/base_indexer/main_indexer_engine.rb
|
241
|
-
- lib/base_indexer/solr/
|
242
|
-
- lib/base_indexer/solr/
|
241
|
+
- lib/base_indexer/solr/client.rb
|
242
|
+
- lib/base_indexer/solr/writer.rb
|
243
243
|
- lib/base_indexer/version.rb
|
244
244
|
- lib/generators/base_indexer/install_generator.rb
|
245
245
|
- lib/generators/base_indexer/templates/solr.yml
|
data/README.rdoc
DELETED
@@ -1,93 +0,0 @@
|
|
1
|
-
{<img src="https://travis-ci.org/sul-dlss/base_indexer.svg?branch=master" alt="Build Status" />}[https://travis-ci.org/sul-dlss/base_indexer] {<img src="https://coveralls.io/repos/sul-dlss/base_indexer/badge.svg" alt="Coverage Status" />}[https://coveralls.io/r/sul-dlss/base_indexer]
|
2
|
-
|
3
|
-
|
4
|
-
= BaseIndexer
|
5
|
-
|
6
|
-
This project rocks and uses MIT-LICENSE.
|
7
|
-
|
8
|
-
== Running tests
|
9
|
-
|
10
|
-
Clone from github.
|
11
|
-
rake # first time setup and to generate all docs
|
12
|
-
bundle exec rake spec # just run the tests next time around
|
13
|
-
|
14
|
-
|
15
|
-
== Steps to hook the base_indexer engine in your app
|
16
|
-
* Generate new rails app
|
17
|
-
rails new my_indexer_app
|
18
|
-
|
19
|
-
* Edit Gemfile and add the base_indexer gem name
|
20
|
-
gem 'base_indexer'
|
21
|
-
|
22
|
-
* Run bundle install to download the gem
|
23
|
-
bundle install
|
24
|
-
|
25
|
-
* Mount the engine in your favorite domain.
|
26
|
-
mount BaseIndexer::Engine, at: '/items'
|
27
|
-
|
28
|
-
== Basic configuration
|
29
|
-
The engine is looking for the following values
|
30
|
-
|
31
|
-
config.solr_config_file_path = "#{config.root}/config/solr.yml"
|
32
|
-
DiscoveryIndexer::PURL_DEFAULT='https://purl.stanford.edu'
|
33
|
-
|
34
|
-
|
35
|
-
== Advanced features
|
36
|
-
|
37
|
-
The engine gives the developer the ability to extend any of its classes
|
38
|
-
|
39
|
-
To extend any of indexer features (purl-reader, mods-reader, mapper, solr-writer)
|
40
|
-
|
41
|
-
1. Create a new class that inherits from BaseIndexer::MainIndexerEngine
|
42
|
-
2. Create a new file named config/initializers/base_indexer.rb
|
43
|
-
3. In this file, add the following line. Replace 'MyIndexerClassName' with the fully qualified actual class name. The name should be between double quotes
|
44
|
-
BaseIndexer.indexer_class = "MyIndexerClassName"
|
45
|
-
4. In the new indexer class, you can override any of the functions that you need to change its implementation. For example, if you need to use a new mapper, you will override map function.
|
46
|
-
|
47
|
-
To extend mapper functionality.
|
48
|
-
1. Create a new class e.g., MyMapper that inherits from GeneralMapper or IndexMapper.
|
49
|
-
2. Implement MyMapper.map to convert the input to a solr doc hash.
|
50
|
-
3. Override MyIndexerClassName.map to call your new class instead of the default one.
|
51
|
-
|
52
|
-
== Rake Tasks For Indexing Druids
|
53
|
-
|
54
|
-
All rake tasks that perform batch indexing will generate log files in the "log" folder within the app itself. You can tail the log file to watch the progress. The
|
55
|
-
log file is also useful since you can pass it to the "reindexer" rake task to retry just the errored out druids. The name of the log file will depend on which
|
56
|
-
rake task you are running, and will be timestamped to be unique.
|
57
|
-
|
58
|
-
==== Index a single druid:
|
59
|
-
|
60
|
-
rake index RAILS_ENV=production target=revs_prod druid=oo000oo0001
|
61
|
-
|
62
|
-
==== Index a list of druids from a pre-assembly run, a remediation run, or a simple CSV:
|
63
|
-
|
64
|
-
rake log_indexer RAILS_ENV=production target=revs_prod log_file=/tmp/mailander_1.yaml log_type=preassembly = preassembly run
|
65
|
-
nohup rake log_indexer RAILS_ENV=production target=revs_prod log_file=/tmp/mailander_1.yaml log_type=preassembly & = for a long running process, which will be most runs that have more than a few dozen druids, nohup it
|
66
|
-
|
67
|
-
rake log_indexer RAILS_ENV=production target=revs_prod log_file=/tmp/mailander_1_remediate.yaml log_type=remediate = remediation run
|
68
|
-
|
69
|
-
rake log_indexer RAILS_ENV=production target=revs_prod log_file=/tmp/mailander.csv log_type=csv = a simple csv file -- it must have a header line, with the header of "druid" defining the items you wish to index
|
70
|
-
|
71
|
-
==== Index an entire collection, including the collection itself, along with all of its members (be sure to check the dor-fetcher-url parameter in the Rails environment you are running under to be sure it is connecting where you expect):
|
72
|
-
|
73
|
-
rake collection_indexer RAILS_ENV=production target=revs_prod collection_druid=oo000oo0001
|
74
|
-
nohup rake collection_indexer RAILS_ENV=production target=revs_prod collection_druid=oo000oo0001 & = for a long running process, e.g. a collection with more than a few dozen druids, nohup it
|
75
|
-
|
76
|
-
==== Re-Index Just Errored Out Items
|
77
|
-
|
78
|
-
If you had errors when indexing from a preassembly/remediation log or from indexing an entire collection, you can re-run the errored out druids only with the log file. All log files are kept in the log folder in the revs-indexer-service app.
|
79
|
-
|
80
|
-
rake reindexer RAILS_ENV=production target=revs_prod file=log/logfile.log
|
81
|
-
|
82
|
-
nohup rake reindexer RAILS_ENV=production target=revs_prod file=log/logfile.log & = probably no need to nohup unless there were a lot of errors
|
83
|
-
|
84
|
-
|
85
|
-
==== Delete Druids
|
86
|
-
|
87
|
-
Delete a list of druids specified in a CSV/txt file. Be careful, this will delete from all targets! Put one druid per line, no header is necessary.
|
88
|
-
|
89
|
-
rake delete_druids RAILS_ENV=production file=druid_list.txt
|
90
|
-
|
91
|
-
==== Delete a single druid
|
92
|
-
|
93
|
-
rake delete RAILS_ENV=production druid=oo000oo0001
|
@@ -1,46 +0,0 @@
|
|
1
|
-
module BaseIndexer
|
2
|
-
|
3
|
-
# It caches the collection information such as name and catkey
|
4
|
-
class Collection
|
5
|
-
|
6
|
-
def initialize(collection_druid)
|
7
|
-
@collection_druid = collection_druid
|
8
|
-
end
|
9
|
-
|
10
|
-
# Returns the collection name from cache, otherwise will fetch it from PURL.
|
11
|
-
#
|
12
|
-
# @param collection_druid [String] is the druid for a collection e.g., ab123cd4567
|
13
|
-
# @return [Hash] the collection data or {} if there is no name and catkey or the object
|
14
|
-
# is not a collection
|
15
|
-
def collection_info
|
16
|
-
from_cache || from_purl || {}
|
17
|
-
end
|
18
|
-
|
19
|
-
private
|
20
|
-
|
21
|
-
# @param [String] collection_druid is the druid for a collection e.g., ab123cd4567
|
22
|
-
# @return [String] return the collection label from cache if available, nil otherwise
|
23
|
-
def from_cache
|
24
|
-
Rails.cache.read(@collection_druid)
|
25
|
-
end
|
26
|
-
|
27
|
-
# @param [String] collection_druid is the druid for a collection e.g., ab123cd4567
|
28
|
-
# @return [String] return the collection label from purl if available, nil otherwise
|
29
|
-
def from_purl
|
30
|
-
return nil unless purl_model
|
31
|
-
return nil unless purl_model.is_collection
|
32
|
-
purl_data = { label: purl_model.label, ckey: purl_model.catkey }
|
33
|
-
Rails.cache.write(@collection_druid, purl_data, expires_in: 1.hours)
|
34
|
-
purl_data
|
35
|
-
end
|
36
|
-
|
37
|
-
def purl_model
|
38
|
-
@purl_model ||= begin
|
39
|
-
DiscoveryIndexer::InputXml::Purlxml.new(@collection_druid).load
|
40
|
-
rescue => e
|
41
|
-
Rails.logger.error "There is a problem in retrieving collection name and/or catkey for #{@collection_druid}. #{e.inspect}\n#{e.message }\n#{e.backtrace}"
|
42
|
-
nil
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|