discovery-indexer 0.10.1 → 0.10.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/discovery-indexer/collection.rb +48 -0
- data/lib/{errors.rb → discovery-indexer/errors.rb} +0 -0
- data/lib/discovery-indexer/general_mapper.rb +44 -0
- data/lib/{logging.rb → discovery-indexer/logging.rb} +0 -0
- data/lib/{reader → discovery-indexer/reader}/modsxml.rb +0 -9
- data/lib/{reader → discovery-indexer/reader}/modsxml_reader.rb +0 -0
- data/lib/{reader → discovery-indexer/reader}/purlxml.rb +1 -11
- data/lib/{reader → discovery-indexer/reader}/purlxml_model.rb +0 -0
- data/lib/{reader → discovery-indexer/reader}/purlxml_parser_strict.rb +8 -6
- data/lib/{reader → discovery-indexer/reader}/purlxml_reader.rb +1 -3
- data/lib/{version.rb → discovery-indexer/version.rb} +1 -1
- data/lib/discovery-indexer.rb +10 -13
- metadata +27 -15
- data/lib/mapper/general_mapper.rb +0 -27
- data/lib/reader/purlxml_parser.rb +0 -13
- data/lib/writer/solr_client.rb +0 -113
- data/lib/writer/solr_writer.rb +0 -54
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 94eb6c9bdbd29fc02f9aece9351e6c4af77a59b1
|
4
|
+
data.tar.gz: bb54745bb7c03fb7a60559e55cc7804db706cd8b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 80a631460ec997ab2c92b90836bca19ff8a4fc12ab70f7bbd70684cda9a152f29aa8a1d6c6431bb914a2ffc8a8ea7af7cad9196c00f6dda902dcdaaace1a8202
|
7
|
+
data.tar.gz: e02780cf225013328439cbe55b4c890ceb6fba82e77b9f3dfe98a617aaa591c310bc7f97365b1f0dfe4a11c16400e7484dd0b03222a7e59edaa56d5442521dfb
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module DiscoveryIndexer
|
2
|
+
|
3
|
+
# It caches the collection information such as name and catkey
|
4
|
+
class Collection
|
5
|
+
|
6
|
+
attr_reader :druid
|
7
|
+
delegate :present?, to: :collection_info
|
8
|
+
|
9
|
+
def initialize(druid)
|
10
|
+
@druid = druid
|
11
|
+
end
|
12
|
+
|
13
|
+
def searchworks_id
|
14
|
+
collection_info[:ckey] || druid
|
15
|
+
end
|
16
|
+
|
17
|
+
def title
|
18
|
+
collection_info[:title]
|
19
|
+
end
|
20
|
+
|
21
|
+
private
|
22
|
+
|
23
|
+
# Returns the collection name from cache, otherwise will fetch it from PURL.
|
24
|
+
#
|
25
|
+
# @param collection_druid [String] is the druid for a collection e.g., ab123cd4567
|
26
|
+
# @return [Array<String>] the collection data or [] if there is no name and catkey or the object
|
27
|
+
# is not a collection
|
28
|
+
def collection_info
|
29
|
+
from_purl || {}
|
30
|
+
end
|
31
|
+
|
32
|
+
# @param [String] collection_druid is the druid for a collection e.g., ab123cd4567
|
33
|
+
# @return [String] return the collection label from purl if available, nil otherwise
|
34
|
+
def from_purl
|
35
|
+
return unless purl_model
|
36
|
+
{ title: purl_model.label, ckey: purl_model.catkey }
|
37
|
+
end
|
38
|
+
|
39
|
+
def purl_model
|
40
|
+
@purl_model ||= begin
|
41
|
+
DiscoveryIndexer::InputXml::Purlxml.new(druid).load
|
42
|
+
rescue => e
|
43
|
+
DiscoveryIndexer::Logging.logger.error "There is a problem in retrieving collection name and/or catkey for #{druid}. #{e.inspect}\n#{e.message }\n#{e.backtrace}"
|
44
|
+
nil
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
File without changes
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module DiscoveryIndexer
|
2
|
+
class GeneralMapper
|
3
|
+
|
4
|
+
attr_reader :druid
|
5
|
+
|
6
|
+
# Initializes an instance from IndexMapper
|
7
|
+
# @param [String] druid e.g. ab123cd4567
|
8
|
+
# @param [Stanford::Mods::Record] modsxml represents the MODS xml for the druid
|
9
|
+
# @param [DiscoveryIndexer::Reader::PurlxmlModel] purlxml represents the purlxml model
|
10
|
+
# @param [Hash] collection_data represents a hash of collection_druid and catkey
|
11
|
+
# collection_data = {'aa00bb0001'=>{:name=>'Test Collection Name',:ckey=>'000001'},'nt028fd5773'=>{:name=>'Revs Institute Archive',:ckey=>'000002'}}
|
12
|
+
def initialize(druid)
|
13
|
+
@druid = druid
|
14
|
+
end
|
15
|
+
|
16
|
+
# Create a Hash representing a Solr doc, with all MODS related fields populated.
|
17
|
+
# @return [Hash] Hash representing the Solr document
|
18
|
+
def convert_to_solr_doc
|
19
|
+
solr_doc = {}
|
20
|
+
solr_doc[:id] = druid
|
21
|
+
solr_doc[:title] = modsxml.sw_full_title
|
22
|
+
solr_doc
|
23
|
+
end
|
24
|
+
|
25
|
+
# It converts collection_druids list to a hash with names. If the druid doesn't
|
26
|
+
# have a collection name, it will be excluded from the hash
|
27
|
+
# @return [Hash] a hash for collection druid and its name
|
28
|
+
# !{"ab123cd4567"=>"Collection 1", "ef123gh4567"=>"Collection 2"}
|
29
|
+
def collection_data
|
30
|
+
@collection_data ||= collection_druids.map do |cdruid|
|
31
|
+
DiscoveryIndexer::Collection.new(cdruid)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
def collection_druids
|
35
|
+
purlxml.collection_druids
|
36
|
+
end
|
37
|
+
def modsxml
|
38
|
+
@modsxml ||= DiscoveryIndexer::InputXml::Modsxml.new(druid).load
|
39
|
+
end
|
40
|
+
def purlxml
|
41
|
+
@purlxml ||= DiscoveryIndexer::InputXml::Purlxml.new(druid).load
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
File without changes
|
@@ -25,15 +25,6 @@ module DiscoveryIndexer
|
|
25
25
|
|
26
26
|
modsxml_model = Stanford::Mods::Record.new
|
27
27
|
modsxml_model.from_nk_node(@modsxml_ng_doc)
|
28
|
-
modsxml_model
|
29
|
-
end
|
30
|
-
|
31
|
-
# loads the mods xml to stanford mods model for the fedora object defind in the druid,
|
32
|
-
# it reads the mods xml from PURL server with every call
|
33
|
-
# @return [Stanford::Mods::Record] represents the mods xml
|
34
|
-
def reload
|
35
|
-
@modsxml_ng_doc = ModsxmlReader.read(@druid)
|
36
|
-
load
|
37
28
|
end
|
38
29
|
end
|
39
30
|
end
|
File without changes
|
@@ -19,17 +19,7 @@ module DiscoveryIndexer
|
|
19
19
|
# @return [PurlxmlModel] represents the purlxml
|
20
20
|
def load
|
21
21
|
@purlxml_ng_doc = PurlxmlReader.read(@druid) if @purlxml_ng_doc.nil?
|
22
|
-
purlxml_parser = PurlxmlParserStrict.new(@druid, @purlxml_ng_doc)
|
23
|
-
purlxml_model = purlxml_parser.parse
|
24
|
-
purlxml_model
|
25
|
-
end
|
26
|
-
|
27
|
-
# loads the purl xml to purlxml model for the fedora object defind in the druid
|
28
|
-
# it reads the purl xml from PURL server with every call
|
29
|
-
# @return [PurlxmlModel] represents the purlxml
|
30
|
-
def reload
|
31
|
-
@purlxml_ng_doc = PurlxmlReader.read(@druid)
|
32
|
-
load
|
22
|
+
purlxml_parser = PurlxmlParserStrict.new(@druid, @purlxml_ng_doc).parse
|
33
23
|
end
|
34
24
|
end
|
35
25
|
end
|
File without changes
|
@@ -1,12 +1,17 @@
|
|
1
1
|
module DiscoveryIndexer
|
2
2
|
module InputXml
|
3
|
-
class PurlxmlParserStrict
|
3
|
+
class PurlxmlParserStrict
|
4
4
|
include DiscoveryIndexer::Logging
|
5
5
|
|
6
6
|
RDF_NAMESPACE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
|
7
7
|
OAI_DC_NAMESPACE = 'http://www.openarchives.org/OAI/2.0/oai_dc/'
|
8
8
|
MODS_NAMESPACE = 'http://www.loc.gov/mods/v3'
|
9
9
|
|
10
|
+
def initialize(druid, purlxml_ng_doc)
|
11
|
+
@purlxml_ng_doc = purlxml_ng_doc
|
12
|
+
@druid = druid
|
13
|
+
end
|
14
|
+
|
10
15
|
# it parses the purlxml into a purlxml model
|
11
16
|
# @return [PurlxmlModel] represents the purlxml as parsed based on the parser rules
|
12
17
|
def parse
|
@@ -117,12 +122,9 @@ module DiscoveryIndexer
|
|
117
122
|
ns_hash = { 'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', 'fedora' => 'info:fedora/fedora-system:def/relations-external#', '' => '' }
|
118
123
|
is_member_of_nodes ||= @purlxml_ng_doc.xpath('/publicObject/rdf:RDF/rdf:Description/fedora:isMemberOfCollection/@rdf:resource', ns_hash)
|
119
124
|
# from public_xml rels-ext
|
120
|
-
|
121
|
-
|
122
|
-
druids << n.value.split('druid:').last unless n.value.empty?
|
125
|
+
is_member_of_nodes.reject { |n| n.value.empty? }.map do |n|
|
126
|
+
n.value.split('druid:').last
|
123
127
|
end
|
124
|
-
return nil if druids.empty?
|
125
|
-
druids
|
126
128
|
end
|
127
129
|
|
128
130
|
# the value of the type attribute for a DOR object's contentMetadata
|
@@ -9,10 +9,8 @@ module DiscoveryIndexer
|
|
9
9
|
# @raise [MissingPublicXml] if there's no purl xml available for this druid
|
10
10
|
def self.read(druid)
|
11
11
|
purlxml_uri = "#{DiscoveryIndexer::PURL_DEFAULT}/#{druid}.xml"
|
12
|
-
|
13
12
|
begin
|
14
|
-
|
15
|
-
return purlxml_object
|
13
|
+
Nokogiri::XML(open(purlxml_uri))
|
16
14
|
rescue
|
17
15
|
raise DiscoveryIndexer::Errors::MissingPurlPage.new(purlxml_uri)
|
18
16
|
end
|
data/lib/discovery-indexer.rb
CHANGED
@@ -1,19 +1,16 @@
|
|
1
|
-
require 'errors'
|
2
|
-
require 'logging'
|
1
|
+
require 'discovery-indexer/errors'
|
2
|
+
require 'discovery-indexer/logging'
|
3
3
|
|
4
|
-
require 'reader/purlxml'
|
5
|
-
require 'reader/purlxml_reader'
|
6
|
-
require 'reader/
|
7
|
-
require 'reader/
|
8
|
-
require 'reader/purlxml_model'
|
4
|
+
require 'discovery-indexer/reader/purlxml'
|
5
|
+
require 'discovery-indexer/reader/purlxml_reader'
|
6
|
+
require 'discovery-indexer/reader/purlxml_parser_strict'
|
7
|
+
require 'discovery-indexer/reader/purlxml_model'
|
9
8
|
|
10
|
-
require 'reader/modsxml'
|
11
|
-
require 'reader/modsxml_reader'
|
9
|
+
require 'discovery-indexer/reader/modsxml'
|
10
|
+
require 'discovery-indexer/reader/modsxml_reader'
|
12
11
|
|
13
|
-
require 'mapper/general_mapper'
|
14
|
-
|
15
|
-
require 'writer/solr_client'
|
16
|
-
require 'writer/solr_writer'
|
12
|
+
require 'discovery-indexer/general_mapper'
|
13
|
+
require 'discovery-indexer/collection'
|
17
14
|
|
18
15
|
# require 'utilities/extract_sub_targets'
|
19
16
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: discovery-indexer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.10.1
|
4
|
+
version: 0.10.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ahmed AlSum
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-
|
12
|
+
date: 2015-12-14 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -81,6 +81,20 @@ dependencies:
|
|
81
81
|
- - ">="
|
82
82
|
- !ruby/object:Gem::Version
|
83
83
|
version: '0'
|
84
|
+
- !ruby/object:Gem::Dependency
|
85
|
+
name: rake
|
86
|
+
requirement: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - ">="
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '0'
|
91
|
+
type: :development
|
92
|
+
prerelease: false
|
93
|
+
version_requirements: !ruby/object:Gem::Requirement
|
94
|
+
requirements:
|
95
|
+
- - ">="
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '0'
|
84
98
|
- !ruby/object:Gem::Dependency
|
85
99
|
name: rspec
|
86
100
|
requirement: !ruby/object:Gem::Requirement
|
@@ -145,19 +159,17 @@ extensions: []
|
|
145
159
|
extra_rdoc_files: []
|
146
160
|
files:
|
147
161
|
- lib/discovery-indexer.rb
|
148
|
-
- lib/
|
149
|
-
- lib/
|
150
|
-
- lib/
|
151
|
-
- lib/
|
152
|
-
- lib/reader/
|
153
|
-
- lib/reader/
|
154
|
-
- lib/reader/
|
155
|
-
- lib/reader/
|
156
|
-
- lib/reader/purlxml_parser_strict.rb
|
157
|
-
- lib/reader/purlxml_reader.rb
|
158
|
-
- lib/version.rb
|
159
|
-
- lib/writer/solr_client.rb
|
160
|
-
- lib/writer/solr_writer.rb
|
162
|
+
- lib/discovery-indexer/collection.rb
|
163
|
+
- lib/discovery-indexer/errors.rb
|
164
|
+
- lib/discovery-indexer/general_mapper.rb
|
165
|
+
- lib/discovery-indexer/logging.rb
|
166
|
+
- lib/discovery-indexer/reader/modsxml.rb
|
167
|
+
- lib/discovery-indexer/reader/modsxml_reader.rb
|
168
|
+
- lib/discovery-indexer/reader/purlxml.rb
|
169
|
+
- lib/discovery-indexer/reader/purlxml_model.rb
|
170
|
+
- lib/discovery-indexer/reader/purlxml_parser_strict.rb
|
171
|
+
- lib/discovery-indexer/reader/purlxml_reader.rb
|
172
|
+
- lib/discovery-indexer/version.rb
|
161
173
|
homepage:
|
162
174
|
licenses:
|
163
175
|
- Stanford University
|
@@ -1,27 +0,0 @@
|
|
1
|
-
module DiscoveryIndexer
|
2
|
-
module Mapper
|
3
|
-
class GeneralMapper
|
4
|
-
# Initializes an instance from IndexMapper
|
5
|
-
# @param [String] druid e.g. ab123cd4567
|
6
|
-
# @param [Stanford::Mods::Record] modsxml represents the MODS xml for the druid
|
7
|
-
# @param [DiscoveryIndexer::Reader::PurlxmlModel] purlxml represents the purlxml model
|
8
|
-
# @param [Hash] collection_data represents a hash of collection_druid and catkey
|
9
|
-
# e.g. @collection_data = {'aa00bb0001'=>{:name=>'Test Collection Name',:ckey=>'000001'},'nt028fd5773'=>{:name=>'Revs Institute Archive',:ckey=>'000002'}}
|
10
|
-
def initialize(druid, modsxml, purlxml, collection_data = {})
|
11
|
-
@druid = druid
|
12
|
-
@modsxml = modsxml
|
13
|
-
@purlxml = purlxml
|
14
|
-
@collection_data = collection_data
|
15
|
-
end
|
16
|
-
|
17
|
-
# Create a Hash representing a Solr doc, with all MODS related fields populated.
|
18
|
-
# @return [Hash] Hash representing the Solr document
|
19
|
-
def convert_to_solr_doc
|
20
|
-
solr_doc = {}
|
21
|
-
solr_doc[:id] = @druid
|
22
|
-
solr_doc[:title] = @modsxml.sw_full_title
|
23
|
-
solr_doc
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
data/lib/writer/solr_client.rb
DELETED
@@ -1,113 +0,0 @@
|
|
1
|
-
require 'retries'
|
2
|
-
require 'rsolr'
|
3
|
-
require 'rest-client'
|
4
|
-
module DiscoveryIndexer
|
5
|
-
module Writer
|
6
|
-
# Processes adds and deletes to the solr core
|
7
|
-
class SolrClient
|
8
|
-
include DiscoveryIndexer::Logging
|
9
|
-
|
10
|
-
# Add the document to solr, retry if an error occurs.
|
11
|
-
# See https://github.com/ooyala/retries for docs on with_retries.
|
12
|
-
# @param id [String] the document id, usually it will be druid.
|
13
|
-
# @param solr_doc [Hash] a Hash representation of the solr document
|
14
|
-
# @param solr_connector [RSolr::Client] is an open connection with the solr core
|
15
|
-
# @param max_retries [Integer] the maximum number of tries before fail
|
16
|
-
def self.add(id, solr_doc, solr_connector, max_retries = 10)
|
17
|
-
process(id, solr_doc, solr_connector, max_retries, false)
|
18
|
-
end
|
19
|
-
|
20
|
-
# Add the document to solr, retry if an error occurs.
|
21
|
-
# See https://github.com/ooyala/retries for docs on with_retries.
|
22
|
-
# @param id [String] the document id, usually it will be druid.
|
23
|
-
# @param solr_connector[RSolr::Client] is an open connection with the solr core
|
24
|
-
# @param max_retries [Integer] the maximum number of tries before fail
|
25
|
-
def self.delete(id, solr_connector, max_retries = 10)
|
26
|
-
process(id, {}, solr_connector, max_retries, true)
|
27
|
-
end
|
28
|
-
|
29
|
-
# It's an internal method that receives all the requests and deal with
|
30
|
-
# SOLR core. This method can call add, delete, or update
|
31
|
-
#
|
32
|
-
# @param id [String] the document id, usually it will be druid.
|
33
|
-
# @param solr_doc [Hash] is the solr doc in hash format
|
34
|
-
# @param solr_connector [RSolr::Client] is an open connection with the solr core
|
35
|
-
# @param max_retries [Integer] the maximum number of tries before fail
|
36
|
-
def self.process(id, solr_doc, solr_connector, max_retries, is_delete = false)
|
37
|
-
handler = proc do |exception, attempt_number, _total_delay|
|
38
|
-
DiscoveryIndexer::Logging.logger.debug "#{exception.class} on attempt #{attempt_number} for #{id}"
|
39
|
-
end
|
40
|
-
|
41
|
-
with_retries(max_tries: max_retries, handler: handler, base_sleep_seconds: 1, max_sleep_seconds: 5) do |attempt|
|
42
|
-
DiscoveryIndexer::Logging.logger.debug "Attempt #{attempt} for #{id}"
|
43
|
-
|
44
|
-
if is_delete
|
45
|
-
DiscoveryIndexer::Logging.logger.info "Deleting #{id} on attempt #{attempt}"
|
46
|
-
solr_connector.delete_by_id(id, :add_attributes => {:commitWithin => 10000})
|
47
|
-
elsif allow_update?(solr_connector) && doc_exists?(id, solr_connector)
|
48
|
-
DiscoveryIndexer::Logging.logger.info "Updating #{id} on attempt #{attempt}"
|
49
|
-
update_solr_doc(id, solr_doc, solr_connector)
|
50
|
-
else
|
51
|
-
DiscoveryIndexer::Logging.logger.info "Indexing #{id} on attempt #{attempt}"
|
52
|
-
solr_connector.add(solr_doc, :add_attributes => {:commitWithin => 10000})
|
53
|
-
end
|
54
|
-
#solr_connector.commit
|
55
|
-
DiscoveryIndexer::Logging.logger.info "Completing #{id} successfully on attempt #{attempt}"
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
# @param solr_connector [RSolr::Client] is an open connection with the solr core
|
60
|
-
# @return [Boolean] true if the solr core allowing update feature
|
61
|
-
def self.allow_update?(solr_connector)
|
62
|
-
solr_connector.options.include?(:allow_update) ? solr_connector.options[:allow_update] : false
|
63
|
-
end
|
64
|
-
|
65
|
-
# @param id [String] the document id, usually it will be druid.
|
66
|
-
# @param solr_connector [RSolr::Client] is an open connection with the solr core
|
67
|
-
# @return [Boolean] true if the solr doc defined by this id exists
|
68
|
-
def self.doc_exists?(id, solr_connector)
|
69
|
-
response = solr_connector.get 'select', params: { q: 'id:"' + id + '"' }
|
70
|
-
response['response']['numFound'] == 1
|
71
|
-
end
|
72
|
-
|
73
|
-
# @param solr_connector [RSolr::Client] is an open connection with the solr core
|
74
|
-
# send hard commit to solr
|
75
|
-
def self.commit(solr_connector)
|
76
|
-
RestClient.post self.solr_url(solr_connector), {},:content_type => :json, :accept=>:json
|
77
|
-
end
|
78
|
-
|
79
|
-
# It is an internal method that updates the solr doc instead of adding a new one.
|
80
|
-
# @param id [String] the document id, usually it will be druid.
|
81
|
-
# @param solr_doc [Hash] is the solr doc in hash format
|
82
|
-
# @param solr_connector [RSolr::Client] is an open connection with the solr core
|
83
|
-
def self.update_solr_doc(id, solr_doc, solr_connector)
|
84
|
-
# update_solr_doc can't used RSolr because updating hash doc is not supported
|
85
|
-
# so we need to build the json input manually
|
86
|
-
params = "[{\"id\":\"#{id}\","
|
87
|
-
solr_doc.each do |field_name, new_values|
|
88
|
-
next if field_name == :id
|
89
|
-
params += "\"#{field_name}\":"
|
90
|
-
new_values = [new_values] unless new_values.class == Array
|
91
|
-
new_values = new_values.map { |s| s.to_s.gsub('\\', '\\\\\\').gsub('"', '\"').strip } # strip leading/trailing spaces and escape quotes for each value
|
92
|
-
params += "{\"set\":[\"#{new_values.join('","')}\"]},"
|
93
|
-
end
|
94
|
-
params.chomp!(',')
|
95
|
-
params += '}]'
|
96
|
-
RestClient.post self.solr_url(solr_connector), params, content_type: :json, accept: :json
|
97
|
-
end
|
98
|
-
|
99
|
-
# adjust the solr_url so it works with or without a trailing /
|
100
|
-
# @param solr_connector [RSolr::Client] is an open connection with the solr core
|
101
|
-
# @return [String] the solr URL
|
102
|
-
def self.solr_url(solr_connector)
|
103
|
-
solr_url = solr_connector.options[:url]
|
104
|
-
if solr_url.end_with?('/')
|
105
|
-
"#{solr_url}update?commit=true"
|
106
|
-
else
|
107
|
-
"#{solr_url}/update?commit=true"
|
108
|
-
end
|
109
|
-
end
|
110
|
-
|
111
|
-
end
|
112
|
-
end
|
113
|
-
end
|
data/lib/writer/solr_writer.rb
DELETED
@@ -1,54 +0,0 @@
|
|
1
|
-
require 'retries'
|
2
|
-
require 'rsolr'
|
3
|
-
|
4
|
-
module DiscoveryIndexer
|
5
|
-
module Writer
|
6
|
-
# Performs writes to solr client based upon true and false release flags
|
7
|
-
class SolrWriter
|
8
|
-
attr_reader :solr_targets_configs
|
9
|
-
|
10
|
-
include DiscoveryIndexer::Logging
|
11
|
-
|
12
|
-
def process(id, index_doc, targets, targets_configs)
|
13
|
-
@solr_targets_configs = targets_configs
|
14
|
-
index_targets = targets.select { |_, b| b }.keys
|
15
|
-
delete_targets = targets.reject { |_, b| b }.keys
|
16
|
-
|
17
|
-
# get targets with true
|
18
|
-
solr_index_client(id, index_doc, index_targets) if index_targets.present?
|
19
|
-
# get targets with false
|
20
|
-
solr_delete_client(id, delete_targets) if delete_targets.present?
|
21
|
-
end
|
22
|
-
|
23
|
-
def solr_delete_from_all(id, targets_configs)
|
24
|
-
# Get a list of all registered targets
|
25
|
-
@solr_targets_configs = targets_configs
|
26
|
-
targets = solr_targets_configs.keys
|
27
|
-
solr_delete_client(id, targets)
|
28
|
-
end
|
29
|
-
|
30
|
-
def solr_index_client(id, index_doc, targets)
|
31
|
-
targets.each do |solr_target|
|
32
|
-
solr_connector = get_connector_for_target(solr_target)
|
33
|
-
SolrClient.add(id, index_doc, solr_connector) unless solr_connector.nil?
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
def solr_delete_client(id, targets)
|
38
|
-
targets.each do |solr_target|
|
39
|
-
solr_connector = get_connector_for_target(solr_target)
|
40
|
-
SolrClient.delete(id, solr_connector) unless solr_connector.nil?
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
def get_connector_for_target(solr_target)
|
45
|
-
solr_connector = nil
|
46
|
-
if solr_targets_configs.keys.include?(solr_target)
|
47
|
-
config = solr_targets_configs[solr_target]
|
48
|
-
solr_connector = RSolr.connect(config.deep_symbolize_keys)
|
49
|
-
end
|
50
|
-
solr_connector
|
51
|
-
end
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|