discovery-indexer 0.6 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -13
- data/lib/utilities/extract_sub_targets.rb +13 -0
- data/lib/version.rb +1 -1
- data/lib/writer/solr_client.rb +58 -15
- data/lib/writer/solr_writer.rb +1 -1
- metadata +35 -20
checksums.yaml
CHANGED
@@ -1,15 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
|
5
|
-
data.tar.gz: !binary |-
|
6
|
-
NjBhYzU1N2VkNjMzMWM0YjA1NGI2MzQ5YWFjY2FkOThjYjVjMTZlMQ==
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: f6c25de0587885161b4a8fc2882808e21ecaa915
|
4
|
+
data.tar.gz: 74bc83405539956a12749de6de5af8e39b8e2d8a
|
7
5
|
SHA512:
|
8
|
-
metadata.gz:
|
9
|
-
|
10
|
-
Y2Q5MzAxMGFmMDFhZjQ4YzI5ZTkwMTM3NjEyMGJhMGNjMGE2NmRlN2VhYzM5
|
11
|
-
ZjQ1ZTFkMjg3MWZiNzdkYjczNDU4OTczY2RkODRkNmI0YWE5YTg=
|
12
|
-
data.tar.gz: !binary |-
|
13
|
-
ZDA5NGIxYzUyOGVkNzU2NmQ1ZDQ3NWUwN2NhZWFkOGRiYzk4NzhjOTE0M2Mz
|
14
|
-
NTM1NjMyYjhmMDg1OTdkZTA2NzY4NWM2Njg3Y2JiMjZhODM2Mzg1MWYyYWEx
|
15
|
-
ZjQzODgxYzEwOGY3ZWQzNTRiZThkZGJhN2ZiMTdiYjczZTIwMGQ=
|
6
|
+
metadata.gz: 012c0f66b119167e989aa977fea213fb61cf4c8b89b96cce59016cdc7918d1b6205e9429cbdeb717f64e1c540b17e54967c59584b8421750bcdf95a2be6e0a56
|
7
|
+
data.tar.gz: 38470801e1d4558ddfd27e8e4dd9933b1617e6a93cd727e897cf668e0b5acb26ba0858538af1fe66c001fc24cbdeebb963eb983d559638183e81e10b89c1e9d1
|
data/lib/version.rb
CHANGED
data/lib/writer/solr_client.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'retries'
|
2
2
|
require 'rsolr'
|
3
|
-
|
3
|
+
require 'rest-client'
|
4
4
|
module DiscoveryIndexer
|
5
5
|
module Writer
|
6
6
|
class SolrClient
|
@@ -8,42 +8,85 @@ module DiscoveryIndexer
|
|
8
8
|
|
9
9
|
# Add the document to solr, retry if an error occurs.
|
10
10
|
# See https://github.com/ooyala/retries for docs on with_retries.
|
11
|
-
# @param [
|
12
|
-
# @param [
|
13
|
-
# @param [
|
11
|
+
# @param id [String] the document id, usually it will be druid.
|
12
|
+
# @param solr_doc [Hash] a Hash representation of the solr document
|
13
|
+
# @param solr_connector [RSolr::Client] is an open connection with the solr core
|
14
|
+
# @param max_retries [Integer] the maximum number of tries before fail
|
14
15
|
def self.add(id, solr_doc, solr_connector, max_retries = 10)
|
15
16
|
process(id, solr_doc, solr_connector, max_retries, is_delete=false)
|
16
17
|
end
|
17
18
|
|
18
19
|
# Add the document to solr, retry if an error occurs.
|
19
20
|
# See https://github.com/ooyala/retries for docs on with_retries.
|
20
|
-
# @param [
|
21
|
-
# @param [RSolr::Client]
|
22
|
-
# @param [Integer]
|
23
|
-
def self.delete(id,
|
24
|
-
process(id,
|
21
|
+
# @param id [String] the document id, usually it will be druid.
|
22
|
+
# @param solr_connector[RSolr::Client] is an open connection with the solr core
|
23
|
+
# @param max_retries [Integer] the maximum number of tries before fail
|
24
|
+
def self.delete(id, solr_connector, max_retries = 10)
|
25
|
+
process(id, {}, solr_connector, max_retries, is_delete=true)
|
25
26
|
end
|
26
|
-
|
27
|
+
|
28
|
+
# It's an internal method that receives all the requests and deal with
|
29
|
+
# SOLR core. This method can call add, delete, or update
|
30
|
+
#
|
31
|
+
# @param id [String] the document id, usually it will be druid.
|
32
|
+
# @param solr_doc [Hash] is the solr doc in hash format
|
33
|
+
# @param solr_connector [RSolr::Client] is an open connection with the solr core
|
34
|
+
# @param max_retries [Integer] the maximum number of tries before fail
|
27
35
|
def self.process(id, solr_doc, solr_connector, max_retries, is_delete=false)
|
28
36
|
handler = Proc.new do |exception, attempt_number, total_delay|
|
29
37
|
DiscoveryIndexer::Logging.logger.debug "#{exception.class} on attempt #{attempt_number} for #{id}"
|
30
38
|
end
|
31
|
-
|
39
|
+
|
32
40
|
with_retries(:max_tries => max_retries, :handler => handler, :base_sleep_seconds => 1, :max_sleep_seconds => 5) do |attempt|
|
33
41
|
DiscoveryIndexer::Logging.logger.debug "Attempt #{attempt} for #{id}"
|
34
|
-
|
42
|
+
|
35
43
|
if is_delete
|
36
44
|
solr_connector.delete_by_id(id)
|
37
45
|
DiscoveryIndexer::Logging.logger.info "Successfully deleted #{id} on attempt #{attempt}"
|
46
|
+
elsif allow_update?(solr_connector) && doc_exists?(id,solr_connector)
|
47
|
+
update_solr_doc(id,solr_doc,solr_connector)
|
48
|
+
DiscoveryIndexer::Logging.logger.info "Successfully updated #{id} on attempt #{attempt}"
|
38
49
|
else
|
39
50
|
solr_connector.add(solr_doc)
|
40
51
|
DiscoveryIndexer::Logging.logger.info "Successfully indexed #{id} on attempt #{attempt}"
|
41
52
|
end
|
42
|
-
|
53
|
+
solr_connector.commit
|
43
54
|
end
|
44
|
-
solr_connector.commit
|
45
55
|
end
|
46
|
-
|
56
|
+
|
57
|
+
# @param solr_connector [RSolr::Client] is an open connection with the solr core
|
58
|
+
# @return [Boolean] true if the solr core allowing update feature
|
59
|
+
def self.allow_update?(solr_connector)
|
60
|
+
return solr_connector.options.include?(:allow_update) ? solr_connector.options[:allow_update] : false
|
61
|
+
end
|
62
|
+
|
63
|
+
# @param id [String] the document id, usually it will be druid.
|
64
|
+
# @param solr_connector [RSolr::Client] is an open connection with the solr core
|
65
|
+
# @return [Boolean] true if the solr doc defined by this id exists
|
66
|
+
def self.doc_exists?(id,solr_connector)
|
67
|
+
response=solr_connector.get 'select', :params=>{:q=>'id:"' + id + '"'}
|
68
|
+
response['response']['numFound'] == 1
|
69
|
+
end
|
70
|
+
|
71
|
+
# It is an internal method that updates the solr doc instead of adding a new one.
|
72
|
+
def self.update_solr_doc(id,solr_doc,solr_connector)
|
73
|
+
# update_solr_doc can't used RSolr because updating hash doc is not supported
|
74
|
+
# so we need to build the json input manually
|
75
|
+
url="#{solr_connector.options[:url]}update?commit=true"
|
76
|
+
params="[{\"id\":\"#{id}\","
|
77
|
+
solr_doc.each do |field_name,new_values|
|
78
|
+
unless field_name == :id
|
79
|
+
params+="\"#{field_name}\":"
|
80
|
+
new_values=[new_values] unless new_values.class==Array
|
81
|
+
new_values = new_values.map {|s| s.to_s.gsub("\\","\\\\\\").gsub('"','\"').strip} # strip leading/trailing spaces and escape quotes for each value
|
82
|
+
params+="{\"set\":[\"#{new_values.join('","')}\"]},"
|
83
|
+
end
|
84
|
+
end
|
85
|
+
params.chomp!(',')
|
86
|
+
params+="}]"
|
87
|
+
RestClient.post url, params,:content_type => :json, :accept=>:json
|
88
|
+
end
|
89
|
+
|
47
90
|
end
|
48
91
|
end
|
49
92
|
end
|
data/lib/writer/solr_writer.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: discovery-indexer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 0.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ahmed AlSum
|
@@ -14,112 +14,126 @@ dependencies:
|
|
14
14
|
name: nokogiri
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: stanford-mods
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: retries
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: rsolr
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rest-client
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
67
81
|
- !ruby/object:Gem::Version
|
68
82
|
version: '0'
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: rspec
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
72
86
|
requirements:
|
73
|
-
- -
|
87
|
+
- - ">="
|
74
88
|
- !ruby/object:Gem::Version
|
75
89
|
version: '0'
|
76
90
|
type: :development
|
77
91
|
prerelease: false
|
78
92
|
version_requirements: !ruby/object:Gem::Requirement
|
79
93
|
requirements:
|
80
|
-
- -
|
94
|
+
- - ">="
|
81
95
|
- !ruby/object:Gem::Version
|
82
96
|
version: '0'
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
98
|
name: webmock
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|
86
100
|
requirements:
|
87
|
-
- -
|
101
|
+
- - ">="
|
88
102
|
- !ruby/object:Gem::Version
|
89
103
|
version: '0'
|
90
104
|
type: :development
|
91
105
|
prerelease: false
|
92
106
|
version_requirements: !ruby/object:Gem::Requirement
|
93
107
|
requirements:
|
94
|
-
- -
|
108
|
+
- - ">="
|
95
109
|
- !ruby/object:Gem::Version
|
96
110
|
version: '0'
|
97
111
|
- !ruby/object:Gem::Dependency
|
98
112
|
name: equivalent-xml
|
99
113
|
requirement: !ruby/object:Gem::Requirement
|
100
114
|
requirements:
|
101
|
-
- -
|
115
|
+
- - ">="
|
102
116
|
- !ruby/object:Gem::Version
|
103
117
|
version: '0'
|
104
118
|
type: :development
|
105
119
|
prerelease: false
|
106
120
|
version_requirements: !ruby/object:Gem::Requirement
|
107
121
|
requirements:
|
108
|
-
- -
|
122
|
+
- - ">="
|
109
123
|
- !ruby/object:Gem::Version
|
110
124
|
version: '0'
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
126
|
name: vcr
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
114
128
|
requirements:
|
115
|
-
- -
|
129
|
+
- - ">="
|
116
130
|
- !ruby/object:Gem::Version
|
117
131
|
version: '0'
|
118
132
|
type: :development
|
119
133
|
prerelease: false
|
120
134
|
version_requirements: !ruby/object:Gem::Requirement
|
121
135
|
requirements:
|
122
|
-
- -
|
136
|
+
- - ">="
|
123
137
|
- !ruby/object:Gem::Version
|
124
138
|
version: '0'
|
125
139
|
description: This library manages the core operations for the discovery indexing such
|
@@ -140,6 +154,7 @@ files:
|
|
140
154
|
- lib/reader/purlxml_parser.rb
|
141
155
|
- lib/reader/purlxml_parser_strict.rb
|
142
156
|
- lib/reader/purlxml_reader.rb
|
157
|
+
- lib/utilities/extract_sub_targets.rb
|
143
158
|
- lib/version.rb
|
144
159
|
- lib/writer/solr_client.rb
|
145
160
|
- lib/writer/solr_writer.rb
|
@@ -153,17 +168,17 @@ require_paths:
|
|
153
168
|
- lib
|
154
169
|
required_ruby_version: !ruby/object:Gem::Requirement
|
155
170
|
requirements:
|
156
|
-
- -
|
171
|
+
- - ">="
|
157
172
|
- !ruby/object:Gem::Version
|
158
173
|
version: '0'
|
159
174
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
160
175
|
requirements:
|
161
|
-
- -
|
176
|
+
- - ">="
|
162
177
|
- !ruby/object:Gem::Version
|
163
178
|
version: '0'
|
164
179
|
requirements: []
|
165
180
|
rubyforge_project:
|
166
|
-
rubygems_version: 2.
|
181
|
+
rubygems_version: 2.2.2
|
167
182
|
signing_key:
|
168
183
|
specification_version: 4
|
169
184
|
summary: Shared library for the basic discovery indexing operation for Stanford DLSS.
|