gdor-indexer 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4724a7e6363dc6d287c4bf1bf6033e000930bf90
4
- data.tar.gz: 72fbd1746a114c918ebc9dd838f9674f7fddb8b0
3
+ metadata.gz: d023031326d6543f222881f79d0a102a6a27f0e9
4
+ data.tar.gz: 5dd3419e6e38601413f737b3b3011ce3e9baecd9
5
5
  SHA512:
6
- metadata.gz: 88b7024e0faf8b01d0091b6f53e1683d9f9fc9ba6602d4905d2078e682d7cdb284c95e63d362698ef22709961f313f002ec483dcad43f5f4e224149e6269d339
7
- data.tar.gz: 8b9b76607dabc0f1409a66b37efb1fd0e07c8afc29a4c36d65a3f144b60b6b531afb3251512a97cd75a47164ce8b599cdf79a0d12fd795e2f85c8385983fbb21
6
+ metadata.gz: f182f105f741f8c21bb15c369573b6c3da905b28ef8e0d8920a421dcdacce4beea22891fcbc78dca2c60690c087e800f14f986749b657c218e88fe93247ce072
7
+ data.tar.gz: 882369a75b3c62d0176746241f60e38204573644855514bc4fc5764ab3fcf97c3758bf7efcdc0fe8bd579102ab355255dfe76ec4730a6c69bffcfdab9b910e76
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
@@ -1,29 +1,11 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2016-01-13 15:12:03 -0800 using RuboCop version 0.34.2.
3
+ # on 2016-07-20 15:53:51 -0700 using RuboCop version 0.41.2.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
7
7
  # versions of RuboCop, may require this file to be generated again.
8
8
 
9
- # Offense count: 6
10
- Lint/AmbiguousRegexpLiteral:
11
- Exclude:
12
- - 'spec/unit/indexer_spec.rb'
13
-
14
- # Offense count: 1
15
- # Cop supports --auto-correct.
16
- # Configuration parameters: AlignWith, SupportedStyles, AutoCorrect.
17
- Lint/EndAlignment:
18
- Enabled: false
19
-
20
- # Offense count: 4
21
- Lint/UselessAssignment:
22
- Exclude:
23
- - 'lib/gdor/indexer.rb'
24
- - 'spec/unit/indexer_spec.rb'
25
- - 'spec/unit/solr_doc_builder_spec.rb'
26
-
27
9
  # Offense count: 21
28
10
  Metrics/AbcSize:
29
11
  Max: 82
@@ -31,18 +13,19 @@ Metrics/AbcSize:
31
13
  # Offense count: 1
32
14
  # Configuration parameters: CountComments.
33
15
  Metrics/ClassLength:
34
- Max: 243
16
+ Max: 240
35
17
 
36
18
  # Offense count: 5
37
19
  Metrics/CyclomaticComplexity:
38
20
  Max: 9
39
21
 
40
- # Offense count: 318
41
- # Configuration parameters: AllowURI, URISchemes.
22
+ # Offense count: 314
23
+ # Configuration parameters: AllowHeredoc, AllowURI, URISchemes.
24
+ # URISchemes: http, https
42
25
  Metrics/LineLength:
43
26
  Max: 258
44
27
 
45
- # Offense count: 15
28
+ # Offense count: 13
46
29
  # Configuration parameters: CountComments.
47
30
  Metrics/MethodLength:
48
31
  Max: 43
@@ -51,6 +34,23 @@ Metrics/MethodLength:
51
34
  Metrics/PerceivedComplexity:
52
35
  Max: 10
53
36
 
37
+ # Offense count: 15
38
+ RSpec/AnyInstance:
39
+ Exclude:
40
+ - 'spec/unit/indexer_spec.rb'
41
+ - 'spec/unit/solr_doc_hash_spec.rb'
42
+
43
+ # Offense count: 43
44
+ # Configuration parameters: Max.
45
+ RSpec/ExampleLength:
46
+ Exclude:
47
+ - 'spec/unit/indexer_spec.rb'
48
+ - 'spec/unit/mods_fields_spec.rb'
49
+ - 'spec/unit/mods_pub_fields_spec.rb'
50
+ - 'spec/unit/mods_subject_fields_spec.rb'
51
+ - 'spec/unit/public_xml_fields_spec.rb'
52
+ - 'spec/unit/solr_doc_hash_spec.rb'
53
+
54
54
  # Offense count: 7
55
55
  # Configuration parameters: CustomTransform.
56
56
  RSpec/FilePath:
@@ -63,22 +63,30 @@ RSpec/FilePath:
63
63
  - 'spec/unit/solr_doc_builder_spec.rb'
64
64
  - 'spec/unit/solr_doc_hash_spec.rb'
65
65
 
66
- # Offense count: 193
66
+ # Offense count: 163
67
67
  RSpec/InstanceVariable:
68
68
  Exclude:
69
69
  - 'spec/unit/indexer_spec.rb'
70
70
  - 'spec/unit/public_xml_fields_spec.rb'
71
71
  - 'spec/unit/solr_doc_builder_spec.rb'
72
72
 
73
+ # Offense count: 7
74
+ # Configuration parameters: IgnoreSymbolicNames.
75
+ RSpec/VerifiedDoubles:
76
+ Exclude:
77
+ - 'spec/unit/indexer_spec.rb'
78
+
73
79
  # Offense count: 7
74
80
  # Cop supports --auto-correct.
75
81
  # Configuration parameters: EnforcedStyle, SupportedStyles.
82
+ # SupportedStyles: braces, no_braces, context_dependent
76
83
  Style/BracesAroundHashParameters:
77
84
  Exclude:
78
85
  - 'spec/unit/indexer_spec.rb'
79
86
 
80
87
  # Offense count: 4
81
88
  # Configuration parameters: EnforcedStyle, SupportedStyles.
89
+ # SupportedStyles: nested, compact
82
90
  Style/ClassAndModuleChildren:
83
91
  Exclude:
84
92
  - 'lib/gdor/indexer/mods_fields.rb'
@@ -86,54 +94,28 @@ Style/ClassAndModuleChildren:
86
94
  - 'lib/gdor/indexer/solr_doc_builder.rb'
87
95
  - 'lib/gdor/indexer/solr_doc_hash.rb'
88
96
 
89
- # Offense count: 4
90
- # Configuration parameters: Exclude.
91
- Style/Documentation:
92
- Exclude:
93
- - 'lib/gdor/indexer.rb'
94
- - 'lib/gdor/indexer/nokogiri_xml_node_mixin.rb'
95
- - 'lib/gdor/indexer/solr_doc_hash.rb'
96
- - 'lib/gdor/indexer/version.rb'
97
-
98
- # Offense count: 1
99
- Style/DoubleNegation:
100
- Exclude:
101
- - 'lib/gdor/indexer/solr_doc_hash.rb'
102
-
103
97
  # Offense count: 1
104
98
  # Cop supports --auto-correct.
105
99
  # Configuration parameters: EnforcedStyle, SupportedStyles.
100
+ # SupportedStyles: empty_lines, no_empty_lines
106
101
  Style/EmptyLinesAroundBlockBody:
107
102
  Exclude:
108
103
  - 'spec/unit/mods_pub_fields_spec.rb'
109
104
 
110
- # Offense count: 3
111
- # Configuration parameters: MinBodyLength.
112
- Style/GuardClause:
113
- Exclude:
114
- - 'lib/gdor/indexer.rb'
115
-
116
- # Offense count: 21
105
+ # Offense count: 22
117
106
  # Cop supports --auto-correct.
118
- # Configuration parameters: EnforcedStyle, SupportedStyles.
107
+ # Configuration parameters: EnforcedStyle, SupportedStyles, IndentationWidth.
108
+ # SupportedStyles: aligned, indented
119
109
  Style/MultilineOperationIndentation:
120
- Enabled: false
121
-
122
- # Offense count: 1
123
- # Configuration parameters: NamePrefix, NamePrefixBlacklist.
124
- Style/PredicateName:
125
110
  Exclude:
126
- - 'lib/gdor/indexer/mods_fields.rb'
111
+ - 'Rakefile'
112
+ - 'spec/unit/mods_pub_fields_spec.rb'
127
113
 
128
114
  # Offense count: 2
129
115
  # Cop supports --auto-correct.
130
116
  # Configuration parameters: EnforcedStyle, SupportedStyles, AllowInnerSlashes.
117
+ # SupportedStyles: slashes, percent_r, mixed
131
118
  Style/RegexpLiteral:
132
119
  Exclude:
133
120
  - 'spec/unit/indexer_spec.rb'
134
121
  - 'spec/unit/mods_subject_fields_spec.rb'
135
-
136
- # Offense count: 1
137
- Style/UnlessElse:
138
- Exclude:
139
- - 'lib/gdor/indexer.rb'
@@ -0,0 +1,11 @@
1
+ sudo: false
2
+ language: ruby
3
+ cache: bundler
4
+ notifications:
5
+ email: false
6
+ rvm:
7
+ - 2.3.1
8
+ env:
9
+ global:
10
+ - NOKOGIRI_USE_SYSTEM_LIBRARIES=true
11
+ jdk: oraclejdk8
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- [![Dependency Status](https://gemnasium.com/sul-dlss/gdor-indexer.svg)](https://gemnasium.com/sul-dlss/gdor-indexer) [![Gem Version](https://badge.fury.io/rb/gdor-indexer.svg)](http://badge.fury.io/rb/gdor-indexer)
1
+ [![Build Status](https://travis-ci.org/sul-dlss/gdor-indexer.svg)](https://travis-ci.org/sul-dlss/gdor-indexer) [![Coverage Status](https://coveralls.io/repos/sul-dlss/gdor-indexer/badge.svg?branch=master&service=github)](https://coveralls.io/github/sul-dlss/gdor-indexer?branch=master) [![Dependency Status](https://gemnasium.com/sul-dlss/gdor-indexer.svg)](https://gemnasium.com/sul-dlss/gdor-indexer) [![Gem Version](https://badge.fury.io/rb/gdor-indexer.svg)](http://badge.fury.io/rb/gdor-indexer)
2
2
 
3
3
  # gdor-indexer
4
4
 
@@ -6,55 +6,59 @@ Code to harvest DOR druids via DOR Fetcher service, mods from PURL, and use it t
6
6
 
7
7
  ## Prerequisites
8
8
 
9
- 1. ruby
9
+ 1. ruby 2.x+
10
10
  2. bundler gem must be installed
11
11
 
12
12
  ## Install steps for running locally
13
13
 
14
14
  Add this line to your application's Gemfile:
15
-
16
- gem 'harvestdor-indexer'
15
+ ```ruby
16
+ gem 'harvestdor-indexer'
17
+ ```
17
18
 
18
19
  Then execute:
19
-
20
- $ bundle
20
+ ```bash
21
+ bundle
22
+ ```
21
23
 
22
24
  ## Configuration
23
25
 
24
- #### Create a collections folder in the config directory:
26
+ ### Create a collections folder in the config directory:
25
27
 
26
- $ cd /path/to/gdor-indexer/config
27
- $ mkdir collections
28
+ ```bash
29
+ cd /path/to/gdor-indexer/config
30
+ mkdir collections
31
+ ```
28
32
 
29
- #### Create a yml config file for your collection(s) to be harvested and indexed.
33
+ ### Create a yml config file for your collection(s) to be harvested and indexed.
30
34
 
31
- See ```spec/config/walters_integration_spec.yml``` for an example. Copy that file to ```config/collections``` and change the following settings:
35
+ See `spec/config/walters_integration_spec.yml` for an example. Copy that file to `config/collections` and change the following settings:
32
36
 
33
37
  * whitelist
34
38
  * dor_fetcher service_url
35
39
  * harvestdor log_dir and log_name
36
40
  * solr_url
37
41
 
38
- ##### whitelist
42
+ #### whitelist
39
43
 
40
- The whitelist is how you specify which objects to index. The whitelist can be
44
+ The whitelist is how you specify which objects to index. The whitelist can be:
41
45
 
42
46
  * an Array of druids inline in the config yml file
43
47
  * a filename containing a list of druids (one per line)
44
48
 
45
- If a druid, per the object's identityMetadata at purl page, is for a
49
+ If a druid, per the object's identityMetadata at purl page, is for a:
46
50
 
47
- * collection record: then we process all the item druids in that collection (as if they were included individually in the whitelist)
51
+ * collection record: then we process all the item druids in that collection (as if they were included individually in the whitelist)
48
52
  * non-collection record: then we process the druid as an individual item
49
53
 
50
- #### Run the indexer script
54
+ ### Run the indexer script
51
55
 
52
56
  $ cd /path/to/gdor-indexer
53
57
  $ nohup ./bin/indexer -c my_collection &>path/to/nohup.output
54
58
 
55
59
  ## Running the tests
56
60
 
57
- ```$ rake```
61
+ `rake`
58
62
 
59
63
  ## Contributing
60
64
 
@@ -18,7 +18,7 @@ Gem::Specification.new do |spec|
18
18
  spec.require_paths = ['lib']
19
19
 
20
20
  spec.add_dependency 'harvestdor-indexer'
21
- spec.add_dependency 'stanford-mods', '>= 1.4.0' # for new pub date methods
21
+ spec.add_dependency 'stanford-mods', '~> 2.2.1'
22
22
  spec.add_dependency 'nokogiri'
23
23
  spec.add_dependency 'rsolr'
24
24
  spec.add_dependency 'activesupport'
@@ -12,8 +12,8 @@ require 'logger'
12
12
  require 'net/smtp'
13
13
  require 'set'
14
14
 
15
- # Base class to harvest from DOR via harvestdor gem
16
15
  module GDor
16
+ # Base class to harvest from DOR via harvestdor gem
17
17
  class Indexer
18
18
  include Hooks
19
19
 
@@ -89,12 +89,12 @@ module GDor
89
89
  index_with_exception_handling resource
90
90
  end
91
91
 
92
- unless nocommit
92
+ if nocommit
93
+ logger.info('Skipping commit per nocommit flag')
94
+ else
93
95
  logger.info('Beginning Commit.')
94
96
  solr_client.commit!
95
97
  logger.info('Finished Commit.')
96
- else
97
- logger.info('Skipping commit per nocommit flag')
98
98
  end
99
99
 
100
100
  @total_time = elapsed_time(start_time)
@@ -156,6 +156,7 @@ module GDor
156
156
  # Create Solr document for the collection druid suitable for SearchWorks
157
157
  # and write the result to the SearchWorks Solr Index
158
158
  # @param [Harvestdor::Indexer::Resource] resource a collection record
159
+ # @return [Hash]
159
160
  def collection_solr_document(resource)
160
161
  coll_sdb = GDor::Indexer::SolrDocBuilder.new(resource, logger)
161
162
 
@@ -166,7 +167,6 @@ module GDor
166
167
  collection_type: 'Digital Collection',
167
168
  display_type: coll_display_types_from_items(resource),
168
169
  format_main_ssim: 'Archive/Manuscript', # per INDEX-12, add this to all collection records (does not add dups)
169
- format: 'Manuscript/Archive', # per INDEX-144, add this to all collection records (does not add dups)
170
170
  building_facet: 'Stanford Digital Repository' # INDEX-53 add building_facet = Stanford Digital Repository here for collection
171
171
  )
172
172
 
@@ -181,7 +181,7 @@ module GDor
181
181
 
182
182
  # add coll level data to this solr doc and/or cache collection level information
183
183
  # @param [Hash] doc_hash representing the Solr document (for an item)
184
- # @param [Array<Harvestdor::Indexer::Resource>] collections the collections the item is a member of
184
+ # @param [Array<Harvestdor::Indexer::Resource>] collections the collections the item is a member of
185
185
  def add_coll_info(doc_hash, collections)
186
186
  if collections
187
187
  doc_hash[:collection] = []
@@ -213,9 +213,8 @@ module GDor
213
213
  # cache the display_type of this (item) object with a collection, so when the collection rec
214
214
  # is being indexed, it can get all of the display_types of the members
215
215
  def cache_display_type_for_collection(resource, display_type)
216
- if display_type && display_type.instance_of?(String)
217
- coll_display_types_from_items(resource) << display_type
218
- end
216
+ return unless display_type && display_type.instance_of?(String)
217
+ coll_display_types_from_items(resource) << display_type
219
218
  end
220
219
 
221
220
  # count the number of records in solr for this collection (and the collection record itself)
@@ -238,8 +237,7 @@ module GDor
238
237
  # @return [Array<String>] Array of messages suitable for notification email and/or logs
239
238
  def record_count_msgs
240
239
  @record_count_msgs ||= begin
241
- msgs = []
242
- msgs << "Successful count (items + coll record indexed w/o error): #{metrics.success_count}"
240
+ msgs = ["Successful count (items + coll record indexed w/o error): #{metrics.success_count}"]
243
241
 
244
242
  harvestdor.resources.select(&:collection?).each do |collection|
245
243
  solr_count = num_found_in_solr(collection: collection.bare_druid)
@@ -270,12 +268,11 @@ module GDor
270
268
  logger.info("Avg complete index time per object (all): #{(@total_time / metrics.total).round(2)} seconds") unless metrics.total == 0
271
269
  end
272
270
 
271
+ # @return [String] the composed email body
273
272
  def email_report_body
274
- body = ''
275
-
276
- body += "\n" + record_count_msgs.join("\n") + "\n"
273
+ body = "\n" + record_count_msgs.join("\n") + "\n"
277
274
 
278
- if @druids_failed_to_ix.size > 0
275
+ unless @druids_failed_to_ix.empty?
279
276
  body += "\n"
280
277
  body += "records that may have failed to index: \n"
281
278
  body += @druids_failed_to_ix.join("\n") + "\n"
@@ -285,7 +282,7 @@ module GDor
285
282
  body += "full log is at gdor_indexer/shared/#{config.harvestdor.log_dir}/#{config.harvestdor.log_name} on #{Socket.gethostname}"
286
283
  body += "\n"
287
284
 
288
- body += @validation_messages.join("\n") + "\n"
285
+ body + @validation_messages.join("\n") + "\n"
289
286
  end
290
287
 
291
288
  # email the results of indexing if we are on one of the harvestdor boxes
@@ -305,12 +302,14 @@ module GDor
305
302
  end
306
303
  end
307
304
 
305
+ # @param [String] to target email address
306
+ # @param [Hash] opts options
308
307
  def send_email(to, opts = {})
309
- opts[:server] ||= 'localhost'
310
- opts[:from] ||= 'gryphondor@stanford.edu'
308
+ opts[:server] ||= 'localhost'
309
+ opts[:from] ||= 'gryphondor@stanford.edu'
311
310
  opts[:from_alias] ||= 'gryphondor'
312
- opts[:subject] ||= 'default subject'
313
- opts[:body] ||= 'default message body'
311
+ opts[:subject] ||= 'default subject'
312
+ opts[:body] ||= 'default message body'
314
313
  mail = Mail.new do
315
314
  from opts[:from]
316
315
  to to
@@ -81,14 +81,14 @@ module GDor::Indexer::ModsFields
81
81
  # @return [Hash] updated Hash representing the Solr document
82
82
  def add_pub_year_tisim(doc_hash)
83
83
  pub_date_sort_val = doc_hash[:pub_year_isi]
84
- if is_positive_int? pub_date_sort_val
84
+ if positive_int? pub_date_sort_val
85
85
  doc_hash[:pub_year_tisim] = pub_date_sort_val # for date slider
86
86
  end
87
87
  doc_hash
88
88
  end
89
89
 
90
90
  # @return true if the string parses into an int, and if so, the int is >= 0
91
- def is_positive_int?(str)
91
+ def positive_int?(str)
92
92
  str.to_i >= 0
93
93
  rescue
94
94
  false
@@ -1,6 +1,6 @@
1
- # Monkey patch for Nokogiri to cache xpath contexts and make things faster under jRuby
2
1
  module Nokogiri
3
2
  module XML
3
+ # Monkey patch for Nokogiri to cache xpath contexts and make things faster under jRuby
4
4
  class Node
5
5
  @context = nil
6
6
 
@@ -1,33 +1,34 @@
1
1
  require 'delegate'
2
2
 
3
3
  class GDor::Indexer
4
+ # Hash-like SolrDoc Object class
4
5
  class SolrDocHash < SimpleDelegator
5
6
  def initialize(hash = {})
6
7
  super(hash)
7
8
  end
8
9
 
9
- # looks for non-empty existence of field when exp_val is nil;
10
+ # @param [String,Regexp] exp_val
11
+ # when exp_val is nil, looks for non-empty existence of field
10
12
  # when exp_val is a String, looks for matching value as a String or as a member of an Array
11
13
  # when exp_val is a Regexp, looks for String value that matches, or Array with a String member that matches
12
- # @return true if the field is non-trivially present in the hash, false otherwise
14
+ # @return [Boolean] true if the field is non-trivially present in the hash, false otherwise
13
15
  def field_present?(field, exp_val = nil)
14
- !!(if self.include?(field) && Array(self[field]).any? { |v| !v.blank? }
15
- actual = Array(self[field])
16
-
17
- case exp_val
18
- when nil
19
- true
20
- when Regexp
21
- actual.index { |s| exp_val.match(s) }
22
- else
23
- actual.include? exp_val
24
- end
25
- end)
16
+ return false unless include?(field) && Array(self[field]).any? { |v| !v.blank? }
17
+ case exp_val
18
+ when nil
19
+ true
20
+ when Regexp
21
+ Array(self[field]).index { |s| exp_val.match(s) }
22
+ else
23
+ Array(self[field]).include? exp_val
24
+ end
26
25
  end
27
26
 
28
27
  # merge in field values from the new hash, with the following guarantees:
29
28
  # values for keys in new_hash will be a non-empty String or flat Array
30
29
  # keys will be removed from hash if all values are nil or empty
30
+ # @param [Hash] new_hash
31
+ # @return [GDor::Indexer::SolrDocHash] self
31
32
  def combine(new_hash)
32
33
  new_hash.select { |_key, value| Array(value).any? { |v| !v.blank? } }.each do |key, new_val|
33
34
  if field_present? key
@@ -46,10 +47,10 @@ class GDor::Indexer
46
47
  end
47
48
 
48
49
  compact_blank_fields!
49
-
50
50
  self
51
51
  end
52
52
 
53
+ # @return [GDor::Indexer::SolrDocHash] self
53
54
  def compact_blank_fields!
54
55
  keys.reject { |key| field_present? key }.each do |key|
55
56
  delete key
@@ -57,11 +58,13 @@ class GDor::Indexer
57
58
  self
58
59
  end
59
60
 
61
+ # @return [String]
60
62
  def druid
61
63
  self[:druid]
62
64
  end
63
65
 
64
66
  # validate fields that should be in hash for any item object in SearchWorks Solr
67
+ # @param [Object] config Configuration object
65
68
  # @return [Array<String>] Array of messages suitable for notification email and/or logs
66
69
  def validate_item(config)
67
70
  result = validate_gdor_fields(config)
@@ -75,7 +78,7 @@ class GDor::Indexer
75
78
  end
76
79
 
77
80
  # validate fields that should be in hash for any collection object in SearchWorks Solr
78
- # @return [Array<String>] Array of messages suitable for notificaiton email and/or logs
81
+ # @see #validate_item for param and return
79
82
  def validate_collection(config)
80
83
  result = validate_gdor_fields(config)
81
84
  result << "#{druid} missing collection_type 'Digital Collection'\n" unless field_present?(:collection_type, 'Digital Collection')
@@ -84,7 +87,7 @@ class GDor::Indexer
84
87
  end
85
88
 
86
89
  # validate fields that should be in hash for every gryphonDOR object in SearchWorks Solr
87
- # @return [Array<String>] Array of messages suitable for notificaiton email and/or logs
90
+ # @see #validate_item for param and return
88
91
  def validate_gdor_fields(config)
89
92
  result = []
90
93
  result << "#{druid} missing druid field\n" unless field_present?(:druid, druid)
@@ -96,7 +99,7 @@ class GDor::Indexer
96
99
  end
97
100
 
98
101
  # validate fields that should be in doc hash for every unmerged gryphonDOR object in SearchWorks Solr
99
- # @return [Array<String>] array of Strings indicating absence of required fields
102
+ # @see #validate_item for param and return
100
103
  def validate_mods(_config)
101
104
  result = []
102
105
  result << "#{druid} missing modsxml\n" unless field_present?(:modsxml)
@@ -1,5 +1,5 @@
1
1
  module GDor
2
2
  class Indexer
3
- VERSION = '0.5.0'
3
+ VERSION = '0.6.0'.freeze
4
4
  end
5
5
  end
@@ -1,9 +1,8 @@
1
- require 'spec_helper'
1
+ require 'yaml'
2
2
 
3
3
  describe GDor::Indexer do
4
4
  before(:all) do
5
5
  @config_yml_path = File.join(File.dirname(__FILE__), '..', 'config', 'walters_integration_spec.yml')
6
- require 'yaml'
7
6
  @yaml = YAML.load_file(@config_yml_path)
8
7
  @ns_decl = "xmlns='#{Mods::MODS_NS}'"
9
8
  @fake_druid = 'oo000oo0000'
@@ -82,7 +81,7 @@ describe GDor::Indexer do
82
81
 
83
82
  @indexer.harvest_and_index
84
83
  end
85
- it 'indexs each resource' do
84
+ it 'indexes each resource' do
86
85
  allow(@indexer).to receive(:harvestdor).and_return(Class.new do
87
86
  def initialize(*items)
88
87
  @items = items
@@ -109,13 +108,13 @@ describe GDor::Indexer do
109
108
  @indexer.harvest_and_index
110
109
  end
111
110
  it 'does not commit if nocommit is set' do
112
- expect(@indexer.solr_client).to_not receive(:commit!)
111
+ expect(@indexer.solr_client).not_to receive(:commit!)
113
112
  @indexer.harvest_and_index(true)
114
113
  end
115
114
  end
116
115
 
117
116
  describe '#index' do
118
- it 'indexs collections as collections' do
117
+ it 'indexes collections as collections' do
119
118
  expect(@indexer).to receive(:collection_solr_document).with(collection)
120
119
  @indexer.index collection
121
120
  end
@@ -196,17 +195,16 @@ describe GDor::Indexer do
196
195
  end # item_solr_document
197
196
 
198
197
  context '#collection_solr_document' do
198
+ let(:doc_hash) { GDor::Indexer::SolrDocHash.new }
199
199
  it 'calls validate_collection' do
200
- doc_hash = GDor::Indexer::SolrDocHash.new
201
200
  allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(doc_hash) # speed up the test
202
201
  expect(doc_hash).to receive(:validate_collection).and_return([])
203
- doc_hash = @indexer.collection_solr_document collection
202
+ @indexer.collection_solr_document collection
204
203
  end
205
204
  it 'calls GDor::Indexer::SolrDocBuilder.validate_mods' do
206
- doc_hash = GDor::Indexer::SolrDocHash.new
207
205
  allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(doc_hash) # speed up the test
208
206
  expect(doc_hash).to receive(:validate_mods).and_return([])
209
- doc_hash = @indexer.collection_solr_document collection
207
+ @indexer.collection_solr_document collection
210
208
  end
211
209
  it 'populates druid and access_facet fields' do
212
210
  doc_hash = @indexer.collection_solr_document collection
@@ -222,26 +220,25 @@ describe GDor::Indexer do
222
220
  doc_hash = @indexer.collection_solr_document collection
223
221
  expect(doc_hash).to include collection_type: 'Digital Collection'
224
222
  end
223
+
225
224
  context 'add format_main_ssim Archive/Manuscript' do
226
225
  it 'no other values' do
227
226
  allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(GDor::Indexer::SolrDocHash.new)
228
-
229
227
  doc_hash = @indexer.collection_solr_document collection
230
228
  expect(doc_hash).to include format_main_ssim: 'Archive/Manuscript'
231
229
  end
232
230
  it 'other values present' do
233
231
  allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(GDor::Indexer::SolrDocHash.new({ format_main_ssim: %w(Image Video) }))
234
-
235
232
  doc_hash = @indexer.collection_solr_document collection
236
233
  expect(doc_hash).to include format_main_ssim: ['Image', 'Video', 'Archive/Manuscript']
237
234
  end
238
235
  it 'already has values Archive/Manuscript' do
239
236
  allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(GDor::Indexer::SolrDocHash.new({ format_main_ssim: 'Archive/Manuscript' }))
240
-
241
237
  doc_hash = @indexer.collection_solr_document collection
242
238
  expect(doc_hash).to include format_main_ssim: ['Archive/Manuscript']
243
239
  end
244
240
  end
241
+
245
242
  it 'populates building_facet field with Stanford Digital Repository' do
246
243
  doc_hash = @indexer.collection_solr_document collection
247
244
  expect(doc_hash).to include building_facet: 'Stanford Digital Repository'
@@ -252,9 +249,9 @@ describe GDor::Indexer do
252
249
  before(:each) do
253
250
  @coll_druids_array = [collection]
254
251
  end
252
+ let(:doc_hash) { GDor::Indexer::SolrDocHash.new({}) }
255
253
 
256
254
  it 'adds no collection field values to doc_hash if there are none' do
257
- doc_hash = GDor::Indexer::SolrDocHash.new({})
258
255
  @indexer.add_coll_info(doc_hash, nil)
259
256
  expect(doc_hash[:collection]).to be_nil
260
257
  expect(doc_hash[:collection_with_title]).to be_nil
@@ -263,7 +260,6 @@ describe GDor::Indexer do
263
260
 
264
261
  context 'collection field' do
265
262
  it 'is added field to doc hash' do
266
- doc_hash = GDor::Indexer::SolrDocHash.new({})
267
263
  @indexer.add_coll_info(doc_hash, @coll_druids_array)
268
264
  expect(doc_hash[:collection]).to match_array [@coll_druid_from_test_config]
269
265
  end
@@ -286,7 +282,6 @@ describe GDor::Indexer do
286
282
  it 'adds two values to doc_hash when object belongs to two collections' do
287
283
  coll_druid1 = 'oo111oo2222'
288
284
  coll_druid2 = 'oo333oo4444'
289
- doc_hash = GDor::Indexer::SolrDocHash.new({})
290
285
  @indexer.add_coll_info(doc_hash, [double(druid: coll_druid1, bare_druid: coll_druid1, public_xml: @ng_pub_xml, identity_md_obj_label: 'foo'), double(druid: coll_druid2, bare_druid: coll_druid2, public_xml: @ng_pub_xml, identity_md_obj_label: 'bar')])
291
286
  expect(doc_hash[:collection_with_title]).to match_array ["#{coll_druid1}-|-foo", "#{coll_druid2}-|-bar"]
292
287
  end
@@ -347,25 +342,25 @@ describe GDor::Indexer do
347
342
  end
348
343
 
349
344
  it 'email body includes coll id' do
350
- expect(subject).to match /testcoll indexed coll record is: ww121ss5000/
345
+ expect(subject).to match(/testcoll indexed coll record is: ww121ss5000/)
351
346
  end
352
347
 
353
348
  it 'email body includes coll title' do
354
- expect(subject).to match /coll title: testcoll title/
349
+ expect(subject).to match(/coll title: testcoll title/)
355
350
  end
356
351
 
357
352
  it 'email body includes failed to index druids' do
358
353
  @indexer.instance_variable_set(:@druids_failed_to_ix, %w(a b))
359
- expect(subject).to match /records that may have failed to index: \na\nb\n\n/
354
+ expect(subject).to match(/records that may have failed to index: \na\nb\n\n/)
360
355
  end
361
356
 
362
357
  it 'email body include validation messages' do
363
358
  @indexer.instance_variable_set(:@validation_messages, ['this is a validation message'])
364
- expect(subject).to match /this is a validation message/
359
+ expect(subject).to match(/this is a validation message/)
365
360
  end
366
361
 
367
362
  it 'email includes reference to full log' do
368
- expect(subject).to match /full log is at gdor_indexer\/shared\/spec\/test_logs\/testcoll\.log/
363
+ expect(subject).to match(/full log is at gdor_indexer\/shared\/spec\/test_logs\/testcoll\.log/)
369
364
  end
370
365
  end
371
366
 
@@ -378,7 +373,7 @@ describe GDor::Indexer do
378
373
 
379
374
  it 'has an appropriate subject' do
380
375
  expect(@indexer).to receive(:send_email) do |_to, opts|
381
- expect(opts[:subject]).to match /is finished/
376
+ expect(opts[:subject]).to match(/is finished/)
382
377
  end
383
378
 
384
379
  @indexer.email_results
@@ -1,5 +1,3 @@
1
- require 'spec_helper'
2
-
3
1
  describe GDor::Indexer::ModsFields do
4
2
  let(:fake_druid) { 'oo000oo0000' }
5
3
  let(:ns_decl) { "xmlns='#{Mods::MODS_NS}'" }
@@ -1,5 +1,3 @@
1
- require 'spec_helper'
2
-
3
1
  describe GDor::Indexer::ModsFields do
4
2
  let(:fake_druid) { 'oo000oo0000' }
5
3
  let(:ns_decl) { "xmlns='#{Mods::MODS_NS}'" }
@@ -1,5 +1,3 @@
1
- require 'spec_helper'
2
-
3
1
  describe GDor::Indexer::ModsFields do
4
2
  let(:fake_druid) { 'oo000oo0000' }
5
3
  let(:ns_decl) { "xmlns='#{Mods::MODS_NS}'" }
@@ -1,5 +1,3 @@
1
- require 'spec_helper'
2
-
3
1
  describe GDor::Indexer::PublicXmlFields do
4
2
  before(:all) do
5
3
  @fake_druid = 'oo000oo0000'
@@ -1,13 +1,10 @@
1
- require 'spec_helper'
2
-
3
1
  describe GDor::Indexer::SolrDocBuilder do
4
2
  before(:all) do
5
- @fake_druid = 'oo000oo0000'
6
3
  @ns_decl = "xmlns='#{Mods::MODS_NS}'"
7
4
  @mods_xml = "<mods #{@ns_decl}><note>SolrDocBuilder test</note></mods>"
8
- @ng_mods_xml = Nokogiri::XML(@mods_xml)
9
5
  end
10
6
 
7
+ let(:fake_druid) { 'oo000oo0000' }
11
8
  let :logger do
12
9
  lgr = Logger.new(StringIO.new)
13
10
  lgr.level = Logger::WARN
@@ -15,7 +12,7 @@ describe GDor::Indexer::SolrDocBuilder do
15
12
  end
16
13
 
17
14
  def sdb_for_data(mods, pub_xml)
18
- resource = Harvestdor::Indexer::Resource.new(double, @fake_druid)
15
+ resource = Harvestdor::Indexer::Resource.new(double, fake_druid)
19
16
  allow(resource).to receive(:mods).and_return(Nokogiri::XML(mods))
20
17
  allow(resource).to receive(:public_xml).and_return(Nokogiri::XML(pub_xml))
21
18
  i = Harvestdor::Indexer.new
@@ -24,49 +21,40 @@ describe GDor::Indexer::SolrDocBuilder do
24
21
  GDor::Indexer::SolrDocBuilder.new(resource, logger)
25
22
  end
26
23
 
27
- # NOTE:
28
- # "Doubles, stubs, and message expectations are all cleaned out after each example."
29
- # per https://www.relishapp.com/rspec/rspec-mocks/docs/scope
30
-
31
24
  context 'doc_hash' do
32
- before(:all) do
33
- cmd_xml = "<contentMetadata type='image' objectId='#{@fake_druid}'></contentMetadata>"
34
- @pub_xml = "<publicObject id='druid#{@fake_druid}'>#{cmd_xml}</publicObject>"
35
- end
36
- let :doc_hash do
37
- sdb_for_data(@mods_xml, @pub_xml).doc_hash
38
- end
39
- before(:each) do
40
- @doc_hash = doc_hash
25
+ let(:doc_hash) do
26
+ cmd_xml = "<contentMetadata type='image' objectId='#{fake_druid}'></contentMetadata>"
27
+ pub_xml = "<publicObject id='druid#{fake_druid}'>#{cmd_xml}</publicObject>"
28
+ sdb_for_data(@mods_xml, pub_xml).doc_hash
41
29
  end
30
+
42
31
  it 'id field should be set to druid' do
43
- expect(@doc_hash[:id]).to eq(@fake_druid)
32
+ expect(doc_hash[:id]).to eq(fake_druid)
44
33
  end
45
34
  it 'does not have the gdor fields set in indexer.rb' do
46
- expect(@doc_hash).to_not have_key(:druid)
47
- expect(@doc_hash).to_not have_key(:access_facet)
48
- expect(@doc_hash).to_not have_key(:url_fulltext)
49
- expect(@doc_hash).to_not have_key(:display_type)
50
- expect(@doc_hash).to_not have_key(:file_id)
35
+ expect(doc_hash).not_to have_key(:druid)
36
+ expect(doc_hash).not_to have_key(:access_facet)
37
+ expect(doc_hash).not_to have_key(:url_fulltext)
38
+ expect(doc_hash).not_to have_key(:display_type)
39
+ expect(doc_hash).not_to have_key(:file_id)
51
40
  end
52
41
  it 'has the full MODS in the modsxml field' do
53
- expect(@doc_hash[:modsxml]).to be_equivalent_to @mods_xml
42
+ expect(doc_hash[:modsxml]).to be_equivalent_to @mods_xml
54
43
  end
55
44
  end # doc hash
56
45
 
57
46
  context '#catkey' do
58
- before(:all) do
59
- @identity_md_start = "<publicObject><identityMetadata objectId='#{@fake_druid}'>"
60
- @identity_md_end = '</identityMetadata></publicObject>'
61
- @empty_id_md = "#{@identity_md_start}#{@identity_md_end}"
62
- @barcode_id_md = "#{@identity_md_start}<otherId name=\"barcode\">666</otherId>#{@identity_md_end}"
63
- end
47
+ let(:identity_md_start) { "<publicObject><identityMetadata objectId='#{fake_druid}'>" }
48
+ let(:identity_md_end) { '</identityMetadata></publicObject>' }
49
+ let(:empty_id_md) { "#{identity_md_start}#{identity_md_end}" }
50
+ let(:barcode_id_md) { "#{identity_md_start}<otherId name=\"barcode\">666</otherId>#{identity_md_end}" }
51
+
64
52
  it 'is nil if there is no indication of catkey in identityMetadata' do
65
- sdb = sdb_for_data(@mods_xml, @empty_id_md)
53
+ sdb = sdb_for_data(@mods_xml, empty_id_md)
66
54
  expect(sdb.catkey).to be_nil
67
55
  end
68
56
  it 'takes a catkey in identityMetadata/otherId with name attribute of catkey' do
69
- pub_xml = "#{@identity_md_start}<otherId name=\"catkey\">12345</otherId>#{@identity_md_end}"
57
+ pub_xml = "#{identity_md_start}<otherId name=\"catkey\">12345</otherId>#{identity_md_end}"
70
58
  sdb = sdb_for_data(@mods_xml, pub_xml)
71
59
  expect(sdb.catkey).to eq('12345')
72
60
  end
@@ -74,18 +62,18 @@ describe GDor::Indexer::SolrDocBuilder do
74
62
  m = "<mods #{@ns_decl}><recordInfo>
75
63
  <recordIdentifier source=\"SIRSI\">a6780453</recordIdentifier>
76
64
  </recordInfo></mods>"
77
- sdb = sdb_for_data(@mods_xml, @empty_id_md)
65
+ sdb = sdb_for_data(m, empty_id_md)
78
66
  expect(sdb.catkey).to be_nil
79
67
  end
80
68
  it 'logs an error when there is identityMetadata/otherId with name attribute of barcode but there is no catkey in mods' do
81
- sdb = sdb_for_data(@mods_xml, @barcode_id_md)
82
- expect(logger).to receive(:error).with(/#{@fake_druid} has barcode .* in identityMetadata but no SIRSI catkey in mods/)
69
+ sdb = sdb_for_data(@mods_xml, barcode_id_md)
70
+ expect(logger).to receive(:error).with(/#{fake_druid} has barcode .* in identityMetadata but no SIRSI catkey in mods/)
83
71
  sdb.catkey
84
72
  end
85
73
 
86
74
  context 'catkey from mods' do
87
75
  it 'looks for catkey in mods if identityMetadata/otherId with name attribute of barcode is found' do
88
- sdb = sdb_for_data(@mods_xml, @barcode_id_md)
76
+ sdb = sdb_for_data(@mods_xml, barcode_id_md)
89
77
  smr = sdb.smods_rec
90
78
  expect(smr).to receive(:record_info).and_call_original # this is as close as I can figure to @smods_rec.record_info.recordIdentifier
91
79
  sdb.catkey
@@ -94,29 +82,28 @@ describe GDor::Indexer::SolrDocBuilder do
94
82
  m = "<mods #{@ns_decl}><recordInfo>
95
83
  <descriptionStandard>dacs</descriptionStandard>
96
84
  </recordInfo></mods>"
97
- sdb = sdb_for_data(m, @barcode_id_md)
98
-
85
+ sdb = sdb_for_data(m, barcode_id_md)
99
86
  expect(sdb.catkey).to be_nil
100
87
  end
101
88
  it 'populated when source attribute is SIRSI' do
102
89
  m = "<mods #{@ns_decl}><recordInfo>
103
90
  <recordIdentifier source=\"SIRSI\">a6780453</recordIdentifier>
104
91
  </recordInfo></mods>"
105
- sdb = sdb_for_data(m, @barcode_id_md)
92
+ sdb = sdb_for_data(m, barcode_id_md)
106
93
  expect(sdb.catkey).not_to be_nil
107
94
  end
108
95
  it 'not populated when source attribute is not SIRSI' do
109
96
  m = "<mods #{@ns_decl}><recordInfo>
110
97
  <recordIdentifier source=\"FOO\">a6780453</recordIdentifier>
111
98
  </recordInfo></mods>"
112
- sdb = sdb_for_data(m, @barcode_id_md)
99
+ sdb = sdb_for_data(m, barcode_id_md)
113
100
  expect(sdb.catkey).to be_nil
114
101
  end
115
102
  it 'removes the a at the beginning of the catkey' do
116
103
  m = "<mods #{@ns_decl}><recordInfo>
117
104
  <recordIdentifier source=\"SIRSI\">a6780453</recordIdentifier>
118
105
  </recordInfo></mods>"
119
- sdb = sdb_for_data(m, @barcode_id_md)
106
+ sdb = sdb_for_data(m, barcode_id_md)
120
107
  expect(sdb.catkey).to eq('6780453')
121
108
  end
122
109
  end
@@ -1,5 +1,3 @@
1
- require 'spec_helper'
2
-
3
1
  describe GDor::Indexer::SolrDocHash do
4
2
  context '#field_present?' do
5
3
  context 'actual field value is boolean true' do
@@ -132,6 +130,7 @@ describe GDor::Indexer::SolrDocHash do
132
130
  expect(subject.combine(foo: {})).to eq({})
133
131
  end
134
132
  end # orig has no key
133
+
135
134
  context 'orig value is nil' do
136
135
  subject do
137
136
  described_class.new(foo: nil)
@@ -155,6 +154,7 @@ describe GDor::Indexer::SolrDocHash do
155
154
  expect(subject.combine(foo: {})).to eq({})
156
155
  end
157
156
  end # orig value is nil
157
+
158
158
  context 'orig value is empty String' do
159
159
  subject do
160
160
  described_class.new(foo: '')
@@ -178,6 +178,7 @@ describe GDor::Indexer::SolrDocHash do
178
178
  expect(subject.combine(foo: {})).to eq({})
179
179
  end
180
180
  end # orig value is empty String
181
+
181
182
  context 'orig value is non-empty String' do
182
183
  subject do
183
184
  described_class.new(foo: 'a')
@@ -201,6 +202,7 @@ describe GDor::Indexer::SolrDocHash do
201
202
  expect(subject.combine(foo: :bar)).to eq(foo: ['a', :bar])
202
203
  end
203
204
  end # orig value is String
205
+
204
206
  context 'orig value is empty Array' do
205
207
  subject do
206
208
  described_class.new(foo: [])
@@ -224,6 +226,7 @@ describe GDor::Indexer::SolrDocHash do
224
226
  expect(subject.combine(foo: {})).to eq({})
225
227
  end
226
228
  end # orig value is empty Array
229
+
227
230
  context 'orig value is non-empty Array' do
228
231
  subject do
229
232
  described_class.new(foo: %w(a b))
@@ -309,7 +312,6 @@ describe GDor::Indexer::SolrDocHash do
309
312
  end
310
313
  it 'has a value if collection_type is missing' do
311
314
  hash = described_class.new(format_main_ssim: 'Archive/Manuscript')
312
-
313
315
  expect(hash.validate_collection(mock_config).first).to match(/collection_type/)
314
316
  end
315
317
  it "has a value if collection_type is not 'Digital Collection'" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gdor-indexer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naomi Dushay
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2016-01-19 00:00:00.000000000 Z
13
+ date: 2016-08-06 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: harvestdor-indexer
@@ -30,16 +30,16 @@ dependencies:
30
30
  name: stanford-mods
31
31
  requirement: !ruby/object:Gem::Requirement
32
32
  requirements:
33
- - - ">="
33
+ - - "~>"
34
34
  - !ruby/object:Gem::Version
35
- version: 1.4.0
35
+ version: 2.2.1
36
36
  type: :runtime
37
37
  prerelease: false
38
38
  version_requirements: !ruby/object:Gem::Requirement
39
39
  requirements:
40
- - - ">="
40
+ - - "~>"
41
41
  - !ruby/object:Gem::Version
42
- version: 1.4.0
42
+ version: 2.2.1
43
43
  - !ruby/object:Gem::Dependency
44
44
  name: nokogiri
45
45
  requirement: !ruby/object:Gem::Requirement
@@ -316,8 +316,10 @@ extra_rdoc_files: []
316
316
  files:
317
317
  - ".gitignore"
318
318
  - ".hound.yml"
319
+ - ".rspec"
319
320
  - ".rubocop.yml"
320
321
  - ".rubocop_todo.yml"
322
+ - ".travis.yml"
321
323
  - ".yardopts"
322
324
  - Gemfile
323
325
  - LICENSE.txt
@@ -363,7 +365,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
363
365
  version: '0'
364
366
  requirements: []
365
367
  rubyforge_project:
366
- rubygems_version: 2.4.6
368
+ rubygems_version: 2.5.1
367
369
  signing_key:
368
370
  specification_version: 4
369
371
  summary: PURL doc => Solr hash logic
@@ -378,4 +380,3 @@ test_files:
378
380
  - spec/unit/solr_doc_builder_spec.rb
379
381
  - spec/unit/solr_doc_hash_spec.rb
380
382
  - spec/vcr_cassettes/no_coll_druid_in_druid_array_call.yml
381
- has_rdoc: