gdor-indexer 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b7d3fef57a3dcc4df36487bead9f96a691ae385d
4
- data.tar.gz: f724d1e3bfc0aee77798a10feb3c06939b29c30a
3
+ metadata.gz: b01625bc9ae63bd07de7edbee92754e2d5c3ce82
4
+ data.tar.gz: 9586786a5f096aeaf9a7cacac5aa4d07a846ac38
5
5
  SHA512:
6
- metadata.gz: bbed1557cd19168a20ca5c26656186206ef80ea83817715fea27fb79218f45ece95f24186f9eeb4fdb17c87b2ccf147556398dcba2fe5bed02db6983951dbe7d
7
- data.tar.gz: 578889a4be5005e8a6504313e46b082685a393937a46cd4fe4315756b8da1191ef7a383ec122b5586f6da6ab76ba3157ee325fedf9c87cf67edb24d1cacf0e67
6
+ metadata.gz: c82da4f86166c0864a0ad5ed969304b8ee8a357d07efa71738d7a7ce50976cb509568ff812b9ca3b28a44d514237a37ff69eb43a8a617cd28e6aadf2e6794378
7
+ data.tar.gz: 9bc2a66dbf0652206ca609b5658eebf0d80d32695c0ccf03cace3bf58506650dc212f7b49e09bf6ecfb50c2f6f1c5a5389a913345318e1bf3b212f74b0dcdad9
@@ -1,6 +1,6 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2016-01-06 15:25:10 -0800 using RuboCop version 0.34.2.
3
+ # on 2016-01-07 14:25:59 -0800 using RuboCop version 0.34.2.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
@@ -24,9 +24,9 @@ Lint/UselessAssignment:
24
24
  - 'spec/unit/indexer_spec.rb'
25
25
  - 'spec/unit/solr_doc_builder_spec.rb'
26
26
 
27
- # Offense count: 21
27
+ # Offense count: 22
28
28
  Metrics/AbcSize:
29
- Max: 108
29
+ Max: 82
30
30
 
31
31
  # Offense count: 1
32
32
  # Configuration parameters: CountComments.
@@ -37,15 +37,15 @@ Metrics/ClassLength:
37
37
  Metrics/CyclomaticComplexity:
38
38
  Max: 9
39
39
 
40
- # Offense count: 307
40
+ # Offense count: 309
41
41
  # Configuration parameters: AllowURI, URISchemes.
42
42
  Metrics/LineLength:
43
43
  Max: 258
44
44
 
45
- # Offense count: 15
45
+ # Offense count: 16
46
46
  # Configuration parameters: CountComments.
47
47
  Metrics/MethodLength:
48
- Max: 51
48
+ Max: 43
49
49
 
50
50
  # Offense count: 3
51
51
  Metrics/PerceivedComplexity:
@@ -70,6 +70,12 @@ RSpec/InstanceVariable:
70
70
  - 'spec/unit/public_xml_fields_spec.rb'
71
71
  - 'spec/unit/solr_doc_builder_spec.rb'
72
72
 
73
+ # Offense count: 6
74
+ # Cop supports --auto-correct.
75
+ Style/AlignArray:
76
+ Exclude:
77
+ - 'lib/gdor/indexer/mods_fields.rb'
78
+
73
79
  # Offense count: 7
74
80
  # Cop supports --auto-correct.
75
81
  # Configuration parameters: EnforcedStyle, SupportedStyles.
@@ -100,7 +106,7 @@ Style/DoubleNegation:
100
106
  Exclude:
101
107
  - 'lib/gdor/indexer/solr_doc_hash.rb'
102
108
 
103
- # Offense count: 1
109
+ # Offense count: 2
104
110
  # Cop supports --auto-correct.
105
111
  # Configuration parameters: EnforcedStyle, SupportedStyles.
106
112
  Style/EmptyLinesAroundBlockBody:
@@ -113,7 +119,7 @@ Style/GuardClause:
113
119
  Exclude:
114
120
  - 'lib/gdor/indexer.rb'
115
121
 
116
- # Offense count: 12
122
+ # Offense count: 14
117
123
  # Cop supports --auto-correct.
118
124
  # Configuration parameters: EnforcedStyle, SupportedStyles.
119
125
  Style/MultilineOperationIndentation:
data/Rakefile CHANGED
@@ -18,10 +18,10 @@ end
18
18
  # sh 'irb -rubygems -I lib -r ./frda_indexer.rb'
19
19
  # end
20
20
 
21
- task default: [:ci, :rubocop]
21
+ task default: :ci
22
22
 
23
- desc 'run continuous integration suite (tests, coverage, docs)'
24
- task ci: [:rspec, :doc, :rubocop]
23
+ desc 'run continuous integration suite'
24
+ task ci: [:rspec, :rubocop]
25
25
 
26
26
  task spec: :rspec
27
27
 
@@ -8,7 +8,7 @@ Gem::Specification.new do |spec|
8
8
  spec.version = GDor::Indexer::VERSION
9
9
  spec.authors = ['Naomi Dushay', 'Laney McGlohon', 'Chris Beer']
10
10
  spec.email = ['cabeer@stanford.edu']
11
- spec.summary = 'Gryphondor Solr indexing logic'
11
+ spec.summary = 'PURL doc => Solr hash logic'
12
12
  spec.homepage = 'https://github.com/sul-dlss/gdor-indexer'
13
13
  spec.license = 'Apache 2'
14
14
 
@@ -35,10 +35,7 @@ Gem::Specification.new do |spec|
35
35
  spec.add_development_dependency 'rubocop-rspec'
36
36
  spec.add_development_dependency 'simplecov'
37
37
  spec.add_development_dependency 'equivalent-xml', '~> 0.5'
38
- spec.add_development_dependency 'capybara'
39
- spec.add_development_dependency 'poltergeist', '>= 1.5.0'
40
38
  spec.add_development_dependency 'vcr'
41
- spec.add_development_dependency 'jettywrapper'
42
39
  spec.add_development_dependency 'webmock'
43
40
  spec.add_development_dependency 'pry-byebug'
44
41
  end
@@ -4,6 +4,7 @@ module GDor::Indexer::ModsFields
4
4
  # Create a Hash representing a Solr doc, with all MODS related fields populated.
5
5
  # @return [Hash] Hash representing the Solr document
6
6
  def doc_hash_from_mods
7
+ sort_str_w_approx_dates = smods_rec.pub_date_sortable_string(false)
7
8
  doc_hash = {
8
9
  # title fields
9
10
  title_245a_search: smods_rec.sw_short_title,
@@ -45,10 +46,10 @@ module GDor::Indexer::ModsFields
45
46
 
46
47
  # publication fields
47
48
  pub_search: smods_rec.place,
48
- pub_date_sort: smods_rec.pub_date_sortable_string(false), # include approx dates
49
+ pub_date_sort: sort_str_w_approx_dates,
49
50
  # these are for single value facet display (in lieu of date slider (pub_year_tisim) and deprecated pub_date)
50
- pub_year_no_approx_isi: smods_rec.pub_date_facet_single_value(true),
51
- pub_year_w_approx_isi: smods_rec.pub_date_facet_single_value(false),
51
+ pub_year_no_approx_isi: smods_rec.pub_date_sortable_string(true),
52
+ pub_year_w_approx_isi: sort_str_w_approx_dates,
52
53
  # TODO: remove pub_date after reindexing existing colls; deprecated in favor of pub_year_xxx_approx_isi ...
53
54
  pub_date: smods_rec.pub_date_facet,
54
55
  # display fields
@@ -60,20 +61,7 @@ module GDor::Indexer::ModsFields
60
61
  all_search: smods_rec.text.gsub(/\s+/, ' ')
61
62
  }
62
63
 
63
- # more pub date field processing
64
- pub_date_sort_val = doc_hash[:pub_date_sort]
65
- if is_positive_int? pub_date_sort_val
66
- doc_hash[:pub_year_tisim] = pub_date_sort_val # for date slider
67
- # remove leading zeros
68
- doc_hash[:creation_year_isi] = remove_leading_zeros(doc_hash[:creation_year_isi]) if doc_hash[:creation_year_isi]
69
- doc_hash[:publication_year_isi] = remove_leading_zeros(doc_hash[:publication_year_isi]) if doc_hash[:publication_year_isi]
70
- else
71
- # turn B.C. into -yyy for display fields
72
- doc_hash[:creation_year_isi] = '-' + (1000 + doc_hash[:creation_year_isi].to_i).to_s if doc_hash[:creation_year_isi]
73
- doc_hash[:publication_year_isi] = '-' + (1000 + doc_hash[:publication_year_isi].to_i).to_s if doc_hash[:publication_year_isi]
74
- end
75
-
76
- doc_hash
64
+ more_pub_date_goodness(doc_hash)
77
65
  end
78
66
 
79
67
  private
@@ -88,6 +76,34 @@ module GDor::Indexer::ModsFields
88
76
  vals
89
77
  end
90
78
 
79
+ # additional pub date field processing for Solr doc hash
80
+ # @param [Hash] Hash representing the Solr document
81
+ # @return [Hash] updated Hash representing the Solr document
82
+ def more_pub_date_goodness(doc_hash)
83
+ pub_date_sort_val = doc_hash[:pub_date_sort]
84
+ if is_positive_int? pub_date_sort_val
85
+ doc_hash[:pub_year_tisim] = pub_date_sort_val # for date slider
86
+ # remove leading zeros
87
+ [:pub_year_no_approx_isi,
88
+ :pub_year_w_approx_isi,
89
+ :creation_year_isi,
90
+ :publication_year_isi
91
+ ].each do |field_sym|
92
+ doc_hash[field_sym] = remove_leading_zeros(doc_hash[field_sym]) if doc_hash[field_sym]
93
+ end
94
+ else
95
+ # turn B.C. into -yyy for facet/display fields
96
+ [:pub_year_no_approx_isi,
97
+ :pub_year_w_approx_isi,
98
+ :creation_year_isi,
99
+ :publication_year_isi
100
+ ].each do |field_sym|
101
+ doc_hash[field_sym] = '-' + (1000 + doc_hash[field_sym].to_i).to_s if doc_hash[field_sym]
102
+ end
103
+ end
104
+ doc_hash
105
+ end
106
+
91
107
  # @return true if the string parses into an int, and if so, the int is >= 0
92
108
  def is_positive_int?(str)
93
109
  str.to_i >= 0
@@ -1,5 +1,5 @@
1
1
  module GDor
2
2
  class Indexer
3
- VERSION = '0.4.0'
3
+ VERSION = '0.4.1'
4
4
  end
5
5
  end
@@ -120,14 +120,14 @@ describe GDor::Indexer do
120
120
  @indexer.index collection
121
121
  end
122
122
 
123
- it 'indexs other resources as items' do
123
+ it 'indexes other resources as items' do
124
124
  expect(@indexer).to receive(:item_solr_document).with(resource)
125
125
  @indexer.index resource
126
126
  end
127
127
  end
128
128
 
129
129
  describe '#index_with_exception_handling' do
130
- it 'capture,s log, and re-raise any exception thrown by the indexing process' do
130
+ it 'captures log and re-raises any exception thrown by the indexing process' do
131
131
  expect(@indexer).to receive(:index).with(resource).and_raise 'xyz'
132
132
  expect(@indexer.logger).to receive(:error)
133
133
  expect { @indexer.index_with_exception_handling(resource) }.to raise_error RuntimeError
@@ -433,7 +433,7 @@ describe GDor::Indexer do
433
433
  end
434
434
 
435
435
  # context "skip heartbeat" do
436
- # it "allows me to use a fake url for dor-fetcher-client" do
436
+ # it "allows use of a fake url for dor-fetcher-client" do
437
437
  # expect {GDor::Indexer.new(@config_yml_path)}.not_to raise_error
438
438
  # end
439
439
  # end
@@ -33,9 +33,51 @@ describe GDor::Indexer::ModsFields do
33
33
  end
34
34
  end
35
35
 
36
+ context 'pub_date (to know current behavior)' do
37
+ it 'calls Stanford::Mods::Record instance pub_date_sortable_string(false)' do
38
+ expect(sdb.smods_rec).to receive(:pub_date_facet)
39
+ sdb.doc_hash_from_mods[:pub_date]
40
+ end
41
+ it 'includes approx dates' do
42
+ m = mods_origin_info_start_str +
43
+ "<dateIssued qualifier='approximate'>1945</dateIssued>" +
44
+ mods_origin_info_end_str
45
+ sdb = sdb_for_mods(m)
46
+ expect(sdb.doc_hash_from_mods[:pub_date]).to eq('1945')
47
+ end
48
+ it 'takes single dateCreated' do
49
+ m = mods_origin_info_start_str +
50
+ "<dateCreated>1904</dateCreated>" +
51
+ mods_origin_info_end_str
52
+ sdb = sdb_for_mods(m)
53
+ expect(sdb.doc_hash_from_mods[:pub_date]).to eq('1904')
54
+ end
55
+ it_behaves_like 'expected', :pub_date, 'blah blah 1945 blah', '1945'
56
+ it_behaves_like 'expected', :pub_date, '1945', '1945'
57
+ it_behaves_like 'expected', :pub_date, '945', '945'
58
+ it_behaves_like 'expected', :pub_date, '66', nil
59
+ it_behaves_like 'expected', :pub_date, '5', nil
60
+ it_behaves_like 'expected', :pub_date, '0', nil
61
+ it_behaves_like 'expected', :pub_date, '-4', nil
62
+ it_behaves_like 'expected', :pub_date, '-15', nil
63
+ it_behaves_like 'expected', :pub_date, '-666', '666' # WRONG
64
+ it_behaves_like 'expected', :pub_date, '16--', nil
65
+ it_behaves_like 'expected', :pub_date, '8--', nil
66
+ it_behaves_like 'expected', :pub_date, '19th century', '19th century'
67
+ it_behaves_like 'expected', :pub_date, '9th century', '9th century'
68
+ it_behaves_like 'expected', :pub_date, '300 B.C.', '300 B.C.'
69
+ it_behaves_like 'expected', :pub_date, 'Text dated June 4, 1594; miniatures added by 1596', '1594'
70
+ it_behaves_like 'expected', :pub_date, 'Aug. 3rd, 1886', '1886'
71
+ it_behaves_like 'expected', :pub_date, 'Aug. 3rd, [18]86?', '1886'
72
+ it_behaves_like 'expected', :pub_date, 'early 1890s', '1890'
73
+ it_behaves_like 'expected', :pub_date, '1865-6', '1865'
74
+
75
+ end
76
+
36
77
  context 'pub_date_sort' do
37
78
  it 'calls Stanford::Mods::Record instance pub_date_sortable_string(false)' do
38
79
  expect(sdb.smods_rec).to receive(:pub_date_sortable_string).with(false)
80
+ allow(sdb.smods_rec).to receive(:pub_date_sortable_string).with(true) # for pub_year_no_approx_isi
39
81
  sdb.doc_hash_from_mods[:pub_date_sort]
40
82
  end
41
83
  it 'includes approx dates' do
@@ -78,30 +120,36 @@ describe GDor::Indexer::ModsFields do
78
120
  end
79
121
  it 'pub_year_no_approx_isi calls Stanford::Mods::Record instance pub_date_facet_single_value(true)' do
80
122
  sdb = sdb_for_mods(mods)
81
- expect(sdb.smods_rec).to receive(:pub_date_facet_single_value).with(true).and_call_original
82
- allow(sdb.smods_rec).to receive(:pub_date_facet_single_value).with(false) # for other flavor
123
+ expect(sdb.smods_rec).to receive(:pub_date_sortable_string).with(true).and_call_original
124
+ allow(sdb.smods_rec).to receive(:pub_date_sortable_string).with(false) # for other flavor
83
125
  expect(sdb.doc_hash_from_mods[:pub_year_no_approx_isi]).to eq '2000'
84
126
  end
85
127
  it 'pub_year_w_approx_isi calls Stanford::Mods::Record instance pub_date_facet_single_value(false)' do
86
128
  sdb = sdb_for_mods(mods)
87
- expect(sdb.smods_rec).to receive(:pub_date_facet_single_value).with(false).and_call_original
88
- allow(sdb.smods_rec).to receive(:pub_date_facet_single_value).with(true) # for other flavor
129
+ expect(sdb.smods_rec).to receive(:pub_date_sortable_string).with(false).and_call_original
130
+ allow(sdb.smods_rec).to receive(:pub_date_sortable_string).with(true) # for other flavor
89
131
  expect(sdb.doc_hash_from_mods[:pub_year_w_approx_isi]).to eq '1500'
90
132
  end
91
133
  RSpec.shared_examples "single pub year facet" do |field_sym|
134
+ it_behaves_like 'expected', field_sym, 'blah blah 1945 blah', '1945'
92
135
  it_behaves_like 'expected', field_sym, '1945', '1945'
93
136
  it_behaves_like 'expected', field_sym, '945', '945'
94
137
  it_behaves_like 'expected', field_sym, '66', '66'
95
138
  it_behaves_like 'expected', field_sym, '5', '5'
96
139
  it_behaves_like 'expected', field_sym, '0', '0'
97
- it_behaves_like 'expected', field_sym, '-4', '4 B.C.'
98
- it_behaves_like 'expected', field_sym, '-15', '15 B.C.'
99
- it_behaves_like 'expected', field_sym, '-666', '666 B.C.'
100
- it_behaves_like 'expected', field_sym, '16--', '17th century'
101
- it_behaves_like 'expected', field_sym, '8--', '9th century'
102
- it_behaves_like 'expected', field_sym, '19th century', '19th century'
103
- it_behaves_like 'expected', field_sym, '9th century', '9th century'
104
- it_behaves_like 'expected', field_sym, '300 B.C.', '300 B.C.'
140
+ it_behaves_like 'expected', field_sym, '-4', '-4'
141
+ it_behaves_like 'expected', field_sym, '-15', '-15'
142
+ it_behaves_like 'expected', field_sym, '-666', '-666'
143
+ it_behaves_like 'expected', field_sym, '16--', '1600'
144
+ it_behaves_like 'expected', field_sym, '8--', '800'
145
+ it_behaves_like 'expected', field_sym, '19th century', '1800'
146
+ it_behaves_like 'expected', field_sym, '9th century', '800'
147
+ it_behaves_like 'expected', field_sym, '300 B.C.', '-300'
148
+ it_behaves_like 'expected', field_sym, 'Text dated June 4, 1594; miniatures added by 1596', '1594'
149
+ it_behaves_like 'expected', field_sym, 'Aug. 3rd, 1886', '1886'
150
+ it_behaves_like 'expected', field_sym, 'Aug. 3rd, [18]86?', '1886'
151
+ it_behaves_like 'expected', field_sym, 'early 1890s', '1890'
152
+ it_behaves_like 'expected', field_sym, '1865-6', '1865'
105
153
  end
106
154
  it_behaves_like "single pub year facet", :pub_year_no_approx_isi
107
155
  it_behaves_like "single pub year facet", :pub_year_w_approx_isi
@@ -113,6 +161,7 @@ describe GDor::Indexer::ModsFields do
113
161
  # FIXME: it should be using a method approp for date slider values, not single value
114
162
  it 'pub_year_tisim calls Stanford::Mods::Record instance pub_date_sortable_string(false)' do
115
163
  expect(sdb.smods_rec).to receive(:pub_date_sortable_string).with(false)
164
+ allow(sdb.smods_rec).to receive(:pub_date_sortable_string).with(true) # for pub_year_no_approx_isi
116
165
  sdb.doc_hash_from_mods[:pub_year_tisim]
117
166
  end
118
167
  it 'includes approx dates' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gdor-indexer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naomi Dushay
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2016-01-06 00:00:00.000000000 Z
13
+ date: 2016-01-07 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: harvestdor-indexer
@@ -264,34 +264,6 @@ dependencies:
264
264
  - - "~>"
265
265
  - !ruby/object:Gem::Version
266
266
  version: '0.5'
267
- - !ruby/object:Gem::Dependency
268
- name: capybara
269
- requirement: !ruby/object:Gem::Requirement
270
- requirements:
271
- - - ">="
272
- - !ruby/object:Gem::Version
273
- version: '0'
274
- type: :development
275
- prerelease: false
276
- version_requirements: !ruby/object:Gem::Requirement
277
- requirements:
278
- - - ">="
279
- - !ruby/object:Gem::Version
280
- version: '0'
281
- - !ruby/object:Gem::Dependency
282
- name: poltergeist
283
- requirement: !ruby/object:Gem::Requirement
284
- requirements:
285
- - - ">="
286
- - !ruby/object:Gem::Version
287
- version: 1.5.0
288
- type: :development
289
- prerelease: false
290
- version_requirements: !ruby/object:Gem::Requirement
291
- requirements:
292
- - - ">="
293
- - !ruby/object:Gem::Version
294
- version: 1.5.0
295
267
  - !ruby/object:Gem::Dependency
296
268
  name: vcr
297
269
  requirement: !ruby/object:Gem::Requirement
@@ -306,20 +278,6 @@ dependencies:
306
278
  - - ">="
307
279
  - !ruby/object:Gem::Version
308
280
  version: '0'
309
- - !ruby/object:Gem::Dependency
310
- name: jettywrapper
311
- requirement: !ruby/object:Gem::Requirement
312
- requirements:
313
- - - ">="
314
- - !ruby/object:Gem::Version
315
- version: '0'
316
- type: :development
317
- prerelease: false
318
- version_requirements: !ruby/object:Gem::Requirement
319
- requirements:
320
- - - ">="
321
- - !ruby/object:Gem::Version
322
- version: '0'
323
281
  - !ruby/object:Gem::Dependency
324
282
  name: webmock
325
283
  requirement: !ruby/object:Gem::Requirement
@@ -408,7 +366,7 @@ rubyforge_project:
408
366
  rubygems_version: 2.4.6
409
367
  signing_key:
410
368
  specification_version: 4
411
- summary: Gryphondor Solr indexing logic
369
+ summary: PURL doc => Solr hash logic
412
370
  test_files:
413
371
  - spec/config/walters_integration_spec.yml
414
372
  - spec/spec_helper.rb