gdor-indexer 0.4.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b7d3fef57a3dcc4df36487bead9f96a691ae385d
4
- data.tar.gz: f724d1e3bfc0aee77798a10feb3c06939b29c30a
3
+ metadata.gz: b01625bc9ae63bd07de7edbee92754e2d5c3ce82
4
+ data.tar.gz: 9586786a5f096aeaf9a7cacac5aa4d07a846ac38
5
5
  SHA512:
6
- metadata.gz: bbed1557cd19168a20ca5c26656186206ef80ea83817715fea27fb79218f45ece95f24186f9eeb4fdb17c87b2ccf147556398dcba2fe5bed02db6983951dbe7d
7
- data.tar.gz: 578889a4be5005e8a6504313e46b082685a393937a46cd4fe4315756b8da1191ef7a383ec122b5586f6da6ab76ba3157ee325fedf9c87cf67edb24d1cacf0e67
6
+ metadata.gz: c82da4f86166c0864a0ad5ed969304b8ee8a357d07efa71738d7a7ce50976cb509568ff812b9ca3b28a44d514237a37ff69eb43a8a617cd28e6aadf2e6794378
7
+ data.tar.gz: 9bc2a66dbf0652206ca609b5658eebf0d80d32695c0ccf03cace3bf58506650dc212f7b49e09bf6ecfb50c2f6f1c5a5389a913345318e1bf3b212f74b0dcdad9
@@ -1,6 +1,6 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2016-01-06 15:25:10 -0800 using RuboCop version 0.34.2.
3
+ # on 2016-01-07 14:25:59 -0800 using RuboCop version 0.34.2.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
@@ -24,9 +24,9 @@ Lint/UselessAssignment:
24
24
  - 'spec/unit/indexer_spec.rb'
25
25
  - 'spec/unit/solr_doc_builder_spec.rb'
26
26
 
27
- # Offense count: 21
27
+ # Offense count: 22
28
28
  Metrics/AbcSize:
29
- Max: 108
29
+ Max: 82
30
30
 
31
31
  # Offense count: 1
32
32
  # Configuration parameters: CountComments.
@@ -37,15 +37,15 @@ Metrics/ClassLength:
37
37
  Metrics/CyclomaticComplexity:
38
38
  Max: 9
39
39
 
40
- # Offense count: 307
40
+ # Offense count: 309
41
41
  # Configuration parameters: AllowURI, URISchemes.
42
42
  Metrics/LineLength:
43
43
  Max: 258
44
44
 
45
- # Offense count: 15
45
+ # Offense count: 16
46
46
  # Configuration parameters: CountComments.
47
47
  Metrics/MethodLength:
48
- Max: 51
48
+ Max: 43
49
49
 
50
50
  # Offense count: 3
51
51
  Metrics/PerceivedComplexity:
@@ -70,6 +70,12 @@ RSpec/InstanceVariable:
70
70
  - 'spec/unit/public_xml_fields_spec.rb'
71
71
  - 'spec/unit/solr_doc_builder_spec.rb'
72
72
 
73
+ # Offense count: 6
74
+ # Cop supports --auto-correct.
75
+ Style/AlignArray:
76
+ Exclude:
77
+ - 'lib/gdor/indexer/mods_fields.rb'
78
+
73
79
  # Offense count: 7
74
80
  # Cop supports --auto-correct.
75
81
  # Configuration parameters: EnforcedStyle, SupportedStyles.
@@ -100,7 +106,7 @@ Style/DoubleNegation:
100
106
  Exclude:
101
107
  - 'lib/gdor/indexer/solr_doc_hash.rb'
102
108
 
103
- # Offense count: 1
109
+ # Offense count: 2
104
110
  # Cop supports --auto-correct.
105
111
  # Configuration parameters: EnforcedStyle, SupportedStyles.
106
112
  Style/EmptyLinesAroundBlockBody:
@@ -113,7 +119,7 @@ Style/GuardClause:
113
119
  Exclude:
114
120
  - 'lib/gdor/indexer.rb'
115
121
 
116
- # Offense count: 12
122
+ # Offense count: 14
117
123
  # Cop supports --auto-correct.
118
124
  # Configuration parameters: EnforcedStyle, SupportedStyles.
119
125
  Style/MultilineOperationIndentation:
data/Rakefile CHANGED
@@ -18,10 +18,10 @@ end
18
18
  # sh 'irb -rubygems -I lib -r ./frda_indexer.rb'
19
19
  # end
20
20
 
21
- task default: [:ci, :rubocop]
21
+ task default: :ci
22
22
 
23
- desc 'run continuous integration suite (tests, coverage, docs)'
24
- task ci: [:rspec, :doc, :rubocop]
23
+ desc 'run continuous integration suite'
24
+ task ci: [:rspec, :rubocop]
25
25
 
26
26
  task spec: :rspec
27
27
 
@@ -8,7 +8,7 @@ Gem::Specification.new do |spec|
8
8
  spec.version = GDor::Indexer::VERSION
9
9
  spec.authors = ['Naomi Dushay', 'Laney McGlohon', 'Chris Beer']
10
10
  spec.email = ['cabeer@stanford.edu']
11
- spec.summary = 'Gryphondor Solr indexing logic'
11
+ spec.summary = 'PURL doc => Solr hash logic'
12
12
  spec.homepage = 'https://github.com/sul-dlss/gdor-indexer'
13
13
  spec.license = 'Apache 2'
14
14
 
@@ -35,10 +35,7 @@ Gem::Specification.new do |spec|
35
35
  spec.add_development_dependency 'rubocop-rspec'
36
36
  spec.add_development_dependency 'simplecov'
37
37
  spec.add_development_dependency 'equivalent-xml', '~> 0.5'
38
- spec.add_development_dependency 'capybara'
39
- spec.add_development_dependency 'poltergeist', '>= 1.5.0'
40
38
  spec.add_development_dependency 'vcr'
41
- spec.add_development_dependency 'jettywrapper'
42
39
  spec.add_development_dependency 'webmock'
43
40
  spec.add_development_dependency 'pry-byebug'
44
41
  end
@@ -4,6 +4,7 @@ module GDor::Indexer::ModsFields
4
4
  # Create a Hash representing a Solr doc, with all MODS related fields populated.
5
5
  # @return [Hash] Hash representing the Solr document
6
6
  def doc_hash_from_mods
7
+ sort_str_w_approx_dates = smods_rec.pub_date_sortable_string(false)
7
8
  doc_hash = {
8
9
  # title fields
9
10
  title_245a_search: smods_rec.sw_short_title,
@@ -45,10 +46,10 @@ module GDor::Indexer::ModsFields
45
46
 
46
47
  # publication fields
47
48
  pub_search: smods_rec.place,
48
- pub_date_sort: smods_rec.pub_date_sortable_string(false), # include approx dates
49
+ pub_date_sort: sort_str_w_approx_dates,
49
50
  # these are for single value facet display (in leiu of date slider (pub_year_tisim) and deprecated pub_date)
50
- pub_year_no_approx_isi: smods_rec.pub_date_facet_single_value(true),
51
- pub_year_w_approx_isi: smods_rec.pub_date_facet_single_value(false),
51
+ pub_year_no_approx_isi: smods_rec.pub_date_sortable_string(true),
52
+ pub_year_w_approx_isi: sort_str_w_approx_dates,
52
53
  # TODO: remove pub_date after reindexing existing colls; deprecated in favor of pub_year_xxx_approx_isi ...
53
54
  pub_date: smods_rec.pub_date_facet,
54
55
  # display fields
@@ -60,20 +61,7 @@ module GDor::Indexer::ModsFields
60
61
  all_search: smods_rec.text.gsub(/\s+/, ' ')
61
62
  }
62
63
 
63
- # more pub date field processing
64
- pub_date_sort_val = doc_hash[:pub_date_sort]
65
- if is_positive_int? pub_date_sort_val
66
- doc_hash[:pub_year_tisim] = pub_date_sort_val # for date slider
67
- # remove leading zeros
68
- doc_hash[:creation_year_isi] = remove_leading_zeros(doc_hash[:creation_year_isi]) if doc_hash[:creation_year_isi]
69
- doc_hash[:publication_year_isi] = remove_leading_zeros(doc_hash[:publication_year_isi]) if doc_hash[:publication_year_isi]
70
- else
71
- # turn B.C. into -yyy for display fields
72
- doc_hash[:creation_year_isi] = '-' + (1000 + doc_hash[:creation_year_isi].to_i).to_s if doc_hash[:creation_year_isi]
73
- doc_hash[:publication_year_isi] = '-' + (1000 + doc_hash[:publication_year_isi].to_i).to_s if doc_hash[:publication_year_isi]
74
- end
75
-
76
- doc_hash
64
+ more_pub_date_goodness(doc_hash)
77
65
  end
78
66
 
79
67
  private
@@ -88,6 +76,34 @@ module GDor::Indexer::ModsFields
88
76
  vals
89
77
  end
90
78
 
79
+ # additional pub date field processing for Solr doc hash
80
+ # @param doc_hash [Hash] Hash representing the Solr document
81
+ # @return [Hash] updated Hash representing the Solr document
82
+ def more_pub_date_goodness(doc_hash)
83
+ pub_date_sort_val = doc_hash[:pub_date_sort]
84
+ if is_positive_int? pub_date_sort_val
85
+ doc_hash[:pub_year_tisim] = pub_date_sort_val # for date slider
86
+ # remove leading zeros
87
+ [:pub_year_no_approx_isi,
88
+ :pub_year_w_approx_isi,
89
+ :creation_year_isi,
90
+ :publication_year_isi
91
+ ].each do |field_sym|
92
+ doc_hash[field_sym] = remove_leading_zeros(doc_hash[field_sym]) if doc_hash[field_sym]
93
+ end
94
+ else
95
+ # turn B.C. into -yyy for facet/display fields
96
+ [:pub_year_no_approx_isi,
97
+ :pub_year_w_approx_isi,
98
+ :creation_year_isi,
99
+ :publication_year_isi
100
+ ].each do |field_sym|
101
+ doc_hash[field_sym] = '-' + (1000 + doc_hash[field_sym].to_i).to_s if doc_hash[field_sym]
102
+ end
103
+ end
104
+ doc_hash
105
+ end
106
+
91
107
  # @return true if str.to_i is >= 0 (NOTE: presumably non-numeric strings also yield true, since String#to_i returns 0 for them -- confirm intent)
92
108
  def is_positive_int?(str)
93
109
  str.to_i >= 0
@@ -1,5 +1,5 @@
1
1
  module GDor
2
2
  class Indexer
3
- VERSION = '0.4.0'
3
+ VERSION = '0.4.1'
4
4
  end
5
5
  end
@@ -120,14 +120,14 @@ describe GDor::Indexer do
120
120
  @indexer.index collection
121
121
  end
122
122
 
123
- it 'indexs other resources as items' do
123
+ it 'indexes other resources as items' do
124
124
  expect(@indexer).to receive(:item_solr_document).with(resource)
125
125
  @indexer.index resource
126
126
  end
127
127
  end
128
128
 
129
129
  describe '#index_with_exception_handling' do
130
- it 'capture,s log, and re-raise any exception thrown by the indexing process' do
130
+ it 'captures log and re-raises any exception thrown by the indexing process' do
131
131
  expect(@indexer).to receive(:index).with(resource).and_raise 'xyz'
132
132
  expect(@indexer.logger).to receive(:error)
133
133
  expect { @indexer.index_with_exception_handling(resource) }.to raise_error RuntimeError
@@ -433,7 +433,7 @@ describe GDor::Indexer do
433
433
  end
434
434
 
435
435
  # context "skip heartbeat" do
436
- # it "allows me to use a fake url for dor-fetcher-client" do
436
+ # it "allows use of a fake url for dor-fetcher-client" do
437
437
  # expect {GDor::Indexer.new(@config_yml_path)}.not_to raise_error
438
438
  # end
439
439
  # end
@@ -33,9 +33,51 @@ describe GDor::Indexer::ModsFields do
33
33
  end
34
34
  end
35
35
 
36
+ context 'pub_date (to know current behavior)' do
37
+ it 'calls Stanford::Mods::Record instance pub_date_sortable_string(false)' do
38
+ expect(sdb.smods_rec).to receive(:pub_date_facet)
39
+ sdb.doc_hash_from_mods[:pub_date]
40
+ end
41
+ it 'includes approx dates' do
42
+ m = mods_origin_info_start_str +
43
+ "<dateIssued qualifier='approximate'>1945</dateIssued>" +
44
+ mods_origin_info_end_str
45
+ sdb = sdb_for_mods(m)
46
+ expect(sdb.doc_hash_from_mods[:pub_date]).to eq('1945')
47
+ end
48
+ it 'takes single dateCreated' do
49
+ m = mods_origin_info_start_str +
50
+ "<dateCreated>1904</dateCreated>" +
51
+ mods_origin_info_end_str
52
+ sdb = sdb_for_mods(m)
53
+ expect(sdb.doc_hash_from_mods[:pub_date]).to eq('1904')
54
+ end
55
+ it_behaves_like 'expected', :pub_date, 'blah blah 1945 blah', '1945'
56
+ it_behaves_like 'expected', :pub_date, '1945', '1945'
57
+ it_behaves_like 'expected', :pub_date, '945', '945'
58
+ it_behaves_like 'expected', :pub_date, '66', nil
59
+ it_behaves_like 'expected', :pub_date, '5', nil
60
+ it_behaves_like 'expected', :pub_date, '0', nil
61
+ it_behaves_like 'expected', :pub_date, '-4', nil
62
+ it_behaves_like 'expected', :pub_date, '-15', nil
63
+ it_behaves_like 'expected', :pub_date, '-666', '666' # WRONG
64
+ it_behaves_like 'expected', :pub_date, '16--', nil
65
+ it_behaves_like 'expected', :pub_date, '8--', nil
66
+ it_behaves_like 'expected', :pub_date, '19th century', '19th century'
67
+ it_behaves_like 'expected', :pub_date, '9th century', '9th century'
68
+ it_behaves_like 'expected', :pub_date, '300 B.C.', '300 B.C.'
69
+ it_behaves_like 'expected', :pub_date, 'Text dated June 4, 1594; miniatures added by 1596', '1594'
70
+ it_behaves_like 'expected', :pub_date, 'Aug. 3rd, 1886', '1886'
71
+ it_behaves_like 'expected', :pub_date, 'Aug. 3rd, [18]86?', '1886'
72
+ it_behaves_like 'expected', :pub_date, 'early 1890s', '1890'
73
+ it_behaves_like 'expected', :pub_date, '1865-6', '1865'
74
+
75
+ end
76
+
36
77
  context 'pub_date_sort' do
37
78
  it 'calls Stanford::Mods::Record instance pub_date_sortable_string(false)' do
38
79
  expect(sdb.smods_rec).to receive(:pub_date_sortable_string).with(false)
80
+ allow(sdb.smods_rec).to receive(:pub_date_sortable_string).with(true) # for pub_year_no_approx_isi
39
81
  sdb.doc_hash_from_mods[:pub_date_sort]
40
82
  end
41
83
  it 'includes approx dates' do
@@ -78,30 +120,36 @@ describe GDor::Indexer::ModsFields do
78
120
  end
79
121
  it 'pub_year_no_approx_isi calls Stanford::Mods::Record instance pub_date_facet_single_value(true)' do
80
122
  sdb = sdb_for_mods(mods)
81
- expect(sdb.smods_rec).to receive(:pub_date_facet_single_value).with(true).and_call_original
82
- allow(sdb.smods_rec).to receive(:pub_date_facet_single_value).with(false) # for other flavor
123
+ expect(sdb.smods_rec).to receive(:pub_date_sortable_string).with(true).and_call_original
124
+ allow(sdb.smods_rec).to receive(:pub_date_sortable_string).with(false) # for other flavor
83
125
  expect(sdb.doc_hash_from_mods[:pub_year_no_approx_isi]).to eq '2000'
84
126
  end
85
127
  it 'pub_year_w_approx_isi calls Stanford::Mods::Record instance pub_date_facet_single_value(false)' do
86
128
  sdb = sdb_for_mods(mods)
87
- expect(sdb.smods_rec).to receive(:pub_date_facet_single_value).with(false).and_call_original
88
- allow(sdb.smods_rec).to receive(:pub_date_facet_single_value).with(true) # for other flavor
129
+ expect(sdb.smods_rec).to receive(:pub_date_sortable_string).with(false).and_call_original
130
+ allow(sdb.smods_rec).to receive(:pub_date_sortable_string).with(true) # for other flavor
89
131
  expect(sdb.doc_hash_from_mods[:pub_year_w_approx_isi]).to eq '1500'
90
132
  end
91
133
  RSpec.shared_examples "single pub year facet" do |field_sym|
134
+ it_behaves_like 'expected', field_sym, 'blah blah 1945 blah', '1945'
92
135
  it_behaves_like 'expected', field_sym, '1945', '1945'
93
136
  it_behaves_like 'expected', field_sym, '945', '945'
94
137
  it_behaves_like 'expected', field_sym, '66', '66'
95
138
  it_behaves_like 'expected', field_sym, '5', '5'
96
139
  it_behaves_like 'expected', field_sym, '0', '0'
97
- it_behaves_like 'expected', field_sym, '-4', '4 B.C.'
98
- it_behaves_like 'expected', field_sym, '-15', '15 B.C.'
99
- it_behaves_like 'expected', field_sym, '-666', '666 B.C.'
100
- it_behaves_like 'expected', field_sym, '16--', '17th century'
101
- it_behaves_like 'expected', field_sym, '8--', '9th century'
102
- it_behaves_like 'expected', field_sym, '19th century', '19th century'
103
- it_behaves_like 'expected', field_sym, '9th century', '9th century'
104
- it_behaves_like 'expected', field_sym, '300 B.C.', '300 B.C.'
140
+ it_behaves_like 'expected', field_sym, '-4', '-4'
141
+ it_behaves_like 'expected', field_sym, '-15', '-15'
142
+ it_behaves_like 'expected', field_sym, '-666', '-666'
143
+ it_behaves_like 'expected', field_sym, '16--', '1600'
144
+ it_behaves_like 'expected', field_sym, '8--', '800'
145
+ it_behaves_like 'expected', field_sym, '19th century', '1800'
146
+ it_behaves_like 'expected', field_sym, '9th century', '800'
147
+ it_behaves_like 'expected', field_sym, '300 B.C.', '-300'
148
+ it_behaves_like 'expected', field_sym, 'Text dated June 4, 1594; miniatures added by 1596', '1594'
149
+ it_behaves_like 'expected', field_sym, 'Aug. 3rd, 1886', '1886'
150
+ it_behaves_like 'expected', field_sym, 'Aug. 3rd, [18]86?', '1886'
151
+ it_behaves_like 'expected', field_sym, 'early 1890s', '1890'
152
+ it_behaves_like 'expected', field_sym, '1865-6', '1865'
105
153
  end
106
154
  it_behaves_like "single pub year facet", :pub_year_no_approx_isi
107
155
  it_behaves_like "single pub year facet", :pub_year_w_approx_isi
@@ -113,6 +161,7 @@ describe GDor::Indexer::ModsFields do
113
161
  # FIXME: it should be using a method appropriate for date slider values, not single value
114
162
  it 'pub_year_tisim calls Stanford::Mods::Record instance pub_date_sortable_string(false)' do
115
163
  expect(sdb.smods_rec).to receive(:pub_date_sortable_string).with(false)
164
+ allow(sdb.smods_rec).to receive(:pub_date_sortable_string).with(true) # for pub_year_no_approx_isi
116
165
  sdb.doc_hash_from_mods[:pub_year_tisim]
117
166
  end
118
167
  it 'includes approx dates' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gdor-indexer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naomi Dushay
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2016-01-06 00:00:00.000000000 Z
13
+ date: 2016-01-07 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: harvestdor-indexer
@@ -264,34 +264,6 @@ dependencies:
264
264
  - - "~>"
265
265
  - !ruby/object:Gem::Version
266
266
  version: '0.5'
267
- - !ruby/object:Gem::Dependency
268
- name: capybara
269
- requirement: !ruby/object:Gem::Requirement
270
- requirements:
271
- - - ">="
272
- - !ruby/object:Gem::Version
273
- version: '0'
274
- type: :development
275
- prerelease: false
276
- version_requirements: !ruby/object:Gem::Requirement
277
- requirements:
278
- - - ">="
279
- - !ruby/object:Gem::Version
280
- version: '0'
281
- - !ruby/object:Gem::Dependency
282
- name: poltergeist
283
- requirement: !ruby/object:Gem::Requirement
284
- requirements:
285
- - - ">="
286
- - !ruby/object:Gem::Version
287
- version: 1.5.0
288
- type: :development
289
- prerelease: false
290
- version_requirements: !ruby/object:Gem::Requirement
291
- requirements:
292
- - - ">="
293
- - !ruby/object:Gem::Version
294
- version: 1.5.0
295
267
  - !ruby/object:Gem::Dependency
296
268
  name: vcr
297
269
  requirement: !ruby/object:Gem::Requirement
@@ -306,20 +278,6 @@ dependencies:
306
278
  - - ">="
307
279
  - !ruby/object:Gem::Version
308
280
  version: '0'
309
- - !ruby/object:Gem::Dependency
310
- name: jettywrapper
311
- requirement: !ruby/object:Gem::Requirement
312
- requirements:
313
- - - ">="
314
- - !ruby/object:Gem::Version
315
- version: '0'
316
- type: :development
317
- prerelease: false
318
- version_requirements: !ruby/object:Gem::Requirement
319
- requirements:
320
- - - ">="
321
- - !ruby/object:Gem::Version
322
- version: '0'
323
281
  - !ruby/object:Gem::Dependency
324
282
  name: webmock
325
283
  requirement: !ruby/object:Gem::Requirement
@@ -408,7 +366,7 @@ rubyforge_project:
408
366
  rubygems_version: 2.4.6
409
367
  signing_key:
410
368
  specification_version: 4
411
- summary: Gryphondor Solr indexing logic
369
+ summary: PURL doc => Solr hash logic
412
370
  test_files:
413
371
  - spec/config/walters_integration_spec.yml
414
372
  - spec/spec_helper.rb