spotlight-dor-resources 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b86fa8d9250353055a9f68632944a996fa4e0523
4
- data.tar.gz: d53448b9b23303463ed7db3c54d7f02bff763739
3
+ metadata.gz: 31130b0ef44659b04e226ce3f7d54afeb2fab9bd
4
+ data.tar.gz: 773201c4913261fb010cbd06a7bd027fb550acc1
5
5
  SHA512:
6
- metadata.gz: 7df884fe6f4617270b56b97dccb83630f6cae9ac2640c6a28d4a7763c702ae1e3b77f4ca300647b952b185fefd577096742ceedd4e17f584b34ffee1068ec676
7
- data.tar.gz: a9757073e8b1e7a92df0e2cc39d00ba8df3d028aed15c9f776a907c0b546b19405b08a46535742bd09700819f1e8480c673443ad59e0589912c6ce70beb07608
6
+ metadata.gz: dd69ded57b4a0daaa03b544218a95edb3b23a503ae54c56274f7f8cc06bb8949120f19af29b9047b81d743914dd2e601fbeb5b4a133edfe2718084457290a096
7
+ data.tar.gz: 90f34128fd6494c255f69f9cf4b430cdae7c060618ba648e99a982c3fdfdf2d41ade20b73d6febe48a8eae46b7dbf5823996b5469e25b756340a84fb217a2f04
data/.gitignore CHANGED
@@ -12,6 +12,7 @@ capybara-*.html
12
12
  /db/*.sqlite3
13
13
  /public/system/*
14
14
  /coverage/
15
+ /pkg/
15
16
  /spec/tmp/*
16
17
  **.orig
17
18
  rerun.txt
data/.rubocop.yml CHANGED
@@ -6,3 +6,7 @@ AllCops:
6
6
  Exclude:
7
7
  - 'spec/internal/**/*'
8
8
  - 'spec/test_app_templates/**/*'
9
+
10
+ Metrics/ClassLength:
11
+ Exclude:
12
+ - 'lib/spotlight/dor/indexer.rb'
data/.rubocop_todo.yml CHANGED
@@ -6,9 +6,6 @@
6
6
  # Note that changes in the inspected code, or installation of new
7
7
  # versions of RuboCop, may require this file to be generated again.
8
8
 
9
- # Offense count: 2
10
- Metrics/AbcSize:
11
- Max: 33
12
9
 
13
10
  # Offense count: 1
14
11
  # Configuration parameters: CountComments.
@@ -31,11 +31,12 @@ module Spotlight::Dor
31
31
  end
32
32
 
33
33
  # add fields from raw mods
34
- # see comment with add_donor_tags about Feigenbaum specific donor tags data
35
34
  before_index :add_box
35
+ # see comment with add_donor_tags about Feigenbaum specific donor tags data
36
36
  before_index :add_donor_tags
37
37
  before_index :add_genre
38
38
  before_index :add_folder
39
+ before_index :add_folder_name
39
40
  before_index :add_series
40
41
  before_index :mods_cartographics_indexing
41
42
 
@@ -72,7 +73,7 @@ module Spotlight::Dor
72
73
 
73
74
  # This new donor_tags_sim field was added in October 2015 specifically for the Feigenbaum exhibit. It is very likely
74
75
  # it will go unused by other projects, but should be benign (since this field will not be created if this specific MODS note is not found.)
75
- # Later refactoring could include project specific fields. Peter Mangiafico
76
+ # Later refactoring could include exhibit specific fields. Peter Mangiafico
76
77
  def add_donor_tags sdb, solr_doc
77
78
  donor_tags = sdb.smods_rec.note.select { |n| n.displayLabel == 'Donor tags' }.map(&:content)
78
79
  insert_field solr_doc, 'donor_tags', donor_tags, :symbol # this is a _ssim field
@@ -86,21 +87,30 @@ module Spotlight::Dor
86
87
  # _location.physicalLocation should find top level and relatedItem
87
88
  folder_num = sdb.smods_rec._location.physicalLocation.map do |node|
88
89
  val = node.text
89
- # folder may be text with commas
90
- match_data = val.match(/Folder:? ?(.+)/i)
91
- next if match_data.blank?
92
- result = match_data[1].strip
93
- # Menuez collection may have folder followed by Sleeve then Frame
94
- match2_data = result.match(/(.*),? ?Sleeve/i)
95
- if match2_data
96
- match2_data[1].strip.sub(/,$/, '')
97
- else
98
- result
99
- end
90
+
91
+ match_data = if val =~ /\|/
92
+ # we assume the data is pipe-delimited, and may contain commas within values
93
+ val.match(/Folder ?:? ?([^|]+)/)
94
+ else
95
+ # the data should be comma-delimited, and may not contain commas within values
96
+ val.match(/Folder ?:? ?([^,]+)/)
97
+ end
98
+
99
+ match_data[1].strip if match_data.present?
100
100
  end
101
+
101
102
  solr_doc['folder_ssi'] = folder_num.first if folder_num.present?
102
103
  end
103
104
 
105
+ # add the folder name to solr_doc as folder_name_ssi field (note: single valued!)
106
+ # data is specific to Feigenbaum collection and is in <note type='preferred citation'>
107
+ def add_folder_name(sdb, solr_doc)
108
+ # see spec for data examples
109
+ preferred_citation = sdb.smods_rec.note.select { |n| n.type_at == 'preferred citation' }.map(&:content)
110
+ match_data = preferred_citation.first.match(/Title: +(.+)/i) if preferred_citation.present?
111
+ solr_doc['folder_name_ssi'] = match_data[1].rstrip if match_data.present?
112
+ end
113
+
104
114
  # add plain MODS <genre> element data, not the SearchWorks genre values
105
115
  def add_genre sdb, solr_doc
106
116
  insert_field solr_doc, 'genre', sdb.smods_rec.genre.content, :symbol # this is a _ssim field
@@ -121,6 +131,7 @@ module Spotlight::Dor
121
131
  solr_doc['series_ssi'] = series_num.first if series_num.present?
122
132
  end
123
133
 
134
+ # rubocop:disable Metrics/AbcSize
124
135
  def mods_cartographics_indexing sdb, solr_doc
125
136
  insert_field(solr_doc, "coordinates", Array(sdb.smods_rec.subject.cartographics.coordinates).map { |n| n.text }, :stored_searchable)
126
137
 
@@ -138,6 +149,7 @@ module Spotlight::Dor
138
149
  solr_doc["point_bbox"] << "#{minX} #{minY} #{maxX} #{maxY}"
139
150
  end
140
151
  end
152
+ # rubocop:enable Metrics/AbcSize
141
153
 
142
154
  def coord_to_decimal point
143
155
  regex = /(?<dir>[NESW])\s*(?<deg>\d+)°(?:(?<sec>\d+)ʹ)?/
@@ -1,7 +1,7 @@
1
1
  module Spotlight
2
2
  module Dor
3
3
  module Resources
4
- VERSION = "0.0.1"
4
+ VERSION = "0.0.2"
5
5
  end
6
6
  end
7
7
  end
@@ -172,7 +172,9 @@ describe Spotlight::Dor::Indexer do
172
172
  'Flat-box 228 | Volume 1': nil,
173
173
  # shpc (actually in <relatedItem><location><physicalLocation>)
174
174
  'Series Biographical Photographs | Box 1 | Folder Abbot, Nathan': 'Abbot, Nathan',
175
- 'Series General Photographs | Box 1 | Folder Administration building--Outer Quad': 'Administration building--Outer Quad'
175
+ 'Series General Photographs | Box 1 | Folder Administration building--Outer Quad': 'Administration building--Outer Quad',
176
+ # hypothetical
177
+ 'Folder: 42, Sheet: 15': '42'
176
178
  }.each do |example, expected|
177
179
  describe "for example '#{example}'" do
178
180
  let(:example) { example }
@@ -197,4 +199,64 @@ describe Spotlight::Dor::Indexer do
197
199
  end # for example
198
200
  end # each
199
201
  end # add_folder
202
+
203
+ let(:mods_note_plain) do
204
+ Nokogiri::XML <<-EOF
205
+ <mods xmlns="#{Mods::MODS_NS}">
206
+ <note>#{example}</note>
207
+ </mods>
208
+ EOF
209
+ end
210
+ let(:mods_note_preferred_citation) do
211
+ Nokogiri::XML <<-EOF
212
+ <mods xmlns="#{Mods::MODS_NS}">
213
+ <note type="preferred citation">#{example}</note>
214
+ </mods>
215
+ EOF
216
+ end
217
+ describe "#add_folder_name" do
218
+ # example string as key, expected folder name as value
219
+ # all from feigenbaum (or based on feigenbaum), as that is the only collection
220
+ {
221
+ 'Call Number: SC0340, Accession: 1986-052, Box: 20, Folder: 40, Title: S': 'S',
222
+ 'Call Number: SC0340, Accession: 1986-052, Box: 54, Folder: 25, Title: Balzer': 'Balzer',
223
+ 'Call Number: SC0340, Accession: 1986-052, Box : 30, Folder: 21, Title: Feigenbaum, Publications. 2 of 2.': 'Feigenbaum, Publications. 2 of 2.',
224
+ # colon in name
225
+ 'Call Number: SC0340, Accession 2005-101, Box: 10, Folder: 26, Title: Gordon Bell Letter rdf:about blah (AI) 1987': 'Gordon Bell Letter rdf:about blah (AI) 1987',
226
+ 'Call Number: SC0340, Accession 2005-101, Box: 11, Folder: 74, Title: Microcomputer Systems Proposal: blah blah': 'Microcomputer Systems Proposal: blah blah',
227
+ 'Call Number: SC0340, Accession 2005-101, Box: 14, Folder: 20, Title: blah "bleah: blargW^"ugh" seriously?.': 'blah "bleah: blargW^"ugh" seriously?.',
228
+ # quotes in name
229
+ 'Call Number: SC0340, Accession 2005-101, Box: 29, Folder: 18, Title: "bleah" blah': '"bleah" blah',
230
+ 'Call Number: SC0340, Accession 2005-101, Box: 11, Folder: 58, Title: "M": blah': '"M": blah',
231
+ 'Call Number: SC0340, Accession 2005-101, Box : 32A, Folder: 19, Title: blah "bleah" blue': 'blah "bleah" blue',
232
+ # not parseable
233
+ 'Call Number: SC0340, Accession 2005-101': nil,
234
+ 'Call Number: SC0340, Accession: 1986-052': nil,
235
+ 'Call Number: SC0340, Accession: 1986-052, Box 36 Folder 38': nil,
236
+ 'blah blah ... with the umbrella title Feigenbaum and Feldman, Computers and Thought II. blah blah': nil,
237
+ 'blah blah ... Title ... blah blah': nil
238
+ }.each do |example, expected|
239
+ describe "for example '#{example}'" do
240
+ let(:example) { example }
241
+ context 'in preferred citation note' do
242
+ before do
243
+ allow(r).to receive(:mods).and_return(mods_note_preferred_citation)
244
+ subject.send(:add_folder_name, sdb, solr_doc)
245
+ end
246
+ it "has the expected folder name '#{expected}'" do
247
+ expect(solr_doc['folder_name_ssi']).to eq expected
248
+ end
249
+ end
250
+ context 'in plain note' do
251
+ before do
252
+ allow(r).to receive(:mods).and_return(mods_note_plain)
253
+ subject.send(:add_folder_name, sdb, solr_doc)
254
+ end
255
+ it 'does not have a folder name' do
256
+ expect(solr_doc['folder_name_ssi']).to be_falsey
257
+ end
258
+ end
259
+ end # for example
260
+ end # each
261
+ end # add_folder_name
200
262
  end
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["Chris Beer"]
10
10
  spec.email = ["cabeer@stanford.edu"]
11
11
  spec.summary = "Spotlight resource indexer for DOR resources."
12
- spec.homepage = ""
12
+ spec.homepage = "https://github.com/sul-dlss/spotlight-dor-resources"
13
13
  spec.license = "Apache 2"
14
14
 
15
15
  spec.files = `git ls-files -z`.split("\x0")
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spotlight-dor-resources
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Beer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-29 00:00:00.000000000 Z
11
+ date: 2015-10-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday
@@ -292,7 +292,7 @@ files:
292
292
  - spec/vcr_cassettes/indexer_integration_tests/genre/no_genre_ssim_field_when_genre_not_in_MODS.yml
293
293
  - spec/vcr_cassettes/indexer_integration_tests/genre/solr_doc_has_genre_ssim_field_when_genre_in_MODS.yml
294
294
  - spotlight-dor-resources.gemspec
295
- homepage: ''
295
+ homepage: https://github.com/sul-dlss/spotlight-dor-resources
296
296
  licenses:
297
297
  - Apache 2
298
298
  metadata: {}