spotlight-dor-resources 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b86fa8d9250353055a9f68632944a996fa4e0523
4
- data.tar.gz: d53448b9b23303463ed7db3c54d7f02bff763739
3
+ metadata.gz: 31130b0ef44659b04e226ce3f7d54afeb2fab9bd
4
+ data.tar.gz: 773201c4913261fb010cbd06a7bd027fb550acc1
5
5
  SHA512:
6
- metadata.gz: 7df884fe6f4617270b56b97dccb83630f6cae9ac2640c6a28d4a7763c702ae1e3b77f4ca300647b952b185fefd577096742ceedd4e17f584b34ffee1068ec676
7
- data.tar.gz: a9757073e8b1e7a92df0e2cc39d00ba8df3d028aed15c9f776a907c0b546b19405b08a46535742bd09700819f1e8480c673443ad59e0589912c6ce70beb07608
6
+ metadata.gz: dd69ded57b4a0daaa03b544218a95edb3b23a503ae54c56274f7f8cc06bb8949120f19af29b9047b81d743914dd2e601fbeb5b4a133edfe2718084457290a096
7
+ data.tar.gz: 90f34128fd6494c255f69f9cf4b430cdae7c060618ba648e99a982c3fdfdf2d41ade20b73d6febe48a8eae46b7dbf5823996b5469e25b756340a84fb217a2f04
data/.gitignore CHANGED
@@ -12,6 +12,7 @@ capybara-*.html
12
12
  /db/*.sqlite3
13
13
  /public/system/*
14
14
  /coverage/
15
+ /pkg/
15
16
  /spec/tmp/*
16
17
  **.orig
17
18
  rerun.txt
data/.rubocop.yml CHANGED
@@ -6,3 +6,7 @@ AllCops:
6
6
  Exclude:
7
7
  - 'spec/internal/**/*'
8
8
  - 'spec/test_app_templates/**/*'
9
+
10
+ Metrics/ClassLength:
11
+ Exclude:
12
+ - 'lib/spotlight/dor/indexer.rb'
data/.rubocop_todo.yml CHANGED
@@ -6,9 +6,6 @@
6
6
  # Note that changes in the inspected code, or installation of new
7
7
  # versions of RuboCop, may require this file to be generated again.
8
8
 
9
- # Offense count: 2
10
- Metrics/AbcSize:
11
- Max: 33
12
9
 
13
10
  # Offense count: 1
14
11
  # Configuration parameters: CountComments.
@@ -31,11 +31,12 @@ module Spotlight::Dor
31
31
  end
32
32
 
33
33
  # add fields from raw mods
34
- # see comment with add_donor_tags about Feigenbaum specific donor tags data
35
34
  before_index :add_box
35
+ # see comment with add_donor_tags about Feigenbaum specific donor tags data
36
36
  before_index :add_donor_tags
37
37
  before_index :add_genre
38
38
  before_index :add_folder
39
+ before_index :add_folder_name
39
40
  before_index :add_series
40
41
  before_index :mods_cartographics_indexing
41
42
 
@@ -72,7 +73,7 @@ module Spotlight::Dor
72
73
 
73
74
  # This new donor_tags_ssim field was added in October 2015 specifically for the Feigenbaum exhibit. It is very likely
74
75
  # it will go unused by other projects, but should be benign (since this field will not be created if this specific MODS note is not found).
75
- # Later refactoring could include project specific fields. Peter Mangiafico
76
+ # Later refactoring could include exhibit specific fields. Peter Mangiafico
76
77
  def add_donor_tags sdb, solr_doc
77
78
  donor_tags = sdb.smods_rec.note.select { |n| n.displayLabel == 'Donor tags' }.map(&:content)
78
79
  insert_field solr_doc, 'donor_tags', donor_tags, :symbol # this is a _ssim field
@@ -86,21 +87,30 @@ module Spotlight::Dor
86
87
  # _location.physicalLocation should find top level and relatedItem
87
88
  folder_num = sdb.smods_rec._location.physicalLocation.map do |node|
88
89
  val = node.text
89
- # folder may be text with commas
90
- match_data = val.match(/Folder:? ?(.+)/i)
91
- next if match_data.blank?
92
- result = match_data[1].strip
93
- # Menuez collection may have folder followed by Sleeve then Frame
94
- match2_data = result.match(/(.*),? ?Sleeve/i)
95
- if match2_data
96
- match2_data[1].strip.sub(/,$/, '')
97
- else
98
- result
99
- end
90
+
91
+ match_data = if val =~ /\|/
92
+ # we assume the data is pipe-delimited, and may contain commas within values
93
+ val.match(/Folder ?:? ?([^|]+)/)
94
+ else
95
+ # the data should be comma-delimited, and may not contain commas within values
96
+ val.match(/Folder ?:? ?([^,]+)/)
97
+ end
98
+
99
+ match_data[1].strip if match_data.present?
100
100
  end
101
+
101
102
  solr_doc['folder_ssi'] = folder_num.first if folder_num.present?
102
103
  end
103
104
 
105
+ # add the folder name to solr_doc as folder_name_ssi field (note: single valued!)
106
+ # data is specific to Feigenbaum collection and is in <note type='preferred citation'>
107
+ def add_folder_name(sdb, solr_doc)
108
+ # see spec for data examples
109
+ preferred_citation = sdb.smods_rec.note.select { |n| n.type_at == 'preferred citation' }.map(&:content)
110
+ match_data = preferred_citation.first.match(/Title: +(.+)/i) if preferred_citation.present?
111
+ solr_doc['folder_name_ssi'] = match_data[1].rstrip if match_data.present?
112
+ end
113
+
104
114
  # add plain MODS <genre> element data, not the SearchWorks genre values
105
115
  def add_genre sdb, solr_doc
106
116
  insert_field solr_doc, 'genre', sdb.smods_rec.genre.content, :symbol # this is a _ssim field
@@ -121,6 +131,7 @@ module Spotlight::Dor
121
131
  solr_doc['series_ssi'] = series_num.first if series_num.present?
122
132
  end
123
133
 
134
+ # rubocop:disable Metrics/AbcSize
124
135
  def mods_cartographics_indexing sdb, solr_doc
125
136
  insert_field(solr_doc, "coordinates", Array(sdb.smods_rec.subject.cartographics.coordinates).map { |n| n.text }, :stored_searchable)
126
137
 
@@ -138,6 +149,7 @@ module Spotlight::Dor
138
149
  solr_doc["point_bbox"] << "#{minX} #{minY} #{maxX} #{maxY}"
139
150
  end
140
151
  end
152
+ # rubocop:enable Metrics/AbcSize
141
153
 
142
154
  def coord_to_decimal point
143
155
  regex = /(?<dir>[NESW])\s*(?<deg>\d+)°(?:(?<sec>\d+)ʹ)?/
@@ -1,7 +1,7 @@
1
1
  module Spotlight
2
2
  module Dor
3
3
  module Resources
4
- VERSION = "0.0.1"
4
+ VERSION = "0.0.2"
5
5
  end
6
6
  end
7
7
  end
@@ -172,7 +172,9 @@ describe Spotlight::Dor::Indexer do
172
172
  'Flat-box 228 | Volume 1': nil,
173
173
  # shpc (actually in <relatedItem><location><physicalLocation>)
174
174
  'Series Biographical Photographs | Box 1 | Folder Abbot, Nathan': 'Abbot, Nathan',
175
- 'Series General Photographs | Box 1 | Folder Administration building--Outer Quad': 'Administration building--Outer Quad'
175
+ 'Series General Photographs | Box 1 | Folder Administration building--Outer Quad': 'Administration building--Outer Quad',
176
+ # hypothetical
177
+ 'Folder: 42, Sheet: 15': '42'
176
178
  }.each do |example, expected|
177
179
  describe "for example '#{example}'" do
178
180
  let(:example) { example }
@@ -197,4 +199,64 @@ describe Spotlight::Dor::Indexer do
197
199
  end # for example
198
200
  end # each
199
201
  end # add_folder
202
+
203
+ let(:mods_note_plain) do
204
+ Nokogiri::XML <<-EOF
205
+ <mods xmlns="#{Mods::MODS_NS}">
206
+ <note>#{example}</note>
207
+ </mods>
208
+ EOF
209
+ end
210
+ let(:mods_note_preferred_citation) do
211
+ Nokogiri::XML <<-EOF
212
+ <mods xmlns="#{Mods::MODS_NS}">
213
+ <note type="preferred citation">#{example}</note>
214
+ </mods>
215
+ EOF
216
+ end
217
+ describe "#add_folder_name" do
218
+ # example string as key, expected folder name as value
219
+ # all from feigenbaum (or based on feigenbaum), as that is only coll
220
+ {
221
+ 'Call Number: SC0340, Accession: 1986-052, Box: 20, Folder: 40, Title: S': 'S',
222
+ 'Call Number: SC0340, Accession: 1986-052, Box: 54, Folder: 25, Title: Balzer': 'Balzer',
223
+ 'Call Number: SC0340, Accession: 1986-052, Box : 30, Folder: 21, Title: Feigenbaum, Publications. 2 of 2.': 'Feigenbaum, Publications. 2 of 2.',
224
+ # colon in name
225
+ 'Call Number: SC0340, Accession 2005-101, Box: 10, Folder: 26, Title: Gordon Bell Letter rdf:about blah (AI) 1987': 'Gordon Bell Letter rdf:about blah (AI) 1987',
226
+ 'Call Number: SC0340, Accession 2005-101, Box: 11, Folder: 74, Title: Microcomputer Systems Proposal: blah blah': 'Microcomputer Systems Proposal: blah blah',
227
+ 'Call Number: SC0340, Accession 2005-101, Box: 14, Folder: 20, Title: blah "bleah: blargW^"ugh" seriously?.': 'blah "bleah: blargW^"ugh" seriously?.',
228
+ # quotes in name
229
+ 'Call Number: SC0340, Accession 2005-101, Box: 29, Folder: 18, Title: "bleah" blah': '"bleah" blah',
230
+ 'Call Number: SC0340, Accession 2005-101, Box: 11, Folder: 58, Title: "M": blah': '"M": blah',
231
+ 'Call Number: SC0340, Accession 2005-101, Box : 32A, Folder: 19, Title: blah "bleah" blue': 'blah "bleah" blue',
232
+ # not parseable
233
+ 'Call Number: SC0340, Accession 2005-101': nil,
234
+ 'Call Number: SC0340, Accession: 1986-052': nil,
235
+ 'Call Number: SC0340, Accession: 1986-052, Box 36 Folder 38': nil,
236
+ 'blah blah ... with the umbrella title Feigenbaum and Feldman, Computers and Thought II. blah blah': nil,
237
+ 'blah blah ... Title ... blah blah': nil
238
+ }.each do |example, expected|
239
+ describe "for example '#{example}'" do
240
+ let(:example) { example }
241
+ context 'in preferred citation note' do
242
+ before do
243
+ allow(r).to receive(:mods).and_return(mods_note_preferred_citation)
244
+ subject.send(:add_folder_name, sdb, solr_doc)
245
+ end
246
+ it "has the expected folder name '#{expected}'" do
247
+ expect(solr_doc['folder_name_ssi']).to eq expected
248
+ end
249
+ end
250
+ context 'in plain note' do
251
+ before do
252
+ allow(r).to receive(:mods).and_return(mods_note_plain)
253
+ subject.send(:add_folder_name, sdb, solr_doc)
254
+ end
255
+ it 'does not have a folder name' do
256
+ expect(solr_doc['folder_name_ssi']).to be_falsey
257
+ end
258
+ end
259
+ end # for example
260
+ end # each
261
+ end # add_folder_name
200
262
  end
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["Chris Beer"]
10
10
  spec.email = ["cabeer@stanford.edu"]
11
11
  spec.summary = "Spotlight resource indexer for DOR resources."
12
- spec.homepage = ""
12
+ spec.homepage = "https://github.com/sul-dlss/spotlight-dor-resources"
13
13
  spec.license = "Apache 2"
14
14
 
15
15
  spec.files = `git ls-files -z`.split("\x0")
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spotlight-dor-resources
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Beer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-29 00:00:00.000000000 Z
11
+ date: 2015-10-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday
@@ -292,7 +292,7 @@ files:
292
292
  - spec/vcr_cassettes/indexer_integration_tests/genre/no_genre_ssim_field_when_genre_not_in_MODS.yml
293
293
  - spec/vcr_cassettes/indexer_integration_tests/genre/solr_doc_has_genre_ssim_field_when_genre_in_MODS.yml
294
294
  - spotlight-dor-resources.gemspec
295
- homepage: ''
295
+ homepage: https://github.com/sul-dlss/spotlight-dor-resources
296
296
  licenses:
297
297
  - Apache 2
298
298
  metadata: {}