stanford-mods 0.0.23 → 0.0.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +9 -9
- data/.gitignore +2 -0
- data/.travis.yml +6 -3
- data/README.rdoc +1 -0
- data/lib/stanford-mods/searchworks.rb +156 -136
- data/lib/stanford-mods/version.rb +1 -1
- data/spec/searchworks_format_spec.rb +103 -0
- data/spec/searchworks_pub_dates_spec.rb +236 -0
- data/spec/searchworks_spec.rb +4 -0
- data/spec/searchworks_subject_raw_spec.rb +384 -0
- data/spec/searchworks_subject_spec.rb +358 -347
- metadata +9 -9
- data/.rvmrc +0 -1
- data/lib/stanford-mods/kolb.rb +0 -14
- data/spec/kolb_spec.rb +0 -234
- data/spec/searchworks_gdor_spec.rb +0 -771
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
NmY1ZWQ2ZGMyNjZiYzZjY2M5MzE3ODYxNTliN2NkYTcyNGEyMTZjMQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
7
|
-
|
6
|
+
YjhmY2NkODk0M2Q5OGYzODNkZTQ5NmI4YTc0Y2QyNTBiYzQ1M2RiZg==
|
7
|
+
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
NzM5OTVjYTQ5ODg1NWMzMWMwMDY2YmY5ZTZiZjZlY2Y0NGI2OTYzNjllZWQ2
|
10
|
+
OGY4NWUxYmYwNGQzNTc1OTE3OTg4YTg3YjQzNDdiNTdmMjk0Nzg0ZDA5NDU1
|
11
|
+
MTY0NDI3YjE1OWQ4Yjc4YzY4N2Q4ZjhmZTgwZmJhZmY3Y2M5YmE=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
Yjg5YmFmYzY0MjU2YjM1ZjE5MjMwMGU1MTIyMDkzNmUxMjM1Mjk0MmFiNjdm
|
14
|
+
YTJmYmNkODczYTFjMTcyMTNhNmI4ZWI2OWY5YTdlZDcwZmYxNGU1MTYyZDgx
|
15
|
+
NzFmMjk5ODI5YzA4NWM0NTdiOTMxN2VkNjQ2MzYwOTEzZGVhZWU=
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
@@ -1,11 +1,14 @@
|
|
1
1
|
language: ruby
|
2
2
|
script: rake rspec
|
3
3
|
rvm:
|
4
|
+
- 2.1.0
|
4
5
|
- 1.9.3
|
5
|
-
-
|
6
|
+
- ruby-head
|
7
|
+
- jruby-head
|
8
|
+
# - jruby-19mode
|
6
9
|
notifications:
|
7
10
|
email:
|
8
11
|
- ndushay@stanford.edu
|
9
12
|
- bess@stanford.edu
|
10
|
-
before_install:
|
11
|
-
gem update --system 1.8.24
|
13
|
+
#before_install:
|
14
|
+
# gem update --system 1.8.24
|
data/README.rdoc
CHANGED
@@ -59,6 +59,7 @@ Example Using SearchWorks Mixins:
|
|
59
59
|
6. Create new Pull Request
|
60
60
|
|
61
61
|
== Releases
|
62
|
+
* <b>0.0.24</b> Largely cosmetic refactoring for easier maintenance.
|
62
63
|
* <b>0.0.23</b> Added logic for dealing with "u-notation" approximate dates, e.g., 198u
|
63
64
|
* <b>0.0.20</b> Added mapping for typeOfResource notated music
|
64
65
|
* <b>0.0.19</b> Additional mappings, including Hydrus formats (GRYPHONDOR-207)
|
@@ -94,6 +94,28 @@ module Stanford
|
|
94
94
|
val.gsub(/[[:punct:]]*/, '').strip
|
95
95
|
end
|
96
96
|
|
97
|
+
def main_author_w_date_test
|
98
|
+
result = nil
|
99
|
+
first_wo_role = nil
|
100
|
+
self.plain_name.each { |n|
|
101
|
+
if n.role.size == 0
|
102
|
+
first_wo_role ||= n
|
103
|
+
end
|
104
|
+
n.role.each { |r|
|
105
|
+
if r.authority.include?('marcrelator') &&
|
106
|
+
(r.value.include?('Creator') || r.value.include?('Author'))
|
107
|
+
result ||= n.display_value_w_date
|
108
|
+
end
|
109
|
+
}
|
110
|
+
}
|
111
|
+
if !result && first_wo_role
|
112
|
+
result = first_wo_role.display_value_w_date
|
113
|
+
end
|
114
|
+
result
|
115
|
+
end
|
116
|
+
|
117
|
+
# ---- end AUTHOR ----
|
118
|
+
|
97
119
|
# ---- TITLE ----
|
98
120
|
|
99
121
|
# @return [String] value for title_245a_search field
|
@@ -123,6 +145,17 @@ module Stanford
|
|
123
145
|
val.gsub(/[[:punct:]]*/, '').strip
|
124
146
|
end
|
125
147
|
|
148
|
+
#remove trailing commas
|
149
|
+
def sw_full_title_without_commas
|
150
|
+
toret = self.sw_full_title
|
151
|
+
if toret
|
152
|
+
toret = toret.gsub(/,$/, '')
|
153
|
+
end
|
154
|
+
toret
|
155
|
+
end
|
156
|
+
|
157
|
+
# ---- end TITLE ----
|
158
|
+
|
126
159
|
# ---- SUBJECT ----
|
127
160
|
|
128
161
|
# Values are the contents of:
|
@@ -191,41 +224,38 @@ module Stanford
|
|
191
224
|
vals.empty? ? nil : vals
|
192
225
|
end
|
193
226
|
end
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
}
|
227
|
+
|
228
|
+
# Values are the contents of:
|
229
|
+
# subject/topic
|
230
|
+
# subject/name
|
231
|
+
# subject/title
|
232
|
+
# subject/occupation
|
233
|
+
# with trailing comma, semicolon, and backslash (and any preceding spaces) removed
|
234
|
+
# @return [Array<String>] values for the topic_facet Solr field for this document or nil if none
|
235
|
+
def topic_facet
|
236
|
+
vals = subject_topics ? Array.new(subject_topics) : []
|
237
|
+
vals.concat(subject_names) if subject_names
|
238
|
+
vals.concat(subject_titles) if subject_titles
|
239
|
+
vals.concat(subject_occupations) if subject_occupations
|
240
|
+
vals.map! { |val|
|
241
|
+
v = val.sub(/[\\,;]$/, '')
|
242
|
+
v.strip
|
211
243
|
}
|
212
|
-
|
213
|
-
result = first_wo_role.display_value_w_date
|
214
|
-
end
|
215
|
-
result
|
244
|
+
vals.empty? ? nil : vals
|
216
245
|
end
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
end
|
223
|
-
toret
|
246
|
+
|
247
|
+
# geographic_search values with trailing comma, semicolon, and backslash (and any preceding spaces) removed
|
248
|
+
# @return [Array<String>] values for the geographic_facet Solr field for this document or nil if none
|
249
|
+
def geographic_facet
|
250
|
+
geographic_search.map { |val| val.sub(/[\\,;]$/, '').strip } unless !geographic_search
|
224
251
|
end
|
225
252
|
|
226
|
-
|
227
|
-
|
253
|
+
# subject/temporal values with trailing comma, semicolon, and backslash (and any preceding spaces) removed
|
254
|
+
# @return [Array<String>] values for the era_facet Solr field for this document or nil if none
|
255
|
+
def era_facet
|
256
|
+
subject_temporal.map { |val| val.sub(/[\\,;]$/, '').strip } unless !subject_temporal
|
228
257
|
end
|
258
|
+
|
229
259
|
# Values are the contents of:
|
230
260
|
# subject/geographic
|
231
261
|
# subject/hierarchicalGeographic
|
@@ -285,7 +315,36 @@ module Stanford
|
|
285
315
|
vals.empty? ? nil : vals
|
286
316
|
end
|
287
317
|
end
|
318
|
+
|
319
|
+
# Values are the contents of:
|
320
|
+
# all subject subelements except subject/cartographic plus genre top level element
|
321
|
+
# @return [Array<String>] values for the subject_all_search Solr field for this document or nil if none
|
322
|
+
def subject_all_search
|
323
|
+
vals = topic_search ? Array.new(topic_search) : []
|
324
|
+
vals.concat(geographic_search) if geographic_search
|
325
|
+
vals.concat(subject_other_search) if subject_other_search
|
326
|
+
vals.concat(subject_other_subvy_search) if subject_other_subvy_search
|
327
|
+
vals.empty? ? nil : vals
|
328
|
+
end
|
329
|
+
|
330
|
+
# ---- end SUBJECT ----
|
331
|
+
|
332
|
+
# ---- PUBLICATION (place, year) ----
|
333
|
+
def place
|
334
|
+
vals = self.term_values([:origin_info,:place,:placeTerm])
|
335
|
+
vals
|
336
|
+
end
|
337
|
+
|
338
|
+
def pub_date_display
|
339
|
+
if pub_dates
|
340
|
+
pub_dates.first
|
341
|
+
else
|
342
|
+
nil
|
343
|
+
end
|
344
|
+
end
|
345
|
+
|
288
346
|
# @return [Array<String>] values for the pub_date_group_facet
|
347
|
+
# @deprecated
|
289
348
|
def pub_date_groups year
|
290
349
|
if not year
|
291
350
|
return nil
|
@@ -312,72 +371,6 @@ module Stanford
|
|
312
371
|
end
|
313
372
|
end
|
314
373
|
|
315
|
-
# select one or more format values from the controlled vocabulary here:
|
316
|
-
# http://searchworks-solr-lb.stanford.edu:8983/solr/select?facet.field=format&rows=0&facet.sort=index
|
317
|
-
# based on the dor_content_type
|
318
|
-
# @return [String] value in the SearchWorks controlled vocabulary
|
319
|
-
def format
|
320
|
-
val=[]
|
321
|
-
formats = self.term_values(:typeOfResource)
|
322
|
-
genres = self.term_values(:genre)
|
323
|
-
issuance = self.term_values([:origin_info,:issuance])
|
324
|
-
if formats
|
325
|
-
formats.each do |form|
|
326
|
-
case form
|
327
|
-
when 'text'
|
328
|
-
val << 'Thesis' if genres and genres.include? 'thesis'
|
329
|
-
val << 'Book' if issuance and issuance.include? 'monographic'
|
330
|
-
val << 'Journal/Periodical' if issuance and issuance.include? 'continuing'
|
331
|
-
val << 'Journal/Periodical' if genres and genres.include? 'article'
|
332
|
-
val << 'Conference Proceedings' if genres and genres.include? 'conference publication'
|
333
|
-
val << 'Other' if genres and genres.include? 'student project report'
|
334
|
-
val << 'Book' if genres and genres.include? 'technical report'
|
335
|
-
when 'still image'
|
336
|
-
val << 'Image'
|
337
|
-
when 'mixed material'
|
338
|
-
val << 'Manuscript/Archive'
|
339
|
-
when 'moving image'
|
340
|
-
val << 'Video'
|
341
|
-
when 'notated music'
|
342
|
-
val << 'Music - Score'
|
343
|
-
when 'three dimensional object'
|
344
|
-
val <<'Other'
|
345
|
-
when 'cartographic'
|
346
|
-
val << 'Map/Globe'
|
347
|
-
when 'sound recording-musical'
|
348
|
-
val << 'Music-Recording'
|
349
|
-
when 'sound recording-nonmusical'
|
350
|
-
val << 'Sound Recording'
|
351
|
-
when 'software, multimedia'
|
352
|
-
val << 'Computer File'
|
353
|
-
end
|
354
|
-
end
|
355
|
-
end
|
356
|
-
if val.length>0
|
357
|
-
return val.uniq
|
358
|
-
end
|
359
|
-
if not self.typeOfResource or self.typeOfResource.length == 0
|
360
|
-
[]
|
361
|
-
end
|
362
|
-
end
|
363
|
-
|
364
|
-
# Values are the contents of:
|
365
|
-
# all subject subelements except subject/cartographic plus genre top level element
|
366
|
-
# @return [Array<String>] values for the subject_all_search Solr field for this document or nil if none
|
367
|
-
def subject_all_search
|
368
|
-
vals = topic_search ? Array.new(topic_search) : []
|
369
|
-
vals.concat(geographic_search) if geographic_search
|
370
|
-
vals.concat(subject_other_search) if subject_other_search
|
371
|
-
vals.concat(subject_other_subvy_search) if subject_other_subvy_search
|
372
|
-
vals.empty? ? nil : vals
|
373
|
-
end
|
374
|
-
def pub_date_display
|
375
|
-
if pub_dates
|
376
|
-
pub_dates.first
|
377
|
-
else
|
378
|
-
nil
|
379
|
-
end
|
380
|
-
end
|
381
374
|
#get the dates from dateIssued, and dateCreated merged into 1 array.
|
382
375
|
# @return [Array<String>] values for the issue_date_display Solr field for this document or nil if none
|
383
376
|
def pub_dates
|
@@ -407,32 +400,33 @@ module Stanford
|
|
407
400
|
end
|
408
401
|
return @pub_year
|
409
402
|
end
|
410
|
-
dates=pub_dates
|
403
|
+
dates = pub_dates
|
411
404
|
if dates
|
412
|
-
year=[]
|
413
|
-
pruned_dates=[]
|
405
|
+
year = []
|
406
|
+
pruned_dates = []
|
414
407
|
dates.each do |f_date|
|
415
408
|
#remove ? and []
|
416
409
|
pruned_dates << f_date.gsub('?','').gsub('[','').gsub(']','')
|
417
410
|
end
|
418
411
|
#try to find a date starting with the most normal date formats and progressing to more wonky ones
|
419
|
-
@pub_year=get_plain_four_digit_year pruned_dates
|
412
|
+
@pub_year = get_plain_four_digit_year pruned_dates
|
420
413
|
return @pub_year if @pub_year
|
421
414
|
# Check for years in u notation, e.g., 198u
|
422
|
-
@pub_year=get_u_year pruned_dates
|
415
|
+
@pub_year = get_u_year pruned_dates
|
423
416
|
return @pub_year if @pub_year
|
424
|
-
@pub_year=get_double_digit_century pruned_dates
|
417
|
+
@pub_year = get_double_digit_century pruned_dates
|
425
418
|
return @pub_year if @pub_year
|
426
|
-
@pub_year=get_bc_year pruned_dates
|
419
|
+
@pub_year = get_bc_year pruned_dates
|
427
420
|
return @pub_year if @pub_year
|
428
|
-
@pub_year=get_three_digit_year pruned_dates
|
421
|
+
@pub_year = get_three_digit_year pruned_dates
|
429
422
|
return @pub_year if @pub_year
|
430
|
-
@pub_year=get_single_digit_century pruned_dates
|
423
|
+
@pub_year = get_single_digit_century pruned_dates
|
431
424
|
return @pub_year if @pub_year
|
432
425
|
end
|
433
426
|
@pub_year=''
|
434
427
|
return nil
|
435
428
|
end
|
429
|
+
|
436
430
|
#creates a date suitable for sorting. Guaranteed to be 4 digits or nil
|
437
431
|
def pub_date_sort
|
438
432
|
pd=nil
|
@@ -446,6 +440,7 @@ module Stanford
|
|
446
440
|
raise "pub_date_sort was about to return a non 4 digit value #{pd}!" if pd and pd.length !=4
|
447
441
|
pd
|
448
442
|
end
|
443
|
+
|
449
444
|
#The year the object was published, filtered based on max_pub_date and min_pub_date from the config file
|
450
445
|
#@return [String] 4 character year or nil
|
451
446
|
def pub_date
|
@@ -455,6 +450,7 @@ module Stanford
|
|
455
450
|
end
|
456
451
|
nil
|
457
452
|
end
|
453
|
+
|
458
454
|
#Values for the pub date facet. This is less strict than the 4 year date requirements for pub_date
|
459
455
|
#@return <Array[String]> with values for the pub date facet
|
460
456
|
def pub_date_facet
|
@@ -475,37 +471,61 @@ module Stanford
|
|
475
471
|
end
|
476
472
|
end
|
477
473
|
|
474
|
+
# ---- end PUBLICATION (place, year) ----
|
478
475
|
|
479
|
-
|
480
|
-
|
481
|
-
# subject/name
|
482
|
-
# subject/title
|
483
|
-
# subject/occupation
|
484
|
-
# with trailing comma, semicolon, and backslash (and any preceding spaces) removed
|
485
|
-
# @return [Array<String>] values for the topic_facet Solr field for this document or nil if none
|
486
|
-
def topic_facet
|
487
|
-
vals = subject_topics ? Array.new(subject_topics) : []
|
488
|
-
vals.concat(subject_names) if subject_names
|
489
|
-
vals.concat(subject_titles) if subject_titles
|
490
|
-
vals.concat(subject_occupations) if subject_occupations
|
491
|
-
vals.map! { |val|
|
492
|
-
v = val.sub(/[\\,;]$/, '')
|
493
|
-
v.strip
|
494
|
-
}
|
495
|
-
vals.empty? ? nil : vals
|
476
|
+
def sw_logger
|
477
|
+
@logger ||= Logger.new(STDOUT)
|
496
478
|
end
|
497
|
-
|
498
|
-
#
|
499
|
-
#
|
500
|
-
|
501
|
-
|
479
|
+
|
480
|
+
# select one or more format values from the controlled vocabulary here:
|
481
|
+
# http://searchworks-solr-lb.stanford.edu:8983/solr/select?facet.field=format&rows=0&facet.sort=index
|
482
|
+
# based on the dor_content_type
|
483
|
+
# @return [String] value in the SearchWorks controlled vocabulary
|
484
|
+
def format
|
485
|
+
val=[]
|
486
|
+
formats = self.term_values(:typeOfResource)
|
487
|
+
genres = self.term_values(:genre)
|
488
|
+
issuance = self.term_values([:origin_info,:issuance])
|
489
|
+
if formats
|
490
|
+
formats.each do |form|
|
491
|
+
case form
|
492
|
+
when 'text'
|
493
|
+
val << 'Thesis' if genres and genres.include? 'thesis'
|
494
|
+
val << 'Book' if issuance and issuance.include? 'monographic'
|
495
|
+
val << 'Journal/Periodical' if issuance and issuance.include? 'continuing'
|
496
|
+
val << 'Journal/Periodical' if genres and genres.include? 'article'
|
497
|
+
val << 'Conference Proceedings' if genres and genres.include? 'conference publication'
|
498
|
+
val << 'Other' if genres and genres.include? 'student project report'
|
499
|
+
val << 'Book' if genres and genres.include? 'technical report'
|
500
|
+
when 'still image'
|
501
|
+
val << 'Image'
|
502
|
+
when 'mixed material'
|
503
|
+
val << 'Manuscript/Archive'
|
504
|
+
when 'moving image'
|
505
|
+
val << 'Video'
|
506
|
+
when 'notated music'
|
507
|
+
val << 'Music - Score'
|
508
|
+
when 'three dimensional object'
|
509
|
+
val <<'Other'
|
510
|
+
when 'cartographic'
|
511
|
+
val << 'Map/Globe'
|
512
|
+
when 'sound recording-musical'
|
513
|
+
val << 'Music-Recording'
|
514
|
+
when 'sound recording-nonmusical'
|
515
|
+
val << 'Sound Recording'
|
516
|
+
when 'software, multimedia'
|
517
|
+
val << 'Computer File'
|
518
|
+
end
|
519
|
+
end
|
520
|
+
end
|
521
|
+
if val.length>0
|
522
|
+
return val.uniq
|
523
|
+
end
|
524
|
+
if not self.typeOfResource or self.typeOfResource.length == 0
|
525
|
+
[]
|
526
|
+
end
|
502
527
|
end
|
503
528
|
|
504
|
-
# subject/temporal values with trailing comma, semicolon, and backslash (and any preceding spaces) removed
|
505
|
-
# @return [Array<String>] values for the era_facet Solr field for this document or nil if none
|
506
|
-
def era_facet
|
507
|
-
subject_temporal.map { |val| val.sub(/[\\,;]$/, '').strip } unless !subject_temporal
|
508
|
-
end
|
509
529
|
# @return [String] value with the numeric catkey in it, or nil if none exists
|
510
530
|
def catkey
|
511
531
|
catkey=self.term_values([:record_info,:recordIdentifier])
|
@@ -521,7 +541,7 @@ module Stanford
|
|
521
541
|
@druid ? @druid : 'Unknown item'
|
522
542
|
end
|
523
543
|
|
524
|
-
|
544
|
+
# protected ----------------------------------------------------------
|
525
545
|
|
526
546
|
# convenience method for subject/name/namePart values (to avoid parsing the mods for the same thing multiple times)
|
527
547
|
def subject_names
|
@@ -573,17 +593,17 @@ module Stanford
|
|
573
593
|
end
|
574
594
|
|
575
595
|
# If a year has a "u" in it, replace instances of u with 0
|
576
|
-
# @param [String]
|
596
|
+
# @param [String] dates
|
577
597
|
# @return String
|
578
598
|
def get_u_year dates
|
579
599
|
dates.each do |f_date|
|
580
600
|
# Single digit u notation
|
581
|
-
matches=f_date.scan(/\d{3}u/)
|
601
|
+
matches = f_date.scan(/\d{3}u/)
|
582
602
|
if matches.length == 1
|
583
603
|
return matches.first.gsub('u','0')
|
584
604
|
end
|
585
605
|
# Double digit u notation
|
586
|
-
matches=f_date.scan(/\d{2}u{2}/)
|
606
|
+
matches = f_date.scan(/\d{2}u{2}/)
|
587
607
|
if matches.length == 1
|
588
608
|
return matches.first.gsub('u','-')
|
589
609
|
end
|