stanford-mods 0.0.23 → 0.0.24
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +9 -9
- data/.gitignore +2 -0
- data/.travis.yml +6 -3
- data/README.rdoc +1 -0
- data/lib/stanford-mods/searchworks.rb +156 -136
- data/lib/stanford-mods/version.rb +1 -1
- data/spec/searchworks_format_spec.rb +103 -0
- data/spec/searchworks_pub_dates_spec.rb +236 -0
- data/spec/searchworks_spec.rb +4 -0
- data/spec/searchworks_subject_raw_spec.rb +384 -0
- data/spec/searchworks_subject_spec.rb +358 -347
- metadata +9 -9
- data/.rvmrc +0 -1
- data/lib/stanford-mods/kolb.rb +0 -14
- data/spec/kolb_spec.rb +0 -234
- data/spec/searchworks_gdor_spec.rb +0 -771
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
NmY1ZWQ2ZGMyNjZiYzZjY2M5MzE3ODYxNTliN2NkYTcyNGEyMTZjMQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
7
|
-
|
6
|
+
YjhmY2NkODk0M2Q5OGYzODNkZTQ5NmI4YTc0Y2QyNTBiYzQ1M2RiZg==
|
7
|
+
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
NzM5OTVjYTQ5ODg1NWMzMWMwMDY2YmY5ZTZiZjZlY2Y0NGI2OTYzNjllZWQ2
|
10
|
+
OGY4NWUxYmYwNGQzNTc1OTE3OTg4YTg3YjQzNDdiNTdmMjk0Nzg0ZDA5NDU1
|
11
|
+
MTY0NDI3YjE1OWQ4Yjc4YzY4N2Q4ZjhmZTgwZmJhZmY3Y2M5YmE=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
Yjg5YmFmYzY0MjU2YjM1ZjE5MjMwMGU1MTIyMDkzNmUxMjM1Mjk0MmFiNjdm
|
14
|
+
YTJmYmNkODczYTFjMTcyMTNhNmI4ZWI2OWY5YTdlZDcwZmYxNGU1MTYyZDgx
|
15
|
+
NzFmMjk5ODI5YzA4NWM0NTdiOTMxN2VkNjQ2MzYwOTEzZGVhZWU=
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
@@ -1,11 +1,14 @@
|
|
1
1
|
language: ruby
|
2
2
|
script: rake rspec
|
3
3
|
rvm:
|
4
|
+
- 2.1.0
|
4
5
|
- 1.9.3
|
5
|
-
-
|
6
|
+
- ruby-head
|
7
|
+
- jruby-head
|
8
|
+
# - jruby-19mode
|
6
9
|
notifications:
|
7
10
|
email:
|
8
11
|
- ndushay@stanford.edu
|
9
12
|
- bess@stanford.edu
|
10
|
-
before_install:
|
11
|
-
gem update --system 1.8.24
|
13
|
+
#before_install:
|
14
|
+
# gem update --system 1.8.24
|
data/README.rdoc
CHANGED
@@ -59,6 +59,7 @@ Example Using SearchWorks Mixins:
|
|
59
59
|
6. Create new Pull Request
|
60
60
|
|
61
61
|
== Releases
|
62
|
+
* <b>0.0.24</b> Largely cosmetic refactoring for easier maintenance.
|
62
63
|
* <b>0.0.23</b> Added logic for dealing with "u-notation" approximate dates, e.g., 198u
|
63
64
|
* <b>0.0.20</b> Added mapping for typeOfResource notated music
|
64
65
|
* <b>0.0.19</b> Additional mappings, including Hydrus formats (GRYPHONDOR-207)
|
@@ -94,6 +94,28 @@ module Stanford
|
|
94
94
|
val.gsub(/[[:punct:]]*/, '').strip
|
95
95
|
end
|
96
96
|
|
97
|
+
def main_author_w_date_test
|
98
|
+
result = nil
|
99
|
+
first_wo_role = nil
|
100
|
+
self.plain_name.each { |n|
|
101
|
+
if n.role.size == 0
|
102
|
+
first_wo_role ||= n
|
103
|
+
end
|
104
|
+
n.role.each { |r|
|
105
|
+
if r.authority.include?('marcrelator') &&
|
106
|
+
(r.value.include?('Creator') || r.value.include?('Author'))
|
107
|
+
result ||= n.display_value_w_date
|
108
|
+
end
|
109
|
+
}
|
110
|
+
}
|
111
|
+
if !result && first_wo_role
|
112
|
+
result = first_wo_role.display_value_w_date
|
113
|
+
end
|
114
|
+
result
|
115
|
+
end
|
116
|
+
|
117
|
+
# ---- end AUTHOR ----
|
118
|
+
|
97
119
|
# ---- TITLE ----
|
98
120
|
|
99
121
|
# @return [String] value for title_245a_search field
|
@@ -123,6 +145,17 @@ module Stanford
|
|
123
145
|
val.gsub(/[[:punct:]]*/, '').strip
|
124
146
|
end
|
125
147
|
|
148
|
+
#remove trailing commas
|
149
|
+
def sw_full_title_without_commas
|
150
|
+
toret = self.sw_full_title
|
151
|
+
if toret
|
152
|
+
toret = toret.gsub(/,$/, '')
|
153
|
+
end
|
154
|
+
toret
|
155
|
+
end
|
156
|
+
|
157
|
+
# ---- end TITLE ----
|
158
|
+
|
126
159
|
# ---- SUBJECT ----
|
127
160
|
|
128
161
|
# Values are the contents of:
|
@@ -191,41 +224,38 @@ module Stanford
|
|
191
224
|
vals.empty? ? nil : vals
|
192
225
|
end
|
193
226
|
end
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
}
|
227
|
+
|
228
|
+
# Values are the contents of:
|
229
|
+
# subject/topic
|
230
|
+
# subject/name
|
231
|
+
# subject/title
|
232
|
+
# subject/occupation
|
233
|
+
# with trailing comma, semicolon, and backslash (and any preceding spaces) removed
|
234
|
+
# @return [Array<String>] values for the topic_facet Solr field for this document or nil if none
|
235
|
+
def topic_facet
|
236
|
+
vals = subject_topics ? Array.new(subject_topics) : []
|
237
|
+
vals.concat(subject_names) if subject_names
|
238
|
+
vals.concat(subject_titles) if subject_titles
|
239
|
+
vals.concat(subject_occupations) if subject_occupations
|
240
|
+
vals.map! { |val|
|
241
|
+
v = val.sub(/[\\,;]$/, '')
|
242
|
+
v.strip
|
211
243
|
}
|
212
|
-
|
213
|
-
result = first_wo_role.display_value_w_date
|
214
|
-
end
|
215
|
-
result
|
244
|
+
vals.empty? ? nil : vals
|
216
245
|
end
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
end
|
223
|
-
toret
|
246
|
+
|
247
|
+
# geographic_search values with trailing comma, semicolon, and backslash (and any preceding spaces) removed
|
248
|
+
# @return [Array<String>] values for the geographic_facet Solr field for this document or nil if none
|
249
|
+
def geographic_facet
|
250
|
+
geographic_search.map { |val| val.sub(/[\\,;]$/, '').strip } unless !geographic_search
|
224
251
|
end
|
225
252
|
|
226
|
-
|
227
|
-
|
253
|
+
# subject/temporal values with trailing comma, semicolon, and backslash (and any preceding spaces) removed
|
254
|
+
# @return [Array<String>] values for the era_facet Solr field for this document or nil if none
|
255
|
+
def era_facet
|
256
|
+
subject_temporal.map { |val| val.sub(/[\\,;]$/, '').strip } unless !subject_temporal
|
228
257
|
end
|
258
|
+
|
229
259
|
# Values are the contents of:
|
230
260
|
# subject/geographic
|
231
261
|
# subject/hierarchicalGeographic
|
@@ -285,7 +315,36 @@ module Stanford
|
|
285
315
|
vals.empty? ? nil : vals
|
286
316
|
end
|
287
317
|
end
|
318
|
+
|
319
|
+
# Values are the contents of:
|
320
|
+
# all subject subelements except subject/cartographic plus genre top level element
|
321
|
+
# @return [Array<String>] values for the subject_all_search Solr field for this document or nil if none
|
322
|
+
def subject_all_search
|
323
|
+
vals = topic_search ? Array.new(topic_search) : []
|
324
|
+
vals.concat(geographic_search) if geographic_search
|
325
|
+
vals.concat(subject_other_search) if subject_other_search
|
326
|
+
vals.concat(subject_other_subvy_search) if subject_other_subvy_search
|
327
|
+
vals.empty? ? nil : vals
|
328
|
+
end
|
329
|
+
|
330
|
+
# ---- end SUBJECT ----
|
331
|
+
|
332
|
+
# ---- PUBLICATION (place, year) ----
|
333
|
+
def place
|
334
|
+
vals = self.term_values([:origin_info,:place,:placeTerm])
|
335
|
+
vals
|
336
|
+
end
|
337
|
+
|
338
|
+
def pub_date_display
|
339
|
+
if pub_dates
|
340
|
+
pub_dates.first
|
341
|
+
else
|
342
|
+
nil
|
343
|
+
end
|
344
|
+
end
|
345
|
+
|
288
346
|
# @return [Array<String>] values for the pub_date_group_facet
|
347
|
+
# @deprecated
|
289
348
|
def pub_date_groups year
|
290
349
|
if not year
|
291
350
|
return nil
|
@@ -312,72 +371,6 @@ module Stanford
|
|
312
371
|
end
|
313
372
|
end
|
314
373
|
|
315
|
-
# select one or more format values from the controlled vocabulary here:
|
316
|
-
# http://searchworks-solr-lb.stanford.edu:8983/solr/select?facet.field=format&rows=0&facet.sort=index
|
317
|
-
# based on the dor_content_type
|
318
|
-
# @return [String] value in the SearchWorks controlled vocabulary
|
319
|
-
def format
|
320
|
-
val=[]
|
321
|
-
formats = self.term_values(:typeOfResource)
|
322
|
-
genres = self.term_values(:genre)
|
323
|
-
issuance = self.term_values([:origin_info,:issuance])
|
324
|
-
if formats
|
325
|
-
formats.each do |form|
|
326
|
-
case form
|
327
|
-
when 'text'
|
328
|
-
val << 'Thesis' if genres and genres.include? 'thesis'
|
329
|
-
val << 'Book' if issuance and issuance.include? 'monographic'
|
330
|
-
val << 'Journal/Periodical' if issuance and issuance.include? 'continuing'
|
331
|
-
val << 'Journal/Periodical' if genres and genres.include? 'article'
|
332
|
-
val << 'Conference Proceedings' if genres and genres.include? 'conference publication'
|
333
|
-
val << 'Other' if genres and genres.include? 'student project report'
|
334
|
-
val << 'Book' if genres and genres.include? 'technical report'
|
335
|
-
when 'still image'
|
336
|
-
val << 'Image'
|
337
|
-
when 'mixed material'
|
338
|
-
val << 'Manuscript/Archive'
|
339
|
-
when 'moving image'
|
340
|
-
val << 'Video'
|
341
|
-
when 'notated music'
|
342
|
-
val << 'Music - Score'
|
343
|
-
when 'three dimensional object'
|
344
|
-
val <<'Other'
|
345
|
-
when 'cartographic'
|
346
|
-
val << 'Map/Globe'
|
347
|
-
when 'sound recording-musical'
|
348
|
-
val << 'Music-Recording'
|
349
|
-
when 'sound recording-nonmusical'
|
350
|
-
val << 'Sound Recording'
|
351
|
-
when 'software, multimedia'
|
352
|
-
val << 'Computer File'
|
353
|
-
end
|
354
|
-
end
|
355
|
-
end
|
356
|
-
if val.length>0
|
357
|
-
return val.uniq
|
358
|
-
end
|
359
|
-
if not self.typeOfResource or self.typeOfResource.length == 0
|
360
|
-
[]
|
361
|
-
end
|
362
|
-
end
|
363
|
-
|
364
|
-
# Values are the contents of:
|
365
|
-
# all subject subelements except subject/cartographic plus genre top level element
|
366
|
-
# @return [Array<String>] values for the subject_all_search Solr field for this document or nil if none
|
367
|
-
def subject_all_search
|
368
|
-
vals = topic_search ? Array.new(topic_search) : []
|
369
|
-
vals.concat(geographic_search) if geographic_search
|
370
|
-
vals.concat(subject_other_search) if subject_other_search
|
371
|
-
vals.concat(subject_other_subvy_search) if subject_other_subvy_search
|
372
|
-
vals.empty? ? nil : vals
|
373
|
-
end
|
374
|
-
def pub_date_display
|
375
|
-
if pub_dates
|
376
|
-
pub_dates.first
|
377
|
-
else
|
378
|
-
nil
|
379
|
-
end
|
380
|
-
end
|
381
374
|
#get the dates from dateIssued, and dateCreated merged into 1 array.
|
382
375
|
# @return [Array<String>] values for the issue_date_display Solr field for this document or nil if none
|
383
376
|
def pub_dates
|
@@ -407,32 +400,33 @@ module Stanford
|
|
407
400
|
end
|
408
401
|
return @pub_year
|
409
402
|
end
|
410
|
-
dates=pub_dates
|
403
|
+
dates = pub_dates
|
411
404
|
if dates
|
412
|
-
year=[]
|
413
|
-
pruned_dates=[]
|
405
|
+
year = []
|
406
|
+
pruned_dates = []
|
414
407
|
dates.each do |f_date|
|
415
408
|
#remove ? and []
|
416
409
|
pruned_dates << f_date.gsub('?','').gsub('[','').gsub(']','')
|
417
410
|
end
|
418
411
|
#try to find a date starting with the most normal date formats and progressing to more wonky ones
|
419
|
-
@pub_year=get_plain_four_digit_year pruned_dates
|
412
|
+
@pub_year = get_plain_four_digit_year pruned_dates
|
420
413
|
return @pub_year if @pub_year
|
421
414
|
# Check for years in u notation, e.g., 198u
|
422
|
-
@pub_year=get_u_year pruned_dates
|
415
|
+
@pub_year = get_u_year pruned_dates
|
423
416
|
return @pub_year if @pub_year
|
424
|
-
@pub_year=get_double_digit_century pruned_dates
|
417
|
+
@pub_year = get_double_digit_century pruned_dates
|
425
418
|
return @pub_year if @pub_year
|
426
|
-
@pub_year=get_bc_year pruned_dates
|
419
|
+
@pub_year = get_bc_year pruned_dates
|
427
420
|
return @pub_year if @pub_year
|
428
|
-
@pub_year=get_three_digit_year pruned_dates
|
421
|
+
@pub_year = get_three_digit_year pruned_dates
|
429
422
|
return @pub_year if @pub_year
|
430
|
-
@pub_year=get_single_digit_century pruned_dates
|
423
|
+
@pub_year = get_single_digit_century pruned_dates
|
431
424
|
return @pub_year if @pub_year
|
432
425
|
end
|
433
426
|
@pub_year=''
|
434
427
|
return nil
|
435
428
|
end
|
429
|
+
|
436
430
|
#creates a date suitable for sorting. Guaranteed to be 4 digits or nil
|
437
431
|
def pub_date_sort
|
438
432
|
pd=nil
|
@@ -446,6 +440,7 @@ module Stanford
|
|
446
440
|
raise "pub_date_sort was about to return a non 4 digit value #{pd}!" if pd and pd.length !=4
|
447
441
|
pd
|
448
442
|
end
|
443
|
+
|
449
444
|
#The year the object was published, filtered based on max_pub_date and min_pub_date from the config file
|
450
445
|
#@return [String] 4 character year or nil
|
451
446
|
def pub_date
|
@@ -455,6 +450,7 @@ module Stanford
|
|
455
450
|
end
|
456
451
|
nil
|
457
452
|
end
|
453
|
+
|
458
454
|
#Values for the pub date facet. This is less strict than the 4 year date requirements for pub_date
|
459
455
|
#@return [Array<String>] with values for the pub date facet
|
460
456
|
def pub_date_facet
|
@@ -475,37 +471,61 @@ module Stanford
|
|
475
471
|
end
|
476
472
|
end
|
477
473
|
|
474
|
+
# ---- end PUBLICATION (place, year) ----
|
478
475
|
|
479
|
-
|
480
|
-
|
481
|
-
# subject/name
|
482
|
-
# subject/title
|
483
|
-
# subject/occupation
|
484
|
-
# with trailing comma, semicolon, and backslash (and any preceding spaces) removed
|
485
|
-
# @return [Array<String>] values for the topic_facet Solr field for this document or nil if none
|
486
|
-
def topic_facet
|
487
|
-
vals = subject_topics ? Array.new(subject_topics) : []
|
488
|
-
vals.concat(subject_names) if subject_names
|
489
|
-
vals.concat(subject_titles) if subject_titles
|
490
|
-
vals.concat(subject_occupations) if subject_occupations
|
491
|
-
vals.map! { |val|
|
492
|
-
v = val.sub(/[\\,;]$/, '')
|
493
|
-
v.strip
|
494
|
-
}
|
495
|
-
vals.empty? ? nil : vals
|
476
|
+
def sw_logger
|
477
|
+
@logger ||= Logger.new(STDOUT)
|
496
478
|
end
|
497
|
-
|
498
|
-
#
|
499
|
-
#
|
500
|
-
|
501
|
-
|
479
|
+
|
480
|
+
# select one or more format values from the controlled vocabulary here:
|
481
|
+
# http://searchworks-solr-lb.stanford.edu:8983/solr/select?facet.field=format&rows=0&facet.sort=index
|
482
|
+
# based on the dor_content_type
|
483
|
+
# @return [String] value in the SearchWorks controlled vocabulary
|
484
|
+
def format
|
485
|
+
val=[]
|
486
|
+
formats = self.term_values(:typeOfResource)
|
487
|
+
genres = self.term_values(:genre)
|
488
|
+
issuance = self.term_values([:origin_info,:issuance])
|
489
|
+
if formats
|
490
|
+
formats.each do |form|
|
491
|
+
case form
|
492
|
+
when 'text'
|
493
|
+
val << 'Thesis' if genres and genres.include? 'thesis'
|
494
|
+
val << 'Book' if issuance and issuance.include? 'monographic'
|
495
|
+
val << 'Journal/Periodical' if issuance and issuance.include? 'continuing'
|
496
|
+
val << 'Journal/Periodical' if genres and genres.include? 'article'
|
497
|
+
val << 'Conference Proceedings' if genres and genres.include? 'conference publication'
|
498
|
+
val << 'Other' if genres and genres.include? 'student project report'
|
499
|
+
val << 'Book' if genres and genres.include? 'technical report'
|
500
|
+
when 'still image'
|
501
|
+
val << 'Image'
|
502
|
+
when 'mixed material'
|
503
|
+
val << 'Manuscript/Archive'
|
504
|
+
when 'moving image'
|
505
|
+
val << 'Video'
|
506
|
+
when 'notated music'
|
507
|
+
val << 'Music - Score'
|
508
|
+
when 'three dimensional object'
|
509
|
+
val <<'Other'
|
510
|
+
when 'cartographic'
|
511
|
+
val << 'Map/Globe'
|
512
|
+
when 'sound recording-musical'
|
513
|
+
val << 'Music-Recording'
|
514
|
+
when 'sound recording-nonmusical'
|
515
|
+
val << 'Sound Recording'
|
516
|
+
when 'software, multimedia'
|
517
|
+
val << 'Computer File'
|
518
|
+
end
|
519
|
+
end
|
520
|
+
end
|
521
|
+
if val.length>0
|
522
|
+
return val.uniq
|
523
|
+
end
|
524
|
+
if not self.typeOfResource or self.typeOfResource.length == 0
|
525
|
+
[]
|
526
|
+
end
|
502
527
|
end
|
503
528
|
|
504
|
-
# subject/temporal values with trailing comma, semicolon, and backslash (and any preceding spaces) removed
|
505
|
-
# @return [Array<String>] values for the era_facet Solr field for this document or nil if none
|
506
|
-
def era_facet
|
507
|
-
subject_temporal.map { |val| val.sub(/[\\,;]$/, '').strip } unless !subject_temporal
|
508
|
-
end
|
509
529
|
# @return [String] value with the numeric catkey in it, or nil if none exists
|
510
530
|
def catkey
|
511
531
|
catkey=self.term_values([:record_info,:recordIdentifier])
|
@@ -521,7 +541,7 @@ module Stanford
|
|
521
541
|
@druid ? @druid : 'Unknown item'
|
522
542
|
end
|
523
543
|
|
524
|
-
|
544
|
+
# protected ----------------------------------------------------------
|
525
545
|
|
526
546
|
# convenience method for subject/name/namePart values (to avoid parsing the mods for the same thing multiple times)
|
527
547
|
def subject_names
|
@@ -573,17 +593,17 @@ module Stanford
|
|
573
593
|
end
|
574
594
|
|
575
595
|
# If a year has a "u" in it, replace instances of u with 0
|
576
|
-
# @param [String]
|
596
|
+
# @param [String] dates
|
577
597
|
# @return String
|
578
598
|
def get_u_year dates
|
579
599
|
dates.each do |f_date|
|
580
600
|
# Single digit u notation
|
581
|
-
matches=f_date.scan(/\d{3}u/)
|
601
|
+
matches = f_date.scan(/\d{3}u/)
|
582
602
|
if matches.length == 1
|
583
603
|
return matches.first.gsub('u','0')
|
584
604
|
end
|
585
605
|
# Double digit u notation
|
586
|
-
matches=f_date.scan(/\d{2}u{2}/)
|
606
|
+
matches = f_date.scan(/\d{2}u{2}/)
|
587
607
|
if matches.length == 1
|
588
608
|
return matches.first.gsub('u','-')
|
589
609
|
end
|