pennmarc 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +11 -19
- data/README.md +12 -2
- data/lib/pennmarc/helpers/creator.rb +24 -20
- data/lib/pennmarc/helpers/edition.rb +15 -13
- data/lib/pennmarc/helpers/format.rb +3 -3
- data/lib/pennmarc/helpers/genre.rb +5 -4
- data/lib/pennmarc/helpers/helper.rb +1 -0
- data/lib/pennmarc/helpers/language.rb +3 -3
- data/lib/pennmarc/helpers/location.rb +10 -10
- data/lib/pennmarc/helpers/relation.rb +7 -7
- data/lib/pennmarc/helpers/series.rb +18 -29
- data/lib/pennmarc/helpers/subject.rb +9 -10
- data/lib/pennmarc/mappers.rb +31 -0
- data/lib/pennmarc/parser.rb +36 -62
- data/lib/pennmarc/util.rb +10 -11
- data/pennmarc.gemspec +1 -1
- data/spec/lib/pennmarc/helpers/creator_spec.rb +9 -9
- data/spec/lib/pennmarc/helpers/edition_spec.rb +3 -2
- data/spec/lib/pennmarc/helpers/format_spec.rb +1 -1
- data/spec/lib/pennmarc/helpers/genre_spec.rb +1 -1
- data/spec/lib/pennmarc/helpers/language_spec.rb +1 -1
- data/spec/lib/pennmarc/helpers/location_spec.rb +8 -8
- data/spec/lib/pennmarc/helpers/relation_spec.rb +2 -2
- data/spec/lib/pennmarc/helpers/series_spec.rb +2 -2
- data/spec/lib/pennmarc/helpers/subject_spec.rb +1 -1
- data/spec/lib/pennmarc/parser_spec.rb +22 -1
- metadata +3 -5
- data/legacy/indexer.rb +0 -568
- data/legacy/marc.rb +0 -2964
- data/legacy/test_file_output.json +0 -49
data/legacy/indexer.rb
DELETED
@@ -1,568 +0,0 @@
|
|
1
|
-
# rubocop:disable all
|
2
|
-
$:.unshift './config'
|
3
|
-
|
4
|
-
require 'date'
|
5
|
-
|
6
|
-
# This fixes a bug in older versions of glibc, where name resolution under high load sometimes fails.
|
7
|
-
# We require this here, because indexing jobs don't load Rails initializers
|
8
|
-
require 'resolv-replace'
|
9
|
-
|
10
|
-
require 'traject'
|
11
|
-
|
12
|
-
require 'penn_lib/marc'
|
13
|
-
require 'penn_lib/code_mappings'
|
14
|
-
|
15
|
-
# Indexer for Franklin-native records (i.e. from Alma).
|
16
|
-
# This is also used as a parent class for Hathi and CRL
|
17
|
-
# since the vast majority of the indexing rules are the same.
|
18
|
-
# Overrideable field definitions should go into define_* methods
|
19
|
-
# and called in this constructor.
|
20
|
-
class FranklinIndexer < BaseIndexer
|
21
|
-
|
22
|
-
# this mixin defines the lambda factory method get_format for legacy MARC formats
|
23
|
-
include Blacklight::Marc::Indexer::Formats
|
24
|
-
include BlacklightSolrplugins::Indexer
|
25
|
-
|
26
|
-
# This behaves like the wrapped MARC::Record object it contains
|
27
|
-
# except that the #each method filters out fields with non-standard tags.
|
28
|
-
# Thin wrapper around a MARC record object.
#
# Every message the wrapper does not define itself is forwarded to the wrapped
# record, so it can be used wherever the record is expected. The one
# difference is #each, which only yields fields whose tag is exactly three
# digits (non-standard tags are filtered out).
class PlainMarcRecord
  # A standard tag is exactly three digits. \A and \z anchor the whole string;
  # ^ and $ would anchor per line and let a multi-line tag slip through.
  VALID_TAG_REGEX = /\A\d{3}\z/.freeze

  # @param record [Object] the record to wrap; it must respond to #fields,
  #   returning objects that respond to #tag
  def initialize(record)
    @record = record
  end

  # Forward any unknown message (arguments AND block) to the wrapped record.
  # `send` is kept (rather than `public_send`) so that delegation reaches
  # exactly the same methods it always did.
  def method_missing(name, *args, &block)
    @record.send(name, *args, &block)
  end

  # Keep respond_to? / method(...) truthful about the delegated interface.
  def respond_to_missing?(name, include_private = false)
    @record.respond_to?(name, include_private) || super
  end

  # Yield each field of the wrapped record that carries a standard tag.
  #
  # @yieldparam field [Object] a field whose tag matches VALID_TAG_REGEX
  # @return [Enumerator] when called without a block
  # @return [Object] the wrapped record's field collection otherwise
  def each
    return enum_for(:each) unless block_given?

    @record.fields.each do |field|
      yield field if VALID_TAG_REGEX.match?(field.tag)
    end
  end
end
|
45
|
-
|
46
|
-
# Filter out enriched fields from ALMA because a lot of them can cause
|
47
|
-
# the stored MARC XML in Solr to exceed max field size. Note that the
|
48
|
-
# marc_view partial filters out non-standard MARC tags on display side too.
|
49
|
-
# @return [Proc] proc object to be used by traject
|
50
|
-
def get_plain_marc_xml
  # The returned two-argument lambda wraps the record in PlainMarcRecord (so
  # only standard three-digit tags are serialized) and appends the encoded XML
  # to the accumulator.
  ->(record, accumulator) { accumulator << MARC::FastXMLWriter.encode(PlainMarcRecord.new(record)) }
end
|
55
|
-
|
56
|
-
# Configure the indexer's settings and register every field definition.
#
# Reads the SOLR_URL environment variable to build the Solr update URL.
#
# @raise [KeyError] if SOLR_URL is not set
def initialize
  super

  # Extra update processors appended to the Solr update URL for Solr-side
  # cross reference handling and duplicate ID deletion.
  # NOTE: the 'uid-distrib' processor is intentionally not added, even when
  # SOLR_USE_UID_DISTRIB_PROCESSOR is set: deletion is handled outside of
  # Solr, either permanently or pending bug fixes.
  processors = %w[xref-copyfield fl-multiplex shingles id_hash]

  # ENV.fetch fails with a KeyError naming the missing variable instead of a
  # NoMethodError on nil when SOLR_URL is not configured.
  solr_base_url = ENV.fetch('SOLR_URL').chomp('/')
  # Must stay a local variable: it is read from inside the settings block.
  solr_update_url = "#{solr_base_url}/update/json?processor=#{processors.join(',')}"

  settings do
    # type may be 'binary', 'xml', or 'json'
    provide 'marc_source.type', 'xml'
    # set this to be non-negative if threshold should be enforced
    provide 'solr_writer.max_skipped', -1

    provide 'solr.update_url', solr_update_url

    store 'writer_class_name', 'PennLib::FranklinSolrJsonWriter'

    # uncomment these lines to write to a file
    # store "writer_class_name", "Traject::JsonWriter"
    # store 'output_file', "traject_output.json"

    if defined? JRUBY_VERSION
      # 'store' overrides existing settings, 'provide' does not
      store 'reader_class_name', 'Traject::Marc4JReader'
      store 'solr_writer.thread_pool', 4
      store 'processing_thread_pool', 4
    end

    store 'solr_writer.commit_on_close', false
    store 'solr_writer.batch_size', 2000
  end

  define_all_fields
end
|
97
|
-
|
98
|
-
# Register every indexing step, in order. Most steps delegate the actual value
# extraction to the #pennlibmarc helper and append the result to the
# accumulator. Steps that read ctx.clipboard rely on the each_record step
# below having populated it first.
def define_all_fields
  define_id
  define_grouped_id
  define_record_source_id
  define_record_source_facet

  to_field('nocirc_f_stored') { |rec, acc| acc << pennlibmarc.items_nocirc(rec) }

  define_mms_id
  define_oclc_id
  define_cluster_id
  define_full_text_link_text_a

  # do NOT use *_xml_stored_single because it uses a Str (max 32k) for storage
  to_field('marcrecord_xml_stored_single_large', get_plain_marc_xml)

  # Keyword searches use pf/qf to search multiple fields, so this field is not
  # needed; the definition is kept here, commented out, just in case.
  #
  # to_field "text_search", extract_all_marc_values do |r, acc|
  #   acc.unshift(r['001'].try(:value))
  #   acc.replace [acc.join(' ')] # turn it into a single string
  # end

  define_access_facet

  to_field('format_f_stored') { |rec, acc| acc.concat(pennlibmarc.get_format(rec)) }

  author_creator_spec = %w[
    100abcdjq
    110abcdjq
    700abcdjq
    710abcdjq
    800abcdjq
    810abcdjq
    111abcen
    711abcen
    811abcen
  ].join(':')

  # this is now automatically copied on the Solr side
  # to_field "author_creator_f", extract_marc(author_creator_spec, :trim_punctuation => true)

  # TODO: xfacet field, do not migrate
  to_field('author_creator_xfacet2_input', extract_marc(author_creator_spec, trim_punctuation: true)) do |_rec, acc|
    acc.map! { |value| 'n' + value }
  end

  # this is now automatically copied on the Solr side
  # to_field 'subject_f_stored' do |rec, acc|
  #   acc.concat(pennlibmarc.get_subject_facet_values(rec))
  # end

  to_field('db_type_f_stored') { |rec, acc| acc.concat(pennlibmarc.get_db_types(rec)) }
  to_field('db_category_f_stored') { |rec, acc| acc.concat(pennlibmarc.get_db_categories(rec)) }
  to_field('db_subcategory_f_stored') { |rec, acc| acc.concat(pennlibmarc.get_db_subcategories(rec)) }
  to_field('subject_search') { |rec, acc| acc.concat(pennlibmarc.get_subject_search_values(rec)) }
  to_field('toplevel_subject_f') { |rec, acc| acc.concat(pennlibmarc.get_subject_facet_values(rec, true)) }

  # TODO: xfacet field, do not migrate
  to_field('call_number_xfacet') { |rec, acc| acc.concat(pennlibmarc.get_call_number_xfacet_values(rec)) }

  to_field('language_f_stored') { |rec, acc| acc.concat(pennlibmarc.get_language_values(rec)) }
  to_field('language_search') { |rec, acc| acc.concat(pennlibmarc.get_language_values(rec)) }
  to_field('library_f_stored') { |rec, acc| acc.concat(pennlibmarc.get_library_values(rec)) }
  to_field('specific_location_f_stored') { |rec, acc| acc.concat(pennlibmarc.get_specific_location_values(rec)) }
  to_field('classification_f_stored') { |rec, acc| acc.concat(pennlibmarc.get_classification_values(rec)) }
  to_field('genre_f_stored') { |rec, acc| acc.concat(pennlibmarc.get_genre_values(rec)) }
  to_field('genre_search') { |rec, acc| acc.concat(pennlibmarc.get_genre_search_values(rec)) }

  # Title fields

  to_field('title_1_search') { |rec, acc| acc.concat(pennlibmarc.get_title_1_search_values(rec)) }
  to_field('title_2_search') { |rec, acc| acc.concat(pennlibmarc.get_title_2_search_values(rec)) }
  to_field('journal_title_1_search') { |rec, acc| acc.concat(pennlibmarc.get_journal_title_1_search_values(rec)) }
  to_field('journal_title_2_search') { |rec, acc| acc.concat(pennlibmarc.get_journal_title_2_search_values(rec)) }
  to_field('author_creator_1_search') { |rec, acc| acc.concat(pennlibmarc.get_author_creator_1_search_values(rec)) }
  to_field('author_creator_2_search') { |rec, acc| acc.concat(pennlibmarc.get_author_creator_2_search_values(rec)) }
  to_field('author_creator_a') { |rec, acc| acc.concat(pennlibmarc.get_author_creator_values(rec)) }
  to_field('author_880_a') { |rec, acc| acc.concat(pennlibmarc.get_author_880_values(rec)) }
  to_field('title') { |rec, acc| acc.concat(pennlibmarc.get_title_values(rec)) }
  to_field('title_880_a') { |rec, acc| acc.concat(pennlibmarc.get_title_880_values(rec)) }
  to_field('standardized_title_a') { |rec, acc| acc.concat(pennlibmarc.get_standardized_title_values(rec)) }

  # TODO: xfacet field, do not migrate
  to_field('title_xfacet') { |rec, acc| acc.concat(pennlibmarc.get_title_xfacet_values(rec)) }

  to_field('title_nssort') { |rec, acc| acc.concat(pennlibmarc.get_title_sort_values(rec)) }

  to_field('title_sort_tl') do |rec, acc|
    acc.concat(pennlibmarc.get_title_sort_filing_parts(rec, false))
    pennlibmarc.append_title_variants(rec, acc)
  end

  # Author fields

  to_field('author_creator_nssort') { |rec, acc| acc.concat(pennlibmarc.get_author_creator_sort_values(rec)) }
  to_field('edition') { |rec, acc| acc.concat(pennlibmarc.get_edition_values(rec)) }
  to_field('conference_a') { |rec, acc| acc.concat(pennlibmarc.get_conference_values(rec)) }
  to_field('series') { |rec, acc| acc.concat(pennlibmarc.get_series_values(rec)) }
  to_field('publication_a') { |rec, acc| acc.concat(pennlibmarc.get_publication_values(rec)) }
  to_field('contained_within_a') { |rec, acc| acc.concat(pennlibmarc.get_contained_within_values(rec)) }

  # Single-valued sort fields: only emitted when the helper returns a value.

  to_field('elvl_rank_isort') do |rec, acc|
    rank = pennlibmarc.get_encoding_level_rank(rec)
    acc << rank if rank
  end

  to_field('hld_count_isort') do |rec, acc|
    holding_count = pennlibmarc.get_hld_count(rec)
    acc << holding_count if holding_count
  end

  to_field('itm_count_isort') do |rec, acc|
    item_count = pennlibmarc.get_itm_count(rec)
    acc << item_count if item_count
  end

  to_field('empty_hld_count_isort') do |rec, acc|
    empty_holding_count = pennlibmarc.get_empty_hld_count(rec)
    acc << empty_holding_count if empty_holding_count
  end

  to_field('prt_count_isort') do |rec, acc|
    portfolio_count = pennlibmarc.get_prt_count(rec)
    acc << portfolio_count if portfolio_count
  end

  # Compute per-record timestamps, dates and subjects once and stash them on
  # the clipboard; the to_field steps below read them back with #dig.
  each_record do |rec, ctx|
    clipboard = ctx.clipboard
    clipboard[:timestamps] = pennlibmarc.prepare_timestamps(rec)
    clipboard[:dates] = pennlibmarc.prepare_dates(rec)
    clipboard[:subjects] = PennLib::SubjectConfig.prepare_subjects(rec)
  end

  # All browseable/facetable subject types are multiplexed through this field;
  # for display, the values are then mapped Solr-side to the
  # `*_subject_stored_a` fields below. Those fields are still configured
  # directly, for values that should be displayed but are not directly
  # browseable/facetable.
  # TODO: while we should not migrate this field directly, we need to ensure
  # that the copyfield behavior is incorporated into our indexer
  to_field('subject_xfacet2_input') do |_rec, acc, ctx|
    xfacet_subjects = ctx.clipboard.dig(:subjects, :xfacet)
    acc.concat(xfacet_subjects) if xfacet_subjects
  end

  # The fields below exist because some values appear in _display_ but should
  # not be _directly_ browseable/facetable (except perhaps as xrefs).
  # This is a step towards consolidating subject handling. Some choices are
  # preserved purely for functional backward compatibility and are of
  # questionable merit: these fields allow display of values that will be
  # links but that in some cases will not be present in the linked "browse"
  # view. The preserved behavior may be ripe for reconsideration.
  # BEGIN STORED SUBJECTS
  to_field('lcsh_subject_stored_a') do |_rec, acc, ctx|
    lcsh = ctx.clipboard.dig(:subjects, :stored_lcsh)
    acc.concat(lcsh) if lcsh
  end

  to_field('childrens_subject_stored_a') do |_rec, acc, ctx|
    childrens = ctx.clipboard.dig(:subjects, :stored_childrens)
    acc.concat(childrens) if childrens
  end

  to_field('mesh_subject_stored_a') do |_rec, acc, ctx|
    mesh = ctx.clipboard.dig(:subjects, :stored_mesh)
    acc.concat(mesh) if mesh
  end

  to_field('local_subject_stored_a') do |_rec, acc, ctx|
    local = ctx.clipboard.dig(:subjects, :stored_local)
    acc.concat(local) if local
  end
  # END STORED SUBJECTS

  to_field('recently_added_isort') do |_rec, acc, ctx|
    most_recent_add = ctx.clipboard.dig(:timestamps, :most_recent_add)
    acc << most_recent_add if most_recent_add
  end

  to_field('last_update_isort') do |_rec, acc, ctx|
    last_update = ctx.clipboard.dig(:timestamps, :last_update)
    acc << last_update if last_update
  end

  to_field('publication_date_ssort') do |_rec, acc, ctx|
    pub_date_sort = ctx.clipboard.dig(:dates, :pub_date_sort)
    acc << pub_date_sort if pub_date_sort
  end

  to_field('pub_min_dtsort') do |_rec, acc, ctx|
    pub_date_min = ctx.clipboard.dig(:dates, :pub_date_minsort)
    acc << pub_date_min if pub_date_min
  end

  to_field('pub_max_dtsort') do |_rec, acc, ctx|
    pub_date_max = ctx.clipboard.dig(:dates, :pub_date_maxsort)
    acc << pub_date_max if pub_date_max
  end

  to_field('content_min_dtsort') do |_rec, acc, ctx|
    content_date_min = ctx.clipboard.dig(:dates, :content_date_minsort)
    acc << content_date_min if content_date_min
  end

  to_field('content_max_dtsort') do |_rec, acc, ctx|
    content_date_max = ctx.clipboard.dig(:dates, :content_date_maxsort)
    acc << content_date_max if content_date_max
  end

  to_field('publication_date_f_stored') do |_rec, acc, ctx|
    pub_date_decade = ctx.clipboard.dig(:dates, :pub_date_decade)
    acc << pub_date_decade if pub_date_decade
  end

  to_field('publication_dr') do |_rec, acc, ctx|
    pub_date_range = ctx.clipboard.dig(:dates, :pub_date_range)
    acc << pub_date_range if pub_date_range
  end

  to_field('content_dr') do |_rec, acc, ctx|
    content_date_range = ctx.clipboard.dig(:dates, :content_date_range)
    acc << content_date_range if content_date_range
  end

  # Index the normalized forms of each extracted value followed by the raw
  # extracted values themselves, de-duplicated.
  to_field('isbn_isxn_stored', extract_marc(%w[020az 022alz], separator: nil)) do |_rec, acc|
    raw_values = acc.dup
    acc.map! { |value| StdNum::ISBN.allNormalizedValues(value) }
    acc << raw_values
    acc.flatten!
    acc.uniq!
  end

  to_field('call_number_search') { |rec, acc| acc.concat(pennlibmarc.get_call_number_search_values(rec)) }

  to_field('physical_holdings_json') do |rec, acc|
    physical_holdings = pennlibmarc.get_physical_holdings(rec)
    acc << physical_holdings.to_json if physical_holdings.present?
  end

  to_field('electronic_holdings_json') do |rec, acc|
    electronic_holdings = pennlibmarc.get_electronic_holdings(rec)
    acc << electronic_holdings.to_json if electronic_holdings.present?
  end

  # Store IDs of associated boundwith records, where the actual holdings are
  # attached. Multi-valued because a bib may have multiple copies, each
  # associated with a different boundwith record (a few such cases do exist).
  # These IDs are passed to the Availability API.
  to_field('bound_with_ids_a') { |rec, acc| acc.concat(pennlibmarc.get_bound_with_id_values(rec)) }

  to_field('conference_search') { |rec, acc| acc.concat(pennlibmarc.get_conference_search_values(rec)) }
  to_field('contents_note_search') { |rec, acc| acc.concat(pennlibmarc.get_contents_note_search_values(rec)) }
  to_field('corporate_author_search') { |rec, acc| acc.concat(pennlibmarc.get_corporate_author_search_values(rec)) }

  to_field('place_of_publication_search', extract_marc('260a:264|*1|a'))
  to_field('publisher_search', extract_marc('260b:264|*1|b'))
  to_field('pubnum_search', extract_marc('024a:028a'))

  to_field('series_search') { |rec, acc| acc.concat(pennlibmarc.get_series_search_values(rec)) }
end
|
482
|
-
|
483
|
-
# Memoized PennLib::Marc helper, built on first use from the code mappings
# found under config/translation_maps of the Rails root.
#
# @return [PennLib::Marc]
def pennlibmarc
  @code_mappings ||= PennLib::CodeMappings.new(Rails.root.join('config', 'translation_maps'))
  @pennlibmarc ||= PennLib::Marc.new(@code_mappings)
end
|
487
|
-
|
488
|
-
# Define the 'id' field: the 001 value prefixed with "FRANKLIN_".
# Boundwith records are skipped entirely.
def define_id
  to_field('id', trim(extract_marc('001'), first: true)) do |rec, acc, context|
    acc.map! { |id| "FRANKLIN_#{id}" }

    # The boundwith check lives in the first 'id' step so that it happens early.
    context.skip!("Skipping boundwith record #{acc.first}") if pennlibmarc.is_boundwith_record(rec)
  end
end
|
498
|
-
|
499
|
-
# Define the 'alma_mms_id' field from the 001 value.
def define_mms_id
  to_field('alma_mms_id', trim(extract_marc('001'), first: true))
end
|
502
|
-
|
503
|
-
# Define the 'access_f_stored' field from the helper's access values.
def define_access_facet
  to_field('access_f_stored') { |rec, acc| acc.concat(pennlibmarc.get_access_values(rec)) }
end
|
508
|
-
|
509
|
-
# Define the 'oclc_id' field: the first OCLC ID of the record, if it has any.
def define_oclc_id
  to_field('oclc_id') do |rec, acc|
    ids = pennlibmarc.get_oclc_id_values(rec)
    next if ids.empty?

    acc << ids.first
  end
end
|
515
|
-
|
516
|
-
# Cluster ID for a record: its first OCLC ID when it has one, otherwise an
# integer derived from the MD5 digest of the first 001 value.
#
# @param rec [Object] record responding to #fields
# @return [Object] the first OCLC ID, or an Integer built from the digest
def get_cluster_id(rec)
  first_oclc_id = pennlibmarc.get_oclc_id_values(rec).first
  return first_oclc_id if first_oclc_id

  control_number = rec.fields('001').first&.value
  # first 16 hex digits = first 8 bytes; build an int out of that hex string.
  Digest::MD5.hexdigest(control_number)[0, 16].hex
end
|
524
|
-
|
525
|
-
# Define the 'cluster_id' field using #get_cluster_id.
def define_cluster_id
  to_field('cluster_id') { |rec, acc| acc << get_cluster_id(rec) }
end
|
530
|
-
|
531
|
-
# Define the 'grouped_id' field: "FRANKLIN_<001>", prefixed with
# "<first OCLC ID>!" when the record has an OCLC ID.
def define_grouped_id
  to_field('grouped_id', trim(extract_marc('001'), first: true)) do |rec, acc, _context|
    oclc_ids = pennlibmarc.get_oclc_id_values(rec)

    acc.map! do |id|
      puts 'Warning: Multiple OCLC IDs found, using the first one' if oclc_ids.size > 1

      primary_oclc_id = oclc_ids.first
      if primary_oclc_id.present?
        "#{primary_oclc_id}!FRANKLIN_#{id}"
      else
        "FRANKLIN_#{id}"
      end
    end
  end
end
|
544
|
-
|
545
|
-
# Define the 'record_source_id' field, always RecordSource::PENN here.
def define_record_source_id
  to_field('record_source_id') { |_rec, acc| acc << RecordSource::PENN }
end
|
550
|
-
|
551
|
-
# Define the 'record_source_f' facet: always 'Penn', plus 'HathiTrust' when
# the helper's is_etas check is truthy for the record.
def define_record_source_facet
  to_field('record_source_f') do |rec, acc|
    acc.push('Penn')
    acc.push('HathiTrust') if pennlibmarc.is_etas(rec)
  end
end
|
557
|
-
|
558
|
-
# Define the 'full_text_link_text_a' field: the helper's full text link values
# serialized as JSON, emitted only when there are any.
def define_full_text_link_text_a
  to_field('full_text_link_text_a') do |rec, acc|
    links = pennlibmarc.get_full_text_link_values(rec)
    acc << links.to_json if links.present?
  end
end
|
566
|
-
|
567
|
-
end
|
568
|
-
# rubocop:enable all
|