pennmarc 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/Gemfile +23 -0
- data/Gemfile.lock +119 -0
- data/README.md +82 -0
- data/legacy/indexer.rb +568 -0
- data/legacy/marc.rb +2964 -0
- data/legacy/test_file_output.json +49 -0
- data/lib/pennmarc/encoding_level.rb +43 -0
- data/lib/pennmarc/enriched_marc.rb +36 -0
- data/lib/pennmarc/heading_control.rb +11 -0
- data/lib/pennmarc/helpers/citation.rb +31 -0
- data/lib/pennmarc/helpers/creator.rb +237 -0
- data/lib/pennmarc/helpers/database.rb +89 -0
- data/lib/pennmarc/helpers/date.rb +85 -0
- data/lib/pennmarc/helpers/edition.rb +90 -0
- data/lib/pennmarc/helpers/format.rb +312 -0
- data/lib/pennmarc/helpers/genre.rb +71 -0
- data/lib/pennmarc/helpers/helper.rb +11 -0
- data/lib/pennmarc/helpers/identifier.rb +134 -0
- data/lib/pennmarc/helpers/language.rb +37 -0
- data/lib/pennmarc/helpers/link.rb +12 -0
- data/lib/pennmarc/helpers/location.rb +97 -0
- data/lib/pennmarc/helpers/note.rb +132 -0
- data/lib/pennmarc/helpers/production.rb +131 -0
- data/lib/pennmarc/helpers/relation.rb +135 -0
- data/lib/pennmarc/helpers/series.rb +118 -0
- data/lib/pennmarc/helpers/subject.rb +304 -0
- data/lib/pennmarc/helpers/title.rb +197 -0
- data/lib/pennmarc/mappings/language.yml +516 -0
- data/lib/pennmarc/mappings/locations.yml +1801 -0
- data/lib/pennmarc/mappings/relator.yml +263 -0
- data/lib/pennmarc/parser.rb +177 -0
- data/lib/pennmarc/util.rb +240 -0
- data/lib/pennmarc.rb +6 -0
- data/pennmarc.gemspec +22 -0
- data/spec/fixtures/marcxml/test.xml +167 -0
- data/spec/lib/pennmarc/helpers/citation_spec.rb +27 -0
- data/spec/lib/pennmarc/helpers/creator_spec.rb +183 -0
- data/spec/lib/pennmarc/helpers/database_spec.rb +60 -0
- data/spec/lib/pennmarc/helpers/date_spec.rb +105 -0
- data/spec/lib/pennmarc/helpers/edition_spec.rb +38 -0
- data/spec/lib/pennmarc/helpers/format_spec.rb +200 -0
- data/spec/lib/pennmarc/helpers/genre_spec.rb +89 -0
- data/spec/lib/pennmarc/helpers/identifer_spec.rb +105 -0
- data/spec/lib/pennmarc/helpers/language_spec.rb +30 -0
- data/spec/lib/pennmarc/helpers/location_spec.rb +70 -0
- data/spec/lib/pennmarc/helpers/note_spec.rb +233 -0
- data/spec/lib/pennmarc/helpers/production_spec.rb +193 -0
- data/spec/lib/pennmarc/helpers/relation_spec.rb +120 -0
- data/spec/lib/pennmarc/helpers/subject_spec.rb +262 -0
- data/spec/lib/pennmarc/helpers/title_spec.rb +169 -0
- data/spec/lib/pennmarc/marc_util_spec.rb +206 -0
- data/spec/lib/pennmarc/parser_spec.rb +13 -0
- data/spec/spec_helper.rb +104 -0
- data/spec/support/marc_spec_helpers.rb +84 -0
- metadata +171 -0
@@ -0,0 +1,304 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PennMARC
|
4
|
+
# This helper extracts subject heading in various ways to facilitate searching, faceting and display of subject
|
5
|
+
# values. Michael Gibney did a lot to "clean up" Subject parsing in discovery-app, but much of it was intended to
|
6
|
+
# support features (xfacet) that we will no longer support, and ties display and xfacet field parsing together too
|
7
|
+
# tightly to be preserved. As a result of this, display methods and facet methods below are ported from their state
|
8
|
+
# prior to Michael's 2/2021 subject parsing changes.
|
9
|
+
class Subject < Helper
|
10
|
+
class << self
|
11
|
+
# Tags that serve as sources for Subject search values
|
12
|
+
# @todo why are 541 and 561 included here? these fields include info about source of acquisition
|
13
|
+
SEARCH_TAGS = %w[541 561 600 610 611 630 650 651 653].freeze
|
14
|
+
|
15
|
+
# Valid indicator 2 values indicating the source thesaurus for subject terms. These are:
|
16
|
+
# - 0: LCSH
|
17
|
+
# - 1: LC Children's
|
18
|
+
# - 2: MeSH
|
19
|
+
# - 4: Source not specified (local?)
|
20
|
+
# - 7: Source specified in ǂ2
|
21
|
+
SEARCH_SOURCE_INDICATORS = %w[0 1 2 4 7].freeze
|
22
|
+
|
23
|
+
# Tags that serve as sources for Subject facet values
|
24
|
+
DISPLAY_TAGS = %w[600 610 611 630 650 651].freeze
|
25
|
+
|
26
|
+
# Local subject heading tags
|
27
|
+
LOCAL_TAGS = %w[690 691 697].freeze
|
28
|
+
|
29
|
+
# Subject values for searching. Draws on every field selected by {subject_fields} with type `:search`
# (see {SEARCH_TAGS}, 69X fields and linked 880s, restricted by {SEARCH_SOURCE_INDICATORS}).
#
# Subfield handling:
# - 5, 6 and 8 are skipped
# - a has a leading PRO/%PRO/CHR/%CHR marker and a trailing "?" removed
# - 4 is emitted as the raw code followed by its mapped relator term
# - everything else is passed through untouched
# @todo this includes subfields that may not be desired like 1 (uri) and 2 (source code) but this might be OK for
#   a search (non-display) field?
# @param [MARC::Record] record
# @param [Hash] relator_map relator codes (as symbols) mapped to relator terms
# @return [Array] array of all subject values for search
def search(record, relator_map)
  subject_fields(record, type: :search).filter_map do |field|
    values = field.filter_map do |sf|
      code = sf.code
      next if %w[5 6 8].include?(code)

      # TODO: use term hash here? pro/chr would be rejected...
      # TODO: should we care about punctuation in a search field? relator mapping?
      if code == 'a'
        sf.value.gsub(/^%?(PRO|CHR)/, '').gsub(/\?$/, '').strip
      elsif code == '4'
        # TODO: use relation mapping method from Title helper? for potential URI support?
        # sf 4 should contain a 3-letter code or URI "that specifies the relationship from the entity described
        # in the record to the entity referenced in the field"
        "#{sf.value} #{relator_map[sf.value.to_sym]}".strip
      else
        sf.value
      end
    end

    # a field that contributed nothing yields nil and is dropped by filter_map
    join_and_squish(values) unless values.empty?
  end
end
|
61
|
+
|
62
|
+
# Subject values for faceting. Each field selected with type `:facet` is broken down into a term hash and
# formatted as a facet string; fields yielding a blank hash or a zero component count are dropped.
#
# @note this is ported mostly from MG's new-style Subject parsing
# @param [MARC::Record] record
# @return [Array] array of all subject values for faceting
def facet(record)
  facet_fields = subject_fields(record, type: :facet)
  facet_fields.filter_map do |subject_field|
    term = build_subject_hash(subject_field)
    format_term(type: :facet, term: term) unless term.blank? || term[:count]&.zero?
  end
end
|
75
|
+
|
76
|
+
# Subject values for display, de-duplicated. Covers all {DISPLAY_TAGS} and {LOCAL_TAGS}. For tags that specify
# a source, only those with an allowed source code (see ALLOWED_SOURCE_CODES) are included.
#
# @param [MARC::Record] record
# @return [Array] array of all subject values for display
def show(record)
  terms = subject_fields(record, type: :all).filter_map do |subject_field|
    term = build_subject_hash(subject_field)
    format_term(type: :display, term: term) unless term.blank? || term[:count]&.zero?
  end
  terms.uniq
end
|
89
|
+
|
90
|
+
# Subject values from the "Children" ontology: {DISPLAY_TAGS} fields selected with an indicator2 option
# of '1'. Values are formatted for display and de-duplicated.
#
# @param [MARC::Record] record
# @return [Array] array of children's subject values for display
def childrens_show(record)
  selection = { tags: DISPLAY_TAGS, indicator2: '1' }
  terms = subject_fields(record, type: :display, options: selection).filter_map do |subject_field|
    term = build_subject_hash(subject_field)
    format_term(type: :display, term: term) unless term.blank? || term[:count]&.zero?
  end
  terms.uniq
end
|
103
|
+
|
104
|
+
# Subject values from the "MeSH" ontology: {DISPLAY_TAGS} fields selected with an indicator2 option
# of '2'. Values are formatted for display and de-duplicated.
#
# @param [MARC::Record] record
# @return [Array] array of MeSH subject values for display
def medical_show(record)
  selection = { tags: DISPLAY_TAGS, indicator2: '2' }
  terms = subject_fields(record, type: :display, options: selection).filter_map do |subject_field|
    term = build_subject_hash(subject_field)
    format_term(type: :display, term: term) unless term.blank? || term[:count]&.zero?
  end
  terms.uniq
end
|
117
|
+
|
118
|
+
# Local subject values for display: {DISPLAY_TAGS} fields selected with an indicator2 option of '4', plus
# all {LOCAL_TAGS} fields. Any field whose sf2 matches "penncoi" (Community of Interest) is left out.
# Values are de-duplicated.
#
# @param [MARC::Record] record
# @return [Array] array of local subject values for display
def local_show(record)
  indicator_fields = subject_fields(record, type: :display, options: { tags: DISPLAY_TAGS, indicator2: '4' })
  candidates = indicator_fields + subject_fields(record, type: :local)
  kept = candidates.reject { |subject_field| subfield_value?(subject_field, '2', /penncoi/) }

  terms = kept.filter_map do |subject_field|
    term = build_subject_hash(subject_field)
    format_term(type: :display, term: term) unless term.blank? || term[:count]&.zero?
  end
  terms.uniq
end
|
135
|
+
|
136
|
+
private
|
137
|
+
|
138
|
+
# Get subject fields from a record based on expected usage type. Each type maps to a predicate method that
# is applied to every field of the record. Valid types are currently:
#  - search
#  - facet
#  - display (the predicate requires `options`)
#  - local
#  - all (general display)
# @param [MARC::Record] record
# @param [String, Symbol] type of fields desired
# @param [Hash] options to be passed to the selector method; only forwarded when non-empty
# @raise [ArgumentError] if the type is not one of the supported values
# @return [Array<MARC::DataField>] selected fields
def subject_fields(record, type:, options: {})
  selector_method = case type.to_sym
                    when :search then :subject_search_field?
                    when :facet then :subject_facet_field?
                    when :display then :subject_display_field?
                    when :local then :subject_local_field?
                    when :all then :subject_general_display_field?
                    else
                      # `ArgumentError(...)` would be a call to an undefined method; raise the class itself
                      raise ArgumentError, "Unsupported type specified: #{type}"
                    end
  record.fields.select do |field|
    options.any? ? send(selector_method, field, options) : send(selector_method, field)
  end
end
|
161
|
+
|
162
|
+
# Format a term hash as a string for facet or display use. Parts are joined with "--", followed by the
# `:lasts` values and (for display only) the `:append` values, each group separated by a space.
#
# When the hash records exactly one counted subfield, the first part is normalized in place (see
# {normalize_single_subfield}) before joining.
#
# @todo confirm punctuation handling
# @todo support search field formatting?
# @param [Symbol] type either :facet or :display
# @param [Hash] term components and information as a hash
# @return [String, nil] formatted term, or nil for any other type
def format_term(type:, term:)
  return unless %i[facet display].include?(type)

  parts = term[:parts]
  # :count also tallies :lasts subfields, so a count of 1 does not guarantee that a part exists
  normalize_single_subfield(parts.first) if term[:count] == 1 && parts.first

  heading = "#{parts.join('--')} #{term[:lasts].join(' ')}"
  heading = "#{heading} #{term[:append].join(' ')}" if type == :display
  heading.strip
end
|
181
|
+
|
182
|
+
# Is a field intended for display in a general subject field? The tag must be one of {DISPLAY_TAGS} or
# {LOCAL_TAGS}; when indicator2 is '7' the field must additionally pass the source code check.
# @param [MARC::DataField] field
# @return [Boolean] whether a MARC field is intended for display under general "Subjects"
def subject_general_display_field?(field)
  displayable_tags = DISPLAY_TAGS + LOCAL_TAGS
  return false unless field.tag.in?(displayable_tags)

  # reject only the combination "source specified" + "source not allowed"
  !(field.indicator2 == '7' && !valid_subject_genre_source_code?(field))
end
|
192
|
+
|
193
|
+
# Checks the field's tag against {LOCAL_TAGS}.
# @param [MARC::DataField] field
# @return [Boolean] whether a MARC field is a local subject field (69X)
def subject_local_field?(field)
  LOCAL_TAGS.include?(field.tag)
end
|
198
|
+
|
199
|
+
# Does a field match the requested tags and indicator2 value(s)?
#
# Both options accept a single value or an array. Values are compared for exact membership: a bare String
# option is wrapped in an array first, so that an empty indicator cannot match as a substring.
# @param [MARC::DataField] field
# @param [Hash] options include :tags and :indicator2 values
# @return [Boolean] whether a MARC field should be considered for display
def subject_display_field?(field, options)
  return false if field.blank?
  return false unless Array(options[:tags]).include?(field.tag)

  Array(options[:indicator2]).include?(field.indicator2)
end
|
209
|
+
|
210
|
+
# Should a field contribute to the subject facet? The tag must be one of {DISPLAY_TAGS}, and indicator2 must
# be 0, 2 or 4, or be 7 with a source code that passes the source code check.
#
# The tag is checked before any indicator is read, so fields without indicators (control fields) and
# non-subject fields that happen to carry indicator2 '7' are rejected up front.
# @param [MARC::DataField] field
# @return [Boolean]
def subject_facet_field?(field)
  return false if field.blank?
  return false unless DISPLAY_TAGS.include?(field.tag)

  return true if %w[0 2 4].include?(field.indicator2)

  return true if field.indicator2 == '7' && valid_subject_genre_source_code?(field)

  false
end
|
221
|
+
|
222
|
+
# Break a Subject field down into a hash of components for analysis or for building a string.
#
# Keys: :count (number of subfields stored in :parts or :lasts), :parts, :lasts, :append, :uri, :local,
# :vernacular and - only when a subfield 2 is present - :source.
#
# @note Note that we must separately track count (as opposed to simply checking `parts.size`),
#   because we're using (where? - MK) "subdivision count" as a heuristic for the quality level of the
#   heading. - MG
# @todo do i need all this?
# @todo do i need to handle punctuation? see append_new_part
# @param [MARC::DataField] field
# @return [Hash{Symbol->Integer | Array}, nil] nil when a local heading's subfield a is a PRO/CHR marker
def build_subject_hash(field)
  local_heading = field.indicator2 == '4' || field.tag.starts_with?('69')
  term = { count: 0, parts: [], append: [], lasts: [], uri: nil,
           local: local_heading, vernacular: field.tag == '880' }

  field.each do |sf|
    code = sf.code
    next if %w[0 6 8 5].include?(code) # explicitly ignored subfields

    if code == '1'
      term[:uri] = sf.value.strip
    elsif code == '2'
      term[:source] = sf.value.strip
    elsif %w[e w].include?(code)
      # 'e' is relator term; not sure what 'w' is. These are used to append for record-view display only
      term[:append] << sf.value.strip # TODO: map relator code?
    else
      # filter out PRO/CHR entirely (but only need to check on local heading types)
      return nil if code == 'a' && local_heading && sf.value =~ /^%?(PRO|CHR)([ $]|$)/

      # b, c, d, p, q, t extend the last component (joined without a delimiter); anything else - typically
      # a, g, v, x, y, z, 4 - becomes a new component in :parts
      bucket = %w[b c d p q t].include?(code) ? :lasts : :parts
      term[bucket] << sf.value.strip
      term[:count] += 1
    end
  end
  term
end
|
266
|
+
|
267
|
+
# Determine if a field should be considered for Subject search inclusion. It must have an indicator2 value in
# {SEARCH_SOURCE_INDICATORS} and must be either contained in {SEARCH_TAGS}, be an 880 field otherwise linked
# to a valid Search tag (via the first subfield 6 value), or be a 69X field.
#
# This predicate is applied to every field of a record, so fields that expose no indicator2 at all are
# rejected before the indicator is read.
# @param [MARC::DataField] field
# @return [Boolean]
def subject_search_field?(field)
  return false if field.blank?
  return false unless field.respond_to?(:indicator2)
  return false unless SEARCH_SOURCE_INDICATORS.include?(field.indicator2)

  # for a linked 880 the tag to test is the one named in subfield 6
  tag = field.tag == '880' ? subfield_values(field, '6').first : field.tag
  subject_search_tag? tag
end
|
281
|
+
|
282
|
+
# Is a given tag a subject search field? Only the first three characters are considered; the answer is yes
# if they are contained in {SEARCH_TAGS} or start with 69.
# @param [String, NilClass] tag
# @return [Boolean]
def subject_search_tag?(tag)
  return false if tag.blank?

  prefix = tag[0..2]
  SEARCH_TAGS.include?(prefix) || prefix.start_with?('69')
end
|
291
|
+
|
292
|
+
# When we've only encountered one subfield, assume that it might be a poorly-coded record with a bunch of
# subdivisions mashed together, and attempt to convert it to a consistent form: spaced "--" and " - "
# separators are collapsed to "--". The three substitutions are applied in order and modify the given
# string in place.
# @param [String] first_part
# @return [String, nil] result of the final substitution: the string, or nil if that substitution matched nothing
def normalize_single_subfield(first_part)
  substitutions = [
    [/([[[:alnum:]])])(\s+--\s*|\s*--\s+)([[[:upper:]][[:digit:]]])/, '\1--\3'],
    [/([[[:alpha:]])])\s+-\s+([[:upper:]]|[[:digit:]]{2,})/, '\1--\2'],
    [/([[[:alnum:]])])\s+-\s+([[:upper:]])/, '\1--\2']
  ]
  substitutions.map { |pattern, replacement| first_part.gsub!(pattern, replacement) }.last
end
|
302
|
+
end
|
303
|
+
end
|
304
|
+
end
|
@@ -0,0 +1,197 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PennMARC
|
4
|
+
# This helper contains logic for parsing out Title and Title-related fields.
|
5
|
+
class Title < Helper
|
6
|
+
class << self
|
7
|
+
# these will be used when completing the *search_aux methods
|
8
|
+
AUX_TITLE_TAGS = {
|
9
|
+
main: %w[130 210 240 245 246 247 440 490 730 740 830],
|
10
|
+
related: %w[773 774 780 785],
|
11
|
+
entity: %w[700 710 711]
|
12
|
+
}.freeze
|
13
|
+
|
14
|
+
# Main Title Search field. Values come from each 245 field, followed by each 880 field whose subfield 6
# matches 245. Subfields c, 6, 8 and h are left out of the joined value.
# @note Ported from get_title_1_search_values.
# @param [MARC::Record] record
# @return [Array<String>] array of title values for search
def search(record)
  excluded_codes = %w[c 6 8 h]
  main_titles = record.fields('245').filter_map do |field|
    join_subfields(field, &subfield_not_in?(excluded_codes))
  end
  linked_titles = record.fields('880').filter_map do |field|
    join_subfields(field, &subfield_not_in?(excluded_codes)) if subfield_value?(field, '6', /245/)
  end
  main_titles + linked_titles
end
|
28
|
+
|
29
|
+
# Auxiliary Title Search field. Takes from many fields that contain title-like information.
# Not implemented yet: currently always returns nil.
# @note Ported from get_title_2_search_values.
# @todo port this, it is way complicated but essential for relevance
# @param [MARC::Record] record
# @return [Array<String>] array of title values for search
def search_aux(record)
  # intentionally empty until the legacy logic is ported
end
|
35
|
+
|
36
|
+
# Journal Title Search field.
# Not implemented yet: currently always returns nil.
# @todo port this, it is way complicated but essential for relevance
# @param [MARC::Record] record
# @return [Array<String>] journal title information for search
def journal_search(record)
  # intentionally empty until the legacy logic is ported
end
|
41
|
+
|
42
|
+
# Auxiliary Journal Title Search field.
# Not implemented yet: currently always returns nil.
# @todo port this, it is way complicated but essential for relevance
# @param [MARC::Record] record
# @return [Array<String>] journal title information for search
def journal_search_aux(record)
  # intentionally empty until the legacy logic is ported
end
|
47
|
+
|
48
|
+
# Single-valued Title, for use in headings. Takes the first {https://www.oclc.org/bibformats/en/2xx/245.html 245}
# value. Special consideration for
# {https://www.oclc.org/bibformats/en/2xx/245.html#punctuation punctuation practices}: the title (first a or k
# subfield) has its trailing slash, comma, "=" and ":" trimmed, and a single "=" or ":" is re-inserted before
# the remaining information (subfields b, n, p) when the title or subfield h ended with one.
# @note the first 245 field is used without a nil check - NOTE(review): confirm every record has a 245
# @todo still consider ǂh? medium, which OCLC doc says DO NOT USE...but that is OCLC...
# @todo is punctuation handling still as desired? treatment here is described in spreadsheet from 2011
# @param [MARC::Record] record
# @return [String] single title for display
def show(record)
  field = record.fields('245').first

  primary_values = field.find_all(&subfield_in?(%w[a k])).map do |sf|
    trim_trailing(:comma, trim_trailing(:slash, sf.value).rstrip)
  end
  title_or_form = primary_values.first || ''

  other_info = field.find_all(&subfield_in?(%w[b n p])).map { |sf| trim_trailing(:slash, sf.value) }.join(' ')

  # last character of the first subfield h, if any
  hpunct = field.find_all { |sf| sf.code == 'h' }.map { |sf| sf.value.last }.first

  # "=" takes precedence over ":" when both are candidates
  endings = [title_or_form.last, hpunct]
  punctuation = %w[= :].find { |mark| endings.include?(mark) }

  cleaned_title = trim_trailing(:colon, trim_trailing(:equal, title_or_form)).strip
  [cleaned_title, punctuation, other_info].compact_blank.join(' ')
end
|
73
|
+
|
74
|
+
# Canonical title with non-filing characters relocated to the end.
#
# The title is taken from subfield a of the first 245, falling back to subfield k when there is no
# subfield a. When indicator2 gives a non-filing character count (1-9), that many leading characters are moved
# to the end; otherwise only a leading "[" is moved. Subfields b, n and p are appended to the filing part.
#
# @note Currently we index two "title sort" fields: title_nssort (ssort type - regex token filter applied) and
#   title_sort_tl (text left justified). It is not yet clear why this distinction is useful. For now, use a
#   properly normalized (leading articles and punctuation removed) single title value here.
# @note the first 245 field is used without a nil check - NOTE(review): confirm every record has a 245
# @todo refactor to reduce complexity
# @param [MARC::Record] record
# @return [String] title value for sorting
def sort(record)
  title_field = record.fields('245').first
  # attempt to get number of non-filing characters present, default to 0
  offset = title_field.indicator2 =~ /^[0-9]$/ ? title_field.indicator2.to_i : 0
  raw_title = join_subfields(title_field, &subfield_in?(['a'])) # get title from subfield a
  value = if raw_title.blank?
            # no subfield a: use the form subfield regardless of the non-filing indicator
            handle_bracket_prefix join_subfields(title_field, &subfield_in?(['k']))
          elsif offset.between?(1, 9)
            # slicing past the end of the title yields nil, hence `to_s`
            { prefix: raw_title[0...offset].strip, filing: raw_title[offset..].to_s.strip }
          else
            handle_bracket_prefix raw_title
          end
  value[:filing] = [value[:filing],
                    join_subfields(title_field, &subfield_in?(%w[b n p]))].compact_blank.join(' ')
  [value[:filing], value[:prefix]].join(' ').strip
end
|
104
|
+
|
105
|
+
# Standardized Title
#
# These values are intended for display. There has been distinct logic for storing search values as well
# (see get_standardized_title_values) but this appears only used with Title Browse functionality. Values come
# from 130 ({https://www.oclc.org/bibformats/en/1xx/130.html OCLC docs}) and 240
# ({https://www.oclc.org/bibformats/en/2xx/240.html OCLC docs}) as well as relator fields. Ported from Franklin
# get_standardized_title_display. Returned values from legacy method are "link" hashes.
#
# Sources, in order:
# - every 130 and 240 field
# - 730 fields with an empty indicator and no subfield i
# - 880 fields whose subfield 6 begins with 130, 240 or 730 and that have no subfield i
#
# @note this is simplified from legacy practice as a linking hash is not returned. I believe this only supported
#   title browse and we will not be supporting that at this time
# @param [MARC::Record] record
# @return [Array<String>] Array of standardized titles as strings
def standardized(record)
  standardized_titles = record.fields(%w[130 240]).map do |field|
    join_subfields(field, &subfield_not_in?(%w[0 6 8 e w]))
  end
  standardized_titles += record.fields('730').filter_map do |field|
    # skip unless one of the indicators is blank
    # NOTE(review): only '' is treated as blank here, while #other also accepts ' ' - confirm which is intended
    next unless field.indicator1 == '' || field.indicator2 == ''

    # skip if a subfield i is present
    next if subfield_defined?(field, 'i')

    join_subfields(field, &subfield_not_in?(%w[5 6 8 e w]))
  end
  standardized_titles + record.fields('880').filter_map do |field|
    # subfield 6 holds the linked tag followed by an occurrence number (e.g. "130-01"), so match on the prefix;
    # both conditions must hold, otherwise every 880 lacking a subfield i would be picked up
    next unless subfield_value?(field, '6', /^(130|240|730)/)
    next if subfield_defined?(field, 'i')

    join_subfields field, &subfield_not_in?(%w[5 6 8 e w])
  end
end
|
137
|
+
|
138
|
+
# Other Title for display
#
# Data comes from 246 ({https://www.oclc.org/bibformats/en/2xx/246.html OCLC docs}) and 740
# ({https://www.oclc.org/bibformats/en/7xx/740.html OCLC docs}), plus any 880 field whose subfield 6 begins
# with 246 or 740. 740 fields are only used when indicator2 is empty, blank, 0, 1 or 3.
#
# @param [MARC::Record] record
# @return [Array<String>] Array of other titles as strings
def other(record)
  other_titles = record.fields('246').map do |field|
    join_subfields(field, &subfield_not_in?(%w[6 8]))
  end
  other_titles += record.fields('740').filter_map do |field|
    next unless ['', ' ', '0', '1', '3'].include?(field.indicator2)

    join_subfields(field, &subfield_not_in?(%w[5 6 8]))
  end
  other_titles + record.fields('880').filter_map do |field|
    # subfield 6 holds the linked tag followed by an occurrence number (e.g. "246-01"), so match on the
    # prefix, as #former does for 247
    next unless subfield_value?(field, '6', /^(246|740)/)

    join_subfields(field, &subfield_not_in?(%w[5 6 8]))
  end
end
|
161
|
+
|
162
|
+
# Former Title for display.
# These values come from {https://www.oclc.org/bibformats/en/2xx/247.html 247} and from 880 fields whose
# subfield 6 starts with 247. Subfields e and w are moved to the end of each value.
#
# @note Ported from get_former_title_display. That method returns a hash for constructing a search link.
#   We may need to do something like that eventually.
# @todo what are e and w subfields?
# @param [MARC::Record] record
# @return [Array<String>] array of former titles
def former(record)
  former_fields = record.fields.select do |field|
    field.tag == '247' || (field.tag == '880' && subfield_value?(field, '6', /^247/))
  end

  former_fields.map do |field|
    # 6 and 8 are not meaningful for display
    main_part = join_subfields field, &subfield_not_in?(%w[6 8 e w])
    appended_part = join_subfields field, &subfield_in?(%w[e w])
    [main_part, appended_part].join(' ').strip
  end
end
|
180
|
+
|
181
|
+
private
|
182
|
+
|
183
|
+
# Create prefix/filing hash for representing a title value with filing characters removed, with special
# consideration for bracketed titles: a leading "[" becomes the prefix and the rest, stripped of surrounding
# whitespace, becomes the filing value. Without a leading bracket the prefix is empty.
# @todo Is this still useful?
# @param [String] title
# @return [Hash] hash with :prefix and :filing keys
def handle_bracket_prefix(title)
  bracketed = title.starts_with?('[')
  filing_source = bracketed ? title[1..] : title
  { prefix: bracketed ? '[' : '', filing: filing_source.strip }
end
|
195
|
+
end
|
196
|
+
end
|
197
|
+
end
|