pennmarc 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/Gemfile +23 -0
- data/Gemfile.lock +119 -0
- data/README.md +82 -0
- data/legacy/indexer.rb +568 -0
- data/legacy/marc.rb +2964 -0
- data/legacy/test_file_output.json +49 -0
- data/lib/pennmarc/encoding_level.rb +43 -0
- data/lib/pennmarc/enriched_marc.rb +36 -0
- data/lib/pennmarc/heading_control.rb +11 -0
- data/lib/pennmarc/helpers/citation.rb +31 -0
- data/lib/pennmarc/helpers/creator.rb +237 -0
- data/lib/pennmarc/helpers/database.rb +89 -0
- data/lib/pennmarc/helpers/date.rb +85 -0
- data/lib/pennmarc/helpers/edition.rb +90 -0
- data/lib/pennmarc/helpers/format.rb +312 -0
- data/lib/pennmarc/helpers/genre.rb +71 -0
- data/lib/pennmarc/helpers/helper.rb +11 -0
- data/lib/pennmarc/helpers/identifier.rb +134 -0
- data/lib/pennmarc/helpers/language.rb +37 -0
- data/lib/pennmarc/helpers/link.rb +12 -0
- data/lib/pennmarc/helpers/location.rb +97 -0
- data/lib/pennmarc/helpers/note.rb +132 -0
- data/lib/pennmarc/helpers/production.rb +131 -0
- data/lib/pennmarc/helpers/relation.rb +135 -0
- data/lib/pennmarc/helpers/series.rb +118 -0
- data/lib/pennmarc/helpers/subject.rb +304 -0
- data/lib/pennmarc/helpers/title.rb +197 -0
- data/lib/pennmarc/mappings/language.yml +516 -0
- data/lib/pennmarc/mappings/locations.yml +1801 -0
- data/lib/pennmarc/mappings/relator.yml +263 -0
- data/lib/pennmarc/parser.rb +177 -0
- data/lib/pennmarc/util.rb +240 -0
- data/lib/pennmarc.rb +6 -0
- data/pennmarc.gemspec +22 -0
- data/spec/fixtures/marcxml/test.xml +167 -0
- data/spec/lib/pennmarc/helpers/citation_spec.rb +27 -0
- data/spec/lib/pennmarc/helpers/creator_spec.rb +183 -0
- data/spec/lib/pennmarc/helpers/database_spec.rb +60 -0
- data/spec/lib/pennmarc/helpers/date_spec.rb +105 -0
- data/spec/lib/pennmarc/helpers/edition_spec.rb +38 -0
- data/spec/lib/pennmarc/helpers/format_spec.rb +200 -0
- data/spec/lib/pennmarc/helpers/genre_spec.rb +89 -0
- data/spec/lib/pennmarc/helpers/identifer_spec.rb +105 -0
- data/spec/lib/pennmarc/helpers/language_spec.rb +30 -0
- data/spec/lib/pennmarc/helpers/location_spec.rb +70 -0
- data/spec/lib/pennmarc/helpers/note_spec.rb +233 -0
- data/spec/lib/pennmarc/helpers/production_spec.rb +193 -0
- data/spec/lib/pennmarc/helpers/relation_spec.rb +120 -0
- data/spec/lib/pennmarc/helpers/subject_spec.rb +262 -0
- data/spec/lib/pennmarc/helpers/title_spec.rb +169 -0
- data/spec/lib/pennmarc/marc_util_spec.rb +206 -0
- data/spec/lib/pennmarc/parser_spec.rb +13 -0
- data/spec/spec_helper.rb +104 -0
- data/spec/support/marc_spec_helpers.rb +84 -0
- metadata +171 -0
@@ -0,0 +1,304 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PennMARC
|
4
|
+
# This helper extracts subject heading in various ways to facilitate searching, faceting and display of subject
|
5
|
+
# values. Michael Gibney did a lot to "clean up" Subject parsing in discovery-app, but much of it was intended to
|
6
|
+
# support features (xfacet) that we will no longer support, and ties display and xfacet field parsing together too
|
7
|
+
# tightly to be preserved. As a result of this, display methods and facet methods below are ported from their state
|
8
|
+
# prior to Michael's 2/2021 subject parsing changes.
|
9
|
+
class Subject < Helper
|
10
|
+
class << self
|
11
|
+
# Tags that serve as sources for Subject search values
|
12
|
+
# @todo why are 541 and 561 included here? these fields include info about source of acquisition
|
13
|
+
SEARCH_TAGS = %w[541 561 600 610 611 630 650 651 653].freeze
|
14
|
+
|
15
|
+
# Valid indicator 2 values indicating the source thesaurus for subject terms. These are:
|
16
|
+
# - 0: LCSH
|
17
|
+
# - 1: LC Children's
|
18
|
+
# - 2: MeSH
|
19
|
+
# - 4: Source not specified (local?)
|
20
|
+
# - 7: Source specified in ǂ2
|
21
|
+
SEARCH_SOURCE_INDICATORS = %w[0 1 2 4 7].freeze
|
22
|
+
|
23
|
+
# Tags that serve as sources for Subject facet values
|
24
|
+
DISPLAY_TAGS = %w[600 610 611 630 650 651].freeze
|
25
|
+
|
26
|
+
# Local subject heading tags
|
27
|
+
LOCAL_TAGS = %w[690 691 697].freeze
|
28
|
+
|
29
|
+
# All Subjects for searching. This includes most subfield content from any field contained in {SEARCH_TAGS} or 69X,
|
30
|
+
# including any linked 880 fields. Fields must have an indicator2 value in {SEARCH_SOURCE_INDICATORS}.
|
31
|
+
# @todo this includes subfields that may not be desired like 1 (uri) and 2 (source code) but this might be OK for
|
32
|
+
# a search (non-display) field?
|
33
|
+
# @param [Hash] relator_map
|
34
|
+
# @param [MARC::Record] record
|
35
|
+
# @return [Array] array of all subject values for search
|
36
|
+
def search(record, relator_map)
  subject_fields(record, type: :search).filter_map do |field|
    subj_parts = field.filter_map do |subfield|
      # TODO: use term hash here? pro/chr would be rejected...
      # TODO: should we care about punctuation in a search field? relator mapping?
      case subfield.code
      when '5', '6', '8' then next
      when 'a'
        # Remove a leading %PRO / PRO / %CHR / CHR marker and any trailing "?". The marker is only
        # recognized when followed by a space, a literal "$" or the end of the value - the same rule
        # build_subject_hash applies - so ordinary headings that merely begin with those letters
        # (e.g. "CHRISTMAS") are no longer truncated.
        subfield.value.gsub(/^%?(PRO|CHR)(?=[ $]|$)/, '').gsub(/\?$/, '').strip
      when '4'
        # TODO: use relation mapping method from Title helper? for potential URI support?
        # sf 4 should contain a 3-letter code or URI "that specifies the relationship from the entity described
        # in the record to the entity referenced in the field". Index both the code and its mapped label.
        "#{subfield.value} #{relator_map[subfield.value.to_sym]}".strip
      else
        subfield.value
      end
    end
    # fields contributing no subfield values produce no search entry
    next if subj_parts.empty?

    join_and_squish subj_parts
  end
end
|
61
|
+
|
62
|
+
# All Subjects for faceting
|
63
|
+
#
|
64
|
+
# @note this is ported mostly from MG's new-style Subject parsing
|
65
|
+
# @param [MARC::Record] record
|
66
|
+
# @return [Array] array of all subject values for faceting
|
67
|
+
def facet(record)
  subject_fields(record, type: :facet).each_with_object([]) do |field, values|
    term_hash = build_subject_hash(field)
    # a nil hash means the heading was rejected; a zero count means no term components were found
    next if term_hash.blank? || term_hash[:count]&.zero?

    formatted = format_term(type: :facet, term: term_hash)
    values << formatted if formatted
  end
end
|
75
|
+
|
76
|
+
# All Subjects for display. This includes all {DISPLAY_TAGS} and {LOCAL_TAGS}. For tags that specify a source,
|
77
|
+
# only those with an allowed source code (see ALLOWED_SOURCE_CODES) are included.
|
78
|
+
#
|
79
|
+
# @param [MARC::Record] record
|
80
|
+
# @return [Array] array of all subject values for display
|
81
|
+
def show(record)
  headings = subject_fields(record, type: :all).filter_map do |field|
    term_hash = build_subject_hash(field)
    # rejected headings (nil hash) and headings without any term components are left out
    format_term(type: :display, term: term_hash) unless term_hash.blank? || term_hash[:count]&.zero?
  end
  headings.uniq
end
|
89
|
+
|
90
|
+
# Get Subjects from "Children" ontology
|
91
|
+
#
|
92
|
+
# @param [MARC::Record] record
|
93
|
+
# @return [Array] array of children's subject values for display
|
94
|
+
def childrens_show(record)
  # indicator2 of 1 is the LC Children's thesaurus (see SEARCH_SOURCE_INDICATORS notes)
  childrens_fields = subject_fields(record, type: :display, options: { tags: DISPLAY_TAGS, indicator2: '1' })
  terms = childrens_fields.filter_map do |field|
    term_hash = build_subject_hash(field)
    format_term(type: :display, term: term_hash) unless term_hash.blank? || term_hash[:count]&.zero?
  end
  terms.uniq
end
|
103
|
+
|
104
|
+
# Get Subjects from "MeSH" ontology
|
105
|
+
#
|
106
|
+
# @param [MARC::Record] record
|
107
|
+
# @return [Array] array of MeSH subject values for display
|
108
|
+
def medical_show(record)
  # indicator2 of 2 is the MeSH thesaurus (see SEARCH_SOURCE_INDICATORS notes)
  mesh_fields = subject_fields(record, type: :display, options: { tags: DISPLAY_TAGS, indicator2: '2' })
  terms = mesh_fields.filter_map do |field|
    term_hash = build_subject_hash(field)
    format_term(type: :display, term: term_hash) unless term_hash.blank? || term_hash[:count]&.zero?
  end
  terms.uniq
end
|
117
|
+
|
118
|
+
# Get Subject values from {DISPLAY_TAGS} where indicator2 is 4 and {LOCAL_TAGS}. Do not include any values where
|
119
|
+
# sf2 includes "penncoi" (Community of Interest).
|
120
|
+
#
|
121
|
+
# @param [MARC::Record] record
|
122
|
+
# @return [Array] array of local subject values for display
|
123
|
+
def local_show(record)
  # local headings come from two places: standard subject tags flagged with indicator2 4, and the 69X tags
  flagged_local = subject_fields(record, type: :display, options: { tags: DISPLAY_TAGS, indicator2: '4' })
  tagged_local = subject_fields(record, type: :local)

  # headings whose subfield 2 mentions "penncoi" are never shown
  displayable = (flagged_local + tagged_local).reject { |field| subfield_value?(field, '2', /penncoi/) }

  terms = displayable.filter_map do |field|
    term_hash = build_subject_hash(field)
    format_term(type: :display, term: term_hash) unless term_hash.blank? || term_hash[:count]&.zero?
  end
  terms.uniq
end
|
135
|
+
|
136
|
+
private
|
137
|
+
|
138
|
+
# Get subject fields from a record based on expected usage type. Valid types are currently:
|
139
|
+
# - search
|
140
|
+
# - facet
|
141
|
+
# - display
|
142
|
+
# - local
|
143
|
+
# @param [MARC::Record] record
|
144
|
+
# @param [String, Symbol] type of fields desired
|
145
|
+
# @param [Hash] options to be passed to the selector method
|
146
|
+
# @return [Array<MARC::DataField>] selected fields
|
147
|
+
# @raise [ArgumentError] if the requested type has no selector method
def subject_fields(record, type:, options: {})
  selector_method = case type.to_sym
                    when :search then :subject_search_field?
                    when :facet then :subject_facet_field?
                    when :display then :subject_display_field?
                    when :local then :subject_local_field?
                    when :all then :subject_general_display_field?
                    else
                      # `raise ArgumentError("...")` would try to call a method named ArgumentError and
                      # fail with NoMethodError; pass the class and message to raise instead.
                      raise ArgumentError, "Unsupported type specified: #{type}"
                    end
  record.fields.find_all do |field|
    # only selectors that accept options (currently :display) receive them
    options.any? ? send(selector_method, field, options) : send(selector_method, field)
  end
end
|
161
|
+
|
162
|
+
# Format a term hash as a string for display
|
163
|
+
#
|
164
|
+
# @todo confirm punctuation handling
|
165
|
+
# @todo support search field formatting?
|
166
|
+
# @param [Symbol] type
|
167
|
+
# @param [Hash] term components and information as a hash
|
168
|
+
# @return [String]
|
169
|
+
def format_term(type:, term:)
  return unless %i[facet display].include?(type)

  # a heading built from a single subfield may hide several mashed-together subdivisions;
  # normalize_single_subfield rewrites that part in place
  normalize_single_subfield(term[:parts].first) if term[:count] == 1

  pieces = [term[:parts].join('--'), term[:lasts].join(' ')]
  # appended values (e.g. relator terms) are only shown in the display form
  pieces << term[:append].join(' ') if type == :display
  pieces.join(' ').strip
end
|
181
|
+
|
182
|
+
# Is a field intended for display in a general subject field
|
183
|
+
# @param [MARC::DataField] field
|
184
|
+
# @return [Boolean] whether a MARC field is intended for display under general "Subjects"
|
185
|
+
def subject_general_display_field?(field)
  subject_tag = (DISPLAY_TAGS + LOCAL_TAGS).include?(field.tag)
  return false unless subject_tag

  # indicator2 of 7 means the source is named in subfield 2, which must then be an allowed source code
  source_rejected = field.indicator2 == '7' && !valid_subject_genre_source_code?(field)
  !source_rejected
end
|
192
|
+
|
193
|
+
# @param [MARC::DataField] field
|
194
|
+
# @return [Boolean] whether a MARC field is a local subject field (69X)
|
195
|
+
def subject_local_field?(field)
  # membership test against the local (69X) subject tags
  LOCAL_TAGS.include?(field.tag)
end
|
198
|
+
|
199
|
+
# @param [MARC::DataField] field
|
200
|
+
# @param [Hash] options include :tags and :indicator2 values
|
201
|
+
# @return [Boolean] whether a MARC field should be considered for display
|
202
|
+
def subject_display_field?(field, options)
  return false if field.blank?

  # Callers pass :indicator2 as a single String (e.g. '4'). Coerce both options to arrays so the
  # checks are true membership tests: matching against a bare String is a substring test, which
  # accepts an empty indicator and raises on a nil one.
  allowed_tags = Array(options[:tags])
  allowed_indicators = Array(options[:indicator2])

  # the tag is tested first so fields without indicators (control fields) short-circuit to false
  allowed_tags.include?(field.tag) && allowed_indicators.include?(field.indicator2)
end
|
209
|
+
|
210
|
+
# @param [MARC::DataField] field
|
211
|
+
# @return [Boolean]
|
212
|
+
def subject_facet_field?(field)
  return false if field.blank?

  # Only subject tags may be faceted. Checking the tag up front also keeps a field with
  # indicator2 7 and an allowed source code, but a non-subject tag, from being faceted as a
  # Subject, and means fields without indicators (control fields) never reach the indicator checks.
  return false unless DISPLAY_TAGS.include?(field.tag)

  # 0, 2 and 4: LCSH, MeSH and "source not specified" (see SEARCH_SOURCE_INDICATORS notes)
  return true if %w[0 2 4].include?(field.indicator2)

  # 7: source is given in subfield 2 and must be an allowed source code
  return true if field.indicator2 == '7' && valid_subject_genre_source_code?(field)

  false
end
|
221
|
+
|
222
|
+
# Build a hash of Subject field components for analysis or for building a string.
|
223
|
+
#
|
224
|
+
# @note Note that we must separately track count (as opposed to simply checking `parts.size`),
|
225
|
+
# because we're using (where? - MK) "subdivision count" as a heuristic for the quality level of the
|
226
|
+
# heading. - MG
|
227
|
+
# @todo do i need all this?
|
228
|
+
# @todo do i need to handle punctuation? see append_new_part
|
229
|
+
# @param [MARC::DataField] field
|
230
|
+
# @return [Hash{Symbol->Integer | Array}]
|
231
|
+
def build_subject_hash(field)
  # local subject heading: flagged by indicator2 4 or carried in a 69X tag
  local_heading = field.indicator2 == '4' || field.tag.start_with?('69')
  term_info = { count: 0, parts: [], append: [], lasts: [], uri: nil,
                local: local_heading, vernacular: field.tag == '880' }

  field.each do |subfield|
    code = subfield.code
    # subfields 0, 5, 6 and 8 are explicitly ignored
    next if %w[0 5 6 8].include?(code)

    # filter out PRO/CHR entirely (only checked on local heading types); the whole field is rejected
    return nil if code == 'a' && local_heading && subfield.value =~ /^%?(PRO|CHR)([ $]|$)/

    text = subfield.value.strip
    case code
    when '1' then term_info[:uri] = text
    when '2' then term_info[:source] = text
    when 'e', 'w'
      # appended for record-view display only; these do not count as term components
      term_info[:append] << text # TODO: map relator code?
    else
      # b/c/d/p/q/t are joined onto the end of the heading without a "--" delimiter; everything
      # else (a, and typically g, v, x, y, z, 4) becomes its own "--"-delimited part
      bucket = %w[b c d p q t].include?(code) ? :lasts : :parts
      term_info[bucket] << text
      term_info[:count] += 1
    end
  end
  term_info
end
|
266
|
+
|
267
|
+
# Determine if a field should be considered for Subject search inclusion. It must be either contained in
|
268
|
+
# {SEARCH_TAGS}, be an 880 field otherwise linked to a valid Search tag, or be in {LOCAL_TAGS}.
|
269
|
+
# @param [MARC::DataField] field
|
270
|
+
# @return [Boolean]
|
271
|
+
def subject_search_field?(field)
  return false if field.blank?

  # subject_fields passes every field of the record through this selector, including fields that
  # carry no indicators at all (control fields); those can never be subject fields, and asking
  # them for indicator2 would raise NoMethodError
  return false unless field.respond_to?(:indicator2)
  return false unless SEARCH_SOURCE_INDICATORS.include?(field.indicator2)

  # for a linked 880, the tag to evaluate comes from the first subfield 6 value
  tag = field.tag == '880' ? subfield_values(field, '6').first : field.tag
  subject_search_tag? tag
end
|
281
|
+
|
282
|
+
# Is a given tag a subject search field? Yes if it is contained in {SEARCH_TAGS} or starts with 69.
|
283
|
+
# @param [String, NilClass] tag
|
284
|
+
# @return [Boolean]
|
285
|
+
def subject_search_tag?(tag)
  return false if tag.blank?

  # only the first three characters matter, so linkage values such as "650-01" are accepted
  prefix = tag[0..2]
  SEARCH_TAGS.include?(prefix) || prefix.start_with?('69')
end
|
291
|
+
|
292
|
+
# when we've only encountered one subfield, assume that it might be a poorly-coded record
|
293
|
+
# with a bunch of subdivisions mashed together, and attempt to convert it to a consistent
|
294
|
+
# form.
|
295
|
+
# @param [String] first_part
|
296
|
+
# @return [String, nil] normalized string
|
297
|
+
def normalize_single_subfield(first_part)
  # A heading whose only counted subfield landed in `lasts` (e.g. a lone subfield b) has a count
  # of 1 but no parts, so the caller hands us nil; there is nothing to normalize in that case.
  return if first_part.nil?

  # The string is rewritten in place. The return value is whatever the final gsub! returns
  # (nil when that substitution changed nothing), so callers should rely on the mutation only.
  # "Term -- Sub" / "Term --Sub" / "Term-- Sub"  =>  "Term--Sub"
  first_part.gsub!(/([[[:alnum:]])])(\s+--\s*|\s*--\s+)([[[:upper:]][[:digit:]]])/, '\1--\3')
  # "Term - Sub" or "Term - 1900" (letter, spaced hyphen, capital or 2+ digits)  =>  "Term--Sub"
  first_part.gsub!(/([[[:alpha:]])])\s+-\s+([[:upper:]]|[[:digit:]]{2,})/, '\1--\2')
  # "1900 - Sub" (alphanumeric, spaced hyphen, capital)  =>  "1900--Sub"
  first_part.gsub!(/([[[:alnum:]])])\s+-\s+([[:upper:]])/, '\1--\2')
end
|
302
|
+
end
|
303
|
+
end
|
304
|
+
end
|
@@ -0,0 +1,197 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PennMARC
|
4
|
+
# This helper contains logic for parsing out Title and Title-related fields.
|
5
|
+
class Title < Helper
|
6
|
+
class << self
|
7
|
+
# these will be used when completing the *search_aux methods
|
8
|
+
AUX_TITLE_TAGS = {
|
9
|
+
main: %w[130 210 240 245 246 247 440 490 730 740 830],
|
10
|
+
related: %w[773 774 780 785],
|
11
|
+
entity: %w[700 710 711]
|
12
|
+
}.freeze
|
13
|
+
|
14
|
+
# Main Title Search field. Takes from 245 and linked 880.
|
15
|
+
# @note Ported from get_title_1_search_values.
|
16
|
+
# @param [MARC::Record] record
|
17
|
+
# @return [Array<String>] array of title values for search
|
18
|
+
def search(record)
  # subfields c, 6, 8 and h are left out of the search value
  excluded = %w[c 6 8 h]
  main_titles = record.fields('245').filter_map do |field|
    join_subfields(field, &subfield_not_in?(excluded))
  end
  # 880 fields are included when their subfield 6 value matches /245/
  linked_fields = record.fields('880').select { |field| subfield_value?(field, '6', /245/) }
  linked_titles = linked_fields.filter_map do |field|
    join_subfields(field, &subfield_not_in?(excluded))
  end
  main_titles + linked_titles
end
|
28
|
+
|
29
|
+
# Auxiliary Title Search field. Takes from many fields that contain title-like information.
|
30
|
+
# @note Ported from get_title_2_search_values.
|
31
|
+
# @todo port this, it is way complicated but essential for relevance
|
32
|
+
# @param [MARC::Record] record
|
33
|
+
# @return [Array<String>] array of title values for search
|
34
|
+
# Not yet implemented: the empty body returns nil rather than the Array<String> described in the YARD tags.
def search_aux(record); end
|
35
|
+
|
36
|
+
# Journal Title Search field.
|
37
|
+
# @todo port this, it is way complicated but essential for relevance
|
38
|
+
# @param [MARC::Record] record
|
39
|
+
# @return [Array<String>] journal title information for search
|
40
|
+
# Not yet implemented: the empty body returns nil rather than the Array<String> described in the YARD tags.
def journal_search(record); end
|
41
|
+
|
42
|
+
# Auxiliary Journal Title Search field.
|
43
|
+
# @todo port this, it is way complicated but essential for relevance
|
44
|
+
# @param [MARC::Record] record
|
45
|
+
# @return [Array<String>] journal title information for search
|
46
|
+
# Not yet implemented: the empty body returns nil rather than the Array<String> described in the YARD tags.
def journal_search_aux(record); end
|
47
|
+
|
48
|
+
# Single-valued Title, for use in headings. Takes the first {https://www.oclc.org/bibformats/en/2xx/245.html 245}
|
49
|
+
# value. Special consideration for
|
50
|
+
# {https://www.oclc.org/bibformats/en/2xx/245.html#punctuation punctuation practices}.
|
51
|
+
# @todo still consider ǂh? medium, which OCLC doc says DO NOT USE...but that is OCLC...
|
52
|
+
# @todo is punctuation handling still as desired? treatment here is described in spreadsheet from 2011
|
53
|
+
# @param [MARC::Record] record
|
54
|
+
# @return [String] single title for display
|
55
|
+
def show(record)
  # NOTE(review): a record with no 245 leaves `field` nil and the calls below raise - confirm
  # whether such records can reach this helper
  field = record.fields('245').first

  # title proper ($a) or form ($k): trailing slash, whitespace and comma removed; first one wins
  title_subfields = field.find_all(&subfield_in?(%w[a k]))
  cleaned_titles = title_subfields.map do |sf|
    trim_trailing(:comma, trim_trailing(:slash, sf.value).rstrip)
  end
  title_or_form = cleaned_titles.first || ''

  # remainder of title, part number and part name, joined in field order
  other_parts = field.find_all(&subfield_in?(%w[b n p])).map { |sf| trim_trailing(:slash, sf.value) }
  other_info = other_parts.join(' ')

  # the medium subfield ($h) is not displayed, but its final character may carry the punctuation
  hpunct = field.find_all { |sf| sf.code == 'h' }.map { |sf| sf.value.last }.first

  # "=" wins over ":" when either the title or $h ends with it
  endings = [title_or_form.last, hpunct]
  punctuation = %w[= :].find { |mark| endings.include?(mark) }

  bare_title = trim_trailing(:colon, trim_trailing(:equal, title_or_form)).strip
  [bare_title, punctuation, other_info].compact_blank.join(' ')
end
|
73
|
+
|
74
|
+
# Canonical title with non-filing characters relocated to the end.
|
75
|
+
#
|
76
|
+
# @note Currently we index two "title sort" fields: title_nssort (ssort type - regex token filter applied) and
|
77
|
+
# title_sort_tl (text left justified). It is not yet clear why this distinction is useful. For now, use a
|
78
|
+
# properly normalized (leading articles and punctuation removed) single title value here.
|
79
|
+
# @todo refactor to reduce complexity
|
80
|
+
# @param [MARC::Record] record
|
81
|
+
# @return [String] title value for sorting
|
82
|
+
def sort(record)
  # NOTE(review): a record with no 245 leaves `title_field` nil and the calls below raise -
  # confirm whether such records can reach this helper
  title_field = record.fields('245').first
  # attempt to get number of non-filing characters present, default to 0
  offset = if title_field.indicator2 =~ /^[0-9]$/
             title_field.indicator2.to_i
           else
             0
           end
  raw_title = join_subfields(title_field, &subfield_in?(['a'])) # get title from subfield a
  # Only honor the non-filing offset when subfield a is at least that long. Slicing a shorter
  # (or missing) title from `offset` yields nil, which used to raise on `.strip`; such titles now
  # fall through to the plain-title / form ($k) handling below.
  value = if offset.between?(1, 9) && raw_title.to_s.length >= offset
            { prefix: raw_title[0..offset - 1].strip, filing: raw_title[offset..].strip }
          elsif raw_title.present?
            handle_bracket_prefix raw_title
          else
            # no usable subfield a: fall back to the form subfield (k)
            raw_form = join_subfields(title_field, &subfield_in?(['k']))
            handle_bracket_prefix raw_form
          end
  # remainder of title, part number and part name follow the filing portion
  value[:filing] = [value[:filing],
                    join_subfields(title_field, &subfield_in?(%w[b n p]))].compact_blank.join(' ')
  # the non-filing prefix is relocated to the end of the sort value
  [value[:filing], value[:prefix]].join(' ').strip
end
|
104
|
+
|
105
|
+
# Standardized Title
|
106
|
+
#
|
107
|
+
# These values are intended for display. There has been distinct logic for storing search values as well
|
108
|
+
# (see get_standardized_title_values) but this appears only used with Title Browse functionality. Values come
|
109
|
+
# from 130 ({https://www.oclc.org/bibformats/en/1xx/130.html OCLC docs}) and 240
|
110
|
+
# ({https://www.oclc.org/bibformats/en/2xx/240.html OCLC docs}) as well as relator fields. Ported from Franklin
|
111
|
+
# get_standardized_title_display. Returned values from legacy method are "link" hashes.
|
112
|
+
|
113
|
+
# @note this is simplified from legacy practice as a linking hash is not returned. I believe this only supported
|
114
|
+
# title browse and we will not be supporting that at this time
|
115
|
+
# @param [MARC::Record] record
|
116
|
+
# @return [Array<String>] Array of standardized titles as strings
|
117
|
+
def standardized(record)
  uniform_titles = record.fields(%w[130 240]).map do |field|
    join_subfields(field, &subfield_not_in?(%w[0 6 8 e w]))
  end

  # 730: keep fields with at least one empty-string indicator and no subfield i
  # NOTE(review): only '' is treated as blank here, while `other` also accepts ' ' - confirm
  # which form the parser produces for blank indicators
  added_fields = record.fields('730')
                       .select { |field| field.indicator1 == '' || field.indicator2 == '' }
                       .reject { |field| subfield_defined?(field, 'i') }
  added_titles = added_fields.filter_map do |field|
    join_subfields(field, &subfield_not_in?(%w[5 6 8 e w]))
  end

  # NOTE(review): as written, an 880 passes when it lacks subfield i OR its subfield 6 is one of
  # 130/240/730, so any 880 without subfield i is included whatever it links to - confirm whether
  # both conditions were meant to be required
  linked_fields = record.fields('880').select do |field|
    subfield_undefined?(field, 'i') || subfield_value_in?(field, '6', %w[130 240 730])
  end
  linked_titles = linked_fields.filter_map do |field|
    join_subfields field, &subfield_not_in?(%w[5 6 8 e w])
  end

  uniform_titles + added_titles + linked_titles
end
|
137
|
+
|
138
|
+
# Other Title for display
|
139
|
+
#
|
140
|
+
# Data comes from 246 ({https://www.oclc.org/bibformats/en/2xx/246.html OCLC docs}) and 740
|
141
|
+
# ({https://www.oclc.org/bibformats/en/7xx/740.html OCLC docs})
|
142
|
+
#
|
143
|
+
# @param [MARC::Record] record
|
144
|
+
# @return [Array<String>] Array of other titles as strings
|
145
|
+
def other(record)
  varying_titles = record.fields('246').map do |field|
    join_subfields(field, &subfield_not_in?(%w[6 8]))
  end

  # 740 fields are only used when indicator2 is empty, a space, 0, 1 or 3
  uncontrolled_titles = record.fields('740').filter_map do |field|
    join_subfields(field, &subfield_not_in?(%w[5 6 8])) if ['', ' ', '0', '1', '3'].include?(field.indicator2)
  end

  # 880 fields selected by their subfield 6 value
  linked_titles = record.fields('880').filter_map do |field|
    join_subfields(field, &subfield_not_in?(%w[5 6 8])) if subfield_value_in?(field, '6', %w[246 740])
  end

  varying_titles + uncontrolled_titles + linked_titles
end
|
161
|
+
|
162
|
+
# Former Title for display.
|
163
|
+
# These values come from {https://www.oclc.org/bibformats/en/2xx/247.html 247}.
|
164
|
+
#
|
165
|
+
# @note Ported from get_former_title_display. That method returns a hash for constructing a search link.
|
166
|
+
# We may need to do something like that eventually.
|
167
|
+
# @todo what are e and w subfields?
|
168
|
+
# @param [MARC::Record] record
|
169
|
+
# @return [Array<String>] array of former titles
|
170
|
+
def former(record)
  # 247 fields plus any 880 whose subfield 6 value starts with 247
  former_fields = record.fields.select do |field|
    field.tag == '247' || (field.tag == '880' && subfield_value?(field, '6', /^247/))
  end
  former_fields.map do |field|
    # 6 and 8 are not meaningful for display; e and w are moved to the end of the value
    main_part = join_subfields(field, &subfield_not_in?(%w[6 8 e w]))
    appended_part = join_subfields(field, &subfield_in?(%w[e w]))
    "#{main_part} #{appended_part}".strip
  end
end
|
180
|
+
|
181
|
+
private
|
182
|
+
|
183
|
+
# Create prefix/filing hash for representing a title value with filing characters removed, with special
|
184
|
+
# consideration for bracketed titles
|
185
|
+
# @todo Is this still useful?
|
186
|
+
# @param [String] title
|
187
|
+
# @return [Hash]
|
188
|
+
def handle_bracket_prefix(title)
  # titles that do not open with a bracket have no prefix to set aside
  return { prefix: '', filing: title.strip } unless title.start_with?('[')

  # the opening bracket becomes the prefix; the rest is the filing portion
  { prefix: '[', filing: title[1..].strip }
end
|
195
|
+
end
|
196
|
+
end
|
197
|
+
end
|