pennmarc 1.0.3 → 1.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitleaks.toml +2 -0
- data/.rubocop_todo.yml +7 -35
- data/lib/pennmarc/helpers/access.rb +52 -0
- data/lib/pennmarc/helpers/date.rb +4 -8
- data/lib/pennmarc/helpers/language.rb +23 -8
- data/lib/pennmarc/mappers.rb +6 -2
- data/lib/pennmarc/mappings/iso639-3-languages.yml +7916 -0
- data/lib/pennmarc/util.rb +8 -0
- data/lib/pennmarc/version.rb +1 -1
- data/spec/lib/pennmarc/helpers/access_spec.rb +58 -0
- data/spec/lib/pennmarc/helpers/creator_spec.rb +10 -5
- data/spec/lib/pennmarc/helpers/date_spec.rb +16 -0
- data/spec/lib/pennmarc/helpers/identifer_spec.rb +0 -1
- data/spec/lib/pennmarc/helpers/language_spec.rb +56 -8
- data/spec/lib/pennmarc/helpers/series_spec.rb +5 -3
- data/spec/lib/pennmarc/marc_util_spec.rb +12 -0
- data/spec/lib/pennmarc/parser_spec.rb +3 -3
- metadata +7 -3
- /data/lib/pennmarc/mappings/{language.yml → iso639-2-languages.yml} +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9aa7e5cf7ade86fc51db4d35791eed5e3e38784e35d33bd9b82379a91ab3786e
|
4
|
+
data.tar.gz: 8b2668fbfd1fc645b203e23f12cfe5681014a870fa504f45c7c0b034682be325
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aff6902488cb0d85bee32f3a3c5f4b613c8c5867ab3b594af40dad4c54e501b735b79fbed1ba8656c1144b87b58a9edc6e0ed961cc62603c7ccefb0c244b3117
|
7
|
+
data.tar.gz: e262a56ed0512de8c93c4ef0f94f87c322904d44ed845a64b9eb8dc58c8c3cc9c62675d52e314c5cd15eba0d7f4daea4725675af0c3e96ac7cb501a55074edf7
|
data/.gitleaks.toml
ADDED
data/.rubocop_todo.yml
CHANGED
@@ -1,36 +1,12 @@
|
|
1
1
|
# This configuration was generated by
|
2
2
|
# `rubocop --auto-gen-config --auto-gen-only-exclude --exclude-limit 10000`
|
3
|
-
# on 2023-08
|
3
|
+
# on 2023-11-08 20:19:45 UTC using RuboCop version 1.51.0.
|
4
4
|
# The point is for the user to remove these configuration records
|
5
5
|
# one by one as the offenses are removed from the code base.
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
7
7
|
# versions of RuboCop, may require this file to be generated again.
|
8
8
|
|
9
|
-
# Offense count:
|
10
|
-
# This cop supports safe autocorrection (--autocorrect).
|
11
|
-
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
12
|
-
# SupportedStyles: with_first_argument, with_fixed_indentation
|
13
|
-
Layout/ArgumentAlignment:
|
14
|
-
Exclude:
|
15
|
-
- 'spec/lib/pennmarc/helpers/series_spec.rb'
|
16
|
-
|
17
|
-
# Offense count: 1
|
18
|
-
# This cop supports safe autocorrection (--autocorrect).
|
19
|
-
# Configuration parameters: EnforcedStyle.
|
20
|
-
# SupportedStyles: empty_lines, no_empty_lines
|
21
|
-
Layout/EmptyLinesAroundBlockBody:
|
22
|
-
Exclude:
|
23
|
-
- 'spec/lib/pennmarc/helpers/identifer_spec.rb'
|
24
|
-
|
25
|
-
# Offense count: 1
|
26
|
-
# This cop supports safe autocorrection (--autocorrect).
|
27
|
-
# Configuration parameters: EnforcedStyle.
|
28
|
-
# SupportedStyles: final_newline, final_blank_line
|
29
|
-
Layout/TrailingEmptyLines:
|
30
|
-
Exclude:
|
31
|
-
- 'spec/lib/pennmarc/helpers/identifer_spec.rb'
|
32
|
-
|
33
|
-
# Offense count: 22
|
9
|
+
# Offense count: 23
|
34
10
|
# Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
|
35
11
|
Metrics/AbcSize:
|
36
12
|
Exclude:
|
@@ -38,6 +14,7 @@ Metrics/AbcSize:
|
|
38
14
|
- 'lib/pennmarc/helpers/edition.rb'
|
39
15
|
- 'lib/pennmarc/helpers/format.rb'
|
40
16
|
- 'lib/pennmarc/helpers/genre.rb'
|
17
|
+
- 'lib/pennmarc/helpers/language.rb'
|
41
18
|
- 'lib/pennmarc/helpers/location.rb'
|
42
19
|
- 'lib/pennmarc/helpers/note.rb'
|
43
20
|
- 'lib/pennmarc/helpers/production.rb'
|
@@ -66,6 +43,7 @@ Metrics/CyclomaticComplexity:
|
|
66
43
|
- 'lib/pennmarc/helpers/edition.rb'
|
67
44
|
- 'lib/pennmarc/helpers/format.rb'
|
68
45
|
- 'lib/pennmarc/helpers/genre.rb'
|
46
|
+
- 'lib/pennmarc/helpers/language.rb'
|
69
47
|
- 'lib/pennmarc/helpers/note.rb'
|
70
48
|
- 'lib/pennmarc/helpers/production.rb'
|
71
49
|
- 'lib/pennmarc/helpers/relation.rb'
|
@@ -104,6 +82,7 @@ Metrics/PerceivedComplexity:
|
|
104
82
|
- 'lib/pennmarc/helpers/edition.rb'
|
105
83
|
- 'lib/pennmarc/helpers/format.rb'
|
106
84
|
- 'lib/pennmarc/helpers/genre.rb'
|
85
|
+
- 'lib/pennmarc/helpers/language.rb'
|
107
86
|
- 'lib/pennmarc/helpers/note.rb'
|
108
87
|
- 'lib/pennmarc/helpers/production.rb'
|
109
88
|
- 'lib/pennmarc/helpers/series.rb'
|
@@ -152,16 +131,9 @@ RSpec/FilePath:
|
|
152
131
|
Exclude:
|
153
132
|
- 'spec/lib/pennmarc/parser_spec.rb'
|
154
133
|
|
155
|
-
# Offense count:
|
134
|
+
# Offense count: 6
|
156
135
|
# Configuration parameters: Max, AllowedGroups.
|
157
136
|
RSpec/NestedGroups:
|
158
137
|
Exclude:
|
138
|
+
- 'spec/lib/pennmarc/helpers/access_spec.rb'
|
159
139
|
- 'spec/lib/pennmarc/helpers/format_spec.rb'
|
160
|
-
|
161
|
-
# Offense count: 2
|
162
|
-
# This cop supports safe autocorrection (--autocorrect).
|
163
|
-
# Configuration parameters: Max, AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns.
|
164
|
-
# URISchemes: http, https
|
165
|
-
Layout/LineLength:
|
166
|
-
Exclude:
|
167
|
-
- 'spec/lib/pennmarc/helpers/creator_spec.rb'
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PennMARC
  # Methods for extracting how a record can be accessed
  class Access < Helper
    ONLINE = 'Online'
    AT_THE_LIBRARY = 'At the library'

    class << self
      # Based primarily on the "enhanced MARC" fields added by Alma, determine if the record has
      # electronic access or has physical holding, and is therefore "Online" or "At the library". If a record is "At the
      # library", but has a link to a finding aid in the 856 field (matching certain criteria), also add 'Online' as an
      # access method. Each access value appears at most once in the result, regardless of how many holding or
      # electronic inventory fields the record carries.
      # @todo What if none of these criteria match? Should we include "At the library" by default? Records with no value
      #   in this field would be lost if the user selects a facet value.
      # @param [MARC::Record] record
      # @return [Array<String>] unique access values
      def facet(record)
        # De-duplicate up front: a record with several holding fields must not yield repeated values, nor be
        # mistaken for a record that already has both access methods.
        acc = record.filter_map { |field|
          next AT_THE_LIBRARY if field.tag == EnrichedMarc::TAG_HOLDING
          next ONLINE if field.tag == EnrichedMarc::TAG_ELECTRONIC_INVENTORY
        }.uniq

        # only inspect the 856 fields when ONLINE isn't already there
        acc << ONLINE if !acc.include?(ONLINE) && finding_aid_linkage?(record)
        acc
      end

      private

      # Check if a record contains an 856 entry for an online finding aid, meeting these criteria:
      #  1. Indicator 1 is 4 (HTTP resource)
      #  2. Indicator 2 is NOT 2 (indicating the linkage is to a "related" thing)
      #  3. The subfield z values include 'Finding aid'
      #  4. The URL specified in subfield u (URI) is a Penn Handle link
      # See: https://www.loc.gov/marc/bibliographic/bd856.html
      # @param [MARC::Record] record
      # @return [Boolean]
      def finding_aid_linkage?(record)
        record.fields('856').any? do |field|
          next false if field.indicator2 == '2' || field.indicator1 != '4'
          next false unless subfield_values(field, 'z').include?('Finding aid')

          subfield_values(field, 'u').any? { |url| url.include?('hdl.library.upenn.edu') }
        end
      end
    end
  end
end
|
@@ -25,21 +25,17 @@ module PennMARC
|
|
25
25
|
# @return [DateTime, nil] The date added, or nil if date found in record is invalid
|
26
26
|
def added(record)
|
27
27
|
record.fields(EnrichedMarc::TAG_ITEM).flat_map { |field|
|
28
|
-
field.filter_map do |
|
29
|
-
# skip unless field has date created subfield
|
30
|
-
next unless subfield_defined?(field, EnrichedMarc::SUB_ITEM_DATE_CREATED)
|
31
|
-
|
28
|
+
subfield_values(field, EnrichedMarc::SUB_ITEM_DATE_CREATED).filter_map do |date_added|
|
32
29
|
# On 2022-05-02, this field value (as exported in enriched publishing
|
33
30
|
# job from Alma) began truncating time to day-level granularity. We have
|
34
31
|
# no guarantee that this won't switch back in the future, so for the
|
35
32
|
# foreseeable future we should support both formats.
|
36
33
|
|
37
|
-
format =
|
38
|
-
|
39
|
-
DateTime.strptime(subfield.value, format)
|
34
|
+
format = date_added.size == 10 ? '%Y-%m-%d' : '%Y-%m-%d %H:%M:%S'
|
40
35
|
|
36
|
+
DateTime.strptime(date_added, format)
|
41
37
|
rescue StandardError => e
|
42
|
-
puts "Error parsing date in date added subfield: #{
|
38
|
+
puts "Error parsing date in date added subfield: #{date_added} - #{e}"
|
43
39
|
nil
|
44
40
|
end
|
45
41
|
}.max
|
@@ -4,10 +4,10 @@ module PennMARC
|
|
4
4
|
# Logic for extracting and translating Language values for a record. Penn practice is to verify the value present in
|
5
5
|
# the {https://www.oclc.org/bibformats/en/fixedfield/lang.html 008 control field} as a three letter code. This code
|
6
6
|
# is then mapped to a display-friendly value using a provided mapping hash.
|
7
|
-
# @todo should we consider values in the {https://www.oclc.org/bibformats/en/0xx/041.html 041 field}?
|
8
7
|
class Language < Helper
|
9
8
|
# Used when no value is present in the control field - still mapped
|
10
9
|
UNDETERMINED_CODE = :und
|
10
|
+
LANGUAGE_SUBFIELDS = %w[a b d e g h i j k m n p q r t].freeze
|
11
11
|
|
12
12
|
class << self
|
13
13
|
# Get language values for display from the {https://www.oclc.org/bibformats/en/5xx/546.html 546 field} and
|
@@ -21,16 +21,31 @@ module PennMARC
|
|
21
21
|
values + linked_alternate(record, '546', &subfield_not_in?(%w[6 8]))
|
22
22
|
end
|
23
23
|
|
24
|
-
# Get language values for searching and faceting of a record. The
|
25
|
-
# the
|
24
|
+
# Get language values for searching and faceting of a record. The values are extracted from subfields
|
25
|
+
# in the 041 field. Language facet and search values will typically be the same, with the exception of `zxx`,
|
26
|
+
# when no linguistic content is found.
|
27
|
+
#
|
28
|
+
# @note In franklin, we extracted the language code from the 008 control field. After engaging cataloging unit
|
29
|
+
# representatives, we decided to extract these values from the 041 field: Includes records for multilingual
|
30
|
+
# items, items that involve translation, and items where the medium of communication is a sign language.
|
31
|
+
# https://www.loc.gov/marc/bibliographic/bd041.html
|
26
32
|
#
|
27
33
|
# @param [MARC::Record] record
|
28
|
-
# @param [Hash]
|
29
|
-
# @
|
30
|
-
|
34
|
+
# @param [Hash] iso_639_2_mapping iso-639-2 spec hash for language code translation
|
35
|
+
# @param [Hash] iso_639_3_mapping iso-639-3 spec hash for language code translation
|
36
|
+
# @return [Array] array of language values
|
37
|
+
def values(record, iso_639_2_mapping: Mappers.iso_639_2_language, iso_639_3_mapping: Mappers.iso_639_3_language)
  # Translate every language-bearing subfield of each 041 field. A field whose subfield 2 names iso639-3 is
  # translated with the ISO 639-3 mapping, all others with ISO 639-2. Codes missing from the mapping are dropped.
  languages = record.fields('041').flat_map { |field|
    mapper = subfield_value?(field, '2', /iso639-3/) ? iso_639_3_mapping : iso_639_2_mapping
    field.filter_map do |sf|
      next unless LANGUAGE_SUBFIELDS.include?(sf.code)

      mapper[sf.value&.to_sym]
    end
  }

  # Characters 35-37 of the 008 control field hold the language code. The 008 may be absent or too short, and
  # its code may be unmapped; in each of those cases nothing is added, so a nil never reaches the caller and the
  # "undetermined" fallback below still applies.
  fixed_code = (record['008']&.value).to_s[35..37]
  fixed_language = iso_639_2_mapping[fixed_code.to_sym] if fixed_code && !fixed_code.strip.empty?
  languages << fixed_language if fixed_language

  languages.empty? ? [iso_639_2_mapping[UNDETERMINED_CODE]] : languages.uniq
end
|
35
50
|
end
|
36
51
|
end
|
data/lib/pennmarc/mappers.rb
CHANGED
@@ -5,8 +5,12 @@ module PennMARC
|
|
5
5
|
class Mappers
|
6
6
|
class << self
|
7
7
|
# @return [Hash]
|
8
|
-
def
|
9
|
-
@
|
8
|
+
# ISO 639-2 language mapping, loaded via load_map on first use and cached on the class afterwards.
def iso_639_2_language
  return @iso_639_2_language if @iso_639_2_language

  @iso_639_2_language = load_map('iso639-2-languages.yml')
end
|
11
|
+
|
12
|
+
# ISO 639-3 language mapping, loaded via load_map on first use and cached on the class afterwards.
def iso_639_3_language
  return @iso_639_3_language if @iso_639_3_language

  @iso_639_3_language = load_map('iso639-3-languages.yml')
end
|
11
15
|
|
12
16
|
# @return [Hash]
|