pennmarc 1.0.3 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitleaks.toml +2 -0
- data/lib/pennmarc/helpers/date.rb +4 -8
- data/lib/pennmarc/helpers/language.rb +23 -8
- data/lib/pennmarc/mappers.rb +6 -2
- data/lib/pennmarc/mappings/iso639-3-languages.yml +7916 -0
- data/lib/pennmarc/version.rb +1 -1
- data/spec/lib/pennmarc/helpers/date_spec.rb +16 -0
- data/spec/lib/pennmarc/helpers/language_spec.rb +56 -8
- data/spec/lib/pennmarc/parser_spec.rb +3 -3
- metadata +5 -3
- /data/lib/pennmarc/mappings/{language.yml → iso639-2-languages.yml} +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 50fd0383f0e78807f62f8abe784f75d8dace6e5b0ecf64877f79ad90b6d40354
|
4
|
+
data.tar.gz: 3df19d10534fc787c55814e30bd1b066a8c5ea10e7da15f9b320c4500891e0be
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 277bb7c15e224c8134b8cea8b28de474d27022a1537904844af8c5542ff3f0080c37f13db38a5a9463a07453bcc403ccf346c2b5538c2fe26012df6a67dec24f
|
7
|
+
data.tar.gz: 1ca8e643758f86aeabdfbeb1576f513c866ba5ef1a40b6edaa279f687af7644baf6b953db6f078548fc72ca99b75b6377db7d80d98a19059c797b6ad41b5f648
|
data/.gitleaks.toml
ADDED
data/lib/pennmarc/helpers/date.rb
CHANGED
@@ -25,21 +25,17 @@ module PennMARC
|
|
25
25
|
# @return [DateTime, nil] The date added, or nil if date found in record is invalid
|
26
26
|
def added(record)
|
27
27
|
record.fields(EnrichedMarc::TAG_ITEM).flat_map { |field|
|
28
|
-
field.filter_map do |
|
29
|
-
# skip unless field has date created subfield
|
30
|
-
next unless subfield_defined?(field, EnrichedMarc::SUB_ITEM_DATE_CREATED)
|
31
|
-
|
28
|
+
subfield_values(field, EnrichedMarc::SUB_ITEM_DATE_CREATED).filter_map do |date_added|
|
32
29
|
# On 2022-05-02, this field value (as exported in enriched publishing
|
33
30
|
# job from Alma) began truncating time to day-level granularity. We have
|
34
31
|
# no guarantee that this won't switch back in the future, so for the
|
35
32
|
# foreseeable future we should support both formats.
|
36
33
|
|
37
|
-
format =
|
38
|
-
|
39
|
-
DateTime.strptime(subfield.value, format)
|
34
|
+
format = date_added.size == 10 ? '%Y-%m-%d' : '%Y-%m-%d %H:%M:%S'
|
40
35
|
|
36
|
+
DateTime.strptime(date_added, format)
|
41
37
|
rescue StandardError => e
|
42
|
-
puts "Error parsing date in date added subfield: #{
|
38
|
+
puts "Error parsing date in date added subfield: #{date_added} - #{e}"
|
43
39
|
nil
|
44
40
|
end
|
45
41
|
}.max
|
data/lib/pennmarc/helpers/language.rb
CHANGED
@@ -4,10 +4,10 @@ module PennMARC
|
|
4
4
|
# Logic for extracting and translating Language values for a record. Penn practice is to verify the value present in
|
5
5
|
# the {https://www.oclc.org/bibformats/en/fixedfield/lang.html 008 control field} as a three letter code. This code
|
6
6
|
# is then mapped to a display-friendly value using a provided mapping hash.
|
7
|
-
# @todo should we consider values in the {https://www.oclc.org/bibformats/en/0xx/041.html 041 field}?
|
8
7
|
class Language < Helper
|
9
8
|
# Used when no value is present in the control field - still mapped
|
10
9
|
UNDETERMINED_CODE = :und
|
10
|
+
LANGUAGE_SUBFIELDS = %w[a b d e g h i j k m n p q r t].freeze
|
11
11
|
|
12
12
|
class << self
|
13
13
|
# Get language values for display from the {https://www.oclc.org/bibformats/en/5xx/546.html 546 field} and
|
@@ -21,16 +21,31 @@ module PennMARC
|
|
21
21
|
values + linked_alternate(record, '546', &subfield_not_in?(%w[6 8]))
|
22
22
|
end
|
23
23
|
|
24
|
-
# Get language values for searching and faceting of a record. The
|
25
|
-
# the
|
24
|
+
# Get language values for searching and faceting of a record. The values are extracted from subfields
|
25
|
+
# in the 041 field. Language facet and search values will typically be the same, with the exception of `zxx`,
|
26
|
+
# when no linguistic content is found.
|
27
|
+
#
|
28
|
+
# @note In franklin, we extracted the language code from the 008 control field. After engaging cataloging unit
|
29
|
+
# representatives, we decided to extract these values from the 041 field: Includes records for multilingual
|
30
|
+
# items, items that involve translation, and items where the medium of communication is a sign language.
|
31
|
+
# https://www.loc.gov/marc/bibliographic/bd041.html
|
26
32
|
#
|
27
33
|
# @param [MARC::Record] record
|
28
|
-
# @param [Hash]
|
29
|
-
# @
|
30
|
-
|
34
|
+
# @param [Hash] iso_639_2_mapping iso-639-2 spec hash for language code translation
|
35
|
+
# @param [Hash] iso_639_3_mapping iso-639-3 spec hash for language code translation
|
36
|
+
# @return [Array] array of language values
|
37
|
+
def values(record, iso_639_2_mapping: Mappers.iso_639_2_language, iso_639_3_mapping: Mappers.iso_639_3_language)
|
38
|
+
values = record.fields('041').filter_map { |field|
|
39
|
+
mapper = subfield_value?(field, '2', /iso639-3/) ? iso_639_3_mapping : iso_639_2_mapping
|
40
|
+
field.filter_map do |sf|
|
41
|
+
next unless LANGUAGE_SUBFIELDS.include? sf.code
|
42
|
+
|
43
|
+
mapper[sf.value&.to_sym]
|
44
|
+
end
|
45
|
+
}.flatten
|
31
46
|
control_field = record['008']&.value
|
32
|
-
|
33
|
-
|
47
|
+
values << iso_639_2_mapping[control_field[35..37]&.to_sym] if control_field.present?
|
48
|
+
values.empty? ? values << iso_639_2_mapping[UNDETERMINED_CODE] : values.uniq
|
34
49
|
end
|
35
50
|
end
|
36
51
|
end
|
data/lib/pennmarc/mappers.rb
CHANGED
@@ -5,8 +5,12 @@ module PennMARC
|
|
5
5
|
class Mappers
|
6
6
|
class << self
|
7
7
|
# @return [Hash]
|
8
|
-
def
|
9
|
-
@
|
8
|
+
def iso_639_2_language
|
9
|
+
@iso_639_2_language ||= load_map('iso639-2-languages.yml')
|
10
|
+
end
|
11
|
+
|
12
|
+
def iso_639_3_language
|
13
|
+
@iso_639_3_language ||= load_map('iso639-3-languages.yml')
|
10
14
|
end
|
11
15
|
|
12
16
|
# @return [Hash]
|