pennmarc 1.0.3 → 1.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitleaks.toml +2 -0
- data/lib/pennmarc/helpers/date.rb +4 -8
- data/lib/pennmarc/helpers/language.rb +23 -8
- data/lib/pennmarc/mappers.rb +6 -2
- data/lib/pennmarc/mappings/iso639-3-languages.yml +7916 -0
- data/lib/pennmarc/version.rb +1 -1
- data/spec/lib/pennmarc/helpers/date_spec.rb +16 -0
- data/spec/lib/pennmarc/helpers/language_spec.rb +56 -8
- data/spec/lib/pennmarc/parser_spec.rb +3 -3
- metadata +5 -3
- /data/lib/pennmarc/mappings/{language.yml → iso639-2-languages.yml} +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 50fd0383f0e78807f62f8abe784f75d8dace6e5b0ecf64877f79ad90b6d40354
|
4
|
+
data.tar.gz: 3df19d10534fc787c55814e30bd1b066a8c5ea10e7da15f9b320c4500891e0be
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 277bb7c15e224c8134b8cea8b28de474d27022a1537904844af8c5542ff3f0080c37f13db38a5a9463a07453bcc403ccf346c2b5538c2fe26012df6a67dec24f
|
7
|
+
data.tar.gz: 1ca8e643758f86aeabdfbeb1576f513c866ba5ef1a40b6edaa279f687af7644baf6b953db6f078548fc72ca99b75b6377db7d80d98a19059c797b6ad41b5f648
|
data/.gitleaks.toml
ADDED
@@ -25,21 +25,17 @@ module PennMARC
|
|
25
25
|
# @return [DateTime, nil] The date added, or nil if date found in record is invalid
|
26
26
|
def added(record)
|
27
27
|
record.fields(EnrichedMarc::TAG_ITEM).flat_map { |field|
|
28
|
-
field.filter_map do |
|
29
|
-
# skip unless field has date created subfield
|
30
|
-
next unless subfield_defined?(field, EnrichedMarc::SUB_ITEM_DATE_CREATED)
|
31
|
-
|
28
|
+
subfield_values(field, EnrichedMarc::SUB_ITEM_DATE_CREATED).filter_map do |date_added|
|
32
29
|
# On 2022-05-02, this field value (as exported in enriched publishing
|
33
30
|
# job from Alma) began truncating time to day-level granularity. We have
|
34
31
|
# no guarantee that this won't switch back in the future, so for the
|
35
32
|
# foreseeable future we should support both formats.
|
36
33
|
|
37
|
-
format =
|
38
|
-
|
39
|
-
DateTime.strptime(subfield.value, format)
|
34
|
+
format = date_added.size == 10 ? '%Y-%m-%d' : '%Y-%m-%d %H:%M:%S'
|
40
35
|
|
36
|
+
DateTime.strptime(date_added, format)
|
41
37
|
rescue StandardError => e
|
42
|
-
puts "Error parsing date in date added subfield: #{
|
38
|
+
puts "Error parsing date in date added subfield: #{date_added} - #{e}"
|
43
39
|
nil
|
44
40
|
end
|
45
41
|
}.max
|
@@ -4,10 +4,10 @@ module PennMARC
|
|
4
4
|
# Logic for extracting and translating Language values for a record. Penn practice is to verify the value present in
|
5
5
|
# the {https://www.oclc.org/bibformats/en/fixedfield/lang.html 008 control field} as a three letter code. This code
|
6
6
|
# is then mapped to a display-friendly value using a provided mapping hash.
|
7
|
-
# @todo should we consider values in the {https://www.oclc.org/bibformats/en/0xx/041.html 041 field}?
|
8
7
|
class Language < Helper
|
9
8
|
# Used when no value is present in the control field - still mapped
|
10
9
|
UNDETERMINED_CODE = :und
|
10
|
+
LANGUAGE_SUBFIELDS = %w[a b d e g h i j k m n p q r t].freeze
|
11
11
|
|
12
12
|
class << self
|
13
13
|
# Get language values for display from the {https://www.oclc.org/bibformats/en/5xx/546.html 546 field} and
|
@@ -21,16 +21,31 @@ module PennMARC
|
|
21
21
|
values + linked_alternate(record, '546', &subfield_not_in?(%w[6 8]))
|
22
22
|
end
|
23
23
|
|
24
|
-
# Get language values for searching and faceting of a record. The
|
25
|
-
# the
|
24
|
+
# Get language values for searching and faceting of a record. The values are extracted from subfields
|
25
|
+
# in the 041 field. Language facet and search values will typically be the same, with the exception of `zxx`,
|
26
|
+
# when no linguistic content is found.
|
27
|
+
#
|
28
|
+
# @note In franklin, we extracted the language code from the 008 control field. After engaging cataloging unit
|
29
|
+
# representatives, we decided to extract these values from the 041 field: Includes records for multilingual
|
30
|
+
# items, items that involve translation, and items where the medium of communication is a sign language.
|
31
|
+
# https://www.loc.gov/marc/bibliographic/bd041.html
|
26
32
|
#
|
27
33
|
# @param [MARC::Record] record
|
28
|
-
# @param [Hash]
|
29
|
-
# @
|
30
|
-
|
34
|
+
# @param [Hash] iso_639_2_mapping iso-639-2 spec hash for language code translation
|
35
|
+
# @param [Hash] iso_639_3_mapping iso-639-3 spec hash for language code translation
|
36
|
+
# @return [Array] array of language values
|
37
|
+
def values(record, iso_639_2_mapping: Mappers.iso_639_2_language, iso_639_3_mapping: Mappers.iso_639_3_language)
|
38
|
+
values = record.fields('041').filter_map { |field|
|
39
|
+
mapper = subfield_value?(field, '2', /iso639-3/) ? iso_639_3_mapping : iso_639_2_mapping
|
40
|
+
field.filter_map do |sf|
|
41
|
+
next unless LANGUAGE_SUBFIELDS.include? sf.code
|
42
|
+
|
43
|
+
mapper[sf.value&.to_sym]
|
44
|
+
end
|
45
|
+
}.flatten
|
31
46
|
control_field = record['008']&.value
|
32
|
-
|
33
|
-
|
47
|
+
values << iso_639_2_mapping[control_field[35..37]&.to_sym] if control_field.present?
|
48
|
+
values.empty? ? values << iso_639_2_mapping[UNDETERMINED_CODE] : values.uniq
|
34
49
|
end
|
35
50
|
end
|
36
51
|
end
|
data/lib/pennmarc/mappers.rb
CHANGED
@@ -5,8 +5,12 @@ module PennMARC
|
|
5
5
|
class Mappers
|
6
6
|
class << self
|
7
7
|
# @return [Hash]
|
8
|
-
def
|
9
|
-
@
|
8
|
+
def iso_639_2_language
|
9
|
+
@iso_639_2_language ||= load_map('iso639-2-languages.yml')
|
10
|
+
end
|
11
|
+
|
12
|
+
def iso_639_3_language
|
13
|
+
@iso_639_3_language ||= load_map('iso639-3-languages.yml')
|
10
14
|
end
|
11
15
|
|
12
16
|
# @return [Hash]
|