pennmarc 1.0.3 → 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 28dc70104803899434767b340182cb8eecff8c402cfb5e8547058d560a45a1e5
4
- data.tar.gz: 1fc0897f12ffb24de8bc0f40b69474d29264c036e33ba4ea97390b60a9f60c43
3
+ metadata.gz: 50fd0383f0e78807f62f8abe784f75d8dace6e5b0ecf64877f79ad90b6d40354
4
+ data.tar.gz: 3df19d10534fc787c55814e30bd1b066a8c5ea10e7da15f9b320c4500891e0be
5
5
  SHA512:
6
- metadata.gz: c82d8a22f480a768a8f6cce3cb7bc2ed25f7d5b202e3b492ac572a6f5bfec0d801098e1d74db8eb470573a2afbdd9314a90e18955a576c9ff5dee42aa2fbc5cd
7
- data.tar.gz: d3c69ce76e13c50b0f2da649ea9499dcb93a734944d298cb783e71e0c4bc59c76fad0ea344353124f024524d11b5f8c92318771d780848506d50940f483c427b
6
+ metadata.gz: 277bb7c15e224c8134b8cea8b28de474d27022a1537904844af8c5542ff3f0080c37f13db38a5a9463a07453bcc403ccf346c2b5538c2fe26012df6a67dec24f
7
+ data.tar.gz: 1ca8e643758f86aeabdfbeb1576f513c866ba5ef1a40b6edaa279f687af7644baf6b953db6f078548fc72ca99b75b6377db7d80d98a19059c797b6ad41b5f648
data/.gitleaks.toml ADDED
@@ -0,0 +1,2 @@
1
+ [file]
2
+ paths-exclude = ["lib/pennmarc/mappings/language.yml"]
@@ -25,21 +25,17 @@ module PennMARC
25
25
  # @return [DateTime, nil] The date added, or nil if date found in record is invalid
26
26
  def added(record)
27
27
  record.fields(EnrichedMarc::TAG_ITEM).flat_map { |field|
28
- field.filter_map do |subfield|
29
- # skip unless field has date created subfield
30
- next unless subfield_defined?(field, EnrichedMarc::SUB_ITEM_DATE_CREATED)
31
-
28
+ subfield_values(field, EnrichedMarc::SUB_ITEM_DATE_CREATED).filter_map do |date_added|
32
29
  # On 2022-05-02, this field value (as exported in enriched publishing
33
30
  # job from Alma) began truncating time to day-level granularity. We have
34
31
  # no guarantee that this won't switch back in the future, so for the
35
32
  # foreseeable future we should support both formats.
36
33
 
37
- format = subfield.value.size == 10 ? '%Y-%m-%d' : '%Y-%m-%d %H:%M:%S'
38
-
39
- DateTime.strptime(subfield.value, format)
34
+ format = date_added.size == 10 ? '%Y-%m-%d' : '%Y-%m-%d %H:%M:%S'
40
35
 
36
+ DateTime.strptime(date_added, format)
41
37
  rescue StandardError => e
42
- puts "Error parsing date in date added subfield: #{subfield.value} - #{e}"
38
+ puts "Error parsing date in date added subfield: #{date_added} - #{e}"
43
39
  nil
44
40
  end
45
41
  }.max
@@ -4,10 +4,10 @@ module PennMARC
4
4
  # Logic for extracting and translating Language values for a record. Penn practice is to verify the value present in
5
5
  # the {https://www.oclc.org/bibformats/en/fixedfield/lang.html 008 control field} as a three letter code. This code
6
6
  # is then mapped to a display-friendly value using a provided mapping hash.
7
- # @todo should we consider values in the {https://www.oclc.org/bibformats/en/0xx/041.html 041 field}?
8
7
  class Language < Helper
9
8
  # Used when no value is present in the control field - still mapped
10
9
  UNDETERMINED_CODE = :und
10
+ LANGUAGE_SUBFIELDS = %w[a b d e g h i j k m n p q r t].freeze
11
11
 
12
12
  class << self
13
13
  # Get language values for display from the {https://www.oclc.org/bibformats/en/5xx/546.html 546 field} and
@@ -21,16 +21,31 @@ module PennMARC
21
21
  values + linked_alternate(record, '546', &subfield_not_in?(%w[6 8]))
22
22
  end
23
23
 
24
- # Get language values for searching and faceting of a record. The value is extracted from a defined position in
25
- # the 008 control field. Language facet and search values will typically be the same.
24
+ # Get language values for searching and faceting of a record. The values are extracted from subfields
25
+ # in the 041 field. Language facet and search values will typically be the same, with the exception of `zxx`,
26
+ # when no linguistic content is found.
27
+ #
28
+ # @note In Franklin, we extracted the language code from the 008 control field. After engaging cataloging unit
29
+ # representatives, we decided to extract these values from the 041 field: Includes records for multilingual
30
+ # items, items that involve translation, and items where the medium of communication is a sign language.
31
+ # https://www.loc.gov/marc/bibliographic/bd041.html
26
32
  #
27
33
  # @param [MARC::Record] record
28
- # @param [Hash] language_map hash for language code translation
29
- # @return [String] nice value for language
30
- def search(record, language_map: Mappers.language)
34
+ # @param [Hash] iso_639_2_mapping iso-639-2 spec hash for language code translation
35
+ # @param [Hash] iso_639_3_mapping iso-639-3 spec hash for language code translation
36
+ # @return [Array] array of language values
37
+ def values(record, iso_639_2_mapping: Mappers.iso_639_2_language, iso_639_3_mapping: Mappers.iso_639_3_language)
38
+ values = record.fields('041').filter_map { |field|
39
+ mapper = subfield_value?(field, '2', /iso639-3/) ? iso_639_3_mapping : iso_639_2_mapping
40
+ field.filter_map do |sf|
41
+ next unless LANGUAGE_SUBFIELDS.include? sf.code
42
+
43
+ mapper[sf.value&.to_sym]
44
+ end
45
+ }.flatten
31
46
  control_field = record['008']&.value
32
- language_code = control_field[35..37]
33
- language_map[language_code.to_sym || UNDETERMINED_CODE]
47
+ values << iso_639_2_mapping[control_field[35..37]&.to_sym] if control_field.present?
48
+ values.empty? ? values << iso_639_2_mapping[UNDETERMINED_CODE] : values.uniq
34
49
  end
35
50
  end
36
51
  end
@@ -5,8 +5,12 @@ module PennMARC
5
5
  class Mappers
6
6
  class << self
7
7
  # @return [Hash]
8
- def language
9
- @language ||= load_map('language.yml')
8
+ def iso_639_2_language
9
+ @iso_639_2_language ||= load_map('iso639-2-languages.yml')
10
+ end
11
+
12
+ def iso_639_3_language
13
+ @iso_639_3_language ||= load_map('iso639-3-languages.yml')
10
14
  end
11
15
 
12
16
  # @return [Hash]