pennmarc 1.0.3 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 28dc70104803899434767b340182cb8eecff8c402cfb5e8547058d560a45a1e5
4
- data.tar.gz: 1fc0897f12ffb24de8bc0f40b69474d29264c036e33ba4ea97390b60a9f60c43
3
+ metadata.gz: 9aa7e5cf7ade86fc51db4d35791eed5e3e38784e35d33bd9b82379a91ab3786e
4
+ data.tar.gz: 8b2668fbfd1fc645b203e23f12cfe5681014a870fa504f45c7c0b034682be325
5
5
  SHA512:
6
- metadata.gz: c82d8a22f480a768a8f6cce3cb7bc2ed25f7d5b202e3b492ac572a6f5bfec0d801098e1d74db8eb470573a2afbdd9314a90e18955a576c9ff5dee42aa2fbc5cd
7
- data.tar.gz: d3c69ce76e13c50b0f2da649ea9499dcb93a734944d298cb783e71e0c4bc59c76fad0ea344353124f024524d11b5f8c92318771d780848506d50940f483c427b
6
+ metadata.gz: aff6902488cb0d85bee32f3a3c5f4b613c8c5867ab3b594af40dad4c54e501b735b79fbed1ba8656c1144b87b58a9edc6e0ed961cc62603c7ccefb0c244b3117
7
+ data.tar.gz: e262a56ed0512de8c93c4ef0f94f87c322904d44ed845a64b9eb8dc58c8c3cc9c62675d52e314c5cd15eba0d7f4daea4725675af0c3e96ac7cb501a55074edf7
data/.gitleaks.toml ADDED
@@ -0,0 +1,2 @@
1
+ [file]
2
+ paths-exclude = ["lib/pennmarc/mappings/language.yml"]
data/.rubocop_todo.yml CHANGED
@@ -1,36 +1,12 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config --auto-gen-only-exclude --exclude-limit 10000`
3
- # on 2023-08-25 13:55:25 UTC using RuboCop version 1.51.0.
3
+ # on 2023-11-08 20:19:45 UTC using RuboCop version 1.51.0.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
7
7
  # versions of RuboCop, may require this file to be generated again.
8
8
 
9
- # Offense count: 2
10
- # This cop supports safe autocorrection (--autocorrect).
11
- # Configuration parameters: EnforcedStyle, IndentationWidth.
12
- # SupportedStyles: with_first_argument, with_fixed_indentation
13
- Layout/ArgumentAlignment:
14
- Exclude:
15
- - 'spec/lib/pennmarc/helpers/series_spec.rb'
16
-
17
- # Offense count: 1
18
- # This cop supports safe autocorrection (--autocorrect).
19
- # Configuration parameters: EnforcedStyle.
20
- # SupportedStyles: empty_lines, no_empty_lines
21
- Layout/EmptyLinesAroundBlockBody:
22
- Exclude:
23
- - 'spec/lib/pennmarc/helpers/identifer_spec.rb'
24
-
25
- # Offense count: 1
26
- # This cop supports safe autocorrection (--autocorrect).
27
- # Configuration parameters: EnforcedStyle.
28
- # SupportedStyles: final_newline, final_blank_line
29
- Layout/TrailingEmptyLines:
30
- Exclude:
31
- - 'spec/lib/pennmarc/helpers/identifer_spec.rb'
32
-
33
- # Offense count: 22
9
+ # Offense count: 23
34
10
  # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
35
11
  Metrics/AbcSize:
36
12
  Exclude:
@@ -38,6 +14,7 @@ Metrics/AbcSize:
38
14
  - 'lib/pennmarc/helpers/edition.rb'
39
15
  - 'lib/pennmarc/helpers/format.rb'
40
16
  - 'lib/pennmarc/helpers/genre.rb'
17
+ - 'lib/pennmarc/helpers/language.rb'
41
18
  - 'lib/pennmarc/helpers/location.rb'
42
19
  - 'lib/pennmarc/helpers/note.rb'
43
20
  - 'lib/pennmarc/helpers/production.rb'
@@ -66,6 +43,7 @@ Metrics/CyclomaticComplexity:
66
43
  - 'lib/pennmarc/helpers/edition.rb'
67
44
  - 'lib/pennmarc/helpers/format.rb'
68
45
  - 'lib/pennmarc/helpers/genre.rb'
46
+ - 'lib/pennmarc/helpers/language.rb'
69
47
  - 'lib/pennmarc/helpers/note.rb'
70
48
  - 'lib/pennmarc/helpers/production.rb'
71
49
  - 'lib/pennmarc/helpers/relation.rb'
@@ -104,6 +82,7 @@ Metrics/PerceivedComplexity:
104
82
  - 'lib/pennmarc/helpers/edition.rb'
105
83
  - 'lib/pennmarc/helpers/format.rb'
106
84
  - 'lib/pennmarc/helpers/genre.rb'
85
+ - 'lib/pennmarc/helpers/language.rb'
107
86
  - 'lib/pennmarc/helpers/note.rb'
108
87
  - 'lib/pennmarc/helpers/production.rb'
109
88
  - 'lib/pennmarc/helpers/series.rb'
@@ -152,16 +131,9 @@ RSpec/FilePath:
152
131
  Exclude:
153
132
  - 'spec/lib/pennmarc/parser_spec.rb'
154
133
 
155
- # Offense count: 4
134
+ # Offense count: 6
156
135
  # Configuration parameters: Max, AllowedGroups.
157
136
  RSpec/NestedGroups:
158
137
  Exclude:
138
+ - 'spec/lib/pennmarc/helpers/access_spec.rb'
159
139
  - 'spec/lib/pennmarc/helpers/format_spec.rb'
160
-
161
- # Offense count: 2
162
- # This cop supports safe autocorrection (--autocorrect).
163
- # Configuration parameters: Max, AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns.
164
- # URISchemes: http, https
165
- Layout/LineLength:
166
- Exclude:
167
- - 'spec/lib/pennmarc/helpers/creator_spec.rb'
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ module PennMARC
4
+ # Methods for extracting how a record can be accessed
5
+ class Access < Helper
6
+ ONLINE = 'Online'
7
+ AT_THE_LIBRARY = 'At the library'
8
+
9
+ class << self
10
+ # Based primarily on the "enhanced MARC" fields added by Alma, determine if the record has
11
+ # electronic access or has physical holdings, and is therefore "Online" or "At the library". If a record is "At the
12
+ # library", but has a link to a finding aid in the 856 field (matching certain criteria), also add 'Online' as an
13
+ # access method.
14
+ # @todo What if none of these criteria match? Should we include "At the library" by default? Records with no value
15
+ # in this field would be lost if the user selects a facet value.
16
+ # @param [MARC::Record] record
17
+ # @return [Array]
18
+ def facet(record)
19
+ acc = record.filter_map do |field|
20
+ next AT_THE_LIBRARY if field.tag == EnrichedMarc::TAG_HOLDING
21
+ next ONLINE if field.tag == EnrichedMarc::TAG_ELECTRONIC_INVENTORY
22
+ end
23
+
24
+ return acc if acc.size == 2 # return early if all values are already present
25
+
26
+ acc << ONLINE if acc.exclude?(ONLINE) && finding_aid_linkage?(record) # only check if ONLINE isn't already there
27
+ acc
28
+ end
29
+
30
+ private
31
+
32
+ # Check if a record contains an 856 entry for an online finding aid, meeting these criteria:
33
+ # 1. Indicator 1 is 4 (HTTP resource)
34
+ # 2. Indicator 2 is NOT 2 (indicating the linkage is to a "related" thing)
35
+ # 3. The URL specified in subfield u (URI) is a Penn Handle link
36
+ # See: https://www.loc.gov/marc/bibliographic/bd856.html
37
+ # @param [MARC::Record] record
38
+ # @return [Boolean]
39
+ def finding_aid_linkage?(record)
40
+ record.fields('856').filter_map do |field|
41
+ next if field.indicator2 == '2' || field.indicator1 != '4'
42
+
43
+ subz = subfield_values(field, 'z')
44
+ subfield_values(field, 'u').filter_map do |value|
45
+ return true if subz.include?('Finding aid') && value.include?('hdl.library.upenn.edu')
46
+ end
47
+ end
48
+ false
49
+ end
50
+ end
51
+ end
52
+ end
@@ -25,21 +25,17 @@ module PennMARC
25
25
  # @return [DateTime, nil] The date added, or nil if date found in record is invalid
26
26
  def added(record)
27
27
  record.fields(EnrichedMarc::TAG_ITEM).flat_map { |field|
28
- field.filter_map do |subfield|
29
- # skip unless field has date created subfield
30
- next unless subfield_defined?(field, EnrichedMarc::SUB_ITEM_DATE_CREATED)
31
-
28
+ subfield_values(field, EnrichedMarc::SUB_ITEM_DATE_CREATED).filter_map do |date_added|
32
29
  # On 2022-05-02, this field value (as exported in enriched publishing
33
30
  # job from Alma) began truncating time to day-level granularity. We have
34
31
  # no guarantee that this won't switch back in the future, so for the
35
32
  # foreseeable future we should support both formats.
36
33
 
37
- format = subfield.value.size == 10 ? '%Y-%m-%d' : '%Y-%m-%d %H:%M:%S'
38
-
39
- DateTime.strptime(subfield.value, format)
34
+ format = date_added.size == 10 ? '%Y-%m-%d' : '%Y-%m-%d %H:%M:%S'
40
35
 
36
+ DateTime.strptime(date_added, format)
41
37
  rescue StandardError => e
42
- puts "Error parsing date in date added subfield: #{subfield.value} - #{e}"
38
+ puts "Error parsing date in date added subfield: #{date_added} - #{e}"
43
39
  nil
44
40
  end
45
41
  }.max
@@ -4,10 +4,10 @@ module PennMARC
4
4
  # Logic for extracting and translating Language values for a record. Penn practice is to verify the value present in
5
5
  # the {https://www.oclc.org/bibformats/en/fixedfield/lang.html 008 control field} as a three letter code. This code
6
6
  # is then mapped to a display-friendly value using a provided mapping hash.
7
- # @todo should we consider values in the {https://www.oclc.org/bibformats/en/0xx/041.html 041 field}?
8
7
  class Language < Helper
9
8
  # Used when no value is present in the control field - still mapped
10
9
  UNDETERMINED_CODE = :und
10
+ LANGUAGE_SUBFIELDS = %w[a b d e g h i j k m n p q r t].freeze
11
11
 
12
12
  class << self
13
13
  # Get language values for display from the {https://www.oclc.org/bibformats/en/5xx/546.html 546 field} and
@@ -21,16 +21,31 @@ module PennMARC
21
21
  values + linked_alternate(record, '546', &subfield_not_in?(%w[6 8]))
22
22
  end
23
23
 
24
- # Get language values for searching and faceting of a record. The value is extracted from a defined position in
25
- # the 008 control field. Language facet and search values will typically be the same.
24
+ # Get language values for searching and faceting of a record. The values are extracted from subfields
25
+ # in the 041 field. Language facet and search values will typically be the same, with the exception of `zxx`,
26
+ # when no linguistic content is found.
27
+ #
28
+ # @note In franklin, we extracted the language code from the 008 control field. After engaging cataloging unit
29
+ # representatives, we decided to extract these values from the 041 field, which covers records for multilingual
30
+ # items, items that involve translation, and items where the medium of communication is a sign language.
31
+ # https://www.loc.gov/marc/bibliographic/bd041.html
26
32
  #
27
33
  # @param [MARC::Record] record
28
- # @param [Hash] language_map hash for language code translation
29
- # @return [String] nice value for language
30
- def search(record, language_map: Mappers.language)
34
+ # @param [Hash] iso_639_2_mapping iso-639-2 spec hash for language code translation
35
+ # @param [Hash] iso_639_3_mapping iso-639-3 spec hash for language code translation
36
+ # @return [Array] array of language values
37
+ def values(record, iso_639_2_mapping: Mappers.iso_639_2_language, iso_639_3_mapping: Mappers.iso_639_3_language)
38
+ values = record.fields('041').filter_map { |field|
39
+ mapper = subfield_value?(field, '2', /iso639-3/) ? iso_639_3_mapping : iso_639_2_mapping
40
+ field.filter_map do |sf|
41
+ next unless LANGUAGE_SUBFIELDS.include? sf.code
42
+
43
+ mapper[sf.value&.to_sym]
44
+ end
45
+ }.flatten
31
46
  control_field = record['008']&.value
32
- language_code = control_field[35..37]
33
- language_map[language_code.to_sym || UNDETERMINED_CODE]
47
+ values << iso_639_2_mapping[control_field[35..37]&.to_sym] if control_field.present?
48
+ values.empty? ? values << iso_639_2_mapping[UNDETERMINED_CODE] : values.uniq
34
49
  end
35
50
  end
36
51
  end
@@ -5,8 +5,12 @@ module PennMARC
5
5
  class Mappers
6
6
  class << self
7
7
  # @return [Hash]
8
- def language
9
- @language ||= load_map('language.yml')
8
+ def iso_639_2_language
9
+ @iso_639_2_language ||= load_map('iso639-2-languages.yml')
10
+ end
11
+
12
+ def iso_639_3_language
13
+ @iso_639_3_language ||= load_map('iso639-3-languages.yml')
10
14
  end
11
15
 
12
16
  # @return [Hash]