pennmarc 1.0.3 → 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 28dc70104803899434767b340182cb8eecff8c402cfb5e8547058d560a45a1e5
4
- data.tar.gz: 1fc0897f12ffb24de8bc0f40b69474d29264c036e33ba4ea97390b60a9f60c43
3
+ metadata.gz: 9aa7e5cf7ade86fc51db4d35791eed5e3e38784e35d33bd9b82379a91ab3786e
4
+ data.tar.gz: 8b2668fbfd1fc645b203e23f12cfe5681014a870fa504f45c7c0b034682be325
5
5
  SHA512:
6
- metadata.gz: c82d8a22f480a768a8f6cce3cb7bc2ed25f7d5b202e3b492ac572a6f5bfec0d801098e1d74db8eb470573a2afbdd9314a90e18955a576c9ff5dee42aa2fbc5cd
7
- data.tar.gz: d3c69ce76e13c50b0f2da649ea9499dcb93a734944d298cb783e71e0c4bc59c76fad0ea344353124f024524d11b5f8c92318771d780848506d50940f483c427b
6
+ metadata.gz: aff6902488cb0d85bee32f3a3c5f4b613c8c5867ab3b594af40dad4c54e501b735b79fbed1ba8656c1144b87b58a9edc6e0ed961cc62603c7ccefb0c244b3117
7
+ data.tar.gz: e262a56ed0512de8c93c4ef0f94f87c322904d44ed845a64b9eb8dc58c8c3cc9c62675d52e314c5cd15eba0d7f4daea4725675af0c3e96ac7cb501a55074edf7
data/.gitleaks.toml ADDED
@@ -0,0 +1,2 @@
1
+ [file]
2
+ paths-exclude = ["lib/pennmarc/mappings/language.yml"]
data/.rubocop_todo.yml CHANGED
@@ -1,36 +1,12 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config --auto-gen-only-exclude --exclude-limit 10000`
3
- # on 2023-08-25 13:55:25 UTC using RuboCop version 1.51.0.
3
+ # on 2023-11-08 20:19:45 UTC using RuboCop version 1.51.0.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
7
7
  # versions of RuboCop, may require this file to be generated again.
8
8
 
9
- # Offense count: 2
10
- # This cop supports safe autocorrection (--autocorrect).
11
- # Configuration parameters: EnforcedStyle, IndentationWidth.
12
- # SupportedStyles: with_first_argument, with_fixed_indentation
13
- Layout/ArgumentAlignment:
14
- Exclude:
15
- - 'spec/lib/pennmarc/helpers/series_spec.rb'
16
-
17
- # Offense count: 1
18
- # This cop supports safe autocorrection (--autocorrect).
19
- # Configuration parameters: EnforcedStyle.
20
- # SupportedStyles: empty_lines, no_empty_lines
21
- Layout/EmptyLinesAroundBlockBody:
22
- Exclude:
23
- - 'spec/lib/pennmarc/helpers/identifer_spec.rb'
24
-
25
- # Offense count: 1
26
- # This cop supports safe autocorrection (--autocorrect).
27
- # Configuration parameters: EnforcedStyle.
28
- # SupportedStyles: final_newline, final_blank_line
29
- Layout/TrailingEmptyLines:
30
- Exclude:
31
- - 'spec/lib/pennmarc/helpers/identifer_spec.rb'
32
-
33
- # Offense count: 22
9
+ # Offense count: 23
34
10
  # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
35
11
  Metrics/AbcSize:
36
12
  Exclude:
@@ -38,6 +14,7 @@ Metrics/AbcSize:
38
14
  - 'lib/pennmarc/helpers/edition.rb'
39
15
  - 'lib/pennmarc/helpers/format.rb'
40
16
  - 'lib/pennmarc/helpers/genre.rb'
17
+ - 'lib/pennmarc/helpers/language.rb'
41
18
  - 'lib/pennmarc/helpers/location.rb'
42
19
  - 'lib/pennmarc/helpers/note.rb'
43
20
  - 'lib/pennmarc/helpers/production.rb'
@@ -66,6 +43,7 @@ Metrics/CyclomaticComplexity:
66
43
  - 'lib/pennmarc/helpers/edition.rb'
67
44
  - 'lib/pennmarc/helpers/format.rb'
68
45
  - 'lib/pennmarc/helpers/genre.rb'
46
+ - 'lib/pennmarc/helpers/language.rb'
69
47
  - 'lib/pennmarc/helpers/note.rb'
70
48
  - 'lib/pennmarc/helpers/production.rb'
71
49
  - 'lib/pennmarc/helpers/relation.rb'
@@ -104,6 +82,7 @@ Metrics/PerceivedComplexity:
104
82
  - 'lib/pennmarc/helpers/edition.rb'
105
83
  - 'lib/pennmarc/helpers/format.rb'
106
84
  - 'lib/pennmarc/helpers/genre.rb'
85
+ - 'lib/pennmarc/helpers/language.rb'
107
86
  - 'lib/pennmarc/helpers/note.rb'
108
87
  - 'lib/pennmarc/helpers/production.rb'
109
88
  - 'lib/pennmarc/helpers/series.rb'
@@ -152,16 +131,9 @@ RSpec/FilePath:
152
131
  Exclude:
153
132
  - 'spec/lib/pennmarc/parser_spec.rb'
154
133
 
155
- # Offense count: 4
134
+ # Offense count: 6
156
135
  # Configuration parameters: Max, AllowedGroups.
157
136
  RSpec/NestedGroups:
158
137
  Exclude:
138
+ - 'spec/lib/pennmarc/helpers/access_spec.rb'
159
139
  - 'spec/lib/pennmarc/helpers/format_spec.rb'
160
-
161
- # Offense count: 2
162
- # This cop supports safe autocorrection (--autocorrect).
163
- # Configuration parameters: Max, AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns.
164
- # URISchemes: http, https
165
- Layout/LineLength:
166
- Exclude:
167
- - 'spec/lib/pennmarc/helpers/creator_spec.rb'
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ module PennMARC
4
+ # Methods for extracting how a record can be accessed
5
+ class Access < Helper
6
+ ONLINE = 'Online'
7
+ AT_THE_LIBRARY = 'At the library'
8
+
9
+ class << self
10
+ # Based primarily on the "enhanced MARC" fields added by Alma, determine if the record has
11
+ # electronic access or has physical holding, and is therefore "Online" or "At the library". If a record is "At the
12
+ # library", but has a link to a finding aid in the 856 field (matching certain criteria), also add 'Online' as an
13
+ # access method.
14
+ # @todo What if none of these criteria match? Should we include "At the library" by default? Records with no value
15
+ # in this field would be lost if the user selects a facet value.
16
+ # @param [MARC::Record] record
17
+ # @return [Array]
18
+ def facet(record)
19
+ acc = record.filter_map do |field|
20
+ next AT_THE_LIBRARY if field.tag == EnrichedMarc::TAG_HOLDING
21
+ next ONLINE if field.tag == EnrichedMarc::TAG_ELECTRONIC_INVENTORY
22
+ end
23
+
24
+ return acc if acc.size == 2 # return early if all values are already present
25
+
26
+ acc << ONLINE if acc.exclude?(ONLINE) && finding_aid_linkage?(record) # only check if ONLINE isn't already there
27
+ acc
28
+ end
29
+
30
+ private
31
+
32
+ # Check if a record contains an 856 entry for an online finding aid, meeting these criteria:
33
+ # 1. Indicator 1 is 4 (HTTP resource)
34
+ # 2. Indicator 2 is NOT 2 (indicating the linkage is to a "related" thing)
35
+ # 3. The URL specified in subfield u (URI) is a Penn Handle link, and subfield z contains the value 'Finding aid'
36
+ # See: https://www.loc.gov/marc/bibliographic/bd856.html
37
+ # @param [MARC::Record] record
38
+ # @return [Boolean]
39
+ def finding_aid_linkage?(record)
40
+ record.fields('856').filter_map do |field|
41
+ next if field.indicator2 == '2' || field.indicator1 != '4'
42
+
43
+ subz = subfield_values(field, 'z')
44
+ subfield_values(field, 'u').filter_map do |value|
45
+ return true if subz.include?('Finding aid') && value.include?('hdl.library.upenn.edu')
46
+ end
47
+ end
48
+ false
49
+ end
50
+ end
51
+ end
52
+ end
@@ -25,21 +25,17 @@ module PennMARC
25
25
  # @return [DateTime, nil] The date added, or nil if date found in record is invalid
26
26
  def added(record)
27
27
  record.fields(EnrichedMarc::TAG_ITEM).flat_map { |field|
28
- field.filter_map do |subfield|
29
- # skip unless field has date created subfield
30
- next unless subfield_defined?(field, EnrichedMarc::SUB_ITEM_DATE_CREATED)
31
-
28
+ subfield_values(field, EnrichedMarc::SUB_ITEM_DATE_CREATED).filter_map do |date_added|
32
29
  # On 2022-05-02, this field value (as exported in enriched publishing
33
30
  # job from Alma) began truncating time to day-level granularity. We have
34
31
  # no guarantee that this won't switch back in the future, so for the
35
32
  # foreseeable future we should support both formats.
36
33
 
37
- format = subfield.value.size == 10 ? '%Y-%m-%d' : '%Y-%m-%d %H:%M:%S'
38
-
39
- DateTime.strptime(subfield.value, format)
34
+ format = date_added.size == 10 ? '%Y-%m-%d' : '%Y-%m-%d %H:%M:%S'
40
35
 
36
+ DateTime.strptime(date_added, format)
41
37
  rescue StandardError => e
42
- puts "Error parsing date in date added subfield: #{subfield.value} - #{e}"
38
+ puts "Error parsing date in date added subfield: #{date_added} - #{e}"
43
39
  nil
44
40
  end
45
41
  }.max
@@ -4,10 +4,10 @@ module PennMARC
4
4
  # Logic for extracting and translating Language values for a record. Penn practice is to verify the value present in
5
5
  # the {https://www.oclc.org/bibformats/en/fixedfield/lang.html 008 control field} as a three letter code. This code
6
6
  # is then mapped to a display-friendly value using a provided mapping hash.
7
- # @todo should we consider values in the {https://www.oclc.org/bibformats/en/0xx/041.html 041 field}?
8
7
  class Language < Helper
9
8
  # Used when no value is present in the control field - still mapped
10
9
  UNDETERMINED_CODE = :und
10
+ LANGUAGE_SUBFIELDS = %w[a b d e g h i j k m n p q r t].freeze
11
11
 
12
12
  class << self
13
13
  # Get language values for display from the {https://www.oclc.org/bibformats/en/5xx/546.html 546 field} and
@@ -21,16 +21,31 @@ module PennMARC
21
21
  values + linked_alternate(record, '546', &subfield_not_in?(%w[6 8]))
22
22
  end
23
23
 
24
- # Get language values for searching and faceting of a record. The value is extracted from a defined position in
25
- # the 008 control field. Language facet and search values will typically be the same.
24
+ # Get language values for searching and faceting of a record. The values are extracted from subfields in the
25
+ # 041 field and from positions 35-37 of the 008 control field. Language facet and search values will typically
26
+ # be the same, with the exception of `zxx`, when no linguistic content is found.
27
+ #
28
+ # @note In franklin, we extracted the language code from the 008 control field. After engaging cataloging unit
29
+ # representatives, we decided to extract these values from the 041 field, which covers records for multilingual
30
+ # items, items that involve translation, and items where the medium of communication is a sign language.
31
+ # https://www.loc.gov/marc/bibliographic/bd041.html
26
32
  #
27
33
  # @param [MARC::Record] record
28
- # @param [Hash] language_map hash for language code translation
29
- # @return [String] nice value for language
30
- def search(record, language_map: Mappers.language)
34
+ # @param [Hash] iso_639_2_mapping iso-639-2 spec hash for language code translation
35
+ # @param [Hash] iso_639_3_mapping iso-639-3 spec hash for language code translation
36
+ # @return [Array] array of language values
37
+ def values(record, iso_639_2_mapping: Mappers.iso_639_2_language, iso_639_3_mapping: Mappers.iso_639_3_language)
38
+ values = record.fields('041').filter_map { |field|
39
+ mapper = subfield_value?(field, '2', /iso639-3/) ? iso_639_3_mapping : iso_639_2_mapping
40
+ field.filter_map do |sf|
41
+ next unless LANGUAGE_SUBFIELDS.include? sf.code
42
+
43
+ mapper[sf.value&.to_sym]
44
+ end
45
+ }.flatten
31
46
  control_field = record['008']&.value
32
- language_code = control_field[35..37]
33
- language_map[language_code.to_sym || UNDETERMINED_CODE]
47
+ values << iso_639_2_mapping[control_field[35..37]&.to_sym] if control_field.present?
48
+ values.empty? ? values << iso_639_2_mapping[UNDETERMINED_CODE] : values.uniq
34
49
  end
35
50
  end
36
51
  end
@@ -5,8 +5,12 @@ module PennMARC
5
5
  class Mappers
6
6
  class << self
7
7
  # @return [Hash]
8
- def language
9
- @language ||= load_map('language.yml')
8
+ def iso_639_2_language
9
+ @iso_639_2_language ||= load_map('iso639-2-languages.yml')
10
+ end
11
+
12
+ def iso_639_3_language
13
+ @iso_639_3_language ||= load_map('iso639-3-languages.yml')
10
14
  end
11
15
 
12
16
  # @return [Hash]