pennmarc 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 33527bf43532170690b9b591097fb03f854130b3383debfafe931417d0394f52
4
- data.tar.gz: a18a5be08df6d7b74d2aaa61275cdecdedec6caa4b5e8b2b81c99e91411ce093
3
+ metadata.gz: 50fd0383f0e78807f62f8abe784f75d8dace6e5b0ecf64877f79ad90b6d40354
4
+ data.tar.gz: 3df19d10534fc787c55814e30bd1b066a8c5ea10e7da15f9b320c4500891e0be
5
5
  SHA512:
6
- metadata.gz: 627a4da06351037f520bc02b0a9fd61ce6cfdec35c563f2b29e3c1c01b4ad76766f155630cafe680b010783ea4f4c285df2d499fd784b7ee4f3cb4948cd421dd
7
- data.tar.gz: c966233bb00009a14babc5bc92f0399e75640e886f7c32d176fea8983813aca8936d0e8fab98576a797afaf0b60c5f24268c2ae4e2d8cca31900e590db0a9bc6
6
+ metadata.gz: 277bb7c15e224c8134b8cea8b28de474d27022a1537904844af8c5542ff3f0080c37f13db38a5a9463a07453bcc403ccf346c2b5538c2fe26012df6a67dec24f
7
+ data.tar.gz: 1ca8e643758f86aeabdfbeb1576f513c866ba5ef1a40b6edaa279f687af7644baf6b953db6f078548fc72ca99b75b6377db7d80d98a19059c797b6ad41b5f648
data/.gitleaks.toml ADDED
@@ -0,0 +1,2 @@
1
+ [file]
2
+ paths-exclude = ["lib/pennmarc/mappings/language.yml"]
data/README.md CHANGED
@@ -73,7 +73,7 @@ rspec
73
73
 
74
74
  1. Update the version in `pennmarc.gemspec`
75
75
  2. Run `gem build pennmarc.gemspec` with the latest code
76
- 3. Run `gem push pennmarc-{version number here}`(e.g. `gem push pennmarc-1.0.0`) to push to RubyGems. You will need access and MFA setup with RubyGems.
76
+ 3. Run `gem push pennmarc-{version number here}.gem` (e.g. `gem push pennmarc-1.0.0.gem`) to push to RubyGems. You will need access and MFA setup with RubyGems.
77
77
 
78
78
  ## QA
79
79
 
@@ -89,4 +89,4 @@ MARC_FILE=path/to/marc.xml bundle exec rake pennmarc:parse
89
89
  - rake task or some similar command to return a full set of values extracted from a specified marcxml file
90
90
  - Pipeline to run tests and publish to Rubygems
91
91
  - rubocop check
92
- - rdoc/yard coverage checks?
92
+ - rdoc/yard coverage checks?
@@ -25,21 +25,17 @@ module PennMARC
25
25
  # @return [DateTime, nil] The date added, or nil if date found in record is invalid
26
26
  def added(record)
27
27
  record.fields(EnrichedMarc::TAG_ITEM).flat_map { |field|
28
- field.filter_map do |subfield|
29
- # skip unless field has date created subfield
30
- next unless subfield_defined?(field, EnrichedMarc::SUB_ITEM_DATE_CREATED)
31
-
28
+ subfield_values(field, EnrichedMarc::SUB_ITEM_DATE_CREATED).filter_map do |date_added|
32
29
  # On 2022-05-02, this field value (as exported in enriched publishing
33
30
  # job from Alma) began truncating time to day-level granularity. We have
34
31
  # no guarantee that this won't switch back in the future, so for the
35
32
  # foreseeable future we should support both formats.
36
33
 
37
- format = subfield.value.size == 10 ? '%Y-%m-%d' : '%Y-%m-%d %H:%M:%S'
38
-
39
- DateTime.strptime(subfield.value, format)
34
+ format = date_added.size == 10 ? '%Y-%m-%d' : '%Y-%m-%d %H:%M:%S'
40
35
 
36
+ DateTime.strptime(date_added, format)
41
37
  rescue StandardError => e
42
- puts "Error parsing date in date added subfield: #{subfield.value} - #{e}"
38
+ puts "Error parsing date in date added subfield: #{date_added} - #{e}"
43
39
  nil
44
40
  end
45
41
  }.max
@@ -41,10 +41,10 @@ module PennMARC
41
41
  results += record.fields('880').map do |f|
42
42
  subfield_to_ignore = if subfield_value?(f, 6, /^300/)
43
43
  %w[3 6 8]
44
- elsif subfield_value?(f, 6, /^(254|255|310|342|352|362)/)
45
- %w[6 8]
46
44
  elsif subfield_value?(f, 6, /^340/)
47
45
  %w[0 2 6 8]
46
+ else
47
+ %w[6 8]
48
48
  end
49
49
  join_subfields(f, &subfield_not_in?(subfield_to_ignore))
50
50
  end
@@ -23,7 +23,7 @@ module PennMARC
23
23
  if field.tag == '020'
24
24
  field.filter_map { |subfield| normalize_isbn(subfield.value) if subfield_in?(%w[a z]).call(subfield) }
25
25
  else
26
- field.filter_map { |subfield| subfield.value if subfield_in?(%w[a l z]).call(subfield) }
26
+ field.filter_map { |subfield| subfield.value if subfield_in?(%w[a l m y z]).call(subfield) }
27
27
  end
28
28
  }.flatten.uniq
29
29
  end
@@ -56,21 +56,42 @@ module PennMARC
56
56
 
57
57
  # Get numeric OCLC ID of first {https://www.oclc.org/bibformats/en/0xx/035.html 035 field}
58
58
  # with an OCLC ID defined in subfield 'a'.
59
- #
60
- # @todo We should evaluate this to return a single value in the future since subfield a is non-repeatable
61
59
  # @param [MARC::Record] record
62
- # @return [Array<String>]
63
- def oclc_id(record)
64
- oclc_id = Array.wrap(record.fields('035')
65
- .find { |field| field.any? { |subfield| subfield_a_is_oclc?(subfield) } })
66
-
67
- oclc_id.flat_map do |field|
60
+ # @return [String, nil]
61
+ def oclc_id_show(record)
62
+ ids = Array.wrap(record.fields('035')
63
+ .find { |field| field.any? { |subfield| subfield_a_is_oclc?(subfield) } })
64
+ ids.flat_map { |field|
68
65
  field.filter_map do |subfield|
69
66
  # skip unless subfield 'a' is an oclc id value
70
67
  next unless subfield_a_is_oclc?(subfield)
71
68
 
72
69
  # search for numeric part of oclc id (e.g. '610094484' in '(OCoLC)ocn610094484')
73
- match = /^\s*\(OCoLC\)[^1-9]*([1-9][0-9]*).*$/.match(subfield.value)
70
+ match = match_oclc_number(subfield)
71
+
72
+ # skip unless search to find numeric part of oclc id has a match
73
+ next unless match
74
+
75
+ match[1]
76
+ end
77
+ }.first
78
+ end
79
+
80
+ # Retrieve valid and invalid numeric OCLC IDs from {https://www.oclc.org/bibformats/en/0xx/035.html 035 field}
81
+ # for search.
82
+ # @param [MARC::Record] record
83
+ # @return [Array<String>]
84
+ def oclc_id_search(record)
85
+ record.fields('035').flat_map do |field|
86
+ field.filter_map do |subfield|
87
+ # skip unless subfield 'a' or 'z'
88
+ next unless subfield.code.in?(%w[a z])
89
+
90
+ # skip unless subfield value matches OCLC ID
91
+ next unless subfield_is_oclc?(subfield)
92
+
93
+ # search for numeric part of oclc id
94
+ match = match_oclc_number(subfield)
74
95
 
75
96
  # skip unless search to find numeric part of oclc id has a match
76
97
  next unless match
@@ -143,7 +164,19 @@ module PennMARC
143
164
  # @param [MARC::Subfield]
144
165
  # @return [TrueClass, FalseClass]
145
166
  def subfield_a_is_oclc?(subfield)
146
- subfield.code == 'a' && (subfield.value =~ /^\(OCoLC\).*/).present?
167
+ subfield.code == 'a' && subfield_is_oclc?(subfield)
168
+ end
169
+
170
+ # @param [MARC::Subfield]
171
+ # @return [TrueClass, FalseClass]
172
+ def subfield_is_oclc?(subfield)
173
+ (subfield.value =~ /^\(OCoLC\).*/).present?
174
+ end
175
+
176
+ # @param [MARC::Subfield]
177
+ # @return [MatchData, nil]
178
+ def match_oclc_number(subfield)
179
+ /^\s*\(OCoLC\)[^1-9]*([1-9][0-9]*).*$/.match(subfield.value)
147
180
  end
148
181
 
149
182
  # Normalize isbn value using {https://github.com/billdueber/library_stdnums library_stdnums gem}.
@@ -4,10 +4,10 @@ module PennMARC
4
4
  # Logic for extracting and translating Language values for a record. Penn practice is to verify the value present in
5
5
  # the {https://www.oclc.org/bibformats/en/fixedfield/lang.html 008 control field} as a three letter code. This code
6
6
  # is then mapped to a display-friendly value using a provided mapping hash.
7
- # @todo should we consider values in the {https://www.oclc.org/bibformats/en/0xx/041.html 041 field}?
8
7
  class Language < Helper
9
8
  # Used when no value is present in the control field - still mapped
10
9
  UNDETERMINED_CODE = :und
10
+ LANGUAGE_SUBFIELDS = %w[a b d e g h i j k m n p q r t].freeze
11
11
 
12
12
  class << self
13
13
  # Get language values for display from the {https://www.oclc.org/bibformats/en/5xx/546.html 546 field} and
@@ -21,16 +21,31 @@ module PennMARC
21
21
  values + linked_alternate(record, '546', &subfield_not_in?(%w[6 8]))
22
22
  end
23
23
 
24
- # Get language values for searching and faceting of a record. The value is extracted from a defined position in
25
- # the 008 control field. Language facet and search values will typically be the same.
24
+ # Get language values for searching and faceting of a record. The values are extracted from subfields
25
+ # in the 041 field. Language facet and search values will typically be the same, with the exception of `zxx`,
26
+ # when no linguistic content is found.
27
+ #
28
+ # @note In franklin, we extracted the language code from the 008 control field. After engaging cataloging unit
29
+ # representatives, we decided to extract these values from the 041 field, which is used for multilingual
30
+ # items, items that involve translation, and items where the medium of communication is a sign language.
31
+ # https://www.loc.gov/marc/bibliographic/bd041.html
26
32
  #
27
33
  # @param [MARC::Record] record
28
- # @param [Hash] language_map hash for language code translation
29
- # @return [String] nice value for language
30
- def search(record, language_map: Mappers.language)
34
+ # @param [Hash] iso_639_2_mapping iso-639-2 spec hash for language code translation
35
+ # @param [Hash] iso_639_3_mapping iso-639-3 spec hash for language code translation
36
+ # @return [Array] array of language values
37
+ def values(record, iso_639_2_mapping: Mappers.iso_639_2_language, iso_639_3_mapping: Mappers.iso_639_3_language)
38
+ values = record.fields('041').filter_map { |field|
39
+ mapper = subfield_value?(field, '2', /iso639-3/) ? iso_639_3_mapping : iso_639_2_mapping
40
+ field.filter_map do |sf|
41
+ next unless LANGUAGE_SUBFIELDS.include? sf.code
42
+
43
+ mapper[sf.value&.to_sym]
44
+ end
45
+ }.flatten
31
46
  control_field = record['008']&.value
32
- language_code = control_field[35..37]
33
- language_map[language_code.to_sym || UNDETERMINED_CODE]
47
+ values << iso_639_2_mapping[control_field[35..37]&.to_sym] if control_field.present?
48
+ values.empty? ? values << iso_639_2_mapping[UNDETERMINED_CODE] : values.uniq
34
49
  end
35
50
  end
36
51
  end
@@ -5,8 +5,12 @@ module PennMARC
5
5
  class Mappers
6
6
  class << self
7
7
  # @return [Hash]
8
- def language
9
- @language ||= load_map('language.yml')
8
+ def iso_639_2_language
9
+ @iso_639_2_language ||= load_map('iso639-2-languages.yml')
10
+ end
11
+
12
+ def iso_639_3_language
13
+ @iso_639_3_language ||= load_map('iso639-3-languages.yml')
10
14
  end
11
15
 
12
16
  # @return [Hash]