pennmarc 1.0.2 → 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 33527bf43532170690b9b591097fb03f854130b3383debfafe931417d0394f52
4
- data.tar.gz: a18a5be08df6d7b74d2aaa61275cdecdedec6caa4b5e8b2b81c99e91411ce093
3
+ metadata.gz: 50fd0383f0e78807f62f8abe784f75d8dace6e5b0ecf64877f79ad90b6d40354
4
+ data.tar.gz: 3df19d10534fc787c55814e30bd1b066a8c5ea10e7da15f9b320c4500891e0be
5
5
  SHA512:
6
- metadata.gz: 627a4da06351037f520bc02b0a9fd61ce6cfdec35c563f2b29e3c1c01b4ad76766f155630cafe680b010783ea4f4c285df2d499fd784b7ee4f3cb4948cd421dd
7
- data.tar.gz: c966233bb00009a14babc5bc92f0399e75640e886f7c32d176fea8983813aca8936d0e8fab98576a797afaf0b60c5f24268c2ae4e2d8cca31900e590db0a9bc6
6
+ metadata.gz: 277bb7c15e224c8134b8cea8b28de474d27022a1537904844af8c5542ff3f0080c37f13db38a5a9463a07453bcc403ccf346c2b5538c2fe26012df6a67dec24f
7
+ data.tar.gz: 1ca8e643758f86aeabdfbeb1576f513c866ba5ef1a40b6edaa279f687af7644baf6b953db6f078548fc72ca99b75b6377db7d80d98a19059c797b6ad41b5f648
data/.gitleaks.toml ADDED
@@ -0,0 +1,2 @@
1
+ [file]
2
+ paths-exclude = ["lib/pennmarc/mappings/language.yml"]
data/README.md CHANGED
@@ -73,7 +73,7 @@ rspec
73
73
 
74
74
  1. Update the version in `pennmarc.gemspec`
75
75
  2. Run `gem build pennmarc.gemspec` with the latest code
76
- 3. Run `gem push pennmarc-{version number here}`(e.g. `gem push pennmarc-1.0.0`) to push to RubyGems. You will need access and MFA setup with RubyGems.
76
+ 3. Run `gem push pennmarc-{version number here}.gem` (e.g. `gem push pennmarc-1.0.0.gem`) to push to RubyGems. You will need access and MFA set up with RubyGems.
77
77
 
78
78
  ## QA
79
79
 
@@ -89,4 +89,4 @@ MARC_FILE=path/to/marc.xml bundle exec rake pennmarc:parse
89
89
  - rake task or some similar command to return a full set of values extracted from a specified marcxml file
90
90
  - Pipeline to run tests and publish to Rubygems
91
91
  - rubocop check
92
- - rdoc/yard coverage checks?
92
+ - rdoc/yard coverage checks?
@@ -25,21 +25,17 @@ module PennMARC
25
25
  # @return [DateTime, nil] The date added, or nil if date found in record is invalid
26
26
  def added(record)
27
27
  record.fields(EnrichedMarc::TAG_ITEM).flat_map { |field|
28
- field.filter_map do |subfield|
29
- # skip unless field has date created subfield
30
- next unless subfield_defined?(field, EnrichedMarc::SUB_ITEM_DATE_CREATED)
31
-
28
+ subfield_values(field, EnrichedMarc::SUB_ITEM_DATE_CREATED).filter_map do |date_added|
32
29
  # On 2022-05-02, this field value (as exported in enriched publishing
33
30
  # job from Alma) began truncating time to day-level granularity. We have
34
31
  # no guarantee that this won't switch back in the future, so for the
35
32
  # foreseeable future we should support both formats.
36
33
 
37
- format = subfield.value.size == 10 ? '%Y-%m-%d' : '%Y-%m-%d %H:%M:%S'
38
-
39
- DateTime.strptime(subfield.value, format)
34
+ format = date_added.size == 10 ? '%Y-%m-%d' : '%Y-%m-%d %H:%M:%S'
40
35
 
36
+ DateTime.strptime(date_added, format)
41
37
  rescue StandardError => e
42
- puts "Error parsing date in date added subfield: #{subfield.value} - #{e}"
38
+ puts "Error parsing date in date added subfield: #{date_added} - #{e}"
43
39
  nil
44
40
  end
45
41
  }.max
@@ -41,10 +41,10 @@ module PennMARC
41
41
  results += record.fields('880').map do |f|
42
42
  subfield_to_ignore = if subfield_value?(f, 6, /^300/)
43
43
  %w[3 6 8]
44
- elsif subfield_value?(f, 6, /^(254|255|310|342|352|362)/)
45
- %w[6 8]
46
44
  elsif subfield_value?(f, 6, /^340/)
47
45
  %w[0 2 6 8]
46
+ else
47
+ %w[6 8]
48
48
  end
49
49
  join_subfields(f, &subfield_not_in?(subfield_to_ignore))
50
50
  end
@@ -23,7 +23,7 @@ module PennMARC
23
23
  if field.tag == '020'
24
24
  field.filter_map { |subfield| normalize_isbn(subfield.value) if subfield_in?(%w[a z]).call(subfield) }
25
25
  else
26
- field.filter_map { |subfield| subfield.value if subfield_in?(%w[a l z]).call(subfield) }
26
+ field.filter_map { |subfield| subfield.value if subfield_in?(%w[a l m y z]).call(subfield) }
27
27
  end
28
28
  }.flatten.uniq
29
29
  end
@@ -56,21 +56,42 @@ module PennMARC
56
56
 
57
57
  # Get numeric OCLC ID of first {https://www.oclc.org/bibformats/en/0xx/035.html 035 field}
58
58
  # with an OCLC ID defined in subfield 'a'.
59
- #
60
- # @todo We should evaluate this to return a single value in the future since subfield a is non-repeatable
61
59
  # @param [MARC::Record] record
62
- # @return [Array<String>]
63
- def oclc_id(record)
64
- oclc_id = Array.wrap(record.fields('035')
65
- .find { |field| field.any? { |subfield| subfield_a_is_oclc?(subfield) } })
66
-
67
- oclc_id.flat_map do |field|
60
+ # @return [String, nil]
61
+ def oclc_id_show(record)
62
+ ids = Array.wrap(record.fields('035')
63
+ .find { |field| field.any? { |subfield| subfield_a_is_oclc?(subfield) } })
64
+ ids.flat_map { |field|
68
65
  field.filter_map do |subfield|
69
66
  # skip unless subfield 'a' is an oclc id value
70
67
  next unless subfield_a_is_oclc?(subfield)
71
68
 
72
69
  # search for numeric part of oclc id (e.g. '610094484' in '(OCoLC)ocn610094484')
73
- match = /^\s*\(OCoLC\)[^1-9]*([1-9][0-9]*).*$/.match(subfield.value)
70
+ match = match_oclc_number(subfield)
71
+
72
+ # skip unless search to find numeric part of oclc id has a match
73
+ next unless match
74
+
75
+ match[1]
76
+ end
77
+ }.first
78
+ end
79
+
80
+ # Retrieve valid and invalid numeric OCLC IDs from {https://www.oclc.org/bibformats/en/0xx/035.html 035 field}
81
+ # for search.
82
+ # @param [MARC::Record] record
83
+ # @return [Array<String>]
84
+ def oclc_id_search(record)
85
+ record.fields('035').flat_map do |field|
86
+ field.filter_map do |subfield|
87
+ # skip unless subfield 'a' or 'z'
88
+ next unless subfield.code.in?(%w[a z])
89
+
90
+ # skip unless subfield value matches OCLC ID
91
+ next unless subfield_is_oclc?(subfield)
92
+
93
+ # search for numeric part of oclc id
94
+ match = match_oclc_number(subfield)
74
95
 
75
96
  # skip unless search to find numeric part of oclc id has a match
76
97
  next unless match
@@ -143,7 +164,19 @@ module PennMARC
143
164
  # @param [MARC::Subfield]
144
165
  # @return [TrueClass, FalseClass]
145
166
  def subfield_a_is_oclc?(subfield)
146
- subfield.code == 'a' && (subfield.value =~ /^\(OCoLC\).*/).present?
167
+ subfield.code == 'a' && subfield_is_oclc?(subfield)
168
+ end
169
+
170
+ # @param [MARC::Subfield]
171
+ # @return [TrueClass, FalseClass]
172
+ def subfield_is_oclc?(subfield)
173
+ (subfield.value =~ /^\(OCoLC\).*/).present?
174
+ end
175
+
176
+ # @param [MARC::Subfield]
177
+ # @return [MatchData, nil]
178
+ def match_oclc_number(subfield)
179
+ /^\s*\(OCoLC\)[^1-9]*([1-9][0-9]*).*$/.match(subfield.value)
147
180
  end
148
181
 
149
182
  # Normalize isbn value using {https://github.com/billdueber/library_stdnums library_stdnums gem}.
@@ -4,10 +4,10 @@ module PennMARC
4
4
  # Logic for extracting and translating Language values for a record. Penn practice is to verify the value present in
5
5
  # the {https://www.oclc.org/bibformats/en/fixedfield/lang.html 008 control field} as a three letter code. This code
6
6
  # is then mapped to a display-friendly value using a provided mapping hash.
7
- # @todo should we consider values in the {https://www.oclc.org/bibformats/en/0xx/041.html 041 field}?
8
7
  class Language < Helper
9
8
  # Used when no value is present in the control field - still mapped
10
9
  UNDETERMINED_CODE = :und
10
+ LANGUAGE_SUBFIELDS = %w[a b d e g h i j k m n p q r t].freeze
11
11
 
12
12
  class << self
13
13
  # Get language values for display from the {https://www.oclc.org/bibformats/en/5xx/546.html 546 field} and
@@ -21,16 +21,31 @@ module PennMARC
21
21
  values + linked_alternate(record, '546', &subfield_not_in?(%w[6 8]))
22
22
  end
23
23
 
24
- # Get language values for searching and faceting of a record. The value is extracted from a defined position in
25
- # the 008 control field. Language facet and search values will typically be the same.
24
+ # Get language values for searching and faceting of a record. The values are extracted from subfields
25
+ # in the 041 field, plus the language code in the 008 control field when present. Language facet and search values will typically be the same, with the exception of `zxx`,
26
+ # when no linguistic content is found.
27
+ #
28
+ # @note In franklin, we extracted the language code from the 008 control field. After engaging cataloging unit
29
+ # representatives, we decided to extract these values from the 041 field, which covers records for multilingual
30
+ # items, items that involve translation, and items where the medium of communication is a sign language.
31
+ # https://www.loc.gov/marc/bibliographic/bd041.html
26
32
  #
27
33
  # @param [MARC::Record] record
28
- # @param [Hash] language_map hash for language code translation
29
- # @return [String] nice value for language
30
- def search(record, language_map: Mappers.language)
34
+ # @param [Hash] iso_639_2_mapping iso-639-2 spec hash for language code translation
35
+ # @param [Hash] iso_639_3_mapping iso-639-3 spec hash for language code translation
36
+ # @return [Array] array of language values
37
+ def values(record, iso_639_2_mapping: Mappers.iso_639_2_language, iso_639_3_mapping: Mappers.iso_639_3_language)
38
+ values = record.fields('041').filter_map { |field|
39
+ mapper = subfield_value?(field, '2', /iso639-3/) ? iso_639_3_mapping : iso_639_2_mapping
40
+ field.filter_map do |sf|
41
+ next unless LANGUAGE_SUBFIELDS.include? sf.code
42
+
43
+ mapper[sf.value&.to_sym]
44
+ end
45
+ }.flatten
31
46
  control_field = record['008']&.value
32
- language_code = control_field[35..37]
33
- language_map[language_code.to_sym || UNDETERMINED_CODE]
47
+ values << iso_639_2_mapping[control_field[35..37]&.to_sym] if control_field.present?
48
+ values.empty? ? values << iso_639_2_mapping[UNDETERMINED_CODE] : values.uniq
34
49
  end
35
50
  end
36
51
  end
@@ -5,8 +5,12 @@ module PennMARC
5
5
  class Mappers
6
6
  class << self
7
7
  # @return [Hash]
8
- def language
9
- @language ||= load_map('language.yml')
8
+ def iso_639_2_language
9
+ @iso_639_2_language ||= load_map('iso639-2-languages.yml')
10
+ end
11
+
12
+ def iso_639_3_language
13
+ @iso_639_3_language ||= load_map('iso639-3-languages.yml')
10
14
  end
11
15
 
12
16
  # @return [Hash]