RubyGems - pennmarc - Versions diffs - 1.0.2 → 1.0.4 - Mend

pennmarc 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

checksums.yaml +4 -4
data/.gitleaks.toml +2 -0
data/README.md +2 -2
data/lib/pennmarc/helpers/date.rb +4 -8
data/lib/pennmarc/helpers/format.rb +2 -2
data/lib/pennmarc/helpers/identifier.rb +44 -11
data/lib/pennmarc/helpers/language.rb +23 -8
data/lib/pennmarc/mappers.rb +6 -2
data/lib/pennmarc/mappings/iso639-3-languages.yml +7916 -0
data/lib/pennmarc/version.rb +1 -1
data/spec/lib/pennmarc/helpers/date_spec.rb +16 -0
data/spec/lib/pennmarc/helpers/identifer_spec.rb +21 -5
data/spec/lib/pennmarc/helpers/language_spec.rb +56 -8
data/spec/lib/pennmarc/parser_spec.rb +3 -3
metadata +8 -6
/data/lib/pennmarc/mappings/{language.yml → iso639-2-languages.yml} +0 -0

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 33527bf43532170690b9b591097fb03f854130b3383debfafe931417d0394f52
-  data.tar.gz: a18a5be08df6d7b74d2aaa61275cdecdedec6caa4b5e8b2b81c99e91411ce093
+  metadata.gz: 50fd0383f0e78807f62f8abe784f75d8dace6e5b0ecf64877f79ad90b6d40354
+  data.tar.gz: 3df19d10534fc787c55814e30bd1b066a8c5ea10e7da15f9b320c4500891e0be
 SHA512:
-  metadata.gz: 627a4da06351037f520bc02b0a9fd61ce6cfdec35c563f2b29e3c1c01b4ad76766f155630cafe680b010783ea4f4c285df2d499fd784b7ee4f3cb4948cd421dd
-  data.tar.gz: c966233bb00009a14babc5bc92f0399e75640e886f7c32d176fea8983813aca8936d0e8fab98576a797afaf0b60c5f24268c2ae4e2d8cca31900e590db0a9bc6
+  metadata.gz: 277bb7c15e224c8134b8cea8b28de474d27022a1537904844af8c5542ff3f0080c37f13db38a5a9463a07453bcc403ccf346c2b5538c2fe26012df6a67dec24f
+  data.tar.gz: 1ca8e643758f86aeabdfbeb1576f513c866ba5ef1a40b6edaa279f687af7644baf6b953db6f078548fc72ca99b75b6377db7d80d98a19059c797b6ad41b5f648

data/.gitleaks.toml ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [file]
2	+ paths-exclude = ["lib/pennmarc/mappings/language.yml"]

data/README.md CHANGED Viewed

@@ -73,7 +73,7 @@ rspec
 1. Update the version in `pennmarc.gemspec`
 2. Run `gem build pennmarc.gemspec` with the latest code
-3. Run `gem push pennmarc-{version number here}`(e.g. `gem push pennmarc-1.0.0`) to push to RubyGems. You will need access and MFA setup with RubyGems.
+3. Run `gem push pennmarc-{version number here}.gem`(e.g. `gem push pennmarc-1.0.0.gem`) to push to RubyGems. You will need access and MFA setup with RubyGems.
 ## QA
@@ -89,4 +89,4 @@ MARC_FILE=path/to/marc.xml bundle exec rake pennmarc:parse
  - rake task or some similar command to return a full set of values extracted from a specified marcxml file
  - Pipeline to run tests and publish to Rubygems
     - rubocop check
-    - rdoc/yard coverage checks?
+    - rdoc/yard coverage checks?

data/lib/pennmarc/helpers/date.rb CHANGED Viewed

@@ -25,21 +25,17 @@ module PennMARC
       # @return [DateTime, nil] The date added, or nil if date found in record is invalid
       def added(record)
         record.fields(EnrichedMarc::TAG_ITEM).flat_map { |field|
-          field.filter_map do |subfield|
-            # skip unless field has date created subfield
-            next unless subfield_defined?(field, EnrichedMarc::SUB_ITEM_DATE_CREATED)
+          subfield_values(field, EnrichedMarc::SUB_ITEM_DATE_CREATED).filter_map do |date_added|
             # On 2022-05-02, this field value (as exported in enriched publishing
             # job from Alma) began truncating time to day-level granularity. We have
             # no guarantee that this won't switch back in the future, so for the
             # foreseeable future we should support both formats.
-            format = subfield.value.size == 10 ? '%Y-%m-%d' : '%Y-%m-%d %H:%M:%S'
-            DateTime.strptime(subfield.value, format)
+            format = date_added.size == 10 ? '%Y-%m-%d' : '%Y-%m-%d %H:%M:%S'
+            DateTime.strptime(date_added, format)
           rescue StandardError => e
-            puts "Error parsing date in date added subfield: #{subfield.value} - #{e}"
+            puts "Error parsing date in date added subfield: #{date_added} - #{e}"
             nil
           end
         }.max

data/lib/pennmarc/helpers/format.rb CHANGED Viewed

@@ -41,10 +41,10 @@ module PennMARC
         results += record.fields('880').map do |f|
           subfield_to_ignore = if subfield_value?(f, 6, /^300/)
                                  %w[3 6 8]
-                               elsif subfield_value?(f, 6, /^(254|255|310|342|352|362)/)
-                                 %w[6 8]
                                elsif subfield_value?(f, 6, /^340/)
                                  %w[0 2 6 8]
+                               else
+                                 %w[6 8]
                                end
           join_subfields(f, &subfield_not_in?(subfield_to_ignore))
         end

data/lib/pennmarc/helpers/identifier.rb CHANGED Viewed

@@ -23,7 +23,7 @@ module PennMARC
           if field.tag == '020'
             field.filter_map { |subfield| normalize_isbn(subfield.value) if subfield_in?(%w[a z]).call(subfield) }
           else
-            field.filter_map { |subfield| subfield.value if subfield_in?(%w[a l z]).call(subfield) }
+            field.filter_map { |subfield| subfield.value if subfield_in?(%w[a l m y z]).call(subfield) }
           end
         }.flatten.uniq
       end
@@ -56,21 +56,42 @@ module PennMARC
       # Get numeric OCLC ID of first {https://www.oclc.org/bibformats/en/0xx/035.html 035 field}
       # with an OCLC ID defined in subfield 'a'.
-      #
-      # @todo We should evaluate this to return a single value in the future since subfield a is non-repeatable
       # @param [MARC::Record] record
-      # @return [Array<String>]
-      def oclc_id(record)
-        oclc_id = Array.wrap(record.fields('035')
-                         .find { |field| field.any? { |subfield| subfield_a_is_oclc?(subfield) } })
-        oclc_id.flat_map do |field|
+      # @return [String, nil]
+      def oclc_id_show(record)
+        ids = Array.wrap(record.fields('035')
+                           .find { |field| field.any? { |subfield| subfield_a_is_oclc?(subfield) } })
+        ids.flat_map { |field|
           field.filter_map do |subfield|
             # skip unless subfield 'a' is an oclc id value
             next unless subfield_a_is_oclc?(subfield)
             # search for numeric part of oclc id (e.g. '610094484' in '(OCoLC)ocn610094484')
-            match = /^\s*\(OCoLC\)[^1-9]*([1-9][0-9]*).*$/.match(subfield.value)
+            match = match_oclc_number(subfield)
+            # skip unless search to find numeric part of oclc id has a match
+            next unless match
+            match[1]
+          end
+        }.first
+      end
+      # Retrieve valid and invalid numeric OCLC IDs from {https://www.oclc.org/bibformats/en/0xx/035.html 035 field}
+      # for search.
+      # @param [MARC::Record] record
+      # @return [Array<String>]
+      def oclc_id_search(record)
+        record.fields('035').flat_map do |field|
+          field.filter_map do |subfield|
+            # skip unless subfield 'a' or 'z'
+            next unless subfield.code.in?(%w[a z])
+            # skip unless subfield value matches OCLC ID
+            next unless subfield_is_oclc?(subfield)
+            # search for numeric part of oclc id
+            match = match_oclc_number(subfield)
             # skip unless search to find numeric part of oclc id has a match
             next unless match
@@ -143,7 +164,19 @@ module PennMARC
       # @param [MARC::Subfield]
       # @return [TrueClass, FalseClass]
       def subfield_a_is_oclc?(subfield)
-        subfield.code == 'a' && (subfield.value =~ /^\(OCoLC\).*/).present?
+        subfield.code == 'a' && subfield_is_oclc?(subfield)
+      end
+      # @param [MARC::Subfield]
+      # @return [TrueClass, FalseClass]
+      def subfield_is_oclc?(subfield)
+        (subfield.value =~ /^\(OCoLC\).*/).present?
+      end
+      # @param [MARC::Subfield]
+      # @return [MatchData, nil]
+      def match_oclc_number(subfield)
+        /^\s*\(OCoLC\)[^1-9]*([1-9][0-9]*).*$/.match(subfield.value)
       end
       # Normalize isbn value using {https://github.com/billdueber/library_stdnums library_stdnums gem}.

data/lib/pennmarc/helpers/language.rb CHANGED Viewed

@@ -4,10 +4,10 @@ module PennMARC
   # Logic for extracting and translating Language values for a record. Penn practice is to verify the value present in
   # the {https://www.oclc.org/bibformats/en/fixedfield/lang.html 008 control field} as a three letter code. This code
   # is then mapped to a display-friendly value using a provided mapping hash.
-  # @todo should we consider values in the {https://www.oclc.org/bibformats/en/0xx/041.html 041 field}?
   class Language < Helper
     # Used when no value is present in the control field - still mapped
     UNDETERMINED_CODE = :und
+    LANGUAGE_SUBFIELDS = %w[a b d e g h i j k m n p q r t].freeze
     class << self
       # Get language values for display from the {https://www.oclc.org/bibformats/en/5xx/546.html 546 field} and
@@ -21,16 +21,31 @@ module PennMARC
         values + linked_alternate(record, '546', &subfield_not_in?(%w[6 8]))
       end
-      # Get language values for searching and faceting of a record. The value is extracted from a defined position in
-      # the 008 control field. Language facet and search values will typically be the same.
+      # Get language values for searching and faceting of a record. The values are extracted from subfields
+      # in the 041 field. Language facet and search values will typically be the same, with the exception of `zxx`,
+      # when no linguistic content is found.
+      #
+      # @note In franklin, we extracted the language code from the 008 control field. After engaging cataloging unit
+      #   representatives, we decided to extract these values from the 041 field: Includes records for multilingual
+      #   items, items that involve translation, and items where the medium of communication is a sign language.
+      #   https://www.loc.gov/marc/bibliographic/bd041.html
       #
       # @param [MARC::Record] record
-      # @param [Hash] language_map hash for language code translation
-      # @return [String] nice value for language
-      def search(record, language_map: Mappers.language)
+      # @param [Hash] iso_639_2_mapping iso-639-2 spec hash for language code translation
+      # @param [Hash] iso_639_3_mapping iso-639-3 spec hash for language code translation
+      # @return [Array] array of language values
+      def values(record, iso_639_2_mapping: Mappers.iso_639_2_language, iso_639_3_mapping: Mappers.iso_639_3_language)
+        values = record.fields('041').filter_map { |field|
+          mapper = subfield_value?(field, '2', /iso639-3/) ? iso_639_3_mapping : iso_639_2_mapping
+          field.filter_map do |sf|
+            next unless LANGUAGE_SUBFIELDS.include? sf.code
+            mapper[sf.value&.to_sym]
+          end
+        }.flatten
         control_field = record['008']&.value
-        language_code = control_field[35..37]
-        language_map[language_code.to_sym || UNDETERMINED_CODE]
+        values << iso_639_2_mapping[control_field[35..37]&.to_sym] if control_field.present?
+        values.empty? ? values << iso_639_2_mapping[UNDETERMINED_CODE] : values.uniq
       end
     end
   end

data/lib/pennmarc/mappers.rb CHANGED Viewed

@@ -5,8 +5,12 @@ module PennMARC
   class Mappers
     class << self
       # @return [Hash]
-      def language
-        @language ||= load_map('language.yml')
+      def iso_639_2_language
+        @iso_639_2_language ||= load_map('iso639-2-languages.yml')
+      end
+      def iso_639_3_language
+        @iso_639_3_language ||= load_map('iso639-3-languages.yml')
       end
       # @return [Hash]