RubyGems - berkeley_library-tind - Versions diffs - 0.5.1 → 0.6.0 - Mend

berkeley_library-tind 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (72) hide show

data/lib/berkeley_library/tind/mapping/match_tind_field.rb ADDED Viewed

@@ -0,0 +1,77 @@
+require 'marc'
+module BerkeleyLibrary
+  module TIND
+    module Mapping
+      module MatchTindField
+        # return regular fields without matched 880 fields
+        # return 880 fields without matched regular fields
+        def un_matched_fields_880(fields, mms_id)
+          unmached_fields = []
+          str_arr_from_880 = subfield6_values_from_880_fields(fields)
+          str_arr_from_regular = subfield6_values_from_regular_fields(fields)
+          fields_tobe_validated = fields_need_880_validation(fields)
+          fields_880_tobe_validated = fields_tobe_validated.select { |f| is_880_field?(f) }
+          fields_regular_tobe_validated = fields_tobe_validated.reject { |f| is_880_field?(f) }
+          unmached_fields.concat un_matched_fields(fields_880_tobe_validated, str_arr_from_regular)
+          unmached_fields.concat un_matched_fields(fields_regular_tobe_validated, str_arr_from_880)
+          log_warning(unmached_fields, mms_id)
+        end
+        def check_abnormal_formated_subfield6(fields)
+          fields.each { |f| check_subfield6_format(f) if check_subfield6?(f) }
+        end
+        private
+        def subfield6_values_from_880_fields(fields)
+          formated_subfield6_value_arr(fields_by(fields) { |f| is_880_field?(f) })
+        end
+        def subfield6_values_from_regular_fields(fields)
+          formated_subfield6_value_arr(fields_by(fields) { |f| !is_880_field?(f) })
+        end
+        def fields_need_880_validation(fields)
+          fields_with_subfield6(fields).reject { |f| subfield6_endwith_00?(f) }
+        end
+        # return true when field has a matched 880 field,
+        # or an 880 field has a matched regular field
+        def match?(f, arr)
+          str = formated_subfield6_value(f)
+          arr.include? str
+        end
+        def un_matched_fields(fields, arr)
+          fds = []
+          fields.each { |f| fds << f unless match?(f, arr) }
+          fds
+        end
+        def log_warning(fields, mms_id)
+          warning_message_for_rspec = []
+          fields.each do |f|
+            msg = "Please check 880 matching: mms_id: #{mms_id}, tag: #{f.tag}, value: #{f['6']} "
+            warning_message_for_rspec << msg
+            logger.warn(msg)
+          end
+          warning_message_for_rspec
+        end
+        def check_subfield6?(f)
+          return false if ::MARC::ControlField.control_tag?(f.tag)
+          f['6'] ? true : false
+        end
+      end
+    end
+  end
+end

data/lib/berkeley_library/tind/mapping/misc.rb ADDED Viewed

@@ -0,0 +1,69 @@
+require 'marc'
+module BerkeleyLibrary
+  module TIND
+    module Mapping
+      module Misc
+        #### referred tag ###
+        def origin_mapping_tag(f)
+          is_880_field?(f) ? referred_tag(f) : f.tag
+        end
+        # get the 880 referred tag.
+        # An example $6 value: '650-05/$1', referred tag is 650
+        def referred_tag(field)
+          return nil unless subfield6?(field)
+          field['6'].strip.split('-')[0]
+        end
+        # check a tag in subfield 6 of a 880 datafield
+        def field_880_has_referred_tag?(tag, field)
+          referred_tag_from_880 = referred_tag(field)
+          return false unless referred_tag_from_880
+          referred_tag_from_880 == tag
+        end
+        ### referred tag end ###
+        # add subfield6 validation
+        def check_subfield6_format(f)
+          val = f['6']
+          reg1 = %r{^\d{3}-\d{2}/}
+          reg2 = /^\d{3}-\d{2}$/
+          logger.warn("Unusual subfield6 format: #{val}; correct format examples: 1) 880-02 ; 2)246-02/$1") unless reg1.match(val) || reg2.match(val)
+        end
+        private
+        # manipulate original values
+        # Delete characters when occuring at the end of a subfield value
+        def rm_punctuation(str)
+          return str if str.empty? || str.nil?
+          punctuations = Config.punctuations
+          char = str[-1]
+          return str unless punctuations.include? char
+          rm_punctuation(str.delete_suffix!(char))
+        end
+        def clr_value(value)
+          new_value = rm_punctuation(value)
+          ['[', ']'].each { |v| value.gsub!(v, ' ') }
+          new_value.strip
+        end
+        # input example: 1) 880-02 ; 2)246-02/$1
+        def seq_no(value)
+          # logger if not started with ***-** format
+          value.split('/')[0].split('-')[1].to_i # nil.to_i => 0, ''.to_i = >0
+        end
+      end
+    end
+  end
+end

data/lib/berkeley_library/tind/mapping/multiple_rule.rb ADDED Viewed

@@ -0,0 +1,36 @@
+module BerkeleyLibrary
+  module TIND
+    module Mapping
+      class MultipleRule
+        include Util
+        attr_reader :tag_origin
+        attr_reader :tag_destination
+        attr_reader :indicator
+        attr_reader :pre_existed_tag
+        attr_reader :subfield_key
+        attr_reader :position_from_to
+        def initialize(row)
+          @tag_origin = row[:tag_origin]
+          @tag_destination = row[:tag_destination]
+          @indicator = Util.indicator(row[:new_indecator])
+          @pre_existed_tag = row[:map_if_no_this_tag_existed]
+          @subfield_key = row[:subfield_key]
+          @position_from_to = extract_position(row[:value_from], row[:value_to])
+        end
+        private
+        # return an array with string positons for extracting value
+        def extract_position(f, t)
+          return nil unless f && t
+          [f.to_i, t.to_i]
+        end
+      end
+    end
+  end
+end

data/lib/berkeley_library/tind/mapping/single_rule.rb ADDED Viewed

@@ -0,0 +1,143 @@
+module BerkeleyLibrary
+  module TIND
+    module Mapping
+      # TODO: ADD CSV VALIDATION WHEN A NEW MAPPING CSV FILE NEEDS TO BE ADDED
+      # 1. no empty rows
+      # 2. a subfield name can appear in either normal mapping or combined mapping, not both
+      # 3. single map has the same number of from names and to names
+      # 4. Combined mapping should have three columns, validate more?
+      # 5. Combined from_subfield, to_subfield should have values, not empty
+      # 6. Tag from row[:map_if_no_this_tag_subfield_existed], row[:map_if_no_this_tag_existed] # This tag should be the same as destination tag
+      # 7. In single map csv, one row cannot have both "map_if_no_this_tag_existed" (245) and ":map_if_no_this_tag_subfield_existed"
+      #      (245__b) because the tags in these two columns are identical
+      # 8. csv file validation - a row should have columns: tag origin and destination ? single rule
+      # 9. Validating these column names
+      # 10. csv file validation - a row should have columns: tag origin and destination ? single rule
+      # 11. validating headers
+      # 12. Formats for some of the columns
+      class SingleRule
+        include Util
+        attr_reader :tag_origin
+        attr_reader :tag_destination
+        attr_reader :indicator
+        attr_reader :pre_existed_tag
+        attr_reader :pre_existed_tag_subfield
+        attr_reader :single_rule_hash
+        attr_reader :single_rule_subfield_excluded_hash
+        attr_reader :combined_rules
+        def initialize(row)
+          @tag_origin = row[:tag_origin]
+          @tag_destination = row[:tag_destination]
+          @indicator = Util.indicator(row[:new_indecator])
+          @pre_existed_tag = row[:map_if_no_this_tag_existed]
+          @pre_existed_tag_subfield = existed_tag_subfield(row[:map_if_no_this_tag_subfield_existed]) # This tag should be the same as destination tag
+          @single_rule_hash = single_map_dic(row[:subfield_single_from], row[:subfield_single_to])
+          @single_rule_subfield_excluded_hash = single_map_subfield_excluded_dic
+          @combined_rules = rules_with_same_subfield_name(row)
+        end
+        # 1. Return an array of combined rules, an item in the array
+        #    is an array of rules which have the same 'to subfield name'
+        # 2. An example: [[["a,b,c,d", "b", "--"],["o,p,q", "b", ""]],[["x,y,z", "a", "--"]]]
+        def rules_with_same_subfield_name(row)
+          rules = all_combined_rules(row)
+          identical_to_subfield_names = unique_tosubfield_names(rules)
+          identical_to_subfield_names.each_with_object([]) do |name, result|
+            result << rules_with_sametosubfield(name, rules)
+          end
+        end
+        private
+        # return an array of tag and subfield name, example '255__a' => ['255','a']
+        def existed_tag_subfield(str)
+          str.nil? ? nil : str.split('__')
+        end
+        # list identical 'to subfield name's from combined mapping rules
+        # (an example rule ['a,b,c', 'b', ' -- '])
+        def unique_tosubfield_names(rules)
+          names = rules.map { |rule| rule[1] }
+          names.uniq
+        end
+        # numbers of combined rules
+        def combined_rule_counts(row)
+          headers = row.headers
+          headers.select { |h| h.to_s.include? 'subfield_combined_from_' }.count
+        end
+        # Three coulumns 'subfield_combined_from_*','subfield_combined_to_*','symbol_*'
+        # define a combined mapping rule
+        def combined_rule(row, i)
+          from_subfield = row["subfield_combined_from_#{i}".to_sym]
+          to_subfield = row["subfield_combined_to_#{i}".to_sym]
+          s = row["symbol_#{i}".to_sym]
+          from_subfield.nil? || to_subfield.nil? ? nil : [from_subfield, to_subfield, s] # add validation rule , such as not empty later
+        end
+        def all_combined_rules(row)
+          rules = []
+          n = combined_rule_counts(row)
+          (1..n).each do |i|
+            rule = combined_rule(row, i)
+            rules << rule if rule
+          end
+          rules
+        end
+        # list all combined rules with the same 'to subfield name'
+        def rules_with_sametosubfield(name, rules)
+          rules.each_with_object([]) do |rule, result|
+            result << rule if rule[1] == name
+          end
+        end
+        # Define a hash for single map rules, key is 'from subfield name',
+        # value is 'to subfield name'
+        def single_map_dic(str_from, str_to)
+          dic = {}
+          if  should_single_map?(str_from, str_to)
+            arr_from = str_from.strip.split(',')
+            arr_to = str_to.strip.split(',')
+            arr_from.each_with_index { |from_name, i| dic[from_name.to_s] = arr_to[i].to_s }
+          end
+          dic
+        end
+        # Hash removed the excluding subfield
+        def single_map_subfield_excluded_dic
+          dic = @single_rule_hash
+          return dic unless excluding_subfield?
+          excluding_subfield_name = @pre_existed_tag_subfield[1].to_s
+          dic.delete(excluding_subfield_name) if dic.key? excluding_subfield_name
+          dic
+        end
+        # Check excluding subfield
+        def excluding_subfield?
+          return false unless @pre_existed_tag_subfield
+          return false unless @pre_existed_tag_subfield[1]
+          true
+        end
+        # add this to validation
+        def should_single_map?(str_from, str_to)
+          return false unless str_from && str_to
+          arr_from = str_from.split(',')
+          arr_to = str_to.split(',')
+          return false unless arr_from.count == arr_to.count
+          true
+        end
+      end
+    end
+  end
+end

data/lib/berkeley_library/tind/mapping/tind_control_subfield.rb ADDED Viewed

@@ -0,0 +1,59 @@
+require 'marc'
+module BerkeleyLibrary
+  module TIND
+    module Mapping
+      module TindControlSubfield
+        def extract_value(rule, value)
+          pos = rule.position_from_to
+          return nil unless pos
+          value[pos[0]..pos[1]]
+        end
+        # return a mapped datafield based on rule and extract value
+        def extracted_field(rule, sub_value)
+          subname = rule.subfield_key
+          destiantion_tag = rule.tag_destination
+          indicator = rule.indicator
+          return nil unless subname && destiantion_tag && indicator
+          new_sub_value = clean_subfield_value(destiantion_tag, sub_value)
+          return nil unless new_sub_value
+          new_sub_value = clean_subfield_value(destiantion_tag, sub_value)
+          subfields = [Util.subfield(subname, new_sub_value)]
+          Util.datafield(destiantion_tag, indicator, subfields)
+        end
+        # pass in rules, a string value; return datafields based on rules
+        def extracted_fields_from_leader(leader_rules, leader_value)
+          new_fls = []
+          leader_rules.each do |rule|
+            sub_value = extract_value(rule, leader_value)
+            next unless sub_value
+            newfield = extracted_field(rule, sub_value)
+            new_fls << newfield if newfield
+          end
+          new_fls
+        end
+        private
+        def clean_subfield_value(tag, val)
+          return val if tag != '269'
+          new_val = val.downcase.sub(/u$/, '0')
+          qualified_269?(new_val) ? new_val : nil
+        end
+        def qualified_269?(val)
+          val =~ /^\d{4}$/
+        end
+      end
+    end
+  end
+end

data/lib/berkeley_library/tind/mapping/tind_field.rb ADDED Viewed

@@ -0,0 +1,49 @@
+require 'marc'
+module BerkeleyLibrary
+  module TIND
+    module Mapping
+      module TindField
+        class << self
+          def f_035_from_alma_id(alma_id, value_980)
+            val = "(#{value_980})#{alma_id}"
+            f('035', 'a', val)
+          end
+          def f_035(val)
+            f('035', 'a', val)
+          end
+          def f_245_p(val)
+            f('245', 'p', val)
+          end
+          def f_fft(url, txt = None)
+            return f('FFT', 'a', url) unless txt
+            ::MARC::DataField.new('FFT', ' ', ' ', ['d', txt], ['a', url])
+          end
+          def f_902_d
+            f('902', 'd', Time.now.strftime('%F'))
+          end
+          def f_902_n(name_initial)
+            f('902', 'n', name_initial)
+          end
+          def f_982_p(val)
+            f('982', 'p', val)
+          end
+          def f(tag, code, value)
+            ::MARC::DataField.new(tag, ' ', ' ', [code, value])
+          end
+        end
+      end
+    end
+  end
+end

data/lib/berkeley_library/tind/mapping/tind_field_from_leader.rb ADDED Viewed

@@ -0,0 +1,27 @@
+require 'marc'
+require 'berkeley_library/tind/mapping/tind_control_subfield'
+module BerkeleyLibrary
+  module TIND
+    require 'marc'
+    module Mapping
+      class TindFieldFromLeader
+        include CsvMultipleMapper
+        include Util
+        include TindControlSubfield
+        def initialize(record)
+          @leader_value = record.leader
+        end
+        def to_datafields
+          leader_rules = rules[Util.tag_symbol('LDR')]
+          return [] unless @leader_value && leader_rules
+          extracted_fields_from_leader(leader_rules, @leader_value)
+        end
+      end
+    end
+  end
+end

data/lib/berkeley_library/tind/mapping/tind_field_from_multiple_map.rb ADDED Viewed

@@ -0,0 +1,59 @@
+require 'marc'
+require 'berkeley_library/tind/mapping/tind_control_subfield'
+module BerkeleyLibrary
+  module TIND
+    module Mapping
+      class TindFieldFromMultipleMap
+        include CsvMultipleMapper
+        include Util
+        include TindControlSubfield
+        def initialize(controlfield, current_datafields)
+          @from_controlfield = controlfield
+          @current_tags = current_datafields.map(&:tag)
+        end
+        def to_datafields
+          datafields = []
+          control_rules = rules_on_controldatafield
+          if control_rules
+            control_rules.each do |rule|
+              df = to_datafield(rule)
+              datafields << df if df
+            end
+          end
+          datafields
+        end
+        private
+        # one control field may have multiple rules
+        def rules_on_controldatafield
+          tag = @from_controlfield.tag
+          sym = Util.tag_symbol(tag)
+          rules[sym]
+        end
+        # Check mapped current datafields has the pre-existed tag defined in the row (rule) of csv file
+        def pre_exsited_tag?(rule)
+          @current_tags.include? rule.pre_existed_tag.to_s
+        end
+        # get a datafield on a rule (row in csv file)
+        def to_datafield(rule)
+          return nil if pre_exsited_tag?(rule)
+          to_value = extract_value(rule, @from_controlfield.value)
+          return nil unless to_value
+          extracted_field(rule, to_value)
+        end
+      end
+    end
+  end
+end

data/lib/berkeley_library/tind/mapping/tind_field_from_single_map.rb ADDED Viewed

@@ -0,0 +1,170 @@
+require 'marc'
+require 'berkeley_library/tind/mapping/util'
+require 'berkeley_library/tind/mapping/tind_subfield_util'
+# 1. datafield could be a regular alma field
+# 1) data_fields_normal - using @single_rule_hash from SingleRule
+# 2) data_fields_with_pre_existed_field - using @single_rule_hash from SingleRule
+# 3) data_fields_with_pre_existed_subfield - using @single_rule_subfield_excluded_hash from SingleRule
+# 2. datafield could be an 880 alma field; the types below are defined based on the tag from subfield6
+# 1) data_fields_normal - using @single_rule_hash from SingleRule
+# 2) data_fields_with_pre_existed_field - using @single_rule_hash from SingleRule
+# 3) data_fields_with_pre_existed_subfield - using @single_rule_subfield_excluded_hash from SingleRule
+# 3. map_to_tag, indicator are from the mapping rule for the output tindfield
+# 4. subfields are re-mapped, or combined, and used as subfields for the output tindfield
+module BerkeleyLibrary
+  module TIND
+    module Mapping
+      class TindFieldFromSingleMap
+        include CsvMapper
+        include Util
+        include TindSubfieldUtil
+        include Misc
+        # excluding_subfield = false: mapping by rule.single_rule_hash
+        # excluding_subfield = true: mapping by rule.single_rule_subfield_excluded_hash
+        def initialize(datafield, excluding_subfield)
+          @from_datafield = datafield
+          @excluding_subfield = excluding_subfield
+          @is_880_field = is_880_field?(datafield)
+          @mapping_rule = rule
+          @map_to_tag = nil
+          @indicator = nil
+          @single_mapping = nil
+          @ready_to_mapping = ready_to_mapping?
+          @to_subfields = all_subfields
+        end
+        def to_datafield
+          return nil unless mapped?
+          tindfield = Util.datafield(@map_to_tag, @indicator, @to_subfields)
+          @is_880_field ? reversed_880_field(tindfield) : tindfield
+        end
+        private
+        # A referred tag from 880 subfield6 may not have a rule
+        # For example: 880 subfild6 pass in a value in wrong format
+        # In above case, rule is nil
+        # Get mapping parameters from rule when having a rule
+        def ready_to_mapping?
+          return false unless @mapping_rule
+          @map_to_tag = @mapping_rule.tag_destination
+          @indicator =  @mapping_rule.indicator
+          @single_mapping = @excluding_subfield ? @mapping_rule.single_rule_subfield_excluded_hash : @mapping_rule.single_rule_hash
+          return false unless @map_to_tag && @indicator && !@single_mapping.empty?
+          true
+        end
+        def mapped?
+          !@to_subfields.empty?
+        end
+        # tag - regular alma field
+        # referred tag - got tag from subfield6 value of a 880 field
+        # nil rule caused by nil referred tag - eg. 880 subfild6 pass in a value in wrong format
+        def rule
+          tag = origin_mapping_tag(@from_datafield)
+          return nil unless tag
+          rules[Util.tag_symbol(tag)]
+        end
+        def all_subfields
+          @ready_to_mapping ? (subfields_from_single_map + subfields_from_combined_map) : []
+        end
+        # 1.subfields mapped with single rule, mapping one subfield to another subfield
+        # 2. one subfield is mapped to one subfield
+        # 3. When mutiple subfields with the same name found in an orignal field,
+        # they will be mapped one by one
+        def subfields_from_single_map
+          return [] if @single_mapping.empty?
+          mapped_subfields = []
+          @single_mapping.each do |from, to|
+            subfields = subfields_from_to(@from_datafield, from, to)
+            mapped_subfields.concat(subfields)
+          end
+          mapped_subfields
+        end
+        # return all subfields mapped with diferent combined rules - different destination subfield names
+        # mapped with all combined rules, exmaple: [[["a,b,c,d", "b", "--"],["o,p,q", "b", ""]],[["x,y,z", "a", "--"]]]
+        # mapping using above example rules will return two subfield: $b, $a
+        def subfields_from_combined_map
+          all_rules = @mapping_rule.combined_rules
+          return [] if all_rules.empty?
+          mapped_subfields = []
+          all_rules.each do |rules|
+            subfield = subfield_on_same_tosubfieldname(rules)
+            mapped_subfields.push(subfield) if subfield
+          end
+          mapped_subfields
+        end
+        # create one subfield with a desintaion subfield name
+        # input array of rules example: [["a,b,c,d", "b", "--"],["o,p,q", "b", ""]] -- all rules with the same destination subfield name "b"
+        # get a subfield$b with a concatenated value
+        def subfield_on_same_tosubfieldname(rules)
+          return nil if rules.empty?
+          val = subfield_value_on_rules(rules)
+          return nil if val.strip.empty?
+          subfield_name_to = rules[0][1]
+          Util.subfield(subfield_name_to, Util.remove_extra_symbol(rules, val))
+        end
+        # input an array of rules, example: [["a,b,c,d", "b", "--"],["o,p,q", "b", ""]]
+        # Theese rules have the same destination subfield name, for example "b" in above example
+        # get a value concatenated with values mapped using different rules
+        def subfield_value_on_rules(rules)
+          val = ''
+          rules.each { |rule| val << subfield_value_on_rule(rule) }
+          val
+        end
+        # input a rule (for example ["a,b,c,d", "b", "--"]),
+        # get a combined value of subfield a,b,c,d concatenated by " -- " as above example
+        # One subfield names may occurs mutiple times in a an orignal field
+        def subfield_value_on_rule(rule)
+          subfield_names_from = rule[0].strip.split(',')
+          symbol = Util.concatenation_symbol(rule[2])
+          val = ''
+          subfield_names_from.each do |subfield_name|
+            sub_val = combined_subfield_value(@from_datafield, subfield_name, symbol)
+            val << sub_val
+          end
+          val
+        end
+        # 880 datafield: reverse tag from 'to_tag' defined mapping rule to '880'
+        def reversed_880_field(f)
+          update_datafield6(f)
+          f.tag = '880'
+          f
+        end
+        # update subfield6 tag with destination tag from the rule
+        # since an origin tag may have been mapped a different tag - destination tag
+        def update_datafield6(f) # need test
+          f['6'].sub!(@mapping_rule.tag_origin, @mapping_rule.tag_destination)
+        end
+      end
+    end
+  end
+end