berkeley_library-tind 0.4.3 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/build.yml +16 -4
- data/.gitignore +3 -0
- data/.idea/inspectionProfiles/Project_Default.xml +10 -0
- data/.idea/tind.iml +24 -22
- data/CHANGES.md +24 -0
- data/README.md +136 -3
- data/berkeley_library-tind.gemspec +2 -0
- data/bin/alma-multiple-tind +50 -0
- data/bin/alma-single-tind +48 -0
- data/bin/save_tind_records +80 -0
- data/bin/tind-marc +73 -0
- data/lib/berkeley_library/tind/mapping/additional_datafield_process.rb +128 -0
- data/lib/berkeley_library/tind/mapping/alma.rb +42 -0
- data/lib/berkeley_library/tind/mapping/alma_base.rb +101 -0
- data/lib/berkeley_library/tind/mapping/alma_multiple_tind.rb +31 -0
- data/lib/berkeley_library/tind/mapping/alma_single_tind.rb +28 -0
- data/lib/berkeley_library/tind/mapping/config.rb +44 -0
- data/lib/berkeley_library/tind/mapping/csv_mapper.rb +35 -0
- data/lib/berkeley_library/tind/mapping/csv_multiple_mapper.rb +41 -0
- data/lib/berkeley_library/tind/mapping/data/one_to_multiple_mapping.csv +4 -0
- data/lib/berkeley_library/tind/mapping/data/one_to_one_mapping.csv +39 -0
- data/lib/berkeley_library/tind/mapping/external_tind_field.rb +103 -0
- data/lib/berkeley_library/tind/mapping/field_catalog.rb +146 -0
- data/lib/berkeley_library/tind/mapping/field_catalog_util.rb +59 -0
- data/lib/berkeley_library/tind/mapping/match_tind_field.rb +77 -0
- data/lib/berkeley_library/tind/mapping/misc.rb +69 -0
- data/lib/berkeley_library/tind/mapping/multiple_rule.rb +36 -0
- data/lib/berkeley_library/tind/mapping/single_rule.rb +143 -0
- data/lib/berkeley_library/tind/mapping/tind_control_subfield.rb +59 -0
- data/lib/berkeley_library/tind/mapping/tind_field.rb +49 -0
- data/lib/berkeley_library/tind/mapping/tind_field_from_leader.rb +27 -0
- data/lib/berkeley_library/tind/mapping/tind_field_from_multiple_map.rb +59 -0
- data/lib/berkeley_library/tind/mapping/tind_field_from_single_map.rb +170 -0
- data/lib/berkeley_library/tind/mapping/tind_field_util.rb +112 -0
- data/lib/berkeley_library/tind/mapping/tind_marc.rb +134 -0
- data/lib/berkeley_library/tind/mapping/tind_subfield_util.rb +154 -0
- data/lib/berkeley_library/tind/mapping/util.rb +117 -0
- data/lib/berkeley_library/tind/mapping.rb +1 -0
- data/lib/berkeley_library/tind/marc/xml_builder.rb +70 -0
- data/lib/berkeley_library/tind/marc/xml_reader.rb +22 -16
- data/lib/berkeley_library/tind/marc/xml_writer.rb +152 -0
- data/lib/berkeley_library/tind/module_info.rb +1 -1
- data/lib/berkeley_library/util/files.rb +38 -0
- data/spec/berkeley_library/tind/mapping/additional_datafield_process_spec.rb +35 -0
- data/spec/berkeley_library/tind/mapping/alma_base_spec.rb +115 -0
- data/spec/berkeley_library/tind/mapping/alma_multiple_tind_spec.rb +20 -0
- data/spec/berkeley_library/tind/mapping/alma_single_tind_spec.rb +87 -0
- data/spec/berkeley_library/tind/mapping/alma_spec.rb +28 -0
- data/spec/berkeley_library/tind/mapping/config_spec.rb +19 -0
- data/spec/berkeley_library/tind/mapping/csv_mapper_spec.rb +27 -0
- data/spec/berkeley_library/tind/mapping/csv_multiple_mapper_spec.rb +27 -0
- data/spec/berkeley_library/tind/mapping/external_tind_field_spec.rb +45 -0
- data/spec/berkeley_library/tind/mapping/field_catalog_spec.rb +78 -0
- data/spec/berkeley_library/tind/mapping/field_catalog_util_spec.rb +57 -0
- data/spec/berkeley_library/tind/mapping/match_tind_field_spec.rb +25 -0
- data/spec/berkeley_library/tind/mapping/misc_spec.rb +51 -0
- data/spec/berkeley_library/tind/mapping/multiple_rule_spec.rb +44 -0
- data/spec/berkeley_library/tind/mapping/single_rule_spec.rb +52 -0
- data/spec/berkeley_library/tind/mapping/tind_control_subfield_spec.rb +96 -0
- data/spec/berkeley_library/tind/mapping/tind_field_from_leader_spec.rb +21 -0
- data/spec/berkeley_library/tind/mapping/tind_field_from_multiple_map_spec.rb +31 -0
- data/spec/berkeley_library/tind/mapping/tind_field_from_single_map_spec.rb +150 -0
- data/spec/berkeley_library/tind/mapping/tind_field_spec.rb +60 -0
- data/spec/berkeley_library/tind/mapping/tind_field_util_spec.rb +68 -0
- data/spec/berkeley_library/tind/mapping/tind_marc_spec.rb +88 -0
- data/spec/berkeley_library/tind/mapping/tind_subfield_util_spec.rb +48 -0
- data/spec/berkeley_library/tind/mapping/util_spec.rb +56 -0
- data/spec/berkeley_library/tind/marc/xml_reader_spec.rb +22 -0
- data/spec/berkeley_library/tind/marc/xml_writer_spec.rb +218 -0
- data/spec/data/issue-4.xml +157 -0
- data/spec/data/mapping/991032333019706532-sru.xml +216 -0
- data/spec/data/mapping/one_to_multiple_mapping.csv +4 -0
- data/spec/data/mapping/one_to_one_mapping.csv +39 -0
- data/spec/data/mapping/record.xml +263 -0
- data/spec/data/mapping/record_not_qualified.xml +36 -0
- data/spec/data/new-records.xml +46 -0
- metadata +128 -2
@@ -0,0 +1,146 @@
|
|
1
|
+
require 'berkeley_library/tind/mapping/tind_subfield_util'
|
2
|
+
require 'berkeley_library/tind/mapping/misc'
|
3
|
+
require 'berkeley_library/tind/mapping/field_catalog_util'
|
4
|
+
require 'berkeley_library/tind/mapping/match_tind_field'
|
5
|
+
|
6
|
+
module BerkeleyLibrary
|
7
|
+
module TIND
|
8
|
+
module Mapping
|
9
|
+
|
10
|
+
# Sorts the fields of one record into control fields, mappable data fields and
# 880 fields, then groups the data fields by the kind of mapping rule they use.
#
# Helpers such as clean_subfields, from_tags, one_occurrence_tags, subfield6?,
# subfield6_endwith_00? and logger are not defined in this class; presumably
# they come from the included modules - confirm before changing the includes.
class DataFieldsCatalog
  include Misc
  include TindSubfieldUtil
  include CsvMapper
  include Util
  include AdditionalDatafieldProcess
  include FieldCatalogUtil
  include MatchTindField
  include BerkeleyLibrary::Logging

  attr_reader :control_fields, :data_fields_group, :data_fields_880_group,
              :data_fields_880_00, :mms_id

  # @param record [#fields] source record (presumably a MARC::Record - confirm)
  def initialize(record)
    # Buckets filled while walking the record's fields
    @control_fields = []
    @data_fields = []
    @data_fields_880 = []
    @data_fields_880_00 = []
    @alma_field_tags = []

    # Results derived from the buckets by #init
    @data_fields_group = []
    @data_fields_880_group = []
    @mms_id = ''

    init(record)
  end

  # Fills the buckets, builds both rule groups and reads the MMS id.
  # Note: mms_id becomes nil when the record has no 001 control field.
  def init(record)
    prepare_catalog(record)
    @data_fields_group = prepare_group(@data_fields)
    @data_fields_880_group = prepare_group(@data_fields_880)
    @mms_id = alma_mms_id
  end

  # Cleans the record's fields, checks every subfield 6 format, distributes the
  # fields over the buckets and finally drops the FAST subject fields.
  def prepare_catalog(record)
    cleaned = clean_subfields(record.fields)
    check_abnormal_formated_subfield6(cleaned)
    allocate_fields(cleaned)
    remove_fields_with_subject_fast
  end

  # Replaces both data-field buckets with copies that exclude FAST subjects.
  def remove_fields_with_subject_fast
    @data_fields = exluding_fields_with_fast_subject(@data_fields)
    @data_fields_880 = exluding_fields_with_fast_subject(@data_fields_880)
  end

  private

  # Puts each field into exactly one bucket; fields whose tag has no mapping,
  # or that repeat a one-occurrence tag, are dropped.
  def allocate_fields(fields)
    fields.each do |field|
      next if added_control_field?(field) || added_880_field?(field)
      next unless found_in_mapper?(field.tag) && no_pre_existed_field?(field.tag)

      @data_fields.push(field)
      @alma_field_tags.push(field.tag)
    end
  end

  # True when the tag referenced in subfield 6 of an 880 field is one of the
  # origin tags known to the mapper.
  def qualified_880_field?(f)
    tag = referred_tag(f)
    tag ? found_in_mapper?(tag) : false
  end

  # Stores +f+ as a control field when its tag is a control tag.
  # @return [Boolean] whether the field was consumed
  def added_control_field?(f)
    if ::MARC::ControlField.control_tag?(f.tag)
      @control_fields << f
      true
    else
      false
    end
  end

  # Consumes every 880 field; returns false for any other tag.
  def added_880_field?(f)
    return false if f.tag != '880'

    # An 880 field without subfield 6 is kept in the "00" bucket (a warning is
    # logged) so that it can be corrected or removed in the TIND record.
    @data_fields_880_00 << f unless valid_subfield6?(f)

    if qualified_880_field?(f)
      bucket = subfield6_endwith_00?(f) ? @data_fields_880_00 : @data_fields_880
      bucket << f
    end

    true
  end

  # Logs a warning and returns false when +f+ has no subfield 6.
  def valid_subfield6?(f)
    unless subfield6?(f)
      logger.warn("880 field has no subfield 6 #{f.inspect}")
      return false
    end

    true
  end

  # Is +tag+ listed among the mapper's origin tags?
  def found_in_mapper?(tag)
    from_tags.include?(tag)
  end

  # For a tag listed in one_occurrence_tags only the first field is kept:
  # returns false once a field with that tag has already been collected.
  def no_pre_existed_field?(tag)
    return true unless one_occurrence_tags.include?(tag)

    collected_tags = @alma_field_tags.compact
    !collected_tags.include?(tag)
  end

  # Value of the first 001 control field, or nil when there is none.
  def alma_mms_id
    @control_fields.find { |cf| cf.tag == '001' }&.value
  end

end
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
module BerkeleyLibrary
|
2
|
+
module TIND
|
3
|
+
module Mapping
|
4
|
+
# Filtering and grouping helpers for catalogued fields.
# The including object must supply origin_mapping_tag(field) and rules.
module FieldCatalogUtil

  # Returns the fields that are NOT FAST subjects, i.e. drops every field whose
  # tag (or referred tag, for 880 fields) is 6xx and whose subfield 2 is 'fast'.
  def exluding_fields_with_fast_subject(fields)
    fields.reject { |field| excluding_field?(field) }
  end

  # Distributes fields over three lists according to their mapping rule.
  # Fields without an origin tag are skipped.
  # @return [Hash] with keys :normal, :pre_tag and :pre_tag_subfield
  def prepare_group(from_fields)
    empty_groups = { normal: [], pre_tag: [], pre_tag_subfield: [] }
    from_fields.each_with_object(empty_groups) do |field, groups|
      # regular field tag, or the tag captured from subfield 6 of an 880 field
      tag = origin_mapping_tag(field)
      next unless tag

      assing_field(rules[Util.tag_symbol(tag)], field, groups)
    end
  end

  private

  # True only for a 6xx field carrying a FAST subject marker.
  def excluding_field?(f)
    field_6xx?(f) && subfield2_fast(f) ? true : false
  end

  # Truthy when the tag (or referred tag of an 880 field) starts with '6'.
  def field_6xx?(f)
    origin_mapping_tag(f) =~ /^6\d{2}$/
  end

  # Does subfield 2 say 'fast' (case-insensitive)? False when it is absent.
  def subfield2_fast(f)
    scheme = f['2']
    scheme ? scheme.downcase == 'fast' : false
  end

  # Appends +f+ to the list chosen by the rule's pre-existence settings;
  # a pre-existed tag takes precedence over a pre-existed tag/subfield pair.
  def assing_field(rule, f, datafields_hash)
    group = if rule.pre_existed_tag
              :pre_tag
            elsif rule.pre_existed_tag_subfield
              :pre_tag_subfield
            else
              :normal
            end
    datafields_hash[group] << f
  end

end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
require 'marc'
|
2
|
+
|
3
|
+
module BerkeleyLibrary
|
4
|
+
module TIND
|
5
|
+
module Mapping
|
6
|
+
# Checks that regular fields and 880 fields reference each other through
# subfield 6. The including object must supply is_880_field?, fields_by,
# fields_with_subfield6, subfield6_endwith_00?, formated_subfield6_value,
# formated_subfield6_value_arr, check_subfield6_format and logger.
module MatchTindField

  # Finds 880 fields without a matching regular field and regular fields
  # without a matching 880 field, logs one warning per unmatched field and
  # returns those warning messages (unmatched 880 fields first).
  def un_matched_fields_880(fields, mms_id)
    values_from_880 = subfield6_values_from_880_fields(fields)
    values_from_regular = subfield6_values_from_regular_fields(fields)

    candidates_880, candidates_regular =
      fields_need_880_validation(fields).partition { |f| is_880_field?(f) }

    orphans = un_matched_fields(candidates_880, values_from_regular) +
              un_matched_fields(candidates_regular, values_from_880)

    log_warning(orphans, mms_id)
  end

  # Runs the subfield 6 format check on every non-control field that has a
  # subfield 6. Returns +fields+.
  def check_abnormal_formated_subfield6(fields)
    fields.each do |f|
      next unless check_subfield6?(f)

      check_subfield6_format(f)
    end
  end

  private

  # Formatted subfield 6 values taken from the 880 fields.
  def subfield6_values_from_880_fields(fields)
    only_880 = fields_by(fields) { |f| is_880_field?(f) }
    formated_subfield6_value_arr(only_880)
  end

  # Formatted subfield 6 values taken from the non-880 fields.
  def subfield6_values_from_regular_fields(fields)
    non_880 = fields_by(fields) { |f| !is_880_field?(f) }
    formated_subfield6_value_arr(non_880)
  end

  # Fields with a subfield 6, except those for which subfield6_endwith_00? holds.
  def fields_need_880_validation(fields)
    fields_with_subfield6(fields).reject { |f| subfield6_endwith_00?(f) }
  end

  # True when the formatted subfield 6 value of +f+ occurs in +arr+, i.e. the
  # field has a counterpart on the other side.
  def match?(f, arr)
    arr.include?(formated_subfield6_value(f))
  end

  # The fields that have no counterpart in +arr+.
  def un_matched_fields(fields, arr)
    fields.reject { |f| match?(f, arr) }
  end

  # Logs one warning per field; the messages are also returned so that specs
  # can inspect them.
  def log_warning(fields, mms_id)
    fields.map do |f|
      msg = "Please check 880 matching: mms_id: #{mms_id}, tag: #{f.tag}, value: #{f['6']} "
      logger.warn(msg)
      msg
    end
  end

  # Control fields are never checked; other fields only when subfield 6 exists.
  def check_subfield6?(f)
    if ::MARC::ControlField.control_tag?(f.tag)
      false
    else
      f['6'] ? true : false
    end
  end

end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
require 'marc'
|
2
|
+
|
3
|
+
module BerkeleyLibrary
|
4
|
+
module TIND
|
5
|
+
module Mapping
|
6
|
+
# Small helpers for referred tags, subfield 6 handling and value clean-up.
# The including object must supply is_880_field?, subfield6? and logger.
module Misc

  #### referred tag ###

  # Tag used to look up the mapping for +f+: the tag referenced in subfield 6
  # for an 880 field, otherwise the field's own tag.
  def origin_mapping_tag(f)
    is_880_field?(f) ? referred_tag(f) : f.tag
  end

  # The tag a field refers to through subfield 6, or nil without subfield 6.
  # An example $6 value: '650-05/$1', referred tag is 650
  def referred_tag(field)
    return nil unless subfield6?(field)

    field['6'].strip.split('-')[0]
  end

  # Does subfield 6 of +field+ refer to +tag+? False without a referred tag.
  def field_880_has_referred_tag?(tag, field)
    referred = referred_tag(field)
    return false unless referred

    referred == tag
  end

  ### referred tag end ###

  # Logs a warning when subfield 6 matches neither 'ddd-dd' nor 'ddd-dd/...'.
  def check_subfield6_format(f)
    val = f['6']
    reg1 = %r{^\d{3}-\d{2}/}
    reg2 = /^\d{3}-\d{2}$/
    return if reg1.match(val) || reg2.match(val)

    logger.warn("Unusual subfield6 format: #{val}; correct format examples: 1) 880-02 ; 2)246-02/$1")
  end

  private

  # Removes every trailing character listed in Config.punctuations.
  # The string is modified in place and returned (clr_value relies on that);
  # nil and empty strings are returned untouched.
  def rm_punctuation(str)
    # nil must be tested first: calling empty? on nil raises NoMethodError
    return str if str.nil? || str.empty?

    punctuations = Config.punctuations
    str.delete_suffix!(str[-1]) while !str.empty? && punctuations.include?(str[-1])
    str
  end

  # Strips trailing punctuation, turns square brackets into spaces (both in
  # place on +value+) and returns a copy without surrounding whitespace.
  def clr_value(value)
    new_value = rm_punctuation(value)
    value.gsub!(/[\[\]]/, ' ')
    new_value.strip
  end

  # Sequence number from a subfield 6 value, 0 when it cannot be found.
  # input example: 1) 880-02 ; 2)246-02/$1
  def seq_no(value)
    # to_s keeps nil and empty values from raising: ''.split('/') is [] and
    # [].first is nil
    link = value.to_s.split('/').first.to_s
    link.split('-')[1].to_i # nil.to_i => 0, ''.to_i => 0
  end

end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module BerkeleyLibrary
|
2
|
+
module TIND
|
3
|
+
module Mapping
|
4
|
+
|
5
|
+
class MultipleRule
|
6
|
+
include Util
|
7
|
+
|
8
|
+
attr_reader :tag_origin
|
9
|
+
attr_reader :tag_destination
|
10
|
+
attr_reader :indicator
|
11
|
+
attr_reader :pre_existed_tag
|
12
|
+
attr_reader :subfield_key
|
13
|
+
attr_reader :position_from_to
|
14
|
+
|
15
|
+
# Builds a rule from one row of the one-to-multiple mapping CSV.
# @param row [#[]] row addressed by header symbol (presumably a CSV::Row - confirm)
def initialize(row)
  indicator_spec = row[:new_indecator]
  from_pos = row[:value_from]
  to_pos = row[:value_to]

  @tag_origin = row[:tag_origin]
  @tag_destination = row[:tag_destination]
  @indicator = Util.indicator(indicator_spec)
  @pre_existed_tag = row[:map_if_no_this_tag_existed]
  @subfield_key = row[:subfield_key]
  # nil unless both positions are present
  @position_from_to = extract_position(from_pos, to_pos)
end
|
23
|
+
|
24
|
+
private
|
25
|
+
|
26
|
+
# return an array with string positons for extracting value
|
27
|
+
# Converts the two position columns into [from, to] integers.
# Returns nil when either column is missing.
def extract_position(f, t)
  f && t ? [f.to_i, t.to_i] : nil
end
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
module BerkeleyLibrary
|
2
|
+
module TIND
|
3
|
+
module Mapping
|
4
|
+
# TODO: ADD CSV VALIDATION, WHEN NEED ADD A NEW MAPPING CSV FILE
|
5
|
+
# 1. not empty row
|
6
|
+
# 2. a subfield name can appear in either normal mapping or combine mapping, not both
|
7
|
+
# 3. single map has the same amount of from names and to names
|
8
|
+
# 4. Combined mapping should have three columns; validate more?
|
9
|
+
# 5. Combine from_subfield, to_subfield should have values, no empty
|
10
|
+
# 6. Tag from row[:map_if_no_this_tag_subfield_existed]), row[:map_if_no_this_tag_existed] # This tag should be the same as destination tag
|
11
|
+
# 7. In single map csv, one row cannot have both "map_if_no_this_tag_existed" (245) and ":map_if_no_this_tag_subfield_existed"
|
12
|
+
# (245__b) because tag in these two column are identical
|
13
|
+
# 8. CSV file validation - a row should have columns: tag origin and destination? (single rule)
|
14
|
+
# 9. Validating these column names
|
15
|
+
# 10. CSV file validation - a row should have columns: tag origin and destination? (single rule)
|
16
|
+
# 11. validating headers
|
17
|
+
# 12. Formats for some of the columns
|
18
|
+
|
19
|
+
class SingleRule
|
20
|
+
include Util
|
21
|
+
attr_reader :tag_origin
|
22
|
+
attr_reader :tag_destination
|
23
|
+
attr_reader :indicator
|
24
|
+
attr_reader :pre_existed_tag
|
25
|
+
attr_reader :pre_existed_tag_subfield
|
26
|
+
attr_reader :single_rule_hash
|
27
|
+
attr_reader :single_rule_subfield_excluded_hash
|
28
|
+
attr_reader :combined_rules
|
29
|
+
|
30
|
+
# Builds a rule from one row of the one-to-one mapping CSV.
# @param row [#[], #headers] row addressed by header symbol
#   (presumably a CSV::Row - confirm)
def initialize(row)
  indicator_spec = row[:new_indecator]
  tag_subfield_spec = row[:map_if_no_this_tag_subfield_existed]

  @tag_origin = row[:tag_origin]
  @tag_destination = row[:tag_destination]
  @indicator = Util.indicator(indicator_spec)
  @pre_existed_tag = row[:map_if_no_this_tag_existed]
  # e.g. '255__a' => ['255', 'a']; this tag should equal the destination tag
  @pre_existed_tag_subfield = existed_tag_subfield(tag_subfield_spec)
  @single_rule_hash = single_map_dic(row[:subfield_single_from], row[:subfield_single_to])
  # must run after the two assignments above: it reads both instance variables
  @single_rule_subfield_excluded_hash = single_map_subfield_excluded_dic
  @combined_rules = rules_with_same_subfield_name(row)
end
|
40
|
+
|
41
|
+
# 1. Return an array of combined rules, an item in the array
|
42
|
+
# is an array of rules which have the same 'to subfield name'
|
43
|
+
# 2. An example: [[["a,b,c,d", "b", "--"],["o,p,q", "b", ""]],[["x,y,z", "a", "--"]]]
|
44
|
+
# Groups the combined rules of +row+ by their 'to subfield name'.
# Each item of the result is the list of rules sharing one target name, e.g.
# [[["a,b,c,d", "b", "--"], ["o,p,q", "b", ""]], [["x,y,z", "a", "--"]]]
def rules_with_same_subfield_name(row)
  combined = all_combined_rules(row)
  unique_tosubfield_names(combined).map do |to_name|
    rules_with_sametosubfield(to_name, combined)
  end
end
|
51
|
+
|
52
|
+
private
|
53
|
+
|
54
|
+
# return an array of tag and subfield name, example '255__a' => ['255','a']
|
55
|
+
# Splits 'tag__subfield' into its parts, example '255__a' => ['255', 'a'].
# Returns nil for nil input.
def existed_tag_subfield(str)
  str&.split('__')
end
|
58
|
+
|
59
|
+
# list identical 'to subfield name's from combined mapping rules
|
60
|
+
# (an example rule ['a,b,c', 'b', ' -- '])
|
61
|
+
# Distinct 'to subfield names' (second item of each combined rule, e.g.
# ['a,b,c', 'b', ' -- ']) in order of first appearance.
def unique_tosubfield_names(rules)
  rules.uniq { |rule| rule[1] }.map { |rule| rule[1] }
end
|
65
|
+
|
66
|
+
# numbers of combined rules
|
67
|
+
# Number of combined rules a row can hold: counts the headers whose name
# contains 'subfield_combined_from_'.
def combined_rule_counts(row)
  row.headers.count { |header| header.to_s.include?('subfield_combined_from_') }
end
|
71
|
+
|
72
|
+
# Three coulumns 'subfield_combined_from_*','subfield_combined_to_*','symbol_*'
|
73
|
+
# define a combined mapping rule
|
74
|
+
# Reads the i-th combined rule from the three columns
# 'subfield_combined_from_i', 'subfield_combined_to_i' and 'symbol_i'.
# Returns [from, to, symbol], or nil when the from or to column is missing.
def combined_rule(row, i)
  from_names = row[:"subfield_combined_from_#{i}"]
  to_name = row[:"subfield_combined_to_#{i}"]
  symbol = row[:"symbol_#{i}"]
  # TODO: add validation (e.g. values not empty) later
  return nil if from_names.nil? || to_name.nil?

  [from_names, to_name, symbol]
end
|
80
|
+
|
81
|
+
# All combined rules defined in +row+, in column order; incomplete rules
# (for which combined_rule returns nil) are left out.
def all_combined_rules(row)
  last_index = combined_rule_counts(row)
  (1..last_index).map { |i| combined_rule(row, i) }.select(&:itself)
end
|
90
|
+
|
91
|
+
# list all combined rules with the same 'to subfield name'
|
92
|
+
# The combined rules whose 'to subfield name' (second item) equals +name+.
def rules_with_sametosubfield(name, rules)
  rules.select { |rule| rule[1] == name }
end
|
97
|
+
|
98
|
+
# Define a hash for single map rules, key is 'from subfield name',
|
99
|
+
# value is 'to subfield name'
|
100
|
+
# Builds the one-to-one subfield map: key is the 'from subfield name', value
# the 'to subfield name' at the same position of the comma-separated lists.
# Returns an empty hash when should_single_map? rejects the two columns.
def single_map_dic(str_from, str_to)
  return {} unless should_single_map?(str_from, str_to)

  from_names = str_from.strip.split(',')
  to_names = str_to.strip.split(',')
  # zip pads a missing target with nil, which to_s turns into ''
  from_names.zip(to_names).each_with_object({}) do |(from_name, to_name), map|
    map[from_name.to_s] = to_name.to_s
  end
end
|
109
|
+
|
110
|
+
# Hash removed the excluding subfield
|
111
|
+
# Returns the single-map hash without the subfield named in
# @pre_existed_tag_subfield (when excluding_subfield? holds).
#
# NOTE(review): the key is deleted from @single_rule_hash itself and that very
# object is returned, so single_rule_hash and single_rule_subfield_excluded_hash
# end up being the same hash. Confirm whether callers rely on this before
# switching to a copy.
def single_map_subfield_excluded_dic
  if excluding_subfield?
    excluded_name = @pre_existed_tag_subfield[1].to_s
    @single_rule_hash.delete(excluded_name)
  end

  @single_rule_hash
end
|
119
|
+
|
120
|
+
# Check excluding subfield
|
121
|
+
# True when @pre_existed_tag_subfield is set and names a subfield
# (i.e. has a second item).
def excluding_subfield?
  @pre_existed_tag_subfield && @pre_existed_tag_subfield[1] ? true : false
end
|
127
|
+
|
128
|
+
# add this to validation
|
129
|
+
# A one-to-one map is only built when both columns are present and list the
# same number of comma-separated names.
# TODO: move this check into the CSV validation
def should_single_map?(str_from, str_to)
  return false unless str_from && str_to

  str_from.split(',').size == str_to.split(',').size
end
|
138
|
+
|
139
|
+
end
|
140
|
+
|
141
|
+
end
|
142
|
+
end
|
143
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'marc'
|
2
|
+
|
3
|
+
module BerkeleyLibrary
|
4
|
+
module TIND
|
5
|
+
module Mapping
|
6
|
+
# Builds datafields from fixed positions of a string value such as the leader.
module TindControlSubfield

  # The slice of +value+ between the rule's from and to positions (inclusive),
  # or nil when the rule defines no positions.
  def extract_value(rule, value)
    pos = rule.position_from_to
    return nil unless pos

    value[pos[0]..pos[1]]
  end

  # Returns a mapped datafield based on the rule and the extracted value.
  # Returns nil when the rule lacks a subfield key, destination tag or
  # indicator, or when the value is rejected by clean_subfield_value.
  def extracted_field(rule, sub_value)
    subname = rule.subfield_key
    destination_tag = rule.tag_destination
    indicator = rule.indicator
    return nil unless subname && destination_tag && indicator

    # cleaned once; the earlier version repeated this call with the same arguments
    new_sub_value = clean_subfield_value(destination_tag, sub_value)
    return nil unless new_sub_value

    subfields = [Util.subfield(subname, new_sub_value)]
    Util.datafield(destination_tag, indicator, subfields)
  end

  # Applies every rule to +leader_value+ and returns the datafields that could
  # be built; rules yielding no value or no field are skipped.
  def extracted_fields_from_leader(leader_rules, leader_value)
    new_fls = []
    leader_rules.each do |rule|
      sub_value = extract_value(rule, leader_value)
      next unless sub_value

      newfield = extracted_field(rule, sub_value)
      new_fls << newfield if newfield
    end
    new_fls
  end

  private

  # Values pass through unchanged except for tag 269: there the value is
  # downcased, a trailing 'u' becomes '0', and anything that is not four
  # digits afterwards is rejected with nil.
  def clean_subfield_value(tag, val)
    return val if tag != '269'

    new_val = val.downcase.sub(/u$/, '0')
    qualified_269?(new_val) ? new_val : nil
  end

  # Is +val+ exactly four digits? Returns a real boolean.
  def qualified_269?(val)
    /^\d{4}$/.match?(val)
  end

end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'marc'
|
2
|
+
|
3
|
+
module BerkeleyLibrary
|
4
|
+
module TIND
|
5
|
+
module Mapping
|
6
|
+
# Factory methods for the datafields added to every TIND record.
# All fields are created with blank indicators.
module TindField
  class << self

    # 035 $a holding the Alma id prefixed with the 980 value in parentheses,
    # e.g. '(value_980)alma_id'.
    def f_035_from_alma_id(alma_id, value_980)
      val = "(#{value_980})#{alma_id}"
      f('035', 'a', val)
    end

    # 035 $a with the given value.
    def f_035(val)
      f('035', 'a', val)
    end

    # 245 $p with the given value.
    def f_245_p(val)
      f('245', 'p', val)
    end

    # FFT field for +url+ in $a. When +txt+ is given it is written to $d,
    # placed before $a.
    #
    # The default used to be Python's +None+, which is an undefined constant
    # in Ruby and raised NameError whenever +txt+ was omitted.
    def f_fft(url, txt = nil)
      return f('FFT', 'a', url) unless txt

      ::MARC::DataField.new('FFT', ' ', ' ', ['d', txt], ['a', url])
    end

    # 902 $d with today's date formatted as YYYY-MM-DD.
    def f_902_d
      f('902', 'd', Time.now.strftime('%F'))
    end

    # 902 $n with the given name initial.
    def f_902_n(name_initial)
      f('902', 'n', name_initial)
    end

    # 982 $p with the given value.
    def f_982_p(val)
      f('982', 'p', val)
    end

    # Datafield +tag+ with blank indicators and a single subfield.
    def f(tag, code, value)
      ::MARC::DataField.new(tag, ' ', ' ', [code, value])
    end

  end
end
|
46
|
+
end
|
47
|
+
|
48
|
+
end
|
49
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'marc'
|
2
|
+
require 'berkeley_library/tind/mapping/tind_control_subfield'
|
3
|
+
|
4
|
+
module BerkeleyLibrary
|
5
|
+
module TIND
|
6
|
+
require 'marc'
|
7
|
+
module Mapping
|
8
|
+
|
9
|
+
# Builds datafields from a record's leader using the 'LDR' rules.
# rules is not defined here; presumably it comes from CsvMultipleMapper - confirm.
class TindFieldFromLeader
  include CsvMultipleMapper
  include Util
  include TindControlSubfield

  # @param record [#leader] source record
  def initialize(record)
    @leader_value = record.leader
  end

  # Datafields extracted from the leader; an empty array when the record has
  # no leader or no 'LDR' rules are defined.
  def to_datafields
    leader_rules = rules[Util.tag_symbol('LDR')]
    if @leader_value && leader_rules
      extracted_fields_from_leader(leader_rules, @leader_value)
    else
      []
    end
  end
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|