berkeley_library-tind 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/build.yml +15 -3
- data/.gitignore +3 -0
- data/.idea/inspectionProfiles/Project_Default.xml +10 -0
- data/.idea/tind.iml +4 -3
- data/CHANGES.md +6 -0
- data/README.md +121 -2
- data/berkeley_library-tind.gemspec +1 -0
- data/bin/alma-multiple-tind +50 -0
- data/bin/alma-single-tind +48 -0
- data/bin/save_tind_records +80 -0
- data/bin/tind-marc +73 -0
- data/lib/berkeley_library/tind/mapping/additional_datafield_process.rb +128 -0
- data/lib/berkeley_library/tind/mapping/alma.rb +42 -0
- data/lib/berkeley_library/tind/mapping/alma_base.rb +101 -0
- data/lib/berkeley_library/tind/mapping/alma_multiple_tind.rb +31 -0
- data/lib/berkeley_library/tind/mapping/alma_single_tind.rb +28 -0
- data/lib/berkeley_library/tind/mapping/config.rb +44 -0
- data/lib/berkeley_library/tind/mapping/csv_mapper.rb +35 -0
- data/lib/berkeley_library/tind/mapping/csv_multiple_mapper.rb +41 -0
- data/lib/berkeley_library/tind/mapping/data/one_to_multiple_mapping.csv +4 -0
- data/lib/berkeley_library/tind/mapping/data/one_to_one_mapping.csv +39 -0
- data/lib/berkeley_library/tind/mapping/external_tind_field.rb +103 -0
- data/lib/berkeley_library/tind/mapping/field_catalog.rb +146 -0
- data/lib/berkeley_library/tind/mapping/field_catalog_util.rb +59 -0
- data/lib/berkeley_library/tind/mapping/match_tind_field.rb +77 -0
- data/lib/berkeley_library/tind/mapping/misc.rb +69 -0
- data/lib/berkeley_library/tind/mapping/multiple_rule.rb +36 -0
- data/lib/berkeley_library/tind/mapping/single_rule.rb +143 -0
- data/lib/berkeley_library/tind/mapping/tind_control_subfield.rb +59 -0
- data/lib/berkeley_library/tind/mapping/tind_field.rb +49 -0
- data/lib/berkeley_library/tind/mapping/tind_field_from_leader.rb +27 -0
- data/lib/berkeley_library/tind/mapping/tind_field_from_multiple_map.rb +59 -0
- data/lib/berkeley_library/tind/mapping/tind_field_from_single_map.rb +170 -0
- data/lib/berkeley_library/tind/mapping/tind_field_util.rb +112 -0
- data/lib/berkeley_library/tind/mapping/tind_marc.rb +134 -0
- data/lib/berkeley_library/tind/mapping/tind_subfield_util.rb +154 -0
- data/lib/berkeley_library/tind/mapping/util.rb +117 -0
- data/lib/berkeley_library/tind/mapping.rb +1 -0
- data/lib/berkeley_library/tind/module_info.rb +1 -1
- data/lib/berkeley_library/util/files.rb +1 -2
- data/spec/berkeley_library/tind/mapping/additional_datafield_process_spec.rb +35 -0
- data/spec/berkeley_library/tind/mapping/alma_base_spec.rb +115 -0
- data/spec/berkeley_library/tind/mapping/alma_multiple_tind_spec.rb +20 -0
- data/spec/berkeley_library/tind/mapping/alma_single_tind_spec.rb +87 -0
- data/spec/berkeley_library/tind/mapping/alma_spec.rb +28 -0
- data/spec/berkeley_library/tind/mapping/config_spec.rb +19 -0
- data/spec/berkeley_library/tind/mapping/csv_mapper_spec.rb +27 -0
- data/spec/berkeley_library/tind/mapping/csv_multiple_mapper_spec.rb +27 -0
- data/spec/berkeley_library/tind/mapping/external_tind_field_spec.rb +45 -0
- data/spec/berkeley_library/tind/mapping/field_catalog_spec.rb +78 -0
- data/spec/berkeley_library/tind/mapping/field_catalog_util_spec.rb +57 -0
- data/spec/berkeley_library/tind/mapping/match_tind_field_spec.rb +25 -0
- data/spec/berkeley_library/tind/mapping/misc_spec.rb +51 -0
- data/spec/berkeley_library/tind/mapping/multiple_rule_spec.rb +44 -0
- data/spec/berkeley_library/tind/mapping/single_rule_spec.rb +52 -0
- data/spec/berkeley_library/tind/mapping/tind_control_subfield_spec.rb +96 -0
- data/spec/berkeley_library/tind/mapping/tind_field_from_leader_spec.rb +21 -0
- data/spec/berkeley_library/tind/mapping/tind_field_from_multiple_map_spec.rb +31 -0
- data/spec/berkeley_library/tind/mapping/tind_field_from_single_map_spec.rb +150 -0
- data/spec/berkeley_library/tind/mapping/tind_field_spec.rb +60 -0
- data/spec/berkeley_library/tind/mapping/tind_field_util_spec.rb +68 -0
- data/spec/berkeley_library/tind/mapping/tind_marc_spec.rb +88 -0
- data/spec/berkeley_library/tind/mapping/tind_subfield_util_spec.rb +48 -0
- data/spec/berkeley_library/tind/mapping/util_spec.rb +56 -0
- data/spec/berkeley_library/tind/marc/xml_writer_spec.rb +24 -0
- data/spec/data/mapping/991032333019706532-sru.xml +216 -0
- data/spec/data/mapping/one_to_multiple_mapping.csv +4 -0
- data/spec/data/mapping/one_to_one_mapping.csv +39 -0
- data/spec/data/mapping/record.xml +263 -0
- data/spec/data/mapping/record_not_qualified.xml +36 -0
- metadata +105 -2
@@ -0,0 +1,112 @@
|
|
1
|
+
require 'marc'
|
2
|
+
|
3
|
+
module BerkeleyLibrary
|
4
|
+
module TIND
|
5
|
+
module Mapping
|
6
|
+
# Helpers for looking up the mapping rule of a source field and for checking
# whether the TIND field (or subfield) named by that rule is already present
# in a list of fields.
#
# NOTE(review): this module calls `rules`, `origin_mapping_tag`, `referred_tag`
# and `is_880_field?`, none of which are defined here - they are expected to be
# supplied by the including object or by sibling mixins.
module TindFieldUtil

  # Return the mapping rule for a field, or nil when no origin tag is found.
  #
  # tag - regular alma field
  # referred tag - got tag from subfield6 value of a 880 field
  # nil rule caused by nil referred tag - eg. 880 subfield6 has a value in wrong format
  def rule(field)
    tag = origin_mapping_tag(field)
    return nil unless tag

    rules[Util.tag_symbol(tag)]
  end

  # True when the field has a rule naming a "pre-existed" tag and that tag is
  # already present among +fields+.
  def tindfield_existed?(field, fields)
    # Single rule lookup; a field without a rule has no pre-existed tag.
    mapping_to_tag = rule(field)&.pre_existed_tag
    return false unless mapping_to_tag

    map_to_tag_existed_in_fields?(field, fields, mapping_to_tag)
  end

  # To check TIND datafield and the specific subfield from rule existed
  def tindfield_subfield_existed?(field, fields)
    field_rule = rule(field)
    return false unless field_rule
    return false unless pre_existed_tag_subfield_in_rule?(field_rule)

    # The helper above guarantees two entries: [tag, subfield name]
    tag_subfield = field_rule.pre_existed_tag_subfield
    mapping_to_tag = tag_subfield[0]
    return false unless map_to_tag_existed_in_fields?(field, fields, mapping_to_tag)

    existed_datafield = field_pre_existed(mapping_to_tag, field, fields)
    return false unless existed_datafield

    subfield_name = tag_subfield[1]
    existed_datafield[subfield_name] ? true : false
  end

  # First field whose referred tag (see +referred_tag+) equals +tag+, or nil.
  def field_880_on_subfield6_tag(tag, fields)
    datafield_on_tag(tag, fields) { |f| referred_tag(f) == tag }
  end

  # First field whose own tag equals +tag+, or nil.
  def field_on_tag(tag, fields)
    datafield_on_tag(tag, fields) { |f| f.tag == tag }
  end

  private

  # True when a mapping rule can be found for the field.
  def field_has_rule?(field)
    rule(field) ? true : false
  end

  # True when the rule carries a pre-existed tag/subfield pair of exactly two entries.
  def pre_existed_tag_subfield_in_rule?(rule)
    tag_subfield = rule.pre_existed_tag_subfield
    return false unless tag_subfield

    tag_subfield.length == 2
  end

  # True when +mapping_to_tag+ occurs among the tags of +fields+.
  # For an 880 +field+ the tags are read from subfield 6 of each entry in
  # +fields+; otherwise the entries' own tags are used.
  def map_to_tag_existed_in_fields?(field, fields, mapping_to_tag)
    existed_tags = if is_880_field?(field)
                     tags_from_fields(fields) { |f| tag_from_880_subfield6(f) }
                   else
                     tags_from_fields(fields, &:tag)
                   end

    existed_tags.include? mapping_to_tag
  end

  # field, fields be both regular fields
  # or field, fields be both 880 fields
  # since a field may mapped to another one in TIND, mapping_to_tag is not always the same as field.tag
  def field_pre_existed(mapping_to_tag, field, fields)
    if is_880_field?(field)
      field_880_on_subfield6_tag(mapping_to_tag, fields)
    else
      field_on_tag(mapping_to_tag, fields)
    end
  end

  # First entry of +fields+ for which the given block is truthy, or nil.
  # The block is yielded the field and the tag.
  def datafield_on_tag(tag, fields)
    fields.find do |f|
      yield(f, tag)
    end
  end

  # Map every field through the given block.
  def tags_from_fields(fields, &block)
    fields.map(&block)
  end

  # Portion of subfield 6 before the first '-'.
  # Returns nil instead of raising when the field has no subfield 6, so one
  # incomplete field does not abort the check over a whole list of fields.
  def tag_from_880_subfield6(field)
    field['6']&.split('-')&.first
  end

end
|
109
|
+
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
@@ -0,0 +1,134 @@
|
|
1
|
+
require 'marc'
|
2
|
+
|
3
|
+
module BerkeleyLibrary
|
4
|
+
module TIND
|
5
|
+
module Mapping
|
6
|
+
# Maps one Alma MARC record to TIND datafields and assembles them into a
# MARC record.
class TindMarc
  include CsvMapper
  include Util
  include TindSubfieldUtil
  include Misc
  include TindFieldUtil
  include AdditionalDatafieldProcess
  include BerkeleyLibrary::Logging
  include MatchTindField

  attr_accessor :source_marc_record
  attr_writer :tind_external_datafields
  attr_reader :field_catalog
  attr_reader :mms_id

  # input an alma record
  def initialize(record)
    @source_marc_record = record
    @field_catalog = DataFieldsCatalog.new(@source_marc_record)
    @tind_external_datafields = []
  end

  # return mapped tind datafields
  # keep the order of different mapping: regular fields first, then 880 fields
  def tindfields
    [*tindfields_group, *tindfields_group_880]
  end

  # return a TIND Marc record
  # external datafields are appended to the mapped ones, then the additional
  # processes run before the record is generated
  def tind_record
    all_fields = tindfields.concat(@tind_external_datafields)
    more_process(all_fields)
    all_fields.each_with_object(::MARC::Record.new) { |f, record| record.append(f) }
  end

  private

  # return mapped tind datafields
  # keep the order of different mapping:
  # normal, leader, control, pre-existed tag, pre-existed tag + subfield
  def tindfields_group
    mapped = tindfields_from_normal(@field_catalog.data_fields_group[:normal])
    mapped.concat(TindFieldFromLeader.new(@source_marc_record).to_datafields)

    # each later step sees everything mapped so far
    mapped.concat(tindfields_from_control(mapped))
    mapped.concat(tindfields_with_pre_existed_field(@field_catalog.data_fields_group[:pre_tag], mapped))
    mapped.concat(tindfields_with_pre_existed_subfield(@field_catalog.data_fields_group[:pre_tag_subfield], mapped))

    mapped
  end

  # return mapped tind 880 datafields
  # keep the order of different mapping
  def tindfields_group_880
    mapped = []
    mapped.concat(tindfields_from_normal(@field_catalog.data_fields_880_group[:normal]))
    mapped.concat(tindfields_with_pre_existed_field(@field_catalog.data_fields_880_group[:pre_tag], mapped))
    mapped.concat(tindfields_with_pre_existed_subfield(@field_catalog.data_fields_880_group[:pre_tag_subfield], mapped))
    mapped.concat(@field_catalog.data_fields_880_00)
    mapped
  end

  # Return TIND datafields mapped in a normal way
  # Normal way mapping: one regular datafield is mapped one TIND datafield
  def tindfields_from_normal(alma_fields)
    alma_fields.each_with_object([]) do |f, collected|
      add_tindfield(collected, f, excluding_subfield: false)
    end
  end

  # Return TIND datafield mapped from a control datafield
  # One control datafield could be mapped to multiple TIND datafields
  def tindfields_from_control(currentfields)
    @field_catalog.control_fields.each_with_object([]) do |f, collected|
      add_tindcontrolfield(collected, f, currentfields)
    end
  end

  # Return TIND datafields if no pre_existed field
  def tindfields_with_pre_existed_field(alma_fields, currentfields)
    alma_fields.each_with_object([]) do |f, collected|
      next if tindfield_existed?(f, currentfields)

      add_tindfield(collected, f, excluding_subfield: false)
    end
  end

  # Return TIND datafields considering excluding pre_existing subfields
  def tindfields_with_pre_existed_subfield(alma_fields, currentfields)
    alma_fields.each_with_object([]) do |f, collected|
      add_tindfield(collected, f, excluding_subfield: tindfield_subfield_existed?(f, currentfields))
    end
  end

  # Append the datafield mapped from +f+ to +fls+; nothing is appended when
  # the mapping yields no datafield.
  def add_tindfield(fls, f, excluding_subfield: false)
    mapped = TindFieldFromSingleMap.new(f, excluding_subfield).to_datafield
    return unless mapped

    fls << mapped
  end

  # Append every datafield mapped from the control field +f+ to +fls+.
  def add_tindcontrolfield(fls, f, currentfields)
    fls.concat(TindFieldFromMultipleMap.new(f, currentfields).to_datafields)
  end

  # Additional processes - run in a sequence
  def more_process(fields)
    remove_repeats(fields)
    clean_subfields(fields)
    un_matched_fields_880(fields, @field_catalog.mms_id)
  end

end
|
132
|
+
end
|
133
|
+
end
|
134
|
+
end
|
@@ -0,0 +1,154 @@
|
|
1
|
+
require 'marc'
|
2
|
+
|
3
|
+
module BerkeleyLibrary
|
4
|
+
module TIND
|
5
|
+
module Mapping
|
6
|
+
# Subfield-level helpers, mostly dealing with subfield 6, which links a
# regular datafield to its 880 counterpart.
#
# NOTE(review): `clr_value`, `seq_no` and `logger` are not defined in this
# module and are expected from the including object or sibling mixins.
module TindSubfieldUtil

  # Replace the subfield's value with the result of +clr_value+.
  def clean_subfield(subfield)
    subfield.value = clr_value(subfield.value)
  end

  # Formatted subfield 6 values of the 880 fields found in +datafields_880+.
  def fields_880_subfield6(datafields_880)
    formated_subfield6_value_arr(fields_by(datafields_880) { |f| is_880_field?(f) })
  end

  # From all subfield 6 gotten for repeated fields - with the same tag
  # return the first subfield 6 (nil when none of the fields has one)
  def the_first_subfield6(fields)
    values = subfield_6_values(fields)
    return nil if values.empty?

    # keep it here, in case needed in future: this can make sure
    # 880 and regular fields having matching sequence number
    # subfield6_value_with_lowest_seq_no(values)

    # new implementation: keep the first subfield 6 value
    # (the tag is read from +fields+; the former `f[0]` referenced an undefined local)
    logger.warn("#{fields.first.tag} have multiple datafields with multiple subfield 6, the first subfield 6 is kept") if values.length > 1
    Util.subfield('6', values[0])
  end

  private

  # True when the field has a subfield 6.
  def subfield6?(f)
    !f['6'].nil?
  end

  # f.tag can be a string or integer
  def is_880_field?(f)
    f.tag.to_s == '880'
  end

  # return subfield6 value formatted the same way for both 880 and regular datafields
  def formated_subfield6_value(f)
    is_880_field?(f) ? formated_subfield6_from_880(f) : formated_subfield6_from_regular_field(f)
  end

  # Fields for which the block is truthy.
  def fields_by(fields, &block)
    fields.select(&block)
  end

  # return array of formatted subfield6 values on inputted datafields
  # fields without a subfield 6 are skipped
  def formated_subfield6_value_arr(fields_source)
    fields_source.select { |f| subfield6?(f) }
                 .map { |f| formated_subfield6_value(f) }
                 .compact
  end

  # return formatted subfield6 for a regular datafield
  # tag:246, subfields: 880-02 => 880-246-02
  def formated_subfield6_from_regular_field(f)
    return nil unless f['6']

    tag = f.tag
    arr = f['6'].strip.split('-')
    "#{arr[0]}-#{tag}-#{arr[1]}"
  end

  # return formatted subfield6 for a 880 datafield
  # tag: 880, subfield6: 245-01/$1 => 880-245-01
  def formated_subfield6_from_880(f)
    return nil unless f['6']

    # only the first six characters ("TTT-NN") of the value are kept
    "#{f.tag}-#{f['6'].strip[0, 6].strip}"
  end

  # return subfields of specific code
  # Datafield returned from Alma may have multiple subfields with the same code
  # Subfields with code '3' get their value capitalized in place.
  def subfields(field, code)
    sf = []
    field.each do |s|
      next unless s.code == code

      captilize_subfield(s) if code == '3'
      sf << s
    end
    sf
  end

  # Capitalize the subfield's value in place.
  def captilize_subfield(subfield)
    subfield.value = subfield.value.capitalize
  end

  # Combine multiple subfield values based on definition from csv file
  # The symbol is also appended after the last value; returns '' when the
  # field has no subfield with this code.
  def combined_subfield_value(field, code, symbol)
    sf_arr = subfields(field, code)
    return '' if sf_arr.empty?

    sf_arr.map(&:value).join(symbol) << symbol # symbol example ' -- '
  end

  # Multiple subfields in one field may have the same name
  # Get all subfields on 'from subfield name'
  # Change subfield names with 'to subfield name'
  def subfields_from_to(field, from, to)
    subfield_arr = subfields(field, from)
    subfield_arr.each { |sf| sf.code = to } if !subfield_arr.empty? && (from != to)
    subfield_arr
  end

  # True for an 880 field whose subfield 6 sequence part (after the '-')
  # starts with '00'. A missing or hyphen-less subfield 6 yields false
  # instead of raising.
  def subfield6_endwith_00?(f)
    return false unless is_880_field?(f)

    seq_part = f['6'].to_s.strip.split('-')[1]
    seq_part.to_s[0..1] == '00'
  end

  # Fields that have a subfield 6.
  def fields_with_subfield6(fields)
    fields.select { |f| subfield6?(f) }
  end

  # return the subfield 6 value of every field that has one, in input order
  # (reads f['6'] rather than the field's whole value)
  def subfield_6_values(fields)
    fields_with_subfield6(fields).map { |f| f['6'] }
  end

  # return the value with the smallest positive sequence number below 9999,
  # or nil when no value qualifies
  def subfield6_value_with_lowest_seq_no(values)
    seq = 9999
    txt = nil
    values.each do |val|
      num = seq_no(val)
      if (num > 0) && (num < seq)
        seq = num
        txt = val
      end
    end
    txt
  end

  # return all subfields except subfield6
  def subfields_without_subfield6(field)
    field.subfields.reject { |sf| sf.code == '6' }
  end

end
|
152
|
+
end
|
153
|
+
end
|
154
|
+
end
|
@@ -0,0 +1,117 @@
|
|
1
|
+
require 'csv'
|
2
|
+
require 'marc'
|
3
|
+
|
4
|
+
module BerkeleyLibrary
|
5
|
+
module TIND
|
6
|
+
module Mapping
|
7
|
+
# Stateless helpers for reading the mapping CSV files and for building and
# inspecting MARC fields. All methods are module-level (`Util.xxx`).
module Util

  class << self

    # Read a CSV file with a header row.
    # Headers become symbols and the three tag columns are normalized to
    # three digits (see +format_tag+).
    #
    # @param file [String] path of the CSV file
    # @return [Array<CSV::Row>] one row per data line
    def csv_rows(file)
      CSV.foreach(file, headers: true, header_converters: :symbol, encoding: 'bom|utf-8').map do |row|
        clr_row(row)
      end
    end

    # Turn the symbol notation into the real concatenation symbol: every
    # letter 'a' stands for a space. A nil input gives a single space.
    def concatenation_symbol(str)
      str ? str.tr('a', ' ') : ' '
    end

    # Split an indicator definition such as "_,1" into [' ', '1'];
    # '_' stands for a blank indicator. A nil input gives [].
    def indicator(str)
      return [] unless str

      str.strip.tr('_', ' ').split(',')
    end

    # Key under which the rule for +tag+ is stored, eg. '245' => :tag_245
    def tag_symbol(tag)
      "tag_#{tag}".to_sym
    end

    # Build a datafield from a tag, a two-element indicator array and subfields.
    def datafield(tag, indicator, subfields)
      datafield = ::MARC::DataField.new(tag, indicator[0], indicator[1])
      subfields.each { |sf| datafield.append(sf) }
      datafield
    end

    # Build a subfield.
    def subfield(code, value)
      ::MARC::Subfield.new(code, value)
    end

    # Merge the field's subfields into one hash of code => value.
    # When a code is repeated, the last value wins.
    def subfield_hash(field)
      code_value_arr = field.to_hash[field.tag]['subfields']
      code_value_arr.each_with_object({}) { |pair, merged| merged.update(pair) }
    end

    # input an array of rules, example: [["a,b,c,d", "b", "--"],["o,p,q", "b", ""]]
    # returns the stripped concatenation symbol of every rule
    def symbols(rules)
      rules.map { |rule| concatenation_symbol(rule[2]).strip }
    end

    # Strip +val+ and remove a trailing concatenation symbol, once per rule
    # and in rule order.
    def remove_extra_symbol(rules, val)
      symbols(rules).reduce(val) { |text, sym| text.strip.delete_suffix(sym) }
    end

    # First field of the record whose tag equals +tag+, or nil.
    def alma_datafield(tag, record)
      record.fields.find { |f| f.tag.to_s == tag }
    end

    # False when 245 $a starts with "Host bibliographic record" (case-insensitive).
    #
    # NOTE(review): raises NoMethodError when the record has no 245 field or
    # no 245 $a - confirm whether such records should count as qualified.
    def qualified_alma_record?(alma_record)
      f_245 = alma_datafield('245', alma_record)
      f_245_a = f_245['a'].downcase

      val = 'Host bibliographic record'.downcase
      !f_245_a.start_with? val
    end

    # From DM - get testing MARC from xml file
    # @param xml [String] the XML to parse
    # @return [MARC::Record, nil] the MARC record from the specified XML
    def from_xml(xml)
      # noinspection RubyYardReturnMatch,RubyMismatchedReturnType
      all_from_xml(xml).first
    end

    private

    # Normalize the three tag columns of a CSV row in place and return the row.
    def clr_row(row)
      %i[tag_origin tag_destination map_if_no_this_tag_existed].each do |column|
        row[column] = format_tag(row[column])
      end
      row
    end

    # To ensure tag from csv file in three digits
    def format_tag(tag)
      # LDR is not a numeric tag
      return tag unless numeric? tag

      # Convert explicitly as decimal: handing the string to format('%03d')
      # parses it like Integer(), so "010" would be read as octal (=> "008")
      # and "008" would raise ArgumentError.
      format('%03d', tag.to_i)
    end

    # True when +str+ consists of one or more digits only.
    def numeric?(str)
      return false if str.nil?

      /\A\d+\z/.match?(str)
    end

    # Parses MARCXML.
    #
    # @param xml [String] the XML to parse
    # @return [MARC::XMLReader] the MARC records
    def all_from_xml(xml)
      input = StringIO.new(xml.scrub)
      MARC::XMLReader.new(input)
    end

  end
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
@@ -0,0 +1 @@
|
|
1
|
+
# Require every Ruby file in the sibling 'mapping' directory, in sorted path order.
Dir[File.expand_path('mapping/*.rb', __dir__)].sort.each { |path| require path }
|
@@ -7,7 +7,7 @@ module BerkeleyLibrary
|
|
7
7
|
SUMMARY = 'TIND DA utilities for the UC Berkeley Library'.freeze
|
8
8
|
DESCRIPTION = 'UC Berkeley Library utility gem for working with the TIND DA digital archive.'.freeze
|
9
9
|
LICENSE = 'MIT'.freeze
|
10
|
-
VERSION = '0.
|
10
|
+
VERSION = '0.6.0'.freeze
|
11
11
|
HOMEPAGE = 'https://github.com/BerkeleyLibrary/tind'.freeze
|
12
12
|
end
|
13
13
|
end
|
@@ -30,8 +30,7 @@ module BerkeleyLibrary
|
|
30
30
|
#
|
31
31
|
# @param obj [Object] the object that might be an IO
|
32
32
|
def writer_like?(obj)
|
33
|
-
|
34
|
-
obj.is_a?(IO) || obj.is_a?(StringIO)
|
33
|
+
obj.respond_to?(:write) && obj.respond_to?(:close)
|
35
34
|
end
|
36
35
|
|
37
36
|
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'marc'
|
3
|
+
|
4
|
+
module BerkeleyLibrary
|
5
|
+
module TIND
|
6
|
+
module Mapping
|
7
|
+
describe AdditionalDatafieldProcess do

  # Alma record fixture and the TIND fields mapped from it
  let(:alma) { Alma.new('spec/data/mapping/record.xml') }
  let(:alma_record) { alma.record }
  let(:tind_marc) { TindMarc.new(alma_record) }
  let(:tindfields) { tind_marc.tindfields }

  # a second, fake 245 field appended to the mapped fields
  let(:extra_245_field) do
    subfield_a = Util.subfield('a', 'fake 245 a')
    [Util.datafield('245', [' ', ' '], [subfield_a])]
  end
  let(:fields_with_repeat) { tindfields.concat(extra_245_field) }

  context '# Process 1: Remove duplicated fields' do
    # two 245 fields inputed, one is removed
    it 'Input two 245 fields, return only one 245 field' do
      remaining_tags = tind_marc.remove_repeats(fields_with_repeat).map(&:tag)
      expect(remaining_tags.count('245')).to eq 1
    end

    # one subfield 'a' is kept, subfield 'a' with a value of 'fake 245 a' is ignored
    it 'Input two subfield $a, keeping the first one' do
      deduplicated = tind_marc.remove_repeats(fields_with_repeat)
      kept_245 = tind_marc.field_on_tag('245', deduplicated)
      expect(kept_245['a']).to eq 'Cang jie pian'
    end
  end

end
|
32
|
+
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,115 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'tempfile'
|
3
|
+
|
4
|
+
module BerkeleyLibrary
|
5
|
+
module TIND
|
6
|
+
module Mapping
|
7
|
+
describe AlmaBase do
  extend BerkeleyLibrary::Logging

  # anonymous host object exposing the AlmaBase methods
  let(:dummy_obj) { Class.new { extend AlmaBase } }
  let(:alma_record) { ::MARC::Record.new }
  let(:save_to_file) { Tempfile.new('xml') }
  let(:record_id) { Class.new { extend BerkeleyLibrary::Alma::RecordId } }
  let(:hash) { { '980' => ['pre_1912'] } }

  let(:qualified_alma_obj) { Alma.new('spec/data/mapping/record.xml') }
  let(:qualified_alm_record) { qualified_alma_obj.record }

  let(:un_qualified_alma_obj) { Alma.new('spec/data/mapping/record_not_qualified.xml') }
  let(:un_qualified_alm_record) { un_qualified_alma_obj.record }

  describe '# base_tind_record' do
    it 'input a qualified Alma record - return a tind record' do
      allow(dummy_obj).to receive(:tind_record).with('C084093187', qualified_alm_record, []).and_return(::MARC::Record.new)
      expect(dummy_obj.base_tind_record('C084093187', [], qualified_alm_record)).to be_a ::MARC::Record
    end

    it 'input an unqualified Alma record - raise ArgumentError' do
      expect { dummy_obj.base_tind_record('C084093187', [], un_qualified_alm_record) }.to raise_error(ArgumentError)
    end

    it 'no input Alma record with a nil (record) from id - raise ArgumentError' do
      allow(dummy_obj).to receive(:alma_record_from).with('C084093187').and_return(nil)
      expect { dummy_obj.base_tind_record('C084093187', []) }.to raise_error(ArgumentError)
    end
  end

  describe '# base_save' do
    # remove the temporary file once the example is done
    after { save_to_file.close! }

    it 'save tind record' do
      dummy_obj.base_save('C084093187', qualified_alm_record, save_to_file)
      # File.readlines closes the file itself, unlike a bare File.open
      expect(File.readlines(save_to_file.path).first).to eq "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
    end
  end

  describe '# alma_record' do
    it 'get Alma record' do
      allow(record_id).to receive(:get_marc_record).and_return(::MARC::Record.new)
      allow(dummy_obj).to receive(:get_record_id).with('C084093187').and_return(record_id)
      expect(dummy_obj.send(:alma_record_from, 'C084093187')).to be_instance_of ::MARC::Record
    end
  end

  describe '# get_record_id' do
    it 'return BerkeleyLibrary::Alma::BarCode' do
      BerkeleyLibrary::TIND::Mapping::AlmaBase.is_barcode = true
      expect(dummy_obj.send(:get_record_id, 'C084093187')).to be_instance_of BerkeleyLibrary::Alma::BarCode
    end

    it 'return BerkeleyLibrary::Alma::MMSID' do
      BerkeleyLibrary::TIND::Mapping::AlmaBase.is_barcode = false
      expect(dummy_obj.send(:get_record_id, '991085821143406532')).to be_instance_of BerkeleyLibrary::Alma::MMSID
    end

    it 'return BerkeleyLibrary::Alma::BibNumber' do
      BerkeleyLibrary::TIND::Mapping::AlmaBase.is_barcode = false
      expect(dummy_obj.send(:get_record_id, 'b11082434')).to be_instance_of BerkeleyLibrary::Alma::BibNumber
    end
  end

  describe '# add_f_035' do
    it 'Add 035 from mms_id' do
      BerkeleyLibrary::TIND::Mapping::AlmaBase.is_035_from_mms_id = true
      expect(dummy_obj.send(:add_f_035, '991085821143406532', hash).tag).to eq '035'
    end

    it 'Not to add 035 from mms_id' do
      BerkeleyLibrary::TIND::Mapping::AlmaBase.is_035_from_mms_id = false
      expect(dummy_obj.send(:add_f_035, '991085821143406532', hash)).to eq nil
    end
  end

  describe '# tind_record, # derived_tind_fields' do
    # NOTE(review): assigned while the spec file is loaded, so the setting
    # stays in place for every spec that runs afterwards.
    BerkeleyLibrary::TIND::Mapping::AlmaBase.collection_parameter_hash = {
      '336' => ['Image'],
      '852' => ['East Asian Library'],
      '980' => ['pre_1912'],
      '982' => ['Pre 1912 Chinese Materials - short name', 'Pre 1912 Chinese Materials - long name'],
      '991' => []
    }
    let(:id) { '991085821143406532' }
    let(:tags) { %w[902 336 852 980 982 901 856] }
    let(:tags_with_035) { %w[902 336 852 980 982 901 856 035] }

    it ' get tind_record' do
      datafields = []
      marc_record = qualified_alm_record
      expect(dummy_obj.send(:tind_record, id, marc_record, datafields)).to be_instance_of ::MARC::Record
    end

    it 'get derived fields without 035' do
      # set the flag explicitly so the example does not depend on run order
      BerkeleyLibrary::TIND::Mapping::AlmaBase.is_035_from_mms_id = false
      expect(dummy_obj.send(:derived_tind_fields, id).map(&:tag)).to eq tags
    end

    it 'get derived fields with 035' do
      BerkeleyLibrary::TIND::Mapping::AlmaBase.is_035_from_mms_id = true
      expect(dummy_obj.send(:derived_tind_fields, id).map(&:tag)).to eq tags_with_035
    end
  end

end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|