berkeley_library-tind 0.4.3 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/build.yml +16 -4
- data/.gitignore +3 -0
- data/.idea/inspectionProfiles/Project_Default.xml +10 -0
- data/.idea/tind.iml +24 -22
- data/CHANGES.md +24 -0
- data/README.md +136 -3
- data/berkeley_library-tind.gemspec +2 -0
- data/bin/alma-multiple-tind +50 -0
- data/bin/alma-single-tind +48 -0
- data/bin/save_tind_records +80 -0
- data/bin/tind-marc +73 -0
- data/lib/berkeley_library/tind/mapping/additional_datafield_process.rb +128 -0
- data/lib/berkeley_library/tind/mapping/alma.rb +42 -0
- data/lib/berkeley_library/tind/mapping/alma_base.rb +101 -0
- data/lib/berkeley_library/tind/mapping/alma_multiple_tind.rb +31 -0
- data/lib/berkeley_library/tind/mapping/alma_single_tind.rb +28 -0
- data/lib/berkeley_library/tind/mapping/config.rb +44 -0
- data/lib/berkeley_library/tind/mapping/csv_mapper.rb +35 -0
- data/lib/berkeley_library/tind/mapping/csv_multiple_mapper.rb +41 -0
- data/lib/berkeley_library/tind/mapping/data/one_to_multiple_mapping.csv +4 -0
- data/lib/berkeley_library/tind/mapping/data/one_to_one_mapping.csv +39 -0
- data/lib/berkeley_library/tind/mapping/external_tind_field.rb +103 -0
- data/lib/berkeley_library/tind/mapping/field_catalog.rb +146 -0
- data/lib/berkeley_library/tind/mapping/field_catalog_util.rb +59 -0
- data/lib/berkeley_library/tind/mapping/match_tind_field.rb +77 -0
- data/lib/berkeley_library/tind/mapping/misc.rb +69 -0
- data/lib/berkeley_library/tind/mapping/multiple_rule.rb +36 -0
- data/lib/berkeley_library/tind/mapping/single_rule.rb +143 -0
- data/lib/berkeley_library/tind/mapping/tind_control_subfield.rb +59 -0
- data/lib/berkeley_library/tind/mapping/tind_field.rb +49 -0
- data/lib/berkeley_library/tind/mapping/tind_field_from_leader.rb +27 -0
- data/lib/berkeley_library/tind/mapping/tind_field_from_multiple_map.rb +59 -0
- data/lib/berkeley_library/tind/mapping/tind_field_from_single_map.rb +170 -0
- data/lib/berkeley_library/tind/mapping/tind_field_util.rb +112 -0
- data/lib/berkeley_library/tind/mapping/tind_marc.rb +134 -0
- data/lib/berkeley_library/tind/mapping/tind_subfield_util.rb +154 -0
- data/lib/berkeley_library/tind/mapping/util.rb +117 -0
- data/lib/berkeley_library/tind/mapping.rb +1 -0
- data/lib/berkeley_library/tind/marc/xml_builder.rb +70 -0
- data/lib/berkeley_library/tind/marc/xml_reader.rb +22 -16
- data/lib/berkeley_library/tind/marc/xml_writer.rb +152 -0
- data/lib/berkeley_library/tind/module_info.rb +1 -1
- data/lib/berkeley_library/util/files.rb +38 -0
- data/spec/berkeley_library/tind/mapping/additional_datafield_process_spec.rb +35 -0
- data/spec/berkeley_library/tind/mapping/alma_base_spec.rb +115 -0
- data/spec/berkeley_library/tind/mapping/alma_multiple_tind_spec.rb +20 -0
- data/spec/berkeley_library/tind/mapping/alma_single_tind_spec.rb +87 -0
- data/spec/berkeley_library/tind/mapping/alma_spec.rb +28 -0
- data/spec/berkeley_library/tind/mapping/config_spec.rb +19 -0
- data/spec/berkeley_library/tind/mapping/csv_mapper_spec.rb +27 -0
- data/spec/berkeley_library/tind/mapping/csv_multiple_mapper_spec.rb +27 -0
- data/spec/berkeley_library/tind/mapping/external_tind_field_spec.rb +45 -0
- data/spec/berkeley_library/tind/mapping/field_catalog_spec.rb +78 -0
- data/spec/berkeley_library/tind/mapping/field_catalog_util_spec.rb +57 -0
- data/spec/berkeley_library/tind/mapping/match_tind_field_spec.rb +25 -0
- data/spec/berkeley_library/tind/mapping/misc_spec.rb +51 -0
- data/spec/berkeley_library/tind/mapping/multiple_rule_spec.rb +44 -0
- data/spec/berkeley_library/tind/mapping/single_rule_spec.rb +52 -0
- data/spec/berkeley_library/tind/mapping/tind_control_subfield_spec.rb +96 -0
- data/spec/berkeley_library/tind/mapping/tind_field_from_leader_spec.rb +21 -0
- data/spec/berkeley_library/tind/mapping/tind_field_from_multiple_map_spec.rb +31 -0
- data/spec/berkeley_library/tind/mapping/tind_field_from_single_map_spec.rb +150 -0
- data/spec/berkeley_library/tind/mapping/tind_field_spec.rb +60 -0
- data/spec/berkeley_library/tind/mapping/tind_field_util_spec.rb +68 -0
- data/spec/berkeley_library/tind/mapping/tind_marc_spec.rb +88 -0
- data/spec/berkeley_library/tind/mapping/tind_subfield_util_spec.rb +48 -0
- data/spec/berkeley_library/tind/mapping/util_spec.rb +56 -0
- data/spec/berkeley_library/tind/marc/xml_reader_spec.rb +22 -0
- data/spec/berkeley_library/tind/marc/xml_writer_spec.rb +218 -0
- data/spec/data/issue-4.xml +157 -0
- data/spec/data/mapping/991032333019706532-sru.xml +216 -0
- data/spec/data/mapping/one_to_multiple_mapping.csv +4 -0
- data/spec/data/mapping/one_to_one_mapping.csv +39 -0
- data/spec/data/mapping/record.xml +263 -0
- data/spec/data/mapping/record_not_qualified.xml +36 -0
- data/spec/data/new-records.xml +46 -0
- metadata +128 -2
@@ -0,0 +1,117 @@
|
|
1
|
+
require 'csv'
|
2
|
+
require 'marc'
|
3
|
+
|
4
|
+
module BerkeleyLibrary
  module TIND
    module Mapping
      # Stateless helpers for the mapping code: loading mapping CSV rows,
      # building MARC data fields / subfields, and small string clean-ups.
      # All methods are singleton methods (`Util.csv_rows(...)`, etc.).
      module Util

        class << self

          # Reads a CSV file that has a header row (a leading UTF-8 BOM is
          # tolerated) and returns its rows with the three tag columns
          # normalized by {#clr_row}. Header names are converted to symbols.
          #
          # @param file [String] path of the CSV file to read
          # @return [Array<CSV::Row>] the rows, in file order
          def csv_rows(file)
            CSV.foreach(file, headers: true, header_converters: :symbol, encoding: 'bom|utf-8')
              .map { |row| clr_row(row) }
          end

          # Replaces every `a` in `str` with a space.
          # NOTE(review): presumably `a` is the CSV placeholder for a blank in
          # a concatenation symbol -- confirm against the mapping CSV files.
          #
          # @param str [String, nil] the raw symbol
          # @return [String] the converted symbol; a single space when `str` is nil
          def concatenation_symbol(str)
            return ' ' unless str

            str.gsub('a', ' ')
          end

          # Splits a comma-separated indicator string, after stripping it and
          # turning each underscore into a space (e.g. `"_,1"` => `[" ", "1"]`).
          #
          # @param str [String, nil] the raw indicator text
          # @return [Array<String>] the indicator values; empty when `str` is nil
          def indicator(str)
            return [] unless str

            str.strip.gsub('_', ' ').split(',')
          end

          # @param tag [#to_s] a MARC tag
          # @return [Symbol] the tag prefixed with `tag_`, e.g. `:tag_245`
          def tag_symbol(tag)
            :"tag_#{tag}"
          end

          # Builds a MARC data field.
          #
          # @param tag [String] the field tag
          # @param indicator [#[]] the first and second indicator, read from
          #   positions 0 and 1
          # @param subfields [Enumerable<::MARC::Subfield>] subfields, appended in order
          # @return [::MARC::DataField] the new field
          def datafield(tag, indicator, subfields)
            ::MARC::DataField.new(tag, indicator[0], indicator[1]).tap do |field|
              subfields.each { |sf| field.append(sf) }
            end
          end

          # @param code [String] the subfield code
          # @param value [String] the subfield value
          # @return [::MARC::Subfield] the new subfield
          def subfield(code, value)
            ::MARC::Subfield.new(code, value)
          end

          # Flattens the field's subfields into one code => value hash. When a
          # code occurs more than once, the last value wins.
          #
          # @param field [#to_hash, #tag] a data field whose `to_hash` has the
          #   shape `{ tag => { 'subfields' => [{ code => value }, ...] } }`
          # @return [Hash] code => value
          def subfield_hash(field)
            pairs = field.to_hash[field.tag]['subfields']
            pairs.each_with_object({}) { |pair, merged| merged.update(pair) }
          end

          # input an array of rules, example: [["a,b,c,d", "b", "--"],["o,p,q", "b", ""]]
          #
          # @param rules [Array<Array>] rules whose third element is a raw
          #   concatenation symbol (or nil)
          # @return [Array<String>] the stripped, converted symbol of each rule
          def symbols(rules)
            rules.map { |rule| concatenation_symbol(rule[2]).strip }
          end

          # Strips `val` and removes a trailing symbol, once per rule, in rule
          # order. The result of the last removal is returned without a final
          # strip; with no rules, `val` is returned untouched.
          #
          # @param rules [Array<Array>] see {#symbols}
          # @param val [String] the value to clean
          # @return [String] the cleaned value
          def remove_extra_symbol(rules, val)
            symbols(rules).reduce(val) { |cleaned, sym| cleaned.strip.delete_suffix(sym) }
          end

          # @param tag [String] the tag to look for
          # @param record [#fields] the record to search
          # @return [Object, nil] the first field whose tag (as a string)
          #   equals `tag`, or nil if there is none
          def alma_datafield(tag, record)
            record.fields.find { |f| f.tag.to_s == tag }
          end

          # A record is rejected only when its 245$a starts (case-insensitively)
          # with "Host bibliographic record".
          #
          # NOTE(review): a record with no 245 field, or a 245 without $a, used
          # to raise NoMethodError here; it is now reported as qualified, since
          # it cannot carry the host-record title. Confirm this is the desired
          # policy for title-less records.
          #
          # @param alma_record [#fields] the Alma MARC record
          # @return [Boolean] false for host bibliographic records, true otherwise
          def qualified_alma_record?(alma_record)
            title = alma_datafield('245', alma_record)&.[]('a')
            return true if title.nil?

            host_prefix = 'Host bibliographic record'.downcase
            !title.downcase.start_with?(host_prefix)
          end

          # From DM - get testing MARC from xml file
          # @param xml [String] the XML to parse
          # @return [MARC::Record, nil] the first MARC record in the specified XML
          def from_xml(xml)
            # noinspection RubyYardReturnMatch,RubyMismatchedReturnType
            all_from_xml(xml).first
          end

          private

          # Normalizes the tag columns of a mapping row in place.
          #
          # @param row [CSV::Row] a row read with symbol headers
          # @return [CSV::Row] the same row
          def clr_row(row)
            %i[tag_origin tag_destination map_if_no_this_tag_existed].each do |column|
              row[column] = format_tag(row[column])
            end
            row
          end

          # To ensure tag from csv file in three digits
          #
          # Pads with zeros on the left rather than going through
          # `format('%03d', ...)`: given a String, `%d` applies Integer()
          # prefix rules, so "035" was read as octal (=> "029") and "08"
          # raised ArgumentError.
          #
          # @param tag [String, nil] the raw tag
          # @return [String, nil] the zero-padded tag; non-numeric values
          #   (e.g. "LDR", nil, "") are returned unchanged
          def format_tag(tag)
            # LDR is not a numeric tag
            return tag unless numeric?(tag)

            tag.rjust(3, '0')
          end

          # @param str [Object] the value to test
          # @return [Boolean] true only for a String made of one or more digits
          def numeric?(str)
            str.is_a?(String) && str.match?(/\A\d+\z/)
          end

          # Parses MARCXML.
          #
          # @param xml [String] the XML to parse (scrubbed of invalid bytes first)
          # @return [MARC::XMLReader] the MARC records
          def all_from_xml(xml)
            MARC::XMLReader.new(StringIO.new(xml.scrub))
          end

        end
      end
    end
  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
# Require every Ruby file directly under the sibling `mapping/` directory, in sorted path order.
Dir.glob(File.expand_path('mapping/*.rb', __dir__)).sort.each { |path| require path }
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module BerkeleyLibrary
  module TIND
    module MARC
      # Builds a detached `<record>` XML element from a MARC record, writing
      # the leader as `<controlfield tag="000">` and replacing spaces in
      # control field values with backslashes.
      class XMLBuilder
        attr_reader :marc_record

        # @param marc_record [::MARC::Record] the record to serialize
        def initialize(marc_record)
          @marc_record = marc_record
        end

        # @return [Nokogiri::XML::Node] the `<record>` element, unlinked from
        #   the document it was built in
        def build
          record_element = builder.doc.root
          record_element.unlink
          record_element
        end

        private

        # Runs a Nokogiri builder producing `<record>` with the leader first,
        # followed by every field yielded by the record.
        def builder
          Nokogiri::XML::Builder.new do |xml|
            xml.record do
              add_leader(xml)
              marc_record.each { |field| add_field(xml, field) }
            end
          end
        end

        # Emits the leader, if the record has a non-empty one.
        def add_leader(xml)
          raw_leader = marc_record.leader
          return if raw_leader.nil? || raw_leader == ''

          # TIND uses <controlfield tag="000"/> instead of <leader/>
          add_control_field(xml, ::MARC::ControlField.new('000', clean_leader(raw_leader)))
        end

        # Emits `<datafield>` with its tag and both indicators, and one
        # `<subfield>` child per subfield.
        def add_data_field(xml, df)
          attributes = { tag: df.tag, ind1: df.indicator1, ind2: df.indicator2 }
          xml.datafield(attributes) do
            df.subfields.each { |sf| xml.subfield(sf.value, code: sf.code) }
          end
        end

        # Emits `<controlfield>`; a nil value is passed through as nil.
        def add_control_field(xml, cf)
          # TIND uses \ (0x5c), not space (0x20), for unspecified values in positional fields
          tind_value = cf.value&.gsub(' ', '\\')
          xml.controlfield(tind_value, tag: cf.tag)
        end

        # Dispatches on the field class; anything that is neither a control
        # field nor a data field is skipped.
        def add_field(xml, f)
          if f.is_a?(::MARC::ControlField)
            add_control_field(xml, f)
          elsif f.is_a?(::MARC::DataField)
            add_data_field(xml, f)
          end
        end

        # Returns a copy of the leader in which
        # - every character other than a word character, whitespace, `|` or
        #   `^` is replaced by `Z` (inside the character class, `|` and `^`
        #   after the first position are literals),
        # - positions 20-23 are forced to `4500`,
        # - a blank at position 6 becomes `Z`.
        def clean_leader(leader)
          cleaned = leader.gsub(/[^\w|^\s]/, 'Z')
          cleaned[20..23] = '4500' unless cleaned[20..23] == '4500'
          cleaned[6..6] = 'Z' if cleaned[6..6] == ' '
          cleaned
        end

      end
    end
  end
end
|
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'nokogiri'
|
2
2
|
require 'marc/xml_parsers'
|
3
3
|
require 'marc_extensions'
|
4
|
+
require 'berkeley_library/util/files'
|
4
5
|
|
5
6
|
module BerkeleyLibrary
|
6
7
|
module TIND
|
@@ -9,7 +10,7 @@ module BerkeleyLibrary
|
|
9
10
|
class XMLReader
|
10
11
|
include Enumerable
|
11
12
|
include ::MARC::NokogiriReader
|
12
|
-
|
13
|
+
include BerkeleyLibrary::Util::Files
|
13
14
|
|
14
15
|
# ############################################################
|
15
16
|
# Constant
|
@@ -70,6 +71,12 @@ module BerkeleyLibrary
|
|
70
71
|
# MARC::GenericPullParser overrides
|
71
72
|
|
72
73
|
def yield_record
|
74
|
+
@record[:record].tap do |record|
|
75
|
+
clean_cf_values(record)
|
76
|
+
move_cf000_to_leader(record)
|
77
|
+
record.freeze if @freeze
|
78
|
+
end
|
79
|
+
|
73
80
|
super
|
74
81
|
ensure
|
75
82
|
increment_records_yielded!
|
@@ -120,26 +127,25 @@ module BerkeleyLibrary
|
|
120
127
|
|
121
128
|
private
|
122
129
|
|
123
|
-
|
124
|
-
|
125
|
-
return
|
126
|
-
return StringIO.new(file) if file =~ /^\s*</x
|
130
|
+
# TIND carries the leader in <controlfield tag="000"/> rather than <leader/>:
# when the record has a 000 field, copy its value into the leader and remove
# the field from the record's field list. Does nothing when there is no 000.
def move_cf000_to_leader(record)
  leader_field = record['000']
  return unless leader_field

  record.leader = leader_field.value
  record.fields.delete(leader_field)
end
|
130
137
|
|
131
|
-
#
|
132
|
-
|
133
|
-
|
134
|
-
# @param obj [Object] the object that might be an IO
|
135
|
-
# @see https://github.com/sparklemotion/nokogiri/blob/v1.11.1/lib/nokogiri/xml/sax/parser.rb#L81 Nokogiri::XML::SAX::Parser#parse
|
136
|
-
def io_like?(obj)
|
137
|
-
obj.respond_to?(:read) && obj.respond_to?(:close)
|
138
|
+
# TIND uses \ (0x5c), not space (0x20), for unspecified values in positional
# fields: turn each backslash in every control field value back into a space.
# A nil value stays nil.
def clean_cf_values(record)
  record.each_control_field do |control_field|
    tind_value = control_field.value
    control_field.value = tind_value&.gsub('\\', ' ')
  end
end
|
139
142
|
|
140
|
-
def
|
141
|
-
|
142
|
-
|
143
|
+
# Resolves the reader's input to something readable.
#
# @param file [Object] an IO-like object, the path of an existing file, or
#   a string of XML text
# @return [Object] `file` itself if it is reader-like; a new File if `file`
#   names an existing file; otherwise a StringIO over the XML text
# @raise [ArgumentError] if `file` is none of the above
def ensure_io(file)
  return file if reader_like?(file)
  return File.new(file) if file_exists?(file)

  # Only string-like input can be XML text. Checking this first keeps
  # arbitrary objects from hitting `=~` (not defined on Object as of
  # Ruby 3.2), so they get the ArgumentError below instead.
  if file.respond_to?(:to_str)
    text = file.to_str
    # \A rather than ^: the text as a whole must start with (whitespace and) '<'
    return StringIO.new(text) if text.match?(/\A\s*</)
  end

  raise ArgumentError, "Don't know how to read XML from #{file.inspect}: not an IO, file path, or XML text"
end
|
144
150
|
|
145
151
|
def increment_records_yielded!
|
@@ -0,0 +1,152 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'marc_extensions'
|
3
|
+
require 'berkeley_library/tind/marc/xml_builder'
|
4
|
+
|
5
|
+
module BerkeleyLibrary
  module TIND
    module MARC
      # Streams MARC records to an IO as a MARCXML `<collection>`, one
      # `<record>` (built by {XMLBuilder}) per call to {#write}.
      class XMLWriter
        include BerkeleyLibrary::Util::Files
        include BerkeleyLibrary::Logging

        # ------------------------------------------------------------
        # Constants

        UTF_8 = Encoding::UTF_8.name

        # Template document; only its prolog and opening tag are ever written.
        EMPTY_COLLECTION_DOC = Nokogiri::XML::Builder.new(encoding: UTF_8) do |xml|
          xml.collection(xmlns: ::MARC::MARC_NS)
        end.doc.freeze

        COLLECTION_CLOSING_TAG = '</collection>'.freeze

        DEFAULT_NOKOGIRI_OPTS = { encoding: UTF_8 }.freeze

        # ------------------------------------------------------------
        # Fields

        attr_reader :out
        attr_reader :nokogiri_options

        # ------------------------------------------------------------
        # Initializer

        # Initializes a new {XMLWriter}.
        #
        # ```ruby
        # File.open('marc.xml', 'wb') do |f|
        #   w = XMLWriter.new(f)
        #   marc_records.each { |r| w.write(r) }
        #   w.close
        # end
        # ```
        #
        # @param out [IO, String] an IO, or the name of a file
        # @param nokogiri_options [Hash] Options passed to
        #   {https://nokogiri.org/rdoc/Nokogiri/XML/Node.html#method-i-write_to Nokogiri::XML::Node#write_to}
        #   Note that the `encoding` option is ignored, except insofar as
        #   passing an encoding other than UTF-8 will raise an `ArgumentError`.
        # @raise ArgumentError if `out` is not an IO or a string, or is a string referencing
        #   a file path that cannot be opened for writing; or if an encoding other than UTF-8
        #   is specified in `nokogiri_options`
        # @see #open
        def initialize(out, **nokogiri_options)
          @nokogiri_options = valid_nokogiri_options(nokogiri_options)
          @out = ensure_io(out)
        end

        # ------------------------------------------------------------
        # Class methods

        class << self

          # Opens a new {XMLWriter} with the specified output destination and
          # Nokogiri options, writes the XML prolog and opening `<collection>`
          # tag, yields the writer to write one or more MARC records, and closes
          # the writer. The writer is closed even if the block raises, so the
          # underlying stream is never leaked.
          #
          # ```ruby
          # XMLWriter.open('marc.xml') do |w|
          #   marc_records.each { |r| w.write(r) }
          # end
          # ```
          #
          # Note that unlike initializing a writer with {#new} and closing it
          # immediately, this will write an XML document with an empty
          # `<collection></collection>` tag even if no records are written.
          #
          # @yieldparam writer [XMLWriter] the writer
          # @return [nil]
          # @see #new
          # @see #close
          def open(out, **nokogiri_options)
            writer = new(out, **nokogiri_options)
            begin
              writer.send(:ensure_open!)
              yield writer if block_given?
            ensure
              writer.close
            end
            nil
          end
        end

        # ------------------------------------------------------------
        # Instance methods

        # Writes the specified record to the underlying stream, writing the
        # XML prolog and opening `<collection>` tag if they have not yet
        # been written.
        #
        # @param record [::MARC::Record] the MARC record to write.
        # @raise IOError if the underlying stream has already been closed.
        def write(record)
          ensure_open!
          record_element = XMLBuilder.new(record).build
          record_element.write_to(out, nokogiri_options)
          out.write("\n")
        end

        # Closes the underlying stream. If the XML prolog and opening `<collection>`
        # tag have already been written, the closing `</collection>` tag is written
        # first. The closing tag is written at most once, so a second call only
        # closes the stream again.
        def close
          out.write(COLLECTION_CLOSING_TAG) if @open
          # Reset so that a repeated close does not write to the closed stream
          @open = false
          out.close
        end

        # ------------------------------------------------------------
        # Private

        private

        # Writes the prolog and opening tag unless that has been done already.
        def ensure_open!
          return if @open

          out.write(prolog_and_opening_tag)
          @open = true
        end

        # Serializes the empty collection document and rewrites its
        # self-closing end (`/>`) into an opening tag followed by a newline.
        def prolog_and_opening_tag
          StringIO.open do |tmp|
            EMPTY_COLLECTION_DOC.write_to(tmp, nokogiri_options)
            result = tmp.string
            result.sub!(%r{/>\s*$}, ">\n")
            result
          end
        end

        # @return `file` itself if it is writer-like, or a File opened for
        #   binary writing if the parent of `file` exists
        # @raise ArgumentError otherwise
        def ensure_io(file)
          return file if writer_like?(file)
          return File.open(file, 'wb') if parent_exists?(file)

          raise ArgumentError, "Don't know how to write XML to #{file.inspect}: not an IO or file path"
        end

        # Drops any `:encoding` option (rejecting values other than "UTF-8")
        # and merges the rest over the defaults.
        def valid_nokogiri_options(opts)
          if (encoding = opts.delete(:encoding)) && encoding != UTF_8
            raise ArgumentError, "#{self.class.name} only supports #{UTF_8}; unable to use specified encoding #{encoding}"
          end

          DEFAULT_NOKOGIRI_OPTS.merge(opts)
        end

      end
    end
  end
end
|
@@ -7,7 +7,7 @@ module BerkeleyLibrary
|
|
7
7
|
SUMMARY = 'TIND DA utilities for the UC Berkeley Library'.freeze
|
8
8
|
DESCRIPTION = 'UC Berkeley Library utility gem for working with the TIND DA digital archive.'.freeze
|
9
9
|
LICENSE = 'MIT'.freeze
|
10
|
-
VERSION = '0.
|
10
|
+
VERSION = '0.6.0'.freeze
|
11
11
|
HOMEPAGE = 'https://github.com/BerkeleyLibrary/tind'.freeze
|
12
12
|
end
|
13
13
|
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'pathname' # parent_exists? builds a Pathname from string paths

module BerkeleyLibrary
  module Util
    # File- and IO-related predicates. Usable as a mixin, or directly as
    # `Files.file_exists?(...)` etc.
    # TODO: Move this to `berkeley_library-util`
    module Files
      class << self
        include Files
      end

      # @param path [Object] a Pathname-like object (responds to `exist?`)
      #   or a string-like path (responds to `to_str`)
      # @return [Boolean] whether `path` refers to something that exists;
      #   falsey for objects that are neither
      def file_exists?(path)
        (path.respond_to?(:exist?) && path.exist?) ||
          (path.respond_to?(:to_str) && File.exist?(path))
      end

      # @param path [Object] a Pathname-like object (responds to `parent`)
      #   or a string-like path (responds to `to_str`)
      # @return [Boolean] whether the parent of `path` exists; falsey for
      #   objects that are neither
      def parent_exists?(path)
        (path.respond_to?(:parent) && path.parent.exist?) ||
          (path.respond_to?(:to_str) && Pathname.new(path).parent.exist?)
      end

      # Returns true if `obj` is close enough to an IO object for Nokogiri
      # to parse as one.
      #
      # @param obj [Object] the object that might be an IO
      # @see https://github.com/sparklemotion/nokogiri/blob/v1.11.1/lib/nokogiri/xml/sax/parser.rb#L81 Nokogiri::XML::SAX::Parser#parse
      def reader_like?(obj)
        obj.respond_to?(:read) && obj.respond_to?(:close)
      end

      # Returns true if `obj` is close enough to an IO object for Nokogiri
      # to write to.
      #
      # @param obj [Object] the object that might be an IO
      def writer_like?(obj)
        obj.respond_to?(:write) && obj.respond_to?(:close)
      end

    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'marc'
|
3
|
+
|
4
|
+
module BerkeleyLibrary
  module TIND
    module Mapping
      describe AdditionalDatafieldProcess do

        # Alma record loaded from the fixture file, and the TIND fields mapped from it
        let(:alma) { Alma.new('spec/data/mapping/record.xml') }
        let(:alma_record) { alma.record }
        let(:tind_marc) { TindMarc.new(alma_record) }
        let(:mapped_fields) { tind_marc.tindfields }

        # An extra 245 field, added on top of the mapped fields
        let(:extra_245_fields) do
          fake_title = Util.subfield('a', 'fake 245 a')
          [Util.datafield('245', [' ', ' '], [fake_title])]
        end
        let(:fields_with_extra_245) { mapped_fields.concat(extra_245_fields) }

        context '# Process 1: Remove duplicated fields' do
          # two 245 fields go in, one is removed
          it 'Input two 245 fields, return only one 245 field' do
            remaining_tags = tind_marc.remove_repeats(fields_with_extra_245).map(&:tag)
            expect(remaining_tags.count('245')).to eq 1
          end

          # one subfield 'a' is kept; the subfield 'a' with a value of 'fake 245 a' is dropped
          it 'Input two subfield $a, keeping the first one' do
            deduplicated = tind_marc.remove_repeats(fields_with_extra_245)
            only_245 = tind_marc.field_on_tag('245', deduplicated)
            expect(only_245['a']).to eq 'Cang jie pian'
          end
        end

      end

    end
  end
end
|
@@ -0,0 +1,115 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'tempfile'
|
3
|
+
|
4
|
+
module BerkeleyLibrary
  module TIND
    module Mapping
      describe AlmaBase do
        extend BerkeleyLibrary::Logging
        let(:dummy_obj) { Class.new { extend AlmaBase } }
        let(:alma_record) { ::MARC::Record.new }
        let(:save_to_file) { Tempfile.new('xml') }
        let(:record_id) { Class.new { extend BerkeleyLibrary::Alma::RecordId } }
        let(:hash) { { '980' => ['pre_1912'] } }

        let(:qualified_alma_obj) { Alma.new('spec/data/mapping/record.xml') }
        let(:qualified_alm_record) { qualified_alma_obj.record }

        let(:un_qualified_alma_obj) { Alma.new('spec/data/mapping/record_not_qualified.xml') }
        let(:un_qualified_alm_record) { un_qualified_alma_obj.record }

        describe '# base_tind_record' do
          it 'input a qualified Alma record - return a tind record' do
            allow(dummy_obj).to receive(:tind_record).with('C084093187', qualified_alm_record, []).and_return(::MARC::Record.new)
            expect(dummy_obj.base_tind_record('C084093187', [], qualified_alm_record)).to be_a ::MARC::Record
          end

          it 'input an unqualified Alma record - raise ArgumentError' do
            expect { dummy_obj.base_tind_record('C084093187', [], un_qualified_alm_record) }.to raise_error(ArgumentError)
          end

          it 'no input Alma record with a nil (record) from id - raise ArgumentError' do
            allow(dummy_obj).to receive(:alma_record_from).with('C084093187').and_return(nil)
            expect { dummy_obj.base_tind_record('C084093187', []) }.to raise_error(ArgumentError)
          end
        end

        describe '# base_save' do
          it 'save tind record' do
            dummy_obj.base_save('C084093187', qualified_alm_record, save_to_file)
            # File.foreach closes the file itself; File.open(...).readlines left the handle open
            expect(File.foreach(save_to_file.path).first).to eq "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
          end
        end

        describe '# alma_record' do
          it 'get Alma record' do
            allow(record_id).to receive(:get_marc_record).and_return(::MARC::Record.new)
            allow(dummy_obj).to receive(:get_record_id).with('C084093187').and_return(record_id)
            expect(dummy_obj.send(:alma_record_from, 'C084093187')).to be_instance_of ::MARC::Record
          end
        end

        describe '# get_record_id' do
          it 'return BerkeleyLibrary::Alma::BarCode' do
            BerkeleyLibrary::TIND::Mapping::AlmaBase.is_barcode = true
            expect(dummy_obj.send(:get_record_id, 'C084093187')).to be_instance_of BerkeleyLibrary::Alma::BarCode
          end

          it 'return BerkeleyLibrary::Alma::MMSID' do
            BerkeleyLibrary::TIND::Mapping::AlmaBase.is_barcode = false
            expect(dummy_obj.send(:get_record_id, '991085821143406532')).to be_instance_of BerkeleyLibrary::Alma::MMSID
          end

          it 'return BerkeleyLibrary::Alma::BibNumber' do
            BerkeleyLibrary::TIND::Mapping::AlmaBase.is_barcode = false
            expect(dummy_obj.send(:get_record_id, 'b11082434')).to be_instance_of BerkeleyLibrary::Alma::BibNumber
          end
        end

        describe '# add_f_035' do
          it 'Add 035 from mms_id' do
            BerkeleyLibrary::TIND::Mapping::AlmaBase.is_035_from_mms_id = true
            expect(dummy_obj.send(:add_f_035, '991085821143406532', hash).tag).to eq '035'
          end

          it 'Not to add 035 from mms_id' do
            BerkeleyLibrary::TIND::Mapping::AlmaBase.is_035_from_mms_id = false
            expect(dummy_obj.send(:add_f_035, '991085821143406532', hash)).to eq nil
          end
        end

        describe '# tind_record, # derived_tind_fields' do
          # NOTE(review): this assignment runs when the file is loaded, not per
          # example, so the setting is visible to every example loaded
          # afterwards -- confirm nothing else relies on that before moving it
          # into a `before` hook.
          BerkeleyLibrary::TIND::Mapping::AlmaBase.collection_parameter_hash = {
            '336' => ['Image'],
            '852' => ['East Asian Library'],
            '980' => ['pre_1912'],
            '982' => ['Pre 1912 Chinese Materials - short name', 'Pre 1912 Chinese Materials - long name'],
            '991' => []
          }
          let(:id) { '991085821143406532' }
          let(:tags) { %w[902 336 852 980 982 901 856] }
          let(:tags_with_035) { %w[902 336 852 980 982 901 856 035] }

          it ' get tind_record' do
            datafields = []
            marc_record = qualified_alm_record
            expect(dummy_obj.send(:tind_record, id, marc_record, datafields)).to be_instance_of ::MARC::Record
          end

          it 'get derived fields without 035' do
            # Set the flag explicitly: it is module-level state that other
            # examples change, so the outcome must not depend on run order.
            BerkeleyLibrary::TIND::Mapping::AlmaBase.is_035_from_mms_id = false
            expect(dummy_obj.send(:derived_tind_fields, id).map(&:tag)).to eq tags
          end

          it 'get derived fields with 035' do
            BerkeleyLibrary::TIND::Mapping::AlmaBase.is_035_from_mms_id = true
            expect(dummy_obj.send(:derived_tind_fields, id).map(&:tag)).to eq tags_with_035
          end

        end

      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module BerkeleyLibrary
  module TIND
    module Mapping
      describe AlmaMultipleTIND do
        let(:additional_245_field) { [Util.datafield('245', [' ', ' '], [Util.subfield('a', 'fake 245 a')])] }

        # Built with #tap, appending each data field on its own, so that
        # `marc_obj` is the record itself whatever MARC::Record#append returns,
        # and the record holds data fields rather than a nested Array.
        let(:marc_obj) do
          ::MARC::Record.new.tap do |record|
            additional_245_field.each { |field| record.append(field) }
          end
        end

        it ' get tind record' do
          allow_any_instance_of(BerkeleyLibrary::TIND::Mapping::AlmaMultipleTIND).to receive(:alma_record_from).with('991085821143406532').and_return(marc_obj)
          alma_multiple_tind = BerkeleyLibrary::TIND::Mapping::AlmaMultipleTIND.new('991085821143406532')

          allow(alma_multiple_tind).to receive(:base_tind_record).with('991085821143406532', additional_245_field, marc_obj).and_return(marc_obj)
          expect(alma_multiple_tind.record(additional_245_field)).to be marc_obj
        end
      end
    end
  end
end
|