berkeley_library-tind 0.4.3 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.idea/tind.iml +17 -16
- data/CHANGES.md +13 -0
- data/README.md +15 -1
- data/berkeley_library-tind.gemspec +1 -0
- data/lib/berkeley_library/tind/marc/xml_builder.rb +62 -0
- data/lib/berkeley_library/tind/marc/xml_reader.rb +22 -16
- data/lib/berkeley_library/tind/marc/xml_writer.rb +152 -0
- data/lib/berkeley_library/tind/module_info.rb +1 -1
- data/lib/berkeley_library/util/files.rb +39 -0
- data/spec/berkeley_library/tind/marc/xml_reader_spec.rb +22 -0
- data/spec/berkeley_library/tind/marc/xml_writer_spec.rb +156 -0
- data/spec/data/new-records.xml +46 -0
- metadata +23 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3e7d25497211920f8416b44cef0c1571f0655ff92c3e3ff69405ce9659c9df3e
|
4
|
+
data.tar.gz: 915b8fdc55e4f5d7a48bc7ef3befd2ec4d9e3f3081eedc41980b04dd073fff5c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 187ede68d2a48c906ee37fa01555489e877d1ef4c941189fae5ba683d79a6ca7704e1903077bab6ef9e3e70f8cb1670dee5755aed85a241c538f0e380542879c
|
7
|
+
data.tar.gz: 361ec4a2b88f4f97e53ad1eb25554b97e7cef94222b828de2f27bfc9037df02e412884adf36b31bbf9aed4671a0f62c0ad0910a94225ae18c4efd1758ccf9b43
|
data/.idea/tind.iml
CHANGED
@@ -9,9 +9,9 @@
|
|
9
9
|
</content>
|
10
10
|
<orderEntry type="jdk" jdkName="RVM: ruby-2.7.5" jdkType="RUBY_SDK" />
|
11
11
|
<orderEntry type="sourceFolder" forTests="false" />
|
12
|
-
<orderEntry type="library" scope="PROVIDED" name="actionpack (v6.1.4.
|
13
|
-
<orderEntry type="library" scope="PROVIDED" name="actionview (v6.1.4.
|
14
|
-
<orderEntry type="library" scope="PROVIDED" name="activesupport (v6.1.4.
|
12
|
+
<orderEntry type="library" scope="PROVIDED" name="actionpack (v6.1.4.6, RVM: ruby-2.7.5) [gem]" level="application" />
|
13
|
+
<orderEntry type="library" scope="PROVIDED" name="actionview (v6.1.4.6, RVM: ruby-2.7.5) [gem]" level="application" />
|
14
|
+
<orderEntry type="library" scope="PROVIDED" name="activesupport (v6.1.4.6, RVM: ruby-2.7.5) [gem]" level="application" />
|
15
15
|
<orderEntry type="library" scope="PROVIDED" name="addressable (v2.8.0, RVM: ruby-2.7.5) [gem]" level="application" />
|
16
16
|
<orderEntry type="library" scope="PROVIDED" name="amazing_print (v1.4.0, RVM: ruby-2.7.5) [gem]" level="application" />
|
17
17
|
<orderEntry type="library" scope="PROVIDED" name="ast (v2.4.2, RVM: ruby-2.7.5) [gem]" level="application" />
|
@@ -32,14 +32,15 @@
|
|
32
32
|
<orderEntry type="library" scope="PROVIDED" name="docile (v1.4.0, RVM: ruby-2.7.5) [gem]" level="application" />
|
33
33
|
<orderEntry type="library" scope="PROVIDED" name="domain_name (v0.5.20190701, RVM: ruby-2.7.5) [gem]" level="application" />
|
34
34
|
<orderEntry type="library" scope="PROVIDED" name="dotenv (v2.7.6, RVM: ruby-2.7.5) [gem]" level="application" />
|
35
|
+
<orderEntry type="library" scope="PROVIDED" name="equivalent-xml (v0.6.0, RVM: ruby-2.7.5) [gem]" level="application" />
|
35
36
|
<orderEntry type="library" scope="PROVIDED" name="erubi (v1.10.0, RVM: ruby-2.7.5) [gem]" level="application" />
|
36
37
|
<orderEntry type="library" scope="PROVIDED" name="hashdiff (v1.0.1, RVM: ruby-2.7.5) [gem]" level="application" />
|
37
38
|
<orderEntry type="library" scope="PROVIDED" name="http-accept (v1.7.0, RVM: ruby-2.7.5) [gem]" level="application" />
|
38
39
|
<orderEntry type="library" scope="PROVIDED" name="http-cookie (v1.0.4, RVM: ruby-2.7.5) [gem]" level="application" />
|
39
|
-
<orderEntry type="library" scope="PROVIDED" name="i18n (v1.
|
40
|
+
<orderEntry type="library" scope="PROVIDED" name="i18n (v1.10.0, RVM: ruby-2.7.5) [gem]" level="application" />
|
40
41
|
<orderEntry type="library" scope="PROVIDED" name="ice_nine (v0.11.2, RVM: ruby-2.7.5) [gem]" level="application" />
|
41
42
|
<orderEntry type="library" scope="PROVIDED" name="lograge (v0.11.2, RVM: ruby-2.7.5) [gem]" level="application" />
|
42
|
-
<orderEntry type="library" scope="PROVIDED" name="loofah (v2.
|
43
|
+
<orderEntry type="library" scope="PROVIDED" name="loofah (v2.14.0, RVM: ruby-2.7.5) [gem]" level="application" />
|
43
44
|
<orderEntry type="library" scope="PROVIDED" name="marc (v1.1.1, RVM: ruby-2.7.5) [gem]" level="application" />
|
44
45
|
<orderEntry type="library" scope="PROVIDED" name="method_source (v1.0.0, RVM: ruby-2.7.5) [gem]" level="application" />
|
45
46
|
<orderEntry type="library" scope="PROVIDED" name="mime-types (v3.4.1, RVM: ruby-2.7.5) [gem]" level="application" />
|
@@ -58,25 +59,25 @@
|
|
58
59
|
<orderEntry type="library" scope="PROVIDED" name="rack-test (v1.1.0, RVM: ruby-2.7.5) [gem]" level="application" />
|
59
60
|
<orderEntry type="library" scope="PROVIDED" name="rails-dom-testing (v2.0.3, RVM: ruby-2.7.5) [gem]" level="application" />
|
60
61
|
<orderEntry type="library" scope="PROVIDED" name="rails-html-sanitizer (v1.4.2, RVM: ruby-2.7.5) [gem]" level="application" />
|
61
|
-
<orderEntry type="library" scope="PROVIDED" name="railties (v6.1.4.
|
62
|
+
<orderEntry type="library" scope="PROVIDED" name="railties (v6.1.4.6, RVM: ruby-2.7.5) [gem]" level="application" />
|
62
63
|
<orderEntry type="library" scope="PROVIDED" name="rainbow (v3.1.1, RVM: ruby-2.7.5) [gem]" level="application" />
|
63
64
|
<orderEntry type="library" scope="PROVIDED" name="rake (v13.0.6, RVM: ruby-2.7.5) [gem]" level="application" />
|
64
65
|
<orderEntry type="library" scope="PROVIDED" name="rchardet (v1.8.0, RVM: ruby-2.7.5) [gem]" level="application" />
|
65
|
-
<orderEntry type="library" scope="PROVIDED" name="regexp_parser (v2.2.
|
66
|
+
<orderEntry type="library" scope="PROVIDED" name="regexp_parser (v2.2.1, RVM: ruby-2.7.5) [gem]" level="application" />
|
66
67
|
<orderEntry type="library" scope="PROVIDED" name="request_store (v1.5.1, RVM: ruby-2.7.5) [gem]" level="application" />
|
67
68
|
<orderEntry type="library" scope="PROVIDED" name="rest-client (v2.1.0, RVM: ruby-2.7.5) [gem]" level="application" />
|
68
69
|
<orderEntry type="library" scope="PROVIDED" name="rexml (v3.2.5, RVM: ruby-2.7.5) [gem]" level="application" />
|
69
70
|
<orderEntry type="library" scope="PROVIDED" name="roo (v2.8.3, RVM: ruby-2.7.5) [gem]" level="application" />
|
70
|
-
<orderEntry type="library" scope="PROVIDED" name="rspec (v3.
|
71
|
-
<orderEntry type="library" scope="PROVIDED" name="rspec-core (v3.
|
72
|
-
<orderEntry type="library" scope="PROVIDED" name="rspec-expectations (v3.
|
73
|
-
<orderEntry type="library" scope="PROVIDED" name="rspec-mocks (v3.
|
74
|
-
<orderEntry type="library" scope="PROVIDED" name="rspec-support (v3.
|
71
|
+
<orderEntry type="library" scope="PROVIDED" name="rspec (v3.11.0, RVM: ruby-2.7.5) [gem]" level="application" />
|
72
|
+
<orderEntry type="library" scope="PROVIDED" name="rspec-core (v3.11.0, RVM: ruby-2.7.5) [gem]" level="application" />
|
73
|
+
<orderEntry type="library" scope="PROVIDED" name="rspec-expectations (v3.11.0, RVM: ruby-2.7.5) [gem]" level="application" />
|
74
|
+
<orderEntry type="library" scope="PROVIDED" name="rspec-mocks (v3.11.0, RVM: ruby-2.7.5) [gem]" level="application" />
|
75
|
+
<orderEntry type="library" scope="PROVIDED" name="rspec-support (v3.11.0, RVM: ruby-2.7.5) [gem]" level="application" />
|
75
76
|
<orderEntry type="library" scope="PROVIDED" name="rubocop (v1.11.0, RVM: ruby-2.7.5) [gem]" level="application" />
|
76
|
-
<orderEntry type="library" scope="PROVIDED" name="rubocop-ast (v1.15.
|
77
|
+
<orderEntry type="library" scope="PROVIDED" name="rubocop-ast (v1.15.2, RVM: ruby-2.7.5) [gem]" level="application" />
|
77
78
|
<orderEntry type="library" scope="PROVIDED" name="rubocop-rake (v0.6.0, RVM: ruby-2.7.5) [gem]" level="application" />
|
78
79
|
<orderEntry type="library" scope="PROVIDED" name="rubocop-rspec (v2.4.0, RVM: ruby-2.7.5) [gem]" level="application" />
|
79
|
-
<orderEntry type="library" scope="PROVIDED" name="ruby-marc-spec (v0.1.
|
80
|
+
<orderEntry type="library" scope="PROVIDED" name="ruby-marc-spec (v0.1.3, RVM: ruby-2.7.5) [gem]" level="application" />
|
80
81
|
<orderEntry type="library" scope="PROVIDED" name="ruby-prof (v0.17.0, RVM: ruby-2.7.5) [gem]" level="application" />
|
81
82
|
<orderEntry type="library" scope="PROVIDED" name="ruby-progressbar (v1.11.0, RVM: ruby-2.7.5) [gem]" level="application" />
|
82
83
|
<orderEntry type="library" scope="PROVIDED" name="rubyzip (v2.3.2, RVM: ruby-2.7.5) [gem]" level="application" />
|
@@ -84,7 +85,7 @@
|
|
84
85
|
<orderEntry type="library" scope="PROVIDED" name="simplecov (v0.21.2, RVM: ruby-2.7.5) [gem]" level="application" />
|
85
86
|
<orderEntry type="library" scope="PROVIDED" name="simplecov-html (v0.12.3, RVM: ruby-2.7.5) [gem]" level="application" />
|
86
87
|
<orderEntry type="library" scope="PROVIDED" name="simplecov-rcov (v0.2.3, RVM: ruby-2.7.5) [gem]" level="application" />
|
87
|
-
<orderEntry type="library" scope="PROVIDED" name="simplecov_json_formatter (v0.1.
|
88
|
+
<orderEntry type="library" scope="PROVIDED" name="simplecov_json_formatter (v0.1.4, RVM: ruby-2.7.5) [gem]" level="application" />
|
88
89
|
<orderEntry type="library" scope="PROVIDED" name="thor (v1.2.1, RVM: ruby-2.7.5) [gem]" level="application" />
|
89
90
|
<orderEntry type="library" scope="PROVIDED" name="typesafe_enum (v0.3.0, RVM: ruby-2.7.5) [gem]" level="application" />
|
90
91
|
<orderEntry type="library" scope="PROVIDED" name="tzinfo (v2.0.4, RVM: ruby-2.7.5) [gem]" level="application" />
|
@@ -92,7 +93,7 @@
|
|
92
93
|
<orderEntry type="library" scope="PROVIDED" name="unf_ext (v0.0.8, RVM: ruby-2.7.5) [gem]" level="application" />
|
93
94
|
<orderEntry type="library" scope="PROVIDED" name="unicode-display_width (v2.1.0, RVM: ruby-2.7.5) [gem]" level="application" />
|
94
95
|
<orderEntry type="library" scope="PROVIDED" name="webmock (v3.14.0, RVM: ruby-2.7.5) [gem]" level="application" />
|
95
|
-
<orderEntry type="library" scope="PROVIDED" name="zeitwerk (v2.5.
|
96
|
+
<orderEntry type="library" scope="PROVIDED" name="zeitwerk (v2.5.4, RVM: ruby-2.7.5) [gem]" level="application" />
|
96
97
|
</component>
|
97
98
|
<component name="RModuleSettingsStorage">
|
98
99
|
<LOAD_PATH number="2" string0="$MODULE_DIR$/lib" string1="$MODULE_DIR$/spec" />
|
data/CHANGES.md
CHANGED
@@ -1,3 +1,16 @@
|
|
1
|
+
# 0.5.0 (2022-02-17)
|
2
|
+
|
3
|
+
- Adds a class `BerkeleyLibrary::TIND::MARC::XMLWriter` to write MARCXML in the format expected by the TIND batch uploader:
|
4
|
+
|
5
|
+
- MARC leader is written to control field 000 as required by TIND
|
6
|
+
- control fields (including the leader) use `\` (0x5c), not space (0x20), for unspecified positional
|
7
|
+
values
|
8
|
+
|
9
|
+
In addition, a `nil` or empty MARC leader is not written at all.
|
10
|
+
- Modifies `BerkeleyLibrary::TIND::MARC::XMLReader` to take into account the same peculiarities:
|
11
|
+
control field 000 is read into the leader of the MARC record, and backslashes in control field values
|
12
|
+
(including the leader) are replaced with spaces.
|
13
|
+
|
1
14
|
# 0.4.3 (2022-01-26)
|
2
15
|
|
3
16
|
- Pins `berkeley_library-marc` to version 0.3.x (0.3.1 or higher).
|
data/README.md
CHANGED
@@ -1,10 +1,24 @@
|
|
1
1
|
# BerkeleyLibrary::TIND
|
2
2
|
|
3
3
|
[![Build Status](https://github.com/BerkeleyLibrary/tind/actions/workflows/build.yml/badge.svg?branch=main)](https://github.com/BerkeleyLibrary/tind/actions/workflows/build.yml)
|
4
|
-
[![Gem Version](https://img.shields.io/gem/v/berkeley_library-tind.svg)](https://
|
4
|
+
[![Gem Version](https://img.shields.io/gem/v/berkeley_library-tind.svg)](https://rubygems.org/gems/berkeley_library-tind)
|
5
5
|
|
6
6
|
Utility gem for working with the TIND DA digital archive.
|
7
7
|
|
8
|
+
## Installation
|
9
|
+
|
10
|
+
In your Gemfile:
|
11
|
+
|
12
|
+
```ruby
|
13
|
+
gem 'berkeley_library-tind'
|
14
|
+
```
|
15
|
+
|
16
|
+
In your code:
|
17
|
+
|
18
|
+
```ruby
|
19
|
+
require 'berkeley_library/tind'
|
20
|
+
```
|
21
|
+
|
8
22
|
## Configuration
|
9
23
|
|
10
24
|
To access the TIND API, you will need to set:
|
@@ -37,6 +37,7 @@ Gem::Specification.new do |spec|
|
|
37
37
|
spec.add_development_dependency 'ci_reporter_rspec', '~> 1.0'
|
38
38
|
spec.add_development_dependency 'colorize', '~> 0.8'
|
39
39
|
spec.add_development_dependency 'dotenv', '~> 2.7'
|
40
|
+
spec.add_development_dependency 'equivalent-xml', '~> 0.6'
|
40
41
|
spec.add_development_dependency 'rake', '~> 13.0'
|
41
42
|
spec.add_development_dependency 'roo', '~> 2.8'
|
42
43
|
spec.add_development_dependency 'rspec', '~> 3.10'
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module BerkeleyLibrary
|
4
|
+
module TIND
|
5
|
+
module MARC
|
6
|
+
class XMLBuilder
|
7
|
+
attr_reader :marc_record
|
8
|
+
|
9
|
+
def initialize(marc_record)
|
10
|
+
@marc_record = marc_record
|
11
|
+
end
|
12
|
+
|
13
|
+
def build
|
14
|
+
builder.doc.root.tap(&:unlink)
|
15
|
+
end
|
16
|
+
|
17
|
+
private
|
18
|
+
|
19
|
+
def builder
|
20
|
+
Nokogiri::XML::Builder.new do |xml|
|
21
|
+
xml.record do
|
22
|
+
add_leader(xml)
|
23
|
+
marc_record.each_control_field { |cf| add_control_field(xml, cf) }
|
24
|
+
marc_record.each_data_field { |df| add_data_field(xml, df) }
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def add_leader(xml)
|
30
|
+
leader = marc_record.leader
|
31
|
+
return if leader.nil? || leader == ''
|
32
|
+
|
33
|
+
# TIND uses <controlfield tag="000"/> instead of <leader/>
|
34
|
+
leader_as_cf = ::MARC::ControlField.new('000', clean_leader(leader))
|
35
|
+
add_control_field(xml, leader_as_cf)
|
36
|
+
end
|
37
|
+
|
38
|
+
def add_data_field(xml, df)
|
39
|
+
xml.datafield(tag: df.tag, ind1: df.indicator1, ind2: df.indicator2) do
|
40
|
+
df.subfields.each do |sf|
|
41
|
+
xml.subfield(sf.value, code: sf.code)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def add_control_field(xml, cf)
|
47
|
+
# TIND uses \ (0x5c), not space (0x20), for unspecified values in positional fields
|
48
|
+
value = cf.value&.gsub(' ', '\\')
|
49
|
+
xml.controlfield(value, tag: cf.tag)
|
50
|
+
end
|
51
|
+
|
52
|
+
def clean_leader(leader)
|
53
|
+
leader.gsub(/[^\w|^\s]/, 'Z').tap do |ldr|
|
54
|
+
ldr[20..23] = '4500' unless ldr[20..23] == '4500'
|
55
|
+
ldr[6..6] = 'Z' if ldr[6..6] == ' '
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'nokogiri'
|
2
2
|
require 'marc/xml_parsers'
|
3
3
|
require 'marc_extensions'
|
4
|
+
require 'berkeley_library/util/files'
|
4
5
|
|
5
6
|
module BerkeleyLibrary
|
6
7
|
module TIND
|
@@ -9,7 +10,7 @@ module BerkeleyLibrary
|
|
9
10
|
class XMLReader
|
10
11
|
include Enumerable
|
11
12
|
include ::MARC::NokogiriReader
|
12
|
-
|
13
|
+
include BerkeleyLibrary::Util::Files
|
13
14
|
|
14
15
|
# ############################################################
|
15
16
|
# Constant
|
@@ -70,6 +71,12 @@ module BerkeleyLibrary
|
|
70
71
|
# MARC::GenericPullParser overrides
|
71
72
|
|
72
73
|
def yield_record
|
74
|
+
@record[:record].tap do |record|
|
75
|
+
clean_cf_values(record)
|
76
|
+
move_cf000_to_leader(record)
|
77
|
+
record.freeze if @freeze
|
78
|
+
end
|
79
|
+
|
73
80
|
super
|
74
81
|
ensure
|
75
82
|
increment_records_yielded!
|
@@ -120,26 +127,25 @@ module BerkeleyLibrary
|
|
120
127
|
|
121
128
|
private
|
122
129
|
|
123
|
-
|
124
|
-
|
125
|
-
return
|
126
|
-
return StringIO.new(file) if file =~ /^\s*</x
|
130
|
+
# TIND uses <controlfield tag="000"/> instead of <leader/>
|
131
|
+
def move_cf000_to_leader(record)
|
132
|
+
return unless (cf_000 = record['000'])
|
127
133
|
|
128
|
-
|
134
|
+
record.leader = cf_000.value
|
135
|
+
record.fields.delete(cf_000)
|
129
136
|
end
|
130
137
|
|
131
|
-
#
|
132
|
-
|
133
|
-
|
134
|
-
# @param obj [Object] the object that might be an IO
|
135
|
-
# @see https://github.com/sparklemotion/nokogiri/blob/v1.11.1/lib/nokogiri/xml/sax/parser.rb#L81 Nokogiri::XML::SAX::Parser#parse
|
136
|
-
def io_like?(obj)
|
137
|
-
obj.respond_to?(:read) && obj.respond_to?(:close)
|
138
|
+
# TIND uses \ (0x5c), not space (0x20), for unspecified values in positional fields
|
139
|
+
def clean_cf_values(record)
|
140
|
+
record.each_control_field { |cf| cf.value = cf.value&.gsub('\\', ' ') }
|
138
141
|
end
|
139
142
|
|
140
|
-
def
|
141
|
-
|
142
|
-
|
143
|
+
def ensure_io(file)
|
144
|
+
return file if reader_like?(file)
|
145
|
+
return File.new(file) if file_exists?(file)
|
146
|
+
return StringIO.new(file) if file =~ /^\s*</x
|
147
|
+
|
148
|
+
raise ArgumentError, "Don't know how to read XML from #{file.inspect}: not an IO, file path, or XML text"
|
143
149
|
end
|
144
150
|
|
145
151
|
def increment_records_yielded!
|
@@ -0,0 +1,152 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'marc_extensions'
|
3
|
+
require 'berkeley_library/tind/marc/xml_builder'
|
4
|
+
|
5
|
+
module BerkeleyLibrary
|
6
|
+
module TIND
|
7
|
+
module MARC
|
8
|
+
class XMLWriter
|
9
|
+
include BerkeleyLibrary::Util::Files
|
10
|
+
include BerkeleyLibrary::Logging
|
11
|
+
|
12
|
+
# ------------------------------------------------------------
|
13
|
+
# Constants
|
14
|
+
|
15
|
+
UTF_8 = Encoding::UTF_8.name
|
16
|
+
|
17
|
+
EMPTY_COLLECTION_DOC = Nokogiri::XML::Builder.new(encoding: UTF_8) do |xml|
|
18
|
+
xml.collection(xmlns: ::MARC::MARC_NS)
|
19
|
+
end.doc.freeze
|
20
|
+
|
21
|
+
COLLECTION_CLOSING_TAG = '</collection>'.freeze
|
22
|
+
|
23
|
+
DEFAULT_NOKOGIRI_OPTS = { encoding: UTF_8 }.freeze
|
24
|
+
|
25
|
+
# ------------------------------------------------------------
|
26
|
+
# Fields
|
27
|
+
|
28
|
+
attr_reader :out
|
29
|
+
attr_reader :nokogiri_options
|
30
|
+
|
31
|
+
# ------------------------------------------------------------
|
32
|
+
# Initializer
|
33
|
+
|
34
|
+
# Initializes a new {XMLWriter}.
|
35
|
+
#
|
36
|
+
# ```ruby
|
37
|
+
# File.open('marc.xml', 'wb') do |f|
|
38
|
+
# w = XMLWriter.new(f)
|
39
|
+
# marc_records.each { |r| w.write(r) }
|
40
|
+
# w.close
|
41
|
+
# end
|
42
|
+
# ```
|
43
|
+
#
|
44
|
+
# @param out [IO, String] an IO, or the name of a file
|
45
|
+
# @param nokogiri_options [Hash] Options passed to
|
46
|
+
# {https://nokogiri.org/rdoc/Nokogiri/XML/Node.html#method-i-write_to Nokogiri::XML::Node#write_to}
|
47
|
+
# Note that the `encoding` option is ignored, except insofar as
|
48
|
+
# passing an encoding other than UTF-8 will raise an `ArgumentError`.
|
49
|
+
# @raise ArgumentError if `out` is not an IO or a string, or is a string referencing
|
50
|
+
# a file path that cannot be opened for writing; or if an encoding other than UTF-8
|
51
|
+
# is specified in `nokogiri_options`
|
52
|
+
# @see #open
|
53
|
+
def initialize(out, **nokogiri_options)
|
54
|
+
@nokogiri_options = valid_nokogiri_options(nokogiri_options)
|
55
|
+
@out = ensure_io(out)
|
56
|
+
end
|
57
|
+
|
58
|
+
# ------------------------------------------------------------
|
59
|
+
# Class methods
|
60
|
+
|
61
|
+
class << self
|
62
|
+
|
63
|
+
# Opens a new {XMLWriter} with the specified output destination and
|
64
|
+
# Nokogiri options, writes the XML prolog and opening `<collection>`
|
65
|
+
# tag, yields the writer to write one or more MARC records, and closes
|
66
|
+
# the writer.
|
67
|
+
#
|
68
|
+
# ```ruby
|
69
|
+
# XMLWriter.open('marc.xml') do |w|
|
70
|
+
# marc_records.each { |r| w.write(r) }
|
71
|
+
# end
|
72
|
+
# ```
|
73
|
+
#
|
74
|
+
# Note that unlike initializing a writer with {#new} and closing it
|
75
|
+
# immediately, this will write an XML document with an empty
|
76
|
+
# `<collection></collection>` tag even if no records are written.
|
77
|
+
#
|
78
|
+
# @yieldparam writer [XMLWriter] the writer
|
79
|
+
# @see #new
|
80
|
+
# @see #close
|
81
|
+
def open(out, **nokogiri_options)
|
82
|
+
writer = new(out, **nokogiri_options)
|
83
|
+
writer.send(:ensure_open!)
|
84
|
+
yield writer if block_given?
|
85
|
+
writer.close
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
# ------------------------------------------------------------
|
90
|
+
# Instance methods
|
91
|
+
|
92
|
+
# Writes the specified record to the underlying stream, writing the
|
93
|
+
# XML prolog and opening `<collection>` tag if they have not yet
|
94
|
+
# been written.
|
95
|
+
#
|
96
|
+
# @param record [::MARC::Record] the MARC record to write.
|
97
|
+
# @raise IOError if the underlying stream has already been closed.
|
98
|
+
def write(record)
|
99
|
+
ensure_open!
|
100
|
+
record_element = XMLBuilder.new(record).build
|
101
|
+
record_element.write_to(out, nokogiri_options)
|
102
|
+
out.write("\n")
|
103
|
+
end
|
104
|
+
|
105
|
+
# Closes the underlying stream. If the XML prolog and opening `<collection>`
|
106
|
+
# tag have already been written, the closing `</collection>` tag is written
|
107
|
+
# first.
|
108
|
+
def close
|
109
|
+
out.write(COLLECTION_CLOSING_TAG) if @open
|
110
|
+
out.close
|
111
|
+
end
|
112
|
+
|
113
|
+
# ------------------------------------------------------------
|
114
|
+
# Private
|
115
|
+
|
116
|
+
private
|
117
|
+
|
118
|
+
def ensure_open!
|
119
|
+
return if @open
|
120
|
+
|
121
|
+
out.write(prolog_and_opening_tag)
|
122
|
+
@open = true
|
123
|
+
end
|
124
|
+
|
125
|
+
def prolog_and_opening_tag
|
126
|
+
StringIO.open do |tmp|
|
127
|
+
EMPTY_COLLECTION_DOC.write_to(tmp, nokogiri_options)
|
128
|
+
result = tmp.string
|
129
|
+
result.sub!(%r{/>\s*$}, ">\n")
|
130
|
+
result
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
def ensure_io(file)
|
135
|
+
return file if writer_like?(file)
|
136
|
+
return File.open(file, 'wb') if parent_exists?(file)
|
137
|
+
|
138
|
+
raise ArgumentError, "Don't know how to write XML to #{file.inspect}: not an IO or file path"
|
139
|
+
end
|
140
|
+
|
141
|
+
def valid_nokogiri_options(opts)
|
142
|
+
if (encoding = opts.delete(:encoding)) && encoding != UTF_8
|
143
|
+
raise ArgumentError, "#{self.class.name} only supports #{UTF_8}; unable to use specified encoding #{encoding}"
|
144
|
+
end
|
145
|
+
|
146
|
+
DEFAULT_NOKOGIRI_OPTS.merge(opts)
|
147
|
+
end
|
148
|
+
|
149
|
+
end
|
150
|
+
end
|
151
|
+
end
|
152
|
+
end
|
@@ -7,7 +7,7 @@ module BerkeleyLibrary
|
|
7
7
|
SUMMARY = 'TIND DA utilities for the UC Berkeley Library'.freeze
|
8
8
|
DESCRIPTION = 'UC Berkeley Library utility gem for working with the TIND DA digital archive.'.freeze
|
9
9
|
LICENSE = 'MIT'.freeze
|
10
|
-
VERSION = '0.
|
10
|
+
VERSION = '0.5.0'.freeze
|
11
11
|
HOMEPAGE = 'https://github.com/BerkeleyLibrary/tind'.freeze
|
12
12
|
end
|
13
13
|
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module BerkeleyLibrary
|
2
|
+
module Util
|
3
|
+
# TODO: Move this to `berkeley_library-util`
|
4
|
+
module Files
|
5
|
+
class << self
|
6
|
+
include Files
|
7
|
+
end
|
8
|
+
|
9
|
+
def file_exists?(path)
|
10
|
+
(path.respond_to?(:exist?) && path.exist?) ||
|
11
|
+
(path.respond_to?(:to_str) && File.exist?(path))
|
12
|
+
end
|
13
|
+
|
14
|
+
def parent_exists?(path)
|
15
|
+
path.respond_to?(:parent) && path.parent.exist? ||
|
16
|
+
path.respond_to?(:to_str) && Pathname.new(path).parent.exist?
|
17
|
+
end
|
18
|
+
|
19
|
+
# Returns true if `obj` is close enough to an IO object for Nokogiri
|
20
|
+
# to parse as one.
|
21
|
+
#
|
22
|
+
# @param obj [Object] the object that might be an IO
|
23
|
+
# @see https://github.com/sparklemotion/nokogiri/blob/v1.11.1/lib/nokogiri/xml/sax/parser.rb#L81 Nokogiri::XML::SAX::Parser#parse
|
24
|
+
def reader_like?(obj)
|
25
|
+
obj.respond_to?(:read) && obj.respond_to?(:close)
|
26
|
+
end
|
27
|
+
|
28
|
+
# Returns true if `obj` is close enough to an IO object for Nokogiri
|
29
|
+
# to write to.
|
30
|
+
#
|
31
|
+
# @param obj [Object] the object that might be an IO
|
32
|
+
def writer_like?(obj)
|
33
|
+
# TODO: is it possible/desirable to loosen this? how strict is libxml2?
|
34
|
+
obj.is_a?(IO) || obj.is_a?(StringIO)
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
@@ -106,6 +106,28 @@ module BerkeleyLibrary
|
|
106
106
|
end
|
107
107
|
end
|
108
108
|
|
109
|
+
describe 'TIND peculiarities' do
|
110
|
+
attr_reader :record
|
111
|
+
|
112
|
+
before(:each) do
|
113
|
+
reader = XMLReader.new('spec/data/new-records.xml')
|
114
|
+
records = reader.to_a
|
115
|
+
expect(records.size).to eq(1) # just to be sure
|
116
|
+
@record = records.first
|
117
|
+
end
|
118
|
+
|
119
|
+
it 'converts backslashes in control fields to spaces' do
|
120
|
+
cf_008 = record['008']
|
121
|
+
expect(cf_008).to be_a(::MARC::ControlField)
|
122
|
+
expect(cf_008.value).to eq('190409s2015 xx eng ')
|
123
|
+
end
|
124
|
+
|
125
|
+
it 'parses CF 000 as the leader' do
|
126
|
+
expect(record.leader).to eq('00287cam a2200313 4500')
|
127
|
+
expect(record['000']).to be_nil
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
109
131
|
end
|
110
132
|
end
|
111
133
|
end
|
@@ -0,0 +1,156 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'equivalent-xml'
|
3
|
+
|
4
|
+
module BerkeleyLibrary
|
5
|
+
module TIND
|
6
|
+
module MARC
|
7
|
+
describe XMLWriter do
|
8
|
+
let(:input_path) { 'spec/data/new-records.xml' }
|
9
|
+
attr_reader :record
|
10
|
+
|
11
|
+
before(:each) do
|
12
|
+
reader = XMLReader.new(input_path)
|
13
|
+
@record = reader.first
|
14
|
+
end
|
15
|
+
|
16
|
+
describe :open do
|
17
|
+
|
18
|
+
it 'writes a MARC record to a file as XML' do
|
19
|
+
Dir.mktmpdir(File.basename(__FILE__, '.rb')) do |dir|
|
20
|
+
output_path = File.join(dir, 'marc.xml')
|
21
|
+
XMLWriter.open(output_path) { |w| w.write(record) }
|
22
|
+
|
23
|
+
expected = File.open(input_path) { |f| Nokogiri::XML(f) }
|
24
|
+
actual = File.open(output_path) { |f| Nokogiri::XML(f) }
|
25
|
+
|
26
|
+
aggregate_failures do
|
27
|
+
EquivalentXml.equivalent?(expected, actual) do |n1, n2, result|
|
28
|
+
expect(n2.to_s).to eq(n1.to_s) unless result
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
it 'writes a MARC record to a StringIO' do
|
35
|
+
out = StringIO.new
|
36
|
+
XMLWriter.open(out) { |w| w.write(record) }
|
37
|
+
expected = File.open(input_path) { |f| Nokogiri::XML(f) }
|
38
|
+
actual = Nokogiri::XML(out.string)
|
39
|
+
aggregate_failures do
|
40
|
+
EquivalentXml.equivalent?(expected, actual) do |n1, n2, result|
|
41
|
+
expect(n2.to_s).to eq(n1.to_s) unless result
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'accepts Nokogiri options' do
|
47
|
+
Dir.mktmpdir(File.basename(__FILE__, '.rb')) do |dir|
|
48
|
+
expected_path = File.join(dir, 'expected.xml')
|
49
|
+
XMLWriter.open(expected_path) { |w| w.write(record) }
|
50
|
+
|
51
|
+
actual_path = File.join(dir, 'actual.xml')
|
52
|
+
XMLWriter.open(actual_path, indent_text: "\t") { |w| w.write(record) }
|
53
|
+
|
54
|
+
expected = File.read(expected_path).gsub(%r{ (?= *<)(?!/)}, "\t")
|
55
|
+
actual = File.read(actual_path)
|
56
|
+
expect(actual).to eq(expected)
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
it 'accepts an explicit UTF-8 argument' do
|
61
|
+
Dir.mktmpdir(File.basename(__FILE__, '.rb')) do |dir|
|
62
|
+
output_path = File.join(dir, 'marc.xml')
|
63
|
+
XMLWriter.open(output_path, encoding: 'UTF-8') { |w| w.write(record) }
|
64
|
+
|
65
|
+
expected = File.open(input_path) { |f| Nokogiri::XML(f) }
|
66
|
+
actual = File.open(output_path) { |f| Nokogiri::XML(f) }
|
67
|
+
|
68
|
+
aggregate_failures do
|
69
|
+
EquivalentXml.equivalent?(expected, actual) do |n1, n2, result|
|
70
|
+
expect(n2.to_s).to eq(n1.to_s) unless result
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
it 'only writes UTF-8' do
|
77
|
+
Dir.mktmpdir(File.basename(__FILE__, '.rb')) do |dir|
|
78
|
+
output_path = File.join(dir, 'marc.xml')
|
79
|
+
expect { XMLWriter.open(output_path, encoding: 'UTF-16') }.to raise_error(ArgumentError)
|
80
|
+
expect(File.exist?(output_path)).to eq(false)
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
it 'rejects an invalid file path' do
|
85
|
+
bad_directory = Dir.mktmpdir(File.basename(__FILE__, '.rb')) { |dir| dir }
|
86
|
+
expect(File.directory?(bad_directory)).to eq(false)
|
87
|
+
output_path = File.join(bad_directory, 'marc.xml')
|
88
|
+
expect { XMLWriter.open(output_path) }.to raise_error(ArgumentError)
|
89
|
+
end
|
90
|
+
|
91
|
+
it 'rejects a non-IO, non-String argument' do
|
92
|
+
invalid_target = Object.new
|
93
|
+
expect { XMLWriter.open(invalid_target) }.to raise_error(ArgumentError)
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
describe :close do
|
98
|
+
it 'closes without writing the closing tag if nothing has been written' do
|
99
|
+
Dir.mktmpdir(File.basename(__FILE__, '.rb')) do |dir|
|
100
|
+
output_path = File.join(dir, 'marc.xml')
|
101
|
+
w = XMLWriter.new(output_path)
|
102
|
+
w.close
|
103
|
+
|
104
|
+
stat = File.stat(output_path)
|
105
|
+
expect(stat.size).to eq(0)
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
it 'writes the closing tag if the opening tag has been written' do
|
110
|
+
Dir.mktmpdir(File.basename(__FILE__, '.rb')) do |dir|
|
111
|
+
output_path = File.join(dir, 'marc.xml')
|
112
|
+
XMLWriter.open(output_path)
|
113
|
+
expect(File.exist?(output_path)).to eq(true)
|
114
|
+
|
115
|
+
doc = File.open(output_path) { |f| Nokogiri::XML(f) }
|
116
|
+
expect(doc.root.name).to eq('collection')
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
describe :write do
|
122
|
+
it 'raises an IOError if the writer has already been closed' do
|
123
|
+
Dir.mktmpdir(File.basename(__FILE__, '.rb')) do |dir|
|
124
|
+
output_path = File.join(dir, 'marc.xml')
|
125
|
+
w = XMLWriter.new(output_path)
|
126
|
+
w.close
|
127
|
+
|
128
|
+
expect { w.write(record) }.to raise_error(IOError)
|
129
|
+
|
130
|
+
stat = File.stat(output_path)
|
131
|
+
expect(stat.size).to eq(0)
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
it 'does not write a nil leader' do
|
136
|
+
record.leader = nil
|
137
|
+
marc_xml = StringIO.open do |out|
|
138
|
+
XMLWriter.open(out) { |w| w.write(record) }
|
139
|
+
out.string
|
140
|
+
end
|
141
|
+
expect(marc_xml).not_to include('leader')
|
142
|
+
end
|
143
|
+
|
144
|
+
it 'does not write a blank leader' do
|
145
|
+
record.leader = ''
|
146
|
+
marc_xml = StringIO.open do |out|
|
147
|
+
XMLWriter.open(out) { |w| w.write(record) }
|
148
|
+
out.string
|
149
|
+
end
|
150
|
+
expect(marc_xml).not_to include('leader')
|
151
|
+
end
|
152
|
+
end
|
153
|
+
end
|
154
|
+
end
|
155
|
+
end
|
156
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<!--
|
3
|
+
Source: "Batch Uploader: Caveats, common errors and example metadata files", docs.tind.io
|
4
|
+
-->
|
5
|
+
<collection xmlns="http://www.loc.gov/MARC21/slim">
|
6
|
+
<record>
|
7
|
+
|
8
|
+
<!-- The Leader is encoded in the `000` control field.
|
9
|
+
If you want to edit the leader in software such as
|
10
|
+
MarcEdit, you will need to change these fields to a
|
11
|
+
leader tag. Then, before import into the repository
|
12
|
+
you will need to change the fields back to controlfields
|
13
|
+
with tag `000`.
|
14
|
+
-->
|
15
|
+
<controlfield tag="000">00287cam\a2200313\\\4500</controlfield>
|
16
|
+
|
17
|
+
<!-- All whitespace in control fields need to be replaced with
|
18
|
+
backslashes.
|
19
|
+
-->
|
20
|
+
<controlfield tag="008">190409s2015\\\\xx\\\\\\\\\\\\\\\\\\eng\\</controlfield>
|
21
|
+
|
22
|
+
<!-- Regular fields are encoded in datafield elements. -->
|
23
|
+
<datafield tag="100" ind1="0" ind2=" ">
|
24
|
+
<subfield code="a">Aristotle</subfield>
|
25
|
+
<subfield code="0">580897</subfield>
|
26
|
+
</datafield>
|
27
|
+
|
28
|
+
<datafield tag="245" ind1="0" ind2="0">
|
29
|
+
<subfield code="a">Metaphysics</subfield>
|
30
|
+
<subfield code="c">Aristotle</subfield>
|
31
|
+
</datafield>
|
32
|
+
|
33
|
+
<datafield tag="260" ind1=" " ind2=" ">
|
34
|
+
<subfield code="a">Narnia</subfield>
|
35
|
+
<subfield code="b">Fictive Books</subfield>
|
36
|
+
<subfield code="c">2015</subfield>
|
37
|
+
</datafield>
|
38
|
+
|
39
|
+
<!-- Make sure to include a collection when uploading new
|
40
|
+
records, so that the record will be searchable.
|
41
|
+
-->
|
42
|
+
<datafield tag="980" ind1=" " ind2=" ">
|
43
|
+
<subfield code="a">BIB</subfield>
|
44
|
+
</datafield>
|
45
|
+
</record>
|
46
|
+
</collection>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: berkeley_library-tind
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Moles
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-02-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: berkeley_library-logging
|
@@ -198,6 +198,20 @@ dependencies:
|
|
198
198
|
- - "~>"
|
199
199
|
- !ruby/object:Gem::Version
|
200
200
|
version: '2.7'
|
201
|
+
- !ruby/object:Gem::Dependency
|
202
|
+
name: equivalent-xml
|
203
|
+
requirement: !ruby/object:Gem::Requirement
|
204
|
+
requirements:
|
205
|
+
- - "~>"
|
206
|
+
- !ruby/object:Gem::Version
|
207
|
+
version: '0.6'
|
208
|
+
type: :development
|
209
|
+
prerelease: false
|
210
|
+
version_requirements: !ruby/object:Gem::Requirement
|
211
|
+
requirements:
|
212
|
+
- - "~>"
|
213
|
+
- !ruby/object:Gem::Version
|
214
|
+
version: '0.6'
|
201
215
|
- !ruby/object:Gem::Dependency
|
202
216
|
name: rake
|
203
217
|
requirement: !ruby/object:Gem::Requirement
|
@@ -395,8 +409,11 @@ files:
|
|
395
409
|
- lib/berkeley_library/tind/export/table.rb
|
396
410
|
- lib/berkeley_library/tind/export/table_metrics.rb
|
397
411
|
- lib/berkeley_library/tind/marc.rb
|
412
|
+
- lib/berkeley_library/tind/marc/xml_builder.rb
|
398
413
|
- lib/berkeley_library/tind/marc/xml_reader.rb
|
414
|
+
- lib/berkeley_library/tind/marc/xml_writer.rb
|
399
415
|
- lib/berkeley_library/tind/module_info.rb
|
416
|
+
- lib/berkeley_library/util/files.rb
|
400
417
|
- lib/berkeley_library/util/ods/spreadsheet.rb
|
401
418
|
- lib/berkeley_library/util/ods/xml/content_doc.rb
|
402
419
|
- lib/berkeley_library/util/ods/xml/document_node.rb
|
@@ -460,6 +477,7 @@ files:
|
|
460
477
|
- spec/berkeley_library/tind/export/row_spec.rb
|
461
478
|
- spec/berkeley_library/tind/export/table_spec.rb
|
462
479
|
- spec/berkeley_library/tind/marc/xml_reader_spec.rb
|
480
|
+
- spec/berkeley_library/tind/marc/xml_writer_spec.rb
|
463
481
|
- spec/berkeley_library/util/ods/spreadsheet_spec.rb
|
464
482
|
- spec/berkeley_library/util/ods/xml/content_doc_spec.rb
|
465
483
|
- spec/berkeley_library/util/ods/xml/manifest/file_entry_spec.rb
|
@@ -472,6 +490,7 @@ files:
|
|
472
490
|
- spec/data/collection-names.txt
|
473
491
|
- spec/data/collections.json
|
474
492
|
- spec/data/disjoint-records.xml
|
493
|
+
- spec/data/new-records.xml
|
475
494
|
- spec/data/record-184453.xml
|
476
495
|
- spec/data/record-184458.xml
|
477
496
|
- spec/data/record-187888.xml
|
@@ -531,6 +550,7 @@ test_files:
|
|
531
550
|
- spec/berkeley_library/tind/export/row_spec.rb
|
532
551
|
- spec/berkeley_library/tind/export/table_spec.rb
|
533
552
|
- spec/berkeley_library/tind/marc/xml_reader_spec.rb
|
553
|
+
- spec/berkeley_library/tind/marc/xml_writer_spec.rb
|
534
554
|
- spec/berkeley_library/util/ods/spreadsheet_spec.rb
|
535
555
|
- spec/berkeley_library/util/ods/xml/content_doc_spec.rb
|
536
556
|
- spec/berkeley_library/util/ods/xml/manifest/file_entry_spec.rb
|
@@ -543,6 +563,7 @@ test_files:
|
|
543
563
|
- spec/data/collection-names.txt
|
544
564
|
- spec/data/collections.json
|
545
565
|
- spec/data/disjoint-records.xml
|
566
|
+
- spec/data/new-records.xml
|
546
567
|
- spec/data/record-184453.xml
|
547
568
|
- spec/data/record-184458.xml
|
548
569
|
- spec/data/record-187888.xml
|