berkeley_library-tind 0.4.3 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dbfd8d5b1299063dc4986ae86860bcd050d64f4d22d1592f6373f8cf9c661fd9
4
- data.tar.gz: fefc0570733f3116aae9fe5377773d7fb327ba4f71ccfa15399bf5dae8a67ac5
3
+ metadata.gz: 3e7d25497211920f8416b44cef0c1571f0655ff92c3e3ff69405ce9659c9df3e
4
+ data.tar.gz: 915b8fdc55e4f5d7a48bc7ef3befd2ec4d9e3f3081eedc41980b04dd073fff5c
5
5
  SHA512:
6
- metadata.gz: 43a3e9e88ddacc0c37619b49b223a6fe4967d2bd13fb7a493514a72e7903cd0f54bfb92c052e347e4aebbf5cd33106410f39dff81936f606d25df21e7dc1f000
7
- data.tar.gz: a0295b58cb6139205ccfc7e381f1c0111d8e125ab55b5d1109d467fee59b5a80436e06b2c6a77c1c11c6e38962ee19f91695640800658a00ba054608be423d56
6
+ metadata.gz: 187ede68d2a48c906ee37fa01555489e877d1ef4c941189fae5ba683d79a6ca7704e1903077bab6ef9e3e70f8cb1670dee5755aed85a241c538f0e380542879c
7
+ data.tar.gz: 361ec4a2b88f4f97e53ad1eb25554b97e7cef94222b828de2f27bfc9037df02e412884adf36b31bbf9aed4671a0f62c0ad0910a94225ae18c4efd1758ccf9b43
data/.idea/tind.iml CHANGED
@@ -9,9 +9,9 @@
9
9
  </content>
10
10
  <orderEntry type="jdk" jdkName="RVM: ruby-2.7.5" jdkType="RUBY_SDK" />
11
11
  <orderEntry type="sourceFolder" forTests="false" />
12
- <orderEntry type="library" scope="PROVIDED" name="actionpack (v6.1.4.4, RVM: ruby-2.7.5) [gem]" level="application" />
13
- <orderEntry type="library" scope="PROVIDED" name="actionview (v6.1.4.4, RVM: ruby-2.7.5) [gem]" level="application" />
14
- <orderEntry type="library" scope="PROVIDED" name="activesupport (v6.1.4.4, RVM: ruby-2.7.5) [gem]" level="application" />
12
+ <orderEntry type="library" scope="PROVIDED" name="actionpack (v6.1.4.6, RVM: ruby-2.7.5) [gem]" level="application" />
13
+ <orderEntry type="library" scope="PROVIDED" name="actionview (v6.1.4.6, RVM: ruby-2.7.5) [gem]" level="application" />
14
+ <orderEntry type="library" scope="PROVIDED" name="activesupport (v6.1.4.6, RVM: ruby-2.7.5) [gem]" level="application" />
15
15
  <orderEntry type="library" scope="PROVIDED" name="addressable (v2.8.0, RVM: ruby-2.7.5) [gem]" level="application" />
16
16
  <orderEntry type="library" scope="PROVIDED" name="amazing_print (v1.4.0, RVM: ruby-2.7.5) [gem]" level="application" />
17
17
  <orderEntry type="library" scope="PROVIDED" name="ast (v2.4.2, RVM: ruby-2.7.5) [gem]" level="application" />
@@ -32,14 +32,15 @@
32
32
  <orderEntry type="library" scope="PROVIDED" name="docile (v1.4.0, RVM: ruby-2.7.5) [gem]" level="application" />
33
33
  <orderEntry type="library" scope="PROVIDED" name="domain_name (v0.5.20190701, RVM: ruby-2.7.5) [gem]" level="application" />
34
34
  <orderEntry type="library" scope="PROVIDED" name="dotenv (v2.7.6, RVM: ruby-2.7.5) [gem]" level="application" />
35
+ <orderEntry type="library" scope="PROVIDED" name="equivalent-xml (v0.6.0, RVM: ruby-2.7.5) [gem]" level="application" />
35
36
  <orderEntry type="library" scope="PROVIDED" name="erubi (v1.10.0, RVM: ruby-2.7.5) [gem]" level="application" />
36
37
  <orderEntry type="library" scope="PROVIDED" name="hashdiff (v1.0.1, RVM: ruby-2.7.5) [gem]" level="application" />
37
38
  <orderEntry type="library" scope="PROVIDED" name="http-accept (v1.7.0, RVM: ruby-2.7.5) [gem]" level="application" />
38
39
  <orderEntry type="library" scope="PROVIDED" name="http-cookie (v1.0.4, RVM: ruby-2.7.5) [gem]" level="application" />
39
- <orderEntry type="library" scope="PROVIDED" name="i18n (v1.8.11, RVM: ruby-2.7.5) [gem]" level="application" />
40
+ <orderEntry type="library" scope="PROVIDED" name="i18n (v1.10.0, RVM: ruby-2.7.5) [gem]" level="application" />
40
41
  <orderEntry type="library" scope="PROVIDED" name="ice_nine (v0.11.2, RVM: ruby-2.7.5) [gem]" level="application" />
41
42
  <orderEntry type="library" scope="PROVIDED" name="lograge (v0.11.2, RVM: ruby-2.7.5) [gem]" level="application" />
42
- <orderEntry type="library" scope="PROVIDED" name="loofah (v2.13.0, RVM: ruby-2.7.5) [gem]" level="application" />
43
+ <orderEntry type="library" scope="PROVIDED" name="loofah (v2.14.0, RVM: ruby-2.7.5) [gem]" level="application" />
43
44
  <orderEntry type="library" scope="PROVIDED" name="marc (v1.1.1, RVM: ruby-2.7.5) [gem]" level="application" />
44
45
  <orderEntry type="library" scope="PROVIDED" name="method_source (v1.0.0, RVM: ruby-2.7.5) [gem]" level="application" />
45
46
  <orderEntry type="library" scope="PROVIDED" name="mime-types (v3.4.1, RVM: ruby-2.7.5) [gem]" level="application" />
@@ -58,25 +59,25 @@
58
59
  <orderEntry type="library" scope="PROVIDED" name="rack-test (v1.1.0, RVM: ruby-2.7.5) [gem]" level="application" />
59
60
  <orderEntry type="library" scope="PROVIDED" name="rails-dom-testing (v2.0.3, RVM: ruby-2.7.5) [gem]" level="application" />
60
61
  <orderEntry type="library" scope="PROVIDED" name="rails-html-sanitizer (v1.4.2, RVM: ruby-2.7.5) [gem]" level="application" />
61
- <orderEntry type="library" scope="PROVIDED" name="railties (v6.1.4.4, RVM: ruby-2.7.5) [gem]" level="application" />
62
+ <orderEntry type="library" scope="PROVIDED" name="railties (v6.1.4.6, RVM: ruby-2.7.5) [gem]" level="application" />
62
63
  <orderEntry type="library" scope="PROVIDED" name="rainbow (v3.1.1, RVM: ruby-2.7.5) [gem]" level="application" />
63
64
  <orderEntry type="library" scope="PROVIDED" name="rake (v13.0.6, RVM: ruby-2.7.5) [gem]" level="application" />
64
65
  <orderEntry type="library" scope="PROVIDED" name="rchardet (v1.8.0, RVM: ruby-2.7.5) [gem]" level="application" />
65
- <orderEntry type="library" scope="PROVIDED" name="regexp_parser (v2.2.0, RVM: ruby-2.7.5) [gem]" level="application" />
66
+ <orderEntry type="library" scope="PROVIDED" name="regexp_parser (v2.2.1, RVM: ruby-2.7.5) [gem]" level="application" />
66
67
  <orderEntry type="library" scope="PROVIDED" name="request_store (v1.5.1, RVM: ruby-2.7.5) [gem]" level="application" />
67
68
  <orderEntry type="library" scope="PROVIDED" name="rest-client (v2.1.0, RVM: ruby-2.7.5) [gem]" level="application" />
68
69
  <orderEntry type="library" scope="PROVIDED" name="rexml (v3.2.5, RVM: ruby-2.7.5) [gem]" level="application" />
69
70
  <orderEntry type="library" scope="PROVIDED" name="roo (v2.8.3, RVM: ruby-2.7.5) [gem]" level="application" />
70
- <orderEntry type="library" scope="PROVIDED" name="rspec (v3.10.0, RVM: ruby-2.7.5) [gem]" level="application" />
71
- <orderEntry type="library" scope="PROVIDED" name="rspec-core (v3.10.1, RVM: ruby-2.7.5) [gem]" level="application" />
72
- <orderEntry type="library" scope="PROVIDED" name="rspec-expectations (v3.10.2, RVM: ruby-2.7.5) [gem]" level="application" />
73
- <orderEntry type="library" scope="PROVIDED" name="rspec-mocks (v3.10.2, RVM: ruby-2.7.5) [gem]" level="application" />
74
- <orderEntry type="library" scope="PROVIDED" name="rspec-support (v3.10.3, RVM: ruby-2.7.5) [gem]" level="application" />
71
+ <orderEntry type="library" scope="PROVIDED" name="rspec (v3.11.0, RVM: ruby-2.7.5) [gem]" level="application" />
72
+ <orderEntry type="library" scope="PROVIDED" name="rspec-core (v3.11.0, RVM: ruby-2.7.5) [gem]" level="application" />
73
+ <orderEntry type="library" scope="PROVIDED" name="rspec-expectations (v3.11.0, RVM: ruby-2.7.5) [gem]" level="application" />
74
+ <orderEntry type="library" scope="PROVIDED" name="rspec-mocks (v3.11.0, RVM: ruby-2.7.5) [gem]" level="application" />
75
+ <orderEntry type="library" scope="PROVIDED" name="rspec-support (v3.11.0, RVM: ruby-2.7.5) [gem]" level="application" />
75
76
  <orderEntry type="library" scope="PROVIDED" name="rubocop (v1.11.0, RVM: ruby-2.7.5) [gem]" level="application" />
76
- <orderEntry type="library" scope="PROVIDED" name="rubocop-ast (v1.15.1, RVM: ruby-2.7.5) [gem]" level="application" />
77
+ <orderEntry type="library" scope="PROVIDED" name="rubocop-ast (v1.15.2, RVM: ruby-2.7.5) [gem]" level="application" />
77
78
  <orderEntry type="library" scope="PROVIDED" name="rubocop-rake (v0.6.0, RVM: ruby-2.7.5) [gem]" level="application" />
78
79
  <orderEntry type="library" scope="PROVIDED" name="rubocop-rspec (v2.4.0, RVM: ruby-2.7.5) [gem]" level="application" />
79
- <orderEntry type="library" scope="PROVIDED" name="ruby-marc-spec (v0.1.1, RVM: ruby-2.7.5) [gem]" level="application" />
80
+ <orderEntry type="library" scope="PROVIDED" name="ruby-marc-spec (v0.1.3, RVM: ruby-2.7.5) [gem]" level="application" />
80
81
  <orderEntry type="library" scope="PROVIDED" name="ruby-prof (v0.17.0, RVM: ruby-2.7.5) [gem]" level="application" />
81
82
  <orderEntry type="library" scope="PROVIDED" name="ruby-progressbar (v1.11.0, RVM: ruby-2.7.5) [gem]" level="application" />
82
83
  <orderEntry type="library" scope="PROVIDED" name="rubyzip (v2.3.2, RVM: ruby-2.7.5) [gem]" level="application" />
@@ -84,7 +85,7 @@
84
85
  <orderEntry type="library" scope="PROVIDED" name="simplecov (v0.21.2, RVM: ruby-2.7.5) [gem]" level="application" />
85
86
  <orderEntry type="library" scope="PROVIDED" name="simplecov-html (v0.12.3, RVM: ruby-2.7.5) [gem]" level="application" />
86
87
  <orderEntry type="library" scope="PROVIDED" name="simplecov-rcov (v0.2.3, RVM: ruby-2.7.5) [gem]" level="application" />
87
- <orderEntry type="library" scope="PROVIDED" name="simplecov_json_formatter (v0.1.3, RVM: ruby-2.7.5) [gem]" level="application" />
88
+ <orderEntry type="library" scope="PROVIDED" name="simplecov_json_formatter (v0.1.4, RVM: ruby-2.7.5) [gem]" level="application" />
88
89
  <orderEntry type="library" scope="PROVIDED" name="thor (v1.2.1, RVM: ruby-2.7.5) [gem]" level="application" />
89
90
  <orderEntry type="library" scope="PROVIDED" name="typesafe_enum (v0.3.0, RVM: ruby-2.7.5) [gem]" level="application" />
90
91
  <orderEntry type="library" scope="PROVIDED" name="tzinfo (v2.0.4, RVM: ruby-2.7.5) [gem]" level="application" />
@@ -92,7 +93,7 @@
92
93
  <orderEntry type="library" scope="PROVIDED" name="unf_ext (v0.0.8, RVM: ruby-2.7.5) [gem]" level="application" />
93
94
  <orderEntry type="library" scope="PROVIDED" name="unicode-display_width (v2.1.0, RVM: ruby-2.7.5) [gem]" level="application" />
94
95
  <orderEntry type="library" scope="PROVIDED" name="webmock (v3.14.0, RVM: ruby-2.7.5) [gem]" level="application" />
95
- <orderEntry type="library" scope="PROVIDED" name="zeitwerk (v2.5.3, RVM: ruby-2.7.5) [gem]" level="application" />
96
+ <orderEntry type="library" scope="PROVIDED" name="zeitwerk (v2.5.4, RVM: ruby-2.7.5) [gem]" level="application" />
96
97
  </component>
97
98
  <component name="RModuleSettingsStorage">
98
99
  <LOAD_PATH number="2" string0="$MODULE_DIR$/lib" string1="$MODULE_DIR$/spec" />
data/CHANGES.md CHANGED
@@ -1,3 +1,16 @@
1
+ # 0.5.0 (2022-02-17)
2
+
3
+ - Adds a class `BerkeleyLibrary::TIND::MARC::XMLWriter` to write MARCXML in the format expected by the TIND batch uploader:
4
+
5
+ - MARC leader is written to control field 000 as required by TIND
6
+ - control fields (including the leader) use `\` (0x5c), not space (0x20), for unspecified positional
7
+ values
8
+
9
+ In addition, a `nil` or empty MARC leader is not written at all.
10
+ - Modifies `BerkeleyLibrary::TIND::MARC::XMLReader` to take into account the same peculiarities:
11
+ control field 000 is read into the leader of the MARC record, and backslashes in control field values
12
+ (including the leader) are replaced with spaces.
13
+
1
14
  # 0.4.3 (2022-01-26)
2
15
 
3
16
  - Pins `berkeley_library-marc` to version 0.3.x (0.3.1 or higher).
data/README.md CHANGED
@@ -1,10 +1,24 @@
1
1
  # BerkeleyLibrary::TIND
2
2
 
3
3
  [![Build Status](https://github.com/BerkeleyLibrary/tind/actions/workflows/build.yml/badge.svg?branch=main)](https://github.com/BerkeleyLibrary/tind/actions/workflows/build.yml)
4
- [![Gem Version](https://img.shields.io/gem/v/berkeley_library-tind.svg)](https://github.com/BerkeleyLibrary/tind/releases)
4
+ [![Gem Version](https://img.shields.io/gem/v/berkeley_library-tind.svg)](https://rubygems.org/gems/berkeley_library-tind)
5
5
 
6
6
  Utility gem for working with the TIND DA digital archive.
7
7
 
8
+ ## Installation
9
+
10
+ In your Gemfile:
11
+
12
+ ```ruby
13
+ gem 'berkeley_library-tind'
14
+ ```
15
+
16
+ In your code:
17
+
18
+ ```ruby
19
+ require 'berkeley_library/tind'
20
+ ```
21
+
8
22
  ## Configuration
9
23
 
10
24
  To access the TIND API, you will need to set:
@@ -37,6 +37,7 @@ Gem::Specification.new do |spec|
37
37
  spec.add_development_dependency 'ci_reporter_rspec', '~> 1.0'
38
38
  spec.add_development_dependency 'colorize', '~> 0.8'
39
39
  spec.add_development_dependency 'dotenv', '~> 2.7'
40
+ spec.add_development_dependency 'equivalent-xml', '~> 0.6'
40
41
  spec.add_development_dependency 'rake', '~> 13.0'
41
42
  spec.add_development_dependency 'roo', '~> 2.8'
42
43
  spec.add_development_dependency 'rspec', '~> 3.10'
@@ -0,0 +1,62 @@
1
+ require 'nokogiri'
2
+
3
+ module BerkeleyLibrary
4
+ module TIND
5
+ module MARC
6
+ class XMLBuilder
7
+ attr_reader :marc_record
8
+
9
+ def initialize(marc_record)
10
+ @marc_record = marc_record
11
+ end
12
+
13
+ def build
14
+ builder.doc.root.tap(&:unlink)
15
+ end
16
+
17
+ private
18
+
19
+ def builder
20
+ Nokogiri::XML::Builder.new do |xml|
21
+ xml.record do
22
+ add_leader(xml)
23
+ marc_record.each_control_field { |cf| add_control_field(xml, cf) }
24
+ marc_record.each_data_field { |df| add_data_field(xml, df) }
25
+ end
26
+ end
27
+ end
28
+
29
+ def add_leader(xml)
30
+ leader = marc_record.leader
31
+ return if leader.nil? || leader == ''
32
+
33
+ # TIND uses <controlfield tag="000"/> instead of <leader/>
34
+ leader_as_cf = ::MARC::ControlField.new('000', clean_leader(leader))
35
+ add_control_field(xml, leader_as_cf)
36
+ end
37
+
38
+ def add_data_field(xml, df)
39
+ xml.datafield(tag: df.tag, ind1: df.indicator1, ind2: df.indicator2) do
40
+ df.subfields.each do |sf|
41
+ xml.subfield(sf.value, code: sf.code)
42
+ end
43
+ end
44
+ end
45
+
46
+ def add_control_field(xml, cf)
47
+ # TIND uses \ (0x5c), not space (0x20), for unspecified values in positional fields
48
+ value = cf.value&.gsub(' ', '\\')
49
+ xml.controlfield(value, tag: cf.tag)
50
+ end
51
+
52
+ def clean_leader(leader)
53
+ leader.gsub(/[^\w|^\s]/, 'Z').tap do |ldr|
54
+ ldr[20..23] = '4500' unless ldr[20..23] == '4500'
55
+ ldr[6..6] = 'Z' if ldr[6..6] == ' '
56
+ end
57
+ end
58
+
59
+ end
60
+ end
61
+ end
62
+ end
@@ -1,6 +1,7 @@
1
1
  require 'nokogiri'
2
2
  require 'marc/xml_parsers'
3
3
  require 'marc_extensions'
4
+ require 'berkeley_library/util/files'
4
5
 
5
6
  module BerkeleyLibrary
6
7
  module TIND
@@ -9,7 +10,7 @@ module BerkeleyLibrary
9
10
  class XMLReader
10
11
  include Enumerable
11
12
  include ::MARC::NokogiriReader
12
- prepend MARCExtensions::YieldFrozenRecord
13
+ include BerkeleyLibrary::Util::Files
13
14
 
14
15
  # ############################################################
15
16
  # Constant
@@ -70,6 +71,12 @@ module BerkeleyLibrary
70
71
  # MARC::GenericPullParser overrides
71
72
 
72
73
  def yield_record
74
+ @record[:record].tap do |record|
75
+ clean_cf_values(record)
76
+ move_cf000_to_leader(record)
77
+ record.freeze if @freeze
78
+ end
79
+
73
80
  super
74
81
  ensure
75
82
  increment_records_yielded!
@@ -120,26 +127,25 @@ module BerkeleyLibrary
120
127
 
121
128
  private
122
129
 
123
- def ensure_io(file)
124
- return file if io_like?(file)
125
- return File.new(file) if file_exists?(file)
126
- return StringIO.new(file) if file =~ /^\s*</x
130
+ # TIND uses <controlfield tag="000"/> instead of <leader/>
131
+ def move_cf000_to_leader(record)
132
+ return unless (cf_000 = record['000'])
127
133
 
128
- raise ArgumentError, "Don't know how to read XML from #{file.inspect}: not an IO, file path, or XML text"
134
+ record.leader = cf_000.value
135
+ record.fields.delete(cf_000)
129
136
  end
130
137
 
131
- # Returns true if `obj` is close enough to an IO object for Nokogiri
132
- # to parse as one.
133
- #
134
- # @param obj [Object] the object that might be an IO
135
- # @see https://github.com/sparklemotion/nokogiri/blob/v1.11.1/lib/nokogiri/xml/sax/parser.rb#L81 Nokogiri::XML::SAX::Parser#parse
136
- def io_like?(obj)
137
- obj.respond_to?(:read) && obj.respond_to?(:close)
138
+ # TIND uses \ (0x5c), not space (0x20), for unspecified values in positional fields
139
+ def clean_cf_values(record)
140
+ record.each_control_field { |cf| cf.value = cf.value&.gsub('\\', ' ') }
138
141
  end
139
142
 
140
- def file_exists?(path)
141
- (path.respond_to?(:exist?) && path.exist?) ||
142
- (path.respond_to?(:to_str) && File.exist?(path))
143
+ def ensure_io(file)
144
+ return file if reader_like?(file)
145
+ return File.new(file) if file_exists?(file)
146
+ return StringIO.new(file) if file =~ /^\s*</x
147
+
148
+ raise ArgumentError, "Don't know how to read XML from #{file.inspect}: not an IO, file path, or XML text"
143
149
  end
144
150
 
145
151
  def increment_records_yielded!
@@ -0,0 +1,152 @@
1
+ require 'nokogiri'
2
+ require 'marc_extensions'
3
+ require 'berkeley_library/tind/marc/xml_builder'
4
+
5
+ module BerkeleyLibrary
6
+ module TIND
7
+ module MARC
8
+ class XMLWriter
9
+ include BerkeleyLibrary::Util::Files
10
+ include BerkeleyLibrary::Logging
11
+
12
+ # ------------------------------------------------------------
13
+ # Constants
14
+
15
+ UTF_8 = Encoding::UTF_8.name
16
+
17
+ EMPTY_COLLECTION_DOC = Nokogiri::XML::Builder.new(encoding: UTF_8) do |xml|
18
+ xml.collection(xmlns: ::MARC::MARC_NS)
19
+ end.doc.freeze
20
+
21
+ COLLECTION_CLOSING_TAG = '</collection>'.freeze
22
+
23
+ DEFAULT_NOKOGIRI_OPTS = { encoding: UTF_8 }.freeze
24
+
25
+ # ------------------------------------------------------------
26
+ # Fields
27
+
28
+ attr_reader :out
29
+ attr_reader :nokogiri_options
30
+
31
+ # ------------------------------------------------------------
32
+ # Initializer
33
+
34
+ # Initializes a new {XMLWriter}.
35
+ #
36
+ # ```ruby
37
+ # File.open('marc.xml', 'wb') do |f|
38
+ # w = XMLWriter.new(f)
39
+ # marc_records.each { |r| w.write(r) }
40
+ # w.close
41
+ # end
42
+ # ```
43
+ #
44
+ # @param out [IO, String] an IO, or the name of a file
45
+ # @param nokogiri_options [Hash] Options passed to
46
+ # {https://nokogiri.org/rdoc/Nokogiri/XML/Node.html#method-i-write_to Nokogiri::XML::Node#write_to}
47
+ # Note that the `encoding` option is ignored, except insofar as
48
+ # passing an encoding other than UTF-8 will raise an `ArgumentError`.
49
+ # @raise ArgumentError if `out` is not an IO or a string, or is a string referencing
50
+ # a file path that cannot be opened for writing; or if an encoding other than UTF-8
51
+ # is specified in `nokogiri_options`
52
+ # @see #open
53
+ def initialize(out, **nokogiri_options)
54
+ @nokogiri_options = valid_nokogiri_options(nokogiri_options)
55
+ @out = ensure_io(out)
56
+ end
57
+
58
+ # ------------------------------------------------------------
59
+ # Class methods
60
+
61
+ class << self
62
+
63
+ # Opens a new {XMLWriter} with the specified output destination and
64
+ # Nokogiri options, writes the XML prolog and opening `<collection>`
65
+ # tag, yields the writer to write one or more MARC records, and closes
66
+ # the writer.
67
+ #
68
+ # ```ruby
69
+ # XMLWriter.open('marc.xml') do |w|
70
+ # marc_records.each { |r| w.write(r) }
71
+ # end
72
+ # ```
73
+ #
74
+ # Note that unlike initializing a writer with {#new} and closing it
75
+ # immediately, this will write an XML document with an empty
76
+ # `<collection></collection>` tag even if no records are written.
77
+ #
78
+ # @yieldparam writer [XMLWriter] the writer
79
+ # @see #new
80
+ # @see #close
81
+ def open(out, **nokogiri_options)
82
+ writer = new(out, **nokogiri_options)
83
+ writer.send(:ensure_open!)
84
+ yield writer if block_given?
85
+ writer.close
86
+ end
87
+ end
88
+
89
+ # ------------------------------------------------------------
90
+ # Instance methods
91
+
92
+ # Writes the specified record to the underlying stream, writing the
93
+ # XML prolog and opening `<collection>` tag if they have not yet
94
+ # been written.
95
+ #
96
+ # @param record [::MARC::Record] the MARC record to write.
97
+ # @raise IOError if the underlying stream has already been closed.
98
+ def write(record)
99
+ ensure_open!
100
+ record_element = XMLBuilder.new(record).build
101
+ record_element.write_to(out, nokogiri_options)
102
+ out.write("\n")
103
+ end
104
+
105
+ # Closes the underlying stream. If the XML prolog and opening `<collection>`
106
+ # tag have already been written, the closing `</collection>` tag is written
107
+ # first.
108
+ def close
109
+ out.write(COLLECTION_CLOSING_TAG) if @open
110
+ out.close
111
+ end
112
+
113
+ # ------------------------------------------------------------
114
+ # Private
115
+
116
+ private
117
+
118
+ def ensure_open!
119
+ return if @open
120
+
121
+ out.write(prolog_and_opening_tag)
122
+ @open = true
123
+ end
124
+
125
+ def prolog_and_opening_tag
126
+ StringIO.open do |tmp|
127
+ EMPTY_COLLECTION_DOC.write_to(tmp, nokogiri_options)
128
+ result = tmp.string
129
+ result.sub!(%r{/>\s*$}, ">\n")
130
+ result
131
+ end
132
+ end
133
+
134
+ def ensure_io(file)
135
+ return file if writer_like?(file)
136
+ return File.open(file, 'wb') if parent_exists?(file)
137
+
138
+ raise ArgumentError, "Don't know how to write XML to #{file.inspect}: not an IO or file path"
139
+ end
140
+
141
+ def valid_nokogiri_options(opts)
142
+ if (encoding = opts.delete(:encoding)) && encoding != UTF_8
143
+ raise ArgumentError, "#{self.class.name} only supports #{UTF_8}; unable to use specified encoding #{encoding}"
144
+ end
145
+
146
+ DEFAULT_NOKOGIRI_OPTS.merge(opts)
147
+ end
148
+
149
+ end
150
+ end
151
+ end
152
+ end
@@ -7,7 +7,7 @@ module BerkeleyLibrary
7
7
  SUMMARY = 'TIND DA utilities for the UC Berkeley Library'.freeze
8
8
  DESCRIPTION = 'UC Berkeley Library utility gem for working with the TIND DA digital archive.'.freeze
9
9
  LICENSE = 'MIT'.freeze
10
- VERSION = '0.4.3'.freeze
10
+ VERSION = '0.5.0'.freeze
11
11
  HOMEPAGE = 'https://github.com/BerkeleyLibrary/tind'.freeze
12
12
  end
13
13
  end
@@ -0,0 +1,39 @@
1
+ module BerkeleyLibrary
2
+ module Util
3
+ # TODO: Move this to `berkeley_library-util`
4
+ module Files
5
+ class << self
6
+ include Files
7
+ end
8
+
9
+ def file_exists?(path)
10
+ (path.respond_to?(:exist?) && path.exist?) ||
11
+ (path.respond_to?(:to_str) && File.exist?(path))
12
+ end
13
+
14
+ def parent_exists?(path)
15
+ path.respond_to?(:parent) && path.parent.exist? ||
16
+ path.respond_to?(:to_str) && Pathname.new(path).parent.exist?
17
+ end
18
+
19
+ # Returns true if `obj` is close enough to an IO object for Nokogiri
20
+ # to parse as one.
21
+ #
22
+ # @param obj [Object] the object that might be an IO
23
+ # @see https://github.com/sparklemotion/nokogiri/blob/v1.11.1/lib/nokogiri/xml/sax/parser.rb#L81 Nokogiri::XML::SAX::Parser#parse
24
+ def reader_like?(obj)
25
+ obj.respond_to?(:read) && obj.respond_to?(:close)
26
+ end
27
+
28
+ # Returns true if `obj` is close enough to an IO object for Nokogiri
29
+ # to write to.
30
+ #
31
+ # @param obj [Object] the object that might be an IO
32
+ def writer_like?(obj)
33
+ # TODO: is it possible/desirable to loosen this? how strict is libxml2?
34
+ obj.is_a?(IO) || obj.is_a?(StringIO)
35
+ end
36
+
37
+ end
38
+ end
39
+ end
@@ -106,6 +106,28 @@ module BerkeleyLibrary
106
106
  end
107
107
  end
108
108
 
109
+ describe 'TIND peculiarities' do
110
+ attr_reader :record
111
+
112
+ before(:each) do
113
+ reader = XMLReader.new('spec/data/new-records.xml')
114
+ records = reader.to_a
115
+ expect(records.size).to eq(1) # just to be sure
116
+ @record = records.first
117
+ end
118
+
119
+ it 'converts backslashes in control fields to spaces' do
120
+ cf_008 = record['008']
121
+ expect(cf_008).to be_a(::MARC::ControlField)
122
+ expect(cf_008.value).to eq('190409s2015 xx eng ')
123
+ end
124
+
125
+ it 'parses CF 000 as the leader' do
126
+ expect(record.leader).to eq('00287cam a2200313 4500')
127
+ expect(record['000']).to be_nil
128
+ end
129
+ end
130
+
109
131
  end
110
132
  end
111
133
  end
@@ -0,0 +1,156 @@
1
+ require 'spec_helper'
2
+ require 'equivalent-xml'
3
+
4
+ module BerkeleyLibrary
5
+ module TIND
6
+ module MARC
7
+ describe XMLWriter do
8
+ let(:input_path) { 'spec/data/new-records.xml' }
9
+ attr_reader :record
10
+
11
+ before(:each) do
12
+ reader = XMLReader.new(input_path)
13
+ @record = reader.first
14
+ end
15
+
16
+ describe :open do
17
+
18
+ it 'writes a MARC record to a file as XML' do
19
+ Dir.mktmpdir(File.basename(__FILE__, '.rb')) do |dir|
20
+ output_path = File.join(dir, 'marc.xml')
21
+ XMLWriter.open(output_path) { |w| w.write(record) }
22
+
23
+ expected = File.open(input_path) { |f| Nokogiri::XML(f) }
24
+ actual = File.open(output_path) { |f| Nokogiri::XML(f) }
25
+
26
+ aggregate_failures do
27
+ EquivalentXml.equivalent?(expected, actual) do |n1, n2, result|
28
+ expect(n2.to_s).to eq(n1.to_s) unless result
29
+ end
30
+ end
31
+ end
32
+ end
33
+
34
+ it 'writes a MARC record to a StringIO' do
35
+ out = StringIO.new
36
+ XMLWriter.open(out) { |w| w.write(record) }
37
+ expected = File.open(input_path) { |f| Nokogiri::XML(f) }
38
+ actual = Nokogiri::XML(out.string)
39
+ aggregate_failures do
40
+ EquivalentXml.equivalent?(expected, actual) do |n1, n2, result|
41
+ expect(n2.to_s).to eq(n1.to_s) unless result
42
+ end
43
+ end
44
+ end
45
+
46
+ it 'accepts Nokogiri options' do
47
+ Dir.mktmpdir(File.basename(__FILE__, '.rb')) do |dir|
48
+ expected_path = File.join(dir, 'expected.xml')
49
+ XMLWriter.open(expected_path) { |w| w.write(record) }
50
+
51
+ actual_path = File.join(dir, 'actual.xml')
52
+ XMLWriter.open(actual_path, indent_text: "\t") { |w| w.write(record) }
53
+
54
+ expected = File.read(expected_path).gsub(%r{ (?= *<)(?!/)}, "\t")
55
+ actual = File.read(actual_path)
56
+ expect(actual).to eq(expected)
57
+ end
58
+ end
59
+
60
+ it 'accepts an explicit UTF-8 argument' do
61
+ Dir.mktmpdir(File.basename(__FILE__, '.rb')) do |dir|
62
+ output_path = File.join(dir, 'marc.xml')
63
+ XMLWriter.open(output_path, encoding: 'UTF-8') { |w| w.write(record) }
64
+
65
+ expected = File.open(input_path) { |f| Nokogiri::XML(f) }
66
+ actual = File.open(output_path) { |f| Nokogiri::XML(f) }
67
+
68
+ aggregate_failures do
69
+ EquivalentXml.equivalent?(expected, actual) do |n1, n2, result|
70
+ expect(n2.to_s).to eq(n1.to_s) unless result
71
+ end
72
+ end
73
+ end
74
+ end
75
+
76
+ it 'only writes UTF-8' do
77
+ Dir.mktmpdir(File.basename(__FILE__, '.rb')) do |dir|
78
+ output_path = File.join(dir, 'marc.xml')
79
+ expect { XMLWriter.open(output_path, encoding: 'UTF-16') }.to raise_error(ArgumentError)
80
+ expect(File.exist?(output_path)).to eq(false)
81
+ end
82
+ end
83
+
84
+ it 'rejects an invalid file path' do
85
+ bad_directory = Dir.mktmpdir(File.basename(__FILE__, '.rb')) { |dir| dir }
86
+ expect(File.directory?(bad_directory)).to eq(false)
87
+ output_path = File.join(bad_directory, 'marc.xml')
88
+ expect { XMLWriter.open(output_path) }.to raise_error(ArgumentError)
89
+ end
90
+
91
+ it 'rejects a non-IO, non-String argument' do
92
+ invalid_target = Object.new
93
+ expect { XMLWriter.open(invalid_target) }.to raise_error(ArgumentError)
94
+ end
95
+ end
96
+
97
+ describe :close do
98
+ it 'closes without writing the closing tag if nothing has been written' do
99
+ Dir.mktmpdir(File.basename(__FILE__, '.rb')) do |dir|
100
+ output_path = File.join(dir, 'marc.xml')
101
+ w = XMLWriter.new(output_path)
102
+ w.close
103
+
104
+ stat = File.stat(output_path)
105
+ expect(stat.size).to eq(0)
106
+ end
107
+ end
108
+
109
+ it 'writes the closing tag if the opening tag has been written' do
110
+ Dir.mktmpdir(File.basename(__FILE__, '.rb')) do |dir|
111
+ output_path = File.join(dir, 'marc.xml')
112
+ XMLWriter.open(output_path)
113
+ expect(File.exist?(output_path)).to eq(true)
114
+
115
+ doc = File.open(output_path) { |f| Nokogiri::XML(f) }
116
+ expect(doc.root.name).to eq('collection')
117
+ end
118
+ end
119
+ end
120
+
121
+ describe :write do
122
+ it 'raises an IOError if the writer has already been closed' do
123
+ Dir.mktmpdir(File.basename(__FILE__, '.rb')) do |dir|
124
+ output_path = File.join(dir, 'marc.xml')
125
+ w = XMLWriter.new(output_path)
126
+ w.close
127
+
128
+ expect { w.write(record) }.to raise_error(IOError)
129
+
130
+ stat = File.stat(output_path)
131
+ expect(stat.size).to eq(0)
132
+ end
133
+ end
134
+
135
+ it 'does not write a nil leader' do
136
+ record.leader = nil
137
+ marc_xml = StringIO.open do |out|
138
+ XMLWriter.open(out) { |w| w.write(record) }
139
+ out.string
140
+ end
141
+ expect(marc_xml).not_to include('leader')
142
+ end
143
+
144
+ it 'does not write a blank leader' do
145
+ record.leader = ''
146
+ marc_xml = StringIO.open do |out|
147
+ XMLWriter.open(out) { |w| w.write(record) }
148
+ out.string
149
+ end
150
+ expect(marc_xml).not_to include('leader')
151
+ end
152
+ end
153
+ end
154
+ end
155
+ end
156
+ end
@@ -0,0 +1,46 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!--
3
+ Source: "Batch Uploader: Caveats, common errors and example metadata files", docs.tind.io
4
+ -->
5
+ <collection xmlns="http://www.loc.gov/MARC21/slim">
6
+ <record>
7
+
8
+ <!-- The Leader is encoded in the `000` control field.
9
+ If you want to edit the leader in software such as
10
+ MarcEdit, you will need to change these fields to a
11
+ leader tag. Then, before import into the repository
12
+ you will need to change the fields back to controlfields
13
+ with tag `000`.
14
+ -->
15
+ <controlfield tag="000">00287cam\a2200313\\\4500</controlfield>
16
+
17
+ <!-- All whitespace in control fields need to be replaced with
18
+ backslashes.
19
+ -->
20
+ <controlfield tag="008">190409s2015\\\\xx\\\\\\\\\\\\\\\\\\eng\\</controlfield>
21
+
22
+ <!-- Regular fields are encoded in datafield elements. -->
23
+ <datafield tag="100" ind1="0" ind2=" ">
24
+ <subfield code="a">Aristotle</subfield>
25
+ <subfield code="0">580897</subfield>
26
+ </datafield>
27
+
28
+ <datafield tag="245" ind1="0" ind2="0">
29
+ <subfield code="a">Metaphysics</subfield>
30
+ <subfield code="c">Aristotle</subfield>
31
+ </datafield>
32
+
33
+ <datafield tag="260" ind1=" " ind2=" ">
34
+ <subfield code="a">Narnia</subfield>
35
+ <subfield code="b">Fictive Books</subfield>
36
+ <subfield code="c">2015</subfield>
37
+ </datafield>
38
+
39
+ <!-- Make sure to include a collection when uploading new
40
+ records, so that the record will be searchable.
41
+ -->
42
+ <datafield tag="980" ind1=" " ind2=" ">
43
+ <subfield code="a">BIB</subfield>
44
+ </datafield>
45
+ </record>
46
+ </collection>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: berkeley_library-tind
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.3
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Moles
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-01-27 00:00:00.000000000 Z
11
+ date: 2022-02-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: berkeley_library-logging
@@ -198,6 +198,20 @@ dependencies:
198
198
  - - "~>"
199
199
  - !ruby/object:Gem::Version
200
200
  version: '2.7'
201
+ - !ruby/object:Gem::Dependency
202
+ name: equivalent-xml
203
+ requirement: !ruby/object:Gem::Requirement
204
+ requirements:
205
+ - - "~>"
206
+ - !ruby/object:Gem::Version
207
+ version: '0.6'
208
+ type: :development
209
+ prerelease: false
210
+ version_requirements: !ruby/object:Gem::Requirement
211
+ requirements:
212
+ - - "~>"
213
+ - !ruby/object:Gem::Version
214
+ version: '0.6'
201
215
  - !ruby/object:Gem::Dependency
202
216
  name: rake
203
217
  requirement: !ruby/object:Gem::Requirement
@@ -395,8 +409,11 @@ files:
395
409
  - lib/berkeley_library/tind/export/table.rb
396
410
  - lib/berkeley_library/tind/export/table_metrics.rb
397
411
  - lib/berkeley_library/tind/marc.rb
412
+ - lib/berkeley_library/tind/marc/xml_builder.rb
398
413
  - lib/berkeley_library/tind/marc/xml_reader.rb
414
+ - lib/berkeley_library/tind/marc/xml_writer.rb
399
415
  - lib/berkeley_library/tind/module_info.rb
416
+ - lib/berkeley_library/util/files.rb
400
417
  - lib/berkeley_library/util/ods/spreadsheet.rb
401
418
  - lib/berkeley_library/util/ods/xml/content_doc.rb
402
419
  - lib/berkeley_library/util/ods/xml/document_node.rb
@@ -460,6 +477,7 @@ files:
460
477
  - spec/berkeley_library/tind/export/row_spec.rb
461
478
  - spec/berkeley_library/tind/export/table_spec.rb
462
479
  - spec/berkeley_library/tind/marc/xml_reader_spec.rb
480
+ - spec/berkeley_library/tind/marc/xml_writer_spec.rb
463
481
  - spec/berkeley_library/util/ods/spreadsheet_spec.rb
464
482
  - spec/berkeley_library/util/ods/xml/content_doc_spec.rb
465
483
  - spec/berkeley_library/util/ods/xml/manifest/file_entry_spec.rb
@@ -472,6 +490,7 @@ files:
472
490
  - spec/data/collection-names.txt
473
491
  - spec/data/collections.json
474
492
  - spec/data/disjoint-records.xml
493
+ - spec/data/new-records.xml
475
494
  - spec/data/record-184453.xml
476
495
  - spec/data/record-184458.xml
477
496
  - spec/data/record-187888.xml
@@ -531,6 +550,7 @@ test_files:
531
550
  - spec/berkeley_library/tind/export/row_spec.rb
532
551
  - spec/berkeley_library/tind/export/table_spec.rb
533
552
  - spec/berkeley_library/tind/marc/xml_reader_spec.rb
553
+ - spec/berkeley_library/tind/marc/xml_writer_spec.rb
534
554
  - spec/berkeley_library/util/ods/spreadsheet_spec.rb
535
555
  - spec/berkeley_library/util/ods/xml/content_doc_spec.rb
536
556
  - spec/berkeley_library/util/ods/xml/manifest/file_entry_spec.rb
@@ -543,6 +563,7 @@ test_files:
543
563
  - spec/data/collection-names.txt
544
564
  - spec/data/collections.json
545
565
  - spec/data/disjoint-records.xml
566
+ - spec/data/new-records.xml
546
567
  - spec/data/record-184453.xml
547
568
  - spec/data/record-184458.xml
548
569
  - spec/data/record-187888.xml