marc 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +11 -1
- data/lib/marc/reader.rb +2 -2
- data/lib/marc/version.rb +1 -1
- data/lib/marc/xml_parsers.rb +9 -1
- data/test/tc_xml_error_handling.rb +22 -0
- data/test/three-records-second-bad.xml +160 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 3c354c92026e4cf40c482ef72de120b7aaffac5cbc24e1c369a498bf4519a6ff
|
4
|
+
data.tar.gz: b6daa3e964746945d35cae76eb290cbc0b3dc2e5d03eb9d383e5e5aba473c676
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b49c9c88fb12854317d0f2fc14ce465f261a9ba2db3941f694e690f124063bd905522d9a21e862e777d84992e858839956bd289a275e731be60f638e2b327e89
|
7
|
+
data.tar.gz: 22be396099b50aa7dea829b10485e9f4dc0d7d9232470d1db05b56f95815f591f3eebdfdc44af90c6f8f212ddd982814acd5356de8cd9c88e194d54c93f9ba99
|
data/README.md
CHANGED
@@ -56,7 +56,17 @@ Consult the MARC::Reader class docs for a more complete discussion and range of
|
|
56
56
|
|
57
57
|
The MARC binary Writer (MARC::Writer) does not have any encoding-related features -- it's up to you, the developer, to make sure you create MARC::Records with consistent and expected char encodings. MARC::Writer will write out a legal ISO 2709 either way; it just might have corrupted encodings.
|
58
58
|
|
59
|
-
|
59
|
+
When parsing MARCXML _with Nokogiri as your XML parser implementation_ up to
|
60
|
+
and including version `1.0.2` of this gem, if the XML was badly formed, parsing
|
61
|
+
would stop and no error would be reported to your code.
|
62
|
+
|
63
|
+
If you are using a version > `1.0.2` of `ruby-marc` with MRI + Nokogiri, XML
|
64
|
+
syntax errors will be raised as `MARC::XMLParseError` (and you may need to adjust your code to account
|
65
|
+
for this). *JRuby users*: If you are using a version later than `1.0.2` and
|
66
|
+
using Nokogiri as an XML parser with JRuby as your ruby implementation, XML
|
67
|
+
syntax errors will still be ignored unless you have Nokogiri version `1.10.2`
|
68
|
+
or later.
|
69
|
+
|
60
70
|
## Miscellany
|
61
71
|
|
62
72
|
Source code at: https://github.com/ruby-marc/ruby-marc/
|
data/lib/marc/reader.rb
CHANGED
@@ -138,10 +138,10 @@ module MARC
|
|
138
138
|
# Encoding.default_internal = "utf-8"
|
139
139
|
# MARC::Reader.new( File.new("marc_in_cp866.mrc", "r:cp866") )
|
140
140
|
#
|
141
|
-
# # However this
|
141
|
+
# # However this should be safe:
|
142
142
|
# MARC::Reader.new( "marc_in_cp866.mrc", :external_encoding => "cp866")
|
143
143
|
#
|
144
|
-
# # And this
|
144
|
+
# # And this should be safe, if you do want to transcode:
|
145
145
|
# MARC::Reader.new( "marc_in_cp866.mrc", :external_encoding => "cp866",
|
146
146
|
# :internal_encoding => "utf-8")
|
147
147
|
#
|
data/lib/marc/version.rb
CHANGED
data/lib/marc/xml_parsers.rb
CHANGED
@@ -1,4 +1,8 @@
|
|
1
1
|
module MARC
|
2
|
+
# Exception class to be thrown when an XML parser
|
3
|
+
# encounters an unrecoverable error.
|
4
|
+
class XMLParseError < StandardError; end
|
5
|
+
|
2
6
|
# The MagicReader will try to use the best available XML Parser at the
|
3
7
|
# time of initialization.
|
4
8
|
# The order is currently:
|
@@ -112,6 +116,10 @@ module MARC
|
|
112
116
|
@parser.parse(@handle)
|
113
117
|
end
|
114
118
|
end
|
119
|
+
|
120
|
+
def error(evt)
|
121
|
+
raise(XMLParseError, "XML parsing error: #{evt}")
|
122
|
+
end
|
115
123
|
|
116
124
|
|
117
125
|
def method_missing(methName, *args)
|
@@ -413,4 +421,4 @@ end
|
|
413
421
|
end
|
414
422
|
end # end of module
|
415
423
|
end # end of if jruby
|
416
|
-
end
|
424
|
+
end
|
data/test/tc_xml_error_handling.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'marc'
|
3
|
+
|
4
|
+
class BadXMLHandlingTestCase < Test::Unit::TestCase
|
5
|
+
|
6
|
+
def test_nokogiri_bad_xml
|
7
|
+
begin
|
8
|
+
require 'nokogiri'
|
9
|
+
rescue LoadError
|
10
|
+
omit("nokogiri not installed, cannot test")
|
11
|
+
end
|
12
|
+
omit("nokogiri (<1.10.2) under jruby doesn't support error handling: sparklemotion/nokogiri#1847") if RUBY_PLATFORM == 'java' && Gem::Version.new(Nokogiri::VERSION) < Gem::Version.new('1.10.2')
|
13
|
+
count = 0
|
14
|
+
reader = MARC::XMLReader.new('test/three-records-second-bad.xml', :parser => :nokogiri)
|
15
|
+
assert_raise MARC::XMLParseError do
|
16
|
+
reader.each do |rec|
|
17
|
+
count += 1 if rec['260']
|
18
|
+
end
|
19
|
+
end
|
20
|
+
assert_equal(1, count, 'should only be able to parse one record')
|
21
|
+
end
|
22
|
+
end
|
data/test/three-records-second-bad.xml
ADDED
@@ -0,0 +1,160 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<collection xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.loc.gov/MARC21/slim" xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd">
|
3
|
+
<record>
|
4
|
+
<leader> njm a22 uu 4500</leader>
|
5
|
+
<controlfield tag="001">afc99990058366</controlfield>
|
6
|
+
<controlfield tag="003">DLC</controlfield>
|
7
|
+
<controlfield tag="005">20071104155141.9</controlfield>
|
8
|
+
<controlfield tag="007">sd ummunniauub</controlfield>
|
9
|
+
<controlfield tag="008">071103s1939 xxufmnne||||||||| u eng||</controlfield>
|
10
|
+
<datafield tag="010" ind1=" " ind2=" ">
|
11
|
+
<subfield code="a">afc99990058366</subfield>
|
12
|
+
</datafield>
|
13
|
+
<datafield tag="040" ind1=" " ind2=" ">
|
14
|
+
<subfield code="a">DLC</subfield>
|
15
|
+
<subfield code="c">DLC</subfield>
|
16
|
+
</datafield>
|
17
|
+
<datafield tag="245" ind1="0" ind2="4">
|
18
|
+
<subfield code="a">The Texas ranger</subfield>
|
19
|
+
<subfield code="h">[sound recording] /</subfield>
|
20
|
+
<subfield code="c">Sung by Beale D. Taylor.</subfield>
|
21
|
+
</datafield>
|
22
|
+
<datafield tag="260" ind1=" " ind2=" ">
|
23
|
+
<subfield code="a">Medina, Texas,</subfield>
|
24
|
+
<subfield code="c">1939.</subfield>
|
25
|
+
</datafield>
|
26
|
+
<datafield tag="300" ind1=" " ind2=" ">
|
27
|
+
<subfield code="a">1 sound disc :</subfield>
|
28
|
+
<subfield code="b">analog, 33 1/3 rpm, mono. ;</subfield>
|
29
|
+
<subfield code="c">12 in.</subfield>
|
30
|
+
</datafield>
|
31
|
+
<datafield tag="651" ind1=" " ind2="0">
|
32
|
+
<subfield code="a">Medina</subfield>
|
33
|
+
<subfield code="z">Texas</subfield>
|
34
|
+
<subfield code="z">United States of America.</subfield>
|
35
|
+
</datafield>
|
36
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
37
|
+
<subfield code="a">Lomax, John Avery, 1867-1948</subfield>
|
38
|
+
<subfield code="e">Recording engineer.</subfield>
|
39
|
+
</datafield>
|
40
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
41
|
+
<subfield code="a">Lomax, Ruby T. (Ruby Terrill)</subfield>
|
42
|
+
<subfield code="e">Recording engineer.</subfield>
|
43
|
+
</datafield>
|
44
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
45
|
+
<subfield code="a">Taylor, Beale D.</subfield>
|
46
|
+
<subfield code="e">Singer.</subfield>
|
47
|
+
</datafield>
|
48
|
+
<datafield tag="852" ind1=" " ind2=" ">
|
49
|
+
<subfield code="a">American Folklife Center, Library of Congress</subfield>
|
50
|
+
</datafield>
|
51
|
+
<datafield tag="852" ind1=" " ind2=" ">
|
52
|
+
<subfield code="a">DLC</subfield>
|
53
|
+
</datafield>
|
54
|
+
</record>
|
55
|
+
<record>
|
56
|
+
<leader> njm a22 uu 4500</leader>
|
57
|
+
<controlfield tag="001">afc99990058366</controlfield>
|
58
|
+
<controlfield tag="003">DLC</controlfield>
|
59
|
+
<controlfield tag="005">20071104155141.9</controlfield>
|
60
|
+
<controlfield tag="007">sd ummunniauub</controlfield>
|
61
|
+
<controlfield tag="008">071103s1939 xxufmnne||||||||| u eng||</controlfield>
|
62
|
+
<datafield tag="010" ind1=" " ind2=" ">
|
63
|
+
<subfield code="a">afc99990058366</subfield>
|
64
|
+
</datafield>
|
65
|
+
<datafield tag="040" ind1=" " ind2=" ">
|
66
|
+
<subfield code="a">DLC</subfield>
|
67
|
+
<subfield code="c">DLC</subfield>
|
68
|
+
</datafield>
|
69
|
+
<datafield tag="245" ind1="0" ind2="4">
|
70
|
+
<subfield code="a">The Texas ranger</subfield>
|
71
|
+
<!-- invalid utf-8 bytes in the non-printing subfield code -->
|
72
|
+
<subfield code="">[sound recording] /</subfield>
|
73
|
+
<subfield code="c">Sung by Beale D. Taylor.</subfield>
|
74
|
+
</datafield>
|
75
|
+
<datafield tag="260" ind1=" " ind2=" ">
|
76
|
+
<subfield code="a">Medina, Texas,</subfield>
|
77
|
+
<subfield code="c">1939.</subfield>
|
78
|
+
</datafield>
|
79
|
+
<datafield tag="300" ind1=" " ind2=" ">
|
80
|
+
<subfield code="a">1 sound disc :</subfield>
|
81
|
+
<subfield code="b">analog, 33 1/3 rpm, mono. ;</subfield>
|
82
|
+
<subfield code="c">12 in.</subfield>
|
83
|
+
</datafield>
|
84
|
+
<datafield tag="651" ind1=" " ind2="0">
|
85
|
+
<subfield code="a">Medina</subfield>
|
86
|
+
<subfield code="z">Texas</subfield>
|
87
|
+
<subfield code="z">United States of America.</subfield>
|
88
|
+
</datafield>
|
89
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
90
|
+
<subfield code="a">Lomax, John Avery, 1867-1948</subfield>
|
91
|
+
<subfield code="e">Recording engineer.</subfield>
|
92
|
+
</datafield>
|
93
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
94
|
+
<subfield code="a">Lomax, Ruby T. (Ruby Terrill)</subfield>
|
95
|
+
<subfield code="e">Recording engineer.</subfield>
|
96
|
+
</datafield>
|
97
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
98
|
+
<subfield code="a">Taylor, Beale D.</subfield>
|
99
|
+
<subfield code="e">Singer.</subfield>
|
100
|
+
</datafield>
|
101
|
+
<datafield tag="852" ind1=" " ind2=" ">
|
102
|
+
<subfield code="a">American Folklife Center, Library of Congress</subfield>
|
103
|
+
</datafield>
|
104
|
+
<datafield tag="852" ind1=" " ind2=" ">
|
105
|
+
<subfield code="a">DLC</subfield>
|
106
|
+
</datafield>
|
107
|
+
</record>
|
108
|
+
<record>
|
109
|
+
<leader> njm a22 uu 4500</leader>
|
110
|
+
<controlfield tag="001">afc99990058366</controlfield>
|
111
|
+
<controlfield tag="003">DLC</controlfield>
|
112
|
+
<controlfield tag="005">20071104155141.9</controlfield>
|
113
|
+
<controlfield tag="007">sd ummunniauub</controlfield>
|
114
|
+
<controlfield tag="008">071103s1939 xxufmnne||||||||| u eng||</controlfield>
|
115
|
+
<datafield tag="010" ind1=" " ind2=" ">
|
116
|
+
<subfield code="a">afc99990058366</subfield>
|
117
|
+
</datafield>
|
118
|
+
<datafield tag="040" ind1=" " ind2=" ">
|
119
|
+
<subfield code="a">DLC</subfield>
|
120
|
+
<subfield code="c">DLC</subfield>
|
121
|
+
</datafield>
|
122
|
+
<datafield tag="245" ind1="0" ind2="4">
|
123
|
+
<subfield code="a">The Texas ranger</subfield>
|
124
|
+
<subfield code="h">[sound recording] /</subfield>
|
125
|
+
<subfield code="c">Sung by Beale D. Taylor.</subfield>
|
126
|
+
</datafield>
|
127
|
+
<datafield tag="260" ind1=" " ind2=" ">
|
128
|
+
<subfield code="a">Medina, Texas,</subfield>
|
129
|
+
<subfield code="c">1939.</subfield>
|
130
|
+
</datafield>
|
131
|
+
<datafield tag="300" ind1=" " ind2=" ">
|
132
|
+
<subfield code="a">1 sound disc :</subfield>
|
133
|
+
<subfield code="b">analog, 33 1/3 rpm, mono. ;</subfield>
|
134
|
+
<subfield code="c">12 in.</subfield>
|
135
|
+
</datafield>
|
136
|
+
<datafield tag="651" ind1=" " ind2="0">
|
137
|
+
<subfield code="a">Medina</subfield>
|
138
|
+
<subfield code="z">Texas</subfield>
|
139
|
+
<subfield code="z">United States of America.</subfield>
|
140
|
+
</datafield>
|
141
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
142
|
+
<subfield code="a">Lomax, John Avery, 1867-1948</subfield>
|
143
|
+
<subfield code="e">Recording engineer.</subfield>
|
144
|
+
</datafield>
|
145
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
146
|
+
<subfield code="a">Lomax, Ruby T. (Ruby Terrill)</subfield>
|
147
|
+
<subfield code="e">Recording engineer.</subfield>
|
148
|
+
</datafield>
|
149
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
150
|
+
<subfield code="a">Taylor, Beale D.</subfield>
|
151
|
+
<subfield code="e">Singer.</subfield>
|
152
|
+
</datafield>
|
153
|
+
<datafield tag="852" ind1=" " ind2=" ">
|
154
|
+
<subfield code="a">American Folklife Center, Library of Congress</subfield>
|
155
|
+
</datafield>
|
156
|
+
<datafield tag="852" ind1=" " ind2=" ">
|
157
|
+
<subfield code="a">DLC</subfield>
|
158
|
+
</datafield>
|
159
|
+
</record>
|
160
|
+
</collection>
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: marc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.2
|
4
|
+
version: 1.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kevin Clarke
|
@@ -13,7 +13,7 @@ authors:
|
|
13
13
|
autorequire: marc
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
|
-
date:
|
16
|
+
date: 2019-03-27 00:00:00.000000000 Z
|
17
17
|
dependencies:
|
18
18
|
- !ruby/object:Gem::Dependency
|
19
19
|
name: scrub_rb
|
@@ -106,6 +106,8 @@ files:
|
|
106
106
|
- test/tc_subfield.rb
|
107
107
|
- test/tc_writer.rb
|
108
108
|
- test/tc_xml.rb
|
109
|
+
- test/tc_xml_error_handling.rb
|
110
|
+
- test/three-records-second-bad.xml
|
109
111
|
- test/ts_marc.rb
|
110
112
|
- test/utf8.marc
|
111
113
|
- test/utf8_multirecord.marc
|
@@ -130,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
130
132
|
version: '0'
|
131
133
|
requirements: []
|
132
134
|
rubyforge_project:
|
133
|
-
rubygems_version: 2.6
|
135
|
+
rubygems_version: 2.7.6
|
134
136
|
signing_key:
|
135
137
|
specification_version: 4
|
136
138
|
summary: A ruby library for working with Machine Readable Cataloging
|