marc 1.0.2 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +11 -1
- data/lib/marc/reader.rb +2 -2
- data/lib/marc/version.rb +1 -1
- data/lib/marc/xml_parsers.rb +9 -1
- data/test/tc_xml_error_handling.rb +22 -0
- data/test/three-records-second-bad.xml +160 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 3c354c92026e4cf40c482ef72de120b7aaffac5cbc24e1c369a498bf4519a6ff
|
4
|
+
data.tar.gz: b6daa3e964746945d35cae76eb290cbc0b3dc2e5d03eb9d383e5e5aba473c676
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b49c9c88fb12854317d0f2fc14ce465f261a9ba2db3941f694e690f124063bd905522d9a21e862e777d84992e858839956bd289a275e731be60f638e2b327e89
|
7
|
+
data.tar.gz: 22be396099b50aa7dea829b10485e9f4dc0d7d9232470d1db05b56f95815f591f3eebdfdc44af90c6f8f212ddd982814acd5356de8cd9c88e194d54c93f9ba99
|
data/README.md
CHANGED
@@ -56,7 +56,17 @@ Consult the MARC::Reader class docs for a more complete discussion and range of
|
|
56
56
|
|
57
57
|
The MARC binary Writer (MARC::Writer) does not have any encoding-related features -- it's up to you the developer to make sure you create MARC::Records with consistent and expected char encodings, although MARC::Writer will write out a legal ISO 2709 either way, it just might have corrupted encodings.
|
58
58
|
|
59
|
-
|
59
|
+
When parsing MARCXML _with Nokogiri as your XML parser implementation_ up to
|
60
|
+
and including version `1.0.2` of this gem, if the XML was badly formed, parsing
|
61
|
+
would stop and no error would be reported to your code.
|
62
|
+
|
63
|
+
If you are using a version > `1.0.2` of `ruby-marc` with MRI + Nokogiri, XML
|
64
|
+
syntax errors will be thrown (and you may need to adjust your code to account
|
65
|
+
for this). *JRuby users*: If you are using a version later than `1.0.2` and
|
66
|
+
using Nokogiri as an XML parser with JRuby as your ruby implementation, XML
|
67
|
+
syntax errors will still be ignored unless you have Nokogiri version `1.10.2`
|
68
|
+
or later.
|
69
|
+
|
60
70
|
## Miscellany
|
61
71
|
|
62
72
|
Source code at: https://github.com/ruby-marc/ruby-marc/
|
data/lib/marc/reader.rb
CHANGED
@@ -138,10 +138,10 @@ module MARC
|
|
138
138
|
# Encoding.default_internal = "utf-8"
|
139
139
|
# MARC::Reader.new( File.new("marc_in_cp866.mrc", "r:cp866") )
|
140
140
|
#
|
141
|
-
# # However this
|
141
|
+
# # However this should be safe:
|
142
142
|
# MARC::Reader.new( "marc_in_cp866.mrc", :external_encoding => "cp866")
|
143
143
|
#
|
144
|
-
# # And this
|
144
|
+
# # And this should be safe, if you do want to transcode:
|
145
145
|
# MARC::Reader.new( "marc_in_cp866.mrc", :external_encoding => "cp866",
|
146
146
|
# :internal_encoding => "utf-8")
|
147
147
|
#
|
data/lib/marc/version.rb
CHANGED
data/lib/marc/xml_parsers.rb
CHANGED
@@ -1,4 +1,8 @@
|
|
1
1
|
module MARC
|
2
|
+
# Exception class to be thrown when an XML parser
|
3
|
+
# encounters an unrecoverable error.
|
4
|
+
class XMLParseError < StandardError; end
|
5
|
+
|
2
6
|
# The MagicReader will try to use the best available XML Parser at the
|
3
7
|
# time of initialization.
|
4
8
|
# The order is currently:
|
@@ -112,6 +116,10 @@ module MARC
|
|
112
116
|
@parser.parse(@handle)
|
113
117
|
end
|
114
118
|
end
|
119
|
+
|
120
|
+
def error(evt)
|
121
|
+
raise(XMLParseError, "XML parsing error: #{evt}")
|
122
|
+
end
|
115
123
|
|
116
124
|
|
117
125
|
def method_missing(methName, *args)
|
@@ -413,4 +421,4 @@ end
|
|
413
421
|
end
|
414
422
|
end # end of module
|
415
423
|
end # end of if jruby
|
416
|
-
end
|
424
|
+
end
|
data/test/tc_xml_error_handling.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'marc'
|
3
|
+
|
4
|
+
class BadXMLHandlingTestCase < Test::Unit::TestCase
|
5
|
+
|
6
|
+
def test_nokogiri_bad_xml
|
7
|
+
begin
|
8
|
+
require 'nokogiri'
|
9
|
+
rescue LoadError
|
10
|
+
omit("nokogiri not installed, cannot test")
|
11
|
+
end
|
12
|
+
omit("nokogiri (<1.10.2) under jruby doesn't support error handling: sparklemotion/nokogiri#1847") if RUBY_PLATFORM == 'java' && Gem::Version.new(Nokogiri::VERSION) < Gem::Version.new('1.10.2')
|
13
|
+
count = 0
|
14
|
+
reader = MARC::XMLReader.new('test/three-records-second-bad.xml', :parser => :nokogiri)
|
15
|
+
assert_raise MARC::XMLParseError do
|
16
|
+
reader.each do |rec|
|
17
|
+
count += 1 if rec['260']
|
18
|
+
end
|
19
|
+
end
|
20
|
+
assert_equal(1, count, 'should only be able to parse one record')
|
21
|
+
end
|
22
|
+
end
|
data/test/three-records-second-bad.xml
ADDED
@@ -0,0 +1,160 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<collection xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.loc.gov/MARC21/slim" xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd">
|
3
|
+
<record>
|
4
|
+
<leader> njm a22 uu 4500</leader>
|
5
|
+
<controlfield tag="001">afc99990058366</controlfield>
|
6
|
+
<controlfield tag="003">DLC</controlfield>
|
7
|
+
<controlfield tag="005">20071104155141.9</controlfield>
|
8
|
+
<controlfield tag="007">sd ummunniauub</controlfield>
|
9
|
+
<controlfield tag="008">071103s1939 xxufmnne||||||||| u eng||</controlfield>
|
10
|
+
<datafield tag="010" ind1=" " ind2=" ">
|
11
|
+
<subfield code="a">afc99990058366</subfield>
|
12
|
+
</datafield>
|
13
|
+
<datafield tag="040" ind1=" " ind2=" ">
|
14
|
+
<subfield code="a">DLC</subfield>
|
15
|
+
<subfield code="c">DLC</subfield>
|
16
|
+
</datafield>
|
17
|
+
<datafield tag="245" ind1="0" ind2="4">
|
18
|
+
<subfield code="a">The Texas ranger</subfield>
|
19
|
+
<subfield code="h">[sound recording] /</subfield>
|
20
|
+
<subfield code="c">Sung by Beale D. Taylor.</subfield>
|
21
|
+
</datafield>
|
22
|
+
<datafield tag="260" ind1=" " ind2=" ">
|
23
|
+
<subfield code="a">Medina, Texas,</subfield>
|
24
|
+
<subfield code="c">1939.</subfield>
|
25
|
+
</datafield>
|
26
|
+
<datafield tag="300" ind1=" " ind2=" ">
|
27
|
+
<subfield code="a">1 sound disc :</subfield>
|
28
|
+
<subfield code="b">analog, 33 1/3 rpm, mono. ;</subfield>
|
29
|
+
<subfield code="c">12 in.</subfield>
|
30
|
+
</datafield>
|
31
|
+
<datafield tag="651" ind1=" " ind2="0">
|
32
|
+
<subfield code="a">Medina</subfield>
|
33
|
+
<subfield code="z">Texas</subfield>
|
34
|
+
<subfield code="z">United States of America.</subfield>
|
35
|
+
</datafield>
|
36
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
37
|
+
<subfield code="a">Lomax, John Avery, 1867-1948</subfield>
|
38
|
+
<subfield code="e">Recording engineer.</subfield>
|
39
|
+
</datafield>
|
40
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
41
|
+
<subfield code="a">Lomax, Ruby T. (Ruby Terrill)</subfield>
|
42
|
+
<subfield code="e">Recording engineer.</subfield>
|
43
|
+
</datafield>
|
44
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
45
|
+
<subfield code="a">Taylor, Beale D.</subfield>
|
46
|
+
<subfield code="e">Singer.</subfield>
|
47
|
+
</datafield>
|
48
|
+
<datafield tag="852" ind1=" " ind2=" ">
|
49
|
+
<subfield code="a">American Folklife Center, Library of Congress</subfield>
|
50
|
+
</datafield>
|
51
|
+
<datafield tag="852" ind1=" " ind2=" ">
|
52
|
+
<subfield code="a">DLC</subfield>
|
53
|
+
</datafield>
|
54
|
+
</record>
|
55
|
+
<record>
|
56
|
+
<leader> njm a22 uu 4500</leader>
|
57
|
+
<controlfield tag="001">afc99990058366</controlfield>
|
58
|
+
<controlfield tag="003">DLC</controlfield>
|
59
|
+
<controlfield tag="005">20071104155141.9</controlfield>
|
60
|
+
<controlfield tag="007">sd ummunniauub</controlfield>
|
61
|
+
<controlfield tag="008">071103s1939 xxufmnne||||||||| u eng||</controlfield>
|
62
|
+
<datafield tag="010" ind1=" " ind2=" ">
|
63
|
+
<subfield code="a">afc99990058366</subfield>
|
64
|
+
</datafield>
|
65
|
+
<datafield tag="040" ind1=" " ind2=" ">
|
66
|
+
<subfield code="a">DLC</subfield>
|
67
|
+
<subfield code="c">DLC</subfield>
|
68
|
+
</datafield>
|
69
|
+
<datafield tag="245" ind1="0" ind2="4">
|
70
|
+
<subfield code="a">The Texas ranger</subfield>
|
71
|
+
<!-- invalid utf-8 bytes in the non-printing subfield code -->
|
72
|
+
<subfield code="">[sound recording] /</subfield>
|
73
|
+
<subfield code="c">Sung by Beale D. Taylor.</subfield>
|
74
|
+
</datafield>
|
75
|
+
<datafield tag="260" ind1=" " ind2=" ">
|
76
|
+
<subfield code="a">Medina, Texas,</subfield>
|
77
|
+
<subfield code="c">1939.</subfield>
|
78
|
+
</datafield>
|
79
|
+
<datafield tag="300" ind1=" " ind2=" ">
|
80
|
+
<subfield code="a">1 sound disc :</subfield>
|
81
|
+
<subfield code="b">analog, 33 1/3 rpm, mono. ;</subfield>
|
82
|
+
<subfield code="c">12 in.</subfield>
|
83
|
+
</datafield>
|
84
|
+
<datafield tag="651" ind1=" " ind2="0">
|
85
|
+
<subfield code="a">Medina</subfield>
|
86
|
+
<subfield code="z">Texas</subfield>
|
87
|
+
<subfield code="z">United States of America.</subfield>
|
88
|
+
</datafield>
|
89
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
90
|
+
<subfield code="a">Lomax, John Avery, 1867-1948</subfield>
|
91
|
+
<subfield code="e">Recording engineer.</subfield>
|
92
|
+
</datafield>
|
93
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
94
|
+
<subfield code="a">Lomax, Ruby T. (Ruby Terrill)</subfield>
|
95
|
+
<subfield code="e">Recording engineer.</subfield>
|
96
|
+
</datafield>
|
97
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
98
|
+
<subfield code="a">Taylor, Beale D.</subfield>
|
99
|
+
<subfield code="e">Singer.</subfield>
|
100
|
+
</datafield>
|
101
|
+
<datafield tag="852" ind1=" " ind2=" ">
|
102
|
+
<subfield code="a">American Folklife Center, Library of Congress</subfield>
|
103
|
+
</datafield>
|
104
|
+
<datafield tag="852" ind1=" " ind2=" ">
|
105
|
+
<subfield code="a">DLC</subfield>
|
106
|
+
</datafield>
|
107
|
+
</record>
|
108
|
+
<record>
|
109
|
+
<leader> njm a22 uu 4500</leader>
|
110
|
+
<controlfield tag="001">afc99990058366</controlfield>
|
111
|
+
<controlfield tag="003">DLC</controlfield>
|
112
|
+
<controlfield tag="005">20071104155141.9</controlfield>
|
113
|
+
<controlfield tag="007">sd ummunniauub</controlfield>
|
114
|
+
<controlfield tag="008">071103s1939 xxufmnne||||||||| u eng||</controlfield>
|
115
|
+
<datafield tag="010" ind1=" " ind2=" ">
|
116
|
+
<subfield code="a">afc99990058366</subfield>
|
117
|
+
</datafield>
|
118
|
+
<datafield tag="040" ind1=" " ind2=" ">
|
119
|
+
<subfield code="a">DLC</subfield>
|
120
|
+
<subfield code="c">DLC</subfield>
|
121
|
+
</datafield>
|
122
|
+
<datafield tag="245" ind1="0" ind2="4">
|
123
|
+
<subfield code="a">The Texas ranger</subfield>
|
124
|
+
<subfield code="h">[sound recording] /</subfield>
|
125
|
+
<subfield code="c">Sung by Beale D. Taylor.</subfield>
|
126
|
+
</datafield>
|
127
|
+
<datafield tag="260" ind1=" " ind2=" ">
|
128
|
+
<subfield code="a">Medina, Texas,</subfield>
|
129
|
+
<subfield code="c">1939.</subfield>
|
130
|
+
</datafield>
|
131
|
+
<datafield tag="300" ind1=" " ind2=" ">
|
132
|
+
<subfield code="a">1 sound disc :</subfield>
|
133
|
+
<subfield code="b">analog, 33 1/3 rpm, mono. ;</subfield>
|
134
|
+
<subfield code="c">12 in.</subfield>
|
135
|
+
</datafield>
|
136
|
+
<datafield tag="651" ind1=" " ind2="0">
|
137
|
+
<subfield code="a">Medina</subfield>
|
138
|
+
<subfield code="z">Texas</subfield>
|
139
|
+
<subfield code="z">United States of America.</subfield>
|
140
|
+
</datafield>
|
141
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
142
|
+
<subfield code="a">Lomax, John Avery, 1867-1948</subfield>
|
143
|
+
<subfield code="e">Recording engineer.</subfield>
|
144
|
+
</datafield>
|
145
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
146
|
+
<subfield code="a">Lomax, Ruby T. (Ruby Terrill)</subfield>
|
147
|
+
<subfield code="e">Recording engineer.</subfield>
|
148
|
+
</datafield>
|
149
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
150
|
+
<subfield code="a">Taylor, Beale D.</subfield>
|
151
|
+
<subfield code="e">Singer.</subfield>
|
152
|
+
</datafield>
|
153
|
+
<datafield tag="852" ind1=" " ind2=" ">
|
154
|
+
<subfield code="a">American Folklife Center, Library of Congress</subfield>
|
155
|
+
</datafield>
|
156
|
+
<datafield tag="852" ind1=" " ind2=" ">
|
157
|
+
<subfield code="a">DLC</subfield>
|
158
|
+
</datafield>
|
159
|
+
</record>
|
160
|
+
</collection>
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: marc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.2
|
4
|
+
version: 1.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kevin Clarke
|
@@ -13,7 +13,7 @@ authors:
|
|
13
13
|
autorequire: marc
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
|
-
date:
|
16
|
+
date: 2019-03-27 00:00:00.000000000 Z
|
17
17
|
dependencies:
|
18
18
|
- !ruby/object:Gem::Dependency
|
19
19
|
name: scrub_rb
|
@@ -106,6 +106,8 @@ files:
|
|
106
106
|
- test/tc_subfield.rb
|
107
107
|
- test/tc_writer.rb
|
108
108
|
- test/tc_xml.rb
|
109
|
+
- test/tc_xml_error_handling.rb
|
110
|
+
- test/three-records-second-bad.xml
|
109
111
|
- test/ts_marc.rb
|
110
112
|
- test/utf8.marc
|
111
113
|
- test/utf8_multirecord.marc
|
@@ -130,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
130
132
|
version: '0'
|
131
133
|
requirements: []
|
132
134
|
rubyforge_project:
|
133
|
-
rubygems_version: 2.6
|
135
|
+
rubygems_version: 2.7.6
|
134
136
|
signing_key:
|
135
137
|
specification_version: 4
|
136
138
|
summary: A ruby library for working with Machine Readable Cataloging
|