marc 1.1.1 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE/bug_report.md +30 -0
- data/.github/workflows/ruby.yml +24 -0
- data/.gitignore +17 -0
- data/.standard.yml +1 -0
- data/{Changes → CHANGELOG.md} +116 -30
- data/Gemfile +5 -0
- data/README.md +239 -46
- data/Rakefile +14 -14
- data/bin/marc +14 -0
- data/bin/marc2xml +17 -0
- data/examples/xml2marc.rb +10 -0
- data/lib/marc/constants.rb +3 -3
- data/lib/marc/controlfield.rb +35 -23
- data/lib/marc/datafield.rb +70 -63
- data/lib/marc/dublincore.rb +59 -41
- data/lib/marc/exception.rb +9 -1
- data/lib/marc/jsonl_reader.rb +33 -0
- data/lib/marc/jsonl_writer.rb +44 -0
- data/lib/marc/marc8/map_to_unicode.rb +16417 -16420
- data/lib/marc/marc8/to_unicode.rb +80 -87
- data/lib/marc/reader.rb +116 -124
- data/lib/marc/record.rb +72 -62
- data/lib/marc/subfield.rb +12 -10
- data/lib/marc/unsafe_xmlwriter.rb +93 -0
- data/lib/marc/version.rb +1 -1
- data/lib/marc/writer.rb +27 -30
- data/lib/marc/xml_parsers.rb +222 -197
- data/lib/marc/xmlreader.rb +131 -114
- data/lib/marc/xmlwriter.rb +93 -82
- data/lib/marc.rb +20 -18
- data/marc.gemspec +28 -0
- data/test/marc8/tc_marc8_mapping.rb +3 -3
- data/test/marc8/tc_to_unicode.rb +28 -34
- data/test/messed_up_leader.xml +9 -0
- data/test/tc_controlfield.rb +37 -34
- data/test/tc_datafield.rb +65 -60
- data/test/tc_dublincore.rb +9 -11
- data/test/tc_hash.rb +10 -13
- data/test/tc_jsonl.rb +19 -0
- data/test/tc_marchash.rb +17 -21
- data/test/tc_parsers.rb +108 -144
- data/test/tc_reader.rb +35 -36
- data/test/tc_reader_char_encodings.rb +149 -169
- data/test/tc_record.rb +143 -148
- data/test/tc_subfield.rb +14 -13
- data/test/tc_unsafe_xml.rb +95 -0
- data/test/tc_writer.rb +101 -108
- data/test/tc_xml.rb +101 -94
- data/test/tc_xml_error_handling.rb +7 -8
- data/test/ts_marc.rb +8 -8
- metadata +129 -22
data/test/tc_writer.rb
CHANGED
@@ -1,142 +1,135 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require "test/unit"
|
2
|
+
require "marc"
|
3
3
|
|
4
|
-
require
|
4
|
+
require "stringio"
|
5
5
|
|
6
6
|
class WriterTest < Test::Unit::TestCase
|
7
|
+
def test_writer
|
8
|
+
writer = MARC::Writer.new("test/writer.dat")
|
9
|
+
record = MARC::Record.new
|
10
|
+
record.append(MARC::DataField.new("245", "0", "1", ["a", "foo"]))
|
11
|
+
writer.write(record)
|
12
|
+
writer.close
|
7
13
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
writer.close()
|
14
|
-
|
15
|
-
# read it back to make sure
|
16
|
-
reader = MARC::Reader.new('test/writer.dat')
|
17
|
-
records = reader.entries()
|
18
|
-
assert_equal(records.length(), 1)
|
19
|
-
assert_equal(records[0], record)
|
20
|
-
|
21
|
-
# cleanup
|
22
|
-
File.unlink('test/writer.dat')
|
23
|
-
end
|
14
|
+
# read it back to make sure
|
15
|
+
reader = MARC::Reader.new("test/writer.dat")
|
16
|
+
records = reader.entries
|
17
|
+
assert_equal(records.length, 1)
|
18
|
+
assert_equal(records[0], record)
|
24
19
|
|
25
|
-
#
|
26
|
-
|
27
|
-
|
28
|
-
writer = MARC::Writer.new('test/writer.dat')
|
20
|
+
# cleanup
|
21
|
+
File.unlink("test/writer.dat")
|
22
|
+
end
|
29
23
|
|
24
|
+
# Only in ruby 1.9
|
25
|
+
if "".respond_to?(:encoding)
|
26
|
+
def test_writer_bad_encoding
|
27
|
+
writer = MARC::Writer.new("test/writer.dat")
|
30
28
|
|
31
|
-
|
32
|
-
|
33
|
-
|
29
|
+
# MARC::Writer should just happily write out whatever bytes you give it, even
|
30
|
+
# mixing encodings that can't be mixed. We ran into an actual example mixing
|
31
|
+
# MARC8 (tagged ruby binary) and UTF8, we want it to be written out.
|
34
32
|
|
35
|
-
|
33
|
+
record = MARC::Record.new
|
36
34
|
|
37
|
-
|
38
|
-
|
35
|
+
record.append MARC::DataField.new("700", "0", " ", ["a", "Nhouy Abhay,".force_encoding("BINARY")], ["c", "Th\xE5ao,".force_encoding("BINARY")], ["d", "1909-"])
|
36
|
+
record.append MARC::DataField.new("700", "0", " ", ["a", "Somchin P\xF8\xE5o. Ngin,".force_encoding("BINARY")])
|
39
37
|
|
40
|
-
|
41
|
-
|
38
|
+
record.append MARC::DataField.new("100", "0", "0", ["a", "\xE5angkham. ".force_encoding("BINARY")])
|
39
|
+
record.append MARC::DataField.new("245", "1", "0", ["b", "chef-d'oeuvre de la litt\xE2erature lao".force_encoding("BINARY")])
|
42
40
|
|
43
|
-
|
44
|
-
|
41
|
+
# One in UTF8 and marked
|
42
|
+
record.append MARC::DataField.new("999", "0", "1", ["a", "chef-d'ocuvre de la litt\xC3\xA9rature".force_encoding("UTF-8")])
|
45
43
|
|
46
|
-
|
47
|
-
|
44
|
+
writer.write(record)
|
45
|
+
writer.close
|
46
|
+
ensure
|
47
|
+
File.unlink("test/writer.dat")
|
48
|
+
end
|
49
|
+
end
|
48
50
|
|
49
|
-
|
50
|
-
|
51
|
-
|
51
|
+
def test_write_too_long_iso2709
|
52
|
+
too_long_record = MARC::Record.new
|
53
|
+
1.upto(1001) do
|
54
|
+
too_long_record.append MARC::DataField.new("500", " ", " ", ["a", "A really long record.1234567890123456789012345678901234567890123456789012345678901234567890123456789"])
|
52
55
|
end
|
53
56
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
too_long_record.append MARC::DataField.new("500", ' ', ' ', ['a', 'A really long record.1234567890123456789012345678901234567890123456789012345678901234567890123456789'])
|
58
|
-
end
|
57
|
+
wbuffer = StringIO.new("", "w")
|
58
|
+
writer = MARC::Writer.new(wbuffer)
|
59
|
+
writer.allow_oversized = true
|
59
60
|
|
60
|
-
|
61
|
-
|
62
|
-
writer.allow_oversized = true
|
61
|
+
writer.write(too_long_record)
|
62
|
+
writer.close
|
63
63
|
|
64
|
-
|
65
|
-
writer.close
|
64
|
+
assert_equal "00000", wbuffer.string.slice(0, 5), "zero'd out length bytes when too long"
|
66
65
|
|
67
|
-
|
66
|
+
rbuffer = StringIO.new(wbuffer.string.dup)
|
68
67
|
|
69
|
-
|
68
|
+
# Regular reader won't read our illegal record.
|
69
|
+
# assert_raise(NoMethodError) do
|
70
|
+
# reader = MARC::Reader.new(rbuffer)
|
71
|
+
# reader.first
|
72
|
+
# end
|
70
73
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
# reader.first
|
75
|
-
#end
|
74
|
+
# Forgiving reader will, round trippable
|
75
|
+
new_record = MARC::Reader.decode(rbuffer.string, forgiving: true)
|
76
|
+
assert_equal too_long_record, new_record, "Too long record round-trippable with forgiving mode"
|
76
77
|
|
77
|
-
|
78
|
-
|
79
|
-
|
78
|
+
# Test in the middle of a MARC file
|
79
|
+
good_record = MARC::Record.new
|
80
|
+
good_record.append MARC::DataField.new("500", " ", " ", ["a", "A short record"])
|
81
|
+
wbuffer = StringIO.new("", "w")
|
82
|
+
writer = MARC::Writer.new(wbuffer)
|
83
|
+
writer.allow_oversized = true
|
80
84
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
wbuffer = StringIO.new("", "w")
|
85
|
-
writer = MARC::Writer.new(wbuffer)
|
86
|
-
writer.allow_oversized = true
|
85
|
+
writer.write(good_record)
|
86
|
+
writer.write(too_long_record)
|
87
|
+
writer.write(good_record)
|
87
88
|
|
88
|
-
|
89
|
-
|
90
|
-
|
89
|
+
rbuffer = StringIO.new(wbuffer.string.dup)
|
90
|
+
reader = MARC::ForgivingReader.new(rbuffer)
|
91
|
+
records = reader.to_a
|
91
92
|
|
92
|
-
|
93
|
-
|
94
|
-
|
93
|
+
assert_equal 3, records.length
|
94
|
+
assert_equal good_record, records[0]
|
95
|
+
assert_equal good_record, records[2]
|
96
|
+
assert_equal too_long_record, records[1]
|
97
|
+
end
|
95
98
|
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
99
|
+
def test_raises_on_too_long_if_configured
|
100
|
+
too_long_record = MARC::Record.new
|
101
|
+
1.upto(1001) do
|
102
|
+
too_long_record.append MARC::DataField.new("500", " ", " ", ["a", "A really long record.1234567890123456789012345678901234567890123456789012345678901234567890123456789"])
|
100
103
|
end
|
101
104
|
|
102
|
-
|
103
|
-
|
104
|
-
1.upto(1001) do
|
105
|
-
too_long_record.append MARC::DataField.new("500", ' ', ' ', ['a', 'A really long record.1234567890123456789012345678901234567890123456789012345678901234567890123456789'])
|
106
|
-
end
|
107
|
-
|
108
|
-
wbuffer = StringIO.new("", "w")
|
109
|
-
writer = MARC::Writer.new(wbuffer)
|
110
|
-
|
111
|
-
assert_raise(MARC::Exception) do
|
112
|
-
writer.write too_long_record
|
113
|
-
end
|
105
|
+
wbuffer = StringIO.new("", "w")
|
106
|
+
writer = MARC::Writer.new(wbuffer)
|
114
107
|
|
108
|
+
assert_raise(MARC::Exception) do
|
109
|
+
writer.write too_long_record
|
115
110
|
end
|
111
|
+
end
|
116
112
|
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
rec.to_marc
|
123
|
-
end
|
113
|
+
def test_forgiving_writer
|
114
|
+
marc = "00305cam a2200133 a 4500001000700000003000900007005001700016008004100033008004100074035002500115245001700140909001000157909000400167\036635145\036UK-BiLMS\03620060329173705.0\036s1982iieng6 000 0 eng||\036060116|||||||||xxk eng||\036 \037a(UK-BiLMS)M0017366ZW\03600\037aTest record.\036 \037aa\037b\037c\036\037b0\036\035\000"
|
115
|
+
rec = MARC::Record.new_from_marc(marc)
|
116
|
+
assert_nothing_raised do
|
117
|
+
rec.to_marc
|
124
118
|
end
|
119
|
+
end
|
125
120
|
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
# Make sure the one we wrote out then read in again
|
136
|
-
# is the same as the one we read the first time
|
137
|
-
# Looks like "==" is over-ridden to do that. Don't ever change, #==
|
138
|
-
assert_equal record, read_back_record, "Round-tripped record must equal original record"
|
139
|
-
end
|
140
|
-
|
121
|
+
def test_unicode_roundtrip
|
122
|
+
record = MARC::Reader.new("test/utf8.marc", external_encoding: "UTF-8").first
|
123
|
+
|
124
|
+
writer = MARC::Writer.new("test/writer.dat")
|
125
|
+
writer.write(record)
|
126
|
+
writer.close
|
127
|
+
|
128
|
+
read_back_record = MARC::Reader.new("test/writer.dat", external_encoding: "UTF-8").first
|
141
129
|
|
130
|
+
# Make sure the one we wrote out then read in again
|
131
|
+
# is the same as the one we read the first time
|
132
|
+
# Looks like "==" is over-ridden to do that. Don't ever change, #==
|
133
|
+
assert_equal record, read_back_record, "Round-tripped record must equal original record"
|
134
|
+
end
|
142
135
|
end
|
data/test/tc_xml.rb
CHANGED
@@ -1,28 +1,26 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
1
|
+
require "test/unit"
|
2
|
+
require "marc"
|
3
|
+
require "stringio"
|
4
|
+
require "warning"
|
4
5
|
|
5
6
|
class XMLTest < Test::Unit::TestCase
|
7
|
+
Warning.ignore(/is deprecated and will be removed in a future version of ruby-marc/)
|
8
|
+
Warning.ignore(/setting Encoding.default_internal/)
|
6
9
|
def setup
|
7
10
|
@parsers = [:rexml]
|
8
11
|
begin
|
9
|
-
require
|
12
|
+
require "nokogiri"
|
10
13
|
@parsers << :nokogiri
|
11
14
|
rescue LoadError
|
12
15
|
end
|
13
16
|
unless defined? JRUBY_VERSION
|
14
17
|
begin
|
15
|
-
require
|
18
|
+
require "xml"
|
16
19
|
@parsers << :libxml
|
17
20
|
rescue LoadError
|
18
21
|
end
|
19
22
|
end
|
20
23
|
if defined? JRUBY_VERSION
|
21
|
-
begin
|
22
|
-
require 'jrexml'
|
23
|
-
@parsers << :jrexml
|
24
|
-
rescue LoadError
|
25
|
-
end
|
26
24
|
begin
|
27
25
|
java.lang.Class.forName("javax.xml.stream.XMLInputFactory")
|
28
26
|
@parsers << :jstax
|
@@ -31,156 +29,165 @@ class XMLTest < Test::Unit::TestCase
|
|
31
29
|
end
|
32
30
|
end
|
33
31
|
|
34
|
-
|
35
|
-
|
36
|
-
@parsers.each do | parser |
|
37
|
-
puts "\nRunning test_xml_entities with: #{parser}.\n"
|
32
|
+
def test_xml_entities
|
33
|
+
@parsers.each do |parser|
|
38
34
|
xml_entities_test(parser)
|
39
35
|
end
|
40
36
|
end
|
41
|
-
|
37
|
+
|
42
38
|
def xml_entities_test(parser)
|
43
39
|
r1 = MARC::Record.new
|
44
|
-
r1 << MARC::DataField.new(
|
40
|
+
r1 << MARC::DataField.new("245", "0", "0", ["a", "foo & bar & baz"])
|
45
41
|
xml = r1.to_xml.to_s
|
46
|
-
assert_match
|
42
|
+
assert_match(/foo &amp; bar &amp; baz/, xml, "Failed with parser '#{parser}'")
|
47
43
|
|
48
|
-
reader = MARC::XMLReader.new(StringIO.new(xml), :parser
|
44
|
+
reader = MARC::XMLReader.new(StringIO.new(xml), parser: parser)
|
49
45
|
r2 = reader.entries[0]
|
50
|
-
assert_equal
|
46
|
+
assert_equal("foo & bar & baz", r2["245"]["a"], "Failed with parser '#{parser}'")
|
51
47
|
end
|
52
|
-
|
48
|
+
|
53
49
|
def test_batch
|
54
|
-
@parsers.each do |
|
55
|
-
puts "\nRunning test_batch with: #{parser}.\n"
|
50
|
+
@parsers.each do |parser|
|
56
51
|
batch_test(parser)
|
57
|
-
end
|
52
|
+
end
|
58
53
|
end
|
59
|
-
|
54
|
+
|
60
55
|
def batch_test(parser)
|
61
|
-
reader = MARC::XMLReader.new(
|
56
|
+
reader = MARC::XMLReader.new("test/batch.xml", parser: parser)
|
62
57
|
count = 0
|
63
|
-
|
58
|
+
reader.each do |record|
|
64
59
|
count += 1
|
65
|
-
assert_instance_of(MARC::Record, record)
|
60
|
+
assert_instance_of(MARC::Record, record, "Failed with parser '#{parser}'")
|
66
61
|
end
|
67
|
-
assert_equal(count, 2)
|
62
|
+
assert_equal(count, 2, "Failed with parser '#{parser}'")
|
68
63
|
end
|
69
|
-
|
64
|
+
|
70
65
|
def test_read_string
|
71
|
-
@parsers.each do |
|
72
|
-
puts "\nRunning test_read_string with: #{parser}.\n"
|
66
|
+
@parsers.each do |parser|
|
73
67
|
read_string_test(parser)
|
74
|
-
end
|
68
|
+
end
|
75
69
|
end
|
76
70
|
|
77
71
|
def read_string_test(parser)
|
78
|
-
xml = File.new(
|
79
|
-
reader = MARC::XMLReader.new(StringIO.new(xml), :parser
|
80
|
-
assert_equal 2, reader.entries.length
|
72
|
+
xml = File.new("test/batch.xml").read
|
73
|
+
reader = MARC::XMLReader.new(StringIO.new(xml), parser: parser)
|
74
|
+
assert_equal 2, reader.entries.length, "Failed with parser '#{parser}'"
|
81
75
|
end
|
82
|
-
|
76
|
+
|
83
77
|
def test_non_numeric_fields
|
84
|
-
@parsers.each do |
|
85
|
-
puts "\nRunning test_non_numeric_fields with: #{parser}.\n"
|
78
|
+
@parsers.each do |parser|
|
86
79
|
non_numeric_fields_test(parser)
|
87
80
|
end
|
88
81
|
end
|
89
|
-
|
82
|
+
|
90
83
|
def non_numeric_fields_test(parser)
|
91
|
-
reader = MARC::XMLReader.new(
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
end
|
98
|
-
assert_equal(1, count)
|
99
|
-
assert_equal('9780061317842', record['ISB']['a'])
|
100
|
-
assert_equal('1', record['LOC']['9'])
|
84
|
+
reader = MARC::XMLReader.new("test/non-numeric.xml", parser: parser)
|
85
|
+
count = 0
|
86
|
+
record = nil
|
87
|
+
reader.each do |rec|
|
88
|
+
count += 1
|
89
|
+
record = rec
|
101
90
|
end
|
91
|
+
assert_equal(1, count, "Failed with parser '#{parser}'")
|
92
|
+
assert_equal("9780061317842", record["ISB"]["a"], "Failed with parser '#{parser}'")
|
93
|
+
assert_equal("1", record["LOC"]["9"], "Failed with parser '#{parser}'")
|
94
|
+
end
|
102
95
|
|
103
96
|
def test_read_no_leading_zero_write_leading_zero
|
104
|
-
@parsers.each do |
|
105
|
-
puts "\nRunning test_read_no_leading_zero_write_leading_zero with: #{parser}.\n"
|
97
|
+
@parsers.each do |parser|
|
106
98
|
read_no_leading_zero_write_leading_zero_test(parser)
|
107
|
-
end
|
99
|
+
end
|
108
100
|
end
|
109
|
-
|
101
|
+
|
110
102
|
def read_no_leading_zero_write_leading_zero_test(parser)
|
111
|
-
reader = MARC::XMLReader.new(
|
103
|
+
reader = MARC::XMLReader.new("test/no-leading-zero.xml", parser: parser)
|
112
104
|
record = reader.to_a[0]
|
113
|
-
assert_equal("042 zz $a dc ", record[
|
105
|
+
assert_equal("042 zz $a dc ", record["042"].to_s, "Failed with parser '#{parser}'")
|
114
106
|
end
|
115
107
|
|
116
108
|
def test_leader_from_xml
|
117
|
-
@parsers.each do |
|
118
|
-
puts "\nRunning test_leader_from_xml with: #{parser}.\n"
|
109
|
+
@parsers.each do |parser|
|
119
110
|
leader_from_xml_test(parser)
|
120
111
|
end
|
121
|
-
end
|
112
|
+
end
|
122
113
|
|
123
114
|
def leader_from_xml_test(parser)
|
124
|
-
reader = MARC::XMLReader.new(
|
115
|
+
reader = MARC::XMLReader.new("test/one.xml", parser: parser)
|
125
116
|
record = reader.entries[0]
|
126
|
-
assert_equal
|
117
|
+
assert_equal "     njm a22     uu 4500", record.leader, "Failed with parser '#{parser}'"
|
118
|
+
|
127
119
|
# serializing as MARC should populate the record length and directory offset
|
128
120
|
record = MARC::Record.new_from_marc(record.to_marc)
|
129
|
-
assert_equal
|
121
|
+
assert_equal "00734njm a2200217uu 4500", record.leader, "Failed with parser '#{parser}'"
|
130
122
|
end
|
131
|
-
|
123
|
+
|
132
124
|
def test_read_write
|
133
|
-
@parsers.each do |
|
134
|
-
puts "\nRunning test_read_write with: #{parser}.\n"
|
125
|
+
@parsers.each do |parser|
|
135
126
|
read_write_test(parser)
|
136
127
|
end
|
137
|
-
end
|
128
|
+
end
|
138
129
|
|
139
130
|
def read_write_test(parser)
|
140
131
|
record1 = MARC::Record.new
|
141
|
-
record1.leader =
|
142
|
-
record1.append MARC::ControlField.new(
|
143
|
-
record1.append MARC::DataField.new(
|
144
|
-
[
|
145
|
-
record1.append MARC::DataField.new(
|
146
|
-
[
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
writer = MARC::XMLWriter.new('test/test.xml', :stylesheet => 'style.xsl')
|
152
|
-
writer.write(dup_record)
|
132
|
+
record1.leader = "00925njm  22002777a 4500"
|
133
|
+
record1.append MARC::ControlField.new("007", "sdubumennmplu")
|
134
|
+
record1.append MARC::DataField.new("245", "0", "4",
|
135
|
+
["a", "The Great Ray Charles"], ["h", "[sound recording]."])
|
136
|
+
record1.append MARC::DataField.new("998", " ", " ",
|
137
|
+
["^", "Valid local subfield"])
|
138
|
+
|
139
|
+
writer = MARC::XMLWriter.new("test/test.xml", stylesheet: "style.xsl")
|
140
|
+
writer.write(record1)
|
153
141
|
writer.close
|
154
142
|
|
155
|
-
xml = File.read(
|
156
|
-
assert_match
|
157
|
-
assert_match
|
143
|
+
xml = File.read("test/test.xml")
|
144
|
+
assert_match(/<controlfield tag='007'>sdubumennmplu<\/controlfield>/, xml, "Failed with parser '#{parser}'")
|
145
|
+
assert_match(/<\?xml-stylesheet type="text\/xsl" href="style.xsl"\?>/, xml, "Failed with parser '#{parser}'")
|
158
146
|
|
159
|
-
reader = MARC::XMLReader.new(
|
147
|
+
reader = MARC::XMLReader.new("test/test.xml", parser: parser)
|
160
148
|
record2 = reader.entries[0]
|
161
|
-
assert_equal(record1, record2)
|
162
|
-
|
163
|
-
File.unlink(
|
149
|
+
assert_equal(record1, record2, "Failed with parser '#{parser}'")
|
150
|
+
ensure
|
151
|
+
File.unlink("test/test.xml")
|
164
152
|
end
|
165
|
-
|
153
|
+
|
166
154
|
def test_xml_enumerator
|
167
|
-
@parsers.each do |
|
168
|
-
puts "\nRunning test_xml_enumerator with: #{parser}.\n"
|
155
|
+
@parsers.each do |parser|
|
169
156
|
xml_enumerator_test(parser)
|
170
157
|
end
|
171
158
|
end
|
172
|
-
|
173
|
-
|
159
|
+
|
174
160
|
def xml_enumerator_test(parser)
|
175
161
|
# confusingly, test/batch.xml only has two records, not 10 like batch.dat
|
176
|
-
reader = MARC::XMLReader.new(
|
162
|
+
reader = MARC::XMLReader.new("test/batch.xml", parser: parser)
|
177
163
|
iter = reader.each
|
178
164
|
r = iter.next
|
179
|
-
assert_instance_of(MARC::Record, r)
|
165
|
+
assert_instance_of(MARC::Record, r, "Failed with parser '#{parser}'")
|
180
166
|
iter.next # total of two records
|
181
|
-
|
167
|
+
assert_raise(StopIteration, "Failed with parser '#{parser}'") { iter.next }
|
182
168
|
end
|
183
|
-
|
184
169
|
|
185
|
-
|
170
|
+
def test_truncated_leader_roundtripping
|
171
|
+
record1 = MARC::Record.new
|
172
|
+
record1.leader = "00925njm  22002777a"
|
186
173
|
|
174
|
+
writer = MARC::XMLWriter.new("test/test.xml", stylesheet: "style.xsl")
|
175
|
+
writer.write(record1)
|
176
|
+
writer.close
|
177
|
+
|
178
|
+
reader = MARC::XMLReader.new("test/test.xml")
|
179
|
+
record2 = reader.entries[0]
|
180
|
+
|
181
|
+
assert_equal("00925njm  22002777a 4500", record2.leader)
|
182
|
+
ensure
|
183
|
+
File.unlink("test/test.xml")
|
184
|
+
end
|
185
|
+
|
186
|
+
def test_xml_weird_leader
|
187
|
+
@parsers.each do |parser|
|
188
|
+
reader = MARC::XMLReader.new("test/messed_up_leader.xml", parser: parser)
|
189
|
+
record = reader.first
|
190
|
+
assert_equal(record.leader, "01301nam a22003618< 4500", "Failed with parser '#{parser}'")
|
191
|
+
end
|
192
|
+
end
|
193
|
+
end
|
@@ -1,22 +1,21 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require "test/unit"
|
2
|
+
require "marc"
|
3
3
|
|
4
4
|
class BadXMLHandlingTestCase < Test::Unit::TestCase
|
5
|
-
|
6
5
|
def test_nokogiri_bad_xml
|
7
6
|
begin
|
8
|
-
require
|
7
|
+
require "nokogiri"
|
9
8
|
rescue LoadError
|
10
9
|
omit("nokogiri not installed, cannot test")
|
11
10
|
end
|
12
|
-
omit("nokogiri (<1.10.2) under jruby doesn't support error handling: sparklemotion/nokogiri#1847") if RUBY_PLATFORM ==
|
11
|
+
omit("nokogiri (<1.10.2) under jruby doesn't support error handling: sparklemotion/nokogiri#1847") if RUBY_PLATFORM == "java" && Gem::Version.new(Nokogiri::VERSION) < Gem::Version.new("1.10.2")
|
13
12
|
count = 0
|
14
|
-
reader = MARC::XMLReader.new(
|
13
|
+
reader = MARC::XMLReader.new("test/three-records-second-bad.xml", parser: :nokogiri)
|
15
14
|
assert_raise MARC::XMLParseError do
|
16
15
|
reader.each do |rec|
|
17
|
-
count += 1 if rec[
|
16
|
+
count += 1 if rec["260"]
|
18
17
|
end
|
19
18
|
end
|
20
|
-
assert_equal(1, count,
|
19
|
+
assert_equal(1, count, "should only be able to parse one record")
|
21
20
|
end
|
22
21
|
end
|
data/test/ts_marc.rb
CHANGED
@@ -4,11 +4,11 @@
|
|
4
4
|
# not already installed one
|
5
5
|
$LOAD_PATH.unshift("lib")
|
6
6
|
|
7
|
-
require
|
8
|
-
require
|
9
|
-
require
|
10
|
-
require
|
11
|
-
require
|
12
|
-
require
|
13
|
-
require
|
14
|
-
require
|
7
|
+
require "test/unit"
|
8
|
+
require "test/tc_subfield"
|
9
|
+
require "test/tc_datafield"
|
10
|
+
require "test/tc_controlfield"
|
11
|
+
require "test/tc_record"
|
12
|
+
require "test/tc_reader"
|
13
|
+
require "test/tc_writer"
|
14
|
+
require "test/tc_xml"
|