marc 1.0.4 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE/bug_report.md +30 -0
- data/.github/workflows/ruby.yml +24 -0
- data/.gitignore +17 -0
- data/.standard.yml +1 -0
- data/{Changes → CHANGELOG.md} +106 -29
- data/Gemfile +15 -0
- data/README.md +240 -47
- data/Rakefile +14 -14
- data/bin/marc +14 -0
- data/bin/marc2xml +17 -0
- data/examples/xml2marc.rb +10 -0
- data/lib/marc/constants.rb +3 -3
- data/lib/marc/controlfield.rb +35 -23
- data/lib/marc/datafield.rb +70 -63
- data/lib/marc/dublincore.rb +59 -41
- data/lib/marc/exception.rb +9 -1
- data/lib/marc/jsonl_reader.rb +33 -0
- data/lib/marc/jsonl_writer.rb +44 -0
- data/lib/marc/marc8/map_to_unicode.rb +16417 -16420
- data/lib/marc/marc8/to_unicode.rb +80 -86
- data/lib/marc/reader.rb +119 -121
- data/lib/marc/record.rb +72 -62
- data/lib/marc/subfield.rb +12 -10
- data/lib/marc/unsafe_xmlwriter.rb +93 -0
- data/lib/marc/version.rb +1 -1
- data/lib/marc/writer.rb +27 -30
- data/lib/marc/xml_parsers.rb +222 -197
- data/lib/marc/xmlreader.rb +131 -114
- data/lib/marc/xmlwriter.rb +93 -81
- data/lib/marc.rb +20 -18
- data/marc.gemspec +23 -0
- data/test/marc8/tc_marc8_mapping.rb +3 -3
- data/test/marc8/tc_to_unicode.rb +28 -32
- data/test/messed_up_leader.xml +9 -0
- data/test/tc_controlfield.rb +37 -34
- data/test/tc_datafield.rb +65 -60
- data/test/tc_dublincore.rb +9 -11
- data/test/tc_hash.rb +10 -13
- data/test/tc_jsonl.rb +19 -0
- data/test/tc_marchash.rb +17 -21
- data/test/tc_parsers.rb +108 -144
- data/test/tc_reader.rb +35 -36
- data/test/tc_reader_char_encodings.rb +149 -169
- data/test/tc_record.rb +143 -148
- data/test/tc_subfield.rb +14 -13
- data/test/tc_unsafe_xml.rb +95 -0
- data/test/tc_writer.rb +101 -108
- data/test/tc_xml.rb +99 -87
- data/test/tc_xml_error_handling.rb +7 -8
- data/test/ts_marc.rb +8 -8
- metadata +94 -9
data/test/tc_writer.rb
CHANGED
@@ -1,142 +1,135 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require "test/unit"
|
2
|
+
require "marc"
|
3
3
|
|
4
|
-
require
|
4
|
+
require "stringio"
|
5
5
|
|
6
6
|
class WriterTest < Test::Unit::TestCase
|
7
|
+
def test_writer
|
8
|
+
writer = MARC::Writer.new("test/writer.dat")
|
9
|
+
record = MARC::Record.new
|
10
|
+
record.append(MARC::DataField.new("245", "0", "1", ["a", "foo"]))
|
11
|
+
writer.write(record)
|
12
|
+
writer.close
|
7
13
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
writer.close()
|
14
|
-
|
15
|
-
# read it back to make sure
|
16
|
-
reader = MARC::Reader.new('test/writer.dat')
|
17
|
-
records = reader.entries()
|
18
|
-
assert_equal(records.length(), 1)
|
19
|
-
assert_equal(records[0], record)
|
20
|
-
|
21
|
-
# cleanup
|
22
|
-
File.unlink('test/writer.dat')
|
23
|
-
end
|
14
|
+
# read it back to make sure
|
15
|
+
reader = MARC::Reader.new("test/writer.dat")
|
16
|
+
records = reader.entries
|
17
|
+
assert_equal(records.length, 1)
|
18
|
+
assert_equal(records[0], record)
|
24
19
|
|
25
|
-
#
|
26
|
-
|
27
|
-
|
28
|
-
writer = MARC::Writer.new('test/writer.dat')
|
20
|
+
# cleanup
|
21
|
+
File.unlink("test/writer.dat")
|
22
|
+
end
|
29
23
|
|
24
|
+
# Only in ruby 1.9
|
25
|
+
if "".respond_to?(:encoding)
|
26
|
+
def test_writer_bad_encoding
|
27
|
+
writer = MARC::Writer.new("test/writer.dat")
|
30
28
|
|
31
|
-
|
32
|
-
|
33
|
-
|
29
|
+
# MARC::Writer should just happily write out whatever bytes you give it, even
|
30
|
+
# mixing encodings that can't be mixed. We ran into an actual example mixing
|
31
|
+
# MARC8 (tagged ruby binary) and UTF8, we want it to be written out.
|
34
32
|
|
35
|
-
|
33
|
+
record = MARC::Record.new
|
36
34
|
|
37
|
-
|
38
|
-
|
35
|
+
record.append MARC::DataField.new("700", "0", " ", ["a", "Nhouy Abhay,".force_encoding("BINARY")], ["c", "Th\xE5ao,".force_encoding("BINARY")], ["d", "1909-"])
|
36
|
+
record.append MARC::DataField.new("700", "0", " ", ["a", "Somchin P\xF8\xE5o. Ngin,".force_encoding("BINARY")])
|
39
37
|
|
40
|
-
|
41
|
-
|
38
|
+
record.append MARC::DataField.new("100", "0", "0", ["a", "\xE5angkham. ".force_encoding("BINARY")])
|
39
|
+
record.append MARC::DataField.new("245", "1", "0", ["b", "chef-d'oeuvre de la litt\xE2erature lao".force_encoding("BINARY")])
|
42
40
|
|
43
|
-
|
44
|
-
|
41
|
+
# One in UTF8 and marked
|
42
|
+
record.append MARC::DataField.new("999", "0", "1", ["a", "chef-d'ocuvre de la litt\xC3\xA9rature".force_encoding("UTF-8")])
|
45
43
|
|
46
|
-
|
47
|
-
|
44
|
+
writer.write(record)
|
45
|
+
writer.close
|
46
|
+
ensure
|
47
|
+
File.unlink("test/writer.dat")
|
48
|
+
end
|
49
|
+
end
|
48
50
|
|
49
|
-
|
50
|
-
|
51
|
-
|
51
|
+
def test_write_too_long_iso2709
|
52
|
+
too_long_record = MARC::Record.new
|
53
|
+
1.upto(1001) do
|
54
|
+
too_long_record.append MARC::DataField.new("500", " ", " ", ["a", "A really long record.1234567890123456789012345678901234567890123456789012345678901234567890123456789"])
|
52
55
|
end
|
53
56
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
too_long_record.append MARC::DataField.new("500", ' ', ' ', ['a', 'A really long record.1234567890123456789012345678901234567890123456789012345678901234567890123456789'])
|
58
|
-
end
|
57
|
+
wbuffer = StringIO.new("", "w")
|
58
|
+
writer = MARC::Writer.new(wbuffer)
|
59
|
+
writer.allow_oversized = true
|
59
60
|
|
60
|
-
|
61
|
-
|
62
|
-
writer.allow_oversized = true
|
61
|
+
writer.write(too_long_record)
|
62
|
+
writer.close
|
63
63
|
|
64
|
-
|
65
|
-
writer.close
|
64
|
+
assert_equal "00000", wbuffer.string.slice(0, 5), "zero'd out length bytes when too long"
|
66
65
|
|
67
|
-
|
66
|
+
rbuffer = StringIO.new(wbuffer.string.dup)
|
68
67
|
|
69
|
-
|
68
|
+
# Regular reader won't read our illegal record.
|
69
|
+
# assert_raise(NoMethodError) do
|
70
|
+
# reader = MARC::Reader.new(rbuffer)
|
71
|
+
# reader.first
|
72
|
+
# end
|
70
73
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
# reader.first
|
75
|
-
#end
|
74
|
+
# Forgiving reader will, round trippable
|
75
|
+
new_record = MARC::Reader.decode(rbuffer.string, forgiving: true)
|
76
|
+
assert_equal too_long_record, new_record, "Too long record round-trippable with forgiving mode"
|
76
77
|
|
77
|
-
|
78
|
-
|
79
|
-
|
78
|
+
# Test in the middle of a MARC file
|
79
|
+
good_record = MARC::Record.new
|
80
|
+
good_record.append MARC::DataField.new("500", " ", " ", ["a", "A short record"])
|
81
|
+
wbuffer = StringIO.new("", "w")
|
82
|
+
writer = MARC::Writer.new(wbuffer)
|
83
|
+
writer.allow_oversized = true
|
80
84
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
wbuffer = StringIO.new("", "w")
|
85
|
-
writer = MARC::Writer.new(wbuffer)
|
86
|
-
writer.allow_oversized = true
|
85
|
+
writer.write(good_record)
|
86
|
+
writer.write(too_long_record)
|
87
|
+
writer.write(good_record)
|
87
88
|
|
88
|
-
|
89
|
-
|
90
|
-
|
89
|
+
rbuffer = StringIO.new(wbuffer.string.dup)
|
90
|
+
reader = MARC::ForgivingReader.new(rbuffer)
|
91
|
+
records = reader.to_a
|
91
92
|
|
92
|
-
|
93
|
-
|
94
|
-
|
93
|
+
assert_equal 3, records.length
|
94
|
+
assert_equal good_record, records[0]
|
95
|
+
assert_equal good_record, records[2]
|
96
|
+
assert_equal too_long_record, records[1]
|
97
|
+
end
|
95
98
|
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
99
|
+
def test_raises_on_too_long_if_configured
|
100
|
+
too_long_record = MARC::Record.new
|
101
|
+
1.upto(1001) do
|
102
|
+
too_long_record.append MARC::DataField.new("500", " ", " ", ["a", "A really long record.1234567890123456789012345678901234567890123456789012345678901234567890123456789"])
|
100
103
|
end
|
101
104
|
|
102
|
-
|
103
|
-
|
104
|
-
1.upto(1001) do
|
105
|
-
too_long_record.append MARC::DataField.new("500", ' ', ' ', ['a', 'A really long record.1234567890123456789012345678901234567890123456789012345678901234567890123456789'])
|
106
|
-
end
|
107
|
-
|
108
|
-
wbuffer = StringIO.new("", "w")
|
109
|
-
writer = MARC::Writer.new(wbuffer)
|
110
|
-
|
111
|
-
assert_raise(MARC::Exception) do
|
112
|
-
writer.write too_long_record
|
113
|
-
end
|
105
|
+
wbuffer = StringIO.new("", "w")
|
106
|
+
writer = MARC::Writer.new(wbuffer)
|
114
107
|
|
108
|
+
assert_raise(MARC::Exception) do
|
109
|
+
writer.write too_long_record
|
115
110
|
end
|
111
|
+
end
|
116
112
|
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
rec.to_marc
|
123
|
-
end
|
113
|
+
def test_forgiving_writer
|
114
|
+
marc = "00305cam a2200133 a 4500001000700000003000900007005001700016008004100033008004100074035002500115245001700140909001000157909000400167\036635145\036UK-BiLMS\03620060329173705.0\036s1982iieng6 000 0 eng||\036060116|||||||||xxk eng||\036 \037a(UK-BiLMS)M0017366ZW\03600\037aTest record.\036 \037aa\037b\037c\036\037b0\036\035\000"
|
115
|
+
rec = MARC::Record.new_from_marc(marc)
|
116
|
+
assert_nothing_raised do
|
117
|
+
rec.to_marc
|
124
118
|
end
|
119
|
+
end
|
125
120
|
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
# Make sure the one we wrote out then read in again
|
136
|
-
# is the same as the one we read the first time
|
137
|
-
# Looks like "==" is over-ridden to do that. Don't ever change, #==
|
138
|
-
assert_equal record, read_back_record, "Round-tripped record must equal original record"
|
139
|
-
end
|
140
|
-
|
121
|
+
def test_unicode_roundtrip
|
122
|
+
record = MARC::Reader.new("test/utf8.marc", external_encoding: "UTF-8").first
|
123
|
+
|
124
|
+
writer = MARC::Writer.new("test/writer.dat")
|
125
|
+
writer.write(record)
|
126
|
+
writer.close
|
127
|
+
|
128
|
+
read_back_record = MARC::Reader.new("test/writer.dat", external_encoding: "UTF-8").first
|
141
129
|
|
130
|
+
# Make sure the one we wrote out then read in again
|
131
|
+
# is the same as the one we read the first time
|
132
|
+
# Looks like "==" is over-ridden to do that. Don't ever change, #==
|
133
|
+
assert_equal record, read_back_record, "Round-tripped record must equal original record"
|
134
|
+
end
|
142
135
|
end
|
data/test/tc_xml.rb
CHANGED
@@ -1,28 +1,26 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
1
|
+
require "test/unit"
|
2
|
+
require "marc"
|
3
|
+
require "stringio"
|
4
|
+
require "warning"
|
4
5
|
|
5
6
|
class XMLTest < Test::Unit::TestCase
|
7
|
+
Warning.ignore(/is deprecated and will be removed in a future version of ruby-marc/)
|
8
|
+
Warning.ignore(/setting Encoding.default_internal/)
|
6
9
|
def setup
|
7
10
|
@parsers = [:rexml]
|
8
11
|
begin
|
9
|
-
require
|
12
|
+
require "nokogiri"
|
10
13
|
@parsers << :nokogiri
|
11
14
|
rescue LoadError
|
12
15
|
end
|
13
16
|
unless defined? JRUBY_VERSION
|
14
17
|
begin
|
15
|
-
require
|
18
|
+
require "xml"
|
16
19
|
@parsers << :libxml
|
17
20
|
rescue LoadError
|
18
21
|
end
|
19
22
|
end
|
20
23
|
if defined? JRUBY_VERSION
|
21
|
-
begin
|
22
|
-
require 'jrexml'
|
23
|
-
@parsers << :jrexml
|
24
|
-
rescue LoadError
|
25
|
-
end
|
26
24
|
begin
|
27
25
|
java.lang.Class.forName("javax.xml.stream.XMLInputFactory")
|
28
26
|
@parsers << :jstax
|
@@ -31,151 +29,165 @@ class XMLTest < Test::Unit::TestCase
|
|
31
29
|
end
|
32
30
|
end
|
33
31
|
|
34
|
-
|
35
|
-
|
36
|
-
@parsers.each do | parser |
|
37
|
-
puts "\nRunning test_xml_entities with: #{parser}.\n"
|
32
|
+
def test_xml_entities
|
33
|
+
@parsers.each do |parser|
|
38
34
|
xml_entities_test(parser)
|
39
35
|
end
|
40
36
|
end
|
41
|
-
|
37
|
+
|
42
38
|
def xml_entities_test(parser)
|
43
39
|
r1 = MARC::Record.new
|
44
|
-
r1 << MARC::DataField.new(
|
40
|
+
r1 << MARC::DataField.new("245", "0", "0", ["a", "foo & bar & baz"])
|
45
41
|
xml = r1.to_xml.to_s
|
46
|
-
assert_match
|
42
|
+
assert_match(/foo & bar & baz/, xml, "Failed with parser '#{parser}'")
|
47
43
|
|
48
|
-
reader = MARC::XMLReader.new(StringIO.new(xml), :parser
|
44
|
+
reader = MARC::XMLReader.new(StringIO.new(xml), parser: parser)
|
49
45
|
r2 = reader.entries[0]
|
50
|
-
assert_equal
|
46
|
+
assert_equal("foo & bar & baz", r2["245"]["a"], "Failed with parser '#{parser}'")
|
51
47
|
end
|
52
|
-
|
48
|
+
|
53
49
|
def test_batch
|
54
|
-
@parsers.each do |
|
55
|
-
puts "\nRunning test_batch with: #{parser}.\n"
|
50
|
+
@parsers.each do |parser|
|
56
51
|
batch_test(parser)
|
57
|
-
end
|
52
|
+
end
|
58
53
|
end
|
59
|
-
|
54
|
+
|
60
55
|
def batch_test(parser)
|
61
|
-
reader = MARC::XMLReader.new(
|
56
|
+
reader = MARC::XMLReader.new("test/batch.xml", parser: parser)
|
62
57
|
count = 0
|
63
|
-
|
58
|
+
reader.each do |record|
|
64
59
|
count += 1
|
65
|
-
assert_instance_of(MARC::Record, record)
|
60
|
+
assert_instance_of(MARC::Record, record, "Failed with parser '#{parser}'")
|
66
61
|
end
|
67
|
-
assert_equal(count, 2)
|
62
|
+
assert_equal(count, 2, "Failed with parser '#{parser}'")
|
68
63
|
end
|
69
|
-
|
64
|
+
|
70
65
|
def test_read_string
|
71
|
-
@parsers.each do |
|
72
|
-
puts "\nRunning test_read_string with: #{parser}.\n"
|
66
|
+
@parsers.each do |parser|
|
73
67
|
read_string_test(parser)
|
74
|
-
end
|
68
|
+
end
|
75
69
|
end
|
76
70
|
|
77
71
|
def read_string_test(parser)
|
78
|
-
xml = File.new(
|
79
|
-
reader = MARC::XMLReader.new(StringIO.new(xml), :parser
|
80
|
-
assert_equal 2, reader.entries.length
|
72
|
+
xml = File.new("test/batch.xml").read
|
73
|
+
reader = MARC::XMLReader.new(StringIO.new(xml), parser: parser)
|
74
|
+
assert_equal 2, reader.entries.length, "Failed with parser '#{parser}'"
|
81
75
|
end
|
82
|
-
|
76
|
+
|
83
77
|
def test_non_numeric_fields
|
84
|
-
@parsers.each do |
|
85
|
-
puts "\nRunning test_non_numeric_fields with: #{parser}.\n"
|
78
|
+
@parsers.each do |parser|
|
86
79
|
non_numeric_fields_test(parser)
|
87
80
|
end
|
88
81
|
end
|
89
|
-
|
82
|
+
|
90
83
|
def non_numeric_fields_test(parser)
|
91
|
-
reader = MARC::XMLReader.new(
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
end
|
98
|
-
assert_equal(1, count)
|
99
|
-
assert_equal('9780061317842', record['ISB']['a'])
|
100
|
-
assert_equal('1', record['LOC']['9'])
|
84
|
+
reader = MARC::XMLReader.new("test/non-numeric.xml", parser: parser)
|
85
|
+
count = 0
|
86
|
+
record = nil
|
87
|
+
reader.each do |rec|
|
88
|
+
count += 1
|
89
|
+
record = rec
|
101
90
|
end
|
91
|
+
assert_equal(1, count, "Failed with parser '#{parser}'")
|
92
|
+
assert_equal("9780061317842", record["ISB"]["a"], "Failed with parser '#{parser}'")
|
93
|
+
assert_equal("1", record["LOC"]["9"], "Failed with parser '#{parser}'")
|
94
|
+
end
|
102
95
|
|
103
96
|
def test_read_no_leading_zero_write_leading_zero
|
104
|
-
@parsers.each do |
|
105
|
-
puts "\nRunning test_read_no_leading_zero_write_leading_zero with: #{parser}.\n"
|
97
|
+
@parsers.each do |parser|
|
106
98
|
read_no_leading_zero_write_leading_zero_test(parser)
|
107
|
-
end
|
99
|
+
end
|
108
100
|
end
|
109
|
-
|
101
|
+
|
110
102
|
def read_no_leading_zero_write_leading_zero_test(parser)
|
111
|
-
reader = MARC::XMLReader.new(
|
103
|
+
reader = MARC::XMLReader.new("test/no-leading-zero.xml", parser: parser)
|
112
104
|
record = reader.to_a[0]
|
113
|
-
assert_equal("042 zz $a dc ", record[
|
105
|
+
assert_equal("042 zz $a dc ", record["042"].to_s, "Failed with parser '#{parser}'")
|
114
106
|
end
|
115
107
|
|
116
108
|
def test_leader_from_xml
|
117
|
-
@parsers.each do |
|
118
|
-
puts "\nRunning test_leader_from_xml with: #{parser}.\n"
|
109
|
+
@parsers.each do |parser|
|
119
110
|
leader_from_xml_test(parser)
|
120
111
|
end
|
121
|
-
end
|
112
|
+
end
|
122
113
|
|
123
114
|
def leader_from_xml_test(parser)
|
124
|
-
reader = MARC::XMLReader.new(
|
115
|
+
reader = MARC::XMLReader.new("test/one.xml", parser: parser)
|
125
116
|
record = reader.entries[0]
|
126
|
-
assert_equal
|
117
|
+
assert_equal " njm a22 uu 4500", record.leader, "Failed with parser '#{parser}'"
|
118
|
+
|
127
119
|
# serializing as MARC should populate the record length and directory offset
|
128
120
|
record = MARC::Record.new_from_marc(record.to_marc)
|
129
|
-
assert_equal
|
121
|
+
assert_equal "00734njm a2200217uu 4500", record.leader, "Failed with parser '#{parser}'"
|
130
122
|
end
|
131
|
-
|
123
|
+
|
132
124
|
def test_read_write
|
133
|
-
@parsers.each do |
|
134
|
-
puts "\nRunning test_read_write with: #{parser}.\n"
|
125
|
+
@parsers.each do |parser|
|
135
126
|
read_write_test(parser)
|
136
127
|
end
|
137
|
-
end
|
128
|
+
end
|
138
129
|
|
139
130
|
def read_write_test(parser)
|
140
131
|
record1 = MARC::Record.new
|
141
|
-
record1.leader =
|
142
|
-
record1.append MARC::ControlField.new(
|
143
|
-
record1.append MARC::DataField.new(
|
144
|
-
[
|
132
|
+
record1.leader = "00925njm 22002777a 4500"
|
133
|
+
record1.append MARC::ControlField.new("007", "sdubumennmplu")
|
134
|
+
record1.append MARC::DataField.new("245", "0", "4",
|
135
|
+
["a", "The Great Ray Charles"], ["h", "[sound recording]."])
|
136
|
+
record1.append MARC::DataField.new("998", " ", " ",
|
137
|
+
["^", "Valid local subfield"])
|
145
138
|
|
146
|
-
writer = MARC::XMLWriter.new(
|
139
|
+
writer = MARC::XMLWriter.new("test/test.xml", stylesheet: "style.xsl")
|
147
140
|
writer.write(record1)
|
148
141
|
writer.close
|
149
142
|
|
150
|
-
xml = File.read(
|
151
|
-
assert_match
|
152
|
-
assert_match
|
143
|
+
xml = File.read("test/test.xml")
|
144
|
+
assert_match(/<controlfield tag='007'>sdubumennmplu<\/controlfield>/, xml, "Failed with parser '#{parser}'")
|
145
|
+
assert_match(/<\?xml-stylesheet type="text\/xsl" href="style.xsl"\?>/, xml, "Failed with parser '#{parser}'")
|
153
146
|
|
154
|
-
reader = MARC::XMLReader.new(
|
147
|
+
reader = MARC::XMLReader.new("test/test.xml", parser: parser)
|
155
148
|
record2 = reader.entries[0]
|
156
|
-
assert_equal(record1, record2)
|
157
|
-
|
158
|
-
File.unlink(
|
149
|
+
assert_equal(record1, record2, "Failed with parser '#{parser}'")
|
150
|
+
ensure
|
151
|
+
File.unlink("test/test.xml")
|
159
152
|
end
|
160
|
-
|
153
|
+
|
161
154
|
def test_xml_enumerator
|
162
|
-
@parsers.each do |
|
163
|
-
puts "\nRunning test_xml_enumerator with: #{parser}.\n"
|
155
|
+
@parsers.each do |parser|
|
164
156
|
xml_enumerator_test(parser)
|
165
157
|
end
|
166
158
|
end
|
167
|
-
|
168
|
-
|
159
|
+
|
169
160
|
def xml_enumerator_test(parser)
|
170
161
|
# confusingly, test/batch.xml only has two records, not 10 like batch.dat
|
171
|
-
reader = MARC::XMLReader.new(
|
162
|
+
reader = MARC::XMLReader.new("test/batch.xml", parser: parser)
|
172
163
|
iter = reader.each
|
173
164
|
r = iter.next
|
174
|
-
assert_instance_of(MARC::Record, r)
|
165
|
+
assert_instance_of(MARC::Record, r, "Failed with parser '#{parser}'")
|
175
166
|
iter.next # total of two records
|
176
|
-
|
167
|
+
assert_raise(StopIteration, "Failed with parser '#{parser}'") { iter.next }
|
177
168
|
end
|
178
|
-
|
179
169
|
|
180
|
-
|
170
|
+
def test_truncated_leader_roundtripping
|
171
|
+
record1 = MARC::Record.new
|
172
|
+
record1.leader = "00925njm 22002777a"
|
173
|
+
|
174
|
+
writer = MARC::XMLWriter.new("test/test.xml", stylesheet: "style.xsl")
|
175
|
+
writer.write(record1)
|
176
|
+
writer.close
|
181
177
|
|
178
|
+
reader = MARC::XMLReader.new("test/test.xml")
|
179
|
+
record2 = reader.entries[0]
|
180
|
+
|
181
|
+
assert_equal("00925njm 22002777a 4500", record2.leader)
|
182
|
+
ensure
|
183
|
+
File.unlink("test/test.xml")
|
184
|
+
end
|
185
|
+
|
186
|
+
def test_xml_weird_leader
|
187
|
+
@parsers.each do |parser|
|
188
|
+
reader = MARC::XMLReader.new("test/messed_up_leader.xml", parser: parser)
|
189
|
+
record = reader.first
|
190
|
+
assert_equal(record.leader, "01301nam a22003618< 4500", "Failed with parser '#{parser}'")
|
191
|
+
end
|
192
|
+
end
|
193
|
+
end
|
@@ -1,22 +1,21 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require "test/unit"
|
2
|
+
require "marc"
|
3
3
|
|
4
4
|
class BadXMLHandlingTestCase < Test::Unit::TestCase
|
5
|
-
|
6
5
|
def test_nokogiri_bad_xml
|
7
6
|
begin
|
8
|
-
require
|
7
|
+
require "nokogiri"
|
9
8
|
rescue LoadError
|
10
9
|
omit("nokogiri not installed, cannot test")
|
11
10
|
end
|
12
|
-
omit("nokogiri (<1.10.2) under jruby doesn't support error handling: sparklemotion/nokogiri#1847") if RUBY_PLATFORM ==
|
11
|
+
omit("nokogiri (<1.10.2) under jruby doesn't support error handling: sparklemotion/nokogiri#1847") if RUBY_PLATFORM == "java" && Gem::Version.new(Nokogiri::VERSION) < Gem::Version.new("1.10.2")
|
13
12
|
count = 0
|
14
|
-
reader = MARC::XMLReader.new(
|
13
|
+
reader = MARC::XMLReader.new("test/three-records-second-bad.xml", parser: :nokogiri)
|
15
14
|
assert_raise MARC::XMLParseError do
|
16
15
|
reader.each do |rec|
|
17
|
-
count += 1 if rec[
|
16
|
+
count += 1 if rec["260"]
|
18
17
|
end
|
19
18
|
end
|
20
|
-
assert_equal(1, count,
|
19
|
+
assert_equal(1, count, "should only be able to parse one record")
|
21
20
|
end
|
22
21
|
end
|
data/test/ts_marc.rb
CHANGED
@@ -4,11 +4,11 @@
|
|
4
4
|
# not already installed one
|
5
5
|
$LOAD_PATH.unshift("lib")
|
6
6
|
|
7
|
-
require
|
8
|
-
require
|
9
|
-
require
|
10
|
-
require
|
11
|
-
require
|
12
|
-
require
|
13
|
-
require
|
14
|
-
require
|
7
|
+
require "test/unit"
|
8
|
+
require "test/tc_subfield"
|
9
|
+
require "test/tc_datafield"
|
10
|
+
require "test/tc_controlfield"
|
11
|
+
require "test/tc_record"
|
12
|
+
require "test/tc_reader"
|
13
|
+
require "test/tc_writer"
|
14
|
+
require "test/tc_xml"
|