marc 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +3 -3
- data/CHANGELOG.md +18 -3
- data/lib/marc/controlfield.rb +1 -1
- data/lib/marc/dublincore.rb +25 -23
- data/lib/marc/jsonl_writer.rb +1 -1
- data/lib/marc/marc8/to_unicode.rb +10 -3
- data/lib/marc/reader.rb +17 -11
- data/lib/marc/subfield.rb +3 -3
- data/lib/marc/version.rb +1 -1
- data/lib/marc/writer.rb +3 -3
- data/lib/marc/xml_parsers.rb +7 -7
- data/marc.gemspec +3 -2
- data/spec/controlfield_spec.rb +52 -0
- data/spec/datafield_spec.rb +75 -0
- data/spec/reader_char_encodings_spec.rb +245 -0
- data/spec/reader_spec.rb +108 -0
- data/spec/spec_helper.rb +24 -0
- data/spec/writer_spec.rb +121 -0
- data/test/tc_writer.rb +8 -8
- metadata +32 -10
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 8e34cbc11eeea3ea9745181b1f3a958430842993c3f7d3624d1251eb3e07d34e
|
|
4
|
+
data.tar.gz: f10be646a7e983ebed7ac2e27e2d7733335d5fae66ec2c556d694e49d2e4bba0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d86a24aa80866e8bde4579615909d5eef1a753b8ae483e9a9928ae1d1417a1d7819ac87339e0d2fd5a5ac9b220370790aacfb365805f7790c80e54a9e23cdcc0
|
|
7
|
+
data.tar.gz: 396870e7768e6342a7d7d9950fbef801a89e5219284217a20121f62b1e641395dfaded36b3db903e957da7751fa755221177ed4301bafcd41cbd688fbd858a55
|
data/.github/workflows/ruby.yml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
name: CI
|
|
2
2
|
|
|
3
|
-
on: [push, pull_request]
|
|
3
|
+
on: [push, pull_request, workflow_dispatch]
|
|
4
4
|
|
|
5
5
|
env:
|
|
6
6
|
# See https://github.com/jruby/jruby/issues/5509
|
|
@@ -11,7 +11,7 @@ jobs:
|
|
|
11
11
|
runs-on: ubuntu-latest
|
|
12
12
|
strategy:
|
|
13
13
|
matrix:
|
|
14
|
-
ruby: [2.7, 3.0, 3.1, 3.2, 3.3, 3.4, jruby, truffleruby, "truffleruby+graalvm"]
|
|
14
|
+
ruby: [2.4, 2.7, 3.0, 3.1, 3.2, 3.3, 3.4, 4.0, jruby, truffleruby, "truffleruby+graalvm"]
|
|
15
15
|
steps:
|
|
16
16
|
- uses: actions/checkout@v4
|
|
17
17
|
- name: Set up Ruby
|
|
@@ -19,6 +19,6 @@ jobs:
|
|
|
19
19
|
with:
|
|
20
20
|
ruby-version: ${{ matrix.ruby }}
|
|
21
21
|
- name: Install dependencies
|
|
22
|
-
run: bundle install
|
|
22
|
+
run: bundle install
|
|
23
23
|
- name: Run tests
|
|
24
24
|
run: bundle exec rake
|
data/CHANGELOG.md
CHANGED
|
@@ -2,12 +2,27 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
-
## [1.
|
|
5
|
+
## [1.4] - 2026-01-22
|
|
6
6
|
|
|
7
7
|
### Breaking Change
|
|
8
8
|
|
|
9
|
-
**ruby >= 2.
|
|
10
|
-
|
|
9
|
+
**ruby >= 2.4 is now required**.
|
|
10
|
+
|
|
11
|
+
2.3 was EOL'd almost 7 years ago, and backporting support for other
|
|
12
|
+
gems was deemed to be not worth it.
|
|
13
|
+
|
|
14
|
+
This is mostly a cleanup release, to squash warnings
|
|
15
|
+
about frozen strings in more recent rubies, and update the
|
|
16
|
+
CI configuration.
|
|
17
|
+
|
|
18
|
+
## [1.3] - 2025-01-09
|
|
19
|
+
|
|
20
|
+
### Breaking Change
|
|
21
|
+
|
|
22
|
+
**ruby >= 2.2 is now required**.
|
|
23
|
+
|
|
24
|
+
- Removed no-longer-necessary `unf` gem in favor of built-in string
|
|
25
|
+
methods for dealing with encodings. (Aaron Elkiss)
|
|
11
26
|
|
|
12
27
|
### Non-user-facing changes
|
|
13
28
|
|
data/lib/marc/controlfield.rb
CHANGED
data/lib/marc/dublincore.rb
CHANGED
|
@@ -26,39 +26,41 @@ module MARC
|
|
|
26
26
|
end
|
|
27
27
|
|
|
28
28
|
dc_hash["publisher"] = begin
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
29
|
+
get_field_value(record["260"]["a"]["b"])
|
|
30
|
+
rescue
|
|
31
|
+
nil
|
|
32
|
+
end
|
|
33
33
|
dc_hash["date"] = begin
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
34
|
+
get_field_value(record["260"]["c"])
|
|
35
|
+
rescue
|
|
36
|
+
nil
|
|
37
|
+
end
|
|
38
38
|
dc_hash["type"] = get_field_value(record["655"])
|
|
39
39
|
dc_hash["format"] = begin
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
40
|
+
get_field_value(record["856"]["q"])
|
|
41
|
+
rescue
|
|
42
|
+
nil
|
|
43
|
+
end
|
|
44
44
|
dc_hash["identifier"] = begin
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
45
|
+
get_field_value(record["856"]["u"])
|
|
46
|
+
rescue
|
|
47
|
+
nil
|
|
48
|
+
end
|
|
49
49
|
dc_hash["source"] = begin
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
50
|
+
get_field_value(record["786"]["o"]["t"])
|
|
51
|
+
rescue
|
|
52
|
+
nil
|
|
53
|
+
end
|
|
54
54
|
dc_hash["language"] = get_field_value(record["546"])
|
|
55
55
|
|
|
56
56
|
dc_hash["relation"] = []
|
|
57
57
|
dc_hash["relation"] << get_field_value(record["530"])
|
|
58
58
|
("760".."787").each do |field|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
59
|
+
begin
|
|
60
|
+
dc_hash["relation"] << get_field_value(record[field]["o"]["t"])
|
|
61
|
+
rescue
|
|
62
|
+
nil
|
|
63
|
+
end
|
|
62
64
|
end
|
|
63
65
|
|
|
64
66
|
["651", "752"].each do |field|
|
data/lib/marc/jsonl_writer.rb
CHANGED
|
@@ -65,14 +65,21 @@ module MARC
|
|
|
65
65
|
normalization = options.fetch(:normalization, :nfc)
|
|
66
66
|
|
|
67
67
|
# don't choke on empty marc8_string
|
|
68
|
-
return "" if marc8_string.nil? || marc8_string.empty?
|
|
68
|
+
return +"" if marc8_string.nil? || marc8_string.empty?
|
|
69
69
|
|
|
70
70
|
# Make sure to call it 'binary', so we can slice it
|
|
71
71
|
# byte by byte, and so ruby doesn't complain about bad
|
|
72
72
|
# bytes for some other encoding. Yeah, we're changing
|
|
73
73
|
# encoding on input! If it's Marc8, it ought to be tagged
|
|
74
74
|
# binary already.
|
|
75
|
-
|
|
75
|
+
|
|
76
|
+
# Due to the changes with default frozen strings, we'll check
|
|
77
|
+
# to see if it's binary already, and only duplicate the string
|
|
78
|
+
# if it's not. It SHOULD be binary already.
|
|
79
|
+
unless marc8_string.encoding.to_s == "ASCII-8BIT"
|
|
80
|
+
marc8_string = marc8_string.dup
|
|
81
|
+
marc8_string.force_encoding("binary")
|
|
82
|
+
end
|
|
76
83
|
|
|
77
84
|
uni_list = []
|
|
78
85
|
combinings = []
|
|
@@ -130,7 +137,7 @@ module MARC
|
|
|
130
137
|
end
|
|
131
138
|
|
|
132
139
|
if (code_point < 0x20) ||
|
|
133
|
-
|
|
140
|
+
((code_point > 0x80) && (code_point < 0xa0))
|
|
134
141
|
uni = unichr(code_point)
|
|
135
142
|
next
|
|
136
143
|
end
|
data/lib/marc/reader.rb
CHANGED
|
@@ -301,7 +301,11 @@ module MARC
|
|
|
301
301
|
# And now that we've recorded the current encoding, we force
|
|
302
302
|
# to binary encoding, because we're going to be doing byte arithmetic,
|
|
303
303
|
# and want to avoid byte-vs-char confusion.
|
|
304
|
-
|
|
304
|
+
|
|
305
|
+
if (marc.respond_to?(:force_encoding) && marc.encoding != "ASCII-8BIT")
|
|
306
|
+
marc = marc.dup
|
|
307
|
+
marc.force_encoding("binary")
|
|
308
|
+
end
|
|
305
309
|
|
|
306
310
|
record = Record.new
|
|
307
311
|
record.leader = marc[0..LEADER_LENGTH - 1]
|
|
@@ -346,7 +350,7 @@ module MARC
|
|
|
346
350
|
# if we were told to be forgiving we just use the
|
|
347
351
|
# next available chunk of field data that we
|
|
348
352
|
# split apart based on the END_OF_FIELD
|
|
349
|
-
field_data = ""
|
|
353
|
+
field_data = +""
|
|
350
354
|
if params[:forgiving]
|
|
351
355
|
field_data = all_fields.shift
|
|
352
356
|
|
|
@@ -442,10 +446,10 @@ module MARC
|
|
|
442
446
|
# in future implementations.
|
|
443
447
|
if params[:internal_encoding]
|
|
444
448
|
str = if RUBY_VERSION >= "3.0"
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
+
str.encode(params[:internal_encoding], **params)
|
|
450
|
+
else
|
|
451
|
+
str.encode(params[:internal_encoding], params)
|
|
452
|
+
end
|
|
449
453
|
elsif params[:invalid] || params[:replace] || (params[:validate_encoding] == true)
|
|
450
454
|
|
|
451
455
|
if params[:validate_encoding] == true && !str.valid_encoding?
|
|
@@ -482,11 +486,13 @@ module MARC
|
|
|
482
486
|
class ForgivingReader < Reader
|
|
483
487
|
def each
|
|
484
488
|
@handle.each_line(END_OF_RECORD) do |raw|
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
489
|
+
begin
|
|
490
|
+
record = MARC::Reader.decode(raw, @encoding_options.merge(forgiving: true))
|
|
491
|
+
yield record
|
|
492
|
+
rescue
|
|
493
|
+
# caught exception just keep barrelling along
|
|
494
|
+
# TODO add logging
|
|
495
|
+
end
|
|
490
496
|
end
|
|
491
497
|
end
|
|
492
498
|
end
|
data/lib/marc/subfield.rb
CHANGED
|
@@ -7,11 +7,11 @@ module MARC
|
|
|
7
7
|
class Subfield
|
|
8
8
|
attr_accessor :code, :value
|
|
9
9
|
|
|
10
|
-
def initialize(code = "", value = "")
|
|
10
|
+
def initialize(code = +"", value = +"")
|
|
11
11
|
# can't allow code or value to be nil
|
|
12
12
|
# or else it'll screw us up later on
|
|
13
|
-
@code = code.nil? ? "" : code
|
|
14
|
-
@value = value.nil? ? "" : value
|
|
13
|
+
@code = code.nil? ? +"" : code
|
|
14
|
+
@value = value.nil? ? +"" : value
|
|
15
15
|
end
|
|
16
16
|
|
|
17
17
|
def ==(other)
|
data/lib/marc/version.rb
CHANGED
data/lib/marc/writer.rb
CHANGED
|
@@ -62,12 +62,12 @@ module MARC
|
|
|
62
62
|
# Second arg allow_oversized, default false, set to true
|
|
63
63
|
# to raise on MARC record that can't fit into ISO 2709.
|
|
64
64
|
def self.encode(record, allow_oversized = false)
|
|
65
|
-
directory = ""
|
|
66
|
-
fields = ""
|
|
65
|
+
directory = +""
|
|
66
|
+
fields = +""
|
|
67
67
|
offset = 0
|
|
68
68
|
record.each do |field|
|
|
69
69
|
# encode the field
|
|
70
|
-
field_data = ""
|
|
70
|
+
field_data = +""
|
|
71
71
|
if field.instance_of?(MARC::DataField)
|
|
72
72
|
warn("Warn: Missing indicator") unless field.indicator1 && field.indicator2
|
|
73
73
|
field_data = (field.indicator1 || " ") + (field.indicator2 || " ")
|
data/lib/marc/xml_parsers.rb
CHANGED
|
@@ -56,7 +56,7 @@ module MARC
|
|
|
56
56
|
SF_TAG = "subfield".freeze
|
|
57
57
|
|
|
58
58
|
def init
|
|
59
|
-
@record = {record: nil, leader: "", field: nil, subfield: nil}
|
|
59
|
+
@record = {record: nil, leader: +"", field: nil, subfield: nil}
|
|
60
60
|
@current_element = nil
|
|
61
61
|
@ns = "http://www.loc.gov/MARC21/slim"
|
|
62
62
|
end
|
|
@@ -115,7 +115,7 @@ module MARC
|
|
|
115
115
|
when REC_TAG then yield_record
|
|
116
116
|
when LEAD_TAG
|
|
117
117
|
@record[:record].leader = @record[:leader]
|
|
118
|
-
@record[:leader] = ""
|
|
118
|
+
@record[:leader] = +""
|
|
119
119
|
@current_element = nil if @current_element == :leader
|
|
120
120
|
end
|
|
121
121
|
end
|
|
@@ -238,7 +238,7 @@ module MARC
|
|
|
238
238
|
data_field = nil
|
|
239
239
|
control_field = nil
|
|
240
240
|
subfield = nil
|
|
241
|
-
text = ""
|
|
241
|
+
text = +""
|
|
242
242
|
attrs = nil
|
|
243
243
|
if Module.constants.index("Nokogiri") && @parser.is_a?(Nokogiri::XML::Reader)
|
|
244
244
|
datafield = nil
|
|
@@ -295,18 +295,18 @@ module MARC
|
|
|
295
295
|
end
|
|
296
296
|
|
|
297
297
|
if event.start_element?
|
|
298
|
-
text = ""
|
|
298
|
+
text = +""
|
|
299
299
|
attrs = event[1]
|
|
300
300
|
case strip_ns(event[0])
|
|
301
301
|
when "controlfield"
|
|
302
|
-
text = ""
|
|
302
|
+
text = +""
|
|
303
303
|
control_field = MARC::ControlField.new(attrs[TAG])
|
|
304
304
|
when "datafield"
|
|
305
|
-
text = ""
|
|
305
|
+
text = +""
|
|
306
306
|
data_field = MARC::DataField.new(attrs[TAG], attrs[IND1],
|
|
307
307
|
attrs[IND2])
|
|
308
308
|
when "subfield"
|
|
309
|
-
text = ""
|
|
309
|
+
text = +""
|
|
310
310
|
subfield = MARC::Subfield.new(attrs[CODE])
|
|
311
311
|
end
|
|
312
312
|
end
|
data/marc.gemspec
CHANGED
|
@@ -8,7 +8,7 @@ Gem::Specification.new do |s|
|
|
|
8
8
|
s.homepage = "https://github.com/ruby-marc/ruby-marc/"
|
|
9
9
|
s.summary = "A ruby library for working with Machine Readable Cataloging"
|
|
10
10
|
s.license = "MIT"
|
|
11
|
-
s.required_ruby_version = ">= 2.
|
|
11
|
+
s.required_ruby_version = ">= 2.3.0"
|
|
12
12
|
s.authors = ["Kevin Clarke", "Bill Dueber", "William Groppe", "Jonathan Rochkind", "Ross Singer", "Ed Summers", "Chris Beer"]
|
|
13
13
|
|
|
14
14
|
s.files = `git ls-files -z`.split("\x0")
|
|
@@ -17,12 +17,13 @@ Gem::Specification.new do |s|
|
|
|
17
17
|
s.require_paths = ["lib"]
|
|
18
18
|
|
|
19
19
|
s.add_dependency "nokogiri", "~>1.0"
|
|
20
|
+
s.add_dependency "rexml" # rexml was unbundled from the stdlib in ruby 3
|
|
20
21
|
|
|
21
22
|
s.add_development_dependency "rake", "~>13.0"
|
|
22
23
|
s.add_development_dependency "test-unit", "~>3.0"
|
|
23
24
|
s.add_development_dependency "standard", "~>1.0"
|
|
24
25
|
s.add_development_dependency "warning", "~>1.5"
|
|
25
26
|
s.add_development_dependency "xml-simple"
|
|
27
|
+
s.add_development_dependency "rdoc"
|
|
26
28
|
|
|
27
|
-
s.add_dependency "rexml" # rexml was unbundled from the stdlib in ruby 3
|
|
28
29
|
end
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
RSpec.describe MARC::ControlField do
|
|
4
|
+
it "formats a control field correctly" do
|
|
5
|
+
control = MARC::ControlField.new("005", "foobarbaz")
|
|
6
|
+
expect(control.to_s).to eq("005 foobarbaz")
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
it "rejects data field as control field" do
|
|
10
|
+
field = MARC::DataField.new("007")
|
|
11
|
+
expect(field.valid?).to be(false)
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
it "rejects alpha control field tags" do
|
|
15
|
+
# can't have a field with a tag < 010
|
|
16
|
+
field = MARC::ControlField.new("DDD")
|
|
17
|
+
expect(field.valid?).to be(false)
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
it "supports adding custom control field tags" do
|
|
21
|
+
MARC::ControlField.control_tags << "FMT"
|
|
22
|
+
field = MARC::ControlField.new("FMT")
|
|
23
|
+
expect(field.valid?).to be(true)
|
|
24
|
+
field = MARC::DataField.new("FMT")
|
|
25
|
+
expect(field.valid?).to be(false)
|
|
26
|
+
MARC::ControlField.control_tags.delete("FMT")
|
|
27
|
+
field = MARC::DataField.new("FMT")
|
|
28
|
+
expect(field.valid?).to be(true)
|
|
29
|
+
field = MARC::ControlField.new("FMT")
|
|
30
|
+
expect(field.valid?).to be(false)
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
it "rejects control field with data field tag" do
|
|
34
|
+
# can't have a control with a tag > 009
|
|
35
|
+
f = MARC::ControlField.new("245")
|
|
36
|
+
expect(f.valid?).to be(false)
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
it "compares control fields correctly" do
|
|
40
|
+
f1 = MARC::ControlField.new("001", "foobarbaz")
|
|
41
|
+
f2 = MARC::ControlField.new("001", "foobarbaz")
|
|
42
|
+
expect(f1).to eq(f2)
|
|
43
|
+
|
|
44
|
+
f3 = MARC::ControlField.new("001", "foobarbazqux")
|
|
45
|
+
expect(f1).not_to eq(f3)
|
|
46
|
+
f4 = MARC::ControlField.new("002", "foobarbaz")
|
|
47
|
+
expect(f1).not_to eq(f4)
|
|
48
|
+
|
|
49
|
+
expect(f1).not_to eq("001")
|
|
50
|
+
expect(f2).not_to eq("foobarbaz")
|
|
51
|
+
end
|
|
52
|
+
end
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
RSpec.describe MARC::DataField do
|
|
4
|
+
it "handles tags correctly" do
|
|
5
|
+
f1 = MARC::DataField.new("100")
|
|
6
|
+
expect(f1.tag).to eq("100")
|
|
7
|
+
f2 = MARC::DataField.new("100")
|
|
8
|
+
expect(f2.tag).to eq("100")
|
|
9
|
+
expect(f1).to eq(f2)
|
|
10
|
+
f3 = MARC::DataField.new("245")
|
|
11
|
+
expect(f1).not_to eq(f3)
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
it "handles alphabetic tags" do
|
|
15
|
+
alph = MARC::DataField.new("ALF")
|
|
16
|
+
expect(alph.tag).to eq("ALF")
|
|
17
|
+
|
|
18
|
+
alphnum = MARC::DataField.new("0D9")
|
|
19
|
+
expect(alphnum.tag).to eq("0D9")
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
it "handles indicators" do
|
|
23
|
+
f1 = MARC::DataField.new("100", "0", "1")
|
|
24
|
+
expect(f1.indicator1).to eq("0")
|
|
25
|
+
expect(f1.indicator2).to eq("1")
|
|
26
|
+
f2 = MARC::DataField.new("100", "0", "1")
|
|
27
|
+
expect(f2.indicator1).to eq("0")
|
|
28
|
+
expect(f2.indicator2).to eq("1")
|
|
29
|
+
expect(f1).to eq(f2)
|
|
30
|
+
f3 = MARC::DataField.new("100", "1", "1")
|
|
31
|
+
expect(f1).not_to eq(f3)
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
it "handles subfields" do
|
|
35
|
+
f1 = MARC::DataField.new("100", "0", "1",
|
|
36
|
+
MARC::Subfield.new("a", "Foo"),
|
|
37
|
+
MARC::Subfield.new("b", "Bar"))
|
|
38
|
+
expect(f1.to_s).to eq("100 01 $a Foo $b Bar ")
|
|
39
|
+
expect(f1.value).to eq("FooBar")
|
|
40
|
+
f2 = MARC::DataField.new("100", "0", "1",
|
|
41
|
+
MARC::Subfield.new("a", "Foo"),
|
|
42
|
+
MARC::Subfield.new("b", "Bar"))
|
|
43
|
+
expect(f1).to eq(f2)
|
|
44
|
+
f3 = MARC::DataField.new("100", "0", "1",
|
|
45
|
+
MARC::Subfield.new("a", "Foo"),
|
|
46
|
+
MARC::Subfield.new("b", "Bez"))
|
|
47
|
+
expect(f1).not_to eq(f3)
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
it "supports subfield shorthand" do
|
|
51
|
+
f = MARC::DataField.new("100", "0", "1", ["a", "Foo"], ["b", "Bar"])
|
|
52
|
+
expect(f.to_s).to eq("100 01 $a Foo $b Bar ")
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
it "iterates through subfields" do
|
|
56
|
+
field = MARC::DataField.new("100", "0", "1", ["a", "Foo"], ["b", "Bar"],
|
|
57
|
+
["a", "Bez"])
|
|
58
|
+
count = 0
|
|
59
|
+
field.each { |x| count += 1 }
|
|
60
|
+
expect(count).to eq(3)
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
it "supports lookup shorthand" do
|
|
64
|
+
f = MARC::DataField.new("100", "0", "1", ["a", "Foo"], ["b", "Bar"])
|
|
65
|
+
expect(f["b"]).to eq("Bar")
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
it "distinguishes from other types" do
|
|
69
|
+
f = MARC::DataField.new("100", "0", "1",
|
|
70
|
+
MARC::Subfield.new("a", "Foo"),
|
|
71
|
+
MARC::Subfield.new("b", "Bar"))
|
|
72
|
+
expect(f).not_to eq("100 01 $a Foo $b Bar ")
|
|
73
|
+
expect(f).not_to eq(f["a"])
|
|
74
|
+
end
|
|
75
|
+
end
|
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
require 'stringio'
|
|
3
|
+
|
|
4
|
+
# Testing char encodings under 1.9, don't bother running
|
|
5
|
+
# these tests except under 1.9, will either fail (because
|
|
6
|
+
# 1.9 func the test itself uses isn't there), or trivially pass
|
|
7
|
+
# (because the func they are testing is a no-op before 1.9).
|
|
8
|
+
|
|
9
|
+
if "".respond_to?(:encoding)
|
|
10
|
+
RSpec.describe "Reader Character Encodings" do
|
|
11
|
+
# Common test files
|
|
12
|
+
let(:utf_marc_path) { "test/utf8.marc" }
|
|
13
|
+
let(:cp866_marc_path) { "test/cp866_multirecord.marc" }
|
|
14
|
+
let(:bad_marc8_path) { "test/bad_eacc_encoding.marc8.marc" }
|
|
15
|
+
|
|
16
|
+
# Helper methods
|
|
17
|
+
def assert_utf8_right_in_utf8(record)
|
|
18
|
+
expect(record["245"].subfields.first.value.encoding.name).to eq("UTF-8")
|
|
19
|
+
expect(record["245"].to_s.encoding.name).to eq("UTF-8")
|
|
20
|
+
expect(record["245"].subfields.first.to_s.encoding.name).to eq("UTF-8")
|
|
21
|
+
expect(record["245"].subfields.first.value.encoding.name).to eq("UTF-8")
|
|
22
|
+
expect(record["245"]["a"].encoding.name).to eq("UTF-8")
|
|
23
|
+
expect(record["245"]["a"]).to start_with("Photčhanānukrom")
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def assert_cp866_right(record, encoding = "IBM866")
|
|
27
|
+
expect(record["001"].value.encoding.name).to eq(encoding)
|
|
28
|
+
expect(record["001"].value.encode("UTF-8").unpack("H4")).to eq(["d09d"]) # russian capital N
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def assert_all_values_valid_encoding(record, encoding_name = "UTF-8")
|
|
32
|
+
record.fields.each do |field|
|
|
33
|
+
if field.is_a? MARC::DataField
|
|
34
|
+
field.subfields.each do |sf|
|
|
35
|
+
expect(sf.value.encoding.name).to eq(encoding_name), "Is tagged #{encoding_name}: #{field.tag}: #{sf}"
|
|
36
|
+
expect(field.value.valid_encoding?).to be(true), "Is valid encoding: #{field.tag}: #{sf}"
|
|
37
|
+
end
|
|
38
|
+
else
|
|
39
|
+
expect(field.value.encoding.name).to eq(encoding_name), "Is tagged #{encoding_name}: #{field}"
|
|
40
|
+
expect(field.value.valid_encoding?).to be(true), "Is valid encoding: #{field}"
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
it "loads unicode correctly" do
|
|
46
|
+
reader = MARC::Reader.new(utf_marc_path)
|
|
47
|
+
record = nil
|
|
48
|
+
expect { record = reader.first }.not_to raise_error
|
|
49
|
+
assert_utf8_right_in_utf8(record)
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
it "decodes unicode with forgiving mode" do
|
|
53
|
+
# two kinds of forgiving invocation, they shouldn't be different,
|
|
54
|
+
# but just in case they have slightly different code paths, test em too.
|
|
55
|
+
marc_string = File.read(utf_marc_path).force_encoding("utf-8")
|
|
56
|
+
record = MARC::Reader.decode(marc_string, forgiving: true)
|
|
57
|
+
assert_utf8_right_in_utf8(record)
|
|
58
|
+
|
|
59
|
+
reader = MARC::ForgivingReader.new(utf_marc_path)
|
|
60
|
+
record = reader.first
|
|
61
|
+
assert_utf8_right_in_utf8(record)
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
it "passes options through ForgivingReader" do
|
|
65
|
+
# Make sure ForgivingReader accepts same options as MARC::Reader
|
|
66
|
+
# We don't test them ALL though, just a sample.
|
|
67
|
+
# Tell it we're reading cp866, but transcode to utf8 for us.
|
|
68
|
+
reader = MARC::ForgivingReader.new(cp866_marc_path, external_encoding: "cp866", internal_encoding: "utf-8")
|
|
69
|
+
record = reader.first
|
|
70
|
+
assert_cp866_right(record, "UTF-8")
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
it "handles explicit encoding" do
|
|
74
|
+
reader = MARC::Reader.new(cp866_marc_path, external_encoding: "cp866")
|
|
75
|
+
assert_cp866_right(reader.first, "IBM866")
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
it "raises error on bad encoding name" do
|
|
79
|
+
reader = MARC::Reader.new(cp866_marc_path, external_encoding: "adadfadf")
|
|
80
|
+
expect { reader.first }.to raise_error(ArgumentError)
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
it "handles marc8 with binary encoding" do
|
|
84
|
+
# Marc8, if we want to keep it without transcoding, best we can do is read it in binary.
|
|
85
|
+
reader = MARC::Reader.new("test/marc8_accented_chars.marc", external_encoding: "binary")
|
|
86
|
+
record = reader.first
|
|
87
|
+
expect(record["100"].subfields.first.value.encoding.name).to eq("ASCII-8BIT")
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
it "converts marc8 to unicode" do
|
|
91
|
+
reader = MARC::Reader.new("test/marc8_accented_chars.marc", external_encoding: "MARC-8")
|
|
92
|
+
record = reader.first
|
|
93
|
+
assert_all_values_valid_encoding(record)
|
|
94
|
+
expect(record["100"]["a"]).to eq("Serreau, Geneviève.")
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
it "converts marc8 to unicode with file handle" do
|
|
98
|
+
# had some trouble with this one, let's ensure it with a test
|
|
99
|
+
file = File.new("test/marc8_accented_chars.marc")
|
|
100
|
+
reader = MARC::Reader.new(file, external_encoding: "MARC-8")
|
|
101
|
+
record = reader.first
|
|
102
|
+
assert_all_values_valid_encoding(record)
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
it "handles marc8 with character entities" do
|
|
106
|
+
reader = MARC::Reader.new("test/escaped_character_reference.marc8.marc", external_encoding: "MARC-8")
|
|
107
|
+
record = reader.first
|
|
108
|
+
assert_all_values_valid_encoding(record)
|
|
109
|
+
expect(record["260"]["a"]).to eq("Rio de Janeiro escaped replacement char: \uFFFD .")
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
it "raises error on bad marc8" do
|
|
113
|
+
expect {
|
|
114
|
+
reader = MARC::Reader.new(bad_marc8_path, external_encoding: "MARC-8")
|
|
115
|
+
reader.first
|
|
116
|
+
}.to raise_error(Encoding::InvalidByteSequenceError)
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
it "handles bad marc8 with replacement" do
|
|
120
|
+
reader = MARC::Reader.new(bad_marc8_path, external_encoding: "MARC-8", invalid: :replace, replace: "[?]")
|
|
121
|
+
record = reader.first
|
|
122
|
+
assert_all_values_valid_encoding(record)
|
|
123
|
+
expect(record["880"]["a"]).to include("[?]")
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
it "handles files opened with external encoding" do
|
|
127
|
+
reader = MARC::Reader.new(File.open(cp866_marc_path, "r:cp866"))
|
|
128
|
+
record = reader.first
|
|
129
|
+
assert_cp866_right(record, "IBM866")
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
it "prioritizes explicit encoding over file encoding" do
|
|
133
|
+
reader = MARC::Reader.new(File.open(cp866_marc_path, "r:utf-8"), external_encoding: "cp866")
|
|
134
|
+
record = reader.first
|
|
135
|
+
assert_cp866_right(record, "IBM866")
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
it "handles strings with utf8 encoding" do
|
|
139
|
+
marc_file = File.open(utf_marc_path)
|
|
140
|
+
reader = MARC::Reader.new(marc_file)
|
|
141
|
+
expect { reader.first }.not_to raise_error
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
it "handles utf8 with bad bytes" do
|
|
145
|
+
marc_file = File.open("test/marc_with_bad_utf8.utf8.marc")
|
|
146
|
+
reader = MARC::Reader.new(marc_file, invalid: :replace)
|
|
147
|
+
record = reader.first
|
|
148
|
+
|
|
149
|
+
record.fields.each do |field|
|
|
150
|
+
if field.is_a? MARC::ControlField
|
|
151
|
+
expect(field.value.encoding.name).to eq("UTF-8")
|
|
152
|
+
expect(field.value.valid_encoding?).to be(true)
|
|
153
|
+
else
|
|
154
|
+
field.subfields.each do |subfield|
|
|
155
|
+
expect(subfield.value.encoding.name).to eq("UTF-8")
|
|
156
|
+
expect(subfield.value.valid_encoding?).to be(true)
|
|
157
|
+
end
|
|
158
|
+
end
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
expect(record["520"]["a"]).to include("\uFFFD")
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
it "handles string with cp866 encoding" do
|
|
165
|
+
marc_string = File.read(cp866_marc_path).force_encoding("cp866")
|
|
166
|
+
reader = MARC::Reader.new(StringIO.new(marc_string))
|
|
167
|
+
record = reader.first
|
|
168
|
+
assert_cp866_right(record, "IBM866")
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
it "decodes strings with cp866 encoding" do
|
|
172
|
+
marc_string = File.read(cp866_marc_path).force_encoding("cp866")
|
|
173
|
+
record = MARC::Reader.decode(marc_string)
|
|
174
|
+
assert_cp866_right(record, "IBM866")
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
it "supports transcoding" do
|
|
178
|
+
reader = MARC::Reader.new(cp866_marc_path,
|
|
179
|
+
external_encoding: "cp866",
|
|
180
|
+
internal_encoding: "UTF-8")
|
|
181
|
+
record = reader.first
|
|
182
|
+
assert_cp866_right(record, "UTF-8")
|
|
183
|
+
end
|
|
184
|
+
|
|
185
|
+
it "works with binary filehandle" do
|
|
186
|
+
# about to recommend this as a foolproof way to avoid
|
|
187
|
+
# ruby transcoding behind your back in docs, let's make
|
|
188
|
+
# sure it really works.
|
|
189
|
+
reader = MARC::Reader.new(File.open(cp866_marc_path, external_encoding: "binary", internal_encoding: "binary"),
|
|
190
|
+
external_encoding: "IBM866")
|
|
191
|
+
record = reader.first
|
|
192
|
+
assert_cp866_right(record, "IBM866")
|
|
193
|
+
end
|
|
194
|
+
|
|
195
|
+
it "handles bad source bytes" do
|
|
196
|
+
reader = MARC::Reader.new("test/utf8_with_bad_bytes.marc",
|
|
197
|
+
external_encoding: "UTF-8",
|
|
198
|
+
validate_encoding: true)
|
|
199
|
+
expect { reader.first }.to raise_error(Encoding::InvalidByteSequenceError)
|
|
200
|
+
end
|
|
201
|
+
|
|
202
|
+
it "replaces bad source bytes when configured" do
|
|
203
|
+
reader = MARC::Reader.new("test/utf8_with_bad_bytes.marc",
|
|
204
|
+
external_encoding: "UTF-8", invalid: :replace)
|
|
205
|
+
record = nil
|
|
206
|
+
expect { record = reader.first }.not_to raise_error
|
|
207
|
+
expect(record["245"]["a"]).to match(/=> #{"\uFFFD"} \(<=/)
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
it "supports custom replacement for bad bytes" do
|
|
211
|
+
reader = MARC::Reader.new("test/utf8_with_bad_bytes.marc",
|
|
212
|
+
external_encoding: "UTF-8", invalid: :replace, replace: "")
|
|
213
|
+
record = reader.first
|
|
214
|
+
expect(record["245"]["a"]).to match(/=> \( <=/)
|
|
215
|
+
end
|
|
216
|
+
|
|
217
|
+
it "works with default_internal encoding" do
|
|
218
|
+
original = Encoding.default_internal
|
|
219
|
+
Encoding.default_internal = "UTF-8"
|
|
220
|
+
|
|
221
|
+
reader = MARC::Reader.new(File.open(cp866_marc_path, "r:cp866"))
|
|
222
|
+
record = reader.first
|
|
223
|
+
assert_cp866_right(record, "IBM866")
|
|
224
|
+
ensure
|
|
225
|
+
Encoding.default_internal = original
|
|
226
|
+
end
|
|
227
|
+
|
|
228
|
+
it "works with default_internal encoding using string arg" do
|
|
229
|
+
original = Encoding.default_internal
|
|
230
|
+
Encoding.default_internal = "UTF-8"
|
|
231
|
+
|
|
232
|
+
reader = MARC::Reader.new(cp866_marc_path, external_encoding: "cp866")
|
|
233
|
+
record = reader.first
|
|
234
|
+
assert_cp866_right(record, "IBM866")
|
|
235
|
+
ensure
|
|
236
|
+
Encoding.default_internal = original
|
|
237
|
+
end
|
|
238
|
+
end
|
|
239
|
+
else
|
|
240
|
+
RSpec.describe "Reader Character Encodings" do
|
|
241
|
+
it "skips tests on Ruby < 1.9" do
|
|
242
|
+
skip("Tests not being run in ruby 1.9.x or higher")
|
|
243
|
+
end
|
|
244
|
+
end
|
|
245
|
+
end
|
data/spec/reader_spec.rb
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
RSpec.describe MARC::Reader do
|
|
4
|
+
it "reads batch records correctly" do
|
|
5
|
+
reader = MARC::Reader.new("test/batch.dat")
|
|
6
|
+
count = 0
|
|
7
|
+
reader.each { count += 1 }
|
|
8
|
+
expect(count).to eq(10)
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
it "handles loose records with ForgivingReader" do
|
|
12
|
+
reader = MARC::ForgivingReader.new("test/batch.dat")
|
|
13
|
+
count = 0
|
|
14
|
+
reader.each { count += 1 }
|
|
15
|
+
expect(count).to eq(10)
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
it "handles UTF-8 in ForgivingReader" do
|
|
19
|
+
# This isn't actually a corrupt file, but it is utf8,
|
|
20
|
+
# and I have some reason to believe forgiving reader isn't
|
|
21
|
+
# working properly with UTF8 in ruby 1.9, so testing it.
|
|
22
|
+
reader = MARC::ForgivingReader.new("test/utf8.marc")
|
|
23
|
+
count = 0
|
|
24
|
+
reader.each { count += 1 }
|
|
25
|
+
expect(count).to eq(1)
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
it "handles unimarc records" do
|
|
29
|
+
# Unimarc might use a different record separator? Let's make sure it works.
|
|
30
|
+
reader = MARC::Reader.new(File.open("test/cp866_unimarc.marc", "r:cp866"))
|
|
31
|
+
count = 0
|
|
32
|
+
reader.each { |a| count += 1 }
|
|
33
|
+
expect(count).to eq(1)
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
it "handles non-numeric tags" do
|
|
37
|
+
reader = MARC::Reader.new("test/non-numeric.dat")
|
|
38
|
+
count = 0
|
|
39
|
+
record = nil
|
|
40
|
+
reader.each do |rec|
|
|
41
|
+
count += 1
|
|
42
|
+
record = rec
|
|
43
|
+
end
|
|
44
|
+
expect(count).to eq(1)
|
|
45
|
+
expect(record["ISB"]["a"]).to eq("9780061317842")
|
|
46
|
+
expect(record["LOC"]["9"]).to eq("1")
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
it "raises exception for bad MARC data" do
|
|
50
|
+
reader = MARC::Reader.new("test/tc_reader.rb")
|
|
51
|
+
expect { reader.entries[0] }.to raise_error(MARC::Exception)
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
it "supports search functionality" do
|
|
55
|
+
reader = MARC::Reader.new("test/batch.dat")
|
|
56
|
+
records = reader.find_all { |r| r =~ /Perl/ }
|
|
57
|
+
expect(records.length).to eq(10)
|
|
58
|
+
|
|
59
|
+
reader = MARC::Reader.new("test/batch.dat")
|
|
60
|
+
records = reader.find_all { |r| r["245"] =~ /Perl/ }
|
|
61
|
+
expect(records.length).to eq(10)
|
|
62
|
+
|
|
63
|
+
reader = MARC::Reader.new("test/batch.dat")
|
|
64
|
+
records = reader.find_all { |r| r["245"]["a"] =~ /Perl/ }
|
|
65
|
+
expect(records.length).to eq(10)
|
|
66
|
+
|
|
67
|
+
reader = MARC::Reader.new("test/batch.dat")
|
|
68
|
+
records = reader.find_all { |r| r =~ /Foo/ }
|
|
69
|
+
expect(records.length).to eq(0)
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
it "provides a binary enumerator" do
|
|
73
|
+
reader = MARC::Reader.new("test/batch.dat")
|
|
74
|
+
iter = reader.each
|
|
75
|
+
r = iter.next
|
|
76
|
+
expect(r).to be_an_instance_of(MARC::Record)
|
|
77
|
+
9.times { iter.next } # total of ten records
|
|
78
|
+
expect { iter.next }.to raise_error(StopIteration)
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
it "supports each_raw method" do
|
|
82
|
+
reader = MARC::Reader.new("test/batch.dat")
|
|
83
|
+
count = 0
|
|
84
|
+
raw = nil
|
|
85
|
+
reader.each_raw { |r|
|
|
86
|
+
count += 1
|
|
87
|
+
raw = r
|
|
88
|
+
}
|
|
89
|
+
expect(count).to eq(10)
|
|
90
|
+
expect(raw).to be_an_instance_of(String)
|
|
91
|
+
|
|
92
|
+
record = MARC::Reader.decode(raw)
|
|
93
|
+
expect(record).to be_an_instance_of(MARC::Record)
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
it "supports each_raw enumerator" do
|
|
97
|
+
reader = MARC::Reader.new("test/batch.dat")
|
|
98
|
+
enum = reader.each_raw
|
|
99
|
+
r = enum.next
|
|
100
|
+
expect(r).to be_an_instance_of(String)
|
|
101
|
+
|
|
102
|
+
record = MARC::Reader.decode(r)
|
|
103
|
+
expect(record).to be_an_instance_of(MARC::Record)
|
|
104
|
+
|
|
105
|
+
9.times { enum.next } # total of ten records
|
|
106
|
+
expect { enum.next }.to raise_error(StopIteration)
|
|
107
|
+
end
|
|
108
|
+
end
|
data/spec/spec_helper.rb
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
require 'rspec'
|
|
2
|
+
require 'marc'
|
|
3
|
+
|
|
4
|
+
RSpec.configure do |config|
|
|
5
|
+
config.expect_with :rspec do |expectations|
|
|
6
|
+
expectations.include_chain_clauses_in_custom_matcher_descriptions = true
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
config.mock_with :rspec do |mocks|
|
|
10
|
+
mocks.verify_partial_doubles = true
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
config.shared_context_metadata_behavior = :apply_to_host_groups
|
|
14
|
+
config.filter_run_when_matching :focus
|
|
15
|
+
config.disable_monkey_patching!
|
|
16
|
+
config.warnings = true
|
|
17
|
+
|
|
18
|
+
if config.files_to_run.one?
|
|
19
|
+
config.default_formatter = "doc"
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
config.order = :random
|
|
23
|
+
Kernel.srand config.seed
|
|
24
|
+
end
|
data/spec/writer_spec.rb
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
require 'stringio'
|
|
3
|
+
|
|
4
|
+
RSpec.describe MARC::Writer do
|
|
5
|
+
it "writes and reads MARC records properly" do
|
|
6
|
+
writer = MARC::Writer.new("test/writer.dat")
|
|
7
|
+
record = MARC::Record.new
|
|
8
|
+
record.append(MARC::DataField.new("245", "0", "1", ["a", "foo"]))
|
|
9
|
+
writer.write(record)
|
|
10
|
+
writer.close
|
|
11
|
+
|
|
12
|
+
# read it back to make sure
|
|
13
|
+
reader = MARC::Reader.new("test/writer.dat")
|
|
14
|
+
records = reader.entries
|
|
15
|
+
expect(records.length).to eq(1)
|
|
16
|
+
expect(records[0]).to eq(record)
|
|
17
|
+
|
|
18
|
+
# cleanup
|
|
19
|
+
File.unlink("test/writer.dat")
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
if "".respond_to?(:encoding)
|
|
23
|
+
it "handles mixed encodings properly" do
|
|
24
|
+
writer = MARC::Writer.new("test/writer.dat")
|
|
25
|
+
|
|
26
|
+
# MARC::Writer should just happily write out whatever bytes you give it, even
|
|
27
|
+
# mixing encodings that can't be mixed. We ran into an actual example mixing
|
|
28
|
+
# MARC8 (tagged ruby binary) and UTF8, we want it to be written out.
|
|
29
|
+
|
|
30
|
+
record = MARC::Record.new
|
|
31
|
+
|
|
32
|
+
record.append MARC::DataField.new("700", "0", " ", ["a", "Nhouy Abhay,".force_encoding("BINARY")], ["c", "Th\xE5ao,".force_encoding("BINARY")], ["d", "1909-"])
|
|
33
|
+
record.append MARC::DataField.new("700", "0", " ", ["a", "Somchin P\xF8\xE5o. Ngin,".force_encoding("BINARY")])
|
|
34
|
+
|
|
35
|
+
record.append MARC::DataField.new("100", "0", "0", ["a", "\xE5angkham. ".force_encoding("BINARY")])
|
|
36
|
+
record.append MARC::DataField.new("245", "1", "0", ["b", "chef-d'oeuvre de la litt\xE2erature lao".force_encoding("BINARY")])
|
|
37
|
+
|
|
38
|
+
# One in UTF8 and marked
|
|
39
|
+
record.append MARC::DataField.new("999", "0", "1", ["a", "chef-d'ocuvre de la littU+FFC3\U+FFA9rature".force_encoding("UTF-8")])
|
|
40
|
+
|
|
41
|
+
writer.write(record)
|
|
42
|
+
writer.close
|
|
43
|
+
ensure
|
|
44
|
+
File.unlink("test/writer.dat") if File.exist?("test/writer.dat")
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
it "supports oversized records when configured" do
|
|
49
|
+
too_long_record = MARC::Record.new
|
|
50
|
+
1.upto(1001) do
|
|
51
|
+
too_long_record.append MARC::DataField.new("500", " ", " ", ["a", "A really long record.1234567890123456789012345678901234567890123456789012345678901234567890123456789"])
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
wbuffer = StringIO.new("", "w")
|
|
55
|
+
writer = MARC::Writer.new(wbuffer)
|
|
56
|
+
writer.allow_oversized = true
|
|
57
|
+
|
|
58
|
+
writer.write(too_long_record)
|
|
59
|
+
writer.close
|
|
60
|
+
|
|
61
|
+
expect(wbuffer.string.slice(0, 5)).to eq("00000")
|
|
62
|
+
|
|
63
|
+
rbuffer = StringIO.new(wbuffer.string.dup)
|
|
64
|
+
|
|
65
|
+
# Forgiving reader will, round trippable
|
|
66
|
+
new_record = MARC::Reader.decode(rbuffer.string, forgiving: true)
|
|
67
|
+
expect(new_record).to eq(too_long_record)
|
|
68
|
+
|
|
69
|
+
# Test in the middle of a MARC file
|
|
70
|
+
good_record = MARC::Record.new
|
|
71
|
+
good_record.append MARC::DataField.new("500", " ", " ", ["a", "A short record"])
|
|
72
|
+
wbuffer = StringIO.new("", "w")
|
|
73
|
+
writer = MARC::Writer.new(wbuffer)
|
|
74
|
+
writer.allow_oversized = true
|
|
75
|
+
|
|
76
|
+
writer.write(good_record)
|
|
77
|
+
writer.write(too_long_record)
|
|
78
|
+
writer.write(good_record)
|
|
79
|
+
|
|
80
|
+
rbuffer = StringIO.new(wbuffer.string.dup)
|
|
81
|
+
reader = MARC::ForgivingReader.new(rbuffer)
|
|
82
|
+
records = reader.to_a
|
|
83
|
+
|
|
84
|
+
expect(records.length).to eq(3)
|
|
85
|
+
expect(records[0]).to eq(good_record)
|
|
86
|
+
expect(records[2]).to eq(good_record)
|
|
87
|
+
expect(records[1]).to eq(too_long_record)
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
it "raises exception for oversized records by default" do
|
|
91
|
+
too_long_record = MARC::Record.new
|
|
92
|
+
1.upto(1001) do
|
|
93
|
+
too_long_record.append MARC::DataField.new("500", " ", " ", ["a", "A really long record.1234567890123456789012345678901234567890123456789012345678901234567890123456789"])
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
wbuffer = StringIO.new("", "w")
|
|
97
|
+
writer = MARC::Writer.new(wbuffer)
|
|
98
|
+
|
|
99
|
+
expect { writer.write too_long_record }.to raise_error(MARC::Exception)
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
it "handles forgiving writing" do
|
|
103
|
+
marc = "00305cam a2200133 a 4500001000700000003000900007005001700016008004100033008004100074035002500115245001700140909001000157909000400167\036635145\036UK-BiLMS\03620060329173705.0\036s1982iieng6 000 0 eng||\036060116|||||||||xxk eng||\036 \037a(UK-BiLMS)M0017366ZW\03600\037aTest record.\036 \037aa\037b\037c\036\037b0\036\035\000"
|
|
104
|
+
rec = MARC::Record.new_from_marc(marc)
|
|
105
|
+
expect { rec.to_marc }.not_to raise_error
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
it "handles Unicode roundtrip" do
|
|
109
|
+
record = MARC::Reader.new("test/utf8.marc", external_encoding: "UTF-8").first
|
|
110
|
+
|
|
111
|
+
writer = MARC::Writer.new("test/writer.dat")
|
|
112
|
+
writer.write(record)
|
|
113
|
+
writer.close
|
|
114
|
+
|
|
115
|
+
read_back_record = MARC::Reader.new("test/writer.dat", external_encoding: "UTF-8").first
|
|
116
|
+
|
|
117
|
+
# Make sure the one we wrote out then read in again
|
|
118
|
+
# is the same as the one we read the first time
|
|
119
|
+
expect(record).to eq(read_back_record)
|
|
120
|
+
end
|
|
121
|
+
end
|
data/test/tc_writer.rb
CHANGED
|
@@ -32,14 +32,14 @@ class WriterTest < Test::Unit::TestCase
|
|
|
32
32
|
|
|
33
33
|
record = MARC::Record.new
|
|
34
34
|
|
|
35
|
-
record.append MARC::DataField.new("700", "0", " ", ["a", "Nhouy Abhay,".force_encoding("BINARY")], ["c", "Th\xE5ao,".force_encoding("BINARY")], ["d", "1909-"])
|
|
36
|
-
record.append MARC::DataField.new("700", "0", " ", ["a", "Somchin P\xF8\xE5o. Ngin,".force_encoding("BINARY")])
|
|
35
|
+
record.append MARC::DataField.new("700", "0", " ", ["a", +"Nhouy Abhay,".dup.force_encoding("BINARY")], ["c", +"Th\xE5ao,".dup.force_encoding("BINARY")], ["d", "1909-"])
|
|
36
|
+
record.append MARC::DataField.new("700", "0", " ", ["a", +"Somchin P\xF8\xE5o. Ngin,".dup.force_encoding("BINARY")])
|
|
37
37
|
|
|
38
|
-
record.append MARC::DataField.new("100", "0", "0", ["a", "\xE5angkham. ".force_encoding("BINARY")])
|
|
39
|
-
record.append MARC::DataField.new("245", "1", "0", ["b", "chef-d'oeuvre de la litt\xE2erature lao".force_encoding("BINARY")])
|
|
38
|
+
record.append MARC::DataField.new("100", "0", "0", ["a", +"\xE5angkham. ".dup.force_encoding("BINARY")])
|
|
39
|
+
record.append MARC::DataField.new("245", "1", "0", ["b", +"chef-d'oeuvre de la litt\xE2erature lao".dup.force_encoding("BINARY")])
|
|
40
40
|
|
|
41
41
|
# One in UTF8 and marked
|
|
42
|
-
record.append MARC::DataField.new("999", "0", "1", ["a", "chef-d'ocuvre de la littU+FFC3\U+FFA9rature".force_encoding("UTF-8")])
|
|
42
|
+
record.append MARC::DataField.new("999", "0", "1", ["a", +"chef-d'ocuvre de la littU+FFC3\U+FFA9rature".dup.force_encoding("UTF-8")])
|
|
43
43
|
|
|
44
44
|
writer.write(record)
|
|
45
45
|
writer.close
|
|
@@ -54,7 +54,7 @@ class WriterTest < Test::Unit::TestCase
|
|
|
54
54
|
too_long_record.append MARC::DataField.new("500", " ", " ", ["a", "A really long record.1234567890123456789012345678901234567890123456789012345678901234567890123456789"])
|
|
55
55
|
end
|
|
56
56
|
|
|
57
|
-
wbuffer = StringIO.new("", "w")
|
|
57
|
+
wbuffer = StringIO.new(+"", "w")
|
|
58
58
|
writer = MARC::Writer.new(wbuffer)
|
|
59
59
|
writer.allow_oversized = true
|
|
60
60
|
|
|
@@ -78,7 +78,7 @@ class WriterTest < Test::Unit::TestCase
|
|
|
78
78
|
# Test in the middle of a MARC file
|
|
79
79
|
good_record = MARC::Record.new
|
|
80
80
|
good_record.append MARC::DataField.new("500", " ", " ", ["a", "A short record"])
|
|
81
|
-
wbuffer = StringIO.new("", "w")
|
|
81
|
+
wbuffer = StringIO.new(+"", "w")
|
|
82
82
|
writer = MARC::Writer.new(wbuffer)
|
|
83
83
|
writer.allow_oversized = true
|
|
84
84
|
|
|
@@ -102,7 +102,7 @@ class WriterTest < Test::Unit::TestCase
|
|
|
102
102
|
too_long_record.append MARC::DataField.new("500", " ", " ", ["a", "A really long record.1234567890123456789012345678901234567890123456789012345678901234567890123456789"])
|
|
103
103
|
end
|
|
104
104
|
|
|
105
|
-
wbuffer = StringIO.new("", "w")
|
|
105
|
+
wbuffer = StringIO.new(+"", "w")
|
|
106
106
|
writer = MARC::Writer.new(wbuffer)
|
|
107
107
|
|
|
108
108
|
assert_raise(MARC::Exception) do
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: marc
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.
|
|
4
|
+
version: 1.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Kevin Clarke
|
|
@@ -11,10 +11,9 @@ authors:
|
|
|
11
11
|
- Ross Singer
|
|
12
12
|
- Ed Summers
|
|
13
13
|
- Chris Beer
|
|
14
|
-
autorequire:
|
|
15
14
|
bindir: bin
|
|
16
15
|
cert_chain: []
|
|
17
|
-
date:
|
|
16
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
18
17
|
dependencies:
|
|
19
18
|
- !ruby/object:Gem::Dependency
|
|
20
19
|
name: nokogiri
|
|
@@ -30,6 +29,20 @@ dependencies:
|
|
|
30
29
|
- - "~>"
|
|
31
30
|
- !ruby/object:Gem::Version
|
|
32
31
|
version: '1.0'
|
|
32
|
+
- !ruby/object:Gem::Dependency
|
|
33
|
+
name: rexml
|
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
|
35
|
+
requirements:
|
|
36
|
+
- - ">="
|
|
37
|
+
- !ruby/object:Gem::Version
|
|
38
|
+
version: '0'
|
|
39
|
+
type: :runtime
|
|
40
|
+
prerelease: false
|
|
41
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
42
|
+
requirements:
|
|
43
|
+
- - ">="
|
|
44
|
+
- !ruby/object:Gem::Version
|
|
45
|
+
version: '0'
|
|
33
46
|
- !ruby/object:Gem::Dependency
|
|
34
47
|
name: rake
|
|
35
48
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -101,20 +114,19 @@ dependencies:
|
|
|
101
114
|
- !ruby/object:Gem::Version
|
|
102
115
|
version: '0'
|
|
103
116
|
- !ruby/object:Gem::Dependency
|
|
104
|
-
name:
|
|
117
|
+
name: rdoc
|
|
105
118
|
requirement: !ruby/object:Gem::Requirement
|
|
106
119
|
requirements:
|
|
107
120
|
- - ">="
|
|
108
121
|
- !ruby/object:Gem::Version
|
|
109
122
|
version: '0'
|
|
110
|
-
type: :
|
|
123
|
+
type: :development
|
|
111
124
|
prerelease: false
|
|
112
125
|
version_requirements: !ruby/object:Gem::Requirement
|
|
113
126
|
requirements:
|
|
114
127
|
- - ">="
|
|
115
128
|
- !ruby/object:Gem::Version
|
|
116
129
|
version: '0'
|
|
117
|
-
description:
|
|
118
130
|
email: ehs@pobox.com
|
|
119
131
|
executables:
|
|
120
132
|
- marc
|
|
@@ -154,6 +166,12 @@ files:
|
|
|
154
166
|
- lib/marc/xmlreader.rb
|
|
155
167
|
- lib/marc/xmlwriter.rb
|
|
156
168
|
- marc.gemspec
|
|
169
|
+
- spec/controlfield_spec.rb
|
|
170
|
+
- spec/datafield_spec.rb
|
|
171
|
+
- spec/reader_char_encodings_spec.rb
|
|
172
|
+
- spec/reader_spec.rb
|
|
173
|
+
- spec/spec_helper.rb
|
|
174
|
+
- spec/writer_spec.rb
|
|
157
175
|
- test/bad_eacc_encoding.marc8.marc
|
|
158
176
|
- test/batch.dat
|
|
159
177
|
- test/batch.xml
|
|
@@ -198,7 +216,6 @@ homepage: https://github.com/ruby-marc/ruby-marc/
|
|
|
198
216
|
licenses:
|
|
199
217
|
- MIT
|
|
200
218
|
metadata: {}
|
|
201
|
-
post_install_message:
|
|
202
219
|
rdoc_options: []
|
|
203
220
|
require_paths:
|
|
204
221
|
- lib
|
|
@@ -206,18 +223,23 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
206
223
|
requirements:
|
|
207
224
|
- - ">="
|
|
208
225
|
- !ruby/object:Gem::Version
|
|
209
|
-
version: 2.
|
|
226
|
+
version: 2.3.0
|
|
210
227
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
211
228
|
requirements:
|
|
212
229
|
- - ">="
|
|
213
230
|
- !ruby/object:Gem::Version
|
|
214
231
|
version: '0'
|
|
215
232
|
requirements: []
|
|
216
|
-
rubygems_version:
|
|
217
|
-
signing_key:
|
|
233
|
+
rubygems_version: 4.0.3
|
|
218
234
|
specification_version: 4
|
|
219
235
|
summary: A ruby library for working with Machine Readable Cataloging
|
|
220
236
|
test_files:
|
|
237
|
+
- spec/controlfield_spec.rb
|
|
238
|
+
- spec/datafield_spec.rb
|
|
239
|
+
- spec/reader_char_encodings_spec.rb
|
|
240
|
+
- spec/reader_spec.rb
|
|
241
|
+
- spec/spec_helper.rb
|
|
242
|
+
- spec/writer_spec.rb
|
|
221
243
|
- test/bad_eacc_encoding.marc8.marc
|
|
222
244
|
- test/batch.dat
|
|
223
245
|
- test/batch.xml
|