marc 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +4 -4
- data/CHANGELOG.md +29 -0
- data/Gemfile +0 -10
- data/lib/marc/controlfield.rb +1 -1
- data/lib/marc/dublincore.rb +25 -23
- data/lib/marc/jsonl_writer.rb +1 -1
- data/lib/marc/marc8/to_unicode.rb +11 -5
- data/lib/marc/reader.rb +17 -13
- data/lib/marc/subfield.rb +3 -3
- data/lib/marc/version.rb +1 -1
- data/lib/marc/writer.rb +3 -3
- data/lib/marc/xml_parsers.rb +7 -7
- data/marc.gemspec +10 -4
- data/spec/controlfield_spec.rb +52 -0
- data/spec/datafield_spec.rb +75 -0
- data/spec/reader_char_encodings_spec.rb +245 -0
- data/spec/reader_spec.rb +108 -0
- data/spec/spec_helper.rb +24 -0
- data/spec/writer_spec.rb +121 -0
- data/test/marc8/tc_to_unicode.rb +5 -7
- data/test/tc_writer.rb +8 -8
- metadata +80 -22
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 8e34cbc11eeea3ea9745181b1f3a958430842993c3f7d3624d1251eb3e07d34e
|
|
4
|
+
data.tar.gz: f10be646a7e983ebed7ac2e27e2d7733335d5fae66ec2c556d694e49d2e4bba0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d86a24aa80866e8bde4579615909d5eef1a753b8ae483e9a9928ae1d1417a1d7819ac87339e0d2fd5a5ac9b220370790aacfb365805f7790c80e54a9e23cdcc0
|
|
7
|
+
data.tar.gz: 396870e7768e6342a7d7d9950fbef801a89e5219284217a20121f62b1e641395dfaded36b3db903e957da7751fa755221177ed4301bafcd41cbd688fbd858a55
|
data/.github/workflows/ruby.yml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
name: CI
|
|
2
2
|
|
|
3
|
-
on: [push, pull_request]
|
|
3
|
+
on: [push, pull_request, workflow_dispatch]
|
|
4
4
|
|
|
5
5
|
env:
|
|
6
6
|
# See https://github.com/jruby/jruby/issues/5509
|
|
@@ -11,14 +11,14 @@ jobs:
|
|
|
11
11
|
runs-on: ubuntu-latest
|
|
12
12
|
strategy:
|
|
13
13
|
matrix:
|
|
14
|
-
ruby: [2.
|
|
14
|
+
ruby: [2.4, 2.7, 3.0, 3.1, 3.2, 3.3, 3.4, 4.0, jruby, truffleruby, "truffleruby+graalvm"]
|
|
15
15
|
steps:
|
|
16
|
-
- uses: actions/checkout@
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
17
|
- name: Set up Ruby
|
|
18
18
|
uses: ruby/setup-ruby@v1
|
|
19
19
|
with:
|
|
20
20
|
ruby-version: ${{ matrix.ruby }}
|
|
21
21
|
- name: Install dependencies
|
|
22
|
-
run: bundle install
|
|
22
|
+
run: bundle install
|
|
23
23
|
- name: Run tests
|
|
24
24
|
run: bundle exec rake
|
data/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,35 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [1.4] - 2026-01-22
|
|
6
|
+
|
|
7
|
+
### Breaking Change
|
|
8
|
+
|
|
9
|
+
**ruby >= 2.4 is now required**.
|
|
10
|
+
|
|
11
|
+
2.3 was EOL'd almost 7 years ago, and backporting support for other
|
|
12
|
+
gems was deemed to be not worth it.
|
|
13
|
+
|
|
14
|
+
This is mostly a cleanup release, to squash warnings
|
|
15
|
+
about frozen strings in more recent rubies, and update the
|
|
16
|
+
CI configuration.
|
|
17
|
+
|
|
18
|
+
## [1.3] - 2025-01-09
|
|
19
|
+
|
|
20
|
+
### Breaking Change
|
|
21
|
+
|
|
22
|
+
**ruby >= 2.2 is now required**.
|
|
23
|
+
|
|
24
|
+
- Removed no-longer-necessary `unf` gem in favor of built-in string
|
|
25
|
+
methods for dealing with encodings. (Aaron Elkiss)
|
|
26
|
+
|
|
27
|
+
### Non-user-facing changes
|
|
28
|
+
|
|
29
|
+
- Pulled everything into the gemspec instead of having some stuff
|
|
30
|
+
hanging out in the Gemfile
|
|
31
|
+
- Changed tested rubies to only include 2.7, 3.[0,1,2,3,4], latest
|
|
32
|
+
jruby, and latest truffleruby (with and without graalvm)
|
|
33
|
+
|
|
5
34
|
## [1.2] - 2022-08-02
|
|
6
35
|
|
|
7
36
|
### Added
|
data/Gemfile
CHANGED
|
@@ -1,15 +1,5 @@
|
|
|
1
1
|
source "https://rubygems.org"
|
|
2
2
|
|
|
3
|
-
group :test do
|
|
4
|
-
if RUBY_VERSION != "1.8.7"
|
|
5
|
-
gem "nokogiri"
|
|
6
|
-
end
|
|
7
|
-
gem "rake"
|
|
8
|
-
gem "rdoc"
|
|
9
|
-
gem "xml-simple"
|
|
10
|
-
gem "test-unit"
|
|
11
|
-
gem "warning"
|
|
12
|
-
end
|
|
13
3
|
|
|
14
4
|
# Specify your gem's dependencies in marc.gemspec
|
|
15
5
|
gemspec
|
data/lib/marc/controlfield.rb
CHANGED
data/lib/marc/dublincore.rb
CHANGED
|
@@ -26,39 +26,41 @@ module MARC
|
|
|
26
26
|
end
|
|
27
27
|
|
|
28
28
|
dc_hash["publisher"] = begin
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
29
|
+
get_field_value(record["260"]["a"]["b"])
|
|
30
|
+
rescue
|
|
31
|
+
nil
|
|
32
|
+
end
|
|
33
33
|
dc_hash["date"] = begin
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
34
|
+
get_field_value(record["260"]["c"])
|
|
35
|
+
rescue
|
|
36
|
+
nil
|
|
37
|
+
end
|
|
38
38
|
dc_hash["type"] = get_field_value(record["655"])
|
|
39
39
|
dc_hash["format"] = begin
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
40
|
+
get_field_value(record["856"]["q"])
|
|
41
|
+
rescue
|
|
42
|
+
nil
|
|
43
|
+
end
|
|
44
44
|
dc_hash["identifier"] = begin
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
45
|
+
get_field_value(record["856"]["u"])
|
|
46
|
+
rescue
|
|
47
|
+
nil
|
|
48
|
+
end
|
|
49
49
|
dc_hash["source"] = begin
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
50
|
+
get_field_value(record["786"]["o"]["t"])
|
|
51
|
+
rescue
|
|
52
|
+
nil
|
|
53
|
+
end
|
|
54
54
|
dc_hash["language"] = get_field_value(record["546"])
|
|
55
55
|
|
|
56
56
|
dc_hash["relation"] = []
|
|
57
57
|
dc_hash["relation"] << get_field_value(record["530"])
|
|
58
58
|
("760".."787").each do |field|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
59
|
+
begin
|
|
60
|
+
dc_hash["relation"] << get_field_value(record[field]["o"]["t"])
|
|
61
|
+
rescue
|
|
62
|
+
nil
|
|
63
|
+
end
|
|
62
64
|
end
|
|
63
65
|
|
|
64
66
|
["651", "752"].each do |field|
|
data/lib/marc/jsonl_writer.rb
CHANGED
data/lib/marc/marc8/to_unicode.rb
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
require "marc"
|
|
2
2
|
require "marc/marc8/map_to_unicode"
|
|
3
|
-
require "unf/normalizer"
|
|
4
3
|
|
|
5
4
|
module MARC
|
|
6
5
|
module Marc8
|
|
@@ -66,14 +65,21 @@ module MARC
|
|
|
66
65
|
normalization = options.fetch(:normalization, :nfc)
|
|
67
66
|
|
|
68
67
|
# don't choke on empty marc8_string
|
|
69
|
-
return "" if marc8_string.nil? || marc8_string.empty?
|
|
68
|
+
return +"" if marc8_string.nil? || marc8_string.empty?
|
|
70
69
|
|
|
71
70
|
# Make sure to call it 'binary', so we can slice it
|
|
72
71
|
# byte by byte, and so ruby doesn't complain about bad
|
|
73
72
|
# bytes for some other encoding. Yeah, we're changing
|
|
74
73
|
# encoding on input! If it's Marc8, it ought to be tagged
|
|
75
74
|
# binary already.
|
|
76
|
-
|
|
75
|
+
|
|
76
|
+
# Due to the changes with default frozen strings, we'll check
|
|
77
|
+
# to see if it's binary already, and only duplicate the string
|
|
78
|
+
# if it's not. It SHOULD be binary already.
|
|
79
|
+
unless marc8_string.encoding.to_s == "ASCII-8BIT"
|
|
80
|
+
marc8_string = marc8_string.dup
|
|
81
|
+
marc8_string.force_encoding("binary")
|
|
82
|
+
end
|
|
77
83
|
|
|
78
84
|
uni_list = []
|
|
79
85
|
combinings = []
|
|
@@ -131,7 +137,7 @@ module MARC
|
|
|
131
137
|
end
|
|
132
138
|
|
|
133
139
|
if (code_point < 0x20) ||
|
|
134
|
-
|
|
140
|
+
((code_point > 0x80) && (code_point < 0xa0))
|
|
135
141
|
uni = unichr(code_point)
|
|
136
142
|
next
|
|
137
143
|
end
|
|
@@ -170,7 +176,7 @@ module MARC
|
|
|
170
176
|
end
|
|
171
177
|
|
|
172
178
|
if normalization
|
|
173
|
-
uni_str =
|
|
179
|
+
uni_str = uni_str.unicode_normalize(normalization)
|
|
174
180
|
end
|
|
175
181
|
|
|
176
182
|
uni_str
|
data/lib/marc/reader.rb
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
require "scrub_rb"
|
|
2
|
-
|
|
3
1
|
# Note: requiring 'marc/marc8/to_unicode' below, in #initialize,
|
|
4
2
|
# only when necessary
|
|
5
3
|
|
|
@@ -303,7 +301,11 @@ module MARC
|
|
|
303
301
|
# And now that we've recorded the current encoding, we force
|
|
304
302
|
# to binary encoding, because we're going to be doing byte arithmetic,
|
|
305
303
|
# and want to avoid byte-vs-char confusion.
|
|
306
|
-
|
|
304
|
+
|
|
305
|
+
if (marc.respond_to?(:force_encoding) && marc.encoding != "ASCII-8BIT")
|
|
306
|
+
marc = marc.dup
|
|
307
|
+
marc.force_encoding("binary")
|
|
308
|
+
end
|
|
307
309
|
|
|
308
310
|
record = Record.new
|
|
309
311
|
record.leader = marc[0..LEADER_LENGTH - 1]
|
|
@@ -348,7 +350,7 @@ module MARC
|
|
|
348
350
|
# if we were told to be forgiving we just use the
|
|
349
351
|
# next available chuck of field data that we
|
|
350
352
|
# split apart based on the END_OF_FIELD
|
|
351
|
-
field_data = ""
|
|
353
|
+
field_data = +""
|
|
352
354
|
if params[:forgiving]
|
|
353
355
|
field_data = all_fields.shift
|
|
354
356
|
|
|
@@ -444,10 +446,10 @@ module MARC
|
|
|
444
446
|
# in future implementations.
|
|
445
447
|
if params[:internal_encoding]
|
|
446
448
|
str = if RUBY_VERSION >= "3.0"
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
449
|
+
str.encode(params[:internal_encoding], **params)
|
|
450
|
+
else
|
|
451
|
+
str.encode(params[:internal_encoding], params)
|
|
452
|
+
end
|
|
451
453
|
elsif params[:invalid] || params[:replace] || (params[:validate_encoding] == true)
|
|
452
454
|
|
|
453
455
|
if params[:validate_encoding] == true && !str.valid_encoding?
|
|
@@ -484,11 +486,13 @@ module MARC
|
|
|
484
486
|
class ForgivingReader < Reader
|
|
485
487
|
def each
|
|
486
488
|
@handle.each_line(END_OF_RECORD) do |raw|
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
489
|
+
begin
|
|
490
|
+
record = MARC::Reader.decode(raw, @encoding_options.merge(forgiving: true))
|
|
491
|
+
yield record
|
|
492
|
+
rescue
|
|
493
|
+
# caught exception just keep barrelling along
|
|
494
|
+
# TODO add logging
|
|
495
|
+
end
|
|
492
496
|
end
|
|
493
497
|
end
|
|
494
498
|
end
|
data/lib/marc/subfield.rb
CHANGED
|
@@ -7,11 +7,11 @@ module MARC
|
|
|
7
7
|
class Subfield
|
|
8
8
|
attr_accessor :code, :value
|
|
9
9
|
|
|
10
|
-
def initialize(code = "", value = "")
|
|
10
|
+
def initialize(code = +"", value = +"")
|
|
11
11
|
# can't allow code or value to be nil
|
|
12
12
|
# or else it'll screw us up later on
|
|
13
|
-
@code = code.nil? ? "" : code
|
|
14
|
-
@value = value.nil? ? "" : value
|
|
13
|
+
@code = code.nil? ? +"" : code
|
|
14
|
+
@value = value.nil? ? +"" : value
|
|
15
15
|
end
|
|
16
16
|
|
|
17
17
|
def ==(other)
|
data/lib/marc/version.rb
CHANGED
data/lib/marc/writer.rb
CHANGED
|
@@ -62,12 +62,12 @@ module MARC
|
|
|
62
62
|
# Second arg allow_oversized, default false, set to true
|
|
63
63
|
# to raise on MARC record that can't fit into ISO 2709.
|
|
64
64
|
def self.encode(record, allow_oversized = false)
|
|
65
|
-
directory = ""
|
|
66
|
-
fields = ""
|
|
65
|
+
directory = +""
|
|
66
|
+
fields = +""
|
|
67
67
|
offset = 0
|
|
68
68
|
record.each do |field|
|
|
69
69
|
# encode the field
|
|
70
|
-
field_data = ""
|
|
70
|
+
field_data = +""
|
|
71
71
|
if field.instance_of?(MARC::DataField)
|
|
72
72
|
warn("Warn: Missing indicator") unless field.indicator1 && field.indicator2
|
|
73
73
|
field_data = (field.indicator1 || " ") + (field.indicator2 || " ")
|
data/lib/marc/xml_parsers.rb
CHANGED
|
@@ -56,7 +56,7 @@ module MARC
|
|
|
56
56
|
SF_TAG = "subfield".freeze
|
|
57
57
|
|
|
58
58
|
def init
|
|
59
|
-
@record = {record: nil, leader: "", field: nil, subfield: nil}
|
|
59
|
+
@record = {record: nil, leader: +"", field: nil, subfield: nil}
|
|
60
60
|
@current_element = nil
|
|
61
61
|
@ns = "http://www.loc.gov/MARC21/slim"
|
|
62
62
|
end
|
|
@@ -115,7 +115,7 @@ module MARC
|
|
|
115
115
|
when REC_TAG then yield_record
|
|
116
116
|
when LEAD_TAG
|
|
117
117
|
@record[:record].leader = @record[:leader]
|
|
118
|
-
@record[:leader] = ""
|
|
118
|
+
@record[:leader] = +""
|
|
119
119
|
@current_element = nil if @current_element == :leader
|
|
120
120
|
end
|
|
121
121
|
end
|
|
@@ -238,7 +238,7 @@ module MARC
|
|
|
238
238
|
data_field = nil
|
|
239
239
|
control_field = nil
|
|
240
240
|
subfield = nil
|
|
241
|
-
text = ""
|
|
241
|
+
text = +""
|
|
242
242
|
attrs = nil
|
|
243
243
|
if Module.constants.index("Nokogiri") && @parser.is_a?(Nokogiri::XML::Reader)
|
|
244
244
|
datafield = nil
|
|
@@ -295,18 +295,18 @@ module MARC
|
|
|
295
295
|
end
|
|
296
296
|
|
|
297
297
|
if event.start_element?
|
|
298
|
-
text = ""
|
|
298
|
+
text = +""
|
|
299
299
|
attrs = event[1]
|
|
300
300
|
case strip_ns(event[0])
|
|
301
301
|
when "controlfield"
|
|
302
|
-
text = ""
|
|
302
|
+
text = +""
|
|
303
303
|
control_field = MARC::ControlField.new(attrs[TAG])
|
|
304
304
|
when "datafield"
|
|
305
|
-
text = ""
|
|
305
|
+
text = +""
|
|
306
306
|
data_field = MARC::DataField.new(attrs[TAG], attrs[IND1],
|
|
307
307
|
attrs[IND2])
|
|
308
308
|
when "subfield"
|
|
309
|
-
text = ""
|
|
309
|
+
text = +""
|
|
310
310
|
subfield = MARC::Subfield.new(attrs[CODE])
|
|
311
311
|
end
|
|
312
312
|
end
|
data/marc.gemspec
CHANGED
|
@@ -8,7 +8,7 @@ Gem::Specification.new do |s|
|
|
|
8
8
|
s.homepage = "https://github.com/ruby-marc/ruby-marc/"
|
|
9
9
|
s.summary = "A ruby library for working with Machine Readable Cataloging"
|
|
10
10
|
s.license = "MIT"
|
|
11
|
-
s.required_ruby_version = ">=
|
|
11
|
+
s.required_ruby_version = ">= 2.3.0"
|
|
12
12
|
s.authors = ["Kevin Clarke", "Bill Dueber", "William Groppe", "Jonathan Rochkind", "Ross Singer", "Ed Summers", "Chris Beer"]
|
|
13
13
|
|
|
14
14
|
s.files = `git ls-files -z`.split("\x0")
|
|
@@ -16,8 +16,14 @@ Gem::Specification.new do |s|
|
|
|
16
16
|
s.test_files = s.files.grep(%r{^(test|spec|features)/})
|
|
17
17
|
s.require_paths = ["lib"]
|
|
18
18
|
|
|
19
|
-
s.
|
|
20
|
-
s.add_dependency "scrub_rb", ">= 1.0.1", "< 2" # backport for ruby 2.1 String#scrub
|
|
21
|
-
s.add_dependency "unf" # unicode normalization
|
|
19
|
+
s.add_dependency "nokogiri", "~>1.0"
|
|
22
20
|
s.add_dependency "rexml" # rexml was unbundled from the stdlib in ruby 3
|
|
21
|
+
|
|
22
|
+
s.add_development_dependency "rake", "~>13.0"
|
|
23
|
+
s.add_development_dependency "test-unit", "~>3.0"
|
|
24
|
+
s.add_development_dependency "standard", "~>1.0"
|
|
25
|
+
s.add_development_dependency "warning", "~>1.5"
|
|
26
|
+
s.add_development_dependency "xml-simple"
|
|
27
|
+
s.add_development_dependency "rdoc"
|
|
28
|
+
|
|
23
29
|
end
|
data/spec/controlfield_spec.rb
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
RSpec.describe MARC::ControlField do
|
|
4
|
+
it "formats a control field correctly" do
|
|
5
|
+
control = MARC::ControlField.new("005", "foobarbaz")
|
|
6
|
+
expect(control.to_s).to eq("005 foobarbaz")
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
it "rejects data field as control field" do
|
|
10
|
+
field = MARC::DataField.new("007")
|
|
11
|
+
expect(field.valid?).to be(false)
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
it "rejects alpha control field tags" do
|
|
15
|
+
# can't have a field with a tag < 010
|
|
16
|
+
field = MARC::ControlField.new("DDD")
|
|
17
|
+
expect(field.valid?).to be(false)
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
it "supports adding custom control field tags" do
|
|
21
|
+
MARC::ControlField.control_tags << "FMT"
|
|
22
|
+
field = MARC::ControlField.new("FMT")
|
|
23
|
+
expect(field.valid?).to be(true)
|
|
24
|
+
field = MARC::DataField.new("FMT")
|
|
25
|
+
expect(field.valid?).to be(false)
|
|
26
|
+
MARC::ControlField.control_tags.delete("FMT")
|
|
27
|
+
field = MARC::DataField.new("FMT")
|
|
28
|
+
expect(field.valid?).to be(true)
|
|
29
|
+
field = MARC::ControlField.new("FMT")
|
|
30
|
+
expect(field.valid?).to be(false)
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
it "rejects control field with data field tag" do
|
|
34
|
+
# can't have a control with a tag > 009
|
|
35
|
+
f = MARC::ControlField.new("245")
|
|
36
|
+
expect(f.valid?).to be(false)
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
it "compares control fields correctly" do
|
|
40
|
+
f1 = MARC::ControlField.new("001", "foobarbaz")
|
|
41
|
+
f2 = MARC::ControlField.new("001", "foobarbaz")
|
|
42
|
+
expect(f1).to eq(f2)
|
|
43
|
+
|
|
44
|
+
f3 = MARC::ControlField.new("001", "foobarbazqux")
|
|
45
|
+
expect(f1).not_to eq(f3)
|
|
46
|
+
f4 = MARC::ControlField.new("002", "foobarbaz")
|
|
47
|
+
expect(f1).not_to eq(f4)
|
|
48
|
+
|
|
49
|
+
expect(f1).not_to eq("001")
|
|
50
|
+
expect(f2).not_to eq("foobarbaz")
|
|
51
|
+
end
|
|
52
|
+
end
|
data/spec/datafield_spec.rb
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
RSpec.describe MARC::DataField do
|
|
4
|
+
it "handles tags correctly" do
|
|
5
|
+
f1 = MARC::DataField.new("100")
|
|
6
|
+
expect(f1.tag).to eq("100")
|
|
7
|
+
f2 = MARC::DataField.new("100")
|
|
8
|
+
expect(f2.tag).to eq("100")
|
|
9
|
+
expect(f1).to eq(f2)
|
|
10
|
+
f3 = MARC::DataField.new("245")
|
|
11
|
+
expect(f1).not_to eq(f3)
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
it "handles alphabetic tags" do
|
|
15
|
+
alph = MARC::DataField.new("ALF")
|
|
16
|
+
expect(alph.tag).to eq("ALF")
|
|
17
|
+
|
|
18
|
+
alphnum = MARC::DataField.new("0D9")
|
|
19
|
+
expect(alphnum.tag).to eq("0D9")
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
it "handles indicators" do
|
|
23
|
+
f1 = MARC::DataField.new("100", "0", "1")
|
|
24
|
+
expect(f1.indicator1).to eq("0")
|
|
25
|
+
expect(f1.indicator2).to eq("1")
|
|
26
|
+
f2 = MARC::DataField.new("100", "0", "1")
|
|
27
|
+
expect(f2.indicator1).to eq("0")
|
|
28
|
+
expect(f2.indicator2).to eq("1")
|
|
29
|
+
expect(f1).to eq(f2)
|
|
30
|
+
f3 = MARC::DataField.new("100", "1", "1")
|
|
31
|
+
expect(f1).not_to eq(f3)
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
it "handles subfields" do
|
|
35
|
+
f1 = MARC::DataField.new("100", "0", "1",
|
|
36
|
+
MARC::Subfield.new("a", "Foo"),
|
|
37
|
+
MARC::Subfield.new("b", "Bar"))
|
|
38
|
+
expect(f1.to_s).to eq("100 01 $a Foo $b Bar ")
|
|
39
|
+
expect(f1.value).to eq("FooBar")
|
|
40
|
+
f2 = MARC::DataField.new("100", "0", "1",
|
|
41
|
+
MARC::Subfield.new("a", "Foo"),
|
|
42
|
+
MARC::Subfield.new("b", "Bar"))
|
|
43
|
+
expect(f1).to eq(f2)
|
|
44
|
+
f3 = MARC::DataField.new("100", "0", "1",
|
|
45
|
+
MARC::Subfield.new("a", "Foo"),
|
|
46
|
+
MARC::Subfield.new("b", "Bez"))
|
|
47
|
+
expect(f1).not_to eq(f3)
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
it "supports subfield shorthand" do
|
|
51
|
+
f = MARC::DataField.new("100", "0", "1", ["a", "Foo"], ["b", "Bar"])
|
|
52
|
+
expect(f.to_s).to eq("100 01 $a Foo $b Bar ")
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
it "iterates through subfields" do
|
|
56
|
+
field = MARC::DataField.new("100", "0", "1", ["a", "Foo"], ["b", "Bar"],
|
|
57
|
+
["a", "Bez"])
|
|
58
|
+
count = 0
|
|
59
|
+
field.each { |x| count += 1 }
|
|
60
|
+
expect(count).to eq(3)
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
it "supports lookup shorthand" do
|
|
64
|
+
f = MARC::DataField.new("100", "0", "1", ["a", "Foo"], ["b", "Bar"])
|
|
65
|
+
expect(f["b"]).to eq("Bar")
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
it "distinguishes from other types" do
|
|
69
|
+
f = MARC::DataField.new("100", "0", "1",
|
|
70
|
+
MARC::Subfield.new("a", "Foo"),
|
|
71
|
+
MARC::Subfield.new("b", "Bar"))
|
|
72
|
+
expect(f).not_to eq("100 01 $a Foo $b Bar ")
|
|
73
|
+
expect(f).not_to eq(f["a"])
|
|
74
|
+
end
|
|
75
|
+
end
|
data/spec/reader_char_encodings_spec.rb
ADDED
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
require 'stringio'
|
|
3
|
+
|
|
4
|
+
# Testing char encodings under 1.9, don't bother running
|
|
5
|
+
# these tests except under 1.9, will either fail (because
|
|
6
|
+
# 1.9 func the test itself uses isn't there), or trivially pass
|
|
7
|
+
# (because the func they are testing is a no-op before 1.9).
|
|
8
|
+
|
|
9
|
+
if "".respond_to?(:encoding)
|
|
10
|
+
RSpec.describe "Reader Character Encodings" do
|
|
11
|
+
# Common test files
|
|
12
|
+
let(:utf_marc_path) { "test/utf8.marc" }
|
|
13
|
+
let(:cp866_marc_path) { "test/cp866_multirecord.marc" }
|
|
14
|
+
let(:bad_marc8_path) { "test/bad_eacc_encoding.marc8.marc" }
|
|
15
|
+
|
|
16
|
+
# Helper methods
|
|
17
|
+
def assert_utf8_right_in_utf8(record)
|
|
18
|
+
expect(record["245"].subfields.first.value.encoding.name).to eq("UTF-8")
|
|
19
|
+
expect(record["245"].to_s.encoding.name).to eq("UTF-8")
|
|
20
|
+
expect(record["245"].subfields.first.to_s.encoding.name).to eq("UTF-8")
|
|
21
|
+
expect(record["245"].subfields.first.value.encoding.name).to eq("UTF-8")
|
|
22
|
+
expect(record["245"]["a"].encoding.name).to eq("UTF-8")
|
|
23
|
+
expect(record["245"]["a"]).to start_with("Photčhanānukrom")
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def assert_cp866_right(record, encoding = "IBM866")
|
|
27
|
+
expect(record["001"].value.encoding.name).to eq(encoding)
|
|
28
|
+
expect(record["001"].value.encode("UTF-8").unpack("H4")).to eq(["d09d"]) # russian capital N
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def assert_all_values_valid_encoding(record, encoding_name = "UTF-8")
|
|
32
|
+
record.fields.each do |field|
|
|
33
|
+
if field.is_a? MARC::DataField
|
|
34
|
+
field.subfields.each do |sf|
|
|
35
|
+
expect(sf.value.encoding.name).to eq(encoding_name), "Is tagged #{encoding_name}: #{field.tag}: #{sf}"
|
|
36
|
+
expect(field.value.valid_encoding?).to be(true), "Is valid encoding: #{field.tag}: #{sf}"
|
|
37
|
+
end
|
|
38
|
+
else
|
|
39
|
+
expect(field.value.encoding.name).to eq(encoding_name), "Is tagged #{encoding_name}: #{field}"
|
|
40
|
+
expect(field.value.valid_encoding?).to be(true), "Is valid encoding: #{field}"
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
it "loads unicode correctly" do
|
|
46
|
+
reader = MARC::Reader.new(utf_marc_path)
|
|
47
|
+
record = nil
|
|
48
|
+
expect { record = reader.first }.not_to raise_error
|
|
49
|
+
assert_utf8_right_in_utf8(record)
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
it "decodes unicode with forgiving mode" do
|
|
53
|
+
# two kinds of forgiving invocation, they shouldn't be different,
|
|
54
|
+
# but just in case they have slightly different code paths, test em too.
|
|
55
|
+
marc_string = File.read(utf_marc_path).force_encoding("utf-8")
|
|
56
|
+
record = MARC::Reader.decode(marc_string, forgiving: true)
|
|
57
|
+
assert_utf8_right_in_utf8(record)
|
|
58
|
+
|
|
59
|
+
reader = MARC::ForgivingReader.new(utf_marc_path)
|
|
60
|
+
record = reader.first
|
|
61
|
+
assert_utf8_right_in_utf8(record)
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
it "passes options through ForgivingReader" do
|
|
65
|
+
# Make sure ForgivingReader accepts same options as MARC::Reader
|
|
66
|
+
# We don't test them ALL though, just a sample.
|
|
67
|
+
# Tell it we're reading cp866, but transcode to utf8 for us.
|
|
68
|
+
reader = MARC::ForgivingReader.new(cp866_marc_path, external_encoding: "cp866", internal_encoding: "utf-8")
|
|
69
|
+
record = reader.first
|
|
70
|
+
assert_cp866_right(record, "UTF-8")
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
it "handles explicit encoding" do
|
|
74
|
+
reader = MARC::Reader.new(cp866_marc_path, external_encoding: "cp866")
|
|
75
|
+
assert_cp866_right(reader.first, "IBM866")
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
it "raises error on bad encoding name" do
|
|
79
|
+
reader = MARC::Reader.new(cp866_marc_path, external_encoding: "adadfadf")
|
|
80
|
+
expect { reader.first }.to raise_error(ArgumentError)
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
it "handles marc8 with binary encoding" do
|
|
84
|
+
# Marc8, if we want to keep it without transcoding, best we can do is read it in binary.
|
|
85
|
+
reader = MARC::Reader.new("test/marc8_accented_chars.marc", external_encoding: "binary")
|
|
86
|
+
record = reader.first
|
|
87
|
+
expect(record["100"].subfields.first.value.encoding.name).to eq("ASCII-8BIT")
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
it "converts marc8 to unicode" do
|
|
91
|
+
reader = MARC::Reader.new("test/marc8_accented_chars.marc", external_encoding: "MARC-8")
|
|
92
|
+
record = reader.first
|
|
93
|
+
assert_all_values_valid_encoding(record)
|
|
94
|
+
expect(record["100"]["a"]).to eq("Serreau, Geneviève.")
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
it "converts marc8 to unicode with file handle" do
|
|
98
|
+
# had some trouble with this one, let's ensure it with a test
|
|
99
|
+
file = File.new("test/marc8_accented_chars.marc")
|
|
100
|
+
reader = MARC::Reader.new(file, external_encoding: "MARC-8")
|
|
101
|
+
record = reader.first
|
|
102
|
+
assert_all_values_valid_encoding(record)
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
it "handles marc8 with character entities" do
|
|
106
|
+
reader = MARC::Reader.new("test/escaped_character_reference.marc8.marc", external_encoding: "MARC-8")
|
|
107
|
+
record = reader.first
|
|
108
|
+
assert_all_values_valid_encoding(record)
|
|
109
|
+
expect(record["260"]["a"]).to eq("Rio de Janeiro escaped replacement char: \uFFFD .")
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
it "raises error on bad marc8" do
|
|
113
|
+
expect {
|
|
114
|
+
reader = MARC::Reader.new(bad_marc8_path, external_encoding: "MARC-8")
|
|
115
|
+
reader.first
|
|
116
|
+
}.to raise_error(Encoding::InvalidByteSequenceError)
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
it "handles bad marc8 with replacement" do
|
|
120
|
+
reader = MARC::Reader.new(bad_marc8_path, external_encoding: "MARC-8", invalid: :replace, replace: "[?]")
|
|
121
|
+
record = reader.first
|
|
122
|
+
assert_all_values_valid_encoding(record)
|
|
123
|
+
expect(record["880"]["a"]).to include("[?]")
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
it "handles files opened with external encoding" do
|
|
127
|
+
reader = MARC::Reader.new(File.open(cp866_marc_path, "r:cp866"))
|
|
128
|
+
record = reader.first
|
|
129
|
+
assert_cp866_right(record, "IBM866")
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
it "prioritizes explicit encoding over file encoding" do
|
|
133
|
+
reader = MARC::Reader.new(File.open(cp866_marc_path, "r:utf-8"), external_encoding: "cp866")
|
|
134
|
+
record = reader.first
|
|
135
|
+
assert_cp866_right(record, "IBM866")
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
it "handles strings with utf8 encoding" do
|
|
139
|
+
marc_file = File.open(utf_marc_path)
|
|
140
|
+
reader = MARC::Reader.new(marc_file)
|
|
141
|
+
expect { reader.first }.not_to raise_error
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
it "handles utf8 with bad bytes" do
|
|
145
|
+
marc_file = File.open("test/marc_with_bad_utf8.utf8.marc")
|
|
146
|
+
reader = MARC::Reader.new(marc_file, invalid: :replace)
|
|
147
|
+
record = reader.first
|
|
148
|
+
|
|
149
|
+
record.fields.each do |field|
|
|
150
|
+
if field.is_a? MARC::ControlField
|
|
151
|
+
expect(field.value.encoding.name).to eq("UTF-8")
|
|
152
|
+
expect(field.value.valid_encoding?).to be(true)
|
|
153
|
+
else
|
|
154
|
+
field.subfields.each do |subfield|
|
|
155
|
+
expect(subfield.value.encoding.name).to eq("UTF-8")
|
|
156
|
+
expect(subfield.value.valid_encoding?).to be(true)
|
|
157
|
+
end
|
|
158
|
+
end
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
expect(record["520"]["a"]).to include("\uFFFD")
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
it "handles string with cp866 encoding" do
|
|
165
|
+
marc_string = File.read(cp866_marc_path).force_encoding("cp866")
|
|
166
|
+
reader = MARC::Reader.new(StringIO.new(marc_string))
|
|
167
|
+
record = reader.first
|
|
168
|
+
assert_cp866_right(record, "IBM866")
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
it "decodes strings with cp866 encoding" do
|
|
172
|
+
marc_string = File.read(cp866_marc_path).force_encoding("cp866")
|
|
173
|
+
record = MARC::Reader.decode(marc_string)
|
|
174
|
+
assert_cp866_right(record, "IBM866")
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
it "supports transcoding" do
|
|
178
|
+
reader = MARC::Reader.new(cp866_marc_path,
|
|
179
|
+
external_encoding: "cp866",
|
|
180
|
+
internal_encoding: "UTF-8")
|
|
181
|
+
record = reader.first
|
|
182
|
+
assert_cp866_right(record, "UTF-8")
|
|
183
|
+
end
|
|
184
|
+
|
|
185
|
+
it "works with binary filehandle" do
|
|
186
|
+
# about to recommend this as a foolproof way to avoid
|
|
187
|
+
# ruby transcoding behind your back in docs, let's make
|
|
188
|
+
# sure it really works.
|
|
189
|
+
reader = MARC::Reader.new(File.open(cp866_marc_path, external_encoding: "binary", internal_encoding: "binary"),
|
|
190
|
+
external_encoding: "IBM866")
|
|
191
|
+
record = reader.first
|
|
192
|
+
assert_cp866_right(record, "IBM866")
|
|
193
|
+
end
|
|
194
|
+
|
|
195
|
+
it "handles bad source bytes" do
|
|
196
|
+
reader = MARC::Reader.new("test/utf8_with_bad_bytes.marc",
|
|
197
|
+
external_encoding: "UTF-8",
|
|
198
|
+
validate_encoding: true)
|
|
199
|
+
expect { reader.first }.to raise_error(Encoding::InvalidByteSequenceError)
|
|
200
|
+
end
|
|
201
|
+
|
|
202
|
+
it "replaces bad source bytes when configured" do
|
|
203
|
+
reader = MARC::Reader.new("test/utf8_with_bad_bytes.marc",
|
|
204
|
+
external_encoding: "UTF-8", invalid: :replace)
|
|
205
|
+
record = nil
|
|
206
|
+
expect { record = reader.first }.not_to raise_error
|
|
207
|
+
expect(record["245"]["a"]).to match(/=> #{"\uFFFD"} \(<=/)
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
it "supports custom replacement for bad bytes" do
|
|
211
|
+
reader = MARC::Reader.new("test/utf8_with_bad_bytes.marc",
|
|
212
|
+
external_encoding: "UTF-8", invalid: :replace, replace: "")
|
|
213
|
+
record = reader.first
|
|
214
|
+
expect(record["245"]["a"]).to match(/=> \( <=/)
|
|
215
|
+
end
|
|
216
|
+
|
|
217
|
+
it "works with default_internal encoding" do
|
|
218
|
+
original = Encoding.default_internal
|
|
219
|
+
Encoding.default_internal = "UTF-8"
|
|
220
|
+
|
|
221
|
+
reader = MARC::Reader.new(File.open(cp866_marc_path, "r:cp866"))
|
|
222
|
+
record = reader.first
|
|
223
|
+
assert_cp866_right(record, "IBM866")
|
|
224
|
+
ensure
|
|
225
|
+
Encoding.default_internal = original
|
|
226
|
+
end
|
|
227
|
+
|
|
228
|
+
it "works with default_internal encoding using string arg" do
|
|
229
|
+
original = Encoding.default_internal
|
|
230
|
+
Encoding.default_internal = "UTF-8"
|
|
231
|
+
|
|
232
|
+
reader = MARC::Reader.new(cp866_marc_path, external_encoding: "cp866")
|
|
233
|
+
record = reader.first
|
|
234
|
+
assert_cp866_right(record, "IBM866")
|
|
235
|
+
ensure
|
|
236
|
+
Encoding.default_internal = original
|
|
237
|
+
end
|
|
238
|
+
end
|
|
239
|
+
else
|
|
240
|
+
RSpec.describe "Reader Character Encodings" do
|
|
241
|
+
it "skips tests on Ruby < 1.9" do
|
|
242
|
+
skip("Tests not being run in ruby 1.9.x or higher")
|
|
243
|
+
end
|
|
244
|
+
end
|
|
245
|
+
end
|
data/spec/reader_spec.rb
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
RSpec.describe MARC::Reader do
|
|
4
|
+
it "reads batch records correctly" do
|
|
5
|
+
reader = MARC::Reader.new("test/batch.dat")
|
|
6
|
+
count = 0
|
|
7
|
+
reader.each { count += 1 }
|
|
8
|
+
expect(count).to eq(10)
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
it "handles loose records with ForgivingReader" do
|
|
12
|
+
reader = MARC::ForgivingReader.new("test/batch.dat")
|
|
13
|
+
count = 0
|
|
14
|
+
reader.each { count += 1 }
|
|
15
|
+
expect(count).to eq(10)
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
it "handles UTF-8 in ForgivingReader" do
|
|
19
|
+
# This isn't actually a corrupt file, but it is utf8,
|
|
20
|
+
# and I have some reason to believe forgiving reader isn't
|
|
21
|
+
# working properly with UTF8 in ruby 1.9, so testing it.
|
|
22
|
+
reader = MARC::ForgivingReader.new("test/utf8.marc")
|
|
23
|
+
count = 0
|
|
24
|
+
reader.each { count += 1 }
|
|
25
|
+
expect(count).to eq(1)
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
it "handles unimarc records" do
|
|
29
|
+
# Unimarc might use a different record separator? Let's make sure it works.
|
|
30
|
+
reader = MARC::Reader.new(File.open("test/cp866_unimarc.marc", "r:cp866"))
|
|
31
|
+
count = 0
|
|
32
|
+
reader.each { |a| count += 1 }
|
|
33
|
+
expect(count).to eq(1)
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
it "handles non-numeric tags" do
|
|
37
|
+
reader = MARC::Reader.new("test/non-numeric.dat")
|
|
38
|
+
count = 0
|
|
39
|
+
record = nil
|
|
40
|
+
reader.each do |rec|
|
|
41
|
+
count += 1
|
|
42
|
+
record = rec
|
|
43
|
+
end
|
|
44
|
+
expect(count).to eq(1)
|
|
45
|
+
expect(record["ISB"]["a"]).to eq("9780061317842")
|
|
46
|
+
expect(record["LOC"]["9"]).to eq("1")
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
it "raises exception for bad MARC data" do
|
|
50
|
+
reader = MARC::Reader.new("test/tc_reader.rb")
|
|
51
|
+
expect { reader.entries[0] }.to raise_error(MARC::Exception)
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
it "supports search functionality" do
|
|
55
|
+
reader = MARC::Reader.new("test/batch.dat")
|
|
56
|
+
records = reader.find_all { |r| r =~ /Perl/ }
|
|
57
|
+
expect(records.length).to eq(10)
|
|
58
|
+
|
|
59
|
+
reader = MARC::Reader.new("test/batch.dat")
|
|
60
|
+
records = reader.find_all { |r| r["245"] =~ /Perl/ }
|
|
61
|
+
expect(records.length).to eq(10)
|
|
62
|
+
|
|
63
|
+
reader = MARC::Reader.new("test/batch.dat")
|
|
64
|
+
records = reader.find_all { |r| r["245"]["a"] =~ /Perl/ }
|
|
65
|
+
expect(records.length).to eq(10)
|
|
66
|
+
|
|
67
|
+
reader = MARC::Reader.new("test/batch.dat")
|
|
68
|
+
records = reader.find_all { |r| r =~ /Foo/ }
|
|
69
|
+
expect(records.length).to eq(0)
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
it "provides a binary enumerator" do
|
|
73
|
+
reader = MARC::Reader.new("test/batch.dat")
|
|
74
|
+
iter = reader.each
|
|
75
|
+
r = iter.next
|
|
76
|
+
expect(r).to be_an_instance_of(MARC::Record)
|
|
77
|
+
9.times { iter.next } # total of ten records
|
|
78
|
+
expect { iter.next }.to raise_error(StopIteration)
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
it "supports each_raw method" do
|
|
82
|
+
reader = MARC::Reader.new("test/batch.dat")
|
|
83
|
+
count = 0
|
|
84
|
+
raw = nil
|
|
85
|
+
reader.each_raw { |r|
|
|
86
|
+
count += 1
|
|
87
|
+
raw = r
|
|
88
|
+
}
|
|
89
|
+
expect(count).to eq(10)
|
|
90
|
+
expect(raw).to be_an_instance_of(String)
|
|
91
|
+
|
|
92
|
+
record = MARC::Reader.decode(raw)
|
|
93
|
+
expect(record).to be_an_instance_of(MARC::Record)
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
it "supports each_raw enumerator" do
|
|
97
|
+
reader = MARC::Reader.new("test/batch.dat")
|
|
98
|
+
enum = reader.each_raw
|
|
99
|
+
r = enum.next
|
|
100
|
+
expect(r).to be_an_instance_of(String)
|
|
101
|
+
|
|
102
|
+
record = MARC::Reader.decode(r)
|
|
103
|
+
expect(record).to be_an_instance_of(MARC::Record)
|
|
104
|
+
|
|
105
|
+
9.times { enum.next } # total of ten records
|
|
106
|
+
expect { enum.next }.to raise_error(StopIteration)
|
|
107
|
+
end
|
|
108
|
+
end
|
data/spec/spec_helper.rb
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
require 'rspec'
|
|
2
|
+
require 'marc'
|
|
3
|
+
|
|
4
|
+
RSpec.configure do |config|
|
|
5
|
+
config.expect_with :rspec do |expectations|
|
|
6
|
+
expectations.include_chain_clauses_in_custom_matcher_descriptions = true
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
config.mock_with :rspec do |mocks|
|
|
10
|
+
mocks.verify_partial_doubles = true
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
config.shared_context_metadata_behavior = :apply_to_host_groups
|
|
14
|
+
config.filter_run_when_matching :focus
|
|
15
|
+
config.disable_monkey_patching!
|
|
16
|
+
config.warnings = true
|
|
17
|
+
|
|
18
|
+
if config.files_to_run.one?
|
|
19
|
+
config.default_formatter = "doc"
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
config.order = :random
|
|
23
|
+
Kernel.srand config.seed
|
|
24
|
+
end
|
data/spec/writer_spec.rb
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
require 'stringio'
|
|
3
|
+
|
|
4
|
+
RSpec.describe MARC::Writer do
|
|
5
|
+
it "writes and reads MARC records properly" do
|
|
6
|
+
writer = MARC::Writer.new("test/writer.dat")
|
|
7
|
+
record = MARC::Record.new
|
|
8
|
+
record.append(MARC::DataField.new("245", "0", "1", ["a", "foo"]))
|
|
9
|
+
writer.write(record)
|
|
10
|
+
writer.close
|
|
11
|
+
|
|
12
|
+
# read it back to make sure
|
|
13
|
+
reader = MARC::Reader.new("test/writer.dat")
|
|
14
|
+
records = reader.entries
|
|
15
|
+
expect(records.length).to eq(1)
|
|
16
|
+
expect(records[0]).to eq(record)
|
|
17
|
+
|
|
18
|
+
# cleanup
|
|
19
|
+
File.unlink("test/writer.dat")
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
if "".respond_to?(:encoding)
|
|
23
|
+
it "handles mixed encodings properly" do
|
|
24
|
+
writer = MARC::Writer.new("test/writer.dat")
|
|
25
|
+
|
|
26
|
+
# MARC::Writer should just happily write out whatever bytes you give it, even
|
|
27
|
+
# mixing encodings that can't be mixed. We ran into an actual example mixing
|
|
28
|
+
# MARC8 (tagged ruby binary) and UTF8, we want it to be written out.
|
|
29
|
+
|
|
30
|
+
record = MARC::Record.new
|
|
31
|
+
|
|
32
|
+
record.append MARC::DataField.new("700", "0", " ", ["a", "Nhouy Abhay,".force_encoding("BINARY")], ["c", "Th\xE5ao,".force_encoding("BINARY")], ["d", "1909-"])
|
|
33
|
+
record.append MARC::DataField.new("700", "0", " ", ["a", "Somchin P\xF8\xE5o. Ngin,".force_encoding("BINARY")])
|
|
34
|
+
|
|
35
|
+
record.append MARC::DataField.new("100", "0", "0", ["a", "\xE5angkham. ".force_encoding("BINARY")])
|
|
36
|
+
record.append MARC::DataField.new("245", "1", "0", ["b", "chef-d'oeuvre de la litt\xE2erature lao".force_encoding("BINARY")])
|
|
37
|
+
|
|
38
|
+
# One in UTF8 and marked
|
|
39
|
+
record.append MARC::DataField.new("999", "0", "1", ["a", "chef-d'ocuvre de la littU+FFC3\U+FFA9rature".force_encoding("UTF-8")])
|
|
40
|
+
|
|
41
|
+
writer.write(record)
|
|
42
|
+
writer.close
|
|
43
|
+
ensure
|
|
44
|
+
File.unlink("test/writer.dat") if File.exist?("test/writer.dat")
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
it "supports oversized records when configured" do
|
|
49
|
+
too_long_record = MARC::Record.new
|
|
50
|
+
1.upto(1001) do
|
|
51
|
+
too_long_record.append MARC::DataField.new("500", " ", " ", ["a", "A really long record.1234567890123456789012345678901234567890123456789012345678901234567890123456789"])
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
wbuffer = StringIO.new("", "w")
|
|
55
|
+
writer = MARC::Writer.new(wbuffer)
|
|
56
|
+
writer.allow_oversized = true
|
|
57
|
+
|
|
58
|
+
writer.write(too_long_record)
|
|
59
|
+
writer.close
|
|
60
|
+
|
|
61
|
+
expect(wbuffer.string.slice(0, 5)).to eq("00000")
|
|
62
|
+
|
|
63
|
+
rbuffer = StringIO.new(wbuffer.string.dup)
|
|
64
|
+
|
|
65
|
+
# The forgiving reader can decode it, so the record round-trips
|
|
66
|
+
new_record = MARC::Reader.decode(rbuffer.string, forgiving: true)
|
|
67
|
+
expect(new_record).to eq(too_long_record)
|
|
68
|
+
|
|
69
|
+
# Test in the middle of a MARC file
|
|
70
|
+
good_record = MARC::Record.new
|
|
71
|
+
good_record.append MARC::DataField.new("500", " ", " ", ["a", "A short record"])
|
|
72
|
+
wbuffer = StringIO.new("", "w")
|
|
73
|
+
writer = MARC::Writer.new(wbuffer)
|
|
74
|
+
writer.allow_oversized = true
|
|
75
|
+
|
|
76
|
+
writer.write(good_record)
|
|
77
|
+
writer.write(too_long_record)
|
|
78
|
+
writer.write(good_record)
|
|
79
|
+
|
|
80
|
+
rbuffer = StringIO.new(wbuffer.string.dup)
|
|
81
|
+
reader = MARC::ForgivingReader.new(rbuffer)
|
|
82
|
+
records = reader.to_a
|
|
83
|
+
|
|
84
|
+
expect(records.length).to eq(3)
|
|
85
|
+
expect(records[0]).to eq(good_record)
|
|
86
|
+
expect(records[2]).to eq(good_record)
|
|
87
|
+
expect(records[1]).to eq(too_long_record)
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
it "raises exception for oversized records by default" do
|
|
91
|
+
too_long_record = MARC::Record.new
|
|
92
|
+
1.upto(1001) do
|
|
93
|
+
too_long_record.append MARC::DataField.new("500", " ", " ", ["a", "A really long record.1234567890123456789012345678901234567890123456789012345678901234567890123456789"])
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
wbuffer = StringIO.new("", "w")
|
|
97
|
+
writer = MARC::Writer.new(wbuffer)
|
|
98
|
+
|
|
99
|
+
expect { writer.write too_long_record }.to raise_error(MARC::Exception)
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
it "handles forgiving writing" do
|
|
103
|
+
marc = "00305cam a2200133 a 4500001000700000003000900007005001700016008004100033008004100074035002500115245001700140909001000157909000400167\036635145\036UK-BiLMS\03620060329173705.0\036s1982iieng6 000 0 eng||\036060116|||||||||xxk eng||\036 \037a(UK-BiLMS)M0017366ZW\03600\037aTest record.\036 \037aa\037b\037c\036\037b0\036\035\000"
|
|
104
|
+
rec = MARC::Record.new_from_marc(marc)
|
|
105
|
+
expect { rec.to_marc }.not_to raise_error
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
it "handles Unicode roundtrip" do
|
|
109
|
+
record = MARC::Reader.new("test/utf8.marc", external_encoding: "UTF-8").first
|
|
110
|
+
|
|
111
|
+
writer = MARC::Writer.new("test/writer.dat")
|
|
112
|
+
writer.write(record)
|
|
113
|
+
writer.close
|
|
114
|
+
|
|
115
|
+
read_back_record = MARC::Reader.new("test/writer.dat", external_encoding: "UTF-8").first
|
|
116
|
+
|
|
117
|
+
# Make sure the one we wrote out then read in again
|
|
118
|
+
# is the same as the one we read the first time
|
|
119
|
+
expect(record).to eq(read_back_record)
|
|
120
|
+
end
|
|
121
|
+
end
|
data/test/marc8/tc_to_unicode.rb
CHANGED
|
@@ -3,8 +3,6 @@ require "marc"
|
|
|
3
3
|
|
|
4
4
|
require "marc/marc8/to_unicode"
|
|
5
5
|
|
|
6
|
-
require "unf"
|
|
7
|
-
|
|
8
6
|
if "".respond_to?(:encoding)
|
|
9
7
|
|
|
10
8
|
class TestMarc8ToUnicode < Test::Unit::TestCase
|
|
@@ -22,7 +20,7 @@ if "".respond_to?(:encoding)
|
|
|
22
20
|
value = MARC::Marc8::ToUnicode.new.transcode("Conversa\xF0c\xE4ao")
|
|
23
21
|
assert_equal "UTF-8", value.encoding.name
|
|
24
22
|
|
|
25
|
-
expected =
|
|
23
|
+
expected = "Conversação".unicode_normalize(:nfc)
|
|
26
24
|
|
|
27
25
|
assert_equal expected, value
|
|
28
26
|
end
|
|
@@ -67,10 +65,10 @@ if "".respond_to?(:encoding)
|
|
|
67
65
|
marc8 = "Conversa\xF0c\xE4ao \xC1"
|
|
68
66
|
unicode = "Conversação \u2113"
|
|
69
67
|
|
|
70
|
-
unicode_c =
|
|
71
|
-
unicode_kc =
|
|
72
|
-
unicode_d =
|
|
73
|
-
unicode_kd =
|
|
68
|
+
unicode_c = unicode.unicode_normalize(:nfc)
|
|
69
|
+
unicode_kc = unicode.unicode_normalize(:nfkc)
|
|
70
|
+
unicode_d = unicode.unicode_normalize(:nfd)
|
|
71
|
+
unicode_kd = unicode.unicode_normalize(:nfkd)
|
|
74
72
|
|
|
75
73
|
converter = MARC::Marc8::ToUnicode.new
|
|
76
74
|
|
data/test/tc_writer.rb
CHANGED
|
@@ -32,14 +32,14 @@ class WriterTest < Test::Unit::TestCase
|
|
|
32
32
|
|
|
33
33
|
record = MARC::Record.new
|
|
34
34
|
|
|
35
|
-
record.append MARC::DataField.new("700", "0", " ", ["a", "Nhouy Abhay,".force_encoding("BINARY")], ["c", "Th\xE5ao,".force_encoding("BINARY")], ["d", "1909-"])
|
|
36
|
-
record.append MARC::DataField.new("700", "0", " ", ["a", "Somchin P\xF8\xE5o. Ngin,".force_encoding("BINARY")])
|
|
35
|
+
record.append MARC::DataField.new("700", "0", " ", ["a", +"Nhouy Abhay,".dup.force_encoding("BINARY")], ["c", +"Th\xE5ao,".dup.force_encoding("BINARY")], ["d", "1909-"])
|
|
36
|
+
record.append MARC::DataField.new("700", "0", " ", ["a", +"Somchin P\xF8\xE5o. Ngin,".dup.force_encoding("BINARY")])
|
|
37
37
|
|
|
38
|
-
record.append MARC::DataField.new("100", "0", "0", ["a", "\xE5angkham. ".force_encoding("BINARY")])
|
|
39
|
-
record.append MARC::DataField.new("245", "1", "0", ["b", "chef-d'oeuvre de la litt\xE2erature lao".force_encoding("BINARY")])
|
|
38
|
+
record.append MARC::DataField.new("100", "0", "0", ["a", +"\xE5angkham. ".dup.force_encoding("BINARY")])
|
|
39
|
+
record.append MARC::DataField.new("245", "1", "0", ["b", +"chef-d'oeuvre de la litt\xE2erature lao".dup.force_encoding("BINARY")])
|
|
40
40
|
|
|
41
41
|
# One in UTF8 and marked
|
|
42
|
-
record.append MARC::DataField.new("999", "0", "1", ["a", "chef-d'ocuvre de la littU+FFC3\U+FFA9rature".force_encoding("UTF-8")])
|
|
42
|
+
record.append MARC::DataField.new("999", "0", "1", ["a", +"chef-d'ocuvre de la littU+FFC3\U+FFA9rature".dup.force_encoding("UTF-8")])
|
|
43
43
|
|
|
44
44
|
writer.write(record)
|
|
45
45
|
writer.close
|
|
@@ -54,7 +54,7 @@ class WriterTest < Test::Unit::TestCase
|
|
|
54
54
|
too_long_record.append MARC::DataField.new("500", " ", " ", ["a", "A really long record.1234567890123456789012345678901234567890123456789012345678901234567890123456789"])
|
|
55
55
|
end
|
|
56
56
|
|
|
57
|
-
wbuffer = StringIO.new("", "w")
|
|
57
|
+
wbuffer = StringIO.new(+"", "w")
|
|
58
58
|
writer = MARC::Writer.new(wbuffer)
|
|
59
59
|
writer.allow_oversized = true
|
|
60
60
|
|
|
@@ -78,7 +78,7 @@ class WriterTest < Test::Unit::TestCase
|
|
|
78
78
|
# Test in the middle of a MARC file
|
|
79
79
|
good_record = MARC::Record.new
|
|
80
80
|
good_record.append MARC::DataField.new("500", " ", " ", ["a", "A short record"])
|
|
81
|
-
wbuffer = StringIO.new("", "w")
|
|
81
|
+
wbuffer = StringIO.new(+"", "w")
|
|
82
82
|
writer = MARC::Writer.new(wbuffer)
|
|
83
83
|
writer.allow_oversized = true
|
|
84
84
|
|
|
@@ -102,7 +102,7 @@ class WriterTest < Test::Unit::TestCase
|
|
|
102
102
|
too_long_record.append MARC::DataField.new("500", " ", " ", ["a", "A really long record.1234567890123456789012345678901234567890123456789012345678901234567890123456789"])
|
|
103
103
|
end
|
|
104
104
|
|
|
105
|
-
wbuffer = StringIO.new("", "w")
|
|
105
|
+
wbuffer = StringIO.new(+"", "w")
|
|
106
106
|
writer = MARC::Writer.new(wbuffer)
|
|
107
107
|
|
|
108
108
|
assert_raise(MARC::Exception) do
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: marc
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.
|
|
4
|
+
version: 1.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Kevin Clarke
|
|
@@ -11,19 +11,18 @@ authors:
|
|
|
11
11
|
- Ross Singer
|
|
12
12
|
- Ed Summers
|
|
13
13
|
- Chris Beer
|
|
14
|
-
autorequire:
|
|
15
14
|
bindir: bin
|
|
16
15
|
cert_chain: []
|
|
17
|
-
date:
|
|
16
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
18
17
|
dependencies:
|
|
19
18
|
- !ruby/object:Gem::Dependency
|
|
20
|
-
name:
|
|
19
|
+
name: nokogiri
|
|
21
20
|
requirement: !ruby/object:Gem::Requirement
|
|
22
21
|
requirements:
|
|
23
22
|
- - "~>"
|
|
24
23
|
- !ruby/object:Gem::Version
|
|
25
24
|
version: '1.0'
|
|
26
|
-
type: :
|
|
25
|
+
type: :runtime
|
|
27
26
|
prerelease: false
|
|
28
27
|
version_requirements: !ruby/object:Gem::Requirement
|
|
29
28
|
requirements:
|
|
@@ -31,33 +30,83 @@ dependencies:
|
|
|
31
30
|
- !ruby/object:Gem::Version
|
|
32
31
|
version: '1.0'
|
|
33
32
|
- !ruby/object:Gem::Dependency
|
|
34
|
-
name:
|
|
33
|
+
name: rexml
|
|
35
34
|
requirement: !ruby/object:Gem::Requirement
|
|
36
35
|
requirements:
|
|
37
36
|
- - ">="
|
|
38
37
|
- !ruby/object:Gem::Version
|
|
39
|
-
version:
|
|
40
|
-
- - "<"
|
|
41
|
-
- !ruby/object:Gem::Version
|
|
42
|
-
version: '2'
|
|
38
|
+
version: '0'
|
|
43
39
|
type: :runtime
|
|
44
40
|
prerelease: false
|
|
45
41
|
version_requirements: !ruby/object:Gem::Requirement
|
|
46
42
|
requirements:
|
|
47
43
|
- - ">="
|
|
48
44
|
- !ruby/object:Gem::Version
|
|
49
|
-
version:
|
|
50
|
-
|
|
45
|
+
version: '0'
|
|
46
|
+
- !ruby/object:Gem::Dependency
|
|
47
|
+
name: rake
|
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
|
49
|
+
requirements:
|
|
50
|
+
- - "~>"
|
|
51
|
+
- !ruby/object:Gem::Version
|
|
52
|
+
version: '13.0'
|
|
53
|
+
type: :development
|
|
54
|
+
prerelease: false
|
|
55
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
56
|
+
requirements:
|
|
57
|
+
- - "~>"
|
|
58
|
+
- !ruby/object:Gem::Version
|
|
59
|
+
version: '13.0'
|
|
60
|
+
- !ruby/object:Gem::Dependency
|
|
61
|
+
name: test-unit
|
|
62
|
+
requirement: !ruby/object:Gem::Requirement
|
|
63
|
+
requirements:
|
|
64
|
+
- - "~>"
|
|
65
|
+
- !ruby/object:Gem::Version
|
|
66
|
+
version: '3.0'
|
|
67
|
+
type: :development
|
|
68
|
+
prerelease: false
|
|
69
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
70
|
+
requirements:
|
|
71
|
+
- - "~>"
|
|
72
|
+
- !ruby/object:Gem::Version
|
|
73
|
+
version: '3.0'
|
|
74
|
+
- !ruby/object:Gem::Dependency
|
|
75
|
+
name: standard
|
|
76
|
+
requirement: !ruby/object:Gem::Requirement
|
|
77
|
+
requirements:
|
|
78
|
+
- - "~>"
|
|
51
79
|
- !ruby/object:Gem::Version
|
|
52
|
-
version: '
|
|
80
|
+
version: '1.0'
|
|
81
|
+
type: :development
|
|
82
|
+
prerelease: false
|
|
83
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
84
|
+
requirements:
|
|
85
|
+
- - "~>"
|
|
86
|
+
- !ruby/object:Gem::Version
|
|
87
|
+
version: '1.0'
|
|
53
88
|
- !ruby/object:Gem::Dependency
|
|
54
|
-
name:
|
|
89
|
+
name: warning
|
|
90
|
+
requirement: !ruby/object:Gem::Requirement
|
|
91
|
+
requirements:
|
|
92
|
+
- - "~>"
|
|
93
|
+
- !ruby/object:Gem::Version
|
|
94
|
+
version: '1.5'
|
|
95
|
+
type: :development
|
|
96
|
+
prerelease: false
|
|
97
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
98
|
+
requirements:
|
|
99
|
+
- - "~>"
|
|
100
|
+
- !ruby/object:Gem::Version
|
|
101
|
+
version: '1.5'
|
|
102
|
+
- !ruby/object:Gem::Dependency
|
|
103
|
+
name: xml-simple
|
|
55
104
|
requirement: !ruby/object:Gem::Requirement
|
|
56
105
|
requirements:
|
|
57
106
|
- - ">="
|
|
58
107
|
- !ruby/object:Gem::Version
|
|
59
108
|
version: '0'
|
|
60
|
-
type: :
|
|
109
|
+
type: :development
|
|
61
110
|
prerelease: false
|
|
62
111
|
version_requirements: !ruby/object:Gem::Requirement
|
|
63
112
|
requirements:
|
|
@@ -65,20 +114,19 @@ dependencies:
|
|
|
65
114
|
- !ruby/object:Gem::Version
|
|
66
115
|
version: '0'
|
|
67
116
|
- !ruby/object:Gem::Dependency
|
|
68
|
-
name:
|
|
117
|
+
name: rdoc
|
|
69
118
|
requirement: !ruby/object:Gem::Requirement
|
|
70
119
|
requirements:
|
|
71
120
|
- - ">="
|
|
72
121
|
- !ruby/object:Gem::Version
|
|
73
122
|
version: '0'
|
|
74
|
-
type: :
|
|
123
|
+
type: :development
|
|
75
124
|
prerelease: false
|
|
76
125
|
version_requirements: !ruby/object:Gem::Requirement
|
|
77
126
|
requirements:
|
|
78
127
|
- - ">="
|
|
79
128
|
- !ruby/object:Gem::Version
|
|
80
129
|
version: '0'
|
|
81
|
-
description:
|
|
82
130
|
email: ehs@pobox.com
|
|
83
131
|
executables:
|
|
84
132
|
- marc
|
|
@@ -118,6 +166,12 @@ files:
|
|
|
118
166
|
- lib/marc/xmlreader.rb
|
|
119
167
|
- lib/marc/xmlwriter.rb
|
|
120
168
|
- marc.gemspec
|
|
169
|
+
- spec/controlfield_spec.rb
|
|
170
|
+
- spec/datafield_spec.rb
|
|
171
|
+
- spec/reader_char_encodings_spec.rb
|
|
172
|
+
- spec/reader_spec.rb
|
|
173
|
+
- spec/spec_helper.rb
|
|
174
|
+
- spec/writer_spec.rb
|
|
121
175
|
- test/bad_eacc_encoding.marc8.marc
|
|
122
176
|
- test/batch.dat
|
|
123
177
|
- test/batch.xml
|
|
@@ -162,7 +216,6 @@ homepage: https://github.com/ruby-marc/ruby-marc/
|
|
|
162
216
|
licenses:
|
|
163
217
|
- MIT
|
|
164
218
|
metadata: {}
|
|
165
|
-
post_install_message:
|
|
166
219
|
rdoc_options: []
|
|
167
220
|
require_paths:
|
|
168
221
|
- lib
|
|
@@ -170,18 +223,23 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
170
223
|
requirements:
|
|
171
224
|
- - ">="
|
|
172
225
|
- !ruby/object:Gem::Version
|
|
173
|
-
version:
|
|
226
|
+
version: 2.3.0
|
|
174
227
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
175
228
|
requirements:
|
|
176
229
|
- - ">="
|
|
177
230
|
- !ruby/object:Gem::Version
|
|
178
231
|
version: '0'
|
|
179
232
|
requirements: []
|
|
180
|
-
rubygems_version:
|
|
181
|
-
signing_key:
|
|
233
|
+
rubygems_version: 4.0.3
|
|
182
234
|
specification_version: 4
|
|
183
235
|
summary: A ruby library for working with Machine Readable Cataloging
|
|
184
236
|
test_files:
|
|
237
|
+
- spec/controlfield_spec.rb
|
|
238
|
+
- spec/datafield_spec.rb
|
|
239
|
+
- spec/reader_char_encodings_spec.rb
|
|
240
|
+
- spec/reader_spec.rb
|
|
241
|
+
- spec/spec_helper.rb
|
|
242
|
+
- spec/writer_spec.rb
|
|
185
243
|
- test/bad_eacc_encoding.marc8.marc
|
|
186
244
|
- test/batch.dat
|
|
187
245
|
- test/batch.xml
|