marc 1.0.0 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/Changes +33 -18
- data/README.md +13 -5
- data/lib/marc/datafield.rb +1 -1
- data/lib/marc/marc8/to_unicode.rb +21 -21
- data/lib/marc/reader.rb +7 -3
- data/lib/marc/record.rb +69 -53
- data/lib/marc/version.rb +1 -1
- data/lib/marc/xml_parsers.rb +9 -1
- data/lib/marc/xmlwriter.rb +2 -1
- data/test/marc8/tc_to_unicode.rb +33 -10
- data/test/tc_xml.rb +7 -2
- data/test/tc_xml_error_handling.rb +22 -0
- data/test/three-records-second-bad.xml +160 -0
- metadata +43 -29
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 53c1e338a00e1dbd7c09ed14edc916edd1211abe0edbabcf757b8b00a5aa209c
|
4
|
+
data.tar.gz: 80b4c48c2fc95887216194d264583302bacae6c606616a087c888668ba2bfb68
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b6dd17fa76ff33ef0da68946d29ec2079c495423559c47391cabf80b59393f0d481ef6b1467b839465d3225c1a52aec2d4945994986e8e956eccfc2ea73ada5b
|
7
|
+
data.tar.gz: a745e41aa2cbe87c70f9a2cbfe0de0dd12f47c167f867127353d86ff771ada14327dbce1ec461fcb81cacb3cf3f60f821cdb4c0087213f37a0ef42dc7ade78e8
|
data/Changes
CHANGED
@@ -1,15 +1,31 @@
|
|
1
|
+
v1.1.1 June 2021
|
2
|
+
- Fix a regression when normalizing indicator values when serializing marcxml
|
3
|
+
|
4
|
+
v1.1.0 June 2021
|
5
|
+
- Add support for additional valid subfield codes in marcxml
|
6
|
+
|
7
|
+
v1.0.2 July 2017
|
8
|
+
- Now (correctly) throw an error if datafield string is the empty string
|
9
|
+
(thanks to @bibliotechy)
|
10
|
+
|
11
|
+
v1.0.1 February 2016
|
12
|
+
- Non-user-facing change in implementation of FieldMap strictly for performance
|
13
|
+
|
14
|
+
v1.0.0 January 2015
|
15
|
+
- Mostly changes that deal with encoding, plus the plunge to a 1.0 release
|
16
|
+
|
1
17
|
v0.5.0 April 2012
|
2
|
-
- Extensive rewrite of MARC::Reader (ISO 2709 binary reader) to provide a
|
3
|
-
fairly complete and consistent handing of char encoding issues in ruby 1.9.
|
18
|
+
- Extensive rewrite of MARC::Reader (ISO 2709 binary reader) to provide a
|
19
|
+
fairly complete and consistent handing of char encoding issues in ruby 1.9.
|
4
20
|
- This code is well covered by automated tests, but ends up complex, there
|
5
|
-
may be bugs, please report them.
|
6
|
-
- May not work properly under jruby with non-unicode source encodings.
|
7
|
-
- Still can't handle Marc8 encoding.
|
21
|
+
may be bugs, please report them.
|
22
|
+
- May not work properly under jruby with non-unicode source encodings.
|
23
|
+
- Still can't handle Marc8 encoding.
|
8
24
|
- May not have entirely backwards compatible behavior with regard to char
|
9
|
-
encodings under ruby 1.9.x as previous 0.4.x versions. Test your code.
|
25
|
+
encodings under ruby 1.9.x as previous 0.4.x versions. Test your code.
|
10
26
|
In particular, previous versions may have automatically _transcoded_
|
11
27
|
non-unicode encodings to UTF-8 for you. This version will not do
|
12
|
-
so unless you ask it to with correct arguments.
|
28
|
+
so unless you ask it to with correct arguments.
|
13
29
|
|
14
30
|
v0.4.4 Sat Mar 03 14:55:00 EDT 2012
|
15
31
|
- Fixed performance regression: strict reader will parse about 5x faster now
|
@@ -26,8 +42,8 @@ v0.2.1 Mon Aug 18 14:14:16 EDT 2008
|
|
26
42
|
Ross Singer)
|
27
43
|
|
28
44
|
v0.2.0 Wed Jun 11 12:42:20 EDT 2008
|
29
|
-
- added newline to output generated by REXML::Formatters::Default to make
|
30
|
-
it a bit more friendly. REXML::Formatters::Pretty and Transitive just
|
45
|
+
- added newline to output generated by REXML::Formatters::Default to make
|
46
|
+
it a bit more friendly. REXML::Formatters::Pretty and Transitive just
|
31
47
|
don't do what I want (whitespace in weird places).
|
32
48
|
|
33
49
|
v0.1.9 Thu Jun 5 12:00:01 EDT 2008
|
@@ -36,7 +52,7 @@ v0.1.9 Thu Jun 5 12:00:01 EDT 2008
|
|
36
52
|
|
37
53
|
v0.1.8 Tue Nov 13 22:51:03 EST 2007
|
38
54
|
- added examples directory
|
39
|
-
- fixed problem with leading whitespace and the leader in xml reader
|
55
|
+
- fixed problem with leading whitespace and the leader in xml reader
|
40
56
|
(thanks Morgan Cundiff)
|
41
57
|
|
42
58
|
v0.1.7 Mon Nov 12 09:33:57 EST 2007
|
@@ -58,7 +74,7 @@ v0.1.4 Tue Jan 2 15:45:53 EST 2007
|
|
58
74
|
- fixed bug in MARC::XMLWriter that was outputting all control field tags as 00z
|
59
75
|
(thanks Ross Singer)
|
60
76
|
- added :include_namespace option to MARC::XMLWriter::encode to include the
|
61
|
-
marcxml namespace, which allows MARC::Record::to_xml to emit the namespace
|
77
|
+
marcxml namespace, which allows MARC::Record::to_xml to emit the namespace
|
62
78
|
for a single record.
|
63
79
|
|
64
80
|
v0.1.3 Tue Jan 2 12:56:36 EST 2007
|
@@ -67,11 +83,11 @@ v0.1.3 Tue Jan 2 12:56:36 EST 2007
|
|
67
83
|
as the hash keys.
|
68
84
|
|
69
85
|
v0.1.2 Thu Dec 21 18:46:01 EST 2007
|
70
|
-
- fixed MARC::Record::to_xml so that it actually is tested and works (thanks
|
86
|
+
- fixed MARC::Record::to_xml so that it actually is tested and works (thanks
|
71
87
|
Ross Singer)
|
72
88
|
|
73
89
|
v0.1.1
|
74
|
-
- added ability to pass File like objects to the constructor for
|
90
|
+
- added ability to pass File like objects to the constructor for
|
75
91
|
MARC::XMLReader like MARC::Reader (thanks Jake Glenn)
|
76
92
|
|
77
93
|
v0.1.0 Wed Dec 6 15:40:40 EST 2006
|
@@ -93,11 +109,11 @@ v0.0.9 Tue Mar 28 10:02:16 CST 2006
|
|
93
109
|
- added :stylesheet argument to XLMWriter.new
|
94
110
|
|
95
111
|
v0.0.8 Mon Jan 16 22:31:00 EST 2006
|
96
|
-
- removed control tests out of tc_field.rb into tc_control.rb
|
112
|
+
- removed control tests out of tc_field.rb into tc_control.rb
|
97
113
|
- fixed some formatting
|
98
114
|
- changed control/field to controlfield/datafield
|
99
115
|
- added == check for controlfield
|
100
|
-
- removed namespace declarations on record elements in favor of default
|
116
|
+
- removed namespace declarations on record elements in favor of default
|
101
117
|
namespace on collection element
|
102
118
|
- added spaces around subfield code and delimeter in to_s
|
103
119
|
- fixed up relevant tests that were expecting old formatting
|
@@ -106,8 +122,8 @@ v0.0.8 Mon Jan 16 22:31:00 EST 2006
|
|
106
122
|
|
107
123
|
v0.0.7 Mon Jan 2 21:39:28 CST 2006
|
108
124
|
- MARC::XMLWriter added
|
109
|
-
- removed encode/decode methods in MARC::MARC21 into MARC::Writer and
|
110
|
-
MARC::Reader respectively. This required pushing MARC21 specific constants
|
125
|
+
- removed encode/decode methods in MARC::MARC21 into MARC::Writer and
|
126
|
+
MARC::Reader respectively. This required pushing MARC21 specific constants
|
111
127
|
out into MARC::Constants which is required as necessary.
|
112
128
|
- moved encode from MARC::MARXML into MARC::XMLWriter and added constants
|
113
129
|
to MARC::Constants
|
@@ -137,4 +153,3 @@ v0.0.2 Mon Oct 17 17:42:57 CDT 2005
|
|
137
153
|
|
138
154
|
v0.0.1 Mon Oct 10 10:29:20 CDT 2005
|
139
155
|
- initial release
|
140
|
-
|
data/README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
[![Gem Version](https://badge.fury.io/rb/marc.png)](http://badge.fury.io/rb/marc)
|
2
|
-
|
2
|
+
![Build Status](https://github.com/ruby-marc/ruby-marc/workflows/CI/badge.svg) |
|
3
3
|
|
4
4
|
marc is a ruby library for reading and writing MAchine Readable Cataloging
|
5
5
|
(MARC). More information about MARC can be found at <http://www.loc.gov/marc>.
|
@@ -34,7 +34,7 @@ marc is a ruby library for reading and writing MAchine Readable Cataloging
|
|
34
34
|
|
35
35
|
MARC::Record provides `#to_hash` and `#from_hash` implementations that deal in ruby
|
36
36
|
hash's that are compatible with the
|
37
|
-
[marc-in-json](
|
37
|
+
[marc-in-json](https://rossfsinger.com/blog/2010/09/a-proposal-to-serialize-marc-in-json/)
|
38
38
|
serialization format. You are responsible for serializing the hash to/from JSON yourself.
|
39
39
|
|
40
40
|
## Installation
|
@@ -56,7 +56,17 @@ Consult the MARC::Reader class docs for a more complete discussion and range of
|
|
56
56
|
|
57
57
|
The MARC binary Writer (MARC::Writer) does not have any encoding-related features -- it's up to you the developer to make sure you create MARC::Records with consistent and expected char encodings, although MARC::Writer will write out a legal ISO 2709 either way, it just might have corrupted encodings.
|
58
58
|
|
59
|
-
|
59
|
+
When parsing MARCXML _with Nokogiri as your XML parser implementation_ up to
|
60
|
+
and including version `1.0.2` of this gem, if the XML was badly formed, parsing
|
61
|
+
would stop and no error would be reported to your code.
|
62
|
+
|
63
|
+
If you are using a version > `1.0.2` of `ruby-marc` with MRI + Nokogiri, XML
|
64
|
+
syntax errors will be thrown (and you may need to adjust your code to account
|
65
|
+
for this). *JRuby users*: If you are using a version later than `1.0.2` and
|
66
|
+
using Nokogiri as an XML parser with JRuby as your ruby implementation, XML
|
67
|
+
syntax errors will still be ignored unless you have Nokogiri version `1.10.2`
|
68
|
+
or later.
|
69
|
+
|
60
70
|
## Miscellany
|
61
71
|
|
62
72
|
Source code at: https://github.com/ruby-marc/ruby-marc/
|
@@ -69,8 +79,6 @@ Developers, release new version of gem to rubygems with `rake release`
|
|
69
79
|
(bundler-supplied task). Note that one nice thing this will do is automatically
|
70
80
|
tag the version in git, very important for later figuring out what's going on.
|
71
81
|
|
72
|
-
Please send bugs, requests and comments to Code4Lib Mailing list (https://listserv.nd.edu/cgi-bin/wa?A0=CODE4LIB).
|
73
|
-
|
74
82
|
## Authors
|
75
83
|
|
76
84
|
Kevin Clarke <ksclarke@gmail.com>
|
data/lib/marc/datafield.rb
CHANGED
@@ -50,7 +50,7 @@ module MARC
|
|
50
50
|
def initialize(tag, i1=' ', i2=' ', *subfields)
|
51
51
|
# if the tag is less than 3 characters long and
|
52
52
|
# the string is all numeric then we pad with zeros
|
53
|
-
if tag.length < 3 and /^[0-9]
|
53
|
+
if tag.length < 3 and /^[0-9]+$/ =~ tag
|
54
54
|
@tag = "%03d" % tag
|
55
55
|
else
|
56
56
|
@tag = tag
|
@@ -12,12 +12,12 @@ module MARC
|
|
12
12
|
# http://www.loc.gov/marc/specifications/speccharmarc8.html
|
13
13
|
#
|
14
14
|
# NOT thread-safe, it needs to keep state as it goes through a string,
|
15
|
-
# do not re-use between threads.
|
15
|
+
# do not re-use between threads.
|
16
16
|
#
|
17
|
-
# Uses 4 spaces per indent, rather than usual ruby 2 space, just to change the python less.
|
17
|
+
# Uses 4 spaces per indent, rather than usual ruby 2 space, just to change the python less.
|
18
18
|
#
|
19
19
|
# Returns UTF-8 encoded string! Encode to something else if you want
|
20
|
-
# something else.
|
20
|
+
# something else.
|
21
21
|
#
|
22
22
|
# III proprietary code points?
|
23
23
|
class ToUnicode
|
@@ -31,7 +31,7 @@ module MARC
|
|
31
31
|
|
32
32
|
# These are state flags, MARC8 requires you to keep
|
33
33
|
# track of 'current char sets' or something like that, which
|
34
|
-
# are changed with escape codes, or something like that.
|
34
|
+
# are changed with escape codes, or something like that.
|
35
35
|
attr_accessor :g0, :g1
|
36
36
|
|
37
37
|
def initialize
|
@@ -39,21 +39,21 @@ module MARC
|
|
39
39
|
self.g1 = ANSEL
|
40
40
|
end
|
41
41
|
|
42
|
-
# Returns UTF-8 encoded string equivalent of marc8_string passed in.
|
42
|
+
# Returns UTF-8 encoded string equivalent of marc8_string passed in.
|
43
43
|
#
|
44
44
|
# Bad Marc8 bytes? By default will raise an Encoding::InvalidByteSequenceError
|
45
45
|
# (will not have full metadata filled out, but will have a decent error message)
|
46
46
|
#
|
47
47
|
# Set option :invalid => :replace to instead silently replace bad bytes
|
48
|
-
# with a replacement char -- by default Unicode Replacement Char, but can set
|
49
|
-
# option :replace to something else, including empty string.
|
48
|
+
# with a replacement char -- by default Unicode Replacement Char, but can set
|
49
|
+
# option :replace to something else, including empty string.
|
50
50
|
#
|
51
51
|
# converter.transcode(bad_marc8, :invalid => :replace, :replace => "")
|
52
52
|
#
|
53
53
|
# By default returns NFC normalized, but set :normalization option to:
|
54
54
|
# :nfd, :nfkd, :nfkc, :nfc, or nil. Set to nil for higher performance,
|
55
55
|
# we won't do any normalization just take it as it comes out of the
|
56
|
-
# transcode algorithm. This will generally NOT be composed.
|
56
|
+
# transcode algorithm. This will generally NOT be composed.
|
57
57
|
#
|
58
58
|
# By default, escaped unicode 'named character references' in Marc8 will
|
59
59
|
# be translated to actual UTF8. Eg. "‏" But pass :expand_ncr => false
|
@@ -61,21 +61,21 @@ module MARC
|
|
61
61
|
#
|
62
62
|
# String arg passed in WILL have it's encoding tagged 'binary' if
|
63
63
|
# it's not already, if it's Marc8 there's no good reason for it not to
|
64
|
-
# be already.
|
64
|
+
# be already.
|
65
65
|
def transcode(marc8_string, options = {})
|
66
66
|
invalid_replacement = options.fetch(:replace, "\uFFFD")
|
67
67
|
expand_ncr = options.fetch(:expand_ncr, true)
|
68
68
|
normalization = options.fetch(:normalization, :nfc)
|
69
69
|
|
70
|
-
|
70
|
+
|
71
71
|
# don't choke on empty marc8_string
|
72
72
|
return "" if marc8_string.nil? || marc8_string.empty?
|
73
|
-
|
73
|
+
|
74
74
|
# Make sure to call it 'binary', so we can slice it
|
75
75
|
# byte by byte, and so ruby doesn't complain about bad
|
76
76
|
# bytes for some other encoding. Yeah, we're changing
|
77
77
|
# encoding on input! If it's Marc8, it ought to be tagged
|
78
|
-
# binary already.
|
78
|
+
# binary already.
|
79
79
|
marc8_string.force_encoding("binary")
|
80
80
|
|
81
81
|
uni_list = []
|
@@ -124,7 +124,7 @@ module MARC
|
|
124
124
|
end
|
125
125
|
|
126
126
|
mb_flag = is_multibyte(self.g0)
|
127
|
-
|
127
|
+
|
128
128
|
if mb_flag
|
129
129
|
code_point = (marc8_string[pos].ord * 65536 +
|
130
130
|
marc8_string[pos+1].ord * 256 +
|
@@ -134,7 +134,7 @@ module MARC
|
|
134
134
|
code_point = marc8_string[pos].ord
|
135
135
|
pos += 1
|
136
136
|
end
|
137
|
-
|
137
|
+
|
138
138
|
if (code_point < 0x20 or
|
139
139
|
(code_point > 0x80 and code_point < 0xa0))
|
140
140
|
uni = unichr(code_point)
|
@@ -144,7 +144,7 @@ module MARC
|
|
144
144
|
begin
|
145
145
|
code_set = (code_point > 0x80 and not mb_flag) ? self.g1 : self.g0
|
146
146
|
(uni, cflag) = CODESETS.fetch(code_set).fetch(code_point)
|
147
|
-
|
147
|
+
|
148
148
|
if cflag
|
149
149
|
combinings.push unichr(uni)
|
150
150
|
else
|
@@ -160,16 +160,16 @@ module MARC
|
|
160
160
|
uni_list.push invalid_replacement unless uni_list.last == invalid_replacement
|
161
161
|
pos += 1
|
162
162
|
else
|
163
|
-
raise Encoding::InvalidByteSequenceError.new("MARC8, input byte offset #{pos}, code set: 0x#{code_set.to_s(16)}, code point: 0x#{code_point.to_s(16)}")
|
163
|
+
raise Encoding::InvalidByteSequenceError.new("MARC8, input byte offset #{pos}, code set: 0x#{code_set.to_s(16)}, code point: 0x#{code_point.to_s(16)}, value: #{transcode(marc8_string, :invalid => :replace, :replace => "�")}")
|
164
164
|
end
|
165
165
|
end
|
166
166
|
end
|
167
167
|
|
168
168
|
# what to do if combining chars left over?
|
169
169
|
uni_str = uni_list.join('')
|
170
|
-
|
170
|
+
|
171
171
|
if expand_ncr
|
172
|
-
uni_str.gsub!(/&#x([0-9A-F]{4,6});/) do
|
172
|
+
uni_str.gsub!(/&#x([0-9A-F]{4,6});/) do
|
173
173
|
[$1.hex].pack("U")
|
174
174
|
end
|
175
175
|
end
|
@@ -177,7 +177,7 @@ module MARC
|
|
177
177
|
if normalization
|
178
178
|
uni_str = UNF::Normalizer.normalize(uni_str, normalization)
|
179
179
|
end
|
180
|
-
|
180
|
+
|
181
181
|
return uni_str
|
182
182
|
end
|
183
183
|
|
@@ -188,11 +188,11 @@ module MARC
|
|
188
188
|
end
|
189
189
|
|
190
190
|
# input single unicode codepoint as integer; output encoded as a UTF-8 string
|
191
|
-
# python has unichr built-in, we just define it for convenience no problem.
|
191
|
+
# python has unichr built-in, we just define it for convenience no problem.
|
192
192
|
def unichr(code_point)
|
193
193
|
[code_point].pack("U")
|
194
194
|
end
|
195
195
|
|
196
196
|
end
|
197
197
|
end
|
198
|
-
end
|
198
|
+
end
|
data/lib/marc/reader.rb
CHANGED
@@ -138,10 +138,10 @@ module MARC
|
|
138
138
|
# Encoding.default_internal = "utf-8"
|
139
139
|
# MARC::Reader.new( File.new("marc_in_cp866.mrc", "r:cp866") )
|
140
140
|
#
|
141
|
-
# # However this
|
141
|
+
# # However this should be safe:
|
142
142
|
# MARC::Reader.new( "marc_in_cp866.mrc", :external_encoding => "cp866")
|
143
143
|
#
|
144
|
-
# # And this
|
144
|
+
# # And this should be safe, if you do want to transcode:
|
145
145
|
# MARC::Reader.new( "marc_in_cp866.mrc", :external_encoding => "cp866",
|
146
146
|
# :internal_encoding => "utf-8")
|
147
147
|
#
|
@@ -443,7 +443,11 @@ module MARC
|
|
443
443
|
# get an exception from inside ruby-marc, and it may change
|
444
444
|
# in future implementations.
|
445
445
|
if params[:internal_encoding]
|
446
|
-
|
446
|
+
if RUBY_VERSION >= '3.0'
|
447
|
+
str = str.encode(params[:internal_encoding], **params)
|
448
|
+
else
|
449
|
+
str = str.encode(params[:internal_encoding], params)
|
450
|
+
end
|
447
451
|
elsif (params[:invalid] || params[:replace] || (params[:validate_encoding] == true))
|
448
452
|
|
449
453
|
if params[:validate_encoding] == true && ! str.valid_encoding?
|
data/lib/marc/record.rb
CHANGED
@@ -1,16 +1,17 @@
|
|
1
|
-
module MARC
|
2
|
-
|
1
|
+
module MARC
|
2
|
+
|
3
3
|
# The FieldMap is an Array of DataFields and Controlfields.
|
4
|
-
# It also contains a Hash representation
|
4
|
+
# It also contains a Hash representation
|
5
5
|
# of the fields for faster lookups (under certain conditions)
|
6
6
|
class FieldMap < Array
|
7
7
|
attr_reader :tags
|
8
8
|
attr_accessor :clean
|
9
|
+
|
9
10
|
def initialize
|
10
|
-
@tags
|
11
|
+
@tags = {}
|
11
12
|
@clean = true
|
12
13
|
end
|
13
|
-
|
14
|
+
|
14
15
|
# Rebuild the HashWithChecksumAttribute with the current
|
15
16
|
# values of the fields Array
|
16
17
|
def reindex
|
@@ -21,28 +22,42 @@ module MARC
|
|
21
22
|
end
|
22
23
|
@clean = true
|
23
24
|
end
|
24
|
-
|
25
|
+
|
25
26
|
# Returns an array of all of the tags that appear in the record (not in the order they appear, however).
|
26
27
|
def tag_list
|
27
28
|
reindex unless @clean
|
28
29
|
@tags.keys
|
29
30
|
end
|
30
|
-
|
31
|
+
|
31
32
|
# Returns an array of fields, in the order they appear, according to their tag.
|
32
33
|
# The tags argument can be a string (e.g. '245'), an array (['100','700','800'])
|
33
34
|
# or a range (('600'..'699')).
|
35
|
+
|
34
36
|
def each_by_tag(tags)
|
35
37
|
reindex unless @clean
|
36
|
-
indices =
|
38
|
+
indices = []
|
39
|
+
# Get all the indices associated with the tags
|
40
|
+
Array(tags).each do |t|
|
41
|
+
indices.concat @tags[t] if @tags[t]
|
42
|
+
end
|
43
|
+
|
44
|
+
# Remove any nils
|
45
|
+
indices.compact!
|
37
46
|
return [] if indices.empty?
|
38
|
-
|
39
|
-
|
47
|
+
|
48
|
+
# Sort it, so we get the fields back in the order they appear in the record
|
49
|
+
indices.sort!
|
50
|
+
|
51
|
+
indices.each do |tag|
|
52
|
+
yield self[tag]
|
40
53
|
end
|
41
54
|
end
|
42
55
|
|
43
|
-
|
56
|
+
|
57
|
+
|
58
|
+
# Freeze for immutability, first reindexing if needed.
|
44
59
|
# A frozen FieldMap is safe for concurrent access, and also
|
45
|
-
# can more easily avoid accidental reindexing on even read-only use.
|
60
|
+
# can more easily avoid accidental reindexing on even read-only use.
|
46
61
|
def freeze
|
47
62
|
self.reindex unless @clean
|
48
63
|
super
|
@@ -50,18 +65,18 @@ module MARC
|
|
50
65
|
end
|
51
66
|
|
52
67
|
# A class that represents an individual MARC record. Every record
|
53
|
-
# is made up of a collection of MARC::DataField objects.
|
68
|
+
# is made up of a collection of MARC::DataField objects.
|
54
69
|
#
|
55
70
|
# MARC::Record mixes in Enumerable to enable access to constituent
|
56
71
|
# DataFields. For example, to return a list of all subject DataFields:
|
57
72
|
#
|
58
|
-
# record.find_all {|field| field.tag =~ /^6../}
|
59
|
-
#
|
73
|
+
# record.find_all {|field| field.tag =~ /^6../}
|
74
|
+
#
|
60
75
|
# The accessor 'fields' is also an Array of MARC::DataField objects which
|
61
76
|
# the client can modify if neccesary.
|
62
77
|
#
|
63
78
|
# record.fields.delete(field)
|
64
|
-
#
|
79
|
+
#
|
65
80
|
# Other accessor attribute: 'leader' for record leader as String
|
66
81
|
#
|
67
82
|
# == High-performance lookup by tag
|
@@ -82,13 +97,13 @@ module MARC
|
|
82
97
|
#
|
83
98
|
# MARC::Record is not generally safe for sharing between threads.
|
84
99
|
# Even if you think you are just acccessing it read-only,
|
85
|
-
# you may accidentally trigger a reindex of the by-tag cache (see above).
|
100
|
+
# you may accidentally trigger a reindex of the by-tag cache (see above).
|
86
101
|
#
|
87
102
|
# However, after you are done constructing a Record, you can mark
|
88
103
|
# the `fields` array as immutable. This makes a Record safe for sharing
|
89
104
|
# between threads for read-only use, and also helps you avoid accidentally
|
90
105
|
# triggering a reindex, as accidental reindexes can harm by-tag
|
91
|
-
# lookup performance.
|
106
|
+
# lookup performance.
|
92
107
|
#
|
93
108
|
# record.fields.freeze
|
94
109
|
class Record
|
@@ -101,9 +116,9 @@ module MARC
|
|
101
116
|
attr_accessor :leader
|
102
117
|
|
103
118
|
def initialize
|
104
|
-
@fields
|
119
|
+
@fields = FieldMap.new
|
105
120
|
# leader is 24 bytes
|
106
|
-
@leader
|
121
|
+
@leader = ' ' * 24
|
107
122
|
# leader defaults:
|
108
123
|
# http://www.loc.gov/marc/bibliographic/ecbdldrd.html
|
109
124
|
@leader[10..11] = '22'
|
@@ -119,9 +134,9 @@ module MARC
|
|
119
134
|
end
|
120
135
|
|
121
136
|
# alias to append
|
122
|
-
|
137
|
+
|
123
138
|
def <<(field)
|
124
|
-
append(field)
|
139
|
+
append(field)
|
125
140
|
end
|
126
141
|
|
127
142
|
# each() is here to support iterating and searching since MARC::Record
|
@@ -141,20 +156,20 @@ module MARC
|
|
141
156
|
yield field
|
142
157
|
end
|
143
158
|
end
|
144
|
-
|
145
|
-
# A more convenient way to iterate over each field with a given tag.
|
159
|
+
|
160
|
+
# A more convenient way to iterate over each field with a given tag.
|
146
161
|
# The filter argument can be a string, array or range.
|
147
162
|
def each_by_tag(filter)
|
148
|
-
@fields.each_by_tag(filter) {|tag| yield tag }
|
163
|
+
@fields.each_by_tag(filter) { |tag| yield tag }
|
149
164
|
end
|
150
165
|
|
151
166
|
# You can lookup fields using this shorthand:
|
152
167
|
# title = record['245']
|
153
168
|
|
154
169
|
def [](tag)
|
155
|
-
return self.find {|f| f.tag == tag}
|
170
|
+
return self.find { |f| f.tag == tag }
|
156
171
|
end
|
157
|
-
|
172
|
+
|
158
173
|
# Provides a backwards compatible means to access the FieldMap.
|
159
174
|
# No argument returns the FieldMap array in entirety. Providing
|
160
175
|
# a string, array or range of tags will return an array of fields
|
@@ -163,9 +178,9 @@ module MARC
|
|
163
178
|
unless filter
|
164
179
|
# Since we're returning the FieldMap object, which the caller
|
165
180
|
# may mutate, we precautionarily mark dirty -- unless it's frozen
|
166
|
-
# immutable.
|
181
|
+
# immutable.
|
167
182
|
@fields.clean = false unless @fields.frozen?
|
168
|
-
return @fields
|
183
|
+
return @fields
|
169
184
|
end
|
170
185
|
@fields.reindex unless @fields.clean
|
171
186
|
flds = []
|
@@ -180,18 +195,18 @@ module MARC
|
|
180
195
|
end
|
181
196
|
flds
|
182
197
|
end
|
183
|
-
|
198
|
+
|
184
199
|
# Returns an array of all of the tags that appear in the record (not necessarily in the order they appear).
|
185
200
|
def tags
|
186
201
|
return @fields.tag_list
|
187
202
|
end
|
188
203
|
|
189
|
-
# Factory method for creating a MARC::Record from MARC21 in
|
204
|
+
# Factory method for creating a MARC::Record from MARC21 in
|
190
205
|
# transmission format.
|
191
206
|
#
|
192
207
|
# record = MARC::Record.new_from_marc(marc21)
|
193
208
|
#
|
194
|
-
# in cases where you might be working with somewhat flawed
|
209
|
+
# in cases where you might be working with somewhat flawed
|
195
210
|
# MARC data you may want to use the :forgiving parameter which
|
196
211
|
# will bypass using field byte offsets and simply look for the
|
197
212
|
# end of field byte to figure out the end of fields.
|
@@ -203,12 +218,12 @@ module MARC
|
|
203
218
|
end
|
204
219
|
|
205
220
|
|
206
|
-
# Returns a record in MARC21 transmission format (ANSI Z39.2).
|
221
|
+
# Returns a record in MARC21 transmission format (ANSI Z39.2).
|
207
222
|
# Really this is just a wrapper around MARC::MARC21::encode
|
208
223
|
#
|
209
224
|
# marc = record.to_marc()
|
210
225
|
|
211
|
-
def to_marc
|
226
|
+
def to_marc
|
212
227
|
return MARC::Writer.encode(self)
|
213
228
|
end
|
214
229
|
|
@@ -235,51 +250,51 @@ module MARC
|
|
235
250
|
# Return a marc-hash version of the record
|
236
251
|
def to_marchash
|
237
252
|
return {
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
253
|
+
'type' => 'marc-hash',
|
254
|
+
'version' => [MARCHASH_MAJOR_VERSION, MARCHASH_MINOR_VERSION],
|
255
|
+
'leader' => self.leader,
|
256
|
+
'fields' => self.map { |f| f.to_marchash }
|
242
257
|
}
|
243
|
-
end
|
258
|
+
end
|
259
|
+
|
260
|
+
#to_hash
|
244
261
|
|
245
262
|
# Factory method for creating a new MARC::Record from
|
246
263
|
# a marchash object
|
247
264
|
#
|
248
265
|
# record = MARC::Record->new_from_marchash(mh)
|
249
|
-
|
266
|
+
|
250
267
|
def self.new_from_marchash(mh)
|
251
|
-
r
|
268
|
+
r = self.new()
|
252
269
|
r.leader = mh['leader']
|
253
270
|
mh['fields'].each do |f|
|
254
|
-
if (f.length == 2)
|
271
|
+
if (f.length == 2)
|
255
272
|
r << MARC::ControlField.new(f[0], f[1])
|
256
|
-
elsif
|
257
|
-
r << MARC::DataField.new(f[0], f[1], f[2], *f[3])
|
273
|
+
elsif r << MARC::DataField.new(f[0], f[1], f[2], *f[3])
|
258
274
|
end
|
259
275
|
end
|
260
276
|
return r
|
261
277
|
end
|
262
|
-
|
263
278
|
|
264
|
-
|
279
|
+
|
265
280
|
# Returns a (roundtrippable) hash representation for MARC-in-JSON
|
266
281
|
def to_hash
|
267
|
-
record_hash = {'leader'
|
282
|
+
record_hash = {'leader' => @leader, 'fields' => []}
|
268
283
|
@fields.each do |field|
|
269
284
|
record_hash['fields'] << field.to_hash
|
270
285
|
end
|
271
286
|
record_hash
|
272
|
-
end
|
287
|
+
end
|
273
288
|
|
274
289
|
def self.new_from_hash(h)
|
275
|
-
r
|
290
|
+
r = self.new
|
276
291
|
r.leader = h['leader']
|
277
292
|
if h['fields']
|
278
293
|
h['fields'].each do |position|
|
279
294
|
position.each_pair do |tag, field|
|
280
295
|
if field.is_a?(Hash)
|
281
296
|
f = MARC::DataField.new(tag, field['ind1'], field['ind2'])
|
282
|
-
field['subfields'].each do |
|
297
|
+
field['subfields'].each do |pos|
|
283
298
|
pos.each_pair do |code, value|
|
284
299
|
f.append MARC::Subfield.new(code, value)
|
285
300
|
end
|
@@ -290,9 +305,10 @@ module MARC
|
|
290
305
|
end
|
291
306
|
end
|
292
307
|
end
|
293
|
-
end
|
294
|
-
return r
|
308
|
+
end
|
309
|
+
return r
|
295
310
|
end
|
311
|
+
|
296
312
|
# Returns a string version of the record, suitable for printing
|
297
313
|
|
298
314
|
def to_s
|
@@ -315,7 +331,7 @@ module MARC
|
|
315
331
|
# if record =~ /Gravity's Rainbow/ then print "Slothrop" end
|
316
332
|
|
317
333
|
def =~(regex)
|
318
|
-
return self.to_s =~ regex
|
334
|
+
return self.to_s =~ regex
|
319
335
|
end
|
320
336
|
|
321
337
|
end
|
data/lib/marc/version.rb
CHANGED
data/lib/marc/xml_parsers.rb
CHANGED
@@ -1,4 +1,8 @@
|
|
1
1
|
module MARC
|
2
|
+
# Exception class to be thrown when an XML parser
|
3
|
+
# encounters an unrecoverable error.
|
4
|
+
class XMLParseError < StandardError; end
|
5
|
+
|
2
6
|
# The MagicReader will try to use the best available XML Parser at the
|
3
7
|
# time of initialization.
|
4
8
|
# The order is currently:
|
@@ -112,6 +116,10 @@ module MARC
|
|
112
116
|
@parser.parse(@handle)
|
113
117
|
end
|
114
118
|
end
|
119
|
+
|
120
|
+
def error(evt)
|
121
|
+
raise(XMLParseError, "XML parsing error: #{evt}")
|
122
|
+
end
|
115
123
|
|
116
124
|
|
117
125
|
def method_missing(methName, *args)
|
@@ -413,4 +421,4 @@ end
|
|
413
421
|
end
|
414
422
|
end # end of module
|
415
423
|
end # end of if jruby
|
416
|
-
end
|
424
|
+
end
|
data/lib/marc/xmlwriter.rb
CHANGED
@@ -61,6 +61,7 @@ module MARC
|
|
61
61
|
|
62
62
|
def self.encode(record, opts={})
|
63
63
|
singleChar = Regexp.new('[\da-z ]{1}')
|
64
|
+
subfieldChar = Regexp.new('[\dA-Za-z!"#$%&\'()*+,-./:;<=>?{}_^`~\[\]\\\]{1}')
|
64
65
|
ctrlFieldTag = Regexp.new('00[1-9A-Za-z]{1}')
|
65
66
|
|
66
67
|
# Right now, this writer handles input from the strict and
|
@@ -122,7 +123,7 @@ module MARC
|
|
122
123
|
|
123
124
|
# If marc is leniently parsed, we may have some dirty data; using
|
124
125
|
# the blank subfield code should help us locate these later to fix
|
125
|
-
if (subfield.code.match(
|
126
|
+
if (subfield.code.match(subfieldChar) == nil)
|
126
127
|
subfield.code = ' '
|
127
128
|
end
|
128
129
|
|
data/test/marc8/tc_to_unicode.rb
CHANGED
@@ -32,9 +32,9 @@ if "".respond_to?(:encoding)
|
|
32
32
|
|
33
33
|
def test_lots_of_marc8_test_cases
|
34
34
|
# Heap of test cases taken from pymarc, which provided these
|
35
|
-
# two data files, marc8 and utf8, with line-by-line correspondences.
|
35
|
+
# two data files, marc8 and utf8, with line-by-line correspondences.
|
36
36
|
#
|
37
|
-
# For now, we have NOT included proprietary III encodings in our test data!
|
37
|
+
# For now, we have NOT included proprietary III encodings in our test data!
|
38
38
|
utf8_file = File.open( File.expand_path("../data/test_utf8.txt", __FILE__), "r:UTF-8")
|
39
39
|
marc8_file = File.open( File.expand_path("../data/test_marc8.txt", __FILE__), "r:binary")
|
40
40
|
|
@@ -55,7 +55,7 @@ if "".respond_to?(:encoding)
|
|
55
55
|
|
56
56
|
assert_equal utf8, converted, "Test data line #{i}, expected converted to match provided utf8"
|
57
57
|
end
|
58
|
-
rescue EOFError => each
|
58
|
+
rescue EOFError => each
|
59
59
|
# just means the file was over, no biggie
|
60
60
|
assert i > 1500, "Read as many lines as we expected to, at least 1500"
|
61
61
|
rescue Exception => e
|
@@ -82,27 +82,50 @@ if "".respond_to?(:encoding)
|
|
82
82
|
assert_equal unicode_d, converter.transcode(marc8, :normalization => :nfd)
|
83
83
|
assert_equal unicode_kd, converter.transcode(marc8, :normalization => :nfkd)
|
84
84
|
|
85
|
-
# disable normalization for performance or something, we won't end up with NFC.
|
85
|
+
# disable normalization for performance or something, we won't end up with NFC.
|
86
86
|
refute_equal unicode_c, converter.transcode(marc8, :normalization => nil)
|
87
87
|
end
|
88
88
|
|
89
89
|
def test_expand_ncr
|
90
90
|
converter = MARC::Marc8::ToUnicode.new
|
91
|
-
|
91
|
+
|
92
92
|
marc8_ncr = "Weird ‏ � but these aren't changed #x2000; ÈF etc."
|
93
93
|
assert_equal "Weird \u200F \uFFFD but these aren't changed #x2000; ÈF etc.", converter.transcode(marc8_ncr)
|
94
94
|
assert_equal marc8_ncr, converter.transcode(marc8_ncr, :expand_ncr => false), "should not expand NCR if disabled"
|
95
|
-
end
|
95
|
+
end
|
96
96
|
|
97
97
|
def test_bad_byte
|
98
98
|
converter = MARC::Marc8::ToUnicode.new
|
99
99
|
|
100
100
|
bad_marc8 = "\e$1!PVK7oi$N!Q1!G4i$N!0p!Q+{6924f6}\e(B"
|
101
101
|
assert_raise(Encoding::InvalidByteSequenceError) {
|
102
|
-
|
102
|
+
converter.transcode(bad_marc8)
|
103
103
|
}
|
104
104
|
end
|
105
105
|
|
106
|
+
def test_bad_byte_error_message
|
107
|
+
converter = MARC::Marc8::ToUnicode.new
|
108
|
+
|
109
|
+
bad_marc8 = "\e$1!PVK7oi$N!Q1!G4i$N!0p!Q+{6924f6}\e(B"
|
110
|
+
begin
|
111
|
+
converter.transcode(bad_marc8)
|
112
|
+
rescue Encoding::InvalidByteSequenceError => err
|
113
|
+
assert_equal("MARC8, input byte offset 30, code set: 0x31, code point: 0x7b3639, value: 米国の統治の仕組�", err.message)
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
def test_multiple_bad_byte_error_message
|
118
|
+
converter = MARC::Marc8::ToUnicode.new
|
119
|
+
|
120
|
+
bad_marc8 = "\e$1!Q1!G4i$N!0p!Q+{6924f6}\e(B \e$1!PVK7oi$N!Q1!G4i$N!0p!Q+{6924f6}\e(B \e$1!PVK7oi$N!Q1!G4i$N!0p!Q+{6924f6}\e(B"
|
121
|
+
begin
|
122
|
+
converter.transcode(bad_marc8)
|
123
|
+
rescue Encoding::InvalidByteSequenceError => err
|
124
|
+
# It still identifies the first bad byte found in the offset info, but replaces all bad bytes in the error message
|
125
|
+
assert_equal("MARC8, input byte offset 21, code set: 0x31, code point: 0x7b3639, value: 統治の仕組� 米国の統治の仕組� 米国の統治の仕組�", err.message)
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
106
129
|
def test_bad_byte_with_replacement
|
107
130
|
converter = MARC::Marc8::ToUnicode.new
|
108
131
|
|
@@ -112,9 +135,9 @@ if "".respond_to?(:encoding)
|
|
112
135
|
assert_equal "UTF-8", value.encoding.name
|
113
136
|
assert value.valid_encoding?
|
114
137
|
|
115
|
-
assert value.include?("\uFFFD"), "includes replacement char"
|
138
|
+
assert value.include?("\uFFFD"), "includes replacement char"
|
116
139
|
# coalescing multiple replacement chars at end, could change
|
117
|
-
# to not do so, important thing is at least one is there.
|
140
|
+
# to not do so, important thing is at least one is there.
|
118
141
|
assert_equal "米国の統治の仕組�", value
|
119
142
|
end
|
120
143
|
|
@@ -150,5 +173,5 @@ if "".respond_to?(:encoding)
|
|
150
173
|
end
|
151
174
|
else
|
152
175
|
require 'pathname'
|
153
|
-
$stderr.puts "\nTests not being run in ruby 1.9.x, skipping #{Pathname.new(__FILE__).basename}\n\n"
|
176
|
+
$stderr.puts "\nTests not being run in ruby 1.9.x, skipping #{Pathname.new(__FILE__).basename}\n\n"
|
154
177
|
end
|
data/test/tc_xml.rb
CHANGED
@@ -140,11 +140,16 @@ class XMLTest < Test::Unit::TestCase
|
|
140
140
|
record1 = MARC::Record.new
|
141
141
|
record1.leader = '00925njm 22002777a 4500'
|
142
142
|
record1.append MARC::ControlField.new('007', 'sdubumennmplu')
|
143
|
-
record1.append MARC::DataField.new('245', '0', '4',
|
143
|
+
record1.append MARC::DataField.new('245', '0', '4',
|
144
144
|
['a', 'The Great Ray Charles'], ['h', '[sound recording].'])
|
145
|
+
record1.append MARC::DataField.new('998', ' ', ' ',
|
146
|
+
['^', 'Valid local subfield'])
|
147
|
+
|
148
|
+
# MARC::XMLWriter mutates records
|
149
|
+
dup_record = MARC::Record.new_from_hash(record1.to_hash)
|
145
150
|
|
146
151
|
writer = MARC::XMLWriter.new('test/test.xml', :stylesheet => 'style.xsl')
|
147
|
-
writer.write(record1)
|
152
|
+
writer.write(dup_record)
|
148
153
|
writer.close
|
149
154
|
|
150
155
|
xml = File.read('test/test.xml')
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'marc'
|
3
|
+
|
4
|
+
class BadXMLHandlingTestCase < Test::Unit::TestCase
|
5
|
+
|
6
|
+
def test_nokogiri_bad_xml
|
7
|
+
begin
|
8
|
+
require 'nokogiri'
|
9
|
+
rescue LoadError
|
10
|
+
omit("nokogiri not installed, cannot test")
|
11
|
+
end
|
12
|
+
omit("nokogiri (<1.10.2) under jruby doesn't support error handling: sparklemotion/nokogiri#1847") if RUBY_PLATFORM == 'java' && Gem::Version.new(Nokogiri::VERSION) < Gem::Version.new('1.10.2')
|
13
|
+
count = 0
|
14
|
+
reader = MARC::XMLReader.new('test/three-records-second-bad.xml', :parser => :nokogiri)
|
15
|
+
assert_raise MARC::XMLParseError do
|
16
|
+
reader.each do |rec|
|
17
|
+
count += 1 if rec['260']
|
18
|
+
end
|
19
|
+
end
|
20
|
+
assert_equal(1, count, 'should only be able to parse one record')
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,160 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<collection xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.loc.gov/MARC21/slim" xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd">
|
3
|
+
<record>
|
4
|
+
<leader> njm a22 uu 4500</leader>
|
5
|
+
<controlfield tag="001">afc99990058366</controlfield>
|
6
|
+
<controlfield tag="003">DLC</controlfield>
|
7
|
+
<controlfield tag="005">20071104155141.9</controlfield>
|
8
|
+
<controlfield tag="007">sd ummunniauub</controlfield>
|
9
|
+
<controlfield tag="008">071103s1939 xxufmnne||||||||| u eng||</controlfield>
|
10
|
+
<datafield tag="010" ind1=" " ind2=" ">
|
11
|
+
<subfield code="a">afc99990058366</subfield>
|
12
|
+
</datafield>
|
13
|
+
<datafield tag="040" ind1=" " ind2=" ">
|
14
|
+
<subfield code="a">DLC</subfield>
|
15
|
+
<subfield code="c">DLC</subfield>
|
16
|
+
</datafield>
|
17
|
+
<datafield tag="245" ind1="0" ind2="4">
|
18
|
+
<subfield code="a">The Texas ranger</subfield>
|
19
|
+
<subfield code="h">[sound recording] /</subfield>
|
20
|
+
<subfield code="c">Sung by Beale D. Taylor.</subfield>
|
21
|
+
</datafield>
|
22
|
+
<datafield tag="260" ind1=" " ind2=" ">
|
23
|
+
<subfield code="a">Medina, Texas,</subfield>
|
24
|
+
<subfield code="c">1939.</subfield>
|
25
|
+
</datafield>
|
26
|
+
<datafield tag="300" ind1=" " ind2=" ">
|
27
|
+
<subfield code="a">1 sound disc :</subfield>
|
28
|
+
<subfield code="b">analog, 33 1/3 rpm, mono. ;</subfield>
|
29
|
+
<subfield code="c">12 in.</subfield>
|
30
|
+
</datafield>
|
31
|
+
<datafield tag="651" ind1=" " ind2="0">
|
32
|
+
<subfield code="a">Medina</subfield>
|
33
|
+
<subfield code="z">Texas</subfield>
|
34
|
+
<subfield code="z">United States of America.</subfield>
|
35
|
+
</datafield>
|
36
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
37
|
+
<subfield code="a">Lomax, John Avery, 1867-1948</subfield>
|
38
|
+
<subfield code="e">Recording engineer.</subfield>
|
39
|
+
</datafield>
|
40
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
41
|
+
<subfield code="a">Lomax, Ruby T. (Ruby Terrill)</subfield>
|
42
|
+
<subfield code="e">Recording engineer.</subfield>
|
43
|
+
</datafield>
|
44
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
45
|
+
<subfield code="a">Taylor, Beale D.</subfield>
|
46
|
+
<subfield code="e">Singer.</subfield>
|
47
|
+
</datafield>
|
48
|
+
<datafield tag="852" ind1=" " ind2=" ">
|
49
|
+
<subfield code="a">American Folklife Center, Library of Congress</subfield>
|
50
|
+
</datafield>
|
51
|
+
<datafield tag="852" ind1=" " ind2=" ">
|
52
|
+
<subfield code="a">DLC</subfield>
|
53
|
+
</datafield>
|
54
|
+
</record>
|
55
|
+
<record>
|
56
|
+
<leader> njm a22 uu 4500</leader>
|
57
|
+
<controlfield tag="001">afc99990058366</controlfield>
|
58
|
+
<controlfield tag="003">DLC</controlfield>
|
59
|
+
<controlfield tag="005">20071104155141.9</controlfield>
|
60
|
+
<controlfield tag="007">sd ummunniauub</controlfield>
|
61
|
+
<controlfield tag="008">071103s1939 xxufmnne||||||||| u eng||</controlfield>
|
62
|
+
<datafield tag="010" ind1=" " ind2=" ">
|
63
|
+
<subfield code="a">afc99990058366</subfield>
|
64
|
+
</datafield>
|
65
|
+
<datafield tag="040" ind1=" " ind2=" ">
|
66
|
+
<subfield code="a">DLC</subfield>
|
67
|
+
<subfield code="c">DLC</subfield>
|
68
|
+
</datafield>
|
69
|
+
<datafield tag="245" ind1="0" ind2="4">
|
70
|
+
<subfield code="a">The Texas ranger</subfield>
|
71
|
+
<!-- invalid utf-8 bytes in the non-printing subfield code -->
|
72
|
+
<subfield code="">[sound recording] /</subfield>
|
73
|
+
<subfield code="c">Sung by Beale D. Taylor.</subfield>
|
74
|
+
</datafield>
|
75
|
+
<datafield tag="260" ind1=" " ind2=" ">
|
76
|
+
<subfield code="a">Medina, Texas,</subfield>
|
77
|
+
<subfield code="c">1939.</subfield>
|
78
|
+
</datafield>
|
79
|
+
<datafield tag="300" ind1=" " ind2=" ">
|
80
|
+
<subfield code="a">1 sound disc :</subfield>
|
81
|
+
<subfield code="b">analog, 33 1/3 rpm, mono. ;</subfield>
|
82
|
+
<subfield code="c">12 in.</subfield>
|
83
|
+
</datafield>
|
84
|
+
<datafield tag="651" ind1=" " ind2="0">
|
85
|
+
<subfield code="a">Medina</subfield>
|
86
|
+
<subfield code="z">Texas</subfield>
|
87
|
+
<subfield code="z">United States of America.</subfield>
|
88
|
+
</datafield>
|
89
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
90
|
+
<subfield code="a">Lomax, John Avery, 1867-1948</subfield>
|
91
|
+
<subfield code="e">Recording engineer.</subfield>
|
92
|
+
</datafield>
|
93
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
94
|
+
<subfield code="a">Lomax, Ruby T. (Ruby Terrill)</subfield>
|
95
|
+
<subfield code="e">Recording engineer.</subfield>
|
96
|
+
</datafield>
|
97
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
98
|
+
<subfield code="a">Taylor, Beale D.</subfield>
|
99
|
+
<subfield code="e">Singer.</subfield>
|
100
|
+
</datafield>
|
101
|
+
<datafield tag="852" ind1=" " ind2=" ">
|
102
|
+
<subfield code="a">American Folklife Center, Library of Congress</subfield>
|
103
|
+
</datafield>
|
104
|
+
<datafield tag="852" ind1=" " ind2=" ">
|
105
|
+
<subfield code="a">DLC</subfield>
|
106
|
+
</datafield>
|
107
|
+
</record>
|
108
|
+
<record>
|
109
|
+
<leader> njm a22 uu 4500</leader>
|
110
|
+
<controlfield tag="001">afc99990058366</controlfield>
|
111
|
+
<controlfield tag="003">DLC</controlfield>
|
112
|
+
<controlfield tag="005">20071104155141.9</controlfield>
|
113
|
+
<controlfield tag="007">sd ummunniauub</controlfield>
|
114
|
+
<controlfield tag="008">071103s1939 xxufmnne||||||||| u eng||</controlfield>
|
115
|
+
<datafield tag="010" ind1=" " ind2=" ">
|
116
|
+
<subfield code="a">afc99990058366</subfield>
|
117
|
+
</datafield>
|
118
|
+
<datafield tag="040" ind1=" " ind2=" ">
|
119
|
+
<subfield code="a">DLC</subfield>
|
120
|
+
<subfield code="c">DLC</subfield>
|
121
|
+
</datafield>
|
122
|
+
<datafield tag="245" ind1="0" ind2="4">
|
123
|
+
<subfield code="a">The Texas ranger</subfield>
|
124
|
+
<subfield code="h">[sound recording] /</subfield>
|
125
|
+
<subfield code="c">Sung by Beale D. Taylor.</subfield>
|
126
|
+
</datafield>
|
127
|
+
<datafield tag="260" ind1=" " ind2=" ">
|
128
|
+
<subfield code="a">Medina, Texas,</subfield>
|
129
|
+
<subfield code="c">1939.</subfield>
|
130
|
+
</datafield>
|
131
|
+
<datafield tag="300" ind1=" " ind2=" ">
|
132
|
+
<subfield code="a">1 sound disc :</subfield>
|
133
|
+
<subfield code="b">analog, 33 1/3 rpm, mono. ;</subfield>
|
134
|
+
<subfield code="c">12 in.</subfield>
|
135
|
+
</datafield>
|
136
|
+
<datafield tag="651" ind1=" " ind2="0">
|
137
|
+
<subfield code="a">Medina</subfield>
|
138
|
+
<subfield code="z">Texas</subfield>
|
139
|
+
<subfield code="z">United States of America.</subfield>
|
140
|
+
</datafield>
|
141
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
142
|
+
<subfield code="a">Lomax, John Avery, 1867-1948</subfield>
|
143
|
+
<subfield code="e">Recording engineer.</subfield>
|
144
|
+
</datafield>
|
145
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
146
|
+
<subfield code="a">Lomax, Ruby T. (Ruby Terrill)</subfield>
|
147
|
+
<subfield code="e">Recording engineer.</subfield>
|
148
|
+
</datafield>
|
149
|
+
<datafield tag="700" ind1="1" ind2=" ">
|
150
|
+
<subfield code="a">Taylor, Beale D.</subfield>
|
151
|
+
<subfield code="e">Singer.</subfield>
|
152
|
+
</datafield>
|
153
|
+
<datafield tag="852" ind1=" " ind2=" ">
|
154
|
+
<subfield code="a">American Folklife Center, Library of Congress</subfield>
|
155
|
+
</datafield>
|
156
|
+
<datafield tag="852" ind1=" " ind2=" ">
|
157
|
+
<subfield code="a">DLC</subfield>
|
158
|
+
</datafield>
|
159
|
+
</record>
|
160
|
+
</collection>
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: marc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kevin Clarke
|
@@ -13,54 +13,74 @@ authors:
|
|
13
13
|
autorequire: marc
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
|
-
date:
|
16
|
+
date: 2021-06-07 00:00:00.000000000 Z
|
17
17
|
dependencies:
|
18
18
|
- !ruby/object:Gem::Dependency
|
19
|
+
name: scrub_rb
|
19
20
|
requirement: !ruby/object:Gem::Requirement
|
20
21
|
requirements:
|
21
|
-
- -
|
22
|
+
- - ">="
|
22
23
|
- !ruby/object:Gem::Version
|
23
24
|
version: 1.0.1
|
24
|
-
- - <
|
25
|
+
- - "<"
|
25
26
|
- !ruby/object:Gem::Version
|
26
27
|
version: '2'
|
27
|
-
name: scrub_rb
|
28
|
-
prerelease: false
|
29
28
|
type: :runtime
|
29
|
+
prerelease: false
|
30
30
|
version_requirements: !ruby/object:Gem::Requirement
|
31
31
|
requirements:
|
32
|
-
- -
|
32
|
+
- - ">="
|
33
33
|
- !ruby/object:Gem::Version
|
34
34
|
version: 1.0.1
|
35
|
-
- - <
|
35
|
+
- - "<"
|
36
36
|
- !ruby/object:Gem::Version
|
37
37
|
version: '2'
|
38
38
|
- !ruby/object:Gem::Dependency
|
39
|
+
name: unf
|
39
40
|
requirement: !ruby/object:Gem::Requirement
|
40
41
|
requirements:
|
41
|
-
- -
|
42
|
+
- - ">="
|
42
43
|
- !ruby/object:Gem::Version
|
43
44
|
version: '0'
|
44
|
-
|
45
|
+
type: :runtime
|
45
46
|
prerelease: false
|
47
|
+
version_requirements: !ruby/object:Gem::Requirement
|
48
|
+
requirements:
|
49
|
+
- - ">="
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: '0'
|
52
|
+
- !ruby/object:Gem::Dependency
|
53
|
+
name: rexml
|
54
|
+
requirement: !ruby/object:Gem::Requirement
|
55
|
+
requirements:
|
56
|
+
- - ">="
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: '0'
|
46
59
|
type: :runtime
|
60
|
+
prerelease: false
|
47
61
|
version_requirements: !ruby/object:Gem::Requirement
|
48
62
|
requirements:
|
49
|
-
- -
|
63
|
+
- - ">="
|
50
64
|
- !ruby/object:Gem::Version
|
51
65
|
version: '0'
|
52
|
-
description:
|
66
|
+
description:
|
53
67
|
email: ehs@pobox.com
|
54
68
|
executables: []
|
55
69
|
extensions: []
|
56
70
|
extra_rdoc_files: []
|
57
71
|
files:
|
72
|
+
- Changes
|
73
|
+
- LICENSE
|
74
|
+
- README.md
|
75
|
+
- Rakefile
|
58
76
|
- lib/marc.rb
|
59
77
|
- lib/marc/constants.rb
|
60
78
|
- lib/marc/controlfield.rb
|
61
79
|
- lib/marc/datafield.rb
|
62
80
|
- lib/marc/dublincore.rb
|
63
81
|
- lib/marc/exception.rb
|
82
|
+
- lib/marc/marc8/map_to_unicode.rb
|
83
|
+
- lib/marc/marc8/to_unicode.rb
|
64
84
|
- lib/marc/reader.rb
|
65
85
|
- lib/marc/record.rb
|
66
86
|
- lib/marc/subfield.rb
|
@@ -69,14 +89,16 @@ files:
|
|
69
89
|
- lib/marc/xml_parsers.rb
|
70
90
|
- lib/marc/xmlreader.rb
|
71
91
|
- lib/marc/xmlwriter.rb
|
72
|
-
- lib/marc/marc8/map_to_unicode.rb
|
73
|
-
- lib/marc/marc8/to_unicode.rb
|
74
92
|
- test/bad_eacc_encoding.marc8.marc
|
75
93
|
- test/batch.dat
|
76
94
|
- test/batch.xml
|
77
95
|
- test/cp866_multirecord.marc
|
78
96
|
- test/cp866_unimarc.marc
|
79
97
|
- test/escaped_character_reference.marc8.marc
|
98
|
+
- test/marc8/data/test_marc8.txt
|
99
|
+
- test/marc8/data/test_utf8.txt
|
100
|
+
- test/marc8/tc_marc8_mapping.rb
|
101
|
+
- test/marc8/tc_to_unicode.rb
|
80
102
|
- test/marc8_accented_chars.marc
|
81
103
|
- test/marc_with_bad_utf8.utf8.marc
|
82
104
|
- test/no-leading-zero.xml
|
@@ -98,42 +120,34 @@ files:
|
|
98
120
|
- test/tc_subfield.rb
|
99
121
|
- test/tc_writer.rb
|
100
122
|
- test/tc_xml.rb
|
123
|
+
- test/tc_xml_error_handling.rb
|
124
|
+
- test/three-records-second-bad.xml
|
101
125
|
- test/ts_marc.rb
|
102
126
|
- test/utf8.marc
|
103
127
|
- test/utf8_multirecord.marc
|
104
128
|
- test/utf8_with_bad_bytes.marc
|
105
|
-
- test/marc8/tc_marc8_mapping.rb
|
106
|
-
- test/marc8/tc_to_unicode.rb
|
107
|
-
- test/marc8/data/test_marc8.txt
|
108
|
-
- test/marc8/data/test_utf8.txt
|
109
|
-
- Rakefile
|
110
|
-
- README.md
|
111
|
-
- Changes
|
112
|
-
- LICENSE
|
113
129
|
homepage: https://github.com/ruby-marc/ruby-marc/
|
114
130
|
licenses:
|
115
131
|
- MIT
|
116
132
|
metadata: {}
|
117
|
-
post_install_message:
|
133
|
+
post_install_message:
|
118
134
|
rdoc_options: []
|
119
135
|
require_paths:
|
120
136
|
- lib
|
121
137
|
required_ruby_version: !ruby/object:Gem::Requirement
|
122
138
|
requirements:
|
123
|
-
- -
|
139
|
+
- - ">="
|
124
140
|
- !ruby/object:Gem::Version
|
125
141
|
version: 1.8.6
|
126
142
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
127
143
|
requirements:
|
128
|
-
- -
|
144
|
+
- - ">="
|
129
145
|
- !ruby/object:Gem::Version
|
130
146
|
version: '0'
|
131
147
|
requirements: []
|
132
|
-
|
133
|
-
|
134
|
-
signing_key:
|
148
|
+
rubygems_version: 3.0.3
|
149
|
+
signing_key:
|
135
150
|
specification_version: 4
|
136
151
|
summary: A ruby library for working with Machine Readable Cataloging
|
137
152
|
test_files:
|
138
153
|
- test/ts_marc.rb
|
139
|
-
has_rdoc: true
|