marc 1.1.1 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE/bug_report.md +30 -0
- data/.github/workflows/ruby.yml +24 -0
- data/.gitignore +17 -0
- data/.standard.yml +1 -0
- data/{Changes → CHANGELOG.md} +102 -30
- data/Gemfile +15 -0
- data/README.md +239 -46
- data/Rakefile +14 -14
- data/bin/marc +14 -0
- data/bin/marc2xml +17 -0
- data/examples/xml2marc.rb +10 -0
- data/lib/marc/constants.rb +3 -3
- data/lib/marc/controlfield.rb +35 -23
- data/lib/marc/datafield.rb +70 -63
- data/lib/marc/dublincore.rb +59 -41
- data/lib/marc/exception.rb +9 -1
- data/lib/marc/jsonl_reader.rb +33 -0
- data/lib/marc/jsonl_writer.rb +44 -0
- data/lib/marc/marc8/map_to_unicode.rb +16417 -16420
- data/lib/marc/marc8/to_unicode.rb +80 -86
- data/lib/marc/reader.rb +117 -123
- data/lib/marc/record.rb +72 -62
- data/lib/marc/subfield.rb +12 -10
- data/lib/marc/unsafe_xmlwriter.rb +93 -0
- data/lib/marc/version.rb +1 -1
- data/lib/marc/writer.rb +27 -30
- data/lib/marc/xml_parsers.rb +222 -197
- data/lib/marc/xmlreader.rb +131 -114
- data/lib/marc/xmlwriter.rb +93 -82
- data/lib/marc.rb +20 -18
- data/marc.gemspec +23 -0
- data/test/marc8/tc_marc8_mapping.rb +3 -3
- data/test/marc8/tc_to_unicode.rb +28 -32
- data/test/messed_up_leader.xml +9 -0
- data/test/tc_controlfield.rb +37 -34
- data/test/tc_datafield.rb +65 -60
- data/test/tc_dublincore.rb +9 -11
- data/test/tc_hash.rb +10 -13
- data/test/tc_jsonl.rb +19 -0
- data/test/tc_marchash.rb +17 -21
- data/test/tc_parsers.rb +108 -144
- data/test/tc_reader.rb +35 -36
- data/test/tc_reader_char_encodings.rb +149 -169
- data/test/tc_record.rb +143 -148
- data/test/tc_subfield.rb +14 -13
- data/test/tc_unsafe_xml.rb +95 -0
- data/test/tc_writer.rb +101 -108
- data/test/tc_xml.rb +101 -94
- data/test/tc_xml_error_handling.rb +7 -8
- data/test/ts_marc.rb +8 -8
- metadata +80 -9
data/lib/marc/xml_parsers.rb
CHANGED
@@ -1,152 +1,187 @@
|
|
1
1
|
module MARC
|
2
2
|
# Exception class to be thrown when an XML parser
|
3
3
|
# encounters an unrecoverable error.
|
4
|
-
class XMLParseError < StandardError
|
4
|
+
class XMLParseError < StandardError
|
5
|
+
end
|
6
|
+
|
7
|
+
IND1 = "ind1".freeze
|
8
|
+
IND2 = "ind2".freeze
|
9
|
+
TAG = "tag".freeze
|
10
|
+
CODE = "code".freeze
|
5
11
|
|
6
12
|
# The MagicReader will try to use the best available XML Parser at the
|
7
|
-
# time of initialization.
|
13
|
+
# time of initialization.
|
8
14
|
# The order is currently:
|
9
15
|
# * Nokogiri
|
10
|
-
# *
|
16
|
+
# * libxml-ruby (MRI only) ** DEPRECATED **
|
17
|
+
# * jstax (JRuby only) ** DEPRECATED **
|
11
18
|
# * rexml
|
12
19
|
#
|
13
20
|
# With the idea that other parsers could be added as their modules are
|
14
21
|
# added. Realistically, this list should be limited to stream-based
|
15
22
|
# parsers. The magic should be used selectively, however. After all,
|
16
23
|
# one project's definition of 'best' might not apply universally. It
|
17
|
-
# is arguable which is "best" on JRuby: Nokogiri or jrexml.
|
18
|
-
module MagicReader
|
24
|
+
# is arguable which is "best" on JRuby: Nokogiri or jrexml.
|
25
|
+
module MagicReader
|
19
26
|
def self.extended(receiver)
|
20
27
|
magic = MARC::XMLReader.best_available
|
21
28
|
case magic
|
22
|
-
when
|
23
|
-
|
24
|
-
when
|
25
|
-
|
29
|
+
when "nokogiri"
|
30
|
+
receiver.extend(NokogiriReader)
|
31
|
+
when "libxml"
|
32
|
+
warn "libxml support will be removed in version 1.3. Prefer nokogiri instead"
|
33
|
+
receiver.extend(LibXMLReader)
|
34
|
+
when "jstax"
|
35
|
+
warn "jstax support will be removed in version 1.3. Prefer nokogiri instead"
|
36
|
+
receiver.extend(JRubySTAXReader)
|
37
|
+
when "jrexml"
|
38
|
+
warn "jrexml support is broken upstream; falling back to just rexml. Prefer nokogiri instead"
|
39
|
+
receiver.extend(REXMLReader)
|
26
40
|
else receiver.extend(REXMLReader)
|
27
41
|
end
|
28
42
|
end
|
29
43
|
end
|
30
|
-
|
44
|
+
|
31
45
|
module GenericPullParser
|
32
46
|
# Submodules must include
|
33
47
|
# self.extended()
|
34
48
|
# init()
|
35
49
|
# attributes_to_hash(attributes)
|
36
50
|
# each
|
37
|
-
|
51
|
+
|
52
|
+
REC_TAG = "record".freeze
|
53
|
+
LEAD_TAG = "leader".freeze
|
54
|
+
CF_TAG = "controlfield".freeze
|
55
|
+
DF_TAG = "datafield".freeze
|
56
|
+
SF_TAG = "subfield".freeze
|
57
|
+
|
58
|
+
def init
|
59
|
+
@record = {record: nil, leader: "", field: nil, subfield: nil}
|
60
|
+
@current_element = nil
|
61
|
+
@ns = "http://www.loc.gov/MARC21/slim"
|
62
|
+
end
|
38
63
|
|
39
64
|
# Returns our MARC::Record object to the #each block.
|
40
65
|
def yield_record
|
41
|
-
@
|
66
|
+
if @record[:record].valid?
|
67
|
+
@block.call(@record[:record])
|
68
|
+
elsif @error_handler
|
69
|
+
@error_handler.call(self, @record[:record], @block)
|
70
|
+
else
|
71
|
+
raise MARC::RecordException, @record[:record]
|
72
|
+
end
|
73
|
+
ensure
|
42
74
|
@record[:record] = nil
|
43
|
-
end
|
75
|
+
end
|
44
76
|
|
45
77
|
def start_element_namespace name, attributes = [], prefix = nil, uri = nil, ns = {}
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
def characters text
|
78
|
+
attributes = attributes_to_hash(attributes)
|
79
|
+
if (uri == @ns) || @ignore_namespace
|
80
|
+
case name.downcase
|
81
|
+
when SF_TAG
|
82
|
+
@current_element = :subfield
|
83
|
+
@record[:subfield] = MARC::Subfield.new(attributes[CODE])
|
84
|
+
when DF_TAG
|
85
|
+
@record[:field] = MARC::DataField.new(attributes[TAG], attributes[IND1], attributes[IND2])
|
86
|
+
when CF_TAG
|
87
|
+
@current_element = :field
|
88
|
+
@record[:field] = MARC::ControlField.new(attributes[TAG])
|
89
|
+
when LEAD_TAG then @current_element = :leader
|
90
|
+
when REC_TAG then @record[:record] = MARC::Record.new
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
def characters(text)
|
65
96
|
case @current_element
|
66
|
-
when :leader then @record[:record].leader = text
|
67
|
-
when :field then @record[:field].value << text
|
68
97
|
when :subfield then @record[:subfield].value << text
|
98
|
+
when :field then @record[:field].value << text
|
99
|
+
when :leader then @record[:leader] << text
|
69
100
|
end
|
70
101
|
end
|
71
102
|
|
72
|
-
def end_element_namespace
|
103
|
+
def end_element_namespace(name, prefix = nil, uri = nil)
|
73
104
|
@current_element = nil
|
74
|
-
if uri == @ns
|
105
|
+
if (uri == @ns) || @ignore_namespace
|
75
106
|
case name.downcase
|
76
|
-
when
|
77
|
-
when /(control|data)field/
|
78
|
-
@record[:record] << @record[:field]
|
79
|
-
@record[:field] = nil
|
80
|
-
@current_element = nil if @current_element == :field
|
81
|
-
when 'subfield'
|
107
|
+
when SF_TAG
|
82
108
|
@record[:field].append(@record[:subfield])
|
83
109
|
@record[:subfield] = nil
|
84
110
|
@current_element = nil if @current_element == :subfield
|
111
|
+
when DF_TAG, CF_TAG
|
112
|
+
@record[:record] << @record[:field]
|
113
|
+
@record[:field] = nil
|
114
|
+
@current_element = nil if @current_element == :field
|
115
|
+
when REC_TAG then yield_record
|
116
|
+
when LEAD_TAG
|
117
|
+
@record[:record].leader = @record[:leader]
|
118
|
+
@record[:leader] = ""
|
119
|
+
@current_element = nil if @current_element == :leader
|
85
120
|
end
|
86
121
|
end
|
87
|
-
end
|
122
|
+
end
|
88
123
|
end
|
89
|
-
|
90
124
|
|
91
125
|
# NokogiriReader uses the Nokogiri SAX Parser to quickly read
|
92
126
|
# a MARCXML document. Because dynamically subclassing MARC::XMLReader
|
93
127
|
# is a little ugly, we need to recreate all of the SAX event methods
|
94
|
-
# from Nokogiri::XML::SAX::Document here rather than subclassing.
|
95
|
-
module NokogiriReader
|
128
|
+
# from Nokogiri::XML::SAX::Document here rather than subclassing.
|
129
|
+
module NokogiriReader
|
96
130
|
include GenericPullParser
|
131
|
+
|
97
132
|
def self.extended(receiver)
|
98
|
-
require
|
133
|
+
require "nokogiri"
|
99
134
|
receiver.init
|
100
135
|
end
|
101
|
-
|
136
|
+
|
102
137
|
# Sets our instance variables for SAX parsing in Nokogiri and creates the parser
|
103
138
|
def init
|
104
|
-
|
105
|
-
@
|
106
|
-
@ns = "http://www.loc.gov/MARC21/slim"
|
107
|
-
@parser = Nokogiri::XML::SAX::Parser.new(self)
|
139
|
+
super
|
140
|
+
@parser = Nokogiri::XML::SAX::Parser.new(self)
|
108
141
|
end
|
109
|
-
|
142
|
+
|
110
143
|
# Loop through the MARC records in the XML document
|
111
|
-
def each(&block)
|
112
|
-
|
113
|
-
return self.enum_for(:each)
|
114
|
-
else
|
144
|
+
def each(&block)
|
145
|
+
if block
|
115
146
|
@block = block
|
116
147
|
@parser.parse(@handle)
|
148
|
+
else
|
149
|
+
enum_for(:each)
|
117
150
|
end
|
118
151
|
end
|
119
152
|
|
120
153
|
def error(evt)
|
121
154
|
raise(XMLParseError, "XML parsing error: #{evt}")
|
122
155
|
end
|
123
|
-
|
124
156
|
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
157
|
+
SAX_METHODS = [:xmldecl, :start_document, :end_document, :start_element,
|
158
|
+
:end_element, :comment, :warning, :error, :cdata_block, :processing_instruction]
|
159
|
+
|
160
|
+
def method_missing(method_name, *args)
|
161
|
+
unless SAX_METHODS.include?(method_name)
|
162
|
+
raise NoMethodError.new("undefined method '#{method_name} for #{self}", "no_meth")
|
130
163
|
end
|
131
164
|
end
|
132
|
-
|
133
|
-
private
|
134
|
-
|
135
|
-
def attributes_to_hash(attributes)
|
136
|
-
hash = {}
|
137
|
-
attributes.each do | att |
|
138
|
-
hash[att.localname] = att.value
|
139
|
-
end
|
140
|
-
hash
|
141
|
-
end
|
142
|
-
end
|
143
165
|
|
166
|
+
def respond_to_missing?(method_name, include_private = false)
|
167
|
+
SAX_METHODS.include?(method_name) || super
|
168
|
+
end
|
169
|
+
|
170
|
+
private
|
171
|
+
|
172
|
+
def attributes_to_hash(attributes)
|
173
|
+
hash = {}
|
174
|
+
attributes.each do |att|
|
175
|
+
hash[att.localname] = att.value
|
176
|
+
end
|
177
|
+
hash
|
178
|
+
end
|
179
|
+
end
|
144
180
|
|
145
|
-
|
146
181
|
# The REXMLReader is the 'default' parser, since we can at least be
|
147
182
|
# assured that REXML is probably there. It uses REXML's PullParser
|
148
183
|
# to handle larger document sizes without consuming insane amounts of
|
149
|
-
# memory, but it's still REXML (read: slow), so it's a good idea to
|
184
|
+
# memory, but it's still REXML (read: slow), so it's a good idea to
|
150
185
|
# use an alternative parser if available. If you don't know the best
|
151
186
|
# parser available, you can use the MagicReader or set:
|
152
187
|
#
|
@@ -158,43 +193,44 @@ module MARC
|
|
158
193
|
#
|
159
194
|
# or
|
160
195
|
#
|
161
|
-
# reader = MARC::XMLReader.new(fh, :parser=>"magic")
|
196
|
+
# reader = MARC::XMLReader.new(fh, :parser=>"magic")
|
162
197
|
# (or the constant)
|
163
198
|
#
|
164
199
|
# which will cascade down to REXML if nothing better is found.
|
165
|
-
#
|
200
|
+
#
|
166
201
|
module REXMLReader
|
167
202
|
def self.extended(receiver)
|
168
|
-
require
|
169
|
-
require
|
203
|
+
require "rexml/document"
|
204
|
+
require "rexml/parsers/pullparser"
|
170
205
|
receiver.init
|
171
206
|
end
|
172
|
-
|
207
|
+
|
173
208
|
# Sets our parser
|
174
209
|
def init
|
175
210
|
@parser = REXML::Parsers::PullParser.new(@handle)
|
176
211
|
end
|
177
|
-
|
212
|
+
|
178
213
|
# Loop through the MARC records in the XML document
|
179
214
|
def each
|
180
|
-
|
181
|
-
return self.enum_for(:each)
|
182
|
-
else
|
215
|
+
if block_given?
|
183
216
|
while @parser.has_next?
|
184
217
|
event = @parser.pull
|
185
|
-
# if it's the start of a record element
|
186
|
-
if event.start_element?
|
218
|
+
# if it's the start of a record element
|
219
|
+
if event.start_element? && (strip_ns(event[0]) == "record")
|
187
220
|
yield build_record
|
188
221
|
end
|
189
|
-
end
|
222
|
+
end
|
223
|
+
else
|
224
|
+
enum_for(:each)
|
190
225
|
end
|
191
226
|
end
|
192
|
-
|
227
|
+
|
193
228
|
private
|
229
|
+
|
194
230
|
def strip_ns(str)
|
195
|
-
|
231
|
+
str.sub(/^.*:/, "")
|
196
232
|
end
|
197
|
-
|
233
|
+
|
198
234
|
# will accept parse events until a record has been built up
|
199
235
|
#
|
200
236
|
def build_record
|
@@ -202,15 +238,15 @@ module MARC
|
|
202
238
|
data_field = nil
|
203
239
|
control_field = nil
|
204
240
|
subfield = nil
|
205
|
-
text =
|
241
|
+
text = ""
|
206
242
|
attrs = nil
|
207
|
-
if Module.constants.index(
|
243
|
+
if Module.constants.index("Nokogiri") && @parser.is_a?(Nokogiri::XML::Reader)
|
208
244
|
datafield = nil
|
209
245
|
cursor = nil
|
210
246
|
open_elements = []
|
211
|
-
@parser.each do |
|
247
|
+
@parser.each do |node|
|
212
248
|
if node.value? && cursor
|
213
|
-
if cursor.is_a?(Symbol)
|
249
|
+
if cursor.is_a?(Symbol) && (cursor == :leader)
|
214
250
|
record.leader = node.value
|
215
251
|
else
|
216
252
|
cursor.value = node.value
|
@@ -230,195 +266,184 @@ module MARC
|
|
230
266
|
when "controlfield"
|
231
267
|
record << datafield if datafield
|
232
268
|
datafield = nil
|
233
|
-
control_field = MARC::ControlField.new(node.attribute(
|
269
|
+
control_field = MARC::ControlField.new(node.attribute("tag"))
|
234
270
|
record << control_field
|
235
271
|
cursor = control_field
|
236
|
-
when "datafield"
|
272
|
+
when "datafield"
|
237
273
|
record << datafield if datafield
|
238
274
|
datafield = nil
|
239
|
-
data_field = MARC::DataField.new(node.attribute(
|
275
|
+
data_field = MARC::DataField.new(node.attribute("tag"), node.attribute(IND1), node.attribute(IND2))
|
240
276
|
datafield = data_field
|
241
277
|
when "subfield"
|
242
278
|
raise "No datafield to add to" unless datafield
|
243
|
-
subfield = MARC::Subfield.new(node.attribute(
|
279
|
+
subfield = MARC::Subfield.new(node.attribute(CODE))
|
244
280
|
datafield.append(subfield)
|
245
281
|
cursor = subfield
|
246
282
|
when "record"
|
247
283
|
record << datafield if datafield
|
248
284
|
return record
|
249
|
-
end
|
250
|
-
#puts node.name
|
285
|
+
end
|
251
286
|
end
|
252
|
-
|
287
|
+
|
253
288
|
else
|
254
289
|
while @parser.has_next?
|
255
290
|
event = @parser.pull
|
256
291
|
|
257
292
|
if event.text?
|
258
|
-
text += REXML::Text
|
293
|
+
text += REXML::Text.unnormalize(event[0])
|
259
294
|
next
|
260
295
|
end
|
261
296
|
|
262
297
|
if event.start_element?
|
263
|
-
text =
|
298
|
+
text = ""
|
264
299
|
attrs = event[1]
|
265
300
|
case strip_ns(event[0])
|
266
|
-
when
|
267
|
-
text =
|
268
|
-
control_field = MARC::ControlField.new(attrs[
|
269
|
-
when
|
270
|
-
text =
|
271
|
-
data_field = MARC::DataField.new(attrs[
|
272
|
-
attrs[
|
273
|
-
when
|
274
|
-
text =
|
275
|
-
subfield = MARC::Subfield.new(attrs[
|
301
|
+
when "controlfield"
|
302
|
+
text = ""
|
303
|
+
control_field = MARC::ControlField.new(attrs[TAG])
|
304
|
+
when "datafield"
|
305
|
+
text = ""
|
306
|
+
data_field = MARC::DataField.new(attrs[TAG], attrs[IND1],
|
307
|
+
attrs[IND2])
|
308
|
+
when "subfield"
|
309
|
+
text = ""
|
310
|
+
subfield = MARC::Subfield.new(attrs[CODE])
|
276
311
|
end
|
277
312
|
end
|
278
313
|
|
279
314
|
if event.end_element?
|
280
315
|
case strip_ns(event[0])
|
281
|
-
when
|
316
|
+
when "leader"
|
282
317
|
record.leader = text
|
283
|
-
when
|
318
|
+
when "record"
|
284
319
|
return record
|
285
|
-
when
|
320
|
+
when "controlfield"
|
286
321
|
control_field.value = text
|
287
322
|
record.append(control_field)
|
288
|
-
when
|
323
|
+
when "datafield"
|
289
324
|
record.append(data_field)
|
290
|
-
when
|
325
|
+
when "subfield"
|
291
326
|
subfield.value = text
|
292
327
|
data_field.append(subfield)
|
293
328
|
end
|
294
329
|
end
|
295
330
|
end
|
296
331
|
end
|
297
|
-
end
|
298
|
-
end
|
299
|
-
|
300
|
-
# The JREXMLReader is really just here to set the load order for
|
301
|
-
# injecting the Java pull parser.
|
302
|
-
module JREXMLReader
|
303
|
-
|
304
|
-
def self.extended(receiver)
|
305
|
-
require 'rexml/document'
|
306
|
-
require 'rexml/parsers/pullparser'
|
307
|
-
require 'jrexml'
|
308
|
-
receiver.extend(REXMLReader)
|
309
332
|
end
|
310
333
|
end
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
334
|
+
|
315
335
|
unless defined? JRUBY_VERSION
|
316
336
|
module LibXMLReader
|
337
|
+
def self.extended(receiver)
|
338
|
+
require "xml"
|
339
|
+
receiver.init
|
340
|
+
end
|
317
341
|
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
def init
|
324
|
-
@ns = "http://www.loc.gov/MARC21/slim"
|
325
|
-
@parser = XML::Reader.io(@handle)
|
326
|
-
end
|
342
|
+
def init
|
343
|
+
@ns = "http://www.loc.gov/MARC21/slim"
|
344
|
+
@parser = XML::Reader.io(@handle)
|
345
|
+
end
|
327
346
|
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
347
|
+
def each
|
348
|
+
if block_given?
|
349
|
+
while @parser.read
|
350
|
+
if @parser.local_name == "record" && @parser.namespace_uri == @ns
|
351
|
+
yield build_record
|
352
|
+
end
|
353
|
+
end # while
|
354
|
+
else
|
355
|
+
enum_for(:each)
|
356
|
+
end
|
337
357
|
end
|
338
|
-
end # each
|
339
358
|
|
340
|
-
|
341
|
-
|
342
|
-
|
359
|
+
# each
|
360
|
+
|
361
|
+
def build_record
|
362
|
+
r = MARC::Record.new
|
363
|
+
until (@parser.local_name == "record") && (@parser.node_type == XML::Reader::TYPE_END_ELEMENT)
|
343
364
|
@parser.read
|
344
365
|
next if @parser.node_type == XML::Reader::TYPE_END_ELEMENT
|
345
366
|
case @parser.local_name
|
346
|
-
when
|
347
|
-
|
348
|
-
|
349
|
-
when
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
when
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
367
|
+
when "leader"
|
368
|
+
@parser.read
|
369
|
+
r.leader = @parser.value
|
370
|
+
when "controlfield"
|
371
|
+
tag = @parser[TAG]
|
372
|
+
@parser.read
|
373
|
+
r << MARC::ControlField.new(tag, @parser.value)
|
374
|
+
when "datafield"
|
375
|
+
data = MARC::DataField.new(@parser[TAG], @parser[IND1], @parser[IND2])
|
376
|
+
while @parser.read && !((@parser.local_name == "datafield") && (@parser.node_type == XML::Reader::TYPE_END_ELEMENT))
|
377
|
+
next if @parser.node_type == XML::Reader::TYPE_END_ELEMENT
|
378
|
+
case @parser.local_name
|
379
|
+
when "subfield"
|
380
|
+
code = @parser[CODE]
|
381
|
+
@parser.read
|
382
|
+
data.append(MARC::Subfield.new(code, @parser.value))
|
383
|
+
end
|
384
|
+
end
|
385
|
+
r << data
|
365
386
|
|
366
387
|
end # case
|
367
|
-
end #until
|
368
|
-
|
388
|
+
end # until
|
389
|
+
r
|
390
|
+
end
|
369
391
|
end
|
370
392
|
end
|
371
|
-
end
|
372
393
|
|
373
394
|
# The JRubySTAXReader uses native Java calls to parse the incoming stream
|
374
395
|
# of marc-xml. It includes most of the work from GenericPullParser
|
375
396
|
|
376
397
|
if defined? JRUBY_VERSION
|
398
|
+
# *DEPRECATED*: JRubySTAXReader is deprecated and will be removed in a
|
399
|
+
# future version of ruby-marc. Please use NokogiriReader
|
400
|
+
# instead.
|
377
401
|
module JRubySTAXReader
|
378
402
|
include GenericPullParser
|
403
|
+
|
379
404
|
def self.extended(receiver)
|
380
|
-
require
|
405
|
+
require "java" # may only be necessary in jruby 1.6
|
381
406
|
receiver.init
|
382
407
|
end
|
383
408
|
|
384
409
|
def init
|
385
|
-
|
386
|
-
|
387
|
-
|
410
|
+
warn "JRubySTAXReader is deprecated and will be removed in a future version of ruby-marc."
|
411
|
+
|
412
|
+
super
|
388
413
|
@factory = javax.xml.stream.XMLInputFactory.newInstance
|
389
414
|
@parser = @factory.createXMLStreamReader(@handle.to_inputstream)
|
390
415
|
end
|
391
416
|
|
392
417
|
# Loop through the MARC records in the XML document
|
393
|
-
def each(&block)
|
394
|
-
|
395
|
-
return self.enum_for(:each)
|
396
|
-
else
|
418
|
+
def each(&block)
|
419
|
+
if block
|
397
420
|
@block = block
|
398
421
|
parser_dispatch
|
422
|
+
else
|
423
|
+
enum_for(:each)
|
399
424
|
end
|
400
425
|
end
|
401
426
|
|
402
427
|
def parser_dispatch
|
403
|
-
while event = @parser.next
|
428
|
+
while (event = @parser.next) && (event != javax.xml.stream.XMLStreamConstants::END_DOCUMENT)
|
404
429
|
case event
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
430
|
+
when javax.xml.stream.XMLStreamConstants::START_ELEMENT
|
431
|
+
start_element_namespace(@parser.getLocalName, [], nil, @parser.getNamespaceURI, nil)
|
432
|
+
when javax.xml.stream.XMLStreamConstants::END_ELEMENT
|
433
|
+
end_element_namespace(@parser.getLocalName, @parser.getPrefix, @parser.getNamespaceURI)
|
434
|
+
when javax.xml.stream.XMLStreamConstants::CHARACTERS
|
435
|
+
characters(@parser.getText)
|
411
436
|
end
|
412
437
|
end
|
413
438
|
end
|
414
439
|
|
415
440
|
def attributes_to_hash(attributes)
|
416
441
|
hash = {}
|
417
|
-
@parser.getAttributeCount.times do |
|
442
|
+
@parser.getAttributeCount.times do |i|
|
418
443
|
hash[@parser.getAttributeName(i).getLocalPart] = @parser.getAttributeValue(i)
|
419
444
|
end
|
420
445
|
hash
|
421
|
-
end
|
446
|
+
end
|
422
447
|
end # end of module
|
423
|
-
end # end of if jruby
|
448
|
+
end # end of if jruby
|
424
449
|
end
|