marc 1.0.4 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE/bug_report.md +30 -0
- data/.github/workflows/ruby.yml +24 -0
- data/.gitignore +17 -0
- data/.standard.yml +1 -0
- data/{Changes → CHANGELOG.md} +106 -29
- data/Gemfile +15 -0
- data/README.md +240 -47
- data/Rakefile +14 -14
- data/bin/marc +14 -0
- data/bin/marc2xml +17 -0
- data/examples/xml2marc.rb +10 -0
- data/lib/marc/constants.rb +3 -3
- data/lib/marc/controlfield.rb +35 -23
- data/lib/marc/datafield.rb +70 -63
- data/lib/marc/dublincore.rb +59 -41
- data/lib/marc/exception.rb +9 -1
- data/lib/marc/jsonl_reader.rb +33 -0
- data/lib/marc/jsonl_writer.rb +44 -0
- data/lib/marc/marc8/map_to_unicode.rb +16417 -16420
- data/lib/marc/marc8/to_unicode.rb +80 -86
- data/lib/marc/reader.rb +119 -121
- data/lib/marc/record.rb +72 -62
- data/lib/marc/subfield.rb +12 -10
- data/lib/marc/unsafe_xmlwriter.rb +93 -0
- data/lib/marc/version.rb +1 -1
- data/lib/marc/writer.rb +27 -30
- data/lib/marc/xml_parsers.rb +222 -197
- data/lib/marc/xmlreader.rb +131 -114
- data/lib/marc/xmlwriter.rb +93 -81
- data/lib/marc.rb +20 -18
- data/marc.gemspec +23 -0
- data/test/marc8/tc_marc8_mapping.rb +3 -3
- data/test/marc8/tc_to_unicode.rb +28 -32
- data/test/messed_up_leader.xml +9 -0
- data/test/tc_controlfield.rb +37 -34
- data/test/tc_datafield.rb +65 -60
- data/test/tc_dublincore.rb +9 -11
- data/test/tc_hash.rb +10 -13
- data/test/tc_jsonl.rb +19 -0
- data/test/tc_marchash.rb +17 -21
- data/test/tc_parsers.rb +108 -144
- data/test/tc_reader.rb +35 -36
- data/test/tc_reader_char_encodings.rb +149 -169
- data/test/tc_record.rb +143 -148
- data/test/tc_subfield.rb +14 -13
- data/test/tc_unsafe_xml.rb +95 -0
- data/test/tc_writer.rb +101 -108
- data/test/tc_xml.rb +99 -87
- data/test/tc_xml_error_handling.rb +7 -8
- data/test/ts_marc.rb +8 -8
- metadata +94 -9
data/lib/marc/xml_parsers.rb
CHANGED
@@ -1,152 +1,187 @@
|
|
1
1
|
module MARC
|
2
2
|
# Exception class to be thrown when an XML parser
|
3
3
|
# encounters an unrecoverable error.
|
4
|
-
class XMLParseError < StandardError
|
4
|
+
class XMLParseError < StandardError
|
5
|
+
end
|
6
|
+
|
7
|
+
IND1 = "ind1".freeze
|
8
|
+
IND2 = "ind2".freeze
|
9
|
+
TAG = "tag".freeze
|
10
|
+
CODE = "code".freeze
|
5
11
|
|
6
12
|
# The MagicReader will try to use the best available XML Parser at the
|
7
|
-
# time of initialization.
|
13
|
+
# time of initialization.
|
8
14
|
# The order is currently:
|
9
15
|
# * Nokogiri
|
10
|
-
# *
|
16
|
+
# * libxml-ruby (MRI only) ** DEPRECATED **
|
17
|
+
# * jstax (JRuby only) ** DEPRECATED **
|
11
18
|
# * rexml
|
12
19
|
#
|
13
20
|
# With the idea that other parsers could be added as their modules are
|
14
21
|
# added. Realistically, this list should be limited to stream-based
|
15
22
|
# parsers. The magic should be used selectively, however. After all,
|
16
23
|
# one project's definition of 'best' might not apply universally. It
|
17
|
-
# is arguable which is "best" on JRuby: Nokogiri or jrexml.
|
18
|
-
module MagicReader
|
24
|
+
# is arguable which is "best" on JRuby: Nokogiri or jrexml.
|
25
|
+
module MagicReader
|
19
26
|
def self.extended(receiver)
|
20
27
|
magic = MARC::XMLReader.best_available
|
21
28
|
case magic
|
22
|
-
when
|
23
|
-
|
24
|
-
when
|
25
|
-
|
29
|
+
when "nokogiri"
|
30
|
+
receiver.extend(NokogiriReader)
|
31
|
+
when "libxml"
|
32
|
+
warn "libxml support will be removed in version 1.3. Prefer nokogiri instead"
|
33
|
+
receiver.extend(LibXMLReader)
|
34
|
+
when "jstax"
|
35
|
+
warn "jstax support will be removed in version 1.3. Prefer nokogiri instead"
|
36
|
+
receiver.extend(JRubySTAXReader)
|
37
|
+
when "jrexml"
|
38
|
+
warn "jrexml support is broken upstream; falling back to just rexml. Prefer nokogiri instead"
|
39
|
+
receiver.extend(REXMLReader)
|
26
40
|
else receiver.extend(REXMLReader)
|
27
41
|
end
|
28
42
|
end
|
29
43
|
end
|
30
|
-
|
44
|
+
|
31
45
|
module GenericPullParser
|
32
46
|
# Submodules must include
|
33
47
|
# self.extended()
|
34
48
|
# init()
|
35
49
|
# attributes_to_hash(attributes)
|
36
50
|
# each
|
37
|
-
|
51
|
+
|
52
|
+
REC_TAG = "record".freeze
|
53
|
+
LEAD_TAG = "leader".freeze
|
54
|
+
CF_TAG = "controlfield".freeze
|
55
|
+
DF_TAG = "datafield".freeze
|
56
|
+
SF_TAG = "subfield".freeze
|
57
|
+
|
58
|
+
def init
|
59
|
+
@record = {record: nil, leader: "", field: nil, subfield: nil}
|
60
|
+
@current_element = nil
|
61
|
+
@ns = "http://www.loc.gov/MARC21/slim"
|
62
|
+
end
|
38
63
|
|
39
64
|
# Returns our MARC::Record object to the #each block.
|
40
65
|
def yield_record
|
41
|
-
@
|
66
|
+
if @record[:record].valid?
|
67
|
+
@block.call(@record[:record])
|
68
|
+
elsif @error_handler
|
69
|
+
@error_handler.call(self, @record[:record], @block)
|
70
|
+
else
|
71
|
+
raise MARC::RecordException, @record[:record]
|
72
|
+
end
|
73
|
+
ensure
|
42
74
|
@record[:record] = nil
|
43
|
-
end
|
75
|
+
end
|
44
76
|
|
45
77
|
def start_element_namespace name, attributes = [], prefix = nil, uri = nil, ns = {}
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
def characters text
|
78
|
+
attributes = attributes_to_hash(attributes)
|
79
|
+
if (uri == @ns) || @ignore_namespace
|
80
|
+
case name.downcase
|
81
|
+
when SF_TAG
|
82
|
+
@current_element = :subfield
|
83
|
+
@record[:subfield] = MARC::Subfield.new(attributes[CODE])
|
84
|
+
when DF_TAG
|
85
|
+
@record[:field] = MARC::DataField.new(attributes[TAG], attributes[IND1], attributes[IND2])
|
86
|
+
when CF_TAG
|
87
|
+
@current_element = :field
|
88
|
+
@record[:field] = MARC::ControlField.new(attributes[TAG])
|
89
|
+
when LEAD_TAG then @current_element = :leader
|
90
|
+
when REC_TAG then @record[:record] = MARC::Record.new
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
def characters(text)
|
65
96
|
case @current_element
|
66
|
-
when :leader then @record[:record].leader = text
|
67
|
-
when :field then @record[:field].value << text
|
68
97
|
when :subfield then @record[:subfield].value << text
|
98
|
+
when :field then @record[:field].value << text
|
99
|
+
when :leader then @record[:leader] << text
|
69
100
|
end
|
70
101
|
end
|
71
102
|
|
72
|
-
def end_element_namespace
|
103
|
+
def end_element_namespace(name, prefix = nil, uri = nil)
|
73
104
|
@current_element = nil
|
74
|
-
if uri == @ns
|
105
|
+
if (uri == @ns) || @ignore_namespace
|
75
106
|
case name.downcase
|
76
|
-
when
|
77
|
-
when /(control|data)field/
|
78
|
-
@record[:record] << @record[:field]
|
79
|
-
@record[:field] = nil
|
80
|
-
@current_element = nil if @current_element == :field
|
81
|
-
when 'subfield'
|
107
|
+
when SF_TAG
|
82
108
|
@record[:field].append(@record[:subfield])
|
83
109
|
@record[:subfield] = nil
|
84
110
|
@current_element = nil if @current_element == :subfield
|
111
|
+
when DF_TAG, CF_TAG
|
112
|
+
@record[:record] << @record[:field]
|
113
|
+
@record[:field] = nil
|
114
|
+
@current_element = nil if @current_element == :field
|
115
|
+
when REC_TAG then yield_record
|
116
|
+
when LEAD_TAG
|
117
|
+
@record[:record].leader = @record[:leader]
|
118
|
+
@record[:leader] = ""
|
119
|
+
@current_element = nil if @current_element == :leader
|
85
120
|
end
|
86
121
|
end
|
87
|
-
end
|
122
|
+
end
|
88
123
|
end
|
89
|
-
|
90
124
|
|
91
125
|
# NokogiriReader uses the Nokogiri SAX Parser to quickly read
|
92
126
|
# a MARCXML document. Because dynamically subclassing MARC::XMLReader
|
93
127
|
# is a little ugly, we need to recreate all of the SAX event methods
|
94
|
-
# from Nokogiri::XML::SAX::Document here rather than subclassing.
|
95
|
-
module NokogiriReader
|
128
|
+
# from Nokogiri::XML::SAX::Document here rather than subclassing.
|
129
|
+
module NokogiriReader
|
96
130
|
include GenericPullParser
|
131
|
+
|
97
132
|
def self.extended(receiver)
|
98
|
-
require
|
133
|
+
require "nokogiri"
|
99
134
|
receiver.init
|
100
135
|
end
|
101
|
-
|
136
|
+
|
102
137
|
# Sets our instance variables for SAX parsing in Nokogiri and creates the parser
|
103
138
|
def init
|
104
|
-
|
105
|
-
@
|
106
|
-
@ns = "http://www.loc.gov/MARC21/slim"
|
107
|
-
@parser = Nokogiri::XML::SAX::Parser.new(self)
|
139
|
+
super
|
140
|
+
@parser = Nokogiri::XML::SAX::Parser.new(self)
|
108
141
|
end
|
109
|
-
|
142
|
+
|
110
143
|
# Loop through the MARC records in the XML document
|
111
|
-
def each(&block)
|
112
|
-
|
113
|
-
return self.enum_for(:each)
|
114
|
-
else
|
144
|
+
def each(&block)
|
145
|
+
if block
|
115
146
|
@block = block
|
116
147
|
@parser.parse(@handle)
|
148
|
+
else
|
149
|
+
enum_for(:each)
|
117
150
|
end
|
118
151
|
end
|
119
152
|
|
120
153
|
def error(evt)
|
121
154
|
raise(XMLParseError, "XML parsing error: #{evt}")
|
122
155
|
end
|
123
|
-
|
124
156
|
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
157
|
+
SAX_METHODS = [:xmldecl, :start_document, :end_document, :start_element,
|
158
|
+
:end_element, :comment, :warning, :error, :cdata_block, :processing_instruction]
|
159
|
+
|
160
|
+
def method_missing(method_name, *args)
|
161
|
+
unless SAX_METHODS.include?(method_name)
|
162
|
+
raise NoMethodError.new("undefined method '#{method_name} for #{self}", "no_meth")
|
130
163
|
end
|
131
164
|
end
|
132
|
-
|
133
|
-
private
|
134
|
-
|
135
|
-
def attributes_to_hash(attributes)
|
136
|
-
hash = {}
|
137
|
-
attributes.each do | att |
|
138
|
-
hash[att.localname] = att.value
|
139
|
-
end
|
140
|
-
hash
|
141
|
-
end
|
142
|
-
end
|
143
165
|
|
166
|
+
def respond_to_missing?(method_name, include_private = false)
|
167
|
+
SAX_METHODS.include?(method_name) || super
|
168
|
+
end
|
169
|
+
|
170
|
+
private
|
171
|
+
|
172
|
+
def attributes_to_hash(attributes)
|
173
|
+
hash = {}
|
174
|
+
attributes.each do |att|
|
175
|
+
hash[att.localname] = att.value
|
176
|
+
end
|
177
|
+
hash
|
178
|
+
end
|
179
|
+
end
|
144
180
|
|
145
|
-
|
146
181
|
# The REXMLReader is the 'default' parser, since we can at least be
|
147
182
|
# assured that REXML is probably there. It uses REXML's PullParser
|
148
183
|
# to handle larger document sizes without consuming insane amounts of
|
149
|
-
# memory, but it's still REXML (read: slow), so it's a good idea to
|
184
|
+
# memory, but it's still REXML (read: slow), so it's a good idea to
|
150
185
|
# use an alternative parser if available. If you don't know the best
|
151
186
|
# parser available, you can use the MagicReader or set:
|
152
187
|
#
|
@@ -158,43 +193,44 @@ module MARC
|
|
158
193
|
#
|
159
194
|
# or
|
160
195
|
#
|
161
|
-
# reader = MARC::XMLReader.new(fh, :parser=>"magic")
|
196
|
+
# reader = MARC::XMLReader.new(fh, :parser=>"magic")
|
162
197
|
# (or the constant)
|
163
198
|
#
|
164
199
|
# which will cascade down to REXML if nothing better is found.
|
165
|
-
#
|
200
|
+
#
|
166
201
|
module REXMLReader
|
167
202
|
def self.extended(receiver)
|
168
|
-
require
|
169
|
-
require
|
203
|
+
require "rexml/document"
|
204
|
+
require "rexml/parsers/pullparser"
|
170
205
|
receiver.init
|
171
206
|
end
|
172
|
-
|
207
|
+
|
173
208
|
# Sets our parser
|
174
209
|
def init
|
175
210
|
@parser = REXML::Parsers::PullParser.new(@handle)
|
176
211
|
end
|
177
|
-
|
212
|
+
|
178
213
|
# Loop through the MARC records in the XML document
|
179
214
|
def each
|
180
|
-
|
181
|
-
return self.enum_for(:each)
|
182
|
-
else
|
215
|
+
if block_given?
|
183
216
|
while @parser.has_next?
|
184
217
|
event = @parser.pull
|
185
|
-
# if it's the start of a record element
|
186
|
-
if event.start_element?
|
218
|
+
# if it's the start of a record element
|
219
|
+
if event.start_element? && (strip_ns(event[0]) == "record")
|
187
220
|
yield build_record
|
188
221
|
end
|
189
|
-
end
|
222
|
+
end
|
223
|
+
else
|
224
|
+
enum_for(:each)
|
190
225
|
end
|
191
226
|
end
|
192
|
-
|
227
|
+
|
193
228
|
private
|
229
|
+
|
194
230
|
def strip_ns(str)
|
195
|
-
|
231
|
+
str.sub(/^.*:/, "")
|
196
232
|
end
|
197
|
-
|
233
|
+
|
198
234
|
# will accept parse events until a record has been built up
|
199
235
|
#
|
200
236
|
def build_record
|
@@ -202,15 +238,15 @@ module MARC
|
|
202
238
|
data_field = nil
|
203
239
|
control_field = nil
|
204
240
|
subfield = nil
|
205
|
-
text =
|
241
|
+
text = ""
|
206
242
|
attrs = nil
|
207
|
-
if Module.constants.index(
|
243
|
+
if Module.constants.index("Nokogiri") && @parser.is_a?(Nokogiri::XML::Reader)
|
208
244
|
datafield = nil
|
209
245
|
cursor = nil
|
210
246
|
open_elements = []
|
211
|
-
@parser.each do |
|
247
|
+
@parser.each do |node|
|
212
248
|
if node.value? && cursor
|
213
|
-
if cursor.is_a?(Symbol)
|
249
|
+
if cursor.is_a?(Symbol) && (cursor == :leader)
|
214
250
|
record.leader = node.value
|
215
251
|
else
|
216
252
|
cursor.value = node.value
|
@@ -230,195 +266,184 @@ module MARC
|
|
230
266
|
when "controlfield"
|
231
267
|
record << datafield if datafield
|
232
268
|
datafield = nil
|
233
|
-
control_field = MARC::ControlField.new(node.attribute(
|
269
|
+
control_field = MARC::ControlField.new(node.attribute("tag"))
|
234
270
|
record << control_field
|
235
271
|
cursor = control_field
|
236
|
-
when "datafield"
|
272
|
+
when "datafield"
|
237
273
|
record << datafield if datafield
|
238
274
|
datafield = nil
|
239
|
-
data_field = MARC::DataField.new(node.attribute(
|
275
|
+
data_field = MARC::DataField.new(node.attribute("tag"), node.attribute(IND1), node.attribute(IND2))
|
240
276
|
datafield = data_field
|
241
277
|
when "subfield"
|
242
278
|
raise "No datafield to add to" unless datafield
|
243
|
-
subfield = MARC::Subfield.new(node.attribute(
|
279
|
+
subfield = MARC::Subfield.new(node.attribute(CODE))
|
244
280
|
datafield.append(subfield)
|
245
281
|
cursor = subfield
|
246
282
|
when "record"
|
247
283
|
record << datafield if datafield
|
248
284
|
return record
|
249
|
-
end
|
250
|
-
#puts node.name
|
285
|
+
end
|
251
286
|
end
|
252
|
-
|
287
|
+
|
253
288
|
else
|
254
289
|
while @parser.has_next?
|
255
290
|
event = @parser.pull
|
256
291
|
|
257
292
|
if event.text?
|
258
|
-
text += REXML::Text
|
293
|
+
text += REXML::Text.unnormalize(event[0])
|
259
294
|
next
|
260
295
|
end
|
261
296
|
|
262
297
|
if event.start_element?
|
263
|
-
text =
|
298
|
+
text = ""
|
264
299
|
attrs = event[1]
|
265
300
|
case strip_ns(event[0])
|
266
|
-
when
|
267
|
-
text =
|
268
|
-
control_field = MARC::ControlField.new(attrs[
|
269
|
-
when
|
270
|
-
text =
|
271
|
-
data_field = MARC::DataField.new(attrs[
|
272
|
-
attrs[
|
273
|
-
when
|
274
|
-
text =
|
275
|
-
subfield = MARC::Subfield.new(attrs[
|
301
|
+
when "controlfield"
|
302
|
+
text = ""
|
303
|
+
control_field = MARC::ControlField.new(attrs[TAG])
|
304
|
+
when "datafield"
|
305
|
+
text = ""
|
306
|
+
data_field = MARC::DataField.new(attrs[TAG], attrs[IND1],
|
307
|
+
attrs[IND2])
|
308
|
+
when "subfield"
|
309
|
+
text = ""
|
310
|
+
subfield = MARC::Subfield.new(attrs[CODE])
|
276
311
|
end
|
277
312
|
end
|
278
313
|
|
279
314
|
if event.end_element?
|
280
315
|
case strip_ns(event[0])
|
281
|
-
when
|
316
|
+
when "leader"
|
282
317
|
record.leader = text
|
283
|
-
when
|
318
|
+
when "record"
|
284
319
|
return record
|
285
|
-
when
|
320
|
+
when "controlfield"
|
286
321
|
control_field.value = text
|
287
322
|
record.append(control_field)
|
288
|
-
when
|
323
|
+
when "datafield"
|
289
324
|
record.append(data_field)
|
290
|
-
when
|
325
|
+
when "subfield"
|
291
326
|
subfield.value = text
|
292
327
|
data_field.append(subfield)
|
293
328
|
end
|
294
329
|
end
|
295
330
|
end
|
296
331
|
end
|
297
|
-
end
|
298
|
-
end
|
299
|
-
|
300
|
-
# The JREXMLReader is really just here to set the load order for
|
301
|
-
# injecting the Java pull parser.
|
302
|
-
module JREXMLReader
|
303
|
-
|
304
|
-
def self.extended(receiver)
|
305
|
-
require 'rexml/document'
|
306
|
-
require 'rexml/parsers/pullparser'
|
307
|
-
require 'jrexml'
|
308
|
-
receiver.extend(REXMLReader)
|
309
332
|
end
|
310
333
|
end
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
334
|
+
|
315
335
|
unless defined? JRUBY_VERSION
|
316
336
|
module LibXMLReader
|
337
|
+
def self.extended(receiver)
|
338
|
+
require "xml"
|
339
|
+
receiver.init
|
340
|
+
end
|
317
341
|
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
def init
|
324
|
-
@ns = "http://www.loc.gov/MARC21/slim"
|
325
|
-
@parser = XML::Reader.io(@handle)
|
326
|
-
end
|
342
|
+
def init
|
343
|
+
@ns = "http://www.loc.gov/MARC21/slim"
|
344
|
+
@parser = XML::Reader.io(@handle)
|
345
|
+
end
|
327
346
|
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
347
|
+
def each
|
348
|
+
if block_given?
|
349
|
+
while @parser.read
|
350
|
+
if @parser.local_name == "record" && @parser.namespace_uri == @ns
|
351
|
+
yield build_record
|
352
|
+
end
|
353
|
+
end # while
|
354
|
+
else
|
355
|
+
enum_for(:each)
|
356
|
+
end
|
337
357
|
end
|
338
|
-
end # each
|
339
358
|
|
340
|
-
|
341
|
-
|
342
|
-
|
359
|
+
# each
|
360
|
+
|
361
|
+
def build_record
|
362
|
+
r = MARC::Record.new
|
363
|
+
until (@parser.local_name == "record") && (@parser.node_type == XML::Reader::TYPE_END_ELEMENT)
|
343
364
|
@parser.read
|
344
365
|
next if @parser.node_type == XML::Reader::TYPE_END_ELEMENT
|
345
366
|
case @parser.local_name
|
346
|
-
when
|
347
|
-
|
348
|
-
|
349
|
-
when
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
when
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
367
|
+
when "leader"
|
368
|
+
@parser.read
|
369
|
+
r.leader = @parser.value
|
370
|
+
when "controlfield"
|
371
|
+
tag = @parser[TAG]
|
372
|
+
@parser.read
|
373
|
+
r << MARC::ControlField.new(tag, @parser.value)
|
374
|
+
when "datafield"
|
375
|
+
data = MARC::DataField.new(@parser[TAG], @parser[IND1], @parser[IND2])
|
376
|
+
while @parser.read && !((@parser.local_name == "datafield") && (@parser.node_type == XML::Reader::TYPE_END_ELEMENT))
|
377
|
+
next if @parser.node_type == XML::Reader::TYPE_END_ELEMENT
|
378
|
+
case @parser.local_name
|
379
|
+
when "subfield"
|
380
|
+
code = @parser[CODE]
|
381
|
+
@parser.read
|
382
|
+
data.append(MARC::Subfield.new(code, @parser.value))
|
383
|
+
end
|
384
|
+
end
|
385
|
+
r << data
|
365
386
|
|
366
387
|
end # case
|
367
|
-
end #until
|
368
|
-
|
388
|
+
end # until
|
389
|
+
r
|
390
|
+
end
|
369
391
|
end
|
370
392
|
end
|
371
|
-
end
|
372
393
|
|
373
394
|
# The JRubySTAXReader uses native Java calls to parse the incoming stream
|
374
395
|
# of marc-xml. It includes most of the work from GenericPullParser
|
375
396
|
|
376
397
|
if defined? JRUBY_VERSION
|
398
|
+
# *DEPRECATED*: JRubySTAXReader is deprecated and will be removed in a
|
399
|
+
# future version of ruby-marc. Please use NokogiriReader
|
400
|
+
# instead.
|
377
401
|
module JRubySTAXReader
|
378
402
|
include GenericPullParser
|
403
|
+
|
379
404
|
def self.extended(receiver)
|
380
|
-
require
|
405
|
+
require "java" # may only be necessary in jruby 1.6
|
381
406
|
receiver.init
|
382
407
|
end
|
383
408
|
|
384
409
|
def init
|
385
|
-
|
386
|
-
|
387
|
-
|
410
|
+
warn "JRubySTAXReader is deprecated and will be removed in a future version of ruby-marc."
|
411
|
+
|
412
|
+
super
|
388
413
|
@factory = javax.xml.stream.XMLInputFactory.newInstance
|
389
414
|
@parser = @factory.createXMLStreamReader(@handle.to_inputstream)
|
390
415
|
end
|
391
416
|
|
392
417
|
# Loop through the MARC records in the XML document
|
393
|
-
def each(&block)
|
394
|
-
|
395
|
-
return self.enum_for(:each)
|
396
|
-
else
|
418
|
+
def each(&block)
|
419
|
+
if block
|
397
420
|
@block = block
|
398
421
|
parser_dispatch
|
422
|
+
else
|
423
|
+
enum_for(:each)
|
399
424
|
end
|
400
425
|
end
|
401
426
|
|
402
427
|
def parser_dispatch
|
403
|
-
while event = @parser.next
|
428
|
+
while (event = @parser.next) && (event != javax.xml.stream.XMLStreamConstants::END_DOCUMENT)
|
404
429
|
case event
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
430
|
+
when javax.xml.stream.XMLStreamConstants::START_ELEMENT
|
431
|
+
start_element_namespace(@parser.getLocalName, [], nil, @parser.getNamespaceURI, nil)
|
432
|
+
when javax.xml.stream.XMLStreamConstants::END_ELEMENT
|
433
|
+
end_element_namespace(@parser.getLocalName, @parser.getPrefix, @parser.getNamespaceURI)
|
434
|
+
when javax.xml.stream.XMLStreamConstants::CHARACTERS
|
435
|
+
characters(@parser.getText)
|
411
436
|
end
|
412
437
|
end
|
413
438
|
end
|
414
439
|
|
415
440
|
def attributes_to_hash(attributes)
|
416
441
|
hash = {}
|
417
|
-
@parser.getAttributeCount.times do |
|
442
|
+
@parser.getAttributeCount.times do |i|
|
418
443
|
hash[@parser.getAttributeName(i).getLocalPart] = @parser.getAttributeValue(i)
|
419
444
|
end
|
420
445
|
hash
|
421
|
-
end
|
446
|
+
end
|
422
447
|
end # end of module
|
423
|
-
end # end of if jruby
|
448
|
+
end # end of if jruby
|
424
449
|
end
|