marc 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -1,4 +1,4 @@
1
- RUBY_MARC_VERSION = '0.3.0'
1
+ RUBY_MARC_VERSION = '0.3.1'
2
2
 
3
3
  require 'rubygems'
4
4
  require 'rake'
@@ -13,72 +13,49 @@ module MARC
13
13
  # is arguable which is "best" on JRuby: Nokogiri or jrexml.
14
14
  module MagicReader
15
15
  def self.extended(receiver)
16
- # Start with a Nokogiri check
17
- begin
18
- require 'nokogiri'
19
- receiver.extend(NokogiriReader)
20
- rescue LoadError
21
- if RUBY_PLATFORM =~ /java/
22
- # If using JRuby, use JREXML if it's there
23
- begin
24
- receiver.extend(JREXMLReader)
25
- return
26
- rescue LoadError
27
- end
28
- end
29
- # If you're here, you're stuck with lowly REXML
30
- receiver.extend(REXMLReader)
31
- end
16
+ magic = MARC::XMLReader.best_available
17
+ case magic
18
+ when 'nokogiri' then receiver.extend(NokogiriReader)
19
+ when 'libxml' then receiver.extend(LibXMLReader)
20
+ when 'jstax' then receiver.extend(JRubySTAXReader)
21
+ when 'jrexml' then receiver.extend(JREXMLReader)
22
+ else receiver.extend(REXMLReader)
23
+ end
32
24
  end
33
25
  end
34
26
 
35
- # NokogiriReader uses the Nokogiri SAX Parser to quickly read
36
- # a MARCXML document. Because dynamically subclassing MARC::XMLReader
37
- # is a little ugly, we need to recreate all of the SAX event methods
38
- # from Nokogiri::XML::SAX::Document here rather than subclassing.
39
- module NokogiriReader
40
- def self.extended(receiver)
41
- require 'nokogiri'
42
- receiver.init
43
- end
44
-
45
- # Sets our instance variables for SAX parsing in Nokogiri and parser
46
- def init
47
- @record = {:record=>nil,:field=>nil,:subfield=>nil}
48
- @current_element = nil
49
- @ns = "http://www.loc.gov/MARC21/slim"
50
- @parser = Nokogiri::XML::SAX::Parser.new(self)
51
- end
27
+ module GenericPullParser
28
+ # Submodules must include
29
+ # self.extended()
30
+ # init()
31
+ # attributes_to_hash(attributes)
32
+ # each
52
33
 
53
- # Loop through the MARC records in the XML document
54
- def each(&block)
55
- @block = block
56
- @parser.parse(@handle)
57
- end
58
-
34
+
59
35
  # Returns our MARC::Record object to the #each block.
60
36
  def yield_record
61
37
  @block.call(@record[:record])
62
38
  @record[:record] = nil
63
39
  end
64
-
40
+
65
41
  def start_element_namespace name, attributes = [], prefix = nil, uri = nil, ns = {}
66
- attributes = attributes_to_hash(attributes)
67
- if uri == @ns
68
- case name.downcase
69
- when 'record' then @record[:record] = MARC::Record.new
70
- when 'leader' then @current_element = :leader
71
- when 'controlfield'
72
- @current_element=:field
73
- @record[:field] = MARC::ControlField.new(attributes["tag"])
74
- when 'datafield'
75
- @record[:field] = MARC::DataField.new(attributes["tag"], attributes['ind1'], attributes['ind2'])
76
- when 'subfield'
77
- @current_element=:subfield
78
- @record[:subfield] = MARC::Subfield.new(attributes['code'])
79
- end
80
- end
81
- end
42
+ attributes = attributes_to_hash(attributes)
43
+ if uri == @ns
44
+ case name.downcase
45
+ when 'record' then @record[:record] = MARC::Record.new
46
+ when 'leader' then @current_element = :leader
47
+ when 'controlfield'
48
+ @current_element=:field
49
+ @record[:field] = MARC::ControlField.new(attributes["tag"])
50
+ when 'datafield'
51
+ @record[:field] = MARC::DataField.new(attributes["tag"], attributes['ind1'], attributes['ind2'])
52
+ when 'subfield'
53
+ @current_element=:subfield
54
+ @record[:subfield] = MARC::Subfield.new(attributes['code'])
55
+ end
56
+ end
57
+ end
58
+
82
59
 
83
60
  def characters text
84
61
  case @current_element
@@ -90,7 +67,7 @@ module MARC
90
67
 
91
68
  def end_element_namespace name, prefix = nil, uri = nil
92
69
  @current_element = nil
93
- if uri == "http://www.loc.gov/MARC21/slim"
70
+ if uri == @ns
94
71
  case name.downcase
95
72
  when 'record' then yield_record
96
73
  when /(control|data)field/
@@ -103,8 +80,36 @@ module MARC
103
80
  @current_element = nil if @current_element == :subfield
104
81
  end
105
82
  end
83
+ end
84
+ end
85
+
86
+
87
+ # NokogiriReader uses the Nokogiri SAX Parser to quickly read
88
+ # a MARCXML document. Because dynamically subclassing MARC::XMLReader
89
+ # is a little ugly, we need to recreate all of the SAX event methods
90
+ # from Nokogiri::XML::SAX::Document here rather than subclassing.
91
+ module NokogiriReader
92
+ include GenericPullParser
93
+ def self.extended(receiver)
94
+ require 'nokogiri'
95
+ receiver.init
106
96
  end
107
97
 
98
+ # Sets our instance variables for SAX parsing in Nokogiri and parser
99
+ def init
100
+ @record = {:record=>nil,:field=>nil,:subfield=>nil}
101
+ @current_element = nil
102
+ @ns = "http://www.loc.gov/MARC21/slim"
103
+ @parser = Nokogiri::XML::SAX::Parser.new(self)
104
+ end
105
+
106
+ # Loop through the MARC records in the XML document
107
+ def each(&block)
108
+ @block = block
109
+ @parser.parse(@handle)
110
+ end
111
+
112
+
108
113
  def method_missing(methName, *args)
109
114
  sax_methods = [:xmldecl, :start_document, :end_document, :start_element,
110
115
  :end_element, :comment, :warning, :error, :cdata_block]
@@ -123,6 +128,8 @@ module MARC
123
128
  hash
124
129
  end
125
130
  end
131
+
132
+
126
133
 
127
134
  # The REXMLReader is the 'default' parser, since we can at least be
128
135
  # assured that REXML is probably there. It uses REXML's PullParser
@@ -285,4 +292,106 @@ module MARC
285
292
  receiver.extend(REXMLReader)
286
293
  end
287
294
  end
295
+
296
+ module LibXMLReader
297
+
298
+ def self.extended(receiver)
299
+ require 'xml'
300
+ receiver.init
301
+ end
302
+
303
+ def init
304
+ @ns = "http://www.loc.gov/MARC21/slim"
305
+ @parser = XML::Reader.io(@handle)
306
+ end
307
+
308
+ def each
309
+ while (@parser.read) do
310
+ if @parser.local_name == 'record' && @parser.namespace_uri == @ns
311
+ yield build_record
312
+ end
313
+ end # while
314
+ end # each
315
+
316
+ def build_record
317
+ r = MARC::Record.new()
318
+ until (@parser.local_name == 'record' and @parser.node_type == XML::Reader::TYPE_END_ELEMENT) do
319
+ @parser.read
320
+ next if @parser.node_type == XML::Reader::TYPE_END_ELEMENT
321
+ case @parser.local_name
322
+ when 'leader'
323
+ @parser.read
324
+ r.leader = @parser.value
325
+ when 'controlfield'
326
+ tag = @parser['tag']
327
+ @parser.read
328
+ r << MARC::ControlField.new(tag, @parser.value)
329
+ when 'datafield'
330
+ data = MARC::DataField.new(@parser['tag'], @parser['ind1'], @parser['ind2'])
331
+ while (@parser.read and !(@parser.local_name == 'datafield' and @parser.node_type == XML::Reader::TYPE_END_ELEMENT)) do
332
+ next if @parser.node_type == XML::Reader::TYPE_END_ELEMENT
333
+ case @parser.local_name
334
+ when 'subfield'
335
+ code =@parser['code']
336
+ @parser.read
337
+ data.append(MARC::Subfield.new(code, @parser.value))
338
+ end
339
+ end
340
+ r << data
341
+
342
+ end # case
343
+ end #until
344
+ return r
345
+ end
346
+ end
347
+
348
+ # The JRubySTAXReader uses native Java calls to parse the incoming stream
349
+ # of MARCXML. Most of the work is done by the included GenericPullParser.
350
+
351
+ if defined? JRUBY_VERSION
352
+ module JRubySTAXReader
353
+ include GenericPullParser
354
+ def self.extended(receiver)
355
+ include Java
356
+ java.lang.Class.forName("javax.xml.stream.XMLInputFactory")
357
+ include javax.xml.stream
358
+ receiver.init
359
+ end
360
+
361
+ def init
362
+ @record = {:record=>nil,:field=>nil,:subfield=>nil}
363
+ @current_element = nil
364
+ @ns = "http://www.loc.gov/MARC21/slim"
365
+ @factory = javax.xml.stream.XMLInputFactory.newInstance
366
+ @parser = @factory.createXMLStreamReader(@handle.to_inputstream)
367
+ end
368
+
369
+ # Loop through the MARC records in the XML document
370
+ def each(&block)
371
+ @block = block
372
+ parser_dispatch
373
+ end
374
+
375
+ def parser_dispatch
376
+ while event = @parser.next and event != XMLStreamConstants.END_DOCUMENT do
377
+ case event
378
+ when XMLStreamConstants.START_ELEMENT
379
+ start_element_namespace(@parser.getLocalName, [], nil, @parser.getNamespaceURI, nil)
380
+ when XMLStreamConstants.END_ELEMENT
381
+ end_element_namespace(@parser.getLocalName, @parser.getPrefix, @parser.getNamespaceURI)
382
+ when XMLStreamConstants.CHARACTERS
383
+ characters(@parser.getText)
384
+ end
385
+ end
386
+ end
387
+
388
+ def attributes_to_hash(attributes)
389
+ hash = {}
390
+ @parser.getAttributeCount.times do | i |
391
+ hash[@parser.getAttributeName(i).getLocalPart] = @parser.getAttributeValue(i)
392
+ end
393
+ hash
394
+ end
395
+ end # end of module
396
+ end # end of if jruby
288
397
  end
@@ -38,6 +38,8 @@ module MARC
38
38
  USE_REXML = 'rexml'
39
39
  USE_NOKOGIRI = 'nokogiri'
40
40
  USE_JREXML = 'jrexml'
41
+ USE_JSTAX = 'jstax'
42
+ USE_LIBXML = 'libxml'
41
43
  @@parser = USE_REXML
42
44
  attr_reader :parser
43
45
 
@@ -59,8 +61,14 @@ module MARC
59
61
  case parser
60
62
  when 'magic' then extend MagicReader
61
63
  when 'rexml' then extend REXMLReader
62
- when 'jrexml' then extend JREXMLReader
63
- when 'nokogiri' then extend NokogiriReader
64
+ when 'jrexml' then
65
+ raise ArgumentError, "jrexml only available under jruby" unless defined? JRUBY_VERSION
66
+ extend JREXMLReader
67
+ when 'nokogiri' then extend NokogiriReader
68
+ when 'jstax' then
69
+ raise ArgumentError, "jstax only available under jruby" unless defined? JRUBY_VERSION
70
+ extend JRubySTAXReader
71
+ when 'libxml' then extend LibXMLReader
64
72
  end
65
73
  end
66
74
 
@@ -87,22 +95,44 @@ module MARC
87
95
  # Returns the value of the best available parser
88
96
  def self.best_available
89
97
  parser = nil
90
- begin
91
- require 'nokogiri'
92
- parser = USE_NOKOGIRI
93
- rescue LoadError
94
- if RUBY_PLATFORM =~ /java/
98
+ jruby = [USE_JSTAX, USE_NOKOGIRI, USE_JREXML]
99
+ ruby = [USE_NOKOGIRI, USE_LIBXML]
100
+ if defined? JRUBY_VERSION
101
+ begin
102
+ java.lang.Class.forName("javax.xml.stream.XMLInputFactory")
103
+ parser = USE_JSTAX
104
+ rescue java.lang.ClassNotFoundException
105
+ end
106
+ unless parser
95
107
  begin
96
- require 'jrexml'
97
- parser = USE_JREXML
108
+ require 'nokogiri'
109
+ parser = USE_NOKOGIRI
98
110
  rescue LoadError
99
- parser = USE_REXML
100
111
  end
101
- else
102
- parser = USE_REXML
103
112
  end
104
- parser
105
- end
113
+ unless parser
114
+ begin
115
+ require 'jrexml'
116
+ parser = USE_JREXML
117
+ rescue LoadError
118
+ end
119
+ end
120
+ else
121
+ begin
122
+ require 'nokogiri'
123
+ parser = USE_NOKOGIRI
124
+ rescue LoadError
125
+ end
126
+ unless parser
127
+ begin
128
+ require 'xml'
129
+ parser = USE_LIBXML
130
+ rescue LoadError
131
+ end
132
+ end
133
+ end
134
+ parser = USE_REXML unless parser
135
+ parser
106
136
  end
107
137
 
108
138
  # Sets the best available parser as the default
@@ -34,7 +34,7 @@ class ParsersTest < Test::Unit::TestCase
34
34
  end
35
35
 
36
36
  def test_set_jrexml
37
- if RUBY_PLATFORM =~ /java/
37
+ if defined? JRUBY_VERSION
38
38
  begin
39
39
  require 'jrexml'
40
40
  reader = MARC::XMLReader.new('test/one.xml', :parser=>MARC::XMLReader::USE_JREXML)
@@ -59,6 +59,35 @@ class ParsersTest < Test::Unit::TestCase
59
59
  end
60
60
  end
61
61
 
62
+ def test_set_jstax
63
+ if defined? JRUBY_VERSION
64
+ begin
65
+ assert_equal("rexml", MARC::XMLReader.parser)
66
+ reader = MARC::XMLReader.new('test/one.xml')
67
+ assert_kind_of(REXML::Parsers::PullParser, reader.parser)
68
+
69
+ reader = MARC::XMLReader.new('test/one.xml', :parser=>MARC::XMLReader::USE_JSTAX)
70
+ assert_kind_of(Java::ComSunOrgApacheXercesInternalImpl::XMLStreamReaderImpl, reader.parser)
71
+ assert_equal("rexml", MARC::XMLReader.parser)
72
+ reader = MARC::XMLReader.new('test/one.xml', :parser=>'jstax')
73
+ assert_kind_of(Java::ComSunOrgApacheXercesInternalImpl::XMLStreamReaderImpl, reader.parser)
74
+ assert_equal("rexml", MARC::XMLReader.parser)
75
+ MARC::XMLReader.parser=MARC::XMLReader::USE_JSTAX
76
+ assert_equal("jstax", MARC::XMLReader.parser)
77
+ reader = MARC::XMLReader.new('test/one.xml')
78
+ assert_kind_of(Java::ComSunOrgApacheXercesInternalImpl::XMLStreamReaderImpl, reader.parser)
79
+ MARC::XMLReader.parser="jstax"
80
+ assert_equal("jstax", MARC::XMLReader.parser)
81
+ reader = MARC::XMLReader.new('test/one.xml')
82
+ assert_kind_of(Java::ComSunOrgApacheXercesInternalImpl::XMLStreamReaderImpl, reader.parser)
83
+ rescue java.lang.ClassNotFoundException
84
+ puts "\njavax.xml.stream not available, skipping 'test_set_jstax'.\n"
85
+ end
86
+ else
87
+ puts "\nTest not being run from JRuby, skipping 'test_set_jstax'.\n"
88
+ end
89
+ end
90
+
62
91
  def test_set_rexml
63
92
  reader = MARC::XMLReader.new('test/one.xml', :parser=>MARC::XMLReader::USE_REXML)
64
93
  assert_kind_of(REXML::Parsers::PullParser, reader.parser)
@@ -77,13 +106,8 @@ class ParsersTest < Test::Unit::TestCase
77
106
  end
78
107
 
79
108
  def test_set_magic
80
- magic_parser = nil
81
- begin
82
- require 'nokogiri'
83
- magic_parser = Nokogiri::XML::SAX::Parser
84
- rescue LoadError
85
- magic_parser = REXML::Parsers::PullParser
86
- end
109
+ best = choose_best_available_parser
110
+ magic_parser = best[:parser]
87
111
  puts "\nTesting 'test_set_magic' for parser: #{magic_parser}"
88
112
  reader = MARC::XMLReader.new('test/one.xml', :parser=>MARC::XMLReader::USE_BEST_AVAILABLE)
89
113
  assert_kind_of(magic_parser, reader.parser)
@@ -102,23 +126,9 @@ class ParsersTest < Test::Unit::TestCase
102
126
  end
103
127
 
104
128
  def test_parser_set_convenience_methods
105
- parser_name = nil
106
- parser = nil
107
- begin
108
- require 'nokogiri'
109
- parser_name = 'nokogiri'
110
- parser = Nokogiri::XML::SAX::Parser
111
- rescue LoadError
112
- parser = REXML::Parsers::PullParser
113
- parser = 'rexml'
114
- if RUBY_PLATFORM =~ /java/
115
- begin
116
- require 'jrexml'
117
- parser_name = 'jrexml'
118
- rescue LoadError
119
- end
120
- end
121
- end
129
+ best = choose_best_available_parser
130
+ parser = best[:parser]
131
+ parser_name = best[:parser_name]
122
132
  assert_equal(parser_name, MARC::XMLReader.best_available)
123
133
  MARC::XMLReader.best_available!
124
134
  reader = MARC::XMLReader.new('test/one.xml')
@@ -133,7 +143,7 @@ class ParsersTest < Test::Unit::TestCase
133
143
  else
134
144
  puts "\nNokogiri not loaded, skipping convenience method test.\n"
135
145
  end
136
- if RUBY_PLATFORM =~ /java/
146
+ if defined? JRUBY_VERSION
137
147
  begin
138
148
  require 'jrexml'
139
149
  MARC::XMLReader.jrexml!
@@ -151,4 +161,49 @@ class ParsersTest < Test::Unit::TestCase
151
161
  MARC::XMLReader.parser=MARC::XMLReader::USE_REXML
152
162
  end
153
163
 
164
+ def choose_best_available_parser
165
+ parser_name = nil
166
+ parser = nil
167
+ if defined? JRUBY_VERSION
168
+ begin
169
+ java.lang.Class.forName("javax.xml.stream.XMLInputFactory")
170
+ parser_name = "jstax"
171
+ parser = Java::ComSunOrgApacheXercesInternalImpl::XMLStreamReaderImpl
172
+ rescue java.lang.ClassNotFoundException
173
+ end
174
+ end
175
+ unless parser
176
+ begin
177
+ require 'nokogiri'
178
+ parser_name = 'nokogiri'
179
+ parser = Nokogiri::XML::SAX::Parser
180
+ rescue LoadError
181
+ end
182
+ end
183
+ unless parser
184
+ if !defined? JRUBY_VERSION
185
+ begin
186
+ require 'xml'
187
+ parser_name = 'libxml'
188
+ parser = LibXML::XML::Reader
189
+ rescue LoadError
190
+ end
191
+ else
192
+ if defined? JRUBY_VERSION
193
+ begin
194
+ require 'jrexml'
195
+ parser_name = 'jrexml'
196
+ parser = REXML::Parsers::PullParser
197
+ rescue LoadError
198
+ end
199
+ end
200
+ end
201
+ unless parser
202
+ parser = REXML::Parsers::PullParser
203
+ parser_name = 'rexml'
204
+ end
205
+ end
206
+ return {:parser=>parser, :parser_name=>parser_name}
207
+ end
208
+
154
209
  end
@@ -10,12 +10,22 @@ class XMLTest < Test::Unit::TestCase
10
10
  @parsers << :nokogiri
11
11
  rescue LoadError
12
12
  end
13
- if RUBY_PLATFORM =~ /java/
13
+ begin
14
+ require 'xml'
15
+ @parsers << :libxml
16
+ rescue LoadError
17
+ end
18
+ if defined? JRUBY_VERSION
14
19
  begin
15
20
  require 'jrexml'
16
21
  @parsers << :jrexml
17
22
  rescue LoadError
18
23
  end
24
+ begin
25
+ java.lang.Class.forName("javax.xml.stream.XMLInputFactory")
26
+ @parsers << :jstax
27
+ rescue java.lang.ClassNotFoundException
28
+ end
19
29
  end
20
30
  end
21
31
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: marc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin Clarke
@@ -12,7 +12,7 @@ autorequire: marc
12
12
  bindir: bin
13
13
  cert_chain: []
14
14
 
15
- date: 2009-09-23 00:00:00 -04:00
15
+ date: 2009-12-14 00:00:00 -05:00
16
16
  default_executable:
17
17
  dependencies: []
18
18