marc 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -1,4 +1,4 @@
1
- RUBY_MARC_VERSION = '0.3.0'
1
+ RUBY_MARC_VERSION = '0.3.1'
2
2
 
3
3
  require 'rubygems'
4
4
  require 'rake'
@@ -13,72 +13,49 @@ module MARC
13
13
  # is arguable which is "best" on JRuby: Nokogiri or jrexml.
14
14
  module MagicReader
15
15
  def self.extended(receiver)
16
- # Start with a Nokogiri check
17
- begin
18
- require 'nokogiri'
19
- receiver.extend(NokogiriReader)
20
- rescue LoadError
21
- if RUBY_PLATFORM =~ /java/
22
- # If using JRuby, use JREXML if it's there
23
- begin
24
- receiver.extend(JREXMLReader)
25
- return
26
- rescue LoadError
27
- end
28
- end
29
- # If you're here, you're stuck with lowly REXML
30
- receiver.extend(REXMLReader)
31
- end
16
+ magic = MARC::XMLReader.best_available
17
+ case magic
18
+ when 'nokogiri' then receiver.extend(NokogiriReader)
19
+ when 'libxml' then receiver.extend(LibXMLReader)
20
+ when 'jstax' then receiver.extend(JRubySTAXReader)
21
+ when 'jrexml' then receiver.extend(JREXMLReader)
22
+ else receiver.extend(REXMLReader)
23
+ end
32
24
  end
33
25
  end
34
26
 
35
- # NokogiriReader uses the Nokogiri SAX Parser to quickly read
36
- # a MARCXML document. Because dynamically subclassing MARC::XMLReader
37
- # is a little ugly, we need to recreate all of the SAX event methods
38
- # from Nokogiri::XML::SAX::Document here rather than subclassing.
39
- module NokogiriReader
40
- def self.extended(receiver)
41
- require 'nokogiri'
42
- receiver.init
43
- end
44
-
45
- # Sets our instance variables for SAX parsing in Nokogiri and parser
46
- def init
47
- @record = {:record=>nil,:field=>nil,:subfield=>nil}
48
- @current_element = nil
49
- @ns = "http://www.loc.gov/MARC21/slim"
50
- @parser = Nokogiri::XML::SAX::Parser.new(self)
51
- end
27
+ module GenericPullParser
28
+ # Modules that include GenericPullParser must define:
29
+ # self.extended()
30
+ # init()
31
+ # attributes_to_hash(attributes)
32
+ # each
52
33
 
53
- # Loop through the MARC records in the XML document
54
- def each(&block)
55
- @block = block
56
- @parser.parse(@handle)
57
- end
58
-
34
+
59
35
  # Returns our MARC::Record object to the #each block.
60
36
  def yield_record
61
37
  @block.call(@record[:record])
62
38
  @record[:record] = nil
63
39
  end
64
-
40
+
65
41
  def start_element_namespace name, attributes = [], prefix = nil, uri = nil, ns = {}
66
- attributes = attributes_to_hash(attributes)
67
- if uri == @ns
68
- case name.downcase
69
- when 'record' then @record[:record] = MARC::Record.new
70
- when 'leader' then @current_element = :leader
71
- when 'controlfield'
72
- @current_element=:field
73
- @record[:field] = MARC::ControlField.new(attributes["tag"])
74
- when 'datafield'
75
- @record[:field] = MARC::DataField.new(attributes["tag"], attributes['ind1'], attributes['ind2'])
76
- when 'subfield'
77
- @current_element=:subfield
78
- @record[:subfield] = MARC::Subfield.new(attributes['code'])
79
- end
80
- end
81
- end
42
+ attributes = attributes_to_hash(attributes)
43
+ if uri == @ns
44
+ case name.downcase
45
+ when 'record' then @record[:record] = MARC::Record.new
46
+ when 'leader' then @current_element = :leader
47
+ when 'controlfield'
48
+ @current_element=:field
49
+ @record[:field] = MARC::ControlField.new(attributes["tag"])
50
+ when 'datafield'
51
+ @record[:field] = MARC::DataField.new(attributes["tag"], attributes['ind1'], attributes['ind2'])
52
+ when 'subfield'
53
+ @current_element=:subfield
54
+ @record[:subfield] = MARC::Subfield.new(attributes['code'])
55
+ end
56
+ end
57
+ end
58
+
82
59
 
83
60
  def characters text
84
61
  case @current_element
@@ -90,7 +67,7 @@ module MARC
90
67
 
91
68
  def end_element_namespace name, prefix = nil, uri = nil
92
69
  @current_element = nil
93
- if uri == "http://www.loc.gov/MARC21/slim"
70
+ if uri == @ns
94
71
  case name.downcase
95
72
  when 'record' then yield_record
96
73
  when /(control|data)field/
@@ -103,8 +80,36 @@ module MARC
103
80
  @current_element = nil if @current_element == :subfield
104
81
  end
105
82
  end
83
+ end
84
+ end
85
+
86
+
87
+ # NokogiriReader uses the Nokogiri SAX Parser to quickly read
88
+ # a MARCXML document. Because dynamically subclassing MARC::XMLReader
89
+ # is a little ugly, we need to recreate all of the SAX event methods
90
+ # from Nokogiri::XML::SAX::Document here rather than subclassing.
91
+ module NokogiriReader
92
+ include GenericPullParser
93
+ def self.extended(receiver)
94
+ require 'nokogiri'
95
+ receiver.init
106
96
  end
107
97
 
98
+ # Sets our instance variables for SAX parsing in Nokogiri and parser
99
+ def init
100
+ @record = {:record=>nil,:field=>nil,:subfield=>nil}
101
+ @current_element = nil
102
+ @ns = "http://www.loc.gov/MARC21/slim"
103
+ @parser = Nokogiri::XML::SAX::Parser.new(self)
104
+ end
105
+
106
+ # Loop through the MARC records in the XML document
107
+ def each(&block)
108
+ @block = block
109
+ @parser.parse(@handle)
110
+ end
111
+
112
+
108
113
  def method_missing(methName, *args)
109
114
  sax_methods = [:xmldecl, :start_document, :end_document, :start_element,
110
115
  :end_element, :comment, :warning, :error, :cdata_block]
@@ -123,6 +128,8 @@ module MARC
123
128
  hash
124
129
  end
125
130
  end
131
+
132
+
126
133
 
127
134
  # The REXMLReader is the 'default' parser, since we can at least be
128
135
  # assured that REXML is probably there. It uses REXML's PullParser
@@ -285,4 +292,106 @@ module MARC
285
292
  receiver.extend(REXMLReader)
286
293
  end
287
294
  end
295
+
296
+ module LibXMLReader
297
+
298
+ def self.extended(receiver)
299
+ require 'xml'
300
+ receiver.init
301
+ end
302
+
303
+ def init
304
+ @ns = "http://www.loc.gov/MARC21/slim"
305
+ @parser = XML::Reader.io(@handle)
306
+ end
307
+
308
+ def each
309
+ while (@parser.read) do
310
+ if @parser.local_name == 'record' && @parser.namespace_uri == @ns
311
+ yield build_record
312
+ end
313
+ end # while
314
+ end # each
315
+
316
+ def build_record
317
+ r = MARC::Record.new()
318
+ until (@parser.local_name == 'record' and @parser.node_type == XML::Reader::TYPE_END_ELEMENT) do
319
+ @parser.read
320
+ next if @parser.node_type == XML::Reader::TYPE_END_ELEMENT
321
+ case @parser.local_name
322
+ when 'leader'
323
+ @parser.read
324
+ r.leader = @parser.value
325
+ when 'controlfield'
326
+ tag = @parser['tag']
327
+ @parser.read
328
+ r << MARC::ControlField.new(tag, @parser.value)
329
+ when 'datafield'
330
+ data = MARC::DataField.new(@parser['tag'], @parser['ind1'], @parser['ind2'])
331
+ while (@parser.read and !(@parser.local_name == 'datafield' and @parser.node_type == XML::Reader::TYPE_END_ELEMENT)) do
332
+ next if @parser.node_type == XML::Reader::TYPE_END_ELEMENT
333
+ case @parser.local_name
334
+ when 'subfield'
335
+ code =@parser['code']
336
+ @parser.read
337
+ data.append(MARC::Subfield.new(code, @parser.value))
338
+ end
339
+ end
340
+ r << data
341
+
342
+ end # case
343
+ end #until
344
+ return r
345
+ end
346
+ end
347
+
348
+ # The JRubySTAXReader uses native Java calls to parse the incoming stream
349
+ # of MARCXML. It includes most of the work from GenericPullParser.
350
+
351
+ if defined? JRUBY_VERSION
352
+ module JRubySTAXReader
353
+ include GenericPullParser
354
+ def self.extended(receiver)
355
+ include Java
356
+ java.lang.Class.forName("javax.xml.stream.XMLInputFactory")
357
+ include javax.xml.stream
358
+ receiver.init
359
+ end
360
+
361
+ def init
362
+ @record = {:record=>nil,:field=>nil,:subfield=>nil}
363
+ @current_element = nil
364
+ @ns = "http://www.loc.gov/MARC21/slim"
365
+ @factory = javax.xml.stream.XMLInputFactory.newInstance
366
+ @parser = @factory.createXMLStreamReader(@handle.to_inputstream)
367
+ end
368
+
369
+ # Loop through the MARC records in the XML document
370
+ def each(&block)
371
+ @block = block
372
+ parser_dispatch
373
+ end
374
+
375
+ def parser_dispatch
376
+ while event = @parser.next and event != XMLStreamConstants.END_DOCUMENT do
377
+ case event
378
+ when XMLStreamConstants.START_ELEMENT
379
+ start_element_namespace(@parser.getLocalName, [], nil, @parser.getNamespaceURI, nil)
380
+ when XMLStreamConstants.END_ELEMENT
381
+ end_element_namespace(@parser.getLocalName, @parser.getPrefix, @parser.getNamespaceURI)
382
+ when XMLStreamConstants.CHARACTERS
383
+ characters(@parser.getText)
384
+ end
385
+ end
386
+ end
387
+
388
+ def attributes_to_hash(attributes)
389
+ hash = {}
390
+ @parser.getAttributeCount.times do | i |
391
+ hash[@parser.getAttributeName(i).getLocalPart] = @parser.getAttributeValue(i)
392
+ end
393
+ hash
394
+ end
395
+ end # end of module
396
+ end # end of if jruby
288
397
  end
@@ -38,6 +38,8 @@ module MARC
38
38
  USE_REXML = 'rexml'
39
39
  USE_NOKOGIRI = 'nokogiri'
40
40
  USE_JREXML = 'jrexml'
41
+ USE_JSTAX = 'jstax'
42
+ USE_LIBXML = 'libxml'
41
43
  @@parser = USE_REXML
42
44
  attr_reader :parser
43
45
 
@@ -59,8 +61,14 @@ module MARC
59
61
  case parser
60
62
  when 'magic' then extend MagicReader
61
63
  when 'rexml' then extend REXMLReader
62
- when 'jrexml' then extend JREXMLReader
63
- when 'nokogiri' then extend NokogiriReader
64
+ when 'jrexml' then
65
+ raise ArgumentError, "jrexml only available under jruby" unless defined? JRUBY_VERSION
66
+ extend JREXMLReader
67
+ when 'nokogiri' then extend NokogiriReader
68
+ when 'jstax' then
69
+ raise ArgumentError, "jstax only available under jruby" unless defined? JRUBY_VERSION
70
+ extend JRubySTAXReader
71
+ when 'libxml' then extend LibXMLReader
64
72
  end
65
73
  end
66
74
 
@@ -87,22 +95,44 @@ module MARC
87
95
  # Returns the value of the best available parser
88
96
  def self.best_available
89
97
  parser = nil
90
- begin
91
- require 'nokogiri'
92
- parser = USE_NOKOGIRI
93
- rescue LoadError
94
- if RUBY_PLATFORM =~ /java/
98
+ jruby = [USE_JSTAX, USE_NOKOGIRI, USE_JREXML]
99
+ ruby = [USE_NOKOGIRI, USE_LIBXML]
100
+ if defined? JRUBY_VERSION
101
+ begin
102
+ java.lang.Class.forName("javax.xml.stream.XMLInputFactory")
103
+ parser = USE_JSTAX
104
+ rescue java.lang.ClassNotFoundException
105
+ end
106
+ unless parser
95
107
  begin
96
- require 'jrexml'
97
- parser = USE_JREXML
108
+ require 'nokogiri'
109
+ parser = USE_NOKOGIRI
98
110
  rescue LoadError
99
- parser = USE_REXML
100
111
  end
101
- else
102
- parser = USE_REXML
103
112
  end
104
- parser
105
- end
113
+ unless parser
114
+ begin
115
+ require 'jrexml'
116
+ parser = USE_JREXML
117
+ rescue LoadError
118
+ end
119
+ end
120
+ else
121
+ begin
122
+ require 'nokogiri'
123
+ parser = USE_NOKOGIRI
124
+ rescue LoadError
125
+ end
126
+ unless parser
127
+ begin
128
+ require 'xml'
129
+ parser = USE_LIBXML
130
+ rescue LoadError
131
+ end
132
+ end
133
+ end
134
+ parser = USE_REXML unless parser
135
+ parser
106
136
  end
107
137
 
108
138
  # Sets the best available parser as the default
@@ -34,7 +34,7 @@ class ParsersTest < Test::Unit::TestCase
34
34
  end
35
35
 
36
36
  def test_set_jrexml
37
- if RUBY_PLATFORM =~ /java/
37
+ if defined? JRUBY_VERSION
38
38
  begin
39
39
  require 'jrexml'
40
40
  reader = MARC::XMLReader.new('test/one.xml', :parser=>MARC::XMLReader::USE_JREXML)
@@ -59,6 +59,35 @@ class ParsersTest < Test::Unit::TestCase
59
59
  end
60
60
  end
61
61
 
62
+ def test_set_jstax
63
+ if defined? JRUBY_VERSION
64
+ begin
65
+ assert_equal("rexml", MARC::XMLReader.parser)
66
+ reader = MARC::XMLReader.new('test/one.xml')
67
+ assert_kind_of(REXML::Parsers::PullParser, reader.parser)
68
+
69
+ reader = MARC::XMLReader.new('test/one.xml', :parser=>MARC::XMLReader::USE_JSTAX)
70
+ assert_kind_of(Java::ComSunOrgApacheXercesInternalImpl::XMLStreamReaderImpl, reader.parser)
71
+ assert_equal("rexml", MARC::XMLReader.parser)
72
+ reader = MARC::XMLReader.new('test/one.xml', :parser=>'jstax')
73
+ assert_kind_of(Java::ComSunOrgApacheXercesInternalImpl::XMLStreamReaderImpl, reader.parser)
74
+ assert_equal("rexml", MARC::XMLReader.parser)
75
+ MARC::XMLReader.parser=MARC::XMLReader::USE_JSTAX
76
+ assert_equal("jstax", MARC::XMLReader.parser)
77
+ reader = MARC::XMLReader.new('test/one.xml')
78
+ assert_kind_of(Java::ComSunOrgApacheXercesInternalImpl::XMLStreamReaderImpl, reader.parser)
79
+ MARC::XMLReader.parser="jstax"
80
+ assert_equal("jstax", MARC::XMLReader.parser)
81
+ reader = MARC::XMLReader.new('test/one.xml')
82
+ assert_kind_of(Java::ComSunOrgApacheXercesInternalImpl::XMLStreamReaderImpl, reader.parser)
83
+ rescue java.lang.ClassNotFoundException
84
+ puts "\njavax.xml.stream not available, skipping 'test_set_jstax'.\n"
85
+ end
86
+ else
87
+ puts "\nTest not being run from JRuby, skipping 'test_set_jstax'.\n"
88
+ end
89
+ end
90
+
62
91
  def test_set_rexml
63
92
  reader = MARC::XMLReader.new('test/one.xml', :parser=>MARC::XMLReader::USE_REXML)
64
93
  assert_kind_of(REXML::Parsers::PullParser, reader.parser)
@@ -77,13 +106,8 @@ class ParsersTest < Test::Unit::TestCase
77
106
  end
78
107
 
79
108
  def test_set_magic
80
- magic_parser = nil
81
- begin
82
- require 'nokogiri'
83
- magic_parser = Nokogiri::XML::SAX::Parser
84
- rescue LoadError
85
- magic_parser = REXML::Parsers::PullParser
86
- end
109
+ best = choose_best_available_parser
110
+ magic_parser = best[:parser]
87
111
  puts "\nTesting 'test_set_magic' for parser: #{magic_parser}"
88
112
  reader = MARC::XMLReader.new('test/one.xml', :parser=>MARC::XMLReader::USE_BEST_AVAILABLE)
89
113
  assert_kind_of(magic_parser, reader.parser)
@@ -102,23 +126,9 @@ class ParsersTest < Test::Unit::TestCase
102
126
  end
103
127
 
104
128
  def test_parser_set_convenience_methods
105
- parser_name = nil
106
- parser = nil
107
- begin
108
- require 'nokogiri'
109
- parser_name = 'nokogiri'
110
- parser = Nokogiri::XML::SAX::Parser
111
- rescue LoadError
112
- parser = REXML::Parsers::PullParser
113
- parser = 'rexml'
114
- if RUBY_PLATFORM =~ /java/
115
- begin
116
- require 'jrexml'
117
- parser_name = 'jrexml'
118
- rescue LoadError
119
- end
120
- end
121
- end
129
+ best = choose_best_available_parser
130
+ parser = best[:parser]
131
+ parser_name = best[:parser_name]
122
132
  assert_equal(parser_name, MARC::XMLReader.best_available)
123
133
  MARC::XMLReader.best_available!
124
134
  reader = MARC::XMLReader.new('test/one.xml')
@@ -133,7 +143,7 @@ class ParsersTest < Test::Unit::TestCase
133
143
  else
134
144
  puts "\nNokogiri not loaded, skipping convenience method test.\n"
135
145
  end
136
- if RUBY_PLATFORM =~ /java/
146
+ if defined? JRUBY_VERSION
137
147
  begin
138
148
  require 'jrexml'
139
149
  MARC::XMLReader.jrexml!
@@ -151,4 +161,49 @@ class ParsersTest < Test::Unit::TestCase
151
161
  MARC::XMLReader.parser=MARC::XMLReader::USE_REXML
152
162
  end
153
163
 
164
+ def choose_best_available_parser
165
+ parser_name = nil
166
+ parser = nil
167
+ if defined? JRUBY_VERSION
168
+ begin
169
+ java.lang.Class.forName("javax.xml.stream.XMLInputFactory")
170
+ parser_name = "jstax"
171
+ parser = Java::ComSunOrgApacheXercesInternalImpl::XMLStreamReaderImpl
172
+ rescue java.lang.ClassNotFoundException
173
+ end
174
+ end
175
+ unless parser
176
+ begin
177
+ require 'nokogiri'
178
+ parser_name = 'nokogiri'
179
+ parser = Nokogiri::XML::SAX::Parser
180
+ rescue LoadError
181
+ end
182
+ end
183
+ unless parser
184
+ if !defined? JRUBY_VERSION
185
+ begin
186
+ require 'xml'
187
+ parser_name = 'libxml'
188
+ parser = LibXML::XML::Reader
189
+ rescue LoadError
190
+ end
191
+ else
192
+ if defined? JRUBY_VERSION
193
+ begin
194
+ require 'jrexml'
195
+ parser_name = 'jrexml'
196
+ parser = REXML::Parsers::PullParser
197
+ rescue LoadError
198
+ end
199
+ end
200
+ end
201
+ unless parser
202
+ parser = REXML::Parsers::PullParser
203
+ parser_name = 'rexml'
204
+ end
205
+ end
206
+ return {:parser=>parser, :parser_name=>parser_name}
207
+ end
208
+
154
209
  end
@@ -10,12 +10,22 @@ class XMLTest < Test::Unit::TestCase
10
10
  @parsers << :nokogiri
11
11
  rescue LoadError
12
12
  end
13
- if RUBY_PLATFORM =~ /java/
13
+ begin
14
+ require 'xml'
15
+ @parsers << :libxml
16
+ rescue LoadError
17
+ end
18
+ if defined? JRUBY_VERSION
14
19
  begin
15
20
  require 'jrexml'
16
21
  @parsers << :jrexml
17
22
  rescue LoadError
18
23
  end
24
+ begin
25
+ java.lang.Class.forName("javax.xml.stream.XMLInputFactory")
26
+ @parsers << :jstax
27
+ rescue java.lang.ClassNotFoundException
28
+ end
19
29
  end
20
30
  end
21
31
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: marc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin Clarke
@@ -12,7 +12,7 @@ autorequire: marc
12
12
  bindir: bin
13
13
  cert_chain: []
14
14
 
15
- date: 2009-09-23 00:00:00 -04:00
15
+ date: 2009-12-14 00:00:00 -05:00
16
16
  default_executable:
17
17
  dependencies: []
18
18