marc 0.2.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Changes +3 -0
- data/Rakefile +4 -4
- data/lib/marc.rb +8 -0
- data/lib/marc/constants.rb +4 -0
- data/lib/marc/controlfield.rb +29 -4
- data/lib/marc/datafield.rb +16 -6
- data/lib/marc/reader.rb +9 -3
- data/lib/marc/record.rb +33 -1
- data/lib/marc/xml_parsers.rb +288 -0
- data/lib/marc/xmlreader.rb +119 -80
- data/lib/marc/xmlwriter.rb +1 -1
- data/test/tc_controlfield.rb +25 -0
- data/test/tc_marchash.rb +37 -0
- data/test/tc_parsers.rb +154 -0
- data/test/tc_record.rb +6 -1
- data/test/tc_xml.rb +77 -12
- metadata +29 -22
data/Changes
CHANGED
data/Rakefile
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
RUBY_MARC_VERSION = '0.2.2'
|
1
|
+
RUBY_MARC_VERSION = '0.3.0'
|
2
2
|
|
3
3
|
require 'rubygems'
|
4
4
|
require 'rake'
|
@@ -21,7 +21,7 @@ spec = Gem::Specification.new do |s|
|
|
21
21
|
s.version = RUBY_MARC_VERSION
|
22
22
|
s.author = 'Ed Summers'
|
23
23
|
s.email = 'ehs@pobox.com'
|
24
|
-
s.homepage = 'http://
|
24
|
+
s.homepage = 'http://marc.rubyforge.org/'
|
25
25
|
s.platform = Gem::Platform::RUBY
|
26
26
|
s.summary = 'A ruby library for working with Machine Readable Cataloging'
|
27
27
|
s.files = Dir.glob("{lib,test}/**/*") + ["Rakefile", "README", "Changes",
|
@@ -30,7 +30,7 @@ spec = Gem::Specification.new do |s|
|
|
30
30
|
s.autorequire = 'marc'
|
31
31
|
s.has_rdoc = true
|
32
32
|
s.required_ruby_version = '>= 1.8.6'
|
33
|
-
|
33
|
+
s.authors = ["Kevin Clarke", "William Groppe", "Ross Singer", "Ed Summers"]
|
34
34
|
s.test_file = 'test/ts_marc.rb'
|
35
35
|
s.bindir = 'bin'
|
36
36
|
end
|
@@ -41,7 +41,7 @@ Rake::GemPackageTask.new(spec) do |pkg|
|
|
41
41
|
end
|
42
42
|
|
43
43
|
Rake::RDocTask.new('doc') do |rd|
|
44
|
-
rd.rdoc_files.include("lib/**/*.rb")
|
44
|
+
rd.rdoc_files.include("README", "Changes", "LICENSE", "lib/**/*.rb")
|
45
45
|
rd.main = 'MARC::Record'
|
46
46
|
rd.rdoc_dir = 'doc'
|
47
47
|
end
|
data/lib/marc.rb
CHANGED
@@ -24,6 +24,13 @@
|
|
24
24
|
# writer = MARC::XMLWriter.new('marc.xml')
|
25
25
|
# writer.write(record)
|
26
26
|
# writer.close()
|
27
|
+
#
|
28
|
+
# # Deal with non-standard control field tags
|
29
|
+
# MARC::ControlField.control_tags << 'FMT'
|
30
|
+
# record = MARC::Record.new()
|
31
|
+
# record.add_field(MARC::ControlField.new('FMT', 'Book')) # doesn't throw an error
|
32
|
+
|
33
|
+
|
27
34
|
|
28
35
|
require 'marc/constants'
|
29
36
|
require 'marc/record'
|
@@ -36,3 +43,4 @@ require 'marc/exception'
|
|
36
43
|
require 'marc/xmlwriter'
|
37
44
|
require 'marc/xmlreader'
|
38
45
|
require 'marc/dublincore'
|
46
|
+
require 'marc/xml_parsers'
|
data/lib/marc/constants.rb
CHANGED
data/lib/marc/controlfield.rb
CHANGED
@@ -1,10 +1,29 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
1
3
|
module MARC
|
2
4
|
|
3
5
|
# MARC records contain control fields, each of which has a
|
4
6
|
# tag and value. Tags for control fields must be in the
|
5
|
-
# 001-009 range
|
7
|
+
# 001-009 range or be specially added to the @@control_tags Set
|
6
8
|
|
7
9
|
class ControlField
|
10
|
+
|
11
|
+
# Initially, control tags are the numbers 1 through 9 or the string '000'
|
12
|
+
@@control_tags = Set.new( (1..9).to_a)
|
13
|
+
@@control_tags << '000'
|
14
|
+
|
15
|
+
def self.control_tags
|
16
|
+
return @@control_tags
|
17
|
+
end
|
18
|
+
|
19
|
+
# A tag is a control tag if it is a member of the @@control_tags set
|
20
|
+
# as either a string (e.g., 'FMT') or in its .to_i representation
|
21
|
+
# (e.g., '008'.to_i == 8 is in @@control_tags by default)
|
22
|
+
|
23
|
+
def self.control_tag?(tag)
|
24
|
+
return (@@control_tags.include?(tag.to_i) or @@control_tags.include?(tag))
|
25
|
+
end
|
26
|
+
|
8
27
|
|
9
28
|
# the tag value (007, 008, etc)
|
10
29
|
attr_accessor :tag
|
@@ -18,8 +37,8 @@ module MARC
|
|
18
37
|
def initialize(tag,value='')
|
19
38
|
@tag = tag
|
20
39
|
@value = value
|
21
|
-
if tag
|
22
|
-
raise MARC::Exception.new(), "tag must be
|
40
|
+
if not MARC::ControlField.control_tag?(@tag)
|
41
|
+
raise MARC::Exception.new(), "tag must be in 001-009 or in the MARC::ControlField.control_tags set"
|
23
42
|
end
|
24
43
|
end
|
25
44
|
|
@@ -34,13 +53,19 @@ module MARC
|
|
34
53
|
return true
|
35
54
|
end
|
36
55
|
|
56
|
+
# turning it into a marc-hash element
|
57
|
+
def to_marchash
|
58
|
+
return [@tag, @value]
|
59
|
+
end
|
60
|
+
|
61
|
+
|
37
62
|
def to_s
|
38
63
|
return "#{tag} #{value}"
|
39
64
|
end
|
40
65
|
|
41
66
|
def =~(regex)
|
42
67
|
return self.to_s =~ regex
|
43
|
-
end
|
68
|
+
end
|
44
69
|
|
45
70
|
end
|
46
71
|
|
data/lib/marc/datafield.rb
CHANGED
@@ -1,11 +1,14 @@
|
|
1
1
|
require 'marc/subfield'
|
2
2
|
require 'marc/record'
|
3
|
+
require 'marc/controlfield'
|
3
4
|
|
4
5
|
module MARC
|
5
6
|
|
6
7
|
# MARC records contain data fields, each of which has a tag,
|
7
|
-
# indicators and subfields. Tags for data fields must
|
8
|
-
#
|
8
|
+
# indicators and subfields. Tags for data fields are all
|
9
|
+
# three-character tags that are not control fields (generally,
|
10
|
+
# any numeric tag greater than 009).
|
11
|
+
#
|
9
12
|
# Accessor attributes: tag, indicator1, indicator2
|
10
13
|
#
|
11
14
|
# DataField mixes in Enumerable to enable access to its constituent
|
@@ -63,10 +66,12 @@ module MARC
|
|
63
66
|
@indicator2 = i2 == nil ? ' ' : i2
|
64
67
|
@subfields = []
|
65
68
|
|
66
|
-
# must use MARC::ControlField for tags < 010
|
67
|
-
|
69
|
+
# must use MARC::ControlField for tags < 010 or
|
70
|
+
# those in the MARC::ControlField.control_tags set
|
71
|
+
|
72
|
+
if MARC::ControlField.control_tag?(@tag)
|
68
73
|
raise MARC::Exception.new(),
|
69
|
-
"MARC::DataField objects can't have
|
74
|
+
"MARC::DataField objects can't have ControlField tag '" + @tag + "')"
|
70
75
|
end
|
71
76
|
|
72
77
|
# allows MARC::Subfield objects to be passed directly
|
@@ -78,7 +83,7 @@ module MARC
|
|
78
83
|
when Array
|
79
84
|
if subfield.length > 2
|
80
85
|
raise MARC::Exception.new(),
|
81
|
-
"arrays must only have 2 elements"
|
86
|
+
"arrays must only have 2 elements: " + subfield.to_s
|
82
87
|
end
|
83
88
|
@subfields.push(
|
84
89
|
MARC::Subfield.new(subfield[0],subfield[1]))
|
@@ -100,6 +105,11 @@ module MARC
|
|
100
105
|
return str
|
101
106
|
end
|
102
107
|
|
108
|
+
# Turn into a marc-hash structure
|
109
|
+
def to_marchash
|
110
|
+
return [@tag, @indicator1, @indicator2, @subfields.map {|sf| [sf.code, sf.value]} ]
|
111
|
+
end
|
112
|
+
|
103
113
|
|
104
114
|
# Add a subfield (MARC::Subfield) to the field
|
105
115
|
# field.append(MARC::Subfield.new('a','Dave Thomas'))
|
data/lib/marc/reader.rb
CHANGED
@@ -16,9 +16,15 @@ module MARC
|
|
16
16
|
#
|
17
17
|
# # marc is a string with a bunch of records in it
|
18
18
|
# reader = MARC::Reader.new(StringIO.new(reader))
|
19
|
+
#
|
20
|
+
# If your data have non-standard control fields in them
|
21
|
+
# (e.g., Aleph's 'FMT') you need to add them specifically
|
22
|
+
# to the MARC::ControlField.control_tags Set object
|
23
|
+
#
|
24
|
+
# MARC::ControlField.control_tags << 'FMT'
|
19
25
|
|
20
26
|
def initialize(file)
|
21
|
-
if file.
|
27
|
+
if file.is_a?(String)
|
22
28
|
@handle = File.new(file)
|
23
29
|
elsif file.respond_to?("read", 5)
|
24
30
|
@handle = file
|
@@ -40,7 +46,7 @@ module MARC
|
|
40
46
|
while rec_length_s = @handle.read(5)
|
41
47
|
# make sure the record length looks like an integer
|
42
48
|
rec_length_i = rec_length_s.to_i
|
43
|
-
if rec_length_i == 0
|
49
|
+
if rec_length_i == 0
|
44
50
|
raise MARC::Exception.new("invalid record length: #{rec_length_s}")
|
45
51
|
end
|
46
52
|
|
@@ -113,7 +119,7 @@ module MARC
|
|
113
119
|
field_data.delete!(END_OF_FIELD)
|
114
120
|
|
115
121
|
# add a control field or data field
|
116
|
-
if tag
|
122
|
+
if MARC::ControlField.control_tag?(tag)
|
117
123
|
record.append(MARC::ControlField.new(tag,field_data))
|
118
124
|
else
|
119
125
|
field = MARC::DataField.new(tag)
|
data/lib/marc/record.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
require 'marc/controlfield'
|
2
|
+
require 'marc/datafield'
|
3
|
+
|
1
4
|
module MARC
|
2
5
|
|
3
6
|
# A class that represents an individual MARC record. Every record
|
@@ -118,11 +121,40 @@ module MARC
|
|
118
121
|
return MARC::DublinCore.map(self)
|
119
122
|
end
|
120
123
|
|
124
|
+
# Return a marc-hash version of the record
|
125
|
+
def to_marchash
|
126
|
+
return {
|
127
|
+
'type' => 'marc-hash',
|
128
|
+
'version' => [MARCHASH_MAJOR_VERSION, MARCHASH_MINOR_VERSION],
|
129
|
+
'leader' => self.leader,
|
130
|
+
'fields' => self.map {|f| f.to_marchash}
|
131
|
+
}
|
132
|
+
end #to_marchash
|
133
|
+
|
134
|
+
# Factory method for creating a new MARC::Record from
|
135
|
+
# a marchash object
|
136
|
+
#
|
137
|
+
# record = MARC::Record.new_from_marchash(mh)
|
138
|
+
|
139
|
+
def self.new_from_marchash(mh)
|
140
|
+
r = self.new()
|
141
|
+
r.leader = mh['leader']
|
142
|
+
mh['fields'].each do |f|
|
143
|
+
if (f.length == 2)
|
144
|
+
r << MARC::ControlField.new(f[0], f[1])
|
145
|
+
elsif
|
146
|
+
r << MARC::DataField.new(f[0], f[1], f[2], *f[3])
|
147
|
+
end
|
148
|
+
end
|
149
|
+
return r
|
150
|
+
end
|
151
|
+
|
152
|
+
|
121
153
|
# Returns a string version of the record, suitable for printing
|
122
154
|
|
123
155
|
def to_s
|
124
156
|
str = "LEADER #{leader}\n"
|
125
|
-
for field in fields
|
157
|
+
for field in fields
|
126
158
|
str += field.to_s() + "\n"
|
127
159
|
end
|
128
160
|
return str
|
@@ -0,0 +1,288 @@
|
|
1
|
+
module MARC
|
2
|
+
# The MagicReader will try to use the best available XML Parser at the
|
3
|
+
# time of initialization.
|
4
|
+
# The order is currently:
|
5
|
+
# * Nokogiri
|
6
|
+
# * jrexml (JRuby only)
|
7
|
+
# * rexml
|
8
|
+
#
|
9
|
+
# With the idea that other parsers could be added as their modules are
|
10
|
+
# added. Realistically, this list should be limited to stream-based
|
11
|
+
# parsers. The magic should be used selectively, however. After all,
|
12
|
+
# one project's definition of 'best' might not apply universally. It
|
13
|
+
# is arguable which is "best" on JRuby: Nokogiri or jrexml.
|
14
|
+
module MagicReader
|
15
|
+
def self.extended(receiver)
|
16
|
+
# Start with a Nokogiri check
|
17
|
+
begin
|
18
|
+
require 'nokogiri'
|
19
|
+
receiver.extend(NokogiriReader)
|
20
|
+
rescue LoadError
|
21
|
+
if RUBY_PLATFORM =~ /java/
|
22
|
+
# If using JRuby, use JREXML if it's there
|
23
|
+
begin
|
24
|
+
receiver.extend(JREXMLReader)
|
25
|
+
return
|
26
|
+
rescue LoadError
|
27
|
+
end
|
28
|
+
end
|
29
|
+
# If you're here, you're stuck with lowly REXML
|
30
|
+
receiver.extend(REXMLReader)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
# NokogiriReader uses the Nokogiri SAX Parser to quickly read
|
36
|
+
# a MARCXML document. Because dynamically subclassing MARC::XMLReader
|
37
|
+
# is a little ugly, we need to recreate all of the SAX event methods
|
38
|
+
# from Nokogiri::XML::SAX::Document here rather than subclassing.
|
39
|
+
module NokogiriReader
|
40
|
+
def self.extended(receiver)
|
41
|
+
require 'nokogiri'
|
42
|
+
receiver.init
|
43
|
+
end
|
44
|
+
|
45
|
+
# Sets our instance variables for SAX parsing in Nokogiri and creates the parser
|
46
|
+
def init
|
47
|
+
@record = {:record=>nil,:field=>nil,:subfield=>nil}
|
48
|
+
@current_element = nil
|
49
|
+
@ns = "http://www.loc.gov/MARC21/slim"
|
50
|
+
@parser = Nokogiri::XML::SAX::Parser.new(self)
|
51
|
+
end
|
52
|
+
|
53
|
+
# Loop through the MARC records in the XML document
|
54
|
+
def each(&block)
|
55
|
+
@block = block
|
56
|
+
@parser.parse(@handle)
|
57
|
+
end
|
58
|
+
|
59
|
+
# Returns our MARC::Record object to the #each block.
|
60
|
+
def yield_record
|
61
|
+
@block.call(@record[:record])
|
62
|
+
@record[:record] = nil
|
63
|
+
end
|
64
|
+
|
65
|
+
def start_element_namespace name, attributes = [], prefix = nil, uri = nil, ns = {}
|
66
|
+
attributes = attributes_to_hash(attributes)
|
67
|
+
if uri == @ns
|
68
|
+
case name.downcase
|
69
|
+
when 'record' then @record[:record] = MARC::Record.new
|
70
|
+
when 'leader' then @current_element = :leader
|
71
|
+
when 'controlfield'
|
72
|
+
@current_element=:field
|
73
|
+
@record[:field] = MARC::ControlField.new(attributes["tag"])
|
74
|
+
when 'datafield'
|
75
|
+
@record[:field] = MARC::DataField.new(attributes["tag"], attributes['ind1'], attributes['ind2'])
|
76
|
+
when 'subfield'
|
77
|
+
@current_element=:subfield
|
78
|
+
@record[:subfield] = MARC::Subfield.new(attributes['code'])
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
def characters text
|
84
|
+
case @current_element
|
85
|
+
when :leader then @record[:record].leader = text
|
86
|
+
when :field then @record[:field].value << text
|
87
|
+
when :subfield then @record[:subfield].value << text
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
def end_element_namespace name, prefix = nil, uri = nil
|
92
|
+
@current_element = nil
|
93
|
+
if uri == "http://www.loc.gov/MARC21/slim"
|
94
|
+
case name.downcase
|
95
|
+
when 'record' then yield_record
|
96
|
+
when /(control|data)field/
|
97
|
+
@record[:record] << @record[:field]
|
98
|
+
@record[:field] = nil
|
99
|
+
@current_element = nil if @current_element == :field
|
100
|
+
when 'subfield'
|
101
|
+
@record[:field].append(@record[:subfield])
|
102
|
+
@record[:subfield] = nil
|
103
|
+
@current_element = nil if @current_element == :subfield
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
def method_missing(methName, *args)
|
109
|
+
sax_methods = [:xmldecl, :start_document, :end_document, :start_element,
|
110
|
+
:end_element, :comment, :warning, :error, :cdata_block]
|
111
|
+
unless sax_methods.index(methName)
|
112
|
+
raise NoMethodError.new("undefined method '#{methName} for #{self}", 'no_meth')
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
private
|
117
|
+
|
118
|
+
def attributes_to_hash(attributes)
|
119
|
+
hash = {}
|
120
|
+
attributes.each do | att |
|
121
|
+
hash[att.localname] = att.value
|
122
|
+
end
|
123
|
+
hash
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
# The REXMLReader is the 'default' parser, since we can at least be
|
128
|
+
# assured that REXML is probably there. It uses REXML's PullParser
|
129
|
+
# to handle larger document sizes without consuming insane amounts of
|
130
|
+
# memory, but it's still REXML (read: slow), so it's a good idea to
|
131
|
+
# use an alternative parser if available. If you don't know the best
|
132
|
+
# parser available, you can use the MagicReader or set:
|
133
|
+
#
|
134
|
+
# MARC::XMLReader.parser=MARC::XMLReader::USE_BEST_AVAILABLE
|
135
|
+
#
|
136
|
+
# or
|
137
|
+
#
|
138
|
+
# MARC::XMLReader.parser="magic"
|
139
|
+
#
|
140
|
+
# or
|
141
|
+
#
|
142
|
+
# reader = MARC::XMLReader.new(fh, :parser=>"magic")
|
143
|
+
# (or the constant)
|
144
|
+
#
|
145
|
+
# which will cascade down to REXML if nothing better is found.
|
146
|
+
#
|
147
|
+
module REXMLReader
|
148
|
+
def self.extended(receiver)
|
149
|
+
require 'rexml/document'
|
150
|
+
require 'rexml/parsers/pullparser'
|
151
|
+
receiver.init
|
152
|
+
end
|
153
|
+
|
154
|
+
# Sets our parser
|
155
|
+
def init
|
156
|
+
@parser = REXML::Parsers::PullParser.new(@handle)
|
157
|
+
end
|
158
|
+
|
159
|
+
# Loop through the MARC records in the XML document
|
160
|
+
def each
|
161
|
+
while @parser.has_next?
|
162
|
+
event = @parser.pull
|
163
|
+
# if it's the start of a record element
|
164
|
+
if event.start_element? and strip_ns(event[0]) == 'record'
|
165
|
+
yield build_record
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
170
|
+
private
|
171
|
+
def strip_ns(str)
|
172
|
+
return str.sub(/^.*:/, '')
|
173
|
+
end
|
174
|
+
|
175
|
+
# will accept parse events until a record has been built up
|
176
|
+
#
|
177
|
+
def build_record
|
178
|
+
record = MARC::Record.new
|
179
|
+
data_field = nil
|
180
|
+
control_field = nil
|
181
|
+
subfield = nil
|
182
|
+
text = ''
|
183
|
+
attrs = nil
|
184
|
+
if Module.constants.index('Nokogiri') and @parser.is_a?(Nokogiri::XML::Reader)
|
185
|
+
datafield = nil
|
186
|
+
cursor = nil
|
187
|
+
open_elements = []
|
188
|
+
@parser.each do | node |
|
189
|
+
if node.value? && cursor
|
190
|
+
if cursor.is_a?(Symbol) and cursor == :leader
|
191
|
+
record.leader = node.value
|
192
|
+
else
|
193
|
+
cursor.value = node.value
|
194
|
+
end
|
195
|
+
cursor = nil
|
196
|
+
end
|
197
|
+
next unless node.namespace_uri == @ns
|
198
|
+
if open_elements.index(node.local_name.downcase)
|
199
|
+
open_elements.delete(node.local_name.downcase)
|
200
|
+
next
|
201
|
+
else
|
202
|
+
open_elements << node.local_name.downcase
|
203
|
+
end
|
204
|
+
case node.local_name.downcase
|
205
|
+
when "leader"
|
206
|
+
cursor = :leader
|
207
|
+
when "controlfield"
|
208
|
+
record << datafield if datafield
|
209
|
+
datafield = nil
|
210
|
+
control_field = MARC::ControlField.new(node.attribute('tag'))
|
211
|
+
record << control_field
|
212
|
+
cursor = control_field
|
213
|
+
when "datafield"
|
214
|
+
record << datafield if datafield
|
215
|
+
datafield = nil
|
216
|
+
data_field = MARC::DataField.new(node.attribute('tag'), node.attribute('ind1'), node.attribute('ind2'))
|
217
|
+
datafield = data_field
|
218
|
+
when "subfield"
|
219
|
+
raise "No datafield to add to" unless datafield
|
220
|
+
subfield = MARC::Subfield.new(node.attribute('code'))
|
221
|
+
datafield.append(subfield)
|
222
|
+
cursor = subfield
|
223
|
+
when "record"
|
224
|
+
record << datafield if datafield
|
225
|
+
return record
|
226
|
+
end
|
227
|
+
#puts node.name
|
228
|
+
end
|
229
|
+
|
230
|
+
else
|
231
|
+
while @parser.has_next?
|
232
|
+
event = @parser.pull
|
233
|
+
|
234
|
+
if event.text?
|
235
|
+
text += REXML::Text::unnormalize(event[0])
|
236
|
+
next
|
237
|
+
end
|
238
|
+
|
239
|
+
if event.start_element?
|
240
|
+
text = ''
|
241
|
+
attrs = event[1]
|
242
|
+
case strip_ns(event[0])
|
243
|
+
when 'controlfield'
|
244
|
+
text = ''
|
245
|
+
control_field = MARC::ControlField.new(attrs['tag'])
|
246
|
+
when 'datafield'
|
247
|
+
text = ''
|
248
|
+
data_field = MARC::DataField.new(attrs['tag'], attrs['ind1'],
|
249
|
+
attrs['ind2'])
|
250
|
+
when 'subfield'
|
251
|
+
text = ''
|
252
|
+
subfield = MARC::Subfield.new(attrs['code'])
|
253
|
+
end
|
254
|
+
end
|
255
|
+
|
256
|
+
if event.end_element?
|
257
|
+
case strip_ns(event[0])
|
258
|
+
when 'leader'
|
259
|
+
record.leader = text
|
260
|
+
when 'record'
|
261
|
+
return record
|
262
|
+
when 'controlfield'
|
263
|
+
control_field.value = text
|
264
|
+
record.append(control_field)
|
265
|
+
when 'datafield'
|
266
|
+
record.append(data_field)
|
267
|
+
when 'subfield'
|
268
|
+
subfield.value = text
|
269
|
+
data_field.append(subfield)
|
270
|
+
end
|
271
|
+
end
|
272
|
+
end
|
273
|
+
end
|
274
|
+
end
|
275
|
+
end
|
276
|
+
|
277
|
+
# The JREXMLReader is really just here to set the load order for
|
278
|
+
# injecting the Java pull parser.
|
279
|
+
module JREXMLReader
|
280
|
+
|
281
|
+
def self.extended(receiver)
|
282
|
+
require 'rexml/document'
|
283
|
+
require 'rexml/parsers/pullparser'
|
284
|
+
require 'jrexml'
|
285
|
+
receiver.extend(REXMLReader)
|
286
|
+
end
|
287
|
+
end
|
288
|
+
end
|
data/lib/marc/xmlreader.rb
CHANGED
@@ -1,103 +1,142 @@
|
|
1
|
-
require '
|
2
|
-
require 'rexml/parsers/pullparser'
|
3
|
-
|
1
|
+
require File.dirname(__FILE__) + '/xml_parsers'
|
4
2
|
module MARC
|
5
|
-
|
3
|
+
|
4
|
+
# the constructor which you can pass either a filename:
|
5
|
+
#
|
6
|
+
# reader = MARC::XMLReader.new('/Users/edsu/marc.xml')
|
7
|
+
#
|
8
|
+
# or a File object,
|
9
|
+
#
|
10
|
+
# reader = MARC::XMLReader.new(File.new('/Users/edsu/marc.xml'))
|
11
|
+
#
|
12
|
+
# or really any object that responds to read(n)
|
13
|
+
#
|
14
|
+
# reader = MARC::XMLReader.new(StringIO.new(xml))
|
15
|
+
#
|
16
|
+
# By default, XMLReader uses REXML's pull parser, but you can swap
|
17
|
+
# that out with Nokogiri or jrexml (or let the system choose the
|
18
|
+
# 'best' one). The :parser can either be one of the defined constants
|
19
|
+
# or the constant's value.
|
20
|
+
#
|
21
|
+
# reader = MARC::XMLReader.new(fh, :parser=>'magic')
|
22
|
+
#
|
23
|
+
# It is also possible to set the default parser at the class level so
|
24
|
+
# all subsequent instances will use it instead:
|
25
|
+
#
|
26
|
+
# MARC::XMLReader.best_available
|
27
|
+
# "nokogiri" # returns parser name, but doesn't set it.
|
28
|
+
#
|
29
|
+
# Use:
|
30
|
+
# MARC::XMLReader.best_available!
|
31
|
+
#
|
32
|
+
# or
|
33
|
+
# MARC::XMLReader.nokogiri!
|
34
|
+
#
|
6
35
|
class XMLReader
|
7
36
|
include Enumerable
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
#
|
15
|
-
# reader = Marc::XMLReader.new(File.new('/Users/edsu/marc.xml'))
|
16
|
-
#
|
17
|
-
# or really any object that responds to read(n)
|
18
|
-
#
|
19
|
-
# reader = MARC::XMLReader.new(StringIO.new(xml))
|
37
|
+
USE_BEST_AVAILABLE = 'magic'
|
38
|
+
USE_REXML = 'rexml'
|
39
|
+
USE_NOKOGIRI = 'nokogiri'
|
40
|
+
USE_JREXML = 'jrexml'
|
41
|
+
@@parser = USE_REXML
|
42
|
+
attr_reader :parser
|
20
43
|
|
21
|
-
def initialize(file)
|
22
|
-
if file.
|
44
|
+
def initialize(file, options = {})
|
45
|
+
if file.is_a?(String)
|
23
46
|
handle = File.new(file)
|
24
47
|
elsif file.respond_to?("read", 5)
|
25
48
|
handle = file
|
26
49
|
else
|
27
50
|
throw "must pass in path or File"
|
28
51
|
end
|
52
|
+
@handle = handle
|
29
53
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
54
|
+
if options[:parser]
|
55
|
+
parser = self.class.choose_parser(options[:parser].to_s)
|
56
|
+
else
|
57
|
+
parser = @@parser
|
58
|
+
end
|
59
|
+
case parser
|
60
|
+
when 'magic' then extend MagicReader
|
61
|
+
when 'rexml' then extend REXMLReader
|
62
|
+
when 'jrexml' then extend JREXMLReader
|
63
|
+
when 'nokogiri' then extend NokogiriReader
|
40
64
|
end
|
41
65
|
end
|
42
66
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
67
|
+
# Returns the currently set parser type
|
68
|
+
def self.parser
|
69
|
+
return @@parser
|
70
|
+
end
|
71
|
+
|
72
|
+
# Returns an array of all the parsers available
|
73
|
+
def self.parsers
|
74
|
+
p = []
|
75
|
+
self.constants.each do | const |
|
76
|
+
next unless const.match("^USE_")
|
77
|
+
p << const
|
78
|
+
end
|
79
|
+
return p
|
80
|
+
end
|
81
|
+
|
82
|
+
# Sets the class parser
|
83
|
+
def self.parser=(p)
|
84
|
+
@@parser = choose_parser(p)
|
47
85
|
end
|
48
86
|
|
49
|
-
#
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
if event.text?
|
63
|
-
text += REXML::Text::unnormalize(event[0])
|
64
|
-
next
|
65
|
-
end
|
66
|
-
|
67
|
-
if event.start_element?
|
68
|
-
text = ''
|
69
|
-
attrs = event[1]
|
70
|
-
case strip_ns(event[0])
|
71
|
-
when 'controlfield'
|
72
|
-
text = ''
|
73
|
-
control_field = MARC::ControlField.new(attrs['tag'])
|
74
|
-
when 'datafield'
|
75
|
-
text = ''
|
76
|
-
data_field = MARC::DataField.new(attrs['tag'], attrs['ind1'],
|
77
|
-
attrs['ind2'])
|
78
|
-
when 'subfield'
|
79
|
-
text = ''
|
80
|
-
subfield = MARC::Subfield.new(attrs['code'])
|
87
|
+
# Returns the value of the best available parser
|
88
|
+
def self.best_available
|
89
|
+
parser = nil
|
90
|
+
begin
|
91
|
+
require 'nokogiri'
|
92
|
+
parser = USE_NOKOGIRI
|
93
|
+
rescue LoadError
|
94
|
+
if RUBY_PLATFORM =~ /java/
|
95
|
+
begin
|
96
|
+
require 'jrexml'
|
97
|
+
parser = USE_JREXML
|
98
|
+
rescue LoadError
|
99
|
+
parser = USE_REXML
|
81
100
|
end
|
101
|
+
else
|
102
|
+
parser = USE_REXML
|
82
103
|
end
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
104
|
+
parser
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
# Sets the best available parser as the default
|
109
|
+
def self.best_available!
|
110
|
+
@@parser = self.best_available
|
111
|
+
end
|
112
|
+
|
113
|
+
# Sets Nokogiri as the default parser
|
114
|
+
def self.nokogiri!
|
115
|
+
@@parser = USE_NOKOGIRI
|
116
|
+
end
|
117
|
+
|
118
|
+
# Sets jrexml as the default parser
|
119
|
+
def self.jrexml!
|
120
|
+
@@parser = USE_JREXML
|
121
|
+
end
|
122
|
+
|
123
|
+
# Sets REXML as the default parser
|
124
|
+
def self.rexml!
|
125
|
+
@@parser = USE_REXML
|
126
|
+
end
|
127
|
+
|
128
|
+
protected
|
129
|
+
|
130
|
+
def self.choose_parser(p)
|
131
|
+
match = false
|
132
|
+
self.constants.each do | const |
|
133
|
+
next unless const.to_s.match("^USE_")
|
134
|
+
if self.const_get(const) == p
|
135
|
+
match = true
|
136
|
+
return p
|
99
137
|
end
|
100
138
|
end
|
139
|
+
raise ArgumentError.new("Parser '#{p}' not defined") unless match
|
101
140
|
end
|
102
141
|
end
|
103
142
|
end
|
data/lib/marc/xmlwriter.rb
CHANGED
@@ -137,7 +137,7 @@ module MARC
|
|
137
137
|
control_element = REXML::Element.new("controlfield")
|
138
138
|
|
139
139
|
# We need a marker for invalid tag values (we use 00z)
|
140
|
-
unless field.tag.match(ctrlFieldTag)
|
140
|
+
unless field.tag.match(ctrlFieldTag) or MARC::Field.control_tag?(ctrlFieldTag)
|
141
141
|
field.tag = "00z"
|
142
142
|
end
|
143
143
|
|
data/test/tc_controlfield.rb
CHANGED
@@ -15,6 +15,31 @@ class TestField < Test::Unit::TestCase
|
|
15
15
|
end
|
16
16
|
end
|
17
17
|
|
18
|
+
def test_alpha_control_field
|
19
|
+
assert_raise(MARC::Exception) do
|
20
|
+
# can't have a control field with an alphabetic tag that is not in control_tags
|
21
|
+
field = MARC::ControlField.new('DDD')
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_extra_control_field
|
26
|
+
MARC::ControlField.control_tags << 'FMT'
|
27
|
+
assert_nothing_raised do
|
28
|
+
field = MARC::ControlField.new('FMT')
|
29
|
+
end
|
30
|
+
assert_raise(MARC::Exception) do
|
31
|
+
field = MARC::DataField.new('FMT')
|
32
|
+
end
|
33
|
+
MARC::ControlField.control_tags.delete('FMT')
|
34
|
+
assert_nothing_raised do
|
35
|
+
field = MARC::DataField.new('FMT')
|
36
|
+
end
|
37
|
+
assert_raise(MARC::Exception) do
|
38
|
+
field = MARC::ControlField.new('FMT')
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
42
|
+
|
18
43
|
def test_control_as_field
|
19
44
|
assert_raise(MARC::Exception) do
|
20
45
|
# can't have a control with a tag > 009
|
data/test/tc_marchash.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'marc'
|
3
|
+
require 'rubygems'
|
4
|
+
|
5
|
+
class TestMARCHASH < Test::Unit::TestCase
|
6
|
+
|
7
|
+
def test_simple
|
8
|
+
simple = {
|
9
|
+
'type' => 'marc-hash',
|
10
|
+
'version' => [1,0],
|
11
|
+
'leader' => 'LEADER',
|
12
|
+
'fields' => [
|
13
|
+
['245', '1', '0',
|
14
|
+
[
|
15
|
+
['a', 'TITLE'],
|
16
|
+
['b', 'SUBTITLE']
|
17
|
+
]
|
18
|
+
]
|
19
|
+
]
|
20
|
+
}
|
21
|
+
r = MARC::Record.new()
|
22
|
+
r.leader = 'LEADER'
|
23
|
+
f = MARC::DataField.new('245', '1', '0', ['a', 'TITLE'], ['b', 'SUBTITLE'])
|
24
|
+
r << f
|
25
|
+
assert_equal(r.to_marchash, simple)
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_real
|
29
|
+
reader = MARC::Reader.new('test/batch.dat')
|
30
|
+
reader.each do |r|
|
31
|
+
x = MARC::Record.new_from_marchash(r.to_marchash)
|
32
|
+
assert_equal(r,x)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
|
37
|
+
end
|
data/test/tc_parsers.rb
ADDED
@@ -0,0 +1,154 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'marc'
|
3
|
+
|
4
|
+
class ParsersTest < Test::Unit::TestCase
|
5
|
+
def test_parser_default
|
6
|
+
assert_equal("rexml", MARC::XMLReader.parser)
|
7
|
+
reader = MARC::XMLReader.new('test/one.xml')
|
8
|
+
assert_kind_of(REXML::Parsers::PullParser, reader.parser)
|
9
|
+
end
|
10
|
+
|
11
|
+
def test_set_nokogiri
|
12
|
+
begin
|
13
|
+
require 'nokogiri'
|
14
|
+
assert_equal("rexml", MARC::XMLReader.parser)
|
15
|
+
reader = MARC::XMLReader.new('test/one.xml')
|
16
|
+
assert_kind_of(REXML::Parsers::PullParser, reader.parser)
|
17
|
+
reader = MARC::XMLReader.new('test/one.xml', :parser=>MARC::XMLReader::USE_NOKOGIRI)
|
18
|
+
assert_kind_of(Nokogiri::XML::SAX::Parser, reader.parser)
|
19
|
+
assert_equal("rexml", MARC::XMLReader.parser)
|
20
|
+
reader = MARC::XMLReader.new('test/one.xml', :parser=>'nokogiri')
|
21
|
+
assert_kind_of(Nokogiri::XML::SAX::Parser, reader.parser)
|
22
|
+
assert_equal("rexml", MARC::XMLReader.parser)
|
23
|
+
MARC::XMLReader.parser=MARC::XMLReader::USE_NOKOGIRI
|
24
|
+
assert_equal("nokogiri", MARC::XMLReader.parser)
|
25
|
+
reader = MARC::XMLReader.new('test/one.xml')
|
26
|
+
assert_kind_of(Nokogiri::XML::SAX::Parser, reader.parser)
|
27
|
+
MARC::XMLReader.parser="nokogiri"
|
28
|
+
assert_equal("nokogiri", MARC::XMLReader.parser)
|
29
|
+
reader = MARC::XMLReader.new('test/one.xml')
|
30
|
+
assert_kind_of(Nokogiri::XML::SAX::Parser, reader.parser)
|
31
|
+
rescue LoadError
|
32
|
+
puts "\nNokogiri not available, skipping 'test_set_nokogiri'.\n"
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_set_jrexml
|
37
|
+
if RUBY_PLATFORM =~ /java/
|
38
|
+
begin
|
39
|
+
require 'jrexml'
|
40
|
+
reader = MARC::XMLReader.new('test/one.xml', :parser=>MARC::XMLReader::USE_JREXML)
|
41
|
+
assert_kind_of(REXML::Parsers::PullParser, reader.parser)
|
42
|
+
assert_equal("rexml", MARC::XMLReader.parser)
|
43
|
+
reader = MARC::XMLReader.new('test/one.xml', :parser=>'jrexml')
|
44
|
+
assert_kind_of(REXML::Parsers::PullParser, reader.parser)
|
45
|
+
assert_equal("rexml", MARC::XMLReader.parser)
|
46
|
+
MARC::XMLReader.parser=MARC::XMLReader::USE_JREXML
|
47
|
+
assert_equal("jrexml", MARC::XMLReader.parser)
|
48
|
+
reader = MARC::XMLReader.new('test/one.xml')
|
49
|
+
assert_kind_of(REXML::Parsers::PullParser, reader.parser)
|
50
|
+
MARC::XMLReader.parser="jrexml"
|
51
|
+
assert_equal("jrexml", MARC::XMLReader.parser)
|
52
|
+
reader = MARC::XMLReader.new('test/one.xml')
|
53
|
+
assert_kind_of(REXML::Parsers::PullParser, reader.parser)
|
54
|
+
rescue LoadError
|
55
|
+
puts "\njrexml not available, skipping 'test_set_jrexml'.\n"
|
56
|
+
end
|
57
|
+
else
|
58
|
+
puts "\nTest not being run from JRuby, skipping 'test_set_jrexml'.\n"
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def test_set_rexml
|
63
|
+
reader = MARC::XMLReader.new('test/one.xml', :parser=>MARC::XMLReader::USE_REXML)
|
64
|
+
assert_kind_of(REXML::Parsers::PullParser, reader.parser)
|
65
|
+
assert_equal("rexml", MARC::XMLReader.parser)
|
66
|
+
reader = MARC::XMLReader.new('test/one.xml', :parser=>'rexml')
|
67
|
+
assert_kind_of(REXML::Parsers::PullParser, reader.parser)
|
68
|
+
assert_equal("rexml", MARC::XMLReader.parser)
|
69
|
+
MARC::XMLReader.parser=MARC::XMLReader::USE_REXML
|
70
|
+
assert_equal("rexml", MARC::XMLReader.parser)
|
71
|
+
reader = MARC::XMLReader.new('test/one.xml')
|
72
|
+
assert_kind_of(REXML::Parsers::PullParser, reader.parser)
|
73
|
+
MARC::XMLReader.parser="rexml"
|
74
|
+
assert_equal("rexml", MARC::XMLReader.parser)
|
75
|
+
reader = MARC::XMLReader.new('test/one.xml')
|
76
|
+
assert_kind_of(REXML::Parsers::PullParser, reader.parser)
|
77
|
+
end
|
78
|
+
|
79
|
+
def test_set_magic
|
80
|
+
magic_parser = nil
|
81
|
+
begin
|
82
|
+
require 'nokogiri'
|
83
|
+
magic_parser = Nokogiri::XML::SAX::Parser
|
84
|
+
rescue LoadError
|
85
|
+
magic_parser = REXML::Parsers::PullParser
|
86
|
+
end
|
87
|
+
puts "\nTesting 'test_set_magic' for parser: #{magic_parser}"
|
88
|
+
reader = MARC::XMLReader.new('test/one.xml', :parser=>MARC::XMLReader::USE_BEST_AVAILABLE)
|
89
|
+
assert_kind_of(magic_parser, reader.parser)
|
90
|
+
assert_equal("rexml", MARC::XMLReader.parser)
|
91
|
+
reader = MARC::XMLReader.new('test/one.xml', :parser=>'magic')
|
92
|
+
assert_kind_of(magic_parser, reader.parser)
|
93
|
+
assert_equal("rexml", MARC::XMLReader.parser)
|
94
|
+
MARC::XMLReader.parser=MARC::XMLReader::USE_BEST_AVAILABLE
|
95
|
+
assert_equal("magic", MARC::XMLReader.parser)
|
96
|
+
reader = MARC::XMLReader.new('test/one.xml')
|
97
|
+
assert_kind_of(magic_parser, reader.parser)
|
98
|
+
MARC::XMLReader.parser="magic"
|
99
|
+
assert_equal("magic", MARC::XMLReader.parser)
|
100
|
+
reader = MARC::XMLReader.new('test/one.xml')
|
101
|
+
assert_kind_of(magic_parser, reader.parser)
|
102
|
+
end
|
103
|
+
|
104
|
+
# Exercises the class-level convenience methods on MARC::XMLReader
# (best_available, best_available!, rexml!, nokogiri!, jrexml!).
#
# The expected parser name and parser class depend on which optional
# libraries can be loaded:
# * nokogiri loadable          -> 'nokogiri' / Nokogiri::XML::SAX::Parser
# * otherwise                  -> 'rexml'    / REXML::Parsers::PullParser
# * otherwise, JRuby + jrexml  -> 'jrexml'   / REXML::Parsers::PullParser
def test_parser_set_convenience_methods
  parser_name = nil
  parser = nil
  begin
    require 'nokogiri'
    parser_name = 'nokogiri'
    parser = Nokogiri::XML::SAX::Parser
  rescue LoadError
    parser = REXML::Parsers::PullParser
    # The name goes in parser_name; `parser` must stay a Class because it is
    # passed to assert_kind_of below.
    parser_name = 'rexml'
    if RUBY_PLATFORM =~ /java/
      begin
        require 'jrexml'
        parser_name = 'jrexml'
      rescue LoadError
        # jrexml is optional; keep the plain REXML expectation.
      end
    end
  end

  assert_equal(parser_name, MARC::XMLReader.best_available)

  MARC::XMLReader.best_available!
  reader = MARC::XMLReader.new('test/one.xml')
  assert_kind_of(parser, reader.parser)

  MARC::XMLReader.rexml!
  reader = MARC::XMLReader.new('test/one.xml')
  assert_kind_of(REXML::Parsers::PullParser, reader.parser)

  if parser_name == 'nokogiri'
    MARC::XMLReader.nokogiri!
    reader = MARC::XMLReader.new('test/one.xml')
    assert_kind_of(Nokogiri::XML::SAX::Parser, reader.parser)
  else
    puts "\nNokogiri not loaded, skipping convenience method test.\n"
  end

  if RUBY_PLATFORM =~ /java/
    begin
      require 'jrexml'
      MARC::XMLReader.jrexml!
      reader = MARC::XMLReader.new('test/one.xml')
      # With jrexml selected the reader is still expected to expose a REXML
      # pull parser.
      assert_kind_of(REXML::Parsers::PullParser, reader.parser)
    rescue LoadError
      puts "\njrexml not available, skipping convenience method test.\n"
    end
  else
    puts "\nTest not being run from JRuby, skipping jrexml convenience method test.\n"
  end
end
|
149
|
+
|
150
|
+
# Runs after each test: puts the class-level parser setting back to REXML so
# a parser chosen in one test is not still selected when the next one starts.
def teardown
  MARC::XMLReader.parser = MARC::XMLReader::USE_REXML
end
|
153
|
+
|
154
|
+
end
|
data/test/tc_record.rb
CHANGED
@@ -12,7 +12,12 @@ class TestRecord < Test::Unit::TestCase
|
|
12
12
|
r = get_record()
|
13
13
|
doc = r.to_xml
|
14
14
|
assert_kind_of REXML::Element, doc
|
15
|
-
|
15
|
+
if RUBY_VERSION < '1.9.0'
|
16
|
+
assert_equal "<record xmlns='http://www.loc.gov/MARC21/slim'><leader> Z 22 4500</leader><datafield tag='100' ind1='2' ind2='0'><subfield code='a'>Thomas, Dave</subfield></datafield><datafield tag='245' ind1='0' ind2='4'><subfield code='The Pragmatic Programmer'></subfield></datafield></record>", doc.to_s
|
17
|
+
else
|
18
|
+
# REXML inexplicably sorts the attributes alphabetically in Ruby 1.9
|
19
|
+
assert_equal "<record xmlns='http://www.loc.gov/MARC21/slim'><leader> Z 22 4500</leader><datafield ind1='2' ind2='0' tag='100'><subfield code='a'>Thomas, Dave</subfield></datafield><datafield ind1='0' ind2='4' tag='245'><subfield code='The Pragmatic Programmer'></subfield></datafield></record>", doc.to_s
|
20
|
+
end
|
16
21
|
end
|
17
22
|
|
18
23
|
def test_append_field
|
data/test/tc_xml.rb
CHANGED
@@ -3,20 +3,50 @@ require 'marc'
|
|
3
3
|
require 'stringio'
|
4
4
|
|
5
5
|
class XMLTest < Test::Unit::TestCase
|
6
|
+
# Builds @parsers, the list of parser identifiers each XML test is run
# against. :rexml is always present; :nokogiri is added when nokogiri can be
# required, and :jrexml when running on a Java platform with jrexml available.
def setup
  optional_libs = ['nokogiri']
  optional_libs << 'jrexml' if RUBY_PLATFORM =~ /java/

  @parsers = [:rexml]
  optional_libs.each do |lib|
    begin
      require lib
      @parsers << lib.to_sym
    rescue LoadError
      # Optional library not installed; leave it out of the list.
    end
  end
end
|
6
21
|
|
7
|
-
|
22
|
+
|
23
|
+
def test_xml_entities
|
24
|
+
@parsers.each do | parser |
|
25
|
+
puts "\nRunning test_xml_entities with: #{parser}.\n"
|
26
|
+
xml_entities_test(parser)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
def xml_entities_test(parser)
|
8
31
|
r1 = MARC::Record.new
|
9
32
|
r1 << MARC::DataField.new('245', '0', '0', ['a', 'foo & bar & baz'])
|
10
33
|
xml = r1.to_xml.to_s
|
11
34
|
assert_match /foo & bar & baz/, xml
|
12
35
|
|
13
|
-
reader = MARC::XMLReader.new(StringIO.new(xml))
|
36
|
+
reader = MARC::XMLReader.new(StringIO.new(xml), :parser=>parser)
|
14
37
|
r2 = reader.entries[0]
|
15
|
-
assert_equal 'foo & bar & baz', r2['245']['a']
|
38
|
+
assert_equal 'foo & bar & baz', r2['245']['a']
|
16
39
|
end
|
17
|
-
|
40
|
+
|
18
41
|
def test_batch
|
19
|
-
|
42
|
+
@parsers.each do | parser |
|
43
|
+
puts "\nRunning test_batch with: #{parser}.\n"
|
44
|
+
batch_test(parser)
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def batch_test(parser)
|
49
|
+
reader = MARC::XMLReader.new('test/batch.xml', :parser=>parser)
|
20
50
|
count = 0
|
21
51
|
for record in reader
|
22
52
|
count += 1
|
@@ -24,15 +54,29 @@ class XMLTest < Test::Unit::TestCase
|
|
24
54
|
end
|
25
55
|
assert_equal(count, 2)
|
26
56
|
end
|
27
|
-
|
57
|
+
|
28
58
|
def test_read_string
|
59
|
+
@parsers.each do | parser |
|
60
|
+
puts "\nRunning test_read_string with: #{parser}.\n"
|
61
|
+
read_string_test(parser)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
def read_string_test(parser)
|
29
66
|
xml = File.new('test/batch.xml').read
|
30
|
-
reader = MARC::XMLReader.new(StringIO.new(xml))
|
67
|
+
reader = MARC::XMLReader.new(StringIO.new(xml), :parser=>parser)
|
31
68
|
assert_equal 2, reader.entries.length
|
32
69
|
end
|
33
70
|
|
34
71
|
def test_non_numeric_fields
|
35
|
-
|
72
|
+
@parsers.each do | parser |
|
73
|
+
puts "\nRunning test_non_numeric_fields with: #{parser}.\n"
|
74
|
+
non_numeric_fields_test(parser)
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
def non_numeric_fields_test(parser)
|
79
|
+
reader = MARC::XMLReader.new('test/non-numeric.xml', :parser=>parser)
|
36
80
|
count = 0
|
37
81
|
record = nil
|
38
82
|
reader.each do | rec |
|
@@ -45,21 +89,42 @@ class XMLTest < Test::Unit::TestCase
|
|
45
89
|
end
|
46
90
|
|
47
91
|
def test_read_no_leading_zero_write_leading_zero
|
48
|
-
|
92
|
+
@parsers.each do | parser |
|
93
|
+
puts "\nRunning test_read_no_leading_zero_write_leading_zero with: #{parser}.\n"
|
94
|
+
read_no_leading_zero_write_leading_zero_test(parser)
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
def read_no_leading_zero_write_leading_zero_test(parser)
|
99
|
+
reader = MARC::XMLReader.new('test/no-leading-zero.xml', :parser=>parser)
|
49
100
|
record = reader.to_a[0]
|
50
101
|
assert_equal("042 zz $a dc ", record['042'].to_s)
|
51
102
|
end
|
52
103
|
|
53
104
|
def test_leader_from_xml
|
54
|
-
|
105
|
+
@parsers.each do | parser |
|
106
|
+
puts "\nRunning test_leader_from_xml with: #{parser}.\n"
|
107
|
+
leader_from_xml_test(parser)
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
def leader_from_xml_test(parser)
|
112
|
+
reader = MARC::XMLReader.new('test/one.xml', :parser=>parser)
|
55
113
|
record = reader.entries[0]
|
56
114
|
assert_equal ' njm a22 uu 4500', record.leader
|
57
115
|
# serializing as MARC should populate the record length and directory offset
|
58
116
|
record = MARC::Record.new_from_marc(record.to_marc)
|
59
117
|
assert_equal '00734njm a2200217uu 4500', record.leader
|
60
118
|
end
|
61
|
-
|
119
|
+
|
62
120
|
def test_read_write
|
121
|
+
@parsers.each do | parser |
|
122
|
+
puts "\nRunning test_read_write with: #{parser}.\n"
|
123
|
+
read_write_test(parser)
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
def read_write_test(parser)
|
63
128
|
record1 = MARC::Record.new
|
64
129
|
record1.leader = '00925njm 22002777a 4500'
|
65
130
|
record1.append MARC::ControlField.new('007', 'sdubumennmplu')
|
@@ -74,7 +139,7 @@ class XMLTest < Test::Unit::TestCase
|
|
74
139
|
assert_match /<controlfield tag='007'>sdubumennmplu<\/controlfield>/, xml
|
75
140
|
assert_match /<\?xml-stylesheet type="text\/xsl" href="style.xsl"\?>/, xml
|
76
141
|
|
77
|
-
reader = MARC::XMLReader.new('test/test.xml')
|
142
|
+
reader = MARC::XMLReader.new('test/test.xml', :parser=>parser)
|
78
143
|
record2 = reader.entries[0]
|
79
144
|
assert_equal(record1, record2)
|
80
145
|
|
metadata
CHANGED
@@ -1,15 +1,18 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: marc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
+
- Kevin Clarke
|
8
|
+
- William Groppe
|
9
|
+
- Ross Singer
|
7
10
|
- Ed Summers
|
8
11
|
autorequire: marc
|
9
12
|
bindir: bin
|
10
13
|
cert_chain: []
|
11
14
|
|
12
|
-
date:
|
15
|
+
date: 2009-09-23 00:00:00 -04:00
|
13
16
|
default_executable:
|
14
17
|
dependencies: []
|
15
18
|
|
@@ -22,41 +25,45 @@ extensions: []
|
|
22
25
|
extra_rdoc_files: []
|
23
26
|
|
24
27
|
files:
|
25
|
-
- lib/marc.rb
|
26
|
-
- lib/marc
|
27
|
-
- lib/marc/xmlwriter.rb
|
28
|
+
- lib/marc/constants.rb
|
28
29
|
- lib/marc/controlfield.rb
|
29
|
-
- lib/marc/
|
30
|
+
- lib/marc/datafield.rb
|
31
|
+
- lib/marc/dublincore.rb
|
32
|
+
- lib/marc/exception.rb
|
30
33
|
- lib/marc/reader.rb
|
31
34
|
- lib/marc/record.rb
|
32
|
-
- lib/marc/exception.rb
|
33
|
-
- lib/marc/datafield.rb
|
34
35
|
- lib/marc/subfield.rb
|
35
|
-
- lib/marc/constants.rb
|
36
|
-
- lib/marc/dublincore.rb
|
37
36
|
- lib/marc/writer.rb
|
37
|
+
- lib/marc/xml_parsers.rb
|
38
|
+
- lib/marc/xmlreader.rb
|
39
|
+
- lib/marc/xmlwriter.rb
|
40
|
+
- lib/marc.rb
|
41
|
+
- test/batch.dat
|
42
|
+
- test/batch.xml
|
43
|
+
- test/no-leading-zero.xml
|
38
44
|
- test/non-numeric.dat
|
39
|
-
- test/tc_dublincore.rb
|
40
|
-
- test/tc_datafield.rb
|
41
45
|
- test/non-numeric.xml
|
42
|
-
- test/no-leading-zero.xml
|
43
|
-
- test/ts_marc.rb
|
44
|
-
- test/tc_writer.rb
|
45
|
-
- test/batch.xml
|
46
|
-
- test/tc_xml.rb
|
47
46
|
- test/one.dat
|
48
|
-
- test/tc_record.rb
|
49
47
|
- test/one.xml
|
50
|
-
- test/batch.dat
|
51
48
|
- test/tc_controlfield.rb
|
49
|
+
- test/tc_datafield.rb
|
50
|
+
- test/tc_dublincore.rb
|
51
|
+
- test/tc_marchash.rb
|
52
|
+
- test/tc_parsers.rb
|
52
53
|
- test/tc_reader.rb
|
54
|
+
- test/tc_record.rb
|
53
55
|
- test/tc_subfield.rb
|
56
|
+
- test/tc_writer.rb
|
57
|
+
- test/tc_xml.rb
|
58
|
+
- test/ts_marc.rb
|
54
59
|
- Rakefile
|
55
60
|
- README
|
56
61
|
- Changes
|
57
62
|
- LICENSE
|
58
63
|
has_rdoc: true
|
59
|
-
homepage: http://
|
64
|
+
homepage: http://marc.rubyforge.org/
|
65
|
+
licenses: []
|
66
|
+
|
60
67
|
post_install_message:
|
61
68
|
rdoc_options: []
|
62
69
|
|
@@ -77,9 +84,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
77
84
|
requirements: []
|
78
85
|
|
79
86
|
rubyforge_project:
|
80
|
-
rubygems_version: 1.3.
|
87
|
+
rubygems_version: 1.3.5
|
81
88
|
signing_key:
|
82
|
-
specification_version:
|
89
|
+
specification_version: 3
|
83
90
|
summary: A ruby library for working with Machine Readable Cataloging
|
84
91
|
test_files:
|
85
92
|
- test/ts_marc.rb
|