marc 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Changes +3 -0
- data/Rakefile +4 -4
- data/lib/marc.rb +8 -0
- data/lib/marc/constants.rb +4 -0
- data/lib/marc/controlfield.rb +29 -4
- data/lib/marc/datafield.rb +16 -6
- data/lib/marc/reader.rb +9 -3
- data/lib/marc/record.rb +33 -1
- data/lib/marc/xml_parsers.rb +288 -0
- data/lib/marc/xmlreader.rb +119 -80
- data/lib/marc/xmlwriter.rb +1 -1
- data/test/tc_controlfield.rb +25 -0
- data/test/tc_marchash.rb +37 -0
- data/test/tc_parsers.rb +154 -0
- data/test/tc_record.rb +6 -1
- data/test/tc_xml.rb +77 -12
- metadata +29 -22
data/Changes
CHANGED
data/Rakefile
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
RUBY_MARC_VERSION = '0.
|
1
|
+
RUBY_MARC_VERSION = '0.3.0'
|
2
2
|
|
3
3
|
require 'rubygems'
|
4
4
|
require 'rake'
|
@@ -21,7 +21,7 @@ spec = Gem::Specification.new do |s|
|
|
21
21
|
s.version = RUBY_MARC_VERSION
|
22
22
|
s.author = 'Ed Summers'
|
23
23
|
s.email = 'ehs@pobox.com'
|
24
|
-
s.homepage = 'http://
|
24
|
+
s.homepage = 'http://marc.rubyforge.org/'
|
25
25
|
s.platform = Gem::Platform::RUBY
|
26
26
|
s.summary = 'A ruby library for working with Machine Readable Cataloging'
|
27
27
|
s.files = Dir.glob("{lib,test}/**/*") + ["Rakefile", "README", "Changes",
|
@@ -30,7 +30,7 @@ spec = Gem::Specification.new do |s|
|
|
30
30
|
s.autorequire = 'marc'
|
31
31
|
s.has_rdoc = true
|
32
32
|
s.required_ruby_version = '>= 1.8.6'
|
33
|
-
|
33
|
+
s.authors = ["Kevin Clarke", "William Groppe", "Ross Singer", "Ed Summers"]
|
34
34
|
s.test_file = 'test/ts_marc.rb'
|
35
35
|
s.bindir = 'bin'
|
36
36
|
end
|
@@ -41,7 +41,7 @@ Rake::GemPackageTask.new(spec) do |pkg|
|
|
41
41
|
end
|
42
42
|
|
43
43
|
Rake::RDocTask.new('doc') do |rd|
|
44
|
-
rd.rdoc_files.include("lib/**/*.rb")
|
44
|
+
rd.rdoc_files.include("README", "Changes", "LICENSE", "lib/**/*.rb")
|
45
45
|
rd.main = 'MARC::Record'
|
46
46
|
rd.rdoc_dir = 'doc'
|
47
47
|
end
|
data/lib/marc.rb
CHANGED
@@ -24,6 +24,13 @@
|
|
24
24
|
# writer = MARC::XMLWriter.new('marc.xml')
|
25
25
|
# writer.write(record)
|
26
26
|
# writer.close()
|
27
|
+
#
|
28
|
+
# # Deal with non-standard control field tags
|
29
|
+
# MARC::ControlField.control_tags << 'FMT'
|
30
|
+
# record = MARC::Record.new()
|
31
|
+
# record.add_field(MARC::ControlField.new('FMT', 'Book')) # doesn't throw an error
|
32
|
+
|
33
|
+
|
27
34
|
|
28
35
|
require 'marc/constants'
|
29
36
|
require 'marc/record'
|
@@ -36,3 +43,4 @@ require 'marc/exception'
|
|
36
43
|
require 'marc/xmlwriter'
|
37
44
|
require 'marc/xmlreader'
|
38
45
|
require 'marc/dublincore'
|
46
|
+
require 'marc/xml_parsers'
|
data/lib/marc/constants.rb
CHANGED
data/lib/marc/controlfield.rb
CHANGED
@@ -1,10 +1,29 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
1
3
|
module MARC
|
2
4
|
|
3
5
|
# MARC records contain control fields, each of which has a
|
4
6
|
# tag and value. Tags for control fields must be in the
|
5
|
-
# 001-009 range
|
7
|
+
# 001-009 range or be specially added to the @@control_tags Set
|
6
8
|
|
7
9
|
class ControlField
|
10
|
+
|
11
|
+
# Initially, control tags are the numbers 1 through 9 or the string '000'
|
12
|
+
@@control_tags = Set.new( (1..9).to_a)
|
13
|
+
@@control_tags << '000'
|
14
|
+
|
15
|
+
def self.control_tags
|
16
|
+
return @@control_tags
|
17
|
+
end
|
18
|
+
|
19
|
+
# A tag is a control tag if it is a member of the @@control_tags set
|
20
|
+
# as either a string (e.g., 'FMT') or in its .to_i representation
|
21
|
+
# (e.g., '008'.to_i == 8 is in @@control_tags by default)
|
22
|
+
|
23
|
+
def self.control_tag?(tag)
|
24
|
+
return (@@control_tags.include?(tag.to_i) or @@control_tags.include?(tag))
|
25
|
+
end
|
26
|
+
|
8
27
|
|
9
28
|
# the tag value (007, 008, etc)
|
10
29
|
attr_accessor :tag
|
@@ -18,8 +37,8 @@ module MARC
|
|
18
37
|
def initialize(tag,value='')
|
19
38
|
@tag = tag
|
20
39
|
@value = value
|
21
|
-
if tag
|
22
|
-
raise MARC::Exception.new(), "tag must be
|
40
|
+
if not MARC::ControlField.control_tag?(@tag)
|
41
|
+
raise MARC::Exception.new(), "tag must be in 001-009 or in the MARC::ControlField.control_tags set"
|
23
42
|
end
|
24
43
|
end
|
25
44
|
|
@@ -34,13 +53,19 @@ module MARC
|
|
34
53
|
return true
|
35
54
|
end
|
36
55
|
|
56
|
+
# turning it into a marc-hash element
|
57
|
+
def to_marchash
|
58
|
+
return [@tag, @value]
|
59
|
+
end
|
60
|
+
|
61
|
+
|
37
62
|
def to_s
|
38
63
|
return "#{tag} #{value}"
|
39
64
|
end
|
40
65
|
|
41
66
|
def =~(regex)
|
42
67
|
return self.to_s =~ regex
|
43
|
-
end
|
68
|
+
end
|
44
69
|
|
45
70
|
end
|
46
71
|
|
data/lib/marc/datafield.rb
CHANGED
@@ -1,11 +1,14 @@
|
|
1
1
|
require 'marc/subfield'
|
2
2
|
require 'marc/record'
|
3
|
+
require 'marc/controlfield'
|
3
4
|
|
4
5
|
module MARC
|
5
6
|
|
6
7
|
# MARC records contain data fields, each of which has a tag,
|
7
|
-
# indicators and subfields. Tags for data fields must
|
8
|
-
#
|
8
|
+
# indicators and subfields. Tags for data fields are all
|
9
|
+
# three-character tags that are not control fields (generally,
|
10
|
+
# any numeric tag greater than 009).
|
11
|
+
#
|
9
12
|
# Accessor attributes: tag, indicator1, indicator2
|
10
13
|
#
|
11
14
|
# DataField mixes in Enumerable to enable access to its constituent
|
@@ -63,10 +66,12 @@ module MARC
|
|
63
66
|
@indicator2 = i2 == nil ? ' ' : i2
|
64
67
|
@subfields = []
|
65
68
|
|
66
|
-
# must use MARC::ControlField for tags < 010
|
67
|
-
|
69
|
+
# must use MARC::ControlField for tags < 010 or
|
70
|
+
# those in the MARC::ControlField.control_tags set
|
71
|
+
|
72
|
+
if MARC::ControlField.control_tag?(@tag)
|
68
73
|
raise MARC::Exception.new(),
|
69
|
-
"MARC::DataField objects can't have
|
74
|
+
"MARC::DataField objects can't have ControlField tag '" + @tag + "')"
|
70
75
|
end
|
71
76
|
|
72
77
|
# allows MARC::Subfield objects to be passed directly
|
@@ -78,7 +83,7 @@ module MARC
|
|
78
83
|
when Array
|
79
84
|
if subfield.length > 2
|
80
85
|
raise MARC::Exception.new(),
|
81
|
-
"arrays must only have 2 elements"
|
86
|
+
"arrays must only have 2 elements: " + subfield.to_s
|
82
87
|
end
|
83
88
|
@subfields.push(
|
84
89
|
MARC::Subfield.new(subfield[0],subfield[1]))
|
@@ -100,6 +105,11 @@ module MARC
|
|
100
105
|
return str
|
101
106
|
end
|
102
107
|
|
108
|
+
# Turn into a marc-hash structure
|
109
|
+
def to_marchash
|
110
|
+
return [@tag, @indicator1, @indicator2, @subfields.map {|sf| [sf.code, sf.value]} ]
|
111
|
+
end
|
112
|
+
|
103
113
|
|
104
114
|
# Add a subfield (MARC::Subfield) to the field
|
105
115
|
# field.append(MARC::Subfield.new('a','Dave Thomas'))
|
data/lib/marc/reader.rb
CHANGED
@@ -16,9 +16,15 @@ module MARC
|
|
16
16
|
#
|
17
17
|
# # marc is a string with a bunch of records in it
|
18
18
|
# reader = MARC::Reader.new(StringIO.new(reader))
|
19
|
+
#
|
20
|
+
# If your data have non-standard control fields in them
|
21
|
+
# (e.g., Aleph's 'FMT') you need to add them specifically
|
22
|
+
# to the MARC::ControlField.control_tags Set object
|
23
|
+
#
|
24
|
+
# MARC::ControlField.control_tags << 'FMT'
|
19
25
|
|
20
26
|
def initialize(file)
|
21
|
-
if file.
|
27
|
+
if file.is_a?(String)
|
22
28
|
@handle = File.new(file)
|
23
29
|
elsif file.respond_to?("read", 5)
|
24
30
|
@handle = file
|
@@ -40,7 +46,7 @@ module MARC
|
|
40
46
|
while rec_length_s = @handle.read(5)
|
41
47
|
# make sure the record length looks like an integer
|
42
48
|
rec_length_i = rec_length_s.to_i
|
43
|
-
if rec_length_i == 0
|
49
|
+
if rec_length_i == 0
|
44
50
|
raise MARC::Exception.new("invalid record length: #{rec_length_s}")
|
45
51
|
end
|
46
52
|
|
@@ -113,7 +119,7 @@ module MARC
|
|
113
119
|
field_data.delete!(END_OF_FIELD)
|
114
120
|
|
115
121
|
# add a control field or data field
|
116
|
-
if tag
|
122
|
+
if MARC::ControlField.control_tag?(tag)
|
117
123
|
record.append(MARC::ControlField.new(tag,field_data))
|
118
124
|
else
|
119
125
|
field = MARC::DataField.new(tag)
|
data/lib/marc/record.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
require 'marc/controlfield'
|
2
|
+
require 'marc/datafield'
|
3
|
+
|
1
4
|
module MARC
|
2
5
|
|
3
6
|
# A class that represents an individual MARC record. Every record
|
@@ -118,11 +121,40 @@ module MARC
|
|
118
121
|
return MARC::DublinCore.map(self)
|
119
122
|
end
|
120
123
|
|
124
|
+
# Return a marc-hash version of the record
|
125
|
+
def to_marchash
|
126
|
+
return {
|
127
|
+
'type' => 'marc-hash',
|
128
|
+
'version' => [MARCHASH_MAJOR_VERSION, MARCHASH_MINOR_VERSION],
|
129
|
+
'leader' => self.leader,
|
130
|
+
'fields' => self.map {|f| f.to_marchash}
|
131
|
+
}
|
132
|
+
end #to_hash
|
133
|
+
|
134
|
+
# Factory method for creating a new MARC::Record from
|
135
|
+
# a marchash object
|
136
|
+
#
|
137
|
+
# record = MARC::Record.new_from_marchash(mh)
|
138
|
+
|
139
|
+
# Factory method that rebuilds a record from a marc-hash structure.
#
# mh is read through the string keys 'leader' and 'fields'. Every entry
# of mh['fields'] is an array: a two-element entry is taken as
# [tag, value] and becomes a MARC::ControlField; any other entry is taken
# as [tag, indicator1, indicator2, subfields] and becomes a
# MARC::DataField, with the subfields array splatted into the constructor.
#
# Returns the newly built record.
def self.new_from_marchash(mh)
  r = self.new()
  r.leader = mh['leader']
  mh['fields'].each do |f|
    if f.length == 2
      r << MARC::ControlField.new(f[0], f[1])
    else
      # This used to be a bare `elsif`, which made Ruby parse the append
      # below as the branch condition; `else` states the intent directly.
      r << MARC::DataField.new(f[0], f[1], f[2], *f[3])
    end
  end
  return r
end
|
151
|
+
|
152
|
+
|
121
153
|
# Returns a string version of the record, suitable for printing
|
122
154
|
|
123
155
|
def to_s
|
124
156
|
str = "LEADER #{leader}\n"
|
125
|
-
for field in fields
|
157
|
+
for field in fields
|
126
158
|
str += field.to_s() + "\n"
|
127
159
|
end
|
128
160
|
return str
|
@@ -0,0 +1,288 @@
|
|
1
|
+
module MARC
|
2
|
+
# The MagicReader will try to use the best available XML Parser at the
|
3
|
+
# time of initialization.
|
4
|
+
# The order is currently:
|
5
|
+
# * Nokogiri
|
6
|
+
# * jrexml (JRuby only)
|
7
|
+
# * rexml
|
8
|
+
#
|
9
|
+
# With the idea that other parsers could be added as their modules are
|
10
|
+
# added. Realistically, this list should be limited to stream-based
|
11
|
+
# parsers. The magic should be used selectively, however. After all,
|
12
|
+
# one project's definition of 'best' might not apply universally. It
|
13
|
+
# is arguable which is "best" on JRuby: Nokogiri or jrexml.
|
14
|
+
module MagicReader
  # Hook run when a reader is extended with MagicReader. Mixes in the
  # first parser module that can be loaded: NokogiriReader if
  # `require 'nokogiri'` succeeds, otherwise JREXMLReader when running on
  # a java platform, otherwise REXMLReader.
  def self.extended(receiver)
    require 'nokogiri'
    receiver.extend(NokogiriReader)
  rescue LoadError
    running_on_java = RUBY_PLATFORM =~ /java/
    if running_on_java
      begin
        receiver.extend(JREXMLReader)
        return
      rescue LoadError
        # JREXML could not be loaded; fall through to plain REXML below.
      end
    end
    # Last resort: the REXML-based reader.
    receiver.extend(REXMLReader)
  end
end
|
34
|
+
|
35
|
+
# NokogiriReader uses the Nokogiri SAX Parser to quickly read
|
36
|
+
# a MARCXML document. Because dynamically subclassing MARC::XMLReader
|
37
|
+
# is a little ugly, we need to recreate all of the SAX event methods
|
38
|
+
# from Nokogiri::XML::SAX::Document here rather than subclassing.
|
39
|
+
module NokogiriReader
|
40
|
+
def self.extended(receiver)
|
41
|
+
require 'nokogiri'
|
42
|
+
receiver.init
|
43
|
+
end
|
44
|
+
|
45
|
+
# Sets our instance variables for SAX parsing in Nokogiri and parser
|
46
|
+
def init
|
47
|
+
@record = {:record=>nil,:field=>nil,:subfield=>nil}
|
48
|
+
@current_element = nil
|
49
|
+
@ns = "http://www.loc.gov/MARC21/slim"
|
50
|
+
@parser = Nokogiri::XML::SAX::Parser.new(self)
|
51
|
+
end
|
52
|
+
|
53
|
+
# Loop through the MARC records in the XML document
|
54
|
+
def each(&block)
|
55
|
+
@block = block
|
56
|
+
@parser.parse(@handle)
|
57
|
+
end
|
58
|
+
|
59
|
+
# Returns our MARC::Record object to the #each block.
|
60
|
+
def yield_record
|
61
|
+
@block.call(@record[:record])
|
62
|
+
@record[:record] = nil
|
63
|
+
end
|
64
|
+
|
65
|
+
def start_element_namespace name, attributes = [], prefix = nil, uri = nil, ns = {}
|
66
|
+
attributes = attributes_to_hash(attributes)
|
67
|
+
if uri == @ns
|
68
|
+
case name.downcase
|
69
|
+
when 'record' then @record[:record] = MARC::Record.new
|
70
|
+
when 'leader' then @current_element = :leader
|
71
|
+
when 'controlfield'
|
72
|
+
@current_element=:field
|
73
|
+
@record[:field] = MARC::ControlField.new(attributes["tag"])
|
74
|
+
when 'datafield'
|
75
|
+
@record[:field] = MARC::DataField.new(attributes["tag"], attributes['ind1'], attributes['ind2'])
|
76
|
+
when 'subfield'
|
77
|
+
@current_element=:subfield
|
78
|
+
@record[:subfield] = MARC::Subfield.new(attributes['code'])
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
def characters text
|
84
|
+
case @current_element
|
85
|
+
when :leader then @record[:record].leader = text
|
86
|
+
when :field then @record[:field].value << text
|
87
|
+
when :subfield then @record[:subfield].value << text
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
def end_element_namespace name, prefix = nil, uri = nil
|
92
|
+
@current_element = nil
|
93
|
+
if uri == "http://www.loc.gov/MARC21/slim"
|
94
|
+
case name.downcase
|
95
|
+
when 'record' then yield_record
|
96
|
+
when /(control|data)field/
|
97
|
+
@record[:record] << @record[:field]
|
98
|
+
@record[:field] = nil
|
99
|
+
@current_element = nil if @current_element == :field
|
100
|
+
when 'subfield'
|
101
|
+
@record[:field].append(@record[:subfield])
|
102
|
+
@record[:subfield] = nil
|
103
|
+
@current_element = nil if @current_element == :subfield
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
# Quietly accepts the SAX callbacks this reader does not implement
# itself (returning nil) and raises NoMethodError for any other unknown
# method, reporting the real method name.
def method_missing(methName, *args)
  sax_methods = [:xmldecl, :start_document, :end_document, :start_element,
    :end_element, :comment, :warning, :error, :cdata_block]
  unless sax_methods.include?(methName)
    # The message now closes the quote around the method name, and the
    # exception carries the actual name instead of a placeholder string.
    raise NoMethodError.new("undefined method '#{methName}' for #{self}", methName)
  end
end
|
115
|
+
|
116
|
+
private
|
117
|
+
|
118
|
+
# Folds a list of attribute objects into a Hash that maps each
# attribute's localname to its value. A later attribute with the same
# localname overwrites an earlier one.
def attributes_to_hash(attributes)
  attributes.inject({}) do |by_name, attribute|
    by_name[attribute.localname] = attribute.value
    by_name
  end
end
|
125
|
+
end
|
126
|
+
|
127
|
+
# The REXMLReader is the 'default' parser, since we can at least be
|
128
|
+
# assured that REXML is probably there. It uses REXML's PullParser
|
129
|
+
# to handle larger document sizes without consuming insane amounts of
|
130
|
+
# memory, but it's still REXML (read: slow), so it's a good idea to
|
131
|
+
# use an alternative parser if available. If you don't know the best
|
132
|
+
# parser available, you can use the MagicReader or set:
|
133
|
+
#
|
134
|
+
# MARC::XMLReader.parser=MARC::XMLReader::USE_BEST_AVAILABLE
|
135
|
+
#
|
136
|
+
# or
|
137
|
+
#
|
138
|
+
# MARC::XMLReader.parser="magic"
|
139
|
+
#
|
140
|
+
# or
|
141
|
+
#
|
142
|
+
# reader = MARC::XMLReader.new(fh, :parser=>"magic")
|
143
|
+
# (or the constant)
|
144
|
+
#
|
145
|
+
# which will cascade down to REXML if nothing better is found.
|
146
|
+
#
|
147
|
+
module REXMLReader
|
148
|
+
def self.extended(receiver)
|
149
|
+
require 'rexml/document'
|
150
|
+
require 'rexml/parsers/pullparser'
|
151
|
+
receiver.init
|
152
|
+
end
|
153
|
+
|
154
|
+
# Sets our parser
|
155
|
+
def init
|
156
|
+
@parser = REXML::Parsers::PullParser.new(@handle)
|
157
|
+
end
|
158
|
+
|
159
|
+
# Loop through the MARC records in the XML document
|
160
|
+
def each
|
161
|
+
while @parser.has_next?
|
162
|
+
event = @parser.pull
|
163
|
+
# if it's the start of a record element
|
164
|
+
if event.start_element? and strip_ns(event[0]) == 'record'
|
165
|
+
yield build_record
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
170
|
+
private
|
171
|
+
# Drops everything up to and including the last colon on the first
# line that contains one, turning e.g. 'marc:record' into 'record'.
# A name without a colon is returned unchanged.
def strip_ns(str)
  str.sub(/^.*:/, '')
end
|
174
|
+
|
175
|
+
# will accept parse events until a record has been built up
|
176
|
+
#
|
177
|
+
def build_record
|
178
|
+
record = MARC::Record.new
|
179
|
+
data_field = nil
|
180
|
+
control_field = nil
|
181
|
+
subfield = nil
|
182
|
+
text = ''
|
183
|
+
attrs = nil
|
184
|
+
if Module.constants.index('Nokogiri') and @parser.is_a?(Nokogiri::XML::Reader)
|
185
|
+
datafield = nil
|
186
|
+
cursor = nil
|
187
|
+
open_elements = []
|
188
|
+
@parser.each do | node |
|
189
|
+
if node.value? && cursor
|
190
|
+
if cursor.is_a?(Symbol) and cursor == :leader
|
191
|
+
record.leader = node.value
|
192
|
+
else
|
193
|
+
cursor.value = node.value
|
194
|
+
end
|
195
|
+
cursor = nil
|
196
|
+
end
|
197
|
+
next unless node.namespace_uri == @ns
|
198
|
+
if open_elements.index(node.local_name.downcase)
|
199
|
+
open_elements.delete(node.local_name.downcase)
|
200
|
+
next
|
201
|
+
else
|
202
|
+
open_elements << node.local_name.downcase
|
203
|
+
end
|
204
|
+
case node.local_name.downcase
|
205
|
+
when "leader"
|
206
|
+
cursor = :leader
|
207
|
+
when "controlfield"
|
208
|
+
record << datafield if datafield
|
209
|
+
datafield = nil
|
210
|
+
control_field = MARC::ControlField.new(node.attribute('tag'))
|
211
|
+
record << control_field
|
212
|
+
cursor = control_field
|
213
|
+
when "datafield"
|
214
|
+
record << datafield if datafield
|
215
|
+
datafield = nil
|
216
|
+
data_field = MARC::DataField.new(node.attribute('tag'), node.attribute('ind1'), node.attribute('ind2'))
|
217
|
+
datafield = data_field
|
218
|
+
when "subfield"
|
219
|
+
raise "No datafield to add to" unless datafield
|
220
|
+
subfield = MARC::Subfield.new(node.attribute('code'))
|
221
|
+
datafield.append(subfield)
|
222
|
+
cursor = subfield
|
223
|
+
when "record"
|
224
|
+
record << datafield if datafield
|
225
|
+
return record
|
226
|
+
end
|
227
|
+
#puts node.name
|
228
|
+
end
|
229
|
+
|
230
|
+
else
|
231
|
+
while @parser.has_next?
|
232
|
+
event = @parser.pull
|
233
|
+
|
234
|
+
if event.text?
|
235
|
+
text += REXML::Text::unnormalize(event[0])
|
236
|
+
next
|
237
|
+
end
|
238
|
+
|
239
|
+
if event.start_element?
|
240
|
+
text = ''
|
241
|
+
attrs = event[1]
|
242
|
+
case strip_ns(event[0])
|
243
|
+
when 'controlfield'
|
244
|
+
text = ''
|
245
|
+
control_field = MARC::ControlField.new(attrs['tag'])
|
246
|
+
when 'datafield'
|
247
|
+
text = ''
|
248
|
+
data_field = MARC::DataField.new(attrs['tag'], attrs['ind1'],
|
249
|
+
attrs['ind2'])
|
250
|
+
when 'subfield'
|
251
|
+
text = ''
|
252
|
+
subfield = MARC::Subfield.new(attrs['code'])
|
253
|
+
end
|
254
|
+
end
|
255
|
+
|
256
|
+
if event.end_element?
|
257
|
+
case strip_ns(event[0])
|
258
|
+
when 'leader'
|
259
|
+
record.leader = text
|
260
|
+
when 'record'
|
261
|
+
return record
|
262
|
+
when 'controlfield'
|
263
|
+
control_field.value = text
|
264
|
+
record.append(control_field)
|
265
|
+
when 'datafield'
|
266
|
+
record.append(data_field)
|
267
|
+
when 'subfield'
|
268
|
+
subfield.value = text
|
269
|
+
data_field.append(subfield)
|
270
|
+
end
|
271
|
+
end
|
272
|
+
end
|
273
|
+
end
|
274
|
+
end
|
275
|
+
end
|
276
|
+
|
277
|
+
# The JREXMLReader is really just here to set the load order for
|
278
|
+
# injecting the Java pull parser.
|
279
|
+
module JREXMLReader
|
280
|
+
|
281
|
+
def self.extended(receiver)
|
282
|
+
require 'rexml/document'
|
283
|
+
require 'rexml/parsers/pullparser'
|
284
|
+
require 'jrexml'
|
285
|
+
receiver.extend(REXMLReader)
|
286
|
+
end
|
287
|
+
end
|
288
|
+
end
|
data/lib/marc/xmlreader.rb
CHANGED
@@ -1,103 +1,142 @@
|
|
1
|
-
require '
|
2
|
-
require 'rexml/parsers/pullparser'
|
3
|
-
|
1
|
+
require File.dirname(__FILE__) + '/xml_parsers'
|
4
2
|
module MARC
|
5
|
-
|
3
|
+
|
4
|
+
# the constructor which you can pass either a filename:
|
5
|
+
#
|
6
|
+
# reader = MARC::XMLReader.new('/Users/edsu/marc.xml')
|
7
|
+
#
|
8
|
+
# or a File object,
|
9
|
+
#
|
10
|
+
# reader = MARC::XMLReader.new(File.new('/Users/edsu/marc.xml'))
|
11
|
+
#
|
12
|
+
# or really any object that responds to read(n)
|
13
|
+
#
|
14
|
+
# reader = MARC::XMLReader.new(StringIO.new(xml))
|
15
|
+
#
|
16
|
+
# By default, XMLReader uses REXML's pull parser, but you can swap
|
17
|
+
# that out with Nokogiri or jrexml (or let the system choose the
|
18
|
+
# 'best' one). The :parser can either be one of the defined constants
|
19
|
+
# or the constant's value.
|
20
|
+
#
|
21
|
+
# reader = MARC::XMLReader.new(fh, :parser=>'magic')
|
22
|
+
#
|
23
|
+
# It is also possible to set the default parser at the class level so
|
24
|
+
# all subsequent instances will use it instead:
|
25
|
+
#
|
26
|
+
# MARC::XMLReader.best_available
|
27
|
+
# "nokogiri" # returns parser name, but doesn't set it.
|
28
|
+
#
|
29
|
+
# Use:
|
30
|
+
# MARC::XMLReader.best_available!
|
31
|
+
#
|
32
|
+
# or
|
33
|
+
# MARC::XMLReader.nokogiri!
|
34
|
+
#
|
6
35
|
class XMLReader
|
7
36
|
include Enumerable
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
#
|
15
|
-
# reader = Marc::XMLReader.new(File.new('/Users/edsu/marc.xml'))
|
16
|
-
#
|
17
|
-
# or really any object that responds to read(n)
|
18
|
-
#
|
19
|
-
# reader = MARC::XMLReader.new(StringIO.new(xml))
|
37
|
+
USE_BEST_AVAILABLE = 'magic'
|
38
|
+
USE_REXML = 'rexml'
|
39
|
+
USE_NOKOGIRI = 'nokogiri'
|
40
|
+
USE_JREXML = 'jrexml'
|
41
|
+
@@parser = USE_REXML
|
42
|
+
attr_reader :parser
|
20
43
|
|
21
|
-
def initialize(file)
|
22
|
-
if file.
|
44
|
+
def initialize(file, options = {})
|
45
|
+
if file.is_a?(String)
|
23
46
|
handle = File.new(file)
|
24
47
|
elsif file.respond_to?("read", 5)
|
25
48
|
handle = file
|
26
49
|
else
|
27
50
|
throw "must pass in path or File"
|
28
51
|
end
|
52
|
+
@handle = handle
|
29
53
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
54
|
+
if options[:parser]
|
55
|
+
parser = self.class.choose_parser(options[:parser].to_s)
|
56
|
+
else
|
57
|
+
parser = @@parser
|
58
|
+
end
|
59
|
+
case parser
|
60
|
+
when 'magic' then extend MagicReader
|
61
|
+
when 'rexml' then extend REXMLReader
|
62
|
+
when 'jrexml' then extend JREXMLReader
|
63
|
+
when 'nokogiri' then extend NokogiriReader
|
40
64
|
end
|
41
65
|
end
|
42
66
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
67
|
+
# Returns the currently set parser type
|
68
|
+
def self.parser
|
69
|
+
return @@parser
|
70
|
+
end
|
71
|
+
|
72
|
+
# Returns an array of all the parsers available
|
73
|
+
# Returns an array holding the names of every constant on this class
# whose name starts with "USE_", i.e. the parser selectors.
def self.parsers
  self.constants.select { |const| const.match("^USE_") }
end
|
81
|
+
|
82
|
+
# Sets the class parser
|
83
|
+
def self.parser=(p)
|
84
|
+
@@parser = choose_parser(p)
|
47
85
|
end
|
48
86
|
|
49
|
-
#
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
if event.text?
|
63
|
-
text += REXML::Text::unnormalize(event[0])
|
64
|
-
next
|
65
|
-
end
|
66
|
-
|
67
|
-
if event.start_element?
|
68
|
-
text = ''
|
69
|
-
attrs = event[1]
|
70
|
-
case strip_ns(event[0])
|
71
|
-
when 'controlfield'
|
72
|
-
text = ''
|
73
|
-
control_field = MARC::ControlField.new(attrs['tag'])
|
74
|
-
when 'datafield'
|
75
|
-
text = ''
|
76
|
-
data_field = MARC::DataField.new(attrs['tag'], attrs['ind1'],
|
77
|
-
attrs['ind2'])
|
78
|
-
when 'subfield'
|
79
|
-
text = ''
|
80
|
-
subfield = MARC::Subfield.new(attrs['code'])
|
87
|
+
# Returns the value of the best available parser
|
88
|
+
def self.best_available
|
89
|
+
parser = nil
|
90
|
+
begin
|
91
|
+
require 'nokogiri'
|
92
|
+
parser = USE_NOKOGIRI
|
93
|
+
rescue LoadError
|
94
|
+
if RUBY_PLATFORM =~ /java/
|
95
|
+
begin
|
96
|
+
require 'jrexml'
|
97
|
+
parser = USE_JREXML
|
98
|
+
rescue LoadError
|
99
|
+
parser = USE_REXML
|
81
100
|
end
|
101
|
+
else
|
102
|
+
parser = USE_REXML
|
82
103
|
end
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
104
|
+
parser
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
# Sets the best available parser as the default
|
109
|
+
def self.best_available!
|
110
|
+
@@parser = self.best_available
|
111
|
+
end
|
112
|
+
|
113
|
+
# Sets Nokogiri as the default parser
|
114
|
+
def self.nokogiri!
|
115
|
+
@@parser = USE_NOKOGIRI
|
116
|
+
end
|
117
|
+
|
118
|
+
# Sets jrexml as the default parser
|
119
|
+
def self.jrexml!
|
120
|
+
@@parser = USE_JREXML
|
121
|
+
end
|
122
|
+
|
123
|
+
# Sets REXML as the default parser
|
124
|
+
def self.rexml!
|
125
|
+
@@parser = USE_REXML
|
126
|
+
end
|
127
|
+
|
128
|
+
protected
|
129
|
+
|
130
|
+
# Validates a requested parser name against the values of this class's
# USE_* constants.
#
# p may be a String or a Symbol; it is compared through its to_s form.
# Returns the matching constant value (the parser name as a String).
# Raises ArgumentError when no USE_* constant has that value.
def self.choose_parser(p)
  wanted = p.to_s
  self.constants.each do |const|
    next unless const.to_s.match("^USE_")
    value = self.const_get(const)
    return value if value == wanted
  end
  # Only reached when nothing matched, so no tracking flag is needed.
  raise ArgumentError.new("Parser '#{p}' not defined")
end
|
102
141
|
end
|
103
142
|
end
|
data/lib/marc/xmlwriter.rb
CHANGED
@@ -137,7 +137,7 @@ module MARC
|
|
137
137
|
control_element = REXML::Element.new("controlfield")
|
138
138
|
|
139
139
|
# We need a marker for invalid tag values (we use 000)
|
140
|
-
unless field.tag.match(ctrlFieldTag)
|
140
|
+
unless field.tag.match(ctrlFieldTag) or MARC::Field.control_tag?(ctrlFieldTag)
|
141
141
|
field.tag = "00z"
|
142
142
|
end
|
143
143
|
|
data/test/tc_controlfield.rb
CHANGED
@@ -15,6 +15,31 @@ class TestField < Test::Unit::TestCase
|
|
15
15
|
end
|
16
16
|
end
|
17
17
|
|
18
|
+
def test_alpha_control_field
|
19
|
+
assert_raise(MARC::Exception) do
|
20
|
+
# an alphabetic tag that has not been added to control_tags is rejected
|
21
|
+
field = MARC::ControlField.new('DDD')
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_extra_control_field
|
26
|
+
MARC::ControlField.control_tags << 'FMT'
|
27
|
+
assert_nothing_raised do
|
28
|
+
field = MARC::ControlField.new('FMT')
|
29
|
+
end
|
30
|
+
assert_raise(MARC::Exception) do
|
31
|
+
field = MARC::DataField.new('FMT')
|
32
|
+
end
|
33
|
+
MARC::ControlField.control_tags.delete('FMT')
|
34
|
+
assert_nothing_raised do
|
35
|
+
field = MARC::DataField.new('FMT')
|
36
|
+
end
|
37
|
+
assert_raise(MARC::Exception) do
|
38
|
+
field = MARC::ControlField.new('FMT')
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
42
|
+
|
18
43
|
def test_control_as_field
|
19
44
|
assert_raise(MARC::Exception) do
|
20
45
|
# can't have a control with a tag > 009
|
data/test/tc_marchash.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'marc'
|
3
|
+
require 'rubygems'
|
4
|
+
|
5
|
+
class TestMARCHASH < Test::Unit::TestCase
|
6
|
+
|
7
|
+
def test_simple
|
8
|
+
simple = {
|
9
|
+
'type' => 'marc-hash',
|
10
|
+
'version' => [1,0],
|
11
|
+
'leader' => 'LEADER',
|
12
|
+
'fields' => [
|
13
|
+
['245', '1', '0',
|
14
|
+
[
|
15
|
+
['a', 'TITLE'],
|
16
|
+
['b', 'SUBTITLE']
|
17
|
+
]
|
18
|
+
]
|
19
|
+
]
|
20
|
+
}
|
21
|
+
r = MARC::Record.new()
|
22
|
+
r.leader = 'LEADER'
|
23
|
+
f = MARC::DataField.new('245', '1', '0', ['a', 'TITLE'], ['b', 'SUBTITLE'])
|
24
|
+
r << f
|
25
|
+
assert_equal(r.to_marchash, simple)
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_real
|
29
|
+
reader = MARC::Reader.new('test/batch.dat')
|
30
|
+
reader.each do |r|
|
31
|
+
x = MARC::Record.new_from_marchash(r.to_marchash)
|
32
|
+
assert_equal(r,x)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
|
37
|
+
end
|
data/test/tc_parsers.rb
ADDED
@@ -0,0 +1,154 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'marc'
|
3
|
+
|
4
|
+
class ParsersTest < Test::Unit::TestCase
|
5
|
+
def test_parser_default
|
6
|
+
assert_equal("rexml", MARC::XMLReader.parser)
|
7
|
+
reader = MARC::XMLReader.new('test/one.xml')
|
8
|
+
assert_kind_of(REXML::Parsers::PullParser, reader.parser)
|
9
|
+
end
|
10
|
+
|
11
|
+
def test_set_nokogiri
|
12
|
+
begin
|
13
|
+
require 'nokogiri'
|
14
|
+
assert_equal("rexml", MARC::XMLReader.parser)
|
15
|
+
reader = MARC::XMLReader.new('test/one.xml')
|
16
|
+
assert_kind_of(REXML::Parsers::PullParser, reader.parser)
|
17
|
+
reader = MARC::XMLReader.new('test/one.xml', :parser=>MARC::XMLReader::USE_NOKOGIRI)
|
18
|
+
assert_kind_of(Nokogiri::XML::SAX::Parser, reader.parser)
|
19
|
+
assert_equal("rexml", MARC::XMLReader.parser)
|
20
|
+
reader = MARC::XMLReader.new('test/one.xml', :parser=>'nokogiri')
|
21
|
+
assert_kind_of(Nokogiri::XML::SAX::Parser, reader.parser)
|
22
|
+
assert_equal("rexml", MARC::XMLReader.parser)
|
23
|
+
MARC::XMLReader.parser=MARC::XMLReader::USE_NOKOGIRI
|
24
|
+
assert_equal("nokogiri", MARC::XMLReader.parser)
|
25
|
+
reader = MARC::XMLReader.new('test/one.xml')
|
26
|
+
assert_kind_of(Nokogiri::XML::SAX::Parser, reader.parser)
|
27
|
+
MARC::XMLReader.parser="nokogiri"
|
28
|
+
assert_equal("nokogiri", MARC::XMLReader.parser)
|
29
|
+
reader = MARC::XMLReader.new('test/one.xml')
|
30
|
+
assert_kind_of(Nokogiri::XML::SAX::Parser, reader.parser)
|
31
|
+
rescue LoadError
|
32
|
+
puts "\nNokogiri not available, skipping 'test_set_nokogiri'.\n"
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_set_jrexml
|
37
|
+
if RUBY_PLATFORM =~ /java/
|
38
|
+
begin
|
39
|
+
require 'jrexml'
|
40
|
+
reader = MARC::XMLReader.new('test/one.xml', :parser=>MARC::XMLReader::USE_JREXML)
|
41
|
+
assert_kind_of(REXML::Parsers::PullParser, reader.parser)
|
42
|
+
assert_equal("rexml", MARC::XMLReader.parser)
|
43
|
+
reader = MARC::XMLReader.new('test/one.xml', :parser=>'jrexml')
|
44
|
+
assert_kind_of(REXML::Parsers::PullParser, reader.parser)
|
45
|
+
assert_equal("rexml", MARC::XMLReader.parser)
|
46
|
+
MARC::XMLReader.parser=MARC::XMLReader::USE_JREXML
|
47
|
+
assert_equal("jrexml", MARC::XMLReader.parser)
|
48
|
+
reader = MARC::XMLReader.new('test/one.xml')
|
49
|
+
assert_kind_of(REXML::Parsers::PullParser, reader.parser)
|
50
|
+
MARC::XMLReader.parser="jrexml"
|
51
|
+
assert_equal("jrexml", MARC::XMLReader.parser)
|
52
|
+
reader = MARC::XMLReader.new('test/one.xml')
|
53
|
+
assert_kind_of(REXML::Parsers::PullParser, reader.parser)
|
54
|
+
rescue LoadError
|
55
|
+
puts "\njrexml not available, skipping 'test_set_jrexml'.\n"
|
56
|
+
end
|
57
|
+
else
|
58
|
+
puts "\nTest not being run from JRuby, skipping 'test_set_jrexml'.\n"
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def test_set_rexml
|
63
|
+
reader = MARC::XMLReader.new('test/one.xml', :parser=>MARC::XMLReader::USE_REXML)
|
64
|
+
assert_kind_of(REXML::Parsers::PullParser, reader.parser)
|
65
|
+
assert_equal("rexml", MARC::XMLReader.parser)
|
66
|
+
reader = MARC::XMLReader.new('test/one.xml', :parser=>'rexml')
|
67
|
+
assert_kind_of(REXML::Parsers::PullParser, reader.parser)
|
68
|
+
assert_equal("rexml", MARC::XMLReader.parser)
|
69
|
+
MARC::XMLReader.parser=MARC::XMLReader::USE_REXML
|
70
|
+
assert_equal("rexml", MARC::XMLReader.parser)
|
71
|
+
reader = MARC::XMLReader.new('test/one.xml')
|
72
|
+
assert_kind_of(REXML::Parsers::PullParser, reader.parser)
|
73
|
+
MARC::XMLReader.parser="rexml"
|
74
|
+
assert_equal("rexml", MARC::XMLReader.parser)
|
75
|
+
reader = MARC::XMLReader.new('test/one.xml')
|
76
|
+
assert_kind_of(REXML::Parsers::PullParser, reader.parser)
|
77
|
+
end
|
78
|
+
|
79
|
+
def test_set_magic
|
80
|
+
magic_parser = nil
|
81
|
+
begin
|
82
|
+
require 'nokogiri'
|
83
|
+
magic_parser = Nokogiri::XML::SAX::Parser
|
84
|
+
rescue LoadError
|
85
|
+
magic_parser = REXML::Parsers::PullParser
|
86
|
+
end
|
87
|
+
puts "\nTesting 'test_set_magic' for parser: #{magic_parser}"
|
88
|
+
reader = MARC::XMLReader.new('test/one.xml', :parser=>MARC::XMLReader::USE_BEST_AVAILABLE)
|
89
|
+
assert_kind_of(magic_parser, reader.parser)
|
90
|
+
assert_equal("rexml", MARC::XMLReader.parser)
|
91
|
+
reader = MARC::XMLReader.new('test/one.xml', :parser=>'magic')
|
92
|
+
assert_kind_of(magic_parser, reader.parser)
|
93
|
+
assert_equal("rexml", MARC::XMLReader.parser)
|
94
|
+
MARC::XMLReader.parser=MARC::XMLReader::USE_BEST_AVAILABLE
|
95
|
+
assert_equal("magic", MARC::XMLReader.parser)
|
96
|
+
reader = MARC::XMLReader.new('test/one.xml')
|
97
|
+
assert_kind_of(magic_parser, reader.parser)
|
98
|
+
MARC::XMLReader.parser="magic"
|
99
|
+
assert_equal("magic", MARC::XMLReader.parser)
|
100
|
+
reader = MARC::XMLReader.new('test/one.xml')
|
101
|
+
assert_kind_of(magic_parser, reader.parser)
|
102
|
+
end
|
103
|
+
|
104
|
+
def test_parser_set_convenience_methods
|
105
|
+
parser_name = nil
|
106
|
+
parser = nil
|
107
|
+
begin
|
108
|
+
require 'nokogiri'
|
109
|
+
parser_name = 'nokogiri'
|
110
|
+
parser = Nokogiri::XML::SAX::Parser
|
111
|
+
rescue LoadError
|
112
|
+
parser = REXML::Parsers::PullParser
|
113
|
+
parser_name = 'rexml' # fallback parser's name; `parser` (set on the previous line) keeps its class for assert_kind_of
|
114
|
+
if RUBY_PLATFORM =~ /java/
|
115
|
+
begin
|
116
|
+
require 'jrexml'
|
117
|
+
parser_name = 'jrexml'
|
118
|
+
rescue LoadError
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
assert_equal(parser_name, MARC::XMLReader.best_available)
|
123
|
+
MARC::XMLReader.best_available!
|
124
|
+
reader = MARC::XMLReader.new('test/one.xml')
|
125
|
+
assert_kind_of(parser, reader.parser)
|
126
|
+
MARC::XMLReader.rexml!
|
127
|
+
reader = MARC::XMLReader.new('test/one.xml')
|
128
|
+
assert_kind_of(REXML::Parsers::PullParser, reader.parser)
|
129
|
+
if parser_name == 'nokogiri'
|
130
|
+
MARC::XMLReader.nokogiri!
|
131
|
+
reader = MARC::XMLReader.new('test/one.xml')
|
132
|
+
assert_kind_of(Nokogiri::XML::SAX::Parser, reader.parser)
|
133
|
+
else
|
134
|
+
puts "\nNokogiri not loaded, skipping convenience method test.\n"
|
135
|
+
end
|
136
|
+
if RUBY_PLATFORM =~ /java/
|
137
|
+
begin
|
138
|
+
require 'jrexml'
|
139
|
+
MARC::XMLReader.jrexml!
|
140
|
+
reader = MARC::XMLReader.new('test/one.xml')
|
141
|
+
assert_kind_of(REXML::Parsers::PullParser, reader.parser)
|
142
|
+
rescue LoadError
|
143
|
+
puts "\njrexml not available, skipping convenience method test.\n"
|
144
|
+
end
|
145
|
+
else
|
146
|
+
puts "\nTest not being run from JRuby, skipping jrexml convenience method test.\n"
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
def teardown
|
151
|
+
MARC::XMLReader.parser=MARC::XMLReader::USE_REXML
|
152
|
+
end
|
153
|
+
|
154
|
+
end
|
data/test/tc_record.rb
CHANGED
@@ -12,7 +12,12 @@ class TestRecord < Test::Unit::TestCase
|
|
12
12
|
r = get_record()
|
13
13
|
doc = r.to_xml
|
14
14
|
assert_kind_of REXML::Element, doc
|
15
|
-
|
15
|
+
if RUBY_VERSION < '1.9.0'
|
16
|
+
assert_equal "<record xmlns='http://www.loc.gov/MARC21/slim'><leader> Z 22 4500</leader><datafield tag='100' ind1='2' ind2='0'><subfield code='a'>Thomas, Dave</subfield></datafield><datafield tag='245' ind1='0' ind2='4'><subfield code='The Pragmatic Programmer'></subfield></datafield></record>", doc.to_s
|
17
|
+
else
|
18
|
+
# REXML inexplicably sorts the attributes alphabetically in Ruby 1.9
|
19
|
+
assert_equal "<record xmlns='http://www.loc.gov/MARC21/slim'><leader> Z 22 4500</leader><datafield ind1='2' ind2='0' tag='100'><subfield code='a'>Thomas, Dave</subfield></datafield><datafield ind1='0' ind2='4' tag='245'><subfield code='The Pragmatic Programmer'></subfield></datafield></record>", doc.to_s
|
20
|
+
end
|
16
21
|
end
|
17
22
|
|
18
23
|
def test_append_field
|
data/test/tc_xml.rb
CHANGED
@@ -3,20 +3,50 @@ require 'marc'
|
|
3
3
|
require 'stringio'
|
4
4
|
|
5
5
|
class XMLTest < Test::Unit::TestCase
|
6
|
+
def setup
|
7
|
+
@parsers = [:rexml]
|
8
|
+
begin
|
9
|
+
require 'nokogiri'
|
10
|
+
@parsers << :nokogiri
|
11
|
+
rescue LoadError
|
12
|
+
end
|
13
|
+
if RUBY_PLATFORM =~ /java/
|
14
|
+
begin
|
15
|
+
require 'jrexml'
|
16
|
+
@parsers << :jrexml
|
17
|
+
rescue LoadError
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
6
21
|
|
7
|
-
|
22
|
+
|
23
|
+
def test_xml_entities
|
24
|
+
@parsers.each do | parser |
|
25
|
+
puts "\nRunning test_xml_entities with: #{parser}.\n"
|
26
|
+
xml_entities_test(parser)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
def xml_entities_test(parser)
|
8
31
|
r1 = MARC::Record.new
|
9
32
|
r1 << MARC::DataField.new('245', '0', '0', ['a', 'foo & bar & baz'])
|
10
33
|
xml = r1.to_xml.to_s
|
11
34
|
assert_match /foo &amp; bar &amp; baz/, xml
|
12
35
|
|
13
|
-
reader = MARC::XMLReader.new(StringIO.new(xml))
|
36
|
+
reader = MARC::XMLReader.new(StringIO.new(xml), :parser=>parser)
|
14
37
|
r2 = reader.entries[0]
|
15
|
-
assert_equal 'foo & bar & baz', r2['245']['a']
|
38
|
+
assert_equal 'foo & bar & baz', r2['245']['a']
|
16
39
|
end
|
17
|
-
|
40
|
+
|
18
41
|
def test_batch
|
19
|
-
|
42
|
+
@parsers.each do | parser |
|
43
|
+
puts "\nRunning test_batch with: #{parser}.\n"
|
44
|
+
batch_test(parser)
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def batch_test(parser)
|
49
|
+
reader = MARC::XMLReader.new('test/batch.xml', :parser=>parser)
|
20
50
|
count = 0
|
21
51
|
for record in reader
|
22
52
|
count += 1
|
@@ -24,15 +54,29 @@ class XMLTest < Test::Unit::TestCase
|
|
24
54
|
end
|
25
55
|
assert_equal(count, 2)
|
26
56
|
end
|
27
|
-
|
57
|
+
|
28
58
|
def test_read_string
|
59
|
+
@parsers.each do | parser |
|
60
|
+
puts "\nRunning test_read_string with: #{parser}.\n"
|
61
|
+
read_string_test(parser)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
def read_string_test(parser)
|
29
66
|
xml = File.new('test/batch.xml').read
|
30
|
-
reader = MARC::XMLReader.new(StringIO.new(xml))
|
67
|
+
reader = MARC::XMLReader.new(StringIO.new(xml), :parser=>parser)
|
31
68
|
assert_equal 2, reader.entries.length
|
32
69
|
end
|
33
70
|
|
34
71
|
def test_non_numeric_fields
|
35
|
-
|
72
|
+
@parsers.each do | parser |
|
73
|
+
puts "\nRunning test_non_numeric_fields with: #{parser}.\n"
|
74
|
+
non_numeric_fields_test(parser)
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
def non_numeric_fields_test(parser)
|
79
|
+
reader = MARC::XMLReader.new('test/non-numeric.xml', :parser=>parser)
|
36
80
|
count = 0
|
37
81
|
record = nil
|
38
82
|
reader.each do | rec |
|
@@ -45,21 +89,42 @@ class XMLTest < Test::Unit::TestCase
|
|
45
89
|
end
|
46
90
|
|
47
91
|
def test_read_no_leading_zero_write_leading_zero
|
48
|
-
|
92
|
+
@parsers.each do | parser |
|
93
|
+
puts "\nRunning test_read_no_leading_zero_write_leading_zero with: #{parser}.\n"
|
94
|
+
read_no_leading_zero_write_leading_zero_test(parser)
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
def read_no_leading_zero_write_leading_zero_test(parser)
|
99
|
+
reader = MARC::XMLReader.new('test/no-leading-zero.xml', :parser=>parser)
|
49
100
|
record = reader.to_a[0]
|
50
101
|
assert_equal("042 zz $a dc ", record['042'].to_s)
|
51
102
|
end
|
52
103
|
|
53
104
|
def test_leader_from_xml
|
54
|
-
|
105
|
+
@parsers.each do | parser |
|
106
|
+
puts "\nRunning test_leader_from_xml with: #{parser}.\n"
|
107
|
+
leader_from_xml_test(parser)
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
def leader_from_xml_test(parser)
|
112
|
+
reader = MARC::XMLReader.new('test/one.xml', :parser=>parser)
|
55
113
|
record = reader.entries[0]
|
56
114
|
assert_equal ' njm a22 uu 4500', record.leader
|
57
115
|
# serializing as MARC should populate the record length and directory offset
|
58
116
|
record = MARC::Record.new_from_marc(record.to_marc)
|
59
117
|
assert_equal '00734njm a2200217uu 4500', record.leader
|
60
118
|
end
|
61
|
-
|
119
|
+
|
62
120
|
def test_read_write
|
121
|
+
@parsers.each do | parser |
|
122
|
+
puts "\nRunning test_read_write with: #{parser}.\n"
|
123
|
+
read_write_test(parser)
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
def read_write_test(parser)
|
63
128
|
record1 = MARC::Record.new
|
64
129
|
record1.leader = '00925njm 22002777a 4500'
|
65
130
|
record1.append MARC::ControlField.new('007', 'sdubumennmplu')
|
@@ -74,7 +139,7 @@ class XMLTest < Test::Unit::TestCase
|
|
74
139
|
assert_match /<controlfield tag='007'>sdubumennmplu<\/controlfield>/, xml
|
75
140
|
assert_match /<\?xml-stylesheet type="text\/xsl" href="style.xsl"\?>/, xml
|
76
141
|
|
77
|
-
reader = MARC::XMLReader.new('test/test.xml')
|
142
|
+
reader = MARC::XMLReader.new('test/test.xml', :parser=>parser)
|
78
143
|
record2 = reader.entries[0]
|
79
144
|
assert_equal(record1, record2)
|
80
145
|
|
metadata
CHANGED
@@ -1,15 +1,18 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: marc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
+
- Kevin Clarke
|
8
|
+
- William Groppe
|
9
|
+
- Ross Singer
|
7
10
|
- Ed Summers
|
8
11
|
autorequire: marc
|
9
12
|
bindir: bin
|
10
13
|
cert_chain: []
|
11
14
|
|
12
|
-
date:
|
15
|
+
date: 2009-09-23 00:00:00 -04:00
|
13
16
|
default_executable:
|
14
17
|
dependencies: []
|
15
18
|
|
@@ -22,41 +25,45 @@ extensions: []
|
|
22
25
|
extra_rdoc_files: []
|
23
26
|
|
24
27
|
files:
|
25
|
-
- lib/marc.rb
|
26
|
-
- lib/marc
|
27
|
-
- lib/marc/xmlwriter.rb
|
28
|
+
- lib/marc/constants.rb
|
28
29
|
- lib/marc/controlfield.rb
|
29
|
-
- lib/marc/xmlreader.rb
|
30
|
+
- lib/marc/datafield.rb
|
31
|
+
- lib/marc/dublincore.rb
|
32
|
+
- lib/marc/exception.rb
|
30
33
|
- lib/marc/reader.rb
|
31
34
|
- lib/marc/record.rb
|
32
|
-
- lib/marc/exception.rb
|
33
|
-
- lib/marc/datafield.rb
|
34
35
|
- lib/marc/subfield.rb
|
35
|
-
- lib/marc/constants.rb
|
36
|
-
- lib/marc/dublincore.rb
|
37
36
|
- lib/marc/writer.rb
|
37
|
+
- lib/marc/xml_parsers.rb
|
38
|
+
- lib/marc/xmlreader.rb
|
39
|
+
- lib/marc/xmlwriter.rb
|
40
|
+
- lib/marc.rb
|
41
|
+
- test/batch.dat
|
42
|
+
- test/batch.xml
|
43
|
+
- test/no-leading-zero.xml
|
38
44
|
- test/non-numeric.dat
|
39
|
-
- test/tc_dublincore.rb
|
40
|
-
- test/tc_datafield.rb
|
41
45
|
- test/non-numeric.xml
|
42
|
-
- test/no-leading-zero.xml
|
43
|
-
- test/ts_marc.rb
|
44
|
-
- test/tc_writer.rb
|
45
|
-
- test/batch.xml
|
46
|
-
- test/tc_xml.rb
|
47
46
|
- test/one.dat
|
48
|
-
- test/tc_record.rb
|
49
47
|
- test/one.xml
|
50
|
-
- test/batch.dat
|
51
48
|
- test/tc_controlfield.rb
|
49
|
+
- test/tc_datafield.rb
|
50
|
+
- test/tc_dublincore.rb
|
51
|
+
- test/tc_marchash.rb
|
52
|
+
- test/tc_parsers.rb
|
52
53
|
- test/tc_reader.rb
|
54
|
+
- test/tc_record.rb
|
53
55
|
- test/tc_subfield.rb
|
56
|
+
- test/tc_writer.rb
|
57
|
+
- test/tc_xml.rb
|
58
|
+
- test/ts_marc.rb
|
54
59
|
- Rakefile
|
55
60
|
- README
|
56
61
|
- Changes
|
57
62
|
- LICENSE
|
58
63
|
has_rdoc: true
|
59
|
-
homepage: http://
|
64
|
+
homepage: http://marc.rubyforge.org/
|
65
|
+
licenses: []
|
66
|
+
|
60
67
|
post_install_message:
|
61
68
|
rdoc_options: []
|
62
69
|
|
@@ -77,9 +84,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
77
84
|
requirements: []
|
78
85
|
|
79
86
|
rubyforge_project:
|
80
|
-
rubygems_version: 1.3.
|
87
|
+
rubygems_version: 1.3.5
|
81
88
|
signing_key:
|
82
|
-
specification_version:
|
89
|
+
specification_version: 3
|
83
90
|
summary: A ruby library for working with Machine Readable Cataloging
|
84
91
|
test_files:
|
85
92
|
- test/ts_marc.rb
|