marc 0.1.5 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/marc/datafield.rb +4 -3
- data/lib/marc/reader.rb +8 -2
- data/lib/marc/record.rb +14 -5
- data/lib/marc/subfield.rb +4 -4
- data/lib/marc/xmlreader.rb +1 -1
- data/lib/marc/xmlwriter.rb +4 -17
- data/test/tc_reader.rb +36 -31
- data/test/tc_xml.rb +11 -0
- metadata +2 -2
data/lib/marc/datafield.rb
CHANGED
@@ -6,12 +6,13 @@ module MARC
|
|
6
6
|
# MARC records contain data fields, each of which has a tag,
|
7
7
|
# indicators and subfields. Tags for data fields must be in
|
8
8
|
# the range 010-999.
|
9
|
-
# Accessor attributes: tag
|
9
|
+
# Accessor attributes: tag, indicator1, indicator2
|
10
10
|
#
|
11
|
-
# DataField
|
11
|
+
# DataField mixes in Enumerable to enable access to its constituent
|
12
12
|
# Subfield objects. For instance, if you have a DataField representing
|
13
13
|
# a 856 tag, and want to find all 'z' subfields:
|
14
|
-
#
|
14
|
+
#
|
15
|
+
# subfield_z = field.find_all {|subfield| subfield.code == 'z'}
|
15
16
|
#
|
16
17
|
# Also, the accessor 'subfields' is an array of MARC::Subfield objects
|
17
18
|
# which can be accessed or modified by the client directly if
|
data/lib/marc/reader.rb
CHANGED
@@ -37,11 +37,17 @@ module MARC
|
|
37
37
|
|
38
38
|
def each
|
39
39
|
# while there is data left in the file
|
40
|
-
while
|
40
|
+
while rec_length_s = @handle.read(5)
|
41
|
+
# make sure the record length looks like an integer
|
42
|
+
rec_length_i = rec_length_s.to_i
|
43
|
+
if rec_length_i == 0:
|
44
|
+
raise MARC::Exception.new("invalid record length: #{rec_length_s}")
|
45
|
+
end
|
41
46
|
|
42
47
|
# get the raw MARC21 for a record back from the file
|
43
48
|
# using the record length
|
44
|
-
raw =
|
49
|
+
raw = rec_length_s + @handle.read(rec_length_i-5)
|
50
|
+
|
45
51
|
|
46
52
|
# create a record from the data and return it
|
47
53
|
#record = MARC::Record.new_from_marc(raw)
|
data/lib/marc/record.rb
CHANGED
@@ -3,12 +3,15 @@ module MARC
|
|
3
3
|
# A class that represents an individual MARC record. Every record
|
4
4
|
# is made up of a collection of MARC::DataField objects.
|
5
5
|
#
|
6
|
-
# MARC::Record
|
7
|
-
# DataFields.
|
8
|
-
#
|
6
|
+
# MARC::Record mixes in Enumerable to enable access to constituent
|
7
|
+
# DataFields. For example, to return a list of all subject DataFields:
|
8
|
+
#
|
9
|
+
# record.find_all {|field| field.tag =~ /^6../}
|
9
10
|
#
|
10
11
|
# The accessor 'fields' is also an Array of MARC::DataField objects which
|
11
12
|
# the client can access or modify if necessary.
|
13
|
+
#
|
14
|
+
# record.fields.delete(field)
|
12
15
|
#
|
13
16
|
# Other accessor attribute: 'leader' for record leader as String
|
14
17
|
|
@@ -16,10 +19,10 @@ module MARC
|
|
16
19
|
include Enumerable
|
17
20
|
|
18
21
|
# the record fields
|
19
|
-
attr_accessor :fields
|
22
|
+
attr_accessor :fields
|
20
23
|
|
21
24
|
# the record leader
|
22
|
-
:leader
|
25
|
+
attr_accessor :leader
|
23
26
|
|
24
27
|
def initialize
|
25
28
|
@fields = []
|
@@ -38,6 +41,12 @@ module MARC
|
|
38
41
|
@fields.push(field)
|
39
42
|
end
|
40
43
|
|
44
|
+
# alias to append
|
45
|
+
|
46
|
+
def <<(field)
|
47
|
+
append(field)
|
48
|
+
end
|
49
|
+
|
41
50
|
# each() is here to support iterating and searching since MARC::Record
|
42
51
|
# mixes in Enumerable
|
43
52
|
#
|
data/lib/marc/subfield.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
module MARC
|
2
2
|
|
3
|
-
# A class
|
4
|
-
# Accessor attributes: code (letter subfield code)
|
5
|
-
# Both can be empty string, but should
|
6
|
-
|
3
|
+
# A class that represents an individual subfield within a DataField.
|
4
|
+
# Accessor attributes include: code (letter subfield code) and value
|
5
|
+
# (the content of the subfield). Both can be empty string, but should
|
6
|
+
# not be set to nil.
|
7
7
|
|
8
8
|
class Subfield
|
9
9
|
attr_accessor :code, :value
|
data/lib/marc/xmlreader.rb
CHANGED
data/lib/marc/xmlwriter.rb
CHANGED
@@ -4,6 +4,8 @@ require 'rexml/text'
|
|
4
4
|
module MARC
|
5
5
|
|
6
6
|
# A class for writing MARC records as MARCXML.
|
7
|
+
# BIG CAVEAT! XMLWriter will *not* convert your MARC8 to UTF8
|
8
|
+
# bug the authors to do this if you need it
|
7
9
|
|
8
10
|
class XMLWriter
|
9
11
|
|
@@ -51,21 +53,6 @@ module MARC
|
|
51
53
|
@fh.close
|
52
54
|
end
|
53
55
|
|
54
|
-
|
55
|
-
# Converts from ISO 8859-1 to UTF-8, normalizes the UTF-8, and puts a
|
56
|
-
# 'clean up marker' in records that have control characters (which are
|
57
|
-
# not valid in XML). This is useful for locating these records once
|
58
|
-
# they are in XML so problems caused by removing the invalid characters
|
59
|
-
# can be fixed by a person. This (or something in the module) needs to
|
60
|
-
# convert from MARC-8 to UTF-8, but it doesn't do this yet...
|
61
|
-
|
62
|
-
def self.convert_to_utf8(text)
|
63
|
-
cleaned_text = text.gsub(/[\x00-\x1f\x7f-\xff]+/, ' CLEAN_ME_UP ')
|
64
|
-
utf8_text = cleaned_text.unpack('C*').pack('U*')
|
65
|
-
normalized_text = REXML::Text::normalize(utf8_text)
|
66
|
-
|
67
|
-
return normalized_text
|
68
|
-
end
|
69
56
|
|
70
57
|
# a static method that accepts a MARC::Record object
|
71
58
|
# and returns a REXML::Document for the XML serialization.
|
@@ -138,7 +125,7 @@ module MARC
|
|
138
125
|
end
|
139
126
|
|
140
127
|
subfield_element.add_attribute("code", subfield.code)
|
141
|
-
text =
|
128
|
+
text = subfield.value
|
142
129
|
subfield_element.add_text(text)
|
143
130
|
datafield_elem.add_element(subfield_element)
|
144
131
|
end
|
@@ -153,7 +140,7 @@ module MARC
|
|
153
140
|
end
|
154
141
|
|
155
142
|
control_element.add_attribute("tag", field.tag)
|
156
|
-
text =
|
143
|
+
text = field.value
|
157
144
|
control_element.add_text(text)
|
158
145
|
e.add_element(control_element)
|
159
146
|
end
|
data/test/tc_reader.rb
CHANGED
@@ -3,36 +3,41 @@ require 'marc'
|
|
3
3
|
|
4
4
|
class ReaderTest < Test::Unit::TestCase
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
6
|
+
def test_batch
|
7
|
+
reader = MARC::Reader.new('test/batch.dat')
|
8
|
+
count = 0
|
9
|
+
reader.each { count += 1 }
|
10
|
+
assert_equal(count, 10)
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_loose
|
14
|
+
reader = MARC::ForgivingReader.new('test/batch.dat')
|
15
|
+
count = 0
|
16
|
+
reader.each { count += 1 }
|
17
|
+
assert_equal(10, count)
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_bad_marc
|
21
|
+
reader = MARC::Reader.new('test/tc_reader.rb')
|
22
|
+
assert_raises(MARC::Exception) {reader.entries[0]}
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_search
|
26
|
+
reader = MARC::Reader.new('test/batch.dat')
|
27
|
+
records = reader.find_all { |r| r =~ /Perl/ }
|
28
|
+
assert_equal(10, records.length)
|
29
|
+
|
30
|
+
reader = MARC::Reader.new('test/batch.dat')
|
31
|
+
records = reader.find_all { |r| r['245'] =~ /Perl/ }
|
32
|
+
assert_equal(10, records.length)
|
33
|
+
|
34
|
+
reader = MARC::Reader.new('test/batch.dat')
|
35
|
+
records = reader.find_all { |r| r['245']['a'] =~ /Perl/ }
|
36
|
+
assert_equal(10, records.length)
|
37
|
+
|
38
|
+
reader = MARC::Reader.new('test/batch.dat')
|
39
|
+
records = reader.find_all { |r| r =~ /Foo/ }
|
40
|
+
assert_equal(0, records.length)
|
41
|
+
end
|
37
42
|
|
38
43
|
end
|
data/test/tc_xml.rb
CHANGED
@@ -4,6 +4,17 @@ require 'stringio'
|
|
4
4
|
|
5
5
|
class XMLTest < Test::Unit::TestCase
|
6
6
|
|
7
|
+
def test_xml_entities
|
8
|
+
r1 = MARC::Record.new
|
9
|
+
r1 << MARC::DataField.new('245', '0', '0', ['a', 'foo & bar'])
|
10
|
+
xml = r1.to_xml.to_s
|
11
|
+
assert_match /foo & bar/, xml
|
12
|
+
|
13
|
+
reader = MARC::XMLReader.new(StringIO.new(xml))
|
14
|
+
r2 = reader.entries[0]
|
15
|
+
assert_equal 'foo & bar', r2['245']['a']
|
16
|
+
end
|
17
|
+
|
7
18
|
def test_batch
|
8
19
|
reader = MARC::XMLReader.new('test/batch.xml')
|
9
20
|
count = 0
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.2
|
|
3
3
|
specification_version: 1
|
4
4
|
name: marc
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.1.
|
7
|
-
date: 2007-05-
|
6
|
+
version: 0.1.6
|
7
|
+
date: 2007-05-04 00:00:00 -04:00
|
8
8
|
summary: A ruby library for working with Machine Readable Cataloging
|
9
9
|
require_paths:
|
10
10
|
- lib
|