marc 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/marc/datafield.rb +4 -3
- data/lib/marc/reader.rb +8 -2
- data/lib/marc/record.rb +14 -5
- data/lib/marc/subfield.rb +4 -4
- data/lib/marc/xmlreader.rb +1 -1
- data/lib/marc/xmlwriter.rb +4 -17
- data/test/tc_reader.rb +36 -31
- data/test/tc_xml.rb +11 -0
- metadata +2 -2
data/lib/marc/datafield.rb
CHANGED
@@ -6,12 +6,13 @@ module MARC
|
|
6
6
|
# MARC records contain data fields, each of which has a tag,
|
7
7
|
# indicators and subfields. Tags for data fields must be in
|
8
8
|
# the range 010-999.
|
9
|
-
# Accessor attributes: tag
|
9
|
+
# Accessor attributes: tag, indicator1, indicator2
|
10
10
|
#
|
11
|
-
# DataField
|
11
|
+
# DataField mixes in Enumerable to enable access to its constituent
|
12
12
|
# Subfield objects. For instance, if you have a DataField representing
|
13
13
|
# a 856 tag, and want to find all 'z' subfields:
|
14
|
-
#
|
14
|
+
#
|
15
|
+
# subfield_z = field.find_all {|subfield| subfield.code == 'z'}
|
15
16
|
#
|
16
17
|
# Also, the accessor 'subfields' is an array of MARC::Subfield objects
|
17
18
|
# which can be accessed or modified by the client directly if
|
data/lib/marc/reader.rb
CHANGED
@@ -37,11 +37,17 @@ module MARC
|
|
37
37
|
|
38
38
|
def each
|
39
39
|
# while there is data left in the file
|
40
|
-
while
|
40
|
+
while rec_length_s = @handle.read(5)
|
41
|
+
# make sure the record length looks like an integer
|
42
|
+
rec_length_i = rec_length_s.to_i
|
43
|
+
if rec_length_i == 0:
|
44
|
+
raise MARC::Exception.new("invalid record length: #{rec_length_s}")
|
45
|
+
end
|
41
46
|
|
42
47
|
# get the raw MARC21 for a record back from the file
|
43
48
|
# using the record length
|
44
|
-
raw =
|
49
|
+
raw = rec_length_s + @handle.read(rec_length_i-5)
|
50
|
+
|
45
51
|
|
46
52
|
# create a record from the data and return it
|
47
53
|
#record = MARC::Record.new_from_marc(raw)
|
data/lib/marc/record.rb
CHANGED
@@ -3,12 +3,15 @@ module MARC
|
|
3
3
|
# A class that represents an individual MARC record. Every record
|
4
4
|
# is made up of a collection of MARC::DataField objects.
|
5
5
|
#
|
6
|
-
# MARC::Record
|
7
|
-
# DataFields.
|
8
|
-
#
|
6
|
+
# MARC::Record mixes in Enumerable to enable access to constituent
|
7
|
+
# DataFields. For example, to return a list of all subject DataFields:
|
8
|
+
#
|
9
|
+
# record.find_all {|field| field.tag =~ /^6../}
|
9
10
|
#
|
10
11
|
# The accessor 'fields' is also an Array of MARC::DataField objects which
|
11
12
|
# the client can access or modify if necessary.
|
13
|
+
#
|
14
|
+
# record.fields.delete(field)
|
12
15
|
#
|
13
16
|
# Other accessor attribute: 'leader' for record leader as String
|
14
17
|
|
@@ -16,10 +19,10 @@ module MARC
|
|
16
19
|
include Enumerable
|
17
20
|
|
18
21
|
# the record fields
|
19
|
-
attr_accessor :fields
|
22
|
+
attr_accessor :fields
|
20
23
|
|
21
24
|
# the record leader
|
22
|
-
:leader
|
25
|
+
attr_accessor :leader
|
23
26
|
|
24
27
|
def initialize
|
25
28
|
@fields = []
|
@@ -38,6 +41,12 @@ module MARC
|
|
38
41
|
@fields.push(field)
|
39
42
|
end
|
40
43
|
|
44
|
+
# alias to append
|
45
|
+
|
46
|
+
def <<(field)
|
47
|
+
append(field)
|
48
|
+
end
|
49
|
+
|
41
50
|
# each() is here to support iterating and searching since MARC::Record
|
42
51
|
# mixes in Enumerable
|
43
52
|
#
|
data/lib/marc/subfield.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
module MARC
|
2
2
|
|
3
|
-
# A class
|
4
|
-
# Accessor attributes: code (letter subfield code)
|
5
|
-
# Both can be empty string, but should
|
6
|
-
|
3
|
+
# A class that represents an individual subfield within a DataField.
|
4
|
+
# Accessor attributes include: code (letter subfield code) and value
|
5
|
+
# (the content of the subfield). Both can be empty string, but should
|
6
|
+
# not be set to nil.
|
7
7
|
|
8
8
|
class Subfield
|
9
9
|
attr_accessor :code, :value
|
data/lib/marc/xmlreader.rb
CHANGED
data/lib/marc/xmlwriter.rb
CHANGED
@@ -4,6 +4,8 @@ require 'rexml/text'
|
|
4
4
|
module MARC
|
5
5
|
|
6
6
|
# A class for writing MARC records as MARCXML.
|
7
|
+
# BIG CAVEAT! XMLWriter will *not* convert your MARC8 to UTF8
|
8
|
+
# bug the authors to do this if you need it
|
7
9
|
|
8
10
|
class XMLWriter
|
9
11
|
|
@@ -51,21 +53,6 @@ module MARC
|
|
51
53
|
@fh.close
|
52
54
|
end
|
53
55
|
|
54
|
-
|
55
|
-
# Converts from ISO 8859-1 to UTF-8, normalizes the UTF-8, and puts a
|
56
|
-
# 'clean up marker' in records that have control characters (which are
|
57
|
-
# not valid in XML). This is useful for locating these records once
|
58
|
-
# they are in XML so problems caused by removing the invalid characters
|
59
|
-
# can be fixed by a person. This (or something in the module) needs to
|
60
|
-
# convert from MARC-8 to UTF-8, but it doesn't do this yet...
|
61
|
-
|
62
|
-
def self.convert_to_utf8(text)
|
63
|
-
cleaned_text = text.gsub(/[\x00-\x1f\x7f-\xff]+/, ' CLEAN_ME_UP ')
|
64
|
-
utf8_text = cleaned_text.unpack('C*').pack('U*')
|
65
|
-
normalized_text = REXML::Text::normalize(utf8_text)
|
66
|
-
|
67
|
-
return normalized_text
|
68
|
-
end
|
69
56
|
|
70
57
|
# a static method that accepts a MARC::Record object
|
71
58
|
# and returns a REXML::Document for the XML serialization.
|
@@ -138,7 +125,7 @@ module MARC
|
|
138
125
|
end
|
139
126
|
|
140
127
|
subfield_element.add_attribute("code", subfield.code)
|
141
|
-
text =
|
128
|
+
text = subfield.value
|
142
129
|
subfield_element.add_text(text)
|
143
130
|
datafield_elem.add_element(subfield_element)
|
144
131
|
end
|
@@ -153,7 +140,7 @@ module MARC
|
|
153
140
|
end
|
154
141
|
|
155
142
|
control_element.add_attribute("tag", field.tag)
|
156
|
-
text =
|
143
|
+
text = field.value
|
157
144
|
control_element.add_text(text)
|
158
145
|
e.add_element(control_element)
|
159
146
|
end
|
data/test/tc_reader.rb
CHANGED
@@ -3,36 +3,41 @@ require 'marc'
|
|
3
3
|
|
4
4
|
class ReaderTest < Test::Unit::TestCase
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
6
|
+
def test_batch
|
7
|
+
reader = MARC::Reader.new('test/batch.dat')
|
8
|
+
count = 0
|
9
|
+
reader.each { count += 1 }
|
10
|
+
assert_equal(count, 10)
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_loose
|
14
|
+
reader = MARC::ForgivingReader.new('test/batch.dat')
|
15
|
+
count = 0
|
16
|
+
reader.each { count += 1 }
|
17
|
+
assert_equal(10, count)
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_bad_marc
|
21
|
+
reader = MARC::Reader.new('test/tc_reader.rb')
|
22
|
+
assert_raises(MARC::Exception) {reader.entries[0]}
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_search
|
26
|
+
reader = MARC::Reader.new('test/batch.dat')
|
27
|
+
records = reader.find_all { |r| r =~ /Perl/ }
|
28
|
+
assert_equal(10, records.length)
|
29
|
+
|
30
|
+
reader = MARC::Reader.new('test/batch.dat')
|
31
|
+
records = reader.find_all { |r| r['245'] =~ /Perl/ }
|
32
|
+
assert_equal(10, records.length)
|
33
|
+
|
34
|
+
reader = MARC::Reader.new('test/batch.dat')
|
35
|
+
records = reader.find_all { |r| r['245']['a'] =~ /Perl/ }
|
36
|
+
assert_equal(10, records.length)
|
37
|
+
|
38
|
+
reader = MARC::Reader.new('test/batch.dat')
|
39
|
+
records = reader.find_all { |r| r =~ /Foo/ }
|
40
|
+
assert_equal(0, records.length)
|
41
|
+
end
|
37
42
|
|
38
43
|
end
|
data/test/tc_xml.rb
CHANGED
@@ -4,6 +4,17 @@ require 'stringio'
|
|
4
4
|
|
5
5
|
class XMLTest < Test::Unit::TestCase
|
6
6
|
|
7
|
+
def test_xml_entities
|
8
|
+
r1 = MARC::Record.new
|
9
|
+
r1 << MARC::DataField.new('245', '0', '0', ['a', 'foo & bar'])
|
10
|
+
xml = r1.to_xml.to_s
|
11
|
+
assert_match /foo &amp; bar/, xml
|
12
|
+
|
13
|
+
reader = MARC::XMLReader.new(StringIO.new(xml))
|
14
|
+
r2 = reader.entries[0]
|
15
|
+
assert_equal 'foo & bar', r2['245']['a']
|
16
|
+
end
|
17
|
+
|
7
18
|
def test_batch
|
8
19
|
reader = MARC::XMLReader.new('test/batch.xml')
|
9
20
|
count = 0
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.2
|
|
3
3
|
specification_version: 1
|
4
4
|
name: marc
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.1.5
|
7
|
-
date: 2007-05-
|
6
|
+
version: 0.1.6
|
7
|
+
date: 2007-05-04 00:00:00 -04:00
|
8
8
|
summary: A ruby library for working with Machine Readable Cataloging
|
9
9
|
require_paths:
|
10
10
|
- lib
|