marc 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/marc.rb +3 -1
- data/lib/marc/constants.rb +14 -0
- data/lib/marc/control.rb +24 -24
- data/lib/marc/exception.rb +4 -4
- data/lib/marc/field.rb +119 -119
- data/lib/marc/reader.rb +163 -78
- data/lib/marc/record.rb +114 -117
- data/lib/marc/subfield.rb +19 -20
- data/lib/marc/writer.rb +72 -21
- data/lib/marc/xmlreader.rb +83 -0
- data/lib/marc/xmlwriter.rb +87 -0
- data/test/batch.xml +157 -0
- data/test/tc_xmlreader.rb +34 -0
- data/test/tc_xmlwriter.rb +37 -0
- data/test/ts_marc.rb +2 -0
- metadata +9 -4
- data/lib/marc/marc21.rb +0 -155
data/lib/marc.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'marc/constants'
|
1
2
|
require 'marc/record'
|
2
3
|
require 'marc/field'
|
3
4
|
require 'marc/control'
|
@@ -5,4 +6,5 @@ require 'marc/subfield'
|
|
5
6
|
require 'marc/reader'
|
6
7
|
require 'marc/writer'
|
7
8
|
require 'marc/exception'
|
8
|
-
require 'marc/
|
9
|
+
require 'marc/xmlwriter'
|
10
|
+
require 'marc/xmlreader'
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module MARC
|
2
|
+
|
3
|
+
# constants used in MARC21 reading/writing
|
4
|
+
LEADER_LENGTH = 24
|
5
|
+
DIRECTORY_ENTRY_LENGTH = 12
|
6
|
+
SUBFIELD_INDICATOR = 0x1F.chr
|
7
|
+
END_OF_FIELD = 0x1E.chr
|
8
|
+
END_OF_RECORD = 0x1D.chr
|
9
|
+
|
10
|
+
# constants used in XML reading/writing
|
11
|
+
MARC_NS = "http://www.loc.gov/MARC21/slim"
|
12
|
+
MARC_XSD = "http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd"
|
13
|
+
|
14
|
+
end
|
data/lib/marc/control.rb
CHANGED
@@ -1,36 +1,36 @@
|
|
1
1
|
module MARC
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
3
|
+
# A class for representing fields with a tag less than 010.
|
4
|
+
# Ordinary MARC::Field objects are for fields with tags >= 010
|
5
|
+
# which have indicators and subfields.
|
6
6
|
|
7
|
-
|
7
|
+
class Control
|
8
8
|
|
9
|
-
|
10
|
-
|
9
|
+
# the tag value (007, 008, etc)
|
10
|
+
attr_accessor :tag
|
11
11
|
|
12
|
-
|
13
|
-
|
12
|
+
# the value of the control field
|
13
|
+
attr_accessor :value
|
14
14
|
|
15
|
-
|
16
|
-
|
15
|
+
# The constructor which must be passed a tag value and
|
16
|
+
# an optional value for the field.
|
17
17
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
def to_s
|
27
|
-
return "#{tag} #{value}"
|
28
|
-
end
|
18
|
+
def initialize(tag,value='')
|
19
|
+
@tag = tag
|
20
|
+
@value = value
|
21
|
+
if tag.to_i > 9
|
22
|
+
raise MARC::Exception.new(), "tag must be greater than 009"
|
23
|
+
end
|
24
|
+
end
|
29
25
|
|
30
|
-
|
31
|
-
|
32
|
-
|
26
|
+
def to_s
|
27
|
+
return "#{tag} #{value}"
|
28
|
+
end
|
33
29
|
|
30
|
+
def =~(regex)
|
31
|
+
return self.to_s =~ regex
|
34
32
|
end
|
35
33
|
|
34
|
+
end
|
35
|
+
|
36
36
|
end
|
data/lib/marc/exception.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
module MARC
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
# basic exception class for exceptions that
|
4
|
+
# can occur during MARC processing.
|
5
5
|
|
6
|
-
|
7
|
-
|
6
|
+
class Exception < RuntimeError
|
7
|
+
end
|
8
8
|
|
9
9
|
end
|
data/lib/marc/field.rb
CHANGED
@@ -3,141 +3,141 @@ require 'marc/record'
|
|
3
3
|
|
4
4
|
module MARC
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
end
|
73
|
-
end
|
6
|
+
# MARC records are made up of fields, each of which has a tag,
|
7
|
+
# indicators and subfields. If the tag is between 000-009 it is
|
8
|
+
# known as a control field, and actually does not have any
|
9
|
+
# indicators.
|
10
|
+
|
11
|
+
class Field
|
12
|
+
include Enumerable
|
13
|
+
|
14
|
+
# The tag for the field
|
15
|
+
attr_accessor :tag
|
16
|
+
|
17
|
+
# The first indicator
|
18
|
+
attr_accessor :indicator1
|
19
|
+
|
20
|
+
# The second indicator
|
21
|
+
attr_accessor :indicator2
|
22
|
+
|
23
|
+
# A list of MARC::Subfield objects
|
24
|
+
attr_accessor :subfields
|
25
|
+
|
26
|
+
|
27
|
+
# Create a new field with tag, indicators and subfields.
|
28
|
+
# Subfields are passed in as comma separated list of
|
29
|
+
# MARC::Subfield objects,
|
30
|
+
#
|
31
|
+
# field = MARC::Field.new('245','0','0',
|
32
|
+
# MARC::Subfield.new('a', 'Consilience :'),
|
33
|
+
# MARC::Subfield.new('b', 'the unity of knowledge '),
|
34
|
+
# MARC::Subfield.new('c', 'by Edward O. Wilson.'))
|
35
|
+
#
|
36
|
+
# or using a shorthand:
|
37
|
+
#
|
38
|
+
# field = MARC::Field.new('245','0','0',
|
39
|
+
# ['a', 'Consilience :'],['b','the unity of knowledge '],
|
40
|
+
# ['c', 'by Edward O. Wilson.'] )
|
41
|
+
|
42
|
+
def initialize(tag, i1=' ', i2=' ', *subfields)
|
43
|
+
@tag = tag
|
44
|
+
# can't allow nil to be passed in or else it'll
|
45
|
+
# screw us up later when we try to encode
|
46
|
+
@indicator1 = i1 == nil ? ' ' : i1
|
47
|
+
@indicator2 = i2 == nil ? ' ' : i2
|
48
|
+
@subfields = []
|
49
|
+
|
50
|
+
# must use MARC::Control for tags < 010
|
51
|
+
if @tag.to_i < 10
|
52
|
+
raise MARC::Exception.new(),
|
53
|
+
"MARC::Field objects can't have tags < 010"
|
54
|
+
end
|
55
|
+
|
56
|
+
# allows MARC::Subfield objects to be passed directly
|
57
|
+
# or a shorthand of ['a','Foo'], ['b','Bar']
|
58
|
+
subfields.each do |subfield|
|
59
|
+
case subfield
|
60
|
+
when MARC::Subfield
|
61
|
+
@subfields.push(subfield)
|
62
|
+
when Array
|
63
|
+
if subfield.length > 2
|
64
|
+
raise MARC::Exception.new(),
|
65
|
+
"arrays must only have 2 elements"
|
66
|
+
end
|
67
|
+
@subfields.push(
|
68
|
+
MARC::Subfield.new(subfield[0],subfield[1]))
|
69
|
+
else
|
70
|
+
raise MARC::Exception.new(),
|
71
|
+
"invalid subfield type #{subfield.class}"
|
74
72
|
end
|
73
|
+
end
|
74
|
+
end
|
75
75
|
|
76
76
|
|
77
|
-
|
78
|
-
|
77
|
+
# Returns a string representation of the field such as:
|
78
|
+
# 245 00 $aConsilience :$bthe unity of knowledge $cby Edward O. Wilson.
|
79
79
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
80
|
+
def to_s
|
81
|
+
str = "#{tag} "
|
82
|
+
str += "#{indicator1}#{indicator2} "
|
83
|
+
@subfields.each { |subfield| str += subfield.to_s }
|
84
|
+
return str
|
85
|
+
end
|
86
86
|
|
87
87
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
def append(subfield)
|
92
|
-
@subfields.push(subfield)
|
93
|
-
end
|
88
|
+
# Add a subfield (MARC::Subfield) to the field
|
89
|
+
# field.append(MARC::Subfield.new('a','Dave Thomas'))
|
94
90
|
|
91
|
+
def append(subfield)
|
92
|
+
@subfields.push(subfield)
|
93
|
+
end
|
95
94
|
|
96
|
-
# You can iterate through the subfields in a Field:
|
97
|
-
# field.each {|s| print s}
|
98
95
|
|
99
|
-
|
100
|
-
|
101
|
-
yield subfield
|
102
|
-
end
|
103
|
-
end
|
96
|
+
# You can iterate through the subfields in a Field:
|
97
|
+
# field.each {|s| print s}
|
104
98
|
|
99
|
+
def each
|
100
|
+
for subfield in subfields
|
101
|
+
yield subfield
|
102
|
+
end
|
103
|
+
end
|
105
104
|
|
106
|
-
# You can lookup subfields with this shorthand. Note it
|
107
|
-
# will return a string and not a MARC::Subfield object.
|
108
|
-
# subfield = field['a']
|
109
|
-
|
110
|
-
def [](code)
|
111
|
-
subfield = self.find {|s| s.code == code}
|
112
|
-
return subfield.value if subfield
|
113
|
-
return
|
114
|
-
end
|
115
105
|
|
106
|
+
# You can lookup subfields with this shorthand. Note it
|
107
|
+
# will return a string and not a MARC::Subfield object.
|
108
|
+
# subfield = field['a']
|
109
|
+
|
110
|
+
def [](code)
|
111
|
+
subfield = self.find {|s| s.code == code}
|
112
|
+
return subfield.value if subfield
|
113
|
+
return
|
114
|
+
end
|
116
115
|
|
117
|
-
# Two fields are equal if their tag, indicators and
|
118
|
-
# subfields are all equal.
|
119
|
-
|
120
|
-
def ==(other)
|
121
|
-
if @tag != other.tag
|
122
|
-
return false
|
123
|
-
elsif @indicator1 != other.indicator1
|
124
|
-
return false
|
125
|
-
elsif @indicator2 != other.indicator2
|
126
|
-
return false
|
127
|
-
elsif @subfields != other.subfields
|
128
|
-
return false
|
129
|
-
end
|
130
|
-
return true
|
131
|
-
end
|
132
116
|
|
117
|
+
# Two fields are equal if their tag, indicators and
|
118
|
+
# subfields are all equal.
|
119
|
+
|
120
|
+
def ==(other)
|
121
|
+
if @tag != other.tag
|
122
|
+
return false
|
123
|
+
elsif @indicator1 != other.indicator1
|
124
|
+
return false
|
125
|
+
elsif @indicator2 != other.indicator2
|
126
|
+
return false
|
127
|
+
elsif @subfields != other.subfields
|
128
|
+
return false
|
129
|
+
end
|
130
|
+
return true
|
131
|
+
end
|
133
132
|
|
134
|
-
# To support regex matching with fields
|
135
|
-
#
|
136
|
-
# if field =~ /Huckleberry/ ...
|
137
133
|
|
138
|
-
|
139
|
-
|
140
|
-
|
134
|
+
# To support regex matching with fields
|
135
|
+
#
|
136
|
+
# if field =~ /Huckleberry/ ...
|
141
137
|
|
138
|
+
def =~(regex)
|
139
|
+
return self.to_s =~ regex
|
142
140
|
end
|
141
|
+
|
142
|
+
end
|
143
143
|
end
|
data/lib/marc/reader.rb
CHANGED
@@ -1,92 +1,177 @@
|
|
1
1
|
module MARC
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
# to support iteration:
|
28
|
-
# for record in reader
|
29
|
-
# print record
|
30
|
-
# end
|
31
|
-
#
|
32
|
-
# and even searching:
|
33
|
-
# record.find { |f| f['245'] =~ /Huckleberry/ }
|
34
|
-
|
35
|
-
def each
|
36
|
-
# while there is data left in the file
|
37
|
-
while length = @handle.read(5)
|
38
|
-
|
39
|
-
# get the raw MARC21 for a record back from the file
|
40
|
-
# using the record length
|
41
|
-
raw = length + @handle.read(length.to_i-5)
|
42
|
-
|
43
|
-
# create a record from the data and return it
|
44
|
-
record = MARC::Record.new_from_marc(raw)
|
45
|
-
yield record
|
46
|
-
end
|
47
|
-
end
|
3
|
+
class Reader
|
4
|
+
include Enumerable
|
5
|
+
|
6
|
+
# The constructor, to which you may pass either a path:
|
7
|
+
#
|
8
|
+
# reader = MARC::Reader.new('marc.dat')
|
9
|
+
#
|
10
|
+
# or, if it's more convenient, a File object:
|
11
|
+
#
|
12
|
+
# fh = File.new('marc.dat')
|
13
|
+
# reader = MARC::Reader.new(fh)
|
14
|
+
#
|
15
|
+
# or really any object that responds to read(n).
|
16
|
+
|
17
|
+
def initialize(file)
|
18
|
+
if file.class == String:
|
19
|
+
@handle = File.new(file)
|
20
|
+
elsif file.respond_to?("read", 5)
|
21
|
+
@handle = file
|
22
|
+
else
|
23
|
+
throw "must pass in path or file"
|
24
|
+
end
|
25
|
+
end
|
48
26
|
|
27
|
+
# to support iteration:
|
28
|
+
# for record in reader
|
29
|
+
# print record
|
30
|
+
# end
|
31
|
+
#
|
32
|
+
# and even searching:
|
33
|
+
# record.find { |f| f['245'] =~ /Huckleberry/ }
|
34
|
+
|
35
|
+
def each
|
36
|
+
# while there is data left in the file
|
37
|
+
while length = @handle.read(5)
|
38
|
+
|
39
|
+
# get the raw MARC21 for a record back from the file
|
40
|
+
# using the record length
|
41
|
+
raw = length + @handle.read(length.to_i-5)
|
42
|
+
|
43
|
+
# create a record from the data and return it
|
44
|
+
#record = MARC::Record.new_from_marc(raw)
|
45
|
+
record = MARC::Reader.decode(raw)
|
46
|
+
yield record
|
47
|
+
end
|
49
48
|
end
|
50
49
|
|
51
50
|
|
52
|
-
#
|
53
|
-
#
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
# 100% guaranteed, which is why the normal behavior of Reader is encouraged.
|
62
|
-
|
63
|
-
class ForgivingReader
|
64
|
-
include Enumerable
|
65
|
-
|
66
|
-
def initialize(file)
|
67
|
-
if file.class == String
|
68
|
-
@handle = File.new(file)
|
69
|
-
elsif file.class == File
|
70
|
-
@handle = file
|
71
|
-
else
|
72
|
-
throw "must pass in path or File object"
|
73
|
-
end
|
74
|
-
end
|
51
|
+
# A static method for turning raw MARC data in transmission
|
52
|
+
# format into a MARC::Record object.
|
53
|
+
|
54
|
+
def self.decode(marc, params={})
|
55
|
+
record = Record.new()
|
56
|
+
record.leader = marc[0..LEADER_LENGTH-1]
|
57
|
+
|
58
|
+
# where the field data starts
|
59
|
+
base_address = record.leader[12..16].to_i
|
75
60
|
|
61
|
+
# get the byte offsets from the record directory
|
62
|
+
directory = marc[LEADER_LENGTH..base_address-1]
|
76
63
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
64
|
+
throw "invalid directory in record" if directory == nil
|
65
|
+
|
66
|
+
# the number of fields in the record corresponds to
|
67
|
+
# how many directory entries there are
|
68
|
+
num_fields = directory.length / DIRECTORY_ENTRY_LENGTH
|
69
|
+
|
70
|
+
# when operating in forgiving mode we just split on end of
|
71
|
+
# field instead of using calculated byte offsets from the
|
72
|
+
# directory
|
73
|
+
all_fields = marc[base_address..-1].split(END_OF_FIELD)
|
74
|
+
|
75
|
+
0.upto(num_fields-1) do |field_num|
|
76
|
+
|
77
|
+
# pull the directory entry for a field out
|
78
|
+
entry_start = field_num * DIRECTORY_ENTRY_LENGTH
|
79
|
+
entry_end = entry_start + DIRECTORY_ENTRY_LENGTH
|
80
|
+
entry = directory[entry_start..entry_end]
|
81
|
+
|
82
|
+
# extract the tag
|
83
|
+
tag = entry[0..2]
|
84
|
+
|
85
|
+
# get the actual field data
|
86
|
+
# if we were told to be forgiving we just use the
|
87
|
+
# next available chunk of field data that we
|
88
|
+
# split apart based on the END_OF_FIELD
|
89
|
+
field_data = ''
|
90
|
+
if params[:forgiving]
|
91
|
+
field_data = all_fields.shift()
|
92
|
+
|
93
|
+
# otherwise we actually use the byte offsets in
|
94
|
+
# directory to figure out what field data to extract
|
95
|
+
else
|
96
|
+
length = entry[3..6].to_i
|
97
|
+
offset = entry[7..11].to_i
|
98
|
+
field_start = base_address + offset
|
99
|
+
field_end = field_start + length - 1
|
100
|
+
field_data = marc[field_start..field_end]
|
87
101
|
end
|
88
102
|
|
103
|
+
# remove end of field
|
104
|
+
field_data.delete!(END_OF_FIELD)
|
105
|
+
|
106
|
+
# add a control field or variable field
|
107
|
+
if tag < '010'
|
108
|
+
record.append(MARC::Control.new(tag,field_data))
|
109
|
+
else
|
110
|
+
field = MARC::Field.new(tag)
|
111
|
+
|
112
|
+
# get all subfields
|
113
|
+
subfields = field_data.split(SUBFIELD_INDICATOR)
|
114
|
+
|
115
|
+
# must have at least 2 elements (indicators, and 1 subfield)
|
116
|
+
# TODO some sort of logging?
|
117
|
+
next if subfields.length() < 2
|
118
|
+
|
119
|
+
# get indicators
|
120
|
+
indicators = subfields.shift()
|
121
|
+
field.indicator1 = indicators[0,1]
|
122
|
+
field.indicator2 = indicators[1,1]
|
123
|
+
|
124
|
+
# add each subfield to the field
|
125
|
+
subfields.each() do |data|
|
126
|
+
subfield = MARC::Subfield.new(data[0,1],data[1..-1])
|
127
|
+
field.append(subfield)
|
128
|
+
end
|
129
|
+
|
130
|
+
# add the field to the record
|
131
|
+
record.append(field)
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
return record
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
|
140
|
+
# Like Reader, ForgivingReader lets you read in a batch of MARC21 records
|
141
|
+
# but it does not use record lengths and field byte offsets found in the
|
142
|
+
# leader and directory. It is not unusual to run across MARC records
|
143
|
+
# which have had their offsets calculated wrong. In situations like this
|
144
|
+
# the vanilla Reader may fail, and you can try to use ForgivingReader.
|
145
|
+
|
146
|
+
# The one downside to this is that ForgivingReader will assume that the
|
147
|
+
# order of the fields in the directory is the same as the order of fields
|
148
|
+
# in the field data. Hopefully this will be the case, but it is not
|
149
|
+
# 100% guaranteed, which is why the normal behavior of Reader is encouraged.
|
150
|
+
|
151
|
+
class ForgivingReader
|
152
|
+
include Enumerable
|
153
|
+
|
154
|
+
def initialize(file)
|
155
|
+
if file.class == String
|
156
|
+
@handle = File.new(file)
|
157
|
+
elsif file.class == File
|
158
|
+
@handle = file
|
159
|
+
else
|
160
|
+
throw "must pass in path or File object"
|
161
|
+
end
|
89
162
|
end
|
90
163
|
|
91
164
|
|
165
|
+
def each
|
166
|
+
@handle.each_line(END_OF_RECORD) do |raw|
|
167
|
+
begin
|
168
|
+
record = MARC::Reader.decode(raw, :forgiving => true)
|
169
|
+
yield record
|
170
|
+
rescue StandardError => e
|
171
|
+
# caught exception just keep barrelling along
|
172
|
+
# TODO add logging
|
173
|
+
end
|
174
|
+
end
|
175
|
+
end
|
176
|
+
end
|
92
177
|
end
|