marc 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/marc.rb +3 -1
- data/lib/marc/constants.rb +14 -0
- data/lib/marc/control.rb +24 -24
- data/lib/marc/exception.rb +4 -4
- data/lib/marc/field.rb +119 -119
- data/lib/marc/reader.rb +163 -78
- data/lib/marc/record.rb +114 -117
- data/lib/marc/subfield.rb +19 -20
- data/lib/marc/writer.rb +72 -21
- data/lib/marc/xmlreader.rb +83 -0
- data/lib/marc/xmlwriter.rb +87 -0
- data/test/batch.xml +157 -0
- data/test/tc_xmlreader.rb +34 -0
- data/test/tc_xmlwriter.rb +37 -0
- data/test/ts_marc.rb +2 -0
- metadata +9 -4
- data/lib/marc/marc21.rb +0 -155
data/lib/marc.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'marc/constants'
|
1
2
|
require 'marc/record'
|
2
3
|
require 'marc/field'
|
3
4
|
require 'marc/control'
|
@@ -5,4 +6,5 @@ require 'marc/subfield'
|
|
5
6
|
require 'marc/reader'
|
6
7
|
require 'marc/writer'
|
7
8
|
require 'marc/exception'
|
8
|
-
require 'marc/
|
9
|
+
require 'marc/xmlwriter'
|
10
|
+
require 'marc/xmlreader'
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module MARC
|
2
|
+
|
3
|
+
# constants used in MARC21 reading/writing
|
4
|
+
LEADER_LENGTH = 24
|
5
|
+
DIRECTORY_ENTRY_LENGTH = 12
|
6
|
+
SUBFIELD_INDICATOR = 0x1F.chr
|
7
|
+
END_OF_FIELD = 0x1E.chr
|
8
|
+
END_OF_RECORD = 0x1D.chr
|
9
|
+
|
10
|
+
# constants used in XML reading/writing
|
11
|
+
MARC_NS = "http://www.loc.gov/MARC21/slim"
|
12
|
+
MARC_XSD = "http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd"
|
13
|
+
|
14
|
+
end
|
data/lib/marc/control.rb
CHANGED
@@ -1,36 +1,36 @@
|
|
1
1
|
module MARC
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
3
|
+
# A class for representing fields with a tag less than 010.
|
4
|
+
# Ordinary MARC::Field objects are for fields with tags >= 010
|
5
|
+
# which have indicators and subfields.
|
6
6
|
|
7
|
-
|
7
|
+
class Control
|
8
8
|
|
9
|
-
|
10
|
-
|
9
|
+
# the tag value (007, 008, etc)
|
10
|
+
attr_accessor :tag
|
11
11
|
|
12
|
-
|
13
|
-
|
12
|
+
# the value of the control field
|
13
|
+
attr_accessor :value
|
14
14
|
|
15
|
-
|
16
|
-
|
15
|
+
# The constructor which must be passed a tag value and
|
16
|
+
# an optional value for the field.
|
17
17
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
def to_s
|
27
|
-
return "#{tag} #{value}"
|
28
|
-
end
|
18
|
+
def initialize(tag,value='')
|
19
|
+
@tag = tag
|
20
|
+
@value = value
|
21
|
+
if tag.to_i > 9
|
22
|
+
raise MARC::Exception.new(), "tag must be greater than 009"
|
23
|
+
end
|
24
|
+
end
|
29
25
|
|
30
|
-
|
31
|
-
|
32
|
-
|
26
|
+
def to_s
|
27
|
+
return "#{tag} #{value}"
|
28
|
+
end
|
33
29
|
|
30
|
+
def =~(regex)
|
31
|
+
return self.to_s =~ regex
|
34
32
|
end
|
35
33
|
|
34
|
+
end
|
35
|
+
|
36
36
|
end
|
data/lib/marc/exception.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
module MARC
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
# basic exception class for exceptions that
|
4
|
+
# can occur during MARC processing.
|
5
5
|
|
6
|
-
|
7
|
-
|
6
|
+
class Exception < RuntimeError
|
7
|
+
end
|
8
8
|
|
9
9
|
end
|
data/lib/marc/field.rb
CHANGED
@@ -3,141 +3,141 @@ require 'marc/record'
|
|
3
3
|
|
4
4
|
module MARC
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
end
|
73
|
-
end
|
6
|
+
# MARC records are made up of fields, each of which has a tag,
|
7
|
+
# indicators and subfields. If the tag is between 000-009 it is
|
8
|
+
# known as a control field, and actually does not have any
|
9
|
+
# indicators.
|
10
|
+
|
11
|
+
class Field
|
12
|
+
include Enumerable
|
13
|
+
|
14
|
+
# The tag for the field
|
15
|
+
attr_accessor :tag
|
16
|
+
|
17
|
+
# The first indicator
|
18
|
+
attr_accessor :indicator1
|
19
|
+
|
20
|
+
# The second indicator
|
21
|
+
attr_accessor :indicator2
|
22
|
+
|
23
|
+
# A list of MARC::Subfield objects
|
24
|
+
attr_accessor :subfields
|
25
|
+
|
26
|
+
|
27
|
+
# Create a new field with tag, indicators and subfields.
|
28
|
+
# Subfields are passed in as comma separated list of
|
29
|
+
# MARC::Subfield objects,
|
30
|
+
#
|
31
|
+
# field = MARC::Field.new('245','0','0',
|
32
|
+
# MARC::Subfield.new('a', 'Consilience :'),
|
33
|
+
# MARC::Subfield.new('b', 'the unity of knowledge '),
|
34
|
+
# MARC::Subfield.new('c', 'by Edward O. Wilson.'))
|
35
|
+
#
|
36
|
+
# or using a shorthand:
|
37
|
+
#
|
38
|
+
# field = MARC::Field.new('245','0','0',
|
39
|
+
# ['a', 'Consilience :'],['b','the unity of knowledge '],
|
40
|
+
# ['c', 'by Edward O. Wilson.'] )
|
41
|
+
|
42
|
+
def initialize(tag, i1=' ', i2=' ', *subfields)
|
43
|
+
@tag = tag
|
44
|
+
# can't allow nil to be passed in or else it'll
|
45
|
+
# screw us up later when we try to encode
|
46
|
+
@indicator1 = i1 == nil ? ' ' : i1
|
47
|
+
@indicator2 = i2 == nil ? ' ' : i2
|
48
|
+
@subfields = []
|
49
|
+
|
50
|
+
# must use MARC::Control for tags < 010
|
51
|
+
if @tag.to_i < 10
|
52
|
+
raise MARC::Exception.new(),
|
53
|
+
"MARC::Field objects can't have tags < 010"
|
54
|
+
end
|
55
|
+
|
56
|
+
# allows MARC::Subfield objects to be passed directly
|
57
|
+
# or a shorthand of ['a','Foo'], ['b','Bar']
|
58
|
+
subfields.each do |subfield|
|
59
|
+
case subfield
|
60
|
+
when MARC::Subfield
|
61
|
+
@subfields.push(subfield)
|
62
|
+
when Array
|
63
|
+
if subfield.length > 2
|
64
|
+
raise MARC::Exception.new(),
|
65
|
+
"arrays must only have 2 elements"
|
66
|
+
end
|
67
|
+
@subfields.push(
|
68
|
+
MARC::Subfield.new(subfield[0],subfield[1]))
|
69
|
+
else
|
70
|
+
raise MARC::Exception.new(),
|
71
|
+
"invalid subfield type #{subfield.class}"
|
74
72
|
end
|
73
|
+
end
|
74
|
+
end
|
75
75
|
|
76
76
|
|
77
|
-
|
78
|
-
|
77
|
+
# Returns a string representation of the field such as:
|
78
|
+
# 245 00 $aConsilience :$bthe unity of knowledge $cby Edward O. Wilson.
|
79
79
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
80
|
+
def to_s
|
81
|
+
str = "#{tag} "
|
82
|
+
str += "#{indicator1}#{indicator2} "
|
83
|
+
@subfields.each { |subfield| str += subfield.to_s }
|
84
|
+
return str
|
85
|
+
end
|
86
86
|
|
87
87
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
def append(subfield)
|
92
|
-
@subfields.push(subfield)
|
93
|
-
end
|
88
|
+
# Add a subfield (MARC::Subfield) to the field
|
89
|
+
# field.append(MARC::Subfield.new('a','Dave Thomas'))
|
94
90
|
|
91
|
+
def append(subfield)
|
92
|
+
@subfields.push(subfield)
|
93
|
+
end
|
95
94
|
|
96
|
-
# You can iterate through the subfields in a Field:
|
97
|
-
# field.each {|s| print s}
|
98
95
|
|
99
|
-
|
100
|
-
|
101
|
-
yield subfield
|
102
|
-
end
|
103
|
-
end
|
96
|
+
# You can iterate through the subfields in a Field:
|
97
|
+
# field.each {|s| print s}
|
104
98
|
|
99
|
+
def each
|
100
|
+
for subfield in subfields
|
101
|
+
yield subfield
|
102
|
+
end
|
103
|
+
end
|
105
104
|
|
106
|
-
# You can lookup subfields with this shorthand. Note it
|
107
|
-
# will return a string and not a MARC::Subfield object.
|
108
|
-
# subfield = field['a']
|
109
|
-
|
110
|
-
def [](code)
|
111
|
-
subfield = self.find {|s| s.code == code}
|
112
|
-
return subfield.value if subfield
|
113
|
-
return
|
114
|
-
end
|
115
105
|
|
106
|
+
# You can lookup subfields with this shorthand. Note it
|
107
|
+
# will return a string and not a MARC::Subfield object.
|
108
|
+
# subfield = field['a']
|
109
|
+
|
110
|
+
def [](code)
|
111
|
+
subfield = self.find {|s| s.code == code}
|
112
|
+
return subfield.value if subfield
|
113
|
+
return
|
114
|
+
end
|
116
115
|
|
117
|
-
# Two fields are equal if their tag, indicators and
|
118
|
-
# subfields are all equal.
|
119
|
-
|
120
|
-
def ==(other)
|
121
|
-
if @tag != other.tag
|
122
|
-
return false
|
123
|
-
elsif @indicator1 != other.indicator1
|
124
|
-
return false
|
125
|
-
elsif @indicator2 != other.indicator2
|
126
|
-
return false
|
127
|
-
elsif @subfields != other.subfields
|
128
|
-
return false
|
129
|
-
end
|
130
|
-
return true
|
131
|
-
end
|
132
116
|
|
117
|
+
# Two fields are equal if their tag, indicators and
|
118
|
+
# subfields are all equal.
|
119
|
+
|
120
|
+
def ==(other)
|
121
|
+
if @tag != other.tag
|
122
|
+
return false
|
123
|
+
elsif @indicator1 != other.indicator1
|
124
|
+
return false
|
125
|
+
elsif @indicator2 != other.indicator2
|
126
|
+
return false
|
127
|
+
elsif @subfields != other.subfields
|
128
|
+
return false
|
129
|
+
end
|
130
|
+
return true
|
131
|
+
end
|
133
132
|
|
134
|
-
# To support regex matching with fields
|
135
|
-
#
|
136
|
-
# if field =~ /Huckleberry/ ...
|
137
133
|
|
138
|
-
|
139
|
-
|
140
|
-
|
134
|
+
# To support regex matching with fields
|
135
|
+
#
|
136
|
+
# if field =~ /Huckleberry/ ...
|
141
137
|
|
138
|
+
def =~(regex)
|
139
|
+
return self.to_s =~ regex
|
142
140
|
end
|
141
|
+
|
142
|
+
end
|
143
143
|
end
|
data/lib/marc/reader.rb
CHANGED
@@ -1,92 +1,177 @@
|
|
1
1
|
module MARC
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
# to support iteration:
|
28
|
-
# for record in reader
|
29
|
-
# print record
|
30
|
-
# end
|
31
|
-
#
|
32
|
-
# and even searching:
|
33
|
-
# record.find { |f| f['245'] =~ /Huckleberry/ }
|
34
|
-
|
35
|
-
def each
|
36
|
-
# while there is data left in the file
|
37
|
-
while length = @handle.read(5)
|
38
|
-
|
39
|
-
# get the raw MARC21 for a record back from the file
|
40
|
-
# using the record length
|
41
|
-
raw = length + @handle.read(length.to_i-5)
|
42
|
-
|
43
|
-
# create a record from the data and return it
|
44
|
-
record = MARC::Record.new_from_marc(raw)
|
45
|
-
yield record
|
46
|
-
end
|
47
|
-
end
|
3
|
+
class Reader
|
4
|
+
include Enumerable
|
5
|
+
|
6
|
+
# The constructor which you may pass either a path
|
7
|
+
#
|
8
|
+
# reader = MARC::Reader.new('marc.dat')
|
9
|
+
#
|
10
|
+
# or, if it's more convenient a File object:
|
11
|
+
#
|
12
|
+
# fh = File.new('marc.dat')
|
13
|
+
# reader = MARC::Reader.new(fh)
|
14
|
+
#
|
15
|
+
# or really any object that responds to read(n).
|
16
|
+
|
17
|
+
def initialize(file)
|
18
|
+
if file.class == String:
|
19
|
+
@handle = File.new(file)
|
20
|
+
elsif file.respond_to?("read", 5)
|
21
|
+
@handle = file
|
22
|
+
else
|
23
|
+
throw "must pass in path or file"
|
24
|
+
end
|
25
|
+
end
|
48
26
|
|
27
|
+
# to support iteration:
|
28
|
+
# for record in reader
|
29
|
+
# print record
|
30
|
+
# end
|
31
|
+
#
|
32
|
+
# and even searching:
|
33
|
+
# record.find { |f| f['245'] =~ /Huckleberry/ }
|
34
|
+
|
35
|
+
def each
|
36
|
+
# while there is data left in the file
|
37
|
+
while length = @handle.read(5)
|
38
|
+
|
39
|
+
# get the raw MARC21 for a record back from the file
|
40
|
+
# using the record length
|
41
|
+
raw = length + @handle.read(length.to_i-5)
|
42
|
+
|
43
|
+
# create a record from the data and return it
|
44
|
+
#record = MARC::Record.new_from_marc(raw)
|
45
|
+
record = MARC::Reader.decode(raw)
|
46
|
+
yield record
|
47
|
+
end
|
49
48
|
end
|
50
49
|
|
51
50
|
|
52
|
-
#
|
53
|
-
#
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
# 100% guranteed which is why the normal behavior of Reader is encouraged.
|
62
|
-
|
63
|
-
class ForgivingReader
|
64
|
-
include Enumerable
|
65
|
-
|
66
|
-
def initialize(file)
|
67
|
-
if file.class == String
|
68
|
-
@handle = File.new(file)
|
69
|
-
elsif file.class == File
|
70
|
-
@handle = file
|
71
|
-
else
|
72
|
-
throw "must pass in path or File object"
|
73
|
-
end
|
74
|
-
end
|
51
|
+
# A static method for turning raw MARC data in transmission
|
52
|
+
# format into a MARC::Record object.
|
53
|
+
|
54
|
+
def self.decode(marc, params={})
|
55
|
+
record = Record.new()
|
56
|
+
record.leader = marc[0..LEADER_LENGTH-1]
|
57
|
+
|
58
|
+
# where the field data starts
|
59
|
+
base_address = record.leader[12..16].to_i
|
75
60
|
|
61
|
+
# get the byte offsets from the record directory
|
62
|
+
directory = marc[LEADER_LENGTH..base_address-1]
|
76
63
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
64
|
+
throw "invalid directory in record" if directory == nil
|
65
|
+
|
66
|
+
# the number of fields in the record corresponds to
|
67
|
+
# how many directory entries there are
|
68
|
+
num_fields = directory.length / DIRECTORY_ENTRY_LENGTH
|
69
|
+
|
70
|
+
# when operating in forgiving mode we just split on end of
|
71
|
+
# field instead of using calculated byte offsets from the
|
72
|
+
# directory
|
73
|
+
all_fields = marc[base_address..-1].split(END_OF_FIELD)
|
74
|
+
|
75
|
+
0.upto(num_fields-1) do |field_num|
|
76
|
+
|
77
|
+
# pull the directory entry for a field out
|
78
|
+
entry_start = field_num * DIRECTORY_ENTRY_LENGTH
|
79
|
+
entry_end = entry_start + DIRECTORY_ENTRY_LENGTH
|
80
|
+
entry = directory[entry_start..entry_end]
|
81
|
+
|
82
|
+
# extract the tag
|
83
|
+
tag = entry[0..2]
|
84
|
+
|
85
|
+
# get the actual field data
|
86
|
+
# if we were told to be forgiving we just use the
|
87
|
+
# next available chunk of field data that we
|
88
|
+
# split apart based on the END_OF_FIELD
|
89
|
+
field_data = ''
|
90
|
+
if params[:forgiving]
|
91
|
+
field_data = all_fields.shift()
|
92
|
+
|
93
|
+
# otherwise we actually use the byte offsets in
|
94
|
+
# directory to figure out what field data to extract
|
95
|
+
else
|
96
|
+
length = entry[3..6].to_i
|
97
|
+
offset = entry[7..11].to_i
|
98
|
+
field_start = base_address + offset
|
99
|
+
field_end = field_start + length - 1
|
100
|
+
field_data = marc[field_start..field_end]
|
87
101
|
end
|
88
102
|
|
103
|
+
# remove end of field
|
104
|
+
field_data.delete!(END_OF_FIELD)
|
105
|
+
|
106
|
+
# add a control field or variable field
|
107
|
+
if tag < '010'
|
108
|
+
record.append(MARC::Control.new(tag,field_data))
|
109
|
+
else
|
110
|
+
field = MARC::Field.new(tag)
|
111
|
+
|
112
|
+
# get all subfields
|
113
|
+
subfields = field_data.split(SUBFIELD_INDICATOR)
|
114
|
+
|
115
|
+
# must have at least 2 elements (indicators, and 1 subfield)
|
116
|
+
# TODO some sort of logging?
|
117
|
+
next if subfields.length() < 2
|
118
|
+
|
119
|
+
# get indicators
|
120
|
+
indicators = subfields.shift()
|
121
|
+
field.indicator1 = indicators[0,1]
|
122
|
+
field.indicator2 = indicators[1,1]
|
123
|
+
|
124
|
+
# add each subfield to the field
|
125
|
+
subfields.each() do |data|
|
126
|
+
subfield = MARC::Subfield.new(data[0,1],data[1..-1])
|
127
|
+
field.append(subfield)
|
128
|
+
end
|
129
|
+
|
130
|
+
# add the field to the record
|
131
|
+
record.append(field)
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
return record
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
|
140
|
+
# Like Reader, ForgivingReader lets you read in a batch of MARC21 records
|
141
|
+
# but it does not use record lengths and field byte offsets found in the
|
142
|
+
# leader and directory. It is not unusual to run across MARC records
|
143
|
+
# which have had their offsets calculated wrong. In situations like this
|
144
|
+
# the vanilla Reader may fail, and you can try to use ForgivingReader.
|
145
|
+
|
146
|
+
# The one downside to this is that ForgivingReader will assume that the
|
147
|
+
# order of the fields in the directory is the same as the order of fields
|
148
|
+
# in the field data. Hopefully this will be the case, but it is not
|
149
|
+
# 100% guaranteed which is why the normal behavior of Reader is encouraged.
|
150
|
+
|
151
|
+
class ForgivingReader
|
152
|
+
include Enumerable
|
153
|
+
|
154
|
+
def initialize(file)
|
155
|
+
if file.class == String
|
156
|
+
@handle = File.new(file)
|
157
|
+
elsif file.class == File
|
158
|
+
@handle = file
|
159
|
+
else
|
160
|
+
throw "must pass in path or File object"
|
161
|
+
end
|
89
162
|
end
|
90
163
|
|
91
164
|
|
165
|
+
def each
|
166
|
+
@handle.each_line(END_OF_RECORD) do |raw|
|
167
|
+
begin
|
168
|
+
record = MARC::Reader.decode(raw, :forgiving => true)
|
169
|
+
yield record
|
170
|
+
rescue StandardError => e
|
171
|
+
# caught exception just keep barrelling along
|
172
|
+
# TODO add logging
|
173
|
+
end
|
174
|
+
end
|
175
|
+
end
|
176
|
+
end
|
92
177
|
end
|