marc 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/marc.rb +3 -1
- data/lib/marc/constants.rb +14 -0
- data/lib/marc/control.rb +24 -24
- data/lib/marc/exception.rb +4 -4
- data/lib/marc/field.rb +119 -119
- data/lib/marc/reader.rb +163 -78
- data/lib/marc/record.rb +114 -117
- data/lib/marc/subfield.rb +19 -20
- data/lib/marc/writer.rb +72 -21
- data/lib/marc/xmlreader.rb +83 -0
- data/lib/marc/xmlwriter.rb +87 -0
- data/test/batch.xml +157 -0
- data/test/tc_xmlreader.rb +34 -0
- data/test/tc_xmlwriter.rb +37 -0
- data/test/ts_marc.rb +2 -0
- metadata +9 -4
- data/lib/marc/marc21.rb +0 -155
data/lib/marc/record.rb
CHANGED
@@ -1,123 +1,120 @@
|
|
1
1
|
module MARC
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
def append(field)
|
29
|
-
@fields.push(field)
|
30
|
-
end
|
31
|
-
|
32
|
-
# each() is here to support iterating and searching since MARC::Record
|
33
|
-
# mixes in Enumerable
|
34
|
-
#
|
35
|
-
# iterating through the fields in a record:
|
36
|
-
# record.each { |f| print f }
|
37
|
-
#
|
38
|
-
# getting the 245
|
39
|
-
# title = record.find {|f| f.tag == '245'}
|
40
|
-
#
|
41
|
-
# getting all subjects
|
42
|
-
# subjects = record.find_all {|f| ('600'..'699' === f.tag)}
|
43
|
-
|
44
|
-
def each
|
45
|
-
for field in @fields
|
46
|
-
yield field
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
# You can lookup fields using this shorthand:
|
51
|
-
# title = record['245']
|
52
|
-
|
53
|
-
def [](tag)
|
54
|
-
return self.find {|f| f.tag == tag}
|
55
|
-
end
|
56
|
-
|
57
|
-
# Factory method for creating a MARC::Record from MARC21 in
|
58
|
-
# transmission format. Really this is just a wrapper around
|
59
|
-
# MARC::MARC21::decode
|
60
|
-
#
|
61
|
-
# record = MARC::Record.new_from_marc(marc21)
|
62
|
-
#
|
63
|
-
# in cases where you might be working with somewhat flawed
|
64
|
-
# MARC data you may want to use the :forgiving parameter which
|
65
|
-
# will bypass using field byte offsets and simply look for the
|
66
|
-
# end of field byte to figure out the end of fields.
|
67
|
-
#
|
68
|
-
# record = MARC::Record.new_from_marc(marc21, :forgiving => true)
|
69
|
-
|
70
|
-
def self.new_from_marc(raw, params={})
|
71
|
-
return MARC::MARC21.new().decode(raw, params)
|
72
|
-
end
|
73
|
-
|
74
|
-
|
75
|
-
# Handy method for returning the MARC21 serialization for a
|
76
|
-
# MARC::Record object. Really this is just a wrapper around
|
77
|
-
# MARC::MARC21::encode
|
78
|
-
#
|
79
|
-
# marc = record.to_marc()
|
80
|
-
|
81
|
-
def to_marc
|
82
|
-
return MARC::MARC21.new().encode(self)
|
83
|
-
end
|
84
|
-
|
85
|
-
|
86
|
-
# Returns a string version of the record, suitable for printing
|
87
|
-
|
88
|
-
def to_s
|
89
|
-
str = "LEADER #{leader}\n"
|
90
|
-
for field in fields:
|
91
|
-
str += field.to_s() + "\n"
|
92
|
-
end
|
93
|
-
return str
|
94
|
-
end
|
95
|
-
|
96
|
-
|
97
|
-
# For testing if two records can be considered equal.
|
98
|
-
|
99
|
-
def ==(other)
|
100
|
-
if @leader != other.leader:
|
101
|
-
return false
|
102
|
-
elsif @fields.length != other.fields.length()
|
103
|
-
return false
|
104
|
-
else
|
105
|
-
for i in [0..@fields.length()]:
|
106
|
-
return false if @fields[i] != other.fields[i]
|
107
|
-
end
|
108
|
-
end
|
109
|
-
return true
|
110
|
-
end
|
111
|
-
|
112
|
-
|
113
|
-
# Handy for using a record in a regex:
|
114
|
-
# if record =~ /Gravity's Rainbow/ then print "Slothrop" end
|
115
|
-
|
116
|
-
def =~(regex)
|
117
|
-
return self.to_s =~ regex
|
118
|
-
end
|
3
|
+
# A class that represents an individual MARC record. Every record
# is made up of a collection of MARC::Field objects.
#
# Record mixes in Enumerable (driven by #each), so the usual iterating
# and searching helpers operate on the record's fields:
#
#   record.each { |f| print f }
#   title    = record.find { |f| f.tag == '245' }
#   subjects = record.find_all { |f| ('600'..'699') === f.tag }

class Record
  include Enumerable

  # the record fields, in the order they were appended
  attr_accessor :fields

  # the record leader
  attr_accessor :leader

  # Creates an empty record: no fields, and a blank 24 character leader
  # carrying the defaults described at
  # http://www.loc.gov/marc/bibliographic/ecbdldrd.html
  def initialize
    @fields = []
    # leader is 24 bytes
    @leader = ' ' * 24
    @leader[10..11] = '22'
    @leader[20..23] = '4500'
  end

  # add a field to the record
  #   record.append(MARC::Field.new( '100', '2', '0', ['a', 'Fred']))
  #
  # Returns the fields array (the result of Array#push).
  def append(field)
    @fields.push(field)
  end

  # Yields every field in order; this is what Enumerable builds on.
  # Without a block an Enumerator over the fields is returned instead
  # of failing with "no block given".
  def each(&block)
    return enum_for(:each) unless block_given?

    @fields.each(&block)
  end

  # You can lookup fields using this shorthand:
  #   title = record['245']
  #
  # Returns the first field whose tag equals +tag+, or nil.
  def [](tag)
    find { |f| f.tag == tag }
  end

  # Factory method for creating a MARC::Record from MARC21 in
  # transmission format. This is a wrapper around MARC::Reader.decode.
  #
  #   record = MARC::Record.new_from_marc(marc21)
  #
  # in cases where you might be working with somewhat flawed
  # MARC data you may want to use the :forgiving parameter which
  # will bypass using field byte offsets and simply look for the
  # end of field byte to figure out the end of fields.
  #
  #   record = MARC::Record.new_from_marc(marc21, :forgiving => true)
  def self.new_from_marc(raw, params={})
    MARC::Reader.decode(raw, params)
  end

  # Handy method for returning the MARC21 serialization for a
  # MARC::Record object. This is a wrapper around MARC::Writer.encode.
  #
  #   marc = record.to_marc()
  def to_marc
    MARC::Writer.encode(self)
  end

  # Handy method for returning the MARCXML serialization for a
  # MARC::Record object. Really this is just a wrapper around
  # MARC::MARCXML::encode
  #
  #   xml = record.to_xml()
  #
  # NOTE(review): MARC::MARCXML is not defined in the part of the code
  # visible here -- confirm the class exists before relying on this.
  def to_xml
    MARC::MARCXML.new().encode(self)
  end

  # Returns a string version of the record, suitable for printing:
  # a "LEADER ..." line followed by one line per field.
  def to_s
    # the original `for field in fields:` (trailing colon) only parses
    # on Ruby 1.8; build the same text with map/join instead
    "LEADER #{leader}\n" + fields.map { |field| field.to_s + "\n" }.join
  end

  # For testing if two records can be considered equal: they are when
  # their string versions match.
  def ==(other)
    to_s == other.to_s
  end

  # Handy for using a record in a regex:
  #   if record =~ /Gravity's Rainbow/ then print "Slothrop" end
  def =~(regex)
    to_s =~ regex
  end
end
|
120
|
+
end
|
data/lib/marc/subfield.rb
CHANGED
@@ -1,27 +1,26 @@
|
|
1
1
|
module MARC
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
# A single subfield of a data field: a code plus its value.

class Subfield
  attr_accessor :code, :value

  # code  - the subfield code; nil is stored as ''
  # value - the subfield value; nil is stored as ''
  def initialize(code='' ,value='')
    # can't allow code or value to be nil
    # or else it'll screw us up later on
    @code = code.nil? ? '' : code
    @value = value.nil? ? '' : value
  end

  # Two subfields are equal when both their code and value match.
  # Anything that does not expose a code and a value (nil, a String, ...)
  # is simply not equal, rather than raising NoMethodError.
  def ==(other)
    return false unless other.respond_to?(:code) && other.respond_to?(:value)
    return false if @code != other.code
    return false if @value != other.value

    true
  end

  # Returns the subfield as "$" followed by the code and the value.
  def to_s
    "$#{code}#{value}"
  end
end
|
27
26
|
end
|
data/lib/marc/writer.rb
CHANGED
@@ -1,36 +1,87 @@
|
|
1
1
|
module MARC
|
2
2
|
|
3
|
-
|
3
|
+
# A class for writing MARC records as MARC21.

class Writer

  # the constructor which you must pass a file path
  # or an object that responds to a write message
  #
  # Raises ArgumentError when given neither.
  def initialize(file)
    if file.is_a?(String)
      @fh = File.new(file, "w")
    elsif file.respond_to?(:write)
      @fh = file
    else
      # `throw` is for catch/throw control flow; an exception is what
      # is meant here
      raise ArgumentError, "must pass in file name or handle"
    end
  end

  # write a record to the file or handle
  def write(record)
    @fh.write(self.class.encode(record))
  end

  # close underlying filehandle
  def close
    @fh.close
  end

  # a static method that accepts a MARC::Record object
  # and returns the record encoded as MARC21 in transmission format
  #
  # Side effect: the leader of the record that was passed in is
  # replaced with the updated leader (record length and base address
  # filled in).
  #
  # All lengths and offsets are counted in bytes, not characters, so
  # multi-byte text still gets a correct directory.
  def self.encode(record)
    directory = ''
    fields = ''
    offset = 0

    record.fields.each do |field|
      # encode the field; anything that is neither a MARC::Field nor a
      # MARC::Control contributes only the end-of-field marker
      field_data = ''
      if field.instance_of?(MARC::Field)
        field_data = field.indicator1 + field.indicator2
        field.subfields.each do |s|
          field_data += SUBFIELD_INDICATOR + s.code + s.value
        end
      elsif field.instance_of?(MARC::Control)
        field_data = field.value
      end
      field_data += END_OF_FIELD

      # calculate directory entry for the field
      field_length = field_data.bytesize
      directory += sprintf("%03s%04i%05i", field.tag, field_length,
                           offset)

      # add field to data for other fields
      fields += field_data

      # update offset for next field
      offset += field_length
    end

    # determine the base (leader + directory)
    base = record.leader + directory + END_OF_FIELD

    # determine complete record
    marc = base + fields + END_OF_RECORD

    # update leader with the byte offset to the end of the directory
    marc[12..16] = sprintf("%05i", base.bytesize)

    # update the record length
    marc[0..4] = sprintf("%05i", marc.bytesize)

    # store updated leader in the record that was passed in
    record.leader = marc[0, LEADER_LENGTH]

    # return encoded marc
    marc
  end
end
|
36
87
|
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require 'rexml/document'
|
2
|
+
require 'rexml/parsers/pullparser'
|
3
|
+
|
4
|
+
module MARC
|
5
|
+
|
6
|
+
# Reads MARC records out of an XML file using REXML's pull parser.
# XMLReader mixes in Enumerable, so records can be iterated or searched:
#
#   MARC::XMLReader.new('batch.xml').each { |record| print record }

class XMLReader
  include Enumerable

  # filename - path of the XML file to read
  def initialize(filename)
    source = File.new(filename)
    @parser = REXML::Parsers::PullParser.new(source)
  end

  # Yields one MARC::Record per record element found in the stream.
  # Without a block an Enumerator is returned. The parser is consumed
  # as it goes, so the records can only be walked once.
  def each
    return enum_for(:each) unless block_given?

    while @parser.has_next?
      event = @parser.pull
      # if it's the start of a record element
      if event.start_element? && strip_ns(event[0]) == 'record'
        yield build_record
      end
    end
  end

  private

  # Returns the element name without its namespace prefix
  # ("marc:record" -> "record"). Names that carry no prefix come back
  # unchanged; the destructive sub! used previously returned nil for
  # them, so un-prefixed elements were never recognised. The argument
  # itself is left untouched.
  def strip_ns(str)
    str.sub(/^.*:/, '')
  end

  # will accept parse events until a record has been built up
  #
  # Returns the finished MARC::Record when the end of the record element
  # is reached, or nil if the parser runs out of events first.
  def build_record
    record = MARC::Record.new
    data_field = nil
    control_field = nil
    subfield = nil
    text = ''

    while @parser.has_next?
      event = @parser.pull

      if event.text?
        # text is collected whitespace-stripped, so leading and trailing
        # blanks of a value are dropped
        text += event[0].strip
      elsif event.start_element?
        attrs = event[1]
        case strip_ns(event[0])
        when 'controlfield'
          text = ''
          control_field = MARC::Control.new(attrs['tag'])
        when 'datafield'
          text = ''
          data_field = MARC::Field.new(attrs['tag'], attrs['ind1'],
                                       attrs['ind2'])
        when 'subfield'
          text = ''
          subfield = MARC::Subfield.new(attrs['code'])
        end
      elsif event.end_element?
        case strip_ns(event[0])
        when 'leader'
          record.leader = text
        when 'record'
          return record
        when 'controlfield'
          control_field.value = text
          record.append(control_field)
        when 'datafield'
          record.append(data_field)
        when 'subfield'
          subfield.value = text
          data_field.append(subfield)
        end
      end
    end

    # ran out of events before the record was closed
    nil
  end
end
|
83
|
+
end
|