marc 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/marc.rb +3 -1
- data/lib/marc/constants.rb +14 -0
- data/lib/marc/control.rb +24 -24
- data/lib/marc/exception.rb +4 -4
- data/lib/marc/field.rb +119 -119
- data/lib/marc/reader.rb +163 -78
- data/lib/marc/record.rb +114 -117
- data/lib/marc/subfield.rb +19 -20
- data/lib/marc/writer.rb +72 -21
- data/lib/marc/xmlreader.rb +83 -0
- data/lib/marc/xmlwriter.rb +87 -0
- data/test/batch.xml +157 -0
- data/test/tc_xmlreader.rb +34 -0
- data/test/tc_xmlwriter.rb +37 -0
- data/test/ts_marc.rb +2 -0
- metadata +9 -4
- data/lib/marc/marc21.rb +0 -155
data/lib/marc/record.rb
CHANGED
@@ -1,123 +1,120 @@
|
|
1
1
|
module MARC
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
def append(field)
|
29
|
-
@fields.push(field)
|
30
|
-
end
|
31
|
-
|
32
|
-
# each() is here to support iterating and searching since MARC::Record
|
33
|
-
# mixes in Enumberable
|
34
|
-
#
|
35
|
-
# iterating through the fields in a record:
|
36
|
-
# record.each { |f| print f }
|
37
|
-
#
|
38
|
-
# getting the 245
|
39
|
-
# title = record.find {|f| f.tag == '245'}
|
40
|
-
#
|
41
|
-
# getting all subjects
|
42
|
-
# subjects = record.find_all {|f| ('600'..'699' === f.tag)}
|
43
|
-
|
44
|
-
def each
|
45
|
-
for field in @fields
|
46
|
-
yield field
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
# You can lookup fields using this shorthand:
|
51
|
-
# title = record['245']
|
52
|
-
|
53
|
-
def [](tag)
|
54
|
-
return self.find {|f| f.tag == tag}
|
55
|
-
end
|
56
|
-
|
57
|
-
# Factory method for creating a MARC::Record from MARC21 in
|
58
|
-
# transmission format. Really this is just a wrapper around
|
59
|
-
# MARC::MARC21::decode
|
60
|
-
#
|
61
|
-
# record = MARC::Record.new_from_marc(marc21)
|
62
|
-
#
|
63
|
-
# in cases where you might be working with somewhat flawed
|
64
|
-
# MARC data you may want to use the :forgiving parameter which
|
65
|
-
# will bypass using field byte offsets and simply look for the
|
66
|
-
# end of field byte to figure out the end of fields.
|
67
|
-
#
|
68
|
-
# record = MARC::Record.new_from_marc(marc21, :forgiving => true)
|
69
|
-
|
70
|
-
def self.new_from_marc(raw, params={})
|
71
|
-
return MARC::MARC21.new().decode(raw, params)
|
72
|
-
end
|
73
|
-
|
74
|
-
|
75
|
-
# Handy method for returning a the MARC21 serialization for a
|
76
|
-
# MARC::Record object. Really this is just a wrapper around
|
77
|
-
# MARC::MARC21::encode
|
78
|
-
#
|
79
|
-
# marc = record.to_marc()
|
80
|
-
|
81
|
-
def to_marc
|
82
|
-
return MARC::MARC21.new().encode(self)
|
83
|
-
end
|
84
|
-
|
85
|
-
|
86
|
-
# Returns a string version of the record, suitable for printing
|
87
|
-
|
88
|
-
def to_s
|
89
|
-
str = "LEADER #{leader}\n"
|
90
|
-
for field in fields:
|
91
|
-
str += field.to_s() + "\n"
|
92
|
-
end
|
93
|
-
return str
|
94
|
-
end
|
95
|
-
|
96
|
-
|
97
|
-
# For testing if two records can be considered equal.
|
98
|
-
|
99
|
-
def ==(other)
|
100
|
-
if @leader != other.leader:
|
101
|
-
return false
|
102
|
-
elsif @fields.length != other.fields.length()
|
103
|
-
return false
|
104
|
-
else
|
105
|
-
for i in [0..@fields.length()]:
|
106
|
-
return false if @fields[i] != other.fields[i]
|
107
|
-
end
|
108
|
-
end
|
109
|
-
return true
|
110
|
-
end
|
111
|
-
|
112
|
-
|
113
|
-
# Handy for using a record in a regex:
|
114
|
-
# if record =~ /Gravity's Rainbow/ then print "Slothrop" end
|
115
|
-
|
116
|
-
def =~(regex)
|
117
|
-
return self.to_s =~ regex
|
118
|
-
end
|
3
|
+
# A class that represents an individual MARC record. Every record
|
4
|
+
# is made up of a collection of MARC::Field objects.
|
5
|
+
|
6
|
+
class Record
  include Enumerable

  # fields: the record's fields, in the order they were appended
  # leader: the record leader string (24 characters by default)
  attr_accessor :fields, :leader

  # Creates an empty record with a blank leader carrying the standard
  # defaults.
  def initialize
    @fields = []
    # leader is 24 bytes
    @leader = ' ' * 24
    # leader defaults:
    # http://www.loc.gov/marc/bibliographic/ecbdldrd.html
    @leader[10..11] = '22'
    @leader[20..23] = '4500'
  end

  # add a field to the record
  #   record.append(MARC::Field.new( '100', '2', '0', ['a', 'Fred']))
  #
  # Returns the fields array.

  def append(field)
    @fields.push(field)
  end

  # each() is here to support iterating and searching since MARC::Record
  # mixes in Enumerable
  #
  # iterating through the fields in a record:
  #   record.each { |f| print f }
  #
  # getting the 245
  #   title = record.find {|f| f.tag == '245'}
  #
  # getting all subjects
  #   subjects = record.find_all {|f| ('600'..'699' === f.tag)}
  #
  # When called without a block an Enumerator over the fields is returned.

  def each(&block)
    return enum_for(:each) unless block
    @fields.each(&block)
  end

  # You can lookup fields using this shorthand:
  #   title = record['245']
  #
  # Returns the first field whose tag equals +tag+, or nil if none matches.

  def [](tag)
    find { |f| f.tag == tag }
  end

  # Factory method for creating a MARC::Record from MARC21 in
  # transmission format. This is a thin wrapper around
  # MARC::Reader.decode.
  #
  #   record = MARC::Record.new_from_marc(marc21)
  #
  # in cases where you might be working with somewhat flawed
  # MARC data you may want to use the :forgiving parameter which
  # will bypass using field byte offsets and simply look for the
  # end of field byte to figure out the end of fields.
  #
  #   record = MARC::Record.new_from_marc(marc21, :forgiving => true)

  def self.new_from_marc(raw, params={})
    MARC::Reader.decode(raw, params)
  end

  # Handy method for returning the MARC21 serialization for a
  # MARC::Record object. This is a thin wrapper around
  # MARC::Writer.encode.
  #
  #   marc = record.to_marc()

  def to_marc
    MARC::Writer.encode(self)
  end

  # Handy method for returning the MARCXML serialization for a
  # MARC::Record object. Really this is just a wrapper around
  # MARC::MARCXML::encode
  #
  #   xml = record.to_xml()
  #
  # NOTE(review): MARC::MARCXML is not defined in any source visible
  # alongside this class; confirm that it exists (or whether the XML
  # writer class was intended) before relying on this method.

  def to_xml
    MARC::MARCXML.new().encode(self)
  end

  # Returns a string version of the record, suitable for printing:
  # a "LEADER ..." line followed by one line per field.

  def to_s
    str = "LEADER #{leader}\n"
    fields.each { |field| str << field.to_s << "\n" }
    str
  end

  # For testing if two records can be considered equal. Two records
  # are equal when their string representations are identical.

  def ==(other)
    to_s == other.to_s
  end

  # Handy for using a record in a regex:
  #   if record =~ /Gravity's Rainbow/ then print "Slothrop" end

  def =~(regex)
    to_s =~ regex
  end

end
|
120
|
+
end
|
data/lib/marc/subfield.rb
CHANGED
@@ -1,27 +1,26 @@
|
|
1
1
|
module MARC
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
# A single subfield of a data field: a code paired with a value.
class Subfield
  attr_accessor :code, :value

  # Both arguments default to the empty string; an explicit nil is
  # also stored as ''.
  def initialize(code='', value='')
    # can't allow code or value to be nil
    # or else it'll screw us up later on
    @code = code.nil? ? '' : code
    @value = value.nil? ? '' : value
  end

  # Two subfields are equal when both code and value match. Comparing
  # against an object that has no code/value (nil included) returns
  # false rather than raising NoMethodError.
  def ==(other)
    return false unless other.respond_to?(:code) && other.respond_to?(:value)
    @code == other.code && @value == other.value
  end

  # Renders the subfield as "$" followed by the code and the value.
  def to_s
    "$#{code}#{value}"
  end
end
|
27
26
|
end
|
data/lib/marc/writer.rb
CHANGED
@@ -1,36 +1,87 @@
|
|
1
1
|
module MARC
|
2
2
|
|
3
|
-
|
3
|
+
# A class for writing MARC records as MARC21.
|
4
4
|
|
5
|
-
|
5
|
+
class Writer

  # the constructor which you must pass a file path
  # or an object that responds to a write message
  #
  # A String is treated as a path and opened for writing; any other
  # object responding to +write+ is used as-is. Anything else raises
  # ArgumentError.

  def initialize(file)
    if file.is_a?(String)
      @fh = File.new(file, "w")
    elsif file.respond_to?('write')
      @fh = file
    else
      raise ArgumentError, "must pass in file name or handle"
    end
  end


  # write a record to the file or handle

  def write(record)
    @fh.write(MARC::Writer.encode(record))
  end


  # close underlying filehandle

  def close
    @fh.close
  end


  # a static method that accepts a MARC::Record object
  # and returns the record encoded as MARC21 in transmission format
  #
  # As a side effect the leader of the record that was passed in is
  # replaced with the updated leader (record length and base address).
  #
  # All lengths and offsets are measured in bytes, not characters, so
  # that the directory stays correct for multibyte field data.

  def self.encode(record)
    directory = ''
    fields = ''
    offset = 0
    record.fields.each do |field|

      # encode the field; += (not <<) so the strings owned by the
      # field objects are never modified
      field_data = ''
      if field.class == MARC::Field
        field_data = field.indicator1 + field.indicator2
        field.subfields.each do |s|
          field_data += SUBFIELD_INDICATOR + s.code + s.value
        end
      elsif field.class == MARC::Control
        field_data = field.value
      end
      field_data += END_OF_FIELD

      # calculate directory entry for the field
      field_length = field_data.bytesize
      directory += sprintf("%03s%04i%05i", field.tag, field_length,
        offset)

      # add field to data for other fields
      fields += field_data

      # update offset for next field
      offset += field_length
    end

    # determine the base (leader + directory)
    base = record.leader + directory + END_OF_FIELD

    # determine complete record
    marc = base + fields + END_OF_RECORD

    # update leader with the byte offset to the end of the directory
    marc[12..16] = sprintf("%05i", base.bytesize)

    # update the record length
    marc[0..4] = sprintf("%05i", marc.bytesize)

    # store updated leader in the record that was passed in
    record.leader = marc[0..LEADER_LENGTH-1]

    # return encoded marc
    marc
  end
end
|
36
87
|
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require 'rexml/document'
|
2
|
+
require 'rexml/parsers/pullparser'
|
3
|
+
|
4
|
+
module MARC
|
5
|
+
|
6
|
+
# A class for reading MARC records from MARCXML using the REXML pull
# parser. Records are produced one at a time by each().
class XMLReader
  include Enumerable

  # Pass in the path of a MARCXML file, or an already opened IO-like
  # object (anything responding to eof?, e.g. File or StringIO).

  def initialize(filename)
    # eof? separates streams from path-like arguments (String, Pathname)
    source = filename.respond_to?(:eof?) ? filename : File.new(filename)
    @parser = REXML::Parsers::PullParser.new(source)
  end

  # Yields a MARC::Record for every record element found in the
  # remaining input.

  def each
    while @parser.has_next?
      event = @parser.pull
      # if it's the start of a record element
      if event.start_element? && strip_ns(event[0]) == 'record'
        yield build_record
      end
    end
  end

  private

  # Returns the element name without its namespace prefix. Uses the
  # non-destructive sub so that names without a prefix come back
  # unchanged (sub! would return nil) and the parser's own string is
  # left untouched.

  def strip_ns(str)
    str.sub(/^.*:/, '')
  end

  # will accept parse events until a record has been built up
  #
  # Returns the record when the closing record element is seen, or nil
  # if the input ends first.

  def build_record
    record = MARC::Record.new
    data_field = nil
    control_field = nil
    subfield = nil
    text = ''

    while @parser.has_next?
      event = @parser.pull

      if event.text?
        # event[0] is the raw text, so entity references such as the
        # escaped ampersand still need to be decoded
        text += REXML::Text.unnormalize(event[0].strip)
        next
      end

      if event.start_element?
        attrs = event[1]
        case strip_ns(event[0])
        when 'controlfield'
          text = ''
          control_field = MARC::Control.new(attrs['tag'])
        when 'datafield'
          text = ''
          data_field = MARC::Field.new(attrs['tag'], attrs['ind1'],
            attrs['ind2'])
        when 'subfield'
          text = ''
          subfield = MARC::Subfield.new(attrs['code'])
        end
      end

      if event.end_element?
        case strip_ns(event[0])
        when 'leader'
          record.leader = text
        when 'record'
          return record
        when 'controlfield'
          control_field.value = text
          record.append(control_field)
        when 'datafield'
          record.append(data_field)
        when 'subfield'
          subfield.value = text
          data_field.append(subfield)
        end
      end
    end
  end
end
|
83
|
+
end
|