rmarc 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +504 -0
- data/lib/rmarc.rb +14 -0
- data/lib/rmarc/constants.rb +43 -0
- data/lib/rmarc/marc_stream_reader.rb +128 -0
- data/lib/rmarc/marc_stream_writer.rb +73 -0
- data/lib/rmarc/marc_xml_reader.rb +137 -0
- data/lib/rmarc/marc_xml_writer.rb +88 -0
- data/lib/rmarc/model/control_field.rb +42 -0
- data/lib/rmarc/model/data_field.rb +68 -0
- data/lib/rmarc/model/directory.rb +36 -0
- data/lib/rmarc/model/leader.rb +65 -0
- data/lib/rmarc/model/record.rb +64 -0
- data/lib/rmarc/model/subfield.rb +39 -0
- data/lib/rmarc/model/variable_field.rb +38 -0
- data/test/chabon.mrc +1 -0
- data/test/chabon.xml +116 -0
- data/test/summerland.mrc +1 -0
- data/test/summerland.xml +52 -0
- data/test/test_model.rb +78 -0
- data/test/test_reader.rb +50 -0
- data/test/test_writer.rb +56 -0
- metadata +60 -0
data/lib/rmarc.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'rexml/document'
|
2
|
+
|
3
|
+
require 'rmarc/constants'
|
4
|
+
require 'rmarc/marc_stream_reader'
|
5
|
+
require 'rmarc/marc_stream_writer'
|
6
|
+
require 'rmarc/marc_xml_reader'
|
7
|
+
require 'rmarc/marc_xml_writer'
|
8
|
+
require 'rmarc/model/control_field'
|
9
|
+
require 'rmarc/model/data_field'
|
10
|
+
require 'rmarc/model/directory'
|
11
|
+
require 'rmarc/model/leader'
|
12
|
+
require 'rmarc/model/record'
|
13
|
+
require 'rmarc/model/subfield'
|
14
|
+
require 'rmarc/model/variable_field'
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# $Id: constants.rb,v 1.2 2005/12/02 16:57:26 bpeters Exp $
|
2
|
+
#
|
3
|
+
# Copyright (c) 2005 Bas Peters
|
4
|
+
#
|
5
|
+
# This file is part of RMARC
|
6
|
+
#
|
7
|
+
# RMARC is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 2.1 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# RMARC is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with RMARC; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
20
|
+
#
|
21
|
+
module RMARC
|
22
|
+
|
23
|
+
# Library-wide constant values, exposed as class-level reader methods
# (called as Constants.RT, Constants.FT, ...).
#
# Every call returns a newly created String, so a caller may modify the
# returned value without affecting later calls.
class Constants

  # Version string reported by the library.
  def self.VERSION
    "1.0rc1"
  end

  # Record terminator: the single byte 0x1D (octal 035) that the stream
  # reader expects after the last field of a record.
  def self.RT
    "\035"
  end

  # Field terminator: the single byte 0x1E (octal 036) that ends the
  # directory and every variable field.
  def self.FT
    "\036"
  end

  # Subfield delimiter: the single byte 0x1F (octal 037) that precedes
  # each subfield code inside a data field.
  def self.US
    "\037"
  end

end
|
42
|
+
|
43
|
+
end
|
@@ -0,0 +1,128 @@
|
|
1
|
+
# $Id: marc_stream_reader.rb,v 1.3 2005/12/05 19:36:41 bpeters Exp $
|
2
|
+
#
|
3
|
+
# Copyright (c) 2005 Bas Peters
|
4
|
+
#
|
5
|
+
# This file is part of RMARC
|
6
|
+
#
|
7
|
+
# RMARC is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 2.1 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# RMARC is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with RMARC; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
20
|
+
#
|
21
|
+
module RMARC
|
22
|
+
|
23
|
+
# An iterator over a collection of MARC records in ISO 2709 format.
#
# Example usage:
#
#   File.open("file.mrc", "r") do |file|
#     reader = RMARC::MarcStreamReader.new(file)
#     while reader.has_next
#       record = reader.next()
#     end
#   end
#
# The input stream is kept per instance, so several readers can be
# used side by side without interfering with each other.
class MarcStreamReader

  # Default constructor.
  #
  # input:: the stream to read from; it must respond to +read(length)+
  #         and +eof+ (a File or StringIO, for example).
  def initialize(input)
    @input = input
  end

  # Returns true if the iteration has more records, false otherwise.
  # This only reports whether the input is at end-of-file; it does not
  # check that the remaining bytes form a complete record.
  def has_next
    @input.eof == false
  end

  # Returns the next record in the iteration.
  #
  # Raises a RuntimeError when the input ends prematurely, when the
  # directory length derived from the leader is invalid, or when an
  # expected field/record terminator is missing.
  def next
    ldr = @input.read(24)

    # A short read means the stream ended inside the 24-byte leader.
    raise "Unexpected EOF while reading record label" if ldr.nil? || ldr.length < 24

    leader = Leader.new(ldr)
    record = Record.new(leader)

    # The directory sits between the leader (24 bytes) and its own
    # field terminator (1 byte); each entry occupies 12 bytes.
    length = leader.base_address - 25

    raise "Invalid directory" if length < 0 || length % 12 != 0

    dir = @input.read(length)

    raise "Unexpected EOF while reading directory" if dir.nil? || dir.length < length

    raise "Expected field terminator" if @input.read(1) != Constants.FT

    (0...length).step(12) do |offset|
      # Entry layout: 3-character tag, 4-character field length,
      # 5-character starting position.
      entry = Directory.new(dir[offset, 3], dir[offset + 3, 4], dir[offset + 7, 5])

      if entry.tag.to_i < 10
        data = @input.read(entry.length)

        if data.nil? || data.length < entry.length
          raise "Unexpected EOF while reading field with tag #{entry.tag}"
        end

        # The last byte of the field is its terminator; drop it.
        record.add(ControlField.new(entry.tag, data.chop))
      else
        record.add(parse_data_field(entry))
      end
    end

    raise "Expected record terminator" if @input.read(1) != Constants.RT

    record
  end

  # Reads one data field (two indicators followed by subfields) from
  # the input and returns it as a DataField.
  #
  # entry:: the directory entry for the field; its +tag+ and +length+
  #         are used.
  #
  # Raises a RuntimeError when the input ends before +entry.length+
  # bytes have been consumed.
  def parse_data_field(entry)
    msg = "Unexpected EOF while reading field with tag #{entry.tag}"

    ind1 = @input.read(1)
    raise msg if ind1.nil?

    ind2 = @input.read(1)
    raise msg if ind2.nil?

    field = DataField.new(entry.tag, ind1, ind2)

    data = nil
    code = nil
    i = 2 # the two indicator bytes have been consumed already

    while i < entry.length
      s = @input.read(1)
      if s.nil?
        raise msg
      elsif s == Constants.US
        # A delimiter closes the subfield collected so far (if any)
        # and is followed by the one-byte code of the next subfield.
        field.add(Subfield.new(code, data)) if code
        code = @input.read(1)
        raise msg if code.nil?
        i += 1
        data = "".dup
      elsif s == Constants.FT
        field.add(Subfield.new(code, data)) if code
      else
        # Bytes seen before the first delimiter are ignored.
        data << s if data
      end
      i += 1
    end

    field
  end

end
|
127
|
+
|
128
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# $Id: marc_stream_writer.rb,v 1.3 2005/12/05 19:36:41 bpeters Exp $
|
2
|
+
#
|
3
|
+
# Copyright (c) 2005 Bas Peters
|
4
|
+
#
|
5
|
+
# This file is part of RMARC
|
6
|
+
#
|
7
|
+
# RMARC is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 2.1 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# RMARC is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with RMARC; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
20
|
+
#
|
21
|
+
module RMARC
|
22
|
+
|
23
|
+
# Class for writing MARC record objects in ISO 2709 format.
#
# The following example reads a file with MARCXML records
# and outputs the record set in ISO 2709 format:
#
#   File.open("file.xml", "r") do |file|
#     reader = RMARC::MarcXmlReader.new(file)
#     writer = RMARC::MarcStreamWriter.new(STDOUT)
#     while reader.has_next
#       record = reader.next()
#       writer.write_record(record)
#     end
#   end
#
# The output stream is kept per instance, so several writers can be
# used side by side without interfering with each other.
class MarcStreamWriter

  # Default constructor.
  #
  # output:: the stream to write to; it must respond to +write+.
  def initialize(output)
    @output = output
  end

  # Writes a single record to the output stream given to the
  # constructor.
  #
  # The directory is rebuilt from the record's fields, and the leader's
  # +base_address+ and +record_length+ are overwritten with the values
  # computed here before the leader is serialised.
  def write_record(record)
    data = "".dup
    dir = "".dup
    prev = 0

    record.fields.each do |field|
      if field.tag.to_i < 10
        data << field.data << Constants.FT
      else
        data << field.ind1 << field.ind2
        field.subfields.each { |subf| data << Constants.US << subf.code << subf.data }
        data << Constants.FT
      end

      # Directory lengths and offsets are byte counts; String#length
      # counts characters and would be wrong for multi-byte text.
      len = data.bytesize
      dir << field.tag << format("%04d", len - prev) << format("%05d", prev)
      prev = len
    end
    dir << Constants.FT

    leader = record.leader
    leader.base_address = 24 + dir.bytesize
    # The extra byte is the record terminator appended below.
    leader.record_length = leader.base_address + data.bytesize + 1

    # Build a new string instead of appending to whatever leader.to_s
    # returns, so the leader's own state is never modified by accident.
    @output.write(leader.to_s + dir + data + Constants.RT)
  end

end
|
72
|
+
|
73
|
+
end
|
@@ -0,0 +1,137 @@
|
|
1
|
+
# $Id: marc_xml_reader.rb,v 1.3 2005/12/05 19:36:41 bpeters Exp $
|
2
|
+
#
|
3
|
+
# Copyright (c) 2005 Bas Peters
|
4
|
+
#
|
5
|
+
# This file is part of RMARC
|
6
|
+
#
|
7
|
+
# RMARC is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 2.1 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# RMARC is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with RMARC; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
20
|
+
#
|
21
|
+
require 'rexml/document'
|
22
|
+
require 'thread'
|
23
|
+
|
24
|
+
module RMARC
|
25
|
+
|
26
|
+
# A SizedQueue that additionally carries a +has_next+ flag.
#
# The flag is not assigned by this class, so it reads as nil until
# some other object sets it through +has_next=+.
class RecordStack < SizedQueue

  attr_accessor :has_next

end
|
31
|
+
|
32
|
+
# Callback object for a stream parser: turns MARC XML parse events
# into record objects and pushes every completed record onto a queue.
#
# All parsing state is kept per instance, so several listeners can be
# active at the same time without corrupting each other.
class Listener

  # queue:: receives each finished record through +push+; its
  #         +has_next+ flag is set to true when a collection element
  #         opens and to false when it closes.
  def initialize(queue)
    @queue = queue
    @record = nil
    @field = nil
    @subfield = nil
    @data = ""
  end

  # Called for every start tag.
  #
  # name::  element name, with or without a namespace prefix.
  # attrs:: attribute name => value pairs of the element.
  def tag_start(name, attrs)
    # Discard text collected before this element (typically the
    # whitespace between elements) so that an empty element ends up
    # with empty content instead of inheriting it.
    @data = ""

    case local_name(name)
    when "collection"
      @queue.has_next = true
    when "record"
      @record = Record.new
    when "controlfield"
      @field = ControlField.new(attrs["tag"])
    when "datafield"
      @field = DataField.new(attrs["tag"], attrs["ind1"], attrs["ind2"])
    when "subfield"
      @subfield = Subfield.new(attrs["code"])
    end
  end

  # Called for character data. Content is accumulated because the text
  # of one element may arrive in more than one call.
  def text(text)
    @data += text
  end

  # Called for CDATA sections; treated like ordinary character data.
  def cdata(content)
    @data += content
  end

  # Called for every end tag; stores the collected text in the object
  # being built and attaches that object to its parent.
  def tag_end(name)
    case local_name(name)
    when "collection"
      @queue.has_next = false
    when "record"
      @queue.push(@record)
    when "leader"
      @record.leader = Leader.new(@data)
      @data = ""
    when "controlfield"
      @field.data = @data
      @data = ""
      @record.add(@field)
    when "datafield"
      @record.add(@field)
    when "subfield"
      @subfield.data = @data
      @data = ""
      @field.add(@subfield)
    end
  end

  # The XML declaration carries nothing this listener needs.
  def xmldecl(version, encoding, standalone)
  end

  # Comments are ignored.
  def comment(comment)
  end

  # Processing instructions are ignored.
  def instruction(name, instruction)
  end

  private

  # Returns the part of +name+ after a "prefix:" namespace prefix, or
  # +name+ unchanged when it has no prefix.
  def local_name(name)
    name[/(\w+):(\w+)/, 2] || name
  end

end
|
96
|
+
|
97
|
+
# An iterator over a collection of MARC XML records.
#
# Example usage:
#
#   File.open("test/file.xml", "r") do |file|
#     reader = RMARC::MarcXmlReader.new(file)
#     while reader.has_next
#       record = reader.next()
#     end
#   end
#
# The XML input is parsed on a background thread which hands records
# over one at a time through a queue of size 1. Input and queue are
# kept per instance, so several readers can be used side by side.
class MarcXmlReader

  # Marker placed on the queue by the parser thread once parsing has
  # stopped, whether normally or because of an error.
  END_OF_STREAM = Object.new

  # Default constructor
  #
  # input:: the XML source handed to REXML::Document.parse_stream.
  def initialize(input)
    @input = input
    @queue = RecordStack.new(1)
    @pending = nil
    @finished = false
    @error = nil

    Thread.new do
      begin
        REXML::Document.parse_stream(@input, Listener.new(@queue))
      rescue StandardError => e
        # Keep the failure so the consuming thread can report it
        # instead of waiting forever for a record that never arrives.
        @error = e
      ensure
        @queue.push(END_OF_STREAM)
      end
    end
  end

  # Returns true if the iteration has more records, false otherwise.
  #
  # Waits until the parser thread has either produced the next record
  # or finished, so a true result guarantees that +next+ returns a
  # record. Raises the parser's exception if parsing failed.
  def has_next
    load_pending
    !@finished
  end

  # Returns the next record in the iteration, or nil when there are no
  # more records. Raises the parser's exception if parsing failed.
  def next
    load_pending
    record = @pending
    @pending = nil
    record
  end

  private

  # Takes one item from the queue unless a record is already waiting
  # or the end of the stream has been seen.
  def load_pending
    return if @finished || @pending

    item = @queue.pop
    if item.equal?(END_OF_STREAM)
      @finished = true
      error = @error
      @error = nil
      raise error if error
    else
      @pending = item
    end
  end

end
|
136
|
+
|
137
|
+
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
# $Id: marc_xml_writer.rb,v 1.3 2005/12/05 19:36:41 bpeters Exp $
|
2
|
+
#
|
3
|
+
# Copyright (c) 2005 Bas Peters
|
4
|
+
#
|
5
|
+
# This file is part of RMARC
|
6
|
+
#
|
7
|
+
# RMARC is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 2.1 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# RMARC is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with RMARC; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
20
|
+
#
|
21
|
+
require 'rexml/document'
|
22
|
+
|
23
|
+
module RMARC
|
24
|
+
|
25
|
+
# Class for writing MARC record objects in MARC XML format.
#
#   writer = RMARC::MarcXmlWriter.new(STDOUT)
#   File.open("file.mrc", "r") do |file|
#     reader = RMARC::MarcStreamReader.new(file)
#     writer.start_document
#     while reader.has_next
#       record = reader.next()
#       writer.write_record(record)
#     end
#     writer.end_document
#   end
#
# The output stream is kept per instance, so several writers can be
# used side by side without interfering with each other.
class MarcXmlWriter

  # Default constructor.
  #
  # output:: the stream to write to; it must respond to +write+.
  def initialize(output)
    @output = output
  end

  # Writes the XML declaration and the collection start tag.
  def start_document
    @output.write(REXML::XMLDecl.new.to_s)
    @output.write("\n<collection xmlns=\"http://www.loc.gov/MARC21/slim\">\n ")
  end

  # Writes a single record element.
  #
  # Fields whose tag is numerically below 10 are written as
  # controlfield elements, all others as datafield elements with their
  # indicators and subfields.
  def write_record(record)
    rec = REXML::Element.new('record')
    ldr = REXML::Element.new('leader')
    ldr.add_text(record.leader.to_s)
    rec.add_element(ldr)

    record.fields.each do |field|
      if field.tag.to_i < 10
        fld = REXML::Element.new('controlfield')
        fld.add_attribute('tag', field.tag)
        fld.add_text(field.data)
      else
        fld = REXML::Element.new('datafield')
        fld.add_attribute('tag', field.tag)
        fld.add_attribute('ind1', field.ind1)
        fld.add_attribute('ind2', field.ind2)
        field.subfields.each do |subf|
          sub = REXML::Element.new('subfield')
          sub.add_attribute('code', subf.code)
          sub.add_text(subf.data)
          fld.add_element(sub)
        end
      end
      rec.add_element(fld)
    end

    # Serialise without pretty-printing: an indenting formatter may
    # rewrite whitespace inside text nodes, and whitespace is
    # significant in leaders and fixed-length fields.
    @output.write(rec.to_s)
  end

  # Writes the collection end tag.
  def end_document
    @output.write("\n</collection>")
  end

end
|
87
|
+
|
88
|
+
end
|