rmarc 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +504 -0
- data/lib/rmarc.rb +14 -0
- data/lib/rmarc/constants.rb +43 -0
- data/lib/rmarc/marc_stream_reader.rb +128 -0
- data/lib/rmarc/marc_stream_writer.rb +73 -0
- data/lib/rmarc/marc_xml_reader.rb +137 -0
- data/lib/rmarc/marc_xml_writer.rb +88 -0
- data/lib/rmarc/model/control_field.rb +42 -0
- data/lib/rmarc/model/data_field.rb +68 -0
- data/lib/rmarc/model/directory.rb +36 -0
- data/lib/rmarc/model/leader.rb +65 -0
- data/lib/rmarc/model/record.rb +64 -0
- data/lib/rmarc/model/subfield.rb +39 -0
- data/lib/rmarc/model/variable_field.rb +38 -0
- data/test/chabon.mrc +1 -0
- data/test/chabon.xml +116 -0
- data/test/summerland.mrc +1 -0
- data/test/summerland.xml +52 -0
- data/test/test_model.rb +78 -0
- data/test/test_reader.rb +50 -0
- data/test/test_writer.rb +56 -0
- metadata +60 -0
data/lib/rmarc.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'rexml/document'
|
2
|
+
|
3
|
+
require 'rmarc/constants'
|
4
|
+
require 'rmarc/marc_stream_reader'
|
5
|
+
require 'rmarc/marc_stream_writer'
|
6
|
+
require 'rmarc/marc_xml_reader'
|
7
|
+
require 'rmarc/marc_xml_writer'
|
8
|
+
require 'rmarc/model/control_field'
|
9
|
+
require 'rmarc/model/data_field'
|
10
|
+
require 'rmarc/model/directory'
|
11
|
+
require 'rmarc/model/leader'
|
12
|
+
require 'rmarc/model/record'
|
13
|
+
require 'rmarc/model/subfield'
|
14
|
+
require 'rmarc/model/variable_field'
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# $Id: constants.rb,v 1.2 2005/12/02 16:57:26 bpeters Exp $
|
2
|
+
#
|
3
|
+
# Copyright (c) 2005 Bas Peters
|
4
|
+
#
|
5
|
+
# This file is part of RMARC
|
6
|
+
#
|
7
|
+
# RMARC is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 2.1 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# RMARC is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with RMARC; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
20
|
+
#
|
21
|
+
module RMARC
|
22
|
+
|
23
|
+
# Library-wide constant values, exposed as class-level reader methods
# (call them as Constants.RT, Constants.FT, Constants.US, Constants.VERSION).
#
# Every call returns a fresh String, so callers may append to the result
# without affecting later calls.
class Constants
  # Version string reported by the library.
  def self.VERSION
    "1.0rc1"
  end

  # Record terminator (octal 035): the single character the stream reader
  # expects after the last field of a record and the stream writer emits
  # at the end of each record.
  def self.RT
    "\035"
  end

  # Field terminator (octal 036): closes the directory and every field.
  def self.FT
    "\036"
  end

  # Subfield delimiter (octal 037): precedes each one-character subfield
  # code inside a data field.
  def self.US
    "\037"
  end
end
|
42
|
+
|
43
|
+
end
|
@@ -0,0 +1,128 @@
|
|
1
|
+
# $Id: marc_stream_reader.rb,v 1.3 2005/12/05 19:36:41 bpeters Exp $
|
2
|
+
#
|
3
|
+
# Copyright (c) 2005 Bas Peters
|
4
|
+
#
|
5
|
+
# This file is part of RMARC
|
6
|
+
#
|
7
|
+
# RMARC is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 2.1 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# RMARC is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with RMARC; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
20
|
+
#
|
21
|
+
module RMARC
|
22
|
+
|
23
|
+
# An iterator over a collection of MARC records in ISO 2709 format.
#
# Example usage:
#
#   File.open("file.mrc", "r") do |file|
#     reader = RMARC::MarcStreamReader.new(file)
#     while reader.has_next
#       record = reader.next
#     end
#   end
class MarcStreamReader

  # Creates a reader over +input+, an IO-like object that responds to
  # +read(n)+ and +eof+. The stream is held per instance, so several
  # readers can be open at the same time without interfering.
  def initialize(input)
    @input = input
  end

  # Returns true if the iteration has more records, false otherwise.
  def has_next
    !@input.eof
  end

  # Returns the next record in the iteration.
  #
  # Reads the 24-character record label, the directory (12 characters per
  # entry: 3 for the tag, 4 for the field length, 5 for the start offset),
  # then one field per directory entry.
  #
  # Raises RuntimeError when the stream ends early, the directory length is
  # not a non-negative multiple of 12, or an expected field/record
  # terminator is missing.
  def next
    ldr = @input.read(24)
    # read returns nil at EOF and a short string on a truncated stream.
    raise "Unexpected EOF while reading record label" if ldr.nil? || ldr.length < 24

    leader = Leader.new(ldr)
    record = Record.new(leader)

    # Base address = 24 (label) + directory + 1 (field terminator).
    length = leader.base_address - 25
    raise "Invalid directory" if length < 0 || length % 12 != 0

    dir = @input.read(length)
    raise "Unexpected EOF while reading directory" if dir.nil? || dir.length < length

    raise "Expected field terminator" if @input.read(1) != Constants.FT

    (length / 12).times do |index|
      offset = index * 12
      entry = Directory.new(dir[offset, 3], dir[offset + 3, 4], dir[offset + 7, 5])
      if entry.tag.to_i < 10
        # Tags below 010 are control fields: raw data, no indicators.
        data = @input.read(entry.length)
        raise "Unexpected EOF while reading field with tag #{entry.tag}" if data.nil?

        # chop drops the trailing field terminator.
        record.add(ControlField.new(entry.tag, data.chop))
      else
        record.add(parse_data_field(entry))
      end
    end

    raise "Expected record terminator" if @input.read(1) != Constants.RT

    record
  end

  # Reads one data field (two indicators followed by subfields) for the
  # given directory +entry+ from the current stream position and returns
  # it as a DataField. Consumes exactly +entry.length+ characters unless
  # the stream ends first, in which case RuntimeError is raised.
  def parse_data_field(entry)
    msg = "Unexpected EOF while reading field with tag #{entry.tag}"

    ind1 = @input.read(1)
    raise msg if ind1.nil?

    ind2 = @input.read(1)
    raise msg if ind2.nil?

    field = DataField.new(entry.tag, ind1, ind2)

    data = nil
    code = nil
    i = 2 # the two indicators have already been consumed

    while i < entry.length
      s = @input.read(1)
      raise msg if s.nil?

      if s == Constants.US
        # A delimiter closes the previous subfield (if any) and is followed
        # by the one-character code of the next one.
        field.add(Subfield.new(code, data)) if code
        code = @input.read(1)
        raise msg if code.nil?
        i += 1
        data = ""
      elsif s == Constants.FT
        field.add(Subfield.new(code, data)) if code
      else
        # Characters before the first delimiter are ignored (data is nil).
        data << s if data
      end
      i += 1
    end
    field
  end
end
|
127
|
+
|
128
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# $Id: marc_stream_writer.rb,v 1.3 2005/12/05 19:36:41 bpeters Exp $
|
2
|
+
#
|
3
|
+
# Copyright (c) 2005 Bas Peters
|
4
|
+
#
|
5
|
+
# This file is part of RMARC
|
6
|
+
#
|
7
|
+
# RMARC is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 2.1 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# RMARC is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with RMARC; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
20
|
+
#
|
21
|
+
module RMARC
|
22
|
+
|
23
|
+
# Class for writing MARC record objects in ISO 2709 format.
#
# The following example reads a file with ISO 2709 records
# and writes the record set back out in ISO 2709 format:
#
#   File.open("file.mrc", "r") do |file|
#     reader = RMARC::MarcStreamReader.new(file)
#     writer = RMARC::MarcStreamWriter.new(STDOUT)
#     while reader.has_next
#       record = reader.next
#       writer.write_record(record)
#     end
#   end
class MarcStreamWriter

  # Creates a writer that sends records to +output+, an object responding
  # to +write+. The stream is held per instance, so several writers can be
  # used side by side.
  def initialize(output)
    @output = output
  end

  # Writes a single record to the output stream.
  #
  # Builds the field data and the directory (tag, 4-digit field length,
  # 5-digit start offset per field), then updates +base_address+ and
  # +record_length+ on the record's leader before writing leader,
  # directory, data and the record terminator in one call.
  #
  # Lengths and offsets are counted in bytes, so fields holding multibyte
  # characters get correct directory entries.
  def write_record(record)
    data = ""
    dir = ""
    prev = 0

    record.fields.each do |field|
      if field.tag.to_i < 10
        # Control field: raw data followed by the field terminator.
        data << field.data << Constants.FT
      else
        data << field.ind1 << field.ind2
        field.subfields.each { |subf| data << Constants.US << subf.code << subf.data }
        data << Constants.FT
      end
      len = data.bytesize
      dir << field.tag << format("%04d", len - prev) << format("%05d", prev)
      prev = len
    end
    dir << Constants.FT

    leader = record.leader
    leader.base_address = 24 + dir.bytesize
    # + 1 accounts for the record terminator.
    leader.record_length = leader.base_address + data.bytesize + 1

    # Concatenate with + so the string returned by leader.to_s is not mutated.
    @output.write(leader.to_s + dir + data + Constants.RT)
  end
end
|
72
|
+
|
73
|
+
end
|
@@ -0,0 +1,137 @@
|
|
1
|
+
# $Id: marc_xml_reader.rb,v 1.3 2005/12/05 19:36:41 bpeters Exp $
|
2
|
+
#
|
3
|
+
# Copyright (c) 2005 Bas Peters
|
4
|
+
#
|
5
|
+
# This file is part of RMARC
|
6
|
+
#
|
7
|
+
# RMARC is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 2.1 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# RMARC is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with RMARC; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
20
|
+
#
|
21
|
+
require 'rexml/document'
|
22
|
+
require 'thread'
|
23
|
+
|
24
|
+
module RMARC
|
25
|
+
|
26
|
+
# A size-limited queue of parsed records with an extra +has_next+ flag.
# The flag is unset (nil) until something assigns it; in this file the
# Listener assigns it when a collection element opens and closes.
class RecordStack < SizedQueue
  attr_accessor :has_next
end
|
31
|
+
|
32
|
+
# Stream listener that assembles Record objects from MARC XML parse events
# and pushes every completed record onto the queue passed to the
# constructor. All parsing state lives in the instance, so several
# listeners can run at once.
class Listener

  # +queue+ must respond to +push+ and +has_next=+.
  def initialize(queue)
    @queue = queue
    @record = nil
    @field = nil
    @subfield = nil
    @data = ""
  end

  # Called for each start tag; +attrs+ maps attribute names to values.
  def tag_start(name, attrs)
    # Drop text seen before this element (typically indentation) so an
    # empty element does not pick it up as its content.
    @data = ""
    case local_name(name)
    when "collection"
      @queue.has_next = true
    when "record"
      @record = Record.new
    when "controlfield"
      @field = ControlField.new(attrs["tag"])
    when "datafield"
      @field = DataField.new(attrs["tag"], attrs["ind1"], attrs["ind2"])
    when "subfield"
      @subfield = Subfield.new(attrs["code"])
    end
  end

  # Collects character data for the element currently being read.
  def text(text)
    @data += text
  end

  # CDATA sections count as character data as well.
  def cdata(content)
    @data += content
  end

  # Called for each end tag; stores the collected text on the object being
  # built and attaches finished objects to their parent.
  def tag_end(name)
    case local_name(name)
    when "collection"
      @queue.has_next = false
    when "record"
      @queue.push(@record)
    when "leader"
      @record.leader = Leader.new(take_data)
    when "controlfield"
      @field.data = take_data
      @record.add(@field)
    when "datafield"
      @record.add(@field)
    when "subfield"
      @subfield.data = take_data
      @field.add(@subfield)
    end
  end

  # The XML declaration, comments and processing instructions carry no
  # record data; these callbacks exist only so the parser can invoke them.
  def xmldecl(version, encoding, standalone)
  end

  def comment(text)
  end

  def instruction(name, instruction)
  end

  private

  # Returns +name+ without a namespace prefix ("marc:record" -> "record").
  def local_name(name)
    md = /(\w+):(\w+)/.match(name)
    md ? md[2] : name
  end

  # Returns the text collected so far and starts a fresh buffer.
  def take_data
    collected = @data
    @data = ""
    collected
  end
end
|
96
|
+
|
97
|
+
# An iterator over a collection of MARC XML records.
#
# Example usage:
#
#   File.open("test/file.xml", "r") do |file|
#     reader = RMARC::MarcXmlReader.new(file)
#     while reader.has_next
#       record = reader.next
#     end
#   end
class MarcXmlReader

  # Marker the parser thread pushes after the last record, so the consumer
  # can tell "no more records" apart from "next record not parsed yet".
  END_OF_STREAM = Object.new.freeze

  # Starts parsing +input+ on a background thread. Records are handed over
  # through a one-slot queue, so the parser stays at most one record ahead
  # of the caller. All state is per instance.
  def initialize(input)
    @input = input
    @queue = RecordStack.new(1)
    @pending = nil
    @finished = false
    @error = nil
    Thread.new do
      begin
        REXML::Document.parse_stream(@input, Listener.new(@queue))
      rescue StandardError => e
        # Keep the failure for the consumer instead of losing it with the thread.
        @error = e
      ensure
        @queue.push(END_OF_STREAM)
      end
    end
  end

  # Returns true if the iteration has more records, false otherwise.
  # Waits until the parser has produced the next record or has finished.
  # If parsing failed, the parser's exception is raised here once, after
  # all records parsed before the failure have been returned.
  def has_next
    fetch
    !@finished
  end

  # Returns the next record in the iteration, or nil when there are no
  # more records.
  def next
    fetch
    record = @pending
    @pending = nil
    record
  end

  private

  # Pulls one item from the queue into @pending unless a record is already
  # waiting or the end of the stream has been reached.
  def fetch
    return if @finished || !@pending.nil?

    item = @queue.pop
    if item.equal?(END_OF_STREAM)
      @finished = true
      raise @error if @error
    else
      @pending = item
    end
  end
end
|
136
|
+
|
137
|
+
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
# $Id: marc_xml_writer.rb,v 1.3 2005/12/05 19:36:41 bpeters Exp $
|
2
|
+
#
|
3
|
+
# Copyright (c) 2005 Bas Peters
|
4
|
+
#
|
5
|
+
# This file is part of RMARC
|
6
|
+
#
|
7
|
+
# RMARC is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 2.1 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# RMARC is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with RMARC; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
20
|
+
#
|
21
|
+
require 'rexml/document'
|
22
|
+
|
23
|
+
module RMARC
|
24
|
+
|
25
|
+
# Class for writing MARC record objects in MARC XML format.
#
#   writer = RMARC::MarcXmlWriter.new(STDOUT)
#   File.open("file.mrc", "r") do |file|
#     reader = RMARC::MarcStreamReader.new(file)
#     writer.start_document
#     while reader.has_next
#       record = reader.next
#       writer.write_record(record)
#     end
#     writer.end_document
#   end
class MarcXmlWriter

  # Creates a writer that sends XML to +output+, an object responding to
  # +write+ and +<<+. The stream is held per instance, so several writers
  # can be used side by side.
  def initialize(output)
    @output = output
  end

  # Writes the XML declaration and the collection start tag.
  def start_document
    @output.write(REXML::XMLDecl.new)
    @output.write("\n<collection xmlns=\"http://www.loc.gov/MARC21/slim\">\n ")
  end

  # Writes a single record element.
  #
  # The element is serialized without pretty-printing so that the text of
  # the leader, control fields and subfields is written exactly as stored;
  # runs of spaces are significant there.
  def write_record(record)
    rec = REXML::Element.new('record')
    ldr = REXML::Element.new('leader')
    ldr.add_text(record.leader.to_s)
    rec.add_element(ldr)

    record.fields.each do |field|
      # Tags below 010 are control fields; everything else is a data field.
      rec.add_element(field.tag.to_i < 10 ? control_element(field) : data_element(field))
    end

    REXML::Formatters::Default.new.write(rec, @output)
    # Put the next record (or the closing tag) on a line of its own.
    @output.write("\n ")
  end

  # Writes the collection end tag.
  def end_document
    @output.write("\n</collection>")
  end

  private

  # Builds the <controlfield> element for +field+.
  def control_element(field)
    element = REXML::Element.new('controlfield')
    element.add_attribute('tag', field.tag)
    element.add_text(field.data)
    element
  end

  # Builds the <datafield> element for +field+, including its subfields.
  def data_element(field)
    element = REXML::Element.new('datafield')
    element.add_attribute('tag', field.tag)
    element.add_attribute('ind1', field.ind1)
    element.add_attribute('ind2', field.ind2)
    field.subfields.each do |subf|
      sub = REXML::Element.new('subfield')
      sub.add_attribute('code', subf.code)
      sub.add_text(subf.data)
      element.add_element(sub)
    end
    element
  end
end
|
87
|
+
|
88
|
+
end
|