rmarc 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/lib/rmarc.rb ADDED
@@ -0,0 +1,14 @@
1
+ require 'rexml/document'
2
+
3
+ require 'rmarc/constants'
4
+ require 'rmarc/marc_stream_reader'
5
+ require 'rmarc/marc_stream_writer'
6
+ require 'rmarc/marc_xml_reader'
7
+ require 'rmarc/marc_xml_writer'
8
+ require 'rmarc/model/control_field'
9
+ require 'rmarc/model/data_field'
10
+ require 'rmarc/model/directory'
11
+ require 'rmarc/model/leader'
12
+ require 'rmarc/model/record'
13
+ require 'rmarc/model/subfield'
14
+ require 'rmarc/model/variable_field'
@@ -0,0 +1,43 @@
1
+ # $Id: constants.rb,v 1.2 2005/12/02 16:57:26 bpeters Exp $
2
+ #
3
+ # Copyright (c) 2005 Bas Peters
4
+ #
5
+ # This file is part of RMARC
6
+ #
7
+ # RMARC is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 2.1 of the License, or (at your option) any later version.
11
+ #
12
+ # RMARC is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with RMARC; if not, write to the Free Software
19
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ #
21
+ module RMARC
22
+
23
# Fixed values shared across RMARC, exposed as class-level reader methods
# (called as Constants.RT, Constants.FT, ...). Each call returns a new String.
class Constants
  class << self
    # Version string reported by the library.
    def VERSION
      "1.0rc1"
    end

    # Record terminator character (octal 035).
    def RT
      "\035"
    end

    # Field terminator character (octal 036).
    def FT
      "\036"
    end

    # Subfield delimiter character (octal 037).
    def US
      "\037"
    end
  end
end
42
+
43
+ end
@@ -0,0 +1,128 @@
1
+ # $Id: marc_stream_reader.rb,v 1.3 2005/12/05 19:36:41 bpeters Exp $
2
+ #
3
+ # Copyright (c) 2005 Bas Peters
4
+ #
5
+ # This file is part of RMARC
6
+ #
7
+ # RMARC is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 2.1 of the License, or (at your option) any later version.
11
+ #
12
+ # RMARC is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with RMARC; if not, write to the Free Software
19
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ #
21
+ module RMARC
22
+
23
# An iterator over a collection of MARC records in ISO 2709 format.
#
# Example usage:
#
#   File.open("file.mrc", "r") do |file|
#     reader = RMARC::MarcStreamReader.new(file)
#     while reader.has_next
#       record = reader.next()
#     end
#   end
#
# Each reader keeps its own input stream, so several readers can be used
# side by side.
class MarcStreamReader

  # Size in bytes of the fixed-length record label (leader).
  LEADER_LENGTH = 24

  # Size in bytes of one directory entry: tag (3), field length (4), offset (5).
  DIRECTORY_ENTRY_LENGTH = 12

  # Creates a reader over +input+, an IO-like object that responds to
  # +read(n)+ and +eof+.
  def initialize(input)
    @input = input
  end

  # Returns true if the iteration has more records, false otherwise.
  def has_next
    !@input.eof
  end

  # Returns the next record in the iteration.
  #
  # Raises a RuntimeError when the input ends prematurely, when the
  # directory length implied by the leader is not a whole number of
  # entries, or when a field/record terminator is missing.
  def next
    ldr = @input.read(LEADER_LENGTH)
    if ldr.nil? || ldr.length < LEADER_LENGTH
      raise "Unexpected EOF while reading record label"
    end

    leader = Leader.new(ldr)
    record = Record.new(leader)

    # The directory sits between the leader and the field terminator that
    # immediately precedes the base address of data.
    dir_length = leader.base_address - (LEADER_LENGTH + 1)
    if dir_length < 0 || dir_length % DIRECTORY_ENTRY_LENGTH != 0
      raise "Invalid directory"
    end

    dir = @input.read(dir_length)
    if dir.nil? || dir.length < dir_length
      raise "Unexpected EOF while reading directory"
    end

    raise "Expected field terminator" if @input.read(1) != Constants.FT

    # Fields are consumed sequentially in directory order; the offset stored
    # in each entry is handed to Directory but not used for seeking.
    (dir_length / DIRECTORY_ENTRY_LENGTH).times do |index|
      offset = index * DIRECTORY_ENTRY_LENGTH
      entry = Directory.new(dir[offset, 3], dir[offset + 3, 4], dir[offset + 7, 5])
      if entry.tag.to_i < 10
        record.add(read_control_field(entry))
      else
        record.add(parse_data_field(entry))
      end
    end

    raise "Expected record terminator" if @input.read(1) != Constants.RT

    record
  end

  # Reads the data field described by the directory +entry+ from the input
  # and returns it as a DataField with its subfields attached.
  #
  # Raises a RuntimeError if the input ends inside the field.
  def parse_data_field(entry)
    msg = "Unexpected EOF while reading field with tag #{entry.tag}"

    ind1 = @input.read(1)
    raise msg if ind1.nil?

    ind2 = @input.read(1)
    raise msg if ind2.nil?

    field = DataField.new(entry.tag, ind1, ind2)

    code = nil
    data = nil
    consumed = 2 # the two indicators count towards the field length

    while consumed < entry.length
      byte = @input.read(1)
      raise msg if byte.nil?
      consumed += 1

      if byte == Constants.US
        # A delimiter closes the subfield collected so far and opens a new one.
        field.add(Subfield.new(code, data)) if code
        code = @input.read(1)
        raise msg if code.nil?
        consumed += 1
        data = "".dup
      elsif byte == Constants.FT
        field.add(Subfield.new(code, data)) if code
        # Forget the flushed subfield so it cannot be added a second time.
        code = nil
        data = nil
      elsif data
        # Bytes seen before the first delimiter are ignored.
        data << byte
      end
    end

    field
  end

  private

  # Reads a control field and strips its trailing field terminator.
  def read_control_field(entry)
    data = @input.read(entry.length)
    if data.nil? || data.length < entry.length
      raise "Unexpected EOF while reading field with tag #{entry.tag}"
    end
    ControlField.new(entry.tag, data.chop)
  end
end
127
+
128
+ end
@@ -0,0 +1,73 @@
1
+ # $Id: marc_stream_writer.rb,v 1.3 2005/12/05 19:36:41 bpeters Exp $
2
+ #
3
+ # Copyright (c) 2005 Bas Peters
4
+ #
5
+ # This file is part of RMARC
6
+ #
7
+ # RMARC is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 2.1 of the License, or (at your option) any later version.
11
+ #
12
+ # RMARC is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with RMARC; if not, write to the Free Software
19
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ #
21
+ module RMARC
22
+
23
# Class for writing MARC record objects in ISO 2709 format.
#
# The following example reads a file with ISO 2709 records
# and writes the record set back out in ISO 2709 format:
#
#   File.open("file.mrc", "r") do |file|
#     reader = RMARC::MarcStreamReader.new(file)
#     writer = RMARC::MarcStreamWriter.new(STDOUT)
#     while reader.has_next
#       record = reader.next()
#       writer.write_record(record)
#     end
#   end
#
# Each writer keeps its own output stream, so several writers can be used
# side by side.
class MarcStreamWriter

  # Size in bytes of the fixed-length record label (leader).
  LEADER_LENGTH = 24

  # Creates a writer that sends its output to +output+, an IO-like object
  # that responds to +write+.
  def initialize(output)
    @output = output
  end

  # Writes a single record to the output stream.
  #
  # The record's leader is updated in place with the computed base address
  # and record length before it is serialised.
  def write_record(record)
    data = "".dup
    dir = "".dup
    offset = 0

    record.fields.each do |field|
      if field.tag.to_i < 10
        data << field.data << Constants.FT
      else
        data << field.ind1 << field.ind2
        field.subfields.each { |subf| data << Constants.US << subf.code << subf.data }
        data << Constants.FT
      end
      # Directory lengths and offsets are byte counts, not character counts,
      # so multi-byte characters must be measured with bytesize.
      total = data.bytesize
      dir << field.tag << format("%04d", total - offset) << format("%05d", offset)
      offset = total
    end
    dir << Constants.FT

    leader = record.leader
    leader.base_address = LEADER_LENGTH + dir.bytesize
    leader.record_length = leader.base_address + data.bytesize + 1

    # Build a new string rather than appending to whatever leader.to_s returns.
    @output.write(leader.to_s + dir + data + Constants.RT)
  end
end
72
+
73
+ end
@@ -0,0 +1,137 @@
1
+ # $Id: marc_xml_reader.rb,v 1.3 2005/12/05 19:36:41 bpeters Exp $
2
+ #
3
+ # Copyright (c) 2005 Bas Peters
4
+ #
5
+ # This file is part of RMARC
6
+ #
7
+ # RMARC is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 2.1 of the License, or (at your option) any later version.
11
+ #
12
+ # RMARC is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with RMARC; if not, write to the Free Software
19
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ #
21
+ require 'rexml/document'
22
+ require 'thread'
23
+
24
+ module RMARC
25
+
26
# A SizedQueue that additionally carries a readable and writable +has_next+
# attribute alongside the queued items.
class RecordStack < SizedQueue
  attr_accessor :has_next
end
31
+
32
# Stream listener that turns MARC XML parse events into Record objects and
# pushes every completed record onto the queue given to the constructor.
#
# All parsing state lives in instance variables, so several listeners can
# run at the same time without interfering with each other.
class Listener

  # Matches a prefixed element name such as "marc:record"; the second
  # capture is the name used for dispatch.
  QNAME = /(\w+):(\w+)/

  # +queue+ must respond to +push+ and +has_next=+.
  def initialize(queue)
    @queue = queue
    @record = nil
    @field = nil
    @subfield = nil
    reset_data
  end

  # Called for every start tag; +attrs+ maps attribute names to values.
  def tag_start(name, attrs)
    # Drop any text collected so far (typically whitespace between elements)
    # so that an empty element does not inherit it.
    reset_data
    case local_name(name)
    when "collection"
      @queue.has_next = true
    when "record"
      @record = Record.new
    when "controlfield"
      @field = ControlField.new(attrs["tag"])
    when "datafield"
      @field = DataField.new(attrs["tag"], attrs["ind1"], attrs["ind2"])
    when "subfield"
      @subfield = Subfield.new(attrs["code"])
    end
  end

  # Called for character data. The content of one element may arrive in
  # several pieces, so the pieces are accumulated.
  def text(text)
    @data += text
  end

  # Called for CDATA sections; treated like ordinary character data.
  def cdata(content)
    @data += content
  end

  # Called for every end tag; stores the collected text on the model object
  # that just ended and hands finished records to the queue.
  def tag_end(name)
    case local_name(name)
    when "collection"
      @queue.has_next = false
    when "record"
      @queue.push(@record)
    when "leader"
      @record.leader = Leader.new(@data)
    when "controlfield"
      @field.data = @data
      @record.add(@field)
    when "datafield"
      @record.add(@field)
    when "subfield"
      @subfield.data = @data
      @field.add(@subfield)
    end
    reset_data
  end

  # The XML declaration carries nothing the record model needs.
  def xmldecl(version, encoding, standalone)
  end

  # Comments, processing instructions and document type declarations are
  # ignored. The callbacks exist so that documents containing them do not
  # abort parsing with a NoMethodError.
  def comment(*)
  end

  def instruction(*)
  end

  def doctype(*)
  end

  def doctype_end
  end

  private

  # Strips a namespace prefix from an element name, if there is one.
  def local_name(name)
    match = QNAME.match(name)
    match ? match[2] : name
  end

  # Starts a fresh, mutable text buffer.
  def reset_data
    @data = "".dup
  end
end
96
+
97
# An iterator over a collection of MARC XML records.
#
# Example usage:
#
#   File.open("test/file.xml", "r") do |file|
#     reader = RMARC::MarcXmlReader.new(file)
#     while reader.has_next
#       record = reader.next()
#     end
#   end
#
# The XML is parsed on a background thread that hands records over through
# a bounded queue. When the parser stops, whether normally or because of an
# error, it pushes an end-of-input marker, so the consumer never waits for a
# record that will not arrive.
class MarcXmlReader

  # Marker object pushed by the parser thread after the last record.
  END_OF_INPUT = Object.new

  # Starts parsing +input+ (an IO-like object or String accepted by
  # REXML::Document.parse_stream) on a background thread.
  def initialize(input)
    @input = input
    @queue = RecordStack.new(1)
    @buffer = []   # holds at most one record fetched ahead by has_next
    @done = false
    @error = nil

    Thread.new do
      begin
        REXML::Document.parse_stream(@input, Listener.new(@queue))
      rescue StandardError => e
        # Remember the failure; it is re-raised in the consuming thread.
        @error = e
      ensure
        @queue.push(END_OF_INPUT)
      end
    end
  end

  # Returns true if the iteration has more records, false otherwise.
  #
  # Waits until the parser has either produced the next record or finished.
  # Re-raises, once, an error that stopped the parser.
  def has_next
    fetch
    !@done
  end

  # Returns the next record in the iteration, or nil when there are no
  # more records.
  def next
    return nil unless has_next
    @buffer.shift
  end

  private

  # Makes sure one record is buffered unless the input is exhausted.
  def fetch
    return if @done || !@buffer.empty?

    item = @queue.pop
    if item.equal?(END_OF_INPUT)
      @done = true
      if @error
        error = @error
        @error = nil
        raise error
      end
    else
      @buffer << item
    end
  end
end
136
+
137
+ end
@@ -0,0 +1,88 @@
1
+ # $Id: marc_xml_writer.rb,v 1.3 2005/12/05 19:36:41 bpeters Exp $
2
+ #
3
+ # Copyright (c) 2005 Bas Peters
4
+ #
5
+ # This file is part of RMARC
6
+ #
7
+ # RMARC is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 2.1 of the License, or (at your option) any later version.
11
+ #
12
+ # RMARC is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with RMARC; if not, write to the Free Software
19
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ #
21
+ require 'rexml/document'
22
+
23
+ module RMARC
24
+
25
# Class for writing MARC record objects in MARC XML format.
#
#   writer = RMARC::MarcXmlWriter.new(STDOUT)
#   File.open("file.mrc", "r") do |file|
#     reader = RMARC::MarcStreamReader.new(file)
#     writer.start_document
#     while reader.has_next
#       record = reader.next()
#       writer.write_record(record)
#     end
#     writer.end_document
#   end
#
# Each writer keeps its own output stream, so several writers can be used
# side by side.
class MarcXmlWriter

  # Creates a writer that sends its output to +output+, an IO-like object
  # that responds to +write+.
  def initialize(output)
    @output = output
  end

  # Writes the XML declaration and the collection start tag.
  def start_document
    @output.write(REXML::XMLDecl.new.to_s)
    @output.write("\n<collection xmlns=\"http://www.loc.gov/MARC21/slim\">\n ")
  end

  # Writes a single record element, followed by a newline.
  #
  # Element text is written verbatim. The pretty-printing formatter is
  # avoided on purpose: it collapses runs of whitespace, which would corrupt
  # fixed-width data such as the leader.
  def write_record(record)
    rec = REXML::Element.new('record')

    ldr = REXML::Element.new('leader')
    ldr.add_text(record.leader.to_s)
    rec.add_element(ldr)

    record.fields.each do |field|
      rec.add_element(field.tag.to_i < 10 ? control_element(field) : data_element(field))
    end

    @output.write(rec.to_s)
    @output.write("\n")
  end

  # Writes the collection end tag.
  def end_document
    @output.write("\n</collection>")
  end

  private

  # Builds the controlfield element for +field+.
  def control_element(field)
    element = REXML::Element.new('controlfield')
    element.add_attribute('tag', field.tag)
    element.add_text(field.data)
    element
  end

  # Builds the datafield element for +field+, including its subfields.
  def data_element(field)
    element = REXML::Element.new('datafield')
    element.add_attribute('tag', field.tag)
    element.add_attribute('ind1', field.ind1)
    element.add_attribute('ind2', field.ind2)
    field.subfields.each do |subf|
      sub = REXML::Element.new('subfield')
      sub.add_attribute('code', subf.code)
      sub.add_text(subf.data)
      element.add_element(sub)
    end
    element
  end
end
87
+
88
+ end