rmarc 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/rmarc.rb ADDED
@@ -0,0 +1,14 @@
1
+ require 'rexml/document'
2
+
3
+ require 'rmarc/constants'
4
+ require 'rmarc/marc_stream_reader'
5
+ require 'rmarc/marc_stream_writer'
6
+ require 'rmarc/marc_xml_reader'
7
+ require 'rmarc/marc_xml_writer'
8
+ require 'rmarc/model/control_field'
9
+ require 'rmarc/model/data_field'
10
+ require 'rmarc/model/directory'
11
+ require 'rmarc/model/leader'
12
+ require 'rmarc/model/record'
13
+ require 'rmarc/model/subfield'
14
+ require 'rmarc/model/variable_field'
@@ -0,0 +1,43 @@
1
+ # $Id: constants.rb,v 1.2 2005/12/02 16:57:26 bpeters Exp $
2
+ #
3
+ # Copyright (c) 2005 Bas Peters
4
+ #
5
+ # This file is part of RMARC
6
+ #
7
+ # RMARC is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 2.1 of the License, or (at your option) any later version.
11
+ #
12
+ # RMARC is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with RMARC; if not, write to the Free Software
19
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ #
21
+ module RMARC
22
+
23
# Library version string and the control characters that delimit the parts
# of an ISO 2709 record.
#
# The values are exposed as class-level methods with upper-case names and
# are called with a dot, for example <tt>Constants.FT</tt>. Every call
# evaluates a string literal, so callers receive their own String object.
class Constants
  class << self
    # Returns the RMARC version string.
    def VERSION
      "1.0rc1"
    end

    # Returns the record terminator (octal 035), the character expected
    # at the very end of a record.
    def RT
      "\035"
    end

    # Returns the field terminator (octal 036), the character that closes
    # the directory and every variable field.
    def FT
      "\036"
    end

    # Returns the subfield delimiter (octal 037), the character that
    # precedes each subfield code inside a data field.
    def US
      "\037"
    end
  end
end
42
+
43
+ end
@@ -0,0 +1,128 @@
1
+ # $Id: marc_stream_reader.rb,v 1.3 2005/12/05 19:36:41 bpeters Exp $
2
+ #
3
+ # Copyright (c) 2005 Bas Peters
4
+ #
5
+ # This file is part of RMARC
6
+ #
7
+ # RMARC is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 2.1 of the License, or (at your option) any later version.
11
+ #
12
+ # RMARC is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with RMARC; if not, write to the Free Software
19
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ #
21
+ module RMARC
22
+
23
# An iterator over a collection of MARC records in ISO 2709 format.
#
# Example usage:
#
#   File.open("file.mrc", "r") do |file|
#     reader = RMARC::MarcStreamReader.new(file)
#     while reader.has_next
#       record = reader.next()
#     end
#   end
#
# The reader consumes the stream sequentially: fields are read in the order
# of their directory entries and the starting positions stored in the
# directory are not used for seeking.
class MarcStreamReader

  # Creates a reader on +input+, an object that responds to +read+ and
  # +eof+ (for example a File or a StringIO).
  #
  # The stream is kept per instance, so several readers can be used at the
  # same time without interfering with each other.
  def initialize(input)
    @input = input
  end

  # Returns true if the iteration has more records, false otherwise.
  def has_next
    !@input.eof
  end

  # Returns the next record in the iteration.
  #
  # Raises a RuntimeError when the stream ends inside a record, when the
  # directory length derived from the leader is not a non-negative multiple
  # of 12, or when the field or record terminator is missing.
  def next
    ldr = @input.read(24)
    raise "Unexpected EOF while reading record label" if ldr.nil? || ldr.bytesize < 24

    leader = Leader.new(ldr)
    record = Record.new(leader)

    # The directory sits between the 24 byte leader and the field terminator
    # that precedes the base address of data.
    length = leader.base_address - 25
    raise "Invalid directory" if length < 0 || length % 12 != 0

    dir = @input.read(length)
    raise "Unexpected EOF while reading directory" if dir.nil? || dir.bytesize < length
    raise "Expected field terminator" if @input.read(1) != Constants.FT

    (length / 12).times do |index|
      # Each entry is 12 characters: tag (3), field length (4), start (5).
      offset = index * 12
      entry = Directory.new(dir[offset, 3], dir[offset + 3, 4], dir[offset + 7, 5])
      if entry.tag.to_i < 10
        data = @input.read(entry.length)
        if data.nil? || data.bytesize < entry.length
          raise "Unexpected EOF while reading field with tag #{entry.tag}"
        end
        # chop drops the trailing field terminator.
        record.add(ControlField.new(entry.tag, data.chop))
      else
        record.add(parse_data_field(entry))
      end
    end

    raise "Expected record terminator" if @input.read(1) != Constants.RT

    record
  end

  # Reads the data field described by the directory +entry+ from the stream
  # and returns it as a DataField with its subfields.
  #
  # Exactly <tt>entry.length</tt> characters are consumed: two indicators
  # followed by subfields, each introduced by the subfield delimiter and a
  # one character code. Characters that appear before the first subfield
  # delimiter are skipped.
  #
  # Raises a RuntimeError when the stream ends before the field is complete.
  def parse_data_field(entry)
    msg = "Unexpected EOF while reading field with tag #{entry.tag}"

    ind1 = @input.read(1)
    raise msg if ind1.nil?
    ind2 = @input.read(1)
    raise msg if ind2.nil?

    field = DataField.new(entry.tag, ind1, ind2)
    code = nil
    data = nil
    consumed = 2

    while consumed < entry.length
      char = @input.read(1)
      raise msg if char.nil?
      consumed += 1

      if char == Constants.US
        # A delimiter closes the subfield collected so far and opens a new one.
        field.add(Subfield.new(code, data)) if code
        code = @input.read(1)
        raise msg if code.nil?
        consumed += 1
        data = "".dup
      elsif char == Constants.FT
        field.add(Subfield.new(code, data)) if code
        # Forget the finished subfield so it cannot be added a second time.
        code = nil
        data = nil
      elsif data
        data << char
      end
    end

    field
  end
end
127
+
128
+ end
@@ -0,0 +1,73 @@
1
+ # $Id: marc_stream_writer.rb,v 1.3 2005/12/05 19:36:41 bpeters Exp $
2
+ #
3
+ # Copyright (c) 2005 Bas Peters
4
+ #
5
+ # This file is part of RMARC
6
+ #
7
+ # RMARC is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 2.1 of the License, or (at your option) any later version.
11
+ #
12
+ # RMARC is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with RMARC; if not, write to the Free Software
19
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ #
21
+ module RMARC
22
+
23
# Class for writing MARC record objects in ISO 2709 format.
#
# The following example reads a file with ISO 2709 records
# and writes the record set back out in ISO 2709 format:
#
#   File.open("file.mrc", "r") do |file|
#     reader = RMARC::MarcStreamReader.new(file)
#     writer = RMARC::MarcStreamWriter.new(STDOUT)
#     while reader.has_next
#       record = reader.next()
#       writer.write_record(record)
#     end
#   end
class MarcStreamWriter

  # Creates a writer on +output+, an object that responds to +write+.
  #
  # The stream is kept per instance, so several writers can be used at the
  # same time without interfering with each other.
  def initialize(output)
    @output = output
  end

  # Writes a single record to the output stream.
  #
  # The directory is rebuilt from the record's fields, and the leader's
  # +base_address+ and +record_length+ are updated on the record before it
  # is written. All lengths and offsets are counted in bytes, so fields
  # containing multi-byte characters get correct directory entries.
  def write_record(record)
    data = "".dup
    dir = "".dup
    offset = 0

    record.fields.each do |field|
      if field.tag.to_i < 10
        data << field.data << Constants.FT
      else
        data << field.ind1 << field.ind2
        field.subfields.each { |subf| data << Constants.US << subf.code << subf.data }
        data << Constants.FT
      end
      total = data.bytesize
      # Directory entry: tag, field length (4 digits), starting position (5 digits).
      dir << field.tag << format("%04d", total - offset) << format("%05d", offset)
      offset = total
    end
    dir << Constants.FT

    leader = record.leader
    leader.base_address = 24 + dir.bytesize
    # The extra byte accounts for the record terminator.
    leader.record_length = leader.base_address + data.bytesize + 1

    # Build a new string instead of appending to the one returned by
    # leader.to_s, so the leader is never modified by accident.
    @output.write(leader.to_s + dir + data + Constants.RT)
  end
end
72
+
73
+ end
@@ -0,0 +1,137 @@
1
+ # $Id: marc_xml_reader.rb,v 1.3 2005/12/05 19:36:41 bpeters Exp $
2
+ #
3
+ # Copyright (c) 2005 Bas Peters
4
+ #
5
+ # This file is part of RMARC
6
+ #
7
+ # RMARC is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 2.1 of the License, or (at your option) any later version.
11
+ #
12
+ # RMARC is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with RMARC; if not, write to the Free Software
19
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ #
21
+ require 'rexml/document'
22
+ require 'thread'
23
+
24
+ module RMARC
25
+
26
# Bounded queue used to hand parsed records over to the reader.
#
# Besides the SizedQueue behaviour it carries a +has_next+ flag that the
# XML listener sets to true when a collection starts and to false when the
# collection ends. The flag is nil until it has been assigned.
class RecordStack < SizedQueue
  attr_accessor :has_next
end
31
+
32
# Stream listener that turns MARC XML parse events into record objects.
#
# Each completed record is pushed onto the queue given to the constructor.
# The queue's +has_next+ flag is set to true when the collection element
# starts and to false when it ends. All parse state is kept per instance,
# so several listeners can be active at the same time.
class Listener

  # Creates a listener that delivers records to +queue+, an object that
  # responds to +push+ and <tt>has_next=</tt>.
  def initialize(queue)
    @queue = queue
    @record = nil
    @field = nil
    @subfield = nil
    @data = ""
  end

  # Handles a start tag. +name+ may carry a namespace prefix; +attrs+ maps
  # attribute names to values.
  def tag_start(name, attrs)
    # Start collecting text afresh so that whitespace seen between elements
    # cannot end up in the content of this element.
    @data = ""
    case local_name(name)
    when "collection"
      @queue.has_next = true
    when "record"
      @record = Record.new
    when "controlfield"
      @field = ControlField.new(attrs["tag"])
    when "datafield"
      @field = DataField.new(attrs["tag"], attrs["ind1"], attrs["ind2"])
    when "subfield"
      @subfield = Subfield.new(attrs["code"])
    end
  end

  # Collects character data; consecutive chunks are concatenated.
  def text(text)
    @data += text
  end

  # CDATA sections are treated like ordinary character data.
  def cdata(content)
    @data += content
  end

  # Handles an end tag: stores the collected text in the element that just
  # closed and attaches finished fields and records to their parent.
  def tag_end(name)
    case local_name(name)
    when "collection"
      @queue.has_next = false
    when "record"
      @queue.push(@record)
    when "leader"
      @record.leader = Leader.new(@data)
    when "controlfield"
      @field.data = @data
      @record.add(@field)
    when "datafield"
      @record.add(@field)
    when "subfield"
      @subfield.data = @data
      @field.add(@subfield)
    end
    @data = ""
  end

  # The XML declaration carries no record data and is ignored.
  def xmldecl(version, encoding, standalone)
  end

  # Comments carry no record data and are ignored.
  def comment(*)
  end

  # Processing instructions carry no record data and are ignored.
  def instruction(*)
  end

  private

  # Returns +name+ without its namespace prefix ("marc:record" gives
  # "record"); names without a prefix are returned unchanged.
  def local_name(name)
    name[/(\w+):(\w+)/, 2] || name
  end
end
96
+
97
# An iterator over a collection of MARC XML records.
#
# Example usage:
#
#   File.open("test/file.xml", "r") do |file|
#     reader = RMARC::MarcXmlReader.new(file)
#     while reader.has_next
#       record = reader.next()
#     end
#   end
#
# The input is parsed in a background thread that hands records over
# through a queue holding one record at a time. If iteration is abandoned
# before the end of the input, that thread stays blocked on the queue.
class MarcXmlReader

  # Marker the parser thread pushes onto the queue when it has finished,
  # whether normally or because of an error.
  END_OF_INPUT = Object.new.freeze

  # Creates a reader on +input+ and starts parsing it in a background
  # thread. All state is kept per instance, so several readers can be used
  # at the same time.
  def initialize(input)
    @input = input
    # A RecordStack is used because the listener sets has_next on its queue.
    @queue = RecordStack.new(1)
    @pending = []
    @finished = false
    @error = nil

    queue = @queue
    Thread.new do
      begin
        REXML::Document.parse_stream(input, Listener.new(queue))
      rescue StandardError => e
        # Kept for the consuming thread, which re-raises it.
        @error = e
      ensure
        queue.push(END_OF_INPUT)
      end
    end
  end

  # Returns true if the iteration has more records, false otherwise.
  #
  # Waits until the parser has either produced the next record or reached
  # the end of the input, so a true result guarantees that +next+ returns
  # a record without blocking. Raises the parser's error if parsing failed.
  def has_next
    fetch
    !@pending.empty?
  end

  # Returns the next record in the iteration, or nil when the input is
  # exhausted. Raises the parser's error if parsing failed.
  def next
    fetch
    @pending.shift
  end

  private

  # Makes sure one record is buffered unless the end of input was reached.
  def fetch
    return if @finished || !@pending.empty?

    item = @queue.pop
    if item.equal?(END_OF_INPUT)
      @finished = true
      raise @error if @error
    else
      @pending << item
    end
  end
end
136
+
137
+ end
@@ -0,0 +1,88 @@
1
+ # $Id: marc_xml_writer.rb,v 1.3 2005/12/05 19:36:41 bpeters Exp $
2
+ #
3
+ # Copyright (c) 2005 Bas Peters
4
+ #
5
+ # This file is part of RMARC
6
+ #
7
+ # RMARC is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 2.1 of the License, or (at your option) any later version.
11
+ #
12
+ # RMARC is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with RMARC; if not, write to the Free Software
19
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ #
21
+ require 'rexml/document'
22
+
23
+ module RMARC
24
+
25
# Class for writing MARC record objects in MARC XML format.
#
#   writer = RMARC::MarcXmlWriter.new(STDOUT)
#   File.open("file.mrc", "r") do |file|
#     reader = RMARC::MarcStreamReader.new(file)
#     writer.start_document
#     while reader.has_next
#       record = reader.next()
#       writer.write_record(record)
#     end
#     writer.end_document
#   end
class MarcXmlWriter

  # Creates a writer on +output+, an object that responds to +write+ and
  # <tt><<</tt> (for example an IO or a StringIO).
  #
  # The stream is kept per instance, so several writers can be used at the
  # same time without interfering with each other.
  def initialize(output)
    @output = output
  end

  # Writes the XML declaration and the collection start tag.
  def start_document
    @output.write(REXML::XMLDecl.new.to_s)
    @output.write("\n<collection xmlns=\"http://www.loc.gov/MARC21/slim\">\n ")
  end

  # Writes a single record element.
  #
  # The element is serialised without pretty printing: the pretty printer
  # collapses and re-wraps whitespace in text content, which would damage
  # the leader and fixed-length fields where every blank is significant.
  def write_record(record)
    rec = REXML::Element.new('record')
    ldr = REXML::Element.new('leader')
    ldr.add_text(record.leader.to_s)
    rec.add_element(ldr)

    record.fields.each { |field| rec.add_element(field_element(field)) }

    REXML::Formatters::Default.new.write(rec, @output)
  end

  # Writes the collection end tag.
  def end_document
    @output.write("\n</collection>")
  end

  private

  # Builds the controlfield or datafield element for +field+; tags below
  # 10 are control fields.
  def field_element(field)
    if field.tag.to_i < 10
      fld = REXML::Element.new('controlfield')
      fld.add_attribute('tag', field.tag)
      fld.add_text(field.data)
    else
      fld = REXML::Element.new('datafield')
      fld.add_attribute('tag', field.tag)
      fld.add_attribute('ind1', field.ind1)
      fld.add_attribute('ind2', field.ind2)
      field.subfields.each do |subf|
        sub = REXML::Element.new('subfield')
        sub.add_attribute('code', subf.code)
        sub.add_text(subf.data)
        fld.add_element(sub)
      end
    end
    fld
  end
end
87
+
88
+ end