marc 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,87 @@
1
+ require 'rexml/document'
2
+
3
module MARC

  # A class for writing MARC records as MARCXML.
  #
  # The XML declaration and the opening <marc:collection> tag are written
  # as soon as the writer is constructed, every call to #write appends one
  # <marc:record>, and #close emits the closing </marc:collection> tag.
  # MARC_NS and MARC_XSD are constants that must be defined elsewhere in
  # the MARC module.

  class XMLWriter

    # the constructor which you must pass a file path
    # or an object that responds to a write message
    #
    # Raises ArgumentError when given anything else.

    def initialize(file)
      if file.is_a?(String)
        @fh = File.new(file, "w")
      elsif file.respond_to?(:write)
        @fh = file
      else
        # raise, not throw: throw is for catch/throw control flow and
        # only produced an error here by accident
        raise ArgumentError, "must pass in file name or handle"
      end

      @fh.write("<?xml version='1.0'?>")
      @fh.write("<marc:collection xmlns:marc='#{MARC_NS}' " \
        "xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' " \
        "xsi:schemaLocation='#{MARC_NS} #{MARC_XSD}'>")
    end


    # write a record to the file or handle

    def write(record)
      @fh.write(MARC::XMLWriter.encode(record).to_s)
    end


    # write the closing collection tag and close the underlying
    # filehandle; handles that only respond to write (the minimum the
    # constructor asks for) are left alone instead of raising
    # NoMethodError

    def close
      @fh.write("</marc:collection>")
      @fh.close if @fh.respond_to?(:close)
    end


    # a static method that accepts a MARC::Record object
    # and returns a REXML::Document for the XML serialization
    #
    # Note: the last four leader positions of the record that is passed
    # in are overwritten in place with "4500".

    def self.encode(record)
      doc = REXML::Document.new("<marc:record xmlns:marc='#{MARC_NS}'/>")
      root = doc.root

      # MARCXML is particular about this; ILSes aren't
      record.leader[20..24] = "4500"

      leader = REXML::Element.new("marc:leader")
      leader.add_text(record.leader)
      root.add_element(leader)

      record.fields.each do |field|
        # exact class comparison is kept on purpose; anything that is
        # neither a MARC::Field nor a MARC::Control is skipped
        if field.instance_of?(MARC::Field)
          datafield = REXML::Element.new("marc:datafield")
          datafield.add_attributes({
            "tag" => field.tag,
            "ind1" => field.indicator1,
            "ind2" => field.indicator2
          })

          field.subfields.each do |subfield|
            sf_elem = REXML::Element.new("marc:subfield")
            sf_elem.add_attribute("code", subfield.code)
            sf_elem.add_text(subfield.value)
            datafield.add_element(sf_elem)
          end

          root.add_element(datafield)
        elsif field.instance_of?(MARC::Control)
          controlfield = REXML::Element.new("marc:controlfield")
          controlfield.add_attribute("tag", field.tag)
          controlfield.add_text(field.value)
          root.add_element(controlfield)
        end
      end

      doc
    end
  end
end
@@ -0,0 +1,157 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!-- edited with XML Spy v4.3 U (http://www.xmlspy.com) by Morgan Cundiff (Library of Congress) -->
3
+ <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/MARC21/slim
4
+ http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd">
5
+ <marc:record>
6
+ <marc:leader>00925njm 22002777a 4500</marc:leader>
7
+ <marc:controlfield tag="001">5637241</marc:controlfield>
8
+ <marc:controlfield tag="003">DLC</marc:controlfield>
9
+ <marc:controlfield tag="005">19920826084036.0</marc:controlfield>
10
+ <marc:controlfield tag="007">sdubumennmplu</marc:controlfield>
11
+ <marc:controlfield tag="008">910926s1957 nyuuun eng </marc:controlfield>
12
+ <marc:datafield tag="010" ind1=" " ind2=" ">
13
+ <marc:subfield code="a"> 91758335 </marc:subfield>
14
+ </marc:datafield>
15
+ <marc:datafield tag="028" ind1="0" ind2="0">
16
+ <marc:subfield code="a">1259</marc:subfield>
17
+ <marc:subfield code="b">Atlantic</marc:subfield>
18
+ </marc:datafield>
19
+ <marc:datafield tag="040" ind1=" " ind2=" ">
20
+ <marc:subfield code="a">DLC</marc:subfield>
21
+ <marc:subfield code="c">DLC</marc:subfield>
22
+ </marc:datafield>
23
+ <marc:datafield tag="050" ind1="0" ind2="0">
24
+ <marc:subfield code="a">Atlantic 1259</marc:subfield>
25
+ </marc:datafield>
26
+ <marc:datafield tag="245" ind1="0" ind2="4">
27
+ <marc:subfield code="a">The Great Ray Charles</marc:subfield>
28
+ <marc:subfield code="h">[sound recording].</marc:subfield>
29
+ </marc:datafield>
30
+ <marc:datafield tag="260" ind1=" " ind2=" ">
31
+ <marc:subfield code="a">New York, N.Y. :</marc:subfield>
32
+ <marc:subfield code="b">Atlantic,</marc:subfield>
33
+ <marc:subfield code="c">[1957?]</marc:subfield>
34
+ </marc:datafield>
35
+ <marc:datafield tag="300" ind1=" " ind2=" ">
36
+ <marc:subfield code="a">1 sound disc :</marc:subfield>
37
+ <marc:subfield code="b">analog, 33 1/3 rpm ;</marc:subfield>
38
+ <marc:subfield code="c">12 in.</marc:subfield>
39
+ </marc:datafield>
40
+ <marc:datafield tag="511" ind1="0" ind2=" ">
41
+ <marc:subfield code="a">Ray Charles, piano &amp; celeste.</marc:subfield>
42
+ </marc:datafield>
43
+ <marc:datafield tag="505" ind1="0" ind2=" ">
44
+ <marc:subfield code="a">The Ray -- My melancholy baby -- Black coffee -- There's no you -- Doodlin' -- Sweet sixteen bars -- I surrender dear -- Undecided.</marc:subfield>
45
+ </marc:datafield>
46
+ <marc:datafield tag="500" ind1=" " ind2=" ">
47
+ <marc:subfield code="a">Brief record.</marc:subfield>
48
+ </marc:datafield>
49
+ <marc:datafield tag="650" ind1=" " ind2="0">
50
+ <marc:subfield code="a">Jazz</marc:subfield>
51
+ <marc:subfield code="y">1951-1960.</marc:subfield>
52
+ </marc:datafield>
53
+ <marc:datafield tag="650" ind1=" " ind2="0">
54
+ <marc:subfield code="a">Piano with jazz ensemble.</marc:subfield>
55
+ </marc:datafield>
56
+ <marc:datafield tag="700" ind1="1" ind2=" ">
57
+ <marc:subfield code="a">Charles, Ray,</marc:subfield>
58
+ <marc:subfield code="d">1930-</marc:subfield>
59
+ <marc:subfield code="4">prf</marc:subfield>
60
+ </marc:datafield>
61
+ </marc:record>
62
+ <marc:record>
63
+ <marc:leader>01832cmma 2200349 a 4500</marc:leader>
64
+ <marc:controlfield tag="001">12149120</marc:controlfield>
65
+ <marc:controlfield tag="005">20001005175443.0</marc:controlfield>
66
+ <marc:controlfield tag="007">cr |||</marc:controlfield>
67
+ <marc:controlfield tag="008">000407m19949999dcu g m eng d</marc:controlfield>
68
+ <marc:datafield tag="906" ind1=" " ind2=" ">
69
+ <marc:subfield code="a">0</marc:subfield>
70
+ <marc:subfield code="b">ibc</marc:subfield>
71
+ <marc:subfield code="c">copycat</marc:subfield>
72
+ <marc:subfield code="d">1</marc:subfield>
73
+ <marc:subfield code="e">ncip</marc:subfield>
74
+ <marc:subfield code="f">20</marc:subfield>
75
+ <marc:subfield code="g">y-gencompf</marc:subfield>
76
+ </marc:datafield>
77
+ <marc:datafield tag="925" ind1="0" ind2=" ">
78
+ <marc:subfield code="a">undetermined</marc:subfield>
79
+ <marc:subfield code="x">web preservation project (wpp)</marc:subfield>
80
+ </marc:datafield>
81
+ <marc:datafield tag="955" ind1=" " ind2=" ">
82
+ <marc:subfield code="a">vb07 (stars done) 08-19-00 to HLCD lk00; AA3s lk29 received for subject Aug 25, 2000; to DEWEY 08-25-00; aa11 08-28-00</marc:subfield>
83
+ </marc:datafield>
84
+ <marc:datafield tag="010" ind1=" " ind2=" ">
85
+ <marc:subfield code="a"> 00530046 </marc:subfield>
86
+ </marc:datafield>
87
+ <marc:datafield tag="035" ind1=" " ind2=" ">
88
+ <marc:subfield code="a">(OCoLC)ocm44279786</marc:subfield>
89
+ </marc:datafield>
90
+ <marc:datafield tag="040" ind1=" " ind2=" ">
91
+ <marc:subfield code="a">IEU</marc:subfield>
92
+ <marc:subfield code="c">IEU</marc:subfield>
93
+ <marc:subfield code="d">N@F</marc:subfield>
94
+ <marc:subfield code="d">DLC</marc:subfield>
95
+ </marc:datafield>
96
+ <marc:datafield tag="042" ind1=" " ind2=" ">
97
+ <marc:subfield code="a">lccopycat</marc:subfield>
98
+ </marc:datafield>
99
+ <marc:datafield tag="043" ind1=" " ind2=" ">
100
+ <marc:subfield code="a">n-us-dc</marc:subfield>
101
+ <marc:subfield code="a">n-us---</marc:subfield>
102
+ </marc:datafield>
103
+ <marc:datafield tag="050" ind1="0" ind2="0">
104
+ <marc:subfield code="a">F204.W5</marc:subfield>
105
+ </marc:datafield>
106
+ <marc:datafield tag="082" ind1="1" ind2="0">
107
+ <marc:subfield code="a">975.3</marc:subfield>
108
+ <marc:subfield code="2">13</marc:subfield>
109
+ </marc:datafield>
110
+ <marc:datafield tag="245" ind1="0" ind2="4">
111
+ <marc:subfield code="a">The White House</marc:subfield>
112
+ <marc:subfield code="h">[computer file].</marc:subfield>
113
+ </marc:datafield>
114
+ <marc:datafield tag="256" ind1=" " ind2=" ">
115
+ <marc:subfield code="a">Computer data.</marc:subfield>
116
+ </marc:datafield>
117
+ <marc:datafield tag="260" ind1=" " ind2=" ">
118
+ <marc:subfield code="a">Washington, D.C. :</marc:subfield>
119
+ <marc:subfield code="b">White House Web Team,</marc:subfield>
120
+ <marc:subfield code="c">1994-</marc:subfield>
121
+ </marc:datafield>
122
+ <marc:datafield tag="538" ind1=" " ind2=" ">
123
+ <marc:subfield code="a">Mode of access: Internet.</marc:subfield>
124
+ </marc:datafield>
125
+ <marc:datafield tag="500" ind1=" " ind2=" ">
126
+ <marc:subfield code="a">Title from home page as viewed on Aug. 19, 2000.</marc:subfield>
127
+ </marc:datafield>
128
+ <marc:datafield tag="520" ind1="8" ind2=" ">
129
+ <marc:subfield code="a">Features the White House. Highlights the Executive Office of the President, which includes senior policy advisors and offices responsible for the President's correspondence and communications, the Office of the Vice President, and the Office of the First Lady. Posts contact information via mailing address, telephone and fax numbers, and e-mail. Contains the Interactive Citizens' Handbook with information on health, travel and tourism, education and training, and housing. Provides a tour and the history of the White House. Links to White House for Kids.</marc:subfield>
130
+ </marc:datafield>
131
+ <marc:datafield tag="610" ind1="2" ind2="0">
132
+ <marc:subfield code="a">White House (Washington, D.C.)</marc:subfield>
133
+ </marc:datafield>
134
+ <marc:datafield tag="610" ind1="1" ind2="0">
135
+ <marc:subfield code="a">United States.</marc:subfield>
136
+ <marc:subfield code="b">Executive Office of the President.</marc:subfield>
137
+ </marc:datafield>
138
+ <marc:datafield tag="610" ind1="1" ind2="0">
139
+ <marc:subfield code="a">United States.</marc:subfield>
140
+ <marc:subfield code="b">Office of the Vice President.</marc:subfield>
141
+ </marc:datafield>
142
+ <marc:datafield tag="610" ind1="1" ind2="0">
143
+ <marc:subfield code="a">United States.</marc:subfield>
144
+ <marc:subfield code="b">Office of the First Lady.</marc:subfield>
145
+ </marc:datafield>
146
+ <marc:datafield tag="710" ind1="2" ind2=" ">
147
+ <marc:subfield code="a">White House Web Team.</marc:subfield>
148
+ </marc:datafield>
149
+ <marc:datafield tag="856" ind1="4" ind2="0">
150
+ <marc:subfield code="u">http://www.whitehouse.gov</marc:subfield>
151
+ </marc:datafield>
152
+ <marc:datafield tag="856" ind1="4" ind2="0">
153
+ <marc:subfield code="u">http://lcweb.loc.gov/staff/wpp/whitehouse.html</marc:subfield>
154
+ <marc:subfield code="z">Web site archive</marc:subfield>
155
+ </marc:datafield>
156
+ </marc:record>
157
+ </marc:collection>
@@ -0,0 +1,34 @@
1
+ require 'test/unit'
2
+ require 'marc'
3
+
4
class XMLReaderTest < Test::Unit::TestCase

  # every entry read from the batch fixture must be a MARC::Record,
  # and there must be exactly two of them
  def test_batch
    reader = MARC::XMLReader.new('test/batch.xml')
    count = 0
    reader.each do |record|
      count += 1
      assert_instance_of(MARC::Record, record)
    end
    # expected value goes first so a failure message reads correctly
    assert_equal(2, count)
  end

  # a record written with MARC::XMLWriter must compare equal to the
  # record read back with MARC::XMLReader
  def test_read_write
    path = 'test/foo.xml'

    record1 = MARC::Record.new
    record1.leader = '00925njm 22002777a 4500'
    record1.append MARC::Control.new('007', 'sdubumennmplu')
    record1.append MARC::Field.new('245', '0', '4',
      ['a', 'The Great Ray Charles'], ['h', '[sound recording].'])

    begin
      writer = MARC::XMLWriter.new(path)
      writer.write(record1)
      writer.close

      reader = MARC::XMLReader.new(path)
      record2 = reader.entries[0]
      assert_equal(record1, record2)
    ensure
      # remove the scratch file even when an assertion above fails
      File.unlink(path) if File.exist?(path)
    end
  end
end
34
+
@@ -0,0 +1,37 @@
1
+ require 'test/unit'
2
+ require 'marc'
3
+
4
class XMLWriterTest < Test::Unit::TestCase

  # serializes one record into an in-memory writer and checks both the
  # preamble emitted by the constructor and the record emitted by write
  def test_writer
    # get a record
    reader = MARC::Reader.new('test/one.dat')
    record = reader.entries[0]

    str_writer = StringWriter.new
    xml_writer = MARC::XMLWriter.new(str_writer)
    xml_writer.write(record)

    output = str_writer.buffer

    # written by the constructor
    assert_match(/<\?xml version='1.0'\?>/, output)
    assert_match(/<marc:collection /, output)

    # written by write; without these the test would pass even if
    # write did nothing
    assert_match(/<marc:record /, output)
    assert_match(/<marc:leader>/, output)
  end
end
17
+
18
# little class that enables writing to a string
# like it's a file

class StringWriter
  # everything written so far
  attr_reader :buffer

  def initialize
    # dup so the buffer stays mutable even under frozen string literals
    @buffer = ''.dup
  end

  # append str (converted with to_s) to the buffer and
  # return the buffer
  def write(str)
    # append in place instead of rebuilding the whole string on each write
    @buffer << str.to_s
  end

  # nothing to release; present so the object can stand in for a
  # file handle that gets closed
  def close
  end

  def to_s
    @buffer
  end
end
36
+
37
+
@@ -10,3 +10,5 @@ require 'test/tc_field'
10
10
  require 'test/tc_record'
11
11
  require 'test/tc_reader'
12
12
  require 'test/tc_writer'
13
+ require 'test/tc_xmlwriter'
14
+ require 'test/tc_xmlreader'
metadata CHANGED
@@ -3,13 +3,13 @@ rubygems_version: 0.8.11
3
3
  specification_version: 1
4
4
  name: marc
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.0.6
7
- date: 2005-10-18 00:00:00 -05:00
6
+ version: 0.0.7
7
+ date: 2006-01-02 00:00:00 -06:00
8
8
  summary: A ruby library for working with Machine Readable Cataloging
9
9
  require_paths:
10
10
  - lib
11
11
  email: ehs@pobox.com
12
- homepage: http://www.textualize.com/marc
12
+ homepage: http://www.textualize.com/ruby_marc
13
13
  rubyforge_project:
14
14
  description:
15
15
  autorequire: marc
@@ -31,21 +31,26 @@ authors:
31
31
  files:
32
32
  - lib/marc
33
33
  - lib/marc.rb
34
+ - lib/marc/constants.rb
34
35
  - lib/marc/control.rb
35
36
  - lib/marc/exception.rb
36
37
  - lib/marc/field.rb
37
- - lib/marc/marc21.rb
38
38
  - lib/marc/reader.rb
39
39
  - lib/marc/record.rb
40
40
  - lib/marc/subfield.rb
41
41
  - lib/marc/writer.rb
42
+ - lib/marc/xmlreader.rb
43
+ - lib/marc/xmlwriter.rb
42
44
  - test/batch.dat
45
+ - test/batch.xml
43
46
  - test/one.dat
44
47
  - test/tc_field.rb
45
48
  - test/tc_reader.rb
46
49
  - test/tc_record.rb
47
50
  - test/tc_subfield.rb
48
51
  - test/tc_writer.rb
52
+ - test/tc_xmlreader.rb
53
+ - test/tc_xmlwriter.rb
49
54
  - test/ts_marc.rb
50
55
  test_files:
51
56
  - test/ts_marc.rb
@@ -1,155 +0,0 @@
1
module MARC

  # Provides methods for serializing and deserializing MARC::Record
  # objects as MARC21 in transmission format.
  #
  # Lengths and offsets stored in the leader and the directory are
  # counted in bytes, so field data containing multibyte characters
  # round-trips correctly.

  class MARC21

    LEADER_LENGTH = 24
    DIRECTORY_ENTRY_LENGTH = 12
    SUBFIELD_INDICATOR = 0x1F.chr
    END_OF_FIELD = 0x1E.chr
    END_OF_RECORD = 0x1D.chr


    # Returns the MARC21 serialization for a MARC::Record
    #
    # The leader of the record that was passed in is replaced with the
    # updated leader (record length and base address filled in).

    def encode(record)
      directory = ''
      fields = ''
      offset = 0

      record.fields.each do |field|

        # encode the field
        field_data = ''
        if field.instance_of?(MARC::Field)
          field_data = field.indicator1 + field.indicator2
          field.subfields.each do |s|
            field_data += SUBFIELD_INDICATOR + s.code + s.value
          end
        elsif field.instance_of?(MARC::Control)
          field_data = field.value
        end
        field_data += END_OF_FIELD

        # calculate directory entry for the field (in bytes)
        field_length = field_data.bytesize
        directory += sprintf("%03s%04i%05i", field.tag, field_length,
          offset)

        # add field to data for other fields
        fields += field_data

        # update offset for next field
        offset += field_length
      end

      # determine the base (leader + directory)
      base = record.leader + directory + END_OF_FIELD

      # determine complete record
      marc = base + fields + END_OF_RECORD

      # update leader with the byte offset to the end of the directory
      marc[12..16] = sprintf("%05i", base.bytesize)

      # update the record length
      marc[0..4] = sprintf("%05i", marc.bytesize)

      # store updated leader in the record that was passed in
      record.leader = marc[0..LEADER_LENGTH-1]

      # return encoded marc
      marc
    end


    # Deserializes MARC21 as a MARC::Record object
    #
    # Pass :forgiving => true in params to ignore the byte offsets in
    # the directory and split the field data on END_OF_FIELD instead.
    #
    # Raises ArgumentError when the directory is missing or, in strict
    # mode, when a directory entry points outside the record.

    def decode(marc, params={})
      record = Record.new()
      record.leader = marc[0..LEADER_LENGTH-1]

      # where the field data starts
      base_address = record.leader[12..16].to_i

      # get the byte offsets from the record directory
      directory = marc[LEADER_LENGTH..base_address-1]

      raise ArgumentError, "invalid directory in record" if directory.nil?

      # the number of fields in the record corresponds to
      # how many directory entries there are
      num_fields = directory.length / DIRECTORY_ENTRY_LENGTH

      # when operating in forgiving mode we just split on end of
      # field instead of using calculated byte offsets from the
      # directory
      all_fields = (marc[base_address..-1] || '').split(END_OF_FIELD)

      num_fields.times do |field_num|

        # pull the directory entry for a field out (exactly 12 characters)
        entry = directory[field_num * DIRECTORY_ENTRY_LENGTH,
          DIRECTORY_ENTRY_LENGTH]

        # extract the tag
        tag = entry[0..2]

        # get the actual field data
        if params[:forgiving]
          # use the next available chunk of field data; stop once the
          # data runs out before the directory does
          field_data = all_fields.shift()
          break if field_data.nil?
        else
          # use the byte offsets in the directory to figure out
          # what field data to extract
          length = entry[3..6].to_i
          offset = entry[7..11].to_i
          field_data = marc.byteslice(base_address + offset, length)
          if field_data.nil?
            raise ArgumentError,
              "directory entry for #{tag} points outside the record"
          end
        end

        # remove end of field
        field_data = field_data.delete(END_OF_FIELD)

        # add a control field or variable field
        if tag < '010'
          record.append(MARC::Control.new(tag, field_data))
        else
          field = MARC::Field.new(tag)

          # get all subfields
          subfields = field_data.split(SUBFIELD_INDICATOR)

          # must have at least 2 elements (indicators, and 1 subfield)
          # TODO some sort of logging?
          next if subfields.length < 2

          # get indicators
          indicators = subfields.shift()
          field.indicator1 = indicators[0, 1]
          field.indicator2 = indicators[1, 1]

          # add each subfield to the field
          subfields.each do |data|
            field.append(MARC::Subfield.new(data[0, 1], data[1..-1]))
          end

          # add the field to the record
          record.append(field)
        end
      end

      record
    end

  end

end