marc 0.1.5 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -6,12 +6,13 @@ module MARC
6
6
  # MARC records contain data fields, each of which has a tag,
7
7
  # indicators and subfields. Tags for data fields must be in
8
8
  # the range 010-999.
9
- # Accessor attributes: tag ; indicator1 ; indicator2
9
+ # Accessor attributes: tag, indicator1, indicator2
10
10
  #
11
- # DataField includes enumerable for access to it's constituent
11
+ # DataField mixes in Enumerable to enable access to its constituent
12
12
  # Subfield objects. For instance, if you have a DataField representing
13
13
  # an 856 tag, and want to find all 'z' subfields:
14
- # urltag.find_all { |subfield| subfield.code == 'z' }
14
+ #
15
+ # subfield_z = field.find_all {|subfield| subfield.code == 'z'}
15
16
  #
16
17
  # Also, the accessor 'subfields' is an array of MARC::Subfield objects
17
18
  # which can be accessed or modified by the client directly if
@@ -37,11 +37,17 @@ module MARC
37
37
 
38
38
  def each
39
39
  # while there is data left in the file
40
- while length = @handle.read(5)
40
+ while rec_length_s = @handle.read(5)
41
+ # make sure the record length looks like an integer
42
+ rec_length_i = rec_length_s.to_i
43
+ if rec_length_i == 0:
44
+ raise MARC::Exception.new("invalid record length: #{rec_length_s}")
45
+ end
41
46
 
42
47
  # get the raw MARC21 for a record back from the file
43
48
  # using the record length
44
- raw = length + @handle.read(length.to_i-5)
49
+ raw = rec_length_s + @handle.read(rec_length_i-5)
50
+
45
51
 
46
52
  # create a record from the data and return it
47
53
  #record = MARC::Record.new_from_marc(raw)
@@ -3,12 +3,15 @@ module MARC
3
3
  # A class that represents an individual MARC record. Every record
4
4
  # is made up of a collection of MARC::DataField objects.
5
5
  #
6
- # MARC::Record includes Enumerable for access to constituent
7
- # DataFields. Eg, to return a list of all 650 DataFields:
8
- # record.find_all { |field| field.tag == '650' }
6
+ # MARC::Record mixes in Enumerable to enable access to constituent
7
+ # DataFields. For example, to return a list of all subject DataFields:
8
+ #
9
+ # record.find_all {|field| field.tag =~ /^6../}
9
10
  #
10
11
  # The accessor 'fields' is also an Array of MARC::DataField objects which
11
12
  # the client can access or modify if necessary.
13
+ #
14
+ # record.fields.delete(field)
12
15
  #
13
16
  # Other accessor attribute: 'leader' for record leader as String
14
17
 
@@ -16,10 +19,10 @@ module MARC
16
19
  include Enumerable
17
20
 
18
21
  # the record fields
19
- attr_accessor :fields,
22
+ attr_accessor :fields
20
23
 
21
24
  # the record leader
22
- :leader
25
+ attr_accessor :leader
23
26
 
24
27
  def initialize
25
28
  @fields = []
@@ -38,6 +41,12 @@ module MARC
38
41
  @fields.push(field)
39
42
  end
40
43
 
44
+ # alias to append
45
+
46
+ def <<(field)
47
+ append(field)
48
+ end
49
+
41
50
  # each() is here to support iterating and searching since MARC::Record
42
51
  # mixes in Enumerable
43
52
  #
@@ -1,9 +1,9 @@
1
1
  module MARC
2
2
 
3
- # A class to represents a subfield within a DataField.
4
- # Accessor attributes: code (letter subfield code) ; value
5
- # Both can be empty string, but should not be nil.
6
-
3
+ # A class that represents an individual subfield within a DataField.
4
+ # Accessor attributes include: code (letter subfield code) and value
5
+ # (the content of the subfield). Both can be empty string, but should
6
+ # not be set to nil.
7
7
 
8
8
  class Subfield
9
9
  attr_accessor :code, :value
@@ -60,7 +60,7 @@ module MARC
60
60
  event = @parser.pull
61
61
 
62
62
  if event.text?
63
- text += event[0].strip
63
+ text += REXML::Text::unnormalize(event[0].strip)
64
64
  next
65
65
  end
66
66
 
@@ -4,6 +4,8 @@ require 'rexml/text'
4
4
  module MARC
5
5
 
6
6
  # A class for writing MARC records as MARCXML.
7
+ # BIG CAVEAT! XMLWriter will *not* convert your MARC8 to UTF8
8
+ # bug the authors to do this if you need it
7
9
 
8
10
  class XMLWriter
9
11
 
@@ -51,21 +53,6 @@ module MARC
51
53
  @fh.close
52
54
  end
53
55
 
54
-
55
- # Converts from ISO 8859-1 to UTF-8, normalizes the UTF-8, and puts a
56
- # 'clean up marker' in records that have control characters (which are
57
- # not valid in XML). This is useful for locating these records once
58
- # they are in XML so problems caused by removing the invalid characters
59
- # can be fixed by a person. This (or something in the module) needs to
60
- # convert from MARC-8 to UTF-8, but it doesn't do this yet...
61
-
62
- def self.convert_to_utf8(text)
63
- cleaned_text = text.gsub(/[\x00-\x1f\x7f-\xff]+/, ' CLEAN_ME_UP ')
64
- utf8_text = cleaned_text.unpack('C*').pack('U*')
65
- normalized_text = REXML::Text::normalize(utf8_text)
66
-
67
- return normalized_text
68
- end
69
56
 
70
57
  # a static method that accepts a MARC::Record object
71
58
  # and returns a REXML::Document for the XML serialization.
@@ -138,7 +125,7 @@ module MARC
138
125
  end
139
126
 
140
127
  subfield_element.add_attribute("code", subfield.code)
141
- text = MARC::XMLWriter.convert_to_utf8(subfield.value)
128
+ text = subfield.value
142
129
  subfield_element.add_text(text)
143
130
  datafield_elem.add_element(subfield_element)
144
131
  end
@@ -153,7 +140,7 @@ module MARC
153
140
  end
154
141
 
155
142
  control_element.add_attribute("tag", field.tag)
156
- text = MARC::XMLWriter.convert_to_utf8(field.value)
143
+ text = field.value
157
144
  control_element.add_text(text)
158
145
  e.add_element(control_element)
159
146
  end
@@ -3,36 +3,41 @@ require 'marc'
3
3
 
4
4
  class ReaderTest < Test::Unit::TestCase
5
5
 
6
- def test_batch
7
- reader = MARC::Reader.new('test/batch.dat')
8
- count = 0
9
- reader.each { count += 1 }
10
- assert_equal(count, 10)
11
- end
12
-
13
- def test_loose
14
- reader = MARC::ForgivingReader.new('test/batch.dat')
15
- count = 0
16
- reader.each { count += 1 }
17
- assert_equal(10, count)
18
- end
19
-
20
- def test_search
21
- reader = MARC::Reader.new('test/batch.dat')
22
- records = reader.find_all { |r| r =~ /Perl/ }
23
- assert_equal(10, records.length)
24
-
25
- reader = MARC::Reader.new('test/batch.dat')
26
- records = reader.find_all { |r| r['245'] =~ /Perl/ }
27
- assert_equal(10, records.length)
28
-
29
- reader = MARC::Reader.new('test/batch.dat')
30
- records = reader.find_all { |r| r['245']['a'] =~ /Perl/ }
31
- assert_equal(10, records.length)
32
-
33
- reader = MARC::Reader.new('test/batch.dat')
34
- records = reader.find_all { |r| r =~ /Foo/ }
35
- assert_equal(0, records.length)
36
- end
6
+ def test_batch
7
+ reader = MARC::Reader.new('test/batch.dat')
8
+ count = 0
9
+ reader.each { count += 1 }
10
+ assert_equal(count, 10)
11
+ end
12
+
13
+ def test_loose
14
+ reader = MARC::ForgivingReader.new('test/batch.dat')
15
+ count = 0
16
+ reader.each { count += 1 }
17
+ assert_equal(10, count)
18
+ end
19
+
20
+ def test_bad_marc
21
+ reader = MARC::Reader.new('test/tc_reader.rb')
22
+ assert_raises(MARC::Exception) {reader.entries[0]}
23
+ end
24
+
25
+ def test_search
26
+ reader = MARC::Reader.new('test/batch.dat')
27
+ records = reader.find_all { |r| r =~ /Perl/ }
28
+ assert_equal(10, records.length)
29
+
30
+ reader = MARC::Reader.new('test/batch.dat')
31
+ records = reader.find_all { |r| r['245'] =~ /Perl/ }
32
+ assert_equal(10, records.length)
33
+
34
+ reader = MARC::Reader.new('test/batch.dat')
35
+ records = reader.find_all { |r| r['245']['a'] =~ /Perl/ }
36
+ assert_equal(10, records.length)
37
+
38
+ reader = MARC::Reader.new('test/batch.dat')
39
+ records = reader.find_all { |r| r =~ /Foo/ }
40
+ assert_equal(0, records.length)
41
+ end
37
42
 
38
43
  end
@@ -4,6 +4,17 @@ require 'stringio'
4
4
 
5
5
  class XMLTest < Test::Unit::TestCase
6
6
 
7
+ def test_xml_entities
8
+ r1 = MARC::Record.new
9
+ r1 << MARC::DataField.new('245', '0', '0', ['a', 'foo & bar'])
10
+ xml = r1.to_xml.to_s
11
+ assert_match /foo &amp; bar/, xml
12
+
13
+ reader = MARC::XMLReader.new(StringIO.new(xml))
14
+ r2 = reader.entries[0]
15
+ assert_equal 'foo & bar', r2['245']['a']
16
+ end
17
+
7
18
  def test_batch
8
19
  reader = MARC::XMLReader.new('test/batch.xml')
9
20
  count = 0
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.2
3
3
  specification_version: 1
4
4
  name: marc
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.5
7
- date: 2007-05-01 00:00:00 -04:00
6
+ version: 0.1.6
7
+ date: 2007-05-04 00:00:00 -04:00
8
8
  summary: A ruby library for working with Machine Readable Cataloging
9
9
  require_paths:
10
10
  - lib