marc 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,12 +6,13 @@ module MARC
6
6
  # MARC records contain data fields, each of which has a tag,
7
7
  # indicators and subfields. Tags for data fields must be in
8
8
  # the range 010-999.
9
- # Accessor attributes: tag ; indicator1 ; indicator2
9
+ # Accessor attributes: tag, indicator1, indicator2
10
10
  #
11
- # DataField includes enumerable for access to it's constituent
11
+ # DataField mixes in Enumerable to enable access to its constituent
12
12
  # Subfield objects. For instance, if you have a DataField representing
13
13
  # a 856 tag, and want to find all 'z' subfields:
14
- # urltag.find_all { |subfield| subfield.code == 'z' }
14
+ #
15
+ # subfield_z = field.find_all {|subfield| subfield.code == 'z'}
15
16
  #
16
17
  # Also, the accessor 'subfields' is an array of MARC::Subfield objects
17
18
  # which can be accessed or modified by the client directly if
@@ -37,11 +37,17 @@ module MARC
37
37
 
38
38
  def each
39
39
  # while there is data left in the file
40
- while length = @handle.read(5)
40
+ while rec_length_s = @handle.read(5)
41
+ # make sure the record length looks like an integer
42
+ rec_length_i = rec_length_s.to_i
43
+ if rec_length_i == 0:
44
+ raise MARC::Exception.new("invalid record length: #{rec_length_s}")
45
+ end
41
46
 
42
47
  # get the raw MARC21 for a record back from the file
43
48
  # using the record length
44
- raw = length + @handle.read(length.to_i-5)
49
+ raw = rec_length_s + @handle.read(rec_length_i-5)
50
+
45
51
 
46
52
  # create a record from the data and return it
47
53
  #record = MARC::Record.new_from_marc(raw)
@@ -3,12 +3,15 @@ module MARC
3
3
  # A class that represents an individual MARC record. Every record
4
4
  # is made up of a collection of MARC::DataField objects.
5
5
  #
6
- # MARC::Record includes Enumerable for access to constituent
7
- # DataFields. Eg, to return a list of all 650 DataFields:
8
- # record.find_all { |field| field.tag == '650' }
6
+ # MARC::Record mixes in Enumerable to enable access to constituent
7
+ # DataFields. For example, to return a list of all subject DataFields:
8
+ #
9
+ # record.find_all {|field| field.tag =~ /^6../}
9
10
  #
10
11
  # The accessor 'fields' is also an Array of MARC::DataField objects which
11
12
  # the client can access or modify if necessary.
13
+ #
14
+ # record.fields.delete(field)
12
15
  #
13
16
  # Other accessor attribute: 'leader' for record leader as String
14
17
 
@@ -16,10 +19,10 @@ module MARC
16
19
  include Enumerable
17
20
 
18
21
  # the record fields
19
- attr_accessor :fields,
22
+ attr_accessor :fields
20
23
 
21
24
  # the record leader
22
- :leader
25
+ attr_accessor :leader
23
26
 
24
27
  def initialize
25
28
  @fields = []
@@ -38,6 +41,12 @@ module MARC
38
41
  @fields.push(field)
39
42
  end
40
43
 
44
+ # alias to append
45
+
46
+ def <<(field)
47
+ append(field)
48
+ end
49
+
41
50
  # each() is here to support iterating and searching since MARC::Record
42
51
  # mixes in Enumerable
43
52
  #
@@ -1,9 +1,9 @@
1
1
  module MARC
2
2
 
3
- # A class to represents a subfield within a DataField.
4
- # Accessor attributes: code (letter subfield code) ; value
5
- # Both can be empty string, but should not be nil.
6
-
3
+ # A class that represents an individual subfield within a DataField.
4
+ # Accessor attributes include: code (letter subfield code) and value
5
+ # (the content of the subfield). Both can be empty string, but should
6
+ # not be set to nil.
7
7
 
8
8
  class Subfield
9
9
  attr_accessor :code, :value
@@ -60,7 +60,7 @@ module MARC
60
60
  event = @parser.pull
61
61
 
62
62
  if event.text?
63
- text += event[0].strip
63
+ text += REXML::Text::unnormalize(event[0].strip)
64
64
  next
65
65
  end
66
66
 
@@ -4,6 +4,8 @@ require 'rexml/text'
4
4
  module MARC
5
5
 
6
6
  # A class for writing MARC records as MARCXML.
7
+ # BIG CAVEAT! XMLWriter will *not* convert your MARC8 to UTF8
8
+ # bug the authors to do this if you need it
7
9
 
8
10
  class XMLWriter
9
11
 
@@ -51,21 +53,6 @@ module MARC
51
53
  @fh.close
52
54
  end
53
55
 
54
-
55
- # Converts from ISO 8859-1 to UTF-8, normalizes the UTF-8, and puts a
56
- # 'clean up marker' in records that have control characters (which are
57
- # not valid in XML). This is useful for locating these records once
58
- # they are in XML so problems caused by removing the invalid characters
59
- # can be fixed by a person. This (or something in the module) needs to
60
- # convert from MARC-8 to UTF-8, but it doesn't do this yet...
61
-
62
- def self.convert_to_utf8(text)
63
- cleaned_text = text.gsub(/[\x00-\x1f\x7f-\xff]+/, ' CLEAN_ME_UP ')
64
- utf8_text = cleaned_text.unpack('C*').pack('U*')
65
- normalized_text = REXML::Text::normalize(utf8_text)
66
-
67
- return normalized_text
68
- end
69
56
 
70
57
  # a static method that accepts a MARC::Record object
71
58
  # and returns a REXML::Document for the XML serialization.
@@ -138,7 +125,7 @@ module MARC
138
125
  end
139
126
 
140
127
  subfield_element.add_attribute("code", subfield.code)
141
- text = MARC::XMLWriter.convert_to_utf8(subfield.value)
128
+ text = subfield.value
142
129
  subfield_element.add_text(text)
143
130
  datafield_elem.add_element(subfield_element)
144
131
  end
@@ -153,7 +140,7 @@ module MARC
153
140
  end
154
141
 
155
142
  control_element.add_attribute("tag", field.tag)
156
- text = MARC::XMLWriter.convert_to_utf8(field.value)
143
+ text = field.value
157
144
  control_element.add_text(text)
158
145
  e.add_element(control_element)
159
146
  end
@@ -3,36 +3,41 @@ require 'marc'
3
3
 
4
4
  class ReaderTest < Test::Unit::TestCase
5
5
 
6
- def test_batch
7
- reader = MARC::Reader.new('test/batch.dat')
8
- count = 0
9
- reader.each { count += 1 }
10
- assert_equal(count, 10)
11
- end
12
-
13
- def test_loose
14
- reader = MARC::ForgivingReader.new('test/batch.dat')
15
- count = 0
16
- reader.each { count += 1 }
17
- assert_equal(10, count)
18
- end
19
-
20
- def test_search
21
- reader = MARC::Reader.new('test/batch.dat')
22
- records = reader.find_all { |r| r =~ /Perl/ }
23
- assert_equal(10, records.length)
24
-
25
- reader = MARC::Reader.new('test/batch.dat')
26
- records = reader.find_all { |r| r['245'] =~ /Perl/ }
27
- assert_equal(10, records.length)
28
-
29
- reader = MARC::Reader.new('test/batch.dat')
30
- records = reader.find_all { |r| r['245']['a'] =~ /Perl/ }
31
- assert_equal(10, records.length)
32
-
33
- reader = MARC::Reader.new('test/batch.dat')
34
- records = reader.find_all { |r| r =~ /Foo/ }
35
- assert_equal(0, records.length)
36
- end
6
+ def test_batch
7
+ reader = MARC::Reader.new('test/batch.dat')
8
+ count = 0
9
+ reader.each { count += 1 }
10
+ assert_equal(count, 10)
11
+ end
12
+
13
+ def test_loose
14
+ reader = MARC::ForgivingReader.new('test/batch.dat')
15
+ count = 0
16
+ reader.each { count += 1 }
17
+ assert_equal(10, count)
18
+ end
19
+
20
+ def test_bad_marc
21
+ reader = MARC::Reader.new('test/tc_reader.rb')
22
+ assert_raises(MARC::Exception) {reader.entries[0]}
23
+ end
24
+
25
+ def test_search
26
+ reader = MARC::Reader.new('test/batch.dat')
27
+ records = reader.find_all { |r| r =~ /Perl/ }
28
+ assert_equal(10, records.length)
29
+
30
+ reader = MARC::Reader.new('test/batch.dat')
31
+ records = reader.find_all { |r| r['245'] =~ /Perl/ }
32
+ assert_equal(10, records.length)
33
+
34
+ reader = MARC::Reader.new('test/batch.dat')
35
+ records = reader.find_all { |r| r['245']['a'] =~ /Perl/ }
36
+ assert_equal(10, records.length)
37
+
38
+ reader = MARC::Reader.new('test/batch.dat')
39
+ records = reader.find_all { |r| r =~ /Foo/ }
40
+ assert_equal(0, records.length)
41
+ end
37
42
 
38
43
  end
@@ -4,6 +4,17 @@ require 'stringio'
4
4
 
5
5
  class XMLTest < Test::Unit::TestCase
6
6
 
7
+ def test_xml_entities
8
+ r1 = MARC::Record.new
9
+ r1 << MARC::DataField.new('245', '0', '0', ['a', 'foo & bar'])
10
+ xml = r1.to_xml.to_s
11
+ assert_match /foo &amp; bar/, xml
12
+
13
+ reader = MARC::XMLReader.new(StringIO.new(xml))
14
+ r2 = reader.entries[0]
15
+ assert_equal 'foo & bar', r2['245']['a']
16
+ end
17
+
7
18
  def test_batch
8
19
  reader = MARC::XMLReader.new('test/batch.xml')
9
20
  count = 0
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.2
3
3
  specification_version: 1
4
4
  name: marc
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.5
7
- date: 2007-05-01 00:00:00 -04:00
6
+ version: 0.1.6
7
+ date: 2007-05-04 00:00:00 -04:00
8
8
  summary: A ruby library for working with Machine Readable Cataloging
9
9
  require_paths:
10
10
  - lib