marc 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/lib/marc/marc21.rb CHANGED
@@ -66,7 +66,7 @@ module MARC
66
66
 
67
67
  # Deserializes MARC21 as a MARC::Record object
68
68
 
69
- def decode(marc)
69
+ def decode(marc, params={})
70
70
  record = Record.new()
71
71
  record.leader = marc[0..LEADER_LENGTH]
72
72
 
@@ -80,6 +80,11 @@ module MARC
80
80
  # how many directory entries there are
81
81
  num_fields = directory.length / DIRECTORY_ENTRY_LENGTH
82
82
 
83
+ # when operating in forgiving mode we just split on end of
84
+ # field instead of using calculated byte offsets from the
85
+ # directory
86
+ all_fields = marc[base_address..-1].split(END_OF_FIELD)
87
+
83
88
  0.upto(num_fields-1) do |field_num|
84
89
 
85
90
  # pull the directory entry for a field out
@@ -87,14 +92,26 @@ module MARC
87
92
  entry_end = entry_start + DIRECTORY_ENTRY_LENGTH
88
93
  entry = directory[entry_start..entry_end]
89
94
 
90
- # extract the tag, length and offset for pulling the
91
- # field out of the field portion
95
+ # extract the tag
92
96
  tag = entry[0..2]
93
- length = entry[3..6].to_i
94
- offset = entry[7..11].to_i
95
- field_start = base_address + offset
96
- field_end = field_start + length - 1
97
- field_data = marc[field_start..field_end]
97
+
98
+ # get the actual field data
99
+ # if we were told to be forgiving we just use the
100
+ # next available chunk of field data that we
101
+ # split apart based on the END_OF_FIELD
102
+ field_data = ''
103
+ if params[:forgiving]
104
+ field_data = all_fields.shift()
105
+
106
+ # otherwise we actually use the byte offsets in
107
+ # directory to figure out what field data to extract
108
+ else
109
+ length = entry[3..6].to_i
110
+ offset = entry[7..11].to_i
111
+ field_start = base_address + offset
112
+ field_end = field_start + length - 1
113
+ field_data = marc[field_start..field_end]
114
+ end
98
115
 
99
116
  # remove end of field
100
117
  field_data.delete!(END_OF_FIELD)
data/lib/marc/reader.rb CHANGED
@@ -48,4 +48,40 @@ module MARC
48
48
 
49
49
  end
50
50
 
51
+
52
+ # Like Reader, ForgivingReader lets you read in a batch of MARC21 records
53
+ # but it does not use record lengths and field byte offsets found in the
54
+ # leader and directory. It is not unusual to run across MARC records
55
+ # which have had their offsets calculated wrong. In situations like this
56
+ # the vanilla Reader may fail, and you can try to use ForgivingReader.
57
+
58
+ # The one downside to this is that ForgivingReader will assume that the
59
+ # order of the fields in the directory is the same as the order of fields
60
+ # in the field data. Hopefully this will be the case, but it is not
61
+ # 100% guaranteed, which is why the normal behavior of Reader is encouraged.
62
+
63
+ class ForgivingReader
64
+ include Enumerable
65
+
66
+ def initialize(file)
67
+ if file.class == String
68
+ @handle = File.new(file)
69
+ elsif file.class == File
70
+ @handle = file
71
+ else
72
+ throw "must pass in path or File object"
73
+ end
74
+ end
75
+
76
+
77
+ def each
78
+ @handle.each_line(MARC::MARC21::END_OF_RECORD) do |raw|
79
+ record = MARC::Record.new_from_marc(raw, :forgiving => true)
80
+ yield record
81
+ end
82
+ end
83
+
84
+ end
85
+
86
+
51
87
  end
data/lib/marc/record.rb CHANGED
@@ -59,10 +59,16 @@ module MARC
59
59
  # MARC::MARC21::decode
60
60
  #
61
61
  # record = MARC::Record.new_from_marc(marc21)
62
+ #
63
+ # in cases where you might be working with somewhat flawed
64
+ # MARC data you may want to use the :forgiving parameter which
65
+ # will bypass using field byte offsets and simply look for the
66
+ # end of field byte to figure out the end of fields.
67
+ #
68
+ # record = MARC::Record.new_from_marc(marc21, :forgiving => true)
62
69
 
63
-
64
- def Record::new_from_marc(raw)
65
- return MARC::MARC21.new().decode(raw)
70
+ def self.new_from_marc(raw, params={})
71
+ return MARC::MARC21.new().decode(raw, params)
66
72
  end
67
73
 
68
74
 
data/test/tc_reader.rb CHANGED
@@ -10,6 +10,13 @@ class ReaderTest < Test::Unit::TestCase
10
10
  assert_equal(count, 10)
11
11
  end
12
12
 
13
+ def test_loose
14
+ reader = MARC::ForgivingReader.new('test/batch.dat')
15
+ count = 0
16
+ reader.each { count += 1 }
17
+ assert_equal(count, 10)
18
+ end
19
+
13
20
  def test_search
14
21
  reader = MARC::Reader.new('test/batch.dat')
15
22
  records = reader.find_all { |r| r =~ /Perl/ }
data/test/tc_record.rb CHANGED
@@ -30,6 +30,16 @@ class TestRecord < Test::Unit::TestCase
30
30
  '245 10 $aActivePerl with ASP and ADO /$cTobias Martinsson.')
31
31
  end
32
32
 
33
+ def test_decode_loose
34
+ raw = IO.read('test/one.dat')
35
+ r = MARC::Record::new_from_marc(raw, :loose => true)
36
+ assert_equal(r.class, MARC::Record)
37
+ assert_equal(r.leader,'00755cam 22002414a 45000')
38
+ assert_equal(r.fields.length(), 18)
39
+ assert_equal(r.find {|f| f.tag == '245'}.to_s,
40
+ '245 10 $aActivePerl with ASP and ADO /$cTobias Martinsson.')
41
+ end
42
+
33
43
  def test_encode
34
44
  r1 = MARC::Record.new()
35
45
  r1.append(MARC::Field.new('100','2','0', ['a','Thomas, Dave']))
metadata CHANGED
@@ -3,7 +3,7 @@ rubygems_version: 0.8.11
3
3
  specification_version: 1
4
4
  name: marc
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.0.2
6
+ version: 0.0.3
7
7
  date: 2005-10-17 00:00:00 -05:00
8
8
  summary: A ruby library for working with Machine Readable Cataloging
9
9
  require_paths: