marc 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/marc/marc21.rb CHANGED
@@ -66,7 +66,7 @@ module MARC
66
66
 
67
67
  # Deserializes MARC21 as a MARC::Record object
68
68
 
69
- def decode(marc)
69
+ def decode(marc, params={})
70
70
  record = Record.new()
71
71
  record.leader = marc[0..LEADER_LENGTH]
72
72
 
@@ -80,6 +80,11 @@ module MARC
80
80
  # how many directory entries there are
81
81
  num_fields = directory.length / DIRECTORY_ENTRY_LENGTH
82
82
 
83
+ # when operating in forgiving mode we just split on end of
84
+ # field instead of using calculated byte offsets from the
85
+ # directory
86
+ all_fields = marc[base_address..-1].split(END_OF_FIELD)
87
+
83
88
  0.upto(num_fields-1) do |field_num|
84
89
 
85
90
  # pull the directory entry for a field out
@@ -87,14 +92,26 @@ module MARC
87
92
  entry_end = entry_start + DIRECTORY_ENTRY_LENGTH
88
93
  entry = directory[entry_start..entry_end]
89
94
 
90
- # extract the tag, length and offset for pulling the
91
- # field out of the field portion
95
+ # extract the tag
92
96
  tag = entry[0..2]
93
- length = entry[3..6].to_i
94
- offset = entry[7..11].to_i
95
- field_start = base_address + offset
96
- field_end = field_start + length - 1
97
- field_data = marc[field_start..field_end]
97
+
98
+ # get the actual field data
99
+ # if we were told to be forgiving we just use the
100
+ # next available chunk of field data that we
101
+ # split apart based on the END_OF_FIELD
102
+ field_data = ''
103
+ if params[:forgiving]
104
+ field_data = all_fields.shift()
105
+
106
+ # otherwise we actually use the byte offsets in
107
+ # directory to figure out what field data to extract
108
+ else
109
+ length = entry[3..6].to_i
110
+ offset = entry[7..11].to_i
111
+ field_start = base_address + offset
112
+ field_end = field_start + length - 1
113
+ field_data = marc[field_start..field_end]
114
+ end
98
115
 
99
116
  # remove end of field
100
117
  field_data.delete!(END_OF_FIELD)
data/lib/marc/reader.rb CHANGED
@@ -48,4 +48,40 @@ module MARC
48
48
 
49
49
  end
50
50
 
51
+
52
+ # Like Reader, ForgivingReader lets you read in a batch of MARC21 records
53
+ # but it does not use record lengths and field byte offsets found in the
54
+ # leader and directory. It is not unusual to run across MARC records
55
+ # which have had their offsets calculated wrong. In situations like this
56
+ # the vanilla Reader may fail, and you can try to use ForgivingReader.
57
+
58
+ # The one downside to this is that ForgivingReader will assume that the
59
+ # order of the fields in the directory is the same as the order of fields
60
+ # in the field data. Hopefully this will be the case, but it is not
61
+ # 100% guaranteed which is why the normal behavior of Reader is encouraged.
62
+
63
+ class ForgivingReader
64
+ include Enumerable
65
+
66
+ def initialize(file)
67
+ if file.class == String
68
+ @handle = File.new(file)
69
+ elsif file.class == File
70
+ @handle = file
71
+ else
72
+ throw "must pass in path or File object"
73
+ end
74
+ end
75
+
76
+
77
+ def each
78
+ @handle.each_line(MARC::MARC21::END_OF_RECORD) do |raw|
79
+ record = MARC::Record.new_from_marc(raw, :forgiving => true)
80
+ yield record
81
+ end
82
+ end
83
+
84
+ end
85
+
86
+
51
87
  end
data/lib/marc/record.rb CHANGED
@@ -59,10 +59,16 @@ module MARC
59
59
  # MARC::MARC21::decode
60
60
  #
61
61
  # record = MARC::Record.new_from_marc(marc21)
62
+ #
63
+ # in cases where you might be working with somewhat flawed
64
+ # MARC data you may want to use the :forgiving parameter which
65
+ # will bypass using field byte offsets and simply look for the
66
+ # end of field byte to figure out the end of fields.
67
+ #
68
+ # record = MARC::Record.new_from_marc(marc21, :forgiving => true)
62
69
 
63
-
64
- def Record::new_from_marc(raw)
65
- return MARC::MARC21.new().decode(raw)
70
+ def self.new_from_marc(raw, params={})
71
+ return MARC::MARC21.new().decode(raw, params)
66
72
  end
67
73
 
68
74
 
data/test/tc_reader.rb CHANGED
@@ -10,6 +10,13 @@ class ReaderTest < Test::Unit::TestCase
10
10
  assert_equal(count, 10)
11
11
  end
12
12
 
13
+ def test_loose
14
+ reader = MARC::ForgivingReader.new('test/batch.dat')
15
+ count = 0
16
+ reader.each { count += 1 }
17
+ assert_equal(count, 10)
18
+ end
19
+
13
20
  def test_search
14
21
  reader = MARC::Reader.new('test/batch.dat')
15
22
  records = reader.find_all { |r| r =~ /Perl/ }
data/test/tc_record.rb CHANGED
@@ -30,6 +30,16 @@ class TestRecord < Test::Unit::TestCase
30
30
  '245 10 $aActivePerl with ASP and ADO /$cTobias Martinsson.')
31
31
  end
32
32
 
33
+ def test_decode_loose
34
+ raw = IO.read('test/one.dat')
35
+ r = MARC::Record::new_from_marc(raw, :loose => true)
36
+ assert_equal(r.class, MARC::Record)
37
+ assert_equal(r.leader,'00755cam 22002414a 45000')
38
+ assert_equal(r.fields.length(), 18)
39
+ assert_equal(r.find {|f| f.tag == '245'}.to_s,
40
+ '245 10 $aActivePerl with ASP and ADO /$cTobias Martinsson.')
41
+ end
42
+
33
43
  def test_encode
34
44
  r1 = MARC::Record.new()
35
45
  r1.append(MARC::Field.new('100','2','0', ['a','Thomas, Dave']))
metadata CHANGED
@@ -3,7 +3,7 @@ rubygems_version: 0.8.11
3
3
  specification_version: 1
4
4
  name: marc
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.0.2
6
+ version: 0.0.3
7
7
  date: 2005-10-17 00:00:00 -05:00
8
8
  summary: A ruby library for working with Machine Readable Cataloging
9
9
  require_paths: