marc 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/marc/marc21.rb +25 -8
- data/lib/marc/reader.rb +36 -0
- data/lib/marc/record.rb +9 -3
- data/test/tc_reader.rb +7 -0
- data/test/tc_record.rb +10 -0
- metadata +1 -1
data/lib/marc/marc21.rb
CHANGED
@@ -66,7 +66,7 @@ module MARC
|
|
66
66
|
|
67
67
|
# Deserializes MARC21 as a MARC::Record object
|
68
68
|
|
69
|
-
def decode(marc)
|
69
|
+
def decode(marc, params={})
|
70
70
|
record = Record.new()
|
71
71
|
record.leader = marc[0..LEADER_LENGTH]
|
72
72
|
|
@@ -80,6 +80,11 @@ module MARC
|
|
80
80
|
# how many directory entries there are
|
81
81
|
num_fields = directory.length / DIRECTORY_ENTRY_LENGTH
|
82
82
|
|
83
|
+
# when operating in forgiving mode we just split on end of
|
84
|
+
# field instead of using calculated byte offsets from the
|
85
|
+
# directory
|
86
|
+
all_fields = marc[base_address..-1].split(END_OF_FIELD)
|
87
|
+
|
83
88
|
0.upto(num_fields-1) do |field_num|
|
84
89
|
|
85
90
|
# pull the directory entry for a field out
|
@@ -87,14 +92,26 @@ module MARC
|
|
87
92
|
entry_end = entry_start + DIRECTORY_ENTRY_LENGTH
|
88
93
|
entry = directory[entry_start..entry_end]
|
89
94
|
|
90
|
-
# extract the tag
|
91
|
-
# field out of the field portion
|
95
|
+
# extract the tag
|
92
96
|
tag = entry[0..2]
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
97
|
+
|
98
|
+
# get the actual field data
|
99
|
+
# if we were told to be forgiving we just use the
|
100
|
+
# next available chunk of field data that we
|
101
|
+
# split apart based on the END_OF_FIELD
|
102
|
+
field_data = ''
|
103
|
+
if params[:forgiving]
|
104
|
+
field_data = all_fields.shift()
|
105
|
+
|
106
|
+
# otherwise we actually use the byte offsets in
|
107
|
+
# directory to figure out what field data to extract
|
108
|
+
else
|
109
|
+
length = entry[3..6].to_i
|
110
|
+
offset = entry[7..11].to_i
|
111
|
+
field_start = base_address + offset
|
112
|
+
field_end = field_start + length - 1
|
113
|
+
field_data = marc[field_start..field_end]
|
114
|
+
end
|
98
115
|
|
99
116
|
# remove end of field
|
100
117
|
field_data.delete!(END_OF_FIELD)
|
data/lib/marc/reader.rb
CHANGED
@@ -48,4 +48,40 @@ module MARC
|
|
48
48
|
|
49
49
|
end
|
50
50
|
|
51
|
+
|
52
|
+
# Like Reader, ForgivingReader lets you read in a batch of MARC21 records
|
53
|
+
# but it does not use record lengths and field byte offsets found in the
|
54
|
+
# leader and directory. It is not unusual to run across MARC records
|
55
|
+
# which have had their offsets calculated wrong. In situations like this
|
56
|
+
# the vanilla Reader may fail, and you can try to use ForgivingReader.
|
57
|
+
|
58
|
+
# The one downside to this is that ForgivingReader will assume that the
|
59
|
+
# order of the fields in the directory is the same as the order of fields
|
60
|
+
# in the field data. Hopefully this will be the case, but it is not
|
61
|
+
# 100% guaranteed, which is why the normal behavior of Reader is encouraged.
|
62
|
+
|
63
|
+
class ForgivingReader
|
64
|
+
include Enumerable
|
65
|
+
|
66
|
+
def initialize(file)
|
67
|
+
if file.class == String
|
68
|
+
@handle = File.new(file)
|
69
|
+
elsif file.class == File
|
70
|
+
@handle = file
|
71
|
+
else
|
72
|
+
throw "must pass in path or File object"
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
|
77
|
+
def each
|
78
|
+
@handle.each_line(MARC::MARC21::END_OF_RECORD) do |raw|
|
79
|
+
record = MARC::Record.new_from_marc(raw, :forgiving => true)
|
80
|
+
yield record
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
end
|
85
|
+
|
86
|
+
|
51
87
|
end
|
data/lib/marc/record.rb
CHANGED
@@ -59,10 +59,16 @@ module MARC
|
|
59
59
|
# MARC::MARC21::decode
|
60
60
|
#
|
61
61
|
# record = MARC::Record.new_from_marc(marc21)
|
62
|
+
#
|
63
|
+
# in cases where you might be working with somewhat flawed
|
64
|
+
# MARC data you may want to use the :forgiving parameter which
|
65
|
+
# will bypass using field byte offsets and simply look for the
|
66
|
+
# end of field byte to figure out the end of fields.
|
67
|
+
#
|
68
|
+
# record = MARC::Record.new_from_marc(marc21, :forgiving => true)
|
62
69
|
|
63
|
-
|
64
|
-
|
65
|
-
return MARC::MARC21.new().decode(raw)
|
70
|
+
def self.new_from_marc(raw, params={})
|
71
|
+
return MARC::MARC21.new().decode(raw, params)
|
66
72
|
end
|
67
73
|
|
68
74
|
|
data/test/tc_reader.rb
CHANGED
@@ -10,6 +10,13 @@ class ReaderTest < Test::Unit::TestCase
|
|
10
10
|
assert_equal(count, 10)
|
11
11
|
end
|
12
12
|
|
13
|
+
def test_loose
|
14
|
+
reader = MARC::ForgivingReader.new('test/batch.dat')
|
15
|
+
count = 0
|
16
|
+
reader.each { count += 1 }
|
17
|
+
assert_equal(count, 10)
|
18
|
+
end
|
19
|
+
|
13
20
|
def test_search
|
14
21
|
reader = MARC::Reader.new('test/batch.dat')
|
15
22
|
records = reader.find_all { |r| r =~ /Perl/ }
|
data/test/tc_record.rb
CHANGED
@@ -30,6 +30,16 @@ class TestRecord < Test::Unit::TestCase
|
|
30
30
|
'245 10 $aActivePerl with ASP and ADO /$cTobias Martinsson.')
|
31
31
|
end
|
32
32
|
|
33
|
+
def test_decode_loose
|
34
|
+
raw = IO.read('test/one.dat')
|
35
|
+
r = MARC::Record::new_from_marc(raw, :loose => true)
|
36
|
+
assert_equal(r.class, MARC::Record)
|
37
|
+
assert_equal(r.leader,'00755cam 22002414a 45000')
|
38
|
+
assert_equal(r.fields.length(), 18)
|
39
|
+
assert_equal(r.find {|f| f.tag == '245'}.to_s,
|
40
|
+
'245 10 $aActivePerl with ASP and ADO /$cTobias Martinsson.')
|
41
|
+
end
|
42
|
+
|
33
43
|
def test_encode
|
34
44
|
r1 = MARC::Record.new()
|
35
45
|
r1.append(MARC::Field.new('100','2','0', ['a','Thomas, Dave']))
|
metadata
CHANGED