marc 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/marc/marc21.rb +25 -8
- data/lib/marc/reader.rb +36 -0
- data/lib/marc/record.rb +9 -3
- data/test/tc_reader.rb +7 -0
- data/test/tc_record.rb +10 -0
- metadata +1 -1
data/lib/marc/marc21.rb
CHANGED
@@ -66,7 +66,7 @@ module MARC
|
|
66
66
|
|
67
67
|
# Deserializes MARC21 as a MARC::Record object
|
68
68
|
|
69
|
-
def decode(marc)
|
69
|
+
def decode(marc, params={})
|
70
70
|
record = Record.new()
|
71
71
|
record.leader = marc[0..LEADER_LENGTH]
|
72
72
|
|
@@ -80,6 +80,11 @@ module MARC
|
|
80
80
|
# how many directory entries there are
|
81
81
|
num_fields = directory.length / DIRECTORY_ENTRY_LENGTH
|
82
82
|
|
83
|
+
# when operating in forgiving mode we just split on end of
|
84
|
+
# field instead of using calculated byte offsets from the
|
85
|
+
# directory
|
86
|
+
all_fields = marc[base_address..-1].split(END_OF_FIELD)
|
87
|
+
|
83
88
|
0.upto(num_fields-1) do |field_num|
|
84
89
|
|
85
90
|
# pull the directory entry for a field out
|
@@ -87,14 +92,26 @@ module MARC
|
|
87
92
|
entry_end = entry_start + DIRECTORY_ENTRY_LENGTH
|
88
93
|
entry = directory[entry_start..entry_end]
|
89
94
|
|
90
|
-
# extract the tag
|
91
|
-
# field out of the field portion
|
95
|
+
# extract the tag
|
92
96
|
tag = entry[0..2]
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
97
|
+
|
98
|
+
# get the actual field data
|
99
|
+
# if we were told to be forgiving we just use the
|
100
|
+
# next available chunk of field data that we
|
101
|
+
# split apart based on the END_OF_FIELD
|
102
|
+
field_data = ''
|
103
|
+
if params[:forgiving]
|
104
|
+
field_data = all_fields.shift()
|
105
|
+
|
106
|
+
# otherwise we actually use the byte offsets in
|
107
|
+
# directory to figure out what field data to extract
|
108
|
+
else
|
109
|
+
length = entry[3..6].to_i
|
110
|
+
offset = entry[7..11].to_i
|
111
|
+
field_start = base_address + offset
|
112
|
+
field_end = field_start + length - 1
|
113
|
+
field_data = marc[field_start..field_end]
|
114
|
+
end
|
98
115
|
|
99
116
|
# remove end of field
|
100
117
|
field_data.delete!(END_OF_FIELD)
|
data/lib/marc/reader.rb
CHANGED
@@ -48,4 +48,40 @@ module MARC
|
|
48
48
|
|
49
49
|
end
|
50
50
|
|
51
|
+
|
52
|
+
# Like Reader, ForgivingReader lets you read in a batch of MARC21 records
|
53
|
+
# but it does not use record lengths and field byte offsets found in the
|
54
|
+
# leader and directory. It is not unusual to run across MARC records
|
55
|
+
# which have had their offsets calculated wrong. In situations like this
|
56
|
+
# the vanilla Reader may fail, and you can try to use ForgivingReader.
|
57
|
+
|
58
|
+
# The one downside to this is that ForgivingReader will assume that the
|
59
|
+
# order of the fields in the directory is the same as the order of fields
|
60
|
+
# in the field data. Hopefully this will be the case, but it is not
|
61
|
+
# 100% guaranteed, which is why the normal behavior of Reader is encouraged.
|
62
|
+
|
63
|
+
class ForgivingReader
|
64
|
+
include Enumerable
|
65
|
+
|
66
|
+
def initialize(file)
|
67
|
+
if file.class == String
|
68
|
+
@handle = File.new(file)
|
69
|
+
elsif file.class == File
|
70
|
+
@handle = file
|
71
|
+
else
|
72
|
+
throw "must pass in path or File object"
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
|
77
|
+
def each
|
78
|
+
@handle.each_line(MARC::MARC21::END_OF_RECORD) do |raw|
|
79
|
+
record = MARC::Record.new_from_marc(raw, :forgiving => true)
|
80
|
+
yield record
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
end
|
85
|
+
|
86
|
+
|
51
87
|
end
|
data/lib/marc/record.rb
CHANGED
@@ -59,10 +59,16 @@ module MARC
|
|
59
59
|
# MARC::MARC21::decode
|
60
60
|
#
|
61
61
|
# record = MARC::Record.new_from_marc(marc21)
|
62
|
+
#
|
63
|
+
# in cases where you might be working with somewhat flawed
|
64
|
+
# MARC data you may want to use the :forgiving parameter which
|
65
|
+
# will bypass using field byte offsets and simply look for the
|
66
|
+
# end of field byte to figure out the end of fields.
|
67
|
+
#
|
68
|
+
# record = MARC::Record.new_from_marc(marc21, :forgiving => true)
|
62
69
|
|
63
|
-
|
64
|
-
|
65
|
-
return MARC::MARC21.new().decode(raw)
|
70
|
+
def self.new_from_marc(raw, params={})
|
71
|
+
return MARC::MARC21.new().decode(raw, params)
|
66
72
|
end
|
67
73
|
|
68
74
|
|
data/test/tc_reader.rb
CHANGED
@@ -10,6 +10,13 @@ class ReaderTest < Test::Unit::TestCase
|
|
10
10
|
assert_equal(count, 10)
|
11
11
|
end
|
12
12
|
|
13
|
+
def test_loose
|
14
|
+
reader = MARC::ForgivingReader.new('test/batch.dat')
|
15
|
+
count = 0
|
16
|
+
reader.each { count += 1 }
|
17
|
+
assert_equal(count, 10)
|
18
|
+
end
|
19
|
+
|
13
20
|
def test_search
|
14
21
|
reader = MARC::Reader.new('test/batch.dat')
|
15
22
|
records = reader.find_all { |r| r =~ /Perl/ }
|
data/test/tc_record.rb
CHANGED
@@ -30,6 +30,16 @@ class TestRecord < Test::Unit::TestCase
|
|
30
30
|
'245 10 $aActivePerl with ASP and ADO /$cTobias Martinsson.')
|
31
31
|
end
|
32
32
|
|
33
|
+
def test_decode_loose
|
34
|
+
raw = IO.read('test/one.dat')
|
35
|
+
r = MARC::Record::new_from_marc(raw, :loose => true)
|
36
|
+
assert_equal(r.class, MARC::Record)
|
37
|
+
assert_equal(r.leader,'00755cam 22002414a 45000')
|
38
|
+
assert_equal(r.fields.length(), 18)
|
39
|
+
assert_equal(r.find {|f| f.tag == '245'}.to_s,
|
40
|
+
'245 10 $aActivePerl with ASP and ADO /$cTobias Martinsson.')
|
41
|
+
end
|
42
|
+
|
33
43
|
def test_encode
|
34
44
|
r1 = MARC::Record.new()
|
35
45
|
r1.append(MARC::Field.new('100','2','0', ['a','Thomas, Dave']))
|
metadata
CHANGED