RubyGems - marc - Versions diffs - 0.0.2 → 0.0.3 - Mend

marc 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

data/lib/marc/marc21.rb CHANGED Viewed

@@ -66,7 +66,7 @@ module MARC
         # Deserializes MARC21 as a MARC::Record object
-        def decode(marc)
+        def decode(marc, params={})
             record = Record.new()
             record.leader = marc[0..LEADER_LENGTH]
@@ -80,6 +80,11 @@ module MARC
             # how many directory entries there are
             num_fields = directory.length / DIRECTORY_ENTRY_LENGTH
+            # when operating in forgiving mode we just split on end of
+            # field instead of using calculated byte offsets from the
+            # directory
+            all_fields = marc[base_address..-1].split(END_OF_FIELD)
             0.upto(num_fields-1) do |field_num|
                 # pull the directory entry for a field out
@@ -87,14 +92,26 @@ module MARC
                 entry_end = entry_start + DIRECTORY_ENTRY_LENGTH
                 entry = directory[entry_start..entry_end]
-                # extract the tag, length and offset for pulling the
-                # field out of the field portion
+                # extract the tag
                 tag = entry[0..2]
-                length = entry[3..6].to_i
-                offset = entry[7..11].to_i
-                field_start = base_address + offset
-                field_end = field_start + length - 1
-                field_data = marc[field_start..field_end]
+                # get the actual field data
+                # if we were told to be forgiving we just use the
+                # next available chunk of field data that we
+                # split apart based on the END_OF_FIELD
+                field_data = ''
+                if params[:forgiving]
+                    field_data = all_fields.shift()
+                # otherwise we actually use the byte offsets in
+                # directory to figure out what field data to extract
+                else
+                    length = entry[3..6].to_i
+                    offset = entry[7..11].to_i
+                    field_start = base_address + offset
+                    field_end = field_start + length - 1
+                    field_data = marc[field_start..field_end]
+                end
                 # remove end of field
                 field_data.delete!(END_OF_FIELD)

data/lib/marc/reader.rb CHANGED Viewed

@@ -48,4 +48,40 @@ module MARC
     end
+    # Like Reader, ForgivingReader lets you read in a batch of MARC21 records
+    # but it does not use record lengths and field byte offsets found in the
+    # leader and directory. It is not unusual to run across MARC records
+    # which have had their offsets calculated wrong. In situations like this
+    # the vanilla Reader may fail, and you can try to use ForgivingReader.
+    # The one downside to this is that ForgivingReader will assume that the
+    # order of the fields in the directory is the same as the order of fields
+    # in the field data. Hopefully this will be the case, but it is not
+    # 100% guaranteed, which is why the normal behavior of Reader is encouraged.
+    class ForgivingReader
+        include Enumerable
+        def initialize(file)
+            if file.class == String
+                @handle = File.new(file)
+            elsif file.class == File
+                @handle = file
+            else
+                throw "must pass in path or File object"
+            end
+        end
+        def each
+            @handle.each_line(MARC::MARC21::END_OF_RECORD) do |raw|
+                record = MARC::Record.new_from_marc(raw, :forgiving => true)
+                yield record
+            end
+        end
+    end
 end

data/lib/marc/record.rb CHANGED Viewed

@@ -59,10 +59,16 @@ module MARC
         # MARC::MARC21::decode
         #
         #     record = MARC::Record.new_from_marc(marc21)
+        #
+        # in cases where you might be working with somewhat flawed
+        # MARC data you may want to use the :forgiving parameter which
+        # will bypass using field byte offsets and simply look for the
+        # end of field byte to figure out the end of fields.
+        #
+        #    record = MARC::Record.new_from_marc(marc21, :forgiving => true)
-        def Record::new_from_marc(raw)
-            return MARC::MARC21.new().decode(raw)
+        def self.new_from_marc(raw, params={})
+            return MARC::MARC21.new().decode(raw, params)
         end

data/test/tc_reader.rb CHANGED Viewed

@@ -10,6 +10,13 @@ class ReaderTest < Test::Unit::TestCase
         assert_equal(count, 10)
     end
+    def test_loose
+        reader = MARC::ForgivingReader.new('test/batch.dat')
+        count = 0
+        reader.each { count += 1 }
+        assert_equal(count, 10)
+    end
     def test_search
         reader = MARC::Reader.new('test/batch.dat')
         records = reader.find_all { |r| r =~ /Perl/ }

data/test/tc_record.rb CHANGED Viewed

@@ -30,6 +30,16 @@ class TestRecord < Test::Unit::TestCase
             '245 10 $aActivePerl with ASP and ADO /$cTobias Martinsson.')
     end
+    def test_decode_loose
+        raw = IO.read('test/one.dat')
+        r = MARC::Record::new_from_marc(raw, :loose => true)
+        assert_equal(r.class, MARC::Record)
+        assert_equal(r.leader,'00755cam  22002414a 45000')
+        assert_equal(r.fields.length(), 18)
+        assert_equal(r.find {|f| f.tag == '245'}.to_s,
+            '245 10 $aActivePerl with ASP and ADO /$cTobias Martinsson.')
+    end
     def test_encode
         r1 = MARC::Record.new()
         r1.append(MARC::Field.new('100','2','0', ['a','Thomas, Dave']))

metadata CHANGED Viewed

@@ -3,7 +3,7 @@ rubygems_version: 0.8.11
 specification_version: 1
 name: marc
 version: !ruby/object:Gem::Version
-  version: 0.0.2
+  version: 0.0.3
 date: 2005-10-17 00:00:00 -05:00
 summary: A ruby library for working with Machine Readable Cataloging
 require_paths: