RubyGems - pdf-reader - Versions diffs - 0.8.1 → 0.8.2 - Mend

pdf-reader 0.8.1 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

data/CHANGELOG CHANGED

@@ -1,3 +1,9 @@
+v0.8.2 (1st January 2010)
+- Fix parsing of files that use Form XObjects behind an indirect reference
+  (thanks Cornelius Illi and Patrick Crosby)
+- Rewrote Buffer class to fix various speed issues reported over the years
+  - On my sample file extracting full text reduced from 220 seconds to 9 seconds.
 v0.8.1 (27th November 2009)
 - Added PDF::Hash#version. Provides access to the source file PDF version

data/Rakefile CHANGED

@@ -6,7 +6,7 @@ require 'rake/testtask'
 require "rake/gempackagetask"
 require 'spec/rake/spectask'
-PKG_VERSION = "0.8.1"
+PKG_VERSION = "0.8.2"
 PKG_NAME = "pdf-reader"
 PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"

data/lib/pdf/hash.rb CHANGED

@@ -46,8 +46,7 @@ module PDF
         raise ArgumentError, "input must be an IO-like object or a filename"
       end
       @version = read_version(io)
-      buffer = PDF::Reader::Buffer.new(io)
-      @xref  = PDF::Reader::XRef.new(buffer)
+      @xref  = PDF::Reader::XRef.new(io)
       @trailer = @xref.load
     end

data/lib/pdf/reader.rb CHANGED

@@ -122,9 +122,7 @@ class PDF::Reader
   ################################################################################
   # Given an IO object that contains PDF data, parse it.
   def parse (io, receiver, opts = {})
-    @buffer   = Buffer.new(io)
-    @xref     = XRef.new(@buffer)
-    @parser   = Parser.new(@buffer, @xref)
+    @xref     = XRef.new(io)
     @content  = (receiver == Explore ? Explore : Content).new(receiver, @xref)
     options = {:pages => true, :metadata => true}

data/lib/pdf/reader/buffer.rb CHANGED

@@ -1,6 +1,8 @@
+# coding: utf-8
 ################################################################################
 #
-# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
+# Copyright (C) 2010 James Healy (jimmy@deefa.com)
 #
 # Permission is hereby granted, free of charge, to any person obtaining
 # a copy of this software and associated documentation files (the
@@ -24,140 +26,118 @@
 ################################################################################
 class PDF::Reader
-  ################################################################################
-  # An internal PDF::Reader class that mediates access to the underlying PDF File or IO Stream
+  # A string tokeniser that recognises PDF grammar. When passed an IO stream or a
+  # string, repeated calls to token() will return the next token from the source.
+  #
+  # This is very low level, and getting the raw tokens is not very useful in itself.
+  #
+  # This will usually be used in conjunction with PDF::Reader::Parser, which converts
+  # the raw tokens into objects we can work with (strings, ints, arrays, etc)
+  #
   class Buffer
-    ################################################################################
-    # Creates a new buffer around the specified IO object
-    def initialize (io)
+    attr_reader :pos
+    # Creates a new buffer.
+    #
+    # Params:
+    #
+    #   io - an IO stream or string with the raw data to tokenise
+    #
+    # options:
+    #
+    #   :seek - a byte offset to seek to before starting to tokenise
+    #
+    def initialize (io, opts = {})
       @io = io
-      @buffer = nil
+      @tokens = []
+      @options = opts
+      @io.seek(opts[:seek]) if opts[:seek]
+      @pos = @io.pos
     end
-    ################################################################################
-    # Seek to the requested byte in the IO stream.
-    def seek (offset)
-      @io.seek(offset, IO::SEEK_SET)
-      @buffer = nil
-      self
+    # return true if there are no more tokens left
+    #
+    def empty?
+      prepare_tokens if @tokens.size < 3
+      @tokens.empty?
     end
-    ################################################################################
-    # reads the requested number of bytes from the underlying IO stream.
+    # return raw bytes from the underlying IO stream.
     #
-    # length should be a positive integer.
-    def read (length)
-      out = ""
+    #   bytes - the number of bytes to read
+    #
+    # options:
+    #
+    #   :skip_eol - if true, the IO stream is advanced past any LF or CR
+    #               bytes before it reads any data. This is to handle
+    #               content streams, which have a CRLF or LF after the stream
+    #               token.
+    #
+    def read(bytes, opts = {})
+      reset_pos
-      if @buffer and !@buffer.empty?
-        out << head(length)
-        length -= out.length
+      if opts[:skip_eol]
+        done = false
+        while !done
+          chr = @io.read(1)
+          if chr.nil?
+            return nil
+          elsif chr != "\n" && chr != "\r"
+            @io.seek(-1, IO::SEEK_CUR)
+            done = true
+          end
+        end
       end
-      out << @io.read(length) if length > 0
-      out
+      bytes = @io.read(bytes)
+      save_pos
+      bytes
     end
-    ################################################################################
-    # Reads from the buffer until the specified token is found, or the end of the buffer
+    # return raw bytes from the underlying IO stream. All bytes up to the first
+    # occurrence of needle will be returned. The match (if any) is not returned.
+    # The IO stream cursor is left on the first byte of the match.
+    #
+    #   needle - a string to search the IO stream for
     #
-    # bytes - the bytes to search for.
-    def read_until(bytes)
+    def read_until(needle)
+      reset_pos
       out = ""
-      size = bytes.size
+      size = needle.size
-      if @buffer && !@buffer.empty?
-        if @buffer.include?(bytes)
-          offset = @buffer.index(bytes) + size
-          return head(offset)
-        else
-          out << head(@buffer.size)
-        end
+      while out[size * -1, size] != needle && !@io.eof?
+        out << @io.read(1)
       end
-      loop do
-        out << @io.read(1)
-        if out[-1 * size,size].eql?(bytes)
-          out = out[0, out.size - size]
-          seek(pos - size)
-          break
-        end
+      if out[size * -1, size] == needle
+        out = out[0, out.size - size]
+        @io.seek(size * -1, IO::SEEK_CUR)
       end
+      save_pos
       out
     end
-    ################################################################################
-    # returns true if the underlying IO object is at end and the internal buffer
-    # is empty
-    def eof?
-      ready_token
-      if @buffer
-        @buffer.empty? && @io.eof?
-      else
-        @io.eof?
-      end
-    end
-    ################################################################################
-    def pos
-      @io.pos
-    end
-    ################################################################################
-    def pos_without_buf
-      @io.pos - @buffer.to_s.size
-    end
-    ################################################################################
-    # PDF files are processed by tokenising the content into a series of objects and commands.
-    # This prepares the buffer for use by reading the next line of tokens into memory.
-    def ready_token (with_strip=true, skip_blanks=true)
-      while (@buffer.nil? or @buffer.empty?) && !@io.eof?
-        @buffer = @io.readline
-        @buffer.force_encoding("BINARY") if @buffer.respond_to?(:force_encoding)
-        #@buffer.sub!(/%.*$/, '') if strip_comments
-        @buffer.chomp!
-        break unless skip_blanks
-      end
-      @buffer.lstrip! if with_strip
-    end
-    ################################################################################
-    # return the next token from the underlying IO stream
-    def token
-      ready_token
-      i = @buffer.index(/[\[\]()<>{}\s\/]/) || @buffer.size
-      token_chars =
-        if i == 0 and @buffer[i,2] == "<<"    then 2
-        elsif i == 0 and @buffer[i,2] == ">>" then 2
-        elsif i == 0                          then 1
-        else                                    i
-        end
-      strip_space = !(i == 0 and @buffer[0,1] == '(')
-      tok = head(token_chars, strip_space)
+    # return the next token from the source. Returns a string if a token
+    # is found, nil if there are no tokens left.
+    #
+    def token
+      reset_pos
+      prepare_tokens if @tokens.size < 3
+      merge_indirect_reference
+      merge_tokens
-      if tok == ""
-        nil
-      elsif tok[0,1] == "%"
-        @buffer = ""
-        token
-      else
-        tok
-      end
+      @tokens.shift
     end
-    ################################################################################
-    def head (chars, with_strip=true)
-      val = @buffer[0, chars]
-      @buffer = @buffer[chars .. -1] || ""
-      @buffer.lstrip! if with_strip
-      val
-    end
-    ################################################################################
-    # return the internal buffer used by this class when reading from the IO stream.
-    def raw
-      @buffer
-    end
-    ################################################################################
-    # The Xref table in a PDF file acts as an aid for finding the location of various
-    # objects in the file. This method attempts to locate the byte offset of the xref
-    # table in the underlying IO stream.
+    # return the byte offset where the first XRef table in the source can be found.
+    #
     def find_first_xref_offset
-      @io.seek(-1024, IO::SEEK_END) rescue seek(0)
+      @io.seek(-1024, IO::SEEK_END) rescue @io.seek(0)
       data = @io.read(1024)
       # the PDF 1.7 spec (section #3.4) says that EOL markers can be either \r, \n, or both.
@@ -179,8 +159,159 @@ class PDF::Reader
       raise MalformedPDFError, "PDF EOF marker does not follow offset" if eof_index >= lines.size-1
       lines[eof_index+1].to_i
     end
-    ################################################################################
+    private
+    # Some bastard moved our IO stream cursor. Restore it.
+    #
+    def reset_pos
+      @io.seek(@pos) if @io.pos != @pos
+    end
+    # save the current position of the source IO stream. If someone else (like another buffer)
+    # moves the cursor, we can then restore it.
+    #
+    def save_pos
+      @pos = @io.pos
+    end
+    # attempt to prime the buffer with the next few tokens.
+    #
+    def prepare_tokens
+      10.times do
+        if state == :literal_string
+          prepare_literal_token
+        elsif state == :regular
+          prepare_regular_token
+        end
+      end
+      save_pos
+    end
+    # tokenising behaves slightly differently based on the current context.
+    # Determine the current context/state by examining the last token we found
+    #
+    def state
+      if @tokens[-1] == "("
+        :literal_string
+      elsif @tokens[-1] == "stream"
+        :stream
+      else
+        :regular
+      end
+    end
+    # detect a series of 3 tokens that make up an indirect object. If we find
+    # them, replace the tokens with a PDF::Reader::Reference instance.
+    #
+    # Merging them into a single string was another option, but that would mean
+    # code further up the stack would need to check every token to see if it looks
+    # like an indirect object. For optimisation reasons, I'd rather avoid
+    # that extra check.
+    #
+    def merge_indirect_reference
+      return if @tokens.size < 3
+      return if @tokens[2] != "R"
+      if @tokens[0].match(/\d+/) && @tokens[1].match(/\d+/)
+        @tokens[0] = PDF::Reader::Reference.new(@tokens[0].to_i, @tokens[1].to_i)
+        @tokens[1] = nil
+        @tokens[2] = nil
+        @tokens.compact!
+      end
+    end
+    # merge any consecutive tokens that are actually 1 token. The only current
+    # time this is the case is << and >>. < and > are valid tokens (they indicate
+    # a hex string) but so are << and >> (they indicate a dictionary).
+    #
+    def merge_tokens
+      @tokens.each_with_index do |tok, idx|
+        if tok == "<" && @tokens[idx+1] == "<"
+          @tokens.inspect
+          @tokens[idx] = "<<"
+          @tokens[idx+1] = nil
+        elsif tok == ">" && @tokens[idx+1] == ">"
+          @tokens[idx] = ">>"
+          @tokens[idx+1] = nil
+        end
+      end
+      @tokens.compact!
+    end
+    # if we're currently inside a literal string we more or less just read bytes until
+    # we find the closing ) delimiter. Lots of bytes that would otherwise indicate the
+    # start of a new token in regular mode are left untouched when inside a literal
+    # string.
+    #
+    # The entire literal string will be returned as a single token. It will need further
+    # processing to fix things like escaped new lines, but that's someone else's
+    # problem.
+    #
+    def prepare_literal_token
+      str = ""
+      count = 1
+      while count > 0
+        chr = @io.read(1)
+        if chr.nil?
+          count = 0 # unbalanced params
+        elsif chr == "(" && str[-1,1] != "\x5C"
+          str << "("
+          count += 1
+        elsif chr == ")" && str[-1,1] != "\x5C"
+          count -= 1
+          str << ")" unless count == 0
+        else
+          str << chr unless count == 0
+        end
+      end
+      @tokens << str if str.size > 0
+      @tokens << ")"
+    end
+    # Extract the next regular token and stock it in our buffer, ready to be returned.
+    #
+    # What each byte means is complex, check out section "3.1.1 Character Set" of the 1.7 spec
+    # to read up on it.
+    #
+    def prepare_regular_token
+      tok = ""
+      while chr = @io.read(1)
+        case chr
+        when "\x25"
+          # comment, ignore everything until the next EOL char
+          done = false
+          while !done
+            chr = @io.read(1)
+            done = true if chr.nil? || chr == "\x0A" || chr == "\x0D"
+          end
+        when "\x00", "\x09", "\x0A", "\x0C", "\x0D", "\x20"
+          # white space, token finished
+          @tokens << tok if tok.size > 0
+          tok = ""
+          break
+        when "\x28", "\x3C", "\x5B", "\x7B", "\x2F"
+          # opening delimiter, start of new token
+          @tokens << tok if tok.size > 0
+          @tokens << chr
+          tok = ""
+          break
+        when "\x29", "\x3E", "\x5D", "\x7D"
+          # closing delimiter
+          @tokens << tok if tok.size > 0
+          @tokens << chr
+          tok = ""
+          break
+        else
+          tok << chr
+        end
+      end
+      @tokens << tok if tok.size > 0
+    end
   end
-  ################################################################################
 end
-################################################################################

data/lib/pdf/reader/content.rb CHANGED

@@ -323,7 +323,7 @@ class PDF::Reader
     # like a regular page content stream.
     #
     def walk_xobject_form(label)
-      xobjects = current_resources[:XObject] || {}
+      xobjects = @xref.object(current_resources[:XObject]) || {}
       xobject  = @xref.object(xobjects[label])
       if xobject && xobject.hash[:Subtype] == :Form

data/lib/pdf/reader/parser.rb CHANGED

@@ -43,10 +43,10 @@ class PDF::Reader
     #
     # operators - a hash of supported operators to read from the underlying buffer.
     def parse_token (operators={})
-      ref = Reference.from_buffer(@buffer) and return ref
       token = @buffer.token
       case token
+      when PDF::Reader::Reference     then return token
       when nil                        then return nil
       when "/"                        then return @buffer.token.to_sym
       when "<<"                       then return dictionary()
@@ -58,7 +58,7 @@ class PDF::Reader
       when "null"                     then return nil
       when "obj", "endobj"            then return Token.new(token)
       when "stream", "endstream"      then return Token.new(token)
-      when ">>", "]", ">"             then return Token.new(token)
+      when ">>", "]", ">", ")"        then return Token.new(token)
       else
         if operators.has_key?(token)  then return Token.new(token)
         elsif token =~ /\d*\.\d/      then return token.to_f
@@ -66,6 +66,29 @@ class PDF::Reader
         end
       end
     end
+    ################################################################################
+    # Reads an entire PDF object from the buffer and returns it as a Ruby String.
+    # If the object is a content stream, returns both the stream and the dictionary
+    # that describes it
+    #
+    # id  - the object ID to return
+    # gen - the object revision number to return
+    def object (id, gen)
+      Error.assert_equal(parse_token, id)
+      Error.assert_equal(parse_token, gen)
+      Error.str_assert(parse_token, "obj")
+      obj = parse_token
+      post_obj = parse_token
+      case post_obj
+      when "endobj"   then return obj
+      when "stream"   then return stream(obj)
+      else            raise MalformedPDFError, "PDF malformed, unexpected token #{post_obj}"
+      end
+    end
+    private
     ################################################################################
     # reads a PDF dict from the buffer and converts it to a Ruby Hash.
     def dictionary
@@ -114,95 +137,34 @@ class PDF::Reader
     ################################################################################
     # Reads a PDF String from the buffer and converts it to a Ruby String
     def string
-      str = ""
-      count = 1
-      while count != 0
-        @buffer.ready_token(false, false)
-        # find the first occurance of ( ) [ \ or ]
-        #
-        # I originally just used the regexp form of index(), but it seems to be
-        # buggy on some OSX systems (returns nil when there is a match). This
-        # version is more reliable and was suggested by Andrès Koetsier.
-        #
-        i = nil
-        @buffer.raw.unpack("C*").each_with_index do |charint, idx|
-          if [40, 41, 92].include?(charint)
-            i = idx
-            break
-          end
-        end
-        if i.nil?
-          str << @buffer.raw + "\n"
-          @buffer.raw.replace("")
-          # if a content stream opens a string, but never closes it, we'll
-          # hit the end of the stream and still be appending stuff to the
-          # string. bad! This check prevents a hard loop.
-          raise MalformedPDFError, 'unterminated string in content stream' if @buffer.eof?
-          next
-        end
+      str = @buffer.token
+      return "" if str == ")"
+      Error.assert_equal(parse_token, ")")
+      str.gsub!("\\n","\n")
+      str.gsub!("\\r","\r")
+      str.gsub!("\\t","\t")
+      str.gsub!("\\b","\b")
+      str.gsub!("\\f","\f")
+      str.gsub!("\\(","(")
+      str.gsub!("\\)",")")
+      str.gsub!("\\\\","\\")
+      str.gsub!(/\\\n/m,"")
+      str.gsub!(/(\n\r|\r\n|\r)/m,"\n")
+      str.scan(/\\\d{1,3}/).each do |octal|
+        str.gsub!(octal, octal[1,3].oct.chr)
+      end
-        str << @buffer.head(i, false)
-        to_remove = 1
-        case @buffer.raw[0, 1]
-        when "("
-          str << "("
-          count += 1
-        when ")"
-          count -= 1
-          str << ")" unless count == 0
-        when "\\"
-          to_remove = 2
-          case @buffer.raw[1, 1]
-          when ""   then to_remove = 1
-          when "n"  then str << "\n"
-          when "r"  then str << "\r"
-          when "t"  then str << "\t"
-          when "b"  then str << "\b"
-          when "f"  then str << "\f"
-          when "("  then str << "("
-          when ")"  then str << ")"
-          when "\\" then str << "\\"
-          else
-            if m = @buffer.raw.match(/^\\(\d{1,3})/)
-              to_remove = m[0].size
-              str << m[1].oct.chr
-            end
-          end
-        end
+      str.gsub!(/\\([^\\])/,'\1')
-        @buffer.head(to_remove, false)
-      end
       str
     end
     ################################################################################
-    # Reads an entire PDF object from the buffer and returns it as a Ruby String.
-    # If the object is a content stream, returns both the stream and the dictionary
-    # that describes it
-    #
-    # id  - the object ID to return
-    # gen - the object revision number to return
-    def object (id, gen)
-      Error.assert_equal(parse_token, id)
-      Error.assert_equal(parse_token, gen)
-      Error.str_assert(parse_token, "obj")
-      obj = parse_token
-      post_obj = parse_token
-      case post_obj
-      when "endobj"   then return obj
-      when "stream"   then return stream(obj)
-      else            raise MalformedPDFError, "PDF malformed, unexpected token #{post_obj}"
-      end
-    end
-    ################################################################################
     # Decodes the contents of a PDF Stream and returns it as a Ruby String.
     def stream (dict)
       raise MalformedPDFError, "PDF malformed, missing stream length" unless dict.has_key?(:Length)
-      data = @buffer.read(@xref.object(dict[:Length]))
+      data = @buffer.read(@xref.object(dict[:Length]), :skip_eol => true)
       Error.str_assert(parse_token, "endstream")
       Error.str_assert(parse_token, "endobj")

data/lib/pdf/reader/reference.rb CHANGED

@@ -27,16 +27,6 @@ class PDF::Reader
   ################################################################################
   # An internal PDF::Reader class that represents an indirect reference to a PDF Object
   class Reference
-    ################################################################################
-    # check if the next token in the buffer is a reference, and return a PDF::Reader::Reference
-    # instance. Returns nil if the next token isn't an indirect reference.
-    def self.from_buffer (buffer)
-      buffer.ready_token
-      return nil unless m = buffer.raw.match(/^(\d+)\s+(\d+)\s+R\b/)
-      buffer.head(m[0].size)
-      self.new(m[1].to_i, m[2].to_i)
-    end
-    ################################################################################
     attr_reader :id, :gen
     ################################################################################
     # Create a new Reference to an object with the specified id and revision number

data/lib/pdf/reader/xref.rb CHANGED

@@ -32,8 +32,8 @@ class PDF::Reader
   class XRef
     ################################################################################
     # create a new Xref table based on the contents of the supplied PDF::Reader::Buffer object
-    def initialize (buffer)
-      @buffer = buffer
+    def initialize (io)
+      @io = io
       @xref = {}
     end
     def size
@@ -44,8 +44,8 @@ class PDF::Reader
     # table, but it is one of the lowest level data items in the file, so we've lumped it in
     # with the cross reference code.
     def pdf_version
-      @buffer.seek(0)
-      m, version = *@buffer.read(8).match(/%PDF-(\d.\d)/)
+      @io.seek(0)
+      m, version = *@io.read(8).match(/%PDF-(\d.\d)/)
       raise MalformedPDFError, 'invalid PDF version' if version.nil?
       return version.to_f
     end
@@ -55,13 +55,14 @@ class PDF::Reader
     #
     # Will fail silently if there is no xref table at the requested offset.
     def load (offset = nil)
-      offset ||= @buffer.find_first_xref_offset
-      @buffer.seek(offset)
-      token = @buffer.token
+      offset ||= new_buffer.find_first_xref_offset
+      buf = new_buffer(offset)
+      token = buf.token
       if token == "xref" || token == "ref"
-        load_xref_table
-      elsif token.to_i >= 0 && @buffer.token.to_i >= 0 && @buffer.token == "obj"
+        load_xref_table(buf)
+      elsif token.to_i >= 0 && buf.token.to_i >= 0 && buf.token == "obj"
         raise PDF::Reader::UnsupportedFeatureError, "XRef streams are not supported in PDF::Reader yet"
       else
         raise PDF::Reader::MalformedPDFError, "xref table not found at offset #{offset} (#{token} != xref)"
@@ -73,52 +74,12 @@ class PDF::Reader
     # number
     #
     # If the object is a stream, that is returned as well
-    def object (ref, save_pos = true)
+    def object (ref)
       return ref unless ref.kind_of?(Reference)
-      pos = @buffer.pos_without_buf if save_pos
-      obj = Parser.new(@buffer.seek(offset_for(ref)), self).object(ref.id, ref.gen)
-      @buffer.seek(pos) if save_pos
+      buf = new_buffer(offset_for(ref))
+      obj = Parser.new(buf, self).object(ref.id, ref.gen)
       return obj
     end
-    ################################################################################
-    # Assumes the underlying buffer is positioned at the start of an Xref table and
-    # processes it into memory.
-    def load_xref_table
-      tok_one = tok_two = nil
-      begin
-        # loop over all subsections of the xref table
-        # In a well formed PDF, the 'trailer' token will indicate
-        # the end of the table. However we need to be careful in case
-        # we're processing a malformed pdf that is missing the trailer.
-        loop do
-          tok_one, tok_two = @buffer.token, @buffer.token
-          if tok_one != "trailer" && !tok_one.match(/\d+/)
-            raise MalformedPDFError, "PDF malformed, missing trailer after cross reference"
-          end
-          break if tok_one == "trailer" or tok_one.nil?
-          objid, count = tok_one.to_i, tok_two.to_i
-          count.times do
-            offset = @buffer.token.to_i
-            generation = @buffer.token.to_i
-            state = @buffer.token
-            store(objid, generation, offset) if state == "n"
-            objid += 1
-          end
-        end
-      rescue EOFError => e
-        raise MalformedPDFError, "PDF malformed, missing trailer after cross reference"
-      end
-      raise MalformedPDFError, "PDF malformed, trailer should be a dictionary" unless tok_two == "<<"
-      trailer = Parser.new(@buffer, self).dictionary
-      load(trailer[:Prev].to_i) if trailer.has_key?(:Prev)
-      trailer
-    end
     # returns the type of object a ref points to
     def obj_type(ref)
       obj = object(ref)
@@ -154,6 +115,41 @@ class PDF::Reader
       (@xref[id] ||= {})[gen] ||= offset
     end
     ################################################################################
+    private
+    ################################################################################
+    # Assumes the underlying buffer is positioned at the start of an Xref table and
+    # processes it into memory.
+    def load_xref_table(buf)
+      params = []
+      while !params.include?("trailer") && !params.include?(nil)
+        if params.size == 2
+          objid, count = params[0].to_i, params[1].to_i
+          count.times do
+            offset = buf.token.to_i
+            generation = buf.token.to_i
+            state = buf.token
+            store(objid, generation, offset) if state == "n"
+            objid += 1
+            params.clear
+          end
+        end
+        params << buf.token
+      end
+      trailer = Parser.new(buf, self).parse_token
+      raise MalformedPDFError, "PDF malformed, trailer should be a dictionary" unless trailer.kind_of?(Hash)
+      load(trailer[:Prev].to_i) if trailer.has_key?(:Prev)
+      trailer
+    end
+    def new_buffer(offset = 0)
+      PDF::Reader::Buffer.new(@io, :seek => offset)
+    end
   end
   ################################################################################
 end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: pdf-reader
 version: !ruby/object:Gem::Version
-  version: 0.8.1
+  version: 0.8.2
 platform: ruby
 authors:
 - Peter Jones
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2009-11-27 00:00:00 +11:00
+date: 2010-01-01 00:00:00 +11:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency