RubyGems - pdf-reader - Versions diffs - 0.8.1 → 0.8.2 - Mend

pdf-reader 0.8.1 → 0.8.2

Files changed (10) hide show

data/CHANGELOG CHANGED

@@ -1,3 +1,9 @@
+v0.8.2 (1st January 2010)
+- Fix parsing of files that use Form XObjects behind an indirect reference
+  (thanks Cornelius Illi and Patrick Crosby)
+- Rewrote Buffer class to fix various speed issues reported over the years
+  - On my sample file extracting full text reduced from 220 seconds to 9 seconds.
 v0.8.1 (27th November 2009)
 - Added PDF::Hash#version. Provides access to the source file PDF version

data/Rakefile CHANGED

@@ -6,7 +6,7 @@ require 'rake/testtask'
 require "rake/gempackagetask"
 require 'spec/rake/spectask'
-PKG_VERSION = "0.8.1"
+PKG_VERSION = "0.8.2"
 PKG_NAME = "pdf-reader"
 PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"

data/lib/pdf/hash.rb CHANGED

@@ -46,8 +46,7 @@ module PDF
         raise ArgumentError, "input must be an IO-like object or a filename"
       end
       @version = read_version(io)
-      buffer = PDF::Reader::Buffer.new(io)
-      @xref  = PDF::Reader::XRef.new(buffer)
+      @xref  = PDF::Reader::XRef.new(io)
       @trailer = @xref.load
     end

data/lib/pdf/reader.rb CHANGED

@@ -122,9 +122,7 @@ class PDF::Reader
   ################################################################################
   # Given an IO object that contains PDF data, parse it.
   def parse (io, receiver, opts = {})
-    @buffer   = Buffer.new(io)
-    @xref     = XRef.new(@buffer)
-    @parser   = Parser.new(@buffer, @xref)
+    @xref     = XRef.new(io)
     @content  = (receiver == Explore ? Explore : Content).new(receiver, @xref)
     options = {:pages => true, :metadata => true}

data/lib/pdf/reader/buffer.rb CHANGED

@@ -1,6 +1,8 @@
+# coding: utf-8
 ################################################################################
 #
-# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
+# Copyright (C) 2010 James Healy (jimmy@deefa.com)
 #
 # Permission is hereby granted, free of charge, to any person obtaining
 # a copy of this software and associated documentation files (the
@@ -24,140 +26,118 @@
 ################################################################################
 class PDF::Reader
-  ################################################################################
-  # An internal PDF::Reader class that mediates access to the underlying PDF File or IO Stream
+  # A string tokeniser that recognises PDF grammar. When passed an IO stream or a
+  # string, repeated calls to token() will return the next token from the source.
+  #
+  # This is very low level, and getting the raw tokens is not very useful in itself.
+  #
+  # This will usually be used in conjunction with PDF::Reader::Parser, which converts
+  # the raw tokens into objects we can work with (strings, ints, arrays, etc)
+  #
   class Buffer
-    ################################################################################
-    # Creates a new buffer around the specified IO object
-    def initialize (io)
+    attr_reader :pos
+    # Creates a new buffer.
+    #
+    # Params:
+    #
+    #   io - an IO stream or string with the raw data to tokenise
+    #
+    # options:
+    #
+    #   :seek - a byte offset to seek to before starting to tokenise
+    #
+    def initialize (io, opts = {})
       @io = io
-      @buffer = nil
+      @tokens = []
+      @options = opts
+      @io.seek(opts[:seek]) if opts[:seek]
+      @pos = @io.pos
     end
-    ################################################################################
-    # Seek to the requested byte in the IO stream.
-    def seek (offset)
-      @io.seek(offset, IO::SEEK_SET)
-      @buffer = nil
-      self
+    # return true if there are no more tokens left
+    #
+    def empty?
+      prepare_tokens if @tokens.size < 3
+      @tokens.empty?
     end
-    ################################################################################
-    # reads the requested number of bytes from the underlying IO stream.
+    # return raw bytes from the underlying IO stream.
     #
-    # length should be a positive integer.
-    def read (length)
-      out = ""
+    #   bytes - the number of bytes to read
+    #
+    # options:
+    #
+    #   :skip_eol - if true, the IO stream is advanced past any LF or CR
+    #               bytes before it reads any data. This is to handle
+    #               content streams, which have a CRLF or LF after the stream
+    #               token.
+    #
+    def read(bytes, opts = {})
+      reset_pos
-      if @buffer and !@buffer.empty?
-        out << head(length)
-        length -= out.length
+      if opts[:skip_eol]
+        done = false
+        while !done
+          chr = @io.read(1)
+          if chr.nil?
+            return nil
+          elsif chr != "\n" && chr != "\r"
+            @io.seek(-1, IO::SEEK_CUR)
+            done = true
+          end
+        end
       end
-      out << @io.read(length) if length > 0
-      out
+      bytes = @io.read(bytes)
+      save_pos
+      bytes
     end
-    ################################################################################
-    # Reads from the buffer until the specified token is found, or the end of the buffer
+    # return raw bytes from the underlying IO stream. All bytes up to the first
+    # occurrence of needle will be returned. The match (if any) is not returned.
+    # The IO stream cursor is left on the first byte of the match.
+    #
+    #   needle - a string to search the IO stream for
     #
-    # bytes - the bytes to search for.
-    def read_until(bytes)
+    def read_until(needle)
+      reset_pos
       out = ""
-      size = bytes.size
+      size = needle.size
-      if @buffer && !@buffer.empty?
-        if @buffer.include?(bytes)
-          offset = @buffer.index(bytes) + size
-          return head(offset)
-        else
-          out << head(@buffer.size)
-        end
+      while out[size * -1, size] != needle && !@io.eof?
+        out << @io.read(1)
       end
-      loop do
-        out << @io.read(1)
-        if out[-1 * size,size].eql?(bytes)
-          out = out[0, out.size - size]
-          seek(pos - size)
-          break
-        end
+      if out[size * -1, size] == needle
+        out = out[0, out.size - size]
+        @io.seek(size * -1, IO::SEEK_CUR)
       end
+      save_pos
       out
     end
-    ################################################################################
-    # returns true if the underlying IO object is at end and the internal buffer
-    # is empty
-    def eof?
-      ready_token
-      if @buffer
-        @buffer.empty? && @io.eof?
-      else
-        @io.eof?
-      end
-    end
-    ################################################################################
-    def pos
-      @io.pos
-    end
-    ################################################################################
-    def pos_without_buf
-      @io.pos - @buffer.to_s.size
-    end
-    ################################################################################
-    # PDF files are processed by tokenising the content into a series of objects and commands.
-    # This prepares the buffer for use by reading the next line of tokens into memory.
-    def ready_token (with_strip=true, skip_blanks=true)
-      while (@buffer.nil? or @buffer.empty?) && !@io.eof?
-        @buffer = @io.readline
-        @buffer.force_encoding("BINARY") if @buffer.respond_to?(:force_encoding)
-        #@buffer.sub!(/%.*$/, '') if strip_comments
-        @buffer.chomp!
-        break unless skip_blanks
-      end
-      @buffer.lstrip! if with_strip
-    end
-    ################################################################################
-    # return the next token from the underlying IO stream
-    def token
-      ready_token
-      i = @buffer.index(/[\[\]()<>{}\s\/]/) || @buffer.size
-      token_chars =
-        if i == 0 and @buffer[i,2] == "<<"    then 2
-        elsif i == 0 and @buffer[i,2] == ">>" then 2
-        elsif i == 0                          then 1
-        else                                    i
-        end
-      strip_space = !(i == 0 and @buffer[0,1] == '(')
-      tok = head(token_chars, strip_space)
+    # return the next token from the source. Returns a string if a token
+    # is found, nil if there are no tokens left.
+    #
+    def token
+      reset_pos
+      prepare_tokens if @tokens.size < 3
+      merge_indirect_reference
+      merge_tokens
-      if tok == ""
-        nil
-      elsif tok[0,1] == "%"
-        @buffer = ""
-        token
-      else
-        tok
-      end
+      @tokens.shift
     end
-    ################################################################################
-    def head (chars, with_strip=true)
-      val = @buffer[0, chars]
-      @buffer = @buffer[chars .. -1] || ""
-      @buffer.lstrip! if with_strip
-      val
-    end
-    ################################################################################
-    # return the internal buffer used by this class when reading from the IO stream.
-    def raw
-      @buffer
-    end
-    ################################################################################
-    # The Xref table in a PDF file acts as an aid for finding the location of various
-    # objects in the file. This method attempts to locate the byte offset of the xref
-    # table in the underlying IO stream.
+    # return the byte offset where the first XRef table in the source can be found.
+    #
     def find_first_xref_offset
-      @io.seek(-1024, IO::SEEK_END) rescue seek(0)
+      @io.seek(-1024, IO::SEEK_END) rescue @io.seek(0)
       data = @io.read(1024)
       # the PDF 1.7 spec (section #3.4) says that EOL markers can be either \r, \n, or both.
@@ -179,8 +159,159 @@ class PDF::Reader
       raise MalformedPDFError, "PDF EOF marker does not follow offset" if eof_index >= lines.size-1
       lines[eof_index+1].to_i
     end
-    ################################################################################
+    private
+    # Something else (e.g. another buffer sharing this IO) moved our stream cursor. Restore it.
+    #
+    def reset_pos
+      @io.seek(@pos) if @io.pos != @pos
+    end
+    # save the current position of the source IO stream. If someone else (like another buffer)
+    # moves the cursor, we can then restore it.
+    #
+    def save_pos
+      @pos = @io.pos
+    end
+    # attempt to prime the buffer with the next few tokens.
+    #
+    def prepare_tokens
+      10.times do
+        if state == :literal_string
+          prepare_literal_token
+        elsif state == :regular
+          prepare_regular_token
+        end
+      end
+      save_pos
+    end
+    # tokenising behaves slightly differently based on the current context.
+    # Determine the current context/state by examining the last token we found
+    #
+    def state
+      if @tokens[-1] == "("
+        :literal_string
+      elsif @tokens[-1] == "stream"
+        :stream
+      else
+        :regular
+      end
+    end
+    # detect a series of 3 tokens that make up an indirect object. If we find
+    # them, replace the tokens with a PDF::Reader::Reference instance.
+    #
+    # Merging them into a single string was another option, but that would mean
+    # code further up the stack would need to check every token to see if it looks
+    # like an indirect object. For optimisation reasons, I'd rather avoid
+    # that extra check.
+    #
+    def merge_indirect_reference
+      return if @tokens.size < 3
+      return if @tokens[2] != "R"
+      if @tokens[0].match(/\d+/) && @tokens[1].match(/\d+/)
+        @tokens[0] = PDF::Reader::Reference.new(@tokens[0].to_i, @tokens[1].to_i)
+        @tokens[1] = nil
+        @tokens[2] = nil
+        @tokens.compact!
+      end
+    end
+    # merge any consecutive tokens that are actually 1 token. The only current
+    # time this is the case is << and >>. < and > are valid tokens (they indicate
+    # a hex string) but so are << and >> (they indicate a dictionary).
+    #
+    def merge_tokens
+      @tokens.each_with_index do |tok, idx|
+        if tok == "<" && @tokens[idx+1] == "<"
+          @tokens.inspect
+          @tokens[idx] = "<<"
+          @tokens[idx+1] = nil
+        elsif tok == ">" && @tokens[idx+1] == ">"
+          @tokens[idx] = ">>"
+          @tokens[idx+1] = nil
+        end
+      end
+      @tokens.compact!
+    end
+    # if we're currently inside a literal string we more or less just read bytes until
+    # we find the closing ) delimiter. Lots of bytes that would otherwise indicate the
+    # start of a new token in regular mode are left untouched when inside a literal
+    # string.
+    #
+    # The entire literal string will be returned as a single token. It will need further
+    # processing to fix things like escaped new lines, but that's someone else's
+    # problem.
+    #
+    def prepare_literal_token
+      str = ""
+      count = 1
+      while count > 0
+        chr = @io.read(1)
+        if chr.nil?
+          count = 0 # unbalanced params
+        elsif chr == "(" && str[-1,1] != "\x5C"
+          str << "("
+          count += 1
+        elsif chr == ")" && str[-1,1] != "\x5C"
+          count -= 1
+          str << ")" unless count == 0
+        else
+          str << chr unless count == 0
+        end
+      end
+      @tokens << str if str.size > 0
+      @tokens << ")"
+    end
+    # Extract the next regular token and stock it in our buffer, ready to be returned.
+    #
+    # What each byte means is complex, check out section "3.1.1 Character Set" of the 1.7 spec
+    # to read up on it.
+    #
+    def prepare_regular_token
+      tok = ""
+      while chr = @io.read(1)
+        case chr
+        when "\x25"
+          # comment, ignore everything until the next EOL char
+          done = false
+          while !done
+            chr = @io.read(1)
+            done = true if chr.nil? || chr == "\x0A" || chr == "\x0D"
+          end
+        when "\x00", "\x09", "\x0A", "\x0C", "\x0D", "\x20"
+          # white space, token finished
+          @tokens << tok if tok.size > 0
+          tok = ""
+          break
+        when "\x28", "\x3C", "\x5B", "\x7B", "\x2F"
+          # opening delimiter, start of new token
+          @tokens << tok if tok.size > 0
+          @tokens << chr
+          tok = ""
+          break
+        when "\x29", "\x3E", "\x5D", "\x7D"
+          # closing delimiter
+          @tokens << tok if tok.size > 0
+          @tokens << chr
+          tok = ""
+          break
+        else
+          tok << chr
+        end
+      end
+      @tokens << tok if tok.size > 0
+    end
   end
-  ################################################################################
 end
-################################################################################

data/lib/pdf/reader/content.rb CHANGED

@@ -323,7 +323,7 @@ class PDF::Reader
     # like a regular page content stream.
     #
     def walk_xobject_form(label)
-      xobjects = current_resources[:XObject] || {}
+      xobjects = @xref.object(current_resources[:XObject]) || {}
       xobject  = @xref.object(xobjects[label])
       if xobject && xobject.hash[:Subtype] == :Form

data/lib/pdf/reader/parser.rb CHANGED

@@ -43,10 +43,10 @@ class PDF::Reader
     #
     # operators - a hash of supported operators to read from the underlying buffer.
     def parse_token (operators={})
-      ref = Reference.from_buffer(@buffer) and return ref
       token = @buffer.token
       case token
+      when PDF::Reader::Reference     then return token
       when nil                        then return nil
       when "/"                        then return @buffer.token.to_sym
       when "<<"                       then return dictionary()
@@ -58,7 +58,7 @@ class PDF::Reader
       when "null"                     then return nil
       when "obj", "endobj"            then return Token.new(token)
       when "stream", "endstream"      then return Token.new(token)
-      when ">>", "]", ">"             then return Token.new(token)
+      when ">>", "]", ">", ")"        then return Token.new(token)
       else
         if operators.has_key?(token)  then return Token.new(token)
         elsif token =~ /\d*\.\d/      then return token.to_f
@@ -66,6 +66,29 @@ class PDF::Reader
         end
       end
     end
+    ################################################################################
+    # Reads an entire PDF object from the buffer and returns it as a Ruby String.
+    # If the object is a content stream, returns both the stream and the dictionary
+    # that describes it
+    #
+    # id  - the object ID to return
+    # gen - the object revision number to return
+    def object (id, gen)
+      Error.assert_equal(parse_token, id)
+      Error.assert_equal(parse_token, gen)
+      Error.str_assert(parse_token, "obj")
+      obj = parse_token
+      post_obj = parse_token
+      case post_obj
+      when "endobj"   then return obj
+      when "stream"   then return stream(obj)
+      else            raise MalformedPDFError, "PDF malformed, unexpected token #{post_obj}"
+      end
+    end
+    private
     ################################################################################
     # reads a PDF dict from the buffer and converts it to a Ruby Hash.
     def dictionary
@@ -114,95 +137,34 @@ class PDF::Reader
     ################################################################################
     # Reads a PDF String from the buffer and converts it to a Ruby String
     def string
-      str = ""
-      count = 1
-      while count != 0
-        @buffer.ready_token(false, false)
-        # find the first occurance of ( ) [ \ or ]
-        #
-        # I originally just used the regexp form of index(), but it seems to be
-        # buggy on some OSX systems (returns nil when there is a match). This
-        # version is more reliable and was suggested by Andrès Koetsier.
-        #
-        i = nil
-        @buffer.raw.unpack("C*").each_with_index do |charint, idx|
-          if [40, 41, 92].include?(charint)
-            i = idx
-            break
-          end
-        end
-        if i.nil?
-          str << @buffer.raw + "\n"
-          @buffer.raw.replace("")
-          # if a content stream opens a string, but never closes it, we'll
-          # hit the end of the stream and still be appending stuff to the
-          # string. bad! This check prevents a hard loop.
-          raise MalformedPDFError, 'unterminated string in content stream' if @buffer.eof?
-          next
-        end
+      str = @buffer.token
+      return "" if str == ")"
+      Error.assert_equal(parse_token, ")")
+      str.gsub!("\\n","\n")
+      str.gsub!("\\r","\r")
+      str.gsub!("\\t","\t")
+      str.gsub!("\\b","\b")
+      str.gsub!("\\f","\f")
+      str.gsub!("\\(","(")
+      str.gsub!("\\)",")")
+      str.gsub!("\\\\","\\")
+      str.gsub!(/\\\n/m,"")
+      str.gsub!(/(\n\r|\r\n|\r)/m,"\n")
+      str.scan(/\\\d{1,3}/).each do |octal|
+        str.gsub!(octal, octal[1,3].oct.chr)
+      end
-        str << @buffer.head(i, false)
-        to_remove = 1
-        case @buffer.raw[0, 1]
-        when "("
-          str << "("
-          count += 1
-        when ")"
-          count -= 1
-          str << ")" unless count == 0
-        when "\\"
-          to_remove = 2
-          case @buffer.raw[1, 1]
-          when ""   then to_remove = 1
-          when "n"  then str << "\n"
-          when "r"  then str << "\r"
-          when "t"  then str << "\t"
-          when "b"  then str << "\b"
-          when "f"  then str << "\f"
-          when "("  then str << "("
-          when ")"  then str << ")"
-          when "\\" then str << "\\"
-          else
-            if m = @buffer.raw.match(/^\\(\d{1,3})/)
-              to_remove = m[0].size
-              str << m[1].oct.chr
-            end
-          end
-        end
+      str.gsub!(/\\([^\\])/,'\1')
-        @buffer.head(to_remove, false)
-      end
       str
     end
     ################################################################################
-    # Reads an entire PDF object from the buffer and returns it as a Ruby String.
-    # If the object is a content stream, returns both the stream and the dictionary
-    # that describes it
-    #
-    # id  - the object ID to return
-    # gen - the object revision number to return
-    def object (id, gen)
-      Error.assert_equal(parse_token, id)
-      Error.assert_equal(parse_token, gen)
-      Error.str_assert(parse_token, "obj")
-      obj = parse_token
-      post_obj = parse_token
-      case post_obj
-      when "endobj"   then return obj
-      when "stream"   then return stream(obj)
-      else            raise MalformedPDFError, "PDF malformed, unexpected token #{post_obj}"
-      end
-    end
-    ################################################################################
     # Decodes the contents of a PDF Stream and returns it as a Ruby String.
     def stream (dict)
       raise MalformedPDFError, "PDF malformed, missing stream length" unless dict.has_key?(:Length)
-      data = @buffer.read(@xref.object(dict[:Length]))
+      data = @buffer.read(@xref.object(dict[:Length]), :skip_eol => true)
       Error.str_assert(parse_token, "endstream")
       Error.str_assert(parse_token, "endobj")

data/lib/pdf/reader/reference.rb CHANGED

@@ -27,16 +27,6 @@ class PDF::Reader
   ################################################################################
   # An internal PDF::Reader class that represents an indirect reference to a PDF Object
   class Reference
-    ################################################################################
-    # check if the next token in the buffer is a reference, and return a PDF::Reader::Reference
-    # instance. Returns nil if the next token isn't an indirect reference.
-    def self.from_buffer (buffer)
-      buffer.ready_token
-      return nil unless m = buffer.raw.match(/^(\d+)\s+(\d+)\s+R\b/)
-      buffer.head(m[0].size)
-      self.new(m[1].to_i, m[2].to_i)
-    end
-    ################################################################################
     attr_reader :id, :gen
     ################################################################################
     # Create a new Reference to an object with the specified id and revision number

data/lib/pdf/reader/xref.rb CHANGED

@@ -32,8 +32,8 @@ class PDF::Reader
   class XRef
     ################################################################################
     # create a new Xref table based on the contents of the supplied PDF::Reader::Buffer object
-    def initialize (buffer)
-      @buffer = buffer
+    def initialize (io)
+      @io = io
       @xref = {}
     end
     def size
@@ -44,8 +44,8 @@ class PDF::Reader
     # table, but it is one of the lowest level data items in the file, so we've lumped it in
     # with the cross reference code.
     def pdf_version
-      @buffer.seek(0)
-      m, version = *@buffer.read(8).match(/%PDF-(\d.\d)/)
+      @io.seek(0)
+      m, version = *@io.read(8).match(/%PDF-(\d.\d)/)
       raise MalformedPDFError, 'invalid PDF version' if version.nil?
       return version.to_f
     end
@@ -55,13 +55,14 @@ class PDF::Reader
     #
     # Will fail silently if there is no xref table at the requested offset.
     def load (offset = nil)
-      offset ||= @buffer.find_first_xref_offset
-      @buffer.seek(offset)
-      token = @buffer.token
+      offset ||= new_buffer.find_first_xref_offset
+      buf = new_buffer(offset)
+      token = buf.token
       if token == "xref" || token == "ref"
-        load_xref_table
-      elsif token.to_i >= 0 && @buffer.token.to_i >= 0 && @buffer.token == "obj"
+        load_xref_table(buf)
+      elsif token.to_i >= 0 && buf.token.to_i >= 0 && buf.token == "obj"
         raise PDF::Reader::UnsupportedFeatureError, "XRef streams are not supported in PDF::Reader yet"
       else
         raise PDF::Reader::MalformedPDFError, "xref table not found at offset #{offset} (#{token} != xref)"
@@ -73,52 +74,12 @@ class PDF::Reader
     # number
     #
     # If the object is a stream, that is returned as well
-    def object (ref, save_pos = true)
+    def object (ref)
       return ref unless ref.kind_of?(Reference)
-      pos = @buffer.pos_without_buf if save_pos
-      obj = Parser.new(@buffer.seek(offset_for(ref)), self).object(ref.id, ref.gen)
-      @buffer.seek(pos) if save_pos
+      buf = new_buffer(offset_for(ref))
+      obj = Parser.new(buf, self).object(ref.id, ref.gen)
       return obj
     end
-    ################################################################################
-    # Assumes the underlying buffer is positioned at the start of an Xref table and
-    # processes it into memory.
-    def load_xref_table
-      tok_one = tok_two = nil
-      begin
-        # loop over all subsections of the xref table
-        # In a well formed PDF, the 'trailer' token will indicate
-        # the end of the table. However we need to be careful in case
-        # we're processing a malformed pdf that is missing the trailer.
-        loop do
-          tok_one, tok_two = @buffer.token, @buffer.token
-          if tok_one != "trailer" && !tok_one.match(/\d+/)
-            raise MalformedPDFError, "PDF malformed, missing trailer after cross reference"
-          end
-          break if tok_one == "trailer" or tok_one.nil?
-          objid, count = tok_one.to_i, tok_two.to_i
-          count.times do
-            offset = @buffer.token.to_i
-            generation = @buffer.token.to_i
-            state = @buffer.token
-            store(objid, generation, offset) if state == "n"
-            objid += 1
-          end
-        end
-      rescue EOFError => e
-        raise MalformedPDFError, "PDF malformed, missing trailer after cross reference"
-      end
-      raise MalformedPDFError, "PDF malformed, trailer should be a dictionary" unless tok_two == "<<"
-      trailer = Parser.new(@buffer, self).dictionary
-      load(trailer[:Prev].to_i) if trailer.has_key?(:Prev)
-      trailer
-    end
     # returns the type of object a ref points to
     def obj_type(ref)
       obj = object(ref)
@@ -154,6 +115,41 @@ class PDF::Reader
       (@xref[id] ||= {})[gen] ||= offset
     end
     ################################################################################
+    private
+    ################################################################################
+    # Assumes the underlying buffer is positioned at the start of an Xref table and
+    # processes it into memory.
+    def load_xref_table(buf)
+      params = []
+      while !params.include?("trailer") && !params.include?(nil)
+        if params.size == 2
+          objid, count = params[0].to_i, params[1].to_i
+          count.times do
+            offset = buf.token.to_i
+            generation = buf.token.to_i
+            state = buf.token
+            store(objid, generation, offset) if state == "n"
+            objid += 1
+            params.clear
+          end
+        end
+        params << buf.token
+      end
+      trailer = Parser.new(buf, self).parse_token
+      raise MalformedPDFError, "PDF malformed, trailer should be a dictionary" unless trailer.kind_of?(Hash)
+      load(trailer[:Prev].to_i) if trailer.has_key?(:Prev)
+      trailer
+    end
+    def new_buffer(offset = 0)
+      PDF::Reader::Buffer.new(@io, :seek => offset)
+    end
   end
   ################################################################################
 end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: pdf-reader
 version: !ruby/object:Gem::Version
-  version: 0.8.1
+  version: 0.8.2
 platform: ruby
 authors:
 - Peter Jones
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2009-11-27 00:00:00 +11:00
+date: 2010-01-01 00:00:00 +11:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency