RubyGems - origami - Versions diffs - 1.2.7 → 2.0.0 - Mend

origami 1.2.7 → 2.0.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (162) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +66 -0
data/README.md +112 -0
data/bin/config/pdfcop.conf.yml +232 -233
data/bin/gui/about.rb +27 -37
data/bin/gui/config.rb +108 -117
data/bin/gui/file.rb +416 -365
data/bin/gui/gtkhex.rb +1138 -1153
data/bin/gui/hexview.rb +55 -57
data/bin/gui/imgview.rb +48 -51
data/bin/gui/menu.rb +388 -386
data/bin/gui/properties.rb +114 -130
data/bin/gui/signing.rb +571 -617
data/bin/gui/textview.rb +77 -95
data/bin/gui/treeview.rb +382 -387
data/bin/gui/walker.rb +227 -232
data/bin/gui/xrefs.rb +56 -60
data/bin/pdf2pdfa +53 -57
data/bin/pdf2ruby +212 -228
data/bin/pdfcop +338 -348
data/bin/pdfdecompress +58 -65
data/bin/pdfdecrypt +56 -60
data/bin/pdfencrypt +75 -80
data/bin/pdfexplode +185 -182
data/bin/pdfextract +201 -218
data/bin/pdfmetadata +83 -82
data/bin/pdfsh +4 -5
data/bin/pdfwalker +1 -2
data/bin/shell/.irbrc +45 -82
data/bin/shell/console.rb +105 -130
data/bin/shell/hexdump.rb +40 -64
data/examples/README.md +34 -0
data/examples/attachments/attachment.rb +38 -0
data/examples/attachments/nested_document.rb +51 -0
data/examples/encryption/encryption.rb +28 -0
data/{samples/actions/triggerevents/trigger.rb → examples/events/events.rb} +13 -16
data/examples/flash/flash.rb +37 -0
data/{samples → examples}/flash/helloworld.swf +0 -0
data/examples/forms/javascript.rb +54 -0
data/examples/forms/xfa.rb +115 -0
data/examples/javascript/hello_world.rb +22 -0
data/examples/javascript/js_emulation.rb +54 -0
data/examples/loop/goto.rb +32 -0
data/examples/loop/named.rb +33 -0
data/examples/signature/signature.rb +65 -0
data/examples/uri/javascript.rb +56 -0
data/examples/uri/open-uri.rb +21 -0
data/examples/uri/submitform.rb +47 -0
data/lib/origami.rb +29 -42
data/lib/origami/3d.rb +350 -225
data/lib/origami/acroform.rb +262 -288
data/lib/origami/actions.rb +268 -288
data/lib/origami/annotations.rb +697 -722
data/lib/origami/array.rb +258 -184
data/lib/origami/boolean.rb +74 -84
data/lib/origami/catalog.rb +397 -434
data/lib/origami/collections.rb +144 -0
data/lib/origami/destinations.rb +233 -194
data/lib/origami/dictionary.rb +253 -232
data/lib/origami/encryption.rb +1274 -1243
data/lib/origami/export.rb +232 -268
data/lib/origami/extensions/fdf.rb +307 -220
data/lib/origami/extensions/ppklite.rb +368 -435
data/lib/origami/filespec.rb +197 -0
data/lib/origami/filters.rb +301 -295
data/lib/origami/filters/ascii.rb +177 -180
data/lib/origami/filters/ccitt.rb +528 -535
data/lib/origami/filters/crypt.rb +26 -35
data/lib/origami/filters/dct.rb +46 -52
data/lib/origami/filters/flate.rb +95 -94
data/lib/origami/filters/jbig2.rb +49 -55
data/lib/origami/filters/jpx.rb +38 -44
data/lib/origami/filters/lzw.rb +189 -183
data/lib/origami/filters/predictors.rb +221 -235
data/lib/origami/filters/runlength.rb +103 -104
data/lib/origami/font.rb +173 -186
data/lib/origami/functions.rb +67 -81
data/lib/origami/graphics.rb +25 -21
data/lib/origami/graphics/colors.rb +178 -187
data/lib/origami/graphics/instruction.rb +79 -85
data/lib/origami/graphics/path.rb +142 -148
data/lib/origami/graphics/patterns.rb +160 -167
data/lib/origami/graphics/render.rb +43 -50
data/lib/origami/graphics/state.rb +138 -153
data/lib/origami/graphics/text.rb +188 -205
data/lib/origami/graphics/xobject.rb +819 -815
data/lib/origami/header.rb +63 -78
data/lib/origami/javascript.rb +596 -597
data/lib/origami/linearization.rb +285 -290
data/lib/origami/metadata.rb +139 -148
data/lib/origami/name.rb +112 -148
data/lib/origami/null.rb +53 -62
data/lib/origami/numeric.rb +162 -175
data/lib/origami/obfuscation.rb +186 -174
data/lib/origami/object.rb +593 -573
data/lib/origami/outline.rb +42 -47
data/lib/origami/outputintents.rb +73 -82
data/lib/origami/page.rb +703 -592
data/lib/origami/parser.rb +238 -290
data/lib/origami/parsers/fdf.rb +41 -33
data/lib/origami/parsers/pdf.rb +75 -95
data/lib/origami/parsers/pdf/lazy.rb +137 -0
data/lib/origami/parsers/pdf/linear.rb +64 -66
data/lib/origami/parsers/ppklite.rb +34 -70
data/lib/origami/pdf.rb +1030 -1005
data/lib/origami/reference.rb +102 -102
data/lib/origami/signature.rb +591 -609
data/lib/origami/stream.rb +668 -551
data/lib/origami/string.rb +397 -373
data/lib/origami/template/patterns.rb +56 -0
data/lib/origami/template/widgets.rb +151 -0
data/lib/origami/trailer.rb +144 -158
data/lib/origami/tree.rb +62 -0
data/lib/origami/version.rb +23 -0
data/lib/origami/webcapture.rb +88 -79
data/lib/origami/xfa.rb +2863 -2882
data/lib/origami/xreftable.rb +472 -384
data/test/dataset/calc.pdf +85 -0
data/test/dataset/crypto.pdf +82 -0
data/test/dataset/empty.pdf +49 -0
data/test/test_actions.rb +27 -0
data/test/test_annotations.rb +90 -0
data/test/test_pages.rb +31 -0
data/test/test_pdf.rb +16 -0
data/test/test_pdf_attachment.rb +34 -0
data/test/test_pdf_create.rb +24 -0
data/test/test_pdf_encrypt.rb +95 -0
data/test/test_pdf_parse.rb +96 -0
data/test/test_pdf_sign.rb +58 -0
data/test/test_streams.rb +182 -0
data/test/test_xrefs.rb +67 -0
metadata +88 -58
data/README +0 -67
data/bin/pdf2graph +0 -121
data/bin/pdfcocoon +0 -104
data/lib/origami/file.rb +0 -233
data/samples/README.txt +0 -45
data/samples/actions/launch/calc.rb +0 -87
data/samples/actions/launch/winparams.rb +0 -22
data/samples/actions/loop/loopgoto.rb +0 -24
data/samples/actions/loop/loopnamed.rb +0 -21
data/samples/actions/named/named.rb +0 -31
data/samples/actions/samba/smbrelay.rb +0 -26
data/samples/actions/webbug/submitform.js +0 -26
data/samples/actions/webbug/webbug-browser.rb +0 -68
data/samples/actions/webbug/webbug-js.rb +0 -67
data/samples/actions/webbug/webbug-reader.rb +0 -90
data/samples/attachments/attach.rb +0 -40
data/samples/attachments/attached.txt +0 -1
data/samples/crypto/crypto.rb +0 -28
data/samples/digsig/signed.rb +0 -46
data/samples/exploits/cve-2008-2992-utilprintf.rb +0 -87
data/samples/exploits/cve-2009-0927-geticon.rb +0 -65
data/samples/exploits/exploit_customdictopen.rb +0 -55
data/samples/exploits/getannots.rb +0 -69
data/samples/flash/flash.rb +0 -31
data/samples/javascript/attached.txt +0 -1
data/samples/javascript/js.rb +0 -52
data/templates/patterns.rb +0 -66
data/templates/widgets.rb +0 -173
data/templates/xdp.rb +0 -92
data/test/ts_pdf.rb +0 -50

data/lib/origami/parsers/ppklite.rb CHANGED

@@ -1,21 +1,20 @@
 =begin
-= File
-	parsers/ppklite.rb
+    This file is part of Origami, PDF manipulation framework for Ruby
+    Copyright (C) 2016	Guillaume Delugré.
-= Info
-	Origami is free software: you can redistribute it and/or modify
-  it under the terms of the GNU Lesser General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
+    Origami is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Lesser General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
-  Origami is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU Lesser General Public License for more details.
+    Origami is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Lesser General Public License for more details.
-  You should have received a copy of the GNU Lesser General Public License
-  along with Origami.  If not, see <http://www.gnu.org/licenses/>.
+    You should have received a copy of the GNU Lesser General Public License
+    along with Origami.  If not, see <http://www.gnu.org/licenses/>.
 =end
@@ -23,71 +22,36 @@ require 'origami/parser'
 module Origami
-  module Adobe
     class PPKLite
-      class Parser < Origami::Parser
-        def parse(stream) #:nodoc:
-          super
+        class Parser < Origami::Parser
+            def parse(stream) #:nodoc:
+                super
+                address_book = PPKLite.new(self)
+                address_book.header = PPKLite::Header.parse(@data)
+                @options[:callback].call(address_book.header)
-          addrbk = Adobe::PPKLite.new
-          addrbk.header = Adobe::PPKLite::Header.parse(stream)
-          @options[:callback].call(addrbk.header)
-          loop do
-            break if (object = parse_object).nil?
-            addrbk << object
-          end
+                loop do
+                    break if (object = parse_object).nil?
+                    address_book.insert(object)
+                end
-          addrbk.revisions.first.xreftable = parse_xreftable
-          addrbm.revisions.first.trailer = parse_trailer
-          book_specialize_entries(addrbk)
+                address_book.revisions.first.xreftable = parse_xreftable
+                address_book.revisions.first.trailer = parse_trailer
-          addrbk
-        end
-        def book_specialize_entries(addrbk) #:nodoc:
-          addrbk.revisions.first.body.each_pair do |ref, obj|
-            if obj.is_a?(Dictionary)
-              if obj[:Type] == :Catalog
-                o = Adobe::PPKLite::Catalog.new(obj)
-                o.generation, o.no, o.file_offset = obj.generation, obj.no, obj.file_offset
-                if o.PPK.is_a?(Dictionary) and o.PPK[:Type] == :PPK
-                  o.PPK = Adobe::PPKLite::PPK.new(o.PPK)
-                  if o.PPK.User.is_a?(Dictionary) and o.PPK.User[:Type] == :User
-                    o.PPK.User = Adobe::PPKLite::UserList.new(o.PPK.User)
-                  end
-                  if o.PPK.AddressBook.is_a?(Dictionary) and o.PPK.AddressBook[:Type] == :AddressBook
-                    o.PPK.AddressBook = Adobe::PPKLite::AddressList.new(o.PPK.AddressBook)
-                  end
+                if Origami::OPTIONS[:enable_type_propagation]
+                    trailer = address_book.revisions.first.trailer
+                    if trailer[:Root].is_a?(Reference)
+                        address_book.cast_object(trailer[:Root], PPKLite::Catalog, self)
+                    end
+                    propagate_types(address_book)
                 end
-                addrbk.revisions.first.body[ref] = o
-              elsif obj[:ABEType] == Adobe::PPKLite::Descriptor::USER
-                o = Adobe::PPKLite::User.new(obj)
-                o.generation, o.no, o.file_offset = obj.generation, obj.no, obj.file_offset
-                addrbk.revisions.first.body[ref] = o
-              elsif obj[:ABEType] == Adobe::PPKLite::Descriptor::CERTIFICATE
-                o = Adobe::PPKLite::Certificate.new(obj)
-                o.generation, o.no, o.file_offset = obj.generation, obj.no, obj.file_offset
-                addrbk.revisions.first.body[ref] = o
-              end
+                address_book
             end
-          end
         end
-      end
     end
-  end
 end

data/lib/origami/pdf.rb CHANGED

@@ -1,25 +1,20 @@
 =begin
-= File
-	pdf.rb
-= Info
-	This file is part of Origami, PDF manipulation framework for Ruby
-	Copyright (C) 2010	Guillaume Delugré <guillaume AT security-labs DOT org>
-	All right reserved.
-	Origami is free software: you can redistribute it and/or modify
-  it under the terms of the GNU Lesser General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-  Origami is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU Lesser General Public License for more details.
-  You should have received a copy of the GNU Lesser General Public License
-  along with Origami.  If not, see <http://www.gnu.org/licenses/>.
+    This file is part of Origami, PDF manipulation framework for Ruby
+    Copyright (C) 2016	Guillaume Delugré.
+    Origami is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Lesser General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+    Origami is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Lesser General Public License for more details.
+    You should have received a copy of the GNU Lesser General Public License
+    along with Origami.  If not, see <http://www.gnu.org/licenses/>.
 =end
@@ -33,1106 +28,1136 @@ require 'origami/numeric'
 require 'origami/string'
 require 'origami/array'
 require 'origami/stream'
+require 'origami/tree'
 require 'origami/filters'
-require 'origami/trailer'
-require 'origami/xreftable'
 require 'origami/header'
+require 'origami/metadata'
 require 'origami/functions'
-require 'origami/catalog'
-require 'origami/font'
 require 'origami/page'
+require 'origami/font'
 require 'origami/graphics'
 require 'origami/destinations'
-require 'origami/outline'
-require 'origami/actions'
-require 'origami/file'
+require 'origami/filespec'
+require 'origami/xfa'
 require 'origami/acroform'
 require 'origami/annotations'
+require 'origami/actions'
 require 'origami/3d'
 require 'origami/signature'
 require 'origami/webcapture'
-require 'origami/metadata'
 require 'origami/export'
 require 'origami/webcapture'
 require 'origami/encryption'
 require 'origami/linearization'
 require 'origami/obfuscation'
-require 'origami/xfa'
 require 'origami/javascript'
+require 'origami/outline'
 require 'origami/outputintents'
+require 'origami/collections'
+require 'origami/catalog'
+require 'origami/xreftable'
+require 'origami/trailer'
-require 'origami/parsers/pdf'
+require 'origami/parsers/pdf/linear'
+require 'origami/parsers/pdf/lazy'
 module Origami
-  class InvalidPDFError < Exception #:nodoc:
-  end
-  #
-  # Main class representing a PDF file and its inner contents.
-  # A PDF file contains a set of Revision.
-  #
-  class PDF
-    #
-    # Class representing a particular revision in a PDF file.
-    # Revision contains :
-    # * A Body, which is a sequence of Object.
-    # * A XRef::Section, holding XRef information about objects in body.
-    # * A Trailer.
-    #
-    class Revision
-      attr_accessor :pdf
-      attr_accessor :body, :xreftable, :xrefstm, :trailer
-      def initialize(pdf)
-        @pdf = pdf
-        @body = {}
-        @xreftable = nil
-        @xrefstm = nil
-        @trailer = nil
-      end
-      def trailer=(trl)
-        trl.pdf = @pdf
-        @trailer = trl
-      end
-      def has_xreftable?
-        not @xreftable.nil?
-      end
-      def has_xrefstm?
-        not @xrefstm.nil?
-      end
-      def objects
-        @body.values
-      end
+    class InvalidPDFError < Error #:nodoc:
     end
-    attr_accessor :header, :revisions
-    class << self
-      #
-      # Reads and parses a PDF file from disk.
-      #
-      def read(filename, options = {})
-        filename = File.expand_path(filename) if filename.is_a?(::String)
-        PDF::LinearParser.new(options).parse(filename)
-      end
-      #
-      # Creates a new PDF and saves it.
-      # If a block is passed, the PDF instance can be processed before saving.
-      #
-      def create(output, options = {})
-        pdf = PDF.new
-        yield(pdf) if block_given?
-        pdf.save(output, options)
-      end
-      alias write create
-      #
-      # Deserializes a PDF dump.
-      #
-      def deserialize(filename)
-        Zlib::GzipReader.open(filename) { |gz|
-          pdf = Marshal.load(gz.read)
-        }
-        pdf
-      end
-    end
-    #
-    # Creates a new PDF instance.
-    # _parser_:: The Parser object creating the document. If none is specified, some default structures are automatically created to get a minimal working document.
-    #
-    def initialize(parser = nil)
-      @header = PDF::Header.new
-      @revisions = []
-      add_new_revision
-      @revisions.first.trailer = Trailer.new
-      if parser
-        @parser = parser
-      else
-        init
-      end
-    end
     #
-    # Original file name if parsed from disk, nil otherwise.
+    # Main class representing a PDF file and its inner contents.
+    # A PDF file contains a set of Revision.
     #
-    def original_filename
-      @parser.target_filename if @parser
-    end
+    class PDF
-    #
-    # Original file size if parsed from a data stream, nil otherwise.
-    #
-    def original_filesize
-      @parser.target_filesize if @parser
-    end
+        #
+        # Class representing a particular revision in a PDF file.
+        # Revision contains :
+        # * A Body, which is a sequence of Object.
+        # * A XRef::Section, holding XRef information about objects in body.
+        # * A Trailer.
+        #
+        class Revision
+            attr_accessor :pdf
+            attr_accessor :body, :xreftable, :xrefstm
+            attr_reader :trailer
+            def initialize(doc)
+                @document = doc
+                @body = {}
+                @xreftable = nil
+                @xrefstm = nil
+                @trailer = nil
+            end
-    #
-    # Original data parsed to create this document, nil if created from scratch.
-    #
-    def original_data
-      @parser.target_data if @parser
-    end
-    #
-    # Serializes the current PDF.
-    #
-    def serialize(filename)
-      parser = @parser
-      @parser = nil # do not serialize the parser
-      Zlib::GzipWriter.open(filename) { |gz|
-        gz.write Marshal.dump(self)
-      }
-      @parser = parser
-      self
-    end
-    #
-    # Saves the current document.
-    # _filename_:: The path where to save this PDF.
-    #
-    def save(path, params = {})
-      options =
-      {
-        :delinearize => true,
-        :recompile => true,
-        :decrypt => false
-      }
-      options.update(params)
-      if self.frozen? # incompatible flags with frozen doc (signed)
-        options[:recompile] =
-        options[:rebuildxrefs] =
-        options[:noindent] =
-        options[:obfuscate] = false
-      end
-      if path.respond_to?(:write)
-        fd = path
-      else
-        path = File.expand_path(path)
-        fd = File.open(path, 'w').binmode
-      end
-      intents_as_pdfa1 if options[:intent] =~ /pdf[\/-]?A1?/i
-      self.delinearize! if options[:delinearize] and self.is_linearized?
-      compile(options) if options[:recompile]
-      fd.write output(options)
-      fd.close
-      self
-    end
-    alias write save
-    #
-    # Saves the file up to given revision number.
-    # This can be useful to visualize the modifications over different incremental updates.
-    # _revision_:: The revision number to save.
-    # _filename_:: The path where to save this PDF.
-    #
-    def save_upto(revision, filename)
-      save(filename, :up_to_revision => revision)
-    end
+            def trailer=(trl)
+                trl.document = @document
-    #
-    # Returns an array of Objects whose content is matching _pattern_.
-    #
-#    def grep(*patterns)
-#
-#      patterns.map! do |pattern|
-#        pattern.is_a?(::String) ? Regexp.new(Regexp.escape(pattern)) : pattern
-#      end
-#
-#      unless patterns.all? { |pattern| pattern.is_a?(Regexp) }
-#        raise TypeError, "Expected a String or Regexp"
-#      end
-#
-#      result = []
-#      objects.each do |obj|
-#        begin
-#          case obj
-#            when String, Name
-#              result << obj if patterns.any?{|pattern| obj.value.to_s.match(pattern)}
-#            when Stream
-#              result << obj if patterns.any?{|pattern| obj.data.match(pattern)}
-#          end
-#        rescue Exception => e
-#          puts "[#{e.class}] #{e.message}"
-#
-#          next
-#        end
-#      end
-#
-#      result
-#    end
+                @trailer = trl
+            end
-    #
-    # Returns an array of strings and streams matching the given pattern.
-    #
-    def grep(*patterns) #:nodoc:
-      patterns.map! do |pattern|
-        if pattern.is_a?(::String)
-          Regexp.new(Regexp.escape(pattern), Regexp::IGNORECASE)
-        else
-          pattern
-        end
-      end
-      unless patterns.all? { |pattern| pattern.is_a?(Regexp) }
-        raise TypeError, "Expected a String or Regexp"
-      end
-      objset = []
-      self.indirect_objects.each do |indobj|
-        case indobj
-          when Stream then
-            objset.push indobj
-            objset.concat(indobj.dictionary.strings_cache)
-            objset.concat(indobj.dictionary.names_cache)
-          when Name,String then objset.push indobj
-          when Dictionary,Array then
-            objset.concat(indobj.strings_cache)
-            objset.concat(indobj.names_cache)
-        end
-      end
-      objset.delete_if do |obj|
-        begin
-          case obj
-            when String, Name
-              not patterns.any?{|pattern| obj.value.to_s.match(pattern)}
-            when Stream
-              not patterns.any?{|pattern| obj.data.match(pattern)}
-          end
-        rescue Exception => e
-          true
+            def has_xreftable?
+                not @xreftable.nil?
+            end
+            def has_xrefstm?
+                not @xrefstm.nil?
+            end
+            def each_object(&b)
+                @body.each_value(&b)
+            end
+            def objects
+                @body.values
+            end
         end
-      end
-    end
-    #
-    # Returns an array of Objects whose name (in a Dictionary) is matching _pattern_.
-    #
-    def ls(*patterns)
-      return objects(:include_keys => false) if patterns.empty?
+        #
+        # Document header and revisions.
+        #
+        attr_accessor :header, :revisions
+        class << self
+            #
+            # Reads and parses a PDF file from disk.
+            #
+            def read(path, options = {})
+                path = File.expand_path(path) if path.is_a?(::String)
+                lazy = options[:lazy]
+                if lazy
+                    parser_class = PDF::LazyParser
+                else
+                    parser_class = PDF::LinearParser
+                end
-      result = []
+                parser_class.new(options).parse(path)
+            end
-      patterns.map! do |pattern|
-        pattern.is_a?(::String) ? Regexp.new(Regexp.escape(pattern)) : pattern
-      end
+            #
+            # Creates a new PDF and saves it.
+            # If a block is passed, the PDF instance can be processed before saving.
+            #
+            def create(output, options = {})
+                pdf = PDF.new
+                yield(pdf) if block_given?
+                pdf.save(output, options)
+            end
+            alias write create
-      objects(:only_keys => true).each do |key|
-        if patterns.any?{ |pattern| key.value.to_s.match(pattern) }
-          value = key.parent[key]
-          result << ( value.is_a?(Reference) ? value.solve : value )
+            #
+            # Deserializes a PDF dump.
+            #
+            def deserialize(filename)
+                Zlib::GzipReader.open(filename) { |gz|
+                    return Marshal.load(gz.read)
+                }
+            end
         end
-      end
-      result
-    end
+        #
+        # Creates a new PDF instance.
+        # _parser_:: The Parser object creating the document.
+        #            If none is specified, some default structures are automatically created to get a minimal working document.
+        #
+        def initialize(parser = nil)
+            @header = PDF::Header.new
+            @revisions = []
-    #
-    # Returns an array of Objects whose name (in a Dictionary) is matching _pattern_.
-    # Do not follow references.
-    #
-    def ls_no_follow(*patterns)
-      return objects(:include_keys => false) if patterns.empty?
+            add_new_revision
+            @revisions.first.trailer = Trailer.new
+            if parser
+                @loaded = false
+                @parser = parser
+            else
+                init
+            end
+        end
-      result = []
+        #
+        # Original file name if parsed from disk, nil otherwise.
+        #
+        def original_filename
+            @parser.target_filename if @parser
+        end
-      patterns.map! do |pattern|
-        pattern.is_a?(::String) ? Regexp.new(Regexp.escape(pattern)) : pattern
-      end
+        #
+        # Original file size if parsed from a data stream, nil otherwise.
+        #
+        def original_filesize
+            @parser.target_filesize if @parser
+        end
-      objects(:only_keys => true).each do |key|
-        if patterns.any?{ |pattern| key.value.to_s.match(pattern) }
-          value = key.parent[key]
-          result << value
+        #
+        # Original data parsed to create this document, nil if created from scratch.
+        #
+        def original_data
+            @parser.target_data if @parser
         end
-      end
-      result
-    end
+        #
+        # Serializes the current PDF.
+        #
+        def serialize(filename)
+            parser = @parser
+            @parser = nil # do not serialize the parser
-    #
-    # Returns an array of objects matching specified block.
-    #
-    def find(params = {}, &b)
-      options =
-      {
-        :only_indirect => false
-      }
-      options.update(params)
-      objset = (options[:only_indirect] == true) ?
-        self.indirect_objects : self.objects
-      objset.find_all(&b)
-    end
-    #
-    # Returns an array of objects embedded in the PDF body.
-    # _include_objstm_:: Whether it shall return objects embedded in object streams.
-    # Note : Shall return to an iterator for Ruby 1.9 comp.
-    #
-    def objects(params = {})
-      def append_subobj(root, objset, opts)
-        if objset.find{ |o| root.equal?(o) }.nil?
-          objset << root unless opts[:only_keys]
-          if root.is_a?(Dictionary)
-            root.each_pair { |name, value|
-              objset << name if opts[:only_keys]
-              append_subobj(name, objset, opts) if opts[:include_keys] and not opts[:only_keys]
-              append_subobj(value, objset, opts)
+            Zlib::GzipWriter.open(filename) { |gz|
+                gz.write Marshal.dump(self)
             }
-          elsif root.is_a?(Array) or (root.is_a?(ObjectStream) and opts[:include_objectstreams])
-            root.each { |subobj| append_subobj(subobj, objset, opts) }
-          end
+            @parser = parser
+            self
         end
-      end
-      options =
-      {
-        :include_objectstreams => true,
-        :include_keys => true,
-        :only_keys => false
-      }
-      options.update(params)
-      options[:include_keys] |= options[:only_keys]
-      objset = []
-      @revisions.each do |revision|
-        revision.objects.each do |object|
-            append_subobj(object, objset, options)
+        #
+        # Saves the current document.
+        # _filename_:: The path where to save this PDF.
+        #
+        def save(path, params = {})
+            options =
+            {
+                delinearize: true,
+                recompile: true,
+                decrypt: false
+            }
+            options.update(params)
+            if self.frozen? # incompatible flags with frozen doc (signed)
+                options[:recompile] =
+                options[:rebuild_xrefs] =
+                options[:noindent] =
+                options[:obfuscate] = false
+            end
+            if path.respond_to?(:write)
+                fd = path
+            else
+                path = File.expand_path(path)
+                fd = File.open(path, 'w').binmode
+                close = true
+            end
+            load_all_objects unless @loaded
+            intents_as_pdfa1 if options[:intent] =~ /pdf[\/-]?A1?/i
+            self.delinearize! if options[:delinearize] and self.linearized?
+            compile(options) if options[:recompile]
+            fd.write output(options)
+            fd.close if close
+            self
         end
-      end
-      objset
-    end
-    #
-    # Return an array of indirect objects.
-    #
-    def indirect_objects
-      @revisions.inject([]) do |set, rev| set.concat(rev.objects) end
-    end
-    alias :root_objects :indirect_objects
-    #
-    # Adds a new object to the PDF file.
-    # If this object has no version number, then a new one will be automatically computed and assignated to him.
-    # It returns a Reference to this Object.
-    # _object_:: The object to add.
-    #
-    def <<(object)
-      owner = object.pdf
-      #
-      # Does object belongs to another PDF ?
-      #
-      if owner and not owner.equal?(self)
-        import object
-      else
-        add_to_revision(object, @revisions.last)
-      end
-    end
-    alias :insert :<<
-    #
-    # Similar to PDF#insert or PDF#<<, but for an object belonging to another document.
-    # Object will be recursively copied and new version numbers will be assigned.
-    # Returns the new reference to the imported object.
-    # _object_:: The object to import.
-    #
-    def import(object)
-      self.insert(object.export)
-    end
+        alias write save
-    #
-    # Adds a new object to a specific revision.
-    # If this object has no version number, then a new one will be automatically computed and assignated to him.
-    # It returns a Reference to this Object.
-    # _object_:: The object to add.
-    # _revision_:: The revision to add the object to.
-    #
-    def add_to_revision(object, revision)
-      object.set_indirect(true)
-      object.set_pdf(self)
-      object.no, object.generation = alloc_new_object_number if object.no == 0
-      revision.body[object.reference] = object
-      object.reference
-    end
+        #
+        # Saves the file up to given revision number.
+        # This can be useful to visualize the modifications over different incremental updates.
+        # _revision_:: The revision number to save.
+        # _filename_:: The path where to save this PDF.
+        #
+        def save_upto(revision, filename)
+            save(filename, up_to_revision: revision)
+        end
-    #
-    # Ends the current Revision, and starts a new one.
-    #
-    def add_new_revision
-      root = @revisions.last.trailer[:Root] unless @revisions.empty?
+        #
+        # Returns an array of strings, names and streams matching the given pattern.
+        # _streams_: Search into decoded stream data.
+        # _object_streams_: Search into objects inside object streams.
+        #
+        def grep(pattern, streams: true, object_streams: true) #:nodoc:
-      @revisions << Revision.new(self)
-      @revisions.last.trailer = Trailer.new
-      @revisions.last.trailer.Root = root
+            pattern = /#{Regexp.escape(pattern)}/i if pattern.is_a?(::String)
+            raise TypeError, "Expected a String or Regexp" unless pattern.is_a?(Regexp)
-      self
-    end
+            result = []
-    #
-    # Removes a whole document revision.
-    # _index_:: Revision index, first is 0.
-    #
-    def remove_revision(index)
-      if index < 0 or index > @revisions.size
-        raise IndexError, "Not a valid revision index"
-      end
+            search_object = -> (object) do
+                case object
+                when Stream
+                    result.concat object.dictionary.strings_cache.select{|str| pattern === str}
+                    result.concat object.dictionary.names_cache.select{|name| pattern === name.value}
-      if @revisions.size == 1
-        raise InvalidPDFError, "Cannot remove last revision"
-      end
+                    begin
+                        result.push object if streams and object.data.match(pattern)
+                    rescue Filter::Error
+                        next # Skip object if a decoding error occured.
+                    end
-      @revisions.delete_at(index)
-      self
-    end
-    #
-    # Looking for an object present at a specified file offset.
-    #
-    def get_object_by_offset(offset) #:nodoc:
-      self.indirect_objects.find { |obj| obj.file_offset == offset }
-    end
+                    next if object.is_a?(ObjectStream) and not object_streams
-    #
-    # Remove an object.
-    #
-    def delete_object(no, generation = 0)
-      case no
-        when Reference
-          target = no
-        when ::Integer
-          target = Reference.new(no, generation)
-      else
-        raise TypeError, "Invalid parameter type : #{no.class}"
-      end
-      @revisions.each do |rev|
-        rev.body.delete(target)
-      end
+                    object.each do |subobject|
+                        search_object.call(subobject)
+                    end
-    end
+                when Name, String
+                    result.push object if object.value.match(pattern)
+                when Dictionary, Array then
+                    result.concat object.strings_cache.select{|str| pattern === str}
+                    result.concat object.names_cache.select{|name| pattern === name.value}
+                end
+            end
+            self.indirect_objects.each do |object|
+                search_object.call(object)
+            end
+            result
+        end
+        # Returns an array of the values stored under Dictionary keys whose name matches _pattern_.
+        # _pattern_:: a String (escaped and matched case-insensitively) or a Regexp; any other type raises TypeError.
+        # _follow_references_:: when true, each value is passed through #solve before being returned.
+        def ls(pattern, follow_references: true)
+            pattern = /#{Regexp.escape(pattern)}/i if pattern.is_a?(::String)
+            raise TypeError, "Expected a String or Regexp" unless pattern.is_a?(Regexp)
+            self.grep(pattern, streams: false, object_streams: true)
+                .select {|object| object.is_a?(Name) and object.parent.is_a?(Dictionary) and object.parent.key?(object) }
+                .collect {|object| result = object.parent[object]; follow_references ? result.solve : result }
+        end
+        #
+        # Iterates over the objects of every revision of the document, yielding each one to the block.
+        # _compressed_:: when true, also yields the objects contained inside object streams.
+        # _recursive_:: when true, also yields the objects nested inside dictionaries, arrays and stream dictionaries.
+        # Without a block, returns an enumerator over the same sequence.
+        def each_object(compressed: false, recursive: false)
+            return enum_for(__method__, compressed: compressed,
+                                        recursive: recursive
+                           ) unless block_given?
+            walk_object = -> (object) do # depth-first walk: yields every nested child, then descends into it
+                case object
+                when Dictionary
+                    object.each_value do |value|
+                        yield(value)
+                        walk_object.call(value)
+                    end
+                when Array
+                    object.each do |child|
+                        yield(child)
+                        walk_object.call(child)
+                    end
+                when Stream
+                    yield(object.dictionary) # only the stream dictionary is walked here
+                    walk_object.call(object.dictionary)
+                end
+            end
+            @revisions.each do |revision|
+                revision.each_object do |object|
+                    yield(object)
+                    walk_object.call(object) if recursive
+                    if object.is_a?(ObjectStream) and compressed # objects embedded in an object stream
+                        object.each do |child_obj|
+                            yield(child_obj)
+                            walk_object.call(child_obj) if recursive
+                        end
+                    end
+                end
+            end
+        end
+        #
+        # Returns a new array made of the objects of every revision (rev.objects), in revision order.
+        #
+        def indirect_objects
+            @revisions.inject([]) do |set, rev| set.concat(rev.objects) end
+        end
+        alias root_objects indirect_objects # root_objects is a second name for the same method
+        #
+        # Adds a new object to the PDF file.
+        # If this object has no version number, then a new one will be automatically
+        # computed and assigned to it.
+        #
+        # It returns a Reference to this Object.
+        # _object_:: The object to add.
+        #
+        def <<(object)
+            owner = object.document
-    #
-    # Search for an indirect object in the document.
-    # _no_:: Reference or number of the object.
-    # _generation_:: Object generation.
-    #
-    def get_object(no, generation = 0, use_xrefstm = true) #:nodoc:
-      case no
-        when Reference
-          target = no
-        when ::Integer
-           target = Reference.new(no, generation)
-        when Origami::Object
-          return no
-      else
-        raise TypeError, "Invalid parameter type : #{no.class}"
-      end
-      #
-      # Search through accessible indirect objects.
-      #
-      @revisions.each do |rev|
-        return rev.body[target] if rev.body.include?(target)
-      end
-      # Look into XRef streams.
-      if use_xrefstm == true
-        if @revisions.last.has_xrefstm?
-          xrefstm = @revisions.last.xrefstm
-          done = []
-          while xrefstm.is_a?(XRefStream) and not done.include?(xrefstm)
-            xref = xrefstm.find(target.refno)
             #
-            # We found a matching XRef.
+            # Does the object belong to another PDF?
             #
-            if xref.is_a?(XRefToCompressedObj)
-              objstm = get_object(xref.objstmno, 0, false)
-              object = objstm.extract_by_index(xref.index)
-              if object.is_a?(Origami::Object) and object.no == target.refno
-                return object
-              else
-                return objstm.extract(target.refno)
-              end
-            elsif xrefstm.has_field?(:Prev)
-              done << xrefstm
-              xrefstm = get_object_by_offset(xrefstm.Prev)
+            if owner and not owner.equal?(self)
+                import object
             else
-              break
+                add_to_revision(object, @revisions.last)
             end
-          end
         end
+        alias insert <<
         #
-        # Lastly search directly into Object streams (might be very slow).
+        # Similar to PDF#insert or PDF#<<, but for an object belonging to another document.
+        # Object will be recursively copied and new version numbers will be assigned.
+        # Returns the new reference to the imported object.
+        # _object_:: The object to import.
         #
-        @revisions.each do |rev|
-          streams = rev.objects.find_all{|obj| obj.is_a?(ObjectStream) and obj.include?(target.refno)}
-          return streams.first.extract(target.refno) unless streams.empty?
+        def import(object)
+            self.insert(object.export)
         end
-        nil
-      end
-    end
+        #
+        # Adds a new object to a specific revision.
+        # If this object has no version number, then a new one will be automatically
+        # computed and assigned to it.
+        #
+        # It returns a Reference to this Object.
+        # _object_:: The object to add.
+        # _revision_:: The revision to add the object to.
+        #
+        def add_to_revision(object, revision)
+            object.set_indirect(true)
+            object.set_document(self)
+            object.no, object.generation = allocate_new_object_number if object.no == 0
-    alias :[] :get_object
+            revision.body[object.reference] = object
-    def cast_object(reference, type) #:nodoc:
-      @revisions.each do |rev|
-        if rev.body.include?(reference) and type < rev.body[reference].class
-          rev.body[reference] = rev.body[reference].cast_to(type)
+            object.reference
         end
-      end
-    end
-    #
-    # Returns a new number/generation for future object.
-    #
-    def alloc_new_object_number
-      no = 1
-      # Deprecated number allocation policy (first available)
-      #no = no + 1 while get_object(no)
+        #
+        # Ends the current Revision, and starts a new one.
+        #
+        def add_new_revision
+            root = @revisions.last.trailer[:Root] unless @revisions.empty?
-      objset = self.indirect_objects
-      self.indirect_objects.find_all{|obj| obj.is_a?(ObjectStream)}.each do |objstm|
-        objstm.each{|obj| objset << obj}
-      end
+            @revisions << Revision.new(self)
+            @revisions.last.trailer = Trailer.new
+            @revisions.last.trailer.Root = root
-      allocated = objset.collect{|obj| obj.no}.compact
-      no = allocated.max + 1 unless allocated.empty?
+            self
+        end
-      [ no, 0 ]
-    end
-    ##########################
-    private
-    ##########################
-    #
-    # Compute and update XRef::Section for each Revision.
-    #
-    def rebuildxrefs
-      size = 0
-      startxref = @header.to_s.size
-      @revisions.each do |revision|
-        revision.objects.each do |object|
-          startxref += object.to_s.size
+        #
+        # Removes a whole document revision and returns self.
+        # _index_:: Revision index, first is 0.
+        # Raises IndexError when the index is rejected by the bound check, InvalidPDFError when only one revision is left.
+        def remove_revision(index)
+            if index < 0 or index > @revisions.size # NOTE(review): index == @revisions.size passes this check and delete_at then removes nothing; '>=' looks intended, confirm
+                raise IndexError, "Not a valid revision index"
+            end
+            if @revisions.size == 1 # the single remaining revision is never removed
+                raise InvalidPDFError, "Cannot remove last revision"
+            end
+            @revisions.delete_at(index)
+            self
         end
-        size += revision.body.size
-        revision.xreftable = buildxrefs(revision.objects)
-        revision.trailer ||= Trailer.new
-        revision.trailer.Size = size + 1
-        revision.trailer.startxref = startxref
-        startxref += revision.xreftable.to_s.size + revision.trailer.to_s.size
-      end
-      self
-    end
-    #
-    # This method is meant to recompute, verify and correct main PDF structures, in order to output a proper file.
-    # * Allocates objects references.
-    # * Sets some objects missing required values.
-    #
-    def compile(options = {})
-      #
-      # A valid document must have at least one page.
-      #
-      append_page if pages.empty?
-      #
-      # Allocates object numbers and creates references.
-      # Invokes object finalization methods.
-      #
-      if self.is_a?(Encryption::EncryptedDocument)
-        physicalize(options)
-      else
-        physicalize
-      end
-      #
-      # Sets the PDF version header.
-      #
-      version, level = version_required
-      @header.majorversion = version[0,1].to_i
-      @header.minorversion = version[2,1].to_i
-      set_extension_level(version, level) if level > 0
-      self
-    end
-    #
-    # Cleans the document from its references.
-    # Indirects objects are made direct whenever possible.
-    # TODO: Circuit-checking to avoid infinite induction
-    #
-    def logicalize #:nodoc:
-      fail "Not yet supported"
-      processed = []
-      def convert(root) #:nodoc:
-        replaced = []
-        if root.is_a?(Dictionary) or root.is_a?(Array)
-          root.each { |obj|
-            convert(obj)
-          }
-          root.map! { |obj|
-            if obj.is_a?(Reference)
-              target = obj.solve
-              # Streams can't be direct objects
-              if target.is_a?(Stream)
-                obj
-              else
-                replaced << obj
-                target
-              end
+        #
+        # Returns the first indirect object whose file_offset equals _offset_, or nil when none matches.
+        #
+        def get_object_by_offset(offset) #:nodoc:
+            self.indirect_objects.find { |obj| obj.file_offset == offset }
+        end
+        #
+        # Removes an object from the body of every revision. _no_:: a Reference, or an Integer object number combined with _generation_ (default 0). Any other type raises TypeError.
+        #
+        def delete_object(no, generation = 0)
+            case no
+            when Reference
+                target = no
+            when ::Integer
+                target = Reference.new(no, generation) # build the lookup key from number and generation
+            else
+                raise TypeError, "Invalid parameter type : #{no.class}"
+            end
+            @revisions.each do |rev|
+                rev.body.delete(target) # revision bodies are keyed by reference
+            end
+        end
+        #
+        # Search for an indirect object in the document.
+        # _no_:: Reference or number of the object.
+        # _generation_:: Object generation.
+        #
+        def get_object(no, generation = 0, use_xrefstm: true) #:nodoc:
+            case no
+            when Reference
+                target = no
+            when ::Integer
+                target = Reference.new(no, generation)
+            when Origami::Object
+                return no
             else
-              obj
+                raise TypeError, "Invalid parameter type : #{no.class}"
+            end
+            #
+            # Search through accessible indirect objects.
+            #
+            @revisions.reverse_each do |rev|
+                return rev.body[target] if rev.body.include?(target)
+            end
+            #
+            # Search through xref sections.
+            #
+            @revisions.reverse_each do |rev|
+                next unless rev.has_xreftable?
+                xref = rev.xreftable.find(target.refno)
+                next if xref.nil? or xref.free?
+                # Try loading the object if it is not present.
+                object = load_object_at_offset(rev, xref.offset)
+                return object unless object.nil?
+            end
+            return nil unless use_xrefstm
+            # Search through xref streams.
+            @revisions.reverse_each do |rev|
+                next unless rev.has_xrefstm?
+                xrefstm = rev.xrefstm
+                xref = xrefstm.find(target.refno)
+                next if xref.nil?
+                #
+                # We found a matching XRef.
+                #
+                if xref.is_a?(XRefToCompressedObj)
+                    objstm = get_object(xref.objstmno, 0, use_xrefstm: use_xrefstm)
+                    object = objstm.extract_by_index(xref.index)
+                    if object.is_a?(Origami::Object) and object.no == target.refno
+                        return object
+                    else
+                        return objstm.extract(target.refno)
+                    end
+                elsif xref.is_a?(XRef)
+                    object = load_object_at_offset(rev, xref.offset)
+                    return object unless object.nil?
+                end
+            end
+            #
+            # Lastly search directly into Object streams (might be very slow).
+            #
+            @revisions.reverse_each do |rev|
+                stream = rev.objects.find{|obj| obj.is_a?(ObjectStream) and obj.include?(target.refno)}
+                return stream.extract(target.refno) unless stream.nil?
+            end
+            nil
+        end
+        alias [] get_object
+        #
+        # Casts a PDF object into another object type.
+        # The target type must be a subtype of the original type.
+        #
+        def cast_object(reference, type, parser = nil) #:nodoc:
+            @revisions.each do |rev|
+                if rev.body.include?(reference) and type < rev.body[reference].class
+                    rev.body[reference] = rev.body[reference].cast_to(type, parser)
+                    rev.body[reference]
+                else
+                    nil
+                end
             end
-          }
         end
-        replaced
-      end
+        #
+        # Returns a new number/generation for future object.
+        #
+        def allocate_new_object_number
+            no = 1
+            # Deprecated number allocation policy (first available)
+            #no = no + 1 while get_object(no)
-      @revisions.each do |revision|
-        revision.objects.each do |obj|
-          processed.concat(convert(obj))
+            objset = self.indirect_objects
+            self.indirect_objects.find_all{|obj| obj.is_a?(ObjectStream)}.each do |objstm|
+                objstm.each{|obj| objset << obj}
+            end
+            allocated = objset.collect{|obj| obj.no}.compact
+            no = allocated.max + 1 unless allocated.empty?
+            [ no, 0 ]
         end
-      end
-    end
-    #
-    # Converts a logical PDF view into a physical view ready for writing.
-    #
-    def physicalize
-      #
-      # Indirect objects are added to the revision and assigned numbers.
-      #
-      def build(obj, revision) #:nodoc:
+        #
+        # Marks the document as complete by setting @loaded to true.
+        # No more objects need to be fetched by the parser: the lazy loaders of this class return early once @loaded is set.
+        #
+        def loaded!
+            @loaded = true
+        end
+        ##########################
+        private
+        ##########################
         #
-        # Finalize any subobjects before building the stream.
+        # Load an object from its given file offset.
+        # The document must have an associated Parser.
         #
-        if obj.is_a?(ObjectStream)
-          obj.each do |subobj|
-            build(subobj, revision)
-          end
+        def load_object_at_offset(revision, offset)
+            return nil if @loaded or @parser.nil?
+            pos = @parser.pos
+            begin
+                object = @parser.parse_object(offset)
+                return nil if object.nil?
+                if self.is_a?(Encryption::EncryptedDocument)
+                    case object
+                    when String
+                        object.extend(Encryption::EncryptedString)
+                        object.decrypted = false
+                    when Stream
+                        object.extend(Encryption::EncryptedStream)
+                        object.decrypted = false
+                    when Dictionary, Array
+                        object.strings_cache.each do |string|
+                            string.extend(Encryption::EncryptedString)
+                            string.decrypted = false
+                        end
+                    end
+                end
+                add_to_revision(object, revision)
+            ensure
+                @parser.pos = pos
+            end
+            object
         end
-        obj.pre_build
-        if obj.is_a?(Dictionary) or obj.is_a?(Array)
-            obj.map! do |subobj|
-              if subobj.is_indirect?
-                if get_object(subobj.reference)
-                  subobj.reference
+        #
+        # Force the loading of all objects in the document.
+        #
+        def load_all_objects
+            return if @loaded or @parser.nil?
+            @revisions.each do |revision|
+                if revision.has_xreftable?
+                    xrefs = revision.xreftable
+                elsif revision.has_xrefstm?
+                    xrefs = revision.xrefstm
                 else
-                  ref = add_to_revision(subobj, revision)
-                  build(subobj, revision)
-                  ref
+                    next
+                end
+                xrefs.each_with_number do |_, no|
+                    self.get_object(no)
                 end
-              else
-                subobj
-              end
             end
-            obj.each do |subobj|
-              build(subobj, revision)
+            @loaded = true
+        end
+        #
+        # Compute and update XRef::Section for each Revision.
+        #
+        def rebuild_xrefs
+            size = 0
+            startxref = @header.to_s.size
+            @revisions.each do |revision|
+                revision.objects.each do |object|
+                    startxref += object.to_s.size
+                end
+                size += revision.body.size
+                revision.xreftable = build_xrefs(revision.objects)
+                revision.trailer ||= Trailer.new
+                revision.trailer.Size = size + 1
+                revision.trailer.startxref = startxref
+                startxref += revision.xreftable.to_s.size + revision.trailer.to_s.size
             end
-        elsif obj.is_a?(Stream)
-          build(obj.dictionary, revision)
+            self
         end
-        obj.post_build
-      end
-      indirect_objects_by_rev.each do |obj, revision|
-          build(obj, revision)
-      end
-      self
-    end
+        #
+        # Recomputes, verifies and corrects the main PDF structures so that a well-formed file can be output.
+        # * Allocates object references.
+        # * Sets required values that are missing on some objects.
+        #
+        def compile(options = {})
-    #
-    # Returns the final binary representation of the current document.
-    #
-    def output(params = {})
-      has_objstm = self.indirect_objects.any?{|obj| obj.is_a?(ObjectStream)}
-      options =
-      {
-        :rebuildxrefs => true,
-        :noindent => false,
-        :obfuscate => false,
-        :use_xrefstm => has_objstm,
-        :use_xreftable => (not has_objstm),
-        :up_to_revision => @revisions.size
-      }
-      options.update(params)
-      options[:up_to_revision] = @revisions.size if options[:up_to_revision] > @revisions.size
-      # Reset to default params if no xrefs are chosen (hybrid files not supported yet)
-      if options[:use_xrefstm] == options[:use_xreftable]
-        options[:use_xrefstm] = has_objstm
-        options[:use_xreftable] = (not has_objstm)
-      end
-      # Get trailer dictionary
-      trailer_info = get_trailer_info
-      if trailer_info.nil?
-        raise InvalidPDFError, "No trailer information found"
-      end
-      trailer_dict = trailer_info.dictionary
-      prev_xref_offset = nil
-      xrefstm_offset = nil
-      xreftable_offset = nil
-      # Header
-      bin = ""
-      bin << @header.to_s
-      # For each revision
-      @revisions[0, options[:up_to_revision]].each do |rev|
-        # Create xref table/stream.
-        if options[:rebuildxrefs] == true
-          lastno_table, lastno_stm = 0, 0
-          brange_table, brange_stm = 0, 0
-          xrefs_stm = [ XRef.new(0, 0, XRef::FREE) ]
-          xrefs_table = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]
-          if options[:use_xreftable] == true
-            xrefsection = XRef::Section.new
-          end
+            load_all_objects unless @loaded
-          if options[:use_xrefstm] == true
-            xrefstm = rev.xrefstm || XRefStream.new
-            if xrefstm == rev.xrefstm
-              xrefstm.clear
+            #
+            # A valid document must have at least one page.
+            #
+            append_page if pages.empty?
+            #
+            # Allocates object numbers and creates references.
+            # Invokes object finalization methods.
+            #
+            if self.is_a?(Encryption::EncryptedDocument)
+                physicalize(options)
             else
-              add_to_revision(xrefstm, rev)
+                physicalize
             end
-          end
+            #
+            # Sets the PDF version header.
+            #
+            version, level = version_required
+            @header.major_version = version[0,1].to_i
+            @header.minor_version = version[2,1].to_i
+            set_extension_level(version, level) if level > 0
+            self
         end
-        objset = rev.objects
-        objset.find_all{|obj| obj.is_a?(ObjectStream)}.each do |objstm|
-          objset.concat objstm.objects
-        end if options[:rebuildxrefs] == true and options[:use_xrefstm] == true
-        # For each object, in number order
-        objset.sort.each do |obj|
-          # Create xref entry.
-          if options[:rebuildxrefs] == true
-            # Adding subsections if needed
-            if options[:use_xreftable] and (obj.no - lastno_table).abs > 1
-              xrefsection << XRef::Subsection.new(brange_table, xrefs_table)
-              xrefs_table.clear
-              brange_table = obj.no
+        #
+        # Removes references from the document.
+        # Indirect objects are made direct whenever possible.
+        # TODO: Cycle detection to avoid infinite recursion.
+        #
+        def logicalize #:nodoc:
+            raise NotImplementedError
+            processed = []
+            convert = -> (root) do
+                replaced = []
+                if root.is_a?(Dictionary) or root.is_a?(Array)
+                    root.each do |obj|
+                        convert[obj]
+                    end
+                    root.map! do |obj|
+                        if obj.is_a?(Reference)
+                            target = obj.solve
+                            # Streams can't be direct objects
+                            if target.is_a?(Stream)
+                                obj
+                            else
+                                replaced << obj
+                                target
+                            end
+                        else
+                            obj
+                        end
+                    end
+                end
+                replaced
             end
-            if options[:use_xrefstm] and (obj.no - lastno_stm).abs > 1
-              xrefs_stm.each do |xref| xrefstm << xref end
-              xrefstm.Index ||= []
-              xrefstm.Index << brange_stm << xrefs_stm.length
-              xrefs_stm.clear
-              brange_stm = obj.no
+            @revisions.each do |revision|
+                revision.objects.each do |obj|
+                    processed.concat(convert[obj])
+                end
             end
+        end
-            # Process embedded objects
-            if options[:use_xrefstm] and obj.parent != obj and obj.parent.is_a?(ObjectStream)
-              index = obj.parent.index(obj.no)
-              xrefs_stm << XRefToCompressedObj.new(obj.parent.no, index)
-              lastno_stm = obj.no
-            else
-              xrefs_stm << XRef.new(bin.size, obj.generation, XRef::USED)
-              xrefs_table << XRef.new(bin.size, obj.generation, XRef::USED)
+        #
+        # Converts a logical PDF view into a physical view ready for writing.
+        #
+        def physicalize
+            #
+            # Indirect objects are added to the revision and assigned numbers.
+            #
+            build = -> (obj, revision) do
+                #
+                # Finalize any subobjects before building the stream.
+                #
+                if obj.is_a?(ObjectStream)
+                    obj.each do |subobj|
+                        build.call(subobj, revision)
+                    end
+                end
-              lastno_table = lastno_stm = obj.no
+                obj.pre_build
+                if obj.is_a?(Dictionary) or obj.is_a?(Array)
+                    obj.map! do |subobj|
+                        if subobj.indirect?
+                            if get_object(subobj.reference)
+                                subobj.reference
+                            else
+                                ref = add_to_revision(subobj, revision)
+                                build.call(subobj, revision)
+                                ref
+                            end
+                        else
+                            subobj
+                        end
+                    end
+                    obj.each do |subobj|
+                        build.call(subobj, revision)
+                    end
+                elsif obj.is_a?(Stream)
+                    build.call(obj.dictionary, revision)
+                end
+                obj.post_build
             end
+          indirect_objects_by_rev.each do |obj, revision|
+              build.call(obj, revision)
           end
-          if obj.parent == obj or not obj.parent.is_a?(ObjectStream)
-            # Finalize XRefStm
-            if options[:rebuildxrefs] == true and options[:use_xrefstm] == true and obj == xrefstm
-              xrefstm_offset = bin.size
-              xrefs_stm.each do |xref| xrefstm << xref end
-              xrefstm.W = [ 1, (xrefstm_offset.to_s(2).size + 7) >> 3, 2 ]
-              if xrefstm.DecodeParms.is_a?(Dictionary) and xrefstm.DecodeParms.has_key?(:Columns)
-                xrefstm.DecodeParms[:Columns] = xrefstm.W[0] + xrefstm.W[1] + xrefstm.W[2]
-              end
-              xrefstm.Index ||= []
-              xrefstm.Index << brange_stm << xrefs_stm.size
-              xrefstm.dictionary = xrefstm.dictionary.merge(trailer_dict)
-              xrefstm.Prev = prev_xref_offset
-              rev.trailer.dictionary = nil
-              add_to_revision(xrefstm, rev)
-              xrefstm.pre_build
-              xrefstm.post_build
+          self
+        end
+        #
+        # Returns the final binary representation of the current document.
+        #
+        def output(params = {})
+            has_objstm = self.indirect_objects.any?{|obj| obj.is_a?(ObjectStream)}
+            options =
+            {
+                rebuild_xrefs: true,
+                noindent: false,
+                obfuscate: false,
+                use_xrefstm: has_objstm,
+                use_xreftable: (not has_objstm),
+                up_to_revision: @revisions.size
+            }
+            options.update(params)
+            options[:up_to_revision] = @revisions.size if options[:up_to_revision] > @revisions.size
+            # Reset to default params if no xrefs are chosen (hybrid files not supported yet)
+            if options[:use_xrefstm] == options[:use_xreftable]
+                options[:use_xrefstm] = has_objstm
+                options[:use_xreftable] = (not has_objstm)
             end
-            # Output object code
-            if (obj.is_a?(Dictionary) or obj.is_a?(Stream)) and options[:noindent]
-              bin << obj.to_s(0)
-            else
-              bin << obj.to_s
+            # Get trailer dictionary
+            trailer_info = get_trailer_info
+            raise InvalidPDFError, "No trailer information found" if trailer_info.nil?
+            trailer_dict = trailer_info.dictionary
+            prev_xref_offset = nil
+            xrefstm_offset = nil
+            # Header
+            bin = ""
+            bin << @header.to_s
+            # For each revision
+            @revisions[0, options[:up_to_revision]].each do |rev|
+                # Create xref table/stream.
+                if options[:rebuild_xrefs] == true
+                    lastno_table, lastno_stm = 0, 0
+                    brange_table, brange_stm = 0, 0
+                    xrefs_stm = [ XRef.new(0, 0, XRef::FREE) ]
+                    xrefs_table = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]
+                    if options[:use_xreftable] == true
+                        xrefsection = XRef::Section.new
+                    end
+                    if options[:use_xrefstm] == true
+                        xrefstm = rev.xrefstm || XRefStream.new
+                        if xrefstm == rev.xrefstm
+                            xrefstm.clear
+                        else
+                            add_to_revision(xrefstm, rev)
+                        end
+                    end
+                end
+                objset = rev.objects
+                objset.find_all{|obj| obj.is_a?(ObjectStream)}.each do |objstm|
+                    objset.concat objstm.objects
+                end if options[:rebuild_xrefs] == true and options[:use_xrefstm] == true
+                previous_obj = nil
+                # For each object, in number order
+                # Move any XRefStream to the end of the revision.
+                objset.sort_by {|obj| [obj.is_a?(XRefStream) ? 1 : 0, obj.no, obj.generation] }
+                      .each do |obj|
+                    # Ensures that every object has a unique reference number.
+                    # Duplicates should never happen in a well-formed revision and will cause breakage of xrefs.
+                    if previous_obj and previous_obj.reference == obj.reference
+                        raise InvalidPDFError, "Duplicate object detected, reference #{obj.reference}"
+                    else
+                        previous_obj = obj
+                    end
+                    # Create xref entry.
+                    if options[:rebuild_xrefs] == true
+                        # Adding subsections if needed
+                        if options[:use_xreftable] and (obj.no - lastno_table).abs > 1
+                            xrefsection << XRef::Subsection.new(brange_table, xrefs_table)
+                            xrefs_table.clear
+                            brange_table = obj.no
+                        end
+                        if options[:use_xrefstm] and (obj.no - lastno_stm).abs > 1
+                            xrefs_stm.each do |xref| xrefstm << xref end
+                            xrefstm.Index ||= []
+                            xrefstm.Index << brange_stm << xrefs_stm.length
+                            xrefs_stm.clear
+                            brange_stm = obj.no
+                        end
+                        # Process embedded objects
+                        if options[:use_xrefstm] and obj.parent != obj and obj.parent.is_a?(ObjectStream)
+                            index = obj.parent.index(obj.no)
+                            xrefs_stm << XRefToCompressedObj.new(obj.parent.no, index)
+                            lastno_stm = obj.no
+                        else
+                            xrefs_stm << XRef.new(bin.size, obj.generation, XRef::USED)
+                            xrefs_table << XRef.new(bin.size, obj.generation, XRef::USED)
+                            lastno_table = lastno_stm = obj.no
+                        end
+                    end
+                    if obj.parent == obj or not obj.parent.is_a?(ObjectStream)
+                        # Finalize XRefStm
+                        if options[:rebuild_xrefs] == true and options[:use_xrefstm] == true and obj == xrefstm
+                            xrefstm_offset = bin.size
+                            xrefs_stm.each do |xref| xrefstm << xref end
+                            xrefstm.W = [ 1, (xrefstm_offset.to_s(2).size + 7) >> 3, 2 ]
+                            if xrefstm.DecodeParms.is_a?(Dictionary) and xrefstm.DecodeParms.has_key?(:Columns)
+                                xrefstm.DecodeParms[:Columns] = xrefstm.W[0] + xrefstm.W[1] + xrefstm.W[2]
+                            end
+                            xrefstm.Index ||= []
+                            xrefstm.Index << brange_stm << xrefs_stm.size
+                            xrefstm.dictionary = xrefstm.dictionary.merge(trailer_dict)
+                            xrefstm.Prev = prev_xref_offset
+                            rev.trailer.dictionary = nil
+                            add_to_revision(xrefstm, rev)
+                            xrefstm.pre_build
+                            xrefstm.post_build
+                        end
+                        # Output object code
+                        if (obj.is_a?(Dictionary) or obj.is_a?(Stream)) and options[:noindent]
+                            bin << obj.to_s(indent: 0)
+                        else
+                            bin << obj.to_s
+                        end
+                    end
+                end # end each object
+                rev.trailer ||= Trailer.new
+                # XRef table
+                if options[:rebuild_xrefs] == true
+                    if options[:use_xreftable] == true
+                        table_offset = bin.size
+                        xrefsection << XRef::Subsection.new(brange_table, xrefs_table)
+                        rev.xreftable = xrefsection
+                        rev.trailer.dictionary = trailer_dict
+                        rev.trailer.Size = objset.size + 1
+                        rev.trailer.Prev = prev_xref_offset
+                        rev.trailer.XRefStm = xrefstm_offset if options[:use_xrefstm] == true
+                    end
+                    startxref = options[:use_xreftable] == true ? table_offset : xrefstm_offset
+                    rev.trailer.startxref = prev_xref_offset = startxref
+                end
+                # Trailer
+                bin << rev.xreftable.to_s if options[:use_xreftable] == true
+                bin << (options[:obfuscate] == true ? rev.trailer.to_obfuscated_str : rev.trailer.to_s)
+            end # end each revision
+            bin
+        end
+        #
+        # Instanciates basic structures required for a valid PDF file.
+        #
+        def init
+            catalog = (self.Catalog = (trailer_key(:Root) || Catalog.new))
+            catalog.Pages = PageTreeNode.new.set_indirect(true)
+            @revisions.last.trailer.Root = catalog.reference
+            @loaded = true
+            self
+        end
+        def filesize #:nodoc:
+            output(rebuild_xrefs: false).size
+        end
+        def version_required #:nodoc:
+            max = [ 1.0, 0 ]
+            @revisions.each do |revision|
+                revision.objects.each do |object|
+                    current = object.version_required
+                    max = current if (current <=> max) > 0
+                end
             end
-          end
+            max[0] = max[0].to_s
+            max
         end
-        rev.trailer ||= Trailer.new
-        # XRef table
-        if options[:rebuildxrefs] == true
-          if options[:use_xreftable] == true
-            table_offset = bin.size
-            xrefsection << XRef::Subsection.new(brange_table, xrefs_table)
-            rev.xreftable = xrefsection
-            rev.trailer.dictionary = trailer_dict
-            rev.trailer.Size = objset.size + 1
-            rev.trailer.Prev = prev_xref_offset
-            rev.trailer.XRefStm = xrefstm_offset if options[:use_xrefstm] == true
-          end
-          startxref = options[:use_xreftable] == true ? table_offset : xrefstm_offset
-          rev.trailer.startxref = prev_xref_offset = startxref
-        end # end each rev
-        # Trailer
-        bin << rev.xreftable.to_s if options[:use_xreftable] == true
-        bin << (options[:obfuscate] == true ? rev.trailer.to_obfuscated_str : rev.trailer.to_s)
-      end
-      bin
-    end
-    #
-    # Instanciates basic structures required for a valid PDF file.
-    #
-    def init
-      catalog = (self.Catalog = (get_doc_attr(:Root) || Catalog.new))
-      catalog.Pages = PageTreeNode.new.set_indirect(true)
-      @revisions.last.trailer.Root = catalog.reference
+        def indirect_objects_by_rev #:nodoc:
+            @revisions.inject([]) do |set,rev|
+                objset = rev.objects
+                set.concat(objset.zip(::Array.new(objset.length, rev)))
+            end
+        end
+        #
+        # Compute and update XRef::Section for each Revision.
+        #
+        def rebuild_dummy_xrefs #:nodoc
+            build_dummy_xrefs = -> (objects) do
+                lastno = 0
+                brange = 0
+                xrefs = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]
+                xrefsection = XRef::Section.new
+                objects.sort.each do |object|
+                    if (object.no - lastno).abs > 1
+                        xrefsection << XRef::Subsection.new(brange, xrefs)
+                        brange = object.no
+                        xrefs.clear
+                    end
+                    xrefs << XRef.new(0, 0, XRef::FREE)
+                    lastno = object.no
+                end
+                xrefsection << XRef::Subsection.new(brange, xrefs)
+                xrefsection
+            end
+            size = 0
+            startxref = @header.to_s.size
+            @revisions.each do |revision|
+                revision.objects.each do |object|
+                    startxref += object.to_s.size
+                end
+                size += revision.body.size
+                revision.xreftable = build_dummy_xrefs.call(revision.objects)
+                revision.trailer ||= Trailer.new
+                revision.trailer.Size = size + 1
+                revision.trailer.startxref = startxref
+                startxref += revision.xreftable.to_s.size + revision.trailer.to_s.size
+            end
+            self
+        end
+        #
+        # Build a xref section from a set of objects.
+        #
+        def build_xrefs(objects) #:nodoc:
+            lastno = 0
+            brange = 0
+            xrefs = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]
+            xrefsection = XRef::Section.new
+            objects.sort.each do |object|
+                if (object.no - lastno).abs > 1
+                    xrefsection << XRef::Subsection.new(brange, xrefs)
+                    brange = object.no
+                    xrefs.clear
+                end
+                xrefs << XRef.new(get_object_offset(object.no, object.generation), object.generation, XRef::USED)
+                lastno = object.no
+            end
-      self
-    end
-    def filesize #:nodoc:
-      output(:rebuildxrefs => false).size
-    end
-    def version_required #:nodoc:
-      max = [ 1.0, 0 ]
-      @revisions.each { |revision|
-        revision.objects.each { |object|
-          current = object.pdf_version_required
-          max = current if (current <=> max) > 0
-        }
-      }
-      max[0] = max[0].to_s
-      max
-    end
-    def indirect_objects_by_rev #:nodoc:
-      @revisions.inject([]) do |set,rev|
-        objset = rev.objects
-        set.concat(objset.zip(::Array.new(objset.length, rev)))
-      end
-    end
-    #
-    # Compute and update XRef::Section for each Revision.
-    #
-    def rebuild_dummy_xrefs #:nodoc
-      def build_dummy_xrefs(objects)
-        lastno = 0
-        brange = 0
-        xrefs = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]
-        xrefsection = XRef::Section.new
-        objects.sort.each { |object|
-          if (object.no - lastno).abs > 1
             xrefsection << XRef::Subsection.new(brange, xrefs)
-            brange = object.no
-            xrefs.clear
-          end
-          xrefs << XRef.new(0, 0, XRef::FREE)
-          lastno = object.no
-        }
-        xrefsection << XRef::Subsection.new(brange, xrefs)
-        xrefsection
-      end
-      size = 0
-      startxref = @header.to_s.size
-      @revisions.each do |revision|
-        revision.objects.each do |object|
-          startxref += object.to_s.size
+            xrefsection
         end
-        size += revision.body.size
-        revision.xreftable = build_dummy_xrefs(revision.objects)
-        revision.trailer ||= Trailer.new
-        revision.trailer.Size = size + 1
-        revision.trailer.startxref = startxref
-        startxref += revision.xreftable.to_s.size + revision.trailer.to_s.size
-      end
-      self
-    end
-    #
-    # Build a xref section from a set of objects.
-    #
-    def buildxrefs(objects) #:nodoc:
-      lastno = 0
-      brange = 0
-      xrefs = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]
-      xrefsection = XRef::Section.new
-      objects.sort.each { |object|
-        if (object.no - lastno).abs > 1
-          xrefsection << XRef::Subsection.new(brange, xrefs)
-          brange = object.no
-          xrefs.clear
+        def delete_revision(ngen) #:nodoc:
+            @revisions.delete_at[ngen]
         end
-        xrefs << XRef.new(get_object_offset(object.no, object.generation), object.generation, XRef::USED)
-        lastno = object.no
-      }
-      xrefsection << XRef::Subsection.new(brange, xrefs)
-      xrefsection
-    end
-    def delete_revision(ngen) #:nodoc:
-      @revisions.delete_at[ngen]
-    end
-    def get_revision(ngen) #:nodoc:
-      @revisions[ngen].body
-    end
-    def get_object_offset(no,generation) #:nodoc:
-      objectoffset = @header.to_s.size
-      @revisions.each do |revision|
-        revision.objects.sort.each do |object|
-          if object.no == no and object.generation == generation then return objectoffset
-          else
-            objectoffset += object.to_s.size
-          end
+        def get_revision(ngen) #:nodoc:
+            @revisions[ngen].body
         end
-        objectoffset += revision.xreftable.to_s.size
-        objectoffset += revision.trailer.to_s.size
-      end
-      nil
-    end
-	end
+        def get_object_offset(no,generation) #:nodoc:
+            objectoffset = @header.to_s.size
-end
+            @revisions.each do |revision|
+                revision.objects.sort.each do |object|
+                    if object.no == no and object.generation == generation then return objectoffset
+                    else
+                        objectoffset += object.to_s.size
+                    end
+                end
+                objectoffset += revision.xreftable.to_s.size
+                objectoffset += revision.trailer.to_s.size
+            end
+            nil
+        end
+    end
+end