RubyGems - hexapdf - Versions diffs - 0.27.0 → 0.28.0 - Mend

hexapdf 0.27.0 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +59 -1
data/examples/019-acro_form.rb +14 -3
data/examples/023-images.rb +30 -0
data/lib/hexapdf/cli/info.rb +5 -1
data/lib/hexapdf/cli/inspect.rb +2 -2
data/lib/hexapdf/cli/split.rb +2 -2
data/lib/hexapdf/configuration.rb +1 -2
data/lib/hexapdf/content/canvas.rb +8 -3
data/lib/hexapdf/dictionary.rb +1 -5
data/lib/hexapdf/document.rb +6 -10
data/lib/hexapdf/filter/ascii85_decode.rb +1 -1
data/lib/hexapdf/importer.rb +32 -27
data/lib/hexapdf/layout/list_box.rb +1 -5
data/lib/hexapdf/object.rb +5 -0
data/lib/hexapdf/parser.rb +13 -0
data/lib/hexapdf/revision.rb +15 -12
data/lib/hexapdf/revisions.rb +4 -0
data/lib/hexapdf/tokenizer.rb +14 -8
data/lib/hexapdf/type/acro_form/appearance_generator.rb +174 -128
data/lib/hexapdf/type/acro_form/button_field.rb +5 -3
data/lib/hexapdf/type/acro_form/choice_field.rb +2 -0
data/lib/hexapdf/type/acro_form/field.rb +11 -5
data/lib/hexapdf/type/acro_form/form.rb +33 -7
data/lib/hexapdf/type/acro_form/signature_field.rb +2 -0
data/lib/hexapdf/type/acro_form/text_field.rb +12 -2
data/lib/hexapdf/type/annotations/widget.rb +3 -0
data/lib/hexapdf/type/font_true_type.rb +14 -0
data/lib/hexapdf/type/object_stream.rb +2 -2
data/lib/hexapdf/type/outline.rb +1 -1
data/lib/hexapdf/type/page.rb +56 -46
data/lib/hexapdf/version.rb +1 -1
data/lib/hexapdf/writer.rb +2 -3
data/test/hexapdf/content/test_canvas.rb +5 -0
data/test/hexapdf/document/test_pages.rb +2 -2
data/test/hexapdf/encryption/test_aes.rb +1 -1
data/test/hexapdf/filter/test_predictor.rb +0 -1
data/test/hexapdf/layout/test_box.rb +2 -1
data/test/hexapdf/layout/test_column_box.rb +1 -1
data/test/hexapdf/layout/test_list_box.rb +1 -1
data/test/hexapdf/test_document.rb +2 -8
data/test/hexapdf/test_importer.rb +13 -6
data/test/hexapdf/test_parser.rb +17 -0
data/test/hexapdf/test_revision.rb +15 -14
data/test/hexapdf/test_revisions.rb +43 -0
data/test/hexapdf/test_stream.rb +1 -1
data/test/hexapdf/test_tokenizer.rb +3 -4
data/test/hexapdf/test_writer.rb +3 -3
data/test/hexapdf/type/acro_form/test_appearance_generator.rb +135 -56
data/test/hexapdf/type/acro_form/test_button_field.rb +6 -1
data/test/hexapdf/type/acro_form/test_choice_field.rb +4 -0
data/test/hexapdf/type/acro_form/test_field.rb +4 -4
data/test/hexapdf/type/acro_form/test_form.rb +18 -0
data/test/hexapdf/type/acro_form/test_signature_field.rb +4 -0
data/test/hexapdf/type/acro_form/test_text_field.rb +13 -0
data/test/hexapdf/type/signature/common.rb +3 -1
data/test/hexapdf/type/test_font_true_type.rb +20 -0
data/test/hexapdf/type/test_object_stream.rb +2 -1
data/test/hexapdf/type/test_outline.rb +3 -0
data/test/hexapdf/type/test_page.rb +67 -30
data/test/hexapdf/type/test_page_tree_node.rb +4 -2
metadata +46 -3

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 57b852a0648f47b5e3443e9b8aa4480a7a8cd187085c0067baf86af14cee7d9a
-  data.tar.gz: 6e28f748f1d8e089b6585748e2f56c3adf79669cf9b00d0d9fe8d83a74e97063
+  metadata.gz: 874e09b094ea4e793d1d123cfbaded6d1cc5ba93af3b57587e9faf402786a30f
+  data.tar.gz: c1eed6a778936cd360b4f1878a18abc3e5727c1f36b5eab4838a9a72817dff7b
 SHA512:
-  metadata.gz: a1d2cd75344a3fc9cd54f0dafa6a0fd23b9821d67f54581090a6db2820f7e04a0989da01d7b5f9c558e02bb57cf254f4ebaab791b950d4dacc1e02a29c5f3844
-  data.tar.gz: ea758cdeb96d8282e3c6581785b690b8d79e9bee318a7cf80bfb4201b47fc0f1d721a78b5c703b32f20b8671211b604e78f83dec6768187321fef46e5dac3a81
+  metadata.gz: b66a7587a239acbeb9ebbb20f851b2fa7738c5a4c2f8a95ae7ae3d7419b84ae37b5d1fb48a6b7023bff0df3e1728c395b5351c9c42c05707fabd5a1722e2b88a
+  data.tar.gz: 62ac7d070bb8ae3426685af497a047096dd32dc16a102baa961512652222b388198561776fc1227be69bdd0713183d91fa25e411262eec34a29227aae9723d5c

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,61 @@
+## 0.28.0 - 2022-12-30
+### Added
+* [HexaPDF::Type::AcroForm::AppearanceGenerator#create_push_button_appearances]
+  to allow customizing the behaviour
+* [HexaPDF::Parser#linearized?] for determining whether a document is linearized
+* Information on linearization to `hexapdf info` output
+* Support for `AFNumber_Format` Javascript method to the form field appearance
+  generator
+* Support for using fully embedded, simple TrueType fonts for drawing operations
+### Changed
+* **Breaking change**: `HexaPDF::Revision#reset_objects` has been removed
+* **Breaking change**: Method signature of [HexaPDF::Importer::for] has been
+  changed
+* **Breaking change**: [HexaPDF::Type::AcroForm::Field#each_widget] now has the
+  default value of the argument `direct_only` set to `true` instead of `false`
+* [HexaPDF::Revision#each_modified_object] to allow deleting the modified
+  objects from the active objects' container
+* [HexaPDF::Revision#each_modified_object] to allow ignoring added object
+  streams and cross-reference streams
+* [HexaPDF::Revisions::from_io] to merge the two revisions of a linearized PDF
+* [HexaPDF::Importer] and [HexaPDF::Document#import] to make working with them
+  easier by allowing the import of arbitrary objects
+* `HexaPDF::Type::AcroForm::Form#perform_validation` to combine fields with the
+  same name
+### Fixed
+* [HexaPDF::Type::AcroForm::AppearanceGenerator#create_check_box_appearances] to
+  correctly handle a field value of `nil`
+* Return value of `#type` method for all AcroForm field classes
+* [HexaPDF::Type::Page#flatten_annotations] to work correctly in case no
+  annotations are on the page
+* [HexaPDF::Type::AcroForm::ButtonField#create_appearances] to avoid creating
+  appearances in case of as-yet unresolved references to existing appearances
+* [HexaPDF::Type::AcroForm::TextField#create_appearances] to avoid creating
+  appearances in case of pre-existing ones
+* `HexaPDF::Tokenizer#parse_number` to treat invalid indirect object references
+  with an object number of 0 as null values
+* [HexaPDF::Type::AcroForm::AppearanceGenerator] to handle empty appearance
+  characteristics dictionary marker style strings
+* Writing of encrypted files containing two or more revisions
+* Generation of object streams to never allow storing the catalog object to
+  avoid problems with certain viewers
+* `HexaPDF::Type::Outline#perform_validation` to not show validation error when
+  `/Count` is zero
+* Writing of documents with two or more revisions in non-incremental mode when
+  `optimize: true` is used and the original document used cross-reference tables
+* [HexaPDF::Type::AcroForm::AppearanceGenerator] to take a widget's rotation
+  value into account
+* [HexaPDF::Type::Page#flatten_annotations] to correctly flatten all
+  annotations, including ones with custom rotations
+* [HexaPDF::Type::Page#rotate] to also rotate annotations
 ## 0.27.0 - 2022-11-18
 ### Added
@@ -236,7 +294,7 @@
 ### Added
 - [HexaPDF::Composer#create_stamp] for creating a form Xobject
-- [HexaPDF::Revision#reset_objects] for deleting all live loaded and added
+- `HexaPDF::Revision#reset_objects` for deleting all live loaded and added
   objects
 - Support for removing or flattening annotations to the `hexapdf modify` command
 - Option to CLI command `hexapdf form` to allow generation of a template file

data/examples/019-acro_form.rb CHANGED Viewed

@@ -6,6 +6,9 @@
 # This example show-cases how to create the various form field types and their
 # possible standard appearances.
 #
+# Note the 'number format' text field which uses a JavaScript function for
+# formatting a number.
+#
 # Usage:
 # : `ruby acro_form.rb`
 #
@@ -42,13 +45,21 @@ rb = form.create_radio_button("Radio")
 end
 rb.field_value = :button0
-canvas.text("Text fields", at: [50, 450])
+canvas.text("Text fields", at: [50, 480])
-canvas.text("Single line", at: [70, 420])
+canvas.text("Single line", at: [70, 450])
 tx = form.create_text_field("Single Line", font_size: 16)
-widget = tx.create_widget(page, Rect: [200, 415, 500, 435])
+widget = tx.create_widget(page, Rect: [200, 445, 500, 465])
 tx.field_value = "A sample test string!"
+canvas.text("Number format", at: [70, 420])
+tx = form.create_text_field("Number format", font_size: 16)
+widget = tx.create_widget(page, Rect: [200, 415, 500, 435])
+widget[:AA] = {
+  F: {S: :JavaScript, JS: 'AFNumber_Format(2, 2, 0, 0, "EUR ", true);'},
+}
+tx.field_value = "123456,789"
 canvas.text("Multiline", at: [70, 390])
 tx = form.create_multiline_text_field("Multiline", font_size: 0, align: :right)
 widget = tx.create_widget(page, Rect: [200, 325, 500, 405])

data/examples/023-images.rb ADDED Viewed

@@ -0,0 +1,30 @@
+# # Images
+#
+# This example shows how to embed images into a PDF document, directly on a
+# page's canvas and through the high-level [HexaPDF::Composer].
+#
+# Usage:
+# : `ruby images.rb`
+#
+require 'hexapdf'
+file = File.join(__dir__, 'machupicchu.jpg')
+doc = HexaPDF::Document.new
+# Image only added to PDF once though used multiple times
+canvas = doc.pages.add.canvas
+canvas.image(file, at: [100, 500]) # auto-size based on image size
+canvas.image(file, at: [100, 300], width: 100) # height based on w/h ratio
+canvas.image(file, at: [300, 300], height: 100) # width based on w/h ratio
+canvas.image(file, at: [100, 100], width: 300, height: 100)
+HexaPDF::Composer.create('images.pdf') do |composer|
+  composer.image(file) # fill current rectangular region
+  composer.image(file, width: 100)  # height based on w/h ratio
+  composer.image(file, height: 100) # width based on w/h ratio
+  composer.image(file, width: 300, height: 100)
+  # Add the page created above as second page
+  composer.document.pages << composer.document.import(doc.pages[0])
+end

data/lib/hexapdf/cli/info.rb CHANGED Viewed

@@ -131,6 +131,10 @@ module HexaPDF
             output_line("Encrypted", "yes (no or wrong password given)")
           end
+          if doc.revisions.parser.linearized?
+            output_line("Linearized", "yes")
+          end
           signatures = doc.signatures.to_a
           unless signatures.empty?
             nr_sigs = signatures.count
@@ -186,7 +190,7 @@ module HexaPDF
       end
       def output_line(header, text) #:nodoc:
-        puts(("#{header}:").ljust(COLUMN_WIDTH) << text.to_s)
+        puts("#{header}:".ljust(COLUMN_WIDTH) << text.to_s)
       end
     end

data/lib/hexapdf/cli/inspect.rb CHANGED Viewed

@@ -335,9 +335,9 @@ module HexaPDF
       # - The signature dictionary if this revision was signed
       # - The byte offset from the start of the file to the end of the revision
       def revision_information
-        signatures = @doc.signatures.map do |sig|
+        signatures = @doc.signatures.to_h do |sig|
           [@doc.revisions.find {|rev| rev.object(sig) == sig }, sig]
-        end.to_h
+        end
         io = @doc.revisions.parser.io
         startxrefs = @doc.revisions.map {|rev| rev.trailer[:Prev] }

data/lib/hexapdf/cli/split.rb CHANGED Viewed

@@ -131,8 +131,8 @@ module HexaPDF
         @page_name_cache ||= {}
         return @page_name_cache[box] if @page_name_cache.key?(box)
-        paper_size = HexaPDF::Type::Page::PAPER_SIZE.find do |_name, box|
-          box.each_with_index.all? {|entry, index| (entry - box[index]).abs < 5 }
+        paper_size = HexaPDF::Type::Page::PAPER_SIZE.find do |_name, paper_box|
+          paper_box.each_with_index.all? {|entry, index| (entry - paper_box[index]).abs < 5 }
         end
         @page_name_cache[box] =

data/lib/hexapdf/configuration.rb CHANGED Viewed

@@ -422,8 +422,7 @@ module HexaPDF
                       'encryption.filter_map' => {
                         Standard: 'HexaPDF::Encryption::StandardSecurityHandler',
                       },
-                      'encryption.sub_filter_map' => {
-                      },
+                      'encryption.sub_filter_map' => {},
                       'filter.map' => {
                         ASCIIHexDecode: 'HexaPDF::Filter::ASCIIHexDecode',
                         AHx: 'HexaPDF::Filter::ASCIIHexDecode',

data/lib/hexapdf/content/canvas.rb CHANGED Viewed

@@ -1626,17 +1626,22 @@ module HexaPDF
         end
         return obj if obj.width == 0 || obj.height == 0
+        left, bottom = *at
         width, height = calculate_dimensions(obj.width, obj.height,
                                              rwidth: width, rheight: height)
         if obj[:Subtype] != :Image
           width /= obj.box.width.to_f
           height /= obj.box.height.to_f
-          at[0] -= obj.box.left
-          at[1] -= obj.box.bottom
+          left -= obj.box.left
+          bottom -= obj.box.bottom
         end
-        transform(width, 0, 0, height, at[0], at[1]) do
+        if left == 0 && bottom == 0 && width == 1 && height == 1
           invoke1(:Do, resources.add_xobject(obj))
+        else
+          transform(width, 0, 0, height, left, bottom) do
+            invoke1(:Do, resources.add_xobject(obj))
+          end
         end
         obj

data/lib/hexapdf/dictionary.rb CHANGED Viewed

@@ -108,11 +108,7 @@ module HexaPDF
     # The ancestor classes are also searched for such a field entry if none is found for the
     # current class.
     def self.field(name)
-      if defined?(@fields) && @fields.key?(name)
-        @fields[name]
-      elsif superclass.respond_to?(:field)
-        superclass.field(name)
-      end
+      @fields&.[](name) || superclass.field(name)
     end
     # :call-seq:

data/lib/hexapdf/document.rb CHANGED Viewed

@@ -164,6 +164,8 @@ module HexaPDF
     def initialize(io: nil, decryption_opts: {}, config: {})
       @config = Configuration.with_defaults(config)
       @version = '1.2'
+      @cache = Hash.new {|h, k| h[k] = {} }
+      @listeners = {}
       @revisions = Revisions.from_io(self, io)
       @security_handler = if encrypted? && @config['document.auto_decrypt']
@@ -171,9 +173,6 @@ module HexaPDF
                           else
                             nil
                           end
-      @listeners = {}
-      @cache = Hash.new {|h, k| h[k] = {} }
     end
     # :call-seq:
@@ -251,19 +250,16 @@ module HexaPDF
     # :call-seq:
     #   doc.import(obj)     -> imported_object
     #
-    # Imports the given, with a different document associated PDF object and returns the imported
+    # Imports the given object from a different HexaPDF::Document instance and returns the imported
     # object.
     #
     # If the same argument is provided in multiple invocations, the import is done only once and
-    # the previously imoprted object is returned.
+    # the previously imported object is returned.
     #
     # See: Importer
     def import(obj)
-      if !obj.kind_of?(HexaPDF::Object) || !obj.document? || obj.document == self
-        raise ArgumentError, "Importing only works for PDF objects associated " \
-          "with another document"
-      end
-      HexaPDF::Importer.for(source: obj.document, destination: self).import(obj)
+      source = (obj.kind_of?(HexaPDF::Object) ? obj.document : nil)
+      HexaPDF::Importer.for(self).import(obj, source: source)
     end
     # Wraps the given object inside a HexaPDF::Object class which allows one to use

data/lib/hexapdf/filter/ascii85_decode.rb CHANGED Viewed

@@ -49,7 +49,7 @@ module HexaPDF
     module ASCII85Decode
       VALUE_TO_CHAR = {} #:nodoc:
-      (0..84).each do |i|
+      85.times do |i|
         VALUE_TO_CHAR[i] = (i + 33).chr
       end

data/lib/hexapdf/importer.rb CHANGED Viewed

@@ -60,64 +60,69 @@ module HexaPDF
     end
-    # Returns the Importer object for copying objects from the +source+ to the +destination+
-    # document.
-    def self.for(source:, destination:)
+    # Returns the Importer object for copying objects to the +destination+ document.
+    def self.for(destination)
       @map ||= {}
-      @map.keep_if {|_, v| v.source.weakref_alive? && v.destination.weakref_alive? }
-      source = NullableWeakRef.new(source)
+      @map.keep_if {|_, v| v.destination.weakref_alive? }
       destination = NullableWeakRef.new(destination)
-      @map[[source.hash, destination.hash]] ||= new(source: source, destination: destination)
+      @map[destination.hash] ||= new(destination)
     end
     private_class_method :new
-    attr_reader :source, :destination #:nodoc:
+    attr_reader :destination #:nodoc:
-    # Initializes a new importer that can import objects from the +source+ document to the
-    # +destination+ document.
-    def initialize(source:, destination:)
-      @source = source
+    # Initializes a new importer that can import objects to the +destination+ document.
+    def initialize(destination)
       @destination = destination
       @mapper = {}
     end
-    # Imports the given +object+ from the source to the destination object and returns the
-    # imported object.
+    SourceWrapper = Struct.new(:source) #:nodoc:
+    # Imports the given +object+ to the destination document and returns the imported object.
     #
     # Note: Indirect objects are automatically added to the destination document but direct or
     # simple objects are not.
     #
-    # An error is raised if the object doesn't belong to the +source+ document.
-    def import(object)
+    # The +source+ argument should be +nil+ or set to the source document of the imported object. If
+    # it is +nil+, the source document is dynamically identified. If this identification is not
+    # possible and the source document would be needed, an error is raised.
+    def import(object, source: nil)
+      internal_import(object, SourceWrapper.new(source))
+    end
+    private
+    # Does the actual importing of the given +object+, using +wrapper+ to store/use the source
+    # document.
+    def internal_import(object, wrapper)
       mapped_object = @mapper[object.data]&.__getobj__ if object.kind_of?(HexaPDF::Object)
-      if object.kind_of?(HexaPDF::Object) && object.document? && @source != object.document
-        raise HexaPDF::Error, "Import error: Incorrect document object for importer"
-      elsif mapped_object && !mapped_object.null?
+      if mapped_object && !mapped_object.null?
         if object.class != mapped_object.class
           mapped_object = @destination.wrap(mapped_object, type: object.class)
         end
         mapped_object
       else
-        duplicate(object)
+        duplicate(object, wrapper)
       end
     end
-    private
     # Recursively duplicates the object.
     #
     # PDF objects are automatically added to the destination document if they are indirect objects
     # in the source document.
-    def duplicate(object)
+    def duplicate(object, wrapper)
       case object
       when Hash
-        object.transform_values {|v| duplicate(v) }
+        object.transform_values {|v| duplicate(v, wrapper) }
       when Array
-        object.map {|v| duplicate(v) }
+        object.map {|v| duplicate(v, wrapper) }
       when HexaPDF::Reference
-        import(@source.object(object))
+        raise HexaPDF::Error, "Import error: No source document specified" unless wrapper.source
+        internal_import(wrapper.source.object(object), wrapper)
       when HexaPDF::Object
+        wrapper.source ||= object.document
         if object.type == :Catalog || object.type == :Pages
           @mapper[object.data] = nil
         elsif (mapped_object = @mapper[object.data]&.__getobj__) && !mapped_object.null?
@@ -132,8 +137,8 @@ module HexaPDF
           @destination.add(obj) if object.indirect?
           obj.data.stream = obj.data.stream.dup if obj.data.stream.kind_of?(String)
-          obj.data.value = duplicate(obj.data.value)
-          obj.data.value.update(duplicate(object.copy_inherited_values)) if object.type == :Page
+          obj.data.value = duplicate(obj.data.value, wrapper)
+          obj.data.value.update(duplicate(object.copy_inherited_values, wrapper)) if object.type == :Page
           obj
         end
       when String

data/lib/hexapdf/layout/list_box.rb CHANGED Viewed

@@ -207,7 +207,7 @@ module HexaPDF
         @results = []
         @results_item_marker_x = []
-        @children.each_with_index do |child, index|
+        @children.each do |child|
           shape = Geom2D::Polygon([left, top - height],
                                   [left + width, top - height],
                                   [left + width, top],
@@ -217,11 +217,7 @@ module HexaPDF
             remove_indent_from_frame_shape(shape) unless shape.polygons.empty?
           end
-          #p [:list, left, width, shape]
           item_frame = Frame.new(item_frame_left, top - height, item_frame_width, height, shape: shape)
-          #p [index, item_frame.x, @results_item_marker_x]
           @results_item_marker_x << item_frame.x - content_indentation
           box_fitter = BoxFitter.new([item_frame])

data/lib/hexapdf/object.rb CHANGED Viewed

@@ -159,6 +159,11 @@ module HexaPDF
       object
     end
+    # Returns +nil+ to end the recursion for field searching in Dictionary.field.
+    def self.field(_name)
+      nil
+    end
     # The wrapped HexaPDF::PDFData value.
     #
     # This attribute is not part of the public API!

data/lib/hexapdf/parser.rb CHANGED Viewed

@@ -70,6 +70,19 @@ module HexaPDF
       !@reconstructed_revision.nil?
     end
+    # Returns +true+ if the PDF file is a linearized file.
+    def linearized?
+      @linearized ||=
+        begin
+          @tokenizer.pos = @header_offset
+          3.times { @tokenizer.next_token } # parse: oid gen obj
+          obj = @tokenizer.next_object
+          obj.kind_of?(Hash) && obj.key?(:Linearized)
+        rescue MalformedPDFError
+          false
+        end
+    end
     # Loads the indirect (potentially compressed) object specified by the given cross-reference
     # entry.
     #

data/lib/hexapdf/revision.rb CHANGED Viewed

@@ -229,16 +229,22 @@ module HexaPDF
     end
     # :call-seq:
-    #   revision.each_modified_object {|obj| block }   -> revision
-    #   revision.each_modified_object                  -> Enumerator
+    #   revision.each_modified_object(delete: false, all: false) {|obj| block }   -> revision
+    #   revision.each_modified_object(delete: false, all: false)                  -> Enumerator
     #
-    # Calls the given block once for each object that has been modified since it was loaded. Deleted
-    # object and cross-reference streams are ignored.
+    # Calls the given block once for each object that has been modified since it was loaded. Added
+    # or deleted object and cross-reference streams as well as signature dictionaries are ignored.
+    #
+    # +delete+:: If the +delete+ argument is set to +true+, each modified object is deleted from the
+    #            active objects.
+    #
+    # +all+:: If the +all+ argument is set to +true+, added object and cross-reference streams are
+    #         also yielded.
     #
     # Note that this also means that for revisions without an associated cross-reference section all
     # loaded objects will be yielded.
-    def each_modified_object
-      return to_enum(__method__) unless block_given?
+    def each_modified_object(delete: false, all: false)
+      return to_enum(__method__, delete: delete, all: all) unless block_given?
       @objects.each do |oid, gen, obj|
         if @xref_section.entry?(oid, gen)
@@ -259,20 +265,17 @@ module HexaPDF
             end
             next if values_unchanged && streams_are_same
           end
+        elsif !all && (obj.type == :XRef || obj.type == :ObjStm)
+          next
         end
         yield(obj)
+        @objects.delete(oid) if delete
       end
       self
     end
-    # Resets the revision by deleting all loaded and added objects from it.
-    def reset_objects
-      @objects = HexaPDF::Utils::ObjectHash.new
-      @all_objects_loaded = false
-    end
     private
     # Loads a single object from the associated cross-reference section.

data/lib/hexapdf/revisions.rb CHANGED Viewed

@@ -93,6 +93,10 @@ module HexaPDF
               seen_xref_offsets[stm] = true
             end
+            if parser.linearized? && !trailer.key?(:Prev)
+              merge_revision = offset
+            end
             if merge_revision == offset
               xref_section.merge!(revisions.first.xref_section)
               offset = trailer[:Prev] # Get possible next offset before overwriting trailer

data/lib/hexapdf/tokenizer.rb CHANGED Viewed

@@ -285,7 +285,14 @@ module HexaPDF
         tmp = val.to_i
         # Handle object references, see PDF1.7 s7.3.10
         prepare_string_scanner(10)
-        tmp = Reference.new(tmp, @ss[1].to_i) if tmp > 0 && @ss.scan(REFERENCE_RE)
+        if @ss.scan(REFERENCE_RE)
+          tmp = if tmp > 0
+                  Reference.new(tmp, @ss[1].to_i)
+                else
+                  maybe_raise("Invalid indirect object reference (#{tmp},#{@ss[1].to_i})")
+                  nil
+                end
+        end
         tmp
       elsif val.match?(/\A[+-]?(?:\d+\.\d*|\.\d+)\z/)
         val << '0' if val.getbyte(-1) == 46 # dot '.'
@@ -315,21 +322,20 @@ module HexaPDF
       parentheses = 1
       while parentheses != 0
-        data = scan_until(/([()\\\r])/)
-        char = @ss[1]
+        data = scan_until(/[()\\\r]/)
         unless data
           raise HexaPDF::MalformedPDFError.new("Unclosed literal string found", pos: pos)
         end
         str << data
         prepare_string_scanner if @ss.eos?
-        case char
-        when '(' then parentheses += 1
-        when ')' then parentheses -= 1
-        when "\r"
+        case @ss.string.getbyte(@ss.pos - 1)
+        when 41 then parentheses -= 1 # )
+        when 40 then parentheses += 1 # (
+        when 13 # \r
           str[-1] = "\n"
           @ss.pos += 1 if @ss.peek(1) == "\n"
-        when '\\'
+        when 92 # \\
           str.chop!
           byte = @ss.get_byte
           if (data = LITERAL_STRING_ESCAPE_MAP[byte])