RubyGems - hexapdf - Versions diffs - 0.14.3 → 0.15.3 - Mend

hexapdf 0.14.3 → 0.15.3

Files changed (50) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +85 -0
data/lib/hexapdf/cli/form.rb +30 -8
data/lib/hexapdf/configuration.rb +19 -4
data/lib/hexapdf/content/canvas.rb +1 -0
data/lib/hexapdf/encryption/security_handler.rb +7 -2
data/lib/hexapdf/encryption/standard_security_handler.rb +16 -0
data/lib/hexapdf/error.rb +4 -3
data/lib/hexapdf/filter.rb +1 -0
data/lib/hexapdf/filter/crypt.rb +60 -0
data/lib/hexapdf/font/type1/afm_parser.rb +2 -1
data/lib/hexapdf/parser.rb +35 -11
data/lib/hexapdf/revision.rb +16 -0
data/lib/hexapdf/serializer.rb +7 -1
data/lib/hexapdf/tokenizer.rb +22 -3
data/lib/hexapdf/type/acro_form.rb +1 -0
data/lib/hexapdf/type/acro_form/appearance_generator.rb +29 -17
data/lib/hexapdf/type/acro_form/button_field.rb +8 -4
data/lib/hexapdf/type/acro_form/field.rb +1 -0
data/lib/hexapdf/type/acro_form/form.rb +37 -0
data/lib/hexapdf/type/acro_form/signature_field.rb +223 -0
data/lib/hexapdf/type/annotation.rb +13 -9
data/lib/hexapdf/type/annotations/widget.rb +3 -1
data/lib/hexapdf/type/font_descriptor.rb +9 -2
data/lib/hexapdf/type/page.rb +81 -0
data/lib/hexapdf/type/xref_stream.rb +7 -0
data/lib/hexapdf/utils/graphics_helpers.rb +4 -4
data/lib/hexapdf/version.rb +1 -1
data/test/hexapdf/content/test_canvas.rb +21 -0
data/test/hexapdf/encryption/test_security_handler.rb +15 -0
data/test/hexapdf/encryption/test_standard_security_handler.rb +27 -0
data/test/hexapdf/filter/test_crypt.rb +21 -0
data/test/hexapdf/font/type1/test_afm_parser.rb +5 -0
data/test/hexapdf/test_parser.rb +47 -3
data/test/hexapdf/test_revision.rb +21 -0
data/test/hexapdf/test_serializer.rb +3 -0
data/test/hexapdf/test_tokenizer.rb +22 -0
data/test/hexapdf/test_writer.rb +2 -2
data/test/hexapdf/type/acro_form/test_appearance_generator.rb +21 -2
data/test/hexapdf/type/acro_form/test_button_field.rb +13 -7
data/test/hexapdf/type/acro_form/test_field.rb +5 -0
data/test/hexapdf/type/acro_form/test_form.rb +46 -2
data/test/hexapdf/type/acro_form/test_signature_field.rb +38 -0
data/test/hexapdf/type/annotations/test_widget.rb +2 -0
data/test/hexapdf/type/test_annotation.rb +20 -10
data/test/hexapdf/type/test_font_descriptor.rb +7 -0
data/test/hexapdf/type/test_page.rb +187 -49
data/test/hexapdf/type/test_xref_stream.rb +7 -0
data/test/hexapdf/utils/test_graphics_helpers.rb +8 -0
metadata +6 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: c43d8e9e117db1717ddfee73a54e4384743b8aa35863ab5bd19ffe57b8ce5674
-  data.tar.gz: 1020c8a3de8fcdf201500c1c0d22dfb99ed27daebac7baac92748f8127efc992
+  metadata.gz: 592ea8ae7648df43e92ba50effdf3f8f34163e4acf7fb9567c3b38db46eb598e
+  data.tar.gz: 6c3b7d32a1499f2e2133fbafdf46b9d3cd4d1df41b9ae308c0c32ea39aefff2d
 SHA512:
-  metadata.gz: e19eea4e88077afb7e8532fa6fe9ab2a03ffc5588749b72277462a971ebcec877ee72868d0ab698744117d46566be98e65c10225649d3bd1b4cd6e64e9625767
-  data.tar.gz: 6626a9feba0af0b46f293c1069a0d53b458a0dc29d08b82253f14f9bb98a878b914042faccc433b73f2f0e35d4da47c58a1bdebd2f3dee2fefb24c076a4e6bb3
+  metadata.gz: fdf9edf53c0443d459008634ddbff7cd80fc1422fa558df41db04af0d9eeb512ea050d5b4a10987b824c675203e39bc851d1b2a68d0178f2cd12fada66b31245
+  data.tar.gz: 8e6a7b91da0ed2b63f7bc6d52c3993553f439edf986253b3508d0510310195c2a6f3721c2cfed735afc3d60dacacedc8207a6fac361bedc354bc6bd779207eac

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,88 @@
+## 0.15.3 - 2021-05-01
+### Fixed
+* Handling of general (not document-level), unencrypted metadata streams
+## 0.15.2 - 2021-05-01
+### Fixed
+* Handling of unencrypted metadata streams
+## 0.15.1 - 2021-04-15
+### Fixed
+* Potential division by zero when calculating the scaling for XObjects
+* Handling of XObjects with a width or height of zero when drawing on canvas
+## 0.15.0 - 2021-04-12
+### Added
+* [HexaPDF::Type::Page#flatten_annotations] for flattening the annotations of a
+  page
+* [HexaPDF::Type::AcroForm::Form#flatten] for flattening interactive forms
+* [HexaPDF::Revision#update] for updating the stored wrapper class of a PDF
+  object
+* [HexaPDF::Type::AcroForm::SignatureField] for working with AcroForm signature
+  fields
+* Support for form field flattening to the `hexapdf form` CLI command
+### Changed
+* **Breaking change**: Overhauled the interface for accessing appearances of
+  annotations to make it more convenient
+* Validation of [HexaPDF::Type::FontDescriptor] to delete invalid `/FontWeight`
+  value
+* [HexaPDF::MalformedPDFError#pos] to be an accessor instead of a reader and
+  updated the exception message
+* Configuration option 'acro_form.fallback_font' to allow a callable object for
+  more advanced fallback font handling
+### Fixed
+* [HexaPDF::Type::Annotations::Widget#background_color] to correctly handle
+  empty background color arrays
+* [HexaPDF::Type::AcroForm::Field#delete_widget] to update the wrapper object
+  stored in the document in case the widget is embedded
+* Processing of invalid PDF files containing a space,CR,LF combination after
+  the 'stream' keyword
+* Cross-reference stream reconstruction with respect to detection of linearized
+  files
+* Detection of existing appearances for AcroForm push button fields when
+  creating appearances
+## 0.14.4 - 2021-02-27
+### Added
+* Support for the Crypt filters
+### Changed
+* [HexaPDF::MalformedPDFError] to make the `pos` argument optional
+### Fixed
+* Handling of invalid floating point numbers NaN, Inf and -Inf when serializing
+* Processing of invalid PDF files containing NaN and Inf instead of numbers
+* Bug in Type1 font AFM parser that occurred if the file doesn't end with a new
+  line character
+* Cross-reference table reconstruction to handle the case of an entry specifying
+  a non-existent indirect object
+* Cross-reference table reconstruction to handle trailers specified by cross-
+  reference streams
+* Cross-reference table reconstruction to use the set security handler for
+  decrypting indirect objects
+* Parsing of cross-reference streams where data is missing
 ## 0.14.3 - 2021-02-16
 ### Fixed

data/lib/hexapdf/cli/form.rb CHANGED Viewed

@@ -52,18 +52,26 @@ module HexaPDF
           If the the output file name is not given, all form fields are listed in page order. Use
           the global --verbose option to show additional information like field type and location.
-          If the output file name is given, the fields can be interactively filled out. By
-          additionally using the --template option, the data for the fields is read from the given
-          template file instead of the standard input.
+          If the output file name is given, the fields can be filled out interactively, via a
+          template or just flattened by using the respective options. Form field flattening can also
+          be activated in addition to filling out the form. If neither --fill, --template nor
+          --flatten is specified, --fill is implied.
         EOF
         options.on("--password PASSWORD", "-p", String,
                    "The password for decryption. Use - for reading from standard input.") do |pwd|
           @password = (pwd == '-' ? read_password : pwd)
         end
+        options.on("--fill", "Fill out the form") do
+          @fill = true
+        end
         options.on("--template TEMPLATE_FILE", "-t TEMPLATE_FILE",
-                   "Use the template file for the field values") do |template|
+                   "Use the template file for the field values (implies --fill)") do |template|
           @template = template
+          @fill = true
+        end
+        options.on('--flatten', 'Flatten the form fields') do
+          @flatten = true
         end
         options.on("--[no-]viewer-override", "Let the PDF viewer override the visual " \
                    "appearance. Default: use setting from input PDF") do |need_appearances|
@@ -75,6 +83,8 @@ module HexaPDF
         end
         @password = nil
+        @fill = false
+        @flatten = false
         @template = nil
         @need_appearances = nil
         @incremental = true
@@ -82,16 +92,28 @@ module HexaPDF
       def execute(in_file, out_file = nil) #:nodoc:
         maybe_raise_on_existing_file(out_file) if out_file
+        if (@fill || @flatten) && !out_file
+          raise "Output file missing"
+        end
         with_document(in_file, password: @password, out_file: out_file,
                       incremental: @incremental) do |doc|
           if !doc.acro_form
             raise "This PDF doesn't contain an interactive form"
           elsif out_file
             doc.acro_form[:NeedAppearances] = @need_appearances unless @need_appearances.nil?
-            if @template
-              fill_form_with_template(doc)
-            else
-              fill_form(doc)
+            if @fill || !@flatten
+              if @template
+                fill_form_with_template(doc)
+              else
+                fill_form(doc)
+              end
+            end
+            if @flatten
+              unless doc.acro_form.flatten.empty?
+                $stderr.puts "Warning: Not all form fields could be flattened"
+                doc.catalog.delete(:AcroForm)
+                doc.delete(doc.acro_form)
+              end
             end
           else
             list_form_fields(doc)

data/lib/hexapdf/configuration.rb CHANGED Viewed

@@ -164,9 +164,20 @@ module HexaPDF
   # acro_form.fallback_font::
   #    The font that should be used when a variable text field references a font that cannot be used.
   #
-  #    Can either be the name of a font, like 'Helvetica', or an array consisting of the font name
-  #    and a hash of font options, like ['Helvetica', variant: :italic]. If set to +nil+, the use of
-  #    the fallback font is disabled.
+  #    Can be one of the following:
+  #
+  #    * The name of a font, like 'Helvetica'.
+  #
+  #    * An array consisting of the font name and a hash of font options, like ['Helvetica',
+  #      variant: :italic].
+  #
+  #    * A callable object receiving the field and the font object (or +nil+ if no valid font object
+  #      was found) and which has to return either a font name or an array consisting of the font
+  #      name and a hash of font options. This way the response can be different depending on the
+  #      original font and it would also allow e.g. modifying the configured fonts to add custom
+  #      ones.
+  #
+  #    If set to +nil+, the use of the fallback font is disabled.
   #
   #    Default is 'Helvetica'.
   #
@@ -393,7 +404,7 @@ module HexaPDF
                         DCTDecode: 'HexaPDF::Filter::PassThrough',
                         DCT: 'HexaPDF::Filter::PassThrough',
                         JPXDecode: 'HexaPDF::Filter::PassThrough',
-                        Crypt: nil,
+                        Crypt: 'HexaPDF::Filter::Crypt',
                         Encryption: 'HexaPDF::Filter::Encryption',
                       },
                       'font.map' => {},
@@ -516,6 +527,9 @@ module HexaPDF
                         XXAcroFormField: 'HexaPDF::Type::AcroForm::Field',
                         XXAppearanceDictionary: 'HexaPDF::Type::Annotation::AppearanceDictionary',
                         Border: 'HexaPDF::Type::Annotation::Border',
+                        SigFieldLock: 'HexaPDF::Type::AcroForm::SignatureField::LockDictionary',
+                        SV: 'HexaPDF::Type::AcroForm::SignatureField::SeedValueDictionary',
+                        SVCert: 'HexaPDF::Type::AcroForm::SignatureField::CertificateSeedValueDictionary',
                       },
                       'object.subtype_map' => {
                         nil => {
@@ -561,6 +575,7 @@ module HexaPDF
                           Tx: 'HexaPDF::Type::AcroForm::TextField',
                           Btn: 'HexaPDF::Type::AcroForm::ButtonField',
                           Ch: 'HexaPDF::Type::AcroForm::ChoiceField',
+                          Sig: 'HexaPDF::Type::AcroForm::SignatureField',
                         },
                       })

data/lib/hexapdf/content/canvas.rb CHANGED Viewed

@@ -1260,6 +1260,7 @@ module HexaPDF
         unless obj.kind_of?(HexaPDF::Stream)
           obj = context.document.images.add(obj)
         end
+        return obj if obj.width == 0 || obj.height == 0
         width, height = calculate_dimensions(obj.width, obj.height,
                                              rwidth: width, rheight: height)

data/lib/hexapdf/encryption/security_handler.rb CHANGED Viewed

@@ -268,7 +268,7 @@ module HexaPDF
           str.replace(string_algorithm.decrypt(key, str))
         end
-        if obj.kind_of?(HexaPDF::Stream)
+        if obj.kind_of?(HexaPDF::Stream) && obj.raw_stream.filter[0] != :Crypt
           unless string_algorithm == stream_algorithm
             key = object_key(obj.oid, obj.gen, stream_algorithm)
           end
@@ -300,7 +300,12 @@ module HexaPDF
             obj.raw_stream.key == key && obj.raw_stream.algorithm == stream_algorithm
           obj.raw_stream.undecrypted_fiber
         else
-          stream_algorithm.encryption_fiber(key, result)
+          filter = obj[:Filter]
+          if filter == :Crypt || (filter.kind_of?(PDFArray) && filter[0] == :Crypt)
+            result
+          else
+            stream_algorithm.encryption_fiber(key, result)
+          end
         end
       end

data/lib/hexapdf/encryption/standard_security_handler.rb CHANGED Viewed

@@ -240,6 +240,22 @@ module HexaPDF
         end
       end
+      def decrypt(obj) #:nodoc:
+        if dict[:V] >= 4 && obj.type == :Metadata && obj[:Subtype] == :XML && !dict[:EncryptMetadata]
+          obj
+        else
+          super
+        end
+      end
+      def encrypt_stream(obj) #:nodoc
+        if dict[:V] >= 4 && obj.type == :Metadata && obj[:Subtype] == :XML && !dict[:EncryptMetadata]
+          obj.stream_encoder
+        else
+          super
+        end
+      end
       private
       # Prepares the security handler for use in encrypting the document.

data/lib/hexapdf/error.rb CHANGED Viewed

@@ -43,12 +43,13 @@ module HexaPDF
   class MalformedPDFError < Error
     # The byte position in the PDF file where the error occurred.
-    attr_reader :pos
+    attr_accessor :pos
     # Creates a new malformed PDF error object for the given exception message.
     #
-    # The byte position where the error occured has to be given via the +pos+ argument.
-    def initialize(message, pos:)
+    # The byte position where the error occurred can either be given via the +pos+ argument or later
+    # via the #pos accessor but must be set before the exception message is retrieved.
+    def initialize(message, pos: nil)
       super(message)
       @pos = pos
     end

data/lib/hexapdf/filter.rb CHANGED Viewed

@@ -95,6 +95,7 @@ module HexaPDF
     autoload(:Predictor, 'hexapdf/filter/predictor')
     autoload(:Encryption, 'hexapdf/filter/encryption')
+    autoload(:Crypt, 'hexapdf/filter/crypt')
     autoload(:PassThrough, 'hexapdf/filter/pass_through')

data/lib/hexapdf/filter/crypt.rb ADDED Viewed

@@ -0,0 +1,60 @@
+# -*- encoding: utf-8; frozen_string_literal: true -*-
+#
+#--
+# This file is part of HexaPDF.
+#
+# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
+# Copyright (C) 2014-2020 Thomas Leitner
+#
+# HexaPDF is free software: you can redistribute it and/or modify it
+# under the terms of the GNU Affero General Public License version 3 as
+# published by the Free Software Foundation with the addition of the
+# following permission added to Section 15 as permitted in Section 7(a):
+# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
+# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
+# INFRINGEMENT OF THIRD PARTY RIGHTS.
+#
+# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
+#
+# The interactive user interfaces in modified source and object code
+# versions of HexaPDF must display Appropriate Legal Notices, as required
+# under Section 5 of the GNU Affero General Public License version 3.
+#
+# In accordance with Section 7(b) of the GNU Affero General Public
+# License, a covered work must retain the producer line in every PDF that
+# is created or manipulated using HexaPDF.
+#
+# If the GNU Affero General Public License doesn't fit your need,
+# commercial licenses are available at <https://gettalong.at/hexapdf/>.
+#++
+require 'hexapdf/error'
+module HexaPDF
+  module Filter
+    # This filter module implements the Crypt filter. The only supported part is using the Identity
+    # filter.
+    module Crypt
+      # See HexaPDF::Filter
+      def self.decoder(source, options)
+        if !options || !options.key?(:Name) || options[:Name] == :Identity
+          source
+        else
+          raise FilterError, "Handling of Crypt filters besides Identity is not implemented"
+        end
+      end
+      singleton_class.send(:alias_method, :encoder, :decoder)
+    end
+  end
+end

data/lib/hexapdf/font/type1/afm_parser.rb CHANGED Viewed

@@ -207,7 +207,8 @@ module HexaPDF
         # Returns the rest of the line, with whitespace stripped.
         def parse_string
-          line = @line.strip!
+          @line.strip!
+          line = @line
           @line = ''
           line
         end

data/lib/hexapdf/parser.rb CHANGED Viewed

@@ -56,10 +56,12 @@ module HexaPDF
     # PDF references are resolved using the associated Document object.
     def initialize(io, document)
       @io = io
-      @tokenizer = Tokenizer.new(io)
+      on_correctable_error = document.config['parser.on_correctable_error'].curry[document]
+      @tokenizer = Tokenizer.new(io, on_correctable_error: on_correctable_error)
       @document = document
       @object_stream_data = {}
       @reconstructed_revision = nil
+      @in_reconstruct_revision = false
       retrieve_pdf_header_offset_and_version
     end
@@ -94,7 +96,8 @@ module HexaPDF
       @document.wrap(obj, oid: oid, gen: gen, stream: stream)
     rescue HexaPDF::MalformedPDFError
-      reconstructed_revision.object(xref_entry)
+      reconstructed_revision.object(xref_entry) ||
+        @document.wrap(nil, oid: xref_entry.oid, gen: xref_entry.gen)
     end
     # Parses the indirect object at the specified offset.
@@ -137,11 +140,13 @@ module HexaPDF
           raise_malformed("A stream needs a dictionary, not a(n) #{object.class}", pos: offset)
         end
         tok1 = @tokenizer.next_byte
-        tok2 = @tokenizer.next_byte if tok1 == 13 # 13=CR, 10=LF
+        if tok1 == 32 # space
+          maybe_raise("Keyword stream followed by space instead of LF or CR/LF", pos: @tokenizer.pos)
+          tok1 = @tokenizer.next_byte
+        end
+        tok2 = @tokenizer.next_byte if tok1 == 13 # CR
         if tok1 != 10 && tok1 != 13
-          tok2 = @tokenizer.next_byte
-          maybe_raise("Keyword stream must be followed by LF or CR/LF", pos: @tokenizer.pos,
-                      force: tok1 != 32 || (tok2 != 10 && tok2 != 13)) # 32=space
+          raise_malformed("Keyword stream must be followed by LF or CR/LF", pos: @tokenizer.pos)
         elsif tok1 == 13 && tok2 != 10
           maybe_raise("Keyword stream must be followed by LF or CR/LF, not CR alone",
                       pos: @tokenizer.pos)
@@ -211,7 +216,12 @@ module HexaPDF
         unless obj.respond_to?(:xref_section)
           raise_malformed("Object is not a cross-reference stream", pos: pos)
         end
-        xref_section = obj.xref_section
+        begin
+          xref_section = obj.xref_section
+        rescue MalformedPDFError => e
+          e.pos = pos
+          raise
+        end
         trailer = obj.trailer
         unless xref_section.entry?(obj.oid, obj.gen)
           maybe_raise("Cross-reference stream doesn't contain entry for itself", pos: pos)
@@ -389,12 +399,16 @@ module HexaPDF
     # If the file contains multiple cross-reference sections, all objects will be put into a single
     # cross-reference table, later objects overwriting prior ones.
     def reconstruct_revision
+      return if @in_reconstruct_revision
+      @in_reconstruct_revision = true
       raise unless @document.config['parser.try_xref_reconstruction']
       msg = "#{$!} - trying cross-reference table reconstruction"
       @document.config['parser.on_correctable_error'].call(@document, msg, @tokenizer.pos)
       xref = XRefSection.new
       @tokenizer.pos = 0
+      linearized = nil
       while true
         @tokenizer.skip_whitespace
         pos = @tokenizer.pos
@@ -410,13 +424,17 @@ module HexaPDF
             @tokenizer.pos = next_new_line_pos
           elsif gen.kind_of?(Integer) && tok.kind_of?(Tokenizer::Token) && tok == 'obj'
             xref.add_in_use_entry(token, gen, pos)
+            if linearized.nil?
+              obj = @tokenizer.next_object rescue nil
+              linearized = obj.kind_of?(Hash) && obj.key?(:Linearized)
+            end
             @tokenizer.scan_until(/(?:\n|\r\n?)endobj\b/)
           end
         elsif token.kind_of?(Tokenizer::Token) && token == 'trailer'
           obj = @tokenizer.next_object rescue nil
           # Use last trailer found in case of multiple revisions but use first trailer in case of
           # linearized file.
-          trailer = obj if obj.kind_of?(Hash) && (obj.key?(:Prev) || trailer.nil?)
+          trailer = obj if obj.kind_of?(Hash) && (!linearized || trailer.nil?)
         elsif token == Tokenizer::NO_MORE_TOKENS
           break
         else
@@ -424,16 +442,22 @@ module HexaPDF
         end
       end
-      trailer&.delete(:Prev) # no need for this and may wreak havoc
       if !trailer || trailer.empty?
-        raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
+        _, trailer = load_revision(startxref_offset) rescue nil
+        unless trailer
+          @in_reconstruct_revision = false
+          raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
+        end
       end
+      trailer&.delete(:Prev) # no need for this and may wreak havoc
       loader = lambda do |xref_entry|
         obj, oid, gen, stream = parse_indirect_object(xref_entry.pos)
-        @document.wrap(obj, oid: oid, gen: gen, stream: stream)
+        obj = @document.wrap(obj, oid: oid, gen: gen, stream: stream)
+        @document.security_handler ? @document.security_handler.decrypt(obj) : obj
       end
+      @in_reconstruct_revision = false
       Revision.new(@document.wrap(trailer, type: :XXTrailer), xref_section: xref,
                    loader: loader)
     end