RubyGems - hexapdf - Versions diffs - 0.18.0 → 0.19.3 - Mend

hexapdf 0.18.0 → 0.19.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +46 -2
data/lib/hexapdf/cli/command.rb +7 -1
data/lib/hexapdf/content/canvas.rb +2 -2
data/lib/hexapdf/content/graphics_state.rb +167 -25
data/lib/hexapdf/dictionary_fields.rb +1 -1
data/lib/hexapdf/encryption/standard_security_handler.rb +1 -2
data/lib/hexapdf/layout/style.rb +2 -1
data/lib/hexapdf/parser.rb +21 -9
data/lib/hexapdf/task/optimize.rb +46 -3
data/lib/hexapdf/type/font.rb +5 -0
data/lib/hexapdf/type/font_type3.rb +20 -0
data/lib/hexapdf/version.rb +1 -1
data/lib/hexapdf/writer.rb +8 -2
data/test/hexapdf/content/test_graphics_state.rb +9 -1
data/test/hexapdf/content/test_operator.rb +8 -3
data/test/hexapdf/encryption/test_standard_security_handler.rb +8 -6
data/test/hexapdf/layout/test_style.rb +11 -0
data/test/hexapdf/task/test_optimize.rb +26 -0
data/test/hexapdf/test_dictionary_fields.rb +1 -0
data/test/hexapdf/test_parser.rb +14 -0
data/test/hexapdf/test_writer.rb +42 -13
data/test/hexapdf/type/test_font.rb +4 -0
data/test/hexapdf/type/test_font_type3.rb +16 -1
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: d95ce1575c017f44b2c0f96e7e5a927b8c4f8c3adf6aa0f3a7dc983c5dfa77a8
-  data.tar.gz: e49a23655e5ce4f4ded50c5ac0c90d7892c41bd526dbdfe72ad85cca4891098b
+  metadata.gz: 85c063a63af9729acc10a54ef53fba69d73b75f1c06ff0e6de246df920e17dd9
+  data.tar.gz: dcea10d0ccfe66282c92e6bb41c1d57927d525e1618753d598a910546c8a8e82
 SHA512:
-  metadata.gz: 37e3b09a059bb7875c797f50f60642b080f20a081fa2540f74837ac51a8cb9c1aa5b93e6502dd9f242d178f68ea654c0619b2e55d4cb934e36badca4a5057d1c
-  data.tar.gz: cd9fc830b8b4f5387478d7e6c32260672a9332bb2faf3aa7afcb4d71bf7c39481616f7f33d731229a6099e44dac84bbecd651c3618593f6c7ba872b9e958a3cb
+  metadata.gz: 4f1d468375c4ce336e55a09d897de9a8c95babcfa1b4441cc04a9dc712435c64906016647baa030f5695f9434d18214c30bca746f245b53a69321139f63256b9
+  data.tar.gz: 1f1895ca7ad46bae1bf33790d4c8daa7f72adfcc00cc26e1611b7ece64d7a5a76e7f1e06be4022bb58f5dbd84154e9846cb4feb94f727c9abe99f5b7c8b87fcf

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,47 @@
+## 0.19.3 - 2021-12-14
+### Fixed
+* Handling of invalid files where the "startxref" keyword and its value are on
+  the same line
+## 0.19.2 - 2021-12-14
+### Fixed
+* Set the trailer's ID field to an array of two empty strings when decrypting in
+  case it is missing
+* Incremental writing when one of the existing revisions contains a
+  cross-reference stream
+## 0.19.1 - 2021-12-12
+### Added
+* [HexaPDF::Type::FontType3#bounding_box] to fix content stream processing error
+### Fixed
+* Calculation of scaled font size for [HexaPDF::Content::GraphicsState] and
+  [HexaPDF::Layout::Style] when Type3 fonts are used
+## 0.19.0 - 2021-11-24
+### Added
+* Page resource pruning to the optimization task
+* An option for page resources pruning to the optimization options of the
+  `hexapdf` command
+### Fixed
+* Handling of invalid date strings with a minute time zone offset greater than
+  59
 ## 0.18.0 - 2021-11-04
 ### Added
@@ -6,7 +50,7 @@
   device colors in parts other than the canvas
 * [HexaPDF::Type::AcroForm::VariableTextField::create_appearance_string] for
   centralized creation of appearance strings
-* [HexaPDF::Object.make_direct] for making objects and all parts of them direct
+* [HexaPDF::Object::make_direct] for making objects and all parts of them direct
   instead of indirect
 ### Changed
@@ -26,7 +70,7 @@
   dictionary are indirect objects
 * [HexaPDF::Content::GraphicObject::EndpointArc] to correctly determine the
   start and end points
-* [HexaPDF::Dictionary#perform_validation] to correctly handle objects that
+* HexaPDF::Dictionary#perform_validation to correctly handle objects that
   should not be indirect objects

data/lib/hexapdf/cli/command.rb CHANGED Viewed

@@ -66,6 +66,7 @@ module HexaPDF
         @out_options.xref_streams = :preserve
         @out_options.streams = :preserve
         @out_options.optimize_fonts = false
+        @out_options.prune_page_resources = false
         @out_options.encryption = :preserve
         @out_options.enc_user_pwd = @out_options.enc_owner_pwd = nil
@@ -169,6 +170,10 @@ module HexaPDF
                    "time; default: #{@out_options.compress_pages})") do |c|
           @out_options.compress_pages = c
         end
+        options.on("--[no-]prune-page-resources", "Prunes unused objects from the page resources " \
+                   "(may take a long time; default: #{@out_options.prune_page_resources})") do |c|
+          @out_options.prune_page_resources = c
+        end
         options.on("--[no-]optimize-fonts", "Optimize embedded font files; " \
                    "default: #{@out_options.optimize_fonts})") do |o|
           @out_options.optimize_fonts = o
@@ -236,7 +241,8 @@ module HexaPDF
         doc.task(:optimize, compact: @out_options.compact,
                  object_streams: @out_options.object_streams,
                  xref_streams: @out_options.xref_streams,
-                 compress_pages: @out_options.compress_pages)
+                 compress_pages: @out_options.compress_pages,
+                 prune_page_resources: @out_options.prune_page_resources)
         if @out_options.streams != :preserve || @out_options.optimize_fonts
           doc.each(only_current: false) do |obj|
             optimize_stream(obj)

data/lib/hexapdf/content/canvas.rb CHANGED Viewed

@@ -589,7 +589,7 @@ module HexaPDF
       #
       # The line cap style specifies how the ends of stroked open paths should look like.
       #
-      # The +style+ parameter can be one of:
+      # The +style+ parameter can be one of (also see LineCapStyle):
       #
       # :butt or 0::
       #     Stroke is squared off at the endpoint of a path.
@@ -641,7 +641,7 @@ module HexaPDF
       #
       # The line join style specifies the shape that is used at the corners of stroked paths.
       #
-      # The +style+ parameter can be one of:
+      # The +style+ parameter can be one of (also see LineJoinStyle):
       #
       # :miter or 0::
       #     The outer lines of the two segments continue until they meet at an angle.

data/lib/hexapdf/content/graphics_state.rb CHANGED Viewed

@@ -73,7 +73,7 @@ module HexaPDF
     end
     # Defines all available line cap styles as constants. Each line cap style is an instance of
-    # NamedValue. For use with Content::GraphicsState#line_cap_style.
+    # NamedValue, see ::normalize. For use with e.g. Content::Canvas#line_cap_style.
     #
     # See: PDF1.7 s8.4.3.3
     module LineCapStyle
@@ -95,18 +95,39 @@ module HexaPDF
       end
       # Stroke is squared off at the endpoint of a path.
+      #
+      # Specify as 0 or :butt.
+      #
+      #   #>pdf-small-hide
+      #   canvas.line_cap_style(:butt)
+      #   canvas.line_width(10).line(50, 20, 50, 80).stroke
+      #   canvas.stroke_color("white").line_width(1).line(50, 20, 50, 80).stroke
       BUTT_CAP = NamedValue.new(:butt, 0)
       # A semicircular arc is drawn at the endpoint of a path.
+      #
+      # Specify as 1 or :round.
+      #
+      #   #>pdf-small-hide
+      #   canvas.line_cap_style(:round)
+      #   canvas.line_width(10).line(50, 20, 50, 80).stroke
+      #   canvas.stroke_color("white").line_width(1).line(50, 20, 50, 80).stroke
       ROUND_CAP = NamedValue.new(:round, 1)
       # The stroke continues half the line width beyond the endpoint of a path.
+      #
+      # Specify as 2 or :projecting_square.
+      #
+      #   #>pdf-small-hide
+      #   canvas.line_cap_style(:projecting_square)
+      #   canvas.line_width(10).line(50, 20, 50, 80).stroke
+      #   canvas.stroke_color("white").line_width(1).line(50, 20, 50, 80).stroke
       PROJECTING_SQUARE_CAP = NamedValue.new(:projecting_square, 2)
     end
     # Defines all available line join styles as constants. Each line join style is an instance of
-    # NamedValue. For use with Content::GraphicsState#line_join_style.
+    # NamedValue, see ::normalize. For use with e.g. Content::Canvas#line_join_style.
     #
     # See: PDF1.7 s8.4.3.4
     module LineJoinStyle
@@ -127,20 +148,47 @@ module HexaPDF
         end
       end
-      # The outer lines of the two segments continue until the meet at an angle.
+      # The outer lines of the two segments continue until they meet at an angle.
+      #
+      # Specify as 0 or :miter.
+      #
+      #   #>pdf-small-hide
+      #   canvas.line_join_style(:miter)
+      #   canvas.line_width(10).
+      #     polyline(20, 20, 50, 80, 80, 20).stroke
+      #   canvas.stroke_color("white").line_width(1).line_join_style(:bevel).
+      #     polyline(20, 20, 50, 80, 80, 20).stroke
       MITER_JOIN = NamedValue.new(:miter, 0)
       # An arc of a circle is drawn around the point where the segments meet.
+      #
+      # Specify as 1 or :round.
+      #
+      #   #>pdf-small-hide
+      #   canvas.line_join_style(:round)
+      #   canvas.line_width(10).
+      #     polyline(20, 20, 50, 80, 80, 20).stroke
+      #   canvas.stroke_color("white").line_width(1).line_join_style(:bevel).
+      #     polyline(20, 20, 50, 80, 80, 20).stroke
       ROUND_JOIN = NamedValue.new(:round, 1)
-      # The two segments are finished with butt caps and the space between the ends is filled with
-      # a triangle.
+      # The two segments are finished with butt caps and the space between the ends is filled with a
+      # triangle.
+      #
+      # Specify as 2 or :bevel.
+      #
+      #   #>pdf-small-hide
+      #   canvas.line_join_style(:bevel)
+      #   canvas.line_width(10).
+      #     polyline(20, 20, 50, 80, 80, 20).stroke
+      #   canvas.stroke_color("white").line_width(1).line_join_style(:bevel).
+      #     polyline(20, 20, 50, 80, 80, 20).stroke
       BEVEL_JOIN = NamedValue.new(:bevel, 2)
     end
-    # The line dash pattern defines how a line should be dashed. For use with
-    # Content::GraphicsState#line_dash_pattern.
+    # The line dash pattern defines how a line should be dashed. For use with e.g.
+    # Content::Canvas#line_dash_pattern.
     #
     # A dash pattern consists of two parts: the dash array and the dash phase. The dash array
     # defines the length of alternating dashes and gaps (important: starting with dashes). And the
@@ -159,6 +207,12 @@ module HexaPDF
     # See: PDF1.7 s8.4.3.6
     class LineDashPattern
+      # :call-seq:
+      #   LineDashPattern.normalize(line_dash_pattern)         -> line_dash_pattern
+      #   LineDashPattern.normalize(array, phase = 0)          -> LineDashPattern.new(array, phase)
+      #   LineDashPattern.normalize(number, phase = 0)         -> LineDashPattern.new([number], phase)
+      #   LineDashPattern.normalize(0)                         -> LineDashPattern.new
+      #
       # Returns the arguments normalized to a valid LineDashPattern instance.
       #
       # If +array+ is 0, the default line dash pattern representing a solid line will be used. If it
@@ -206,8 +260,8 @@ module HexaPDF
     end
-    # Defines all available rendering intents as constants. For use with
-    # Content::GraphicsState#rendering_intent.
+    # Defines all available rendering intents as constants. For use with e.g.
+    # Content::Canvas#rendering_intent.
     #
     # See: PDF1.7 s8.6.5.8
     module RenderingIntent
@@ -241,7 +295,7 @@ module HexaPDF
     end
     # Defines all available text rendering modes as constants. Each text rendering mode is an
-    # instance of NamedValue. For use with Content::GraphicsState#text_rendering_mode.
+    # instance of NamedValue. For use with e.g. Content::Canvas#text_rendering_mode.
     #
     # See: PDF1.7 s9.3.6
     module TextRenderingMode
@@ -272,28 +326,97 @@ module HexaPDF
         end
       end
-      # Fill text
+      # Fill text.
+      #
+      # Specify as 0 or :fill.
+      #
+      #   #>pdf-small-hide
+      #   canvas.font("Helvetica", size: 13)
+      #   canvas.stroke_color("green").line_width(0.5)
+      #   canvas.text_rendering_mode(:fill)
+      #   canvas.text("#{canvas.text_rendering_mode.name}", at: [10, 50])
       FILL = NamedValue.new(:fill, 0)
-      # Stroke text
+      # Stroke text.
+      #
+      # Specify as 1 or :stroke.
+      #
+      #   #>pdf-small-hide
+      #   canvas.font("Helvetica", size: 13)
+      #   canvas.stroke_color("green").line_width(0.5)
+      #   canvas.text_rendering_mode(:stroke)
+      #   canvas.text("#{canvas.text_rendering_mode.name}", at: [10, 50])
       STROKE = NamedValue.new(:stroke, 1)
-      # Fill, then stroke text
+      # Fill, then stroke text.
+      #
+      # Specify as 2 or :fill_stroke.
+      #
+      #   #>pdf-small-hide
+      #   canvas.font("Helvetica", size: 13)
+      #   canvas.stroke_color("green").line_width(0.5)
+      #   canvas.text_rendering_mode(:fill_stroke)
+      #   canvas.text("#{canvas.text_rendering_mode.name}", at: [10, 50])
       FILL_STROKE = NamedValue.new(:fill_stroke, 2)
-      # Neither fill nor stroke text (invisible)
+      # Neither fill nor stroke text (invisible).
+      #
+      # Specify as 3 or :invisible.
+      #
+      #   #>pdf-small-hide
+      #   canvas.font("Helvetica", size: 13)
+      #   canvas.stroke_color("green").line_width(0.5)
+      #   canvas.text_rendering_mode(:invisible)
+      #   canvas.text("#{canvas.text_rendering_mode.name}", at: [10, 50])
+      #   canvas.stroke_color("red").line_width(20).line(30, 20, 30, 80).stroke
       INVISIBLE = NamedValue.new(:invisible, 3)
-      # Fill text and add to path for clipping
+      # Fill text and add to path for clipping.
+      #
+      # Specify as 4 or :fill_clip.
+      #
+      #   #>pdf-small-hide
+      #   canvas.font("Helvetica", size: 13)
+      #   canvas.stroke_color("green").line_width(0.5)
+      #   canvas.text_rendering_mode(:fill_clip)
+      #   canvas.text("#{canvas.text_rendering_mode.name}", at: [10, 50])
+      #   canvas.stroke_color("red").line_width(20).line(30, 20, 30, 80).stroke
       FILL_CLIP = NamedValue.new(:fill_clip, 4)
-      # Stroke text and add to path for clipping
+      # Stroke text and add to path for clipping.
+      #
+      # Specify as 5 or :stroke_clip.
+      #
+      #   #>pdf-small-hide
+      #   canvas.font("Helvetica", size: 13)
+      #   canvas.stroke_color("green").line_width(0.5)
+      #   canvas.text_rendering_mode(:stroke_clip)
+      #   canvas.text("#{canvas.text_rendering_mode.name}", at: [10, 50])
+      #   canvas.stroke_color("red").line_width(20).line(30, 20, 30, 80).stroke
       STROKE_CLIP = NamedValue.new(:stroke_clip, 5)
-      # Fill, then stroke text and add to path for clipping
+      # Fill, then stroke text and add to path for clipping.
+      #
+      # Specify as 6 or :fill_stroke_clip.
+      #
+      #   #>pdf-small-hide
+      #   canvas.font("Helvetica", size: 13)
+      #   canvas.stroke_color("green").line_width(0.5)
+      #   canvas.text_rendering_mode(:fill_stroke_clip)
+      #   canvas.text("#{canvas.text_rendering_mode.name}", at: [10, 50])
+      #   canvas.stroke_color("red").line_width(20).line(30, 20, 30, 80).stroke
       FILL_STROKE_CLIP = NamedValue.new(:fill_stroke_clip, 6)
-      # Add text to path for clipping
+      # Add text to path for clipping.
+      #
+      # Specify as 7 or :clip.
+      #
+      #   #>pdf-small-hide
+      #   canvas.font("Helvetica", size: 13)
+      #   canvas.stroke_color("green").line_width(0.5)
+      #   canvas.text_rendering_mode(:clip)
+      #   canvas.text("#{canvas.text_rendering_mode.name}", at: [10, 50])
+      #   canvas.stroke_color("red").line_width(20).line(30, 20, 30, 80).stroke
       CLIP = NamedValue.new(:clip, 7)
     end
@@ -389,7 +512,7 @@ module HexaPDF
       attr_accessor :leading
       # The font for the text.
-      attr_accessor :font
+      attr_reader :font
       # The font size.
       attr_reader :font_size
@@ -415,23 +538,25 @@ module HexaPDF
       # The scaled character spacing used in glyph displacement calculations.
       #
-      # This returns the value T_c multiplied by #scaled_horizontal_scaling.
+      # This returns the character spacing multiplied by #scaled_horizontal_scaling.
       #
       # See PDF1.7 s9.4.4
       attr_reader :scaled_character_spacing
       # The scaled word spacing used in glyph displacement calculations.
       #
-      # This returns the value T_w  multiplied by #scaled_horizontal_scaling.
+      # This returns the word spacing multiplied by #scaled_horizontal_scaling.
       #
       # See PDF1.7 s9.4.4
       attr_reader :scaled_word_spacing
       # The scaled font size used in glyph displacement calculations.
       #
-      # This returns the value T_fs / 1000 multiplied by #scaled_horizontal_scaling.
+      # This returns the font size multiplied by the scaling factor from glyph space to text space
+      # (0.001 for all fonts except Type3 fonts or the scaling specified in /FontMatrix for Type3
+      # fonts) and multiplied by #scaled_horizontal_scaling.
       #
-      # See PDF1.7 s9.4.4
+      # See PDF1.7 s9.4.4, HexaPDF::Type::FontType3
       attr_reader :scaled_font_size
       # The scaled horizontal scaling used in glyph displacement calculations.
@@ -542,6 +667,15 @@ module HexaPDF
         self.fill_color = color_space.default_color
       end
+      ##
+      # :attr_writer: font
+      #
+      # Sets the font and updates the glyph space to text space scaling.
+      def font=(font)
+        @font = font
+        update_scaled_font_size
+      end
       ##
       # :attr_writer: character_spacing
       #
@@ -566,7 +700,7 @@ module HexaPDF
       # Sets the font size and updates the scaled font size.
       def font_size=(size)
         @font_size = size
-        @scaled_font_size = size / 1000.0 * @scaled_horizontal_scaling
+        update_scaled_font_size
       end
       ##
@@ -579,7 +713,15 @@ module HexaPDF
         @scaled_horizontal_scaling = scaling / 100.0
         @scaled_character_spacing = @character_spacing * @scaled_horizontal_scaling
         @scaled_word_spacing = @word_spacing * @scaled_horizontal_scaling
-        @scaled_font_size = @font_size / 1000.0 * @scaled_horizontal_scaling
+        update_scaled_font_size
+      end
+      private
+      # Updates the cached value for the scaled font size.
+      def update_scaled_font_size
+        @scaled_font_size = @font_size * (@font&.glyph_scaling_factor || 0.001) *
+          @scaled_horizontal_scaling
       end
     end

data/lib/hexapdf/dictionary_fields.rb CHANGED Viewed

@@ -293,7 +293,7 @@ module HexaPDF
       end
       # :nodoc:
-      DATE_RE = /\AD:(\d{4})(\d\d)?(\d\d)?(\d\d)?(\d\d)?(\d\d)?([Z+-])?(?:(\d\d)(?:'|'(\d\d)'?|\z)?)?\z/n
+      DATE_RE = /\AD:(\d{4})(\d\d)?(\d\d)?(\d\d)?(\d\d)?(\d\d)?([Z+-])?(?:(\d\d)(?:'|'([0-5]\d)'?|\z)?)?\z/n
       # Checks if the given object is a string and converts into a Time object if possible.
       # Otherwise returns +nil+.

data/lib/hexapdf/encryption/standard_security_handler.rb CHANGED Viewed

@@ -328,8 +328,7 @@ module HexaPDF
           raise(HexaPDF::UnsupportedEncryptionError,
                 "Invalid /R value for standard security handler")
         elsif dict[:R] <= 4 && !document.trailer[:ID].kind_of?(PDFArray)
-          raise(HexaPDF::EncryptionError,
-                "Document ID for needed for decryption")
+          document.trailer[:ID] = ['', '']
         end
         @trailer_id_hash = trailer_id_hash

data/lib/hexapdf/layout/style.rb CHANGED Viewed

@@ -1069,7 +1069,8 @@ module HexaPDF
       # The font size scaled appropriately.
       def scaled_font_size
-        @scaled_font_size ||= calculated_font_size / 1000.0 * scaled_horizontal_scaling
+        @scaled_font_size ||= calculated_font_size * font.pdf_object.glyph_scaling_factor *
+          scaled_horizontal_scaling
       end
       # The character spacing scaled appropriately.

data/lib/hexapdf/parser.rb CHANGED Viewed

@@ -62,9 +62,15 @@ module HexaPDF
       @object_stream_data = {}
       @reconstructed_revision = nil
       @in_reconstruct_revision = false
+      @contains_xref_streams = false
       retrieve_pdf_header_offset_and_version
     end
+    # Returns +true+ if the PDF file contains cross-reference streams.
+    def contains_xref_streams?
+      @contains_xref_streams
+    end
     # Loads the indirect (potentially compressed) object specified by the given cross-reference
     # entry.
     #
@@ -230,6 +236,7 @@ module HexaPDF
           maybe_raise("Cross-reference stream doesn't contain entry for itself", pos: pos)
           xref_section.add_in_use_entry(obj.oid, obj.gen, pos)
         end
+        @contains_xref_streams = true
       end
       xref_section.delete(0)
       [xref_section, trailer]
@@ -335,7 +342,8 @@ module HexaPDF
       step_size = 1024
       pos = @io.pos
       eof_not_found = pos == 0
-      startxref_missing = false
+      startxref_missing = startxref_mangled = false
+      startxref_offset = nil
       while pos != 0
         @io.pos = [pos - step_size, 0].max
@@ -343,27 +351,31 @@ module HexaPDF
         lines = @io.read(step_size + 40).split(/[\r\n]+/)
         eof_index = lines.rindex {|l| l.strip == '%%EOF' }
-        unless eof_index
+        if !eof_index
           eof_not_found = true
-          next
-        end
-        unless eof_index >= 2 && lines[eof_index - 2].strip == "startxref"
+        elsif lines[eof_index - 1].strip =~ /\Astartxref\s(\d+)\z/
+          startxref_offset = $1.to_i
+          startxref_mangled = true
+          break # we found it even if the syntax is not entirely correct
+        elsif eof_index < 2 || lines[eof_index - 2].strip != "startxref"
           startxref_missing = true
-          next
+        else
+          startxref_offset = lines[eof_index - 1].to_i
+          break # we found it
         end
-        break # we found the startxref offset
       end
       if eof_not_found
         maybe_raise("PDF file trailer with end-of-file marker not found", pos: pos,
                     force: !eof_index)
+      elsif startxref_mangled
+        maybe_raise("PDF file trailer keyword startxref on same line as value", pos: pos)
       elsif startxref_missing
         maybe_raise("PDF file trailer is missing startxref keyword", pos: pos,
                     force: eof_index < 2 || lines[eof_index - 2].strip != "startxref")
       end
-      @startxref_offset = lines[eof_index - 1].to_i
+      @startxref_offset = startxref_offset
     end
     # Returns the reconstructed revision.

data/lib/hexapdf/task/optimize.rb CHANGED Viewed

@@ -72,8 +72,19 @@ module HexaPDF
       #   Compresses the content streams of all pages if set to +true+. Note that this can take a
       #   *very* long time because each content stream has to be unfiltered, parsed, serialized
       #   and then filtered again.
+      #
+      # prune_page_resources::
+      #   Removes all unused XObjects from the resources dictionaries of all pages. It is
+      #   recommended to also set the +compact+ argument because otherwise the unused XObjects won't
+      #   be deleted from the document.
+      #
+      #   This is sometimes necessary after importing pages from other PDF files that use a single
+      #   resources dictionary for all pages.
       def self.call(doc, compact: false, object_streams: :preserve, xref_streams: :preserve,
-                    compress_pages: false)
+                    compress_pages: false, prune_page_resources: false)
+        used_refs = compress_pages(doc) if compress_pages
+        prune_page_resources(doc, used_refs) if prune_page_resources
         if compact
           compact(doc, object_streams, xref_streams)
         elsif object_streams != :preserve
@@ -83,8 +94,6 @@ module HexaPDF
         else
           doc.each(only_current: false, &method(:delete_fields_with_defaults))
         end
-        compress_pages(doc) if compress_pages
       end
       # Compacts the document by merging all revisions into one, deleting null and unused entries
@@ -214,12 +223,41 @@ module HexaPDF
       # Compresses the contents of all pages by parsing and then serializing again. The HexaPDF
       # serializer is already optimized for small output size so nothing else needs to be done.
+      #
+      # Returns a hash of the form key=>true where the keys are the used XObjects (for use with
+      # #prune_page_resources).
       def self.compress_pages(doc)
+        used_refs = {}
         doc.pages.each do |page|
           processor = SerializationProcessor.new
           HexaPDF::Content::Parser.parse(page.contents, processor)
           page.contents = processor.result
           page[:Contents].set_filter(:FlateDecode)
+          xobjects = page.resources[:XObject]
+          processor.used_references.each {|ref| used_refs[xobjects[ref]] = true }
+        end
+        used_refs
+      end
+      # Deletes all XObject entries from the resources dictionaries of all pages whose names do not
+      # match the keys in +used_refs+.
+      def self.prune_page_resources(doc, used_refs)
+        unless used_refs
+          used_refs = {}
+          doc.pages.each do |page|
+            xobjects = page.resources[:XObject]
+            HexaPDF::Content::Parser.parse(page.contents) do |op, operands|
+              used_refs[xobjects[operands[0]]] = true if op == :Do
+            end
+          end
+        end
+        doc.pages.each do |page|
+          xobjects = page.resources[:XObject]
+          xobjects.each do |key, obj|
+            next if used_refs[obj]
+            xobjects.delete(key)
+          end
         end
       end
@@ -228,14 +266,19 @@ module HexaPDF
         attr_reader :result #:nodoc:
+        # Contains all found references
+        attr_reader :used_references
         def initialize #:nodoc:
           @result = ''.b
           @serializer = HexaPDF::Serializer.new
+          @used_references = []
         end
         def process(op, operands) #:nodoc:
           @result << HexaPDF::Content::Operator::DEFAULT_OPERATORS[op].
             serialize(@serializer, *operands)
+          @used_references << operands[0] if op == :Do
         end
       end

data/lib/hexapdf/type/font.rb CHANGED Viewed

@@ -98,6 +98,11 @@ module HexaPDF
         embedded?
       end
+      # Returns the glyph scaling factor for transforming from glyph space to text space.
+      def glyph_scaling_factor
+        0.001
+      end
       private
       # Parses and caches the ToUnicode CMap.

data/lib/hexapdf/type/font_type3.rb CHANGED Viewed

@@ -41,6 +41,10 @@ module HexaPDF
     # Represents a Type 3 font.
     #
+    # Note: We assume the /FontMatrix is only used for scaling, i.e. of the form [x 0 0 +/-x 0 0].
+    # If it is of a different form, things won't work correctly. This will be handled once such a
+    # case is found.
+    #
     # See: PDF1.7 s9.6.5
     class FontType3 < FontSimple
@@ -51,6 +55,22 @@ module HexaPDF
       define_field :CharProcs,  type: Dictionary, required: true
       define_field :Resources,  type: Dictionary, version: '1.2'
+      # Returns the bounding box of the font.
+      def bounding_box
+        matrix = self[:FontMatrix]
+        bbox = self[:FontBBox].value
+        if matrix[3] < 0    # Some writers invert the y-axis
+          bbox = bbox.dup
+          bbox[1], bbox[3] = -bbox[3], -bbox[1]
+        end
+        bbox
+      end
+      # Returns the glyph scaling factor for transforming from glyph space to text space.
+      def glyph_scaling_factor
+        self[:FontMatrix][0]
+      end
       private
       def perform_validation

data/lib/hexapdf/version.rb CHANGED Viewed

@@ -37,6 +37,6 @@
 module HexaPDF
   # The version of HexaPDF.
-  VERSION = '0.18.0'
+  VERSION = '0.19.3'
 end

data/lib/hexapdf/writer.rb CHANGED Viewed

@@ -66,6 +66,8 @@ module HexaPDF
       @serializer = Serializer.new
       @serializer.encrypter = @document.encrypted? ? @document.security_handler : nil
       @rev_size = 0
+      @use_xref_streams = false
     end
     # Writes the document to the IO object.
@@ -87,6 +89,7 @@ module HexaPDF
       IO.copy_stream(@document.revisions.parser.io, @io)
       @rev_size = @document.revisions.current.next_free_oid
+      @use_xref_streams = @document.revisions.parser.contains_xref_streams?
       revision = Revision.new(@document.revisions.current.trailer)
       @document.revisions.each do |rev|
@@ -170,10 +173,13 @@ module HexaPDF
         end
       end
-      if !object_streams.empty? && xref_stream.nil?
-        raise HexaPDF::Error, "Cannot use object streams when there is no xref stream"
+      if (!object_streams.empty? || @use_xref_streams) && xref_stream.nil?
+        xref_stream = @document.wrap({Type: :XRef}, oid: rev.next_free_oid)
+        rev.add(xref_stream)
       end
+      @use_xref_streams = true if xref_stream
       [xref_stream, object_streams]
     end

data/test/hexapdf/content/test_graphics_state.rb CHANGED Viewed

@@ -2,6 +2,7 @@
 require 'test_helper'
 require 'hexapdf/content/graphics_state'
+require 'ostruct'
 # Dummy class used as wrapper so that constant lookup works correctly
 class GraphicsStateWrapper < Minitest::Spec
@@ -146,6 +147,13 @@ class GraphicsStateWrapper < Minitest::Spec
     it "fails when restoring the graphics state if the stack is empty" do
       assert_raises(HexaPDF::Error) { @gs.restore }
     end
-  end
+    it "uses the correct glyph to text space scaling" do
+      font = OpenStruct.new
+      font.glyph_scaling_factor = 0.002
+      @gs.font = font
+      @gs.font_size = 10
+      assert_equal(0.02, @gs.scaled_font_size)
+    end
+  end
 end

data/test/hexapdf/content/test_operator.rb CHANGED Viewed

@@ -4,6 +4,7 @@ require 'test_helper'
 require 'hexapdf/content/operator'
 require 'hexapdf/content/processor'
 require 'hexapdf/serializer'
+require 'ostruct'
 describe HexaPDF::Content::Operator::BaseOperator do
   before do
@@ -190,9 +191,11 @@ end
 describe_operator :SetGraphicsStateParameters, :gs do
   it "applies parameters from an ExtGState dictionary" do
+    font = OpenStruct.new
+    font.glyph_scaling_factor = 0.01
     @processor.resources[:ExtGState] = {Name: {LW: 10, LC: 2, LJ: 2, ML: 2, D: [[3, 5], 2],
                                                RI: 2, SA: true, BM: :Multiply, CA: 0.5, ca: 0.5,
-                                               AIS: true, TK: false, Font: [:Test, 10]}}
+                                               AIS: true, TK: false, Font: [font, 10]}}
     @processor.resources.define_singleton_method(:document) do
       Object.new.tap {|obj| obj.define_singleton_method(:deref) {|o| o } }
     end
@@ -210,7 +213,7 @@ describe_operator :SetGraphicsStateParameters, :gs do
     assert_equal(0.5, gs.stroke_alpha)
     assert_equal(0.5, gs.fill_alpha)
     assert(gs.alpha_source)
-    assert_equal(:Test, gs.font)
+    assert_equal(font, gs.font)
     assert_equal(10, gs.font_size)
     refute(gs.text_knockout)
   end
@@ -448,7 +451,9 @@ describe_operator :SetFontAndSize, :Tf do
       self[:Font] && self[:Font][name]
     end
-    @processor.resources[:Font] = {F1: :test}
+    font = OpenStruct.new
+    font.glyph_scaling_factor = 0.01
+    @processor.resources[:Font] = {F1: font}
     invoke(:F1, 10)
     assert_equal(@processor.resources.font(:F1), @processor.graphics_state.font)
     assert_equal(10, @processor.graphics_state.font_size)

data/test/hexapdf/encryption/test_standard_security_handler.rb CHANGED Viewed

@@ -229,19 +229,21 @@ describe HexaPDF::Encryption::StandardSecurityHandler do
       assert_match(/Invalid \/R/i, exp.message)
     end
-    it "fails if the ID in the document's trailer is missing although it is needed" do
+    it "fails if the supplied password is invalid" do
       exp = assert_raises(HexaPDF::EncryptionError) do
-        @handler.set_up_decryption({Filter: :Standard, V: 2, R: 2})
+        @handler.set_up_decryption({Filter: :Standard, V: 2, R: 6, U: 'a' * 48, O: 'a' * 48,
+                                    UE: 'a' * 32, OE: 'a' * 32})
       end
-      assert_match(/Document ID/i, exp.message)
+      assert_match(/Invalid password/i, exp.message)
     end
-    it "fails if the supplied password is invalid" do
+    it "assigns empty strings to the trailer's ID field if it is missing" do
+      refute(@document.trailer.key?(:ID))
       exp = assert_raises(HexaPDF::EncryptionError) do
-        @handler.set_up_decryption({Filter: :Standard, V: 2, R: 6, U: 'a' * 48, O: 'a' * 48,
-                                    UE: 'a' * 32, OE: 'a' * 32})
+        @handler.set_up_decryption({Filter: :Standard, V: 1, R: 2, U: 'a' * 48, O: 'a' * 48, P: 15})
       end
       assert_match(/Invalid password/i, exp.message)
+      assert_equal(['', ''], @document.trailer[:ID].value)
     end
     describe "/Perms field checking" do

data/test/hexapdf/layout/test_style.rb CHANGED Viewed

@@ -597,6 +597,11 @@ end
 describe HexaPDF::Layout::Style do
   before do
     @style = HexaPDF::Layout::Style.new
+    @style.font = Object.new.tap do |obj|
+      obj.define_singleton_method(:pdf_object) do
+        Object.new.tap {|pdf| pdf.define_singleton_method(:glyph_scaling_factor) { 0.001 } }
+      end
+    end
   end
   it "can assign values on initialization" do
@@ -644,6 +649,7 @@ describe HexaPDF::Layout::Style do
   end
   it "has several simple and dynamically generated properties with default values" do
+    @style = HexaPDF::Layout::Style.new
     assert_raises(HexaPDF::Error) { @style.font }
     assert_equal(10, @style.font_size)
     assert_equal(0, @style.character_spacing)
@@ -725,6 +731,11 @@ describe HexaPDF::Layout::Style do
       font = Object.new
       font.define_singleton_method(:scaling_factor) { 1 }
       font.define_singleton_method(:wrapped_font) { wrapped_font }
+      font.define_singleton_method(:pdf_object) do
+        obj = Object.new
+        obj.define_singleton_method(:glyph_scaling_factor) { 0.001 }
+        obj
+      end
       @style.font = font
     end

data/test/hexapdf/task/test_optimize.rb CHANGED Viewed

@@ -159,4 +159,30 @@ describe HexaPDF::Task::Optimize do
       assert_equal("10 10 m\nq\nQ\nBI\n/Name 5 ID\ndataEI\n", page.contents)
     end
   end
+  describe "prune_page_resources" do
+    it "removes all unused XObject references" do
+      [false, true].each do |compress_pages|
+        page1 = @doc.pages.add
+        page1.resources[:XObject] = {}
+        page1.resources[:XObject][:test] = @doc.add({})
+        page1.resources[:XObject][:used_on_page2] = @doc.add({})
+        page1.resources[:XObject][:unused] = @doc.add({})
+        page1.contents = "/test Do"
+        page2 = @doc.pages.add
+        page2.resources[:XObject] = {}
+        page2.resources[:XObject][:used_on2] = page1.resources[:XObject][:used_on_page2]
+        page2.resources[:XObject][:also_unused] = page1.resources[:XObject][:unused]
+        page2.contents = "/used_on2 Do"
+        @doc.task(:optimize, prune_page_resources: true, compress_pages: compress_pages)
+        assert(page1.resources[:XObject].key?(:test))
+        assert(page1.resources[:XObject].key?(:used_on_page2))
+        refute(page1.resources[:XObject].key?(:unused))
+        assert(page2.resources[:XObject].key?(:used_on2))
+        refute(page2.resources[:XObject].key?(:also_unused))
+      end
+    end
+  end
 end

data/test/hexapdf/test_dictionary_fields.rb CHANGED Viewed

@@ -173,6 +173,7 @@ describe HexaPDF::DictionaryFields do
     it "allows conversion to a Time object from a binary string" do
       refute(@field.convert('test'.b, self))
+      refute(@field.convert('D:01211016165909+00\'64'.b, self))
       [
         ["D:1998", [1998, 01, 01, 00, 00, 00, "-00:00"]],

data/test/hexapdf/test_parser.rb CHANGED Viewed

@@ -338,6 +338,11 @@ describe HexaPDF::Parser do
       assert_equal(5, @parser.startxref_offset)
     end
+    it "handles the case of startxref and its value being on the same line" do
+      create_parser("startxref 5\n%%EOF")
+      assert_equal(5, @parser.startxref_offset)
+    end
     it "fails even in big files when nothing is found" do
       create_parser("\nhallo" * 5000)
       exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
@@ -366,6 +371,13 @@ describe HexaPDF::Parser do
       exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
       assert_match(/end-of-file marker not found/, exp.message)
     end
+    it "fails on strict parsing if the startxref is on the same line as its value" do
+      @document.config['parser.on_correctable_error'] = proc { true }
+      create_parser("startxref 5\n%%EOF")
+      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
+      assert_match(/startxref on same line/, exp.message)
+    end
   end
   describe "file_header_version" do
@@ -531,12 +543,14 @@ describe HexaPDF::Parser do
       xref_section, trailer = @parser.load_revision(@parser.startxref_offset)
       assert_equal({Test: 'now'}, trailer)
       assert(xref_section[1].in_use?)
+      refute(@parser.contains_xref_streams?)
     end
     it "works for a cross-reference stream" do
       xref_section, trailer = @parser.load_revision(212)
       assert_equal({Size: 2}, trailer)
       assert(xref_section[1].in_use?)
+      assert(@parser.contains_xref_streams?)
     end
     it "fails if another object is found instead of a cross-reference stream" do

data/test/hexapdf/test_writer.rb CHANGED Viewed

@@ -40,7 +40,7 @@ describe HexaPDF::Writer do
       219
       %%EOF
       3 0 obj
-      <</Producer(HexaPDF version 0.18.0)>>
+      <</Producer(HexaPDF version 0.19.3)>>
       endobj
       xref
       3 1
@@ -72,7 +72,7 @@ describe HexaPDF::Writer do
       141
       %%EOF
       6 0 obj
-      <</Producer(HexaPDF version 0.18.0)>>
+      <</Producer(HexaPDF version 0.19.3)>>
       endobj
       2 0 obj
       <</Length 10>>stream
@@ -103,21 +103,50 @@ describe HexaPDF::Writer do
     assert_document_conversion(@compressed_input_io)
   end
-  it "writes a document in incremental mode" do
-    doc = HexaPDF::Document.new(io: @std_input_io)
-    doc.pages.add
-    output_io = StringIO.new
-    HexaPDF::Writer.write(doc, output_io, incremental: true)
-    assert_equal(output_io.string[0, @std_input_io.string.length], @std_input_io.string)
-    doc = HexaPDF::Document.new(io: output_io)
-    assert_equal(4, doc.revisions.size)
-    assert_equal(2, doc.revisions.current.each.to_a.size)
+  describe "write_incremental" do
+    it "writes a document in incremental mode" do
+      doc = HexaPDF::Document.new(io: @std_input_io)
+      doc.pages.add
+      output_io = StringIO.new
+      HexaPDF::Writer.write(doc, output_io, incremental: true)
+      assert_equal(output_io.string[0, @std_input_io.string.length], @std_input_io.string)
+      doc = HexaPDF::Document.new(io: output_io)
+      assert_equal(4, doc.revisions.size)
+      assert_equal(2, doc.revisions.current.each.to_a.size)
+    end
+    it "uses an xref stream if the document already contains at least one" do
+      doc = HexaPDF::Document.new(io: @compressed_input_io)
+      doc.pages.add
+      output_io = StringIO.new
+      HexaPDF::Writer.write(doc, output_io, incremental: true)
+      refute_match(/^trailer/, output_io.string)
+    end
   end
-  it "raises an error if no xref stream is in a revision but object streams are" do
+  it "creates an xref stream if no xref stream is in a revision but object streams are" do
     document = HexaPDF::Document.new
     document.add({Type: :ObjStm})
-    assert_raises(HexaPDF::Error) { HexaPDF::Writer.new(document, StringIO.new).write }
+    HexaPDF::Writer.new(document, StringIO.new).write
+    assert(:XRef, document.object(2).type)
+  end
+  it "creates an xref stream if a previous revision had one" do
+    document = HexaPDF::Document.new
+    document.pages.add
+    document.revisions.add
+    document.pages.add
+    document.add({Type: :ObjStm})
+    document.revisions.add
+    document.pages.add
+    io = StringIO.new
+    HexaPDF::Writer.new(document, io).write
+    document = HexaPDF::Document.new(io: io)
+    assert_equal(3, document.revisions.count)
+    assert(document.revisions[0].none? {|obj| obj.type == :XRef })
+    assert(document.revisions[1].one? {|obj| obj.type == :XRef })
+    assert(document.revisions[2].one? {|obj| obj.type == :XRef })
   end
   it "raises an error if the class is misused and an xref section contains invalid entries" do

data/test/hexapdf/type/test_font.rb CHANGED Viewed

@@ -64,4 +64,8 @@ describe HexaPDF::Type::Font do
       assert_equal(5, @font.font_file)
     end
   end
+  it "returns the glyph scaling factor" do
+    assert_equal(0.001, @font.glyph_scaling_factor)
+  end
 end

data/test/hexapdf/type/test_font_type3.rb CHANGED Viewed

@@ -9,10 +9,25 @@ describe HexaPDF::Type::FontType3 do
     @doc = HexaPDF::Document.new
     @font = @doc.add({Type: :Font, Subtype: :Type3, Encoding: :WinAnsiEncoding,
                       FirstChar: 32, LastChar: 34, Widths: [600, 0, 700],
-                      FontBBox: [0, 0, 100, 100], FontMatrix: [1, 0, 0, 1, 0, 0],
+                      FontBBox: [0, 100, 100, 0], FontMatrix: [0.002, 0, 0, 0.002, 0, 0],
                       CharProcs: {}})
   end
+  describe "bounding_box" do
+    it "returns the font's bounding box" do
+      assert_equal([0, 0, 100, 100], @font.bounding_box)
+    end
+    it "inverts the y-values if necessary based on /FontMatrix" do
+      @font[:FontMatrix][3] *= -1
+      assert_equal([0, -100, 100, 0], @font.bounding_box)
+    end
+  end
+  it "returns the glyph scaling factor" do
+    assert_equal(0.002, @font.glyph_scaling_factor)
+  end
   describe "validation" do
     it "works for valid objects" do
       assert(@font.validate)

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: hexapdf
 version: !ruby/object:Gem::Version
-  version: 0.18.0
+  version: 0.19.3
 platform: ruby
 authors:
 - Thomas Leitner
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2021-11-04 00:00:00.000000000 Z
+date: 2021-12-14 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: cmdparse