hexapdf 0.32.2 → 0.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +63 -1
  3. data/README.md +9 -0
  4. data/examples/002-graphics.rb +15 -17
  5. data/examples/003-arcs.rb +9 -9
  6. data/examples/009-text_layouter_alignment.rb +1 -1
  7. data/examples/010-text_layouter_inline_boxes.rb +2 -2
  8. data/examples/011-text_layouter_line_wrapping.rb +1 -1
  9. data/examples/012-text_layouter_styling.rb +7 -7
  10. data/examples/013-text_layouter_shapes.rb +1 -1
  11. data/examples/014-text_in_polygon.rb +1 -1
  12. data/examples/015-boxes.rb +8 -7
  13. data/examples/016-frame_automatic_box_placement.rb +2 -2
  14. data/examples/017-frame_text_flow.rb +2 -1
  15. data/examples/018-composer.rb +1 -1
  16. data/examples/020-column_box.rb +2 -1
  17. data/examples/025-table_box.rb +46 -0
  18. data/lib/hexapdf/cli/command.rb +5 -2
  19. data/lib/hexapdf/cli/form.rb +5 -5
  20. data/lib/hexapdf/cli/inspect.rb +3 -3
  21. data/lib/hexapdf/composer.rb +104 -52
  22. data/lib/hexapdf/configuration.rb +44 -39
  23. data/lib/hexapdf/content/canvas.rb +393 -267
  24. data/lib/hexapdf/content/color_space.rb +72 -25
  25. data/lib/hexapdf/content/graphic_object/arc.rb +57 -24
  26. data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +66 -23
  27. data/lib/hexapdf/content/graphic_object/geom2d.rb +47 -6
  28. data/lib/hexapdf/content/graphic_object/solid_arc.rb +58 -36
  29. data/lib/hexapdf/content/graphic_object.rb +6 -7
  30. data/lib/hexapdf/content/graphics_state.rb +54 -45
  31. data/lib/hexapdf/content/operator.rb +52 -54
  32. data/lib/hexapdf/content/parser.rb +2 -2
  33. data/lib/hexapdf/content/processor.rb +15 -15
  34. data/lib/hexapdf/content/transformation_matrix.rb +1 -1
  35. data/lib/hexapdf/content.rb +5 -0
  36. data/lib/hexapdf/dictionary.rb +6 -5
  37. data/lib/hexapdf/dictionary_fields.rb +42 -14
  38. data/lib/hexapdf/digital_signature/cms_handler.rb +2 -2
  39. data/lib/hexapdf/digital_signature/handler.rb +1 -1
  40. data/lib/hexapdf/digital_signature/pkcs1_handler.rb +2 -3
  41. data/lib/hexapdf/digital_signature/signature.rb +6 -6
  42. data/lib/hexapdf/digital_signature/signatures.rb +13 -12
  43. data/lib/hexapdf/digital_signature/signing/default_handler.rb +14 -5
  44. data/lib/hexapdf/digital_signature/signing/signed_data_creator.rb +2 -4
  45. data/lib/hexapdf/digital_signature/signing/timestamp_handler.rb +4 -4
  46. data/lib/hexapdf/digital_signature/signing.rb +4 -0
  47. data/lib/hexapdf/digital_signature/verification_result.rb +2 -2
  48. data/lib/hexapdf/digital_signature.rb +7 -2
  49. data/lib/hexapdf/document/destinations.rb +12 -11
  50. data/lib/hexapdf/document/files.rb +1 -1
  51. data/lib/hexapdf/document/fonts.rb +1 -1
  52. data/lib/hexapdf/document/layout.rb +167 -39
  53. data/lib/hexapdf/document/pages.rb +3 -2
  54. data/lib/hexapdf/document.rb +89 -55
  55. data/lib/hexapdf/encryption/aes.rb +5 -5
  56. data/lib/hexapdf/encryption/arc4.rb +1 -1
  57. data/lib/hexapdf/encryption/fast_aes.rb +2 -2
  58. data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
  59. data/lib/hexapdf/encryption/identity.rb +1 -1
  60. data/lib/hexapdf/encryption/ruby_aes.rb +1 -1
  61. data/lib/hexapdf/encryption/ruby_arc4.rb +1 -1
  62. data/lib/hexapdf/encryption/security_handler.rb +31 -24
  63. data/lib/hexapdf/encryption/standard_security_handler.rb +45 -36
  64. data/lib/hexapdf/encryption.rb +7 -2
  65. data/lib/hexapdf/error.rb +18 -0
  66. data/lib/hexapdf/filter/ascii85_decode.rb +1 -1
  67. data/lib/hexapdf/filter/ascii_hex_decode.rb +1 -1
  68. data/lib/hexapdf/filter/flate_decode.rb +1 -1
  69. data/lib/hexapdf/filter/lzw_decode.rb +1 -1
  70. data/lib/hexapdf/filter/pass_through.rb +1 -1
  71. data/lib/hexapdf/filter/predictor.rb +1 -1
  72. data/lib/hexapdf/filter/run_length_decode.rb +1 -1
  73. data/lib/hexapdf/filter.rb +55 -6
  74. data/lib/hexapdf/font/cmap/parser.rb +2 -2
  75. data/lib/hexapdf/font/cmap.rb +1 -1
  76. data/lib/hexapdf/font/encoding/difference_encoding.rb +1 -1
  77. data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +1 -1
  78. data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +2 -2
  79. data/lib/hexapdf/font/encoding/standard_encoding.rb +1 -1
  80. data/lib/hexapdf/font/encoding/symbol_encoding.rb +1 -1
  81. data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +3 -3
  82. data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +1 -1
  83. data/lib/hexapdf/font/invalid_glyph.rb +3 -0
  84. data/lib/hexapdf/font/true_type_wrapper.rb +17 -4
  85. data/lib/hexapdf/font/type1_wrapper.rb +19 -4
  86. data/lib/hexapdf/font_loader/from_configuration.rb +5 -2
  87. data/lib/hexapdf/font_loader/from_file.rb +5 -5
  88. data/lib/hexapdf/font_loader/standard14.rb +3 -3
  89. data/lib/hexapdf/font_loader.rb +3 -0
  90. data/lib/hexapdf/image_loader/jpeg.rb +2 -2
  91. data/lib/hexapdf/image_loader/pdf.rb +1 -1
  92. data/lib/hexapdf/image_loader/png.rb +2 -2
  93. data/lib/hexapdf/image_loader.rb +1 -1
  94. data/lib/hexapdf/importer.rb +13 -0
  95. data/lib/hexapdf/layout/box.rb +9 -2
  96. data/lib/hexapdf/layout/box_fitter.rb +2 -2
  97. data/lib/hexapdf/layout/column_box.rb +18 -4
  98. data/lib/hexapdf/layout/frame.rb +30 -12
  99. data/lib/hexapdf/layout/image_box.rb +5 -0
  100. data/lib/hexapdf/layout/inline_box.rb +1 -0
  101. data/lib/hexapdf/layout/list_box.rb +17 -1
  102. data/lib/hexapdf/layout/page_style.rb +4 -4
  103. data/lib/hexapdf/layout/style.rb +18 -3
  104. data/lib/hexapdf/layout/table_box.rb +682 -0
  105. data/lib/hexapdf/layout/text_box.rb +5 -3
  106. data/lib/hexapdf/layout/text_fragment.rb +1 -1
  107. data/lib/hexapdf/layout/text_layouter.rb +12 -4
  108. data/lib/hexapdf/layout.rb +1 -0
  109. data/lib/hexapdf/name_tree_node.rb +1 -1
  110. data/lib/hexapdf/number_tree_node.rb +1 -1
  111. data/lib/hexapdf/object.rb +18 -7
  112. data/lib/hexapdf/parser.rb +7 -7
  113. data/lib/hexapdf/pdf_array.rb +1 -1
  114. data/lib/hexapdf/rectangle.rb +1 -1
  115. data/lib/hexapdf/reference.rb +1 -1
  116. data/lib/hexapdf/revision.rb +1 -1
  117. data/lib/hexapdf/revisions.rb +3 -3
  118. data/lib/hexapdf/serializer.rb +15 -15
  119. data/lib/hexapdf/stream.rb +4 -2
  120. data/lib/hexapdf/tokenizer.rb +14 -14
  121. data/lib/hexapdf/type/acro_form/appearance_generator.rb +22 -22
  122. data/lib/hexapdf/type/acro_form/button_field.rb +1 -1
  123. data/lib/hexapdf/type/acro_form/choice_field.rb +1 -1
  124. data/lib/hexapdf/type/acro_form/field.rb +2 -2
  125. data/lib/hexapdf/type/acro_form/form.rb +1 -1
  126. data/lib/hexapdf/type/acro_form/signature_field.rb +4 -4
  127. data/lib/hexapdf/type/acro_form/text_field.rb +1 -1
  128. data/lib/hexapdf/type/acro_form/variable_text_field.rb +1 -1
  129. data/lib/hexapdf/type/acro_form.rb +1 -1
  130. data/lib/hexapdf/type/action.rb +1 -1
  131. data/lib/hexapdf/type/actions/go_to.rb +1 -1
  132. data/lib/hexapdf/type/actions/go_to_r.rb +1 -1
  133. data/lib/hexapdf/type/actions/launch.rb +1 -1
  134. data/lib/hexapdf/type/actions/uri.rb +1 -1
  135. data/lib/hexapdf/type/actions.rb +1 -1
  136. data/lib/hexapdf/type/annotation.rb +3 -3
  137. data/lib/hexapdf/type/annotations/link.rb +1 -1
  138. data/lib/hexapdf/type/annotations/markup_annotation.rb +1 -1
  139. data/lib/hexapdf/type/annotations/text.rb +1 -1
  140. data/lib/hexapdf/type/annotations/widget.rb +2 -2
  141. data/lib/hexapdf/type/annotations.rb +1 -1
  142. data/lib/hexapdf/type/catalog.rb +1 -1
  143. data/lib/hexapdf/type/cid_font.rb +3 -3
  144. data/lib/hexapdf/type/embedded_file.rb +1 -1
  145. data/lib/hexapdf/type/file_specification.rb +2 -2
  146. data/lib/hexapdf/type/font_descriptor.rb +1 -1
  147. data/lib/hexapdf/type/font_simple.rb +2 -2
  148. data/lib/hexapdf/type/font_type0.rb +3 -3
  149. data/lib/hexapdf/type/font_type3.rb +1 -1
  150. data/lib/hexapdf/type/form.rb +1 -1
  151. data/lib/hexapdf/type/graphics_state_parameter.rb +1 -1
  152. data/lib/hexapdf/type/icon_fit.rb +1 -1
  153. data/lib/hexapdf/type/image.rb +1 -1
  154. data/lib/hexapdf/type/info.rb +1 -1
  155. data/lib/hexapdf/type/mark_information.rb +1 -1
  156. data/lib/hexapdf/type/names.rb +2 -2
  157. data/lib/hexapdf/type/object_stream.rb +2 -1
  158. data/lib/hexapdf/type/outline.rb +1 -1
  159. data/lib/hexapdf/type/outline_item.rb +1 -1
  160. data/lib/hexapdf/type/page.rb +19 -10
  161. data/lib/hexapdf/type/page_label.rb +1 -1
  162. data/lib/hexapdf/type/page_tree_node.rb +1 -1
  163. data/lib/hexapdf/type/resources.rb +1 -1
  164. data/lib/hexapdf/type/trailer.rb +2 -2
  165. data/lib/hexapdf/type/viewer_preferences.rb +1 -1
  166. data/lib/hexapdf/type/xref_stream.rb +2 -2
  167. data/lib/hexapdf/utils/pdf_doc_encoding.rb +1 -1
  168. data/lib/hexapdf/version.rb +1 -1
  169. data/lib/hexapdf/writer.rb +4 -4
  170. data/lib/hexapdf/xref_section.rb +2 -2
  171. data/test/hexapdf/content/graphic_object/test_endpoint_arc.rb +11 -1
  172. data/test/hexapdf/content/graphic_object/test_geom2d.rb +7 -0
  173. data/test/hexapdf/content/test_canvas.rb +0 -1
  174. data/test/hexapdf/digital_signature/test_signatures.rb +22 -0
  175. data/test/hexapdf/document/test_files.rb +2 -2
  176. data/test/hexapdf/document/test_layout.rb +98 -0
  177. data/test/hexapdf/encryption/test_security_handler.rb +12 -11
  178. data/test/hexapdf/encryption/test_standard_security_handler.rb +35 -23
  179. data/test/hexapdf/font/test_true_type_wrapper.rb +18 -1
  180. data/test/hexapdf/font/test_type1_wrapper.rb +15 -1
  181. data/test/hexapdf/layout/test_box.rb +1 -1
  182. data/test/hexapdf/layout/test_column_box.rb +65 -21
  183. data/test/hexapdf/layout/test_frame.rb +14 -14
  184. data/test/hexapdf/layout/test_image_box.rb +4 -0
  185. data/test/hexapdf/layout/test_inline_box.rb +5 -0
  186. data/test/hexapdf/layout/test_list_box.rb +40 -6
  187. data/test/hexapdf/layout/test_page_style.rb +3 -2
  188. data/test/hexapdf/layout/test_style.rb +50 -0
  189. data/test/hexapdf/layout/test_table_box.rb +722 -0
  190. data/test/hexapdf/layout/test_text_box.rb +18 -0
  191. data/test/hexapdf/layout/test_text_layouter.rb +4 -0
  192. data/test/hexapdf/test_dictionary_fields.rb +4 -1
  193. data/test/hexapdf/test_document.rb +1 -0
  194. data/test/hexapdf/test_filter.rb +8 -0
  195. data/test/hexapdf/test_importer.rb +9 -0
  196. data/test/hexapdf/test_object.rb +16 -5
  197. data/test/hexapdf/test_stream.rb +7 -0
  198. data/test/hexapdf/test_writer.rb +3 -3
  199. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +13 -5
  200. data/test/hexapdf/type/acro_form/test_form.rb +4 -3
  201. data/test/hexapdf/type/test_page.rb +18 -4
  202. metadata +17 -8
@@ -64,32 +64,43 @@ end
64
64
  #
65
65
  # Here are some pointers to more in depth information:
66
66
  #
67
- # * For information about the command line application, see the HexaPDF::CLI module.
67
+ # * HexaPDF::CLI has information about the accompanying command line application.
68
68
  # * HexaPDF::Document provides information about how to work with a PDF file.
69
+ # * HexaPDF::Composer is the main class for easily creating PDF documents from scratch.
69
70
  # * HexaPDF::Content::Canvas provides the canvas API for drawing/writing on a page or form XObject
71
+ # * HexaPDF::Type::AcroForm::Form is the entry point for working with interactive forms.
72
+ # * HexaPDF::Type::Outline has information on working with outlines/bookmarks.
73
+ # * HexaPDF::Encryption provides information on how encryption works.
74
+ # * HexaPDF::DigitalSignature is the entry point for working with digital signatures.
70
75
  module HexaPDF
71
76
 
72
77
  autoload(:Composer, 'hexapdf/composer')
73
78
 
74
79
  # == HexaPDF::Document
75
80
  #
76
- # Represents one PDF document.
81
+ # Represents a PDF document.
77
82
  #
78
- # A PDF document consists of (indirect) objects, so the main job of this class is to provide
79
- # methods for working with these objects. However, since a PDF document may also be
83
+ # A PDF document essentially consists of (indirect) objects, so the main job of this class is to
84
+ # provide methods for working with these objects. However, since a PDF document may also be
80
85
  # incrementally updated and can therefore contain one or more revisions, there are also methods
81
- # for working with these revisions.
86
+ # for working with these revisions (see Revisions for details).
82
87
  #
83
- # Note: This class provides everything to work on PDF documents on a low-level basis. This means
84
- # that there are no convenience methods for higher PDF functionality. Those can be found in the
85
- # objects linked from here, like #catalog.
88
+ # Additionally, there are many convenience methods for easily accessing the most important PDF
89
+ # functionality, like encrypting a document (#encrypt), working with digital signatures
90
+ # (#signatures), accessing the interactive form data (#acro_form), working with the pages
91
+ # (#pages), fonts (#fonts) and images (#images).
86
92
  #
87
- # == Known Messages
93
+ # Note: This class provides the basis for working with a PDF document. The higher PDF
94
+ # functionality is *not* implemented here but either in the appropriate PDF type classes or in
95
+ # special convenience classes. All this functionality can be accessed via the convenience methods
96
+ # described above.
97
+ #
98
+ # == Available Message Hooks
88
99
  #
89
100
  # The document object provides a basic message dispatch system via #register_listener and
90
101
  # #dispatch_message.
91
102
  #
92
- # Following are the messages that are used by HexaPDF itself:
103
+ # Following messages are used by HexaPDF itself:
93
104
  #
94
105
  # :complete_objects::
95
106
  # This message is called before the first step of writing a document. Listeners should
@@ -138,17 +149,22 @@ module HexaPDF
138
149
  end
139
150
  end
140
151
 
141
- # The configuration for the document.
152
+ # The configuration object for the document.
153
+ #
154
+ # See Configuration for details.
142
155
  attr_reader :config
143
156
 
144
157
  # The revisions of the document.
158
+ #
159
+ # See Revisions.
145
160
  attr_reader :revisions
146
161
 
147
162
  # Creates a new PDF document, either an empty one or one read from the provided +io+.
148
163
  #
149
164
  # When an IO object is provided and it contains an encrypted PDF file, it is automatically
150
165
  # decrypted behind the scenes. The +decryption_opts+ argument has to be set appropriately in
151
- # this case.
166
+ # this case. In case this is not wanted, the configuration option 'document.auto_decrypt' needs
167
+ # to be used.
152
168
  #
153
169
  # Options:
154
170
  #
@@ -183,8 +199,8 @@ module HexaPDF
183
199
  # doc.object(ref) -> obj or nil
184
200
  # doc.object(oid) -> obj or nil
185
201
  #
186
- # Returns the current version of the indirect object for the given exact reference or for the
187
- # given object number.
202
+ # Returns the current version of the indirect object for the given exact reference (see
203
+ # Reference) or for the given object number.
188
204
  #
189
205
  # For references to unknown objects, +nil+ is returned but free objects are represented by a
190
206
  # PDF Null object, not by +nil+!
@@ -199,7 +215,7 @@ module HexaPDF
199
215
  # doc.object?(oid) -> true or false
200
216
  #
201
217
  # Returns +true+ if the document contains an indirect object for the given exact reference
202
- # or for the given object number.
218
+ # (see Reference) or for the given object number.
203
219
  #
204
220
  # Even though this method might return +true+ for some references, #object may return +nil+
205
221
  # because this method takes *all* revisions into account. Also see the discussion on #each for
@@ -212,7 +228,7 @@ module HexaPDF
212
228
 
213
229
  # Dereferences the given object.
214
230
  #
215
- # Return the object itself if it is not a reference, or the indirect object specified by the
231
+ # Returns the object itself if it is not a reference, or the indirect object specified by the
216
232
  # reference.
217
233
  def deref(obj)
218
234
  obj.kind_of?(Reference) ? object(obj) : obj
@@ -227,7 +243,7 @@ module HexaPDF
227
243
  # HexaPDF::Object. If it is not the latter, #wrap is called with the object and the
228
244
  # additional keyword arguments.
229
245
  #
230
- # See: Revisions#add_object
246
+ # See: #wrap, Revisions#add_object
231
247
  def add(obj, **wrap_opts)
232
248
  obj = wrap(obj, **wrap_opts) unless obj.kind_of?(HexaPDF::Object)
233
249
 
@@ -266,14 +282,14 @@ module HexaPDF
266
282
  HexaPDF::Importer.for(self).import(obj, source: source)
267
283
  end
268
284
 
269
- # Wraps the given object inside a HexaPDF::Object class which allows one to use
285
+ # Wraps the given object inside a HexaPDF::Object (sub)class which allows one to use
270
286
  # convenience functions to work with the object.
271
287
  #
272
288
  # The +obj+ argument can also be a HexaPDF::Object object so that it can be re-wrapped if
273
- # needed.
289
+ # necessary.
274
290
  #
275
291
  # The class of the returned object is always a subclass of HexaPDF::Object (or of
276
- # HexaPDF::Stream if a +stream+ is given). Which subclass is used, depends on the values of the
292
+ # HexaPDF::Stream if +stream+ is given). Which subclass is used, depends on the values of the
277
293
  # +type+ and +subtype+ options as well as on the 'object.type_map' and 'object.subtype_map'
278
294
  # global configuration options:
279
295
  #
@@ -291,13 +307,13 @@ module HexaPDF
291
307
  #
292
308
  # * If there is no valid class after the above steps, HexaPDF::Stream is used if a stream is
293
309
  # given, HexaPDF::Dictionary if the given object is a hash, HexaPDF::PDFArray if it is an
294
- # array or else HexaPDF::Object is used.
310
+ # array or else HexaPDF::Object.
295
311
  #
296
312
  # Options:
297
313
  #
298
314
  # :type:: (Symbol or Class) The type of a PDF object that should be used for wrapping. This
299
315
  # could be, for example, :Pages. If a class object is provided, it is used directly
300
- # instead of the type detection system.
316
+ # instead of determining the class through the type detection system.
301
317
  #
302
318
  # :subtype:: (Symbol) The subtype of a PDF object which further qualifies a type. For
303
319
  # example, image objects in PDF have a type of :XObject and a subtype of :Image.
@@ -341,7 +357,9 @@ module HexaPDF
341
357
  if subtype
342
358
  sub_klass = GlobalConfiguration.constantize('object.subtype_map', type, subtype) { klass }
343
359
  if type ||
344
- sub_klass&.each_field&.none? {|name, field| field.required? && !data.value.key?(name) }
360
+ sub_klass&.each_field&.none? do |name, field|
361
+ field.required? && !data.value.key?(name) && name != :Type
362
+ end
345
363
  klass = sub_klass
346
364
  end
347
365
  end
@@ -410,6 +428,11 @@ module HexaPDF
410
428
  # doc.register_listener(name) {|*args| block} -> block
411
429
  #
412
430
  # Registers the given listener for the message +name+.
431
+ #
432
+ # If +callable+ is provided, it needs to be an Object responding to #call. Otherwise the block
433
+ # has to be provided. The arguments that are provided to the #call method depend on the message.
434
+ #
435
+ # See: dispatch_message
413
436
  def register_listener(name, callable = nil, &block)
414
437
  callable ||= block
415
438
  (@listeners[name] ||= []) << callable
@@ -420,6 +443,8 @@ module HexaPDF
420
443
  #
421
444
  # See the main Document documentation for an overview of messages that are used by HexaPDF
422
445
  # itself.
446
+ #
447
+ # See: register_listener
423
448
  def dispatch_message(name, *args)
424
449
  @listeners[name]&.each {|obj| obj.call(*args) }
425
450
  end
@@ -427,10 +452,10 @@ module HexaPDF
427
452
  UNSET = ::Object.new # :nodoc:
428
453
 
429
454
  # Caches and returns the given +value+ or the value of the given block using the given
430
- # +pdf_data+ and +key+ arguments as composite cache key. If a cached value already exists and
431
- # +update+ is +false+, the cached value is just returned.
455
+ # +pdf_data+ and +key+ arguments as composite cache key.
432
456
  #
433
- # Set +update+ to +true+ to force an update of the cached value.
457
+ # If a cached value already exists and +update+ is +false+, the cached value is just returned.
458
+ # If +update+ is set to +true+, an update of the cached value is forced.
434
459
  #
435
460
  # This facility can be used to cache expensive operations in PDF objects that are easy to
436
461
  # compute again.
@@ -444,7 +469,7 @@ module HexaPDF
444
469
  # Returns +true+ if there is a value cached for the composite key consisting of the given
445
470
  # +pdf_data+ and +key+ objects.
446
471
  #
447
- # Also see: #cache
472
+ # See: #cache
448
473
  def cached?(pdf_data, key)
449
474
  @cache.key?(pdf_data) && @cache[pdf_data].key?(key)
450
475
  end
@@ -455,29 +480,32 @@ module HexaPDF
455
480
  # It is *not* recommended to clear the whole cache! Better clear the cache for individual PDF
456
481
  # objects!
457
482
  #
458
- # Also see: #cache
483
+ # See: #cache, #cached?
459
484
  def clear_cache(pdf_data = nil)
460
485
  pdf_data ? @cache[pdf_data].clear : @cache.clear
461
486
  end
462
487
 
463
- # Returns the Pages object that provides convenience methods for working with pages.
488
+ # Returns the Pages object that provides convenience methods for working with the pages of the
489
+ # PDF file.
464
490
  #
465
- # Also see: HexaPDF::Type::PageTreeNode
491
+ # See: Pages, Type::PageTreeNode
466
492
  def pages
467
493
  @pages ||= Pages.new(self)
468
494
  end
469
495
 
470
- # Returns the Images object that provides convenience methods for working with images.
496
+ # Returns the Images object that provides convenience methods for working with images (e.g.
497
+ # adding them to the PDF or listing them).
471
498
  def images
472
499
  @images ||= Images.new(self)
473
500
  end
474
501
 
475
- # Returns the Files object that provides convenience methods for working with files.
502
+ # Returns the Files object that provides convenience methods for working with embedded files.
476
503
  def files
477
504
  @files ||= Files.new(self)
478
505
  end
479
506
 
480
- # Returns the Fonts object that provides convenience methods for working with fonts.
507
+ # Returns the Fonts object that provides convenience methods for working with the fonts used in
508
+ # the PDF file.
481
509
  def fonts
482
510
  @fonts ||= Fonts.new(self)
483
511
  end
@@ -496,14 +524,16 @@ module HexaPDF
496
524
 
497
525
  # Returns the main AcroForm object for dealing with interactive forms.
498
526
  #
499
- # See HexaPDF::Type::Catalog#acro_form for details on the arguments.
527
+ # The meaning of the +create+ argument is detailed at Type::Catalog#acro_form.
528
+ #
529
+ # See: Type::AcroForm::Form
500
530
  def acro_form(create: false)
501
531
  catalog.acro_form(create: create)
502
532
  end
503
533
 
504
- # Returns the main document outline object.
534
+ # Returns the entry object to the document outline (a.k.a. bookmarks).
505
535
  #
506
- # See HexaPDF::Type::Outline for details.
536
+ # See: Type::Outline
507
537
  def outline
508
538
  catalog.outline
509
539
  end
@@ -513,7 +543,7 @@ module HexaPDF
513
543
  # Tasks provide an extensible way for performing operations on a PDF document without
514
544
  # cluttering the Document interface.
515
545
  #
516
- # See Task for more information.
546
+ # See: Task
517
547
  def task(name, **opts, &block)
518
548
  task = config.constantize('task.map', name) do
519
549
  raise HexaPDF::Error, "No task named '#{name}' is available"
@@ -522,11 +552,15 @@ module HexaPDF
522
552
  end
523
553
 
524
554
  # Returns the trailer dictionary for the document.
555
+ #
556
+ # See: Type::Trailer
525
557
  def trailer
526
558
  @revisions.current.trailer
527
559
  end
528
560
 
529
561
  # Returns the document's catalog, the root of the object tree.
562
+ #
563
+ # See: Type::Catalog
530
564
  def catalog
531
565
  trailer.catalog
532
566
  end
@@ -537,14 +571,16 @@ module HexaPDF
537
571
  # version has been set manually and the catalog's /Version key refers to a later version, the
538
572
  # later version is used.
539
573
  #
540
- # See: PDF1.7 s7.2.2
574
+ # See: PDF2.0 s7.2.2
541
575
  def version
542
576
  catalog_version = (catalog[:Version] || '1.0').to_s
543
577
  (@version < catalog_version ? catalog_version : @version)
544
578
  end
545
579
 
546
- # Sets the version of the PDF document. The argument must be a string in the format 'M.N'
547
- # where M is the major version and N the minor version (e.g. '1.4' or '2.0').
580
+ # Sets the version of the PDF document.
581
+ #
582
+ # The argument +value+ must be a string in the format 'M.N' where M is the major version and N
583
+ # the minor version (e.g. '1.4' or '2.0').
548
584
  def version=(value)
549
585
  raise ArgumentError, "PDF version must follow format M.N" unless value.to_s.match?(/\A\d\.\d\z/)
550
586
  @version = value.to_s
@@ -557,9 +593,9 @@ module HexaPDF
557
593
 
558
594
  # Encrypts the document.
559
595
  #
560
- # This is done by setting up a security handler for this purpose and populating the trailer's
561
- # Encrypt dictionary accordingly. The actual encryption, however, is only done when writing the
562
- # document.
596
+ # Encryption is done by setting up a security handler for this purpose and populating the
597
+ # trailer's Encrypt dictionary accordingly. The actual encryption, however, is only done when
598
+ # writing the document.
563
599
  #
564
600
  # The security handler used for encrypting is selected via the +name+ argument. All other
565
601
  # arguments are passed on to the security handler.
@@ -567,9 +603,8 @@ module HexaPDF
567
603
  # If the document should not be encrypted, the +name+ argument has to be set to +nil+. This
568
604
  # removes the security handler and deletes the trailer's Encrypt dictionary.
569
605
  #
570
- # See: HexaPDF::Encryption::SecurityHandler#set_up_encryption and
571
- # HexaPDF::Encryption::StandardSecurityHandler::EncryptionOptions for possible encryption
572
- # options.
606
+ # See: Encryption::SecurityHandler#set_up_encryption and
607
+ # Encryption::StandardSecurityHandler::EncryptionOptions for possible encryption options.
573
608
  def encrypt(name: :Standard, **options)
574
609
  if name.nil?
575
610
  trailer.delete(:Encrypt)
@@ -605,17 +640,16 @@ module HexaPDF
605
640
  # Signs the document and writes it to the given file or IO object.
606
641
  #
607
642
  # For details on the arguments +file_or_io+, +signature+ and +write_options+ see
608
- # HexaPDF::DigitalSignature::Signatures#add.
643
+ # DigitalSignature::Signatures#add.
609
644
  #
610
645
  # The signing handler to be used is determined by the +handler+ argument together with the rest
611
- # of the keyword arguments (see HexaPDF::DigitalSignature::Signatures#signing_handler for
612
- # details).
646
+ # of the keyword arguments (see DigitalSignature::Signatures#signing_handler for details).
613
647
  #
614
- # If not changed, the default signing handler is
615
- # HexaPDF::DigitalSignature::Signing::DefaultHandler.
648
+ # If not changed, the default signing handler is DigitalSignature::Signing::DefaultHandler.
616
649
  #
617
- # *Note*: Once signing is done the document cannot be changed anymore since it was written. If a
618
- # document needs to be signed multiple times, it needs to be loaded again after writing.
650
+ # *Note*: Once signing is done the document cannot be changed anymore since it was written
651
+ # during the signing process. If a document needs to be signed multiple times, it needs to be
652
+ # loaded again afterwards.
619
653
  def sign(file_or_io, handler: :default, signature: nil, write_options: {}, **handler_options)
620
654
  handler = signatures.signing_handler(name: handler, **handler_options)
621
655
  signatures.add(file_or_io, handler, signature: signature, write_options: write_options)
@@ -626,7 +660,7 @@ module HexaPDF
626
660
  #
627
661
  # If a block is given, it is called on validation problems.
628
662
  #
629
- # See HexaPDF::Object#validate for more information.
663
+ # See Object#validate for more information.
630
664
  def validate(auto_correct: true, only_loaded: false, &block) #:yield: msg, correctable, object
631
665
  result = trailer.validate(auto_correct: auto_correct, &block)
632
666
  each(only_loaded: only_loaded) do |obj|
@@ -651,7 +685,7 @@ module HexaPDF
651
685
  # This is needed, for example, when modifying a signed PDF and the original signature should
652
686
  # stay valid.
653
687
  #
654
- # See: PDF1.7 s7.5.6
688
+ # See: PDF2.0 s7.5.6
655
689
  #
656
690
  # validate::
657
691
  # Validates the document and raises an error if an uncorrectable problem is found.
@@ -43,7 +43,7 @@ module HexaPDF
43
43
  # Common interface for AES algorithms
44
44
  #
45
45
  # This module defines the common interface that is used by the security handlers to encrypt or
46
- # decrypt data with AES. It has to be *prepended* by any AES algorithm class.
46
+ # decrypt data with AES. It has to be *prepended* by any specific AES algorithm class.
47
47
  #
48
48
  # See the ClassMethods module for available class level methods of AES algorithms.
49
49
  #
@@ -79,7 +79,7 @@ module HexaPDF
79
79
  # The data is padded using the PKCS#5 padding scheme and the initialization vector is
80
80
  # prepended to the encrypted data.
81
81
  #
82
- # See: PDF1.7 s7.6.2.
82
+ # See: PDF2.0 s7.6.3
83
83
  def encrypt(key, data)
84
84
  iv = random_bytes(BLOCK_SIZE)
85
85
  iv << new(key, iv, :encrypt).process(pad(data))
@@ -112,7 +112,7 @@ module HexaPDF
112
112
  # It is assumed that the initialization vector is included in the first BLOCK_SIZE bytes
113
113
  # of the data. After the decryption the PKCS#5 padding is removed.
114
114
  #
115
- # See: PDF1.7 s7.6.2.
115
+ # See: PDF2.0 s7.6.3
116
116
  def decrypt(key, data)
117
117
  return data if data.empty? # Handle invalid files with empty strings
118
118
  if data.length % BLOCK_SIZE != 0 || data.length < BLOCK_SIZE
@@ -167,7 +167,7 @@ module HexaPDF
167
167
  # Pads the data to a multiple of BLOCK_SIZE using the PKCS#5 padding scheme and returns the
168
168
  # result.
169
169
  #
170
- # See: PDF1.7 s7.6.2
170
+ # See: PDF2.0 s7.6.3
171
171
  def pad(data)
172
172
  padding_length = BLOCK_SIZE - data.size % BLOCK_SIZE
173
173
  data + padding_length.chr * padding_length
@@ -179,7 +179,7 @@ module HexaPDF
179
179
  # In case the padding is not correct as per the specification, it is assumed that there is
180
180
  # no padding and the input is returned as is.
181
181
  #
182
- # See: PDF1.7 s7.6.2
182
+ # See: PDF2.0 s7.6.3
183
183
  def unpad(data)
184
184
  padding_length = data.getbyte(-1)
185
185
  if padding_length > BLOCK_SIZE || padding_length == 0 ||
@@ -65,7 +65,7 @@ module HexaPDF
65
65
 
66
66
  # Encrypts the given +data+ with the +key+.
67
67
  #
68
- # See: PDF1.7 s7.6.2.
68
+ # See: PDF2.0 s7.6.3
69
69
  def encrypt(key, data)
70
70
  new(key).process(data)
71
71
  end
@@ -48,7 +48,7 @@ module HexaPDF
48
48
  #
49
49
  # This implementation is using AES in Cipher Block Chaining (CBC) mode.
50
50
  #
51
- # See: PDF1.7 s7.6.2
51
+ # See: PDF2.0 s7.6.3
52
52
  class FastAES
53
53
 
54
54
  prepend AES
@@ -68,7 +68,7 @@ module HexaPDF
68
68
  @cipher.send(mode)
69
69
  @cipher.key = key
70
70
  @cipher.iv = iv
71
- @cipher.padding = 0
71
+ @cipher.padding = 0 # Padding handled by HexaPDF, also no @cipher.final call needed
72
72
  end
73
73
 
74
74
  # Encrypts or decrypts the given data whose length must be a multiple of 16.
@@ -45,7 +45,7 @@ module HexaPDF
45
45
 
46
46
  # Implementation of the general encryption algorithm ARC4 using OpenSSL as backend.
47
47
  #
48
- # See: PDF1.7 s7.6.2
48
+ # See: PDF2.0 s7.6.3
49
49
  class FastARC4
50
50
 
51
51
  prepend ARC4
@@ -42,7 +42,7 @@ module HexaPDF
42
42
  # This "algorithm" does nothing, i.e. it returns the given data as is without encrypting or
43
43
  # decrypting it.
44
44
  #
45
- # See: PDF1.7 s7.6.5
45
+ # See: PDF2.0 s7.6.6
46
46
  module Identity
47
47
 
48
48
  class << self
@@ -51,7 +51,7 @@ module HexaPDF
51
51
  #
52
52
  # This implementation is using AES in Cipher Block Chaining (CBC) mode.
53
53
  #
54
- # See: PDF1.7 s7.6.2
54
+ # See: PDF2.0 s7.6.3
55
55
  class RubyAES
56
56
 
57
57
  prepend AES
@@ -46,7 +46,7 @@ module HexaPDF
46
46
  #
47
47
  # For reference: This implementation is about 250 times slower than the FastARC4 version.
48
48
  #
49
- # See: PDF1.7 s7.6.2
49
+ # See: PDF2.0 s7.6.3
50
50
  class RubyARC4
51
51
 
52
52
  prepend ARC4
@@ -47,12 +47,12 @@ module HexaPDF
47
47
  # Contains entries common to all encryption dictionaries. If a specific security handler
48
48
  # needs further fields it should derive a new subclass and add the new fields there.
49
49
  #
50
- # See: PDF1.7 s7.6.1
50
+ # See: PDF2.0 s7.6.2
51
51
  class EncryptionDictionary < Dictionary
52
52
 
53
53
  define_field :Filter, type: Symbol, required: true
54
54
  define_field :SubFilter, type: Symbol, version: '1.3'
55
- define_field :V, type: Integer, required: true
55
+ define_field :V, type: Integer, required: true, allowed_values: [1, 2, 4, 5]
56
56
  define_field :Length, type: Integer, default: 40, version: '1.4'
57
57
  define_field :CF, type: Dictionary, version: '1.5'
58
58
  define_field :StmF, type: Symbol, default: :Identity, version: '1.5'
@@ -70,12 +70,8 @@ module HexaPDF
70
70
  # Ensures that the encryption dictionary's content is valid.
71
71
  def perform_validation
72
72
  super
73
- unless [1, 2, 4, 5].include?(value[:V])
74
- yield("Value of /V is not one of 1, 2, 4 or 5", false)
75
- return
76
- end
77
- if value[:V] == 2 && (!key?(:Length) || value[:Length] < 40 ||
78
- value[:Length] > 128 || value[:Length] % 8 != 0)
73
+ length = self[:Length]
74
+ if self[:V] == 2 && (!key?(:Length) || length < 40 || length > 128 || length % 8 != 0)
79
75
  yield("Invalid value for /Length field when /V is 2", false)
80
76
  end
81
77
  end
@@ -94,8 +90,8 @@ module HexaPDF
94
90
  # * The method ::set_up_decryption is used when a security handler should be created from the
95
91
  # document's encryption dictionary.
96
92
  #
97
- # Security handlers could also be created with the ::new method but this is discouraged because
98
- # the above methods provide the correct handling in both cases.
93
+ # It is *not* recommended to create security handlers manually but only with those two methods
94
+ # listed above.
99
95
  #
100
96
  #
101
97
  # == Using SecurityHandler Instances
@@ -107,12 +103,16 @@ module HexaPDF
107
103
  # * #encrypt_string
108
104
  # * #encrypt_stream
109
105
  #
110
- # How the decryption/encryption key is actually computed is deferred to a sub class.
106
+ # How the decryption/encryption key is actually computed is deferred to a sub class, as per the
107
+ # PDF specification.
111
108
  #
112
109
  # Additionally, the #encryption_key_valid? method can be used to check whether the
113
110
  # SecurityHandler instance is built from/built for the current version of the encryption
114
111
  # dictionary.
115
112
  #
113
+ # Note that any manual changes to the encryption dictionary will invalidate the key and lead to
114
+ # an error!
115
+ #
116
116
  #
117
117
  # == Implementing a SecurityHandler Class
118
118
  #
@@ -151,8 +151,8 @@ module HexaPDF
151
151
  # The encryption algorithm.
152
152
  attr_reader :algorithm
153
153
 
154
- # Creates a new encrypted stream data object by utilizing the given stream data object as
155
- # template. The arguments +key+ and +algorithm+ are used for decrypting purposes.
154
+ # Creates a new encrypted stream data object by utilizing the given stream data object +obj+
155
+ # as template. The arguments +key+ and +algorithm+ are used for decrypting purposes.
156
156
  def initialize(obj, key, algorithm)
157
157
  obj.instance_variables.each {|v| instance_variable_set(v, obj.instance_variable_get(v)) }
158
158
  @key = key
@@ -214,7 +214,7 @@ module HexaPDF
214
214
 
215
215
  handler = handler.new(document)
216
216
  dict = document.trailer[:Encrypt] = handler.set_up_decryption(dict, **options)
217
- HexaPDF::Object.make_direct(dict.value)
217
+ HexaPDF::Object.make_direct(dict.value, document)
218
218
  document.revisions.current.update(dict)
219
219
  document.revisions.each do |r|
220
220
  loader = r.loader
@@ -264,7 +264,7 @@ module HexaPDF
264
264
  # Decrypts the strings and the possibly attached stream of the given indirect object in
265
265
  # place.
266
266
  #
267
- # See: PDF1.7 s7.6.2
267
+ # See: PDF2.0 s7.6.3
268
268
  def decrypt(obj)
269
269
  return obj if @is_encrypt_dict[obj] || obj.type == :XRef
270
270
 
@@ -292,7 +292,7 @@ module HexaPDF
292
292
  # Note that some strings won't be encrypted as per the specification. The returned string,
293
293
  # however, is always a different object.
294
294
  #
295
- # See: PDF1.7 s7.6.2
295
+ # See: PDF2.0 s7.6.3
296
296
  def encrypt_string(str, obj)
297
297
  return str.dup if str.empty? || obj == document.trailer[:Encrypt] || obj.type == :XRef ||
298
298
  (obj.type == :Sig && obj[:Contents].equal?(str))
@@ -302,6 +302,9 @@ module HexaPDF
302
302
  end
303
303
 
304
304
  # Returns a Fiber that encrypts the contents of the given stream object.
305
+ #
306
+ # Note that some streams *must not be* encrypted. For those, their standard stream encoding
307
+ # fiber is returned.
305
308
  def encrypt_stream(obj)
306
309
  return obj.stream_encoder if obj.type == :XRef
307
310
 
@@ -321,8 +324,8 @@ module HexaPDF
321
324
  end
322
325
  end
323
326
 
324
- # Computes the encryption key and sets up the algorithms for encrypting the document based on
325
- # the given options, and returns the corresponding encryption dictionary.
327
+ # Computes the encryption key, sets up the algorithms for encrypting the document based on the
328
+ # given options, and returns the corresponding encryption dictionary.
326
329
  #
327
330
  # The security handler specific +options+ as well as the +algorithm+ argument are passed on to
328
331
  # the #prepare_encryption method.
@@ -340,7 +343,7 @@ module HexaPDF
340
343
  # force_v4::
341
344
  # Forces the use of protocol version 4 when key_length=128 and algorithm=:arc4.
342
345
  #
343
- # See: PDF1.7 s7.6.1, PDF2.0 s7.6.1
346
+ # See: PDF2.0 s7.6.2
344
347
  def set_up_encryption(key_length: 128, algorithm: :aes, force_v4: false, **options)
345
348
  @dict = document.wrap({}, type: encryption_dictionary_class)
346
349
 
@@ -382,9 +385,13 @@ module HexaPDF
382
385
  #
383
386
  # The security handler specific +options+ are passed on to the #prepare_decryption method.
384
387
  #
385
- # See: PDF1.7 s7.6.1, PDF2.0 s7.6.1
388
+ # See: PDF2.0 s7.6.2
386
389
  def set_up_decryption(dictionary, **options)
387
390
  @dict = document.wrap(dictionary, type: encryption_dictionary_class)
391
+ @dict.validate do |msg, correctable, obj|
392
+ next if correctable
393
+ raise HexaPDF::Error, "Validation error for encryption dictionary (#{obj.oid},#{obj.gen}): #{msg}"
394
+ end
388
395
 
389
396
  case dict[:V]
390
397
  when 1, 2
@@ -495,7 +502,7 @@ module HexaPDF
495
502
 
496
503
  # Computes the key for decrypting the indirect object with the given algorithm.
497
504
  #
498
- # See: PDF1.7 s7.6.2 (algorithm 1), PDF2.0 s7.6.2.2 (algorithm 1.A)
505
+ # See: PDF2.0 s7.6.3.2 (algorithm 1), PDF2.0 s7.6.3.3 (algorithm 1.A)
499
506
  def object_key(oid, gen, algorithm)
500
507
  key = encryption_key
501
508
  return key if dict[:V] == 5
@@ -508,13 +515,13 @@ module HexaPDF
508
515
 
509
516
  # Returns the length of the encryption key in bytes based on the security handlers version.
510
517
  #
511
- # See: PDF1.7 s7.6.1, PDF2.0 s7.6.1
518
+ # See: PDF2.0 s7.6.2
512
519
  def key_length
513
520
  case dict[:V]
514
521
  when 1 then 5
515
522
  when 2 then dict[:Length] / 8
516
- when 4 then 16 # PDF2.0 s7.6.1 specifies that a /V of 4 is equal to length of 128bit
517
- when 5 then 32 # PDF2.0 s7.6.1 specifies that a /V of 5 is equal to length of 256bit
523
+ when 4 then 16 # PDF2.0 s7.6.2 specifies that a /V of 4 is equal to length of 128bit
524
+ when 5 then 32 # PDF2.0 s7.6.2 specifies that a /V of 5 is equal to length of 256bit
518
525
  end
519
526
  end
520
527