hexapdf 0.32.1 → 0.33.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (205) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +76 -1
  3. data/README.md +9 -0
  4. data/examples/002-graphics.rb +15 -17
  5. data/examples/003-arcs.rb +9 -9
  6. data/examples/009-text_layouter_alignment.rb +1 -1
  7. data/examples/010-text_layouter_inline_boxes.rb +2 -2
  8. data/examples/011-text_layouter_line_wrapping.rb +1 -1
  9. data/examples/012-text_layouter_styling.rb +7 -7
  10. data/examples/013-text_layouter_shapes.rb +1 -1
  11. data/examples/014-text_in_polygon.rb +1 -1
  12. data/examples/015-boxes.rb +8 -7
  13. data/examples/016-frame_automatic_box_placement.rb +2 -2
  14. data/examples/017-frame_text_flow.rb +2 -1
  15. data/examples/018-composer.rb +1 -1
  16. data/examples/020-column_box.rb +2 -1
  17. data/examples/025-table_box.rb +46 -0
  18. data/lib/hexapdf/cli/command.rb +5 -2
  19. data/lib/hexapdf/cli/form.rb +5 -5
  20. data/lib/hexapdf/cli/inspect.rb +3 -3
  21. data/lib/hexapdf/cli.rb +4 -0
  22. data/lib/hexapdf/composer.rb +104 -52
  23. data/lib/hexapdf/configuration.rb +44 -39
  24. data/lib/hexapdf/content/canvas.rb +393 -267
  25. data/lib/hexapdf/content/color_space.rb +72 -25
  26. data/lib/hexapdf/content/graphic_object/arc.rb +57 -24
  27. data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +66 -23
  28. data/lib/hexapdf/content/graphic_object/geom2d.rb +47 -6
  29. data/lib/hexapdf/content/graphic_object/solid_arc.rb +58 -36
  30. data/lib/hexapdf/content/graphic_object.rb +6 -7
  31. data/lib/hexapdf/content/graphics_state.rb +54 -45
  32. data/lib/hexapdf/content/operator.rb +52 -54
  33. data/lib/hexapdf/content/parser.rb +2 -2
  34. data/lib/hexapdf/content/processor.rb +15 -15
  35. data/lib/hexapdf/content/transformation_matrix.rb +1 -1
  36. data/lib/hexapdf/content.rb +5 -0
  37. data/lib/hexapdf/dictionary.rb +6 -5
  38. data/lib/hexapdf/dictionary_fields.rb +42 -14
  39. data/lib/hexapdf/digital_signature/cms_handler.rb +2 -2
  40. data/lib/hexapdf/digital_signature/handler.rb +1 -1
  41. data/lib/hexapdf/digital_signature/pkcs1_handler.rb +2 -3
  42. data/lib/hexapdf/digital_signature/signature.rb +6 -6
  43. data/lib/hexapdf/digital_signature/signatures.rb +13 -12
  44. data/lib/hexapdf/digital_signature/signing/default_handler.rb +14 -5
  45. data/lib/hexapdf/digital_signature/signing/signed_data_creator.rb +2 -4
  46. data/lib/hexapdf/digital_signature/signing/timestamp_handler.rb +4 -4
  47. data/lib/hexapdf/digital_signature/signing.rb +4 -0
  48. data/lib/hexapdf/digital_signature/verification_result.rb +2 -2
  49. data/lib/hexapdf/digital_signature.rb +7 -2
  50. data/lib/hexapdf/document/destinations.rb +12 -11
  51. data/lib/hexapdf/document/files.rb +1 -1
  52. data/lib/hexapdf/document/fonts.rb +1 -1
  53. data/lib/hexapdf/document/layout.rb +167 -39
  54. data/lib/hexapdf/document/pages.rb +3 -2
  55. data/lib/hexapdf/document.rb +89 -55
  56. data/lib/hexapdf/encryption/aes.rb +5 -5
  57. data/lib/hexapdf/encryption/arc4.rb +1 -1
  58. data/lib/hexapdf/encryption/fast_aes.rb +2 -2
  59. data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
  60. data/lib/hexapdf/encryption/identity.rb +1 -1
  61. data/lib/hexapdf/encryption/ruby_aes.rb +1 -1
  62. data/lib/hexapdf/encryption/ruby_arc4.rb +1 -1
  63. data/lib/hexapdf/encryption/security_handler.rb +31 -24
  64. data/lib/hexapdf/encryption/standard_security_handler.rb +45 -36
  65. data/lib/hexapdf/encryption.rb +7 -2
  66. data/lib/hexapdf/error.rb +18 -0
  67. data/lib/hexapdf/filter/ascii85_decode.rb +1 -1
  68. data/lib/hexapdf/filter/ascii_hex_decode.rb +1 -1
  69. data/lib/hexapdf/filter/flate_decode.rb +1 -1
  70. data/lib/hexapdf/filter/lzw_decode.rb +1 -1
  71. data/lib/hexapdf/filter/pass_through.rb +1 -1
  72. data/lib/hexapdf/filter/predictor.rb +1 -1
  73. data/lib/hexapdf/filter/run_length_decode.rb +1 -1
  74. data/lib/hexapdf/filter.rb +55 -6
  75. data/lib/hexapdf/font/cmap/parser.rb +2 -2
  76. data/lib/hexapdf/font/cmap.rb +1 -1
  77. data/lib/hexapdf/font/encoding/difference_encoding.rb +1 -1
  78. data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +1 -1
  79. data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +2 -2
  80. data/lib/hexapdf/font/encoding/standard_encoding.rb +1 -1
  81. data/lib/hexapdf/font/encoding/symbol_encoding.rb +1 -1
  82. data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +3 -3
  83. data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +1 -1
  84. data/lib/hexapdf/font/invalid_glyph.rb +3 -0
  85. data/lib/hexapdf/font/true_type_wrapper.rb +17 -4
  86. data/lib/hexapdf/font/type1_wrapper.rb +19 -4
  87. data/lib/hexapdf/font_loader/from_configuration.rb +5 -2
  88. data/lib/hexapdf/font_loader/from_file.rb +5 -5
  89. data/lib/hexapdf/font_loader/standard14.rb +3 -3
  90. data/lib/hexapdf/font_loader.rb +3 -0
  91. data/lib/hexapdf/image_loader/jpeg.rb +2 -2
  92. data/lib/hexapdf/image_loader/pdf.rb +1 -1
  93. data/lib/hexapdf/image_loader/png.rb +2 -2
  94. data/lib/hexapdf/image_loader.rb +1 -1
  95. data/lib/hexapdf/importer.rb +13 -0
  96. data/lib/hexapdf/layout/box.rb +9 -2
  97. data/lib/hexapdf/layout/box_fitter.rb +2 -2
  98. data/lib/hexapdf/layout/column_box.rb +18 -4
  99. data/lib/hexapdf/layout/frame.rb +30 -12
  100. data/lib/hexapdf/layout/image_box.rb +5 -0
  101. data/lib/hexapdf/layout/inline_box.rb +1 -0
  102. data/lib/hexapdf/layout/list_box.rb +17 -1
  103. data/lib/hexapdf/layout/page_style.rb +4 -4
  104. data/lib/hexapdf/layout/style.rb +18 -3
  105. data/lib/hexapdf/layout/table_box.rb +682 -0
  106. data/lib/hexapdf/layout/text_box.rb +5 -3
  107. data/lib/hexapdf/layout/text_fragment.rb +1 -1
  108. data/lib/hexapdf/layout/text_layouter.rb +12 -4
  109. data/lib/hexapdf/layout.rb +1 -0
  110. data/lib/hexapdf/name_tree_node.rb +1 -1
  111. data/lib/hexapdf/number_tree_node.rb +1 -1
  112. data/lib/hexapdf/object.rb +18 -7
  113. data/lib/hexapdf/parser.rb +8 -8
  114. data/lib/hexapdf/pdf_array.rb +1 -1
  115. data/lib/hexapdf/rectangle.rb +1 -1
  116. data/lib/hexapdf/reference.rb +1 -1
  117. data/lib/hexapdf/revision.rb +1 -1
  118. data/lib/hexapdf/revisions.rb +3 -3
  119. data/lib/hexapdf/serializer.rb +15 -15
  120. data/lib/hexapdf/stream.rb +4 -2
  121. data/lib/hexapdf/tokenizer.rb +14 -14
  122. data/lib/hexapdf/type/acro_form/appearance_generator.rb +22 -22
  123. data/lib/hexapdf/type/acro_form/button_field.rb +1 -1
  124. data/lib/hexapdf/type/acro_form/choice_field.rb +1 -1
  125. data/lib/hexapdf/type/acro_form/field.rb +2 -2
  126. data/lib/hexapdf/type/acro_form/form.rb +1 -1
  127. data/lib/hexapdf/type/acro_form/signature_field.rb +4 -4
  128. data/lib/hexapdf/type/acro_form/text_field.rb +1 -1
  129. data/lib/hexapdf/type/acro_form/variable_text_field.rb +1 -1
  130. data/lib/hexapdf/type/acro_form.rb +1 -1
  131. data/lib/hexapdf/type/action.rb +1 -1
  132. data/lib/hexapdf/type/actions/go_to.rb +1 -1
  133. data/lib/hexapdf/type/actions/go_to_r.rb +1 -1
  134. data/lib/hexapdf/type/actions/launch.rb +1 -1
  135. data/lib/hexapdf/type/actions/uri.rb +1 -1
  136. data/lib/hexapdf/type/actions.rb +1 -1
  137. data/lib/hexapdf/type/annotation.rb +3 -3
  138. data/lib/hexapdf/type/annotations/link.rb +1 -1
  139. data/lib/hexapdf/type/annotations/markup_annotation.rb +1 -1
  140. data/lib/hexapdf/type/annotations/text.rb +1 -1
  141. data/lib/hexapdf/type/annotations/widget.rb +2 -2
  142. data/lib/hexapdf/type/annotations.rb +1 -1
  143. data/lib/hexapdf/type/catalog.rb +1 -1
  144. data/lib/hexapdf/type/cid_font.rb +3 -3
  145. data/lib/hexapdf/type/embedded_file.rb +1 -1
  146. data/lib/hexapdf/type/file_specification.rb +2 -2
  147. data/lib/hexapdf/type/font_descriptor.rb +1 -1
  148. data/lib/hexapdf/type/font_simple.rb +2 -2
  149. data/lib/hexapdf/type/font_type0.rb +3 -3
  150. data/lib/hexapdf/type/font_type3.rb +1 -1
  151. data/lib/hexapdf/type/form.rb +1 -1
  152. data/lib/hexapdf/type/graphics_state_parameter.rb +1 -1
  153. data/lib/hexapdf/type/icon_fit.rb +1 -1
  154. data/lib/hexapdf/type/image.rb +1 -1
  155. data/lib/hexapdf/type/info.rb +1 -1
  156. data/lib/hexapdf/type/mark_information.rb +1 -1
  157. data/lib/hexapdf/type/names.rb +2 -2
  158. data/lib/hexapdf/type/object_stream.rb +7 -3
  159. data/lib/hexapdf/type/outline.rb +1 -1
  160. data/lib/hexapdf/type/outline_item.rb +1 -1
  161. data/lib/hexapdf/type/page.rb +19 -10
  162. data/lib/hexapdf/type/page_label.rb +1 -1
  163. data/lib/hexapdf/type/page_tree_node.rb +1 -1
  164. data/lib/hexapdf/type/resources.rb +1 -1
  165. data/lib/hexapdf/type/trailer.rb +2 -2
  166. data/lib/hexapdf/type/viewer_preferences.rb +1 -1
  167. data/lib/hexapdf/type/xref_stream.rb +2 -2
  168. data/lib/hexapdf/utils/pdf_doc_encoding.rb +1 -1
  169. data/lib/hexapdf/version.rb +1 -1
  170. data/lib/hexapdf/writer.rb +4 -4
  171. data/lib/hexapdf/xref_section.rb +2 -2
  172. data/test/hexapdf/content/graphic_object/test_endpoint_arc.rb +11 -1
  173. data/test/hexapdf/content/graphic_object/test_geom2d.rb +7 -0
  174. data/test/hexapdf/content/test_canvas.rb +0 -1
  175. data/test/hexapdf/digital_signature/test_signatures.rb +22 -0
  176. data/test/hexapdf/document/test_files.rb +2 -2
  177. data/test/hexapdf/document/test_layout.rb +98 -0
  178. data/test/hexapdf/encryption/test_security_handler.rb +12 -11
  179. data/test/hexapdf/encryption/test_standard_security_handler.rb +35 -23
  180. data/test/hexapdf/font/test_true_type_wrapper.rb +18 -1
  181. data/test/hexapdf/font/test_type1_wrapper.rb +15 -1
  182. data/test/hexapdf/layout/test_box.rb +1 -1
  183. data/test/hexapdf/layout/test_column_box.rb +65 -21
  184. data/test/hexapdf/layout/test_frame.rb +14 -14
  185. data/test/hexapdf/layout/test_image_box.rb +4 -0
  186. data/test/hexapdf/layout/test_inline_box.rb +5 -0
  187. data/test/hexapdf/layout/test_list_box.rb +40 -6
  188. data/test/hexapdf/layout/test_page_style.rb +3 -2
  189. data/test/hexapdf/layout/test_style.rb +50 -0
  190. data/test/hexapdf/layout/test_table_box.rb +722 -0
  191. data/test/hexapdf/layout/test_text_box.rb +18 -0
  192. data/test/hexapdf/layout/test_text_layouter.rb +4 -0
  193. data/test/hexapdf/test_dictionary_fields.rb +4 -1
  194. data/test/hexapdf/test_document.rb +1 -0
  195. data/test/hexapdf/test_filter.rb +8 -0
  196. data/test/hexapdf/test_importer.rb +9 -0
  197. data/test/hexapdf/test_object.rb +16 -5
  198. data/test/hexapdf/test_parser.rb +1 -1
  199. data/test/hexapdf/test_stream.rb +7 -0
  200. data/test/hexapdf/test_writer.rb +3 -3
  201. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +13 -5
  202. data/test/hexapdf/type/acro_form/test_form.rb +4 -3
  203. data/test/hexapdf/type/test_object_stream.rb +9 -3
  204. data/test/hexapdf/type/test_page.rb +18 -4
  205. metadata +17 -8
@@ -64,32 +64,43 @@ end
64
64
  #
65
65
  # Here are some pointers to more in depth information:
66
66
  #
67
- # * For information about the command line application, see the HexaPDF::CLI module.
67
+ # * HexaPDF::CLI has information about the accompanying command line application.
68
68
  # * HexaPDF::Document provides information about how to work with a PDF file.
69
+ # * HexaPDF::Composer is the main class for easily creating PDF documents from scratch.
69
70
  # * HexaPDF::Content::Canvas provides the canvas API for drawing/writing on a page or form XObject
71
+ # * HexaPDF::Type::AcroForm::Form is the entry point for working with interactive forms.
72
+ # * HexaPDF::Type::Outline has information on working with outlines/bookmarks.
73
+ # * HexaPDF::Encryption provides information on how encryption works.
74
+ # * HexaPDF::DigitalSignature is the entry point for working with digital signatures.
70
75
  module HexaPDF
71
76
 
72
77
  autoload(:Composer, 'hexapdf/composer')
73
78
 
74
79
  # == HexaPDF::Document
75
80
  #
76
- # Represents one PDF document.
81
+ # Represents a PDF document.
77
82
  #
78
- # A PDF document consists of (indirect) objects, so the main job of this class is to provide
79
- # methods for working with these objects. However, since a PDF document may also be
83
+ # A PDF document essentially consists of (indirect) objects, so the main job of this class is to
84
+ # provide methods for working with these objects. However, since a PDF document may also be
80
85
  # incrementally updated and can therefore contain one or more revisions, there are also methods
81
- # for working with these revisions.
86
+ # for working with these revisions (see Revisions for details).
82
87
  #
83
- # Note: This class provides everything to work on PDF documents on a low-level basis. This means
84
- # that there are no convenience methods for higher PDF functionality. Those can be found in the
85
- # objects linked from here, like #catalog.
88
+ # Additionally, there are many convenience methods for easily accessing the most important PDF
89
+ # functionality, like encrypting a document (#encrypt), working with digital signatures
90
+ # (#signatures), accessing the interactive form data (#acro_form), working with the pages
91
+ # (#pages), fonts (#fonts) and images (#images).
86
92
  #
87
- # == Known Messages
93
+ # Note: This class provides the basis for working with a PDF document. The higher PDF
94
+ # functionality is *not* implemented here but either in the appropriate PDF type classes or in
95
+ # special convenience classes. All this functionality can be accessed via the convenience methods
96
+ # described above.
97
+ #
98
+ # == Available Message Hooks
88
99
  #
89
100
  # The document object provides a basic message dispatch system via #register_listener and
90
101
  # #dispatch_message.
91
102
  #
92
- # Following are the messages that are used by HexaPDF itself:
103
+ # The following messages are used by HexaPDF itself:
93
104
  #
94
105
  # :complete_objects::
95
106
  # This message is called before the first step of writing a document. Listeners should
@@ -138,17 +149,22 @@ module HexaPDF
138
149
  end
139
150
  end
140
151
 
141
- # The configuration for the document.
152
+ # The configuration object for the document.
153
+ #
154
+ # See Configuration for details.
142
155
  attr_reader :config
143
156
 
144
157
  # The revisions of the document.
158
+ #
159
+ # See Revisions.
145
160
  attr_reader :revisions
146
161
 
147
162
  # Creates a new PDF document, either an empty one or one read from the provided +io+.
148
163
  #
149
164
  # When an IO object is provided and it contains an encrypted PDF file, it is automatically
150
165
  # decrypted behind the scenes. The +decryption_opts+ argument has to be set appropriately in
151
- # this case.
166
+ # this case. In case this is not wanted, the configuration option 'document.auto_decrypt' needs
167
+ # to be used.
152
168
  #
153
169
  # Options:
154
170
  #
@@ -183,8 +199,8 @@ module HexaPDF
183
199
  # doc.object(ref) -> obj or nil
184
200
  # doc.object(oid) -> obj or nil
185
201
  #
186
- # Returns the current version of the indirect object for the given exact reference or for the
187
- # given object number.
202
+ # Returns the current version of the indirect object for the given exact reference (see
203
+ # Reference) or for the given object number.
188
204
  #
189
205
  # For references to unknown objects, +nil+ is returned but free objects are represented by a
190
206
  # PDF Null object, not by +nil+!
@@ -199,7 +215,7 @@ module HexaPDF
199
215
  # doc.object?(oid) -> true or false
200
216
  #
201
217
  # Returns +true+ if the document contains an indirect object for the given exact reference
202
- # or for the given object number.
218
+ # (see Reference) or for the given object number.
203
219
  #
204
220
  # Even though this method might return +true+ for some references, #object may return +nil+
205
221
  # because this method takes *all* revisions into account. Also see the discussion on #each for
@@ -212,7 +228,7 @@ module HexaPDF
212
228
 
213
229
  # Dereferences the given object.
214
230
  #
215
- # Return the object itself if it is not a reference, or the indirect object specified by the
231
+ # Returns the object itself if it is not a reference, or the indirect object specified by the
216
232
  # reference.
217
233
  def deref(obj)
218
234
  obj.kind_of?(Reference) ? object(obj) : obj
@@ -227,7 +243,7 @@ module HexaPDF
227
243
  # HexaPDF::Object. If it is not the latter, #wrap is called with the object and the
228
244
  # additional keyword arguments.
229
245
  #
230
- # See: Revisions#add_object
246
+ # See: #wrap, Revisions#add_object
231
247
  def add(obj, **wrap_opts)
232
248
  obj = wrap(obj, **wrap_opts) unless obj.kind_of?(HexaPDF::Object)
233
249
 
@@ -266,14 +282,14 @@ module HexaPDF
266
282
  HexaPDF::Importer.for(self).import(obj, source: source)
267
283
  end
268
284
 
269
- # Wraps the given object inside a HexaPDF::Object class which allows one to use
285
+ # Wraps the given object inside a HexaPDF::Object (sub)class which allows one to use
270
286
  # convenience functions to work with the object.
271
287
  #
272
288
  # The +obj+ argument can also be a HexaPDF::Object object so that it can be re-wrapped if
273
- # needed.
289
+ # necessary.
274
290
  #
275
291
  # The class of the returned object is always a subclass of HexaPDF::Object (or of
276
- # HexaPDF::Stream if a +stream+ is given). Which subclass is used, depends on the values of the
292
+ # HexaPDF::Stream if +stream+ is given). Which subclass is used, depends on the values of the
277
293
  # +type+ and +subtype+ options as well as on the 'object.type_map' and 'object.subtype_map'
278
294
  # global configuration options:
279
295
  #
@@ -291,13 +307,13 @@ module HexaPDF
291
307
  #
292
308
  # * If there is no valid class after the above steps, HexaPDF::Stream is used if a stream is
293
309
  # given, HexaPDF::Dictionary if the given object is a hash, HexaPDF::PDFArray if it is an
294
- # array or else HexaPDF::Object is used.
310
+ # array or else HexaPDF::Object.
295
311
  #
296
312
  # Options:
297
313
  #
298
314
  # :type:: (Symbol or Class) The type of a PDF object that should be used for wrapping. This
299
315
  # could be, for example, :Pages. If a class object is provided, it is used directly
300
- # instead of the type detection system.
316
+ # instead of determining the class through the type detection system.
301
317
  #
302
318
  # :subtype:: (Symbol) The subtype of a PDF object which further qualifies a type. For
303
319
  # example, image objects in PDF have a type of :XObject and a subtype of :Image.
@@ -341,7 +357,9 @@ module HexaPDF
341
357
  if subtype
342
358
  sub_klass = GlobalConfiguration.constantize('object.subtype_map', type, subtype) { klass }
343
359
  if type ||
344
- sub_klass&.each_field&.none? {|name, field| field.required? && !data.value.key?(name) }
360
+ sub_klass&.each_field&.none? do |name, field|
361
+ field.required? && !data.value.key?(name) && name != :Type
362
+ end
345
363
  klass = sub_klass
346
364
  end
347
365
  end
@@ -410,6 +428,11 @@ module HexaPDF
410
428
  # doc.register_listener(name) {|*args| block} -> block
411
429
  #
412
430
  # Registers the given listener for the message +name+.
431
+ #
432
+ # If +callable+ is provided, it needs to be an Object responding to #call. Otherwise the block
433
+ # has to be provided. The arguments that are provided to the #call method depend on the message.
434
+ #
435
+ # See: dispatch_message
413
436
  def register_listener(name, callable = nil, &block)
414
437
  callable ||= block
415
438
  (@listeners[name] ||= []) << callable
@@ -420,6 +443,8 @@ module HexaPDF
420
443
  #
421
444
  # See the main Document documentation for an overview of messages that are used by HexaPDF
422
445
  # itself.
446
+ #
447
+ # See: register_listener
423
448
  def dispatch_message(name, *args)
424
449
  @listeners[name]&.each {|obj| obj.call(*args) }
425
450
  end
@@ -427,10 +452,10 @@ module HexaPDF
427
452
  UNSET = ::Object.new # :nordoc:
428
453
 
429
454
  # Caches and returns the given +value+ or the value of the given block using the given
430
- # +pdf_data+ and +key+ arguments as composite cache key. If a cached value already exists and
431
- # +update+ is +false+, the cached value is just returned.
455
+ # +pdf_data+ and +key+ arguments as composite cache key.
432
456
  #
433
- # Set +update+ to +true+ to force an update of the cached value.
457
+ # If a cached value already exists and +update+ is +false+, the cached value is just returned.
458
+ # If +update+ is set to +true+, an update of the cached value is forced.
434
459
  #
435
460
  # This facility can be used to cache expensive operations in PDF objects that are easy to
436
461
  # compute again.
@@ -444,7 +469,7 @@ module HexaPDF
444
469
  # Returns +true+ if there is a value cached for the composite key consisting of the given
445
470
  # +pdf_data+ and +key+ objects.
446
471
  #
447
- # Also see: #cache
472
+ # See: #cache
448
473
  def cached?(pdf_data, key)
449
474
  @cache.key?(pdf_data) && @cache[pdf_data].key?(key)
450
475
  end
@@ -455,29 +480,32 @@ module HexaPDF
455
480
  # It is *not* recommended to clear the whole cache! Better clear the cache for individual PDF
456
481
  # objects!
457
482
  #
458
- # Also see: #cache
483
+ # See: #cache, #cached?
459
484
  def clear_cache(pdf_data = nil)
460
485
  pdf_data ? @cache[pdf_data].clear : @cache.clear
461
486
  end
462
487
 
463
- # Returns the Pages object that provides convenience methods for working with pages.
488
+ # Returns the Pages object that provides convenience methods for working with the pages of the
489
+ # PDF file.
464
490
  #
465
- # Also see: HexaPDF::Type::PageTreeNode
491
+ # See: Pages, Type::PageTreeNode
466
492
  def pages
467
493
  @pages ||= Pages.new(self)
468
494
  end
469
495
 
470
- # Returns the Images object that provides convenience methods for working with images.
496
+ # Returns the Images object that provides convenience methods for working with images (e.g.
497
+ # adding them to the PDF or listing them).
471
498
  def images
472
499
  @images ||= Images.new(self)
473
500
  end
474
501
 
475
- # Returns the Files object that provides convenience methods for working with files.
502
+ # Returns the Files object that provides convenience methods for working with embedded files.
476
503
  def files
477
504
  @files ||= Files.new(self)
478
505
  end
479
506
 
480
- # Returns the Fonts object that provides convenience methods for working with fonts.
507
+ # Returns the Fonts object that provides convenience methods for working with the fonts used in
508
+ # the PDF file.
481
509
  def fonts
482
510
  @fonts ||= Fonts.new(self)
483
511
  end
@@ -496,14 +524,16 @@ module HexaPDF
496
524
 
497
525
  # Returns the main AcroForm object for dealing with interactive forms.
498
526
  #
499
- # See HexaPDF::Type::Catalog#acro_form for details on the arguments.
527
+ # The meaning of the +create+ argument is detailed at Type::Catalog#acro_form.
528
+ #
529
+ # See: Type::AcroForm::Form
500
530
  def acro_form(create: false)
501
531
  catalog.acro_form(create: create)
502
532
  end
503
533
 
504
- # Returns the main document outline object.
534
+ # Returns the entry object to the document outline (a.k.a. bookmarks).
505
535
  #
506
- # See HexaPDF::Type::Outline for details.
536
+ # See: Type::Outline
507
537
  def outline
508
538
  catalog.outline
509
539
  end
@@ -513,7 +543,7 @@ module HexaPDF
513
543
  # Tasks provide an extensible way for performing operations on a PDF document without
514
544
  # cluttering the Document interface.
515
545
  #
516
- # See Task for more information.
546
+ # See: Task
517
547
  def task(name, **opts, &block)
518
548
  task = config.constantize('task.map', name) do
519
549
  raise HexaPDF::Error, "No task named '#{name}' is available"
@@ -522,11 +552,15 @@ module HexaPDF
522
552
  end
523
553
 
524
554
  # Returns the trailer dictionary for the document.
555
+ #
556
+ # See: Type::Trailer
525
557
  def trailer
526
558
  @revisions.current.trailer
527
559
  end
528
560
 
529
561
  # Returns the document's catalog, the root of the object tree.
562
+ #
563
+ # See: Type::Catalog
530
564
  def catalog
531
565
  trailer.catalog
532
566
  end
@@ -537,14 +571,16 @@ module HexaPDF
537
571
  # version has been set manually and the catalog's /Version key refers to a later version, the
538
572
  # later version is used.
539
573
  #
540
- # See: PDF1.7 s7.2.2
574
+ # See: PDF2.0 s7.2.2
541
575
  def version
542
576
  catalog_version = (catalog[:Version] || '1.0').to_s
543
577
  (@version < catalog_version ? catalog_version : @version)
544
578
  end
545
579
 
546
- # Sets the version of the PDF document. The argument must be a string in the format 'M.N'
547
- # where M is the major version and N the minor version (e.g. '1.4' or '2.0').
580
+ # Sets the version of the PDF document.
581
+ #
582
+ # The argument +value+ must be a string in the format 'M.N' where M is the major version and N
583
+ # the minor version (e.g. '1.4' or '2.0').
548
584
  def version=(value)
549
585
  raise ArgumentError, "PDF version must follow format M.N" unless value.to_s.match?(/\A\d\.\d\z/)
550
586
  @version = value.to_s
@@ -557,9 +593,9 @@ module HexaPDF
557
593
 
558
594
  # Encrypts the document.
559
595
  #
560
- # This is done by setting up a security handler for this purpose and populating the trailer's
561
- # Encrypt dictionary accordingly. The actual encryption, however, is only done when writing the
562
- # document.
596
+ # Encryption is done by setting up a security handler for this purpose and populating the
597
+ # trailer's Encrypt dictionary accordingly. The actual encryption, however, is only done when
598
+ # writing the document.
563
599
  #
564
600
  # The security handler used for encrypting is selected via the +name+ argument. All other
565
601
  # arguments are passed on to the security handler.
@@ -567,9 +603,8 @@ module HexaPDF
567
603
  # If the document should not be encrypted, the +name+ argument has to be set to +nil+. This
568
604
  # removes the security handler and deletes the trailer's Encrypt dictionary.
569
605
  #
570
- # See: HexaPDF::Encryption::SecurityHandler#set_up_encryption and
571
- # HexaPDF::Encryption::StandardSecurityHandler::EncryptionOptions for possible encryption
572
- # options.
606
+ # See: Encryption::SecurityHandler#set_up_encryption and
607
+ # Encryption::StandardSecurityHandler::EncryptionOptions for possible encryption options.
573
608
  def encrypt(name: :Standard, **options)
574
609
  if name.nil?
575
610
  trailer.delete(:Encrypt)
@@ -605,17 +640,16 @@ module HexaPDF
605
640
  # Signs the document and writes it to the given file or IO object.
606
641
  #
607
642
  # For details on the arguments +file_or_io+, +signature+ and +write_options+ see
608
- # HexaPDF::DigitalSignature::Signatures#add.
643
+ # DigitalSignature::Signatures#add.
609
644
  #
610
645
  # The signing handler to be used is determined by the +handler+ argument together with the rest
611
- # of the keyword arguments (see HexaPDF::DigitalSignature::Signatures#signing_handler for
612
- # details).
646
+ # of the keyword arguments (see DigitalSignature::Signatures#signing_handler for details).
613
647
  #
614
- # If not changed, the default signing handler is
615
- # HexaPDF::DigitalSignature::Signing::DefaultHandler.
648
+ # If not changed, the default signing handler is DigitalSignature::Signing::DefaultHandler.
616
649
  #
617
- # *Note*: Once signing is done the document cannot be changed anymore since it was written. If a
618
- # document needs to be signed multiple times, it needs to be loaded again after writing.
650
+ # *Note*: Once signing is done the document cannot be changed anymore since it was written
651
+ # during the signing process. If a document needs to be signed multiple times, it needs to be
652
+ # loaded again afterwards.
619
653
  def sign(file_or_io, handler: :default, signature: nil, write_options: {}, **handler_options)
620
654
  handler = signatures.signing_handler(name: handler, **handler_options)
621
655
  signatures.add(file_or_io, handler, signature: signature, write_options: write_options)
@@ -626,7 +660,7 @@ module HexaPDF
626
660
  #
627
661
  # If a block is given, it is called on validation problems.
628
662
  #
629
- # See HexaPDF::Object#validate for more information.
663
+ # See Object#validate for more information.
630
664
  def validate(auto_correct: true, only_loaded: false, &block) #:yield: msg, correctable, object
631
665
  result = trailer.validate(auto_correct: auto_correct, &block)
632
666
  each(only_loaded: only_loaded) do |obj|
@@ -651,7 +685,7 @@ module HexaPDF
651
685
  # This is needed, for example, when modifying a signed PDF and the original signature should
652
686
  # stay valid.
653
687
  #
654
- # See: PDF1.7 s7.5.6
688
+ # See: PDF2.0 s7.5.6
655
689
  #
656
690
  # validate::
657
691
  # Validates the document and raises an error if an uncorrectable problem is found.
@@ -43,7 +43,7 @@ module HexaPDF
43
43
  # Common interface for AES algorithms
44
44
  #
45
45
  # This module defines the common interface that is used by the security handlers to encrypt or
46
- # decrypt data with AES. It has to be *prepended* by any AES algorithm class.
46
+ # decrypt data with AES. It has to be *prepended* by any specific AES algorithm class.
47
47
  #
48
48
  # See the ClassMethods module for available class level methods of AES algorithms.
49
49
  #
@@ -79,7 +79,7 @@ module HexaPDF
79
79
  # The data is padded using the PKCS#5 padding scheme and the initialization vector is
80
80
  # prepended to the encrypted data.
81
81
  #
82
- # See: PDF1.7 s7.6.2.
82
+ # See: PDF2.0 s7.6.3
83
83
  def encrypt(key, data)
84
84
  iv = random_bytes(BLOCK_SIZE)
85
85
  iv << new(key, iv, :encrypt).process(pad(data))
@@ -112,7 +112,7 @@ module HexaPDF
112
112
  # It is assumed that the initialization vector is included in the first BLOCK_SIZE bytes
113
113
  # of the data. After the decryption the PKCS#5 padding is removed.
114
114
  #
115
- # See: PDF1.7 s7.6.2.
115
+ # See: PDF2.0 s7.6.3
116
116
  def decrypt(key, data)
117
117
  return data if data.empty? # Handle invalid files with empty strings
118
118
  if data.length % BLOCK_SIZE != 0 || data.length < BLOCK_SIZE
@@ -167,7 +167,7 @@ module HexaPDF
167
167
  # Pads the data to a multiple of BLOCK_SIZE using the PKCS#5 padding scheme and returns the
168
168
  # result.
169
169
  #
170
- # See: PDF1.7 s7.6.2
170
+ # See: PDF2.0 s7.6.3
171
171
  def pad(data)
172
172
  padding_length = BLOCK_SIZE - data.size % BLOCK_SIZE
173
173
  data + padding_length.chr * padding_length
@@ -179,7 +179,7 @@ module HexaPDF
179
179
  # In case the padding is not correct as per the specification, it is assumed that there is
180
180
  # no padding and the input is returned as is.
181
181
  #
182
- # See: PDF1.7 s7.6.2
182
+ # See: PDF2.0 s7.6.3
183
183
  def unpad(data)
184
184
  padding_length = data.getbyte(-1)
185
185
  if padding_length > BLOCK_SIZE || padding_length == 0 ||
@@ -65,7 +65,7 @@ module HexaPDF
65
65
 
66
66
  # Encrypts the given +data+ with the +key+.
67
67
  #
68
- # See: PDF1.7 s7.6.2.
68
+ # See: PDF2.0 s7.6.3
69
69
  def encrypt(key, data)
70
70
  new(key).process(data)
71
71
  end
@@ -48,7 +48,7 @@ module HexaPDF
48
48
  #
49
49
  # This implementation is using AES in Cipher Block Chaining (CBC) mode.
50
50
  #
51
- # See: PDF1.7 s7.6.2
51
+ # See: PDF2.0 s7.6.3
52
52
  class FastAES
53
53
 
54
54
  prepend AES
@@ -68,7 +68,7 @@ module HexaPDF
68
68
  @cipher.send(mode)
69
69
  @cipher.key = key
70
70
  @cipher.iv = iv
71
- @cipher.padding = 0
71
+ @cipher.padding = 0 # Padding handled by HexaPDF, also no @cipher.final call needed
72
72
  end
73
73
 
74
74
  # Encrypts or decrypts the given data whose length must be a multiple of 16.
@@ -45,7 +45,7 @@ module HexaPDF
45
45
 
46
46
  # Implementation of the general encryption algorithm ARC4 using OpenSSL as backend.
47
47
  #
48
- # See: PDF1.7 s7.6.2
48
+ # See: PDF2.0 s7.6.3
49
49
  class FastARC4
50
50
 
51
51
  prepend ARC4
@@ -42,7 +42,7 @@ module HexaPDF
42
42
  # This "algorithm" does nothing, i.e. it returns the given data as is without encrypting or
43
43
  # decrypting it.
44
44
  #
45
- # See: PDF1.7 s7.6.5
45
+ # See: PDF2.0 s7.6.6
46
46
  module Identity
47
47
 
48
48
  class << self
@@ -51,7 +51,7 @@ module HexaPDF
51
51
  #
52
52
  # This implementation is using AES in Cipher Block Chaining (CBC) mode.
53
53
  #
54
- # See: PDF1.7 s7.6.2
54
+ # See: PDF2.0 s7.6.3
55
55
  class RubyAES
56
56
 
57
57
  prepend AES
@@ -46,7 +46,7 @@ module HexaPDF
46
46
  #
47
47
  # For reference: This implementation is about 250 times slower than the FastARC4 version.
48
48
  #
49
- # See: PDF1.7 s7.6.2
49
+ # See: PDF2.0 s7.6.3
50
50
  class RubyARC4
51
51
 
52
52
  prepend ARC4
@@ -47,12 +47,12 @@ module HexaPDF
47
47
  # Contains entries common to all encryption dictionaries. If a specific security handler
48
48
  # needs further fields it should derive a new subclass and add the new fields there.
49
49
  #
50
- # See: PDF1.7 s7.6.1
50
+ # See: PDF2.0 s7.6.2
51
51
  class EncryptionDictionary < Dictionary
52
52
 
53
53
  define_field :Filter, type: Symbol, required: true
54
54
  define_field :SubFilter, type: Symbol, version: '1.3'
55
- define_field :V, type: Integer, required: true
55
+ define_field :V, type: Integer, required: true, allowed_values: [1, 2, 4, 5]
56
56
  define_field :Length, type: Integer, default: 40, version: '1.4'
57
57
  define_field :CF, type: Dictionary, version: '1.5'
58
58
  define_field :StmF, type: Symbol, default: :Identity, version: '1.5'
@@ -70,12 +70,8 @@ module HexaPDF
70
70
  # Ensures that the encryption dictionary's content is valid.
71
71
  def perform_validation
72
72
  super
73
- unless [1, 2, 4, 5].include?(value[:V])
74
- yield("Value of /V is not one of 1, 2, 4 or 5", false)
75
- return
76
- end
77
- if value[:V] == 2 && (!key?(:Length) || value[:Length] < 40 ||
78
- value[:Length] > 128 || value[:Length] % 8 != 0)
73
+ length = self[:Length]
74
+ if self[:V] == 2 && (!key?(:Length) || length < 40 || length > 128 || length % 8 != 0)
79
75
  yield("Invalid value for /Length field when /V is 2", false)
80
76
  end
81
77
  end
@@ -94,8 +90,8 @@ module HexaPDF
94
90
  # * The method ::set_up_decryption is used when a security handler should be created from the
95
91
  # document's encryption dictionary.
96
92
  #
97
- # Security handlers could also be created with the ::new method but this is discouraged because
98
- # the above methods provide the correct handling in both cases.
93
+ # It is *not* recommended to create security handlers manually but only with those two methods
94
+ # listed above.
99
95
  #
100
96
  #
101
97
  # == Using SecurityHandler Instances
@@ -107,12 +103,16 @@ module HexaPDF
107
103
  # * #encrypt_string
108
104
  # * #encrypt_stream
109
105
  #
110
- # How the decryption/encryption key is actually computed is deferred to a sub class.
106
+ # How the decryption/encryption key is actually computed is deferred to a sub class, as per the
107
+ # PDF specification.
111
108
  #
112
109
  # Additionally, the #encryption_key_valid? method can be used to check whether the
113
110
  # SecurityHandler instance is built from/built for the current version of the encryption
114
111
  # dictionary.
115
112
  #
113
+ # Note that any manual changes to the encryption dictionary will invalidate the key and lead to
114
+ # an error!
115
+ #
116
116
  #
117
117
  # == Implementing a SecurityHandler Class
118
118
  #
@@ -151,8 +151,8 @@ module HexaPDF
151
151
  # The encryption algorithm.
152
152
  attr_reader :algorithm
153
153
 
154
- # Creates a new encrypted stream data object by utilizing the given stream data object as
155
- # template. The arguments +key+ and +algorithm+ are used for decrypting purposes.
154
+ # Creates a new encrypted stream data object by utilizing the given stream data object +obj+
155
+ # as template. The arguments +key+ and +algorithm+ are used for decrypting purposes.
156
156
  def initialize(obj, key, algorithm)
157
157
  obj.instance_variables.each {|v| instance_variable_set(v, obj.instance_variable_get(v)) }
158
158
  @key = key
@@ -214,7 +214,7 @@ module HexaPDF
214
214
 
215
215
  handler = handler.new(document)
216
216
  dict = document.trailer[:Encrypt] = handler.set_up_decryption(dict, **options)
217
- HexaPDF::Object.make_direct(dict.value)
217
+ HexaPDF::Object.make_direct(dict.value, document)
218
218
  document.revisions.current.update(dict)
219
219
  document.revisions.each do |r|
220
220
  loader = r.loader
@@ -264,7 +264,7 @@ module HexaPDF
264
264
  # Decrypts the strings and the possibly attached stream of the given indirect object in
265
265
  # place.
266
266
  #
267
- # See: PDF1.7 s7.6.2
267
+ # See: PDF2.0 s7.6.3
268
268
  def decrypt(obj)
269
269
  return obj if @is_encrypt_dict[obj] || obj.type == :XRef
270
270
 
@@ -292,7 +292,7 @@ module HexaPDF
292
292
  # Note that some strings won't be encrypted as per the specification. The returned string,
293
293
  # however, is always a different object.
294
294
  #
295
- # See: PDF1.7 s7.6.2
295
+ # See: PDF2.0 s7.6.3
296
296
  def encrypt_string(str, obj)
297
297
  return str.dup if str.empty? || obj == document.trailer[:Encrypt] || obj.type == :XRef ||
298
298
  (obj.type == :Sig && obj[:Contents].equal?(str))
@@ -302,6 +302,9 @@ module HexaPDF
302
302
  end
303
303
 
304
304
  # Returns a Fiber that encrypts the contents of the given stream object.
305
+ #
306
+ # Note that some streams *must not be* encrypted. For those, their standard stream encoding
307
+ # fiber is returned.
305
308
  def encrypt_stream(obj)
306
309
  return obj.stream_encoder if obj.type == :XRef
307
310
 
@@ -321,8 +324,8 @@ module HexaPDF
321
324
  end
322
325
  end
323
326
 
324
- # Computes the encryption key and sets up the algorithms for encrypting the document based on
325
- # the given options, and returns the corresponding encryption dictionary.
327
+ # Computes the encryption key, sets up the algorithms for encrypting the document based on the
328
+ # given options, and returns the corresponding encryption dictionary.
326
329
  #
327
330
  # The security handler specific +options+ as well as the +algorithm+ argument are passed on to
328
331
  # the #prepare_encryption method.
@@ -340,7 +343,7 @@ module HexaPDF
340
343
  # force_v4::
341
344
  # Forces the use of protocol version 4 when key_length=128 and algorithm=:arc4.
342
345
  #
343
- # See: PDF1.7 s7.6.1, PDF2.0 s7.6.1
346
+ # See: PDF2.0 s7.6.2
344
347
  def set_up_encryption(key_length: 128, algorithm: :aes, force_v4: false, **options)
345
348
  @dict = document.wrap({}, type: encryption_dictionary_class)
346
349
 
@@ -382,9 +385,13 @@ module HexaPDF
382
385
  #
383
386
  # The security handler specific +options+ are passed on to the #prepare_decryption method.
384
387
  #
385
- # See: PDF1.7 s7.6.1, PDF2.0 s7.6.1
388
+ # See: PDF2.0 s7.6.2
386
389
  def set_up_decryption(dictionary, **options)
387
390
  @dict = document.wrap(dictionary, type: encryption_dictionary_class)
391
+ @dict.validate do |msg, correctable, obj|
392
+ next if correctable
393
+ raise HexaPDF::Error, "Validation error for encryption dictionary (#{obj.oid},#{obj.gen}): #{msg}"
394
+ end
388
395
 
389
396
  case dict[:V]
390
397
  when 1, 2
@@ -495,7 +502,7 @@ module HexaPDF
495
502
 
496
503
  # Computes the key for decrypting the indirect object with the given algorithm.
497
504
  #
498
- # See: PDF1.7 s7.6.2 (algorithm 1), PDF2.0 s7.6.2.2 (algorithm 1.A)
505
+ # See: PDF2.0 s7.6.3.2 (algorithm 1), PDF2.0 s7.6.3.3 (algorithm 1.A)
499
506
  def object_key(oid, gen, algorithm)
500
507
  key = encryption_key
501
508
  return key if dict[:V] == 5
@@ -508,13 +515,13 @@ module HexaPDF
508
515
 
509
516
  # Returns the length of the encryption key in bytes based on the security handler's version.
510
517
  #
511
- # See: PDF1.7 s7.6.1, PDF2.0 s7.6.1
518
+ # See: PDF2.0 s7.6.2
512
519
  def key_length
513
520
  case dict[:V]
514
521
  when 1 then 5
515
522
  when 2 then dict[:Length] / 8
516
- when 4 then 16 # PDF2.0 s7.6.1 specifies that a /V of 4 is equal to length of 128bit
517
- when 5 then 32 # PDF2.0 s7.6.1 specifies that a /V of 5 is equal to length of 256bit
523
+ when 4 then 16 # PDF2.0 s7.6.2 specifies that a /V of 4 is equal to length of 128bit
524
+ when 5 then 32 # PDF2.0 s7.6.2 specifies that a /V of 5 is equal to length of 256bit
518
525
  end
519
526
  end
520
527