hexapdf 0.32.2 → 0.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +63 -1
  3. data/README.md +9 -0
  4. data/examples/002-graphics.rb +15 -17
  5. data/examples/003-arcs.rb +9 -9
  6. data/examples/009-text_layouter_alignment.rb +1 -1
  7. data/examples/010-text_layouter_inline_boxes.rb +2 -2
  8. data/examples/011-text_layouter_line_wrapping.rb +1 -1
  9. data/examples/012-text_layouter_styling.rb +7 -7
  10. data/examples/013-text_layouter_shapes.rb +1 -1
  11. data/examples/014-text_in_polygon.rb +1 -1
  12. data/examples/015-boxes.rb +8 -7
  13. data/examples/016-frame_automatic_box_placement.rb +2 -2
  14. data/examples/017-frame_text_flow.rb +2 -1
  15. data/examples/018-composer.rb +1 -1
  16. data/examples/020-column_box.rb +2 -1
  17. data/examples/025-table_box.rb +46 -0
  18. data/lib/hexapdf/cli/command.rb +5 -2
  19. data/lib/hexapdf/cli/form.rb +5 -5
  20. data/lib/hexapdf/cli/inspect.rb +3 -3
  21. data/lib/hexapdf/composer.rb +104 -52
  22. data/lib/hexapdf/configuration.rb +44 -39
  23. data/lib/hexapdf/content/canvas.rb +393 -267
  24. data/lib/hexapdf/content/color_space.rb +72 -25
  25. data/lib/hexapdf/content/graphic_object/arc.rb +57 -24
  26. data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +66 -23
  27. data/lib/hexapdf/content/graphic_object/geom2d.rb +47 -6
  28. data/lib/hexapdf/content/graphic_object/solid_arc.rb +58 -36
  29. data/lib/hexapdf/content/graphic_object.rb +6 -7
  30. data/lib/hexapdf/content/graphics_state.rb +54 -45
  31. data/lib/hexapdf/content/operator.rb +52 -54
  32. data/lib/hexapdf/content/parser.rb +2 -2
  33. data/lib/hexapdf/content/processor.rb +15 -15
  34. data/lib/hexapdf/content/transformation_matrix.rb +1 -1
  35. data/lib/hexapdf/content.rb +5 -0
  36. data/lib/hexapdf/dictionary.rb +6 -5
  37. data/lib/hexapdf/dictionary_fields.rb +42 -14
  38. data/lib/hexapdf/digital_signature/cms_handler.rb +2 -2
  39. data/lib/hexapdf/digital_signature/handler.rb +1 -1
  40. data/lib/hexapdf/digital_signature/pkcs1_handler.rb +2 -3
  41. data/lib/hexapdf/digital_signature/signature.rb +6 -6
  42. data/lib/hexapdf/digital_signature/signatures.rb +13 -12
  43. data/lib/hexapdf/digital_signature/signing/default_handler.rb +14 -5
  44. data/lib/hexapdf/digital_signature/signing/signed_data_creator.rb +2 -4
  45. data/lib/hexapdf/digital_signature/signing/timestamp_handler.rb +4 -4
  46. data/lib/hexapdf/digital_signature/signing.rb +4 -0
  47. data/lib/hexapdf/digital_signature/verification_result.rb +2 -2
  48. data/lib/hexapdf/digital_signature.rb +7 -2
  49. data/lib/hexapdf/document/destinations.rb +12 -11
  50. data/lib/hexapdf/document/files.rb +1 -1
  51. data/lib/hexapdf/document/fonts.rb +1 -1
  52. data/lib/hexapdf/document/layout.rb +167 -39
  53. data/lib/hexapdf/document/pages.rb +3 -2
  54. data/lib/hexapdf/document.rb +89 -55
  55. data/lib/hexapdf/encryption/aes.rb +5 -5
  56. data/lib/hexapdf/encryption/arc4.rb +1 -1
  57. data/lib/hexapdf/encryption/fast_aes.rb +2 -2
  58. data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
  59. data/lib/hexapdf/encryption/identity.rb +1 -1
  60. data/lib/hexapdf/encryption/ruby_aes.rb +1 -1
  61. data/lib/hexapdf/encryption/ruby_arc4.rb +1 -1
  62. data/lib/hexapdf/encryption/security_handler.rb +31 -24
  63. data/lib/hexapdf/encryption/standard_security_handler.rb +45 -36
  64. data/lib/hexapdf/encryption.rb +7 -2
  65. data/lib/hexapdf/error.rb +18 -0
  66. data/lib/hexapdf/filter/ascii85_decode.rb +1 -1
  67. data/lib/hexapdf/filter/ascii_hex_decode.rb +1 -1
  68. data/lib/hexapdf/filter/flate_decode.rb +1 -1
  69. data/lib/hexapdf/filter/lzw_decode.rb +1 -1
  70. data/lib/hexapdf/filter/pass_through.rb +1 -1
  71. data/lib/hexapdf/filter/predictor.rb +1 -1
  72. data/lib/hexapdf/filter/run_length_decode.rb +1 -1
  73. data/lib/hexapdf/filter.rb +55 -6
  74. data/lib/hexapdf/font/cmap/parser.rb +2 -2
  75. data/lib/hexapdf/font/cmap.rb +1 -1
  76. data/lib/hexapdf/font/encoding/difference_encoding.rb +1 -1
  77. data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +1 -1
  78. data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +2 -2
  79. data/lib/hexapdf/font/encoding/standard_encoding.rb +1 -1
  80. data/lib/hexapdf/font/encoding/symbol_encoding.rb +1 -1
  81. data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +3 -3
  82. data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +1 -1
  83. data/lib/hexapdf/font/invalid_glyph.rb +3 -0
  84. data/lib/hexapdf/font/true_type_wrapper.rb +17 -4
  85. data/lib/hexapdf/font/type1_wrapper.rb +19 -4
  86. data/lib/hexapdf/font_loader/from_configuration.rb +5 -2
  87. data/lib/hexapdf/font_loader/from_file.rb +5 -5
  88. data/lib/hexapdf/font_loader/standard14.rb +3 -3
  89. data/lib/hexapdf/font_loader.rb +3 -0
  90. data/lib/hexapdf/image_loader/jpeg.rb +2 -2
  91. data/lib/hexapdf/image_loader/pdf.rb +1 -1
  92. data/lib/hexapdf/image_loader/png.rb +2 -2
  93. data/lib/hexapdf/image_loader.rb +1 -1
  94. data/lib/hexapdf/importer.rb +13 -0
  95. data/lib/hexapdf/layout/box.rb +9 -2
  96. data/lib/hexapdf/layout/box_fitter.rb +2 -2
  97. data/lib/hexapdf/layout/column_box.rb +18 -4
  98. data/lib/hexapdf/layout/frame.rb +30 -12
  99. data/lib/hexapdf/layout/image_box.rb +5 -0
  100. data/lib/hexapdf/layout/inline_box.rb +1 -0
  101. data/lib/hexapdf/layout/list_box.rb +17 -1
  102. data/lib/hexapdf/layout/page_style.rb +4 -4
  103. data/lib/hexapdf/layout/style.rb +18 -3
  104. data/lib/hexapdf/layout/table_box.rb +682 -0
  105. data/lib/hexapdf/layout/text_box.rb +5 -3
  106. data/lib/hexapdf/layout/text_fragment.rb +1 -1
  107. data/lib/hexapdf/layout/text_layouter.rb +12 -4
  108. data/lib/hexapdf/layout.rb +1 -0
  109. data/lib/hexapdf/name_tree_node.rb +1 -1
  110. data/lib/hexapdf/number_tree_node.rb +1 -1
  111. data/lib/hexapdf/object.rb +18 -7
  112. data/lib/hexapdf/parser.rb +7 -7
  113. data/lib/hexapdf/pdf_array.rb +1 -1
  114. data/lib/hexapdf/rectangle.rb +1 -1
  115. data/lib/hexapdf/reference.rb +1 -1
  116. data/lib/hexapdf/revision.rb +1 -1
  117. data/lib/hexapdf/revisions.rb +3 -3
  118. data/lib/hexapdf/serializer.rb +15 -15
  119. data/lib/hexapdf/stream.rb +4 -2
  120. data/lib/hexapdf/tokenizer.rb +14 -14
  121. data/lib/hexapdf/type/acro_form/appearance_generator.rb +22 -22
  122. data/lib/hexapdf/type/acro_form/button_field.rb +1 -1
  123. data/lib/hexapdf/type/acro_form/choice_field.rb +1 -1
  124. data/lib/hexapdf/type/acro_form/field.rb +2 -2
  125. data/lib/hexapdf/type/acro_form/form.rb +1 -1
  126. data/lib/hexapdf/type/acro_form/signature_field.rb +4 -4
  127. data/lib/hexapdf/type/acro_form/text_field.rb +1 -1
  128. data/lib/hexapdf/type/acro_form/variable_text_field.rb +1 -1
  129. data/lib/hexapdf/type/acro_form.rb +1 -1
  130. data/lib/hexapdf/type/action.rb +1 -1
  131. data/lib/hexapdf/type/actions/go_to.rb +1 -1
  132. data/lib/hexapdf/type/actions/go_to_r.rb +1 -1
  133. data/lib/hexapdf/type/actions/launch.rb +1 -1
  134. data/lib/hexapdf/type/actions/uri.rb +1 -1
  135. data/lib/hexapdf/type/actions.rb +1 -1
  136. data/lib/hexapdf/type/annotation.rb +3 -3
  137. data/lib/hexapdf/type/annotations/link.rb +1 -1
  138. data/lib/hexapdf/type/annotations/markup_annotation.rb +1 -1
  139. data/lib/hexapdf/type/annotations/text.rb +1 -1
  140. data/lib/hexapdf/type/annotations/widget.rb +2 -2
  141. data/lib/hexapdf/type/annotations.rb +1 -1
  142. data/lib/hexapdf/type/catalog.rb +1 -1
  143. data/lib/hexapdf/type/cid_font.rb +3 -3
  144. data/lib/hexapdf/type/embedded_file.rb +1 -1
  145. data/lib/hexapdf/type/file_specification.rb +2 -2
  146. data/lib/hexapdf/type/font_descriptor.rb +1 -1
  147. data/lib/hexapdf/type/font_simple.rb +2 -2
  148. data/lib/hexapdf/type/font_type0.rb +3 -3
  149. data/lib/hexapdf/type/font_type3.rb +1 -1
  150. data/lib/hexapdf/type/form.rb +1 -1
  151. data/lib/hexapdf/type/graphics_state_parameter.rb +1 -1
  152. data/lib/hexapdf/type/icon_fit.rb +1 -1
  153. data/lib/hexapdf/type/image.rb +1 -1
  154. data/lib/hexapdf/type/info.rb +1 -1
  155. data/lib/hexapdf/type/mark_information.rb +1 -1
  156. data/lib/hexapdf/type/names.rb +2 -2
  157. data/lib/hexapdf/type/object_stream.rb +2 -1
  158. data/lib/hexapdf/type/outline.rb +1 -1
  159. data/lib/hexapdf/type/outline_item.rb +1 -1
  160. data/lib/hexapdf/type/page.rb +19 -10
  161. data/lib/hexapdf/type/page_label.rb +1 -1
  162. data/lib/hexapdf/type/page_tree_node.rb +1 -1
  163. data/lib/hexapdf/type/resources.rb +1 -1
  164. data/lib/hexapdf/type/trailer.rb +2 -2
  165. data/lib/hexapdf/type/viewer_preferences.rb +1 -1
  166. data/lib/hexapdf/type/xref_stream.rb +2 -2
  167. data/lib/hexapdf/utils/pdf_doc_encoding.rb +1 -1
  168. data/lib/hexapdf/version.rb +1 -1
  169. data/lib/hexapdf/writer.rb +4 -4
  170. data/lib/hexapdf/xref_section.rb +2 -2
  171. data/test/hexapdf/content/graphic_object/test_endpoint_arc.rb +11 -1
  172. data/test/hexapdf/content/graphic_object/test_geom2d.rb +7 -0
  173. data/test/hexapdf/content/test_canvas.rb +0 -1
  174. data/test/hexapdf/digital_signature/test_signatures.rb +22 -0
  175. data/test/hexapdf/document/test_files.rb +2 -2
  176. data/test/hexapdf/document/test_layout.rb +98 -0
  177. data/test/hexapdf/encryption/test_security_handler.rb +12 -11
  178. data/test/hexapdf/encryption/test_standard_security_handler.rb +35 -23
  179. data/test/hexapdf/font/test_true_type_wrapper.rb +18 -1
  180. data/test/hexapdf/font/test_type1_wrapper.rb +15 -1
  181. data/test/hexapdf/layout/test_box.rb +1 -1
  182. data/test/hexapdf/layout/test_column_box.rb +65 -21
  183. data/test/hexapdf/layout/test_frame.rb +14 -14
  184. data/test/hexapdf/layout/test_image_box.rb +4 -0
  185. data/test/hexapdf/layout/test_inline_box.rb +5 -0
  186. data/test/hexapdf/layout/test_list_box.rb +40 -6
  187. data/test/hexapdf/layout/test_page_style.rb +3 -2
  188. data/test/hexapdf/layout/test_style.rb +50 -0
  189. data/test/hexapdf/layout/test_table_box.rb +722 -0
  190. data/test/hexapdf/layout/test_text_box.rb +18 -0
  191. data/test/hexapdf/layout/test_text_layouter.rb +4 -0
  192. data/test/hexapdf/test_dictionary_fields.rb +4 -1
  193. data/test/hexapdf/test_document.rb +1 -0
  194. data/test/hexapdf/test_filter.rb +8 -0
  195. data/test/hexapdf/test_importer.rb +9 -0
  196. data/test/hexapdf/test_object.rb +16 -5
  197. data/test/hexapdf/test_stream.rb +7 -0
  198. data/test/hexapdf/test_writer.rb +3 -3
  199. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +13 -5
  200. data/test/hexapdf/type/acro_form/test_form.rb +4 -3
  201. data/test/hexapdf/type/test_page.rb +18 -4
  202. metadata +17 -8
@@ -47,7 +47,7 @@ module HexaPDF
47
47
  # the encryption key and a set of permissions.
48
48
  class StandardEncryptionDictionary < EncryptionDictionary
49
49
 
50
- define_field :R, type: Integer, required: true
50
+ define_field :R, type: Integer, required: true, allowed_values: [2, 3, 4, 5, 6]
51
51
  define_field :O, type: PDFByteString, required: true
52
52
  define_field :OE, type: PDFByteString, version: '2.0'
53
53
  define_field :U, type: PDFByteString, required: true
@@ -71,12 +71,16 @@ module HexaPDF
71
71
  yield("Value of /OE, /UE or /Perms is missing for dictionary revision 6", false)
72
72
  return
73
73
  end
74
- if value[:U].length != 48 || value[:O].length != 48 || value[:UE].length != 32 ||
75
- value[:OE].length != 32 || value[:Perms].length != 16
76
- yield("Invalid size for /U, /O, /UE, /OE or /Perms values for revisions 6", false)
74
+ [:U, :O].each do |f|
75
+ if value[f].length != 48
76
+ yield("Invalid size (#{value[f].length} instead of 48) for /#{f} for revisions 6",
77
+ value[f].length > 48 && value[f][48..-1].squeeze("\x00").length == 1)
78
+ value[f].slice!(48..-1)
79
+ end
80
+ end
81
+ if value[:UE].length != 32 || value[:OE].length != 32 || value[:Perms].length != 16
82
+ yield("Invalid size for /UE, /OE or /Perms values for revisions 6", false)
77
83
  end
78
- else
79
- yield("Value of /R is not one of 2, 3, 4 or 6", false)
80
84
  end
81
85
  end
82
86
 
@@ -87,11 +91,13 @@ module HexaPDF
87
91
  #
88
92
  # == Overview
89
93
  #
90
- # The PDF specification defines one security handler that should be implemented by all PDF
91
- # conform libraries and applications. This standard security handler allows access permissions
92
- # and a user password as well as an owner password to be set. See
93
- # StandardSecurityHandler::EncryptionOptions for all valid options that can be used with this
94
- # security handler.
94
+ # The PDF specification defines one security handler that should be implemented by all
95
+ # conforming PDF libraries and applications. This standard security handler allows access
96
+ # permissions and a user password as well as an owner password to be set.
97
+ #
98
+ # See StandardSecurityHandler::EncryptionOptions for all valid options that can be used with
99
+ # this security handler when encrypting a document. And see #prepare_decryption for all allowed
100
+ # options when decrypting a document.
95
101
  #
96
102
  # The access permissions (see StandardSecurityHandler::Permissions) can be used to restrict what
97
103
  # a user is allowed to do with a PDF file.
@@ -100,7 +106,7 @@ module HexaPDF
100
106
  # password is supplied. To open such an encrypted PDF file, the +decryption_opts+ provided to
101
107
  # HexaPDF::Document.new needs to contain a :password key with the password.
102
108
  #
103
- # See: PDF1.7 s7.6.3, PDF2.0 s7.6.3
109
+ # See: PDF2.0 s7.6.4
104
110
  class StandardSecurityHandler < SecurityHandler
105
111
 
106
112
  # Defines all available permissions.
@@ -109,13 +115,13 @@ module HexaPDF
109
115
  # permission set. The used symbols are the lower case versions of the constants, i.e. the
110
116
  # symbol for MODIFY_CONTENT would be :modify_content.
111
117
  #
112
- # See: PDF1.7 s7.6.3.2
118
+ # See: PDF2.0 s7.6.4.2
113
119
  module Permissions
114
120
 
115
121
  # Printing (if HIGH_QUALITY_PRINT is also set, then high quality printing is allowed)
116
122
  PRINT = 1 << 2
117
123
 
118
- # Modification of the content by operations that are different from those controller by
124
+ # Modification of the content by operations that are different from those controlled by
119
125
  # MODIFY_ANNOTATION, FILL_IN_FORMS and ASSEMBLE_DOCUMENT
120
126
  MODIFY_CONTENT = 1 << 3
121
127
 
@@ -129,6 +135,9 @@ module HexaPDF
129
135
  FILL_IN_FORMS = 1 << 8
130
136
 
131
137
  # Extracting content
138
+ #
139
+ # PDF 2.0 specifies that this bit should always be set by writers and should be ignored by
140
+ # readers. Therefore this is part of the RESERVED constant.
132
141
  EXTRACT_CONTENT = 1 << 9
133
142
 
134
143
  # Assembling of the document (inserting, rotating or deleting of pages and creation of
@@ -142,8 +151,8 @@ module HexaPDF
142
151
  ALL = PRINT | MODIFY_CONTENT | COPY_CONTENT | MODIFY_ANNOTATION | FILL_IN_FORMS |
143
152
  EXTRACT_CONTENT | ASSEMBLE_DOCUMENT | HIGH_QUALITY_PRINT
144
153
 
145
- # Reserved permission bits
146
- RESERVED = 0xFFFFF000 | 0b11000000
154
+ # Reserved permission bits that should always be set
155
+ RESERVED = 0xFFFFF000 | 0b11000000 | EXTRACT_CONTENT
147
156
 
148
157
  # Maps permission symbols to their respective value
149
158
  SYMBOL_TO_PERMISSION = {
@@ -213,7 +222,7 @@ module HexaPDF
213
222
 
214
223
  # Maps the permissions to an integer for use by the standard security handler.
215
224
  #
216
- # See: PDF1.7 s7.6.3.2, ADB1.7 3.5.2 (table 3.20 and the paragraphs before)
225
+ # See: PDF2.0 s7.6.4.2, ADB1.7 3.5.2 (table 3.20 and the paragraphs before)
217
226
  def process_permissions(perms)
218
227
  if perms.kind_of?(Array)
219
228
  perms = perms.inject(0) do |result, perm|
@@ -363,7 +372,7 @@ module HexaPDF
363
372
 
364
373
  # The padding used for passwords with fewer than 32 bytes. Only used for revisions <= 4.
365
374
  #
366
- # See: PDF1.7 s7.6.3.3
375
+ # See: PDF2.0 s7.6.4.3
367
376
  PASSWORD_PADDING = "\x28\xBF\x4E\x5E\x4E\x75\x8A\x41\x64\x00\x4E\x56\xFF\xFA\x01\x08" \
368
377
  "\x2E\x2E\x00\xB6\xD0\x68\x3E\x80\x2F\x0C\xA9\xFE\x64\x53\x69\x7A".b
369
378
 
@@ -376,7 +385,7 @@ module HexaPDF
376
385
  # with the user password. If the password is the owner password,
377
386
  # #compute_owner_encryption_key has to be used instead.
378
387
  #
379
- # See: PDF1.7 s7.6.3.3 (algorithm 2), PDF2.0 s7.6.3.3.2 (algorithm 2.A (a)-(b),(e))
388
+ # See: PDF2.0 s7.6.4.3.2 (algorithm 2), PDF2.0 s7.6.4.3.3 (algorithm 2.A (a)-(b),(e))
380
389
  def compute_user_encryption_key(password)
381
390
  if dict[:R] <= 4
382
391
  data = password
@@ -403,11 +412,11 @@ module HexaPDF
403
412
  # For revisions <= 4 this is done by first retrieving the user password through the use of
404
413
  # the owner password and then using the #compute_user_encryption_key method.
405
414
  #
406
- # For revision 6 file encryption key is a string of random bytes that has been encrypted
407
- # with the owner password. If the password is the user password,
408
- # #compute_user_encryption_key has to be used.
415
+ # For revision 6 the file encryption key is a string of random bytes that has been encrypted
416
+ # with the owner password. If the password is the user password, #compute_user_encryption_key
417
+ # has to be used.
409
418
  #
410
- # See: PDF2.0 s7.6.3.3.2 (algorithm 2.A (a)-(d))
419
+ # See: PDF2.0 s7.6.4.3.2 (algorithm 2.A (a)-(d))
411
420
  def compute_owner_encryption_key(password)
412
421
  if dict[:R] <= 4
413
422
  compute_user_encryption_key(user_password_from_owner_password(password))
@@ -426,7 +435,7 @@ module HexaPDF
426
435
  # *Attention*: If revision 6 is used, the /U value has to be computed and set before this
427
436
  # method is used, otherwise the return value is incorrect!
428
437
  #
429
- # See: PDF1.7 s7.6.3.4 (algorithm 3), PDF2.0 s7.6.3.4.7 (algorithm 9 (a))
438
+ # See: PDF2.0 s7.6.4.4.2 (algorithm 3), PDF2.0 s7.6.4.4.8 (algorithm 9 (a))
430
439
  def compute_o_field(owner_password, user_password)
431
440
  if dict[:R] <= 4
432
441
  data = Digest::MD5.digest(owner_password)
@@ -454,7 +463,7 @@ module HexaPDF
454
463
  # Short explanation: Encrypts the file encryption key with a key based on the password and
455
464
  # the /O and /U values.
456
465
  #
457
- # See: PDF2.0 s7.6.3.4.7 (algorithm 9 (b))
466
+ # See: PDF2.0 s7.6.4.4.8 (algorithm 9 (b))
458
467
  def compute_oe_field(password, file_encryption_key)
459
468
  key = compute_hash(password, dict[:O][40, 8], dict[:U])
460
469
  aes_algorithm.new(key, "\0" * 16, :encrypt).process(file_encryption_key)
@@ -466,8 +475,8 @@ module HexaPDF
466
475
  # based on the user password. For revision 6 the /U value is a hash computed from the
467
476
  # password with added validation and key salts.
468
477
  #
469
- # See: PDF1.7 s7.6.3.4 (algorithm 4 for R=2, algorithm 5 for R=3 and R=4)
470
- # PDF2.0 s7.6.3.4.6 (algorithm 8 (a) for R=6)
478
+ # See: PDF2.0 s7.6.4.4.3 (algorithm 4 for R=2), PDF2.0 s7.6.4.4.4 (algorithm 5 for R=3 and R=4)
479
+ # PDF2.0 s7.6.4.4.7 (algorithm 8 (a) for R=6)
471
480
  def compute_u_field(password)
472
481
  if dict[:R] == 2
473
482
  key = compute_user_encryption_key(password)
@@ -491,7 +500,7 @@ module HexaPDF
491
500
  # Short explanation: Encrypts the file encryption key with a key based on the password and
492
501
  # the /U value.
493
502
  #
494
- # See: PDF2.0 s7.6.3.4.6 (algorithm 8 (b))
503
+ # See: PDF2.0 s7.6.4.4.7 (algorithm 8 (b))
495
504
  def compute_ue_field(password, file_encryption_key)
496
505
  key = compute_hash(password, dict[:U][40, 8])
497
506
  aes_algorithm.new(key, "\0" * 16, :encrypt).process(file_encryption_key)
@@ -501,7 +510,7 @@ module HexaPDF
501
510
  #
502
511
  # Uses /P and /EncryptMetadata values, so these have to be set beforehand.
503
512
  #
504
- # See: PDF2.0 s7.6.3.4.8 (algorithm 10)
513
+ # See: PDF2.0 s7.6.4.4.9 (algorithm 10)
505
514
  def compute_perms_field(file_encryption_key)
506
515
  data = [dict[:P]].pack('V')
507
516
  data << [0xFFFFFFFF].pack('V')
@@ -513,7 +522,7 @@ module HexaPDF
513
522
 
514
523
  # Authenticates the user password, i.e. decides whether the given user password is valid.
515
524
  #
516
- # See: PDF1.7 s7.6.3.4 (algorithm 6), PDF2.0 s7.6.3.4.9 (algorithm 11)
525
+ # See: PDF2.0 s7.6.4.4.5 (algorithm 6), PDF2.0 s7.6.4.4.10 (algorithm 11)
517
526
  def user_password_valid?(password)
518
527
  if dict[:R] == 2
519
528
  compute_u_field(password) == dict[:U]
@@ -526,7 +535,7 @@ module HexaPDF
526
535
 
527
536
  # Authenticates the owner password, i.e. decides whether the given owner password is valid.
528
537
  #
529
- # See: PDF1.7 s7.6.3.4 (algorithm 7), PDF2.0 s7.6.3.4.10 (algorithm 12)
538
+ # See: PDF2.0 s7.6.4.4.6 (algorithm 7), PDF2.0 s7.6.4.4.11 (algorithm 12)
530
539
  def owner_password_valid?(password)
531
540
  if dict[:R] <= 4
532
541
  user_password_valid?(user_password_from_owner_password(password))
@@ -539,7 +548,7 @@ module HexaPDF
539
548
  #
540
549
  # This method can only be used for revision 6.
541
550
  #
542
- # See: PDF2.0 s7.6.3.4.11 (algorithm 13)
551
+ # See: PDF2.0 s7.6.4.4.12 (algorithm 13)
543
552
  def check_perms_field(encryption_key)
544
553
  decrypted = aes_algorithm.new(encryption_key, "\0" * 16, :decrypt).process(dict[:Perms])
545
554
  if decrypted[9, 3] != "adb"
@@ -553,7 +562,7 @@ module HexaPDF
553
562
 
554
563
  # Returns the user password when given the owner password for revisions <= 4.
555
564
  #
556
- # See: PDF1.7 s7.6.3.4 (algorithm 7 (a) and (b))
565
+ # See: PDF2.0 s7.6.4.4.6 (algorithm 7 (a) and (b))
557
566
  def user_password_from_owner_password(owner_password)
558
567
  data = Digest::MD5.digest(owner_password)
559
568
  if dict[:R] >= 3
@@ -578,7 +587,7 @@ module HexaPDF
578
587
  # "#{password}#{salt}#{user_key}" where +user_key+ has to be empty when doing operations
579
588
  # with the user password.
580
589
  #
581
- # See: PDF2.0 s7.6.3.3.3 (algorithm 2.B)
590
+ # See: PDF2.0 s7.6.4.3.4 (algorithm 2.B)
582
591
  def compute_hash(password, salt, user_key = '')
583
592
  k = Digest::SHA256.digest("#{password}#{salt}#{user_key}")
584
593
  e = ''
@@ -606,8 +615,8 @@ module HexaPDF
606
615
  # * For revision 6 the password is converted into UTF-8 encoding that is normalized
607
616
  # according to the PDF2.0 specification.
608
617
  #
609
- # See: PDF1.7 s7.6.3.3 (algorithm 2 step a)),
610
- # PDF2.0 s7.6.3.3.2 (algorithm 2.A steps a) and b))
618
+ # See: PDF2.0 s7.6.4.3.2 (algorithm 2 step a)),
619
+ # PDF2.0 s7.6.4.3.3 (algorithm 2.A steps a) and b))
611
620
  def prepare_password(password)
612
621
  if dict[:R] <= 4
613
622
  password.to_s[0, 32].encode(Encoding::ISO_8859_1).force_encoding(Encoding::BINARY).
@@ -41,7 +41,7 @@ module HexaPDF
41
41
  # A PDF document may be encrypted so that
42
42
  #
43
43
  # * certain permissions are respected when the document is opened,
44
- # * a password must be specified so that a document can be openend or so that
44
+ # * a password must be specified so that a document can be opened, or so that
45
45
  # * a password must be specified to remove the restrictions and allow full access.
46
46
  #
47
47
  # This module contains all encryption and security related code to facilitate PDF encryption.
@@ -61,6 +61,9 @@ module HexaPDF
61
61
  # additionally allows setting permission information. This security handler is implemented by
62
62
  # the Encryption::StandardSecurityHandler class.
63
63
  #
64
+ # There is also a certificate-based security handler defined by the PDF specification. However,
65
+ # that handler is not implemented.
66
+ #
64
67
  #
65
68
  # === Encryption Algorithms
66
69
  #
@@ -78,8 +81,10 @@ module HexaPDF
78
81
  # Pure Ruby implementations of the algorithms which are naturally much slower than the OpenSSL
79
82
  # based ones. However, these implementations can be used on any Ruby implementation.
80
83
  #
84
+ # The ARC4 algorithm is deprecated with PDF 2.0 and should not be used when creating new
85
+ # documents.
81
86
  #
82
- # See: PDF1.7 s7.6
87
+ # See: PDF2.0 s7.6
83
88
  module Encryption
84
89
 
85
90
  autoload(:ARC4, 'hexapdf/encryption/arc4')
data/lib/hexapdf/error.rb CHANGED
@@ -82,4 +82,22 @@ module HexaPDF
82
82
  # Raised when the encryption method is not supported.
83
83
  class UnsupportedEncryptionError < EncryptionError; end
84
84
 
85
+ # Raised when a font wrapper implementation should encode a missing glyph.
86
+ class MissingGlyphError < Error
87
+
88
+ # Returns the glyph object that contains the information about the missing glyph.
89
+ attr_reader :glyph
90
+
91
+ # Creates a new MissingGlyphError for the given +glyph+.
92
+ def initialize(glyph)
93
+ @glyph = glyph
94
+ end
95
+
96
+ def message # :nodoc:
97
+ "No glyph for #{glyph.str.inspect} in font '#{glyph.font.full_name}' found. \n\n" \
98
+ "Use the configuration option 'font.on_missing_glyph' to customize missing glyph handling."
99
+ end
100
+
101
+ end
102
+
85
103
  end
@@ -45,7 +45,7 @@ module HexaPDF
45
45
  # This filter module implements the ASCII-85 filter which can encode arbitrary data into an
46
46
  # ASCII compatible format that expands the original data only by a factor of 4:5.
47
47
  #
48
- # See: HexaPDF::Filter, PDF1.7 s7.4.2
48
+ # See: HexaPDF::Filter, PDF2.0 s7.4.2
49
49
  module ASCII85Decode
50
50
 
51
51
  VALUE_TO_CHAR = {} #:nodoc:
@@ -44,7 +44,7 @@ module HexaPDF
44
44
  # This filter module implements the ASCII hex decode/encode filter which can encode arbitrary
45
45
  # data into the two byte ASCII hex format that expands the original data by a factor of 1:2.
46
46
  #
47
- # See: HexaPDF::Filter, PDF1.7 s7.4.2
47
+ # See: HexaPDF::Filter, PDF2.0 s7.4.2
48
48
  module ASCIIHexDecode
49
49
 
50
50
  # See HexaPDF::Filter
@@ -45,7 +45,7 @@ module HexaPDF
45
45
 
46
46
  # Implements the Deflate filter using the Zlib library.
47
47
  #
48
- # See: HexaPDF::Filter, PDF1.7 s7.4.4
48
+ # See: HexaPDF::Filter, PDF2.0 s7.4.4
49
49
  module FlateDecode
50
50
 
51
51
  # See HexaPDF::Filter
@@ -48,7 +48,7 @@ module HexaPDF
48
48
  # not aligned to byte boundaries, this filter is not as fast as the other filters. If speed is
49
49
  # a concern, the FlateDecode filter should be used instead.
50
50
  #
51
- # See: HexaPDF::Filter, PDF1.7 s7.4.4
51
+ # See: HexaPDF::Filter, PDF2.0 s7.4.4
52
52
  module LZWDecode
53
53
 
54
54
  CLEAR_TABLE = 256 # :nodoc:
@@ -40,7 +40,7 @@ module HexaPDF
40
40
  # The PassThrough filter just passes the source on unmodified. This is enough for basic
41
41
  # read-write capabilities but not if the unfiltered bytes are needed.
42
42
  #
43
- # See: HexaPDF::Filter, PDF1.7 s7.4
43
+ # See: HexaPDF::Filter, PDF2.0 s7.4
44
44
  module PassThrough
45
45
 
46
46
  # See HexaPDF::Filter
@@ -47,7 +47,7 @@ module HexaPDF
47
47
  # Although a predictor isn't a full PDF filter, it is implemented as one in HexaPDF terms to
48
48
  # allow easy chaining of the predictor.
49
49
  #
50
- # See: PDF1.7 s7.4.4.3, s7.4.4.4, https://partners.adobe.com/public/developer/en/tiff/TIFF6.pdf
50
+ # See: PDF2.0 s7.4.4.3, s7.4.4.4, https://partners.adobe.com/public/developer/en/tiff/TIFF6.pdf
51
51
  # (p64f), http://www.w3.org/TR/PNG-Filters.html
52
52
  #
53
53
  #-- Implementation notes:
@@ -43,7 +43,7 @@ module HexaPDF
43
43
 
44
44
  # Implements the run length filter.
45
45
  #
46
- # See: HexaPDF::Filter, PDF1.7 s7.4.5
46
+ # See: HexaPDF::Filter, PDF2.0 s7.4.5
47
47
  module RunLengthDecode
48
48
 
49
49
  EOD = 128.chr #:nodoc:
@@ -48,6 +48,8 @@ module HexaPDF
48
48
  attr_reader :length
49
49
 
50
50
  # Initializes the Fiber and sets the +length+.
51
+ #
52
+ # A +length+ of +nil+ is equal to -1.
51
53
  def initialize(length, &block)
52
54
  super(&block)
53
55
  @length = length || -1
@@ -55,6 +57,47 @@ module HexaPDF
55
57
 
56
58
  end
57
59
 
60
+ # Implements part of the Fiber interface so that it can be used instead of a Fiber by HexaPDF
61
+ # when only a single string should be returned.
62
+ class FiberDoubleForString
63
+
64
+ # Creates a new FiberDoubleForString instance for the given string +str+ or for the string
65
+ # returned by invoking the block.
66
+ def initialize(str = nil, &block)
67
+ @block = block
68
+ @str = str
69
+ @block_used = false
70
+ end
71
+
72
+ # Returns the length of the wrapped string.
73
+ #
74
+ # May only be called before #resume has been invoked.
75
+ def length
76
+ str.length
77
+ end
78
+
79
+ # Returns +true+ if #resume has not yet been called.
80
+ def alive?
81
+ !str.nil?
82
+ end
83
+
84
+ # Returns the wrapped string on the first invocation, +nil+ otherwise.
85
+ def resume
86
+ tmp = str
87
+ @str = nil
88
+ tmp
89
+ end
90
+
91
+ private
92
+
93
+ # Sets the string to the return value of the initially provided block if no string has been
94
+ # provided.
95
+ def str
96
+ @str ||= @block_used || @block.nil? ? nil : (@block_used = true; @block.call)
97
+ end
98
+
99
+ end
100
+
58
101
  # == Overview
59
102
  #
60
103
  # A stream filter is used to compress a stream or to encode it in an ASCII compatible way; or
@@ -83,7 +126,7 @@ module HexaPDF
83
126
  #
84
127
  # Such a fiber should *not* return +nil+ unless this signifies that no more data is coming!
85
128
  #
86
- # See: PDF1.7 s7.4
129
+ # See: PDF2.0 s7.4
87
130
  module Filter
88
131
 
89
132
  autoload(:ASCII85Decode, 'hexapdf/filter/ascii85_decode')
@@ -99,10 +142,16 @@ module HexaPDF
99
142
 
100
143
  autoload(:PassThrough, 'hexapdf/filter/pass_through')
101
144
 
102
- # Returns a Fiber that can be used as a source for decoders/encoders and that is based on a
103
- # String object.
145
+ # Returns a FiberDoubleForString that uses the string returned by the provided block and can be
146
+ # used as a source for decoders/encoders.
147
+ def self.source_from_proc(&block)
148
+ FiberDoubleForString.new(&block)
149
+ end
150
+
151
+ # Returns a FiberDoubleForString that returns the given string and can be used as a source for
152
+ # decoders/encoders.
104
153
  def self.source_from_string(str)
105
- FiberWithLength.new(str.length) { str.dup }
154
+ FiberDoubleForString.new(str.dup)
106
155
  end
107
156
 
108
157
  # Returns a Fiber that can be used as a source for decoders/encoders and that reads chunks of
@@ -149,7 +198,7 @@ module HexaPDF
149
198
  # Note that there will be a problem if the size of the file changes between the invocation of
150
199
  # this method and the actual consumption of the file!
151
200
  #
152
- # See ::source_from_io for a description of the available options.
201
+ # See ::source_from_io for a description of the +pos+, +length+ and +chunk_size+ options.
153
202
  def self.source_from_file(filename, pos: 0, length: -1, chunk_size: 0)
154
203
  fib_length = (length < 0 ? File.stat(filename).size - pos : length)
155
204
  FiberWithLength.new(fib_length) do
@@ -165,7 +214,7 @@ module HexaPDF
165
214
  # Returns the concatenated string chunks retrieved by resuming the given source Fiber until it
166
215
  # is dead.
167
216
  #
168
- # The returned string is always a string with +BINARY+ (= +ASCII-8BIT+) encoding.
217
+ # The returned string is always a string with binary (= +ASCII-8BIT+) encoding.
169
218
  def self.string_from_source(source)
170
219
  str = ''.b
171
220
  while source.alive? && (data = source.resume)
@@ -146,10 +146,10 @@ module HexaPDF
146
146
  # Parses the "bfrange" operator at the current position.
147
147
  #
148
148
  #--
149
- # PDF1.7 s9.10.3 and Adobe Technical Note #5411 have different views as to how "bfrange"
149
+ # PDF2.0 s9.10.3 and Adobe Technical Note #5411 have different views as to how "bfrange"
150
150
  # operators of the form "startCode endCode codePoint" should be handled.
151
151
  #
152
- # PDF1.7 mentions that the last byte of "codePoint" should be incremented, up to a maximum
152
+ # PDF2.0 mentions that the last byte of "codePoint" should be incremented, up to a maximum
153
153
  # of 255. However #5411 has the range "<1379> <137B> <90FE>" as example which contradicts
154
154
  # this.
155
155
  #
@@ -43,7 +43,7 @@ module HexaPDF
43
43
  # Represents a CMap, a mapping from character codes to CIDs (character IDs) or to their Unicode
44
44
  # value.
45
45
  #
46
- # See: PDF1.7 s9.7.5, s9.10.3; Adobe Technical Notes #5014 and #5411
46
+ # See: PDF2.0 s9.7.5, s9.10.3; Adobe Technical Notes #5014 and #5411
47
47
  class CMap
48
48
 
49
49
  autoload(:Parser, 'hexapdf/font/cmap/parser')
@@ -42,7 +42,7 @@ module HexaPDF
42
42
 
43
43
  # The difference encoding uses a base encoding that can be overlaid with additional mappings.
44
44
  #
45
- # See: PDF1.7 s9.6.6.1
45
+ # See: PDF2.0 s9.6.5.1
46
46
  class DifferenceEncoding < Base
47
47
 
48
48
  # The base encoding.
@@ -42,7 +42,7 @@ module HexaPDF
42
42
 
43
43
  # The MacExpertEncoding for Latin texts.
44
44
  #
45
- # See: PDF1.7 sD.4
45
+ # See: PDF2.0 sD.4
46
46
  class MacExpertEncoding < Base
47
47
 
48
48
  def initialize #:nodoc:
@@ -42,7 +42,7 @@ module HexaPDF
42
42
 
43
43
  # The Mac Roman standard encoding for Latin texts.
44
44
  #
45
- # See: PDF1.7 sD.1, sD.2
45
+ # See: PDF2.0 sD.1, sD.2
46
46
  class MacRomanEncoding < Base
47
47
 
48
48
  def initialize #:nodoc:
@@ -256,7 +256,7 @@ module HexaPDF
256
256
  0264 => :yen,
257
257
  0172 => :z,
258
258
  0060 => :zero,
259
- # additions due to PDF1.7 sD.2 footnote 6
259
+ # additions due to PDF2.0 sD.2 footnote 6
260
260
  0312 => :space,
261
261
  }
262
262
  end
@@ -42,7 +42,7 @@ module HexaPDF
42
42
 
43
43
  # The Adobe standard encoding for Latin texts.
44
44
  #
45
- # See: PDF1.7 sD.1, sD.2
45
+ # See: PDF2.0 sD.1, sD.2
46
46
  class StandardEncoding < Base
47
47
 
48
48
  def initialize #:nodoc:
@@ -42,7 +42,7 @@ module HexaPDF
42
42
 
43
43
  # The built-in encoding of the Symbol font.
44
44
  #
45
- # See: PDF1.7 sD.5
45
+ # See: PDF2.0 sD.5
46
46
  class SymbolEncoding < Base
47
47
 
48
48
  def initialize #:nodoc:
@@ -42,7 +42,7 @@ module HexaPDF
42
42
 
43
43
  # The Windows Code Page 1252, the standard Windows encoding for Latin texts.
44
44
  #
45
- # See: PDF1.7 sD.1, sD.2
45
+ # See: PDF2.0 sD.1, sD.2
46
46
  class WinAnsiEncoding < Base
47
47
 
48
48
  def initialize #:nodoc:
@@ -265,11 +265,11 @@ module HexaPDF
265
265
  0172 => :z,
266
266
  0236 => :zcaron,
267
267
  0060 => :zero,
268
- # additions due to PDF1.7 sD.2 footnote 5,6
268
+ # additions due to PDF2.0 sD.2 footnote 5,6
269
269
  0240 => :space,
270
270
  0255 => :hyphen,
271
271
  }
272
- # additions due to PDF1.7 sD.2 footnote 3
272
+ # additions due to PDF2.0 sD.2 footnote 3
273
273
  041.upto(255) do |i|
274
274
  next if @code_to_name.key?(i)
275
275
  @code_to_name[i] = :bullet
@@ -42,7 +42,7 @@ module HexaPDF
42
42
 
43
43
  # The built-in encoding of the ZapfDingbats font.
44
44
  #
45
- # See: PDF1.7 sD.6
45
+ # See: PDF2.0 sD.6
46
46
  class ZapfDingbatsEncoding < Base
47
47
 
48
48
  def initialize #:nodoc:
@@ -41,6 +41,9 @@ module HexaPDF
41
41
  # font.
42
42
  class InvalidGlyph
43
43
 
44
+ # The associated font object.
45
+ attr_reader :font
46
+
44
47
  # The string that could not be represented as a glyph.
45
48
  attr_reader :str
46
49
 
@@ -51,7 +51,7 @@ module HexaPDF
51
51
  #
52
52
  # * By using a composite font more than 256 characters can be encoded with one font object.
53
53
  # * Fonts for vertical writing can potentially be used.
54
- # * The PDF specification recommends using a composite font (see PDF1.7 s9.9 at the end).
54
+ # * The PDF specification recommends using a composite font (see PDF2.0 s9.9.1 at the end).
55
55
  #
56
56
  # Additionally, TrueType fonts are *always* embedded.
57
57
  class TrueTypeWrapper
@@ -59,6 +59,9 @@ module HexaPDF
59
59
  # Represents a single glyph of the wrapped font.
60
60
  class Glyph
61
61
 
62
+ # The associated font object.
63
+ attr_reader :font
64
+
62
65
  # The glyph ID.
63
66
  attr_reader :id
64
67
 
@@ -171,6 +174,18 @@ module HexaPDF
171
174
  end
172
175
  end
173
176
 
177
+ # Returns a custom Glyph object which represents the given +string+ via the given glyph +id+.
178
+ #
179
+ # This functionality can be used to associate a single glyph id with multiple, different
180
+ # strings for replacement glyph purposes. When used in such a way, the used glyph id is often
181
+ # 0 which represents the missing glyph.
182
+ def custom_glyph(id, string)
183
+ if id < 0 || id >= @wrapped_font[:maxp].num_glyphs
184
+ raise HexaPDF::Error, "Glyph ID #{id} is invalid for font '#{@wrapped_font.full_name}'"
185
+ end
186
+ Glyph.new(@wrapped_font, id, string)
187
+ end
188
+
174
189
  # Returns an array of glyph objects representing the characters in the UTF-8 encoded string.
175
190
  def decode_utf8(str)
176
191
  str.codepoints.map! do |c|
@@ -187,9 +202,7 @@ module HexaPDF
187
202
  def encode(glyph)
188
203
  (@encoded_glyphs[glyph.id] ||=
189
204
  begin
190
- if glyph.kind_of?(InvalidGlyph)
191
- raise HexaPDF::Error, "Glyph for #{glyph.str.inspect} missing"
192
- end
205
+ raise HexaPDF::MissingGlyphError.new(glyph) if glyph.kind_of?(InvalidGlyph)
193
206
  if @subsetter
194
207
  [[@subsetter.use_glyph(glyph.id)].pack('n'), glyph]
195
208
  else