hexapdf 0.32.1 → 0.33.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (205) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +76 -1
  3. data/README.md +9 -0
  4. data/examples/002-graphics.rb +15 -17
  5. data/examples/003-arcs.rb +9 -9
  6. data/examples/009-text_layouter_alignment.rb +1 -1
  7. data/examples/010-text_layouter_inline_boxes.rb +2 -2
  8. data/examples/011-text_layouter_line_wrapping.rb +1 -1
  9. data/examples/012-text_layouter_styling.rb +7 -7
  10. data/examples/013-text_layouter_shapes.rb +1 -1
  11. data/examples/014-text_in_polygon.rb +1 -1
  12. data/examples/015-boxes.rb +8 -7
  13. data/examples/016-frame_automatic_box_placement.rb +2 -2
  14. data/examples/017-frame_text_flow.rb +2 -1
  15. data/examples/018-composer.rb +1 -1
  16. data/examples/020-column_box.rb +2 -1
  17. data/examples/025-table_box.rb +46 -0
  18. data/lib/hexapdf/cli/command.rb +5 -2
  19. data/lib/hexapdf/cli/form.rb +5 -5
  20. data/lib/hexapdf/cli/inspect.rb +3 -3
  21. data/lib/hexapdf/cli.rb +4 -0
  22. data/lib/hexapdf/composer.rb +104 -52
  23. data/lib/hexapdf/configuration.rb +44 -39
  24. data/lib/hexapdf/content/canvas.rb +393 -267
  25. data/lib/hexapdf/content/color_space.rb +72 -25
  26. data/lib/hexapdf/content/graphic_object/arc.rb +57 -24
  27. data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +66 -23
  28. data/lib/hexapdf/content/graphic_object/geom2d.rb +47 -6
  29. data/lib/hexapdf/content/graphic_object/solid_arc.rb +58 -36
  30. data/lib/hexapdf/content/graphic_object.rb +6 -7
  31. data/lib/hexapdf/content/graphics_state.rb +54 -45
  32. data/lib/hexapdf/content/operator.rb +52 -54
  33. data/lib/hexapdf/content/parser.rb +2 -2
  34. data/lib/hexapdf/content/processor.rb +15 -15
  35. data/lib/hexapdf/content/transformation_matrix.rb +1 -1
  36. data/lib/hexapdf/content.rb +5 -0
  37. data/lib/hexapdf/dictionary.rb +6 -5
  38. data/lib/hexapdf/dictionary_fields.rb +42 -14
  39. data/lib/hexapdf/digital_signature/cms_handler.rb +2 -2
  40. data/lib/hexapdf/digital_signature/handler.rb +1 -1
  41. data/lib/hexapdf/digital_signature/pkcs1_handler.rb +2 -3
  42. data/lib/hexapdf/digital_signature/signature.rb +6 -6
  43. data/lib/hexapdf/digital_signature/signatures.rb +13 -12
  44. data/lib/hexapdf/digital_signature/signing/default_handler.rb +14 -5
  45. data/lib/hexapdf/digital_signature/signing/signed_data_creator.rb +2 -4
  46. data/lib/hexapdf/digital_signature/signing/timestamp_handler.rb +4 -4
  47. data/lib/hexapdf/digital_signature/signing.rb +4 -0
  48. data/lib/hexapdf/digital_signature/verification_result.rb +2 -2
  49. data/lib/hexapdf/digital_signature.rb +7 -2
  50. data/lib/hexapdf/document/destinations.rb +12 -11
  51. data/lib/hexapdf/document/files.rb +1 -1
  52. data/lib/hexapdf/document/fonts.rb +1 -1
  53. data/lib/hexapdf/document/layout.rb +167 -39
  54. data/lib/hexapdf/document/pages.rb +3 -2
  55. data/lib/hexapdf/document.rb +89 -55
  56. data/lib/hexapdf/encryption/aes.rb +5 -5
  57. data/lib/hexapdf/encryption/arc4.rb +1 -1
  58. data/lib/hexapdf/encryption/fast_aes.rb +2 -2
  59. data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
  60. data/lib/hexapdf/encryption/identity.rb +1 -1
  61. data/lib/hexapdf/encryption/ruby_aes.rb +1 -1
  62. data/lib/hexapdf/encryption/ruby_arc4.rb +1 -1
  63. data/lib/hexapdf/encryption/security_handler.rb +31 -24
  64. data/lib/hexapdf/encryption/standard_security_handler.rb +45 -36
  65. data/lib/hexapdf/encryption.rb +7 -2
  66. data/lib/hexapdf/error.rb +18 -0
  67. data/lib/hexapdf/filter/ascii85_decode.rb +1 -1
  68. data/lib/hexapdf/filter/ascii_hex_decode.rb +1 -1
  69. data/lib/hexapdf/filter/flate_decode.rb +1 -1
  70. data/lib/hexapdf/filter/lzw_decode.rb +1 -1
  71. data/lib/hexapdf/filter/pass_through.rb +1 -1
  72. data/lib/hexapdf/filter/predictor.rb +1 -1
  73. data/lib/hexapdf/filter/run_length_decode.rb +1 -1
  74. data/lib/hexapdf/filter.rb +55 -6
  75. data/lib/hexapdf/font/cmap/parser.rb +2 -2
  76. data/lib/hexapdf/font/cmap.rb +1 -1
  77. data/lib/hexapdf/font/encoding/difference_encoding.rb +1 -1
  78. data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +1 -1
  79. data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +2 -2
  80. data/lib/hexapdf/font/encoding/standard_encoding.rb +1 -1
  81. data/lib/hexapdf/font/encoding/symbol_encoding.rb +1 -1
  82. data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +3 -3
  83. data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +1 -1
  84. data/lib/hexapdf/font/invalid_glyph.rb +3 -0
  85. data/lib/hexapdf/font/true_type_wrapper.rb +17 -4
  86. data/lib/hexapdf/font/type1_wrapper.rb +19 -4
  87. data/lib/hexapdf/font_loader/from_configuration.rb +5 -2
  88. data/lib/hexapdf/font_loader/from_file.rb +5 -5
  89. data/lib/hexapdf/font_loader/standard14.rb +3 -3
  90. data/lib/hexapdf/font_loader.rb +3 -0
  91. data/lib/hexapdf/image_loader/jpeg.rb +2 -2
  92. data/lib/hexapdf/image_loader/pdf.rb +1 -1
  93. data/lib/hexapdf/image_loader/png.rb +2 -2
  94. data/lib/hexapdf/image_loader.rb +1 -1
  95. data/lib/hexapdf/importer.rb +13 -0
  96. data/lib/hexapdf/layout/box.rb +9 -2
  97. data/lib/hexapdf/layout/box_fitter.rb +2 -2
  98. data/lib/hexapdf/layout/column_box.rb +18 -4
  99. data/lib/hexapdf/layout/frame.rb +30 -12
  100. data/lib/hexapdf/layout/image_box.rb +5 -0
  101. data/lib/hexapdf/layout/inline_box.rb +1 -0
  102. data/lib/hexapdf/layout/list_box.rb +17 -1
  103. data/lib/hexapdf/layout/page_style.rb +4 -4
  104. data/lib/hexapdf/layout/style.rb +18 -3
  105. data/lib/hexapdf/layout/table_box.rb +682 -0
  106. data/lib/hexapdf/layout/text_box.rb +5 -3
  107. data/lib/hexapdf/layout/text_fragment.rb +1 -1
  108. data/lib/hexapdf/layout/text_layouter.rb +12 -4
  109. data/lib/hexapdf/layout.rb +1 -0
  110. data/lib/hexapdf/name_tree_node.rb +1 -1
  111. data/lib/hexapdf/number_tree_node.rb +1 -1
  112. data/lib/hexapdf/object.rb +18 -7
  113. data/lib/hexapdf/parser.rb +8 -8
  114. data/lib/hexapdf/pdf_array.rb +1 -1
  115. data/lib/hexapdf/rectangle.rb +1 -1
  116. data/lib/hexapdf/reference.rb +1 -1
  117. data/lib/hexapdf/revision.rb +1 -1
  118. data/lib/hexapdf/revisions.rb +3 -3
  119. data/lib/hexapdf/serializer.rb +15 -15
  120. data/lib/hexapdf/stream.rb +4 -2
  121. data/lib/hexapdf/tokenizer.rb +14 -14
  122. data/lib/hexapdf/type/acro_form/appearance_generator.rb +22 -22
  123. data/lib/hexapdf/type/acro_form/button_field.rb +1 -1
  124. data/lib/hexapdf/type/acro_form/choice_field.rb +1 -1
  125. data/lib/hexapdf/type/acro_form/field.rb +2 -2
  126. data/lib/hexapdf/type/acro_form/form.rb +1 -1
  127. data/lib/hexapdf/type/acro_form/signature_field.rb +4 -4
  128. data/lib/hexapdf/type/acro_form/text_field.rb +1 -1
  129. data/lib/hexapdf/type/acro_form/variable_text_field.rb +1 -1
  130. data/lib/hexapdf/type/acro_form.rb +1 -1
  131. data/lib/hexapdf/type/action.rb +1 -1
  132. data/lib/hexapdf/type/actions/go_to.rb +1 -1
  133. data/lib/hexapdf/type/actions/go_to_r.rb +1 -1
  134. data/lib/hexapdf/type/actions/launch.rb +1 -1
  135. data/lib/hexapdf/type/actions/uri.rb +1 -1
  136. data/lib/hexapdf/type/actions.rb +1 -1
  137. data/lib/hexapdf/type/annotation.rb +3 -3
  138. data/lib/hexapdf/type/annotations/link.rb +1 -1
  139. data/lib/hexapdf/type/annotations/markup_annotation.rb +1 -1
  140. data/lib/hexapdf/type/annotations/text.rb +1 -1
  141. data/lib/hexapdf/type/annotations/widget.rb +2 -2
  142. data/lib/hexapdf/type/annotations.rb +1 -1
  143. data/lib/hexapdf/type/catalog.rb +1 -1
  144. data/lib/hexapdf/type/cid_font.rb +3 -3
  145. data/lib/hexapdf/type/embedded_file.rb +1 -1
  146. data/lib/hexapdf/type/file_specification.rb +2 -2
  147. data/lib/hexapdf/type/font_descriptor.rb +1 -1
  148. data/lib/hexapdf/type/font_simple.rb +2 -2
  149. data/lib/hexapdf/type/font_type0.rb +3 -3
  150. data/lib/hexapdf/type/font_type3.rb +1 -1
  151. data/lib/hexapdf/type/form.rb +1 -1
  152. data/lib/hexapdf/type/graphics_state_parameter.rb +1 -1
  153. data/lib/hexapdf/type/icon_fit.rb +1 -1
  154. data/lib/hexapdf/type/image.rb +1 -1
  155. data/lib/hexapdf/type/info.rb +1 -1
  156. data/lib/hexapdf/type/mark_information.rb +1 -1
  157. data/lib/hexapdf/type/names.rb +2 -2
  158. data/lib/hexapdf/type/object_stream.rb +7 -3
  159. data/lib/hexapdf/type/outline.rb +1 -1
  160. data/lib/hexapdf/type/outline_item.rb +1 -1
  161. data/lib/hexapdf/type/page.rb +19 -10
  162. data/lib/hexapdf/type/page_label.rb +1 -1
  163. data/lib/hexapdf/type/page_tree_node.rb +1 -1
  164. data/lib/hexapdf/type/resources.rb +1 -1
  165. data/lib/hexapdf/type/trailer.rb +2 -2
  166. data/lib/hexapdf/type/viewer_preferences.rb +1 -1
  167. data/lib/hexapdf/type/xref_stream.rb +2 -2
  168. data/lib/hexapdf/utils/pdf_doc_encoding.rb +1 -1
  169. data/lib/hexapdf/version.rb +1 -1
  170. data/lib/hexapdf/writer.rb +4 -4
  171. data/lib/hexapdf/xref_section.rb +2 -2
  172. data/test/hexapdf/content/graphic_object/test_endpoint_arc.rb +11 -1
  173. data/test/hexapdf/content/graphic_object/test_geom2d.rb +7 -0
  174. data/test/hexapdf/content/test_canvas.rb +0 -1
  175. data/test/hexapdf/digital_signature/test_signatures.rb +22 -0
  176. data/test/hexapdf/document/test_files.rb +2 -2
  177. data/test/hexapdf/document/test_layout.rb +98 -0
  178. data/test/hexapdf/encryption/test_security_handler.rb +12 -11
  179. data/test/hexapdf/encryption/test_standard_security_handler.rb +35 -23
  180. data/test/hexapdf/font/test_true_type_wrapper.rb +18 -1
  181. data/test/hexapdf/font/test_type1_wrapper.rb +15 -1
  182. data/test/hexapdf/layout/test_box.rb +1 -1
  183. data/test/hexapdf/layout/test_column_box.rb +65 -21
  184. data/test/hexapdf/layout/test_frame.rb +14 -14
  185. data/test/hexapdf/layout/test_image_box.rb +4 -0
  186. data/test/hexapdf/layout/test_inline_box.rb +5 -0
  187. data/test/hexapdf/layout/test_list_box.rb +40 -6
  188. data/test/hexapdf/layout/test_page_style.rb +3 -2
  189. data/test/hexapdf/layout/test_style.rb +50 -0
  190. data/test/hexapdf/layout/test_table_box.rb +722 -0
  191. data/test/hexapdf/layout/test_text_box.rb +18 -0
  192. data/test/hexapdf/layout/test_text_layouter.rb +4 -0
  193. data/test/hexapdf/test_dictionary_fields.rb +4 -1
  194. data/test/hexapdf/test_document.rb +1 -0
  195. data/test/hexapdf/test_filter.rb +8 -0
  196. data/test/hexapdf/test_importer.rb +9 -0
  197. data/test/hexapdf/test_object.rb +16 -5
  198. data/test/hexapdf/test_parser.rb +1 -1
  199. data/test/hexapdf/test_stream.rb +7 -0
  200. data/test/hexapdf/test_writer.rb +3 -3
  201. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +13 -5
  202. data/test/hexapdf/type/acro_form/test_form.rb +4 -3
  203. data/test/hexapdf/type/test_object_stream.rb +9 -3
  204. data/test/hexapdf/type/test_page.rb +18 -4
  205. metadata +17 -8
@@ -47,7 +47,7 @@ module HexaPDF
47
47
  # the encryption key and a set of permissions.
48
48
  class StandardEncryptionDictionary < EncryptionDictionary
49
49
 
50
- define_field :R, type: Integer, required: true
50
+ define_field :R, type: Integer, required: true, allowed_values: [2, 3, 4, 5, 6]
51
51
  define_field :O, type: PDFByteString, required: true
52
52
  define_field :OE, type: PDFByteString, version: '2.0'
53
53
  define_field :U, type: PDFByteString, required: true
@@ -71,12 +71,16 @@ module HexaPDF
71
71
  yield("Value of /OE, /UE or /Perms is missing for dictionary revision 6", false)
72
72
  return
73
73
  end
74
- if value[:U].length != 48 || value[:O].length != 48 || value[:UE].length != 32 ||
75
- value[:OE].length != 32 || value[:Perms].length != 16
76
- yield("Invalid size for /U, /O, /UE, /OE or /Perms values for revisions 6", false)
74
+ [:U, :O].each do |f|
75
+ if value[f].length != 48
76
+ yield("Invalid size (#{value[f].length} instead of 48) for /#{f} for revisions 6",
77
+ value[f].length > 48 && value[f][48..-1].squeeze("\x00").length == 1)
78
+ value[f].slice!(48..-1)
79
+ end
80
+ end
81
+ if value[:UE].length != 32 || value[:OE].length != 32 || value[:Perms].length != 16
82
+ yield("Invalid size for /UE, /OE or /Perms values for revisions 6", false)
77
83
  end
78
- else
79
- yield("Value of /R is not one of 2, 3, 4 or 6", false)
80
84
  end
81
85
  end
82
86
 
@@ -87,11 +91,13 @@ module HexaPDF
87
91
  #
88
92
  # == Overview
89
93
  #
90
- # The PDF specification defines one security handler that should be implemented by all PDF
91
- # conform libraries and applications. This standard security handler allows access permissions
92
- # and a user password as well as an owner password to be set. See
93
- # StandardSecurityHandler::EncryptionOptions for all valid options that can be used with this
94
- # security handler.
94
+ # The PDF specification defines one security handler that should be implemented by all
95
+ # conforming PDF libraries and applications. This standard security handler allows access
96
+ # permissions and a user password as well as an owner password to be set.
97
+ #
98
+ # See StandardSecurityHandler::EncryptionOptions for all valid options that can be used with
99
+ # this security handler when encrypting a document. And see #prepare_decryption for all allowed
100
+ # options when decrypting a document.
95
101
  #
96
102
  # The access permissions (see StandardSecurityHandler::Permissions) can be used to restrict what
97
103
  # a user is allowed to do with a PDF file.
@@ -100,7 +106,7 @@ module HexaPDF
100
106
  # password is supplied. To open such an encrypted PDF file, the +decryption_opts+ provided to
101
107
  # HexaPDF::Document.new needs to contain a :password key with the password.
102
108
  #
103
- # See: PDF1.7 s7.6.3, PDF2.0 s7.6.3
109
+ # See: PDF2.0 s7.6.4
104
110
  class StandardSecurityHandler < SecurityHandler
105
111
 
106
112
  # Defines all available permissions.
@@ -109,13 +115,13 @@ module HexaPDF
109
115
  # permission set. The used symbols are the lower case versions of the constants, i.e. the
110
116
  # symbol for MODIFY_CONSTANT would be :modify_constant.
111
117
  #
112
- # See: PDF1.7 s7.6.3.2
118
+ # See: PDF2.0 s7.6.4.2
113
119
  module Permissions
114
120
 
115
121
  # Printing (if HIGH_QUALITY_PRINT is also set, then high quality printing is allowed)
116
122
  PRINT = 1 << 2
117
123
 
118
- # Modification of the content by operations that are different from those controller by
124
+ # Modification of the content by operations that are different from those controlled by
119
125
  # MODIFY_ANNOTATION, FILL_IN_FORMS and ASSEMBLE_DOCUMENT
120
126
  MODIFY_CONTENT = 1 << 3
121
127
 
@@ -129,6 +135,9 @@ module HexaPDF
129
135
  FILL_IN_FORMS = 1 << 8
130
136
 
131
137
  # Extracting content
138
+ #
139
+ # PDF 2.0 specifies that this bit should always be set by writers and should be ignored by
140
+ # readers. Therefore this is part of the RESERVED constant.
132
141
  EXTRACT_CONTENT = 1 << 9
133
142
 
134
143
  # Assembling of the document (inserting, rotating or deleting of pages and creation of
@@ -142,8 +151,8 @@ module HexaPDF
142
151
  ALL = PRINT | MODIFY_CONTENT | COPY_CONTENT | MODIFY_ANNOTATION | FILL_IN_FORMS |
143
152
  EXTRACT_CONTENT | ASSEMBLE_DOCUMENT | HIGH_QUALITY_PRINT
144
153
 
145
- # Reserved permission bits
146
- RESERVED = 0xFFFFF000 | 0b11000000
154
+ # Reserved permission bits that should always be set
155
+ RESERVED = 0xFFFFF000 | 0b11000000 | EXTRACT_CONTENT
147
156
 
148
157
  # Maps permission symbols to their respective value
149
158
  SYMBOL_TO_PERMISSION = {
@@ -213,7 +222,7 @@ module HexaPDF
213
222
 
214
223
  # Maps the permissions to an integer for use by the standard security handler.
215
224
  #
216
- # See: PDF1.7 s7.6.3.2, ADB1.7 3.5.2 (table 3.20 and the paragraphs before)
225
+ # See: PDF2.0 s7.6.4.2, ADB1.7 3.5.2 (table 3.20 and the paragraphs before)
217
226
  def process_permissions(perms)
218
227
  if perms.kind_of?(Array)
219
228
  perms = perms.inject(0) do |result, perm|
@@ -363,7 +372,7 @@ module HexaPDF
363
372
 
364
373
  # The padding used for passwords with fewer than 32 bytes. Only used for revisions <= 4.
365
374
  #
366
- # See: PDF1.7 s7.6.3.3
375
+ # See: PDF2.0 s7.6.4.3
367
376
  PASSWORD_PADDING = "\x28\xBF\x4E\x5E\x4E\x75\x8A\x41\x64\x00\x4E\x56\xFF\xFA\x01\x08" \
368
377
  "\x2E\x2E\x00\xB6\xD0\x68\x3E\x80\x2F\x0C\xA9\xFE\x64\x53\x69\x7A".b
369
378
 
@@ -376,7 +385,7 @@ module HexaPDF
376
385
  # with the user password. If the password is the owner password,
377
386
  # #compute_owner_encryption_key has to be used instead.
378
387
  #
379
- # See: PDF1.7 s7.6.3.3 (algorithm 2), PDF2.0 s7.6.3.3.2 (algorithm 2.A (a)-(b),(e))
388
+ # See: PDF2.0 s7.6.4.3.2 (algorithm 2), PDF2.0 s7.6.4.3.3 (algorithm 2.A (a)-(b),(e))
380
389
  def compute_user_encryption_key(password)
381
390
  if dict[:R] <= 4
382
391
  data = password
@@ -403,11 +412,11 @@ module HexaPDF
403
412
  # For revisions <= 4 this is done by first retrieving the user password through the use of
404
413
  # the owner password and then using the #compute_user_encryption_key method.
405
414
  #
406
- # For revision 6 file encryption key is a string of random bytes that has been encrypted
407
- # with the owner password. If the password is the user password,
408
- # #compute_user_encryption_key has to be used.
415
+ # For revision 6 the file encryption key is a string of random bytes that has been encrypted
416
+ # with the owner password. If the password is the user password, #compute_user_encryption_key
417
+ # has to be used.
409
418
  #
410
- # See: PDF2.0 s7.6.3.3.2 (algorithm 2.A (a)-(d))
419
+ # See: PDF2.0 s7.6.4.3.3 (algorithm 2.A (a)-(d))
411
420
  def compute_owner_encryption_key(password)
412
421
  if dict[:R] <= 4
413
422
  compute_user_encryption_key(user_password_from_owner_password(password))
@@ -426,7 +435,7 @@ module HexaPDF
426
435
  # *Attention*: If revision 6 is used, the /U value has to be computed and set before this
427
436
  # method is used, otherwise the return value is incorrect!
428
437
  #
429
- # See: PDF1.7 s7.6.3.4 (algorithm 3), PDF2.0 s7.6.3.4.7 (algorithm 9 (a))
438
+ # See: PDF2.0 s7.6.4.4.2 (algorithm 3), PDF2.0 s7.6.4.4.8 (algorithm 9 (a))
430
439
  def compute_o_field(owner_password, user_password)
431
440
  if dict[:R] <= 4
432
441
  data = Digest::MD5.digest(owner_password)
@@ -454,7 +463,7 @@ module HexaPDF
454
463
  # Short explanation: Encrypts the file encryption key with a key based on the password and
455
464
  # the /O and /U values.
456
465
  #
457
- # See: PDF2.0 s7.6.3.4.7 (algorithm 9 (b))
466
+ # See: PDF2.0 s7.6.4.4.8 (algorithm 9 (b))
458
467
  def compute_oe_field(password, file_encryption_key)
459
468
  key = compute_hash(password, dict[:O][40, 8], dict[:U])
460
469
  aes_algorithm.new(key, "\0" * 16, :encrypt).process(file_encryption_key)
@@ -466,8 +475,8 @@ module HexaPDF
466
475
  # based on the user password. For revision 6 the /U value is a hash computed from the
467
476
  # password with added validation and key salts.
468
477
  #
469
- # See: PDF1.7 s7.6.3.4 (algorithm 4 for R=2, algorithm 5 for R=3 and R=4)
470
- # PDF2.0 s7.6.3.4.6 (algorithm 8 (a) for R=6)
478
+ # See: PDF2.0 s7.6.4.4.3 (algorithm 4 for R=2), PDF2.0 s7.6.4.4.4 (algorithm 5 for R=3 and R=4)
479
+ # PDF2.0 s7.6.4.4.7 (algorithm 8 (a) for R=6)
471
480
  def compute_u_field(password)
472
481
  if dict[:R] == 2
473
482
  key = compute_user_encryption_key(password)
@@ -491,7 +500,7 @@ module HexaPDF
491
500
  # Short explanation: Encrypts the file encryption key with a key based on the password and
492
501
  # the /U value.
493
502
  #
494
- # See: PDF2.0 s7.6.3.4.6 (algorithm 8 (b))
503
+ # See: PDF2.0 s7.6.4.4.7 (algorithm 8 (b))
495
504
  def compute_ue_field(password, file_encryption_key)
496
505
  key = compute_hash(password, dict[:U][40, 8])
497
506
  aes_algorithm.new(key, "\0" * 16, :encrypt).process(file_encryption_key)
@@ -501,7 +510,7 @@ module HexaPDF
501
510
  #
502
511
  # Uses /P and /EncryptMetadata values, so these have to be set beforehand.
503
512
  #
504
- # See: PDF2.0 s7.6.3.4.8 (algorithm 10)
513
+ # See: PDF2.0 s7.6.4.4.9 (algorithm 10)
505
514
  def compute_perms_field(file_encryption_key)
506
515
  data = [dict[:P]].pack('V')
507
516
  data << [0xFFFFFFFF].pack('V')
@@ -513,7 +522,7 @@ module HexaPDF
513
522
 
514
523
  # Authenticates the user password, i.e. decides whether the given user password is valid.
515
524
  #
516
- # See: PDF1.7 s7.6.3.4 (algorithm 6), PDF2.0 s7.6.3.4.9 (algorithm 11)
525
+ # See: PDF2.0 s7.6.4.4.5 (algorithm 6), PDF2.0 s7.6.4.4.10 (algorithm 11)
517
526
  def user_password_valid?(password)
518
527
  if dict[:R] == 2
519
528
  compute_u_field(password) == dict[:U]
@@ -526,7 +535,7 @@ module HexaPDF
526
535
 
527
536
  # Authenticates the owner password, i.e. decides whether the given owner password is valid.
528
537
  #
529
- # See: PDF1.7 s7.6.3.4 (algorithm 7), PDF2.0 s7.6.3.4.10 (algorithm 12)
538
+ # See: PDF2.0 s7.6.4.4.6 (algorithm 7), PDF2.0 s7.6.4.4.11 (algorithm 12)
530
539
  def owner_password_valid?(password)
531
540
  if dict[:R] <= 4
532
541
  user_password_valid?(user_password_from_owner_password(password))
@@ -539,7 +548,7 @@ module HexaPDF
539
548
  #
540
549
  # This method can only be used for revision 6.
541
550
  #
542
- # See: PDF2.0 s7.6.3.4.11 (algorithm 13)
551
+ # See: PDF2.0 s7.6.4.4.12 (algorithm 13)
543
552
  def check_perms_field(encryption_key)
544
553
  decrypted = aes_algorithm.new(encryption_key, "\0" * 16, :decrypt).process(dict[:Perms])
545
554
  if decrypted[9, 3] != "adb"
@@ -553,7 +562,7 @@ module HexaPDF
553
562
 
554
563
  # Returns the user password when given the owner password for revisions <= 4.
555
564
  #
556
- # See: PDF1.7 s7.6.3.4 (algorithm 7 (a) and (b))
565
+ # See: PDF2.0 s7.6.4.4.6 (algorithm 7 (a) and (b))
557
566
  def user_password_from_owner_password(owner_password)
558
567
  data = Digest::MD5.digest(owner_password)
559
568
  if dict[:R] >= 3
@@ -578,7 +587,7 @@ module HexaPDF
578
587
  # "#{password}#{salt}#{user_key}" where +user_key+ has to be empty when doing operations
579
588
  # with the user password.
580
589
  #
581
- # See: PDF2.0 s7.6.3.3.3 (algorithm 2.B)
590
+ # See: PDF2.0 s7.6.4.3.4 (algorithm 2.B)
582
591
  def compute_hash(password, salt, user_key = '')
583
592
  k = Digest::SHA256.digest("#{password}#{salt}#{user_key}")
584
593
  e = ''
@@ -606,8 +615,8 @@ module HexaPDF
606
615
  # * For revision 6 the password is converted into UTF-8 encoding that is normalized
607
616
  # according to the PDF2.0 specification.
608
617
  #
609
- # See: PDF1.7 s7.6.3.3 (algorithm 2 step a)),
610
- # PDF2.0 s7.6.3.3.2 (algorithm 2.A steps a) and b))
618
+ # See: PDF2.0 s7.6.4.3.2 (algorithm 2 step a)),
619
+ # PDF2.0 s7.6.4.3.3 (algorithm 2.A steps a) and b))
611
620
  def prepare_password(password)
612
621
  if dict[:R] <= 4
613
622
  password.to_s[0, 32].encode(Encoding::ISO_8859_1).force_encoding(Encoding::BINARY).
@@ -41,7 +41,7 @@ module HexaPDF
41
41
  # A PDF document may be encrypted so that
42
42
  #
43
43
  # * certain permissions are respected when the document is opened,
44
- # * a password must be specified so that a document can be openend or so that
44
+ # * a password must be specified so that a document can be opened, or so that
45
45
  # * a password must be specified to remove the restrictions and allow full access.
46
46
  #
47
47
  # This module contains all encryption and security related code to facilitate PDF encryption.
@@ -61,6 +61,9 @@ module HexaPDF
61
61
  # additionally allows setting permission information. This security handler is implemented by
62
62
  # the Encryption::StandardSecurityHandler class.
63
63
  #
64
+ # There is also a certificate-based security handler defined by the PDF specification. However,
65
+ # that handler is not implemented.
66
+ #
64
67
  #
65
68
  # === Encryption Algorithms
66
69
  #
@@ -78,8 +81,10 @@ module HexaPDF
78
81
  # Pure Ruby implementations of the algorithms which are naturally much slower than the OpenSSL
79
82
  # based ones. However, these implementations can be used on any Ruby implementation.
80
83
  #
84
+ # The ARC4 algorithm is deprecated with PDF 2.0 and should not be used when creating new
85
+ # documents.
81
86
  #
82
- # See: PDF1.7 s7.6
87
+ # See: PDF2.0 s7.6
83
88
  module Encryption
84
89
 
85
90
  autoload(:ARC4, 'hexapdf/encryption/arc4')
data/lib/hexapdf/error.rb CHANGED
@@ -82,4 +82,22 @@ module HexaPDF
82
82
  # Raised when the encryption method is not supported.
83
83
  class UnsupportedEncryptionError < EncryptionError; end
84
84
 
85
+ # Raised when a font wrapper implementation should encode a missing glyph.
86
+ class MissingGlyphError < Error
87
+
88
+ # Returns the glyph object that contains the information about the missing glyph.
89
+ attr_reader :glyph
90
+
91
+ # Creates a new MissingGlyphError for the given +glyph+.
92
+ def initialize(glyph)
93
+ @glyph = glyph
94
+ end
95
+
96
+ def message # :nodoc:
97
+ "No glyph for #{glyph.str.inspect} in font '#{glyph.font.full_name}' found. \n\n" \
98
+ "Use the configuration option 'font.on_missing_glyph' to customize missing glyph handling."
99
+ end
100
+
101
+ end
102
+
85
103
  end
@@ -45,7 +45,7 @@ module HexaPDF
45
45
  # This filter module implements the ASCII-85 filter which can encode arbitrary data into an
46
46
  # ASCII compatible format that expands the original data only by a factor of 4:5.
47
47
  #
48
- # See: HexaPDF::Filter, PDF1.7 s7.4.2
48
+ # See: HexaPDF::Filter, PDF2.0 s7.4.2
49
49
  module ASCII85Decode
50
50
 
51
51
  VALUE_TO_CHAR = {} #:nodoc:
@@ -44,7 +44,7 @@ module HexaPDF
44
44
  # This filter module implements the ASCII hex decode/encode filter which can encode arbitrary
45
45
  # data into the two byte ASCII hex format that expands the original data by a factor of 1:2.
46
46
  #
47
- # See: HexaPDF::Filter, PDF1.7 s7.4.2
47
+ # See: HexaPDF::Filter, PDF2.0 s7.4.2
48
48
  module ASCIIHexDecode
49
49
 
50
50
  # See HexaPDF::Filter
@@ -45,7 +45,7 @@ module HexaPDF
45
45
 
46
46
  # Implements the Deflate filter using the Zlib library.
47
47
  #
48
- # See: HexaPDF::Filter, PDF1.7 s7.4.4
48
+ # See: HexaPDF::Filter, PDF2.0 s7.4.4
49
49
  module FlateDecode
50
50
 
51
51
  # See HexaPDF::Filter
@@ -48,7 +48,7 @@ module HexaPDF
48
48
  # not aligned to byte boundaries, this filter is not as fast as the other filters. If speed is
49
49
  # a concern, the FlateDecode filter should be used instead.
50
50
  #
51
- # See: HexaPDF::Filter, PDF1.7 s7.4.4
51
+ # See: HexaPDF::Filter, PDF2.0 s7.4.4
52
52
  module LZWDecode
53
53
 
54
54
  CLEAR_TABLE = 256 # :nodoc:
@@ -40,7 +40,7 @@ module HexaPDF
40
40
  # The PassThrough filter just passes the source on unmodified. This is enough for basic
41
41
  # read-write capabilities but not if the unfiltered bytes are needed.
42
42
  #
43
- # See: HexaPDF::Filter, PDF1.7 s7.4
43
+ # See: HexaPDF::Filter, PDF2.0 s7.4
44
44
  module PassThrough
45
45
 
46
46
  # See HexaPDF::Filter
@@ -47,7 +47,7 @@ module HexaPDF
47
47
  # Although a predictor isn't a full PDF filter, it is implemented as one in HexaPDF terms to
48
48
  # allow easy chaining of the predictor.
49
49
  #
50
- # See: PDF1.7 s7.4.4.3, s7.4.4.4, https://partners.adobe.com/public/developer/en/tiff/TIFF6.pdf
50
+ # See: PDF2.0 s7.4.4.3, s7.4.4.4, https://partners.adobe.com/public/developer/en/tiff/TIFF6.pdf
51
51
  # (p64f), http://www.w3.org/TR/PNG-Filters.html
52
52
  #
53
53
  #-- Implementation notes:
@@ -43,7 +43,7 @@ module HexaPDF
43
43
 
44
44
  # Implements the run length filter.
45
45
  #
46
- # See: HexaPDF::Filter, PDF1.7 s7.4.5
46
+ # See: HexaPDF::Filter, PDF2.0 s7.4.5
47
47
  module RunLengthDecode
48
48
 
49
49
  EOD = 128.chr #:nodoc:
@@ -48,6 +48,8 @@ module HexaPDF
48
48
  attr_reader :length
49
49
 
50
50
  # Initializes the Fiber and sets the +length+.
51
+ #
52
+ # A +length+ of +nil+ is equal to -1.
51
53
  def initialize(length, &block)
52
54
  super(&block)
53
55
  @length = length || -1
@@ -55,6 +57,47 @@ module HexaPDF
55
57
 
56
58
  end
57
59
 
60
+ # Implements part of the Fiber interface so that it can be used instead of a Fiber by HexaPDF
61
+ # when only a single string should be returned.
62
+ class FiberDoubleForString
63
+
64
+ # Creates a new FiberDoubleForString instance for the given string +str+ or for the string
65
+ # returned by invoking the block.
66
+ def initialize(str = nil, &block)
67
+ @block = block
68
+ @str = str
69
+ @block_used = false
70
+ end
71
+
72
+ # Returns the length of the wrapped string.
73
+ #
74
+ # May only be called before #resume!
75
+ def length
76
+ str.length
77
+ end
78
+
79
+ # Returns +true+ if #resume has not yet been called.
80
+ def alive?
81
+ !str.nil?
82
+ end
83
+
84
+ # Returns the wrapped string on the first invocation, +nil+ otherwise.
85
+ def resume
86
+ tmp = str
87
+ @str = nil
88
+ tmp
89
+ end
90
+
91
+ private
92
+
93
+ # Sets the string to the return value of the initially provided block if no string has been
94
+ # provided.
95
+ def str
96
+ @str ||= @block_used || @block.nil? ? nil : (@block_used = true; @block.call)
97
+ end
98
+
99
+ end
100
+
58
101
  # == Overview
59
102
  #
60
103
  # A stream filter is used to compress a stream or to encode it in an ASCII compatible way; or
@@ -83,7 +126,7 @@ module HexaPDF
83
126
  #
84
127
  # Such a fiber should *not* return +nil+ unless this signifies that no more data is coming!
85
128
  #
86
- # See: PDF1.7 s7.4
129
+ # See: PDF2.0 s7.4
87
130
  module Filter
88
131
 
89
132
  autoload(:ASCII85Decode, 'hexapdf/filter/ascii85_decode')
@@ -99,10 +142,16 @@ module HexaPDF
99
142
 
100
143
  autoload(:PassThrough, 'hexapdf/filter/pass_through')
101
144
 
102
- # Returns a Fiber that can be used as a source for decoders/encoders and that is based on a
103
- # String object.
145
+ # Returns a FiberDoubleForString that uses the string returned by the provided block and can be
146
+ # used as a source for decoders/encoders.
147
+ def self.source_from_proc(&block)
148
+ FiberDoubleForString.new(&block)
149
+ end
150
+
151
+ # Returns a FiberDoubleForString that returns the given string and can be used as a source for
152
+ # decoders/encoders.
104
153
  def self.source_from_string(str)
105
- FiberWithLength.new(str.length) { str.dup }
154
+ FiberDoubleForString.new(str.dup)
106
155
  end
107
156
 
108
157
  # Returns a Fiber that can be used as a source for decoders/encoders and that reads chunks of
@@ -149,7 +198,7 @@ module HexaPDF
149
198
  # Note that there will be a problem if the size of the file changes between the invocation of
150
199
  # this method and the actual consumption of the file!
151
200
  #
152
- # See ::source_from_io for a description of the available options.
201
+ # See ::source_from_io for a description of the +pos+, +length+ and +chunk_size+ options.
153
202
  def self.source_from_file(filename, pos: 0, length: -1, chunk_size: 0)
154
203
  fib_length = (length < 0 ? File.stat(filename).size - pos : length)
155
204
  FiberWithLength.new(fib_length) do
@@ -165,7 +214,7 @@ module HexaPDF
165
214
  # Returns the concatenated string chunks retrieved by resuming the given source Fiber until it
166
215
  # is dead.
167
216
  #
168
- # The returned string is always a string with +BINARY+ (= +ASCII-8BIT+) encoding.
217
+ # The returned string is always a string with binary (= +ASCII-8BIT+) encoding.
169
218
  def self.string_from_source(source)
170
219
  str = ''.b
171
220
  while source.alive? && (data = source.resume)
@@ -146,10 +146,10 @@ module HexaPDF
146
146
  # Parses the "bfrange" operator at the current position.
147
147
  #
148
148
  #--
149
- # PDF1.7 s9.10.3 and Adobe Technical Note #5411 have different views as to how "bfrange"
149
+ # PDF2.0 s9.10.3 and Adobe Technical Note #5411 have different views as to how "bfrange"
150
150
  # operators of the form "startCode endCode codePoint" should be handled.
151
151
  #
152
- # PDF1.7 mentions that the last byte of "codePoint" should be incremented, up to a maximum
152
+ # PDF2.0 mentions that the last byte of "codePoint" should be incremented, up to a maximum
153
153
  # of 255. However #5411 has the range "<1379> <137B> <90FE>" as example which contradicts
154
154
  # this.
155
155
  #
@@ -43,7 +43,7 @@ module HexaPDF
43
43
  # Represents a CMap, a mapping from character codes to CIDs (character IDs) or to their Unicode
44
44
  # value.
45
45
  #
46
- # See: PDF1.7 s9.7.5, s9.10.3; Adobe Technical Notes #5014 and #5411
46
+ # See: PDF2.0 s9.7.5, s9.10.3; Adobe Technical Notes #5014 and #5411
47
47
  class CMap
48
48
 
49
49
  autoload(:Parser, 'hexapdf/font/cmap/parser')
@@ -42,7 +42,7 @@ module HexaPDF
42
42
 
43
43
  # The difference encoding uses a base encoding that can be overlaid with additional mappings.
44
44
  #
45
- # See: PDF1.7 s9.6.6.1
45
+ # See: PDF2.0 s9.6.5.1
46
46
  class DifferenceEncoding < Base
47
47
 
48
48
  # The base encoding.
@@ -42,7 +42,7 @@ module HexaPDF
42
42
 
43
43
  # The MacExpertEncoding for Latin texts.
44
44
  #
45
- # See: PDF1.7 sD.4
45
+ # See: PDF2.0 sD.4
46
46
  class MacExpertEncoding < Base
47
47
 
48
48
  def initialize #:nodoc:
@@ -42,7 +42,7 @@ module HexaPDF
42
42
 
43
43
  # The Mac Roman standard encoding for Latin texts.
44
44
  #
45
- # See: PDF1.7 sD.1, sD.2
45
+ # See: PDF2.0 sD.1, sD.2
46
46
  class MacRomanEncoding < Base
47
47
 
48
48
  def initialize #:nodoc:
@@ -256,7 +256,7 @@ module HexaPDF
256
256
  0264 => :yen,
257
257
  0172 => :z,
258
258
  0060 => :zero,
259
- # additions due to PDF1.7 sD.2 footnote 6
259
+ # additions due to PDF2.0 sD.2 footnote 6
260
260
  0312 => :space,
261
261
  }
262
262
  end
@@ -42,7 +42,7 @@ module HexaPDF
42
42
 
43
43
  # The Adobe standard encoding for Latin texts.
44
44
  #
45
- # See: PDF1.7 sD.1, sD.2
45
+ # See: PDF2.0 sD.1, sD.2
46
46
  class StandardEncoding < Base
47
47
 
48
48
  def initialize #:nodoc:
@@ -42,7 +42,7 @@ module HexaPDF
42
42
 
43
43
  # The built-in encoding of the Symbol font.
44
44
  #
45
- # See: PDF1.7 sD.5
45
+ # See: PDF2.0 sD.5
46
46
  class SymbolEncoding < Base
47
47
 
48
48
  def initialize #:nodoc:
@@ -42,7 +42,7 @@ module HexaPDF
42
42
 
43
43
  # The Windows Code Page 1252, the standard Windows encoding for Latin texts.
44
44
  #
45
- # See: PDF1.7 sD.1, sD.2
45
+ # See: PDF2.0 sD.1, sD.2
46
46
  class WinAnsiEncoding < Base
47
47
 
48
48
  def initialize #:nodoc:
@@ -265,11 +265,11 @@ module HexaPDF
265
265
  0172 => :z,
266
266
  0236 => :zcaron,
267
267
  0060 => :zero,
268
- # additions due to PDF1.7 sD.2 footnote 5,6
268
+ # additions due to PDF2.0 sD.2 footnote 5,6
269
269
  0240 => :space,
270
270
  0255 => :hyphen,
271
271
  }
272
- # additions due to PDF1.7 sD.2 footnote 3
272
+ # additions due to PDF2.0 sD.2 footnote 3
273
273
  041.upto(255) do |i|
274
274
  next if @code_to_name.key?(i)
275
275
  @code_to_name[i] = :bullet
@@ -42,7 +42,7 @@ module HexaPDF
42
42
 
43
43
  # The built-in encoding of the ZapfDingbats font.
44
44
  #
45
- # See: PDF1.7 sD.6
45
+ # See: PDF2.0 sD.6
46
46
  class ZapfDingbatsEncoding < Base
47
47
 
48
48
  def initialize #:nodoc:
@@ -41,6 +41,9 @@ module HexaPDF
41
41
  # font.
42
42
  class InvalidGlyph
43
43
 
44
+ # The associated font object.
45
+ attr_reader :font
46
+
44
47
  # The string that could not be represented as a glyph.
45
48
  attr_reader :str
46
49
 
@@ -51,7 +51,7 @@ module HexaPDF
51
51
  #
52
52
  # * By using a composite font, more than 256 characters can be encoded with one font object.
53
53
  # * Fonts for vertical writing can potentially be used.
54
- # * The PDF specification recommends using a composite font (see PDF1.7 s9.9 at the end).
54
+ # * The PDF specification recommends using a composite font (see PDF2.0 s9.9.1 at the end).
55
55
  #
56
56
  # Additionally, TrueType fonts are *always* embedded.
57
57
  class TrueTypeWrapper
@@ -59,6 +59,9 @@ module HexaPDF
59
59
  # Represents a single glyph of the wrapped font.
60
60
  class Glyph
61
61
 
62
+ # The associated font object.
63
+ attr_reader :font
64
+
62
65
  # The glyph ID.
63
66
  attr_reader :id
64
67
 
@@ -171,6 +174,18 @@ module HexaPDF
171
174
  end
172
175
  end
173
176
 
177
+ # Returns a custom Glyph object which represents the given +string+ via the given glyph +id+.
178
+ #
179
+ # This functionality can be used to associate a single glyph id with multiple, different
180
+ # strings for replacement glyph purposes. When used in such a way, the used glyph id is often
181
+ # 0, which represents the missing glyph.
182
+ def custom_glyph(id, string)
183
+ if id < 0 || id >= @wrapped_font[:maxp].num_glyphs
184
+ raise HexaPDF::Error, "Glyph ID #{id} is invalid for font '#{@wrapped_font.full_name}'"
185
+ end
186
+ Glyph.new(@wrapped_font, id, string)
187
+ end
188
+
174
189
  # Returns an array of glyph objects representing the characters in the UTF-8 encoded string.
175
190
  def decode_utf8(str)
176
191
  str.codepoints.map! do |c|
@@ -187,9 +202,7 @@ module HexaPDF
187
202
  def encode(glyph)
188
203
  (@encoded_glyphs[glyph.id] ||=
189
204
  begin
190
- if glyph.kind_of?(InvalidGlyph)
191
- raise HexaPDF::Error, "Glyph for #{glyph.str.inspect} missing"
192
- end
205
+ raise HexaPDF::MissingGlyphError.new(glyph) if glyph.kind_of?(InvalidGlyph)
193
206
  if @subsetter
194
207
  [[@subsetter.use_glyph(glyph.id)].pack('n'), glyph]
195
208
  else