hexapdf 0.46.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +83 -16
  3. data/lib/hexapdf/composer.rb +7 -0
  4. data/lib/hexapdf/configuration.rb +13 -0
  5. data/lib/hexapdf/content/parser.rb +3 -1
  6. data/lib/hexapdf/digital_signature/cms_handler.rb +13 -0
  7. data/lib/hexapdf/digital_signature/signature.rb +1 -1
  8. data/lib/hexapdf/digital_signature/signing/default_handler.rb +1 -0
  9. data/lib/hexapdf/document.rb +14 -3
  10. data/lib/hexapdf/encryption/standard_security_handler.rb +32 -26
  11. data/lib/hexapdf/font/cmap/writer.rb +58 -4
  12. data/lib/hexapdf/font/cmap.rb +7 -0
  13. data/lib/hexapdf/font/true_type_wrapper.rb +41 -16
  14. data/lib/hexapdf/importer.rb +1 -1
  15. data/lib/hexapdf/layout/table_box.rb +57 -10
  16. data/lib/hexapdf/layout/text_fragment.rb +2 -1
  17. data/lib/hexapdf/object.rb +1 -1
  18. data/lib/hexapdf/parser.rb +1 -1
  19. data/lib/hexapdf/reference.rb +1 -1
  20. data/lib/hexapdf/task/merge_acro_form.rb +164 -0
  21. data/lib/hexapdf/task/optimize.rb +4 -4
  22. data/lib/hexapdf/task.rb +1 -0
  23. data/lib/hexapdf/tokenizer.rb +2 -0
  24. data/lib/hexapdf/type/acro_form/appearance_generator.rb +8 -4
  25. data/lib/hexapdf/type/acro_form/form.rb +14 -24
  26. data/lib/hexapdf/type/acro_form/signature_field.rb +18 -7
  27. data/lib/hexapdf/type/acro_form/variable_text_field.rb +12 -4
  28. data/lib/hexapdf/type/actions/go_to.rb +1 -0
  29. data/lib/hexapdf/type/actions/go_to_r.rb +1 -0
  30. data/lib/hexapdf/type/actions/launch.rb +5 -1
  31. data/lib/hexapdf/type/annotation.rb +6 -1
  32. data/lib/hexapdf/type/annotations/markup_annotation.rb +14 -1
  33. data/lib/hexapdf/type/annotations/widget.rb +4 -2
  34. data/lib/hexapdf/type/catalog.rb +3 -0
  35. data/lib/hexapdf/type/cid_font.rb +4 -1
  36. data/lib/hexapdf/type/file_specification.rb +17 -14
  37. data/lib/hexapdf/type/font_descriptor.rb +4 -3
  38. data/lib/hexapdf/type/font_simple.rb +3 -1
  39. data/lib/hexapdf/type/font_true_type.rb +2 -0
  40. data/lib/hexapdf/type/font_type0.rb +1 -1
  41. data/lib/hexapdf/type/font_type1.rb +7 -0
  42. data/lib/hexapdf/type/font_type3.rb +0 -1
  43. data/lib/hexapdf/type/form.rb +5 -2
  44. data/lib/hexapdf/type/graphics_state_parameter.rb +7 -4
  45. data/lib/hexapdf/type/image.rb +8 -4
  46. data/lib/hexapdf/type/info.rb +2 -2
  47. data/lib/hexapdf/type/mark_information.rb +2 -2
  48. data/lib/hexapdf/type/optional_content_configuration.rb +1 -1
  49. data/lib/hexapdf/type/optional_content_membership.rb +1 -1
  50. data/lib/hexapdf/type/page.rb +5 -3
  51. data/lib/hexapdf/type/resources.rb +6 -6
  52. data/lib/hexapdf/type/viewer_preferences.rb +4 -3
  53. data/lib/hexapdf/version.rb +1 -1
  54. data/lib/hexapdf/writer.rb +1 -0
  55. data/test/data/standard-security-handler/bothpwd-aes-256bit-V5-R5.pdf +43 -0
  56. data/test/data/standard-security-handler/nopwd-aes-256bit-V5-R5.pdf +44 -0
  57. data/test/data/standard-security-handler/ownerpwd-aes-256bit-V5-R5.pdf +43 -0
  58. data/test/data/standard-security-handler/userpwd-aes-256bit-V5-R5.pdf +0 -0
  59. data/test/hexapdf/common_tokenizer_tests.rb +5 -0
  60. data/test/hexapdf/digital_signature/signing/test_default_handler.rb +6 -0
  61. data/test/hexapdf/digital_signature/test_cms_handler.rb +12 -7
  62. data/test/hexapdf/digital_signature/test_signature.rb +7 -0
  63. data/test/hexapdf/digital_signature/test_signatures.rb +12 -7
  64. data/test/hexapdf/encryption/test_standard_security_handler.rb +5 -2
  65. data/test/hexapdf/font/cmap/test_writer.rb +73 -16
  66. data/test/hexapdf/font/test_true_type_wrapper.rb +17 -3
  67. data/test/hexapdf/layout/test_list_box.rb +7 -7
  68. data/test/hexapdf/layout/test_table_box.rb +52 -0
  69. data/test/hexapdf/layout/test_text_fragment.rb +3 -3
  70. data/test/hexapdf/layout/test_text_layouter.rb +4 -2
  71. data/test/hexapdf/task/test_merge_acro_form.rb +104 -0
  72. data/test/hexapdf/task/test_optimize.rb +2 -0
  73. data/test/hexapdf/test_composer.rb +8 -0
  74. data/test/hexapdf/test_document.rb +12 -3
  75. data/test/hexapdf/test_importer.rb +7 -0
  76. data/test/hexapdf/test_parser.rb +7 -0
  77. data/test/hexapdf/test_writer.rb +19 -5
  78. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +40 -23
  79. data/test/hexapdf/type/acro_form/test_form.rb +7 -8
  80. data/test/hexapdf/type/acro_form/test_signature_field.rb +3 -1
  81. data/test/hexapdf/type/acro_form/test_variable_text_field.rb +14 -1
  82. data/test/hexapdf/type/actions/test_launch.rb +6 -2
  83. data/test/hexapdf/type/annotations/test_widget.rb +4 -0
  84. data/test/hexapdf/type/test_font_type1.rb +5 -0
  85. data/test/hexapdf/type/test_form.rb +1 -1
  86. data/test/hexapdf/type/test_page.rb +7 -1
  87. metadata +8 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f9c1a35d4ad93b48faa1f728bcddc91778da584c8d673e2f15557bd22050a438
4
- data.tar.gz: 2ef8ca70891723643080a402f0808882f66f01a816c422477cc03fad34774859
3
+ metadata.gz: 2889ba1d03e2c351efd694b1583063023fff97c0da636ff5103f88538255735c
4
+ data.tar.gz: 6fb4727db05900e8fccba2ad4e093d1092e17305e5b5616ded97a76cf835673c
5
5
  SHA512:
6
- metadata.gz: 87c109bd8a6711b4df27a40c689a025d816075d6c68c21a1cc22924b5ae827cd44d98e45bff8f43c85403dd82d6a6e54b66c99bfe135298cba33292e9511a1fc
7
- data.tar.gz: e0529e3e244be8366e722ea29ab06a8b922f18698526c79a28d5d11f02da5ee103cba71ecbc6882fefe3de27f02d38f8ce8b505fefbb45839069c74aefaed9ae
6
+ metadata.gz: 00be8ed2c306a88e5bfc0eada97a7e6bf802ec269e832bb21b3521d4077b18ecad11946ddc6f8a6d575820e66339059e59ba2c4cdd2b74d6c7d6defd0f2f5256
7
+ data.tar.gz: 94c6a8178ead2a986921b72b07ef5dc388a5fa6a67945573eec921db30c9940d241d3f47591c8fbbf9bdcf313df0dda536f0fc78e0e946e27dfa3bc13dad9a28
data/CHANGELOG.md CHANGED
@@ -1,3 +1,70 @@
1
+ ## 1.0.0 - 2024-10-26
2
+
3
+ ### Added
4
+
5
+ * [HexaPDF::Task::MergeAcroForm] for merging AcroForm information for imported
6
+ pages
7
+ * [HexaPDF::Document#write_to_string] and [HexaPDF::Composer#write_to_string]
8
+ for easily writing a document to a String
9
+ * [HexaPDF::Font::CMap::Writer#create_cid_cmap] for creating a character code to
10
+ CID CMap file
11
+
12
+ ### Changed
13
+
14
+ * [HexaPDF::Type::AcroForm::Form] text-like field creation methods to always set
15
+ a default appearance string and the quadding
16
+ * Convenience methods for accessing resources to not add the deprecated /ProcSet
17
+ entry by default
18
+ * [HexaPDF::DigitalSignature::CMSHandler] to add informational output regarding
19
+ the certificate chain on verification
20
+ * Validation of [HexaPDF::Type::FontType1] to ensure correct /Encoding value
21
+
22
+ ### Fixed
23
+
24
+ * [HexaPDF::DigitalSignature::Signature#signed_data] to work for invalid offsets
25
+ * [HexaPDF::DigitalSignature::Signing::DefaultHandler] to update the document's
26
+ version to 2.0 when using PAdES
27
+ * Parsing of invalid `)` character in PDF objects and content streams
28
+ * Handling of files that contain stream length values that are indirect objects
29
+ that do not exist
30
+ * [HexaPDF::Font::TrueTypeWrapper] to correctly handle the situation when
31
+ multiple codepoints refer to the same glyph ID
32
+ * [HexaPDF::Type::Page#contents] to handle null values in /Contents array
33
+
34
+
35
+ ## 0.47.0 - 2024-09-07
36
+
37
+ ### Added
38
+
39
+ * Configuration option 'acro_form.fallback_default_appearance' to allow setting
40
+ a standard default appearance string for a variable text field if none is
41
+ found
42
+ * Support for decrypting files with the proprietary algorithm /R 5
43
+
44
+ ### Changed
45
+
46
+ * [HexaPDF::Task::Optimize] to not remove optional /Type entries containing
47
+ default values
48
+ * Validation of [HexaPDF::Type::AcroForm::Form] to not add a /DA entry
49
+
50
+ ### Fixed
51
+
52
+ * [HexaPDF::Layout::TableBox] to correctly calculate and distribute row
53
+ heights when row spans are involved
54
+ * [HexaPDF::Type::AcroForm::AppearanceGenerator] to work for files where check
55
+ boxes don't define the name of the on state
56
+ * [HexaPDF::Importer#import] to handle null values in all cases
57
+ * [HexaPDF::Type::AcroForm::VariableTextField] to handle parsing of invalid PDFs
58
+ with symbolic appearance strings
59
+ * [HexaPDF::Type::Annotations::Widget#marker_style] to handle invalid /DA values
60
+ with missing font size or color information
61
+ * [HexaPDF::Type::AcroForm::SignatureField#field_value] to always return a
62
+ correctly wrapped object
63
+ * [HexaPDF::Writer] to remove /Type entry from trailer
64
+ * [HexaPDF::Type::AcroForm::AppearanceGenerator#create_text_appearances] to
65
+ handle invalid appearance streams that are not correct Form XObjects
66
+
67
+
1
68
  ## 0.46.0 - 2024-08-11
2
69
 
3
70
  ### Added
@@ -20,7 +87,7 @@
20
87
  signatures
21
88
  * [HexaPDF::DigitalSignature::CMSHandler#signing_time] to use time from an
22
89
  embedded timestamp authority signature if possible
23
- * [HexaPDF::Layout::Box#fit] to return success for boxes with content
90
+ * HexaPDF::Layout::Box#fit to return success for boxes with content
24
91
  width/height of zero
25
92
  * [HexaPDF::Importer::copy] to optionally allow copying the catalog and page
26
93
  tree nodes
@@ -28,7 +95,7 @@
28
95
  ### Fixed
29
96
 
30
97
  * Setting of correct x-position in fit result for boxes with flow positioning
31
- * [HexaPDF::Layout::ListBox#fit] to respect the set height
98
+ * HexaPDF::Layout::ListBox#fit to respect the set height
32
99
  * CLI command `hexapdf inspect` to work in case of missing Unicode mappings
33
100
  * [HexaPDF::Type::AcroForm::Form#delete_field] to correctly work for fields with
34
101
  an embedded widget
@@ -47,7 +114,7 @@
47
114
 
48
115
  ### Changed
49
116
 
50
- * [HexaPDF::Layout::Box#fit] to set width/height correctly for boxes with
117
+ * HexaPDF::Layout::Box#fit to set width/height correctly for boxes with
51
118
  position `:flow`
52
119
 
53
120
  ### Fixed
@@ -93,9 +160,9 @@
93
160
 
94
161
  ### Fixed
95
162
 
96
- * [HexaPDF::Layout::TextBox#fit] to correctly calculate width in case of flowing
163
+ * HexaPDF::Layout::TextBox#fit to correctly calculate width in case of flowing
97
164
  text around other boxes
98
- * [HexaPDF::Layout::TextBox#draw] to correctly draw border, background... on
165
+ * HexaPDF::Layout::TextBox#draw to correctly draw border, background... on
99
166
  boxes using position 'flow'
100
167
  * Comparison of Hash with [HexaPDF::Dictionary] objects by implementing
101
168
  `#to_hash`
@@ -149,7 +216,7 @@
149
216
  JavaScript action that formats the field's value
150
217
  * [HexaPDF::Type::AcroForm::TextField#set_calculate_action] for setting a
151
218
  JavaScript action that calculates the field's value
152
- * [HexaPDF::Type::AcroForm#recalculate_fields] for recalculating fields
219
+ * [HexaPDF::Type::AcroForm::Form#recalculate_fields] for recalculating fields
153
220
 
154
221
  ### Changed
155
222
 
@@ -206,7 +273,7 @@
206
273
 
207
274
  ### Changed
208
275
 
209
- * [HexaPDF::Layout::Frame::FitResult#draw] to provide better optional content
276
+ * HexaPDF::Layout::Frame::FitResult#draw to provide better optional content
210
277
  group names
211
278
 
212
279
  ### Fixed
@@ -285,8 +352,8 @@
285
352
 
286
353
  ### Changed
287
354
 
288
- * [HexaPDF::Layout::Frame::FitResult#draw] to allow drawing at an offset
289
- * [HexaPDF::Layout::Box#fit] to delegate the actual content fitting to the
355
+ * HexaPDF::Layout::Frame::FitResult#draw to allow drawing at an offset
356
+ * HexaPDF::Layout::Box#fit to delegate the actual content fitting to the
290
357
  `#fit_content` method
291
358
  * [HexaPDF::Document::Layout#box] to allow using the block as drawing block for
292
359
  the base box class
@@ -363,8 +430,8 @@
363
430
 
364
431
  ### Fixed
365
432
 
366
- * [HexaPDF::Layout::ColumnBox#fit] to correctly take initial height into account
367
- * [HexaPDF::Layout::ColumnBox#fit] to ensure correct results in case the
433
+ * HexaPDF::Layout::ColumnBox#fit to correctly take initial height into account
434
+ * HexaPDF::Layout::ColumnBox#fit to ensure correct results in case the
368
435
  requested dimensions are larger than the current region
369
436
  * [HexaPDF::Document::Layout#formatted_text_box] to correctly handle properties
370
437
  * [HexaPDF::Layout::Frame#fit] to raise an error if an invalid value for the
@@ -410,7 +477,7 @@
410
477
  context argument (a page or Form XObject instance)
411
478
  * [HexaPDF::Layout::ListBox] to use its 'fill_color' style property for the item
412
479
  marker color
413
- * [HexaPDF::Layout::Frame::FitResult#draw] to use optional content groups for
480
+ * HexaPDF::Layout::Frame::FitResult#draw to use optional content groups for
414
481
  debug output
415
482
 
416
483
  ### Fixed
@@ -419,7 +486,7 @@
419
486
  default range starting at page 1
420
487
  * [HexaPDF::Type::Page#flatten_annotations] to correctly handle scaled
421
488
  appearances
422
- * Using an unknown style name in [HexaPDF:Document::Layout] method by providing
489
+ * Using an unknown style name in [HexaPDF::Document::Layout] method by providing
423
490
  a useful error message
424
491
  * [HexaPDF::Layout::Box::new] to ensure that the properties attribute is always
425
492
  a hash
@@ -480,7 +547,7 @@
480
547
  final box positions into account
481
548
  * [HexaPDF::Content::Canvas#text] to set the leading only when multiple lines
482
549
  are drawn
483
- * [HexaPDF::Layout::TextBox#split] to use float comparison
550
+ * HexaPDF::Layout::TextBox#split to use float comparison
484
551
  * Validation of standard encryption dictionary to auto-correct invalid /U and /O
485
552
  fields in case they are padded with zeros
486
553
  * [HexaPDF::Document#wrap] handling of sub-type mapping in case of missing type
@@ -897,7 +964,7 @@
897
964
  * [HexaPDF::Layout::WidthFromPolygon] to work correctly in case of very small
898
965
  floating point errors
899
966
  * HexaPDF::Layout::TextFragment#inspect to work in case of interspersed numbers
900
- * [HexaPDF::Layout::TextBox#split] to work for position :flow when box is wider
967
+ * HexaPDF::Layout::TextBox#split to work for position :flow when box is wider
901
968
  than the initial available width
902
969
  * [HexaPDF::Layout::Frame#fit] to create minimally sized mask rectangles
903
970
  * [HexaPDF::Content::GraphicObject::Geom2D] to close the path when drawing
@@ -1833,7 +1900,7 @@
1833
1900
  objects
1834
1901
  * [HexaPDF::Revision#each_modified_object] for iterating over all modified
1835
1902
  objects of a revision
1836
- * [HexaPDF::Layout::Box#split] and [HexaPDF::Layout::TextBox#split] for
1903
+ * [HexaPDF::Layout::Box#split] and HexaPDF::Layout::TextBox#split for
1837
1904
  splitting a box into two parts
1838
1905
  * [HexaPDF::Layout::Frame#full?] for testing whether the frame has any space
1839
1906
  left
@@ -231,6 +231,13 @@ module HexaPDF
231
231
  @document.write(output, optimize: optimize, **options)
232
232
  end
233
233
 
234
+ # Writes the created PDF document to a string and returns that string.
235
+ #
236
+ # See HexaPDF::Document#write for details.
237
+ def write_to_string(optimize: true, **options)
238
+ @document.write_to_string(optimize: optimize, **options)
239
+ end
240
+
234
241
  # :call-seq:
235
242
  # composer.style(name) -> style
236
243
  # composer.style(name, base: :base, **properties) -> style
@@ -182,6 +182,16 @@ module HexaPDF
182
182
  # acro_form.default_font_size::
183
183
  # A number specifying the default font size of AcroForm text fields which should be auto-sized.
184
184
  #
185
+ # acro_form.fallback_default_appearance::
186
+ # A hash containing arguments for
187
+ # HexaPDF::Type::AcroForm::VariableTextField#set_default_appearance_string which is used as
188
+ # fallback for fields without a default appearance.
189
+ #
190
+ # If this value is set to +nil+, an error is raised in case a variable text field cannot
191
+ # resolve a default appearance string.
192
+ #
193
+ # The default is the empty hash meaning the defaults from the method are used.
194
+ #
185
195
  # acro_form.fallback_font::
186
196
  # The font that should be used when a variable text field references a font that cannot be used.
187
197
  #
@@ -485,6 +495,7 @@ module HexaPDF
485
495
  Configuration.new('acro_form.appearance_generator' => 'HexaPDF::Type::AcroForm::AppearanceGenerator',
486
496
  'acro_form.create_appearances' => true,
487
497
  'acro_form.default_font_size' => 10,
498
+ 'acro_form.fallback_default_appearance' => {},
488
499
  'acro_form.fallback_font' => 'Helvetica',
489
500
  'acro_form.on_invalid_value' => proc do |field, value|
490
501
  raise HexaPDF::Error, "Invalid value #{value.inspect} for " \
@@ -587,6 +598,7 @@ module HexaPDF
587
598
  optimize: 'HexaPDF::Task::Optimize',
588
599
  dereference: 'HexaPDF::Task::Dereference',
589
600
  pdfa: 'HexaPDF::Task::PDFA',
601
+ merge_acro_form: 'HexaPDF::Task::MergeAcroForm',
590
602
  })
591
603
 
592
604
  # The global configuration object, providing the following options:
@@ -709,6 +721,7 @@ module HexaPDF
709
721
  Metadata: 'HexaPDF::Type::Metadata',
710
722
  OutputIntent: 'HexaPDF::Type::OutputIntent',
711
723
  XXDestOutputProfileRef: 'HexaPDF::Type::OutputIntent::DestOutputProfileRef',
724
+ ExData: 'HexaPDF::Type::Annotations::MarkupAnnotation::ExData',
712
725
  },
713
726
  'object.subtype_map' => {
714
727
  nil => {
@@ -112,7 +112,9 @@ module HexaPDF
112
112
  elsif byte == 93 # ]
113
113
  @ss.pos += 1
114
114
  TOKEN_ARRAY_END
115
- elsif byte == 123 || byte == 125 # { }
115
+ elsif byte == 41 # )
116
+ raise HexaPDF::MalformedPDFError.new("Delimiter ')' found at invalid position", pos: pos)
117
+ elsif byte == 123 || byte == 125 # { } )
116
118
  Token.new(@ss.get_byte)
117
119
  elsif byte == 37 # %
118
120
  unless @ss.skip_until(/(?=[\r\n])/)
@@ -155,6 +155,19 @@ module HexaPDF
155
155
  result.log(:error, "Signature verification failed")
156
156
  end
157
157
 
158
+ certs = [signer_certificate]
159
+ cur_cert = certs.first
160
+ while true
161
+ cur_cert = certificate_chain.find {|cert| cert.subject == cur_cert.issuer }
162
+ if cur_cert && !certs.include?(cur_cert)
163
+ certs << cur_cert
164
+ else
165
+ break
166
+ end
167
+ end
168
+ cert_subjects = certs.map {|cert| cert.subject.to_a.assoc("CN")&.[](1) }
169
+ result.log(:info, "Certificate chain: #{cert_subjects.join(" -> ")}")
170
+
158
171
  result
159
172
  end
160
173
 
@@ -211,7 +211,7 @@ module HexaPDF
211
211
  data = ''.b
212
212
  self[:ByteRange]&.each_slice(2) do |offset, length|
213
213
  io.pos = offset
214
- data << io.read(length)
214
+ data << io.read(length).to_s
215
215
  end
216
216
  data
217
217
  end
@@ -289,6 +289,7 @@ module HexaPDF
289
289
  signature[:Location] = location if location
290
290
  signature[:ContactInfo] = contact_info if contact_info
291
291
  signature[:Prop_Build] = {App: {Name: :HexaPDF, REx: HexaPDF::VERSION}}
292
+ signature.document.version = '2.0' if signature_type == :pades
292
293
 
293
294
  if doc_mdp_permissions
294
295
  doc = signature.document
@@ -724,10 +724,12 @@ module HexaPDF
724
724
  end
725
725
 
726
726
  # :call-seq:
727
- # doc.write(filename, incremental: false, validate: true, update_fields: true, optimize: false)
728
- # doc.write(io, incremental: false, validate: true, update_fields: true, optimize: false)
727
+ # doc.write(filename, incremental: false, validate: true, update_fields: true, optimize: false) -> [start_xref, section]
728
+ # doc.write(io, incremental: false, validate: true, update_fields: true, optimize: false) -> [start_xref, section]
729
729
  #
730
- # Writes the document to the given file (in case +io+ is a String) or IO stream.
730
+ # Writes the document to the given file (in case +io+ is a String) or IO stream. Returns the
731
+ # file position of the start of the last cross-reference section and the last XRefSection object
732
+ # written.
731
733
  #
732
734
  # Before the document is written, it is validated using #validate and an error is raised if the
733
735
  # document is not valid. However, this step can be skipped if needed.
@@ -784,6 +786,15 @@ module HexaPDF
784
786
  end
785
787
  end
786
788
 
789
+ # Writes the document to a string and returns the string.
790
+ #
791
+ # See #write for further information and details on the available arguments.
792
+ def write_to_string(**args)
793
+ io = StringIO.new(''.b)
794
+ write(io)
795
+ io.string
796
+ end
797
+
787
798
  def inspect #:nodoc:
788
799
  "<#{self.class.name}:#{object_id}>"
789
800
  end
@@ -106,6 +106,10 @@ module HexaPDF
106
106
  # password is supplied. To open such an encrypted PDF file, the +decryption_opts+ provided to
107
107
  # HexaPDF::Document.new needs to contain a :password key with the password.
108
108
  #
109
+ # **Note**: While HexaPDF supports reading files encrypted with revision 5, it doesn't support
110
+ # writing such files. This is no problem in practice since revision 5 was an unofficial Adobe
111
+ # extension to PDF 1.7 and revision 6 specified in PDF 2.0 is practically the same.
112
+ #
109
113
  # See: PDF2.0 s7.6.4
110
114
  class StandardSecurityHandler < SecurityHandler
111
115
 
@@ -340,13 +344,13 @@ module HexaPDF
340
344
  # Uses the given password (or the default password if none given) to retrieve the encryption
341
345
  # key.
342
346
  #
343
- # If the optional +check_permissions+ argument is +true+, the permissions for files
344
- # encrypted with revision 6 are checked. Otherwise, permission changes are ignored.
347
+ # If the optional +check_permissions+ argument is +true+, the permissions for files encrypted
348
+ # with revision 5 or 6 are checked. Otherwise, permission changes are ignored.
345
349
  def prepare_decryption(password: '', check_permissions: true)
346
350
  if dict[:Filter] != :Standard
347
351
  raise(HexaPDF::UnsupportedEncryptionError,
348
352
  "Invalid /Filter value #{dict[:Filter]} for standard security handler")
349
- elsif ![2, 3, 4, 6].include?(dict[:R])
353
+ elsif ![2, 3, 4, 5, 6].include?(dict[:R])
350
354
  raise(HexaPDF::UnsupportedEncryptionError,
351
355
  "Invalid /R value #{dict[:R]} for standard security handler")
352
356
  elsif dict[:R] <= 4 && !document.trailer[:ID].kind_of?(PDFArray)
@@ -369,7 +373,7 @@ module HexaPDF
369
373
  raise HexaPDF::EncryptionError, "Invalid password specified"
370
374
  end
371
375
 
372
- check_perms_field(encryption_key) if check_permissions && dict[:R] == 6
376
+ check_perms_field(encryption_key) if check_permissions && dict[:R] >= 5
373
377
 
374
378
  encryption_key
375
379
  end
@@ -396,8 +400,8 @@ module HexaPDF
396
400
  # For revisions <= 4 this is the *only* way for generating the encryption key needed to
397
401
  # encrypt or decrypt a file.
398
402
  #
399
- # For revision 6 the file encryption key is a string of random bytes that has been encrypted
400
- # with the user password. If the password is the owner password,
403
+ # For revisions 5 and 6 the file encryption key is a string of random bytes that has been
404
+ # encrypted with the user password. If the password is the owner password,
401
405
  # #compute_owner_encryption_key has to be used instead.
402
406
  #
403
407
  # See: PDF2.0 s7.6.4.3.2 (algorithm 2), PDF2.0 s7.6.4.3.3 (algorithm 2.A (a)-(b),(e))
@@ -416,7 +420,7 @@ module HexaPDF
416
420
  end
417
421
 
418
422
  data[0, n]
419
- elsif dict[:R] == 6
423
+ elsif dict[:R] <= 6
420
424
  key = compute_hash(password, dict[:U][40, 8])
421
425
  aes_algorithm.new(key, "\0" * 16, :decrypt).process(dict[:UE])
422
426
  end
@@ -427,15 +431,15 @@ module HexaPDF
427
431
  # For revisions <= 4 this is done by first retrieving the user password through the use of
428
432
  # the owner password and then using the #compute_user_encryption_key method.
429
433
  #
430
- # For revision 6 the file encryption key is a string of random bytes that has been encrypted
431
- # with the owner password. If the password is the user password, #compute_user_encryption_key
432
- # has to be used.
434
+ # For revisions 5 and 6 the file encryption key is a string of random bytes that has been
435
+ # encrypted with the owner password. If the password is the user password,
436
+ # #compute_user_encryption_key has to be used.
433
437
  #
434
438
  # See: PDF2.0 s7.6.4.3.2 (algorithm 2.A (a)-(d))
435
439
  def compute_owner_encryption_key(password)
436
440
  if dict[:R] <= 4
437
441
  compute_user_encryption_key(user_password_from_owner_password(password))
438
- elsif dict[:R] == 6
442
+ elsif dict[:R] <= 6
439
443
  key = compute_hash(password, dict[:O][40, 8], dict[:U])
440
444
  aes_algorithm.new(key, "\0" * 16, :decrypt).process(dict[:OE])
441
445
  end
@@ -447,7 +451,7 @@ module HexaPDF
447
451
  # the owner password. For revision 6 the /O value is a hash computed from the password and
448
452
  # the /U value with added validation and key salts.
449
453
  #
450
- # *Attention*: If revision 6 is used, the /U value has to be computed and set before this
454
+ # *Attention*: If revision 5 or 6 is used, the /U value has to be computed and set before this
451
455
  # method is used, otherwise the return value is incorrect!
452
456
  #
453
457
  # See: PDF2.0 s7.6.4.4.2 (algorithm 3), PDF2.0 s7.6.4.4.8 (algorithm 9 (a))
@@ -465,14 +469,14 @@ module HexaPDF
465
469
  end
466
470
 
467
471
  data
468
- elsif dict[:R] == 6
472
+ elsif dict[:R] <= 6
469
473
  validation_salt = random_bytes(8)
470
474
  key_salt = random_bytes(8)
471
475
  compute_hash(owner_password, validation_salt, dict[:U]) << validation_salt << key_salt
472
476
  end
473
477
  end
474
478
 
475
- # Computes the encryption dictionary's /OE (owner encryption key) value (for revision 6
479
+ # Computes the encryption dictionary's /OE (owner encryption key) value (for revisions 5 and 6
476
480
  # only).
477
481
  #
478
482
  # Short explanation: Encrypts the file encryption key with a key based on the password and
@@ -487,7 +491,7 @@ module HexaPDF
487
491
  # Computes the encryption dictionary's /U (user password) value.
488
492
  #
489
493
  # Short explanation: For revisions <= 4, the password padding string is encrypted with a key
490
- # based on the user password. For revision 6 the /U value is a hash computed from the
494
+ # based on the user password. For revisions 5 and 6 the /U value is a hash computed from the
491
495
  # password with added validation and key salts.
492
496
  #
493
497
  # See: PDF2.0 s7.6.4.4.3 (algorithm 4 for R=2), PDF s7.6.4.4.4 (algorithm 5 for R=3 and R=4)
@@ -502,14 +506,14 @@ module HexaPDF
502
506
  data = arc4_algorithm.encrypt(key, data)
503
507
  19.times {|i| data = arc4_algorithm.encrypt(xor_key(key, i + 1), data) }
504
508
  data << "hexapdfhexapdfhe"
505
- elsif dict[:R] == 6
509
+ elsif dict[:R] <= 6
506
510
  validation_salt = random_bytes(8)
507
511
  key_salt = random_bytes(8)
508
512
  compute_hash(password, validation_salt) << validation_salt << key_salt
509
513
  end
510
514
  end
511
515
 
512
- # Computes the encryption dictionary's /UE (user encryption key) value (for revision 6
516
+ # Computes the encryption dictionary's /UE (user encryption key) value (for revisions 5 and 6
513
517
  # only).
514
518
  #
515
519
  # Short explanation: Encrypts the file encryption key with a key based on the password and
@@ -521,7 +525,8 @@ module HexaPDF
521
525
  aes_algorithm.new(key, "\0" * 16, :encrypt).process(file_encryption_key)
522
526
  end
523
527
 
524
- # Computes the encryption dictionary's /Perms (permissions) value (for revision 6 only).
528
+ # Computes the encryption dictionary's /Perms (permissions) value (for revisions 5 and 6
529
+ # only).
525
530
  #
526
531
  # Uses /P and /EncryptMetadata values, so these have to be set beforehand.
527
532
  #
@@ -543,7 +548,7 @@ module HexaPDF
543
548
  compute_u_field(password) == dict[:U]
544
549
  elsif dict[:R] <= 4
545
550
  compute_u_field(password)[0, 16] == dict[:U][0, 16]
546
- elsif dict[:R] == 6
551
+ elsif dict[:R] <= 6
547
552
  compute_hash(password, dict[:U][32, 8]) == dict[:U][0, 32]
548
553
  end
549
554
  end
@@ -554,14 +559,14 @@ module HexaPDF
554
559
  def owner_password_valid?(password)
555
560
  if dict[:R] <= 4
556
561
  user_password_valid?(user_password_from_owner_password(password))
557
- elsif dict[:R] == 6
562
+ elsif dict[:R] <= 6
558
563
  compute_hash(password, dict[:O][32, 8], dict[:U]) == dict[:O][0, 32]
559
564
  end
560
565
  end
561
566
 
562
567
  # Checks if the decrypted /Perms entry matches the /P and /EncryptMetadata entries.
563
568
  #
564
- # This method can only be used for revision 6.
569
+ # This method can only be used for revisions 5 and 6.
565
570
  #
566
571
  # See: PDF2.0 s7.6.4.4.12 (algorithm 13)
567
572
  def check_perms_field(encryption_key)
@@ -596,17 +601,18 @@ module HexaPDF
596
601
  end
597
602
 
598
603
  # Computes a hash that is used extensively for all operations in security handlers of
599
- # revision 6.
604
+ # revisions 5 and 6.
600
605
  #
601
606
  # Note: The original input (as defined by the spec) is calculated as
602
607
  # "#{password}#{salt}#{user_key}" where +user_key+ has to be empty when doing operations
603
608
  # with the user password.
604
609
  #
605
- # See: PDF2.0 s7.6.4.3.4 (algorithm 2.B)
610
+ # See: PDF2.0 s7.6.4.3.4 (algorithm 2.B) and ADB Extension Level 3 s3.5.2
606
611
  def compute_hash(password, salt, user_key = '')
607
612
  k = Digest::SHA256.digest("#{password}#{salt}#{user_key}")
608
- e = ''
613
+ return k if dict[:R] == 5
609
614
 
615
+ e = ''
610
616
  i = 0
611
617
  while i < 64 || e.getbyte(-1) > i - 32
612
618
  k1 = "#{password}#{k}#{user_key}" * 64
@@ -627,7 +633,7 @@ module HexaPDF
627
633
  # * For revisions <= 4, the password is converted into ISO-8859-1 encoding, padded with
628
634
  # PASSWORD_PADDING and truncated to a maximum of 32 bytes.
629
635
  #
630
- # * For revision 6 the password is converted into UTF-8 encoding that is normalized
636
+ # * For revisions 5 and 6 the password is converted into UTF-8 encoding that is normalized
631
637
  # according to the PDF2.0 specification.
632
638
  #
633
639
  # See: PDF2.0 s7.6.4.3.2 (algorithm 2 step a)),
@@ -636,7 +642,7 @@ module HexaPDF
636
642
  if dict[:R] <= 4
637
643
  password.to_s[0, 32].encode(Encoding::ISO_8859_1).force_encoding(Encoding::BINARY).
638
644
  ljust(32, PASSWORD_PADDING)
639
- elsif dict[:R] == 6
645
+ elsif dict[:R] <= 6
640
646
  password.to_s.encode(Encoding::UTF_8).force_encoding(Encoding::BINARY)[0, 127]
641
647
  end
642
648
  rescue Encoding::UndefinedConversionError => e
@@ -40,9 +40,7 @@ module HexaPDF
40
40
  module Font
41
41
  class CMap
42
42
 
43
- # Creates a CMap file.
44
- #
45
- # Currently only ToUnicode CMaps are supported.
43
+ # Creates a CMap file, either a ToUnicode CMap or a CID CMap.
46
44
  class Writer
47
45
 
48
46
  # Maximum number of entries in one section.
@@ -74,6 +72,28 @@ module HexaPDF
74
72
  to_unicode_template % result.chop!
75
73
  end
76
74
 
75
+ # Returns a CID CMap for the given input code to CID mapping which needs to be sorted by
76
+ # input codes.
77
+ #
78
+ # Note that the returned CMap always uses a 16-bit input code space!
79
+ def create_cid_cmap(mapping)
80
+ return cid_template % '' if mapping.empty?
81
+
82
+ chars, ranges = compute_section_entries(mapping)
83
+
84
+ result = create_sections("cidchar", chars.size / 2) do |index|
85
+ index *= 2
86
+ sprintf("<%04X>", chars[index]) << " #{chars[index + 1]}\n"
87
+ end
88
+
89
+ result << create_sections("cidrange", ranges.size / 3) do |index|
90
+ index *= 3
91
+ sprintf("<%04X><%04X>", ranges[index], ranges[index + 1]) << " #{ranges[index + 2]}\n"
92
+ end
93
+
94
+ cid_template % result.chop!
95
+ end
96
+
77
97
  private
78
98
 
79
99
  # Computes the entries for the "char" and "range" sections based on the given mapping.
@@ -146,7 +166,7 @@ module HexaPDF
146
166
  result
147
167
  end
148
168
 
149
- # Returns the CMap file template for a ToUnicode CMap.
169
+ # Returns the template for a ToUnicode CMap.
150
170
  def to_unicode_template
151
171
  <<~TEMPLATE
152
172
  /CIDInit /ProcSet findresource begin
@@ -170,6 +190,40 @@ module HexaPDF
170
190
  TEMPLATE
171
191
  end
172
192
 
193
+ # Returns the template for a CID CMap.
194
+ def cid_template
195
+ <<~TEMPLATE
196
+ %%!PS-Adobe-3.0 Resource-CMap
197
+ %%%%DocumentNeededResources: ProcSet (CIDInit)
198
+ %%%%IncludeResource: ProcSet (CIDInit)
199
+ %%%%BeginResource: CMap (Custom)
200
+ %%%%Title: (Custom Adobe Identity 0)
201
+ %%%%Version: 1
202
+ /CIDInit /ProcSet findresource begin
203
+ 12 dict begin
204
+ begincmap
205
+ /CIDSystemInfo 3 dict dup begin
206
+ /Registry (Adobe) def
207
+ /Ordering (Identity) def
208
+ /Supplement 0 def
209
+ end def
210
+ /CMapName /Custom def
211
+ /CMapType 1 def
212
+ /CMapVersion 1 def
213
+ /WMode 0 def
214
+ 1 begincodespacerange
215
+ <0000> <FFFF>
216
+ endcodespacerange
217
+ %s
218
+ endcmap
219
+ CMapName currentdict /CMap defineresource pop
220
+ end
221
+ end
222
+ %%%%EndResource
223
+ %%%%EOF
224
+ TEMPLATE
225
+ end
226
+
173
227
  end
174
228
 
175
229
  end
@@ -85,6 +85,13 @@ module HexaPDF
85
85
  Writer.new.create_to_unicode_cmap(mapping)
86
86
  end
87
87
 
88
+ # Returns a string containing a CID CMap that represents the given code to CID mapping.
89
+ #
90
+ # See: Writer#create_cid_cmap
91
+ def self.create_cid_cmap(mapping)
92
+ Writer.new.create_cid_cmap(mapping)
93
+ end
94
+
88
95
  # The registry part of the CMap version.
89
96
  attr_accessor :registry
90
97