hexapdf 0.47.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +50 -16
  3. data/lib/hexapdf/composer.rb +7 -0
  4. data/lib/hexapdf/configuration.rb +2 -0
  5. data/lib/hexapdf/content/parser.rb +3 -1
  6. data/lib/hexapdf/digital_signature/cms_handler.rb +13 -0
  7. data/lib/hexapdf/digital_signature/signature.rb +1 -1
  8. data/lib/hexapdf/digital_signature/signing/default_handler.rb +1 -0
  9. data/lib/hexapdf/document.rb +14 -3
  10. data/lib/hexapdf/font/cmap/writer.rb +58 -4
  11. data/lib/hexapdf/font/cmap.rb +7 -0
  12. data/lib/hexapdf/font/true_type_wrapper.rb +41 -16
  13. data/lib/hexapdf/layout/text_fragment.rb +2 -1
  14. data/lib/hexapdf/object.rb +1 -1
  15. data/lib/hexapdf/parser.rb +1 -1
  16. data/lib/hexapdf/reference.rb +1 -1
  17. data/lib/hexapdf/task/merge_acro_form.rb +164 -0
  18. data/lib/hexapdf/task.rb +1 -0
  19. data/lib/hexapdf/tokenizer.rb +2 -0
  20. data/lib/hexapdf/type/acro_form/form.rb +14 -27
  21. data/lib/hexapdf/type/acro_form/signature_field.rb +16 -6
  22. data/lib/hexapdf/type/acro_form/variable_text_field.rb +1 -1
  23. data/lib/hexapdf/type/actions/go_to.rb +1 -0
  24. data/lib/hexapdf/type/actions/go_to_r.rb +1 -0
  25. data/lib/hexapdf/type/actions/launch.rb +5 -1
  26. data/lib/hexapdf/type/annotation.rb +6 -1
  27. data/lib/hexapdf/type/annotations/markup_annotation.rb +14 -1
  28. data/lib/hexapdf/type/catalog.rb +3 -0
  29. data/lib/hexapdf/type/cid_font.rb +4 -1
  30. data/lib/hexapdf/type/file_specification.rb +17 -14
  31. data/lib/hexapdf/type/font_descriptor.rb +4 -3
  32. data/lib/hexapdf/type/font_simple.rb +3 -1
  33. data/lib/hexapdf/type/font_true_type.rb +2 -0
  34. data/lib/hexapdf/type/font_type0.rb +1 -1
  35. data/lib/hexapdf/type/font_type1.rb +7 -0
  36. data/lib/hexapdf/type/font_type3.rb +0 -1
  37. data/lib/hexapdf/type/form.rb +5 -2
  38. data/lib/hexapdf/type/graphics_state_parameter.rb +7 -4
  39. data/lib/hexapdf/type/image.rb +8 -4
  40. data/lib/hexapdf/type/info.rb +2 -2
  41. data/lib/hexapdf/type/mark_information.rb +2 -2
  42. data/lib/hexapdf/type/optional_content_configuration.rb +1 -1
  43. data/lib/hexapdf/type/optional_content_membership.rb +1 -1
  44. data/lib/hexapdf/type/page.rb +5 -3
  45. data/lib/hexapdf/type/resources.rb +6 -6
  46. data/lib/hexapdf/type/viewer_preferences.rb +4 -3
  47. data/lib/hexapdf/version.rb +1 -1
  48. data/test/hexapdf/common_tokenizer_tests.rb +5 -0
  49. data/test/hexapdf/digital_signature/signing/test_default_handler.rb +6 -0
  50. data/test/hexapdf/digital_signature/test_cms_handler.rb +12 -7
  51. data/test/hexapdf/digital_signature/test_signature.rb +7 -0
  52. data/test/hexapdf/digital_signature/test_signatures.rb +8 -3
  53. data/test/hexapdf/font/cmap/test_writer.rb +73 -16
  54. data/test/hexapdf/font/test_true_type_wrapper.rb +17 -3
  55. data/test/hexapdf/layout/test_list_box.rb +7 -7
  56. data/test/hexapdf/layout/test_text_fragment.rb +3 -3
  57. data/test/hexapdf/layout/test_text_layouter.rb +4 -2
  58. data/test/hexapdf/task/test_merge_acro_form.rb +104 -0
  59. data/test/hexapdf/test_composer.rb +8 -0
  60. data/test/hexapdf/test_document.rb +9 -0
  61. data/test/hexapdf/test_parser.rb +7 -0
  62. data/test/hexapdf/test_writer.rb +8 -3
  63. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +18 -18
  64. data/test/hexapdf/type/acro_form/test_form.rb +7 -3
  65. data/test/hexapdf/type/actions/test_launch.rb +6 -2
  66. data/test/hexapdf/type/test_font_type1.rb +5 -0
  67. data/test/hexapdf/type/test_form.rb +1 -1
  68. data/test/hexapdf/type/test_page.rb +7 -1
  69. metadata +4 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 15f4c7590b5f2ce321519ac0b4871971c12073a9d4b0df36ab2f2e3c156f09ab
4
- data.tar.gz: 76dac6196e06e80fd88dade3f831b7744c2b763f004d3c389d3efebcace3be82
3
+ metadata.gz: 2889ba1d03e2c351efd694b1583063023fff97c0da636ff5103f88538255735c
4
+ data.tar.gz: 6fb4727db05900e8fccba2ad4e093d1092e17305e5b5616ded97a76cf835673c
5
5
  SHA512:
6
- metadata.gz: 8ea19ed17370cad1a1fa48a7e0cb85c16e3ff62b344d2a9a90c42d2ac98d5ce5f75c356facf6bb8276822db460e2d9f912f523783c82b3539200e33f9de9f383
7
- data.tar.gz: bdf2d93feade9f0654366bbf89711b670cbd03ee88b81f197f756c2032305f9ee7b9265973f31c5a4e18e7afd3e4f9dfd08b46a0483502b896774494294c111a
6
+ metadata.gz: 00be8ed2c306a88e5bfc0eada97a7e6bf802ec269e832bb21b3521d4077b18ecad11946ddc6f8a6d575820e66339059e59ba2c4cdd2b74d6c7d6defd0f2f5256
7
+ data.tar.gz: 94c6a8178ead2a986921b72b07ef5dc388a5fa6a67945573eec921db30c9940d241d3f47591c8fbbf9bdcf313df0dda536f0fc78e0e946e27dfa3bc13dad9a28
data/CHANGELOG.md CHANGED
@@ -1,3 +1,37 @@
1
+ ## 1.0.0 - 2024-10-26
2
+
3
+ ### Added
4
+
5
+ * [HexaPDF::Task::MergeAcroForm] for merging AcroForm information for imported
6
+ pages
7
+ * [HexaPDF::Document#write_to_string] and [HexaPDF::Composer#write_to_string]
8
+ for easily writing a document to a String
9
+ * [HexaPDF::Font::CMap::Writer#create_cid_cmap] for creating a character code to
10
+ CID CMap file
11
+
12
+ ### Changed
13
+
14
+ * [HexaPDF::Type::AcroForm::Form] text-like field creation methods to always set
15
+ a default appearance string and the quadding
16
+ * Convenience methods for accessing resources to not add the deprecated /ProcSet
17
+ entry by default
18
+ * [HexaPDF::DigitalSignature::CMSHandler] to add informational output regarding
19
+ the certificate chain on verification
20
+ * Validation of [HexaPDF::Type::FontType1] to ensure correct /Encoding value
21
+
22
+ ### Fixed
23
+
24
+ * [HexaPDF::DigitalSignature::Signature#signed_data] to work for invalid offsets
25
+ * [HexaPDF::DigitalSignature::Signing::DefaultHandler] to update the document's
26
+ version to 2.0 when using PAdES
27
+ * Parsing of invalid `)` character in PDF objects and content streams
28
+ * Handling of files that contain stream length values that are indirect objects
29
+ that do not exist
30
+ * [HexaPDF::Font::TrueTypeWrapper] to correctly handle the situation when
31
+ multiple codepoints refer to the same glyph ID
32
+ * [HexaPDF::Type::Page#contents] to handle null values in /Contents array
33
+
34
+
1
35
  ## 0.47.0 - 2024-09-07
2
36
 
3
37
  ### Added
@@ -53,7 +87,7 @@
53
87
  signatures
54
88
  * [HexaPDF::DigitalSignature::CMSHandler#signing_time] to use time from an
55
89
  embedded timestamp authority signature if possible
56
- * [HexaPDF::Layout::Box#fit] to return success for boxes with content
90
+ * HexaPDF::Layout::Box#fit to return success for boxes with content
57
91
  width/height of zero
58
92
  * [HexaPDF::Importer::copy] to optionally allow copying the catalog and page
59
93
  tree nodes
@@ -61,7 +95,7 @@
61
95
  ### Fixed
62
96
 
63
97
  * Setting of correct x-position in fit result for boxes with flow positioning
64
- * [HexaPDF::Layout::ListBox#fit] to respect the set height
98
+ * HexaPDF::Layout::ListBox#fit to respect the set height
65
99
  * CLI command `hexapdf inspect` to work in case of missing Unicode mappings
66
100
  * [HexaPDF::Type::AcroForm::Form#delete_field] to correctly work for fields with
67
101
  an embedded widget
@@ -80,7 +114,7 @@
80
114
 
81
115
  ### Changed
82
116
 
83
- * [HexaPDF::Layout::Box#fit] to set width/height correctly for boxes with
117
+ * HexaPDF::Layout::Box#fit to set width/height correctly for boxes with
84
118
  position `:flow`
85
119
 
86
120
  ### Fixed
@@ -126,9 +160,9 @@
126
160
 
127
161
  ### Fixed
128
162
 
129
- * [HexaPDF::Layout::TextBox#fit] to correctly calculate width in case of flowing
163
+ * HexaPDF::Layout::TextBox#fit to correctly calculate width in case of flowing
130
164
  text around other boxes
131
- * [HexaPDF::Layout::TextBox#draw] to correctly draw border, background... on
165
+ * HexaPDF::Layout::TextBox#draw to correctly draw border, background... on
132
166
  boxes using position 'flow'
133
167
  * Comparison of Hash with [HexaPDF::Dictionary] objects by implementing
134
168
  `#to_hash`
@@ -182,7 +216,7 @@
182
216
  JavaScript action that formats the field's value
183
217
  * [HexaPDF::Type::AcroForm::TextField#set_calculate_action] for setting a
184
218
  JavaScript action that calculates the field's value
185
- * [HexaPDF::Type::AcroForm#recalculate_fields] for recalculating fields
219
+ * [HexaPDF::Type::AcroForm::Form#recalculate_fields] for recalculating fields
186
220
 
187
221
  ### Changed
188
222
 
@@ -239,7 +273,7 @@
239
273
 
240
274
  ### Changed
241
275
 
242
- * [HexaPDF::Layout::Frame::FitResult#draw] to provide better optional content
276
+ * HexaPDF::Layout::Frame::FitResult#draw to provide better optional content
243
277
  group names
244
278
 
245
279
  ### Fixed
@@ -318,8 +352,8 @@
318
352
 
319
353
  ### Changed
320
354
 
321
- * [HexaPDF::Layout::Frame::FitResult#draw] to allow drawing at an offset
322
- * [HexaPDF::Layout::Box#fit] to delegate the actual content fitting to the
355
+ * HexaPDF::Layout::Frame::FitResult#draw to allow drawing at an offset
356
+ * HexaPDF::Layout::Box#fit to delegate the actual content fitting to the
323
357
  `#fit_content` method
324
358
  * [HexaPDF::Document::Layout#box] to allow using the block as drawing block for
325
359
  the base box class
@@ -396,8 +430,8 @@
396
430
 
397
431
  ### Fixed
398
432
 
399
- * [HexaPDF::Layout::ColumnBox#fit] to correctly take initial height into account
400
- * [HexaPDF::Layout::ColumnBox#fit] to ensure correct results in case the
433
+ * HexaPDF::Layout::ColumnBox#fit to correctly take initial height into account
434
+ * HexaPDF::Layout::ColumnBox#fit to ensure correct results in case the
401
435
  requested dimensions are larger than the current region
402
436
  * [HexaPDF::Document::Layout#formatted_text_box] to correctly handle properties
403
437
  * [HexaPDF::Layout::Frame#fit] to raise an error if an invalid value for the
@@ -443,7 +477,7 @@
443
477
  context argument (a page or Form XObject instance)
444
478
  * [HexaPDF::Layout::ListBox] to use its 'fill_color' style property for the item
445
479
  marker color
446
- * [HexaPDF::Layout::Frame::FitResult#draw] to use optional content groups for
480
+ * HexaPDF::Layout::Frame::FitResult#draw to use optional content groups for
447
481
  debug output
448
482
 
449
483
  ### Fixed
@@ -452,7 +486,7 @@
452
486
  default range starting at page 1
453
487
  * [HexaPDF::Type::Page#flatten_annotations] to correctly handle scaled
454
488
  appearances
455
- * Using an unknown style name in [HexaPDF:Document::Layout] method by providing
489
+ * Using an unknown style name in [HexaPDF::Document::Layout] method by providing
456
490
  a useful error message
457
491
  * [HexaPDF::Layout::Box::new] to ensure that the properties attribute is always
458
492
  a hash
@@ -513,7 +547,7 @@
513
547
  final box positions into account
514
548
  * [HexaPDF::Content::Canvas#text] to set the leading only when multiple lines
515
549
  are drawn
516
- * [HexaPDF::Layout::TextBox#split] to use float comparison
550
+ * HexaPDF::Layout::TextBox#split to use float comparison
517
551
  * Validation of standard encryption dictionary to auto-correct invalid /U and /O
518
552
  fields in case they are padded with zeros
519
553
  * [HexaPDF::Document#wrap] handling of sub-type mapping in case of missing type
@@ -930,7 +964,7 @@
930
964
  * [HexaPDF::Layout::WidthFromPolygon] to work correctly in case of very small
931
965
  floating point errors
932
966
  * HexaPDF::Layout::TextFragment#inspect to work in case of interspersed numbers
933
- * [HexaPDF::Layout::TextBox#split] to work for position :flow when box is wider
967
+ * HexaPDF::Layout::TextBox#split to work for position :flow when box is wider
934
968
  than the initial available width
935
969
  * [HexaPDF::Layout::Frame#fit] to create minimally sized mask rectangles
936
970
  * [HexaPDF::Content::GraphicObject::Geom2D] to close the path when drawing
@@ -1866,7 +1900,7 @@
1866
1900
  objects
1867
1901
  * [HexaPDF::Revision#each_modified_object] for iterating over all modified
1868
1902
  objects of a revision
1869
- * [HexaPDF::Layout::Box#split] and [HexaPDF::Layout::TextBox#split] for
1903
+ * [HexaPDF::Layout::Box#split] and HexaPDF::Layout::TextBox#split for
1870
1904
  splitting a box into two parts
1871
1905
  * [HexaPDF::Layout::Frame#full?] for testing whether the frame has any space
1872
1906
  left
@@ -231,6 +231,13 @@ module HexaPDF
231
231
  @document.write(output, optimize: optimize, **options)
232
232
  end
233
233
 
234
+ # Writes the created PDF document to a string and returns that string.
235
+ #
236
+ # See HexaPDF::Document#write for details.
237
+ def write_to_string(optimize: true, **options)
238
+ @document.write_to_string(optimize: optimize, **options)
239
+ end
240
+
234
241
  # :call-seq:
235
242
  # composer.style(name) -> style
236
243
  # composer.style(name, base: :base, **properties) -> style
@@ -598,6 +598,7 @@ module HexaPDF
598
598
  optimize: 'HexaPDF::Task::Optimize',
599
599
  dereference: 'HexaPDF::Task::Dereference',
600
600
  pdfa: 'HexaPDF::Task::PDFA',
601
+ merge_acro_form: 'HexaPDF::Task::MergeAcroForm',
601
602
  })
602
603
 
603
604
  # The global configuration object, providing the following options:
@@ -720,6 +721,7 @@ module HexaPDF
720
721
  Metadata: 'HexaPDF::Type::Metadata',
721
722
  OutputIntent: 'HexaPDF::Type::OutputIntent',
722
723
  XXDestOutputProfileRef: 'HexaPDF::Type::OutputIntent::DestOutputProfileRef',
724
+ ExData: 'HexaPDF::Type::Annotations::MarkupAnnotation::ExData',
723
725
  },
724
726
  'object.subtype_map' => {
725
727
  nil => {
@@ -112,7 +112,9 @@ module HexaPDF
112
112
  elsif byte == 93 # ]
113
113
  @ss.pos += 1
114
114
  TOKEN_ARRAY_END
115
- elsif byte == 123 || byte == 125 # { }
115
+ elsif byte == 41 # )
116
+ raise HexaPDF::MalformedPDFError.new("Delimiter ')' found at invalid position", pos: pos)
117
+ elsif byte == 123 || byte == 125 # { }
116
118
  Token.new(@ss.get_byte)
117
119
  elsif byte == 37 # %
118
120
  unless @ss.skip_until(/(?=[\r\n])/)
@@ -155,6 +155,19 @@ module HexaPDF
155
155
  result.log(:error, "Signature verification failed")
156
156
  end
157
157
 
158
+ certs = [signer_certificate]
159
+ cur_cert = certs.first
160
+ while true
161
+ cur_cert = certificate_chain.find {|cert| cert.subject == cur_cert.issuer }
162
+ if cur_cert && !certs.include?(cur_cert)
163
+ certs << cur_cert
164
+ else
165
+ break
166
+ end
167
+ end
168
+ cert_subjects = certs.map {|cert| cert.subject.to_a.assoc("CN")&.[](1) }
169
+ result.log(:info, "Certificate chain: #{cert_subjects.join(" -> ")}")
170
+
158
171
  result
159
172
  end
160
173
 
@@ -211,7 +211,7 @@ module HexaPDF
211
211
  data = ''.b
212
212
  self[:ByteRange]&.each_slice(2) do |offset, length|
213
213
  io.pos = offset
214
- data << io.read(length)
214
+ data << io.read(length).to_s
215
215
  end
216
216
  data
217
217
  end
@@ -289,6 +289,7 @@ module HexaPDF
289
289
  signature[:Location] = location if location
290
290
  signature[:ContactInfo] = contact_info if contact_info
291
291
  signature[:Prop_Build] = {App: {Name: :HexaPDF, REx: HexaPDF::VERSION}}
292
+ signature.document.version = '2.0' if signature_type == :pades
292
293
 
293
294
  if doc_mdp_permissions
294
295
  doc = signature.document
@@ -724,10 +724,12 @@ module HexaPDF
724
724
  end
725
725
 
726
726
  # :call-seq:
727
- # doc.write(filename, incremental: false, validate: true, update_fields: true, optimize: false)
728
- # doc.write(io, incremental: false, validate: true, update_fields: true, optimize: false)
727
+ # doc.write(filename, incremental: false, validate: true, update_fields: true, optimize: false) -> [start_xref, section]
728
+ # doc.write(io, incremental: false, validate: true, update_fields: true, optimize: false) -> [start_xref, section]
729
729
  #
730
- # Writes the document to the given file (in case +io+ is a String) or IO stream.
730
+ # Writes the document to the given file (in case +io+ is a String) or IO stream. Returns the
731
+ # file position of the start of the last cross-reference section and the last XRefSection object
732
+ # written.
731
733
  #
732
734
  # Before the document is written, it is validated using #validate and an error is raised if the
733
735
  # document is not valid. However, this step can be skipped if needed.
@@ -784,6 +786,15 @@ module HexaPDF
784
786
  end
785
787
  end
786
788
 
789
+ # Writes the document to a string and returns the string.
790
+ #
791
+ # See #write for further information and details on the available arguments.
792
+ def write_to_string(**args)
793
+ io = StringIO.new(''.b)
794
+ write(io)
795
+ io.string
796
+ end
797
+
787
798
  def inspect #:nodoc:
788
799
  "<#{self.class.name}:#{object_id}>"
789
800
  end
@@ -40,9 +40,7 @@ module HexaPDF
40
40
  module Font
41
41
  class CMap
42
42
 
43
- # Creates a CMap file.
44
- #
45
- # Currently only ToUnicode CMaps are supported.
43
+ # Creates a CMap file, either a ToUnicode CMap or a CID CMap.
46
44
  class Writer
47
45
 
48
46
  # Maximum number of entries in one section.
@@ -74,6 +72,28 @@ module HexaPDF
74
72
  to_unicode_template % result.chop!
75
73
  end
76
74
 
75
+ # Returns a CID CMap for the given input code to CID mapping which needs to be sorted by
76
+ # input codes.
77
+ #
78
+ # Note that the returned CMap always uses a 16-bit input code space!
79
+ def create_cid_cmap(mapping)
80
+ return cid_template % '' if mapping.empty?
81
+
82
+ chars, ranges = compute_section_entries(mapping)
83
+
84
+ result = create_sections("cidchar", chars.size / 2) do |index|
85
+ index *= 2
86
+ sprintf("<%04X>", chars[index]) << " #{chars[index + 1]}\n"
87
+ end
88
+
89
+ result << create_sections("cidrange", ranges.size / 3) do |index|
90
+ index *= 3
91
+ sprintf("<%04X><%04X>", ranges[index], ranges[index + 1]) << " #{ranges[index + 2]}\n"
92
+ end
93
+
94
+ cid_template % result.chop!
95
+ end
96
+
77
97
  private
78
98
 
79
99
  # Computes the entries for the "char" and "range" sections based on the given mapping.
@@ -146,7 +166,7 @@ module HexaPDF
146
166
  result
147
167
  end
148
168
 
149
- # Returns the CMap file template for a ToUnicode CMap.
169
+ # Returns the template for a ToUnicode CMap.
150
170
  def to_unicode_template
151
171
  <<~TEMPLATE
152
172
  /CIDInit /ProcSet findresource begin
@@ -170,6 +190,40 @@ module HexaPDF
170
190
  TEMPLATE
171
191
  end
172
192
 
193
+ # Returns the template for a CID CMap.
194
+ def cid_template
195
+ <<~TEMPLATE
196
+ %%!PS-Adobe-3.0 Resource-CMap
197
+ %%%%DocumentNeededResources: ProcSet (CIDInit)
198
+ %%%%IncludeResource: ProcSet (CIDInit)
199
+ %%%%BeginResource: CMap (Custom)
200
+ %%%%Title: (Custom Adobe Identity 0)
201
+ %%%%Version: 1
202
+ /CIDInit /ProcSet findresource begin
203
+ 12 dict begin
204
+ begincmap
205
+ /CIDSystemInfo 3 dict dup begin
206
+ /Registry (Adobe) def
207
+ /Ordering (Identity) def
208
+ /Supplement 0 def
209
+ end def
210
+ /CMapName /Custom def
211
+ /CMapType 1 def
212
+ /CMapVersion 1 def
213
+ /WMode 0 def
214
+ 1 begincodespacerange
215
+ <0000> <FFFF>
216
+ endcodespacerange
217
+ %s
218
+ endcmap
219
+ CMapName currentdict /CMap defineresource pop
220
+ end
221
+ end
222
+ %%%%EndResource
223
+ %%%%EOF
224
+ TEMPLATE
225
+ end
226
+
173
227
  end
174
228
 
175
229
  end
@@ -85,6 +85,13 @@ module HexaPDF
85
85
  Writer.new.create_to_unicode_cmap(mapping)
86
86
  end
87
87
 
88
+ # Returns a string containing a CID CMap that represents the given code to CID mapping.
89
+ #
90
+ # See: Writer#create_cid_cmap
91
+ def self.create_cid_cmap(mapping)
92
+ Writer.new.create_cid_cmap(mapping)
93
+ end
94
+
88
95
  # The registry part of the CMap version.
89
96
  attr_accessor :registry
90
97
 
@@ -57,6 +57,10 @@ module HexaPDF
57
57
  class TrueTypeWrapper
58
58
 
59
59
  # Represents a single glyph of the wrapped font.
60
+ #
61
+ # Since some characters/strings may be mapped to the same glyph id by the font's builtin cmap
62
+ # table, it is possible that different Glyph instances with the same #id but different #str
63
+ # exist.
60
64
  class Glyph
61
65
 
62
66
  # The associated TrueTypeWrapper object.
@@ -152,6 +156,7 @@ module HexaPDF
152
156
  @id_to_glyph = {}
153
157
  @codepoint_to_glyph = {}
154
158
  @encoded_glyphs = {}
159
+ @last_char_code = 0
155
160
  end
156
161
 
157
162
  # Returns the type of the font, i.e. :TrueType.
@@ -179,14 +184,15 @@ module HexaPDF
179
184
  !@subsetter.nil?
180
185
  end
181
186
 
182
- # Returns a Glyph object for the given glyph ID.
187
+ # Returns a Glyph object for the given glyph ID and +str+ pair.
183
188
  #
184
- # The optional argument +str+ should be the string representation of the glyph. Only use it if
185
- # it is known,
189
+ # The optional argument +str+ should be the string representation of the glyph. It is possible
190
+ # that multiple strings map to the same glyph (e.g. hyphen and soft-hyphen could be
191
+ # represented by the same glyph).
186
192
  #
187
193
  # Note: Although this method is public, it should normally not be used by application code!
188
194
  def glyph(id, str = nil)
189
- @id_to_glyph[id] ||=
195
+ @id_to_glyph[[id, str]] ||=
190
196
  if id >= 0 && id < @wrapped_font[:maxp].num_glyphs
191
197
  Glyph.new(self, id, str || (+'' << (@cmap.gid_to_code(id) || 0xFFFD)))
192
198
  else
@@ -228,14 +234,12 @@ module HexaPDF
228
234
 
229
235
  # Encodes the glyph and returns the code string.
230
236
  def encode(glyph)
231
- (@encoded_glyphs[glyph.id] ||=
237
+ (@encoded_glyphs[glyph] ||=
232
238
  begin
233
239
  raise HexaPDF::MissingGlyphError.new(glyph) if glyph.kind_of?(InvalidGlyph)
234
- if @subsetter
235
- [[@subsetter.use_glyph(glyph.id)].pack('n'), glyph]
236
- else
237
- [[glyph.id].pack('n'), glyph]
238
- end
240
+ @subsetter.use_glyph(glyph.id) if @subsetter
241
+ @last_char_code += 1
242
+ [[@last_char_code].pack('n'), @last_char_code]
239
243
  end)[0]
240
244
  end
241
245
 
@@ -286,7 +290,7 @@ module HexaPDF
286
290
  Supplement: 0},
287
291
  CIDToGIDMap: :Identity})
288
292
  dict = document.add({Type: :Font, Subtype: :Type0, BaseFont: cid_font[:BaseFont],
289
- Encoding: :'Identity-H', DescendantFonts: [cid_font]})
293
+ DescendantFonts: [cid_font]})
290
294
  dict.font_wrapper = self
291
295
 
292
296
  document.register_listener(:complete_objects) do
@@ -294,6 +298,7 @@ module HexaPDF
294
298
  embed_font(dict, document)
295
299
  complete_width_information(dict)
296
300
  create_to_unicode_cmap(dict, document)
301
+ add_encoding_information_cmap(dict, document)
297
302
  end
298
303
 
299
304
  dict
@@ -306,7 +311,7 @@ module HexaPDF
306
311
  return unless @subsetter
307
312
 
308
313
  tag = +''
309
- data = @encoded_glyphs.each_with_object(''.b) {|(id, v), s| s << id.to_s << v[0] }
314
+ data = @encoded_glyphs.each_with_object(''.b) {|(g, v), s| s << g.id.to_s << v[0] }
310
315
  hash = Digest::MD5.hexdigest(data << @wrapped_font.font_name).to_i(16)
311
316
  while hash != 0 && tag.length < 6
312
317
  hash, mod = hash.divmod(UPPERCASE_LETTERS.length)
@@ -336,8 +341,8 @@ module HexaPDF
336
341
  # Adds the /DW and /W fields to the CIDFont dictionary.
337
342
  def complete_width_information(dict)
338
343
  default_width = glyph(3, " ").width.to_i
339
- widths = @encoded_glyphs.reject {|_, v| v[1].width == default_width }.map do |id, v|
340
- [(@subsetter ? @subsetter.subset_glyph_id(id) : id), v[1].width]
344
+ widths = @encoded_glyphs.reject {|g, _| g.width == default_width }.map do |g, _|
345
+ [(@subsetter ? @subsetter.subset_glyph_id(g.id) : g.id), g.width]
341
346
  end.sort!
342
347
  dict[:DescendantFonts].first.set_widths(widths, default_width: default_width)
343
348
  end
@@ -346,9 +351,10 @@ module HexaPDF
346
351
  # correctly.
347
352
  def create_to_unicode_cmap(dict, document)
348
353
  stream = HexaPDF::StreamData.new do
349
- mapping = @encoded_glyphs.keys.map! do |id|
354
+ mapping = @encoded_glyphs.map do |glyph, (_, char_code)|
350
355
  # Using 0xFFFD as mentioned in Adobe #5411, last line before section 1.5
351
- [(@subsetter ? @subsetter.subset_glyph_id(id) : id), @cmap.gid_to_code(id) || 0xFFFD]
356
+ # TODO: glyph.str assumed to consist of single char, No support for multiple chars
357
+ [char_code, glyph.str.ord || 0xFFFD]
352
358
  end.sort_by!(&:first)
353
359
  HexaPDF::Font::CMap.create_to_unicode_cmap(mapping)
354
360
  end
@@ -357,6 +363,25 @@ module HexaPDF
357
363
  dict[:ToUnicode] = stream_obj
358
364
  end
359
365
 
366
+ # Adds the /Encoding entry to the +dict+.
367
+ #
368
+ # This can either be the identity mapping or, if some Unicode codepoints are mapped to the
369
+ # same glyph, a custom CMap.
370
+ def add_encoding_information_cmap(dict, document)
371
+ mapping = @encoded_glyphs.map do |glyph, (_, char_code)|
372
+ # Pair each assigned character code with the glyph ID (the subset glyph ID when subsetting)
373
+ [char_code, (@subsetter ? @subsetter.subset_glyph_id(glyph.id) : glyph.id)]
374
+ end.sort_by!(&:first)
375
+ if mapping.all? {|char_code, cid| char_code == cid }
376
+ dict[:Encoding] = :'Identity-H'
377
+ else
378
+ stream = HexaPDF::StreamData.new { HexaPDF::Font::CMap.create_cid_cmap(mapping) }
379
+ stream_obj = document.add({}, stream: stream)
380
+ stream_obj.set_filter(:FlateDecode)
381
+ dict[:Encoding] = stream_obj
382
+ end
383
+ end
384
+
360
385
  end
361
386
 
362
387
  end
@@ -235,6 +235,7 @@ module HexaPDF
235
235
  end
236
236
  end
237
237
 
238
+ in_text_object = (canvas.graphics_object == :text)
238
239
  canvas.begin_text
239
240
  tlm = canvas.graphics_state.tlm
240
241
  tx = x - tlm.e
@@ -248,7 +249,7 @@ module HexaPDF
248
249
  elsif ty.abs < PRECISION
249
250
  canvas.move_text_cursor(offset: [tx, 0], absolute: false)
250
251
  else
251
- canvas.move_text_cursor(offset: [x, y])
252
+ canvas.move_text_cursor(offset: [x, y], absolute: in_text_object)
252
253
  end
253
254
  canvas.show_glyphs_only(items)
254
255
 
@@ -372,7 +372,7 @@ module HexaPDF
372
372
 
373
373
  # Computes the hash value based on the object and generation numbers.
374
374
  def hash
375
- oid.hash ^ gen.hash
375
+ [oid, gen].hash
376
376
  end
377
377
 
378
378
  def inspect #:nodoc:
@@ -184,7 +184,7 @@ module HexaPDF
184
184
  length = if object[:Length].kind_of?(Integer)
185
185
  object[:Length]
186
186
  elsif object[:Length].kind_of?(Reference)
187
- @document.deref(object[:Length]).value
187
+ @document.deref(object[:Length])&.value || 0
188
188
  else
189
189
  0
190
190
  end
@@ -87,7 +87,7 @@ module HexaPDF
87
87
 
88
88
  # Computes the hash value based on the object and generation numbers.
89
89
  def hash
90
- oid.hash ^ gen.hash
90
+ [oid, gen].hash
91
91
  end
92
92
 
93
93
  # Returns the object identifier as "oid,gen".