hexapdf 0.47.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +50 -16
  3. data/lib/hexapdf/composer.rb +7 -0
  4. data/lib/hexapdf/configuration.rb +2 -0
  5. data/lib/hexapdf/content/parser.rb +3 -1
  6. data/lib/hexapdf/digital_signature/cms_handler.rb +13 -0
  7. data/lib/hexapdf/digital_signature/signature.rb +1 -1
  8. data/lib/hexapdf/digital_signature/signing/default_handler.rb +1 -0
  9. data/lib/hexapdf/document.rb +14 -3
  10. data/lib/hexapdf/font/cmap/writer.rb +58 -4
  11. data/lib/hexapdf/font/cmap.rb +7 -0
  12. data/lib/hexapdf/font/true_type_wrapper.rb +41 -16
  13. data/lib/hexapdf/layout/text_fragment.rb +2 -1
  14. data/lib/hexapdf/object.rb +1 -1
  15. data/lib/hexapdf/parser.rb +1 -1
  16. data/lib/hexapdf/reference.rb +1 -1
  17. data/lib/hexapdf/task/merge_acro_form.rb +164 -0
  18. data/lib/hexapdf/task.rb +1 -0
  19. data/lib/hexapdf/tokenizer.rb +2 -0
  20. data/lib/hexapdf/type/acro_form/form.rb +14 -27
  21. data/lib/hexapdf/type/acro_form/signature_field.rb +16 -6
  22. data/lib/hexapdf/type/acro_form/variable_text_field.rb +1 -1
  23. data/lib/hexapdf/type/actions/go_to.rb +1 -0
  24. data/lib/hexapdf/type/actions/go_to_r.rb +1 -0
  25. data/lib/hexapdf/type/actions/launch.rb +5 -1
  26. data/lib/hexapdf/type/annotation.rb +6 -1
  27. data/lib/hexapdf/type/annotations/markup_annotation.rb +14 -1
  28. data/lib/hexapdf/type/catalog.rb +3 -0
  29. data/lib/hexapdf/type/cid_font.rb +4 -1
  30. data/lib/hexapdf/type/file_specification.rb +17 -14
  31. data/lib/hexapdf/type/font_descriptor.rb +4 -3
  32. data/lib/hexapdf/type/font_simple.rb +3 -1
  33. data/lib/hexapdf/type/font_true_type.rb +2 -0
  34. data/lib/hexapdf/type/font_type0.rb +1 -1
  35. data/lib/hexapdf/type/font_type1.rb +7 -0
  36. data/lib/hexapdf/type/font_type3.rb +0 -1
  37. data/lib/hexapdf/type/form.rb +5 -2
  38. data/lib/hexapdf/type/graphics_state_parameter.rb +7 -4
  39. data/lib/hexapdf/type/image.rb +8 -4
  40. data/lib/hexapdf/type/info.rb +2 -2
  41. data/lib/hexapdf/type/mark_information.rb +2 -2
  42. data/lib/hexapdf/type/optional_content_configuration.rb +1 -1
  43. data/lib/hexapdf/type/optional_content_membership.rb +1 -1
  44. data/lib/hexapdf/type/page.rb +5 -3
  45. data/lib/hexapdf/type/resources.rb +6 -6
  46. data/lib/hexapdf/type/viewer_preferences.rb +4 -3
  47. data/lib/hexapdf/version.rb +1 -1
  48. data/test/hexapdf/common_tokenizer_tests.rb +5 -0
  49. data/test/hexapdf/digital_signature/signing/test_default_handler.rb +6 -0
  50. data/test/hexapdf/digital_signature/test_cms_handler.rb +12 -7
  51. data/test/hexapdf/digital_signature/test_signature.rb +7 -0
  52. data/test/hexapdf/digital_signature/test_signatures.rb +8 -3
  53. data/test/hexapdf/font/cmap/test_writer.rb +73 -16
  54. data/test/hexapdf/font/test_true_type_wrapper.rb +17 -3
  55. data/test/hexapdf/layout/test_list_box.rb +7 -7
  56. data/test/hexapdf/layout/test_text_fragment.rb +3 -3
  57. data/test/hexapdf/layout/test_text_layouter.rb +4 -2
  58. data/test/hexapdf/task/test_merge_acro_form.rb +104 -0
  59. data/test/hexapdf/test_composer.rb +8 -0
  60. data/test/hexapdf/test_document.rb +9 -0
  61. data/test/hexapdf/test_parser.rb +7 -0
  62. data/test/hexapdf/test_writer.rb +8 -3
  63. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +18 -18
  64. data/test/hexapdf/type/acro_form/test_form.rb +7 -3
  65. data/test/hexapdf/type/actions/test_launch.rb +6 -2
  66. data/test/hexapdf/type/test_font_type1.rb +5 -0
  67. data/test/hexapdf/type/test_form.rb +1 -1
  68. data/test/hexapdf/type/test_page.rb +7 -1
  69. metadata +4 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 15f4c7590b5f2ce321519ac0b4871971c12073a9d4b0df36ab2f2e3c156f09ab
4
- data.tar.gz: 76dac6196e06e80fd88dade3f831b7744c2b763f004d3c389d3efebcace3be82
3
+ metadata.gz: 2889ba1d03e2c351efd694b1583063023fff97c0da636ff5103f88538255735c
4
+ data.tar.gz: 6fb4727db05900e8fccba2ad4e093d1092e17305e5b5616ded97a76cf835673c
5
5
  SHA512:
6
- metadata.gz: 8ea19ed17370cad1a1fa48a7e0cb85c16e3ff62b344d2a9a90c42d2ac98d5ce5f75c356facf6bb8276822db460e2d9f912f523783c82b3539200e33f9de9f383
7
- data.tar.gz: bdf2d93feade9f0654366bbf89711b670cbd03ee88b81f197f756c2032305f9ee7b9265973f31c5a4e18e7afd3e4f9dfd08b46a0483502b896774494294c111a
6
+ metadata.gz: 00be8ed2c306a88e5bfc0eada97a7e6bf802ec269e832bb21b3521d4077b18ecad11946ddc6f8a6d575820e66339059e59ba2c4cdd2b74d6c7d6defd0f2f5256
7
+ data.tar.gz: 94c6a8178ead2a986921b72b07ef5dc388a5fa6a67945573eec921db30c9940d241d3f47591c8fbbf9bdcf313df0dda536f0fc78e0e946e27dfa3bc13dad9a28
data/CHANGELOG.md CHANGED
@@ -1,3 +1,37 @@
1
+ ## 1.0.0 - 2024-10-26
2
+
3
+ ### Added
4
+
5
+ * [HexaPDF::Task::MergeAcroForm] for merging AcroForm information for imported
6
+ pages
7
+ * [HexaPDF::Document#write_to_string] and [HexaPDF::Composer#write_to_string]
8
+ for easily writing a document to a String
9
+ * [HexaPDF::Font::CMap::Writer#create_cid_cmap] for creating a character code to
10
+ CID CMap file
11
+
12
+ ### Changed
13
+
14
+ * [HexaPDF::Type::AcroForm::Form] text-like field creation methods to always set
15
+ a default appearance string and the quadding
16
+ * Convenience methods for accessing resources to not add the deprecated /ProcSet
17
+ entry by default
18
+ * [HexaPDF::DigitalSignature::CMSHandler] to add informational output regarding
19
+ the certificate chain on verification
20
+ * Validation of [HexaPDF::Type::FontType1] to ensure correct /Encoding value
21
+
22
+ ### Fixed
23
+
24
+ * [HexaPDF::DigitalSignature::Signature#signed_data] to work for invalid offsets
25
+ * [HexaPDF::DigitalSignature::Signing::DefaultHandler] to update the document's
26
+ version to 2.0 when using PAdES
27
+ * Parsing of invalid `)` character in PDF objects and content streams
28
+ * Handling of files that contain stream length values that are indirect objects
29
+ that do not exist
30
+ * [HexaPDF::Font::TrueTypeWrapper] to correctly handle the situation when
31
+ multiple codepoints refer to the same glyph ID
32
+ * [HexaPDF::Type::Page#contents] to handle null values in /Contents array
33
+
34
+
1
35
  ## 0.47.0 - 2024-09-07
2
36
 
3
37
  ### Added
@@ -53,7 +87,7 @@
53
87
  signatures
54
88
  * [HexaPDF::DigitalSignature::CMSHandler#signing_time] to use time from an
55
89
  embedded timestamp authority signature if possible
56
- * [HexaPDF::Layout::Box#fit] to return success for boxes with content
90
+ * HexaPDF::Layout::Box#fit to return success for boxes with content
57
91
  width/height of zero
58
92
  * [HexaPDF::Importer::copy] to optionally allow copying the catalog and page
59
93
  tree nodes
@@ -61,7 +95,7 @@
61
95
  ### Fixed
62
96
 
63
97
  * Setting of correct x-position in fit result for boxes with flow positioning
64
- * [HexaPDF::Layout::ListBox#fit] to respect the set height
98
+ * HexaPDF::Layout::ListBox#fit to respect the set height
65
99
  * CLI command `hexapdf inspect` to work in case of missing Unicode mappings
66
100
  * [HexaPDF::Type::AcroForm::Form#delete_field] to correctly work for fields with
67
101
  an embedded widget
@@ -80,7 +114,7 @@
80
114
 
81
115
  ### Changed
82
116
 
83
- * [HexaPDF::Layout::Box#fit] to set width/height correctly for boxes with
117
+ * HexaPDF::Layout::Box#fit to set width/height correctly for boxes with
84
118
  position `:flow`
85
119
 
86
120
  ### Fixed
@@ -126,9 +160,9 @@
126
160
 
127
161
  ### Fixed
128
162
 
129
- * [HexaPDF::Layout::TextBox#fit] to correctly calculate width in case of flowing
163
+ * HexaPDF::Layout::TextBox#fit to correctly calculate width in case of flowing
130
164
  text around other boxes
131
- * [HexaPDF::Layout::TextBox#draw] to correctly draw border, background... on
165
+ * HexaPDF::Layout::TextBox#draw to correctly draw border, background... on
132
166
  boxes using position 'flow'
133
167
  * Comparison of Hash with [HexaPDF::Dictionary] objects by implementing
134
168
  `#to_hash`
@@ -182,7 +216,7 @@
182
216
  JavaScript action that formats the field's value
183
217
  * [HexaPDF::Type::AcroForm::TextField#set_calculate_action] for setting a
184
218
  JavaScript action that calculates the field's value
185
- * [HexaPDF::Type::AcroForm#recalculate_fields] for recalculating fields
219
+ * [HexaPDF::Type::AcroForm::Form#recalculate_fields] for recalculating fields
186
220
 
187
221
  ### Changed
188
222
 
@@ -239,7 +273,7 @@
239
273
 
240
274
  ### Changed
241
275
 
242
- * [HexaPDF::Layout::Frame::FitResult#draw] to provide better optional content
276
+ * HexaPDF::Layout::Frame::FitResult#draw to provide better optional content
243
277
  group names
244
278
 
245
279
  ### Fixed
@@ -318,8 +352,8 @@
318
352
 
319
353
  ### Changed
320
354
 
321
- * [HexaPDF::Layout::Frame::FitResult#draw] to allow drawing at an offset
322
- * [HexaPDF::Layout::Box#fit] to delegate the actual content fitting to the
355
+ * HexaPDF::Layout::Frame::FitResult#draw to allow drawing at an offset
356
+ * HexaPDF::Layout::Box#fit to delegate the actual content fitting to the
323
357
  `#fit_content` method
324
358
  * [HexaPDF::Document::Layout#box] to allow using the block as drawing block for
325
359
  the base box class
@@ -396,8 +430,8 @@
396
430
 
397
431
  ### Fixed
398
432
 
399
- * [HexaPDF::Layout::ColumnBox#fit] to correctly take initial height into account
400
- * [HexaPDF::Layout::ColumnBox#fit] to ensure correct results in case the
433
+ * HexaPDF::Layout::ColumnBox#fit to correctly take initial height into account
434
+ * HexaPDF::Layout::ColumnBox#fit to ensure correct results in case the
401
435
  requested dimensions are larger than the current region
402
436
  * [HexaPDF::Document::Layout#formatted_text_box] to correctly handle properties
403
437
  * [HexaPDF::Layout::Frame#fit] to raise an error if an invalid value for the
@@ -443,7 +477,7 @@
443
477
  context argument (a page or Form XObject instance)
444
478
  * [HexaPDF::Layout::ListBox] to use its 'fill_color' style property for the item
445
479
  marker color
446
- * [HexaPDF::Layout::Frame::FitResult#draw] to use optional content groups for
480
+ * HexaPDF::Layout::Frame::FitResult#draw to use optional content groups for
447
481
  debug output
448
482
 
449
483
  ### Fixed
@@ -452,7 +486,7 @@
452
486
  default range starting at page 1
453
487
  * [HexaPDF::Type::Page#flatten_annotations] to correctly handle scaled
454
488
  appearances
455
- * Using an unknown style name in [HexaPDF:Document::Layout] method by providing
489
+ * Using an unknown style name in [HexaPDF::Document::Layout] method by providing
456
490
  a useful error message
457
491
  * [HexaPDF::Layout::Box::new] to ensure that the properties attribute is always
458
492
  a hash
@@ -513,7 +547,7 @@
513
547
  final box positions into account
514
548
  * [HexaPDF::Content::Canvas#text] to set the leading only when multiple lines
515
549
  are drawn
516
- * [HexaPDF::Layout::TextBox#split] to use float comparison
550
+ * HexaPDF::Layout::TextBox#split to use float comparison
517
551
  * Validation of standard encryption dictionary to auto-correct invalid /U and /O
518
552
  fields in case they are padded with zeros
519
553
  * [HexaPDF::Document#wrap] handling of sub-type mapping in case of missing type
@@ -930,7 +964,7 @@
930
964
  * [HexaPDF::Layout::WidthFromPolygon] to work correctly in case of very small
931
965
  floating point errors
932
966
  * HexaPDF::Layout::TextFragment#inspect to work in case of interspersed numbers
933
- * [HexaPDF::Layout::TextBox#split] to work for position :flow when box is wider
967
+ * HexaPDF::Layout::TextBox#split to work for position :flow when box is wider
934
968
  than the initial available width
935
969
  * [HexaPDF::Layout::Frame#fit] to create minimally sized mask rectangles
936
970
  * [HexaPDF::Content::GraphicObject::Geom2D] to close the path when drawing
@@ -1866,7 +1900,7 @@
1866
1900
  objects
1867
1901
  * [HexaPDF::Revision#each_modified_object] for iterating over all modified
1868
1902
  objects of a revision
1869
- * [HexaPDF::Layout::Box#split] and [HexaPDF::Layout::TextBox#split] for
1903
+ * [HexaPDF::Layout::Box#split] and HexaPDF::Layout::TextBox#split for
1870
1904
  splitting a box into two parts
1871
1905
  * [HexaPDF::Layout::Frame#full?] for testing whether the frame has any space
1872
1906
  left
@@ -231,6 +231,13 @@ module HexaPDF
231
231
  @document.write(output, optimize: optimize, **options)
232
232
  end
233
233
 
234
+ # Writes the created PDF document to a string and returns that string.
235
+ #
236
+ # See HexaPDF::Document#write for details.
237
+ def write_to_string(optimize: true, **options)
238
+ @document.write_to_string(optimize: optimize, **options)
239
+ end
240
+
234
241
  # :call-seq:
235
242
  # composer.style(name) -> style
236
243
  # composer.style(name, base: :base, **properties) -> style
@@ -598,6 +598,7 @@ module HexaPDF
598
598
  optimize: 'HexaPDF::Task::Optimize',
599
599
  dereference: 'HexaPDF::Task::Dereference',
600
600
  pdfa: 'HexaPDF::Task::PDFA',
601
+ merge_acro_form: 'HexaPDF::Task::MergeAcroForm',
601
602
  })
602
603
 
603
604
  # The global configuration object, providing the following options:
@@ -720,6 +721,7 @@ module HexaPDF
720
721
  Metadata: 'HexaPDF::Type::Metadata',
721
722
  OutputIntent: 'HexaPDF::Type::OutputIntent',
722
723
  XXDestOutputProfileRef: 'HexaPDF::Type::OutputIntent::DestOutputProfileRef',
724
+ ExData: 'HexaPDF::Type::Annotations::MarkupAnnotation::ExData',
723
725
  },
724
726
  'object.subtype_map' => {
725
727
  nil => {
@@ -112,7 +112,9 @@ module HexaPDF
112
112
  elsif byte == 93 # ]
113
113
  @ss.pos += 1
114
114
  TOKEN_ARRAY_END
115
- elsif byte == 123 || byte == 125 # { }
115
+ elsif byte == 41 # )
116
+ raise HexaPDF::MalformedPDFError.new("Delimiter ')' found at invalid position", pos: pos)
117
+ elsif byte == 123 || byte == 125 # { }
116
118
  Token.new(@ss.get_byte)
117
119
  elsif byte == 37 # %
118
120
  unless @ss.skip_until(/(?=[\r\n])/)
@@ -155,6 +155,19 @@ module HexaPDF
155
155
  result.log(:error, "Signature verification failed")
156
156
  end
157
157
 
158
+ certs = [signer_certificate]
159
+ cur_cert = certs.first
160
+ while true
161
+ cur_cert = certificate_chain.find {|cert| cert.subject == cur_cert.issuer }
162
+ if cur_cert && !certs.include?(cur_cert)
163
+ certs << cur_cert
164
+ else
165
+ break
166
+ end
167
+ end
168
+ cert_subjects = certs.map {|cert| cert.subject.to_a.assoc("CN")&.[](1) }
169
+ result.log(:info, "Certificate chain: #{cert_subjects.join(" -> ")}")
170
+
158
171
  result
159
172
  end
160
173
 
@@ -211,7 +211,7 @@ module HexaPDF
211
211
  data = ''.b
212
212
  self[:ByteRange]&.each_slice(2) do |offset, length|
213
213
  io.pos = offset
214
- data << io.read(length)
214
+ data << io.read(length).to_s
215
215
  end
216
216
  data
217
217
  end
@@ -289,6 +289,7 @@ module HexaPDF
289
289
  signature[:Location] = location if location
290
290
  signature[:ContactInfo] = contact_info if contact_info
291
291
  signature[:Prop_Build] = {App: {Name: :HexaPDF, REx: HexaPDF::VERSION}}
292
+ signature.document.version = '2.0' if signature_type == :pades
292
293
 
293
294
  if doc_mdp_permissions
294
295
  doc = signature.document
@@ -724,10 +724,12 @@ module HexaPDF
724
724
  end
725
725
 
726
726
  # :call-seq:
727
- # doc.write(filename, incremental: false, validate: true, update_fields: true, optimize: false)
728
- # doc.write(io, incremental: false, validate: true, update_fields: true, optimize: false)
727
+ # doc.write(filename, incremental: false, validate: true, update_fields: true, optimize: false) -> [start_xref, section]
728
+ # doc.write(io, incremental: false, validate: true, update_fields: true, optimize: false) -> [start_xref, section]
729
729
  #
730
- # Writes the document to the given file (in case +io+ is a String) or IO stream.
730
+ # Writes the document to the given file (in case +io+ is a String) or IO stream. Returns the
731
+ # file position of the start of the last cross-reference section and the last XRefSection object
732
+ # written.
731
733
  #
732
734
  # Before the document is written, it is validated using #validate and an error is raised if the
733
735
  # document is not valid. However, this step can be skipped if needed.
@@ -784,6 +786,15 @@ module HexaPDF
784
786
  end
785
787
  end
786
788
 
789
# Writes the document to a string and returns the string.
#
# All keyword arguments are passed on unchanged to #write, so options such as +incremental+,
# +validate+, +update_fields+ and +optimize+ behave exactly as they do there.
#
# See #write for further information and details on the available arguments.
def write_to_string(**args)
  # Use a binary (ASCII-8BIT) string as the in-memory buffer for the written bytes.
  io = StringIO.new(''.b)
  # Forward the options; without this the caller's settings would be silently ignored.
  write(io, **args)
  io.string
end
797
+
787
798
  def inspect #:nodoc:
788
799
  "<#{self.class.name}:#{object_id}>"
789
800
  end
@@ -40,9 +40,7 @@ module HexaPDF
40
40
  module Font
41
41
  class CMap
42
42
 
43
- # Creates a CMap file.
44
- #
45
- # Currently only ToUnicode CMaps are supported.
43
+ # Creates a CMap file, either a ToUnicode CMap or a CID CMap.
46
44
  class Writer
47
45
 
48
46
  # Maximum number of entries in one section.
@@ -74,6 +72,28 @@ module HexaPDF
74
72
  to_unicode_template % result.chop!
75
73
  end
76
74
 
75
+ # Returns a CID CMap for the given input code to CID mapping which needs to be sorted by
76
+ # input codes.
77
+ #
78
+ # Note that the returned CMap always uses a 16-bit input code space!
79
+ def create_cid_cmap(mapping)
80
+ return cid_template % '' if mapping.empty?
81
+
82
+ chars, ranges = compute_section_entries(mapping)
83
+
84
+ result = create_sections("cidchar", chars.size / 2) do |index|
85
+ index *= 2
86
+ sprintf("<%04X>", chars[index]) << " #{chars[index + 1]}\n"
87
+ end
88
+
89
+ result << create_sections("cidrange", ranges.size / 3) do |index|
90
+ index *= 3
91
+ sprintf("<%04X><%04X>", ranges[index], ranges[index + 1]) << " #{ranges[index + 2]}\n"
92
+ end
93
+
94
+ cid_template % result.chop!
95
+ end
96
+
77
97
  private
78
98
 
79
99
  # Computes the entries for the "char" and "range" sections based on the given mapping.
@@ -146,7 +166,7 @@ module HexaPDF
146
166
  result
147
167
  end
148
168
 
149
- # Returns the CMap file template for a ToUnicode CMap.
169
+ # Returns the template for a ToUnicode CMap.
150
170
  def to_unicode_template
151
171
  <<~TEMPLATE
152
172
  /CIDInit /ProcSet findresource begin
@@ -170,6 +190,40 @@ module HexaPDF
170
190
  TEMPLATE
171
191
  end
172
192
 
193
+ # Returns the template for a CID CMap.
194
+ def cid_template
195
+ <<~TEMPLATE
196
+ %%!PS-Adobe-3.0 Resource-CMap
197
+ %%%%DocumentNeededResources: ProcSet (CIDInit)
198
+ %%%%IncludeResource: ProcSet (CIDInit)
199
+ %%%%BeginResource: CMap (Custom)
200
+ %%%%Title: (Custom Adobe Identity 0)
201
+ %%%%Version: 1
202
+ /CIDInit /ProcSet findresource begin
203
+ 12 dict begin
204
+ begincmap
205
+ /CIDSystemInfo 3 dict dup begin
206
+ /Registry (Adobe) def
207
+ /Ordering (Identity) def
208
+ /Supplement 0 def
209
+ end def
210
+ /CMapName /Custom def
211
+ /CMapType 1 def
212
+ /CMapVersion 1 def
213
+ /WMode 0 def
214
+ 1 begincodespacerange
215
+ <0000> <FFFF>
216
+ endcodespacerange
217
+ %s
218
+ endcmap
219
+ CMapName currentdict /CMap defineresource pop
220
+ end
221
+ end
222
+ %%%%EndResource
223
+ %%%%EOF
224
+ TEMPLATE
225
+ end
226
+
173
227
  end
174
228
 
175
229
  end
@@ -85,6 +85,13 @@ module HexaPDF
85
85
  Writer.new.create_to_unicode_cmap(mapping)
86
86
  end
87
87
 
88
+ # Returns a string containing a CID CMap that represents the given code to CID mapping.
89
+ #
90
+ # See: Writer#create_cid_cmap
91
+ def self.create_cid_cmap(mapping)
92
+ Writer.new.create_cid_cmap(mapping)
93
+ end
94
+
88
95
  # The registry part of the CMap version.
89
96
  attr_accessor :registry
90
97
 
@@ -57,6 +57,10 @@ module HexaPDF
57
57
  class TrueTypeWrapper
58
58
 
59
59
  # Represents a single glyph of the wrapped font.
60
+ #
61
+ # Since some characters/strings may be mapped to the same glyph id by the font's builtin cmap
62
+ # table, it is possible that different Glyph instances with the same #id but different #str
63
+ # exist.
60
64
  class Glyph
61
65
 
62
66
  # The associated TrueTypeWrapper object.
@@ -152,6 +156,7 @@ module HexaPDF
152
156
  @id_to_glyph = {}
153
157
  @codepoint_to_glyph = {}
154
158
  @encoded_glyphs = {}
159
+ @last_char_code = 0
155
160
  end
156
161
 
157
162
  # Returns the type of the font, i.e. :TrueType.
@@ -179,14 +184,15 @@ module HexaPDF
179
184
  !@subsetter.nil?
180
185
  end
181
186
 
182
- # Returns a Glyph object for the given glyph ID.
187
+ # Returns a Glyph object for the given glyph ID and +str+ pair.
183
188
  #
184
- # The optional argument +str+ should be the string representation of the glyph. Only use it if
185
- # it is known,
189
+ # The optional argument +str+ should be the string representation of the glyph. It is possible
190
+ # that multiple strings map to the same glyph (e.g. hyphen and soft-hyphen could be
191
+ # represented by the same glyph).
186
192
  #
187
193
  # Note: Although this method is public, it should normally not be used by application code!
188
194
  def glyph(id, str = nil)
189
- @id_to_glyph[id] ||=
195
+ @id_to_glyph[[id, str]] ||=
190
196
  if id >= 0 && id < @wrapped_font[:maxp].num_glyphs
191
197
  Glyph.new(self, id, str || (+'' << (@cmap.gid_to_code(id) || 0xFFFD)))
192
198
  else
@@ -228,14 +234,12 @@ module HexaPDF
228
234
 
229
235
  # Encodes the glyph and returns the code string.
230
236
  def encode(glyph)
231
- (@encoded_glyphs[glyph.id] ||=
237
+ (@encoded_glyphs[glyph] ||=
232
238
  begin
233
239
  raise HexaPDF::MissingGlyphError.new(glyph) if glyph.kind_of?(InvalidGlyph)
234
- if @subsetter
235
- [[@subsetter.use_glyph(glyph.id)].pack('n'), glyph]
236
- else
237
- [[glyph.id].pack('n'), glyph]
238
- end
240
+ @subsetter.use_glyph(glyph.id) if @subsetter
241
+ @last_char_code += 1
242
+ [[@last_char_code].pack('n'), @last_char_code]
239
243
  end)[0]
240
244
  end
241
245
 
@@ -286,7 +290,7 @@ module HexaPDF
286
290
  Supplement: 0},
287
291
  CIDToGIDMap: :Identity})
288
292
  dict = document.add({Type: :Font, Subtype: :Type0, BaseFont: cid_font[:BaseFont],
289
- Encoding: :'Identity-H', DescendantFonts: [cid_font]})
293
+ DescendantFonts: [cid_font]})
290
294
  dict.font_wrapper = self
291
295
 
292
296
  document.register_listener(:complete_objects) do
@@ -294,6 +298,7 @@ module HexaPDF
294
298
  embed_font(dict, document)
295
299
  complete_width_information(dict)
296
300
  create_to_unicode_cmap(dict, document)
301
+ add_encoding_information_cmap(dict, document)
297
302
  end
298
303
 
299
304
  dict
@@ -306,7 +311,7 @@ module HexaPDF
306
311
  return unless @subsetter
307
312
 
308
313
  tag = +''
309
- data = @encoded_glyphs.each_with_object(''.b) {|(id, v), s| s << id.to_s << v[0] }
314
+ data = @encoded_glyphs.each_with_object(''.b) {|(g, v), s| s << g.id.to_s << v[0] }
310
315
  hash = Digest::MD5.hexdigest(data << @wrapped_font.font_name).to_i(16)
311
316
  while hash != 0 && tag.length < 6
312
317
  hash, mod = hash.divmod(UPPERCASE_LETTERS.length)
@@ -336,8 +341,8 @@ module HexaPDF
336
341
  # Adds the /DW and /W fields to the CIDFont dictionary.
337
342
  def complete_width_information(dict)
338
343
  default_width = glyph(3, " ").width.to_i
339
- widths = @encoded_glyphs.reject {|_, v| v[1].width == default_width }.map do |id, v|
340
- [(@subsetter ? @subsetter.subset_glyph_id(id) : id), v[1].width]
344
+ widths = @encoded_glyphs.reject {|g, _| g.width == default_width }.map do |g, _|
345
+ [(@subsetter ? @subsetter.subset_glyph_id(g.id) : g.id), g.width]
341
346
  end.sort!
342
347
  dict[:DescendantFonts].first.set_widths(widths, default_width: default_width)
343
348
  end
@@ -346,9 +351,10 @@ module HexaPDF
346
351
  # correctly.
347
352
  def create_to_unicode_cmap(dict, document)
348
353
  stream = HexaPDF::StreamData.new do
349
- mapping = @encoded_glyphs.keys.map! do |id|
354
+ mapping = @encoded_glyphs.map do |glyph, (_, char_code)|
350
355
  # Using 0xFFFD as mentioned in Adobe #5411, last line before section 1.5
351
- [(@subsetter ? @subsetter.subset_glyph_id(id) : id), @cmap.gid_to_code(id) || 0xFFFD]
356
+ # TODO: glyph.str assumed to consist of single char, No support for multiple chars
357
+ [char_code, glyph.str.ord || 0xFFFD]
352
358
  end.sort_by!(&:first)
353
359
  HexaPDF::Font::CMap.create_to_unicode_cmap(mapping)
354
360
  end
@@ -357,6 +363,25 @@ module HexaPDF
357
363
  dict[:ToUnicode] = stream_obj
358
364
  end
359
365
 
366
+ # Adds the /Encoding entry to the +dict+.
367
+ #
368
+ # This can either be the identity mapping or, if some Unicode codepoints are mapped to the
369
+ # same glyph, a custom CMap.
370
+ def add_encoding_information_cmap(dict, document)
371
+ mapping = @encoded_glyphs.map do |glyph, (_, char_code)|
372
+ # Map each character code to the glyph ID (the subset glyph ID when subsetting is active)
373
+ [char_code, (@subsetter ? @subsetter.subset_glyph_id(glyph.id) : glyph.id)]
374
+ end.sort_by!(&:first)
375
+ if mapping.all? {|char_code, cid| char_code == cid }
376
+ dict[:Encoding] = :'Identity-H'
377
+ else
378
+ stream = HexaPDF::StreamData.new { HexaPDF::Font::CMap.create_cid_cmap(mapping) }
379
+ stream_obj = document.add({}, stream: stream)
380
+ stream_obj.set_filter(:FlateDecode)
381
+ dict[:Encoding] = stream_obj
382
+ end
383
+ end
384
+
360
385
  end
361
386
 
362
387
  end
@@ -235,6 +235,7 @@ module HexaPDF
235
235
  end
236
236
  end
237
237
 
238
+ in_text_object = (canvas.graphics_object == :text)
238
239
  canvas.begin_text
239
240
  tlm = canvas.graphics_state.tlm
240
241
  tx = x - tlm.e
@@ -248,7 +249,7 @@ module HexaPDF
248
249
  elsif ty.abs < PRECISION
249
250
  canvas.move_text_cursor(offset: [tx, 0], absolute: false)
250
251
  else
251
- canvas.move_text_cursor(offset: [x, y])
252
+ canvas.move_text_cursor(offset: [x, y], absolute: in_text_object)
252
253
  end
253
254
  canvas.show_glyphs_only(items)
254
255
 
@@ -372,7 +372,7 @@ module HexaPDF
372
372
 
373
373
  # Computes the hash value based on the object and generation numbers.
374
374
  def hash
375
- oid.hash ^ gen.hash
375
+ [oid, gen].hash
376
376
  end
377
377
 
378
378
  def inspect #:nodoc:
@@ -184,7 +184,7 @@ module HexaPDF
184
184
  length = if object[:Length].kind_of?(Integer)
185
185
  object[:Length]
186
186
  elsif object[:Length].kind_of?(Reference)
187
- @document.deref(object[:Length]).value
187
+ @document.deref(object[:Length])&.value || 0
188
188
  else
189
189
  0
190
190
  end
@@ -87,7 +87,7 @@ module HexaPDF
87
87
 
88
88
  # Computes the hash value based on the object and generation numbers.
89
89
  def hash
90
- oid.hash ^ gen.hash
90
+ [oid, gen].hash
91
91
  end
92
92
 
93
93
  # Returns the object identifier as "oid,gen".