hexapdf 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +68 -0
  3. data/CONTRIBUTERS +1 -1
  4. data/README.md +35 -4
  5. data/Rakefile +1 -0
  6. data/VERSION +1 -1
  7. data/data/hexapdf/cmap/83pv-RKSJ-H +314 -0
  8. data/data/hexapdf/cmap/90ms-RKSJ-H +259 -0
  9. data/data/hexapdf/cmap/90ms-RKSJ-V +156 -0
  10. data/data/hexapdf/cmap/90msp-RKSJ-H +257 -0
  11. data/data/hexapdf/cmap/90msp-RKSJ-V +155 -0
  12. data/data/hexapdf/cmap/90pv-RKSJ-H +355 -0
  13. data/data/hexapdf/cmap/Add-RKSJ-H +738 -0
  14. data/data/hexapdf/cmap/Add-RKSJ-V +135 -0
  15. data/data/hexapdf/cmap/Adobe-CNS1-UCS2 +18209 -0
  16. data/data/hexapdf/cmap/Adobe-GB1-UCS2 +14267 -0
  17. data/data/hexapdf/cmap/Adobe-Japan1-UCS2 +19159 -0
  18. data/data/hexapdf/cmap/Adobe-Korea1-UCS2 +9267 -0
  19. data/data/hexapdf/cmap/B5pc-H +337 -0
  20. data/data/hexapdf/cmap/B5pc-V +90 -0
  21. data/data/hexapdf/cmap/CNS-EUC-H +490 -0
  22. data/data/hexapdf/cmap/CNS-EUC-V +538 -0
  23. data/data/hexapdf/cmap/ETen-B5-H +343 -0
  24. data/data/hexapdf/cmap/ETen-B5-V +91 -0
  25. data/data/hexapdf/cmap/ETenms-B5-H +79 -0
  26. data/data/hexapdf/cmap/ETenms-B5-V +99 -0
  27. data/data/hexapdf/cmap/EUC-H +207 -0
  28. data/data/hexapdf/cmap/EUC-V +105 -0
  29. data/data/hexapdf/cmap/Ext-RKSJ-H +768 -0
  30. data/data/hexapdf/cmap/Ext-RKSJ-V +117 -0
  31. data/data/hexapdf/cmap/GB-EUC-H +173 -0
  32. data/data/hexapdf/cmap/GB-EUC-V +98 -0
  33. data/data/hexapdf/cmap/GBK-EUC-H +4273 -0
  34. data/data/hexapdf/cmap/GBK-EUC-V +97 -0
  35. data/data/hexapdf/cmap/GBK2K-H +5325 -0
  36. data/data/hexapdf/cmap/GBK2K-V +118 -0
  37. data/data/hexapdf/cmap/GBKp-EUC-H +4272 -0
  38. data/data/hexapdf/cmap/GBKp-EUC-V +97 -0
  39. data/data/hexapdf/cmap/GBpc-EUC-H +175 -0
  40. data/data/hexapdf/cmap/GBpc-EUC-V +98 -0
  41. data/data/hexapdf/cmap/H +200 -0
  42. data/data/hexapdf/cmap/HKscs-B5-H +1331 -0
  43. data/data/hexapdf/cmap/HKscs-B5-V +90 -0
  44. data/data/hexapdf/cmap/Identity-H +339 -0
  45. data/data/hexapdf/cmap/Identity-V +73 -0
  46. data/data/hexapdf/cmap/KSC-EUC-H +562 -0
  47. data/data/hexapdf/cmap/KSC-EUC-V +94 -0
  48. data/data/hexapdf/cmap/KSCms-UHC-H +776 -0
  49. data/data/hexapdf/cmap/KSCms-UHC-HW-H +775 -0
  50. data/data/hexapdf/cmap/KSCms-UHC-HW-V +93 -0
  51. data/data/hexapdf/cmap/KSCms-UHC-V +94 -0
  52. data/data/hexapdf/cmap/KSCpc-EUC-H +608 -0
  53. data/data/hexapdf/cmap/LICENSE.txt +26 -0
  54. data/data/hexapdf/cmap/README.txt +9 -0
  55. data/data/hexapdf/cmap/UniCNS-UCS2-H +16992 -0
  56. data/data/hexapdf/cmap/UniCNS-UCS2-V +90 -0
  57. data/data/hexapdf/cmap/UniCNS-UTF16-H +19117 -0
  58. data/data/hexapdf/cmap/UniCNS-UTF16-V +94 -0
  59. data/data/hexapdf/cmap/UniGB-UCS2-H +14321 -0
  60. data/data/hexapdf/cmap/UniGB-UCS2-V +101 -0
  61. data/data/hexapdf/cmap/UniGB-UTF16-H +14381 -0
  62. data/data/hexapdf/cmap/UniGB-UTF16-V +104 -0
  63. data/data/hexapdf/cmap/UniJIS-UCS2-H +8870 -0
  64. data/data/hexapdf/cmap/UniJIS-UCS2-HW-H +81 -0
  65. data/data/hexapdf/cmap/UniJIS-UCS2-HW-V +279 -0
  66. data/data/hexapdf/cmap/UniJIS-UCS2-V +275 -0
  67. data/data/hexapdf/cmap/UniJIS-UTF16-H +14450 -0
  68. data/data/hexapdf/cmap/UniJIS-UTF16-V +299 -0
  69. data/data/hexapdf/cmap/UniKS-UCS2-H +8725 -0
  70. data/data/hexapdf/cmap/UniKS-UCS2-V +95 -0
  71. data/data/hexapdf/cmap/UniKS-UTF16-H +8895 -0
  72. data/data/hexapdf/cmap/UniKS-UTF16-V +99 -0
  73. data/data/hexapdf/cmap/V +105 -0
  74. data/examples/arc.rb +3 -3
  75. data/examples/merging.rb +4 -1
  76. data/examples/optimizing.rb +3 -0
  77. data/examples/show_char_bboxes.rb +2 -2
  78. data/examples/truetype.rb +2 -2
  79. data/lib/hexapdf/cli.rb +40 -1
  80. data/lib/hexapdf/cli/batch.rb +72 -0
  81. data/lib/hexapdf/cli/command.rb +112 -15
  82. data/lib/hexapdf/cli/files.rb +2 -2
  83. data/lib/hexapdf/cli/images.rb +14 -6
  84. data/lib/hexapdf/cli/info.rb +6 -8
  85. data/lib/hexapdf/cli/inspect.rb +5 -8
  86. data/lib/hexapdf/cli/merge.rb +13 -20
  87. data/lib/hexapdf/cli/modify.rb +4 -7
  88. data/lib/hexapdf/cli/optimize.rb +2 -5
  89. data/lib/hexapdf/configuration.rb +32 -3
  90. data/lib/hexapdf/content/canvas.rb +130 -37
  91. data/lib/hexapdf/content/parser.rb +40 -6
  92. data/lib/hexapdf/content/processor.rb +4 -4
  93. data/lib/hexapdf/document.rb +40 -10
  94. data/lib/hexapdf/document/fonts.rb +1 -0
  95. data/lib/hexapdf/encryption/security_handler.rb +8 -12
  96. data/lib/hexapdf/filter/flate_decode.rb +25 -2
  97. data/lib/hexapdf/font/cmap.rb +124 -8
  98. data/lib/hexapdf/font/cmap/parser.rb +65 -15
  99. data/lib/hexapdf/font/encoding/base.rb +2 -2
  100. data/lib/hexapdf/font/encoding/glyph_list.rb +2 -4
  101. data/lib/hexapdf/font/true_type.rb +1 -0
  102. data/lib/hexapdf/font/true_type/builder.rb +75 -0
  103. data/lib/hexapdf/font/true_type/optimizer.rb +65 -0
  104. data/lib/hexapdf/font/true_type/subsetter.rb +9 -22
  105. data/lib/hexapdf/font/true_type_wrapper.rb +9 -21
  106. data/lib/hexapdf/font_loader.rb +1 -1
  107. data/lib/hexapdf/importer.rb +1 -1
  108. data/lib/hexapdf/serializer.rb +5 -3
  109. data/lib/hexapdf/type.rb +2 -0
  110. data/lib/hexapdf/type/cid_font.rb +120 -0
  111. data/lib/hexapdf/type/font.rb +32 -12
  112. data/lib/hexapdf/type/font_simple.rb +34 -42
  113. data/lib/hexapdf/type/font_type0.rb +148 -0
  114. data/lib/hexapdf/type/form.rb +4 -4
  115. data/lib/hexapdf/type/page.rb +12 -11
  116. data/lib/hexapdf/type/resources.rb +14 -0
  117. data/lib/hexapdf/utils/graphics_helpers.rb +77 -0
  118. data/lib/hexapdf/version.rb +1 -1
  119. data/man/man1/hexapdf.1 +43 -1
  120. data/test/hexapdf/content/test_canvas.rb +76 -0
  121. data/test/hexapdf/content/test_parser.rb +20 -1
  122. data/test/hexapdf/content/test_processor.rb +11 -7
  123. data/test/hexapdf/document/test_fonts.rb +3 -1
  124. data/test/hexapdf/font/cmap/test_parser.rb +42 -7
  125. data/test/hexapdf/font/encoding/test_base.rb +1 -1
  126. data/test/hexapdf/font/encoding/test_glyph_list.rb +3 -3
  127. data/test/hexapdf/font/test_cmap.rb +104 -0
  128. data/test/hexapdf/font/test_true_type_wrapper.rb +63 -46
  129. data/test/hexapdf/font/true_type/test_builder.rb +37 -0
  130. data/test/hexapdf/font/true_type/test_optimizer.rb +27 -0
  131. data/test/hexapdf/font/true_type/test_subsetter.rb +6 -13
  132. data/test/hexapdf/test_configuration.rb +12 -7
  133. data/test/hexapdf/test_document.rb +24 -0
  134. data/test/hexapdf/test_importer.rb +9 -1
  135. data/test/hexapdf/test_writer.rb +2 -2
  136. data/test/hexapdf/type/test_cid_font.rb +61 -0
  137. data/test/hexapdf/type/test_font.rb +31 -4
  138. data/test/hexapdf/type/test_font_simple.rb +6 -21
  139. data/test/hexapdf/type/test_font_type0.rb +114 -0
  140. data/test/hexapdf/type/test_resources.rb +17 -1
  141. data/test/hexapdf/utils/test_graphics_helpers.rb +29 -0
  142. metadata +82 -3
@@ -69,7 +69,7 @@ module HexaPDF
69
69
  end
70
70
 
71
71
  def execute(pdf) #:nodoc:
72
- HexaPDF::Document.open(pdf, decryption_opts: {password: @password}) do |doc|
72
+ with_document(pdf, password: @password) do |doc|
73
73
  if @indices.empty?
74
74
  list_files(doc)
75
75
  else
@@ -103,7 +103,7 @@ module HexaPDF
103
103
  each_file(doc) do |obj, index|
104
104
  next unless @indices.include?(index + 1) || @indices.include?(0)
105
105
  maybe_raise_on_existing_file(obj.path)
106
- puts "Extracting #{obj.path}..."
106
+ puts "Extracting #{obj.path}..." if command_parser.verbosity_info?
107
107
  File.open(obj.path, 'wb') do |file|
108
108
  fiber = obj.embedded_file_stream.stream_decoder
109
109
  while fiber.alive? && (data = fiber.resume)
@@ -31,6 +31,7 @@
31
31
  # is created or manipulated using HexaPDF.
32
32
  #++
33
33
 
34
+ require 'set'
34
35
  require 'hexapdf/cli/command'
35
36
 
36
37
  module HexaPDF
@@ -77,7 +78,7 @@ module HexaPDF
77
78
  end
78
79
 
79
80
  def execute(pdf) #:nodoc:
80
- HexaPDF::Document.open(pdf, decryption_opts: {password: @password}) do |doc|
81
+ with_document(pdf, password: @password) do |doc|
81
82
  if @indices.empty?
82
83
  list_images(doc)
83
84
  else
@@ -106,12 +107,19 @@ module HexaPDF
106
107
 
107
108
  # Extracts the images with the given indices.
108
109
  def extract_images(doc)
110
+ done = Set.new
109
111
  each_image(doc) do |image, index, _|
110
- next unless @indices.include?(index) || @indices.include?(0)
111
- path = "#{@prefix}-#{index}.#{image.info.extension}"
112
- maybe_raise_on_existing_file(path)
113
- puts "Extracting #{path}..."
114
- image.write(path)
112
+ next unless (@indices.include?(index) || @indices.include?(0)) && !done.include?(index)
113
+ info = image.info
114
+ if info.writable
115
+ path = "#{@prefix}-#{index}.#{image.info.extension}"
116
+ maybe_raise_on_existing_file(path)
117
+ puts "Extracting #{path}..." if command_parser.verbosity_info?
118
+ image.write(path)
119
+ done << index
120
+ elsif command_parser.verbosity_warning?
121
+ $stderr.puts "Warning (image #{index}): PDF image format not supported for writing"
122
+ end
115
123
  end
116
124
  end
117
125
 
@@ -73,9 +73,11 @@ module HexaPDF
73
73
  COLUMN_WIDTH = 20 #:nodoc:
74
74
 
75
75
  def output_info(file) # :nodoc:
76
- options = {decryption_opts: {password: @password},
77
- config: {'document.auto_decrypt' => @auto_decrypt}}
76
+ options = pdf_options(@password)
77
+ options[:config]['document.auto_decrypt'] = @auto_decrypt
78
78
  HexaPDF::Document.open(file, options) do |doc|
79
+ output_line("File name", file)
80
+ output_line("File size", File.stat(file).size.to_s + " bytes")
79
81
  INFO_KEYS.each do |name|
80
82
  next unless doc.trailer.info.key?(name)
81
83
  output_line(name.to_s, doc.trailer.info[name].to_s)
@@ -98,17 +100,13 @@ module HexaPDF
98
100
  output_line("Pages", doc.pages.count.to_s)
99
101
  output_line("Version", doc.version)
100
102
  end
101
- rescue HexaPDF::EncryptionError => e
103
+ rescue HexaPDF::EncryptionError
102
104
  if @auto_decrypt
103
105
  @auto_decrypt = false
104
106
  retry
105
107
  else
106
- $stderr.puts "Error while decrypting the PDF file: #{e.message}"
107
- exit(1)
108
+ raise
108
109
  end
109
- rescue HexaPDF::Error => e
110
- $stderr.puts "Error while processing the PDF file: #{e.message}"
111
- exit(1)
112
110
  end
113
111
 
114
112
  def output_line(header, text) #:nodoc:
@@ -94,12 +94,7 @@ module HexaPDF
94
94
  end
95
95
 
96
96
  def execute(file) #:nodoc:
97
- HexaPDF::Document.open(file, decryption_opts: {password: @password}) do |doc|
98
- send("do_#{@exec}", doc)
99
- end
100
- rescue HexaPDF::Error => e
101
- $stderr.puts "Error while processing the PDF file: #{e.message}"
102
- exit(1)
97
+ with_document(file, password: @password) {|doc| send("do_#{@exec}", doc)}
103
98
  end
104
99
 
105
100
  private
@@ -130,7 +125,9 @@ module HexaPDF
130
125
  def do_object(doc) #:nodoc:
131
126
  object = doc.object(pdf_reference_from_string(@param))
132
127
  return unless object
133
- $stderr.puts("Note: Object also has stream data") if object.data.stream
128
+ if object.data.stream && command_parser.verbosity_info?
129
+ $stderr.puts("Note: Object also has stream data")
130
+ end
134
131
  puts HexaPDF::Serializer.new.serialize(object.value)
135
132
  end
136
133
 
@@ -141,7 +138,7 @@ module HexaPDF
141
138
  while source.alive? && (data = source.resume)
142
139
  $stdout.write(data)
143
140
  end
144
- else
141
+ elsif command_parser.verbosity_info?
145
142
  $stderr.puts("Note: Object has no stream data")
146
143
  end
147
144
  end
@@ -105,8 +105,15 @@ module HexaPDF
105
105
  # Create PDF documents for each input file
106
106
  cache = {}
107
107
  @files.each do |spec|
108
- cache[spec.file] ||= HexaPDF::Document.new(io: File.open(spec.file),
109
- decryption_opts: {password: spec.password})
108
+ cache[spec.file] ||=
109
+ begin
110
+ io = if spec.file == output_file
111
+ StringIO.new(File.binread(spec.file))
112
+ else
113
+ File.open(spec.file)
114
+ end
115
+ HexaPDF::Document.new(io: io, **pdf_options(spec.password))
116
+ end
110
117
  spec.file = cache[spec.file]
111
118
  end
112
119
 
@@ -115,24 +122,13 @@ module HexaPDF
115
122
  page_tree = target.add(Type: :Pages)
116
123
  import_pages(page_tree)
117
124
  target.catalog[:Pages] = page_tree
118
-
119
- # Remove potentially imported but unused pages and page tree nodes
120
- retained = target.pages.each_with_object({}) {|page, h| h[page.data] = true}
121
- retained[target.pages.root.data] = true
122
- target.each(current: false) do |obj|
123
- next unless obj.kind_of?(HexaPDF::Dictionary)
124
- if (obj.type == :Pages || obj.type == :Page) && !retained.key?(obj.data)
125
- target.delete(obj)
126
- end
127
- end
125
+ remove_unused_pages(target)
126
+ target.pages.add unless target.pages.count > 0
128
127
 
129
128
  apply_encryption_options(target)
130
129
  apply_optimization_options(target)
131
130
 
132
- target.write(output_file)
133
- rescue HexaPDF::Error => e
134
- $stderr.puts "Processing error : #{e.message}"
135
- exit(1)
131
+ write_document(target, output_file)
136
132
  end
137
133
 
138
134
  def usage #:nodoc:
@@ -148,10 +144,7 @@ module HexaPDF
148
144
  @files.each do |s|
149
145
  page_list = s.file.pages.to_a
150
146
  s.pages = parse_pages_specification(s.pages, s.file.pages.count)
151
- s.pages.each do |arr|
152
- arr[0] = page_list[arr[0]]
153
- arr[1] = arr[0].value[:Rotate] || :none unless arr[1]
154
- end
147
+ s.pages.each {|arr| arr[0] = page_list[arr[0]]}
155
148
  end
156
149
 
157
150
  if @interleave
@@ -77,16 +77,12 @@ module HexaPDF
77
77
 
78
78
  def execute(in_file, out_file) #:nodoc:
79
79
  maybe_raise_on_existing_file(out_file)
80
- HexaPDF::Document.open(in_file, decryption_opts: {password: @password}) do |doc|
80
+ with_document(in_file, password: @password, out_file: out_file) do |doc|
81
81
  arrange_pages(doc) unless @pages == '1-e'
82
82
  @embed_files.each {|file| doc.files.add(file, embed: true)}
83
83
  apply_encryption_options(doc)
84
84
  apply_optimization_options(doc)
85
- doc.write(out_file)
86
85
  end
87
- rescue HexaPDF::Error => e
88
- $stderr.puts "Processing error : #{e.message}"
89
- exit(1)
90
86
  end
91
87
 
92
88
  private
@@ -100,13 +96,14 @@ module HexaPDF
100
96
  page.value.update(page.copy_inherited_values)
101
97
  if rotation == :none
102
98
  page.delete(:Rotate)
103
- else
99
+ elsif rotation.kind_of?(Integer)
104
100
  page[:Rotate] = ((page[:Rotate] || 0) + rotation) % 360
105
101
  end
106
102
  new_page_tree.add_page(page)
107
103
  end
108
- doc.delete(doc.catalog.delete(:Pages))
109
104
  doc.catalog[:Pages] = new_page_tree
105
+ remove_unused_pages(doc)
106
+ doc.pages.add unless doc.pages.count > 0
110
107
  end
111
108
 
112
109
  end
@@ -54,6 +54,7 @@ module HexaPDF
54
54
  @out_options.xref_streams = :generate
55
55
  @out_options.object_streams = :generate
56
56
  @out_options.streams = :compress
57
+ @out_options.optimize_fonts = true
57
58
 
58
59
  options.on("--password PASSWORD", "-p", String,
59
60
  "The password for decryption. Use - for reading from standard input.") do |pwd|
@@ -67,14 +68,10 @@ module HexaPDF
67
68
 
68
69
  def execute(in_file, out_file) #:nodoc:
69
70
  maybe_raise_on_existing_file(out_file)
70
- HexaPDF::Document.open(in_file, decryption_opts: {password: @password}) do |doc|
71
+ with_document(in_file, password: @password, out_file: out_file) do |doc|
71
72
  optimize_page_tree(doc)
72
73
  apply_optimization_options(doc)
73
- doc.write(out_file)
74
74
  end
75
- rescue HexaPDF::Error => e
76
- $stderr.puts "Processing error : #{e.message}"
77
- exit(1)
78
75
  end
79
76
 
80
77
  private
@@ -92,13 +92,13 @@ module HexaPDF
92
92
  end
93
93
 
94
94
  # :call-seq:
95
- # config.constantize(name, key = nil) -> constant or nil
95
+ # config.constantize(name, key = nil) -> constant
96
96
  # config.constantize(name, key = nil) {|name| block} -> obj
97
97
  #
98
98
  # Returns the constant the option +name+ is referring to. If +key+ is provided and the value
99
99
  # of the option +name+ responds to \#[], the constant to which +key+ refers is returned.
100
100
  #
101
- # If no constant can be found and no block is provided, +nil+ is returned. If a block is
101
+ # If no constant can be found and no block is provided, an error is raised. If a block is
102
102
  # provided it is called with the option name and its result will be returned.
103
103
  #
104
104
  # config.constantize('encryption.aes') #=> HexaPDF::Encryption::FastAES
@@ -107,7 +107,12 @@ module HexaPDF
107
107
  data = self[name]
108
108
  data = data[key] if key != :__unset && data.respond_to?(:[])
109
109
  (data = ::Object.const_get(data) rescue nil) if data.kind_of?(String)
110
- data = yield(name) if block_given? && data.nil?
110
+ if data.nil? && block_given?
111
+ data = yield(name)
112
+ elsif data.nil?
113
+ raise HexaPDF::Error, "Error getting constant for configuration option '#{name}'" <<
114
+ (key == :__unset ? "" : " and key '#{key}'")
115
+ end
111
116
  data
112
117
  end
113
118
 
@@ -160,6 +165,15 @@ module HexaPDF
160
165
  #
161
166
  # The default implementation raises an error.
162
167
  #
168
+ # font.on_missing_unicode_mapping::
169
+ # Callback hook when a character code point cannot be converted to a Unicode character.
170
+ #
171
+ # The value needs to be an object that responds to \#call(code, font_dict) where +code+ is the
172
+ # decoded code point and +font_dict+ is the font dictionary which was used for the conversion.
173
+ # The returned value is used as the Unicode character and should be a string.
174
+ #
175
+ # The default implementation raises an error.
176
+ #
163
177
  # font_loader::
164
178
  # An array with font loader implementations. When a font should be loaded, the array is
165
179
  # iterated in sequence and the first valid font returned by a font loader is used.
@@ -222,6 +236,10 @@ module HexaPDF
222
236
  'font.on_missing_glyph' => proc do |n, f|
223
237
  raise HexaPDF::Error, "No glyph for '#{n}' in font #{f.font_name} found"
224
238
  end,
239
+ 'font.on_missing_unicode_mapping' => proc do |code_point, font|
240
+ raise HexaPDF::Error, "No Unicode mapping for code point #{code_point} " \
241
+ "in font #{font[:BaseFont]}"
242
+ end,
225
243
  'font_loader' => [
226
244
  'HexaPDF::FontLoader::Standard14',
227
245
  'HexaPDF::FontLoader::FromConfiguration',
@@ -283,6 +301,13 @@ module HexaPDF
283
301
  # Specifies the compression level that should be used with the FlateDecode filter. The level
284
302
  # can range from 0 (no compression), 1 (best speed) to 9 (best compression, default).
285
303
  #
304
+ # filter.flate_memory::
305
+ # Specifies the memory level that should be used with the FlateDecode filter. The level can
306
+ # range from 1 (minimum memory usage; slow, reduces compression) to 9 (maximum memory usage).
307
+ #
308
+ # The HexaPDF default value of 6 has been found in tests to be nearly equivalent to the Zlib
309
+ # default of 8 in terms of speed and compression level but uses less memory.
310
+ #
286
311
  # filter.map::
287
312
  # A mapping from a PDF name (a Symbol) to a filter object (see Filter). If the value is a
288
313
  # String, it should contain the name of a constant that contains a filter object.
@@ -328,6 +353,7 @@ module HexaPDF
328
353
  'encryption.sub_filter_map' => {
329
354
  },
330
355
  'filter.flate_compression' => 9,
356
+ 'filter.flate_memory' => 6,
331
357
  'filter.map' => {
332
358
  ASCIIHexDecode: 'HexaPDF::Filter::ASCIIHexDecode',
333
359
  AHx: 'HexaPDF::Filter::ASCIIHexDecode',
@@ -381,8 +407,11 @@ module HexaPDF
381
407
  'object.subtype_map' => {
382
408
  Image: 'HexaPDF::Type::Image',
383
409
  Form: 'HexaPDF::Type::Form',
410
+ Type0: 'HexaPDF::Type::FontType0',
384
411
  Type1: 'HexaPDF::Type::FontType1',
385
412
  TrueType: 'HexaPDF::Type::FontTrueType',
413
+ CIDFontType0: 'HexaPDF::Type::CIDFont',
414
+ CIDFontType2: 'HexaPDF::Type::CIDFont',
386
415
  },
387
416
  'task.map' => {
388
417
  optimize: 'HexaPDF::Task::Optimize',
@@ -35,6 +35,7 @@ require 'hexapdf/content/graphics_state'
35
35
  require 'hexapdf/content/operator'
36
36
  require 'hexapdf/serializer'
37
37
  require 'hexapdf/utils/math_helpers'
38
+ require 'hexapdf/utils/graphics_helpers'
38
39
  require 'hexapdf/content/graphic_object'
39
40
  require 'hexapdf/stream'
40
41
 
@@ -132,6 +133,7 @@ module HexaPDF
132
133
  class Canvas
133
134
 
134
135
  include HexaPDF::Utils::MathHelpers
136
+ include HexaPDF::Utils::GraphicsHelpers
135
137
 
136
138
  # The context for which the canvas was created (a HexaPDF::Type::Page or HexaPDF::Type::Form
137
139
  # object).
@@ -1693,7 +1695,7 @@ module HexaPDF
1693
1695
  #
1694
1696
  # Low-level method for actually showing text on the canvas.
1695
1697
  #
1696
- # The argument +data+ needs to be a an array of glyph objects valid for the current font,
1698
+ # The argument +glyphs+ needs to be an array of glyph objects valid for the current font,
1697
1699
  # optionally interspersed with numbers for kerning.
1698
1700
  #
1699
1701
  # Text is always shown at the current position of the text cursor, i.e. the origin of the text
@@ -1702,13 +1704,13 @@ module HexaPDF
1702
1704
  #
1703
1705
  # The text matrix is updated to correctly represent the graphics state after the invocation.
1704
1706
  #
1705
- # This method is usually not invoked directly but by higher level methods like #show_text.
1706
- def show_glyphs(data)
1707
+ # This method is usually not invoked directly but by higher level methods like #text.
1708
+ def show_glyphs(glyphs)
1707
1709
  begin_text
1708
1710
 
1709
1711
  result = [''.b]
1710
1712
  offset = 0
1711
- data.each do |item|
1713
+ glyphs.each do |item|
1712
1714
  if item.kind_of?(Numeric)
1713
1715
  result << item << ''.b
1714
1716
  offset -= item * graphics_state.scaled_font_size
@@ -1718,7 +1720,7 @@ module HexaPDF
1718
1720
 
1719
1721
  offset += item.width * graphics_state.scaled_font_size +
1720
1722
  graphics_state.scaled_character_spacing
1721
- offset += graphics_state.scaled_word_spacing if encoded.length == 1 && item.space?
1723
+ offset += graphics_state.scaled_word_spacing if encoded == " ".freeze
1722
1724
  end
1723
1725
  end
1724
1726
 
@@ -1727,6 +1729,124 @@ module HexaPDF
1727
1729
  self
1728
1730
  end
1729
1731
 
1732
+ # :call-seq:
1733
+ # canvas.show_glyphs_only(glyphs) -> canvas
1734
+ #
1735
+ # Same operation as with #show_glyphs but without updating the text matrix.
1736
+ #
1737
+ # This method should only be used by advanced text layouting algorithms which perform the
1738
+ # necessary calculations themselves!
1739
+ #
1740
+ # *Warning*: Since this method doesn't update the text matrix, all following results from
1741
+ # #text_cursor and other methods using the current text matrix are invalid until the next call
1742
+ # to #text_matrix or #end_text.
1743
+ def show_glyphs_only(glyphs)
1744
+ begin_text
1745
+
1746
+ result = [''.b]
1747
+ glyphs.each do |item|
1748
+ if item.kind_of?(Numeric)
1749
+ result << item << ''.b
1750
+ else
1751
+ result[-1] << @font.encode(item)
1752
+ end
1753
+ end
1754
+
1755
+ serialize1(:TJ, result)
1756
+ self
1757
+ end
1758
+
1759
+ # :call-seq:
1760
+ # canvas.marked_content_point(tag, property_list: nil) -> canvas
1761
+ #
1762
+ # Inserts a marked-content point, optionally associated with a property list.
1763
+ #
1764
+ # A marked-content point is used to identify a position in the content stream for later use by
1765
+ # other applications. The symbol +tag+ is used to uniquely identify the role of the
1766
+ # marked-content point and should be registered with ISO to avoid conflicts.
1767
+ #
1768
+ # The optional +property_list+ argument can either be a valid PDF dictionary or a symbol
1769
+ # referencing an already used property list in the resource dictionary's /Properties
1770
+ # dictionary.
1771
+ #
1772
+ # Examples:
1773
+ #
1774
+ # canvas.marked_content_point(:Divider)
1775
+ # canvas.marked_content_point(:Divider, property_list: {Key: 'value'})
1776
+ #
1777
+ # See: PDF1.7 s14.6
1778
+ def marked_content_point(tag, property_list: nil)
1779
+ raise_unless_at_page_description_level_or_in_text
1780
+ if property_list
1781
+ property_list = resources.property_list(property_list) if property_list.kind_of?(Symbol)
1782
+ invoke2(:DP, tag, resources.add_property_list(property_list))
1783
+ else
1784
+ invoke1(:MP, tag)
1785
+ end
1786
+ self
1787
+ end
1788
+
1789
+ # :call-seq:
1790
+ # canvas.marked_content_sequence(tag, property_list: nil) -> canvas
1791
+ # canvas.marked_content_sequence(tag, property_list: nil) { block } -> canvas
1792
+ #
1793
+ # Inserts a marked-content sequence, optionally associated with a property list.
1794
+ #
1795
+ # A marked-content sequence is used to identify a sequence of complete graphics objects in the
1796
+ # content stream for later use by other applications. The symbol +tag+ is used to uniquely
1797
+ # identify the role of the marked-content sequence and should be registered with ISO to avoid
1798
+ # conflicts.
1799
+ #
1800
+ # The optional +property_list+ argument can either be a valid PDF dictionary or a symbol
1801
+ # referencing an already used property list in the resource dictionary's /Properties
1802
+ # dictionary.
1803
+ #
1804
+ # If invoked without a block, a corresponding call to #end_marked_content_sequence must be
1805
+ # done. Otherwise the marked-content sequence automatically ends when the block is finished.
1806
+ #
1807
+ # Although the PDF specification would allow using marked-content sequences inside text
1808
+ # objects, this is prohibited.
1809
+ #
1810
+ # Examples:
1811
+ #
1812
+ # canvas.marked_content_sequence(:Divider)
1813
+ # # Other instructions
1814
+ # canvas.end_marked_content_sequence
1815
+ #
1816
+ # canvas.marked_content_sequence(:Divider, property_list: {Key: 'value'}) do
1817
+ # # Other instructions
1818
+ # end
1819
+ #
1820
+ # See: PDF1.7 s14.6, #end_marked_content_sequence
1821
+ def marked_content_sequence(tag, property_list: nil)
1822
+ raise_unless_at_page_description_level
1823
+ if property_list
1824
+ property_list = resources.property_list(property_list) if property_list.kind_of?(Symbol)
1825
+ invoke2(:BDC, tag, resources.add_property_list(property_list))
1826
+ else
1827
+ invoke1(:BMC, tag)
1828
+ end
1829
+ if block_given?
1830
+ yield
1831
+ end_marked_content_sequence
1832
+ end
1833
+ self
1834
+ end
1835
+
1836
+ # :call-seq:
1837
+ # canvas.end_marked_content_sequence -> canvas
1838
+ #
1839
+ # Ends a marked-content sequence.
1840
+ #
1841
+ # See #marked_content_sequence for details.
1842
+ #
1843
+ # See: PDF1.7 s14.6, #marked_content_sequence
1844
+ def end_marked_content_sequence
1845
+ raise_unless_at_page_description_level
1846
+ invoke0(:EMC)
1847
+ self
1848
+ end
1849
+
1730
1850
  private
1731
1851
 
1732
1852
  # Invokes the given operator with the operands and serializes it.
@@ -1752,6 +1872,11 @@ module HexaPDF
1752
1872
  @contents << @operators[operator].serialize(@serializer, op1)
1753
1873
  end
1754
1874
 
1875
+ # Optimized method for one operand.
1876
+ def serialize1(operator, op1)
1877
+ @contents << @operators[operator].serialize(@serializer, op1)
1878
+ end
1879
+
1755
1880
  # Optimized method for two operands.
1756
1881
  def invoke2(operator, op1, op2)
1757
1882
  @operators[operator].invoke(self, op1, op2)
@@ -1936,38 +2061,6 @@ module HexaPDF
1936
2061
  curve_to(p3[0], p3[1], p1: p1, p2: p2)
1937
2062
  end
1938
2063
 
1939
- # Given two points p0 = (x0, y0) and p1 = (x1, y1), returns the point on the line through
1940
- # these points that is +distance+ units away from p0.
1941
- #
1942
- # v = p1 - p0
1943
- # result = p0 + distance * v/norm(v)
1944
- def point_on_line(x0, y0, x1, y1, distance:)
1945
- norm = Math.sqrt((x1 - x0)**2 + (y1 - y0)**2)
1946
- [x0 + distance / norm * (x1 - x0), y0 + distance / norm * (y1 - y0)]
1947
- end
1948
-
1949
- # Calculates and returns the requested dimensions for the rectangular object with the given
1950
- # +width+ and +height+ based on the options.
1951
- #
1952
- # +rwidth+::
1953
- # The requested width. If +rheight+ is not specified, it is chosen so that the aspect
1954
- # ratio is maintained
1955
- #
1956
- # +rheight+::
1957
- # The requested height. If +rwidth+ is not specified, it is chosen so that the aspect
1958
- # ratio is maintained
1959
- def calculate_dimensions(width, height, rwidth: nil, rheight: nil)
1960
- if rwidth && rheight
1961
- [rwidth, rheight]
1962
- elsif rwidth
1963
- [rwidth, height * rwidth / width.to_f]
1964
- elsif rheight
1965
- [width * rheight / height.to_f, rheight]
1966
- else
1967
- [width, height]
1968
- end
1969
- end
1970
-
1971
2064
  end
1972
2065
 
1973
2066
  end