hexapdf 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (142) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +68 -0
  3. data/CONTRIBUTERS +1 -1
  4. data/README.md +35 -4
  5. data/Rakefile +1 -0
  6. data/VERSION +1 -1
  7. data/data/hexapdf/cmap/83pv-RKSJ-H +314 -0
  8. data/data/hexapdf/cmap/90ms-RKSJ-H +259 -0
  9. data/data/hexapdf/cmap/90ms-RKSJ-V +156 -0
  10. data/data/hexapdf/cmap/90msp-RKSJ-H +257 -0
  11. data/data/hexapdf/cmap/90msp-RKSJ-V +155 -0
  12. data/data/hexapdf/cmap/90pv-RKSJ-H +355 -0
  13. data/data/hexapdf/cmap/Add-RKSJ-H +738 -0
  14. data/data/hexapdf/cmap/Add-RKSJ-V +135 -0
  15. data/data/hexapdf/cmap/Adobe-CNS1-UCS2 +18209 -0
  16. data/data/hexapdf/cmap/Adobe-GB1-UCS2 +14267 -0
  17. data/data/hexapdf/cmap/Adobe-Japan1-UCS2 +19159 -0
  18. data/data/hexapdf/cmap/Adobe-Korea1-UCS2 +9267 -0
  19. data/data/hexapdf/cmap/B5pc-H +337 -0
  20. data/data/hexapdf/cmap/B5pc-V +90 -0
  21. data/data/hexapdf/cmap/CNS-EUC-H +490 -0
  22. data/data/hexapdf/cmap/CNS-EUC-V +538 -0
  23. data/data/hexapdf/cmap/ETen-B5-H +343 -0
  24. data/data/hexapdf/cmap/ETen-B5-V +91 -0
  25. data/data/hexapdf/cmap/ETenms-B5-H +79 -0
  26. data/data/hexapdf/cmap/ETenms-B5-V +99 -0
  27. data/data/hexapdf/cmap/EUC-H +207 -0
  28. data/data/hexapdf/cmap/EUC-V +105 -0
  29. data/data/hexapdf/cmap/Ext-RKSJ-H +768 -0
  30. data/data/hexapdf/cmap/Ext-RKSJ-V +117 -0
  31. data/data/hexapdf/cmap/GB-EUC-H +173 -0
  32. data/data/hexapdf/cmap/GB-EUC-V +98 -0
  33. data/data/hexapdf/cmap/GBK-EUC-H +4273 -0
  34. data/data/hexapdf/cmap/GBK-EUC-V +97 -0
  35. data/data/hexapdf/cmap/GBK2K-H +5325 -0
  36. data/data/hexapdf/cmap/GBK2K-V +118 -0
  37. data/data/hexapdf/cmap/GBKp-EUC-H +4272 -0
  38. data/data/hexapdf/cmap/GBKp-EUC-V +97 -0
  39. data/data/hexapdf/cmap/GBpc-EUC-H +175 -0
  40. data/data/hexapdf/cmap/GBpc-EUC-V +98 -0
  41. data/data/hexapdf/cmap/H +200 -0
  42. data/data/hexapdf/cmap/HKscs-B5-H +1331 -0
  43. data/data/hexapdf/cmap/HKscs-B5-V +90 -0
  44. data/data/hexapdf/cmap/Identity-H +339 -0
  45. data/data/hexapdf/cmap/Identity-V +73 -0
  46. data/data/hexapdf/cmap/KSC-EUC-H +562 -0
  47. data/data/hexapdf/cmap/KSC-EUC-V +94 -0
  48. data/data/hexapdf/cmap/KSCms-UHC-H +776 -0
  49. data/data/hexapdf/cmap/KSCms-UHC-HW-H +775 -0
  50. data/data/hexapdf/cmap/KSCms-UHC-HW-V +93 -0
  51. data/data/hexapdf/cmap/KSCms-UHC-V +94 -0
  52. data/data/hexapdf/cmap/KSCpc-EUC-H +608 -0
  53. data/data/hexapdf/cmap/LICENSE.txt +26 -0
  54. data/data/hexapdf/cmap/README.txt +9 -0
  55. data/data/hexapdf/cmap/UniCNS-UCS2-H +16992 -0
  56. data/data/hexapdf/cmap/UniCNS-UCS2-V +90 -0
  57. data/data/hexapdf/cmap/UniCNS-UTF16-H +19117 -0
  58. data/data/hexapdf/cmap/UniCNS-UTF16-V +94 -0
  59. data/data/hexapdf/cmap/UniGB-UCS2-H +14321 -0
  60. data/data/hexapdf/cmap/UniGB-UCS2-V +101 -0
  61. data/data/hexapdf/cmap/UniGB-UTF16-H +14381 -0
  62. data/data/hexapdf/cmap/UniGB-UTF16-V +104 -0
  63. data/data/hexapdf/cmap/UniJIS-UCS2-H +8870 -0
  64. data/data/hexapdf/cmap/UniJIS-UCS2-HW-H +81 -0
  65. data/data/hexapdf/cmap/UniJIS-UCS2-HW-V +279 -0
  66. data/data/hexapdf/cmap/UniJIS-UCS2-V +275 -0
  67. data/data/hexapdf/cmap/UniJIS-UTF16-H +14450 -0
  68. data/data/hexapdf/cmap/UniJIS-UTF16-V +299 -0
  69. data/data/hexapdf/cmap/UniKS-UCS2-H +8725 -0
  70. data/data/hexapdf/cmap/UniKS-UCS2-V +95 -0
  71. data/data/hexapdf/cmap/UniKS-UTF16-H +8895 -0
  72. data/data/hexapdf/cmap/UniKS-UTF16-V +99 -0
  73. data/data/hexapdf/cmap/V +105 -0
  74. data/examples/arc.rb +3 -3
  75. data/examples/merging.rb +4 -1
  76. data/examples/optimizing.rb +3 -0
  77. data/examples/show_char_bboxes.rb +2 -2
  78. data/examples/truetype.rb +2 -2
  79. data/lib/hexapdf/cli.rb +40 -1
  80. data/lib/hexapdf/cli/batch.rb +72 -0
  81. data/lib/hexapdf/cli/command.rb +112 -15
  82. data/lib/hexapdf/cli/files.rb +2 -2
  83. data/lib/hexapdf/cli/images.rb +14 -6
  84. data/lib/hexapdf/cli/info.rb +6 -8
  85. data/lib/hexapdf/cli/inspect.rb +5 -8
  86. data/lib/hexapdf/cli/merge.rb +13 -20
  87. data/lib/hexapdf/cli/modify.rb +4 -7
  88. data/lib/hexapdf/cli/optimize.rb +2 -5
  89. data/lib/hexapdf/configuration.rb +32 -3
  90. data/lib/hexapdf/content/canvas.rb +130 -37
  91. data/lib/hexapdf/content/parser.rb +40 -6
  92. data/lib/hexapdf/content/processor.rb +4 -4
  93. data/lib/hexapdf/document.rb +40 -10
  94. data/lib/hexapdf/document/fonts.rb +1 -0
  95. data/lib/hexapdf/encryption/security_handler.rb +8 -12
  96. data/lib/hexapdf/filter/flate_decode.rb +25 -2
  97. data/lib/hexapdf/font/cmap.rb +124 -8
  98. data/lib/hexapdf/font/cmap/parser.rb +65 -15
  99. data/lib/hexapdf/font/encoding/base.rb +2 -2
  100. data/lib/hexapdf/font/encoding/glyph_list.rb +2 -4
  101. data/lib/hexapdf/font/true_type.rb +1 -0
  102. data/lib/hexapdf/font/true_type/builder.rb +75 -0
  103. data/lib/hexapdf/font/true_type/optimizer.rb +65 -0
  104. data/lib/hexapdf/font/true_type/subsetter.rb +9 -22
  105. data/lib/hexapdf/font/true_type_wrapper.rb +9 -21
  106. data/lib/hexapdf/font_loader.rb +1 -1
  107. data/lib/hexapdf/importer.rb +1 -1
  108. data/lib/hexapdf/serializer.rb +5 -3
  109. data/lib/hexapdf/type.rb +2 -0
  110. data/lib/hexapdf/type/cid_font.rb +120 -0
  111. data/lib/hexapdf/type/font.rb +32 -12
  112. data/lib/hexapdf/type/font_simple.rb +34 -42
  113. data/lib/hexapdf/type/font_type0.rb +148 -0
  114. data/lib/hexapdf/type/form.rb +4 -4
  115. data/lib/hexapdf/type/page.rb +12 -11
  116. data/lib/hexapdf/type/resources.rb +14 -0
  117. data/lib/hexapdf/utils/graphics_helpers.rb +77 -0
  118. data/lib/hexapdf/version.rb +1 -1
  119. data/man/man1/hexapdf.1 +43 -1
  120. data/test/hexapdf/content/test_canvas.rb +76 -0
  121. data/test/hexapdf/content/test_parser.rb +20 -1
  122. data/test/hexapdf/content/test_processor.rb +11 -7
  123. data/test/hexapdf/document/test_fonts.rb +3 -1
  124. data/test/hexapdf/font/cmap/test_parser.rb +42 -7
  125. data/test/hexapdf/font/encoding/test_base.rb +1 -1
  126. data/test/hexapdf/font/encoding/test_glyph_list.rb +3 -3
  127. data/test/hexapdf/font/test_cmap.rb +104 -0
  128. data/test/hexapdf/font/test_true_type_wrapper.rb +63 -46
  129. data/test/hexapdf/font/true_type/test_builder.rb +37 -0
  130. data/test/hexapdf/font/true_type/test_optimizer.rb +27 -0
  131. data/test/hexapdf/font/true_type/test_subsetter.rb +6 -13
  132. data/test/hexapdf/test_configuration.rb +12 -7
  133. data/test/hexapdf/test_document.rb +24 -0
  134. data/test/hexapdf/test_importer.rb +9 -1
  135. data/test/hexapdf/test_writer.rb +2 -2
  136. data/test/hexapdf/type/test_cid_font.rb +61 -0
  137. data/test/hexapdf/type/test_font.rb +31 -4
  138. data/test/hexapdf/type/test_font_simple.rb +6 -21
  139. data/test/hexapdf/type/test_font_type0.rb +114 -0
  140. data/test/hexapdf/type/test_resources.rb +17 -1
  141. data/test/hexapdf/utils/test_graphics_helpers.rb +29 -0
  142. metadata +82 -3
@@ -69,7 +69,7 @@ module HexaPDF
69
69
  end
70
70
 
71
71
  def execute(pdf) #:nodoc:
72
- HexaPDF::Document.open(pdf, decryption_opts: {password: @password}) do |doc|
72
+ with_document(pdf, password: @password) do |doc|
73
73
  if @indices.empty?
74
74
  list_files(doc)
75
75
  else
@@ -103,7 +103,7 @@ module HexaPDF
103
103
  each_file(doc) do |obj, index|
104
104
  next unless @indices.include?(index + 1) || @indices.include?(0)
105
105
  maybe_raise_on_existing_file(obj.path)
106
- puts "Extracting #{obj.path}..."
106
+ puts "Extracting #{obj.path}..." if command_parser.verbosity_info?
107
107
  File.open(obj.path, 'wb') do |file|
108
108
  fiber = obj.embedded_file_stream.stream_decoder
109
109
  while fiber.alive? && (data = fiber.resume)
@@ -31,6 +31,7 @@
31
31
  # is created or manipulated using HexaPDF.
32
32
  #++
33
33
 
34
+ require 'set'
34
35
  require 'hexapdf/cli/command'
35
36
 
36
37
  module HexaPDF
@@ -77,7 +78,7 @@ module HexaPDF
77
78
  end
78
79
 
79
80
  def execute(pdf) #:nodoc:
80
- HexaPDF::Document.open(pdf, decryption_opts: {password: @password}) do |doc|
81
+ with_document(pdf, password: @password) do |doc|
81
82
  if @indices.empty?
82
83
  list_images(doc)
83
84
  else
@@ -106,12 +107,19 @@ module HexaPDF
106
107
 
107
108
  # Extracts the images with the given indices.
108
109
  def extract_images(doc)
110
+ done = Set.new
109
111
  each_image(doc) do |image, index, _|
110
- next unless @indices.include?(index) || @indices.include?(0)
111
- path = "#{@prefix}-#{index}.#{image.info.extension}"
112
- maybe_raise_on_existing_file(path)
113
- puts "Extracting #{path}..."
114
- image.write(path)
112
+ next unless (@indices.include?(index) || @indices.include?(0)) && !done.include?(index)
113
+ info = image.info
114
+ if info.writable
115
+ path = "#{@prefix}-#{index}.#{image.info.extension}"
116
+ maybe_raise_on_existing_file(path)
117
+ puts "Extracting #{path}..." if command_parser.verbosity_info?
118
+ image.write(path)
119
+ done << index
120
+ elsif command_parser.verbosity_warning?
121
+ $stderr.puts "Warning (image #{index}): PDF image format not supported for writing"
122
+ end
115
123
  end
116
124
  end
117
125
 
@@ -73,9 +73,11 @@ module HexaPDF
73
73
  COLUMN_WIDTH = 20 #:nodoc:
74
74
 
75
75
  def output_info(file) # :nodoc:
76
- options = {decryption_opts: {password: @password},
77
- config: {'document.auto_decrypt' => @auto_decrypt}}
76
+ options = pdf_options(@password)
77
+ options[:config]['document.auto_decrypt'] = @auto_decrypt
78
78
  HexaPDF::Document.open(file, options) do |doc|
79
+ output_line("File name", file)
80
+ output_line("File size", File.stat(file).size.to_s + " bytes")
79
81
  INFO_KEYS.each do |name|
80
82
  next unless doc.trailer.info.key?(name)
81
83
  output_line(name.to_s, doc.trailer.info[name].to_s)
@@ -98,17 +100,13 @@ module HexaPDF
98
100
  output_line("Pages", doc.pages.count.to_s)
99
101
  output_line("Version", doc.version)
100
102
  end
101
- rescue HexaPDF::EncryptionError => e
103
+ rescue HexaPDF::EncryptionError
102
104
  if @auto_decrypt
103
105
  @auto_decrypt = false
104
106
  retry
105
107
  else
106
- $stderr.puts "Error while decrypting the PDF file: #{e.message}"
107
- exit(1)
108
+ raise
108
109
  end
109
- rescue HexaPDF::Error => e
110
- $stderr.puts "Error while processing the PDF file: #{e.message}"
111
- exit(1)
112
110
  end
113
111
 
114
112
  def output_line(header, text) #:nodoc:
@@ -94,12 +94,7 @@ module HexaPDF
94
94
  end
95
95
 
96
96
  def execute(file) #:nodoc:
97
- HexaPDF::Document.open(file, decryption_opts: {password: @password}) do |doc|
98
- send("do_#{@exec}", doc)
99
- end
100
- rescue HexaPDF::Error => e
101
- $stderr.puts "Error while processing the PDF file: #{e.message}"
102
- exit(1)
97
+ with_document(file, password: @password) {|doc| send("do_#{@exec}", doc)}
103
98
  end
104
99
 
105
100
  private
@@ -130,7 +125,9 @@ module HexaPDF
130
125
  def do_object(doc) #:nodoc:
131
126
  object = doc.object(pdf_reference_from_string(@param))
132
127
  return unless object
133
- $stderr.puts("Note: Object also has stream data") if object.data.stream
128
+ if object.data.stream && command_parser.verbosity_info?
129
+ $stderr.puts("Note: Object also has stream data")
130
+ end
134
131
  puts HexaPDF::Serializer.new.serialize(object.value)
135
132
  end
136
133
 
@@ -141,7 +138,7 @@ module HexaPDF
141
138
  while source.alive? && (data = source.resume)
142
139
  $stdout.write(data)
143
140
  end
144
- else
141
+ elsif command_parser.verbosity_info?
145
142
  $stderr.puts("Note: Object has no stream data")
146
143
  end
147
144
  end
@@ -105,8 +105,15 @@ module HexaPDF
105
105
  # Create PDF documents for each input file
106
106
  cache = {}
107
107
  @files.each do |spec|
108
- cache[spec.file] ||= HexaPDF::Document.new(io: File.open(spec.file),
109
- decryption_opts: {password: spec.password})
108
+ cache[spec.file] ||=
109
+ begin
110
+ io = if spec.file == output_file
111
+ StringIO.new(File.binread(spec.file))
112
+ else
113
+ File.open(spec.file)
114
+ end
115
+ HexaPDF::Document.new(io: io, **pdf_options(spec.password))
116
+ end
110
117
  spec.file = cache[spec.file]
111
118
  end
112
119
 
@@ -115,24 +122,13 @@ module HexaPDF
115
122
  page_tree = target.add(Type: :Pages)
116
123
  import_pages(page_tree)
117
124
  target.catalog[:Pages] = page_tree
118
-
119
- # Remove potentially imported but unused pages and page tree nodes
120
- retained = target.pages.each_with_object({}) {|page, h| h[page.data] = true}
121
- retained[target.pages.root.data] = true
122
- target.each(current: false) do |obj|
123
- next unless obj.kind_of?(HexaPDF::Dictionary)
124
- if (obj.type == :Pages || obj.type == :Page) && !retained.key?(obj.data)
125
- target.delete(obj)
126
- end
127
- end
125
+ remove_unused_pages(target)
126
+ target.pages.add unless target.pages.count > 0
128
127
 
129
128
  apply_encryption_options(target)
130
129
  apply_optimization_options(target)
131
130
 
132
- target.write(output_file)
133
- rescue HexaPDF::Error => e
134
- $stderr.puts "Processing error : #{e.message}"
135
- exit(1)
131
+ write_document(target, output_file)
136
132
  end
137
133
 
138
134
  def usage #:nodoc:
@@ -148,10 +144,7 @@ module HexaPDF
148
144
  @files.each do |s|
149
145
  page_list = s.file.pages.to_a
150
146
  s.pages = parse_pages_specification(s.pages, s.file.pages.count)
151
- s.pages.each do |arr|
152
- arr[0] = page_list[arr[0]]
153
- arr[1] = arr[0].value[:Rotate] || :none unless arr[1]
154
- end
147
+ s.pages.each {|arr| arr[0] = page_list[arr[0]]}
155
148
  end
156
149
 
157
150
  if @interleave
@@ -77,16 +77,12 @@ module HexaPDF
77
77
 
78
78
  def execute(in_file, out_file) #:nodoc:
79
79
  maybe_raise_on_existing_file(out_file)
80
- HexaPDF::Document.open(in_file, decryption_opts: {password: @password}) do |doc|
80
+ with_document(in_file, password: @password, out_file: out_file) do |doc|
81
81
  arrange_pages(doc) unless @pages == '1-e'
82
82
  @embed_files.each {|file| doc.files.add(file, embed: true)}
83
83
  apply_encryption_options(doc)
84
84
  apply_optimization_options(doc)
85
- doc.write(out_file)
86
85
  end
87
- rescue HexaPDF::Error => e
88
- $stderr.puts "Processing error : #{e.message}"
89
- exit(1)
90
86
  end
91
87
 
92
88
  private
@@ -100,13 +96,14 @@ module HexaPDF
100
96
  page.value.update(page.copy_inherited_values)
101
97
  if rotation == :none
102
98
  page.delete(:Rotate)
103
- else
99
+ elsif rotation.kind_of?(Integer)
104
100
  page[:Rotate] = ((page[:Rotate] || 0) + rotation) % 360
105
101
  end
106
102
  new_page_tree.add_page(page)
107
103
  end
108
- doc.delete(doc.catalog.delete(:Pages))
109
104
  doc.catalog[:Pages] = new_page_tree
105
+ remove_unused_pages(doc)
106
+ doc.pages.add unless doc.pages.count > 0
110
107
  end
111
108
 
112
109
  end
@@ -54,6 +54,7 @@ module HexaPDF
54
54
  @out_options.xref_streams = :generate
55
55
  @out_options.object_streams = :generate
56
56
  @out_options.streams = :compress
57
+ @out_options.optimize_fonts = true
57
58
 
58
59
  options.on("--password PASSWORD", "-p", String,
59
60
  "The password for decryption. Use - for reading from standard input.") do |pwd|
@@ -67,14 +68,10 @@ module HexaPDF
67
68
 
68
69
  def execute(in_file, out_file) #:nodoc:
69
70
  maybe_raise_on_existing_file(out_file)
70
- HexaPDF::Document.open(in_file, decryption_opts: {password: @password}) do |doc|
71
+ with_document(in_file, password: @password, out_file: out_file) do |doc|
71
72
  optimize_page_tree(doc)
72
73
  apply_optimization_options(doc)
73
- doc.write(out_file)
74
74
  end
75
- rescue HexaPDF::Error => e
76
- $stderr.puts "Processing error : #{e.message}"
77
- exit(1)
78
75
  end
79
76
 
80
77
  private
@@ -92,13 +92,13 @@ module HexaPDF
92
92
  end
93
93
 
94
94
  # :call-seq:
95
- # config.constantize(name, key = nil) -> constant or nil
95
+ # config.constantize(name, key = nil) -> constant
96
96
  # config.constantize(name, key = nil) {|name| block} -> obj
97
97
  #
98
98
  # Returns the constant the option +name+ is referring to. If +key+ is provided and the value
99
99
  # of the option +name+ responds to \#[], the constant to which +key+ refers is returned.
100
100
  #
101
- # If no constant can be found and no block is provided, +nil+ is returned. If a block is
101
+ # If no constant can be found and no block is provided, an error is raised. If a block is
102
102
  # provided it is called with the option name and its result will be returned.
103
103
  #
104
104
  # config.constantize('encryption.aes') #=> HexaPDF::Encryption::FastAES
@@ -107,7 +107,12 @@ module HexaPDF
107
107
  data = self[name]
108
108
  data = data[key] if key != :__unset && data.respond_to?(:[])
109
109
  (data = ::Object.const_get(data) rescue nil) if data.kind_of?(String)
110
- data = yield(name) if block_given? && data.nil?
110
+ if data.nil? && block_given?
111
+ data = yield(name)
112
+ elsif data.nil?
113
+ raise HexaPDF::Error, "Error getting constant for configuration option '#{name}'" <<
114
+ (key == :__unset ? "" : " and key '#{key}'")
115
+ end
111
116
  data
112
117
  end
113
118
 
@@ -160,6 +165,15 @@ module HexaPDF
160
165
  #
161
166
  # The default implementation raises an error.
162
167
  #
168
+ # font.on_missing_unicode_mapping::
169
+ # Callback hook when a character code point cannot be converted to a Unicode character.
170
+ #
171
+ # The value needs to be an object that responds to \#call(code, font_dict) where +code+ is the
172
+ # decoded code point and +font_dict+ is the font dictionary which was used for the conversion.
173
+ # The returned value is used as the Unicode character and should be a string.
174
+ #
175
+ # The default implementation raises an error.
176
+ #
163
177
  # font_loader::
164
178
  # An array with font loader implementations. When a font should be loaded, the array is
165
179
  # iterated in sequence and the first valid font returned by a font loader is used.
@@ -222,6 +236,10 @@ module HexaPDF
222
236
  'font.on_missing_glyph' => proc do |n, f|
223
237
  raise HexaPDF::Error, "No glyph for '#{n}' in font #{f.font_name} found"
224
238
  end,
239
+ 'font.on_missing_unicode_mapping' => proc do |code_point, font|
240
+ raise HexaPDF::Error, "No Unicode mapping for code point #{code_point} " \
241
+ "in font #{font[:BaseFont]}"
242
+ end,
225
243
  'font_loader' => [
226
244
  'HexaPDF::FontLoader::Standard14',
227
245
  'HexaPDF::FontLoader::FromConfiguration',
@@ -283,6 +301,13 @@ module HexaPDF
283
301
  # Specifies the compression level that should be used with the FlateDecode filter. The level
284
302
  # can range from 0 (no compression), 1 (best speed) to 9 (best compression, default).
285
303
  #
304
+ # filter.flate_memory::
305
+ # Specifies the memory level that should be used with the FlateDecode filter. The level can
306
+ # range from 1 (minimum memory usage; slow, reduces compression) to 9 (maximum memory usage).
307
+ #
308
+ # The HexaPDF default value of 6 has been found in tests to be nearly equivalent to the Zlib
309
+ # default of 8 in terms of speed and compression level but uses less memory.
310
+ #
286
311
  # filter.map::
287
312
  # A mapping from a PDF name (a Symbol) to a filter object (see Filter). If the value is a
288
313
  # String, it should contain the name of a constant that contains a filter object.
@@ -328,6 +353,7 @@ module HexaPDF
328
353
  'encryption.sub_filter_map' => {
329
354
  },
330
355
  'filter.flate_compression' => 9,
356
+ 'filter.flate_memory' => 6,
331
357
  'filter.map' => {
332
358
  ASCIIHexDecode: 'HexaPDF::Filter::ASCIIHexDecode',
333
359
  AHx: 'HexaPDF::Filter::ASCIIHexDecode',
@@ -381,8 +407,11 @@ module HexaPDF
381
407
  'object.subtype_map' => {
382
408
  Image: 'HexaPDF::Type::Image',
383
409
  Form: 'HexaPDF::Type::Form',
410
+ Type0: 'HexaPDF::Type::FontType0',
384
411
  Type1: 'HexaPDF::Type::FontType1',
385
412
  TrueType: 'HexaPDF::Type::FontTrueType',
413
+ CIDFontType0: 'HexaPDF::Type::CIDFont',
414
+ CIDFontType2: 'HexaPDF::Type::CIDFont',
386
415
  },
387
416
  'task.map' => {
388
417
  optimize: 'HexaPDF::Task::Optimize',
@@ -35,6 +35,7 @@ require 'hexapdf/content/graphics_state'
35
35
  require 'hexapdf/content/operator'
36
36
  require 'hexapdf/serializer'
37
37
  require 'hexapdf/utils/math_helpers'
38
+ require 'hexapdf/utils/graphics_helpers'
38
39
  require 'hexapdf/content/graphic_object'
39
40
  require 'hexapdf/stream'
40
41
 
@@ -132,6 +133,7 @@ module HexaPDF
132
133
  class Canvas
133
134
 
134
135
  include HexaPDF::Utils::MathHelpers
136
+ include HexaPDF::Utils::GraphicsHelpers
135
137
 
136
138
  # The context for which the canvas was created (a HexaPDF::Type::Page or HexaPDF::Type::Form
137
139
  # object).
@@ -1693,7 +1695,7 @@ module HexaPDF
1693
1695
  #
1694
1696
  # Low-level method for actually showing text on the canvas.
1695
1697
  #
1696
- # The argument +data+ needs to be a an array of glyph objects valid for the current font,
1698
+ # The argument +glyphs+ needs to be an array of glyph objects valid for the current font,
1697
1699
  # optionally interspersed with numbers for kerning.
1698
1700
  #
1699
1701
  # Text is always shown at the current position of the text cursor, i.e. the origin of the text
@@ -1702,13 +1704,13 @@ module HexaPDF
1702
1704
  #
1703
1705
  # The text matrix is updated to correctly represent the graphics state after the invocation.
1704
1706
  #
1705
- # This method is usually not invoked directly but by higher level methods like #show_text.
1706
- def show_glyphs(data)
1707
+ # This method is usually not invoked directly but by higher level methods like #text.
1708
+ def show_glyphs(glyphs)
1707
1709
  begin_text
1708
1710
 
1709
1711
  result = [''.b]
1710
1712
  offset = 0
1711
- data.each do |item|
1713
+ glyphs.each do |item|
1712
1714
  if item.kind_of?(Numeric)
1713
1715
  result << item << ''.b
1714
1716
  offset -= item * graphics_state.scaled_font_size
@@ -1718,7 +1720,7 @@ module HexaPDF
1718
1720
 
1719
1721
  offset += item.width * graphics_state.scaled_font_size +
1720
1722
  graphics_state.scaled_character_spacing
1721
- offset += graphics_state.scaled_word_spacing if encoded.length == 1 && item.space?
1723
+ offset += graphics_state.scaled_word_spacing if encoded == " ".freeze
1722
1724
  end
1723
1725
  end
1724
1726
 
@@ -1727,6 +1729,124 @@ module HexaPDF
1727
1729
  self
1728
1730
  end
1729
1731
 
1732
+ # :call-seq:
1733
+ # canvas.show_glyphs_only(glyphs) -> canvas
1734
+ #
1735
+ # Same operation as with #show_glyphs but without updating the text matrix.
1736
+ #
1737
+ # This method should only be used by advanced text layouting algorithms which perform the
1738
+ # necessary calculations themselves!
1739
+ #
1740
+ # *Warning*: Since this method doesn't update the text matrix, all following results from
1741
+ # #text_cursor and other methods using the current text matrix are invalid until the next call
1742
+ # to #text_matrix or #end_text.
1743
+ def show_glyphs_only(glyphs)
1744
+ begin_text
1745
+
1746
+ result = [''.b]
1747
+ glyphs.each do |item|
1748
+ if item.kind_of?(Numeric)
1749
+ result << item << ''.b
1750
+ else
1751
+ result[-1] << @font.encode(item)
1752
+ end
1753
+ end
1754
+
1755
+ serialize1(:TJ, result)
1756
+ self
1757
+ end
1758
+
1759
+ # :call-seq:
1760
+ # canvas.marked_content_point(tag, property_list: nil) -> canvas
1761
+ #
1762
+ # Inserts a marked-content point, optionally associated with a property list.
1763
+ #
1764
+ # A marked-content point is used to identify a position in the content stream for later use by
1765
+ # other applications. The symbol +tag+ is used to uniquely identify the role of the
1766
+ # marked-content point and should be registered with ISO to avoid conflicts.
1767
+ #
1768
+ # The optional +property_list+ argument can either be a valid PDF dictionary or a symbol
1769
+ # referencing an already used property list in the resource dictionary's /Properties
1770
+ # dictionary.
1771
+ #
1772
+ # Examples:
1773
+ #
1774
+ # canvas.marked_content_point(:Divider)
1775
+ # canvas.marked_content_point(:Divider, property_list: {Key: 'value'})
1776
+ #
1777
+ # See: PDF1.7 s14.6
1778
+ def marked_content_point(tag, property_list: nil)
1779
+ raise_unless_at_page_description_level_or_in_text
1780
+ if property_list
1781
+ property_list = resources.property_list(property_list) if property_list.kind_of?(Symbol)
1782
+ invoke2(:DP, tag, resources.add_property_list(property_list))
1783
+ else
1784
+ invoke1(:MP, tag)
1785
+ end
1786
+ self
1787
+ end
1788
+
1789
+ # :call-seq:
1790
+ # canvas.marked_content_sequence(tag, property_list: nil) -> canvas
1791
+ # canvas.marked_content_sequence(tag, property_list: nil) { block } -> canvas
1792
+ #
1793
+ # Inserts a marked-content sequence, optionally associated with a property list.
1794
+ #
1795
+ # A marked-content sequence is used to identify a sequence of complete graphics objects in the
1796
+ # content stream for later use by other applications. The symbol +tag+ is used to uniquely
1797
+ # identify the role of the marked-content sequence and should be registered with ISO to avoid
1798
+ # conflicts.
1799
+ #
1800
+ # The optional +property_list+ argument can either be a valid PDF dictionary or a symbol
1801
+ # referencing an already used property list in the resource dictionary's /Properties
1802
+ # dictionary.
1803
+ #
1804
+ # If invoked without a block, a corresponding call to #end_marked_content_sequence must be
1805
+ # done. Otherwise the marked-content sequence automatically ends when the block is finished.
1806
+ #
1807
+ # Although the PDF specification would allow using marked-content sequences inside text
1808
+ # objects, this is prohibited.
1809
+ #
1810
+ # Examples:
1811
+ #
1812
+ # canvas.marked_content_sequence(:Divider)
1813
+ # # Other instructions
1814
+ # canvas.end_marked_content_sequence
1815
+ #
1816
+ # canvas.marked_content_sequence(:Divider, property_list: {Key: 'value'}) do
1817
+ # # Other instructions
1818
+ # end
1819
+ #
1820
+ # See: PDF1.7 s14.6, #end_marked_content_sequence
1821
+ def marked_content_sequence(tag, property_list: nil)
1822
+ raise_unless_at_page_description_level
1823
+ if property_list
1824
+ property_list = resources.property_list(property_list) if property_list.kind_of?(Symbol)
1825
+ invoke2(:BDC, tag, resources.add_property_list(property_list))
1826
+ else
1827
+ invoke1(:BMC, tag)
1828
+ end
1829
+ if block_given?
1830
+ yield
1831
+ end_marked_content_sequence
1832
+ end
1833
+ self
1834
+ end
1835
+
1836
+ # :call-seq:
1837
+ # canvas.end_marked_content_sequence -> canvas
1838
+ #
1839
+ # Ends a marked-content sequence.
1840
+ #
1841
+ # See #marked_content_sequence for details.
1842
+ #
1843
+ # See: PDF1.7 s14.6, #marked_content_sequence
1844
+ def end_marked_content_sequence
1845
+ raise_unless_at_page_description_level
1846
+ invoke0(:EMC)
1847
+ self
1848
+ end
1849
+
1730
1850
  private
1731
1851
 
1732
1852
  # Invokes the given operator with the operands and serializes it.
@@ -1752,6 +1872,11 @@ module HexaPDF
1752
1872
  @contents << @operators[operator].serialize(@serializer, op1)
1753
1873
  end
1754
1874
 
1875
+ # Optimized method for one operand.
1876
+ def serialize1(operator, op1)
1877
+ @contents << @operators[operator].serialize(@serializer, op1)
1878
+ end
1879
+
1755
1880
  # Optimized method for two operands.
1756
1881
  def invoke2(operator, op1, op2)
1757
1882
  @operators[operator].invoke(self, op1, op2)
@@ -1936,38 +2061,6 @@ module HexaPDF
1936
2061
  curve_to(p3[0], p3[1], p1: p1, p2: p2)
1937
2062
  end
1938
2063
 
1939
- # Given two points p0 = (x0, y0) and p1 = (x1, y1), returns the point on the line through
1940
- # these points that is +distance+ units away from p0.
1941
- #
1942
- # v = p1 - p0
1943
- # result = p0 + distance * v/norm(v)
1944
- def point_on_line(x0, y0, x1, y1, distance:)
1945
- norm = Math.sqrt((x1 - x0)**2 + (y1 - y0)**2)
1946
- [x0 + distance / norm * (x1 - x0), y0 + distance / norm * (y1 - y0)]
1947
- end
1948
-
1949
- # Calculates and returns the requested dimensions for the rectangular object with the given
1950
- # +width+ and +height+ based on the options.
1951
- #
1952
- # +rwidth+::
1953
- # The requested width. If +rheight+ is not specified, it is chosen so that the aspect
1954
- # ratio is maintained
1955
- #
1956
- # +rheight+::
1957
- # The requested height. If +rwidth+ is not specified, it is chosen so that the aspect
1958
- # ratio is maintained
1959
- def calculate_dimensions(width, height, rwidth: nil, rheight: nil)
1960
- if rwidth && rheight
1961
- [rwidth, rheight]
1962
- elsif rwidth
1963
- [rwidth, height * rwidth / width.to_f]
1964
- elsif rheight
1965
- [width * rheight / height.to_f, rheight]
1966
- else
1967
- [width, height]
1968
- end
1969
- end
1970
-
1971
2064
  end
1972
2065
 
1973
2066
  end