hexapdf 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +68 -0
- data/CONTRIBUTERS +1 -1
- data/README.md +35 -4
- data/Rakefile +1 -0
- data/VERSION +1 -1
- data/data/hexapdf/cmap/83pv-RKSJ-H +314 -0
- data/data/hexapdf/cmap/90ms-RKSJ-H +259 -0
- data/data/hexapdf/cmap/90ms-RKSJ-V +156 -0
- data/data/hexapdf/cmap/90msp-RKSJ-H +257 -0
- data/data/hexapdf/cmap/90msp-RKSJ-V +155 -0
- data/data/hexapdf/cmap/90pv-RKSJ-H +355 -0
- data/data/hexapdf/cmap/Add-RKSJ-H +738 -0
- data/data/hexapdf/cmap/Add-RKSJ-V +135 -0
- data/data/hexapdf/cmap/Adobe-CNS1-UCS2 +18209 -0
- data/data/hexapdf/cmap/Adobe-GB1-UCS2 +14267 -0
- data/data/hexapdf/cmap/Adobe-Japan1-UCS2 +19159 -0
- data/data/hexapdf/cmap/Adobe-Korea1-UCS2 +9267 -0
- data/data/hexapdf/cmap/B5pc-H +337 -0
- data/data/hexapdf/cmap/B5pc-V +90 -0
- data/data/hexapdf/cmap/CNS-EUC-H +490 -0
- data/data/hexapdf/cmap/CNS-EUC-V +538 -0
- data/data/hexapdf/cmap/ETen-B5-H +343 -0
- data/data/hexapdf/cmap/ETen-B5-V +91 -0
- data/data/hexapdf/cmap/ETenms-B5-H +79 -0
- data/data/hexapdf/cmap/ETenms-B5-V +99 -0
- data/data/hexapdf/cmap/EUC-H +207 -0
- data/data/hexapdf/cmap/EUC-V +105 -0
- data/data/hexapdf/cmap/Ext-RKSJ-H +768 -0
- data/data/hexapdf/cmap/Ext-RKSJ-V +117 -0
- data/data/hexapdf/cmap/GB-EUC-H +173 -0
- data/data/hexapdf/cmap/GB-EUC-V +98 -0
- data/data/hexapdf/cmap/GBK-EUC-H +4273 -0
- data/data/hexapdf/cmap/GBK-EUC-V +97 -0
- data/data/hexapdf/cmap/GBK2K-H +5325 -0
- data/data/hexapdf/cmap/GBK2K-V +118 -0
- data/data/hexapdf/cmap/GBKp-EUC-H +4272 -0
- data/data/hexapdf/cmap/GBKp-EUC-V +97 -0
- data/data/hexapdf/cmap/GBpc-EUC-H +175 -0
- data/data/hexapdf/cmap/GBpc-EUC-V +98 -0
- data/data/hexapdf/cmap/H +200 -0
- data/data/hexapdf/cmap/HKscs-B5-H +1331 -0
- data/data/hexapdf/cmap/HKscs-B5-V +90 -0
- data/data/hexapdf/cmap/Identity-H +339 -0
- data/data/hexapdf/cmap/Identity-V +73 -0
- data/data/hexapdf/cmap/KSC-EUC-H +562 -0
- data/data/hexapdf/cmap/KSC-EUC-V +94 -0
- data/data/hexapdf/cmap/KSCms-UHC-H +776 -0
- data/data/hexapdf/cmap/KSCms-UHC-HW-H +775 -0
- data/data/hexapdf/cmap/KSCms-UHC-HW-V +93 -0
- data/data/hexapdf/cmap/KSCms-UHC-V +94 -0
- data/data/hexapdf/cmap/KSCpc-EUC-H +608 -0
- data/data/hexapdf/cmap/LICENSE.txt +26 -0
- data/data/hexapdf/cmap/README.txt +9 -0
- data/data/hexapdf/cmap/UniCNS-UCS2-H +16992 -0
- data/data/hexapdf/cmap/UniCNS-UCS2-V +90 -0
- data/data/hexapdf/cmap/UniCNS-UTF16-H +19117 -0
- data/data/hexapdf/cmap/UniCNS-UTF16-V +94 -0
- data/data/hexapdf/cmap/UniGB-UCS2-H +14321 -0
- data/data/hexapdf/cmap/UniGB-UCS2-V +101 -0
- data/data/hexapdf/cmap/UniGB-UTF16-H +14381 -0
- data/data/hexapdf/cmap/UniGB-UTF16-V +104 -0
- data/data/hexapdf/cmap/UniJIS-UCS2-H +8870 -0
- data/data/hexapdf/cmap/UniJIS-UCS2-HW-H +81 -0
- data/data/hexapdf/cmap/UniJIS-UCS2-HW-V +279 -0
- data/data/hexapdf/cmap/UniJIS-UCS2-V +275 -0
- data/data/hexapdf/cmap/UniJIS-UTF16-H +14450 -0
- data/data/hexapdf/cmap/UniJIS-UTF16-V +299 -0
- data/data/hexapdf/cmap/UniKS-UCS2-H +8725 -0
- data/data/hexapdf/cmap/UniKS-UCS2-V +95 -0
- data/data/hexapdf/cmap/UniKS-UTF16-H +8895 -0
- data/data/hexapdf/cmap/UniKS-UTF16-V +99 -0
- data/data/hexapdf/cmap/V +105 -0
- data/examples/arc.rb +3 -3
- data/examples/merging.rb +4 -1
- data/examples/optimizing.rb +3 -0
- data/examples/show_char_bboxes.rb +2 -2
- data/examples/truetype.rb +2 -2
- data/lib/hexapdf/cli.rb +40 -1
- data/lib/hexapdf/cli/batch.rb +72 -0
- data/lib/hexapdf/cli/command.rb +112 -15
- data/lib/hexapdf/cli/files.rb +2 -2
- data/lib/hexapdf/cli/images.rb +14 -6
- data/lib/hexapdf/cli/info.rb +6 -8
- data/lib/hexapdf/cli/inspect.rb +5 -8
- data/lib/hexapdf/cli/merge.rb +13 -20
- data/lib/hexapdf/cli/modify.rb +4 -7
- data/lib/hexapdf/cli/optimize.rb +2 -5
- data/lib/hexapdf/configuration.rb +32 -3
- data/lib/hexapdf/content/canvas.rb +130 -37
- data/lib/hexapdf/content/parser.rb +40 -6
- data/lib/hexapdf/content/processor.rb +4 -4
- data/lib/hexapdf/document.rb +40 -10
- data/lib/hexapdf/document/fonts.rb +1 -0
- data/lib/hexapdf/encryption/security_handler.rb +8 -12
- data/lib/hexapdf/filter/flate_decode.rb +25 -2
- data/lib/hexapdf/font/cmap.rb +124 -8
- data/lib/hexapdf/font/cmap/parser.rb +65 -15
- data/lib/hexapdf/font/encoding/base.rb +2 -2
- data/lib/hexapdf/font/encoding/glyph_list.rb +2 -4
- data/lib/hexapdf/font/true_type.rb +1 -0
- data/lib/hexapdf/font/true_type/builder.rb +75 -0
- data/lib/hexapdf/font/true_type/optimizer.rb +65 -0
- data/lib/hexapdf/font/true_type/subsetter.rb +9 -22
- data/lib/hexapdf/font/true_type_wrapper.rb +9 -21
- data/lib/hexapdf/font_loader.rb +1 -1
- data/lib/hexapdf/importer.rb +1 -1
- data/lib/hexapdf/serializer.rb +5 -3
- data/lib/hexapdf/type.rb +2 -0
- data/lib/hexapdf/type/cid_font.rb +120 -0
- data/lib/hexapdf/type/font.rb +32 -12
- data/lib/hexapdf/type/font_simple.rb +34 -42
- data/lib/hexapdf/type/font_type0.rb +148 -0
- data/lib/hexapdf/type/form.rb +4 -4
- data/lib/hexapdf/type/page.rb +12 -11
- data/lib/hexapdf/type/resources.rb +14 -0
- data/lib/hexapdf/utils/graphics_helpers.rb +77 -0
- data/lib/hexapdf/version.rb +1 -1
- data/man/man1/hexapdf.1 +43 -1
- data/test/hexapdf/content/test_canvas.rb +76 -0
- data/test/hexapdf/content/test_parser.rb +20 -1
- data/test/hexapdf/content/test_processor.rb +11 -7
- data/test/hexapdf/document/test_fonts.rb +3 -1
- data/test/hexapdf/font/cmap/test_parser.rb +42 -7
- data/test/hexapdf/font/encoding/test_base.rb +1 -1
- data/test/hexapdf/font/encoding/test_glyph_list.rb +3 -3
- data/test/hexapdf/font/test_cmap.rb +104 -0
- data/test/hexapdf/font/test_true_type_wrapper.rb +63 -46
- data/test/hexapdf/font/true_type/test_builder.rb +37 -0
- data/test/hexapdf/font/true_type/test_optimizer.rb +27 -0
- data/test/hexapdf/font/true_type/test_subsetter.rb +6 -13
- data/test/hexapdf/test_configuration.rb +12 -7
- data/test/hexapdf/test_document.rb +24 -0
- data/test/hexapdf/test_importer.rb +9 -1
- data/test/hexapdf/test_writer.rb +2 -2
- data/test/hexapdf/type/test_cid_font.rb +61 -0
- data/test/hexapdf/type/test_font.rb +31 -4
- data/test/hexapdf/type/test_font_simple.rb +6 -21
- data/test/hexapdf/type/test_font_type0.rb +114 -0
- data/test/hexapdf/type/test_resources.rb +17 -1
- data/test/hexapdf/utils/test_graphics_helpers.rb +29 -0
- metadata +82 -3
data/lib/hexapdf/cli/files.rb
CHANGED
|
@@ -69,7 +69,7 @@ module HexaPDF
|
|
|
69
69
|
end
|
|
70
70
|
|
|
71
71
|
def execute(pdf) #:nodoc:
|
|
72
|
-
|
|
72
|
+
with_document(pdf, password: @password) do |doc|
|
|
73
73
|
if @indices.empty?
|
|
74
74
|
list_files(doc)
|
|
75
75
|
else
|
|
@@ -103,7 +103,7 @@ module HexaPDF
|
|
|
103
103
|
each_file(doc) do |obj, index|
|
|
104
104
|
next unless @indices.include?(index + 1) || @indices.include?(0)
|
|
105
105
|
maybe_raise_on_existing_file(obj.path)
|
|
106
|
-
puts "Extracting #{obj.path}..."
|
|
106
|
+
puts "Extracting #{obj.path}..." if command_parser.verbosity_info?
|
|
107
107
|
File.open(obj.path, 'wb') do |file|
|
|
108
108
|
fiber = obj.embedded_file_stream.stream_decoder
|
|
109
109
|
while fiber.alive? && (data = fiber.resume)
|
data/lib/hexapdf/cli/images.rb
CHANGED
|
@@ -31,6 +31,7 @@
|
|
|
31
31
|
# is created or manipulated using HexaPDF.
|
|
32
32
|
#++
|
|
33
33
|
|
|
34
|
+
require 'set'
|
|
34
35
|
require 'hexapdf/cli/command'
|
|
35
36
|
|
|
36
37
|
module HexaPDF
|
|
@@ -77,7 +78,7 @@ module HexaPDF
|
|
|
77
78
|
end
|
|
78
79
|
|
|
79
80
|
def execute(pdf) #:nodoc:
|
|
80
|
-
|
|
81
|
+
with_document(pdf, password: @password) do |doc|
|
|
81
82
|
if @indices.empty?
|
|
82
83
|
list_images(doc)
|
|
83
84
|
else
|
|
@@ -106,12 +107,19 @@ module HexaPDF
|
|
|
106
107
|
|
|
107
108
|
# Extracts the images with the given indices.
|
|
108
109
|
def extract_images(doc)
|
|
110
|
+
done = Set.new
|
|
109
111
|
each_image(doc) do |image, index, _|
|
|
110
|
-
next unless @indices.include?(index) || @indices.include?(0)
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
112
|
+
next unless (@indices.include?(index) || @indices.include?(0)) && !done.include?(index)
|
|
113
|
+
info = image.info
|
|
114
|
+
if info.writable
|
|
115
|
+
path = "#{@prefix}-#{index}.#{image.info.extension}"
|
|
116
|
+
maybe_raise_on_existing_file(path)
|
|
117
|
+
puts "Extracting #{path}..." if command_parser.verbosity_info?
|
|
118
|
+
image.write(path)
|
|
119
|
+
done << index
|
|
120
|
+
elsif command_parser.verbosity_warning?
|
|
121
|
+
$stderr.puts "Warning (image #{index}): PDF image format not supported for writing"
|
|
122
|
+
end
|
|
115
123
|
end
|
|
116
124
|
end
|
|
117
125
|
|
data/lib/hexapdf/cli/info.rb
CHANGED
|
@@ -73,9 +73,11 @@ module HexaPDF
|
|
|
73
73
|
COLUMN_WIDTH = 20 #:nodoc:
|
|
74
74
|
|
|
75
75
|
def output_info(file) # :nodoc:
|
|
76
|
-
options =
|
|
77
|
-
|
|
76
|
+
options = pdf_options(@password)
|
|
77
|
+
options[:config]['document.auto_decrypt'] = @auto_decrypt
|
|
78
78
|
HexaPDF::Document.open(file, options) do |doc|
|
|
79
|
+
output_line("File name", file)
|
|
80
|
+
output_line("File size", File.stat(file).size.to_s + " bytes")
|
|
79
81
|
INFO_KEYS.each do |name|
|
|
80
82
|
next unless doc.trailer.info.key?(name)
|
|
81
83
|
output_line(name.to_s, doc.trailer.info[name].to_s)
|
|
@@ -98,17 +100,13 @@ module HexaPDF
|
|
|
98
100
|
output_line("Pages", doc.pages.count.to_s)
|
|
99
101
|
output_line("Version", doc.version)
|
|
100
102
|
end
|
|
101
|
-
rescue HexaPDF::EncryptionError
|
|
103
|
+
rescue HexaPDF::EncryptionError
|
|
102
104
|
if @auto_decrypt
|
|
103
105
|
@auto_decrypt = false
|
|
104
106
|
retry
|
|
105
107
|
else
|
|
106
|
-
|
|
107
|
-
exit(1)
|
|
108
|
+
raise
|
|
108
109
|
end
|
|
109
|
-
rescue HexaPDF::Error => e
|
|
110
|
-
$stderr.puts "Error while processing the PDF file: #{e.message}"
|
|
111
|
-
exit(1)
|
|
112
110
|
end
|
|
113
111
|
|
|
114
112
|
def output_line(header, text) #:nodoc:
|
data/lib/hexapdf/cli/inspect.rb
CHANGED
|
@@ -94,12 +94,7 @@ module HexaPDF
|
|
|
94
94
|
end
|
|
95
95
|
|
|
96
96
|
def execute(file) #:nodoc:
|
|
97
|
-
|
|
98
|
-
send("do_#{@exec}", doc)
|
|
99
|
-
end
|
|
100
|
-
rescue HexaPDF::Error => e
|
|
101
|
-
$stderr.puts "Error while processing the PDF file: #{e.message}"
|
|
102
|
-
exit(1)
|
|
97
|
+
with_document(file, password: @password) {|doc| send("do_#{@exec}", doc)}
|
|
103
98
|
end
|
|
104
99
|
|
|
105
100
|
private
|
|
@@ -130,7 +125,9 @@ module HexaPDF
|
|
|
130
125
|
def do_object(doc) #:nodoc:
|
|
131
126
|
object = doc.object(pdf_reference_from_string(@param))
|
|
132
127
|
return unless object
|
|
133
|
-
|
|
128
|
+
if object.data.stream && command_parser.verbosity_info?
|
|
129
|
+
$stderr.puts("Note: Object also has stream data")
|
|
130
|
+
end
|
|
134
131
|
puts HexaPDF::Serializer.new.serialize(object.value)
|
|
135
132
|
end
|
|
136
133
|
|
|
@@ -141,7 +138,7 @@ module HexaPDF
|
|
|
141
138
|
while source.alive? && (data = source.resume)
|
|
142
139
|
$stdout.write(data)
|
|
143
140
|
end
|
|
144
|
-
|
|
141
|
+
elsif command_parser.verbosity_info?
|
|
145
142
|
$stderr.puts("Note: Object has no stream data")
|
|
146
143
|
end
|
|
147
144
|
end
|
data/lib/hexapdf/cli/merge.rb
CHANGED
|
@@ -105,8 +105,15 @@ module HexaPDF
|
|
|
105
105
|
# Create PDF documents for each input file
|
|
106
106
|
cache = {}
|
|
107
107
|
@files.each do |spec|
|
|
108
|
-
cache[spec.file] ||=
|
|
109
|
-
|
|
108
|
+
cache[spec.file] ||=
|
|
109
|
+
begin
|
|
110
|
+
io = if spec.file == output_file
|
|
111
|
+
StringIO.new(File.binread(spec.file))
|
|
112
|
+
else
|
|
113
|
+
File.open(spec.file)
|
|
114
|
+
end
|
|
115
|
+
HexaPDF::Document.new(io: io, **pdf_options(spec.password))
|
|
116
|
+
end
|
|
110
117
|
spec.file = cache[spec.file]
|
|
111
118
|
end
|
|
112
119
|
|
|
@@ -115,24 +122,13 @@ module HexaPDF
|
|
|
115
122
|
page_tree = target.add(Type: :Pages)
|
|
116
123
|
import_pages(page_tree)
|
|
117
124
|
target.catalog[:Pages] = page_tree
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
retained = target.pages.each_with_object({}) {|page, h| h[page.data] = true}
|
|
121
|
-
retained[target.pages.root.data] = true
|
|
122
|
-
target.each(current: false) do |obj|
|
|
123
|
-
next unless obj.kind_of?(HexaPDF::Dictionary)
|
|
124
|
-
if (obj.type == :Pages || obj.type == :Page) && !retained.key?(obj.data)
|
|
125
|
-
target.delete(obj)
|
|
126
|
-
end
|
|
127
|
-
end
|
|
125
|
+
remove_unused_pages(target)
|
|
126
|
+
target.pages.add unless target.pages.count > 0
|
|
128
127
|
|
|
129
128
|
apply_encryption_options(target)
|
|
130
129
|
apply_optimization_options(target)
|
|
131
130
|
|
|
132
|
-
target
|
|
133
|
-
rescue HexaPDF::Error => e
|
|
134
|
-
$stderr.puts "Processing error : #{e.message}"
|
|
135
|
-
exit(1)
|
|
131
|
+
write_document(target, output_file)
|
|
136
132
|
end
|
|
137
133
|
|
|
138
134
|
def usage #:nodoc:
|
|
@@ -148,10 +144,7 @@ module HexaPDF
|
|
|
148
144
|
@files.each do |s|
|
|
149
145
|
page_list = s.file.pages.to_a
|
|
150
146
|
s.pages = parse_pages_specification(s.pages, s.file.pages.count)
|
|
151
|
-
s.pages.each
|
|
152
|
-
arr[0] = page_list[arr[0]]
|
|
153
|
-
arr[1] = arr[0].value[:Rotate] || :none unless arr[1]
|
|
154
|
-
end
|
|
147
|
+
s.pages.each {|arr| arr[0] = page_list[arr[0]]}
|
|
155
148
|
end
|
|
156
149
|
|
|
157
150
|
if @interleave
|
data/lib/hexapdf/cli/modify.rb
CHANGED
|
@@ -77,16 +77,12 @@ module HexaPDF
|
|
|
77
77
|
|
|
78
78
|
def execute(in_file, out_file) #:nodoc:
|
|
79
79
|
maybe_raise_on_existing_file(out_file)
|
|
80
|
-
|
|
80
|
+
with_document(in_file, password: @password, out_file: out_file) do |doc|
|
|
81
81
|
arrange_pages(doc) unless @pages == '1-e'
|
|
82
82
|
@embed_files.each {|file| doc.files.add(file, embed: true)}
|
|
83
83
|
apply_encryption_options(doc)
|
|
84
84
|
apply_optimization_options(doc)
|
|
85
|
-
doc.write(out_file)
|
|
86
85
|
end
|
|
87
|
-
rescue HexaPDF::Error => e
|
|
88
|
-
$stderr.puts "Processing error : #{e.message}"
|
|
89
|
-
exit(1)
|
|
90
86
|
end
|
|
91
87
|
|
|
92
88
|
private
|
|
@@ -100,13 +96,14 @@ module HexaPDF
|
|
|
100
96
|
page.value.update(page.copy_inherited_values)
|
|
101
97
|
if rotation == :none
|
|
102
98
|
page.delete(:Rotate)
|
|
103
|
-
|
|
99
|
+
elsif rotation.kind_of?(Integer)
|
|
104
100
|
page[:Rotate] = ((page[:Rotate] || 0) + rotation) % 360
|
|
105
101
|
end
|
|
106
102
|
new_page_tree.add_page(page)
|
|
107
103
|
end
|
|
108
|
-
doc.delete(doc.catalog.delete(:Pages))
|
|
109
104
|
doc.catalog[:Pages] = new_page_tree
|
|
105
|
+
remove_unused_pages(doc)
|
|
106
|
+
doc.pages.add unless doc.pages.count > 0
|
|
110
107
|
end
|
|
111
108
|
|
|
112
109
|
end
|
data/lib/hexapdf/cli/optimize.rb
CHANGED
|
@@ -54,6 +54,7 @@ module HexaPDF
|
|
|
54
54
|
@out_options.xref_streams = :generate
|
|
55
55
|
@out_options.object_streams = :generate
|
|
56
56
|
@out_options.streams = :compress
|
|
57
|
+
@out_options.optimize_fonts = true
|
|
57
58
|
|
|
58
59
|
options.on("--password PASSWORD", "-p", String,
|
|
59
60
|
"The password for decryption. Use - for reading from standard input.") do |pwd|
|
|
@@ -67,14 +68,10 @@ module HexaPDF
|
|
|
67
68
|
|
|
68
69
|
def execute(in_file, out_file) #:nodoc:
|
|
69
70
|
maybe_raise_on_existing_file(out_file)
|
|
70
|
-
|
|
71
|
+
with_document(in_file, password: @password, out_file: out_file) do |doc|
|
|
71
72
|
optimize_page_tree(doc)
|
|
72
73
|
apply_optimization_options(doc)
|
|
73
|
-
doc.write(out_file)
|
|
74
74
|
end
|
|
75
|
-
rescue HexaPDF::Error => e
|
|
76
|
-
$stderr.puts "Processing error : #{e.message}"
|
|
77
|
-
exit(1)
|
|
78
75
|
end
|
|
79
76
|
|
|
80
77
|
private
|
|
@@ -92,13 +92,13 @@ module HexaPDF
|
|
|
92
92
|
end
|
|
93
93
|
|
|
94
94
|
# :call-seq:
|
|
95
|
-
# config.constantize(name, key = nil) -> constant
|
|
95
|
+
# config.constantize(name, key = nil) -> constant
|
|
96
96
|
# config.constantize(name, key = nil) {|name| block} -> obj
|
|
97
97
|
#
|
|
98
98
|
# Returns the constant the option +name+ is referring to. If +key+ is provided and the value
|
|
99
99
|
# of the option +name+ responds to \#[], the constant to which +key+ refers is returned.
|
|
100
100
|
#
|
|
101
|
-
# If no constant can be found and no block is provided,
|
|
101
|
+
# If no constant can be found and no block is provided, an error is raised. If a block is
|
|
102
102
|
# provided it is called with the option name and its result will be returned.
|
|
103
103
|
#
|
|
104
104
|
# config.constantize('encryption.aes') #=> HexaPDF::Encryption::FastAES
|
|
@@ -107,7 +107,12 @@ module HexaPDF
|
|
|
107
107
|
data = self[name]
|
|
108
108
|
data = data[key] if key != :__unset && data.respond_to?(:[])
|
|
109
109
|
(data = ::Object.const_get(data) rescue nil) if data.kind_of?(String)
|
|
110
|
-
|
|
110
|
+
if data.nil? && block_given?
|
|
111
|
+
data = yield(name)
|
|
112
|
+
elsif data.nil?
|
|
113
|
+
raise HexaPDF::Error, "Error getting constant for configuration option '#{name}'" <<
|
|
114
|
+
(key == :__unset ? "" : " and key '#{key}'")
|
|
115
|
+
end
|
|
111
116
|
data
|
|
112
117
|
end
|
|
113
118
|
|
|
@@ -160,6 +165,15 @@ module HexaPDF
|
|
|
160
165
|
#
|
|
161
166
|
# The default implementation raises an error.
|
|
162
167
|
#
|
|
168
|
+
# font.on_missing_unicode_mapping::
|
|
169
|
+
# Callback hook when a character code point cannot be converted to a Unicode character.
|
|
170
|
+
#
|
|
171
|
+
# The value needs to be an object that responds to \#call(code, font_dict) where +code+ is the
|
|
172
|
+
# decoded code point and +font_dict+ is the font dictionary which was used for the conversion.
|
|
173
|
+
# The returned value is used as the Unicode character and should be a string.
|
|
174
|
+
#
|
|
175
|
+
# The default implementation raises an error.
|
|
176
|
+
#
|
|
163
177
|
# font_loader::
|
|
164
178
|
# An array with font loader implementations. When a font should be loaded, the array is
|
|
165
179
|
# iterated in sequence and the first valid font returned by a font loader is used.
|
|
@@ -222,6 +236,10 @@ module HexaPDF
|
|
|
222
236
|
'font.on_missing_glyph' => proc do |n, f|
|
|
223
237
|
raise HexaPDF::Error, "No glyph for '#{n}' in font #{f.font_name} found"
|
|
224
238
|
end,
|
|
239
|
+
'font.on_missing_unicode_mapping' => proc do |code_point, font|
|
|
240
|
+
raise HexaPDF::Error, "No Unicode mapping for code point #{code_point} " \
|
|
241
|
+
"in font #{font[:BaseFont]}"
|
|
242
|
+
end,
|
|
225
243
|
'font_loader' => [
|
|
226
244
|
'HexaPDF::FontLoader::Standard14',
|
|
227
245
|
'HexaPDF::FontLoader::FromConfiguration',
|
|
@@ -283,6 +301,13 @@ module HexaPDF
|
|
|
283
301
|
# Specifies the compression level that should be used with the FlateDecode filter. The level
|
|
284
302
|
# can range from 0 (no compression), 1 (best speed) to 9 (best compression, default).
|
|
285
303
|
#
|
|
304
|
+
# filter.flate_memory::
|
|
305
|
+
# Specifies the memory level that should be used with the FlateDecode filter. The level can
|
|
306
|
+
# range from 1 (minimum memory usage; slow, reduces compression) to 9 (maximum memory usage).
|
|
307
|
+
#
|
|
308
|
+
# The HexaPDF default value of 6 has been found in tests to be nearly equivalent to the Zlib
|
|
309
|
+
# default of 8 in terms of speed and compression level but uses less memory.
|
|
310
|
+
#
|
|
286
311
|
# filter.map::
|
|
287
312
|
# A mapping from a PDF name (a Symbol) to a filter object (see Filter). If the value is a
|
|
288
313
|
# String, it should contain the name of a constant that contains a filter object.
|
|
@@ -328,6 +353,7 @@ module HexaPDF
|
|
|
328
353
|
'encryption.sub_filter_map' => {
|
|
329
354
|
},
|
|
330
355
|
'filter.flate_compression' => 9,
|
|
356
|
+
'filter.flate_memory' => 6,
|
|
331
357
|
'filter.map' => {
|
|
332
358
|
ASCIIHexDecode: 'HexaPDF::Filter::ASCIIHexDecode',
|
|
333
359
|
AHx: 'HexaPDF::Filter::ASCIIHexDecode',
|
|
@@ -381,8 +407,11 @@ module HexaPDF
|
|
|
381
407
|
'object.subtype_map' => {
|
|
382
408
|
Image: 'HexaPDF::Type::Image',
|
|
383
409
|
Form: 'HexaPDF::Type::Form',
|
|
410
|
+
Type0: 'HexaPDF::Type::FontType0',
|
|
384
411
|
Type1: 'HexaPDF::Type::FontType1',
|
|
385
412
|
TrueType: 'HexaPDF::Type::FontTrueType',
|
|
413
|
+
CIDFontType0: 'HexaPDF::Type::CIDFont',
|
|
414
|
+
CIDFontType2: 'HexaPDF::Type::CIDFont',
|
|
386
415
|
},
|
|
387
416
|
'task.map' => {
|
|
388
417
|
optimize: 'HexaPDF::Task::Optimize',
|
|
@@ -35,6 +35,7 @@ require 'hexapdf/content/graphics_state'
|
|
|
35
35
|
require 'hexapdf/content/operator'
|
|
36
36
|
require 'hexapdf/serializer'
|
|
37
37
|
require 'hexapdf/utils/math_helpers'
|
|
38
|
+
require 'hexapdf/utils/graphics_helpers'
|
|
38
39
|
require 'hexapdf/content/graphic_object'
|
|
39
40
|
require 'hexapdf/stream'
|
|
40
41
|
|
|
@@ -132,6 +133,7 @@ module HexaPDF
|
|
|
132
133
|
class Canvas
|
|
133
134
|
|
|
134
135
|
include HexaPDF::Utils::MathHelpers
|
|
136
|
+
include HexaPDF::Utils::GraphicsHelpers
|
|
135
137
|
|
|
136
138
|
# The context for which the canvas was created (a HexaPDF::Type::Page or HexaPDF::Type::Form
|
|
137
139
|
# object).
|
|
@@ -1693,7 +1695,7 @@ module HexaPDF
|
|
|
1693
1695
|
#
|
|
1694
1696
|
# Low-level method for actually showing text on the canvas.
|
|
1695
1697
|
#
|
|
1696
|
-
# The argument +
|
|
1698
|
+
# The argument +glyphs+ needs to be a an array of glyph objects valid for the current font,
|
|
1697
1699
|
# optionally interspersed with numbers for kerning.
|
|
1698
1700
|
#
|
|
1699
1701
|
# Text is always shown at the current position of the text cursor, i.e. the origin of the text
|
|
@@ -1702,13 +1704,13 @@ module HexaPDF
|
|
|
1702
1704
|
#
|
|
1703
1705
|
# The text matrix is updated to correctly represent the graphics state after the invocation.
|
|
1704
1706
|
#
|
|
1705
|
-
# This method is usually not invoked directly but by higher level methods like #
|
|
1706
|
-
def show_glyphs(
|
|
1707
|
+
# This method is usually not invoked directly but by higher level methods like #text.
|
|
1708
|
+
def show_glyphs(glyphs)
|
|
1707
1709
|
begin_text
|
|
1708
1710
|
|
|
1709
1711
|
result = [''.b]
|
|
1710
1712
|
offset = 0
|
|
1711
|
-
|
|
1713
|
+
glyphs.each do |item|
|
|
1712
1714
|
if item.kind_of?(Numeric)
|
|
1713
1715
|
result << item << ''.b
|
|
1714
1716
|
offset -= item * graphics_state.scaled_font_size
|
|
@@ -1718,7 +1720,7 @@ module HexaPDF
|
|
|
1718
1720
|
|
|
1719
1721
|
offset += item.width * graphics_state.scaled_font_size +
|
|
1720
1722
|
graphics_state.scaled_character_spacing
|
|
1721
|
-
offset += graphics_state.scaled_word_spacing if encoded
|
|
1723
|
+
offset += graphics_state.scaled_word_spacing if encoded == " ".freeze
|
|
1722
1724
|
end
|
|
1723
1725
|
end
|
|
1724
1726
|
|
|
@@ -1727,6 +1729,124 @@ module HexaPDF
|
|
|
1727
1729
|
self
|
|
1728
1730
|
end
|
|
1729
1731
|
|
|
1732
|
+
# :call-seq:
|
|
1733
|
+
# canvas.show_glyphs_only(glyphs) -> canvas
|
|
1734
|
+
#
|
|
1735
|
+
# Same operation as with #show_glyphs but without updating the text matrix.
|
|
1736
|
+
#
|
|
1737
|
+
# This method should only be used by advanced text layouting algorithms which perform the
|
|
1738
|
+
# necessary calculations themselves!
|
|
1739
|
+
#
|
|
1740
|
+
# *Warning*: Since this method doesn't update the text matrix, all following results from
|
|
1741
|
+
# #text_cursor and other methods using the current text matrix are invalid until the next call
|
|
1742
|
+
# to #text_matrix or #end_text.
|
|
1743
|
+
def show_glyphs_only(glyphs)
|
|
1744
|
+
begin_text
|
|
1745
|
+
|
|
1746
|
+
result = [''.b]
|
|
1747
|
+
glyphs.each do |item|
|
|
1748
|
+
if item.kind_of?(Numeric)
|
|
1749
|
+
result << item << ''.b
|
|
1750
|
+
else
|
|
1751
|
+
result[-1] << @font.encode(item)
|
|
1752
|
+
end
|
|
1753
|
+
end
|
|
1754
|
+
|
|
1755
|
+
serialize1(:TJ, result)
|
|
1756
|
+
self
|
|
1757
|
+
end
|
|
1758
|
+
|
|
1759
|
+
# :call-seq:
|
|
1760
|
+
# canvas.marked_content_point(tag, property_list: nil) -> canvas
|
|
1761
|
+
#
|
|
1762
|
+
# Inserts a marked-content point, optionally associated with a property list.
|
|
1763
|
+
#
|
|
1764
|
+
# A marked-content point is used to identify a position in the content stream for later use by
|
|
1765
|
+
# other applications. The symbol +tag+ is used to uniquely identify the role of the
|
|
1766
|
+
# marked-content point and should be registered with ISO to avoid conflicts.
|
|
1767
|
+
#
|
|
1768
|
+
# The optional +property_list+ argument can either be a valid PDF dictionary or a symbol
|
|
1769
|
+
# referencing an already used property list in the resource dictionary's /Properties
|
|
1770
|
+
# dictionary.
|
|
1771
|
+
#
|
|
1772
|
+
# Examples:
|
|
1773
|
+
#
|
|
1774
|
+
# canvas.marked_content_point(:Divider)
|
|
1775
|
+
# canvas.marked_content_point(:Divider, property_list: {Key: 'value'})
|
|
1776
|
+
#
|
|
1777
|
+
# See: PDF1.7 s14.6
|
|
1778
|
+
def marked_content_point(tag, property_list: nil)
|
|
1779
|
+
raise_unless_at_page_description_level_or_in_text
|
|
1780
|
+
if property_list
|
|
1781
|
+
property_list = resources.property_list(property_list) if property_list.kind_of?(Symbol)
|
|
1782
|
+
invoke2(:DP, tag, resources.add_property_list(property_list))
|
|
1783
|
+
else
|
|
1784
|
+
invoke1(:MP, tag)
|
|
1785
|
+
end
|
|
1786
|
+
self
|
|
1787
|
+
end
|
|
1788
|
+
|
|
1789
|
+
# :call-seq:
|
|
1790
|
+
# canvas.marked_content_sequence(tag, property_list: nil) -> canvas
|
|
1791
|
+
# canvas.marked_content_sequence(tag, property_list: nil) { block } -> canvas
|
|
1792
|
+
#
|
|
1793
|
+
# Inserts a marked-content sequence, optionally associated with a property list.
|
|
1794
|
+
#
|
|
1795
|
+
# A marked-content sequence is used to identify a sequence of complete graphics objects in the
|
|
1796
|
+
# content stream for later use by other applications. The symbol +tag+ is used to uniquely
|
|
1797
|
+
# identify the role of the marked-content sequence and should be registered with ISO to avoid
|
|
1798
|
+
# conflicts.
|
|
1799
|
+
#
|
|
1800
|
+
# The optional +property_list+ argument can either be a valid PDF dictionary or a symbol
|
|
1801
|
+
# referencing an already used property list in the resource dictionary's /Properties
|
|
1802
|
+
# dictionary.
|
|
1803
|
+
#
|
|
1804
|
+
# If invoked without a block, a corresponding call to #end_marked_content_sequence must be
|
|
1805
|
+
# done. Otherwise the marked-content sequence automatically ends when the block is finished.
|
|
1806
|
+
#
|
|
1807
|
+
# Although the PDF specification would allow using marked-content sequences inside text
|
|
1808
|
+
# objects, this is prohibited.
|
|
1809
|
+
#
|
|
1810
|
+
# Examples:
|
|
1811
|
+
#
|
|
1812
|
+
# canvas.marked_content_sequence(:Divider)
|
|
1813
|
+
# # Other instructions
|
|
1814
|
+
# canvas.end_marked_content_sequence
|
|
1815
|
+
#
|
|
1816
|
+
# canvas.marked_content_sequence(:Divider, property_list: {Key: 'value'}) do
|
|
1817
|
+
# # Other instructions
|
|
1818
|
+
# end
|
|
1819
|
+
#
|
|
1820
|
+
# See: PDF1.7 s14.6, #end_marked_content_sequence
|
|
1821
|
+
def marked_content_sequence(tag, property_list: nil)
|
|
1822
|
+
raise_unless_at_page_description_level
|
|
1823
|
+
if property_list
|
|
1824
|
+
property_list = resources.property_list(property_list) if property_list.kind_of?(Symbol)
|
|
1825
|
+
invoke2(:BDC, tag, resources.add_property_list(property_list))
|
|
1826
|
+
else
|
|
1827
|
+
invoke1(:BMC, tag)
|
|
1828
|
+
end
|
|
1829
|
+
if block_given?
|
|
1830
|
+
yield
|
|
1831
|
+
end_marked_content_sequence
|
|
1832
|
+
end
|
|
1833
|
+
self
|
|
1834
|
+
end
|
|
1835
|
+
|
|
1836
|
+
# :call-seq:
|
|
1837
|
+
# canvas.end_marked_content_sequence -> canvas
|
|
1838
|
+
#
|
|
1839
|
+
# Ends a marked-content sequence.
|
|
1840
|
+
#
|
|
1841
|
+
# See #marked_content_sequence for details.
|
|
1842
|
+
#
|
|
1843
|
+
# See: PDF1.7 s14.6, #marked_content_sequence
|
|
1844
|
+
def end_marked_content_sequence
|
|
1845
|
+
raise_unless_at_page_description_level
|
|
1846
|
+
invoke0(:EMC)
|
|
1847
|
+
self
|
|
1848
|
+
end
|
|
1849
|
+
|
|
1730
1850
|
private
|
|
1731
1851
|
|
|
1732
1852
|
# Invokes the given operator with the operands and serializes it.
|
|
@@ -1752,6 +1872,11 @@ module HexaPDF
|
|
|
1752
1872
|
@contents << @operators[operator].serialize(@serializer, op1)
|
|
1753
1873
|
end
|
|
1754
1874
|
|
|
1875
|
+
# Optimized method for one operand.
|
|
1876
|
+
def serialize1(operator, op1)
|
|
1877
|
+
@contents << @operators[operator].serialize(@serializer, op1)
|
|
1878
|
+
end
|
|
1879
|
+
|
|
1755
1880
|
# Optimized method for two operands.
|
|
1756
1881
|
def invoke2(operator, op1, op2)
|
|
1757
1882
|
@operators[operator].invoke(self, op1, op2)
|
|
@@ -1936,38 +2061,6 @@ module HexaPDF
|
|
|
1936
2061
|
curve_to(p3[0], p3[1], p1: p1, p2: p2)
|
|
1937
2062
|
end
|
|
1938
2063
|
|
|
1939
|
-
# Given two points p0 = (x0, y0) and p1 = (x1, y1), returns the point on the line through
|
|
1940
|
-
# these points that is +distance+ units away from p0.
|
|
1941
|
-
#
|
|
1942
|
-
# v = p1 - p0
|
|
1943
|
-
# result = p0 + distance * v/norm(v)
|
|
1944
|
-
def point_on_line(x0, y0, x1, y1, distance:)
|
|
1945
|
-
norm = Math.sqrt((x1 - x0)**2 + (y1 - y0)**2)
|
|
1946
|
-
[x0 + distance / norm * (x1 - x0), y0 + distance / norm * (y1 - y0)]
|
|
1947
|
-
end
|
|
1948
|
-
|
|
1949
|
-
# Calculates and returns the requested dimensions for the rectangular object with the given
|
|
1950
|
-
# +width+ and +height+ based on the options.
|
|
1951
|
-
#
|
|
1952
|
-
# +rwidth+::
|
|
1953
|
-
# The requested width. If +rheight+ is not specified, it is chosen so that the aspect
|
|
1954
|
-
# ratio is maintained
|
|
1955
|
-
#
|
|
1956
|
-
# +rheight+::
|
|
1957
|
-
# The requested height. If +rwidth+ is not specified, it is chosen so that the aspect
|
|
1958
|
-
# ratio is maintained
|
|
1959
|
-
def calculate_dimensions(width, height, rwidth: nil, rheight: nil)
|
|
1960
|
-
if rwidth && rheight
|
|
1961
|
-
[rwidth, rheight]
|
|
1962
|
-
elsif rwidth
|
|
1963
|
-
[rwidth, height * rwidth / width.to_f]
|
|
1964
|
-
elsif rheight
|
|
1965
|
-
[width * rheight / height.to_f, rheight]
|
|
1966
|
-
else
|
|
1967
|
-
[width, height]
|
|
1968
|
-
end
|
|
1969
|
-
end
|
|
1970
|
-
|
|
1971
2064
|
end
|
|
1972
2065
|
|
|
1973
2066
|
end
|