hexapdf 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +68 -0
- data/CONTRIBUTERS +1 -1
- data/README.md +35 -4
- data/Rakefile +1 -0
- data/VERSION +1 -1
- data/data/hexapdf/cmap/83pv-RKSJ-H +314 -0
- data/data/hexapdf/cmap/90ms-RKSJ-H +259 -0
- data/data/hexapdf/cmap/90ms-RKSJ-V +156 -0
- data/data/hexapdf/cmap/90msp-RKSJ-H +257 -0
- data/data/hexapdf/cmap/90msp-RKSJ-V +155 -0
- data/data/hexapdf/cmap/90pv-RKSJ-H +355 -0
- data/data/hexapdf/cmap/Add-RKSJ-H +738 -0
- data/data/hexapdf/cmap/Add-RKSJ-V +135 -0
- data/data/hexapdf/cmap/Adobe-CNS1-UCS2 +18209 -0
- data/data/hexapdf/cmap/Adobe-GB1-UCS2 +14267 -0
- data/data/hexapdf/cmap/Adobe-Japan1-UCS2 +19159 -0
- data/data/hexapdf/cmap/Adobe-Korea1-UCS2 +9267 -0
- data/data/hexapdf/cmap/B5pc-H +337 -0
- data/data/hexapdf/cmap/B5pc-V +90 -0
- data/data/hexapdf/cmap/CNS-EUC-H +490 -0
- data/data/hexapdf/cmap/CNS-EUC-V +538 -0
- data/data/hexapdf/cmap/ETen-B5-H +343 -0
- data/data/hexapdf/cmap/ETen-B5-V +91 -0
- data/data/hexapdf/cmap/ETenms-B5-H +79 -0
- data/data/hexapdf/cmap/ETenms-B5-V +99 -0
- data/data/hexapdf/cmap/EUC-H +207 -0
- data/data/hexapdf/cmap/EUC-V +105 -0
- data/data/hexapdf/cmap/Ext-RKSJ-H +768 -0
- data/data/hexapdf/cmap/Ext-RKSJ-V +117 -0
- data/data/hexapdf/cmap/GB-EUC-H +173 -0
- data/data/hexapdf/cmap/GB-EUC-V +98 -0
- data/data/hexapdf/cmap/GBK-EUC-H +4273 -0
- data/data/hexapdf/cmap/GBK-EUC-V +97 -0
- data/data/hexapdf/cmap/GBK2K-H +5325 -0
- data/data/hexapdf/cmap/GBK2K-V +118 -0
- data/data/hexapdf/cmap/GBKp-EUC-H +4272 -0
- data/data/hexapdf/cmap/GBKp-EUC-V +97 -0
- data/data/hexapdf/cmap/GBpc-EUC-H +175 -0
- data/data/hexapdf/cmap/GBpc-EUC-V +98 -0
- data/data/hexapdf/cmap/H +200 -0
- data/data/hexapdf/cmap/HKscs-B5-H +1331 -0
- data/data/hexapdf/cmap/HKscs-B5-V +90 -0
- data/data/hexapdf/cmap/Identity-H +339 -0
- data/data/hexapdf/cmap/Identity-V +73 -0
- data/data/hexapdf/cmap/KSC-EUC-H +562 -0
- data/data/hexapdf/cmap/KSC-EUC-V +94 -0
- data/data/hexapdf/cmap/KSCms-UHC-H +776 -0
- data/data/hexapdf/cmap/KSCms-UHC-HW-H +775 -0
- data/data/hexapdf/cmap/KSCms-UHC-HW-V +93 -0
- data/data/hexapdf/cmap/KSCms-UHC-V +94 -0
- data/data/hexapdf/cmap/KSCpc-EUC-H +608 -0
- data/data/hexapdf/cmap/LICENSE.txt +26 -0
- data/data/hexapdf/cmap/README.txt +9 -0
- data/data/hexapdf/cmap/UniCNS-UCS2-H +16992 -0
- data/data/hexapdf/cmap/UniCNS-UCS2-V +90 -0
- data/data/hexapdf/cmap/UniCNS-UTF16-H +19117 -0
- data/data/hexapdf/cmap/UniCNS-UTF16-V +94 -0
- data/data/hexapdf/cmap/UniGB-UCS2-H +14321 -0
- data/data/hexapdf/cmap/UniGB-UCS2-V +101 -0
- data/data/hexapdf/cmap/UniGB-UTF16-H +14381 -0
- data/data/hexapdf/cmap/UniGB-UTF16-V +104 -0
- data/data/hexapdf/cmap/UniJIS-UCS2-H +8870 -0
- data/data/hexapdf/cmap/UniJIS-UCS2-HW-H +81 -0
- data/data/hexapdf/cmap/UniJIS-UCS2-HW-V +279 -0
- data/data/hexapdf/cmap/UniJIS-UCS2-V +275 -0
- data/data/hexapdf/cmap/UniJIS-UTF16-H +14450 -0
- data/data/hexapdf/cmap/UniJIS-UTF16-V +299 -0
- data/data/hexapdf/cmap/UniKS-UCS2-H +8725 -0
- data/data/hexapdf/cmap/UniKS-UCS2-V +95 -0
- data/data/hexapdf/cmap/UniKS-UTF16-H +8895 -0
- data/data/hexapdf/cmap/UniKS-UTF16-V +99 -0
- data/data/hexapdf/cmap/V +105 -0
- data/examples/arc.rb +3 -3
- data/examples/merging.rb +4 -1
- data/examples/optimizing.rb +3 -0
- data/examples/show_char_bboxes.rb +2 -2
- data/examples/truetype.rb +2 -2
- data/lib/hexapdf/cli.rb +40 -1
- data/lib/hexapdf/cli/batch.rb +72 -0
- data/lib/hexapdf/cli/command.rb +112 -15
- data/lib/hexapdf/cli/files.rb +2 -2
- data/lib/hexapdf/cli/images.rb +14 -6
- data/lib/hexapdf/cli/info.rb +6 -8
- data/lib/hexapdf/cli/inspect.rb +5 -8
- data/lib/hexapdf/cli/merge.rb +13 -20
- data/lib/hexapdf/cli/modify.rb +4 -7
- data/lib/hexapdf/cli/optimize.rb +2 -5
- data/lib/hexapdf/configuration.rb +32 -3
- data/lib/hexapdf/content/canvas.rb +130 -37
- data/lib/hexapdf/content/parser.rb +40 -6
- data/lib/hexapdf/content/processor.rb +4 -4
- data/lib/hexapdf/document.rb +40 -10
- data/lib/hexapdf/document/fonts.rb +1 -0
- data/lib/hexapdf/encryption/security_handler.rb +8 -12
- data/lib/hexapdf/filter/flate_decode.rb +25 -2
- data/lib/hexapdf/font/cmap.rb +124 -8
- data/lib/hexapdf/font/cmap/parser.rb +65 -15
- data/lib/hexapdf/font/encoding/base.rb +2 -2
- data/lib/hexapdf/font/encoding/glyph_list.rb +2 -4
- data/lib/hexapdf/font/true_type.rb +1 -0
- data/lib/hexapdf/font/true_type/builder.rb +75 -0
- data/lib/hexapdf/font/true_type/optimizer.rb +65 -0
- data/lib/hexapdf/font/true_type/subsetter.rb +9 -22
- data/lib/hexapdf/font/true_type_wrapper.rb +9 -21
- data/lib/hexapdf/font_loader.rb +1 -1
- data/lib/hexapdf/importer.rb +1 -1
- data/lib/hexapdf/serializer.rb +5 -3
- data/lib/hexapdf/type.rb +2 -0
- data/lib/hexapdf/type/cid_font.rb +120 -0
- data/lib/hexapdf/type/font.rb +32 -12
- data/lib/hexapdf/type/font_simple.rb +34 -42
- data/lib/hexapdf/type/font_type0.rb +148 -0
- data/lib/hexapdf/type/form.rb +4 -4
- data/lib/hexapdf/type/page.rb +12 -11
- data/lib/hexapdf/type/resources.rb +14 -0
- data/lib/hexapdf/utils/graphics_helpers.rb +77 -0
- data/lib/hexapdf/version.rb +1 -1
- data/man/man1/hexapdf.1 +43 -1
- data/test/hexapdf/content/test_canvas.rb +76 -0
- data/test/hexapdf/content/test_parser.rb +20 -1
- data/test/hexapdf/content/test_processor.rb +11 -7
- data/test/hexapdf/document/test_fonts.rb +3 -1
- data/test/hexapdf/font/cmap/test_parser.rb +42 -7
- data/test/hexapdf/font/encoding/test_base.rb +1 -1
- data/test/hexapdf/font/encoding/test_glyph_list.rb +3 -3
- data/test/hexapdf/font/test_cmap.rb +104 -0
- data/test/hexapdf/font/test_true_type_wrapper.rb +63 -46
- data/test/hexapdf/font/true_type/test_builder.rb +37 -0
- data/test/hexapdf/font/true_type/test_optimizer.rb +27 -0
- data/test/hexapdf/font/true_type/test_subsetter.rb +6 -13
- data/test/hexapdf/test_configuration.rb +12 -7
- data/test/hexapdf/test_document.rb +24 -0
- data/test/hexapdf/test_importer.rb +9 -1
- data/test/hexapdf/test_writer.rb +2 -2
- data/test/hexapdf/type/test_cid_font.rb +61 -0
- data/test/hexapdf/type/test_font.rb +31 -4
- data/test/hexapdf/type/test_font_simple.rb +6 -21
- data/test/hexapdf/type/test_font_type0.rb +114 -0
- data/test/hexapdf/type/test_resources.rb +17 -1
- data/test/hexapdf/utils/test_graphics_helpers.rb +29 -0
- metadata +82 -3
data/lib/hexapdf/version.rb
CHANGED
data/man/man1/hexapdf.1
CHANGED
@@ -28,6 +28,8 @@ Merging multiple PDF files into one (see the \fBmerge\fP command)
|
|
28
28
|
Modifying an existing PDF file (see the \fBmodify\fP command)
|
29
29
|
.IP \(bu 4
|
30
30
|
Optimizing the file size of a PDF file (see the \fBoptimize\fP command)
|
31
|
+
.IP \(bu 4
|
32
|
+
Batch execution of a command on multiple PDF files (see the \fBbatch\fP command)
|
31
33
|
.PD
|
32
34
|
.P
|
33
35
|
The application contains a built\-in \fBhelp\fP command that can be used to provide a quick reminder of a command\[u2019]s purpose and its options\.
|
@@ -42,6 +44,19 @@ These options are available on every command (except if they are overridden):
|
|
42
44
|
\fB\-\-[no\-]force\fP
|
43
45
|
Force overwriting existing files\. Default: \fIfalse\fP\&\.
|
44
46
|
.TP
|
47
|
+
\fB\-\-strict\fP
|
48
|
+
Enable strict parsing and validation\. By default, correctable parse errors and validation problems are treated as warnings, which allows processing most PDF files\. If this option is used, correctable parse errors and uncorrectable validation problems are treated as errors\.
|
49
|
+
.RS
|
50
|
+
.P
|
51
|
+
Note that a PDF file may have validation errors and still be usable since most viewing applications are very forgiving\.
|
52
|
+
.RE
|
53
|
+
.TP
|
54
|
+
\fB\-\-verbose\fP, \fB\-v\fP
|
55
|
+
Enable more verbose output\. There are three verbosity levels: 0 (no output), 1 (warning output) and 2 (warning and informational output)\. The default level is 1; specifying this option increases it to 2\.
|
56
|
+
.TP
|
57
|
+
\fB\-\-quiet\fP
|
58
|
+
Suppress any output by setting the verbosity level to 0\. Also see the description of \fB\-\-verbose\fP above\.
|
59
|
+
.TP
|
45
60
|
\fB\-h\fP, \fB\-\-help\fP
|
46
61
|
Show the help for the application if no command was specified, or the command help otherwise\.
|
47
62
|
.SS "Optimization Options"
|
@@ -61,6 +76,9 @@ Defines how streams should be treated: \fIcompress\fP will compress them when po
|
|
61
76
|
.TP
|
62
77
|
\fB\-\-[no\-]compress\-pages\fP
|
63
78
|
Recompress page content streams\. This is a very expensive operation in terms of processing time and won\[u2019]t lead to great file size improvements in many cases\. Default: \fIno\fP\&\.
|
79
|
+
.TP
|
80
|
+
\fB\-\-[no\-]optimize\-fonts\fP
|
81
|
+
Optimize embedded font files by removing normally unneeded font data\. Note that this may have a negative effect on PDFs with forms since form entry usually requires fully embedded font files\. Default: \fIno\fP\&\.
|
64
82
|
.SS "Encryption Options"
|
65
83
|
These options can only be used with the \fBmerge\fP and \fBmodify\fP commands and control if and how an output PDF file should be encrypted\. All options except \fB\-\-decrypt\fP automatically enable \fB\-\-encrypt\fP\&\.
|
66
84
|
.P
|
@@ -141,6 +159,14 @@ allow high quality printing
|
|
141
159
|
hexapdf uses a command\-style interface\. This means that it provides different functionalities depending on the used command, and each command can have its own options\.
|
142
160
|
.P
|
143
161
|
There is no need to write the full command name for hexapdf to understand it; the only requirement is that it must be unambiguous\. So using \fBf\fP for the \fBfiles\fP command is sufficient\. The same is true for long option names and option values\.
|
162
|
+
.P
|
163
|
+
Any command that reads and writes a PDF file may do in\-place processing of the file\. This is automatically done if an input file name is the same as the output file name\. Note that the option \fB\-\-force\fP has to be used in this case\.
|
164
|
+
.SS "batch"
|
165
|
+
Synopsis: \fBbatch\fP \fICOMMAND\fP \fIFILES\.\.\.\fP
|
166
|
+
.P
|
167
|
+
This command allows executing a single command for multiple input files, thereby reducing the overall execution time\.
|
168
|
+
.P
|
169
|
+
The first argument \fICOMMAND\fP is used as a hexapdf command line and must not contain the binary name, just everything else\. The rest of the arguments are the input files\. The specified command will be executed for each input file, with all occurrences of {} being replaced by the file name\.
|
144
170
|
.SS "files"
|
145
171
|
Synopsis: \fBfiles\fP [\fBOPTIONS\fP] \fIPDF\fP
|
146
172
|
.P
|
@@ -297,6 +323,8 @@ Some commands allow the specification of pages using a \fIPAGES\fP argument\. Th
|
|
297
323
|
.P
|
298
324
|
If the start number of a page range is higher than the end number, the pages are used in the reverse order\.
|
299
325
|
.P
|
326
|
+
Single page numbers that are not valid are ignored\. If a page number in a page range is higher than the page number of the last page, the page number of the last page is used instead\.
|
327
|
+
.P
|
300
328
|
Step values can be used with page ranges\. If a range is followed by \fI/STEP\fP, \fISTEP\fP \- 1 pages are skipped after each used page\.
|
301
329
|
.P
|
302
330
|
Additionally, the page numbers and ranges can be suffixed with a rotation modifier:
|
@@ -367,9 +395,15 @@ Optimization: Compress the \fBinput\.pdf\fP to get a smaller file size\.
|
|
367
395
|
.SS "files"
|
368
396
|
\fBhexapdf files input\.pdf\fP
|
369
397
|
.br
|
370
|
-
\fBhexapdf files input\.pdf \-
|
398
|
+
\fBhexapdf files input\.pdf \-e 1\fP
|
371
399
|
.P
|
372
400
|
Embedded files: The first command lists the embedded files in the \fBinput\.pdf\fP, the second one then extracts the embedded file with the index 1\.
|
401
|
+
.SS "images"
|
402
|
+
\fBhexapdf images input\.pdf\fP
|
403
|
+
.br
|
404
|
+
\fBhexapdf images input\.pdf \-e \-\-prefix images/image\fP
|
405
|
+
.P
|
406
|
+
Image info and extraction: The first command lists the images of the \fBinput\.pdf\fP, the second one then extracts the images into the subdirectory \fBimages\fP with the prefix \fBimage\fP\&\.
|
373
407
|
.SS "info"
|
374
408
|
\fBhexapdf info input\.pdf\fP
|
375
409
|
.P
|
@@ -380,6 +414,14 @@ File information: Show general information about the PDF file, like PDF version,
|
|
380
414
|
\fBhexapdf inspect input\.pdf \-o 3\fP
|
381
415
|
.P
|
382
416
|
Inspect a PDF: These commands can be used to inspect the internal object structure of a PDF file\. The first command shows the PDF trailer object\. The second one shows the object with the object number 3\.
|
417
|
+
.SS "batch"
|
418
|
+
\fBhexapdf batch \'info {}\' input1\.pdf input2\.pdf input3\.pdf\fP
|
419
|
+
.P
|
420
|
+
Execute the info command for all input files\.
|
421
|
+
.P
|
422
|
+
\fBhexapdf batch \'optimize \-\-object\-streams delete {} done\-{}\' input1\.pdf input2\.pdf input3\.pdf\fP
|
423
|
+
.P
|
424
|
+
Optimize the given input files, creating the three output files \fBdone\-input1\.pdf\fP, \fBdone\-input2\.pdf\fP and \fBdone\-input3\.pdf\fP\&\.
|
383
425
|
.SH "EXIT STATUS"
|
384
426
|
The exit status is 0 if no error happened\. Otherwise it is 1\.
|
385
427
|
.SH "SEE ALSO"
|
@@ -1080,6 +1080,21 @@ describe HexaPDF::Content::Canvas do
|
|
1080
1080
|
end
|
1081
1081
|
end
|
1082
1082
|
|
1083
|
+
describe "show_glyphs_only" do
|
1084
|
+
it "serializes correctly" do
|
1085
|
+
@canvas.font("Times", size: 20)
|
1086
|
+
font = @canvas.font
|
1087
|
+
@canvas.show_glyphs_only(font.decode_utf8("Hal lo").insert(2, -35))
|
1088
|
+
assert_equal(0, @canvas.text_cursor[0])
|
1089
|
+
assert_equal(0, @canvas.text_cursor[1])
|
1090
|
+
assert_operators(@canvas.contents, [[:set_font_and_size, [:F1, 20]],
|
1091
|
+
[:set_leading, [24]],
|
1092
|
+
[:begin_text],
|
1093
|
+
[:show_text_with_positioning, [["Ha", -35, "l lo"]]],
|
1094
|
+
])
|
1095
|
+
end
|
1096
|
+
end
|
1097
|
+
|
1083
1098
|
describe "text" do
|
1084
1099
|
it "sets the text cursor position if instructed" do
|
1085
1100
|
@canvas.font("Times", size: 10)
|
@@ -1110,4 +1125,65 @@ describe HexaPDF::Content::Canvas do
|
|
1110
1125
|
])
|
1111
1126
|
end
|
1112
1127
|
end
|
1128
|
+
|
1129
|
+
describe "marked_content_point" do
|
1130
|
+
it "invokes the operator implementation" do
|
1131
|
+
assert_operator_invoked(:MP, :tag) { @canvas.marked_content_point(:tag) }
|
1132
|
+
assert_operator_invoked(:DP, :tag, :P1) do
|
1133
|
+
@canvas.marked_content_point(:tag, property_list: {key: 5})
|
1134
|
+
end
|
1135
|
+
end
|
1136
|
+
|
1137
|
+
it "is serialized correctly" do
|
1138
|
+
@canvas.marked_content_point(:tag)
|
1139
|
+
assert_operators(@canvas.contents, [[:designate_marked_content_point, [:tag]]])
|
1140
|
+
end
|
1141
|
+
|
1142
|
+
it "fails if invoked while in an unsupported graphics objects" do
|
1143
|
+
assert_raises_in_graphics_object(:path, :clipping_path) { @canvas.marked_content_point(:tag) }
|
1144
|
+
end
|
1145
|
+
end
|
1146
|
+
|
1147
|
+
describe "marked_content_sequence" do
|
1148
|
+
it "invokes the operator implementation" do
|
1149
|
+
assert_operator_invoked(:BMC, :tag) { @canvas.marked_content_sequence(:tag) }
|
1150
|
+
assert_operator_invoked(:BDC, :tag, :P1) do
|
1151
|
+
@canvas.marked_content_sequence(:tag, property_list: {key: 5})
|
1152
|
+
end
|
1153
|
+
end
|
1154
|
+
|
1155
|
+
it "is serialized correctly when no block is used" do
|
1156
|
+
@canvas.marked_content_sequence(:tag)
|
1157
|
+
assert_operators(@canvas.contents, [[:begin_marked_content, [:tag]]])
|
1158
|
+
end
|
1159
|
+
|
1160
|
+
it "is serialized correctly when a block is used" do
|
1161
|
+
@canvas.marked_content_sequence(:tag, property_list: {key: 5}) { }
|
1162
|
+
assert_operators(@canvas.contents, [[:begin_marked_content_with_property_list, [:tag, :P1]],
|
1163
|
+
[:end_marked_content]])
|
1164
|
+
end
|
1165
|
+
|
1166
|
+
it "fails if invoked while in an unsupported graphics objects" do
|
1167
|
+
assert_raises_in_graphics_object(:path, :clipping_path) do
|
1168
|
+
@canvas.marked_content_sequence(:tag)
|
1169
|
+
end
|
1170
|
+
end
|
1171
|
+
end
|
1172
|
+
|
1173
|
+
describe "end_marked_content_sequence" do
|
1174
|
+
it "invokes the operator implementation" do
|
1175
|
+
assert_operator_invoked(:EMC) { @canvas.end_marked_content_sequence }
|
1176
|
+
end
|
1177
|
+
|
1178
|
+
it "is serialized correctly" do
|
1179
|
+
@canvas.end_marked_content_sequence
|
1180
|
+
assert_operators(@page.contents, [[:end_marked_content]])
|
1181
|
+
end
|
1182
|
+
|
1183
|
+
it "fails if invoked while in an unsupported graphics objects" do
|
1184
|
+
assert_raises_in_graphics_object(:path, :clipping_path) do
|
1185
|
+
@canvas.end_marked_content_sequence
|
1186
|
+
end
|
1187
|
+
end
|
1188
|
+
end
|
1113
1189
|
end
|
@@ -21,6 +21,10 @@ describe HexaPDF::Content::Parser do
|
|
21
21
|
end
|
22
22
|
|
23
23
|
describe "parse" do
|
24
|
+
before do
|
25
|
+
@image_data = "x\x9Ccd\xC0\x00\xBB\x1F<\xC6\x14\xA43EI JP)\xB8w\xFDZ\xBA\xA7;Ae\xC4;u\xDB\xF2e\xFD\x95\xE5\x04\x95u/[a`e\x8DK\xD6UA\x96*\xEE\xD9\xBFyS[n6\xD9FQ\xCB\x19\x04\x8DZz\xEC\x84\x98\x944\xB2\bA\x97[\xB8\xB86\xCF\x99G\xA4\xED\x04\x1D\x90^]\e\x92\x9AF\x86FL\x97\x13\xAF\x17\b\n\xDB;\xBD\"\xA3\xF0\xAB\x01\x82\xDA\x94\xA4\x13{v\x13T\x86+:\x16\xF4v/\x9D<\x89rg\xE0J\xDBU\x93\xA7\xCA\xAB\xA8(ija\xD5EI\x00\xD2\bP%a\xD3-w\xCC\xDF\x7FPFQ\t\x97,\xC1\xC8%\xD2\x19X#\xD7':V\xCF\xC2\xC2\xCC\xC1\x91\x9B\x97\x97\x18CH\x02\xD4\n@br\a\x9EdF\xB93(\x89>\xEA\x1AB\x87\xC4@\xE7\x9CN\xD3\xB2\x8Bn>\xA2u\x1A#\xC6\x04\n\xC1\x93\xFB\xF7\x12\x1D\xEDq\xC9\x02C2\xBF\xB5\r\xBF\t\x94xa\xB4E\x84\x06^={\x1Ame\x81G\xC1\xEA\xB3\xE7\x05\x84E\xC8v\x00\xADk@\xFC\xC9\x89\x18\a\x10\xB4\x1D\x7F\x13\x91\xF2\x00\xA4\x1C\xE0\xF7\x021y\x8A\x81\xB8$=s\xFBN\\E\xDC\x9A\xD9\xB3f\xB66\x13\xB4\x85\x18\xC7\x10,\xAB\xF1\xF7z\x18\x06G\xC3\x8C*\x91B\x8C3\xF0D\nA\xED\x90\xF2\x01\x00_\x97\xE3\x80\n".b
|
26
|
+
end
|
27
|
+
|
24
28
|
it "parses a simple content stream without inline images" do
|
25
29
|
@parser.parse("0 0.500 m q Q /Name SCN", @processor)
|
26
30
|
assert_equal([[:move_to, [0, 0.5]], [:save_graphics_state],
|
@@ -28,13 +32,28 @@ describe HexaPDF::Content::Parser do
|
|
28
32
|
[:set_stroking_color, [:Name]]], @processor.recorded_ops)
|
29
33
|
end
|
30
34
|
|
31
|
-
it "parses a content stream with inline
|
35
|
+
it "parses a content stream with an inline image without EI in image data" do
|
32
36
|
@parser.parse("q BI /Name 0.5/Other 1 ID some dataEI Q", @processor)
|
33
37
|
assert_equal([[:save_graphics_state],
|
34
38
|
[:inline_image, [{Name: 0.5, Other: 1}, "some data"]],
|
35
39
|
[:restore_graphics_state]], @processor.recorded_ops)
|
36
40
|
end
|
37
41
|
|
42
|
+
it "parses a content stream with an inline image with EI in image data" do
|
43
|
+
@parser.parse("BI\n/CS/RGB\nID #{@image_data}EI Q\nq 1308 0 0 1 485.996 4531.67 cm\n".b,
|
44
|
+
@processor)
|
45
|
+
assert_equal([[:inline_image, [{CS: :RGB}, @image_data]],
|
46
|
+
[:restore_graphics_state],
|
47
|
+
[:save_graphics_state],
|
48
|
+
[:concatenate_matrix, [1308, 0, 0, 1, 485.996, 4531.67]]
|
49
|
+
], @processor.recorded_ops)
|
50
|
+
end
|
51
|
+
|
52
|
+
it "parses a content stream with an inline image with EI in image data at end of stream" do
|
53
|
+
@parser.parse("BI\n/CS/RGB\nID #{@image_data}EI".b, @processor)
|
54
|
+
assert_equal([[:inline_image, [{CS: :RGB}, @image_data]]], @processor.recorded_ops)
|
55
|
+
end
|
56
|
+
|
38
57
|
it "fails parsing inline images if the dictionary keys are not PDF names" do
|
39
58
|
exp = assert_raises(HexaPDF::Error) do
|
40
59
|
@parser.parse("q BI /Name 0.5 Other 1 ID some dataEI Q", @processor)
|
@@ -112,13 +112,15 @@ describe HexaPDF::Content::Processor do
|
|
112
112
|
|
113
113
|
describe "text decoding" do
|
114
114
|
before do
|
115
|
-
@doc =
|
115
|
+
@doc = HexaPDF::Document.new
|
116
116
|
@processor.process(:BT)
|
117
117
|
@processor.graphics_state.font = @font = @doc.add(Type: :Font, Subtype: :Type1,
|
118
118
|
Encoding: :WinAnsiEncoding,
|
119
119
|
BaseFont: :"Times-Roman")
|
120
120
|
@processor.graphics_state.font_size = 10
|
121
121
|
@processor.graphics_state.text_rise = 10
|
122
|
+
@processor.graphics_state.character_spacing = 1
|
123
|
+
@processor.graphics_state.word_spacing = 2
|
122
124
|
end
|
123
125
|
|
124
126
|
describe "decode_text" do
|
@@ -135,16 +137,18 @@ describe HexaPDF::Content::Processor do
|
|
135
137
|
@processor.graphics_state.text_rise
|
136
138
|
lry = @font.bounding_box[3] / 1000.0 * @processor.graphics_state.font_size +
|
137
139
|
@processor.graphics_state.text_rise
|
138
|
-
arr = ["Hül".encode("Windows-1252"), 20, "le".encode("Windows-1252")]
|
139
|
-
width = "
|
140
|
-
width = (width - 20) * @processor.graphics_state.
|
140
|
+
arr = ["Hül".encode("Windows-1252"), 20, " le".encode("Windows-1252")]
|
141
|
+
width = "Hül le".encode("Windows-1252").codepoints.inject(0) {|s, cp| s + @font.width(cp)}
|
142
|
+
width = (width - 20) * @processor.graphics_state.scaled_font_size +
|
143
|
+
6 * @processor.graphics_state.scaled_character_spacing +
|
144
|
+
@processor.graphics_state.scaled_word_spacing
|
141
145
|
|
142
146
|
box = @processor.send(:decode_text_with_positioning, arr)
|
143
|
-
assert_equal("
|
147
|
+
assert_equal("Hül le", box.string)
|
144
148
|
assert_in_delta(0, box[0].lower_left[0])
|
145
149
|
assert_in_delta(lly, box[0].lower_left[1])
|
146
|
-
assert_in_delta(width, box[
|
147
|
-
assert_in_delta(lry, box[
|
150
|
+
assert_in_delta(width, box[5].upper_right[0])
|
151
|
+
assert_in_delta(lry, box[5].upper_right[1])
|
148
152
|
end
|
149
153
|
|
150
154
|
it "fails if the current font is a vertical font" do
|
@@ -31,7 +31,9 @@ describe HexaPDF::Document::Fonts do
|
|
31
31
|
end
|
32
32
|
|
33
33
|
it "caches loaded fonts" do
|
34
|
-
|
34
|
+
font = @doc.fonts.load(:TestFont)
|
35
|
+
assert_same(font, @doc.fonts.load(:TestFont))
|
36
|
+
assert_same(font, @doc.fonts.load(:TestFont, variant: :none))
|
35
37
|
end
|
36
38
|
|
37
39
|
it "fails if the requested font is not found" do
|
@@ -10,6 +10,7 @@ describe HexaPDF::Font::CMap::Parser do
|
|
10
10
|
/CIDInit /ProcSet findresource begin
|
11
11
|
12 dict begin
|
12
12
|
begincmap
|
13
|
+
/H usecmap
|
13
14
|
/CIDSystemInfo
|
14
15
|
<< /Registry (Adobe)
|
15
16
|
/Ordering (UCS)
|
@@ -17,9 +18,21 @@ begincmap
|
|
17
18
|
>> def
|
18
19
|
/CMapName /Adobe-Identity-UCS def
|
19
20
|
/CMapType 2 def
|
20
|
-
|
21
|
-
|
21
|
+
/WMode 0 def
|
22
|
+
4 begincodespacerange
|
23
|
+
<00> <20>
|
24
|
+
<8140> <9ffc>
|
25
|
+
<a0> <de>
|
26
|
+
<e040> <fbec>
|
22
27
|
endcodespacerange
|
28
|
+
2 begincidchar
|
29
|
+
<8143> 8286
|
30
|
+
<8144> 8274
|
31
|
+
endcidchar
|
32
|
+
2 begincidrange
|
33
|
+
<8145> <8145> 8123
|
34
|
+
<8146> <8148> 9000
|
35
|
+
endcidrange
|
23
36
|
2 beginbfrange
|
24
37
|
<0000> <005E> <0020>
|
25
38
|
<1379> <137B> <90FE>
|
@@ -38,18 +51,40 @@ EOF
|
|
38
51
|
assert_equal("UCS", cmap.ordering)
|
39
52
|
assert_equal(0, cmap.supplement)
|
40
53
|
assert_equal("Adobe-Identity-UCS", cmap.name)
|
54
|
+
assert_equal(0, cmap.wmode)
|
55
|
+
|
56
|
+
# Check mappings from used CMap
|
57
|
+
assert_equal([0x2121, 0x7e7e], cmap.read_codes("\x21\x21\x7e\x7e"))
|
58
|
+
assert_equal(633, cmap.to_cid(0x2121))
|
59
|
+
assert_equal(6455, cmap.to_cid(0x6930))
|
60
|
+
|
61
|
+
# Check codespace ranges
|
62
|
+
assert_equal([0, 0x10, 0x20, 33088, 34175, 40956, 160, 205, 222],
|
63
|
+
cmap.read_codes("\x00\x10\x20\x81\x40\x85\x7f\x9f\xfc\xa0\xcd\xde"))
|
64
|
+
|
65
|
+
# Check individual character mappings
|
66
|
+
assert_equal(8286, cmap.to_cid(0x8143))
|
67
|
+
assert_equal(8274, cmap.to_cid(0x8144))
|
68
|
+
|
69
|
+
# Check CID ranges
|
70
|
+
assert_equal(8123, cmap.to_cid(0x8145))
|
71
|
+
assert_equal(9000, cmap.to_cid(0x8146))
|
72
|
+
assert_equal(9001, cmap.to_cid(0x8147))
|
73
|
+
assert_equal(9002, cmap.to_cid(0x8148))
|
74
|
+
|
75
|
+
# Check unicode mapping
|
41
76
|
((0x20.chr)..(0x7e.chr)).each_with_index do |str, index|
|
42
77
|
assert_equal(str, cmap.to_unicode(index))
|
43
78
|
end
|
44
|
-
assert_equal("\u{90FE}", cmap.to_unicode(
|
45
|
-
assert_equal("\u{90FF}", cmap.to_unicode(
|
46
|
-
assert_equal("\u{9100}", cmap.to_unicode(
|
79
|
+
assert_equal("\u{90FE}", cmap.to_unicode(0x1379))
|
80
|
+
assert_equal("\u{90FF}", cmap.to_unicode(0x137A))
|
81
|
+
assert_equal("\u{9100}", cmap.to_unicode(0x137B))
|
47
82
|
assert_equal("ff", cmap.to_unicode(0x5F))
|
48
83
|
assert_equal("fi", cmap.to_unicode(0x60))
|
49
84
|
assert_equal("ffl", cmap.to_unicode(0x61))
|
50
85
|
assert_equal("\xD8\x40\xDC\x3E".encode("UTF-8", "UTF-16BE"),
|
51
|
-
cmap.to_unicode(
|
52
|
-
|
86
|
+
cmap.to_unicode(0x3A51))
|
87
|
+
assert_nil(cmap.to_unicode(0xFF))
|
53
88
|
end
|
54
89
|
|
55
90
|
it "fails if there is an invalid token inside the bfrange operator" do
|
@@ -36,9 +36,9 @@ describe HexaPDF::Font::Encoding::GlyphList do
|
|
36
36
|
assert_equal("\u275e", @list.name_to_unicode(:a100, zapf_dingbats: true))
|
37
37
|
end
|
38
38
|
|
39
|
-
it "returns
|
40
|
-
|
41
|
-
|
39
|
+
it "returns nil for unknown glyph names" do
|
40
|
+
assert_nil(@list.name_to_unicode(:MyUnknownGlyphName))
|
41
|
+
assert_nil(@list.name_to_unicode(:a100))
|
42
42
|
end
|
43
43
|
end
|
44
44
|
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'test_helper'
|
4
|
+
require 'hexapdf/font/cmap'
|
5
|
+
|
6
|
+
describe HexaPDF::Font::CMap do
|
7
|
+
before do
|
8
|
+
@cmap = HexaPDF::Font::CMap.new
|
9
|
+
end
|
10
|
+
|
11
|
+
describe "using another CMap" do
|
12
|
+
it "uses all mappings of the other CMap" do
|
13
|
+
other = HexaPDF::Font::CMap.new
|
14
|
+
other.add_codespace_range(0x00..0x80)
|
15
|
+
other.add_codespace_range(0x81..0x9f, 0x40..0xfc)
|
16
|
+
other.add_cid_mapping(0x40, 2000)
|
17
|
+
other.add_cid_range(0x50, 0x60, 3000)
|
18
|
+
other.add_unicode_mapping(0x40, "A")
|
19
|
+
@cmap.use_cmap(other)
|
20
|
+
|
21
|
+
assert_equal([0, 0x80, 0x8140], @cmap.read_codes("\x0\x80\x81\x40"))
|
22
|
+
assert_equal(2000, @cmap.to_cid(0x40))
|
23
|
+
assert_equal(3000, @cmap.to_cid(0x50))
|
24
|
+
assert_equal(3016, @cmap.to_cid(0x60))
|
25
|
+
assert_equal("A", @cmap.to_unicode(0x40))
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
describe "predefined CMaps" do
|
30
|
+
it "can check if there is a predefined CMap for a certain name" do
|
31
|
+
assert(HexaPDF::Font::CMap.predefined?('H'))
|
32
|
+
refute(HexaPDF::Font::CMap.predefined?('Z'))
|
33
|
+
end
|
34
|
+
|
35
|
+
it "returns a predefined CMap using ::for_name" do
|
36
|
+
cmap = HexaPDF::Font::CMap.for_name('GB-EUC-H')
|
37
|
+
assert_equal("Adobe", cmap.registry)
|
38
|
+
assert_equal("GB1", cmap.ordering)
|
39
|
+
assert_equal(0, cmap.supplement)
|
40
|
+
assert_equal('GB-EUC-H', cmap.name)
|
41
|
+
end
|
42
|
+
|
43
|
+
it "fails in a non-existent CMap file should be parsed" do
|
44
|
+
assert_raises(HexaPDF::Error) { HexaPDF::Font::CMap.for_name('unknown') }
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
describe "add codespace ranges and read codes" do
|
49
|
+
before do
|
50
|
+
@cmap.add_codespace_range(0x00..0x80)
|
51
|
+
@cmap.add_codespace_range(0x81..0x9f, 0x40..0xfc)
|
52
|
+
@cmap.add_codespace_range(0xa0..0xde)
|
53
|
+
@cmap.add_codespace_range(0xe0..0xfb, 0x40..0xec)
|
54
|
+
end
|
55
|
+
|
56
|
+
it "can read valid character codes" do
|
57
|
+
assert_equal([0, 0x40, 0x80, 33088, 34175, 40956, 160, 205, 222],
|
58
|
+
@cmap.read_codes("\x00\x40\x80\x81\x40\x85\x7f\x9f\xfc\xa0\xcd\xde"))
|
59
|
+
end
|
60
|
+
|
61
|
+
it "fails if the first byte is not valid" do
|
62
|
+
assert_raises(HexaPDF::Error) { @cmap.read_codes("\xdf") }
|
63
|
+
end
|
64
|
+
|
65
|
+
it "fails if a byte following the first one is not valid" do
|
66
|
+
assert_raises(HexaPDF::Error) { @cmap.read_codes("\x82\x10") }
|
67
|
+
end
|
68
|
+
|
69
|
+
it "fails if too few bytes for a valid code are available" do
|
70
|
+
assert_raises(HexaPDF::Error) { @cmap.read_codes("\x82") }
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
describe "CID definition and retrieval" do
|
75
|
+
it "allows adding and retrieving mappings from individual codes to CIDs" do
|
76
|
+
@cmap.add_cid_mapping(57, 90)
|
77
|
+
assert_equal(90, @cmap.to_cid(57))
|
78
|
+
end
|
79
|
+
|
80
|
+
it "allows adding and retrieving mappings from code ranges to CIDs" do
|
81
|
+
@cmap.add_cid_range(20, 40, 100)
|
82
|
+
@cmap.add_cid_range(30, 35, 10)
|
83
|
+
assert_equal(100, @cmap.to_cid(20))
|
84
|
+
assert_equal(120, @cmap.to_cid(40))
|
85
|
+
assert_equal(10, @cmap.to_cid(30))
|
86
|
+
assert_equal(15, @cmap.to_cid(35))
|
87
|
+
end
|
88
|
+
|
89
|
+
it "returns 0 for unknown code-to-CID mappings" do
|
90
|
+
assert_equal(0, @cmap.to_cid(57))
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
describe "Unicode mapping and retrieval" do
|
95
|
+
it "allows adding and retrieving a code-to-unicode mapping" do
|
96
|
+
@cmap.add_unicode_mapping(20, "ABC")
|
97
|
+
assert_equal("ABC", @cmap.to_unicode(20))
|
98
|
+
end
|
99
|
+
|
100
|
+
it "returns nil for unknown mappings" do
|
101
|
+
assert_nil(@cmap.to_unicode(20))
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|