hexapdf 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (142) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +68 -0
  3. data/CONTRIBUTERS +1 -1
  4. data/README.md +35 -4
  5. data/Rakefile +1 -0
  6. data/VERSION +1 -1
  7. data/data/hexapdf/cmap/83pv-RKSJ-H +314 -0
  8. data/data/hexapdf/cmap/90ms-RKSJ-H +259 -0
  9. data/data/hexapdf/cmap/90ms-RKSJ-V +156 -0
  10. data/data/hexapdf/cmap/90msp-RKSJ-H +257 -0
  11. data/data/hexapdf/cmap/90msp-RKSJ-V +155 -0
  12. data/data/hexapdf/cmap/90pv-RKSJ-H +355 -0
  13. data/data/hexapdf/cmap/Add-RKSJ-H +738 -0
  14. data/data/hexapdf/cmap/Add-RKSJ-V +135 -0
  15. data/data/hexapdf/cmap/Adobe-CNS1-UCS2 +18209 -0
  16. data/data/hexapdf/cmap/Adobe-GB1-UCS2 +14267 -0
  17. data/data/hexapdf/cmap/Adobe-Japan1-UCS2 +19159 -0
  18. data/data/hexapdf/cmap/Adobe-Korea1-UCS2 +9267 -0
  19. data/data/hexapdf/cmap/B5pc-H +337 -0
  20. data/data/hexapdf/cmap/B5pc-V +90 -0
  21. data/data/hexapdf/cmap/CNS-EUC-H +490 -0
  22. data/data/hexapdf/cmap/CNS-EUC-V +538 -0
  23. data/data/hexapdf/cmap/ETen-B5-H +343 -0
  24. data/data/hexapdf/cmap/ETen-B5-V +91 -0
  25. data/data/hexapdf/cmap/ETenms-B5-H +79 -0
  26. data/data/hexapdf/cmap/ETenms-B5-V +99 -0
  27. data/data/hexapdf/cmap/EUC-H +207 -0
  28. data/data/hexapdf/cmap/EUC-V +105 -0
  29. data/data/hexapdf/cmap/Ext-RKSJ-H +768 -0
  30. data/data/hexapdf/cmap/Ext-RKSJ-V +117 -0
  31. data/data/hexapdf/cmap/GB-EUC-H +173 -0
  32. data/data/hexapdf/cmap/GB-EUC-V +98 -0
  33. data/data/hexapdf/cmap/GBK-EUC-H +4273 -0
  34. data/data/hexapdf/cmap/GBK-EUC-V +97 -0
  35. data/data/hexapdf/cmap/GBK2K-H +5325 -0
  36. data/data/hexapdf/cmap/GBK2K-V +118 -0
  37. data/data/hexapdf/cmap/GBKp-EUC-H +4272 -0
  38. data/data/hexapdf/cmap/GBKp-EUC-V +97 -0
  39. data/data/hexapdf/cmap/GBpc-EUC-H +175 -0
  40. data/data/hexapdf/cmap/GBpc-EUC-V +98 -0
  41. data/data/hexapdf/cmap/H +200 -0
  42. data/data/hexapdf/cmap/HKscs-B5-H +1331 -0
  43. data/data/hexapdf/cmap/HKscs-B5-V +90 -0
  44. data/data/hexapdf/cmap/Identity-H +339 -0
  45. data/data/hexapdf/cmap/Identity-V +73 -0
  46. data/data/hexapdf/cmap/KSC-EUC-H +562 -0
  47. data/data/hexapdf/cmap/KSC-EUC-V +94 -0
  48. data/data/hexapdf/cmap/KSCms-UHC-H +776 -0
  49. data/data/hexapdf/cmap/KSCms-UHC-HW-H +775 -0
  50. data/data/hexapdf/cmap/KSCms-UHC-HW-V +93 -0
  51. data/data/hexapdf/cmap/KSCms-UHC-V +94 -0
  52. data/data/hexapdf/cmap/KSCpc-EUC-H +608 -0
  53. data/data/hexapdf/cmap/LICENSE.txt +26 -0
  54. data/data/hexapdf/cmap/README.txt +9 -0
  55. data/data/hexapdf/cmap/UniCNS-UCS2-H +16992 -0
  56. data/data/hexapdf/cmap/UniCNS-UCS2-V +90 -0
  57. data/data/hexapdf/cmap/UniCNS-UTF16-H +19117 -0
  58. data/data/hexapdf/cmap/UniCNS-UTF16-V +94 -0
  59. data/data/hexapdf/cmap/UniGB-UCS2-H +14321 -0
  60. data/data/hexapdf/cmap/UniGB-UCS2-V +101 -0
  61. data/data/hexapdf/cmap/UniGB-UTF16-H +14381 -0
  62. data/data/hexapdf/cmap/UniGB-UTF16-V +104 -0
  63. data/data/hexapdf/cmap/UniJIS-UCS2-H +8870 -0
  64. data/data/hexapdf/cmap/UniJIS-UCS2-HW-H +81 -0
  65. data/data/hexapdf/cmap/UniJIS-UCS2-HW-V +279 -0
  66. data/data/hexapdf/cmap/UniJIS-UCS2-V +275 -0
  67. data/data/hexapdf/cmap/UniJIS-UTF16-H +14450 -0
  68. data/data/hexapdf/cmap/UniJIS-UTF16-V +299 -0
  69. data/data/hexapdf/cmap/UniKS-UCS2-H +8725 -0
  70. data/data/hexapdf/cmap/UniKS-UCS2-V +95 -0
  71. data/data/hexapdf/cmap/UniKS-UTF16-H +8895 -0
  72. data/data/hexapdf/cmap/UniKS-UTF16-V +99 -0
  73. data/data/hexapdf/cmap/V +105 -0
  74. data/examples/arc.rb +3 -3
  75. data/examples/merging.rb +4 -1
  76. data/examples/optimizing.rb +3 -0
  77. data/examples/show_char_bboxes.rb +2 -2
  78. data/examples/truetype.rb +2 -2
  79. data/lib/hexapdf/cli.rb +40 -1
  80. data/lib/hexapdf/cli/batch.rb +72 -0
  81. data/lib/hexapdf/cli/command.rb +112 -15
  82. data/lib/hexapdf/cli/files.rb +2 -2
  83. data/lib/hexapdf/cli/images.rb +14 -6
  84. data/lib/hexapdf/cli/info.rb +6 -8
  85. data/lib/hexapdf/cli/inspect.rb +5 -8
  86. data/lib/hexapdf/cli/merge.rb +13 -20
  87. data/lib/hexapdf/cli/modify.rb +4 -7
  88. data/lib/hexapdf/cli/optimize.rb +2 -5
  89. data/lib/hexapdf/configuration.rb +32 -3
  90. data/lib/hexapdf/content/canvas.rb +130 -37
  91. data/lib/hexapdf/content/parser.rb +40 -6
  92. data/lib/hexapdf/content/processor.rb +4 -4
  93. data/lib/hexapdf/document.rb +40 -10
  94. data/lib/hexapdf/document/fonts.rb +1 -0
  95. data/lib/hexapdf/encryption/security_handler.rb +8 -12
  96. data/lib/hexapdf/filter/flate_decode.rb +25 -2
  97. data/lib/hexapdf/font/cmap.rb +124 -8
  98. data/lib/hexapdf/font/cmap/parser.rb +65 -15
  99. data/lib/hexapdf/font/encoding/base.rb +2 -2
  100. data/lib/hexapdf/font/encoding/glyph_list.rb +2 -4
  101. data/lib/hexapdf/font/true_type.rb +1 -0
  102. data/lib/hexapdf/font/true_type/builder.rb +75 -0
  103. data/lib/hexapdf/font/true_type/optimizer.rb +65 -0
  104. data/lib/hexapdf/font/true_type/subsetter.rb +9 -22
  105. data/lib/hexapdf/font/true_type_wrapper.rb +9 -21
  106. data/lib/hexapdf/font_loader.rb +1 -1
  107. data/lib/hexapdf/importer.rb +1 -1
  108. data/lib/hexapdf/serializer.rb +5 -3
  109. data/lib/hexapdf/type.rb +2 -0
  110. data/lib/hexapdf/type/cid_font.rb +120 -0
  111. data/lib/hexapdf/type/font.rb +32 -12
  112. data/lib/hexapdf/type/font_simple.rb +34 -42
  113. data/lib/hexapdf/type/font_type0.rb +148 -0
  114. data/lib/hexapdf/type/form.rb +4 -4
  115. data/lib/hexapdf/type/page.rb +12 -11
  116. data/lib/hexapdf/type/resources.rb +14 -0
  117. data/lib/hexapdf/utils/graphics_helpers.rb +77 -0
  118. data/lib/hexapdf/version.rb +1 -1
  119. data/man/man1/hexapdf.1 +43 -1
  120. data/test/hexapdf/content/test_canvas.rb +76 -0
  121. data/test/hexapdf/content/test_parser.rb +20 -1
  122. data/test/hexapdf/content/test_processor.rb +11 -7
  123. data/test/hexapdf/document/test_fonts.rb +3 -1
  124. data/test/hexapdf/font/cmap/test_parser.rb +42 -7
  125. data/test/hexapdf/font/encoding/test_base.rb +1 -1
  126. data/test/hexapdf/font/encoding/test_glyph_list.rb +3 -3
  127. data/test/hexapdf/font/test_cmap.rb +104 -0
  128. data/test/hexapdf/font/test_true_type_wrapper.rb +63 -46
  129. data/test/hexapdf/font/true_type/test_builder.rb +37 -0
  130. data/test/hexapdf/font/true_type/test_optimizer.rb +27 -0
  131. data/test/hexapdf/font/true_type/test_subsetter.rb +6 -13
  132. data/test/hexapdf/test_configuration.rb +12 -7
  133. data/test/hexapdf/test_document.rb +24 -0
  134. data/test/hexapdf/test_importer.rb +9 -1
  135. data/test/hexapdf/test_writer.rb +2 -2
  136. data/test/hexapdf/type/test_cid_font.rb +61 -0
  137. data/test/hexapdf/type/test_font.rb +31 -4
  138. data/test/hexapdf/type/test_font_simple.rb +6 -21
  139. data/test/hexapdf/type/test_font_type0.rb +114 -0
  140. data/test/hexapdf/type/test_resources.rb +17 -1
  141. data/test/hexapdf/utils/test_graphics_helpers.rb +29 -0
  142. metadata +82 -3
@@ -34,6 +34,6 @@
34
34
  module HexaPDF
35
35
 
36
36
  # The version of HexaPDF.
37
- VERSION = '0.3.0'.freeze
37
+ VERSION = '0.4.0'.freeze
38
38
 
39
39
  end
data/man/man1/hexapdf.1 CHANGED
@@ -28,6 +28,8 @@ Merging multiple PDF files into one (see the \fBmerge\fP command)
28
28
  Modifying an existing PDF file (see the \fBmodify\fP command)
29
29
  .IP \(bu 4
30
30
  Optimizing the file size of a PDF file (see the \fBoptimize\fP command)
31
+ .IP \(bu 4
32
+ Batch execution of a command on multiple PDF files (see the \fBbatch\fP command)
31
33
  .PD
32
34
  .P
33
35
  The application contains a built\-in \fBhelp\fP command that can be used to provide a quick reminder of a command\[u2019]s purpose and its options\.
@@ -42,6 +44,19 @@ These options are available on every command (except if they are overridden):
42
44
  \fB\-\-[no\-]force\fP
43
45
  Force overwriting existing files\. Default: \fIfalse\fP\&\.
44
46
  .TP
47
+ \fB\-\-strict\fP
48
+ Enable strict parsing and validation\. By default, correctable parse errors and validation problems are treated as warnings which allows processing most PDF files\. If this option is used, correctable parse errors and uncorrectable validation problems are treated as errors\.
49
+ .RS
50
+ .P
51
+ Note that a PDF file may have validation errors and still be usable since most viewing applications are very forgiving\.
52
+ .RE
53
+ .TP
54
+ \fB\-\-verbose\fP, \fB\-v\fP
55
+ Enable more verbose output\. There are three verbosity levels: 0 (no output), 1 (warning output) and 2 (warning and informational output)\. The default level is 1, specifying this option increases it to 2\.
56
+ .TP
57
+ \fB\-\-quiet\fP
58
+ Suppress any output by setting the verbosity level to 0\. Also see the description of \fB\-\-verbose\fP above\.
59
+ .TP
45
60
  \fB\-h\fP, \fB\-\-help\fP
46
61
  Show the help for the application if no command was specified, or the command help otherwise\.
47
62
  .SS "Optimization Options"
@@ -61,6 +76,9 @@ Defines how streams should be treated: \fIcompress\fP will compress them when po
61
76
  .TP
62
77
  \fB\-\-[no\-]compress\-pages\fP
63
78
  Recompress page content streams\. This is a very expensive operation in terms of processing time and won\[u2019]t lead to great file size improvements in many cases\. Default: \fIno\fP\&\.
79
+ .TP
80
+ \fB\-\-[no\-]optimize\-fonts\fP
81
+ Optimize embedded font files by removing normally unneeded font data\. Note that this may have a negative effect on PDFs with forms since form entry usually requires fully embedded font files\. Default: \fIno\fP\&\.
64
82
  .SS "Encryption Options"
65
83
  These options can only be used with the \fBmerge\fP and \fBmodify\fP commands and control if and how an output PDF file should be encrypted\. All options except \fB\-\-decrypt\fP automatically enable \fB\-\-encrypt\fP\&\.
66
84
  .P
@@ -141,6 +159,14 @@ allow high quality printing
141
159
  hexapdf uses a command\-style interface\. This means that it provides different functionalities depending on the used command, and each command can have its own options\.
142
160
  .P
143
161
  There is no need to write the full command name for hexapdf to understand it, the only requirement is that it must be unambiguous\. So using \fBf\fP for the \fBfiles\fP command is sufficient\. The same is true for long option names and option values\.
162
+ .P
163
+ Any command that reads and writes a PDF file may do in\-place processing of the file\. This is automatically done if an input file name is the same as the output file name\. Note that the option \fB\-\-force\fP has to be used in this case\.
164
+ .SS "batch"
165
+ Synopsis: \fBbatch\fP \fICOMMAND\fP \fIFILES\.\.\.\fP
166
+ .P
167
+ This command allows executing a single command for multiple input files, thereby reducing the overall execution time\.
168
+ .P
169
+ The first argument \fICOMMAND\fP is used as a hexapdf command line and must not contain the binary name, just everything else\. The rest of the arguments are the input files\. The specified command will be executed for each input file, with all occurrences of {} being replaced by the file name\.
144
170
  .SS "files"
145
171
  Synopsis: \fBfiles\fP [\fBOPTIONS\fP] \fIPDF\fP
146
172
  .P
@@ -297,6 +323,8 @@ Some commands allow the specification of pages using a \fIPAGES\fP argument\. Th
297
323
  .P
298
324
  If the start number of a page range is higher than the end number, the pages are used in the reverse order\.
299
325
  .P
326
+ Single page numbers that are not valid are ignored\. If a page number in a page range is higher than the page number of the last page, the page number of the last page is used instead\.
327
+ .P
300
328
  Step values can be used with page ranges\. If a range is followed by \fI/STEP\fP, \fISTEP\fP \- 1 pages are skipped after each used page\.
301
329
  .P
302
330
  Additionally, the page numbers and ranges can be suffixed with a rotation modifier:
@@ -367,9 +395,15 @@ Optimization: Compress the \fBinput\.pdf\fP to get a smaller file size\.
367
395
  .SS "files"
368
396
  \fBhexapdf files input\.pdf\fP
369
397
  .br
370
- \fBhexapdf files input\.pdf \-i 1\fP
398
+ \fBhexapdf files input\.pdf \-e 1\fP
371
399
  .P
372
400
  Embedded files: The first command lists the embedded files in the \fBinput\.pdf\fP, the second one then extracts the embedded file with the index 1\.
401
+ .SS "images"
402
+ \fBhexapdf images input\.pdf\fP
403
+ .br
404
+ \fBhexapdf images input\.pdf \-e \-\-prefix images/image\fP
405
+ .P
406
+ Image info and extraction: The first command lists the images of the \fBinput\.pdf\fP, the second one then extracts the images into the subdirectory \fBimages\fP with the prefix \fBimage\fP\&\.
373
407
  .SS "info"
374
408
  \fBhexapdf info input\.pdf\fP
375
409
  .P
@@ -380,6 +414,14 @@ File information: Show general information about the PDF file, like PDF version,
380
414
  \fBhexapdf inspect input\.pdf \-o 3\fP
381
415
  .P
382
416
  Inspect a PDF: These commands can be used to inspect the internal object structure of a PDF file\. The first command shows the PDF trailer object\. The second one shows the object with the object number 3\.
417
+ .SS "batch"
418
+ \fBhexapdf batch \'info {}\' input1\.pdf input2\.pdf input3\.pdf\fP
419
+ .P
420
+ Execute the info command for all input files\.
421
+ .P
422
+ \fBhexapdf batch \'optimize \-\-object\-streams delete {} done\-{}\' input1\.pdf input2\.pdf input3\.pdf\fP
423
+ .P
424
+ Optimize the given input files, creating the three output files \fBdone\-input1\.pdf\fP, \fBdone\-input2\.pdf\fP and \fBdone\-input3\.pdf\fP\&\.
383
425
  .SH "EXIT STATUS"
384
426
  The exit status is 0 if no error happened\. Otherwise it is 1\.
385
427
  .SH "SEE ALSO"
@@ -1080,6 +1080,21 @@ describe HexaPDF::Content::Canvas do
1080
1080
  end
1081
1081
  end
1082
1082
 
1083
+ describe "show_glyphs_only" do
1084
+ it "serializes correctly" do
1085
+ @canvas.font("Times", size: 20)
1086
+ font = @canvas.font
1087
+ @canvas.show_glyphs_only(font.decode_utf8("Hal lo").insert(2, -35))
1088
+ assert_equal(0, @canvas.text_cursor[0])
1089
+ assert_equal(0, @canvas.text_cursor[1])
1090
+ assert_operators(@canvas.contents, [[:set_font_and_size, [:F1, 20]],
1091
+ [:set_leading, [24]],
1092
+ [:begin_text],
1093
+ [:show_text_with_positioning, [["Ha", -35, "l lo"]]],
1094
+ ])
1095
+ end
1096
+ end
1097
+
1083
1098
  describe "text" do
1084
1099
  it "sets the text cursor position if instructed" do
1085
1100
  @canvas.font("Times", size: 10)
@@ -1110,4 +1125,65 @@ describe HexaPDF::Content::Canvas do
1110
1125
  ])
1111
1126
  end
1112
1127
  end
1128
+
1129
+ describe "marked_content_point" do
1130
+ it "invokes the operator implementation" do
1131
+ assert_operator_invoked(:MP, :tag) { @canvas.marked_content_point(:tag) }
1132
+ assert_operator_invoked(:DP, :tag, :P1) do
1133
+ @canvas.marked_content_point(:tag, property_list: {key: 5})
1134
+ end
1135
+ end
1136
+
1137
+ it "is serialized correctly" do
1138
+ @canvas.marked_content_point(:tag)
1139
+ assert_operators(@canvas.contents, [[:designate_marked_content_point, [:tag]]])
1140
+ end
1141
+
1142
+ it "fails if invoked while in an unsupported graphics objects" do
1143
+ assert_raises_in_graphics_object(:path, :clipping_path) { @canvas.marked_content_point(:tag) }
1144
+ end
1145
+ end
1146
+
1147
+ describe "marked_content_sequence" do
1148
+ it "invokes the operator implementation" do
1149
+ assert_operator_invoked(:BMC, :tag) { @canvas.marked_content_sequence(:tag) }
1150
+ assert_operator_invoked(:BDC, :tag, :P1) do
1151
+ @canvas.marked_content_sequence(:tag, property_list: {key: 5})
1152
+ end
1153
+ end
1154
+
1155
+ it "is serialized correctly when no block is used" do
1156
+ @canvas.marked_content_sequence(:tag)
1157
+ assert_operators(@canvas.contents, [[:begin_marked_content, [:tag]]])
1158
+ end
1159
+
1160
+ it "is serialized correctly when a block is used" do
1161
+ @canvas.marked_content_sequence(:tag, property_list: {key: 5}) { }
1162
+ assert_operators(@canvas.contents, [[:begin_marked_content_with_property_list, [:tag, :P1]],
1163
+ [:end_marked_content]])
1164
+ end
1165
+
1166
+ it "fails if invoked while in an unsupported graphics objects" do
1167
+ assert_raises_in_graphics_object(:path, :clipping_path) do
1168
+ @canvas.marked_content_sequence(:tag)
1169
+ end
1170
+ end
1171
+ end
1172
+
1173
+ describe "end_marked_content_sequence" do
1174
+ it "invokes the operator implementation" do
1175
+ assert_operator_invoked(:EMC) { @canvas.end_marked_content_sequence }
1176
+ end
1177
+
1178
+ it "is serialized correctly" do
1179
+ @canvas.end_marked_content_sequence
1180
+ assert_operators(@page.contents, [[:end_marked_content]])
1181
+ end
1182
+
1183
+ it "fails if invoked while in an unsupported graphics objects" do
1184
+ assert_raises_in_graphics_object(:path, :clipping_path) do
1185
+ @canvas.end_marked_content_sequence
1186
+ end
1187
+ end
1188
+ end
1113
1189
  end
@@ -21,6 +21,10 @@ describe HexaPDF::Content::Parser do
21
21
  end
22
22
 
23
23
  describe "parse" do
24
+ before do
25
+ @image_data = "x\x9Ccd\xC0\x00\xBB\x1F<\xC6\x14\xA43EI JP)\xB8w\xFDZ\xBA\xA7;Ae\xC4;u\xDB\xF2e\xFD\x95\xE5\x04\x95u/[a`e\x8DK\xD6UA\x96*\xEE\xD9\xBFyS[n6\xD9FQ\xCB\x19\x04\x8DZz\xEC\x84\x98\x944\xB2\bA\x97[\xB8\xB86\xCF\x99G\xA4\xED\x04\x1D\x90^]\e\x92\x9AF\x86FL\x97\x13\xAF\x17\b\n\xDB;\xBD\"\xA3\xF0\xAB\x01\x82\xDA\x94\xA4\x13{v\x13T\x86+:\x16\xF4v/\x9D<\x89rg\xE0J\xDBU\x93\xA7\xCA\xAB\xA8(ija\xD5EI\x00\xD2\bP%a\xD3-w\xCC\xDF\x7FPFQ\t\x97,\xC1\xC8%\xD2\x19X#\xD7':V\xCF\xC2\xC2\xCC\xC1\x91\x9B\x97\x97\x18CH\x02\xD4\n@br\a\x9EdF\xB93(\x89>\xEA\x1AB\x87\xC4@\xE7\x9CN\xD3\xB2\x8Bn>\xA2u\x1A#\xC6\x04\n\xC1\x93\xFB\xF7\x12\x1D\xEDq\xC9\x02C2\xBF\xB5\r\xBF\t\x94xa\xB4E\x84\x06^={\x1Ame\x81G\xC1\xEA\xB3\xE7\x05\x84E\xC8v\x00\xADk@\xFC\xC9\x89\x18\a\x10\xB4\x1D\x7F\x13\x91\xF2\x00\xA4\x1C\xE0\xF7\x021y\x8A\x81\xB8$=s\xFBN\\E\xDC\x9A\xD9\xB3f\xB66\x13\xB4\x85\x18\xC7\x10,\xAB\xF1\xF7z\x18\x06G\xC3\x8C*\x91B\x8C3\xF0D\nA\xED\x90\xF2\x01\x00_\x97\xE3\x80\n".b
26
+ end
27
+
24
28
  it "parses a simple content stream without inline images" do
25
29
  @parser.parse("0 0.500 m q Q /Name SCN", @processor)
26
30
  assert_equal([[:move_to, [0, 0.5]], [:save_graphics_state],
@@ -28,13 +32,28 @@ describe HexaPDF::Content::Parser do
28
32
  [:set_stroking_color, [:Name]]], @processor.recorded_ops)
29
33
  end
30
34
 
31
- it "parses a content stream with inline images" do
35
+ it "parses a content stream with an inline image without EI in image data" do
32
36
  @parser.parse("q BI /Name 0.5/Other 1 ID some dataEI Q", @processor)
33
37
  assert_equal([[:save_graphics_state],
34
38
  [:inline_image, [{Name: 0.5, Other: 1}, "some data"]],
35
39
  [:restore_graphics_state]], @processor.recorded_ops)
36
40
  end
37
41
 
42
+ it "parses a content stream with an inline image with EI in image data" do
43
+ @parser.parse("BI\n/CS/RGB\nID #{@image_data}EI Q\nq 1308 0 0 1 485.996 4531.67 cm\n".b,
44
+ @processor)
45
+ assert_equal([[:inline_image, [{CS: :RGB}, @image_data]],
46
+ [:restore_graphics_state],
47
+ [:save_graphics_state],
48
+ [:concatenate_matrix, [1308, 0, 0, 1, 485.996, 4531.67]]
49
+ ], @processor.recorded_ops)
50
+ end
51
+
52
+ it "parses a content stream with an inline image with EI in image data at end of stream" do
53
+ @parser.parse("BI\n/CS/RGB\nID #{@image_data}EI".b, @processor)
54
+ assert_equal([[:inline_image, [{CS: :RGB}, @image_data]]], @processor.recorded_ops)
55
+ end
56
+
38
57
  it "fails parsing inline images if the dictionary keys are not PDF names" do
39
58
  exp = assert_raises(HexaPDF::Error) do
40
59
  @parser.parse("q BI /Name 0.5 Other 1 ID some dataEI Q", @processor)
@@ -112,13 +112,15 @@ describe HexaPDF::Content::Processor do
112
112
 
113
113
  describe "text decoding" do
114
114
  before do
115
- @doc = HexaPDF::Document.new
115
+ @doc = HexaPDF::Document.new
116
116
  @processor.process(:BT)
117
117
  @processor.graphics_state.font = @font = @doc.add(Type: :Font, Subtype: :Type1,
118
118
  Encoding: :WinAnsiEncoding,
119
119
  BaseFont: :"Times-Roman")
120
120
  @processor.graphics_state.font_size = 10
121
121
  @processor.graphics_state.text_rise = 10
122
+ @processor.graphics_state.character_spacing = 1
123
+ @processor.graphics_state.word_spacing = 2
122
124
  end
123
125
 
124
126
  describe "decode_text" do
@@ -135,16 +137,18 @@ describe HexaPDF::Content::Processor do
135
137
  @processor.graphics_state.text_rise
136
138
  lry = @font.bounding_box[3] / 1000.0 * @processor.graphics_state.font_size +
137
139
  @processor.graphics_state.text_rise
138
- arr = ["Hül".encode("Windows-1252"), 20, "le".encode("Windows-1252")]
139
- width = "Hülle".encode("Windows-1252").codepoints.inject(0) {|s, cp| s + @font.width(cp)}
140
- width = (width - 20) * @processor.graphics_state.font_size / 1000.0
140
+ arr = ["Hül".encode("Windows-1252"), 20, " le".encode("Windows-1252")]
141
+ width = "Hül le".encode("Windows-1252").codepoints.inject(0) {|s, cp| s + @font.width(cp)}
142
+ width = (width - 20) * @processor.graphics_state.scaled_font_size +
143
+ 6 * @processor.graphics_state.scaled_character_spacing +
144
+ @processor.graphics_state.scaled_word_spacing
141
145
 
142
146
  box = @processor.send(:decode_text_with_positioning, arr)
143
- assert_equal("Hülle", box.string)
147
+ assert_equal("Hül le", box.string)
144
148
  assert_in_delta(0, box[0].lower_left[0])
145
149
  assert_in_delta(lly, box[0].lower_left[1])
146
- assert_in_delta(width, box[4].upper_right[0])
147
- assert_in_delta(lry, box[4].upper_right[1])
150
+ assert_in_delta(width, box[5].upper_right[0])
151
+ assert_in_delta(lry, box[5].upper_right[1])
148
152
  end
149
153
 
150
154
  it "fails if the current font is a vertical font" do
@@ -31,7 +31,9 @@ describe HexaPDF::Document::Fonts do
31
31
  end
32
32
 
33
33
  it "caches loaded fonts" do
34
- assert_same(@doc.fonts.load(:TestFont), @doc.fonts.load(:TestFont))
34
+ font = @doc.fonts.load(:TestFont)
35
+ assert_same(font, @doc.fonts.load(:TestFont))
36
+ assert_same(font, @doc.fonts.load(:TestFont, variant: :none))
35
37
  end
36
38
 
37
39
  it "fails if the requested font is not found" do
@@ -10,6 +10,7 @@ describe HexaPDF::Font::CMap::Parser do
10
10
  /CIDInit /ProcSet findresource begin
11
11
  12 dict begin
12
12
  begincmap
13
+ /H usecmap
13
14
  /CIDSystemInfo
14
15
  << /Registry (Adobe)
15
16
  /Ordering (UCS)
@@ -17,9 +18,21 @@ begincmap
17
18
  >> def
18
19
  /CMapName /Adobe-Identity-UCS def
19
20
  /CMapType 2 def
20
- 1 begincodespacerange
21
- <0000> <FFFF>
21
+ /WMode 0 def
22
+ 4 begincodespacerange
23
+ <00> <20>
24
+ <8140> <9ffc>
25
+ <a0> <de>
26
+ <e040> <fbec>
22
27
  endcodespacerange
28
+ 2 begincidchar
29
+ <8143> 8286
30
+ <8144> 8274
31
+ endcidchar
32
+ 2 begincidrange
33
+ <8145> <8145> 8123
34
+ <8146> <8148> 9000
35
+ endcidrange
23
36
  2 beginbfrange
24
37
  <0000> <005E> <0020>
25
38
  <1379> <137B> <90FE>
@@ -38,18 +51,40 @@ EOF
38
51
  assert_equal("UCS", cmap.ordering)
39
52
  assert_equal(0, cmap.supplement)
40
53
  assert_equal("Adobe-Identity-UCS", cmap.name)
54
+ assert_equal(0, cmap.wmode)
55
+
56
+ # Check mappings from used CMap
57
+ assert_equal([0x2121, 0x7e7e], cmap.read_codes("\x21\x21\x7e\x7e"))
58
+ assert_equal(633, cmap.to_cid(0x2121))
59
+ assert_equal(6455, cmap.to_cid(0x6930))
60
+
61
+ # Check codespace ranges
62
+ assert_equal([0, 0x10, 0x20, 33088, 34175, 40956, 160, 205, 222],
63
+ cmap.read_codes("\x00\x10\x20\x81\x40\x85\x7f\x9f\xfc\xa0\xcd\xde"))
64
+
65
+ # Check individual character mappings
66
+ assert_equal(8286, cmap.to_cid(0x8143))
67
+ assert_equal(8274, cmap.to_cid(0x8144))
68
+
69
+ # Check CID ranges
70
+ assert_equal(8123, cmap.to_cid(0x8145))
71
+ assert_equal(9000, cmap.to_cid(0x8146))
72
+ assert_equal(9001, cmap.to_cid(0x8147))
73
+ assert_equal(9002, cmap.to_cid(0x8148))
74
+
75
+ # Check unicode mapping
41
76
  ((0x20.chr)..(0x7e.chr)).each_with_index do |str, index|
42
77
  assert_equal(str, cmap.to_unicode(index))
43
78
  end
44
- assert_equal("\u{90FE}", cmap.to_unicode(0x13 * 256 + 0x79))
45
- assert_equal("\u{90FF}", cmap.to_unicode(0x13 * 256 + 0x7A))
46
- assert_equal("\u{9100}", cmap.to_unicode(0x13 * 256 + 0x7B))
79
+ assert_equal("\u{90FE}", cmap.to_unicode(0x1379))
80
+ assert_equal("\u{90FF}", cmap.to_unicode(0x137A))
81
+ assert_equal("\u{9100}", cmap.to_unicode(0x137B))
47
82
  assert_equal("ff", cmap.to_unicode(0x5F))
48
83
  assert_equal("fi", cmap.to_unicode(0x60))
49
84
  assert_equal("ffl", cmap.to_unicode(0x61))
50
85
  assert_equal("\xD8\x40\xDC\x3E".encode("UTF-8", "UTF-16BE"),
51
- cmap.to_unicode(0x3A * 256 + 0x51))
52
- assert_equal("", cmap.to_unicode(0xFF))
86
+ cmap.to_unicode(0x3A51))
87
+ assert_nil(cmap.to_unicode(0xFF))
53
88
  end
54
89
 
55
90
  it "fails if there is an invalid token inside the bfrange operator" do
@@ -29,7 +29,7 @@ describe HexaPDF::Font::Encoding::Base do
29
29
  end
30
30
 
31
31
  it "returns an empty string for an unmapped code" do
32
- assert_equal('', @base.unicode(66))
32
+ assert_nil(@base.unicode(66))
33
33
  end
34
34
  end
35
35
  end
@@ -36,9 +36,9 @@ describe HexaPDF::Font::Encoding::GlyphList do
36
36
  assert_equal("\u275e", @list.name_to_unicode(:a100, zapf_dingbats: true))
37
37
  end
38
38
 
39
- it "returns an empty string for unknown glyph names" do
40
- assert_equal('', @list.name_to_unicode(:MyUnknownGlyphName))
41
- assert_equal('', @list.name_to_unicode(:a100))
39
+ it "returns nil for unknown glyph names" do
40
+ assert_nil(@list.name_to_unicode(:MyUnknownGlyphName))
41
+ assert_nil(@list.name_to_unicode(:a100))
42
42
  end
43
43
  end
44
44
 
@@ -0,0 +1,104 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'test_helper'
4
+ require 'hexapdf/font/cmap'
5
+
6
+ describe HexaPDF::Font::CMap do
7
+ before do
8
+ @cmap = HexaPDF::Font::CMap.new
9
+ end
10
+
11
+ describe "using another CMap" do
12
+ it "uses all mappings of the other CMap" do
13
+ other = HexaPDF::Font::CMap.new
14
+ other.add_codespace_range(0x00..0x80)
15
+ other.add_codespace_range(0x81..0x9f, 0x40..0xfc)
16
+ other.add_cid_mapping(0x40, 2000)
17
+ other.add_cid_range(0x50, 0x60, 3000)
18
+ other.add_unicode_mapping(0x40, "A")
19
+ @cmap.use_cmap(other)
20
+
21
+ assert_equal([0, 0x80, 0x8140], @cmap.read_codes("\x0\x80\x81\x40"))
22
+ assert_equal(2000, @cmap.to_cid(0x40))
23
+ assert_equal(3000, @cmap.to_cid(0x50))
24
+ assert_equal(3016, @cmap.to_cid(0x60))
25
+ assert_equal("A", @cmap.to_unicode(0x40))
26
+ end
27
+ end
28
+
29
+ describe "predefined CMaps" do
30
+ it "can check if there is a predefined CMap for a certain name" do
31
+ assert(HexaPDF::Font::CMap.predefined?('H'))
32
+ refute(HexaPDF::Font::CMap.predefined?('Z'))
33
+ end
34
+
35
+ it "returns a predefined CMap using ::for_name" do
36
+ cmap = HexaPDF::Font::CMap.for_name('GB-EUC-H')
37
+ assert_equal("Adobe", cmap.registry)
38
+ assert_equal("GB1", cmap.ordering)
39
+ assert_equal(0, cmap.supplement)
40
+ assert_equal('GB-EUC-H', cmap.name)
41
+ end
42
+
43
+ it "fails in a non-existent CMap file should be parsed" do
44
+ assert_raises(HexaPDF::Error) { HexaPDF::Font::CMap.for_name('unknown') }
45
+ end
46
+ end
47
+
48
+ describe "add codespace ranges and read codes" do
49
+ before do
50
+ @cmap.add_codespace_range(0x00..0x80)
51
+ @cmap.add_codespace_range(0x81..0x9f, 0x40..0xfc)
52
+ @cmap.add_codespace_range(0xa0..0xde)
53
+ @cmap.add_codespace_range(0xe0..0xfb, 0x40..0xec)
54
+ end
55
+
56
+ it "can read valid character codes" do
57
+ assert_equal([0, 0x40, 0x80, 33088, 34175, 40956, 160, 205, 222],
58
+ @cmap.read_codes("\x00\x40\x80\x81\x40\x85\x7f\x9f\xfc\xa0\xcd\xde"))
59
+ end
60
+
61
+ it "fails if the first byte is not valid" do
62
+ assert_raises(HexaPDF::Error) { @cmap.read_codes("\xdf") }
63
+ end
64
+
65
+ it "fails if a byte following the first one is not valid" do
66
+ assert_raises(HexaPDF::Error) { @cmap.read_codes("\x82\x10") }
67
+ end
68
+
69
+ it "fails if too few bytes for a valid code are available" do
70
+ assert_raises(HexaPDF::Error) { @cmap.read_codes("\x82") }
71
+ end
72
+ end
73
+
74
+ describe "CID definition and retrieval" do
75
+ it "allows adding and retrieving mappings from individual codes to CIDs" do
76
+ @cmap.add_cid_mapping(57, 90)
77
+ assert_equal(90, @cmap.to_cid(57))
78
+ end
79
+
80
+ it "allows adding and retrieving mappings from code ranges to CIDs" do
81
+ @cmap.add_cid_range(20, 40, 100)
82
+ @cmap.add_cid_range(30, 35, 10)
83
+ assert_equal(100, @cmap.to_cid(20))
84
+ assert_equal(120, @cmap.to_cid(40))
85
+ assert_equal(10, @cmap.to_cid(30))
86
+ assert_equal(15, @cmap.to_cid(35))
87
+ end
88
+
89
+ it "returns 0 for unknown code-to-CID mappings" do
90
+ assert_equal(0, @cmap.to_cid(57))
91
+ end
92
+ end
93
+
94
+ describe "Unicode mapping and retrieval" do
95
+ it "allows adding and retrieving a code-to-unicode mapping" do
96
+ @cmap.add_unicode_mapping(20, "ABC")
97
+ assert_equal("ABC", @cmap.to_unicode(20))
98
+ end
99
+
100
+ it "returns nil for unknown mappings" do
101
+ assert_nil(@cmap.to_unicode(20))
102
+ end
103
+ end
104
+ end