@vaadin-component-factory/vcf-pdf-viewer 0.9.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. package/README.md +1 -1
  2. package/package.json +42 -26
  3. package/{src/display → pdfjs/dist}/display_utils.js +344 -139
  4. package/{src/display → pdfjs/dist}/fetch_stream.js +115 -97
  5. package/pdfjs/dist/l10n_utils.js +140 -0
  6. package/{src/shared → pdfjs/dist}/message_handler.js +243 -259
  7. package/{src/display → pdfjs/dist}/network.js +149 -87
  8. package/{src/display/content_disposition.js → pdfjs/dist/network_utils.js} +167 -55
  9. package/{src/display → pdfjs/dist}/node_stream.js +133 -98
  10. package/pdfjs/dist/pdf.js +12778 -0
  11. package/pdfjs/dist/pdf_link_service.js +638 -0
  12. package/pdfjs/dist/pdf_rendering_queue.js +199 -0
  13. package/pdfjs/dist/pdf_thumbnail_viewer.js +819 -0
  14. package/pdfjs/dist/pdf_viewer.js +3598 -0
  15. package/pdfjs/dist/ui_utils.js +1033 -0
  16. package/{src/shared → pdfjs/dist}/util.js +301 -287
  17. package/pdfjs/dist/worker.js +62813 -0
  18. package/src/vcf-pdf-viewer.js +98 -46
  19. package/theme/lumo/vcf-pdf-viewer-styles.js +4 -4
  20. package/theme/material/vcf-pdf-viewer-styles.js +4 -4
  21. package/theme/material/vcf-pdf-viewer.js +2 -2
  22. package/src/core/.eslintrc +0 -13
  23. package/src/core/annotation.js +0 -2948
  24. package/src/core/arithmetic_decoder.js +0 -182
  25. package/src/core/ascii_85_stream.js +0 -98
  26. package/src/core/ascii_hex_stream.js +0 -79
  27. package/src/core/base_stream.js +0 -110
  28. package/src/core/bidi.js +0 -438
  29. package/src/core/calibri_factors.js +0 -308
  30. package/src/core/catalog.js +0 -1459
  31. package/src/core/ccitt.js +0 -1062
  32. package/src/core/ccitt_stream.js +0 -60
  33. package/src/core/cff_font.js +0 -116
  34. package/src/core/cff_parser.js +0 -1949
  35. package/src/core/charsets.js +0 -119
  36. package/src/core/chunked_stream.js +0 -557
  37. package/src/core/cmap.js +0 -1039
  38. package/src/core/colorspace.js +0 -1533
  39. package/src/core/core_utils.js +0 -464
  40. package/src/core/crypto.js +0 -1900
  41. package/src/core/decode_stream.js +0 -170
  42. package/src/core/decrypt_stream.js +0 -59
  43. package/src/core/default_appearance.js +0 -99
  44. package/src/core/document.js +0 -1456
  45. package/src/core/encodings.js +0 -301
  46. package/src/core/evaluator.js +0 -4601
  47. package/src/core/file_spec.js +0 -108
  48. package/src/core/flate_stream.js +0 -402
  49. package/src/core/font_renderer.js +0 -882
  50. package/src/core/fonts.js +0 -3260
  51. package/src/core/fonts_utils.js +0 -221
  52. package/src/core/function.js +0 -1257
  53. package/src/core/glyf.js +0 -706
  54. package/src/core/glyphlist.js +0 -4558
  55. package/src/core/helvetica_factors.js +0 -353
  56. package/src/core/image.js +0 -802
  57. package/src/core/image_utils.js +0 -291
  58. package/src/core/jbig2.js +0 -2572
  59. package/src/core/jbig2_stream.js +0 -73
  60. package/src/core/jpeg_stream.js +0 -105
  61. package/src/core/jpg.js +0 -1416
  62. package/src/core/jpx.js +0 -2343
  63. package/src/core/jpx_stream.js +0 -87
  64. package/src/core/liberationsans_widths.js +0 -221
  65. package/src/core/lzw_stream.js +0 -150
  66. package/src/core/metadata_parser.js +0 -146
  67. package/src/core/metrics.js +0 -2970
  68. package/src/core/murmurhash3.js +0 -139
  69. package/src/core/myriadpro_factors.js +0 -290
  70. package/src/core/name_number_tree.js +0 -153
  71. package/src/core/object_loader.js +0 -149
  72. package/src/core/opentype_file_builder.js +0 -154
  73. package/src/core/operator_list.js +0 -734
  74. package/src/core/parser.js +0 -1416
  75. package/src/core/pattern.js +0 -985
  76. package/src/core/pdf_manager.js +0 -217
  77. package/src/core/predictor_stream.js +0 -238
  78. package/src/core/primitives.js +0 -402
  79. package/src/core/ps_parser.js +0 -272
  80. package/src/core/run_length_stream.js +0 -61
  81. package/src/core/segoeui_factors.js +0 -308
  82. package/src/core/standard_fonts.js +0 -817
  83. package/src/core/stream.js +0 -103
  84. package/src/core/struct_tree.js +0 -335
  85. package/src/core/to_unicode_map.js +0 -103
  86. package/src/core/type1_font.js +0 -421
  87. package/src/core/type1_parser.js +0 -776
  88. package/src/core/unicode.js +0 -1649
  89. package/src/core/worker.js +0 -848
  90. package/src/core/worker_stream.js +0 -135
  91. package/src/core/writer.js +0 -278
  92. package/src/core/xfa/bind.js +0 -652
  93. package/src/core/xfa/builder.js +0 -207
  94. package/src/core/xfa/config.js +0 -1926
  95. package/src/core/xfa/connection_set.js +0 -202
  96. package/src/core/xfa/data.js +0 -82
  97. package/src/core/xfa/datasets.js +0 -76
  98. package/src/core/xfa/factory.js +0 -111
  99. package/src/core/xfa/fonts.js +0 -181
  100. package/src/core/xfa/formcalc_lexer.js +0 -385
  101. package/src/core/xfa/formcalc_parser.js +0 -1340
  102. package/src/core/xfa/html_utils.js +0 -639
  103. package/src/core/xfa/layout.js +0 -383
  104. package/src/core/xfa/locale_set.js +0 -345
  105. package/src/core/xfa/namespaces.js +0 -81
  106. package/src/core/xfa/parser.js +0 -184
  107. package/src/core/xfa/setup.js +0 -38
  108. package/src/core/xfa/signature.js +0 -40
  109. package/src/core/xfa/som.js +0 -338
  110. package/src/core/xfa/stylesheet.js +0 -40
  111. package/src/core/xfa/template.js +0 -6260
  112. package/src/core/xfa/text.js +0 -290
  113. package/src/core/xfa/unknown.js +0 -29
  114. package/src/core/xfa/utils.js +0 -217
  115. package/src/core/xfa/xdp.js +0 -59
  116. package/src/core/xfa/xfa_object.js +0 -1130
  117. package/src/core/xfa/xhtml.js +0 -543
  118. package/src/core/xfa_fonts.js +0 -208
  119. package/src/core/xml_parser.js +0 -507
  120. package/src/core/xref.js +0 -899
  121. package/src/display/annotation_layer.js +0 -2107
  122. package/src/display/annotation_storage.js +0 -113
  123. package/src/display/api.js +0 -3292
  124. package/src/display/base_factory.js +0 -180
  125. package/src/display/canvas.js +0 -2828
  126. package/src/display/font_loader.js +0 -484
  127. package/src/display/metadata.js +0 -41
  128. package/src/display/network_utils.js +0 -100
  129. package/src/display/node_utils.js +0 -83
  130. package/src/display/optional_content_config.js +0 -189
  131. package/src/display/pattern_helper.js +0 -659
  132. package/src/display/svg.js +0 -1709
  133. package/src/display/text_layer.js +0 -847
  134. package/src/display/transport_stream.js +0 -303
  135. package/src/display/worker_options.js +0 -40
  136. package/src/display/xfa_layer.js +0 -204
  137. package/src/doc_helper.js +0 -25
  138. package/src/images/logo.svg +0 -41
  139. package/src/interfaces.js +0 -169
  140. package/src/license_header.js +0 -14
  141. package/src/license_header_libre.js +0 -21
  142. package/src/pdf.image_decoders.js +0 -46
  143. package/src/pdf.js +0 -146
  144. package/src/pdf.sandbox.external.js +0 -181
  145. package/src/pdf.sandbox.js +0 -151
  146. package/src/pdf.scripting.js +0 -25
  147. package/src/pdf.worker.entry.js +0 -19
  148. package/src/pdf.worker.js +0 -23
  149. package/src/scripting_api/aform.js +0 -608
  150. package/src/scripting_api/app.js +0 -621
  151. package/src/scripting_api/color.js +0 -129
  152. package/src/scripting_api/common.js +0 -58
  153. package/src/scripting_api/console.js +0 -38
  154. package/src/scripting_api/constants.js +0 -208
  155. package/src/scripting_api/doc.js +0 -1195
  156. package/src/scripting_api/error.js +0 -23
  157. package/src/scripting_api/event.js +0 -232
  158. package/src/scripting_api/field.js +0 -620
  159. package/src/scripting_api/fullscreen.js +0 -145
  160. package/src/scripting_api/initialization.js +0 -223
  161. package/src/scripting_api/pdf_object.js +0 -24
  162. package/src/scripting_api/print_params.js +0 -146
  163. package/src/scripting_api/proxy.js +0 -139
  164. package/src/scripting_api/thermometer.js +0 -69
  165. package/src/scripting_api/util.js +0 -581
  166. package/src/shared/.eslintrc +0 -13
  167. package/src/shared/cffStandardStrings.js +0 -311
  168. package/src/shared/compatibility.js +0 -114
  169. package/src/shared/fonts_utils.js +0 -429
  170. package/src/shared/is_node.js +0 -27
  171. package/src/shared/scripting_utils.js +0 -85
  172. package/src/worker_loader.js +0 -32
@@ -1,4601 +0,0 @@
1
- /* Copyright 2012 Mozilla Foundation
2
- *
3
- * Licensed under the Apache License, Version 2.0 (the "License");
4
- * you may not use this file except in compliance with the License.
5
- * You may obtain a copy of the License at
6
- *
7
- * http://www.apache.org/licenses/LICENSE-2.0
8
- *
9
- * Unless required by applicable law or agreed to in writing, software
10
- * distributed under the License is distributed on an "AS IS" BASIS,
11
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- * See the License for the specific language governing permissions and
13
- * limitations under the License.
14
- */
15
- /* eslint-disable no-var */
16
-
17
- import {
18
- AbortException,
19
- assert,
20
- CMapCompressionType,
21
- createPromiseCapability,
22
- FONT_IDENTITY_MATRIX,
23
- FormatError,
24
- IDENTITY_MATRIX,
25
- info,
26
- isArrayEqual,
27
- isNum,
28
- isString,
29
- OPS,
30
- shadow,
31
- stringToPDFString,
32
- TextRenderingMode,
33
- UNSUPPORTED_FEATURES,
34
- Util,
35
- warn,
36
- } from "../shared/util.js";
37
- import { CMapFactory, IdentityCMap } from "./cmap.js";
38
- import {
39
- Cmd,
40
- Dict,
41
- EOF,
42
- isDict,
43
- isName,
44
- isRef,
45
- isStream,
46
- Name,
47
- Ref,
48
- RefSet,
49
- } from "./primitives.js";
50
- import { ErrorFont, Font } from "./fonts.js";
51
- import { FontFlags, getFontType } from "./fonts_utils.js";
52
- import {
53
- getEncoding,
54
- MacRomanEncoding,
55
- StandardEncoding,
56
- SymbolSetEncoding,
57
- WinAnsiEncoding,
58
- ZapfDingbatsEncoding,
59
- } from "./encodings.js";
60
- import {
61
- getFontNameToFileMap,
62
- getSerifFonts,
63
- getStandardFontName,
64
- getStdFontMap,
65
- getSymbolsFonts,
66
- } from "./standard_fonts.js";
67
- import {
68
- getNormalizedUnicodes,
69
- getUnicodeForGlyph,
70
- reverseIfRtl,
71
- } from "./unicode.js";
72
- import { getTilingPatternIR, Pattern } from "./pattern.js";
73
- import { IdentityToUnicodeMap, ToUnicodeMap } from "./to_unicode_map.js";
74
- import { isPDFFunction, PDFFunctionFactory } from "./function.js";
75
- import { Lexer, Parser } from "./parser.js";
76
- import {
77
- LocalColorSpaceCache,
78
- LocalGStateCache,
79
- LocalImageCache,
80
- LocalTilingPatternCache,
81
- } from "./image_utils.js";
82
- import { NullStream, Stream } from "./stream.js";
83
- import { bidi } from "./bidi.js";
84
- import { ColorSpace } from "./colorspace.js";
85
- import { DecodeStream } from "./decode_stream.js";
86
- import { getGlyphsUnicode } from "./glyphlist.js";
87
- import { getLookupTableFactory } from "./core_utils.js";
88
- import { getMetrics } from "./metrics.js";
89
- import { getXfaFontName } from "./xfa_fonts.js";
90
- import { MurmurHash3_64 } from "./murmurhash3.js";
91
- import { OperatorList } from "./operator_list.js";
92
- import { PDFImage } from "./image.js";
93
-
94
- const DefaultPartialEvaluatorOptions = Object.freeze({
95
- maxImageSize: -1,
96
- disableFontFace: false,
97
- ignoreErrors: false,
98
- isEvalSupported: true,
99
- fontExtraProperties: false,
100
- useSystemFonts: true,
101
- cMapUrl: null,
102
- standardFontDataUrl: null,
103
- });
104
-
105
- const PatternType = {
106
- TILING: 1,
107
- SHADING: 2,
108
- };
109
-
110
- const deferred = Promise.resolve();
111
-
112
- // Convert PDF blend mode names to HTML5 blend mode names.
113
- function normalizeBlendMode(value, parsingArray = false) {
114
- if (Array.isArray(value)) {
115
- // Use the first *supported* BM value in the Array (fixes issue11279.pdf).
116
- for (let i = 0, ii = value.length; i < ii; i++) {
117
- const maybeBM = normalizeBlendMode(value[i], /* parsingArray = */ true);
118
- if (maybeBM) {
119
- return maybeBM;
120
- }
121
- }
122
- warn(`Unsupported blend mode Array: ${value}`);
123
- return "source-over";
124
- }
125
-
126
- if (!isName(value)) {
127
- if (parsingArray) {
128
- return null;
129
- }
130
- return "source-over";
131
- }
132
- switch (value.name) {
133
- case "Normal":
134
- case "Compatible":
135
- return "source-over";
136
- case "Multiply":
137
- return "multiply";
138
- case "Screen":
139
- return "screen";
140
- case "Overlay":
141
- return "overlay";
142
- case "Darken":
143
- return "darken";
144
- case "Lighten":
145
- return "lighten";
146
- case "ColorDodge":
147
- return "color-dodge";
148
- case "ColorBurn":
149
- return "color-burn";
150
- case "HardLight":
151
- return "hard-light";
152
- case "SoftLight":
153
- return "soft-light";
154
- case "Difference":
155
- return "difference";
156
- case "Exclusion":
157
- return "exclusion";
158
- case "Hue":
159
- return "hue";
160
- case "Saturation":
161
- return "saturation";
162
- case "Color":
163
- return "color";
164
- case "Luminosity":
165
- return "luminosity";
166
- }
167
- if (parsingArray) {
168
- return null;
169
- }
170
- warn(`Unsupported blend mode: ${value.name}`);
171
- return "source-over";
172
- }
173
-
174
- // Trying to minimize Date.now() usage and check every 100 time.
175
- class TimeSlotManager {
176
- static get TIME_SLOT_DURATION_MS() {
177
- return shadow(this, "TIME_SLOT_DURATION_MS", 20);
178
- }
179
-
180
- static get CHECK_TIME_EVERY() {
181
- return shadow(this, "CHECK_TIME_EVERY", 100);
182
- }
183
-
184
- constructor() {
185
- this.reset();
186
- }
187
-
188
- check() {
189
- if (++this.checked < TimeSlotManager.CHECK_TIME_EVERY) {
190
- return false;
191
- }
192
- this.checked = 0;
193
- return this.endTime <= Date.now();
194
- }
195
-
196
- reset() {
197
- this.endTime = Date.now() + TimeSlotManager.TIME_SLOT_DURATION_MS;
198
- this.checked = 0;
199
- }
200
- }
201
-
202
- class PartialEvaluator {
203
- constructor({
204
- xref,
205
- handler,
206
- pageIndex,
207
- idFactory,
208
- fontCache,
209
- builtInCMapCache,
210
- standardFontDataCache,
211
- globalImageCache,
212
- options = null,
213
- }) {
214
- this.xref = xref;
215
- this.handler = handler;
216
- this.pageIndex = pageIndex;
217
- this.idFactory = idFactory;
218
- this.fontCache = fontCache;
219
- this.builtInCMapCache = builtInCMapCache;
220
- this.standardFontDataCache = standardFontDataCache;
221
- this.globalImageCache = globalImageCache;
222
- this.options = options || DefaultPartialEvaluatorOptions;
223
- this.parsingType3Font = false;
224
-
225
- this._fetchBuiltInCMapBound = this.fetchBuiltInCMap.bind(this);
226
- }
227
-
228
- /**
229
- * Since Functions are only cached (locally) by reference, we can share one
230
- * `PDFFunctionFactory` instance within this `PartialEvaluator` instance.
231
- */
232
- get _pdfFunctionFactory() {
233
- const pdfFunctionFactory = new PDFFunctionFactory({
234
- xref: this.xref,
235
- isEvalSupported: this.options.isEvalSupported,
236
- });
237
- return shadow(this, "_pdfFunctionFactory", pdfFunctionFactory);
238
- }
239
-
240
- clone(newOptions = null) {
241
- const newEvaluator = Object.create(this);
242
- newEvaluator.options = Object.assign(
243
- Object.create(null),
244
- this.options,
245
- newOptions
246
- );
247
- return newEvaluator;
248
- }
249
-
250
- hasBlendModes(resources, nonBlendModesSet) {
251
- if (!(resources instanceof Dict)) {
252
- return false;
253
- }
254
- if (resources.objId && nonBlendModesSet.has(resources.objId)) {
255
- return false;
256
- }
257
-
258
- const processed = new RefSet(nonBlendModesSet);
259
- if (resources.objId) {
260
- processed.put(resources.objId);
261
- }
262
-
263
- const nodes = [resources],
264
- xref = this.xref;
265
- while (nodes.length) {
266
- const node = nodes.shift();
267
- // First check the current resources for blend modes.
268
- const graphicStates = node.get("ExtGState");
269
- if (graphicStates instanceof Dict) {
270
- for (let graphicState of graphicStates.getRawValues()) {
271
- if (graphicState instanceof Ref) {
272
- if (processed.has(graphicState)) {
273
- continue; // The ExtGState has already been processed.
274
- }
275
- try {
276
- graphicState = xref.fetch(graphicState);
277
- } catch (ex) {
278
- // Avoid parsing a corrupt ExtGState more than once.
279
- processed.put(graphicState);
280
-
281
- info(`hasBlendModes - ignoring ExtGState: "${ex}".`);
282
- continue;
283
- }
284
- }
285
- if (!(graphicState instanceof Dict)) {
286
- continue;
287
- }
288
- if (graphicState.objId) {
289
- processed.put(graphicState.objId);
290
- }
291
-
292
- const bm = graphicState.get("BM");
293
- if (bm instanceof Name) {
294
- if (bm.name !== "Normal") {
295
- return true;
296
- }
297
- continue;
298
- }
299
- if (bm !== undefined && Array.isArray(bm)) {
300
- for (const element of bm) {
301
- if (element instanceof Name && element.name !== "Normal") {
302
- return true;
303
- }
304
- }
305
- }
306
- }
307
- }
308
- // Descend into the XObjects to look for more resources and blend modes.
309
- const xObjects = node.get("XObject");
310
- if (!(xObjects instanceof Dict)) {
311
- continue;
312
- }
313
- for (let xObject of xObjects.getRawValues()) {
314
- if (xObject instanceof Ref) {
315
- if (processed.has(xObject)) {
316
- // The XObject has already been processed, and by avoiding a
317
- // redundant `xref.fetch` we can *significantly* reduce the load
318
- // time for badly generated PDF files (fixes issue6961.pdf).
319
- continue;
320
- }
321
- try {
322
- xObject = xref.fetch(xObject);
323
- } catch (ex) {
324
- // Avoid parsing a corrupt XObject more than once.
325
- processed.put(xObject);
326
-
327
- info(`hasBlendModes - ignoring XObject: "${ex}".`);
328
- continue;
329
- }
330
- }
331
- if (!isStream(xObject)) {
332
- continue;
333
- }
334
- if (xObject.dict.objId) {
335
- processed.put(xObject.dict.objId);
336
- }
337
- const xResources = xObject.dict.get("Resources");
338
- if (!(xResources instanceof Dict)) {
339
- continue;
340
- }
341
- // Checking objId to detect an infinite loop.
342
- if (xResources.objId && processed.has(xResources.objId)) {
343
- continue;
344
- }
345
-
346
- nodes.push(xResources);
347
- if (xResources.objId) {
348
- processed.put(xResources.objId);
349
- }
350
- }
351
- }
352
-
353
- // When no blend modes exist, there's no need re-fetch/re-parse any of the
354
- // processed `Ref`s again for subsequent pages. This helps reduce redundant
355
- // `XRef.fetch` calls for some documents (e.g. issue6961.pdf).
356
- processed.forEach(ref => {
357
- nonBlendModesSet.put(ref);
358
- });
359
- return false;
360
- }
361
-
362
- async fetchBuiltInCMap(name) {
363
- const cachedData = this.builtInCMapCache.get(name);
364
- if (cachedData) {
365
- return cachedData;
366
- }
367
- let data;
368
-
369
- if (this.options.cMapUrl !== null) {
370
- // Only compressed CMaps are (currently) supported here.
371
- const url = `${this.options.cMapUrl}${name}.bcmap`;
372
- const response = await fetch(url);
373
- if (!response.ok) {
374
- throw new Error(
375
- `fetchBuiltInCMap: failed to fetch file "${url}" with "${response.statusText}".`
376
- );
377
- }
378
- data = {
379
- cMapData: new Uint8Array(await response.arrayBuffer()),
380
- compressionType: CMapCompressionType.BINARY,
381
- };
382
- } else {
383
- // Get the data on the main-thread instead.
384
- data = await this.handler.sendWithPromise("FetchBuiltInCMap", { name });
385
- }
386
-
387
- if (data.compressionType !== CMapCompressionType.NONE) {
388
- // Given the size of uncompressed CMaps, only cache compressed ones.
389
- this.builtInCMapCache.set(name, data);
390
- }
391
- return data;
392
- }
393
-
394
- async fetchStandardFontData(name) {
395
- const cachedData = this.standardFontDataCache.get(name);
396
- if (cachedData) {
397
- return new Stream(cachedData);
398
- }
399
-
400
- // The symbol fonts are not consistent across platforms, always load the
401
- // standard font data for them.
402
- if (
403
- this.options.useSystemFonts &&
404
- name !== "Symbol" &&
405
- name !== "ZapfDingbats"
406
- ) {
407
- return null;
408
- }
409
-
410
- const standardFontNameToFileName = getFontNameToFileMap(),
411
- filename = standardFontNameToFileName[name];
412
- let data;
413
-
414
- if (this.options.standardFontDataUrl !== null) {
415
- const url = `${this.options.standardFontDataUrl}${filename}`;
416
- const response = await fetch(url);
417
- if (!response.ok) {
418
- warn(
419
- `fetchStandardFontData: failed to fetch file "${url}" with "${response.statusText}".`
420
- );
421
- } else {
422
- data = await response.arrayBuffer();
423
- }
424
- } else {
425
- // Get the data on the main-thread instead.
426
- try {
427
- data = await this.handler.sendWithPromise("FetchStandardFontData", {
428
- filename,
429
- });
430
- } catch (e) {
431
- warn(
432
- `fetchStandardFontData: failed to fetch file "${filename}" with "${e}".`
433
- );
434
- }
435
- }
436
-
437
- if (!data) {
438
- return null;
439
- }
440
- // Cache the "raw" standard font data, to avoid fetching it repeateadly
441
- // (see e.g. issue 11399).
442
- this.standardFontDataCache.set(name, data);
443
-
444
- return new Stream(data);
445
- }
446
-
447
- async buildFormXObject(
448
- resources,
449
- xobj,
450
- smask,
451
- operatorList,
452
- task,
453
- initialState,
454
- localColorSpaceCache
455
- ) {
456
- const dict = xobj.dict;
457
- const matrix = dict.getArray("Matrix");
458
- let bbox = dict.getArray("BBox");
459
- if (Array.isArray(bbox) && bbox.length === 4) {
460
- bbox = Util.normalizeRect(bbox);
461
- } else {
462
- bbox = null;
463
- }
464
- let optionalContent = null,
465
- groupOptions;
466
- if (dict.has("OC")) {
467
- optionalContent = await this.parseMarkedContentProps(
468
- dict.get("OC"),
469
- resources
470
- );
471
- operatorList.addOp(OPS.beginMarkedContentProps, ["OC", optionalContent]);
472
- }
473
- const group = dict.get("Group");
474
- if (group) {
475
- groupOptions = {
476
- matrix,
477
- bbox,
478
- smask,
479
- isolated: false,
480
- knockout: false,
481
- };
482
-
483
- const groupSubtype = group.get("S");
484
- let colorSpace = null;
485
- if (isName(groupSubtype, "Transparency")) {
486
- groupOptions.isolated = group.get("I") || false;
487
- groupOptions.knockout = group.get("K") || false;
488
- if (group.has("CS")) {
489
- const cs = group.getRaw("CS");
490
-
491
- const cachedColorSpace = ColorSpace.getCached(
492
- cs,
493
- this.xref,
494
- localColorSpaceCache
495
- );
496
- if (cachedColorSpace) {
497
- colorSpace = cachedColorSpace;
498
- } else {
499
- colorSpace = await this.parseColorSpace({
500
- cs,
501
- resources,
502
- localColorSpaceCache,
503
- });
504
- }
505
- }
506
- }
507
-
508
- if (smask && smask.backdrop) {
509
- colorSpace = colorSpace || ColorSpace.singletons.rgb;
510
- smask.backdrop = colorSpace.getRgb(smask.backdrop, 0);
511
- }
512
-
513
- operatorList.addOp(OPS.beginGroup, [groupOptions]);
514
- }
515
-
516
- operatorList.addOp(OPS.paintFormXObjectBegin, [matrix, bbox]);
517
-
518
- return this.getOperatorList({
519
- stream: xobj,
520
- task,
521
- resources: dict.get("Resources") || resources,
522
- operatorList,
523
- initialState,
524
- }).then(function () {
525
- operatorList.addOp(OPS.paintFormXObjectEnd, []);
526
-
527
- if (group) {
528
- operatorList.addOp(OPS.endGroup, [groupOptions]);
529
- }
530
-
531
- if (optionalContent) {
532
- operatorList.addOp(OPS.endMarkedContent, []);
533
- }
534
- });
535
- }
536
-
537
- _sendImgData(objId, imgData, cacheGlobally = false) {
538
- const transfers = imgData ? [imgData.data.buffer] : null;
539
-
540
- if (this.parsingType3Font || cacheGlobally) {
541
- return this.handler.send(
542
- "commonobj",
543
- [objId, "Image", imgData],
544
- transfers
545
- );
546
- }
547
- return this.handler.send(
548
- "obj",
549
- [objId, this.pageIndex, "Image", imgData],
550
- transfers
551
- );
552
- }
553
-
554
- async buildPaintImageXObject({
555
- resources,
556
- image,
557
- isInline = false,
558
- operatorList,
559
- cacheKey,
560
- localImageCache,
561
- localColorSpaceCache,
562
- }) {
563
- const dict = image.dict;
564
- const imageRef = dict.objId;
565
- const w = dict.get("Width", "W");
566
- const h = dict.get("Height", "H");
567
-
568
- if (!(w && isNum(w)) || !(h && isNum(h))) {
569
- warn("Image dimensions are missing, or not numbers.");
570
- return undefined;
571
- }
572
- const maxImageSize = this.options.maxImageSize;
573
- if (maxImageSize !== -1 && w * h > maxImageSize) {
574
- warn("Image exceeded maximum allowed size and was removed.");
575
- return undefined;
576
- }
577
-
578
- const imageMask = dict.get("ImageMask", "IM") || false;
579
- let imgData, args;
580
- if (imageMask) {
581
- // This depends on a tmpCanvas being filled with the
582
- // current fillStyle, such that processing the pixel
583
- // data can't be done here. Instead of creating a
584
- // complete PDFImage, only read the information needed
585
- // for later.
586
-
587
- const width = dict.get("Width", "W");
588
- const height = dict.get("Height", "H");
589
- const bitStrideLength = (width + 7) >> 3;
590
- const imgArray = image.getBytes(
591
- bitStrideLength * height,
592
- /* forceClamped = */ true
593
- );
594
- const decode = dict.getArray("Decode", "D");
595
-
596
- imgData = PDFImage.createMask({
597
- imgArray,
598
- width,
599
- height,
600
- imageIsFromDecodeStream: image instanceof DecodeStream,
601
- inverseDecode: !!decode && decode[0] > 0,
602
- });
603
- imgData.cached = !!cacheKey;
604
- args = [imgData];
605
-
606
- operatorList.addOp(OPS.paintImageMaskXObject, args);
607
- if (cacheKey) {
608
- localImageCache.set(cacheKey, imageRef, {
609
- fn: OPS.paintImageMaskXObject,
610
- args,
611
- });
612
- }
613
- return undefined;
614
- }
615
-
616
- const softMask = dict.get("SMask", "SM") || false;
617
- const mask = dict.get("Mask") || false;
618
-
619
- const SMALL_IMAGE_DIMENSIONS = 200;
620
- // Inlining small images into the queue as RGB data
621
- if (isInline && !softMask && !mask && w + h < SMALL_IMAGE_DIMENSIONS) {
622
- const imageObj = new PDFImage({
623
- xref: this.xref,
624
- res: resources,
625
- image,
626
- isInline,
627
- pdfFunctionFactory: this._pdfFunctionFactory,
628
- localColorSpaceCache,
629
- });
630
- // We force the use of RGBA_32BPP images here, because we can't handle
631
- // any other kind.
632
- imgData = imageObj.createImageData(/* forceRGBA = */ true);
633
- operatorList.addOp(OPS.paintInlineImageXObject, [imgData]);
634
- return undefined;
635
- }
636
-
637
- // If there is no imageMask, create the PDFImage and a lot
638
- // of image processing can be done here.
639
- let objId = `img_${this.idFactory.createObjId()}`,
640
- cacheGlobally = false;
641
-
642
- if (this.parsingType3Font) {
643
- objId = `${this.idFactory.getDocId()}_type3_${objId}`;
644
- } else if (imageRef) {
645
- cacheGlobally = this.globalImageCache.shouldCache(
646
- imageRef,
647
- this.pageIndex
648
- );
649
-
650
- if (cacheGlobally) {
651
- objId = `${this.idFactory.getDocId()}_${objId}`;
652
- }
653
- }
654
-
655
- // Ensure that the dependency is added before the image is decoded.
656
- operatorList.addDependency(objId);
657
- args = [objId, w, h];
658
-
659
- PDFImage.buildImage({
660
- xref: this.xref,
661
- res: resources,
662
- image,
663
- isInline,
664
- pdfFunctionFactory: this._pdfFunctionFactory,
665
- localColorSpaceCache,
666
- })
667
- .then(imageObj => {
668
- imgData = imageObj.createImageData(/* forceRGBA = */ false);
669
-
670
- if (cacheKey && imageRef && cacheGlobally) {
671
- this.globalImageCache.addByteSize(imageRef, imgData.data.length);
672
- }
673
- return this._sendImgData(objId, imgData, cacheGlobally);
674
- })
675
- .catch(reason => {
676
- warn(`Unable to decode image "${objId}": "${reason}".`);
677
-
678
- return this._sendImgData(objId, /* imgData = */ null, cacheGlobally);
679
- });
680
-
681
- operatorList.addOp(OPS.paintImageXObject, args);
682
- if (cacheKey) {
683
- localImageCache.set(cacheKey, imageRef, {
684
- fn: OPS.paintImageXObject,
685
- args,
686
- });
687
-
688
- if (imageRef) {
689
- assert(!isInline, "Cannot cache an inline image globally.");
690
- this.globalImageCache.addPageIndex(imageRef, this.pageIndex);
691
-
692
- if (cacheGlobally) {
693
- this.globalImageCache.setData(imageRef, {
694
- objId,
695
- fn: OPS.paintImageXObject,
696
- args,
697
- byteSize: 0, // Temporary entry, note `addByteSize` above.
698
- });
699
- }
700
- }
701
- }
702
- return undefined;
703
- }
704
-
705
- handleSMask(
706
- smask,
707
- resources,
708
- operatorList,
709
- task,
710
- stateManager,
711
- localColorSpaceCache
712
- ) {
713
- const smaskContent = smask.get("G");
714
- const smaskOptions = {
715
- subtype: smask.get("S").name,
716
- backdrop: smask.get("BC"),
717
- };
718
-
719
- // The SMask might have a alpha/luminosity value transfer function --
720
- // we will build a map of integer values in range 0..255 to be fast.
721
- const transferObj = smask.get("TR");
722
- if (isPDFFunction(transferObj)) {
723
- const transferFn = this._pdfFunctionFactory.create(transferObj);
724
- const transferMap = new Uint8Array(256);
725
- const tmp = new Float32Array(1);
726
- for (let i = 0; i < 256; i++) {
727
- tmp[0] = i / 255;
728
- transferFn(tmp, 0, tmp, 0);
729
- transferMap[i] = (tmp[0] * 255) | 0;
730
- }
731
- smaskOptions.transferMap = transferMap;
732
- }
733
-
734
- return this.buildFormXObject(
735
- resources,
736
- smaskContent,
737
- smaskOptions,
738
- operatorList,
739
- task,
740
- stateManager.state.clone(),
741
- localColorSpaceCache
742
- );
743
- }
744
-
745
- handleTransferFunction(tr) {
746
- let transferArray;
747
- if (Array.isArray(tr)) {
748
- transferArray = tr;
749
- } else if (isPDFFunction(tr)) {
750
- transferArray = [tr];
751
- } else {
752
- return null; // Not a valid transfer function entry.
753
- }
754
-
755
- const transferMaps = [];
756
- let numFns = 0,
757
- numEffectfulFns = 0;
758
- for (const entry of transferArray) {
759
- const transferObj = this.xref.fetchIfRef(entry);
760
- numFns++;
761
-
762
- if (isName(transferObj, "Identity")) {
763
- transferMaps.push(null);
764
- continue;
765
- } else if (!isPDFFunction(transferObj)) {
766
- return null; // Not a valid transfer function object.
767
- }
768
-
769
- const transferFn = this._pdfFunctionFactory.create(transferObj);
770
- const transferMap = new Uint8Array(256),
771
- tmp = new Float32Array(1);
772
- for (let j = 0; j < 256; j++) {
773
- tmp[0] = j / 255;
774
- transferFn(tmp, 0, tmp, 0);
775
- transferMap[j] = (tmp[0] * 255) | 0;
776
- }
777
- transferMaps.push(transferMap);
778
- numEffectfulFns++;
779
- }
780
-
781
- if (!(numFns === 1 || numFns === 4)) {
782
- return null; // Only 1 or 4 functions are supported, by the specification.
783
- }
784
- if (numEffectfulFns === 0) {
785
- return null; // Only /Identity transfer functions found, which are no-ops.
786
- }
787
- return transferMaps;
788
- }
789
-
790
- handleTilingType(
791
- fn,
792
- color,
793
- resources,
794
- pattern,
795
- patternDict,
796
- operatorList,
797
- task,
798
- cacheKey,
799
- localTilingPatternCache
800
- ) {
801
- // Create an IR of the pattern code.
802
- const tilingOpList = new OperatorList();
803
- // Merge the available resources, to prevent issues when the patternDict
804
- // is missing some /Resources entries (fixes issue6541.pdf).
805
- const patternResources = Dict.merge({
806
- xref: this.xref,
807
- dictArray: [patternDict.get("Resources"), resources],
808
- });
809
-
810
- return this.getOperatorList({
811
- stream: pattern,
812
- task,
813
- resources: patternResources,
814
- operatorList: tilingOpList,
815
- })
816
- .then(function () {
817
- const operatorListIR = tilingOpList.getIR();
818
- const tilingPatternIR = getTilingPatternIR(
819
- operatorListIR,
820
- patternDict,
821
- color
822
- );
823
- // Add the dependencies to the parent operator list so they are
824
- // resolved before the sub operator list is executed synchronously.
825
- operatorList.addDependencies(tilingOpList.dependencies);
826
- operatorList.addOp(fn, tilingPatternIR);
827
-
828
- if (cacheKey) {
829
- localTilingPatternCache.set(cacheKey, patternDict.objId, {
830
- operatorListIR,
831
- dict: patternDict,
832
- });
833
- }
834
- })
835
- .catch(reason => {
836
- if (reason instanceof AbortException) {
837
- return;
838
- }
839
- if (this.options.ignoreErrors) {
840
- // Error(s) in the TilingPattern -- sending unsupported feature
841
- // notification and allow rendering to continue.
842
- this.handler.send("UnsupportedFeature", {
843
- featureId: UNSUPPORTED_FEATURES.errorTilingPattern,
844
- });
845
- warn(`handleTilingType - ignoring pattern: "${reason}".`);
846
- return;
847
- }
848
- throw reason;
849
- });
850
- }
851
-
852
- handleSetFont(
853
- resources,
854
- fontArgs,
855
- fontRef,
856
- operatorList,
857
- task,
858
- state,
859
- fallbackFontDict = null,
860
- cssFontInfo = null
861
- ) {
862
- const fontName =
863
- fontArgs && fontArgs[0] instanceof Name ? fontArgs[0].name : null;
864
-
865
- return this.loadFont(
866
- fontName,
867
- fontRef,
868
- resources,
869
- fallbackFontDict,
870
- cssFontInfo
871
- )
872
- .then(translated => {
873
- if (!translated.font.isType3Font) {
874
- return translated;
875
- }
876
- return translated
877
- .loadType3Data(this, resources, task)
878
- .then(function () {
879
- // Add the dependencies to the parent operatorList so they are
880
- // resolved before Type3 operatorLists are executed synchronously.
881
- operatorList.addDependencies(translated.type3Dependencies);
882
-
883
- return translated;
884
- })
885
- .catch(reason => {
886
- // Error in the font data -- sending unsupported feature
887
- // notification.
888
- this.handler.send("UnsupportedFeature", {
889
- featureId: UNSUPPORTED_FEATURES.errorFontLoadType3,
890
- });
891
- return new TranslatedFont({
892
- loadedName: "g_font_error",
893
- font: new ErrorFont(`Type3 font load error: ${reason}`),
894
- dict: translated.font,
895
- evaluatorOptions: this.options,
896
- });
897
- });
898
- })
899
- .then(translated => {
900
- state.font = translated.font;
901
- translated.send(this.handler);
902
- return translated.loadedName;
903
- });
904
- }
905
-
906
- handleText(chars, state) {
907
- const font = state.font;
908
- const glyphs = font.charsToGlyphs(chars);
909
-
910
- if (font.data) {
911
- const isAddToPathSet = !!(
912
- state.textRenderingMode & TextRenderingMode.ADD_TO_PATH_FLAG
913
- );
914
- if (
915
- isAddToPathSet ||
916
- state.fillColorSpace.name === "Pattern" ||
917
- font.disableFontFace ||
918
- this.options.disableFontFace
919
- ) {
920
- PartialEvaluator.buildFontPaths(
921
- font,
922
- glyphs,
923
- this.handler,
924
- this.options
925
- );
926
- }
927
- }
928
- return glyphs;
929
- }
930
-
931
- ensureStateFont(state) {
932
- if (state.font) {
933
- return;
934
- }
935
- const reason = new FormatError(
936
- "Missing setFont (Tf) operator before text rendering operator."
937
- );
938
-
939
- if (this.options.ignoreErrors) {
940
- // Missing setFont operator before text rendering operator -- sending
941
- // unsupported feature notification and allow rendering to continue.
942
- this.handler.send("UnsupportedFeature", {
943
- featureId: UNSUPPORTED_FEATURES.errorFontState,
944
- });
945
- warn(`ensureStateFont: "${reason}".`);
946
- return;
947
- }
948
- throw reason;
949
- }
950
-
951
- async setGState({
952
- resources,
953
- gState,
954
- operatorList,
955
- cacheKey,
956
- task,
957
- stateManager,
958
- localGStateCache,
959
- localColorSpaceCache,
960
- }) {
961
- const gStateRef = gState.objId;
962
- let isSimpleGState = true;
963
- // This array holds the converted/processed state data.
964
- const gStateObj = [];
965
- const gStateKeys = gState.getKeys();
966
- let promise = Promise.resolve();
967
- for (let i = 0, ii = gStateKeys.length; i < ii; i++) {
968
- const key = gStateKeys[i];
969
- const value = gState.get(key);
970
- switch (key) {
971
- case "Type":
972
- break;
973
- case "LW":
974
- case "LC":
975
- case "LJ":
976
- case "ML":
977
- case "D":
978
- case "RI":
979
- case "FL":
980
- case "CA":
981
- case "ca":
982
- gStateObj.push([key, value]);
983
- break;
984
- case "Font":
985
- isSimpleGState = false;
986
-
987
- promise = promise.then(() => {
988
- return this.handleSetFont(
989
- resources,
990
- null,
991
- value[0],
992
- operatorList,
993
- task,
994
- stateManager.state
995
- ).then(function (loadedName) {
996
- operatorList.addDependency(loadedName);
997
- gStateObj.push([key, [loadedName, value[1]]]);
998
- });
999
- });
1000
- break;
1001
- case "BM":
1002
- gStateObj.push([key, normalizeBlendMode(value)]);
1003
- break;
1004
- case "SMask":
1005
- if (isName(value, "None")) {
1006
- gStateObj.push([key, false]);
1007
- break;
1008
- }
1009
- if (isDict(value)) {
1010
- isSimpleGState = false;
1011
-
1012
- promise = promise.then(() => {
1013
- return this.handleSMask(
1014
- value,
1015
- resources,
1016
- operatorList,
1017
- task,
1018
- stateManager,
1019
- localColorSpaceCache
1020
- );
1021
- });
1022
- gStateObj.push([key, true]);
1023
- } else {
1024
- warn("Unsupported SMask type");
1025
- }
1026
- break;
1027
- case "TR":
1028
- const transferMaps = this.handleTransferFunction(value);
1029
- gStateObj.push([key, transferMaps]);
1030
- break;
1031
- // Only generate info log messages for the following since
1032
- // they are unlikely to have a big impact on the rendering.
1033
- case "OP":
1034
- case "op":
1035
- case "OPM":
1036
- case "BG":
1037
- case "BG2":
1038
- case "UCR":
1039
- case "UCR2":
1040
- case "TR2":
1041
- case "HT":
1042
- case "SM":
1043
- case "SA":
1044
- case "AIS":
1045
- case "TK":
1046
- // TODO implement these operators.
1047
- info("graphic state operator " + key);
1048
- break;
1049
- default:
1050
- info("Unknown graphic state operator " + key);
1051
- break;
1052
- }
1053
- }
1054
- return promise.then(function () {
1055
- if (gStateObj.length > 0) {
1056
- operatorList.addOp(OPS.setGState, [gStateObj]);
1057
- }
1058
-
1059
- if (isSimpleGState) {
1060
- localGStateCache.set(cacheKey, gStateRef, gStateObj);
1061
- }
1062
- });
1063
- }
1064
-
1065
- loadFont(
1066
- fontName,
1067
- font,
1068
- resources,
1069
- fallbackFontDict = null,
1070
- cssFontInfo = null
1071
- ) {
1072
- const errorFont = async () => {
1073
- return new TranslatedFont({
1074
- loadedName: "g_font_error",
1075
- font: new ErrorFont(`Font "${fontName}" is not available.`),
1076
- dict: font,
1077
- evaluatorOptions: this.options,
1078
- });
1079
- };
1080
-
1081
- const xref = this.xref;
1082
- let fontRef;
1083
- if (font) {
1084
- // Loading by ref.
1085
- if (!isRef(font)) {
1086
- throw new FormatError('The "font" object should be a reference.');
1087
- }
1088
- fontRef = font;
1089
- } else {
1090
- // Loading by name.
1091
- const fontRes = resources.get("Font");
1092
- if (fontRes) {
1093
- fontRef = fontRes.getRaw(fontName);
1094
- }
1095
- }
1096
- if (!fontRef) {
1097
- const partialMsg = `Font "${
1098
- fontName || (font && font.toString())
1099
- }" is not available`;
1100
-
1101
- if (!this.options.ignoreErrors && !this.parsingType3Font) {
1102
- warn(`${partialMsg}.`);
1103
- return errorFont();
1104
- }
1105
- // Font not found -- sending unsupported feature notification.
1106
- this.handler.send("UnsupportedFeature", {
1107
- featureId: UNSUPPORTED_FEATURES.errorFontMissing,
1108
- });
1109
- warn(`${partialMsg} -- attempting to fallback to a default font.`);
1110
-
1111
- // Falling back to a default font to avoid completely broken rendering,
1112
- // but note that there're no guarantees that things will look "correct".
1113
- if (fallbackFontDict) {
1114
- fontRef = fallbackFontDict;
1115
- } else {
1116
- fontRef = PartialEvaluator.fallbackFontDict;
1117
- }
1118
- }
1119
-
1120
- if (this.fontCache.has(fontRef)) {
1121
- return this.fontCache.get(fontRef);
1122
- }
1123
-
1124
- font = xref.fetchIfRef(fontRef);
1125
- if (!isDict(font)) {
1126
- return errorFont();
1127
- }
1128
-
1129
- // We are holding `font.cacheKey` references only for `fontRef`s that
1130
- // are not actually `Ref`s, but rather `Dict`s. See explanation below.
1131
- if (font.cacheKey && this.fontCache.has(font.cacheKey)) {
1132
- return this.fontCache.get(font.cacheKey);
1133
- }
1134
-
1135
- const fontCapability = createPromiseCapability();
1136
-
1137
- let preEvaluatedFont;
1138
- try {
1139
- preEvaluatedFont = this.preEvaluateFont(font);
1140
- preEvaluatedFont.cssFontInfo = cssFontInfo;
1141
- } catch (reason) {
1142
- warn(`loadFont - preEvaluateFont failed: "${reason}".`);
1143
- return errorFont();
1144
- }
1145
- const { descriptor, hash } = preEvaluatedFont;
1146
-
1147
- const fontRefIsRef = isRef(fontRef);
1148
- let fontID;
1149
- if (fontRefIsRef) {
1150
- fontID = `f${fontRef.toString()}`;
1151
- }
1152
-
1153
- if (hash && isDict(descriptor)) {
1154
- if (!descriptor.fontAliases) {
1155
- descriptor.fontAliases = Object.create(null);
1156
- }
1157
- const fontAliases = descriptor.fontAliases;
1158
-
1159
- if (fontAliases[hash]) {
1160
- const aliasFontRef = fontAliases[hash].aliasRef;
1161
- if (fontRefIsRef && aliasFontRef && this.fontCache.has(aliasFontRef)) {
1162
- this.fontCache.putAlias(fontRef, aliasFontRef);
1163
- return this.fontCache.get(fontRef);
1164
- }
1165
- } else {
1166
- fontAliases[hash] = {
1167
- fontID: this.idFactory.createFontId(),
1168
- };
1169
- }
1170
-
1171
- if (fontRefIsRef) {
1172
- fontAliases[hash].aliasRef = fontRef;
1173
- }
1174
- fontID = fontAliases[hash].fontID;
1175
- }
1176
-
1177
- // Workaround for bad PDF generators that reference fonts incorrectly,
1178
- // where `fontRef` is a `Dict` rather than a `Ref` (fixes bug946506.pdf).
1179
- // In this case we cannot put the font into `this.fontCache` (which is
1180
- // a `RefSetCache`), since it's not possible to use a `Dict` as a key.
1181
- //
1182
- // However, if we don't cache the font it's not possible to remove it
1183
- // when `cleanup` is triggered from the API, which causes issues on
1184
- // subsequent rendering operations (see issue7403.pdf) and would force us
1185
- // to unnecessarily load the same fonts over and over.
1186
- //
1187
- // Instead, we cheat a bit by using a modified `fontID` as a key in
1188
- // `this.fontCache`, to allow the font to be cached.
1189
- // NOTE: This works because `RefSetCache` calls `toString()` on provided
1190
- // keys. Also, since `fontRef` is used when getting cached fonts,
1191
- // we'll not accidentally match fonts cached with the `fontID`.
1192
- if (fontRefIsRef) {
1193
- this.fontCache.put(fontRef, fontCapability.promise);
1194
- } else {
1195
- if (!fontID) {
1196
- fontID = this.idFactory.createFontId();
1197
- }
1198
- font.cacheKey = `cacheKey_${fontID}`;
1199
- this.fontCache.put(font.cacheKey, fontCapability.promise);
1200
- }
1201
- assert(
1202
- fontID && fontID.startsWith("f"),
1203
- 'The "fontID" must be (correctly) defined.'
1204
- );
1205
-
1206
- // Keep track of each font we translated so the caller can
1207
- // load them asynchronously before calling display on a page.
1208
- font.loadedName = `${this.idFactory.getDocId()}_${fontID}`;
1209
-
1210
- this.translateFont(preEvaluatedFont)
1211
- .then(translatedFont => {
1212
- if (translatedFont.fontType !== undefined) {
1213
- const xrefFontStats = xref.stats.fontTypes;
1214
- xrefFontStats[translatedFont.fontType] = true;
1215
- }
1216
-
1217
- fontCapability.resolve(
1218
- new TranslatedFont({
1219
- loadedName: font.loadedName,
1220
- font: translatedFont,
1221
- dict: font,
1222
- evaluatorOptions: this.options,
1223
- })
1224
- );
1225
- })
1226
- .catch(reason => {
1227
- // TODO fontCapability.reject?
1228
- // Error in the font data -- sending unsupported feature notification.
1229
- this.handler.send("UnsupportedFeature", {
1230
- featureId: UNSUPPORTED_FEATURES.errorFontTranslate,
1231
- });
1232
- warn(`loadFont - translateFont failed: "${reason}".`);
1233
-
1234
- try {
1235
- // error, but it's still nice to have font type reported
1236
- const fontFile3 = descriptor && descriptor.get("FontFile3");
1237
- const subtype = fontFile3 && fontFile3.get("Subtype");
1238
- const fontType = getFontType(
1239
- preEvaluatedFont.type,
1240
- subtype && subtype.name
1241
- );
1242
- const xrefFontStats = xref.stats.fontTypes;
1243
- xrefFontStats[fontType] = true;
1244
- } catch (ex) {}
1245
-
1246
- fontCapability.resolve(
1247
- new TranslatedFont({
1248
- loadedName: font.loadedName,
1249
- font: new ErrorFont(
1250
- reason instanceof Error ? reason.message : reason
1251
- ),
1252
- dict: font,
1253
- evaluatorOptions: this.options,
1254
- })
1255
- );
1256
- });
1257
- return fontCapability.promise;
1258
- }
1259
-
1260
- buildPath(operatorList, fn, args, parsingText = false) {
1261
- const lastIndex = operatorList.length - 1;
1262
- if (!args) {
1263
- args = [];
1264
- }
1265
- if (
1266
- lastIndex < 0 ||
1267
- operatorList.fnArray[lastIndex] !== OPS.constructPath
1268
- ) {
1269
- // Handle corrupt PDF documents that contains path operators inside of
1270
- // text objects, which may shift subsequent text, by enclosing the path
1271
- // operator in save/restore operators (fixes issue10542_reduced.pdf).
1272
- //
1273
- // Note that this will effectively disable the optimization in the
1274
- // `else` branch below, but given that this type of corruption is
1275
- // *extremely* rare that shouldn't really matter much in practice.
1276
- if (parsingText) {
1277
- warn(`Encountered path operator "${fn}" inside of a text object.`);
1278
- operatorList.addOp(OPS.save, null);
1279
- }
1280
-
1281
- operatorList.addOp(OPS.constructPath, [[fn], args]);
1282
-
1283
- if (parsingText) {
1284
- operatorList.addOp(OPS.restore, null);
1285
- }
1286
- } else {
1287
- const opArgs = operatorList.argsArray[lastIndex];
1288
- opArgs[0].push(fn);
1289
- Array.prototype.push.apply(opArgs[1], args);
1290
- }
1291
- }
1292
-
1293
- parseColorSpace({ cs, resources, localColorSpaceCache }) {
1294
- return ColorSpace.parseAsync({
1295
- cs,
1296
- xref: this.xref,
1297
- resources,
1298
- pdfFunctionFactory: this._pdfFunctionFactory,
1299
- localColorSpaceCache,
1300
- }).catch(reason => {
1301
- if (reason instanceof AbortException) {
1302
- return null;
1303
- }
1304
- if (this.options.ignoreErrors) {
1305
- // Error(s) in the ColorSpace -- sending unsupported feature
1306
- // notification and allow rendering to continue.
1307
- this.handler.send("UnsupportedFeature", {
1308
- featureId: UNSUPPORTED_FEATURES.errorColorSpace,
1309
- });
1310
- warn(`parseColorSpace - ignoring ColorSpace: "${reason}".`);
1311
- return null;
1312
- }
1313
- throw reason;
1314
- });
1315
- }
1316
-
1317
- parseShading({
1318
- keyObj,
1319
- shading,
1320
- resources,
1321
- localColorSpaceCache,
1322
- localShadingPatternCache,
1323
- matrix = null,
1324
- }) {
1325
- // Shadings and patterns may be referenced by the same name but the resource
1326
- // dictionary could be different so we can't use the name for the cache key.
1327
- let id = localShadingPatternCache.get(keyObj);
1328
- if (!id) {
1329
- var shadingFill = Pattern.parseShading(
1330
- shading,
1331
- matrix,
1332
- this.xref,
1333
- resources,
1334
- this.handler,
1335
- this._pdfFunctionFactory,
1336
- localColorSpaceCache
1337
- );
1338
- const patternIR = shadingFill.getIR();
1339
- id = `pattern_${this.idFactory.createObjId()}`;
1340
- localShadingPatternCache.set(keyObj, id);
1341
- this.handler.send("obj", [id, this.pageIndex, "Pattern", patternIR]);
1342
- }
1343
- return id;
1344
- }
1345
-
1346
- handleColorN(
1347
- operatorList,
1348
- fn,
1349
- args,
1350
- cs,
1351
- patterns,
1352
- resources,
1353
- task,
1354
- localColorSpaceCache,
1355
- localTilingPatternCache,
1356
- localShadingPatternCache
1357
- ) {
1358
- // compile tiling patterns
1359
- const patternName = args.pop();
1360
- // SCN/scn applies patterns along with normal colors
1361
- if (patternName instanceof Name) {
1362
- const name = patternName.name;
1363
-
1364
- const localTilingPattern = localTilingPatternCache.getByName(name);
1365
- if (localTilingPattern) {
1366
- try {
1367
- const color = cs.base ? cs.base.getRgb(args, 0) : null;
1368
- const tilingPatternIR = getTilingPatternIR(
1369
- localTilingPattern.operatorListIR,
1370
- localTilingPattern.dict,
1371
- color
1372
- );
1373
- operatorList.addOp(fn, tilingPatternIR);
1374
- return undefined;
1375
- } catch (ex) {
1376
- // Handle any errors during normal TilingPattern parsing.
1377
- }
1378
- }
1379
- // TODO: Attempt to lookup cached TilingPatterns by reference as well,
1380
- // if and only if there are PDF documents where doing so would
1381
- // significantly improve performance.
1382
-
1383
- const pattern = patterns.get(name);
1384
- if (pattern) {
1385
- const dict = isStream(pattern) ? pattern.dict : pattern;
1386
- const typeNum = dict.get("PatternType");
1387
-
1388
- if (typeNum === PatternType.TILING) {
1389
- const color = cs.base ? cs.base.getRgb(args, 0) : null;
1390
- return this.handleTilingType(
1391
- fn,
1392
- color,
1393
- resources,
1394
- pattern,
1395
- dict,
1396
- operatorList,
1397
- task,
1398
- /* cacheKey = */ name,
1399
- localTilingPatternCache
1400
- );
1401
- } else if (typeNum === PatternType.SHADING) {
1402
- const shading = dict.get("Shading");
1403
- const matrix = dict.getArray("Matrix");
1404
- const objId = this.parseShading({
1405
- keyObj: pattern,
1406
- shading,
1407
- matrix,
1408
- resources,
1409
- localColorSpaceCache,
1410
- localShadingPatternCache,
1411
- });
1412
- operatorList.addOp(fn, ["Shading", objId]);
1413
- return undefined;
1414
- }
1415
- throw new FormatError(`Unknown PatternType: ${typeNum}`);
1416
- }
1417
- }
1418
- throw new FormatError(`Unknown PatternName: ${patternName}`);
1419
- }
1420
-
1421
- _parseVisibilityExpression(array, nestingCounter, currentResult) {
1422
- const MAX_NESTING = 10;
1423
- if (++nestingCounter > MAX_NESTING) {
1424
- warn("Visibility expression is too deeply nested");
1425
- return;
1426
- }
1427
- const length = array.length;
1428
- const operator = this.xref.fetchIfRef(array[0]);
1429
- if (length < 2 || !isName(operator)) {
1430
- warn("Invalid visibility expression");
1431
- return;
1432
- }
1433
- switch (operator.name) {
1434
- case "And":
1435
- case "Or":
1436
- case "Not":
1437
- currentResult.push(operator.name);
1438
- break;
1439
- default:
1440
- warn(`Invalid operator ${operator.name} in visibility expression`);
1441
- return;
1442
- }
1443
- for (let i = 1; i < length; i++) {
1444
- const raw = array[i];
1445
- const object = this.xref.fetchIfRef(raw);
1446
- if (Array.isArray(object)) {
1447
- const nestedResult = [];
1448
- currentResult.push(nestedResult);
1449
- // Recursively parse a subarray.
1450
- this._parseVisibilityExpression(object, nestingCounter, nestedResult);
1451
- } else if (isRef(raw)) {
1452
- // Reference to an OCG dictionary.
1453
- currentResult.push(raw.toString());
1454
- }
1455
- }
1456
- }
1457
-
1458
- async parseMarkedContentProps(contentProperties, resources) {
1459
- let optionalContent;
1460
- if (isName(contentProperties)) {
1461
- const properties = resources.get("Properties");
1462
- optionalContent = properties.get(contentProperties.name);
1463
- } else if (isDict(contentProperties)) {
1464
- optionalContent = contentProperties;
1465
- } else {
1466
- throw new FormatError("Optional content properties malformed.");
1467
- }
1468
-
1469
- const optionalContentType = optionalContent.get("Type").name;
1470
- if (optionalContentType === "OCG") {
1471
- return {
1472
- type: optionalContentType,
1473
- id: optionalContent.objId,
1474
- };
1475
- } else if (optionalContentType === "OCMD") {
1476
- const expression = optionalContent.get("VE");
1477
- if (Array.isArray(expression)) {
1478
- const result = [];
1479
- this._parseVisibilityExpression(expression, 0, result);
1480
- if (result.length > 0) {
1481
- return {
1482
- type: "OCMD",
1483
- expression: result,
1484
- };
1485
- }
1486
- }
1487
-
1488
- const optionalContentGroups = optionalContent.get("OCGs");
1489
- if (
1490
- Array.isArray(optionalContentGroups) ||
1491
- isDict(optionalContentGroups)
1492
- ) {
1493
- const groupIds = [];
1494
- if (Array.isArray(optionalContentGroups)) {
1495
- for (const ocg of optionalContentGroups) {
1496
- groupIds.push(ocg.toString());
1497
- }
1498
- } else {
1499
- // Dictionary, just use the obj id.
1500
- groupIds.push(optionalContentGroups.objId);
1501
- }
1502
-
1503
- return {
1504
- type: optionalContentType,
1505
- ids: groupIds,
1506
- policy: isName(optionalContent.get("P"))
1507
- ? optionalContent.get("P").name
1508
- : null,
1509
- expression: null,
1510
- };
1511
- } else if (isRef(optionalContentGroups)) {
1512
- return {
1513
- type: optionalContentType,
1514
- id: optionalContentGroups.toString(),
1515
- };
1516
- }
1517
- }
1518
- return null;
1519
- }
1520
-
1521
- getOperatorList({
1522
- stream,
1523
- task,
1524
- resources,
1525
- operatorList,
1526
- initialState = null,
1527
- fallbackFontDict = null,
1528
- }) {
1529
- // Ensure that `resources`/`initialState` is correctly initialized,
1530
- // even if the provided parameter is e.g. `null`.
1531
- resources = resources || Dict.empty;
1532
- initialState = initialState || new EvalState();
1533
-
1534
- if (!operatorList) {
1535
- throw new Error('getOperatorList: missing "operatorList" parameter');
1536
- }
1537
-
1538
- const self = this;
1539
- const xref = this.xref;
1540
- let parsingText = false;
1541
- const localImageCache = new LocalImageCache();
1542
- const localColorSpaceCache = new LocalColorSpaceCache();
1543
- const localGStateCache = new LocalGStateCache();
1544
- const localTilingPatternCache = new LocalTilingPatternCache();
1545
- const localShadingPatternCache = new Map();
1546
-
1547
- const xobjs = resources.get("XObject") || Dict.empty;
1548
- const patterns = resources.get("Pattern") || Dict.empty;
1549
- const stateManager = new StateManager(initialState);
1550
- const preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager);
1551
- const timeSlotManager = new TimeSlotManager();
1552
-
1553
- function closePendingRestoreOPS(argument) {
1554
- for (let i = 0, ii = preprocessor.savedStatesDepth; i < ii; i++) {
1555
- operatorList.addOp(OPS.restore, []);
1556
- }
1557
- }
1558
-
1559
- return new Promise(function promiseBody(resolve, reject) {
1560
- const next = function (promise) {
1561
- Promise.all([promise, operatorList.ready]).then(function () {
1562
- try {
1563
- promiseBody(resolve, reject);
1564
- } catch (ex) {
1565
- reject(ex);
1566
- }
1567
- }, reject);
1568
- };
1569
- task.ensureNotTerminated();
1570
- timeSlotManager.reset();
1571
-
1572
- const operation = {};
1573
- let stop, i, ii, cs, name, isValidName;
1574
- while (!(stop = timeSlotManager.check())) {
1575
- // The arguments parsed by read() are used beyond this loop, so we
1576
- // cannot reuse the same array on each iteration. Therefore we pass
1577
- // in |null| as the initial value (see the comment on
1578
- // EvaluatorPreprocessor_read() for why).
1579
- operation.args = null;
1580
- if (!preprocessor.read(operation)) {
1581
- break;
1582
- }
1583
- let args = operation.args;
1584
- let fn = operation.fn;
1585
-
1586
- switch (fn | 0) {
1587
- case OPS.paintXObject:
1588
- // eagerly compile XForm objects
1589
- isValidName = args[0] instanceof Name;
1590
- name = args[0].name;
1591
-
1592
- if (isValidName) {
1593
- const localImage = localImageCache.getByName(name);
1594
- if (localImage) {
1595
- operatorList.addOp(localImage.fn, localImage.args);
1596
- args = null;
1597
- continue;
1598
- }
1599
- }
1600
-
1601
- next(
1602
- new Promise(function (resolveXObject, rejectXObject) {
1603
- if (!isValidName) {
1604
- throw new FormatError("XObject must be referred to by name.");
1605
- }
1606
-
1607
- let xobj = xobjs.getRaw(name);
1608
- if (xobj instanceof Ref) {
1609
- const localImage = localImageCache.getByRef(xobj);
1610
- if (localImage) {
1611
- operatorList.addOp(localImage.fn, localImage.args);
1612
-
1613
- resolveXObject();
1614
- return;
1615
- }
1616
-
1617
- const globalImage = self.globalImageCache.getData(
1618
- xobj,
1619
- self.pageIndex
1620
- );
1621
- if (globalImage) {
1622
- operatorList.addDependency(globalImage.objId);
1623
- operatorList.addOp(globalImage.fn, globalImage.args);
1624
-
1625
- resolveXObject();
1626
- return;
1627
- }
1628
-
1629
- xobj = xref.fetch(xobj);
1630
- }
1631
-
1632
- if (!isStream(xobj)) {
1633
- throw new FormatError("XObject should be a stream");
1634
- }
1635
-
1636
- const type = xobj.dict.get("Subtype");
1637
- if (!isName(type)) {
1638
- throw new FormatError("XObject should have a Name subtype");
1639
- }
1640
-
1641
- if (type.name === "Form") {
1642
- stateManager.save();
1643
- self
1644
- .buildFormXObject(
1645
- resources,
1646
- xobj,
1647
- null,
1648
- operatorList,
1649
- task,
1650
- stateManager.state.clone(),
1651
- localColorSpaceCache
1652
- )
1653
- .then(function () {
1654
- stateManager.restore();
1655
- resolveXObject();
1656
- }, rejectXObject);
1657
- return;
1658
- } else if (type.name === "Image") {
1659
- self
1660
- .buildPaintImageXObject({
1661
- resources,
1662
- image: xobj,
1663
- operatorList,
1664
- cacheKey: name,
1665
- localImageCache,
1666
- localColorSpaceCache,
1667
- })
1668
- .then(resolveXObject, rejectXObject);
1669
- return;
1670
- } else if (type.name === "PS") {
1671
- // PostScript XObjects are unused when viewing documents.
1672
- // See section 4.7.1 of Adobe's PDF reference.
1673
- info("Ignored XObject subtype PS");
1674
- } else {
1675
- throw new FormatError(
1676
- `Unhandled XObject subtype ${type.name}`
1677
- );
1678
- }
1679
- resolveXObject();
1680
- }).catch(function (reason) {
1681
- if (reason instanceof AbortException) {
1682
- return;
1683
- }
1684
- if (self.options.ignoreErrors) {
1685
- // Error(s) in the XObject -- sending unsupported feature
1686
- // notification and allow rendering to continue.
1687
- self.handler.send("UnsupportedFeature", {
1688
- featureId: UNSUPPORTED_FEATURES.errorXObject,
1689
- });
1690
- warn(`getOperatorList - ignoring XObject: "${reason}".`);
1691
- return;
1692
- }
1693
- throw reason;
1694
- })
1695
- );
1696
- return;
1697
- case OPS.setFont:
1698
- var fontSize = args[1];
1699
- // eagerly collect all fonts
1700
- next(
1701
- self
1702
- .handleSetFont(
1703
- resources,
1704
- args,
1705
- null,
1706
- operatorList,
1707
- task,
1708
- stateManager.state,
1709
- fallbackFontDict
1710
- )
1711
- .then(function (loadedName) {
1712
- operatorList.addDependency(loadedName);
1713
- operatorList.addOp(OPS.setFont, [loadedName, fontSize]);
1714
- })
1715
- );
1716
- return;
1717
- case OPS.beginText:
1718
- parsingText = true;
1719
- break;
1720
- case OPS.endText:
1721
- parsingText = false;
1722
- break;
1723
- case OPS.endInlineImage:
1724
- var cacheKey = args[0].cacheKey;
1725
- if (cacheKey) {
1726
- const localImage = localImageCache.getByName(cacheKey);
1727
- if (localImage) {
1728
- operatorList.addOp(localImage.fn, localImage.args);
1729
- args = null;
1730
- continue;
1731
- }
1732
- }
1733
- next(
1734
- self.buildPaintImageXObject({
1735
- resources,
1736
- image: args[0],
1737
- isInline: true,
1738
- operatorList,
1739
- cacheKey,
1740
- localImageCache,
1741
- localColorSpaceCache,
1742
- })
1743
- );
1744
- return;
1745
- case OPS.showText:
1746
- if (!stateManager.state.font) {
1747
- self.ensureStateFont(stateManager.state);
1748
- continue;
1749
- }
1750
- args[0] = self.handleText(args[0], stateManager.state);
1751
- break;
1752
- case OPS.showSpacedText:
1753
- if (!stateManager.state.font) {
1754
- self.ensureStateFont(stateManager.state);
1755
- continue;
1756
- }
1757
- var arr = args[0];
1758
- var combinedGlyphs = [];
1759
- var arrLength = arr.length;
1760
- var state = stateManager.state;
1761
- for (i = 0; i < arrLength; ++i) {
1762
- const arrItem = arr[i];
1763
- if (isString(arrItem)) {
1764
- Array.prototype.push.apply(
1765
- combinedGlyphs,
1766
- self.handleText(arrItem, state)
1767
- );
1768
- } else if (isNum(arrItem)) {
1769
- combinedGlyphs.push(arrItem);
1770
- }
1771
- }
1772
- args[0] = combinedGlyphs;
1773
- fn = OPS.showText;
1774
- break;
1775
- case OPS.nextLineShowText:
1776
- if (!stateManager.state.font) {
1777
- self.ensureStateFont(stateManager.state);
1778
- continue;
1779
- }
1780
- operatorList.addOp(OPS.nextLine);
1781
- args[0] = self.handleText(args[0], stateManager.state);
1782
- fn = OPS.showText;
1783
- break;
1784
- case OPS.nextLineSetSpacingShowText:
1785
- if (!stateManager.state.font) {
1786
- self.ensureStateFont(stateManager.state);
1787
- continue;
1788
- }
1789
- operatorList.addOp(OPS.nextLine);
1790
- operatorList.addOp(OPS.setWordSpacing, [args.shift()]);
1791
- operatorList.addOp(OPS.setCharSpacing, [args.shift()]);
1792
- args[0] = self.handleText(args[0], stateManager.state);
1793
- fn = OPS.showText;
1794
- break;
1795
- case OPS.setTextRenderingMode:
1796
- stateManager.state.textRenderingMode = args[0];
1797
- break;
1798
-
1799
- case OPS.setFillColorSpace: {
1800
- const cachedColorSpace = ColorSpace.getCached(
1801
- args[0],
1802
- xref,
1803
- localColorSpaceCache
1804
- );
1805
- if (cachedColorSpace) {
1806
- stateManager.state.fillColorSpace = cachedColorSpace;
1807
- continue;
1808
- }
1809
-
1810
- next(
1811
- self
1812
- .parseColorSpace({
1813
- cs: args[0],
1814
- resources,
1815
- localColorSpaceCache,
1816
- })
1817
- .then(function (colorSpace) {
1818
- if (colorSpace) {
1819
- stateManager.state.fillColorSpace = colorSpace;
1820
- }
1821
- })
1822
- );
1823
- return;
1824
- }
1825
- case OPS.setStrokeColorSpace: {
1826
- const cachedColorSpace = ColorSpace.getCached(
1827
- args[0],
1828
- xref,
1829
- localColorSpaceCache
1830
- );
1831
- if (cachedColorSpace) {
1832
- stateManager.state.strokeColorSpace = cachedColorSpace;
1833
- continue;
1834
- }
1835
-
1836
- next(
1837
- self
1838
- .parseColorSpace({
1839
- cs: args[0],
1840
- resources,
1841
- localColorSpaceCache,
1842
- })
1843
- .then(function (colorSpace) {
1844
- if (colorSpace) {
1845
- stateManager.state.strokeColorSpace = colorSpace;
1846
- }
1847
- })
1848
- );
1849
- return;
1850
- }
1851
- case OPS.setFillColor:
1852
- cs = stateManager.state.fillColorSpace;
1853
- args = cs.getRgb(args, 0);
1854
- fn = OPS.setFillRGBColor;
1855
- break;
1856
- case OPS.setStrokeColor:
1857
- cs = stateManager.state.strokeColorSpace;
1858
- args = cs.getRgb(args, 0);
1859
- fn = OPS.setStrokeRGBColor;
1860
- break;
1861
- case OPS.setFillGray:
1862
- stateManager.state.fillColorSpace = ColorSpace.singletons.gray;
1863
- args = ColorSpace.singletons.gray.getRgb(args, 0);
1864
- fn = OPS.setFillRGBColor;
1865
- break;
1866
- case OPS.setStrokeGray:
1867
- stateManager.state.strokeColorSpace = ColorSpace.singletons.gray;
1868
- args = ColorSpace.singletons.gray.getRgb(args, 0);
1869
- fn = OPS.setStrokeRGBColor;
1870
- break;
1871
- case OPS.setFillCMYKColor:
1872
- stateManager.state.fillColorSpace = ColorSpace.singletons.cmyk;
1873
- args = ColorSpace.singletons.cmyk.getRgb(args, 0);
1874
- fn = OPS.setFillRGBColor;
1875
- break;
1876
- case OPS.setStrokeCMYKColor:
1877
- stateManager.state.strokeColorSpace = ColorSpace.singletons.cmyk;
1878
- args = ColorSpace.singletons.cmyk.getRgb(args, 0);
1879
- fn = OPS.setStrokeRGBColor;
1880
- break;
1881
- case OPS.setFillRGBColor:
1882
- stateManager.state.fillColorSpace = ColorSpace.singletons.rgb;
1883
- args = ColorSpace.singletons.rgb.getRgb(args, 0);
1884
- break;
1885
- case OPS.setStrokeRGBColor:
1886
- stateManager.state.strokeColorSpace = ColorSpace.singletons.rgb;
1887
- args = ColorSpace.singletons.rgb.getRgb(args, 0);
1888
- break;
1889
- case OPS.setFillColorN:
1890
- cs = stateManager.state.fillColorSpace;
1891
- if (cs.name === "Pattern") {
1892
- next(
1893
- self.handleColorN(
1894
- operatorList,
1895
- OPS.setFillColorN,
1896
- args,
1897
- cs,
1898
- patterns,
1899
- resources,
1900
- task,
1901
- localColorSpaceCache,
1902
- localTilingPatternCache,
1903
- localShadingPatternCache
1904
- )
1905
- );
1906
- return;
1907
- }
1908
- args = cs.getRgb(args, 0);
1909
- fn = OPS.setFillRGBColor;
1910
- break;
1911
- case OPS.setStrokeColorN:
1912
- cs = stateManager.state.strokeColorSpace;
1913
- if (cs.name === "Pattern") {
1914
- next(
1915
- self.handleColorN(
1916
- operatorList,
1917
- OPS.setStrokeColorN,
1918
- args,
1919
- cs,
1920
- patterns,
1921
- resources,
1922
- task,
1923
- localColorSpaceCache,
1924
- localTilingPatternCache,
1925
- localShadingPatternCache
1926
- )
1927
- );
1928
- return;
1929
- }
1930
- args = cs.getRgb(args, 0);
1931
- fn = OPS.setStrokeRGBColor;
1932
- break;
1933
-
1934
- case OPS.shadingFill:
1935
- var shadingRes = resources.get("Shading");
1936
- if (!shadingRes) {
1937
- throw new FormatError("No shading resource found");
1938
- }
1939
-
1940
- var shading = shadingRes.get(args[0].name);
1941
- if (!shading) {
1942
- throw new FormatError("No shading object found");
1943
- }
1944
- const patternId = self.parseShading({
1945
- keyObj: shading,
1946
- shading,
1947
- resources,
1948
- localColorSpaceCache,
1949
- localShadingPatternCache,
1950
- });
1951
- args = [patternId];
1952
- fn = OPS.shadingFill;
1953
- break;
1954
- case OPS.setGState:
1955
- isValidName = args[0] instanceof Name;
1956
- name = args[0].name;
1957
-
1958
- if (isValidName) {
1959
- const localGStateObj = localGStateCache.getByName(name);
1960
- if (localGStateObj) {
1961
- if (localGStateObj.length > 0) {
1962
- operatorList.addOp(OPS.setGState, [localGStateObj]);
1963
- }
1964
- args = null;
1965
- continue;
1966
- }
1967
- }
1968
-
1969
- next(
1970
- new Promise(function (resolveGState, rejectGState) {
1971
- if (!isValidName) {
1972
- throw new FormatError("GState must be referred to by name.");
1973
- }
1974
-
1975
- const extGState = resources.get("ExtGState");
1976
- if (!(extGState instanceof Dict)) {
1977
- throw new FormatError("ExtGState should be a dictionary.");
1978
- }
1979
-
1980
- const gState = extGState.get(name);
1981
- // TODO: Attempt to lookup cached GStates by reference as well,
1982
- // if and only if there are PDF documents where doing so
1983
- // would significantly improve performance.
1984
- if (!(gState instanceof Dict)) {
1985
- throw new FormatError("GState should be a dictionary.");
1986
- }
1987
-
1988
- self
1989
- .setGState({
1990
- resources,
1991
- gState,
1992
- operatorList,
1993
- cacheKey: name,
1994
- task,
1995
- stateManager,
1996
- localGStateCache,
1997
- localColorSpaceCache,
1998
- })
1999
- .then(resolveGState, rejectGState);
2000
- }).catch(function (reason) {
2001
- if (reason instanceof AbortException) {
2002
- return;
2003
- }
2004
- if (self.options.ignoreErrors) {
2005
- // Error(s) in the ExtGState -- sending unsupported feature
2006
- // notification and allow parsing/rendering to continue.
2007
- self.handler.send("UnsupportedFeature", {
2008
- featureId: UNSUPPORTED_FEATURES.errorExtGState,
2009
- });
2010
- warn(`getOperatorList - ignoring ExtGState: "${reason}".`);
2011
- return;
2012
- }
2013
- throw reason;
2014
- })
2015
- );
2016
- return;
2017
- case OPS.moveTo:
2018
- case OPS.lineTo:
2019
- case OPS.curveTo:
2020
- case OPS.curveTo2:
2021
- case OPS.curveTo3:
2022
- case OPS.closePath:
2023
- case OPS.rectangle:
2024
- self.buildPath(operatorList, fn, args, parsingText);
2025
- continue;
2026
- case OPS.markPoint:
2027
- case OPS.markPointProps:
2028
- case OPS.beginCompat:
2029
- case OPS.endCompat:
2030
- // Ignore operators where the corresponding handlers are known to
2031
- // be no-op in CanvasGraphics (display/canvas.js). This prevents
2032
- // serialization errors and is also a bit more efficient.
2033
- // We could also try to serialize all objects in a general way,
2034
- // e.g. as done in https://github.com/mozilla/pdf.js/pull/6266,
2035
- // but doing so is meaningless without knowing the semantics.
2036
- continue;
2037
- case OPS.beginMarkedContentProps:
2038
- if (!isName(args[0])) {
2039
- warn(`Expected name for beginMarkedContentProps arg0=${args[0]}`);
2040
- continue;
2041
- }
2042
- if (args[0].name === "OC") {
2043
- next(
2044
- self
2045
- .parseMarkedContentProps(args[1], resources)
2046
- .then(data => {
2047
- operatorList.addOp(OPS.beginMarkedContentProps, [
2048
- "OC",
2049
- data,
2050
- ]);
2051
- })
2052
- .catch(reason => {
2053
- if (reason instanceof AbortException) {
2054
- return;
2055
- }
2056
- if (self.options.ignoreErrors) {
2057
- self.handler.send("UnsupportedFeature", {
2058
- featureId: UNSUPPORTED_FEATURES.errorMarkedContent,
2059
- });
2060
- warn(
2061
- `getOperatorList - ignoring beginMarkedContentProps: "${reason}".`
2062
- );
2063
- return;
2064
- }
2065
- throw reason;
2066
- })
2067
- );
2068
- return;
2069
- }
2070
- // Other marked content types aren't supported yet.
2071
- args = [
2072
- args[0].name,
2073
- args[1] instanceof Dict ? args[1].get("MCID") : null,
2074
- ];
2075
-
2076
- break;
2077
- case OPS.beginMarkedContent:
2078
- case OPS.endMarkedContent:
2079
- default:
2080
- // Note: Ignore the operator if it has `Dict` arguments, since
2081
- // those are non-serializable, otherwise postMessage will throw
2082
- // "An object could not be cloned.".
2083
- if (args !== null) {
2084
- for (i = 0, ii = args.length; i < ii; i++) {
2085
- if (args[i] instanceof Dict) {
2086
- break;
2087
- }
2088
- }
2089
- if (i < ii) {
2090
- warn("getOperatorList - ignoring operator: " + fn);
2091
- continue;
2092
- }
2093
- }
2094
- }
2095
- operatorList.addOp(fn, args);
2096
- }
2097
- if (stop) {
2098
- next(deferred);
2099
- return;
2100
- }
2101
- // Some PDFs don't close all restores inside object/form.
2102
- // Closing those for them.
2103
- closePendingRestoreOPS();
2104
- resolve();
2105
- }).catch(reason => {
2106
- if (reason instanceof AbortException) {
2107
- return;
2108
- }
2109
- if (this.options.ignoreErrors) {
2110
- // Error(s) in the OperatorList -- sending unsupported feature
2111
- // notification and allow rendering to continue.
2112
- this.handler.send("UnsupportedFeature", {
2113
- featureId: UNSUPPORTED_FEATURES.errorOperatorList,
2114
- });
2115
- warn(
2116
- `getOperatorList - ignoring errors during "${task.name}" ` +
2117
- `task: "${reason}".`
2118
- );
2119
-
2120
- closePendingRestoreOPS();
2121
- return;
2122
- }
2123
- throw reason;
2124
- });
2125
- }
2126
-
2127
- getTextContent({
2128
- stream,
2129
- task,
2130
- resources,
2131
- stateManager = null,
2132
- normalizeWhitespace = false,
2133
- combineTextItems = false,
2134
- includeMarkedContent = false,
2135
- sink,
2136
- seenStyles = new Set(),
2137
- }) {
2138
- // Ensure that `resources`/`stateManager` is correctly initialized,
2139
- // even if the provided parameter is e.g. `null`.
2140
- resources = resources || Dict.empty;
2141
- stateManager = stateManager || new StateManager(new TextState());
2142
-
2143
- const WhitespaceRegexp = /\s/g;
2144
-
2145
- const textContent = {
2146
- items: [],
2147
- styles: Object.create(null),
2148
- };
2149
- const textContentItem = {
2150
- initialized: false,
2151
- str: [],
2152
- totalWidth: 0,
2153
- totalHeight: 0,
2154
- width: 0,
2155
- height: 0,
2156
- vertical: false,
2157
- lastCharSize: 0,
2158
- prevTransform: null,
2159
- textAdvanceScale: 0,
2160
- spaceWidth: 0,
2161
- spaceInFlowMin: 0,
2162
- spaceInFlowMax: 0,
2163
- trackingSpaceMin: Infinity,
2164
- transform: null,
2165
- fontName: null,
2166
- hasEOL: false,
2167
- isLastCharWhiteSpace: false,
2168
- };
2169
-
2170
- // Used in addFakeSpaces.
2171
- // wsw stands for whitespace width.
2172
-
2173
- // A white <= wsw * TRACKING_SPACE_FACTOR is a tracking space
2174
- // so it doesn't count as a space.
2175
- const TRACKING_SPACE_FACTOR = 0.3;
2176
-
2177
- // A white with a width in [wsw * MIN_FACTOR; wsw * MAX_FACTOR]
2178
- // is a space which will be inserted in the current flow of words.
2179
- // If the width is outside of this range then the flow is broken
2180
- // (which means a new span in the text layer).
2181
- // It's useful to adjust the best as possible the span in the layer
2182
- // to what is displayed in the canvas.
2183
- const SPACE_IN_FLOW_MIN_FACTOR = 0.3;
2184
- const SPACE_IN_FLOW_MAX_FACTOR = 1.3;
2185
-
2186
- const self = this;
2187
- const xref = this.xref;
2188
- const showSpacedTextBuffer = [];
2189
-
2190
- // The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd.
2191
- let xobjs = null;
2192
- const emptyXObjectCache = new LocalImageCache();
2193
- const emptyGStateCache = new LocalGStateCache();
2194
-
2195
- const preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager);
2196
-
2197
- let textState;
2198
-
2199
/**
 * Builds the transform for the current text state (PDF spec 9.4.4,
 * "Text Space Details") by combining the CTM, the text matrix and a matrix
 * derived from font size, horizontal scaling and text rise.
 *
 * @returns {Array<number>} The result of
 *   `Util.transform(ctm, Util.transform(textMatrix, textStateMatrix))`.
 */
function getCurrentTextTransform() {
  const { font, fontSize } = textState;
  const textStateMatrix = [
    fontSize * textState.textHScale,
    0,
    0,
    fontSize,
    0,
    textState.textRise,
  ];

  // Type3 fonts with a tiny font size (or a per-char bbox) and a
  // non-default font matrix get their vertical scale derived from the
  // font bounding box instead.
  const usesType3BBoxHeight =
    font.isType3Font &&
    (fontSize <= 1 || font.isCharBBox) &&
    !isArrayEqual(textState.fontMatrix, FONT_IDENTITY_MATRIX);
  if (usesType3BBoxHeight) {
    const bboxHeight = font.bbox[3] - font.bbox[1];
    if (bboxHeight > 0) {
      textStateMatrix[3] *= bboxHeight * textState.fontMatrix[3];
    }
  }

  const textToUser = Util.transform(textState.textMatrix, textStateMatrix);
  return Util.transform(textState.ctm, textToUser);
}
2227
-
2228
/**
 * Returns the shared `textContentItem`, initializing it from the current
 * text state first if it is not already initialized.
 *
 * Initialization registers the font style the first time a font is seen,
 * records the current transform, resets the running extents, computes the
 * advance scale and derives the space thresholds from the font's space
 * width.
 *
 * @returns {Object} The (now initialized) `textContentItem`.
 */
function ensureTextContentItem() {
  if (textContentItem.initialized) {
    return textContentItem;
  }

  const font = textState.font;
  const loadedName = font.loadedName;
  if (!seenStyles.has(loadedName)) {
    seenStyles.add(loadedName);
    textContent.styles[loadedName] = {
      fontFamily: font.fallbackName,
      ascent: font.ascent,
      descent: font.descent,
      vertical: font.vertical,
    };
  }
  textContentItem.fontName = loadedName;

  const trm = getCurrentTextTransform();
  textContentItem.transform = trm;
  if (font.vertical) {
    // Vertical writing: the cross extent is the width, the run grows in height.
    const columnWidth = Math.hypot(trm[0], trm[1]);
    textContentItem.totalWidth = columnWidth;
    textContentItem.width = columnWidth;
    textContentItem.totalHeight = 0;
    textContentItem.height = 0;
    textContentItem.vertical = true;
  } else {
    // Horizontal writing: the cross extent is the height, the run grows in width.
    const lineHeight = Math.hypot(trm[2], trm[3]);
    textContentItem.totalWidth = 0;
    textContentItem.width = 0;
    textContentItem.totalHeight = lineHeight;
    textContentItem.height = lineHeight;
    textContentItem.vertical = false;
  }

  const scaleLineX = Math.hypot(
    textState.textLineMatrix[0],
    textState.textLineMatrix[1]
  );
  const scaleCtmX = Math.hypot(textState.ctm[0], textState.ctm[1]);
  textContentItem.textAdvanceScale = scaleCtmX * scaleLineX;
  if (!textContentItem.lastCharSize) {
    textContentItem.lastCharSize = 0;
  }

  const spaceWidth = (font.spaceWidth / 1000) * textState.fontSize;
  if (spaceWidth) {
    textContentItem.spaceWidth = spaceWidth;
    textContentItem.trackingSpaceMin = spaceWidth * TRACKING_SPACE_FACTOR;
    textContentItem.spaceInFlowMin = spaceWidth * SPACE_IN_FLOW_MIN_FACTOR;
    textContentItem.spaceInFlowMax = spaceWidth * SPACE_IN_FLOW_MAX_FACTOR;
  } else {
    // No usable space width: the in-flow thresholds are left untouched.
    textContentItem.spaceWidth = 0;
    textContentItem.trackingSpaceMin = Infinity;
  }

  textContentItem.hasEOL = false;
  textContentItem.initialized = true;
  return textContentItem;
}
2287
-
2288
/**
 * Recomputes the advance scale from the current text-line matrix and CTM.
 *
 * When the scale changed for an initialized item, the extent accumulated so
 * far is first folded into the running total using the old scale, and the
 * per-scale accumulator is reset.
 */
function updateAdvanceScale() {
  if (!textContentItem.initialized) {
    return;
  }

  const lineScale = Math.hypot(
    textState.textLineMatrix[0],
    textState.textLineMatrix[1]
  );
  const ctmScale = Math.hypot(textState.ctm[0], textState.ctm[1]);
  const nextScale = ctmScale * lineScale;
  if (nextScale === textContentItem.textAdvanceScale) {
    return;
  }

  const previousScale = textContentItem.textAdvanceScale;
  if (textContentItem.vertical) {
    textContentItem.totalHeight += textContentItem.height * previousScale;
    textContentItem.height = 0;
  } else {
    textContentItem.totalWidth += textContentItem.width * previousScale;
    textContentItem.width = 0;
  }

  textContentItem.textAdvanceScale = nextScale;
}
2315
-
2316
/**
 * Replaces every whitespace character with a standard space (0x20), to
 * avoid alignment issues between the textLayer and the canvas if the text
 * contains e.g. tabs (fixes issue6612.pdf).
 *
 * Strings made up solely of characters in the 0x20-0x7f range are returned
 * unchanged without running the regular expression.
 *
 * @param {string} str
 * @returns {string}
 */
function replaceWhitespace(str) {
  for (let pos = 0, len = str.length; pos < len; pos++) {
    const code = str.charCodeAt(pos);
    if (code < 0x20 || code > 0x7f) {
      return str.replace(WhitespaceRegexp, " ");
    }
  }
  return str;
}
2328
-
2329
/**
 * Converts an accumulated text chunk into an output text item: the pieces
 * of `textChunk.str` are joined and passed through `bidi`, and whitespace is
 * normalized when `normalizeWhitespace` is enabled.
 *
 * @param {Object} textChunk - Chunk with `str` (array), `vertical`,
 *   `totalWidth`, `totalHeight`, `transform`, `fontName` and `hasEOL`.
 * @returns {Object} Item with `str`, `dir`, `width`, `height`, `transform`,
 *   `fontName` and `hasEOL`.
 */
function runBidiTransform(textChunk) {
  const joined = textChunk.str.join("");
  const { str: bidiStr, dir } = bidi(joined, -1, textChunk.vertical);
  return {
    str: normalizeWhitespace ? replaceWhitespace(bidiStr) : bidiStr,
    dir,
    width: textChunk.totalWidth,
    height: textChunk.totalHeight,
    transform: textChunk.transform,
    fontName: textChunk.fontName,
    hasEOL: textChunk.hasEOL,
  };
}
2345
-
2346
/**
 * Loads a font and installs it on the current text state.
 *
 * For Type3 fonts `loadType3Data` is awaited as well; any failure there is
 * ignored on purpose, since it is only used here to ensure that we'll
 * always obtain a useful /FontBBox.
 *
 * @param {string|null} fontName
 * @param {*} fontRef
 * @returns {Promise<undefined>} Settles once `textState.font` and
 *   `textState.fontMatrix` have been updated.
 */
function handleSetFont(fontName, fontRef) {
  const ensureType3Data = translated => {
    if (!translated.font.isType3Font) {
      return translated;
    }
    return translated
      .loadType3Data(self, resources, task)
      .catch(() => {
        // Type3-parsing errors are deliberately ignored.
      })
      .then(() => translated);
  };

  const applyFont = ({ font }) => {
    textState.font = font;
    textState.fontMatrix = font.fontMatrix || FONT_IDENTITY_MATRIX;
  };

  return self
    .loadFont(fontName, fontRef, resources)
    .then(ensureType3Data)
    .then(applyFont);
}
2369
-
2370
/**
 * Compares the current text position with the position recorded at the end
 * of the previous chunk (`textContentItem.prevTransform`) and accounts for
 * the gap between them.
 *
 * Does nothing unless `combineTextItems` is enabled, a font is set and a
 * previous position exists. Otherwise, along the writing direction:
 *  - a jump across the line (or column, for vertical fonts) ends the line
 *    via `appendEOL`;
 *  - a backwards move of more than half the last character is ignored;
 *  - a gap no larger than `trackingSpaceMin` is a tracking space and is only
 *    added to the running extent;
 *  - a larger gap is handed to `addFakeSpaces`; if that does not split the
 *    item, the gap is either emitted as a standalone " " item (when nothing
 *    has been accumulated yet) or added to the running extent.
 *
 * @param {number} fontSize - Not used by the implementation; kept so that
 *   existing call sites remain valid.
 */
function compareWithLastPosition(fontSize) {
  if (
    !combineTextItems ||
    !textState.font ||
    !textContentItem.prevTransform
  ) {
    return;
  }

  const currentTransform = getCurrentTextTransform();
  const posX = currentTransform[4];
  const posY = currentTransform[5];
  const lastPosX = textContentItem.prevTransform[4];
  const lastPosY = textContentItem.prevTransform[5];

  if (lastPosX === posX && lastPosY === posY) {
    return;
  }

  const advanceX = (posX - lastPosX) / textContentItem.textAdvanceScale;
  const advanceY = (posY - lastPosY) / textContentItem.textAdvanceScale;
  const HALF_LAST_CHAR = -0.5 * textContentItem.lastCharSize;

  if (textState.font.vertical) {
    if (
      Math.abs(advanceX) >
      textContentItem.width /
        textContentItem.textAdvanceScale /* not the same column */
    ) {
      appendEOL();
      return;
    }

    if (HALF_LAST_CHAR > advanceY) {
      return;
    }

    // A gap of at most `trackingSpaceMin` is a tracking space and doesn't
    // count as a space. The comparison used to be inverted (`>`), which
    // disagreed with the horizontal branch below.
    if (advanceY <= textContentItem.trackingSpaceMin) {
      textContentItem.height += advanceY;
    } else if (!addFakeSpaces(advanceY, 0, textContentItem.prevTransform)) {
      if (textContentItem.str.length === 0) {
        textContent.items.push({
          str: " ",
          dir: "ltr",
          width: 0,
          height: advanceY,
          transform: textContentItem.prevTransform,
          fontName: textContentItem.fontName,
          hasEOL: false,
        });
        textContentItem.isLastCharWhiteSpace = true;
      } else {
        textContentItem.height += advanceY;
      }
    }

    return;
  }

  if (
    Math.abs(advanceY) >
    textContentItem.height /
      textContentItem.textAdvanceScale /* not the same line */
  ) {
    appendEOL();
    return;
  }

  if (HALF_LAST_CHAR > advanceX) {
    return;
  }

  if (advanceX <= textContentItem.trackingSpaceMin) {
    textContentItem.width += advanceX;
  } else if (!addFakeSpaces(advanceX, 0, textContentItem.prevTransform)) {
    if (textContentItem.str.length === 0) {
      textContent.items.push({
        str: " ",
        dir: "ltr",
        width: advanceX,
        height: 0,
        transform: textContentItem.prevTransform,
        fontName: textContentItem.fontName,
        hasEOL: false,
      });
      textContentItem.isLastCharWhiteSpace = true;
    } else {
      textContentItem.width += advanceX;
    }
  }
}
2461
-
2462
/**
 * Appends the glyphs of `chars` to the current text content item and moves
 * the text matrix by each glyph's width and spacing.
 *
 * With empty `chars` only the text matrix is moved, by the character
 * spacing plus `extraSpacing`.
 *
 * @param {Object} params
 * @param {string} params.chars - Characters to convert via
 *   `font.charsToGlyphs`.
 * @param {number} params.extraSpacing - Additional spacing applied after the
 *   last glyph (or on its own when `chars` is empty).
 * @param {boolean} params.isFirstChunk - When true, the start position is
 *   first compared with the end of the previous chunk.
 */
function buildTextContentItem({ chars, extraSpacing, isFirstChunk }) {
  const font = textState.font;

  // Moves the text matrix along the writing direction of the current font.
  const advanceTextMatrix = distance => {
    if (font.vertical) {
      textState.translateTextMatrix(0, distance);
    } else {
      textState.translateTextMatrix(distance, 0);
    }
  };

  if (!chars) {
    // Just move according to the space we have.
    const spacing = textState.charSpacing + extraSpacing;
    if (spacing) {
      advanceTextMatrix(
        font.vertical ? spacing : spacing * textState.textHScale
      );
    }
    return;
  }

  const NormalizedUnicodes = getNormalizedUnicodes();
  const glyphs = font.charsToGlyphs(chars);
  const scale = textState.fontMatrix[0] * textState.fontSize;
  if (isFirstChunk) {
    compareWithLastPosition(scale);
  }

  let textChunk = ensureTextContentItem();
  let size = 0;
  let lastCharSize = 0;
  const lastIndex = glyphs.length - 1;

  for (let index = 0; index <= lastIndex; index++) {
    const glyph = glyphs[index];
    // `extraSpacing` only applies after the final glyph.
    let charSpacing =
      textState.charSpacing + (index === lastIndex ? extraSpacing : 0);

    let glyphUnicode = glyph.unicode;
    if (glyph.isSpace) {
      charSpacing += textState.wordSpacing;
      textChunk.isLastCharWhiteSpace = true;
    } else {
      glyphUnicode = reverseIfRtl(
        NormalizedUnicodes[glyphUnicode] || glyphUnicode
      );
      textChunk.isLastCharWhiteSpace = false;
    }
    textChunk.str.push(glyphUnicode);

    const glyphWidth =
      font.vertical && glyph.vmetric ? glyph.vmetric[0] : glyph.width;

    let scaledDim = glyphWidth * scale;
    if (font.vertical) {
      advanceTextMatrix(scaledDim);
      scaledDim = Math.abs(scaledDim);
    } else {
      scaledDim *= textState.textHScale;
      advanceTextMatrix(scaledDim);
    }
    size += scaledDim;

    if (charSpacing) {
      if (!font.vertical) {
        charSpacing *= textState.textHScale;
      }
      scaledDim += charSpacing;

      // The split check must run before the text matrix is moved by the
      // spacing, as in the original statement order.
      const wasSplit =
        charSpacing > textContentItem.trackingSpaceMin &&
        addFakeSpaces(charSpacing, size);
      advanceTextMatrix(charSpacing);

      if (wasSplit) {
        textChunk = ensureTextContentItem();
        size = 0;
      } else {
        size += charSpacing;
      }
    }

    lastCharSize = scaledDim;
  }

  textChunk.lastCharSize = lastCharSize;
  if (font.vertical) {
    textChunk.height += size;
  } else {
    textChunk.width += size;
  }

  textChunk.prevTransform = getCurrentTextTransform();
}
2556
-
2557
/**
 * Marks an end of line in the output.
 *
 * If a text item is being built it is flagged and flushed; otherwise the
 * most recently emitted item is flagged, or, when nothing has been emitted
 * yet, an empty item carrying the EOL flag is pushed. The whitespace and
 * last-character tracking of the shared item is reset in all cases.
 */
function appendEOL() {
  const emitted = textContent.items;
  if (textContentItem.initialized) {
    textContentItem.hasEOL = true;
    flushTextContentItem();
  } else if (emitted.length > 0) {
    const lastItem = emitted[emitted.length - 1];
    lastItem.hasEOL = true;
  } else {
    emitted.push({
      str: "",
      dir: "ltr",
      width: 0,
      height: 0,
      transform: getCurrentTextTransform(),
      fontName: textState.font.loadedName,
      hasEOL: true,
    });
  }

  textContentItem.isLastCharWhiteSpace = false;
  textContentItem.lastCharSize = 0;
}
2578
-
2579
/**
 * Handles a gap of the given width in the text flow.
 *
 * A gap within `[spaceInFlowMin, spaceInFlowMax]` is an ordinary space: a
 * " " is appended to the item being built (if any) and the flow continues.
 * Any other gap breaks the flow: `size` is added to the current item, the
 * item is flushed and, unless the last character was already whitespace, a
 * standalone " " item covering the gap is emitted.
 *
 * @param {number} width - Gap width, before the advance scale is applied.
 * @param {number} size - Extent to add to the current item before flushing.
 * @param {Array<number>|null} [transf] - Transform for the emitted space
 *   item; the current text transform is used when falsy.
 * @returns {boolean} `false` when the gap stayed in the flow, `true` when
 *   the flow was broken.
 */
function addFakeSpaces(width, size, transf = null) {
  const staysInFlow =
    textContentItem.spaceInFlowMin <= width &&
    width <= textContentItem.spaceInFlowMax;
  if (staysInFlow) {
    if (textContentItem.initialized) {
      textContentItem.str.push(" ");
      textContentItem.isLastCharWhiteSpace = true;
    }
    return false;
  }

  // Captured up front, before the flush below.
  const fontName = textContentItem.fontName;
  const scaledGap = width * textContentItem.textAdvanceScale;

  let spaceWidth;
  let spaceHeight;
  if (textContentItem.vertical) {
    textContentItem.height += size;
    spaceWidth = 0;
    spaceHeight = scaledGap;
  } else {
    textContentItem.width += size;
    spaceWidth = scaledGap;
    spaceHeight = 0;
  }

  flushTextContentItem();

  if (textContentItem.isLastCharWhiteSpace) {
    return true;
  }

  textContentItem.isLastCharWhiteSpace = true;
  textContent.items.push({
    str: " ",
    // TODO: check if using the orientation from last chunk is
    // better or not.
    dir: "ltr",
    width: spaceWidth,
    height: spaceHeight,
    transform: transf || getCurrentTextTransform(),
    fontName,
    hasEOL: false,
  });

  return true;
}
2624
-
2625
/**
 * Emits the text item currently being built, if there is one.
 *
 * The extent accumulated at the current advance scale is folded into the
 * running total, the item is converted with `runBidiTransform` and pushed
 * onto `textContent.items`, and the shared item is marked uninitialized with
 * its string buffer emptied.
 */
function flushTextContentItem() {
  if (!(textContentItem.initialized && textContentItem.str)) {
    return;
  }

  // Do final text scaling.
  const scale = textContentItem.textAdvanceScale;
  if (textContentItem.vertical) {
    textContentItem.totalHeight += textContentItem.height * scale;
  } else {
    textContentItem.totalWidth += textContentItem.width * scale;
  }

  const finishedItem = runBidiTransform(textContentItem);
  textContent.items.push(finishedItem);
  textContentItem.initialized = false;
  textContentItem.str.length = 0;
}
2643
-
2644
/**
 * Hands the text items collected so far to the sink and starts a fresh
 * chunk. Nothing is sent when no items have been collected.
 */
function enqueueChunk() {
  const itemCount = textContent.items.length;
  if (itemCount === 0) {
    return;
  }
  sink.enqueue(textContent, itemCount);
  textContent.items = [];
  textContent.styles = Object.create(null);
}
2652
-
2653
- const timeSlotManager = new TimeSlotManager();
2654
-
2655
- return new Promise(function promiseBody(resolve, reject) {
2656
- const next = function (promise) {
2657
- enqueueChunk();
2658
- Promise.all([promise, sink.ready]).then(function () {
2659
- try {
2660
- promiseBody(resolve, reject);
2661
- } catch (ex) {
2662
- reject(ex);
2663
- }
2664
- }, reject);
2665
- };
2666
- task.ensureNotTerminated();
2667
- timeSlotManager.reset();
2668
-
2669
- const operation = {};
2670
- let stop,
2671
- args = [];
2672
- while (!(stop = timeSlotManager.check())) {
2673
- // The arguments parsed by read() are not used beyond this loop, so
2674
- // we can reuse the same array on every iteration, thus avoiding
2675
- // unnecessary allocations.
2676
- args.length = 0;
2677
- operation.args = args;
2678
- if (!preprocessor.read(operation)) {
2679
- break;
2680
- }
2681
- textState = stateManager.state;
2682
- const fn = operation.fn;
2683
- args = operation.args;
2684
-
2685
- switch (fn | 0) {
2686
- case OPS.setFont:
2687
- // Optimization to ignore multiple identical Tf commands.
2688
- var fontNameArg = args[0].name,
2689
- fontSizeArg = args[1];
2690
- if (
2691
- textState.font &&
2692
- fontNameArg === textState.fontName &&
2693
- fontSizeArg === textState.fontSize
2694
- ) {
2695
- break;
2696
- }
2697
-
2698
- flushTextContentItem();
2699
- textState.fontName = fontNameArg;
2700
- textState.fontSize = fontSizeArg;
2701
- next(handleSetFont(fontNameArg, null));
2702
- return;
2703
- case OPS.setTextRise:
2704
- flushTextContentItem();
2705
- textState.textRise = args[0];
2706
- break;
2707
- case OPS.setHScale:
2708
- flushTextContentItem();
2709
- textState.textHScale = args[0] / 100;
2710
- break;
2711
- case OPS.setLeading:
2712
- flushTextContentItem();
2713
- textState.leading = args[0];
2714
- break;
2715
- case OPS.moveText:
2716
- textState.translateTextLineMatrix(args[0], args[1]);
2717
- textState.textMatrix = textState.textLineMatrix.slice();
2718
- break;
2719
- case OPS.setLeadingMoveText:
2720
- flushTextContentItem();
2721
- textState.leading = -args[1];
2722
- textState.translateTextLineMatrix(args[0], args[1]);
2723
- textState.textMatrix = textState.textLineMatrix.slice();
2724
- break;
2725
- case OPS.nextLine:
2726
- appendEOL();
2727
- textState.carriageReturn();
2728
- break;
2729
- case OPS.setTextMatrix:
2730
- textState.setTextMatrix(
2731
- args[0],
2732
- args[1],
2733
- args[2],
2734
- args[3],
2735
- args[4],
2736
- args[5]
2737
- );
2738
- textState.setTextLineMatrix(
2739
- args[0],
2740
- args[1],
2741
- args[2],
2742
- args[3],
2743
- args[4],
2744
- args[5]
2745
- );
2746
- updateAdvanceScale();
2747
- break;
2748
- case OPS.setCharSpacing:
2749
- textState.charSpacing = args[0];
2750
- break;
2751
- case OPS.setWordSpacing:
2752
- textState.wordSpacing = args[0];
2753
- break;
2754
- case OPS.beginText:
2755
- flushTextContentItem();
2756
- textState.textMatrix = IDENTITY_MATRIX.slice();
2757
- textState.textLineMatrix = IDENTITY_MATRIX.slice();
2758
- break;
2759
- case OPS.showSpacedText:
2760
- if (!stateManager.state.font) {
2761
- self.ensureStateFont(stateManager.state);
2762
- continue;
2763
- }
2764
-
2765
- const spaceFactor =
2766
- ((textState.font.vertical ? 1 : -1) * textState.fontSize) / 1000;
2767
- const elements = args[0];
2768
- let isFirstChunk = true;
2769
- for (let i = 0, ii = elements.length; i < ii - 1; i++) {
2770
- const item = elements[i];
2771
- if (typeof item === "string") {
2772
- showSpacedTextBuffer.push(item);
2773
- } else if (typeof item === "number" && item !== 0) {
2774
- // PDF Specification 5.3.2 states:
2775
- // The number is expressed in thousandths of a unit of text
2776
- // space.
2777
- // This amount is subtracted from the current horizontal or
2778
- // vertical coordinate, depending on the writing mode.
2779
- // In the default coordinate system, a positive adjustment
2780
- // has the effect of moving the next glyph painted either to
2781
- // the left or down by the given amount.
2782
- const str = showSpacedTextBuffer.join("");
2783
- showSpacedTextBuffer.length = 0;
2784
- buildTextContentItem({
2785
- chars: str,
2786
- extraSpacing: item * spaceFactor,
2787
- isFirstChunk,
2788
- });
2789
- if (str && isFirstChunk) {
2790
- isFirstChunk = false;
2791
- }
2792
- }
2793
- }
2794
-
2795
- const item = elements[elements.length - 1];
2796
- if (typeof item === "string") {
2797
- showSpacedTextBuffer.push(item);
2798
- }
2799
-
2800
- if (showSpacedTextBuffer.length > 0) {
2801
- const str = showSpacedTextBuffer.join("");
2802
- showSpacedTextBuffer.length = 0;
2803
- buildTextContentItem({
2804
- chars: str,
2805
- extraSpacing: 0,
2806
- isFirstChunk,
2807
- });
2808
- }
2809
- break;
2810
- case OPS.showText:
2811
- if (!stateManager.state.font) {
2812
- self.ensureStateFont(stateManager.state);
2813
- continue;
2814
- }
2815
-
2816
- buildTextContentItem({
2817
- chars: args[0],
2818
- extraSpacing: 0,
2819
- isFirstChunk: true,
2820
- });
2821
- break;
2822
- case OPS.nextLineShowText:
2823
- if (!stateManager.state.font) {
2824
- self.ensureStateFont(stateManager.state);
2825
- continue;
2826
- }
2827
- textContentItem.hasEOL = true;
2828
- flushTextContentItem();
2829
- textState.carriageReturn();
2830
- buildTextContentItem({
2831
- chars: args[0],
2832
- extraSpacing: 0,
2833
- isFirstChunk: true,
2834
- });
2835
- break;
2836
- case OPS.nextLineSetSpacingShowText:
2837
- if (!stateManager.state.font) {
2838
- self.ensureStateFont(stateManager.state);
2839
- continue;
2840
- }
2841
- textContentItem.hasEOL = true;
2842
- flushTextContentItem();
2843
- textState.wordSpacing = args[0];
2844
- textState.charSpacing = args[1];
2845
- textState.carriageReturn();
2846
- buildTextContentItem({
2847
- chars: args[2],
2848
- extraSpacing: 0,
2849
- isFirstChunk: true,
2850
- });
2851
- break;
2852
- case OPS.paintXObject:
2853
- flushTextContentItem();
2854
- if (!xobjs) {
2855
- xobjs = resources.get("XObject") || Dict.empty;
2856
- }
2857
-
2858
- var isValidName = args[0] instanceof Name;
2859
- var name = args[0].name;
2860
-
2861
- if (isValidName && emptyXObjectCache.getByName(name)) {
2862
- break;
2863
- }
2864
-
2865
- next(
2866
- new Promise(function (resolveXObject, rejectXObject) {
2867
- if (!isValidName) {
2868
- throw new FormatError("XObject must be referred to by name.");
2869
- }
2870
-
2871
- let xobj = xobjs.getRaw(name);
2872
- if (xobj instanceof Ref) {
2873
- if (emptyXObjectCache.getByRef(xobj)) {
2874
- resolveXObject();
2875
- return;
2876
- }
2877
-
2878
- const globalImage = self.globalImageCache.getData(
2879
- xobj,
2880
- self.pageIndex
2881
- );
2882
- if (globalImage) {
2883
- resolveXObject();
2884
- return;
2885
- }
2886
-
2887
- xobj = xref.fetch(xobj);
2888
- }
2889
-
2890
- if (!isStream(xobj)) {
2891
- throw new FormatError("XObject should be a stream");
2892
- }
2893
-
2894
- const type = xobj.dict.get("Subtype");
2895
- if (!isName(type)) {
2896
- throw new FormatError("XObject should have a Name subtype");
2897
- }
2898
-
2899
- if (type.name !== "Form") {
2900
- emptyXObjectCache.set(name, xobj.dict.objId, true);
2901
-
2902
- resolveXObject();
2903
- return;
2904
- }
2905
-
2906
- // Use a new `StateManager` to prevent incorrect positioning
2907
- // of textItems *after* the Form XObject, since errors in the
2908
- // data can otherwise prevent `restore` operators from
2909
- // executing.
2910
- // NOTE: Only an issue when `options.ignoreErrors === true`.
2911
- const currentState = stateManager.state.clone();
2912
- const xObjStateManager = new StateManager(currentState);
2913
-
2914
- const matrix = xobj.dict.getArray("Matrix");
2915
- if (Array.isArray(matrix) && matrix.length === 6) {
2916
- xObjStateManager.transform(matrix);
2917
- }
2918
-
2919
- // Enqueue the `textContent` chunk before parsing the /Form
2920
- // XObject.
2921
- enqueueChunk();
2922
- const sinkWrapper = {
2923
- enqueueInvoked: false,
2924
-
2925
- enqueue(chunk, size) {
2926
- this.enqueueInvoked = true;
2927
- sink.enqueue(chunk, size);
2928
- },
2929
-
2930
- get desiredSize() {
2931
- return sink.desiredSize;
2932
- },
2933
-
2934
- get ready() {
2935
- return sink.ready;
2936
- },
2937
- };
2938
-
2939
- self
2940
- .getTextContent({
2941
- stream: xobj,
2942
- task,
2943
- resources: xobj.dict.get("Resources") || resources,
2944
- stateManager: xObjStateManager,
2945
- normalizeWhitespace,
2946
- combineTextItems,
2947
- includeMarkedContent,
2948
- sink: sinkWrapper,
2949
- seenStyles,
2950
- })
2951
- .then(function () {
2952
- if (!sinkWrapper.enqueueInvoked) {
2953
- emptyXObjectCache.set(name, xobj.dict.objId, true);
2954
- }
2955
- resolveXObject();
2956
- }, rejectXObject);
2957
- }).catch(function (reason) {
2958
- if (reason instanceof AbortException) {
2959
- return;
2960
- }
2961
- if (self.options.ignoreErrors) {
2962
- // Error(s) in the XObject -- allow text-extraction to
2963
- // continue.
2964
- warn(`getTextContent - ignoring XObject: "${reason}".`);
2965
- return;
2966
- }
2967
- throw reason;
2968
- })
2969
- );
2970
- return;
2971
- case OPS.setGState:
2972
- isValidName = args[0] instanceof Name;
2973
- name = args[0].name;
2974
-
2975
- if (isValidName && emptyGStateCache.getByName(name)) {
2976
- break;
2977
- }
2978
-
2979
- next(
2980
- new Promise(function (resolveGState, rejectGState) {
2981
- if (!isValidName) {
2982
- throw new FormatError("GState must be referred to by name.");
2983
- }
2984
-
2985
- const extGState = resources.get("ExtGState");
2986
- if (!(extGState instanceof Dict)) {
2987
- throw new FormatError("ExtGState should be a dictionary.");
2988
- }
2989
-
2990
- const gState = extGState.get(name);
2991
- // TODO: Attempt to lookup cached GStates by reference as well,
2992
- // if and only if there are PDF documents where doing so
2993
- // would significantly improve performance.
2994
- if (!(gState instanceof Dict)) {
2995
- throw new FormatError("GState should be a dictionary.");
2996
- }
2997
-
2998
- const gStateFont = gState.get("Font");
2999
- if (!gStateFont) {
3000
- emptyGStateCache.set(name, gState.objId, true);
3001
-
3002
- resolveGState();
3003
- return;
3004
- }
3005
- flushTextContentItem();
3006
-
3007
- textState.fontName = null;
3008
- textState.fontSize = gStateFont[1];
3009
- handleSetFont(null, gStateFont[0]).then(
3010
- resolveGState,
3011
- rejectGState
3012
- );
3013
- }).catch(function (reason) {
3014
- if (reason instanceof AbortException) {
3015
- return;
3016
- }
3017
- if (self.options.ignoreErrors) {
3018
- // Error(s) in the ExtGState -- allow text-extraction to
3019
- // continue.
3020
- warn(`getTextContent - ignoring ExtGState: "${reason}".`);
3021
- return;
3022
- }
3023
- throw reason;
3024
- })
3025
- );
3026
- return;
3027
- case OPS.beginMarkedContent:
3028
- if (includeMarkedContent) {
3029
- textContent.items.push({
3030
- type: "beginMarkedContent",
3031
- tag: isName(args[0]) ? args[0].name : null,
3032
- });
3033
- }
3034
- break;
3035
- case OPS.beginMarkedContentProps:
3036
- if (includeMarkedContent) {
3037
- flushTextContentItem();
3038
- let mcid = null;
3039
- if (isDict(args[1])) {
3040
- mcid = args[1].get("MCID");
3041
- }
3042
- textContent.items.push({
3043
- type: "beginMarkedContentProps",
3044
- id: Number.isInteger(mcid)
3045
- ? `${self.idFactory.getPageObjId()}_mcid${mcid}`
3046
- : null,
3047
- tag: isName(args[0]) ? args[0].name : null,
3048
- });
3049
- }
3050
- break;
3051
- case OPS.endMarkedContent:
3052
- if (includeMarkedContent) {
3053
- flushTextContentItem();
3054
- textContent.items.push({
3055
- type: "endMarkedContent",
3056
- });
3057
- }
3058
- break;
3059
- } // switch
3060
- if (textContent.items.length >= sink.desiredSize) {
3061
- // Wait for ready, if we reach highWaterMark.
3062
- stop = true;
3063
- break;
3064
- }
3065
- } // while
3066
- if (stop) {
3067
- next(deferred);
3068
- return;
3069
- }
3070
- flushTextContentItem();
3071
- enqueueChunk();
3072
- resolve();
3073
- }).catch(reason => {
3074
- if (reason instanceof AbortException) {
3075
- return;
3076
- }
3077
- if (this.options.ignoreErrors) {
3078
- // Error(s) in the TextContent -- allow text-extraction to continue.
3079
- warn(
3080
- `getTextContent - ignoring errors during "${task.name}" ` +
3081
- `task: "${reason}".`
3082
- );
3083
-
3084
- flushTextContentItem();
3085
- enqueueChunk();
3086
- return;
3087
- }
3088
- throw reason;
3089
- });
3090
- }
3091
-
3092
- extractDataStructures(dict, baseDict, properties) {
3093
- const xref = this.xref;
3094
- let cidToGidBytes;
3095
- // 9.10.2
3096
- const toUnicodePromise = this.readToUnicode(
3097
- properties.toUnicode || dict.get("ToUnicode") || baseDict.get("ToUnicode")
3098
- );
3099
-
3100
- if (properties.composite) {
3101
- // CIDSystemInfo helps to match CID to glyphs
3102
- const cidSystemInfo = dict.get("CIDSystemInfo");
3103
- if (isDict(cidSystemInfo)) {
3104
- properties.cidSystemInfo = {
3105
- registry: stringToPDFString(cidSystemInfo.get("Registry")),
3106
- ordering: stringToPDFString(cidSystemInfo.get("Ordering")),
3107
- supplement: cidSystemInfo.get("Supplement"),
3108
- };
3109
- }
3110
-
3111
- const cidToGidMap = dict.get("CIDToGIDMap");
3112
- if (isStream(cidToGidMap)) {
3113
- cidToGidBytes = cidToGidMap.getBytes();
3114
- }
3115
- }
3116
-
3117
- // Based on 9.6.6 of the spec the encoding can come from multiple places
3118
- // and depends on the font type. The base encoding and differences are
3119
- // read here, but the encoding that is actually used is chosen during
3120
- // glyph mapping in the font.
3121
- // TODO: Loading the built in encoding in the font would allow the
3122
- // differences to be merged in here not require us to hold on to it.
3123
- const differences = [];
3124
- let baseEncodingName = null;
3125
- let encoding;
3126
- if (dict.has("Encoding")) {
3127
- encoding = dict.get("Encoding");
3128
- if (isDict(encoding)) {
3129
- baseEncodingName = encoding.get("BaseEncoding");
3130
- baseEncodingName = isName(baseEncodingName)
3131
- ? baseEncodingName.name
3132
- : null;
3133
- // Load the differences between the base and original
3134
- if (encoding.has("Differences")) {
3135
- const diffEncoding = encoding.get("Differences");
3136
- let index = 0;
3137
- for (let j = 0, jj = diffEncoding.length; j < jj; j++) {
3138
- const data = xref.fetchIfRef(diffEncoding[j]);
3139
- if (isNum(data)) {
3140
- index = data;
3141
- } else if (isName(data)) {
3142
- differences[index++] = data.name;
3143
- } else {
3144
- throw new FormatError(
3145
- `Invalid entry in 'Differences' array: ${data}`
3146
- );
3147
- }
3148
- }
3149
- }
3150
- } else if (isName(encoding)) {
3151
- baseEncodingName = encoding.name;
3152
- } else {
3153
- throw new FormatError("Encoding is not a Name nor a Dict");
3154
- }
3155
- // According to table 114 if the encoding is a named encoding it must be
3156
- // one of these predefined encodings.
3157
- if (
3158
- baseEncodingName !== "MacRomanEncoding" &&
3159
- baseEncodingName !== "MacExpertEncoding" &&
3160
- baseEncodingName !== "WinAnsiEncoding"
3161
- ) {
3162
- baseEncodingName = null;
3163
- }
3164
- }
3165
-
3166
- if (baseEncodingName) {
3167
- properties.defaultEncoding = getEncoding(baseEncodingName);
3168
- } else {
3169
- const isSymbolicFont = !!(properties.flags & FontFlags.Symbolic);
3170
- const isNonsymbolicFont = !!(properties.flags & FontFlags.Nonsymbolic);
3171
- // According to "Table 114" in section "9.6.6.1 General" (under
3172
- // "9.6.6 Character Encoding") of the PDF specification, a Nonsymbolic
3173
- // font should use the `StandardEncoding` if no encoding is specified.
3174
- encoding = StandardEncoding;
3175
- if (properties.type === "TrueType" && !isNonsymbolicFont) {
3176
- encoding = WinAnsiEncoding;
3177
- }
3178
- // The Symbolic attribute can be misused for regular fonts
3179
- // Heuristic: we have to check if the font is a standard one also
3180
- if (isSymbolicFont) {
3181
- encoding = MacRomanEncoding;
3182
- if (!properties.file || properties.isInternalFont) {
3183
- if (/Symbol/i.test(properties.name)) {
3184
- encoding = SymbolSetEncoding;
3185
- } else if (/Dingbats|Wingdings/i.test(properties.name)) {
3186
- encoding = ZapfDingbatsEncoding;
3187
- }
3188
- }
3189
- }
3190
- properties.defaultEncoding = encoding;
3191
- }
3192
-
3193
- properties.differences = differences;
3194
- properties.baseEncodingName = baseEncodingName;
3195
- properties.hasEncoding = !!baseEncodingName || differences.length > 0;
3196
- properties.dict = dict;
3197
- return toUnicodePromise
3198
- .then(readToUnicode => {
3199
- properties.toUnicode = readToUnicode;
3200
- return this.buildToUnicode(properties);
3201
- })
3202
- .then(builtToUnicode => {
3203
- properties.toUnicode = builtToUnicode;
3204
- if (cidToGidBytes) {
3205
- properties.cidToGidMap = this.readCidToGidMap(
3206
- cidToGidBytes,
3207
- builtToUnicode
3208
- );
3209
- }
3210
- return properties;
3211
- });
3212
- }
3213
-
3214
- /**
3215
- * @returns {Array}
3216
- * @private
3217
- */
3218
- _simpleFontToUnicode(properties, forceGlyphs = false) {
3219
- assert(!properties.composite, "Must be a simple font.");
3220
-
3221
- const toUnicode = [];
3222
- const encoding = properties.defaultEncoding.slice();
3223
- const baseEncodingName = properties.baseEncodingName;
3224
- // Merge in the differences array.
3225
- const differences = properties.differences;
3226
- for (const charcode in differences) {
3227
- const glyphName = differences[charcode];
3228
- if (glyphName === ".notdef") {
3229
- // Skip .notdef to prevent rendering errors, e.g. boxes appearing
3230
- // where there should be spaces (fixes issue5256.pdf).
3231
- continue;
3232
- }
3233
- encoding[charcode] = glyphName;
3234
- }
3235
- const glyphsUnicodeMap = getGlyphsUnicode();
3236
- for (const charcode in encoding) {
3237
- // a) Map the character code to a character name.
3238
- let glyphName = encoding[charcode];
3239
- // b) Look up the character name in the Adobe Glyph List (see the
3240
- // Bibliography) to obtain the corresponding Unicode value.
3241
- if (glyphName === "") {
3242
- continue;
3243
- } else if (glyphsUnicodeMap[glyphName] === undefined) {
3244
- // (undocumented) c) Few heuristics to recognize unknown glyphs
3245
- // NOTE: Adobe Reader does not do this step, but OSX Preview does
3246
- let code = 0;
3247
- switch (glyphName[0]) {
3248
- case "G": // Gxx glyph
3249
- if (glyphName.length === 3) {
3250
- code = parseInt(glyphName.substring(1), 16);
3251
- }
3252
- break;
3253
- case "g": // g00xx glyph
3254
- if (glyphName.length === 5) {
3255
- code = parseInt(glyphName.substring(1), 16);
3256
- }
3257
- break;
3258
- case "C": // Cdd{d} glyph
3259
- case "c": // cdd{d} glyph
3260
- if (glyphName.length >= 3 && glyphName.length <= 4) {
3261
- const codeStr = glyphName.substring(1);
3262
-
3263
- if (forceGlyphs) {
3264
- code = parseInt(codeStr, 16);
3265
- break;
3266
- }
3267
- // Normally the Cdd{d}/cdd{d} glyphName format will contain
3268
- // regular, i.e. base 10, charCodes (see issue4550.pdf)...
3269
- code = +codeStr;
3270
-
3271
- // ... however some PDF generators violate that assumption by
3272
- // containing glyph, i.e. base 16, codes instead.
3273
- // In that case we need to re-parse the *entire* encoding to
3274
- // prevent broken text-selection (fixes issue9655_reduced.pdf).
3275
- if (
3276
- Number.isNaN(code) &&
3277
- Number.isInteger(parseInt(codeStr, 16))
3278
- ) {
3279
- return this._simpleFontToUnicode(
3280
- properties,
3281
- /* forceGlyphs */ true
3282
- );
3283
- }
3284
- }
3285
- break;
3286
- default:
3287
- // 'uniXXXX'/'uXXXX{XX}' glyphs
3288
- const unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
3289
- if (unicode !== -1) {
3290
- code = unicode;
3291
- }
3292
- }
3293
- if (code > 0 && code <= 0x10ffff && Number.isInteger(code)) {
3294
- // If `baseEncodingName` is one the predefined encodings, and `code`
3295
- // equals `charcode`, using the glyph defined in the baseEncoding
3296
- // seems to yield a better `toUnicode` mapping (fixes issue 5070).
3297
- if (baseEncodingName && code === +charcode) {
3298
- const baseEncoding = getEncoding(baseEncodingName);
3299
- if (baseEncoding && (glyphName = baseEncoding[charcode])) {
3300
- toUnicode[charcode] = String.fromCharCode(
3301
- glyphsUnicodeMap[glyphName]
3302
- );
3303
- continue;
3304
- }
3305
- }
3306
- toUnicode[charcode] = String.fromCodePoint(code);
3307
- }
3308
- continue;
3309
- }
3310
- toUnicode[charcode] = String.fromCharCode(glyphsUnicodeMap[glyphName]);
3311
- }
3312
- return toUnicode;
3313
- }
3314
-
3315
- /**
3316
- * Builds a char code to unicode map based on section 9.10 of the spec.
3317
- * @param {Object} properties Font properties object.
3318
- * @returns {Promise} A Promise that is resolved with a
3319
- * {ToUnicodeMap|IdentityToUnicodeMap} object.
3320
- */
3321
- async buildToUnicode(properties) {
3322
- properties.hasIncludedToUnicodeMap =
3323
- !!properties.toUnicode && properties.toUnicode.length > 0;
3324
-
3325
- // Section 9.10.2 Mapping Character Codes to Unicode Values
3326
- if (properties.hasIncludedToUnicodeMap) {
3327
- // Some fonts contain incomplete ToUnicode data, causing issues with
3328
- // text-extraction. For simple fonts, containing encoding information,
3329
- // use a fallback ToUnicode map to improve this (fixes issue8229.pdf).
3330
- if (!properties.composite && properties.hasEncoding) {
3331
- properties.fallbackToUnicode = this._simpleFontToUnicode(properties);
3332
- }
3333
- return properties.toUnicode;
3334
- }
3335
-
3336
- // According to the spec if the font is a simple font we should only map
3337
- // to unicode if the base encoding is MacRoman, MacExpert, or WinAnsi or
3338
- // the differences array only contains adobe standard or symbol set names,
3339
- // in pratice it seems better to always try to create a toUnicode map
3340
- // based of the default encoding.
3341
- if (!properties.composite /* is simple font */) {
3342
- return new ToUnicodeMap(this._simpleFontToUnicode(properties));
3343
- }
3344
-
3345
- // If the font is a composite font that uses one of the predefined CMaps
3346
- // listed in Table 118 (except Identity–H and Identity–V) or whose
3347
- // descendant CIDFont uses the Adobe-GB1, Adobe-CNS1, Adobe-Japan1, or
3348
- // Adobe-Korea1 character collection:
3349
- if (
3350
- properties.composite &&
3351
- ((properties.cMap.builtInCMap &&
3352
- !(properties.cMap instanceof IdentityCMap)) ||
3353
- (properties.cidSystemInfo.registry === "Adobe" &&
3354
- (properties.cidSystemInfo.ordering === "GB1" ||
3355
- properties.cidSystemInfo.ordering === "CNS1" ||
3356
- properties.cidSystemInfo.ordering === "Japan1" ||
3357
- properties.cidSystemInfo.ordering === "Korea1")))
3358
- ) {
3359
- // Then:
3360
- // a) Map the character code to a character identifier (CID) according
3361
- // to the font’s CMap.
3362
- // b) Obtain the registry and ordering of the character collection used
3363
- // by the font’s CMap (for example, Adobe and Japan1) from its
3364
- // CIDSystemInfo dictionary.
3365
- const { registry, ordering } = properties.cidSystemInfo;
3366
- // c) Construct a second CMap name by concatenating the registry and
3367
- // ordering obtained in step (b) in the format registry–ordering–UCS2
3368
- // (for example, Adobe–Japan1–UCS2).
3369
- const ucs2CMapName = Name.get(`${registry}-${ordering}-UCS2`);
3370
- // d) Obtain the CMap with the name constructed in step (c) (available
3371
- // from the ASN Web site; see the Bibliography).
3372
- const ucs2CMap = await CMapFactory.create({
3373
- encoding: ucs2CMapName,
3374
- fetchBuiltInCMap: this._fetchBuiltInCMapBound,
3375
- useCMap: null,
3376
- });
3377
- const toUnicode = [];
3378
- properties.cMap.forEach(function (charcode, cid) {
3379
- if (cid > 0xffff) {
3380
- throw new FormatError("Max size of CID is 65,535");
3381
- }
3382
- // e) Map the CID obtained in step (a) according to the CMap
3383
- // obtained in step (d), producing a Unicode value.
3384
- const ucs2 = ucs2CMap.lookup(cid);
3385
- if (ucs2) {
3386
- toUnicode[charcode] = String.fromCharCode(
3387
- (ucs2.charCodeAt(0) << 8) + ucs2.charCodeAt(1)
3388
- );
3389
- }
3390
- });
3391
- return new ToUnicodeMap(toUnicode);
3392
- }
3393
-
3394
- // The viewer's choice, just use an identity map.
3395
- return new IdentityToUnicodeMap(properties.firstChar, properties.lastChar);
3396
- }
3397
-
3398
- readToUnicode(cmapObj) {
3399
- if (!cmapObj) {
3400
- return Promise.resolve(null);
3401
- }
3402
- if (isName(cmapObj)) {
3403
- return CMapFactory.create({
3404
- encoding: cmapObj,
3405
- fetchBuiltInCMap: this._fetchBuiltInCMapBound,
3406
- useCMap: null,
3407
- }).then(function (cmap) {
3408
- if (cmap instanceof IdentityCMap) {
3409
- return new IdentityToUnicodeMap(0, 0xffff);
3410
- }
3411
- return new ToUnicodeMap(cmap.getMap());
3412
- });
3413
- } else if (isStream(cmapObj)) {
3414
- return CMapFactory.create({
3415
- encoding: cmapObj,
3416
- fetchBuiltInCMap: this._fetchBuiltInCMapBound,
3417
- useCMap: null,
3418
- }).then(
3419
- function (cmap) {
3420
- if (cmap instanceof IdentityCMap) {
3421
- return new IdentityToUnicodeMap(0, 0xffff);
3422
- }
3423
- const map = new Array(cmap.length);
3424
- // Convert UTF-16BE
3425
- // NOTE: cmap can be a sparse array, so use forEach instead of
3426
- // `for(;;)` to iterate over all keys.
3427
- cmap.forEach(function (charCode, token) {
3428
- const str = [];
3429
- for (let k = 0; k < token.length; k += 2) {
3430
- const w1 = (token.charCodeAt(k) << 8) | token.charCodeAt(k + 1);
3431
- if ((w1 & 0xf800) !== 0xd800) {
3432
- // w1 < 0xD800 || w1 > 0xDFFF
3433
- str.push(w1);
3434
- continue;
3435
- }
3436
- k += 2;
3437
- const w2 = (token.charCodeAt(k) << 8) | token.charCodeAt(k + 1);
3438
- str.push(((w1 & 0x3ff) << 10) + (w2 & 0x3ff) + 0x10000);
3439
- }
3440
- map[charCode] = String.fromCodePoint.apply(String, str);
3441
- });
3442
- return new ToUnicodeMap(map);
3443
- },
3444
- reason => {
3445
- if (reason instanceof AbortException) {
3446
- return null;
3447
- }
3448
- if (this.options.ignoreErrors) {
3449
- // Error in the ToUnicode data -- sending unsupported feature
3450
- // notification and allow font parsing to continue.
3451
- this.handler.send("UnsupportedFeature", {
3452
- featureId: UNSUPPORTED_FEATURES.errorFontToUnicode,
3453
- });
3454
- warn(`readToUnicode - ignoring ToUnicode data: "${reason}".`);
3455
- return null;
3456
- }
3457
- throw reason;
3458
- }
3459
- );
3460
- }
3461
- return Promise.resolve(null);
3462
- }
3463
-
3464
- readCidToGidMap(glyphsData, toUnicode) {
3465
- // Extract the encoding from the CIDToGIDMap
3466
-
3467
- // Set encoding 0 to later verify the font has an encoding
3468
- const result = [];
3469
- for (let j = 0, jj = glyphsData.length; j < jj; j++) {
3470
- const glyphID = (glyphsData[j++] << 8) | glyphsData[j];
3471
- const code = j >> 1;
3472
- if (glyphID === 0 && !toUnicode.has(code)) {
3473
- continue;
3474
- }
3475
- result[code] = glyphID;
3476
- }
3477
- return result;
3478
- }
3479
-
3480
- extractWidths(dict, descriptor, properties) {
3481
- const xref = this.xref;
3482
- let glyphsWidths = [];
3483
- let defaultWidth = 0;
3484
- const glyphsVMetrics = [];
3485
- let defaultVMetrics;
3486
- let i, ii, j, jj, start, code, widths;
3487
- if (properties.composite) {
3488
- defaultWidth = dict.has("DW") ? dict.get("DW") : 1000;
3489
-
3490
- widths = dict.get("W");
3491
- if (widths) {
3492
- for (i = 0, ii = widths.length; i < ii; i++) {
3493
- start = xref.fetchIfRef(widths[i++]);
3494
- code = xref.fetchIfRef(widths[i]);
3495
- if (Array.isArray(code)) {
3496
- for (j = 0, jj = code.length; j < jj; j++) {
3497
- glyphsWidths[start++] = xref.fetchIfRef(code[j]);
3498
- }
3499
- } else {
3500
- const width = xref.fetchIfRef(widths[++i]);
3501
- for (j = start; j <= code; j++) {
3502
- glyphsWidths[j] = width;
3503
- }
3504
- }
3505
- }
3506
- }
3507
-
3508
- if (properties.vertical) {
3509
- let vmetrics = dict.getArray("DW2") || [880, -1000];
3510
- defaultVMetrics = [vmetrics[1], defaultWidth * 0.5, vmetrics[0]];
3511
- vmetrics = dict.get("W2");
3512
- if (vmetrics) {
3513
- for (i = 0, ii = vmetrics.length; i < ii; i++) {
3514
- start = xref.fetchIfRef(vmetrics[i++]);
3515
- code = xref.fetchIfRef(vmetrics[i]);
3516
- if (Array.isArray(code)) {
3517
- for (j = 0, jj = code.length; j < jj; j++) {
3518
- glyphsVMetrics[start++] = [
3519
- xref.fetchIfRef(code[j++]),
3520
- xref.fetchIfRef(code[j++]),
3521
- xref.fetchIfRef(code[j]),
3522
- ];
3523
- }
3524
- } else {
3525
- const vmetric = [
3526
- xref.fetchIfRef(vmetrics[++i]),
3527
- xref.fetchIfRef(vmetrics[++i]),
3528
- xref.fetchIfRef(vmetrics[++i]),
3529
- ];
3530
- for (j = start; j <= code; j++) {
3531
- glyphsVMetrics[j] = vmetric;
3532
- }
3533
- }
3534
- }
3535
- }
3536
- }
3537
- } else {
3538
- const firstChar = properties.firstChar;
3539
- widths = dict.get("Widths");
3540
- if (widths) {
3541
- j = firstChar;
3542
- for (i = 0, ii = widths.length; i < ii; i++) {
3543
- glyphsWidths[j++] = xref.fetchIfRef(widths[i]);
3544
- }
3545
- defaultWidth = parseFloat(descriptor.get("MissingWidth")) || 0;
3546
- } else {
3547
- // Trying get the BaseFont metrics (see comment above).
3548
- const baseFontName = dict.get("BaseFont");
3549
- if (isName(baseFontName)) {
3550
- const metrics = this.getBaseFontMetrics(baseFontName.name);
3551
-
3552
- glyphsWidths = this.buildCharCodeToWidth(metrics.widths, properties);
3553
- defaultWidth = metrics.defaultWidth;
3554
- }
3555
- }
3556
- }
3557
-
3558
- // Heuristic: detection of monospace font by checking all non-zero widths
3559
- let isMonospace = true;
3560
- let firstWidth = defaultWidth;
3561
- for (const glyph in glyphsWidths) {
3562
- const glyphWidth = glyphsWidths[glyph];
3563
- if (!glyphWidth) {
3564
- continue;
3565
- }
3566
- if (!firstWidth) {
3567
- firstWidth = glyphWidth;
3568
- continue;
3569
- }
3570
- if (firstWidth !== glyphWidth) {
3571
- isMonospace = false;
3572
- break;
3573
- }
3574
- }
3575
- if (isMonospace) {
3576
- properties.flags |= FontFlags.FixedPitch;
3577
- }
3578
-
3579
- properties.defaultWidth = defaultWidth;
3580
- properties.widths = glyphsWidths;
3581
- properties.defaultVMetrics = defaultVMetrics;
3582
- properties.vmetrics = glyphsVMetrics;
3583
- }
3584
-
3585
- isSerifFont(baseFontName) {
3586
- // Simulating descriptor flags attribute
3587
- const fontNameWoStyle = baseFontName.split("-")[0];
3588
- return (
3589
- fontNameWoStyle in getSerifFonts() ||
3590
- fontNameWoStyle.search(/serif/gi) !== -1
3591
- );
3592
- }
3593
-
3594
- getBaseFontMetrics(name) {
3595
- let defaultWidth = 0;
3596
- let widths = Object.create(null);
3597
- let monospace = false;
3598
- const stdFontMap = getStdFontMap();
3599
- let lookupName = stdFontMap[name] || name;
3600
- const Metrics = getMetrics();
3601
-
3602
- if (!(lookupName in Metrics)) {
3603
- // Use default fonts for looking up font metrics if the passed
3604
- // font is not a base font
3605
- if (this.isSerifFont(name)) {
3606
- lookupName = "Times-Roman";
3607
- } else {
3608
- lookupName = "Helvetica";
3609
- }
3610
- }
3611
- const glyphWidths = Metrics[lookupName];
3612
-
3613
- if (isNum(glyphWidths)) {
3614
- defaultWidth = glyphWidths;
3615
- monospace = true;
3616
- } else {
3617
- widths = glyphWidths(); // expand lazy widths array
3618
- }
3619
-
3620
- return {
3621
- defaultWidth,
3622
- monospace,
3623
- widths,
3624
- };
3625
- }
3626
-
3627
- buildCharCodeToWidth(widthsByGlyphName, properties) {
3628
- const widths = Object.create(null);
3629
- const differences = properties.differences;
3630
- const encoding = properties.defaultEncoding;
3631
- for (let charCode = 0; charCode < 256; charCode++) {
3632
- if (charCode in differences && widthsByGlyphName[differences[charCode]]) {
3633
- widths[charCode] = widthsByGlyphName[differences[charCode]];
3634
- continue;
3635
- }
3636
- if (charCode in encoding && widthsByGlyphName[encoding[charCode]]) {
3637
- widths[charCode] = widthsByGlyphName[encoding[charCode]];
3638
- continue;
3639
- }
3640
- }
3641
- return widths;
3642
- }
3643
-
3644
- preEvaluateFont(dict) {
3645
- const baseDict = dict;
3646
- let type = dict.get("Subtype");
3647
- if (!isName(type)) {
3648
- throw new FormatError("invalid font Subtype");
3649
- }
3650
-
3651
- let composite = false;
3652
- let hash, toUnicode;
3653
- if (type.name === "Type0") {
3654
- // If font is a composite
3655
- // - get the descendant font
3656
- // - set the type according to the descendant font
3657
- // - get the FontDescriptor from the descendant font
3658
- const df = dict.get("DescendantFonts");
3659
- if (!df) {
3660
- throw new FormatError("Descendant fonts are not specified");
3661
- }
3662
- dict = Array.isArray(df) ? this.xref.fetchIfRef(df[0]) : df;
3663
-
3664
- if (!(dict instanceof Dict)) {
3665
- throw new FormatError("Descendant font is not a dictionary.");
3666
- }
3667
- type = dict.get("Subtype");
3668
- if (!isName(type)) {
3669
- throw new FormatError("invalid font Subtype");
3670
- }
3671
- composite = true;
3672
- }
3673
-
3674
- const firstChar = dict.get("FirstChar") || 0,
3675
- lastChar = dict.get("LastChar") || (composite ? 0xffff : 0xff);
3676
- const descriptor = dict.get("FontDescriptor");
3677
- if (descriptor) {
3678
- hash = new MurmurHash3_64();
3679
-
3680
- const encoding = baseDict.getRaw("Encoding");
3681
- if (isName(encoding)) {
3682
- hash.update(encoding.name);
3683
- } else if (isRef(encoding)) {
3684
- hash.update(encoding.toString());
3685
- } else if (isDict(encoding)) {
3686
- for (const entry of encoding.getRawValues()) {
3687
- if (isName(entry)) {
3688
- hash.update(entry.name);
3689
- } else if (isRef(entry)) {
3690
- hash.update(entry.toString());
3691
- } else if (Array.isArray(entry)) {
3692
- // 'Differences' array (fixes bug1157493.pdf).
3693
- const diffLength = entry.length,
3694
- diffBuf = new Array(diffLength);
3695
-
3696
- for (let j = 0; j < diffLength; j++) {
3697
- const diffEntry = entry[j];
3698
- if (isName(diffEntry)) {
3699
- diffBuf[j] = diffEntry.name;
3700
- } else if (isNum(diffEntry) || isRef(diffEntry)) {
3701
- diffBuf[j] = diffEntry.toString();
3702
- }
3703
- }
3704
- hash.update(diffBuf.join());
3705
- }
3706
- }
3707
- }
3708
-
3709
- hash.update(`${firstChar}-${lastChar}`); // Fixes issue10665_reduced.pdf
3710
-
3711
- toUnicode = dict.get("ToUnicode") || baseDict.get("ToUnicode");
3712
- if (isStream(toUnicode)) {
3713
- const stream = toUnicode.str || toUnicode;
3714
- const uint8array = stream.buffer
3715
- ? new Uint8Array(stream.buffer.buffer, 0, stream.bufferLength)
3716
- : new Uint8Array(
3717
- stream.bytes.buffer,
3718
- stream.start,
3719
- stream.end - stream.start
3720
- );
3721
- hash.update(uint8array);
3722
- } else if (isName(toUnicode)) {
3723
- hash.update(toUnicode.name);
3724
- }
3725
-
3726
- const widths = dict.get("Widths") || baseDict.get("Widths");
3727
- if (Array.isArray(widths)) {
3728
- const widthsBuf = [];
3729
- for (const entry of widths) {
3730
- if (isNum(entry) || isRef(entry)) {
3731
- widthsBuf.push(entry.toString());
3732
- }
3733
- }
3734
- hash.update(widthsBuf.join());
3735
- }
3736
-
3737
- if (composite) {
3738
- hash.update("compositeFont");
3739
-
3740
- const compositeWidths = dict.get("W") || baseDict.get("W");
3741
- if (Array.isArray(compositeWidths)) {
3742
- const widthsBuf = [];
3743
- for (const entry of compositeWidths) {
3744
- if (isNum(entry) || isRef(entry)) {
3745
- widthsBuf.push(entry.toString());
3746
- } else if (Array.isArray(entry)) {
3747
- const subWidthsBuf = [];
3748
- for (const element of entry) {
3749
- if (isNum(element) || isRef(element)) {
3750
- subWidthsBuf.push(element.toString());
3751
- }
3752
- }
3753
- widthsBuf.push(`[${subWidthsBuf.join()}]`);
3754
- }
3755
- }
3756
- hash.update(widthsBuf.join());
3757
- }
3758
- }
3759
- }
3760
-
3761
- return {
3762
- descriptor,
3763
- dict,
3764
- baseDict,
3765
- composite,
3766
- type: type.name,
3767
- firstChar,
3768
- lastChar,
3769
- toUnicode,
3770
- hash: hash ? hash.hexdigest() : "",
3771
- };
3772
- }
3773
-
3774
- async translateFont({
3775
- descriptor,
3776
- dict,
3777
- baseDict,
3778
- composite,
3779
- type,
3780
- firstChar,
3781
- lastChar,
3782
- toUnicode,
3783
- cssFontInfo,
3784
- }) {
3785
- const isType3Font = type === "Type3";
3786
- let properties;
3787
-
3788
- if (!descriptor) {
3789
- if (isType3Font) {
3790
- // FontDescriptor is only required for Type3 fonts when the document
3791
- // is a tagged pdf. Create a barbebones one to get by.
3792
- descriptor = new Dict(null);
3793
- descriptor.set("FontName", Name.get(type));
3794
- descriptor.set("FontBBox", dict.getArray("FontBBox") || [0, 0, 0, 0]);
3795
- } else {
3796
- // Before PDF 1.5 if the font was one of the base 14 fonts, having a
3797
- // FontDescriptor was not required.
3798
- // This case is here for compatibility.
3799
- let baseFontName = dict.get("BaseFont");
3800
- if (!isName(baseFontName)) {
3801
- throw new FormatError("Base font is not specified");
3802
- }
3803
-
3804
- // Using base font name as a font name.
3805
- baseFontName = baseFontName.name.replace(/[,_]/g, "-");
3806
- const metrics = this.getBaseFontMetrics(baseFontName);
3807
-
3808
- // Simulating descriptor flags attribute
3809
- const fontNameWoStyle = baseFontName.split("-")[0];
3810
- const flags =
3811
- (this.isSerifFont(fontNameWoStyle) ? FontFlags.Serif : 0) |
3812
- (metrics.monospace ? FontFlags.FixedPitch : 0) |
3813
- (getSymbolsFonts()[fontNameWoStyle]
3814
- ? FontFlags.Symbolic
3815
- : FontFlags.Nonsymbolic);
3816
-
3817
- properties = {
3818
- type,
3819
- name: baseFontName,
3820
- loadedName: baseDict.loadedName,
3821
- widths: metrics.widths,
3822
- defaultWidth: metrics.defaultWidth,
3823
- flags,
3824
- firstChar,
3825
- lastChar,
3826
- toUnicode,
3827
- xHeight: 0,
3828
- capHeight: 0,
3829
- italicAngle: 0,
3830
- isType3Font,
3831
- };
3832
- const widths = dict.get("Widths");
3833
-
3834
- const standardFontName = getStandardFontName(baseFontName);
3835
- let file = null;
3836
- if (standardFontName) {
3837
- properties.isStandardFont = true;
3838
- file = await this.fetchStandardFontData(standardFontName);
3839
- properties.isInternalFont = !!file;
3840
- }
3841
- return this.extractDataStructures(dict, dict, properties).then(
3842
- newProperties => {
3843
- if (widths) {
3844
- const glyphWidths = [];
3845
- let j = firstChar;
3846
- for (let i = 0, ii = widths.length; i < ii; i++) {
3847
- glyphWidths[j++] = this.xref.fetchIfRef(widths[i]);
3848
- }
3849
- newProperties.widths = glyphWidths;
3850
- } else {
3851
- newProperties.widths = this.buildCharCodeToWidth(
3852
- metrics.widths,
3853
- newProperties
3854
- );
3855
- }
3856
- return new Font(baseFontName, file, newProperties);
3857
- }
3858
- );
3859
- }
3860
- }
3861
-
3862
- // According to the spec if 'FontDescriptor' is declared, 'FirstChar',
3863
- // 'LastChar' and 'Widths' should exist too, but some PDF encoders seem
3864
- // to ignore this rule when a variant of a standard font is used.
3865
- // TODO Fill the width array depending on which of the base font this is
3866
- // a variant.
3867
-
3868
- let fontName = descriptor.get("FontName");
3869
- let baseFont = dict.get("BaseFont");
3870
- // Some bad PDFs have a string as the font name.
3871
- if (isString(fontName)) {
3872
- fontName = Name.get(fontName);
3873
- }
3874
- if (isString(baseFont)) {
3875
- baseFont = Name.get(baseFont);
3876
- }
3877
-
3878
- if (!isType3Font) {
3879
- const fontNameStr = fontName && fontName.name;
3880
- const baseFontStr = baseFont && baseFont.name;
3881
- if (fontNameStr !== baseFontStr) {
3882
- info(
3883
- `The FontDescriptor's FontName is "${fontNameStr}" but ` +
3884
- `should be the same as the Font's BaseFont "${baseFontStr}".`
3885
- );
3886
- // Workaround for cases where e.g. fontNameStr = 'Arial' and
3887
- // baseFontStr = 'Arial,Bold' (needed when no font file is embedded).
3888
- if (fontNameStr && baseFontStr && baseFontStr.startsWith(fontNameStr)) {
3889
- fontName = baseFont;
3890
- }
3891
- }
3892
- }
3893
- fontName = fontName || baseFont;
3894
-
3895
- if (!isName(fontName)) {
3896
- throw new FormatError("invalid font name");
3897
- }
3898
-
3899
- let fontFile, subtype, length1, length2, length3;
3900
- try {
3901
- fontFile = descriptor.get("FontFile", "FontFile2", "FontFile3");
3902
- } catch (ex) {
3903
- if (!this.options.ignoreErrors) {
3904
- throw ex;
3905
- }
3906
- warn(`translateFont - fetching "${fontName.name}" font file: "${ex}".`);
3907
- fontFile = new NullStream();
3908
- }
3909
- let isStandardFont = false;
3910
- let isInternalFont = false;
3911
- let glyphScaleFactors = null;
3912
- if (fontFile) {
3913
- if (fontFile.dict) {
3914
- const subtypeEntry = fontFile.dict.get("Subtype");
3915
- if (subtypeEntry instanceof Name) {
3916
- subtype = subtypeEntry.name;
3917
- }
3918
- length1 = fontFile.dict.get("Length1");
3919
- length2 = fontFile.dict.get("Length2");
3920
- length3 = fontFile.dict.get("Length3");
3921
- }
3922
- } else if (cssFontInfo) {
3923
- // We've a missing XFA font.
3924
- const standardFontName = getXfaFontName(fontName.name);
3925
- if (standardFontName) {
3926
- cssFontInfo.fontFamily = `${cssFontInfo.fontFamily}-PdfJS-XFA`;
3927
- cssFontInfo.lineHeight = standardFontName.lineHeight || null;
3928
- glyphScaleFactors = standardFontName.factors || null;
3929
- fontFile = await this.fetchStandardFontData(standardFontName.name);
3930
- isInternalFont = !!fontFile;
3931
- type = "TrueType";
3932
- }
3933
- } else if (!isType3Font) {
3934
- const standardFontName = getStandardFontName(fontName.name);
3935
- if (standardFontName) {
3936
- isStandardFont = true;
3937
- fontFile = await this.fetchStandardFontData(standardFontName);
3938
- isInternalFont = !!fontFile;
3939
- }
3940
- }
3941
-
3942
- properties = {
3943
- type,
3944
- name: fontName.name,
3945
- subtype,
3946
- file: fontFile,
3947
- length1,
3948
- length2,
3949
- length3,
3950
- isStandardFont,
3951
- isInternalFont,
3952
- loadedName: baseDict.loadedName,
3953
- composite,
3954
- fixedPitch: false,
3955
- fontMatrix: dict.getArray("FontMatrix") || FONT_IDENTITY_MATRIX,
3956
- firstChar,
3957
- lastChar,
3958
- toUnicode,
3959
- bbox: descriptor.getArray("FontBBox") || dict.getArray("FontBBox"),
3960
- ascent: descriptor.get("Ascent"),
3961
- descent: descriptor.get("Descent"),
3962
- xHeight: descriptor.get("XHeight") || 0,
3963
- capHeight: descriptor.get("CapHeight") || 0,
3964
- flags: descriptor.get("Flags"),
3965
- italicAngle: descriptor.get("ItalicAngle") || 0,
3966
- isType3Font,
3967
- cssFontInfo,
3968
- scaleFactors: glyphScaleFactors,
3969
- };
3970
-
3971
- if (composite) {
3972
- const cidEncoding = baseDict.get("Encoding");
3973
- if (isName(cidEncoding)) {
3974
- properties.cidEncoding = cidEncoding.name;
3975
- }
3976
- const cMap = await CMapFactory.create({
3977
- encoding: cidEncoding,
3978
- fetchBuiltInCMap: this._fetchBuiltInCMapBound,
3979
- useCMap: null,
3980
- });
3981
- properties.cMap = cMap;
3982
- properties.vertical = properties.cMap.vertical;
3983
- }
3984
-
3985
- return this.extractDataStructures(dict, baseDict, properties).then(
3986
- newProperties => {
3987
- this.extractWidths(dict, descriptor, newProperties);
3988
-
3989
- return new Font(fontName.name, fontFile, newProperties);
3990
- }
3991
- );
3992
- }
3993
-
3994
- static buildFontPaths(font, glyphs, handler, evaluatorOptions) {
3995
- function buildPath(fontChar) {
3996
- const glyphName = `${font.loadedName}_path_${fontChar}`;
3997
- try {
3998
- if (font.renderer.hasBuiltPath(fontChar)) {
3999
- return;
4000
- }
4001
- handler.send("commonobj", [
4002
- glyphName,
4003
- "FontPath",
4004
- font.renderer.getPathJs(fontChar),
4005
- ]);
4006
- } catch (reason) {
4007
- if (evaluatorOptions.ignoreErrors) {
4008
- // Error in the font data -- sending unsupported feature notification
4009
- // and allow glyph path building to continue.
4010
- handler.send("UnsupportedFeature", {
4011
- featureId: UNSUPPORTED_FEATURES.errorFontBuildPath,
4012
- });
4013
- warn(`buildFontPaths - ignoring ${glyphName} glyph: "${reason}".`);
4014
- return;
4015
- }
4016
- throw reason;
4017
- }
4018
- }
4019
-
4020
- for (const glyph of glyphs) {
4021
- buildPath(glyph.fontChar);
4022
-
4023
- // If the glyph has an accent we need to build a path for its
4024
- // fontChar too, otherwise CanvasGraphics_paintChar will fail.
4025
- const accent = glyph.accent;
4026
- if (accent && accent.fontChar) {
4027
- buildPath(accent.fontChar);
4028
- }
4029
- }
4030
- }
4031
-
4032
- static get fallbackFontDict() {
4033
- const dict = new Dict();
4034
- dict.set("BaseFont", Name.get("PDFJS-FallbackFont"));
4035
- dict.set("Type", Name.get("FallbackType"));
4036
- dict.set("Subtype", Name.get("FallbackType"));
4037
- dict.set("Encoding", Name.get("WinAnsiEncoding"));
4038
-
4039
- return shadow(this, "fallbackFontDict", dict);
4040
- }
4041
- }
4042
-
4043
/**
 * Couples a parsed font with the dictionary it came from, and tracks
 * whether it has been sent to the handler and (for Type3 fonts) whether its
 * glyph procedures have been loaded.
 */
class TranslatedFont {
  /**
   * @param {Object} params
   * @param {string} params.loadedName - Identifier used when sending the font.
   * @param {Object} params.font - The font; `isType3Font` decides whether a
   *   dependency set is created.
   * @param {Object} params.dict - Font dictionary (read by `loadType3Data`).
   * @param {Object} [params.evaluatorOptions] - Falls back to
   *   `DefaultPartialEvaluatorOptions` when falsy.
   */
  constructor({ loadedName, font, dict, evaluatorOptions }) {
    Object.assign(this, {
      loadedName,
      font,
      dict,
      _evaluatorOptions: evaluatorOptions || DefaultPartialEvaluatorOptions,
      type3Loaded: null,
      type3Dependencies: font.isType3Font ? new Set() : null,
      sent: false,
    });
  }

  /**
   * Send the exported font data as a "Font" common object. Only the first
   * call has an effect; later calls return immediately.
   */
  send(handler) {
    if (!this.sent) {
      this.sent = true;

      handler.send("commonobj", [
        this.loadedName,
        "Font",
        this.font.exportData(this._evaluatorOptions.fontExtraProperties),
      ]);
    }
  }

  /**
   * Disable font-face usage for this font and (re)send glyph paths.
   * Does nothing when the font has no `data`.
   */
  fallback(handler) {
    const { font } = this;
    if (!font.data) {
      return;
    }
    // When font loading failed, fall back to the built-in font renderer.
    font.disableFontFace = true;
    // An arbitrary number of text rendering operators could have been
    // encountered between the point in time when the 'Font' message was sent
    // to the main-thread, and the point in time when the 'FontFallback'
    // message was received on the worker-thread.
    // To ensure that all 'FontPath's are available on the main-thread, when
    // font loading failed, attempt to resend *all* previously parsed glyphs.
    PartialEvaluator.buildFontPaths(
      font,
      /* glyphs = */ font.glyphCacheValues,
      handler,
      this._evaluatorOptions
    );
  }

  /**
   * Parse every entry of the font's "CharProcs" dictionary, one after the
   * other, and store the resulting operator lists on the font.
   *
   * @param {Object} evaluator - Evaluator to clone for glyph parsing.
   * @param {Object} resources - Used when the font dictionary has no
   *   "Resources" entry of its own.
   * @param {Object} task - Passed through to `getOperatorList`.
   * @returns {Promise<void>} The same promise on every call (cached in
   *   `type3Loaded`).
   * @throws {Error} When the font is not a Type3 font.
   */
  loadType3Data(evaluator, resources, task) {
    if (this.type3Loaded) {
      return this.type3Loaded;
    }
    if (!this.font.isType3Font) {
      throw new Error("Must be a Type3 font.");
    }
    // When parsing Type3 glyphs, always ignore them if there are errors.
    // Compared to the parsing of e.g. an entire page, it doesn't really
    // make sense to only be able to render a Type3 glyph partially.
    // (A glyph whose parsing rejects is replaced by an empty operator list
    // in `parseGlyph` below.)
    const type3Evaluator = evaluator.clone({ ignoreErrors: false });
    type3Evaluator.parsingType3Font = true;

    const { font, type3Dependencies } = this;
    const charProcs = this.dict.get("CharProcs");
    const fontResources = this.dict.get("Resources") || resources;
    const charProcOperatorList = Object.create(null);

    const isEmptyBBox =
      !font.bbox || isArrayEqual(font.bbox, [0, 0, 0, 0]);

    // Parses a single glyph procedure and records its operator list.
    const parseGlyph = key => {
      const glyphStream = charProcs.get(key);
      const operatorList = new OperatorList();
      return type3Evaluator
        .getOperatorList({
          stream: glyphStream,
          task,
          resources: fontResources,
          operatorList,
        })
        .then(() => {
          // According to the PDF specification, section "9.6.5 Type 3 Fonts"
          // and "Table 113":
          //  "A glyph description that begins with the d1 operator should
          //   not execute any operators that set the colour (or other
          //   colour-related parameters) in the graphics state;
          //   any use of such operators shall be ignored."
          if (operatorList.fnArray[0] === OPS.setCharWidthAndBounds) {
            this._removeType3ColorOperators(operatorList, isEmptyBBox);
          }
          charProcOperatorList[key] = operatorList.getIR();

          for (const dependency of operatorList.dependencies) {
            type3Dependencies.add(dependency);
          }
        })
        .catch(() => {
          warn(`Type3 font resource "${key}" is not available.`);
          const dummyOperatorList = new OperatorList();
          charProcOperatorList[key] = dummyOperatorList.getIR();
        });
    };

    // Chain the glyphs so that they are parsed strictly in sequence.
    let glyphChain = Promise.resolve();
    for (const key of charProcs.getKeys()) {
      glyphChain = glyphChain.then(() => parseGlyph(key));
    }

    this.type3Loaded = glyphChain.then(() => {
      font.charProcOperatorList = charProcOperatorList;
      if (this._bbox) {
        font.isCharBBox = true;
        font.bbox = this._bbox;
      }
    });
    return this.type3Loaded;
  }

  /**
   * Strip colour-setting operators (and colour-related graphics-state
   * entries) from a glyph operator list, in place. When `isEmptyBBox` is
   * true, the glyph's bounding box -- taken from the arguments of the first
   * operator -- is also merged into `this._bbox`.
   *
   * @private
   */
  _removeType3ColorOperators(operatorList, isEmptyBBox = false) {
    if (
      typeof PDFJSDev === "undefined" ||
      PDFJSDev.test("!PRODUCTION || TESTING")
    ) {
      assert(
        operatorList.fnArray[0] === OPS.setCharWidthAndBounds,
        "Type3 glyph shall start with the d1 operator."
      );
    }
    if (isEmptyBBox) {
      if (!this._bbox) {
        this._bbox = [Infinity, Infinity, -Infinity, -Infinity];
      }
      const bbox = this._bbox;
      const [minX, minY, maxX, maxY] = Util.normalizeRect(
        operatorList.argsArray[0].slice(2)
      );

      bbox[0] = Math.min(bbox[0], minX);
      bbox[1] = Math.min(bbox[1], minY);
      bbox[2] = Math.max(bbox[2], maxX);
      bbox[3] = Math.max(bbox[3], maxY);
    }

    // Operators that are dropped entirely.
    const colorOps = new Set([
      OPS.setStrokeColorSpace,
      OPS.setFillColorSpace,
      OPS.setStrokeColor,
      OPS.setStrokeColorN,
      OPS.setFillColor,
      OPS.setFillColorN,
      OPS.setStrokeGray,
      OPS.setFillGray,
      OPS.setStrokeRGBColor,
      OPS.setFillRGBColor,
      OPS.setStrokeCMYKColor,
      OPS.setFillCMYKColor,
      OPS.shadingFill,
      OPS.setRenderingIntent,
    ]);
    // Graphics-state keys that are removed from `setGState` arguments.
    const ignoredGStateKeys = new Set([
      "TR",
      "TR2",
      "HT",
      "BG",
      "BG2",
      "UCR",
      "UCR2",
    ]);

    const { fnArray, argsArray } = operatorList;
    // Index 0 holds the d1 operator itself, hence start at 1.
    let index = 1;
    let remaining = operatorList.length;
    while (index < remaining) {
      const fn = fnArray[index];

      if (colorOps.has(fn)) {
        fnArray.splice(index, 1);
        argsArray.splice(index, 1);
        remaining--;
        // Re-examine the same index, which now holds the next operator.
        continue;
      }

      if (fn === OPS.setGState) {
        const [gStateObj] = argsArray[index];
        let entry = 0;
        while (entry < gStateObj.length) {
          const [gStateKey] = gStateObj[entry];
          if (ignoredGStateKeys.has(gStateKey)) {
            gStateObj.splice(entry, 1);
          } else {
            entry++;
          }
        }
      }
      index++;
    }
  }
}
4227
-
4228
/**
 * Keeps a current state object plus a stack of saved states.
 * State objects must provide a `clone()` method (used by `save`).
 */
class StateManager {
  /**
   * @param {Object} [initialState] - Starting state; a new `EvalState` when
   *   omitted.
   */
  constructor(initialState = new EvalState()) {
    this.state = initialState;
    this.stateStack = [];
  }

  /** Push the current state and continue working on a clone of it. */
  save() {
    const current = this.state;
    this.stateStack.push(current);
    this.state = current.clone();
  }

  /** Pop the most recently saved state; a no-op when nothing was saved. */
  restore() {
    const saved = this.stateStack.pop();
    if (!saved) {
      return;
    }
    this.state = saved;
  }

  /** Replace the current `ctm` with `Util.transform(ctm, args)`. */
  transform(args) {
    const { state } = this;
    state.ctm = Util.transform(state.ctm, args);
  }
}
4251
-
4252
/**
 * Text-related state: current font information, the text and text-line
 * matrices, and the spacing/leading/scale/rise parameters.
 */
class TextState {
  constructor() {
    Object.assign(this, {
      ctm: new Float32Array(IDENTITY_MATRIX),
      fontName: null,
      fontSize: 0,
      font: null,
      fontMatrix: FONT_IDENTITY_MATRIX,
      textMatrix: IDENTITY_MATRIX.slice(),
      textLineMatrix: IDENTITY_MATRIX.slice(),
      charSpacing: 0,
      wordSpacing: 0,
      leading: 0,
      textHScale: 1,
      textRise: 0,
    });
  }

  /** Overwrite the six components of the text matrix, in place. */
  setTextMatrix(a, b, c, d, e, f) {
    const matrix = this.textMatrix;
    matrix[0] = a;
    matrix[1] = b;
    matrix[2] = c;
    matrix[3] = d;
    matrix[4] = e;
    matrix[5] = f;
  }

  /** Overwrite the six components of the text-line matrix, in place. */
  setTextLineMatrix(a, b, c, d, e, f) {
    const matrix = this.textLineMatrix;
    matrix[0] = a;
    matrix[1] = b;
    matrix[2] = c;
    matrix[3] = d;
    matrix[4] = e;
    matrix[5] = f;
  }

  /** Add the offset (x, y), mapped through the text matrix, to its origin. */
  translateTextMatrix(x, y) {
    const matrix = this.textMatrix;
    matrix[4] += matrix[0] * x + matrix[2] * y;
    matrix[5] += matrix[1] * x + matrix[3] * y;
  }

  /** Same as `translateTextMatrix`, but for the text-line matrix. */
  translateTextLineMatrix(x, y) {
    const matrix = this.textLineMatrix;
    matrix[4] += matrix[0] * x + matrix[2] * y;
    matrix[5] += matrix[1] * x + matrix[3] * y;
  }

  /**
   * Move the text-line matrix by `-leading` along y, then reset the text
   * matrix to a copy of it.
   */
  carriageReturn() {
    this.translateTextLineMatrix(0, -this.leading);
    this.textMatrix = this.textLineMatrix.slice();
  }

  /**
   * Create an object that inherits from this state, with its own copies of
   * the text, text-line and font matrices. Every other property is read
   * through the prototype chain until it is assigned on the clone.
   */
  clone() {
    return Object.assign(Object.create(this), {
      textMatrix: this.textMatrix.slice(),
      textLineMatrix: this.textLineMatrix.slice(),
      fontMatrix: this.fontMatrix.slice(),
    });
  }
}
4313
-
4314
/**
 * State object holding the transformation matrix, font, text rendering mode
 * and the fill/stroke colour spaces. Used as the default initial state of
 * `StateManager`.
 */
class EvalState {
  constructor() {
    Object.assign(this, {
      ctm: new Float32Array(IDENTITY_MATRIX),
      font: null,
      textRenderingMode: TextRenderingMode.FILL,
      fillColorSpace: ColorSpace.singletons.gray,
      strokeColorSpace: ColorSpace.singletons.gray,
    });
  }

  /**
   * Create an object whose prototype is this state, so that it reads this
   * state's values until a property is assigned on the clone itself.
   */
  clone() {
    const inherited = Object.create(this);
    return inherited;
  }
}
4327
-
4328
/**
 * Reads a content stream operator by operator: collects the operands that
 * precede each command, validates their count against `opMap`, and keeps
 * the state manager up to date for save/restore/transform.
 */
class EvaluatorPreprocessor {
  /**
   * Lookup table from command string to `{ id, numArgs, variableArgs }`.
   * Commands that are only prefixes of other tokens map to `null`.
   */
  static get opMap() {
    // Specifies properties for each command
    //
    // If variableArgs === true: [0, `numArgs`] expected
    // If variableArgs === false: exactly `numArgs` expected
    const fixed = (id, numArgs) => ({ id, numArgs, variableArgs: false });
    const upTo = (id, numArgs) => ({ id, numArgs, variableArgs: true });

    const getOPMap = getLookupTableFactory(t => {
      // Graphic state
      t.w = fixed(OPS.setLineWidth, 1);
      t.J = fixed(OPS.setLineCap, 1);
      t.j = fixed(OPS.setLineJoin, 1);
      t.M = fixed(OPS.setMiterLimit, 1);
      t.d = fixed(OPS.setDash, 2);
      t.ri = fixed(OPS.setRenderingIntent, 1);
      t.i = fixed(OPS.setFlatness, 1);
      t.gs = fixed(OPS.setGState, 1);
      t.q = fixed(OPS.save, 0);
      t.Q = fixed(OPS.restore, 0);
      t.cm = fixed(OPS.transform, 6);

      // Path
      t.m = fixed(OPS.moveTo, 2);
      t.l = fixed(OPS.lineTo, 2);
      t.c = fixed(OPS.curveTo, 6);
      t.v = fixed(OPS.curveTo2, 4);
      t.y = fixed(OPS.curveTo3, 4);
      t.h = fixed(OPS.closePath, 0);
      t.re = fixed(OPS.rectangle, 4);
      t.S = fixed(OPS.stroke, 0);
      t.s = fixed(OPS.closeStroke, 0);
      t.f = fixed(OPS.fill, 0);
      t.F = fixed(OPS.fill, 0);
      t["f*"] = fixed(OPS.eoFill, 0);
      t.B = fixed(OPS.fillStroke, 0);
      t["B*"] = fixed(OPS.eoFillStroke, 0);
      t.b = fixed(OPS.closeFillStroke, 0);
      t["b*"] = fixed(OPS.closeEOFillStroke, 0);
      t.n = fixed(OPS.endPath, 0);

      // Clipping
      t.W = fixed(OPS.clip, 0);
      t["W*"] = fixed(OPS.eoClip, 0);

      // Text
      t.BT = fixed(OPS.beginText, 0);
      t.ET = fixed(OPS.endText, 0);
      t.Tc = fixed(OPS.setCharSpacing, 1);
      t.Tw = fixed(OPS.setWordSpacing, 1);
      t.Tz = fixed(OPS.setHScale, 1);
      t.TL = fixed(OPS.setLeading, 1);
      t.Tf = fixed(OPS.setFont, 2);
      t.Tr = fixed(OPS.setTextRenderingMode, 1);
      t.Ts = fixed(OPS.setTextRise, 1);
      t.Td = fixed(OPS.moveText, 2);
      t.TD = fixed(OPS.setLeadingMoveText, 2);
      t.Tm = fixed(OPS.setTextMatrix, 6);
      t["T*"] = fixed(OPS.nextLine, 0);
      t.Tj = fixed(OPS.showText, 1);
      t.TJ = fixed(OPS.showSpacedText, 1);
      t["'"] = fixed(OPS.nextLineShowText, 1);
      t['"'] = fixed(OPS.nextLineSetSpacingShowText, 3);

      // Type3 fonts
      t.d0 = fixed(OPS.setCharWidth, 2);
      t.d1 = fixed(OPS.setCharWidthAndBounds, 6);

      // Color
      t.CS = fixed(OPS.setStrokeColorSpace, 1);
      t.cs = fixed(OPS.setFillColorSpace, 1);
      t.SC = upTo(OPS.setStrokeColor, 4);
      t.SCN = upTo(OPS.setStrokeColorN, 33);
      t.sc = upTo(OPS.setFillColor, 4);
      t.scn = upTo(OPS.setFillColorN, 33);
      t.G = fixed(OPS.setStrokeGray, 1);
      t.g = fixed(OPS.setFillGray, 1);
      t.RG = fixed(OPS.setStrokeRGBColor, 3);
      t.rg = fixed(OPS.setFillRGBColor, 3);
      t.K = fixed(OPS.setStrokeCMYKColor, 4);
      t.k = fixed(OPS.setFillCMYKColor, 4);

      // Shading
      t.sh = fixed(OPS.shadingFill, 1);

      // Images
      t.BI = fixed(OPS.beginInlineImage, 0);
      t.ID = fixed(OPS.beginImageData, 0);
      t.EI = fixed(OPS.endInlineImage, 1);

      // XObjects
      t.Do = fixed(OPS.paintXObject, 1);
      t.MP = fixed(OPS.markPoint, 1);
      t.DP = fixed(OPS.markPointProps, 2);
      t.BMC = fixed(OPS.beginMarkedContent, 1);
      t.BDC = fixed(OPS.beginMarkedContentProps, 2);
      t.EMC = fixed(OPS.endMarkedContent, 0);

      // Compatibility
      t.BX = fixed(OPS.beginCompat, 0);
      t.EX = fixed(OPS.endCompat, 0);

      // (reserved partial commands for the lexer)
      t.BM = null;
      t.BD = null;
      t.true = null;
      t.fa = null;
      t.fal = null;
      t.fals = null;
      t.false = null;
      t.nu = null;
      t.nul = null;
      t.null = null;
    });

    return shadow(this, "opMap", getOPMap());
  }

  /**
   * Number of incomplete path operators tolerated by `read`; one more than
   * this makes `read` throw instead of warn.
   */
  static get MAX_INVALID_PATH_OPS() {
    return shadow(this, "MAX_INVALID_PATH_OPS", 20);
  }

  /**
   * @param {Object} stream - Content stream handed to the lexer.
   * @param {Object} xref - Passed through to the parser.
   * @param {StateManager} [stateManager] - A fresh one is created if omitted.
   */
  constructor(stream, xref, stateManager = new StateManager()) {
    // TODO(mduan): pass array of knownCommands rather than this.opMap
    // dictionary
    const lexer = new Lexer(stream, EvaluatorPreprocessor.opMap);
    this.parser = new Parser({ lexer, xref });
    this.stateManager = stateManager;
    this.nonProcessedArgs = [];
    this._numInvalidPathOPS = 0;
  }

  /** Number of states currently saved on the state manager's stack. */
  get savedStatesDepth() {
    return this.stateManager.stateStack.length;
  }

  // |operation| is an object with two fields:
  //
  // - |fn| is an out param.
  //
  // - |args| is an inout param. On entry, it should have one of two values.
  //
  //   - An empty array. This indicates that the caller is providing the
  //     array in which the args will be stored in. The caller should use
  //     this value if it can reuse a single array for each call to read().
  //
  //   - |null|. This indicates that the caller needs this function to create
  //     the array in which any args are stored in. If there are zero args,
  //     this function will leave |operation.args| as |null| (thus avoiding
  //     allocations that would occur if we used an empty array to represent
  //     zero arguments). Otherwise, it will replace |null| with a new array
  //     containing the arguments. The caller should use this value if it
  //     cannot reuse an array for each call to read().
  //
  // These two modes are present because this function is very hot and so
  // avoiding allocations where possible is worthwhile.
  //
  // Returns |true| once a command has been read into |operation|, and
  // |false| when the end of the stream is reached.
  //
  read(operation) {
    let args = operation.args;
    for (;;) {
      const obj = this.parser.getObj();

      if (!(obj instanceof Cmd)) {
        if (obj === EOF) {
          return false; // no more commands
        }
        // argument
        if (obj !== null) {
          if (args === null) {
            args = [];
          }
          args.push(obj);
          if (args.length > 33) {
            throw new FormatError("Too many arguments");
          }
        }
        continue;
      }

      const { cmd } = obj;
      // Check that the command is valid
      const opSpec = EvaluatorPreprocessor.opMap[cmd];
      if (!opSpec) {
        warn(`Unknown command "${cmd}".`);
        continue;
      }

      const { id: fn, numArgs, variableArgs } = opSpec;
      let argCount = args === null ? 0 : args.length;

      if (variableArgs) {
        if (argCount > numArgs) {
          info(
            `Command ${cmd}: expected [0, ${numArgs}] args, ` +
              `but received ${argCount} args.`
          );
        }
      } else {
        // Postscript commands can be nested, e.g. /F2 /GS2 gs 5.711 Tf
        if (argCount !== numArgs) {
          const stash = this.nonProcessedArgs;
          // Surplus leading operands are put aside ...
          for (; argCount > numArgs; argCount--) {
            stash.push(args.shift());
          }
          // ... and missing ones are taken back from what was put aside.
          while (argCount < numArgs && stash.length !== 0) {
            if (args === null) {
              args = [];
            }
            args.unshift(stash.pop());
            argCount++;
          }
        }

        if (argCount < numArgs) {
          const partialMsg =
            `command ${cmd}: expected ${numArgs} args, ` +
            `but received ${argCount} args.`;

          // Incomplete path operators, in particular, can result in fairly
          // chaotic rendering artifacts. Hence the following heuristics is
          // used to error, rather than just warn, once a number of invalid
          // path operators have been encountered (fixes bug1443140.pdf).
          const isPathOperator = fn >= OPS.moveTo && fn <= OPS.endPath;
          if (
            isPathOperator &&
            ++this._numInvalidPathOPS >
              EvaluatorPreprocessor.MAX_INVALID_PATH_OPS
          ) {
            throw new FormatError(`Invalid ${partialMsg}`);
          }
          // If we receive too few arguments, it's not possible to execute
          // the command, hence we skip the command.
          warn(`Skipping ${partialMsg}`);
          if (args !== null) {
            args.length = 0;
          }
          continue;
        }
      }

      // TODO figure out how to type-check vararg functions
      this.preprocessCommand(fn, args);

      operation.fn = fn;
      operation.args = args;
      return true;
    }
  }

  /**
   * Mirror save/restore/transform operators onto the state manager; every
   * other operator is ignored here.
   */
  preprocessCommand(fn, args) {
    const op = fn | 0;
    if (op === OPS.save) {
      this.stateManager.save();
    } else if (op === OPS.restore) {
      this.stateManager.restore();
    } else if (op === OPS.transform) {
      this.stateManager.transform(args);
    }
  }
}
4600
-
4601
- export { EvaluatorPreprocessor, PartialEvaluator };