@syncfusion/ej2-pdf-data-extract 30.1.42 → 30.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/dist/ej2-pdf-data-extract.umd.min.js +1 -2
  2. package/dist/global/ej2-pdf-data-extract.min.js +1 -2
  3. package/dist/global/index.d.ts +1 -2
  4. package/package.json +14 -46
  5. package/dist/ts/index.d.ts +0 -20
  6. package/dist/ts/index.ts +0 -20
  7. package/dist/ts/pdf-data-extract/core/content-parser-helper.d.ts +0 -62
  8. package/dist/ts/pdf-data-extract/core/content-parser-helper.ts +0 -640
  9. package/dist/ts/pdf-data-extract/core/enum.d.ts +0 -6
  10. package/dist/ts/pdf-data-extract/core/enum.ts +0 -6
  11. package/dist/ts/pdf-data-extract/core/graphic-state.d.ts +0 -33
  12. package/dist/ts/pdf-data-extract/core/graphic-state.ts +0 -106
  13. package/dist/ts/pdf-data-extract/core/pdf-data-extractor.d.ts +0 -210
  14. package/dist/ts/pdf-data-extract/core/pdf-data-extractor.ts +0 -977
  15. package/dist/ts/pdf-data-extract/core/pdf-text-parser.d.ts +0 -67
  16. package/dist/ts/pdf-data-extract/core/pdf-text-parser.ts +0 -495
  17. package/dist/ts/pdf-data-extract/core/redaction/index.d.ts +0 -4
  18. package/dist/ts/pdf-data-extract/core/redaction/index.ts +0 -4
  19. package/dist/ts/pdf-data-extract/core/redaction/pdf-redaction-processor.d.ts +0 -55
  20. package/dist/ts/pdf-data-extract/core/redaction/pdf-redaction-processor.ts +0 -592
  21. package/dist/ts/pdf-data-extract/core/redaction/pdf-redaction-region.d.ts +0 -281
  22. package/dist/ts/pdf-data-extract/core/redaction/pdf-redaction-region.ts +0 -342
  23. package/dist/ts/pdf-data-extract/core/redaction/pdf-redactor.d.ts +0 -129
  24. package/dist/ts/pdf-data-extract/core/redaction/pdf-redactor.ts +0 -322
  25. package/dist/ts/pdf-data-extract/core/redaction/text-glyph-mapper.d.ts +0 -12
  26. package/dist/ts/pdf-data-extract/core/redaction/text-glyph-mapper.ts +0 -153
  27. package/dist/ts/pdf-data-extract/core/text-extraction/binary-cmap-reader.d.ts +0 -24
  28. package/dist/ts/pdf-data-extract/core/text-extraction/binary-cmap-reader.ts +0 -281
  29. package/dist/ts/pdf-data-extract/core/text-extraction/cmap.d.ts +0 -50
  30. package/dist/ts/pdf-data-extract/core/text-extraction/cmap.ts +0 -565
  31. package/dist/ts/pdf-data-extract/core/text-extraction/compact-font-parser.d.ts +0 -191
  32. package/dist/ts/pdf-data-extract/core/text-extraction/compact-font-parser.ts +0 -1928
  33. package/dist/ts/pdf-data-extract/core/text-extraction/encoding-utils.d.ts +0 -102
  34. package/dist/ts/pdf-data-extract/core/text-extraction/encoding-utils.ts +0 -5780
  35. package/dist/ts/pdf-data-extract/core/text-extraction/font-structure.d.ts +0 -167
  36. package/dist/ts/pdf-data-extract/core/text-extraction/font-structure.ts +0 -1842
  37. package/dist/ts/pdf-data-extract/core/text-extraction/font-tables.d.ts +0 -5
  38. package/dist/ts/pdf-data-extract/core/text-extraction/font-tables.ts +0 -16
  39. package/dist/ts/pdf-data-extract/core/text-extraction/font-utils.d.ts +0 -18
  40. package/dist/ts/pdf-data-extract/core/text-extraction/font-utils.ts +0 -630
  41. package/dist/ts/pdf-data-extract/core/text-extraction/glyph.d.ts +0 -93
  42. package/dist/ts/pdf-data-extract/core/text-extraction/glyph.ts +0 -622
  43. package/dist/ts/pdf-data-extract/core/text-extraction/index.d.ts +0 -10
  44. package/dist/ts/pdf-data-extract/core/text-extraction/index.ts +0 -10
  45. package/dist/ts/pdf-data-extract/core/text-extraction/matrix-helper.d.ts +0 -38
  46. package/dist/ts/pdf-data-extract/core/text-extraction/matrix-helper.ts +0 -150
  47. package/dist/ts/pdf-data-extract/core/text-extraction/metrics.d.ts +0 -16
  48. package/dist/ts/pdf-data-extract/core/text-extraction/metrics.ts +0 -2938
  49. package/dist/ts/pdf-data-extract/core/text-structure.d.ts +0 -628
  50. package/dist/ts/pdf-data-extract/core/text-structure.ts +0 -668
  51. package/dist/ts/pdf-data-extract/core/utils.d.ts +0 -99
  52. package/dist/ts/pdf-data-extract/core/utils.ts +0 -626
  53. package/dist/ts/pdf-data-extract/index.d.ts +0 -20
  54. package/dist/ts/pdf-data-extract/index.ts +0 -20
@@ -1,626 +0,0 @@
1
- import {_ContentParser, _PdfBaseStream, _PdfContentStream, _PdfCrossReference, _PdfDictionary, _PdfRecord, _PdfReference, PdfPage } from '@syncfusion/ej2-pdf';
2
- import { _FontStructure } from './text-extraction/font-structure';
3
- import { _GraphicState, _TextState } from './graphic-state';
4
- import { _TextProcessingMode } from './enum';
5
- import { _PdfContentParserHelper } from './content-parser-helper';
6
- import { PdfDataExtractor } from './pdf-data-extractor';
7
-
/**
 * Removes the escaping backslash from `\\`, `\(` and `\)` pairs and drops a lone
 * backslash at the very end of the string.
 *
 * Any other backslash (for example the one in `\n`) is left untouched. The work is
 * done in a single left-to-right pass, so an escaped backslash is never re-read as
 * the start of another escape.
 *
 * @param {string} text - The string to process.
 * @returns {string} The string with the handled escape backslashes removed.
 */
export function _ignoreEscapeSequence(text: string): string {
    // First alternative: backslash followed by `\`, `(` or `)` -> keep only the escaped character.
    // Second alternative: backslash that is the last character -> removed (capture group is empty).
    return text.replace(/\\([\\()])|\\$/g, '$1');
}
/**
 * Builds a font collection from the `Font` entry of a resources dictionary.
 *
 * Every entry of the `Font` dictionary is fetched through the cross-reference and
 * wrapped in a `_FontStructure`, keyed by the entry name converted to a string.
 *
 * @param {_PdfDictionary} dictionary - PDF Dictionary containing font resources.
 * @param {_PdfCrossReference} crossReference - The cross-reference of the PDF document.
 * @returns {Map<string, _FontStructure>} A map of font structure objects; empty when there is no `Font` entry.
 */
export function _addFontResources(dictionary: _PdfDictionary, crossReference: _PdfCrossReference): Map<string, _FontStructure> {
    const fontCollection: Map<string, _FontStructure> = new Map<string, _FontStructure>();
    const font: _PdfDictionary = dictionary.get('Font');
    if (typeof font === 'undefined' || font === null) {
        return fontCollection;
    }
    font.forEach((key: any, value: any) => { //eslint-disable-line
        const fontDictionary: _PdfDictionary = crossReference._fetch(value);
        fontCollection.set(key.toString(), new _FontStructure(fontDictionary, crossReference));
    });
    return fontCollection;
}
/**
 * Collects the form XObjects listed in the `XObject` entry of a resources dictionary.
 *
 * Only entries stored as references are considered, and only those whose fetched
 * object is a stream with `Subtype` equal to `Form` are kept.
 *
 * @param {_PdfDictionary} resources - The resources dictionary from a PDF page.
 * @param {_PdfCrossReference} crossReference - The cross-reference of the PDF document.
 * @returns {Map<string, any>} A map of XObject resources keyed by the XObject dictionary key.
 */
export function _getXObjectResources(resources: _PdfDictionary, crossReference: _PdfCrossReference): Map<string, any> { //eslint-disable-line
    const formXObjects: Map<string, any> = new Map<string, any>(); //eslint-disable-line
    if (!resources || !resources.has('XObject')) {
        return formXObjects;
    }
    const xObjects: _PdfDictionary = resources.get('XObject') as _PdfDictionary;
    xObjects.forEach((key: any, value: any) => { //eslint-disable-line
        if (!(value instanceof _PdfReference)) {
            return;
        }
        const fetched: _PdfDictionary = crossReference._fetch(value) as _PdfDictionary;
        const isForm: boolean = fetched instanceof _PdfBaseStream
            && fetched.dictionary.has('Subtype')
            && fetched.dictionary.get('Subtype').name === 'Form';
        if (isForm) {
            formXObjects.set(key, fetched);
        }
    });
    return formXObjects;
}
/**
 * Converts a hexadecimal string to its equivalent character representation.
 *
 * A leading `0x` is dropped and all whitespace is ignored. Each pair of hex digits
 * becomes one character. When the number of digits is odd, the missing final digit
 * is taken to be `0`, as the PDF specification requires for hexadecimal strings
 * (so `901FA` decodes as the bytes 90 1F A0).
 *
 * @param {string} hex - The hexadecimal string to convert.
 * @returns {string} The resulting string of characters.
 */
export function _hexToChar(hex: string): string {
    if (hex.startsWith('0x')) {
        hex = hex.slice(2);
    }
    hex = hex.replace(/\s+/g, '');
    if (hex.length % 2 !== 0) {
        // Odd digit count: the final digit is the HIGH nibble of the last byte.
        hex += '0';
    }
    let result: string = '';
    for (let i: number = 0; i < hex.length; i += 2) {
        result += String.fromCharCode(parseInt(hex.slice(i, i + 2), 16));
    }
    return result;
}
/**
 * Resolves backslash escape sequences in a text string.
 *
 * The function repeatedly looks at the first backslash of a scan copy of the text
 * and dispatches on the character that follows it:
 * - `a b f n r t v ( ) '` : every occurrence of that escape in the text is replaced
 *   by the character it stands for.
 * - `e` and any other ASCII letter: the escape is left in the returned text; only the
 *   scan copy drops the backslash so the scan can move on.
 * - anything else: handled once and the scan stops (see the default branch).
 *
 * A backslash that is the last character stops the scan and is left in place.
 *
 * @param {string} text - The string containing escape sequences.
 * @returns {string} A new string with escape sequences removed or handled appropriately.
 */
export function _skipEscapeSequence(text: string): string {
    let isDefault: boolean = false;
    // Scan copy: normally mirrors `text`, but loses the backslash of letter escapes
    // that are intentionally kept in `text`, so the loop does not revisit them forever.
    let replaceText: string = text;
    let isAlphabetic: boolean = false;
    while (replaceText.indexOf('\\') !== -1) {
        isAlphabetic = false;
        const i: number = replaceText.indexOf('\\');
        if (i + 1 === replaceText.length) {
            // Trailing backslash: nothing follows it, stop scanning.
            break;
        }
        const escapeSequence: string = replaceText.substring(i + 1, i + 2);
        switch (escapeSequence) {
        case 'a':
            text = text.replace(/\\a/g, '\x07');
            break;
        case '(':
            text = text.replace(/\\\(/g, '(');
            break;
        case ')':
            text = text.replace(/\\\)/g, ')');
            break;
        case 'b':
            text = text.replace(/\\b/g, '\b');
            break;
        case 'e':
            // `\e` is kept verbatim in `text`. The backslash must be consumed from the
            // scan copy, otherwise the same `\e` is found on every iteration and the
            // loop never terminates.
            replaceText = replaceText.slice(0, i) + replaceText.slice(i + 1);
            isAlphabetic = true;
            break;
        case 'f':
            text = text.replace(/\\f/g, '\f');
            break;
        case 'n':
            text = text.replace(/\\n/g, '\n');
            break;
        case 'r':
            text = text.replace(/\\r/g, '\r');
            break;
        case 't':
            text = text.replace(/\\t/g, '\t');
            break;
        case 'v':
            text = text.replace(/\\v/g, '\v');
            break;
        case "'": //eslint-disable-line
            text = text.replace(/\\'/g, "'"); //eslint-disable-line
            break;
        default: {
            const charCode: number = escapeSequence.charCodeAt(0);
            if (charCode === 3) {
                // Backslash followed by U+0003: the text is returned unchanged.
                isDefault = true;
            } else if (charCode >= 127) {
                // Backslash followed by a non-ASCII character: strip every backslash.
                text = text.replace(/\\/g, '');
                isDefault = true;
            } else if ((charCode >= 65 && charCode <= 90) || (charCode >= 97 && charCode <= 122)) {
                // Letter without a dedicated case: keep it in `text`, skip it in the scan copy.
                replaceText = replaceText.slice(0, i) + replaceText.slice(i + 1);
                isAlphabetic = true;
            } else {
                // Digit, punctuation, backslash, ...: decide between stripping all
                // backslashes and a full escape parse by looking for a backslash that is
                // followed by a letter with no escape meaning here.
                let isUnrecognizedEscapeSequence: boolean = false;
                if (text.includes('\\')) {
                    for (let c: number = 0; c < text.length - 1; c++) {
                        const nextChar: string = text[c + 1];
                        if (text[c] === '\\' && nextChar === '\\') {
                            // Escaped backslash: skip its second half.
                            c++;
                        } else if (text[c] === '\\' && /[A-Zdghijklmopqswyz]/.test(nextChar)) {
                            isUnrecognizedEscapeSequence = true;
                            break;
                        }
                    }
                }
                if (isUnrecognizedEscapeSequence) {
                    text = text.replace(/\\/g, '');
                } else {
                    text = _parseEscapedText(text);
                }
                isDefault = true;
            }
            break;
        }
        }
        if (isDefault) {
            break;
        }
        if (!isAlphabetic) {
            // A replacement was applied to `text`; restart the scan from the new text.
            replaceText = text;
        }
    }
    return text;
}
/**
 * Converts escape sequences in a string to their corresponding literal characters.
 *
 * The replacements are applied one after another in the order written below:
 * `\n`, `\r`, `\t`, then the escaped punctuation characters, then `\uXXXX`
 * (four hex digits), and finally `\\`.
 *
 * @param {string} text The input string containing escape sequences.
 *
 * @returns {string} The parsed string with escape sequences replaced by literal characters.
 */
export function _parseEscapedText(text: string): string {
    return text
        .replace(/\\n/g, '\n')
        .replace(/\\r/g, '\r')
        .replace(/\\t/g, '\t')
        .replace(/\\"/g, '\"') // eslint-disable-line
        .replace(/\\'/g, "'") // eslint-disable-line
        .replace(/\\</g, '<')
        .replace(/\\>/g, '>')
        .replace(/\\\(/g, '(')
        .replace(/\\\)/g, ')')
        .replace(/\\\{/g, '{')
        .replace(/\\\}/g, '}')
        .replace(/\\\[/g, '[')
        .replace(/\\\]/g, ']')
        .replace(/\\\|/g, '|')
        .replace(/\\\*/g, '*')
        .replace(/\\\?/g, '?')
        .replace(/\\\-/g, "-") // eslint-disable-line
        .replace(/\\\+/g, '+')
        .replace(/\\\./g, '.')
        .replace(/\\\//g, '/')
        .replace(/\\,/g, ',')
        .replace(/\\:/g, ':')
        .replace(/\\;/g, ';')
        .replace(/\\=/g, '=')
        .replace(/\\&/g, '&')
        .replace(/\\%/g, '%')
        .replace(/\\#/g, '#')
        .replace(/\\!/g, '!')
        .replace(/\\u([0-9A-Fa-f]{4})/g, (_, p1) => String.fromCharCode(parseInt(p1, 16))) // eslint-disable-line
        .replace(/\\\\/g, '\\');
}
/**
 * Decodes octal character codes embedded in a literal string.
 *
 * Two forms are recognised:
 * - a backslash followed by exactly three digits (`\101` -> `A`);
 * - a NUL character followed by exactly two digits.
 * In both cases the marker and its digits are replaced by the single character
 * with that octal code. Markers that are not followed by the required number of
 * digits are left in the text unchanged.
 *
 * This function does not resolve non-octal escapes; the callers in this file run
 * `_skipEscapeSequence` on the result themselves.
 *
 * @param {string} encodedText - The encoded string to decode.
 * @param {string} [encoding] - The encoding used in the text; only compared against `'Encoding'`.
 * @returns {string} The decoded literal string.
 */
export function _getLiteralString(encodedText: string, encoding?: string): string {
    let decodedText: string = encodedText;
    let octalIndex: number = -1;
    // NOTE(review): the middle operand reproduces the original truthiness test on
    // `indexOf('\0')`, i.e. it is false only when the text STARTS with NUL. In practice
    // the loop is left through the `break` below. Confirm whether `!== -1` was intended
    // before changing it, since that would stop decoding texts that contain `\\`.
    while ((decodedText.indexOf('\\') !== -1 && decodedText.indexOf('\\\\') === -1) ||
        decodedText.indexOf('\0') !== 0 ||
        (decodedText.indexOf('\\\\') !== -1 && encoding === 'Encoding')) {
        // Number of digits expected after the marker. It must be re-initialised on every
        // iteration: a NUL marker (2 digits) must not shorten later backslash escapes.
        let limit: number = 3;
        const nextBackslash: number = decodedText.indexOf('\\', octalIndex + 1);
        if (nextBackslash >= 0) {
            const nullPosition: number = decodedText.indexOf('\0', octalIndex + 1);
            octalIndex = nextBackslash;
            if (nullPosition > -1 && octalIndex > nullPosition) {
                // A NUL marker comes first; handle it before the backslash.
                octalIndex = nullPosition;
                limit = 2;
            }
        } else {
            octalIndex = decodedText.indexOf('\0', octalIndex + 1);
            if (octalIndex < 0) {
                // No marker left after the current position.
                break;
            }
            limit = 2;
        }
        let octalText: string = '';
        for (let i: number = octalIndex + 1; i <= octalIndex + limit; i++) {
            if (i >= decodedText.length) {
                // Not enough characters left for a complete code.
                octalText = '';
                break;
            }
            const val: number = parseInt(decodedText[i], 10);
            // NOTE(review): `8` is accepted here although it is not an octal digit
            // (kept as in the original); confirm whether the bound should be 7.
            if (isNaN(val) || val > 8) {
                octalText = '';
                break;
            }
            octalText += decodedText[i];
        }
        if (octalText !== '') {
            const decodedChar: string = String.fromCharCode(parseInt(octalText, 8));
            // Replace the marker and its digits with the decoded character.
            decodedText = decodedText.substring(0, octalIndex) + decodedChar +
                decodedText.substring(octalIndex + limit + 1);
        }
    }
    return decodedText;
}
/**
 * Decodes an encoded text operand into the unicode text of its glyphs.
 *
 * The first character of `encodedText` selects the form:
 * - `(`: a literal string; line continuations and escaped parentheses, `\n` and `\r`
 *   are resolved, then octal codes and the remaining escapes.
 * - `[`: an array operand; the already split elements in `inputText` are decoded one
 *   by one. Elements that are neither `<...>` nor `(...)` are skipped.
 * - `<`: a hexadecimal string.
 * Any other first character yields an empty string.
 *
 * @param {string} encodedText - The encoded string to decode.
 * @param {_FontStructure} font - The font structure for decoding glyphs.
 * @param {string[]} inputText - An array of strings representing parts of the text (used for the `[` form).
 * @returns {string} The decoded text.
 */
export function _decodeEncodedText(encodedText: string, font: _FontStructure, inputText: string[]): string {
    let key: string = '';
    const encoding: string = font._encoding;
    // Maps the characters to glyphs and appends each glyph's unicode text to `key`.
    const appendGlyphText: (chars: string) => void = (chars: string): void => {
        const glyphs: any = font._charsToGlyphs(chars); // eslint-disable-line
        for (let g: number = 0; g < glyphs.length; g++) {
            key += glyphs[g]._unicode;
        }
    };
    switch (encodedText[0]) {
    case '(':
        encodedText = encodedText.substring(1, encodedText.length - 1);
        encodedText = encodedText
            .replace(/\\\n/g, '')
            .replace(/\\\(/g, '(')
            .replace(/\\\)/g, ')')
            .replace(/\\n/g, '\n')
            .replace(/\\r/g, '\r');
        encodedText = _getLiteralString(encodedText, encoding);
        encodedText = _skipEscapeSequence(encodedText);
        appendGlyphText(encodedText);
        break;
    case '[':
        for (let i: number = 0; i < inputText.length; i++) {
            let input: string = inputText[i].replace(/\\\n/g, '');
            // Decided per element: a hex element must not force later literal
            // elements of the same array to be hex-decoded.
            let isHex: boolean = false;
            if (input[0] === '<') {
                isHex = true;
                input = input.slice(1, -1);
            } else if (input[0] === '(') {
                input = input.slice(1, -1);
            } else if (input.length > 0) {
                // Not a string element; contributes no text.
                continue;
            }
            let tempString: string;
            if (isHex) {
                tempString = _hexToChar(input);
            } else {
                tempString = _getLiteralString(input, encoding);
                if (tempString.indexOf('\\') !== -1) {
                    tempString = _skipEscapeSequence(tempString);
                }
            }
            appendGlyphText(tempString);
        }
        break;
    case '<':
        encodedText = encodedText.substring(1, encodedText.length - 1);
        appendGlyphText(_hexToChar(encodedText));
        break;
    }
    return key;
}
/**
 * Parses the content of a named XObject and hands its records to the supplied processor.
 *
 * The XObject is looked up in `xObjectCollection` by the first element of
 * `xObjectElement` (with the first `/` removed). Its bytes are parsed into records,
 * its own `Resources` (if any) provide the font and XObject collections for the
 * nested content, and the records are then dispatched by `mode`:
 * - text line extraction / text extraction: processed by the content parser helper;
 * - redaction: processed by the content parser helper and the result is returned;
 * - otherwise: rendered through the data extractor.
 *
 * @param {string[]} xObjectElement - The XObject elements to process.
 * @param {PdfPage} page - The PDF page to which the content stream belongs.
 * @param {Map<string, any>} xObjectCollection - A collection of XObject elements.
 * @param {_PdfContentParserHelper | PdfDataExtractor} data - The data extractor or content parser helper.
 * @param {_TextProcessingMode} [mode] - The mode of text processing.
 * @param {_GraphicState} [graphicState] - The current graphic state.
 * @returns {_PdfContentStream | void} The processed PDF content stream (redaction mode) or void.
 */
export function _getXObject(xObjectElement: string[], page: PdfPage, xObjectCollection: Map<string, any>, data?: _PdfContentParserHelper | PdfDataExtractor, mode?: _TextProcessingMode, graphicState?: _GraphicState): _PdfContentStream | void { //eslint-disable-line
    const xobject: string = xObjectElement[0].replace('/', '');
    let contentParser: _PdfContentParserHelper;
    let extractor: PdfDataExtractor;
    if (data instanceof _PdfContentParserHelper) {
        contentParser = data;
    } else {
        extractor = data;
    }
    if (!xObjectCollection.has(xobject)) {
        return;
    }
    const base: any = xObjectCollection.get(xobject); //eslint-disable-line
    if (!base) {
        return;
    }
    let array: Uint8Array;
    if (base instanceof _PdfContentStream) {
        array = new Uint8Array(base._bytes);
    } else if (base instanceof _PdfBaseStream) {
        array = base.getBytes();
    }
    if (!array) {
        return;
    }
    const recordCollection: _PdfRecord[] = new _ContentParser(array)._readContent();
    // Resources of the nested content; empty unless the XObject declares its own.
    let childFontCollection: Map<string, _FontStructure> = new Map<string, _FontStructure>();
    let childXObjectCollection: Map<string, any> = new Map<string, any>(); //eslint-disable-line
    if (base.dictionary.has('Resources')) {
        const childResources: _PdfDictionary = base.dictionary.get('Resources');
        childFontCollection = _addFontResources(childResources, childResources._crossReference);
        childXObjectCollection = _getXObjectResources(childResources, childResources._crossReference);
    }
    let state: _GraphicState;
    if (typeof mode !== 'undefined') {
        if (base.dictionary.has('Matrix')) {
            // Work on a copy of the current state so the XObject matrix does not leak out.
            const currentState: _TextState = graphicState._state._clone();
            state = new _GraphicState(currentState);
            const matrix: number[] = base.dictionary.get('Matrix');
            if (matrix) {
                state._transform(matrix);
            }
        } else {
            state = graphicState;
        }
    }
    switch (mode) {
    case _TextProcessingMode.textLineExtraction:
    case _TextProcessingMode.textExtraction:
        contentParser._processRecordCollection(recordCollection, page, childFontCollection, childXObjectCollection, state);
        return;
    case _TextProcessingMode.redaction: {
        const pdfStream: any = contentParser._processRecordCollection(recordCollection, page, // eslint-disable-line
            childFontCollection, childXObjectCollection, state);
        return pdfStream;
    }
    default:
        extractor._renderTextAsLayOut(recordCollection, page, childFontCollection, childXObjectCollection);
        return;
    }
}
/**
 * Appends the unicode text of each glyph to a string and records glyph widths.
 *
 * One width entry is pushed per decoded character (and one for a glyph whose
 * unicode text is empty), so the width list stays aligned with the returned text.
 *
 * @param {any} glyphs - The glyph list returned by the font for a run of characters.
 * @param {number[]} widths - The width list to append to.
 * @returns {string} The concatenated unicode text of the glyphs.
 */
function _collectGlyphText(glyphs: any, widths: number[]): string { // eslint-disable-line
    let text: string = '';
    for (let i: number = 0; i < glyphs.length; i++) {
        const glyph: any = glyphs[i]; // eslint-disable-line
        const entries: number = Math.max(1, glyph._unicode.length);
        text += glyph._unicode;
        for (let j: number = 0; j < entries; j++) {
            widths.push(glyph._width);
        }
    }
    return text;
}
/**
 * Parses encoded text and returns both the decoded string list and width table.
 *
 * The first character of `encodedText` selects the form: `(` literal string,
 * `[` array of strings with other content between them, `<` hexadecimal string.
 * Every decoded text run is pushed with a trailing `s` appended, and its widths
 * are pushed as one row of the width table. In the array form, the content found
 * between string elements is pushed to the list as-is, without the suffix and
 * without a width row.
 * NOTE(review): the `s` suffix is presumably a marker that lets consumers tell text
 * runs from the other array content - confirm against the callers.
 *
 * @param {string} encodedText - The encoded text string to be parsed.
 * @param {_FontStructure} font - The font structure used to map encoded characters to glyphs.
 * @returns {[string[], number[][]]} A tuple: the list of decoded strings, and the list of width rows.
 */
export function _parseEncodedText(encodedText: string, font: _FontStructure): [string[], number[][]] {
    const decodedList: string[] = [];
    const widthtable: number[][] = [];
    switch (encodedText[0]) {
    case '(': {
        encodedText = encodedText.substring(1, encodedText.length - 1);
        encodedText = encodedText
            .replace(/\\\n/g, '')
            .replace(/\\\(/g, '(')
            .replace(/\\\)/g, ')')
            .replace(/\\n/g, '\n')
            .replace(/\\r/g, '\r');
        encodedText = _getLiteralString(encodedText);
        encodedText = _skipEscapeSequence(encodedText);
        const widths: number[] = [];
        const key: string = _collectGlyphText(font._charsToGlyphs(encodedText), widths);
        decodedList.push(key + 's');
        widthtable.push(widths);
        break;
    }
    case '[':
        encodedText = encodedText.replace(/\\\n/g, '');
        if (encodedText.length > 1) {
            encodedText = encodedText.substring(1, encodedText.length - 1);
        }
        // Consume the array content from the left, one string element per iteration.
        while (encodedText.length > 0) {
            let isHex: boolean = false;
            let textStart: number = encodedText.indexOf('(');
            let textEnd: number = encodedText.indexOf(')');
            // If another ')' follows before the next '(', treat that one as the end.
            for (let j: number = textEnd + 1; j < encodedText.length; j++) {
                if (encodedText[j] === '(') {
                    break;
                } else if (encodedText[j] === ')') {
                    textEnd = j;
                    break;
                }
            }
            const textHexStart: number = encodedText.indexOf('<');
            const textHexEnd: number = encodedText.indexOf('>');
            if (textHexStart < textStart && textHexStart > -1) {
                // A hex string comes before the next literal string.
                textStart = textHexStart;
                textEnd = textHexEnd;
                isHex = true;
            }
            if (textStart < 0) {
                textStart = encodedText.indexOf('<');
                textEnd = encodedText.indexOf('>');
                if (textStart >= 0) {
                    isHex = true;
                } else {
                    // No string element left; keep the remainder as-is.
                    decodedList.push(encodedText);
                    break;
                }
            }
            if (textEnd < 0 && encodedText.length > 0) {
                // Unterminated element; keep the remainder as-is.
                decodedList.push(encodedText);
                break;
            } else if (textEnd > 0) {
                // Skip over escaped ')' characters (but not one preceded by an escaped backslash).
                while (encodedText[textEnd - 1] === '\\') {
                    if (textEnd - 1 > 0 && encodedText[textEnd - 2] === '\\') {
                        break;
                    }
                    const nextEnd: number = encodedText.indexOf(')', textEnd + 1);
                    if (nextEnd >= 0) {
                        textEnd = nextEnd;
                    } else {
                        break;
                    }
                }
            }
            if (textStart !== 0) {
                // Content between string elements is passed through unchanged.
                decodedList.push(encodedText.substring(0, textStart));
            }
            let element: string = encodedText.substring(textStart + 1, textEnd);
            if (isHex) {
                element = _hexToChar(element);
            } else {
                element = _getLiteralString(element);
                if (element.indexOf('\\') !== -1) {
                    element = _skipEscapeSequence(element);
                }
            }
            const widths: number[] = [];
            const key: string = _collectGlyphText(font._charsToGlyphs(element), widths);
            decodedList.push(key + 's');
            widthtable.push(widths);
            encodedText = encodedText.substring(textEnd + 1);
        }
        break;
    case '<': {
        encodedText = encodedText.substring(1, encodedText.length - 1);
        const widths: number[] = [];
        // Widths are recorded per decoded character, consistent with the other two forms.
        const key: string = _collectGlyphText(font._charsToGlyphs(_hexToChar(encodedText)), widths);
        decodedList.push(key + 's');
        widthtable.push(widths);
        break;
    }
    }
    return [decodedList, widthtable];
}
/**
 * Compare two arrays of numbers to determine if they are equal.
 *
 * Two arrays are equal when they have the same length and strictly equal (`===`)
 * elements at every index; as a consequence an array containing `NaN` is never
 * equal to anything. When either argument is `null` or `undefined`, the result is
 * `true` only if both arguments are the very same value.
 *
 * @param {number[]} arr1 - The first array to compare.
 * @param {number[]} arr2 - The second array to compare.
 * @returns {boolean} 'true' if the arrays are equal, otherwise 'false'.
 */
export function _isArrayEqual(arr1: number[], arr2: number[]): boolean {
    if (!arr1 || !arr2) {
        // Missing input: avoid dereferencing it.
        return arr1 === arr2;
    }
    if (arr1.length !== arr2.length) {
        return false;
    }
    for (let i: number = 0; i < arr1.length; i++) {
        if (arr1[i] !== arr2[i]) {
            return false;
        }
    }
    return true;
}
@@ -1,20 +0,0 @@
1
- export * from './core/redaction/pdf-redaction-processor';
2
- export * from './core/redaction/pdf-redaction-region';
3
- export * from './core/redaction/pdf-redactor';
4
- export * from './core/redaction/text-glyph-mapper';
5
- export * from './core/text-extraction/binary-cmap-reader';
6
- export * from './core/text-extraction/cmap';
7
- export * from './core/text-extraction/compact-font-parser';
8
- export * from './core/text-extraction/encoding-utils';
9
- export * from './core/text-extraction/font-structure';
10
- export * from './core/text-extraction/font-tables';
11
- export * from './core/text-extraction/font-utils';
12
- export * from './core/text-extraction/glyph';
13
- export * from './core/text-extraction/matrix-helper';
14
- export * from './core/text-extraction/metrics';
15
- export * from './core/pdf-data-extractor';
16
- export * from './core/content-parser-helper';
17
- export * from './core/graphic-state';
18
- export * from './core/pdf-text-parser';
19
- export * from './core/text-structure';
20
- export * from './core/utils';
@@ -1,20 +0,0 @@
1
- export * from './core/redaction/pdf-redaction-processor';
2
- export * from './core/redaction/pdf-redaction-region';
3
- export * from './core/redaction/pdf-redactor';
4
- export * from './core/redaction/text-glyph-mapper';
5
- export * from './core/text-extraction/binary-cmap-reader';
6
- export * from './core/text-extraction/cmap';
7
- export * from './core/text-extraction/compact-font-parser';
8
- export * from './core/text-extraction/encoding-utils';
9
- export * from './core/text-extraction/font-structure';
10
- export * from './core/text-extraction/font-tables';
11
- export * from './core/text-extraction/font-utils';
12
- export * from './core/text-extraction/glyph';
13
- export * from './core/text-extraction/matrix-helper';
14
- export * from './core/text-extraction/metrics';
15
- export * from './core/pdf-data-extractor';
16
- export * from './core/content-parser-helper';
17
- export * from './core/graphic-state';
18
- export * from './core/pdf-text-parser';
19
- export * from './core/text-structure';
20
- export * from './core/utils';