@syncfusion/ej2-pdf-data-extract 30.1.42 → 30.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54):
  1. package/dist/ej2-pdf-data-extract.umd.min.js +1 -2
  2. package/dist/global/ej2-pdf-data-extract.min.js +1 -2
  3. package/dist/global/index.d.ts +1 -2
  4. package/package.json +14 -46
  5. package/dist/ts/index.d.ts +0 -20
  6. package/dist/ts/index.ts +0 -20
  7. package/dist/ts/pdf-data-extract/core/content-parser-helper.d.ts +0 -62
  8. package/dist/ts/pdf-data-extract/core/content-parser-helper.ts +0 -640
  9. package/dist/ts/pdf-data-extract/core/enum.d.ts +0 -6
  10. package/dist/ts/pdf-data-extract/core/enum.ts +0 -6
  11. package/dist/ts/pdf-data-extract/core/graphic-state.d.ts +0 -33
  12. package/dist/ts/pdf-data-extract/core/graphic-state.ts +0 -106
  13. package/dist/ts/pdf-data-extract/core/pdf-data-extractor.d.ts +0 -210
  14. package/dist/ts/pdf-data-extract/core/pdf-data-extractor.ts +0 -977
  15. package/dist/ts/pdf-data-extract/core/pdf-text-parser.d.ts +0 -67
  16. package/dist/ts/pdf-data-extract/core/pdf-text-parser.ts +0 -495
  17. package/dist/ts/pdf-data-extract/core/redaction/index.d.ts +0 -4
  18. package/dist/ts/pdf-data-extract/core/redaction/index.ts +0 -4
  19. package/dist/ts/pdf-data-extract/core/redaction/pdf-redaction-processor.d.ts +0 -55
  20. package/dist/ts/pdf-data-extract/core/redaction/pdf-redaction-processor.ts +0 -592
  21. package/dist/ts/pdf-data-extract/core/redaction/pdf-redaction-region.d.ts +0 -281
  22. package/dist/ts/pdf-data-extract/core/redaction/pdf-redaction-region.ts +0 -342
  23. package/dist/ts/pdf-data-extract/core/redaction/pdf-redactor.d.ts +0 -129
  24. package/dist/ts/pdf-data-extract/core/redaction/pdf-redactor.ts +0 -322
  25. package/dist/ts/pdf-data-extract/core/redaction/text-glyph-mapper.d.ts +0 -12
  26. package/dist/ts/pdf-data-extract/core/redaction/text-glyph-mapper.ts +0 -153
  27. package/dist/ts/pdf-data-extract/core/text-extraction/binary-cmap-reader.d.ts +0 -24
  28. package/dist/ts/pdf-data-extract/core/text-extraction/binary-cmap-reader.ts +0 -281
  29. package/dist/ts/pdf-data-extract/core/text-extraction/cmap.d.ts +0 -50
  30. package/dist/ts/pdf-data-extract/core/text-extraction/cmap.ts +0 -565
  31. package/dist/ts/pdf-data-extract/core/text-extraction/compact-font-parser.d.ts +0 -191
  32. package/dist/ts/pdf-data-extract/core/text-extraction/compact-font-parser.ts +0 -1928
  33. package/dist/ts/pdf-data-extract/core/text-extraction/encoding-utils.d.ts +0 -102
  34. package/dist/ts/pdf-data-extract/core/text-extraction/encoding-utils.ts +0 -5780
  35. package/dist/ts/pdf-data-extract/core/text-extraction/font-structure.d.ts +0 -167
  36. package/dist/ts/pdf-data-extract/core/text-extraction/font-structure.ts +0 -1842
  37. package/dist/ts/pdf-data-extract/core/text-extraction/font-tables.d.ts +0 -5
  38. package/dist/ts/pdf-data-extract/core/text-extraction/font-tables.ts +0 -16
  39. package/dist/ts/pdf-data-extract/core/text-extraction/font-utils.d.ts +0 -18
  40. package/dist/ts/pdf-data-extract/core/text-extraction/font-utils.ts +0 -630
  41. package/dist/ts/pdf-data-extract/core/text-extraction/glyph.d.ts +0 -93
  42. package/dist/ts/pdf-data-extract/core/text-extraction/glyph.ts +0 -622
  43. package/dist/ts/pdf-data-extract/core/text-extraction/index.d.ts +0 -10
  44. package/dist/ts/pdf-data-extract/core/text-extraction/index.ts +0 -10
  45. package/dist/ts/pdf-data-extract/core/text-extraction/matrix-helper.d.ts +0 -38
  46. package/dist/ts/pdf-data-extract/core/text-extraction/matrix-helper.ts +0 -150
  47. package/dist/ts/pdf-data-extract/core/text-extraction/metrics.d.ts +0 -16
  48. package/dist/ts/pdf-data-extract/core/text-extraction/metrics.ts +0 -2938
  49. package/dist/ts/pdf-data-extract/core/text-structure.d.ts +0 -628
  50. package/dist/ts/pdf-data-extract/core/text-structure.ts +0 -668
  51. package/dist/ts/pdf-data-extract/core/utils.d.ts +0 -99
  52. package/dist/ts/pdf-data-extract/core/utils.ts +0 -626
  53. package/dist/ts/pdf-data-extract/index.d.ts +0 -20
  54. package/dist/ts/pdf-data-extract/index.ts +0 -20
@@ -1,640 +0,0 @@
1
- import { _ContentParser, _PdfContentStream, _PdfCrossReference, _PdfRecord, _PdfReference, PdfDocument, PdfFontStyle, PdfPage, PdfPath, PdfRotationAngle } from '@syncfusion/ej2-pdf';
2
- import { TextGlyph, TextLine, TextWord } from './text-structure';
3
- import { _TextProcessingMode } from './enum';
4
- import { PdfRedactor } from './redaction/pdf-redactor';
5
- import { _GraphicState, _TextState } from './graphic-state';
6
- import { _FontStructure } from './text-extraction';
7
- import { _decodeEncodedText, _getXObject } from './utils';
8
- import { _PdfTextParser } from './pdf-text-parser';
9
-
10
- export class _PdfContentParserHelper {
11
- _document: PdfDocument;
12
- _identityMatrix: number[] = [1, 0, 0, 1, 0, 0];
13
- _fontSize: number;
14
- _width: number = 0;
15
- _height: number = 0;
16
- _crossReference: _PdfCrossReference;
17
- _resultantText: string = '';
18
- _textGlyph: TextGlyph[] = [];
19
- _textWord: TextWord[] = [];
20
- _textLine: TextLine[] = []
21
- _mode: _TextProcessingMode;
22
- _isContainsRedactionText: boolean = false;
23
- _isNotUpdated: boolean;
24
- _redaction: PdfRedactor;
25
- _yPosition: number = 0;
26
- _xPosition: number = 0;
27
- _parser: _PdfTextParser = new _PdfTextParser();
28
/**
 * Creates a content parser helper.
 *
 * When `mode` is supplied it is stored in `_mode`. If the resulting mode is
 * `_TextProcessingMode.redaction`, the redactor is stored in `_redaction` and its
 * `_document` is copied to `_document`.
 *
 * @param mode Processing mode for this helper; left unset when omitted.
 * @param redaction Redactor read only in redaction mode. It is dereferenced without a
 * check there, so it must be supplied whenever `mode` is `redaction`.
 */
constructor();
constructor(mode: _TextProcessingMode);
constructor(mode: _TextProcessingMode, redaction?: PdfRedactor);
constructor(mode?: _TextProcessingMode, redaction?: PdfRedactor) {
    if (mode !== undefined) {
        this._mode = mode;
    }
    if (this._mode !== _TextProcessingMode.redaction) {
        return;
    }
    this._redaction = redaction;
    this._document = redaction._document;
}
40
/**
 * Reads the records of a page's content.
 *
 * The bytes returned by `page._combineContent()` are handed to a new `_ContentParser`,
 * and the result of its `_readContent()` call is returned.
 *
 * @param page Page whose content is parsed.
 * @returns The records produced by the content parser.
 */
_getPageRecordCollection(page: PdfPage): _PdfRecord[] {
    const contentBytes: Uint8Array = page._combineContent();
    return new _ContentParser(contentBytes)._readContent();
}
46
/**
 * Handles a text-showing record whose text is a single string operand (`Tj`, and `'` / `"`
 * when routed here by the quote handlers).
 *
 * Behaviour depends on `_mode`:
 * - `textLineExtraction`: splits the text, builds glyph/word data and appends a line via
 *   `_setTextLineCollection`.
 * - `textExtraction`: appends the decoded text to `_resultantText`, followed by `\r\n` for
 *   the `'` and `"` operators.
 * - `redaction`: only when `_isContainsRedactionText` is set, asks the redactor for the
 *   replacement text and reports whether the operand changed.
 *
 * @param record Record being processed; the text is operand 2 for `"` and operand 0 otherwise.
 * @param textState Current text state.
 * @param currentFont Ignored on entry; the font is always looked up again from `fontCollection`.
 * @param page Page that owns the record.
 * @param fontCollection Fonts available to the content stream.
 * @returns `{ updatedText, isChangeOperator }` in redaction mode when redaction text is
 * present; otherwise nothing.
 */
_processTjOperator(record: _PdfRecord, textState: _TextState, currentFont: _FontStructure, page: PdfPage, fontCollection:
    Map<string, _FontStructure>): { updatedText: string; isChangeOperator: boolean } | void {
    // One lookup is enough: the inputs do not change for the rest of this call.
    const font: _FontStructure = this._parser._getTextFont(fontCollection, textState, this._crossReference);
    const element: string = record._operator === '"' ? record._operands[2] : record._operands[0];
    if (this._mode === _TextProcessingMode.textLineExtraction) {
        const split: any = this._parser._getSplitText(element, font, record._splitText); // eslint-disable-line
        const parsed: any = this._getTextElementsFromTjOperator(split.decodedList, font, textState, page); // eslint-disable-line
        // `_getTextElementsFromTjOperator` returns nothing for an empty decoded list.
        if (parsed) {
            this._setTextLineCollection(parsed.tempString, font, textState, page, parsed.extractedText);
        }
        return;
    }
    if (this._mode === _TextProcessingMode.textExtraction) {
        this._extractTextElement(element, font, record._splitText);
        if (record._operator === "'" || record._operator === '"') { //eslint-disable-line
            this._resultantText += '\r\n';
        }
        return;
    }
    if (this._mode !== _TextProcessingMode.redaction || !this._isContainsRedactionText) {
        return;
    }
    const glyphs: TextGlyph[] = [];
    const split: any = this._parser._getSplitText(element, font, record._splitText, true); // eslint-disable-line
    const elements: any = this._getTextElementsFromTjOperator(split.decodedList, font, textState, page, glyphs, split.inputType); // eslint-disable-line
    if (!elements) {
        // Nothing was decoded, so there is nothing to replace; report the operand as unchanged.
        this._isNotUpdated = true;
        return { updatedText: '', isChangeOperator: false };
    }
    const updatedText: string = this._redaction._replacedText(elements.textGlyphs, elements.encodedText, element,
                                                              elements.decodedText);
    let isChangeOperator: boolean = false;
    // Compare against the text operand itself. For the `"` operator operand 0 is not the
    // text, so comparing against it would flag every such record as changed.
    if (updatedText === element) {
        this._isNotUpdated = true;
    } else {
        isChangeOperator = true;
    }
    return { updatedText, isChangeOperator };
}
95
/**
 * Flushes the pending glyphs into a final word (when `text` is not empty) and appends a new
 * `TextLine` built from the words collected so far to `_textLine`.
 *
 * Resets `_width` and `_textGlyph` on every call; `_height` is reset only when a word is
 * flushed.
 *
 * @param text Text of the pending word; an empty string means no word is flushed.
 * @param currentFont Font whose `_name` and `_fontStyle` are copied to the word and the line.
 * @param textState Text state whose `_fontSize` is stored on the line.
 * @param page Page whose `_pageIndex` is stored on the line.
 * @param extractedText Text stored on the line.
 */
_setTextLineCollection(text: string, currentFont: _FontStructure, textState: _TextState, page: PdfPage, extractedText: string): void {
    if (text !== '') {
        const word: TextWord = new TextWord();
        word._text = text;
        word._glyphs = this._textGlyph;
        // The word bounds are the bounds of a path holding one rectangle per glyph.
        const glyphOutline: PdfPath = new PdfPath();
        for (const glyph of this._textGlyph) {
            glyphOutline.addRectangle(glyph._bounds[0], glyph._bounds[1], glyph._bounds[2], glyph._bounds[3]);
        }
        word._bounds = glyphOutline._getBounds();
        word._fontName = currentFont._name;
        word._fontStyle = currentFont._fontStyle;
        word._fontSize = this._fontSize;
        this._textWord.push(word);
        this._height = 0;
    }
    this._width = 0;
    this._textGlyph = [];
    const line: TextLine = new TextLine();
    line._text = extractedText;
    line._wordCollection = this._textWord;
    line._fontName = currentFont._name;
    line._fontStyle = currentFont._fontStyle;
    line._fontSize = textState._fontSize;
    line._pageIndex = page._pageIndex;
    // The line bounds are derived the same way, from one rectangle per word.
    const wordOutline: PdfPath = new PdfPath();
    for (const word of this._textWord) {
        wordOutline.addRectangle(word._bounds[0], word._bounds[1], word._bounds[2], word._bounds[3]);
    }
    line._bounds = wordOutline._getBounds();
    this._textLine.push(line);
}
137
/**
 * Handles a `TJ` record (text shown from an array of strings and position adjustments).
 *
 * Behaviour depends on `_mode`:
 * - `textLineExtraction`: splits the text, builds glyph/word data and appends a line via
 *   `_setTextLineCollection`.
 * - `textExtraction`: appends the decoded text to `_resultantText`.
 * - `redaction`: only when `_isContainsRedactionText` is set, asks the redactor for the
 *   replacement text and reports whether the operand changed.
 *
 * @param record Record being processed; operand 0 carries the text.
 * @param textState Current text state.
 * @param currentFont Ignored on entry; the font is always looked up again from `fontCollection`.
 * @param page Page that owns the record.
 * @param fontCollection Fonts available to the content stream.
 * @returns `updatedText` (empty unless redaction produced a value) and `isChangeOperator`
 * (true only when the redacted text differs from the operand).
 */
_processTJOperator(record: _PdfRecord, textState: _TextState, currentFont: _FontStructure, page: PdfPage, fontCollection:
    Map<string, _FontStructure>): { updatedText: string, isChangeOperator: boolean } {
    // Looked up once; the previous code repeated this identical call up to three times.
    const font: _FontStructure = this._parser._getTextFont(fontCollection, textState, this._crossReference);
    const element: string = record._operands[0];
    let updatedText: string = '';
    let isChangeOperator: boolean = false;
    if (this._mode === _TextProcessingMode.textLineExtraction) {
        const split: any = this._parser._getSplitText(element[0], font, record._splitText); // eslint-disable-line
        const parsed: any = this._getTextElementsFromTJOperator(split.decodedList, font, textState, page); // eslint-disable-line
        this._setTextLineCollection(parsed.tempString, font, textState, page, parsed.extractedText);
    } else if (this._mode === _TextProcessingMode.textExtraction) {
        this._extractTextElement(element, font, record._splitText);
        // Kept from the original: only matters if a caller passes a `'` record to this method.
        if (record._operator === "'") { //eslint-disable-line
            this._resultantText += '\r\n';
        }
    } else if (this._mode === _TextProcessingMode.redaction && this._isContainsRedactionText) {
        const glyphs: TextGlyph[] = [];
        const split: any = this._parser._getSplitText(element[0], font, record._splitText, true); // eslint-disable-line
        const elements: any = this._getTextElementsFromTJOperator(split.decodedList, font, textState, page, glyphs, split.inputType); // eslint-disable-line
        // The TJ variant of the helper names its encoded list `encodeText`.
        updatedText = this._redaction._replacedText(elements.textGlyphs, elements.encodeText, element, elements.decodedText);
        if (updatedText === element) {
            this._isNotUpdated = true;
        } else {
            isChangeOperator = true;
        }
    }
    return { updatedText, isChangeOperator };
}
177
/**
 * Handles the `'` record: calls `textState._carriageReturn()` and then processes the text
 * through `_processTjOperator`.
 *
 * @param record Record being processed.
 * @param textState Current text state; its `_carriageReturn()` is invoked first.
 * @param currentFont Passed through to `_processTjOperator`.
 * @param page Page that owns the record.
 * @param fontCollection Fonts available to the content stream.
 * @returns A copy of the `{ updatedText, isChangeOperator }` result when
 * `_processTjOperator` produced one; otherwise nothing.
 */
_processSingleQuoteOperator(record: _PdfRecord, textState: _TextState, currentFont: _FontStructure, page: PdfPage, fontCollection:
    Map<string, _FontStructure>): { updatedText: string, isChangeOperator: boolean } | void {
    textState._carriageReturn();
    const outcome: any = this._processTjOperator(record, textState, currentFont, page, fontCollection); // eslint-disable-line
    if (outcome === null || typeof outcome !== 'object') {
        return;
    }
    return { updatedText: outcome.updatedText, isChangeOperator: outcome.isChangeOperator };
}
186
/**
 * Handles the `"` record: stores operand 0 as word spacing and operand 1 as character
 * spacing, calls `textState._carriageReturn()`, then processes the text through
 * `_processTjOperator`.
 *
 * @param record Record being processed; operands 0 and 1 are converted with `Number`.
 * @param textState Current text state; updated before the text is processed.
 * @param currentFont Passed through to `_processTjOperator`.
 * @param page Page that owns the record.
 * @param fontCollection Fonts available to the content stream.
 * @returns A copy of the `{ updatedText, isChangeOperator }` result when
 * `_processTjOperator` produced one; otherwise nothing.
 */
_processDoubleQuoteOperator(record: _PdfRecord, textState: _TextState, currentFont: _FontStructure, page: PdfPage, fontCollection:
    Map<string, _FontStructure>): { updatedText: string, isChangeOperator: boolean } | void {
    const operands: string[] = record._operands;
    textState._wordSpacing = Number(operands[0]);
    textState._charSpacing = Number(operands[1]);
    textState._carriageReturn();
    const outcome: any = this._processTjOperator(record, textState, currentFont, page, fontCollection); // eslint-disable-line
    if (outcome === null || typeof outcome !== 'object') {
        return;
    }
    return { updatedText: outcome.updatedText, isChangeOperator: outcome.isChangeOperator };
}
197
/**
 * Walks a list of content-stream records and processes each according to `_mode`.
 *
 * Every record is first passed to `_parser._processCommand`; the text state is then taken
 * from `graphicState._state`. In redaction mode each record is also forwarded to
 * `_redaction._optimizeContent` together with the replacement text (empty when the record
 * was not changed).
 *
 * @param recordCollection Records to process, in stream order.
 * @param page Page that owns the records.
 * @param fontCollection Fonts available to the content stream.
 * @param xObjectCollection XObjects by name (without the leading `/`), used by `Do`.
 * @param graphicState Graphic state updated while the records are walked.
 * @returns The rewritten content stream in redaction mode, the accumulated text in
 * `textExtraction` mode, the collected lines in `textLineExtraction` mode, and nothing
 * for any other mode.
 */
_processRecordCollection(recordCollection: _PdfRecord[], page: PdfPage, fontCollection: Map<string, _FontStructure>,
    xObjectCollection: Map<string, any>, graphicState: _GraphicState): _PdfContentStream | void | string | TextLine[] { // eslint-disable-line
    const stream: _PdfContentStream = new _PdfContentStream([]);
    for (let i: number = 0; i < recordCollection.length; i++) {
        const record: _PdfRecord = recordCollection[i]; // eslint-disable-line
        const token: string = record._operator;
        const element: string[] = record._operands;
        this._parser._processCommand(token, element, graphicState);
        const textState: _TextState = graphicState._state;
        // Never assigned here; the text handlers resolve the font themselves.
        let currentFont: _FontStructure;
        // Result of a text-showing operator, if this record is one.
        let textResult: any; // eslint-disable-line
        switch (token) {
        case 'Tm':
            if (this._mode !== _TextProcessingMode.textExtraction) {
                this._parser._setTextMatrix(element, textState);
            }
            if (this._mode === _TextProcessingMode.redaction) {
                const x: number = textState._textMatrix[4];
                const y: number = textState._textMatrix[5];
                if (this._parser._isFoundText(x, y, page, this._redaction._redactionBounds)) {
                    this._isContainsRedactionText = true;
                }
                // NOTE(review): unlike Td/TD, this sets the flag for every `Tm` that is not
                // the last record, without looking at the next operator. Confirm intent.
                if (recordCollection.length !== i + 1 && !this._isContainsRedactionText) {
                    this._isContainsRedactionText = true;
                }
                if (!this._isContainsRedactionText && page.size[1] === y) {
                    this._isContainsRedactionText = true;
                }
            }
            break;
        case 'cm':
            if (this._mode === _TextProcessingMode.redaction) {
                const x: number = parseFloat(element[4]);
                const y: number = parseFloat(element[5]);
                if (this._parser._isFoundText(x, y, page, this._redaction._redactionBounds)) {
                    this._isContainsRedactionText = true;
                }
            }
            break;
        case 'BT':
            if (this._mode !== _TextProcessingMode.textExtraction) {
                this._parser._beginText(textState, this._identityMatrix);
            }
            break;
        case 'ET':
            if (this._mode === _TextProcessingMode.textExtraction) {
                this._resultantText += '\r\n';
            } else if (this._mode === _TextProcessingMode.redaction) {
                this._isContainsRedactionText = false;
                this._xPosition = 0;
                this._yPosition = 0;
            }
            break;
        case 'Tf':
            this._parser._setFont(element, textState);
            break;
        case 'Tc':
            if (this._mode !== _TextProcessingMode.textExtraction) {
                this._parser._setCharSpacing(element, textState);
            }
            break;
        case 'Tw':
            if (this._mode !== _TextProcessingMode.textExtraction) {
                this._parser._setWordSpacing(element, textState);
            }
            break;
        case 'Tz':
            if (this._mode !== _TextProcessingMode.textExtraction) {
                this._parser._setTextHorizontalScale(element, textState);
            }
            break;
        case 'TL':
            if (this._mode !== _TextProcessingMode.textExtraction) {
                this._parser._updateTextLeading(element, textState);
            }
            break;
        case 'Td':
            if (this._mode !== _TextProcessingMode.textExtraction) {
                this._parser._moveTextPlacement(element, textState);
            }
            if (this._mode === _TextProcessingMode.redaction) {
                this._updateRedactionTextPosition(element, i, recordCollection, page);
            }
            break;
        case 'TD':
            if (this._mode !== _TextProcessingMode.textExtraction) {
                this._parser._moveTextPlacementAndSetLeading(element, textState);
            }
            if (this._mode === _TextProcessingMode.redaction) {
                this._updateRedactionTextPosition(element, i, recordCollection, page);
            }
            break;
        case 'Ts':
            if (this._mode !== _TextProcessingMode.textExtraction) {
                this._parser._setTextRise(element, textState);
            }
            break;
        case 'Tj':
            textResult = this._processTjOperator(record, textState, currentFont, page, fontCollection);
            break;
        case 'TJ':
            textResult = this._processTJOperator(record, textState, currentFont, page, fontCollection);
            break;
        case "'": // eslint-disable-line
            textResult = this._processSingleQuoteOperator(record, textState, currentFont, page, fontCollection);
            break;
        case '"':
            textResult = this._processDoubleQuoteOperator(record, textState, currentFont, page, fontCollection);
            break;
        case 'T*':
            if (this._mode === _TextProcessingMode.textExtraction) {
                this._resultantText += '\r\n';
            } else {
                this._parser._setNewLineWithLeading(textState);
            }
            break;
        case 'Do':
        {
            const xobject: string = element[0].replace('/', '');
            const base: any = xObjectCollection.get(xobject); //eslint-disable-line
            if (base) {
                if (this._mode === _TextProcessingMode.textExtraction || this._mode === _TextProcessingMode.textLineExtraction) {
                    _getXObject(element, page, xObjectCollection, this, this._mode, graphicState);
                } else if (this._mode === _TextProcessingMode.redaction) {
                    const pdfStream: any = _getXObject(element, page, xObjectCollection, this, this._mode, graphicState); // eslint-disable-line
                    // Reuse the original dictionary for the returned stream: drop the old
                    // Length/Filter entries and record the new length.
                    delete base.dictionary._map.Length;
                    delete base.dictionary._map.Filter;
                    base.dictionary.update('Length', pdfStream.length);
                    pdfStream.dictionary = base.dictionary;
                    pdfStream.dictionary._updated = true;
                    // `objId` is split on a space into the two numbers passed to `_PdfReference.get`.
                    const objectId: any = base.dictionary.objId; // eslint-disable-line
                    const strParts: string[] = objectId.split(' ');
                    const reference: _PdfReference = _PdfReference.get(Number(strParts[0]), Number(strParts[1]));
                    this._document._crossReference._cacheMap.set(reference, pdfStream);
                }
            }
            break;
        }
        case 'g':
        {
            // `g` carries a single gray level. Reading three operands left green and blue
            // as NaN, so the level is replicated across the three channels instead.
            const gray: number = Number(element[0]);
            textState._textColor = [gray, gray, gray];
            break;
        }
        case 'RG':
        case 'k':
        case 'rg':
            // NOTE(review): `k` is a four-operand CMYK operator, yet its first three
            // operands are stored as if they were RGB. Left as-is; confirm intended handling.
            textState._textColor = [Number(element[0]), Number(element[1]), Number(element[2])];
            break;
        }
        if (this._mode === _TextProcessingMode.redaction) {
            // Only a text operator that reported a change contributes replacement text.
            const hasResult: boolean = typeof textResult === 'object' && textResult !== null;
            const updatedText: string = hasResult && textResult.isChangeOperator ? textResult.updatedText : '';
            this._redaction._optimizeContent(recordCollection, i, updatedText, stream);
        }
    }
    if (this._mode === _TextProcessingMode.redaction) {
        stream.write('\r\n');
        return stream;
    } else if (this._mode === _TextProcessingMode.textExtraction) {
        return this._resultantText;
    } else if (this._mode === _TextProcessingMode.textLineExtraction) {
        return this._textLine;
    }
    return;
}
/**
 * Redaction-mode bookkeeping shared by `Td` and `TD`: accumulates the offset into
 * `_xPosition` / `_yPosition` and updates `_isContainsRedactionText`.
 *
 * The flag is set when `_parser._isFoundText` accepts the accumulated position, or, failing
 * that, when the next record is a text-showing operator (`Tj`, `TJ`, `"` or `'`).
 *
 * @param element Operands of the current record; the first two are parsed as x and y offsets.
 * @param index Index of the current record in `recordCollection`.
 * @param recordCollection Records being processed.
 * @param page Page that owns the records.
 */
_updateRedactionTextPosition(element: string[], index: number, recordCollection: _PdfRecord[], page: PdfPage): void {
    this._xPosition = this._xPosition + parseFloat(element[0]);
    this._yPosition = this._yPosition - parseFloat(element[1]);
    if (this._parser._isFoundText(this._xPosition, this._yPosition, page, this._redaction._redactionBounds)) {
        this._isContainsRedactionText = true;
    }
    if (recordCollection.length !== index + 1 && !this._isContainsRedactionText) {
        const next: string = recordCollection[index + 1]._operator;
        if (next === 'Tj' || next === 'TJ' || next === '"' || next === "'") { // eslint-disable-line
            this._isContainsRedactionText = true;
        }
    }
}
419
/**
 * Decodes a text operand with `_decodeEncodedText` and appends the result to
 * `_resultantText`.
 *
 * @param elements Encoded text operand.
 * @param currentFont Font passed to the decoder.
 * @param inputText Split text of the record, passed to the decoder.
 */
_extractTextElement(elements: string, currentFont: _FontStructure, inputText: string[]): void {
    const decoded: string = _decodeEncodedText(elements, currentFont, inputText);
    this._resultantText = this._resultantText + decoded;
}
423
/**
 * Builds text content for the first entry of a decoded `Tj` list via
 * `_parser._getTextContentItem`. Always resets `_textWord` first.
 *
 * @param decodedList Decoded strings; only the first entry is used.
 * @param currentFont Font passed to the parser.
 * @param textState Current text state.
 * @param page Page that owns the text.
 * @param textGlyphs When supplied, glyph-collecting mode is used and `inputType` is read.
 * @param inputType Read only in glyph-collecting mode; its first entry is split into hex
 * parts unless it equals a single space.
 * @returns Nothing when `decodedList` has no entries. In glyph-collecting mode
 * `{ textGlyphs, decodedText, encodedText }`, where `decodedText` holds the extracted text
 * wrapped in parentheses. Otherwise `{ tempString, extractedText }`, after storing the
 * reported font size in `_fontSize`.
 */
_getTextElementsFromTjOperator(decodedList: string[], currentFont: _FontStructure, textState: _TextState, page: PdfPage,
    textGlyphs?: TextGlyph[], inputType?: string[]): any {// eslint-disable-line
    this._textWord = [];
    if (!(decodedList.length > 0)) {
        return undefined;
    }
    const startRect: { x: number, y: number, width: number, height: number } = {x: 0, y: 0, width: 0, height: 0};
    if (textGlyphs === undefined) {
        const item: any = this._parser._getTextContentItem(currentFont, decodedList[0], 0, textState, page, '', startRect, '', this); //eslint-disable-line
        const tempString: string = item.tempString;
        const extractedText: string = item.extractedText;
        this._fontSize = item.fontSize;
        return {tempString, extractedText};
    }
    const firstInput: string = inputType[0];
    const hex: string[] = firstInput !== ' ' ? this._parser._splitHexString(firstInput) : [];
    const item: any = this._parser._getTextContentItem(currentFont, decodedList[0], 0, textState, page, '', startRect, '', this, textGlyphs, hex, 0, []); // eslint-disable-line
    const decodedText: string[] = ['(' + item.extractedText + ')'];
    const encodedText: string[] = item.encodedText;
    return {textGlyphs, decodedText, encodedText};
}
453
/**
 * Builds text content for a decoded `TJ` list via `_parser._getTextContentItem`.
 * Always resets `_textWord` first.
 *
 * Entries that `Number()` cannot convert are remembered as the pending text. Every other
 * entry is treated as a spacing value: the pending text is processed with that value scaled
 * by the font size (sign depends on `currentFont._vertical`). After the loop the pending
 * text is processed once more with zero spacing.
 *
 * Note: `Number('')` and `Number(' ')` are `0`, so empty or whitespace-only entries take
 * the spacing path.
 *
 * @param decodedList Decoded strings and spacing values, in array order.
 * @param currentFont Font passed to the parser; `_vertical` selects the spacing sign.
 * @param textState Current text state; `_fontSize` scales the spacing.
 * @param page Page that owns the text.
 * @param textGlyphs When supplied, glyph-collecting mode is used and `inputType` is read.
 * @param inputType Read only in glyph-collecting mode; entries other than a single space
 * are split into hex parts.
 * @returns `{ textGlyphs, decodedText, encodeText }` in glyph-collecting mode (note the
 * `encodeText` spelling), otherwise `{ tempString, extractedText }` after storing the
 * reported font size in `_fontSize`.
 */
_getTextElementsFromTJOperator(decodedList: string[], currentFont: _FontStructure, textState: _TextState, page: PdfPage,
    textGlyphs?: TextGlyph[], inputType?: string[]): any { //eslint-disable-line
    this._textWord = [];
    let tempString: string = '';
    let text: string = '';
    let previousRect: { x: number, y: number, width: number, height: number } = {x: 0, y: 0, width: 0, height: 0};
    const decodedText: string[] = [];
    let encodedText: string[] = [];
    let index: number = 0;
    let hex: string[] = [];
    let extractedText: string = '';
    const spaceFactor: number = ((currentFont._vertical ? 1 : -1) * textState._fontSize) / 1000;
    for (let j: number = 0; j < decodedList.length; j++) {
        const word: string = decodedList[j]; // eslint-disable-line
        const spacing: number = Number(word);
        if (Number.isNaN(spacing)) {
            // Not a number: this is text. The previous "zero space" branch that followed
            // could never run, because this path is only reached for NaN, and NaN !== 0.
            text = word;
            continue;
        }
        if (typeof(textGlyphs) !== 'undefined') {
            if (j > 0 && inputType[j - 1] !== ' ') {
                hex = this._parser._splitHexString(inputType[j - 1]);
            }
            const item: any = this._parser._getTextContentItem(currentFont, text, spacing * spaceFactor, textState, page, tempString, previousRect, extractedText, this, textGlyphs, hex, index, encodedText); // eslint-disable-line
            textGlyphs = item.textGlyphs;
            encodedText = item.encodedText;
            index = item.index;
            // Each segment is extracted on its own in this mode, so the running text is
            // passed to the parser as an empty string every time.
            extractedText = '';
            decodedText.push('(' + item.extractedText + ')', word);
        } else {
            const item: any = this._parser._getTextContentItem(currentFont, text, spacing * spaceFactor, textState, page, tempString, previousRect, extractedText, this); // eslint-disable-line
            tempString = item.tempString;
            extractedText = item.extractedText;
            this._fontSize = item.fontSize;
            previousRect = item.previousRect;
        }
    }
    if (typeof(textGlyphs) !== 'undefined') {
        const lastInput: string = inputType[decodedList.length - 1];
        if (lastInput !== ' ') {
            hex = this._parser._splitHexString(lastInput);
        }
        const item: any = this._parser._getTextContentItem(currentFont, text, 0, textState, page, // eslint-disable-line
                                                           tempString, previousRect, extractedText, this, textGlyphs,
                                                           hex, index, encodedText);
        decodedText.push('(' + item.extractedText + ')');
        const encodeText: string[] = item.encodedText;
        return {textGlyphs, decodedText, encodeText};
    }
    const item: any = this._parser._getTextContentItem(currentFont, text, 0, textState, page, // eslint-disable-line
                                                       tempString, previousRect, extractedText, this);
    tempString = item.tempString;
    extractedText = item.extractedText;
    this._fontSize = item.fontSize;
    return {tempString, extractedText};
}
523
/**
 * Adds one glyph to the pending word and decides whether a word boundary was crossed.
 *
 * - A whitespace glyph flushes the pending word (if any) to `_textWord`, is itself pushed as
 *   a one-glyph word, and clears the pending state; `previousRect` is returned as `null`.
 * - A non-whitespace glyph that follows a rectangle with positive width flushes the pending
 *   word when the gap to the previous rectangle exceeds a spacing threshold. The threshold
 *   is 7% of the glyph height with a floor of 2, raised to 2.5 when the floor applies and the
 *   gap is positive. The gap is measured along the axis selected by the page rotation.
 * - The non-whitespace glyph is then appended to `_textGlyph` and to the returned string.
 *
 * @param glyph Glyph text to add.
 * @param tempString Text of the pending word so far.
 * @param fontName Font name stored on created glyphs and words.
 * @param fontStyle Font style stored on created glyphs and words.
 * @param page Page that owns the glyph; its `rotation` selects the measuring axis.
 * @param rotation Extra rotation hint; the value 90 is treated like a rotated page.
 * @param textColor Colour stored on created glyphs.
 * @param fontSize Font size stored on created glyphs and words.
 * @param textBounds Bounds of the glyph; read without a check, so it must be supplied.
 * @param previousRect Bounds of the previous glyph; `null` or omitted skips the gap test.
 * @returns `{ tempString, previousRect }` with the updated pending text and rectangle.
 */
_splitWords(glyph: string, tempString: string, fontName: string, fontStyle: PdfFontStyle , page: PdfPage,
    rotation?: number, textColor?: number[], fontSize?: number, textBounds?:
    { x: number, y: number, width: number, height: number },
    previousRect?: { x: number, y: number, width: number, height: number }): any { //eslint-disable-line
    const isSpace: boolean = /\s/.test(glyph);
    const currentRect: any = textBounds; //eslint-disable-line
    // Pushes a word whose bounds enclose one rectangle per glyph.
    const pushWord: (text: string, glyphs: TextGlyph[]) => void = (text: string, glyphs: TextGlyph[]): void => {
        const textWord: TextWord = new TextWord();
        textWord._text = text;
        textWord._glyphs = glyphs;
        const outline: PdfPath = new PdfPath();
        for (const item of glyphs) {
            outline.addRectangle(item._bounds[0], item._bounds[1], item._bounds[2], item._bounds[3]);
        }
        textWord._bounds = outline._getBounds();
        textWord._fontName = fontName;
        textWord._fontStyle = fontStyle;
        textWord._fontSize = fontSize;
        this._textWord.push(textWord);
    };
    // Creates the glyph record for the current character.
    const createGlyph: () => TextGlyph = (): TextGlyph => {
        const textGlyph: TextGlyph = new TextGlyph();
        textGlyph._text = glyph;
        textGlyph._bounds = [currentRect.x, currentRect.y, currentRect.width, currentRect.height];
        textGlyph._fontName = fontName;
        textGlyph._fontStyle = fontStyle;
        textGlyph._fontSize = fontSize;
        textGlyph._color = textColor;
        // Previously the non-whitespace path computed this flag and then overwrote it with
        // `false`; both paths now report the page rotation consistently.
        textGlyph._isRotated = page.rotation !== PdfRotationAngle.angle0;
        return textGlyph;
    };
    const resetPending: () => void = (): void => {
        this._width = 0;
        this._height = 0;
        this._textGlyph = [];
    };
    if (isSpace) {
        if (tempString) {
            pushWord(tempString, this._textGlyph);
            this._textGlyph = [];
            tempString = '';
        }
        this._textGlyph.push(createGlyph());
        pushWord(glyph, this._textGlyph);
        resetPending();
        previousRect = null;
    } else if (previousRect != null && previousRect.width > 0) { // `!= null` also covers an omitted argument
        let spacingFactor: number = currentRect.height * 0.07;
        if (spacingFactor < 2) {
            spacingFactor = 2;
        }
        let difference: number;
        if (page.rotation === PdfRotationAngle.angle90) {
            difference = previousRect.y + previousRect.height - currentRect.y;
        } else if (page.rotation === PdfRotationAngle.angle270 || rotation === 90) {
            difference = currentRect.y + currentRect.height - previousRect.y;
        } else if (page.rotation === PdfRotationAngle.angle180) {
            difference = currentRect.x + currentRect.width - previousRect.x;
        } else {
            difference = previousRect.x + previousRect.width - currentRect.x;
        }
        if (difference > 0 && spacingFactor === 2) {
            spacingFactor = 2.5;
        }
        if (Math.abs(difference) > spacingFactor) {
            pushWord(tempString, this._textGlyph);
            resetPending();
            tempString = '';
            previousRect = {x: 0, y: 0, width: 0 , height: 0};
        }
    }
    if (!isSpace) {
        this._textGlyph.push(createGlyph());
        if (page.rotation === PdfRotationAngle.angle90 || page.rotation === PdfRotationAngle.angle270 || rotation === 90) {
            this._height += currentRect.height;
        } else {
            this._width += currentRect.width;
        }
        tempString += glyph;
    }
    return {tempString, previousRect};
}
640
- }
@@ -1,6 +0,0 @@
1
/**
 * Ambient declaration of the processing modes; numeric values are fixed here.
 */
export declare enum _TextProcessingMode {
    /** Value 0. */
    textExtraction = 0,
    /** Value 1. */
    textLayOut = 1,
    /** Value 2. */
    redaction = 2,
    /** Value 3. */
    textLineExtraction = 3
}
@@ -1,6 +0,0 @@
1
/**
 * Processing modes. Values are written out explicitly; they match the automatic numbering
 * of the original member order (0 to 3).
 */
export enum _TextProcessingMode {
    /** Value 0. */
    textExtraction = 0,
    /** Value 1. */
    textLayOut = 1,
    /** Value 2. */
    redaction = 2,
    /** Value 3. */
    textLineExtraction = 3
}