@syncfusion/ej2-pdf-data-extract 31.1.17 → 31.1.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/dist/ej2-pdf-data-extract.umd.min.js +1 -1
  2. package/dist/global/ej2-pdf-data-extract.min.js +1 -1
  3. package/dist/global/index.d.ts +1 -1
  4. package/package.json +12 -44
  5. package/dist/ts/index.d.ts +0 -23
  6. package/dist/ts/index.ts +0 -23
  7. package/dist/ts/pdf-data-extract/core/content-parser-helper.d.ts +0 -62
  8. package/dist/ts/pdf-data-extract/core/content-parser-helper.ts +0 -661
  9. package/dist/ts/pdf-data-extract/core/enum.d.ts +0 -6
  10. package/dist/ts/pdf-data-extract/core/enum.ts +0 -6
  11. package/dist/ts/pdf-data-extract/core/graphic-state.d.ts +0 -33
  12. package/dist/ts/pdf-data-extract/core/graphic-state.ts +0 -106
  13. package/dist/ts/pdf-data-extract/core/pdf-data-extractor.d.ts +0 -210
  14. package/dist/ts/pdf-data-extract/core/pdf-data-extractor.ts +0 -998
  15. package/dist/ts/pdf-data-extract/core/pdf-text-parser.d.ts +0 -63
  16. package/dist/ts/pdf-data-extract/core/pdf-text-parser.ts +0 -498
  17. package/dist/ts/pdf-data-extract/core/redaction/index.d.ts +0 -7
  18. package/dist/ts/pdf-data-extract/core/redaction/index.ts +0 -7
  19. package/dist/ts/pdf-data-extract/core/redaction/pdf-path-segment.d.ts +0 -24
  20. package/dist/ts/pdf-data-extract/core/redaction/pdf-path-segment.ts +0 -59
  21. package/dist/ts/pdf-data-extract/core/redaction/pdf-redaction-processor.d.ts +0 -55
  22. package/dist/ts/pdf-data-extract/core/redaction/pdf-redaction-processor.ts +0 -590
  23. package/dist/ts/pdf-data-extract/core/redaction/pdf-redaction-region.d.ts +0 -281
  24. package/dist/ts/pdf-data-extract/core/redaction/pdf-redaction-region.ts +0 -342
  25. package/dist/ts/pdf-data-extract/core/redaction/pdf-redactor.d.ts +0 -172
  26. package/dist/ts/pdf-data-extract/core/redaction/pdf-redactor.ts +0 -414
  27. package/dist/ts/pdf-data-extract/core/redaction/pdf-shape-redaction.d.ts +0 -45
  28. package/dist/ts/pdf-data-extract/core/redaction/pdf-shape-redaction.ts +0 -281
  29. package/dist/ts/pdf-data-extract/core/redaction/shape-parser-helper.d.ts +0 -32
  30. package/dist/ts/pdf-data-extract/core/redaction/shape-parser-helper.ts +0 -479
  31. package/dist/ts/pdf-data-extract/core/redaction/text-glyph-mapper.d.ts +0 -12
  32. package/dist/ts/pdf-data-extract/core/redaction/text-glyph-mapper.ts +0 -153
  33. package/dist/ts/pdf-data-extract/core/text-extraction/binary-cmap-reader.d.ts +0 -24
  34. package/dist/ts/pdf-data-extract/core/text-extraction/binary-cmap-reader.ts +0 -281
  35. package/dist/ts/pdf-data-extract/core/text-extraction/cmap.d.ts +0 -50
  36. package/dist/ts/pdf-data-extract/core/text-extraction/cmap.ts +0 -565
  37. package/dist/ts/pdf-data-extract/core/text-extraction/compact-font-parser.d.ts +0 -191
  38. package/dist/ts/pdf-data-extract/core/text-extraction/compact-font-parser.ts +0 -1928
  39. package/dist/ts/pdf-data-extract/core/text-extraction/encoding-utils.d.ts +0 -102
  40. package/dist/ts/pdf-data-extract/core/text-extraction/encoding-utils.ts +0 -5780
  41. package/dist/ts/pdf-data-extract/core/text-extraction/font-structure.d.ts +0 -167
  42. package/dist/ts/pdf-data-extract/core/text-extraction/font-structure.ts +0 -1842
  43. package/dist/ts/pdf-data-extract/core/text-extraction/font-tables.d.ts +0 -5
  44. package/dist/ts/pdf-data-extract/core/text-extraction/font-tables.ts +0 -16
  45. package/dist/ts/pdf-data-extract/core/text-extraction/font-utils.d.ts +0 -18
  46. package/dist/ts/pdf-data-extract/core/text-extraction/font-utils.ts +0 -630
  47. package/dist/ts/pdf-data-extract/core/text-extraction/glyph.d.ts +0 -93
  48. package/dist/ts/pdf-data-extract/core/text-extraction/glyph.ts +0 -622
  49. package/dist/ts/pdf-data-extract/core/text-extraction/index.d.ts +0 -10
  50. package/dist/ts/pdf-data-extract/core/text-extraction/index.ts +0 -10
  51. package/dist/ts/pdf-data-extract/core/text-extraction/matrix-helper.d.ts +0 -38
  52. package/dist/ts/pdf-data-extract/core/text-extraction/matrix-helper.ts +0 -150
  53. package/dist/ts/pdf-data-extract/core/text-extraction/metrics.d.ts +0 -16
  54. package/dist/ts/pdf-data-extract/core/text-extraction/metrics.ts +0 -2938
  55. package/dist/ts/pdf-data-extract/core/text-structure.d.ts +0 -628
  56. package/dist/ts/pdf-data-extract/core/text-structure.ts +0 -668
  57. package/dist/ts/pdf-data-extract/core/utils.d.ts +0 -99
  58. package/dist/ts/pdf-data-extract/core/utils.ts +0 -626
  59. package/dist/ts/pdf-data-extract/index.d.ts +0 -23
  60. package/dist/ts/pdf-data-extract/index.ts +0 -23
@@ -1,661 +0,0 @@
1
- import { _ContentParser, _PdfContentStream, _PdfCrossReference, _PdfRecord, _PdfReference, PdfDocument, PdfFontStyle, PdfPage, PdfPath, PdfRotationAngle } from '@syncfusion/ej2-pdf';
2
- import { TextGlyph, TextLine, TextWord } from './text-structure';
3
- import { _TextProcessingMode } from './enum';
4
- import { PdfRedactor } from './redaction/pdf-redactor';
5
- import { _GraphicState, _TextState } from './graphic-state';
6
- import { _FontStructure } from './text-extraction';
7
- import { _decodeEncodedText, _getXObject } from './utils';
8
- import { _PdfTextParser } from './pdf-text-parser';
9
- import { _PdfShapeParser } from './redaction/shape-parser-helper';
10
-
11
- export class _PdfContentParserHelper {
12
/** Document owned by the redactor; only assigned when the helper is created in redaction mode. */
_document: PdfDocument;
/** Identity matrix handed to the text parser when a `BT` operator begins a text object. */
_identityMatrix: number[] = [1, 0, 0, 1, 0, 0];
/** Font size reported by the most recent `_getTextContentItem` call; copied onto created words. */
_fontSize: number;
/** Accumulated width of the glyphs collected for the word currently being built. */
_width: number = 0;
/** Accumulated height of the glyphs collected for the word currently being built. */
_height: number = 0;
/** Cross reference passed to font lookups. NOTE(review): never assigned in this class - presumably set by the owner. */
_crossReference: _PdfCrossReference;
/** Plain text accumulated in text-extraction mode. */
_resultantText: string = '';
/** Glyphs collected for the word currently being built. */
_textGlyph: TextGlyph[] = [];
/** Words collected for the line currently being built. */
_textWord: TextWord[] = [];
/** Lines collected in text-line-extraction mode. */
_textLine: TextLine[] = [];
/** Processing mode selected at construction time. */
_mode: _TextProcessingMode;
/** Redaction mode only: whether the current text object is treated as overlapping a redaction region. */
_isContainsRedactionText: boolean = false;
/** Set to `true` when the redactor returns text identical to the original operand. */
_isNotUpdated: boolean;
/** Redactor supplying regions and content rewriting; only assigned in redaction mode. */
_redaction: PdfRedactor;
/** Redaction mode only: running vertical offset accumulated from `Td` / `TD` operands. */
_yPosition: number = 0;
/** Redaction mode only: running horizontal offset accumulated from `Td` / `TD` operands. */
_xPosition: number = 0;
/** Text parser that interprets the individual operators. */
_parser: _PdfTextParser = new _PdfTextParser();
constructor();
constructor(mode: _TextProcessingMode);
constructor(mode: _TextProcessingMode, redaction?: PdfRedactor);
/**
 * Creates a helper for the given processing mode.
 *
 * @param mode Processing mode; when omitted `_mode` stays unassigned.
 * @param redaction Redactor to use; required when `mode` is `_TextProcessingMode.redaction`.
 * @throws Error when redaction mode is requested without a redactor.
 */
constructor(mode?: _TextProcessingMode, redaction?: PdfRedactor) {
    if (typeof mode !== 'undefined') {
        this._mode = mode;
    }
    if (this._mode === _TextProcessingMode.redaction) {
        // The overloads allow omitting the redactor; fail with a clear message instead of
        // a TypeError from dereferencing `undefined`.
        if (!redaction) {
            throw new Error('A PdfRedactor instance is required when the processing mode is redaction.');
        }
        this._redaction = redaction;
        this._document = redaction._document;
    }
}
41
/**
 * Parses the combined content of a page into operator records.
 *
 * @param page Page whose content is read through `_combineContent()`.
 * @returns The records returned by `_ContentParser._readContent()`.
 */
_getPageRecordCollection(page: PdfPage): _PdfRecord[] {
    return new _ContentParser(page._combineContent())._readContent();
}
47
/**
 * Processes a text-showing record that carries a single string (`Tj`, and `'` / `"` through
 * their wrapper methods) according to the active mode.
 *
 * - Text-line extraction: builds a `TextLine` from the decoded string.
 * - Text extraction: appends the decoded string to `_resultantText`, followed by `\r\n`
 *   for the `'` and `"` operators.
 * - Redaction: when the current text object is flagged, asks the redactor for replacement text.
 *
 * @param record Record being processed.
 * @param textState Current text state.
 * @param currentFont Not read; the font is always resolved from `fontCollection` and `textState`.
 * @param page Page that owns the record.
 * @param fontCollection Fonts available to the content stream.
 * @returns In redaction mode, the replacement text and whether it differs from the original
 * string operand; otherwise nothing.
 */
_processTjOperator(record: _PdfRecord, textState: _TextState, currentFont: _FontStructure, page: PdfPage, fontCollection:
Map<string, _FontStructure>): { updatedText: string; isChangeOperator: boolean } | void {
    // Resolve the font once; every branch below uses the same lookup.
    const font: _FontStructure = this._parser._getTextFont(fontCollection, textState, this._crossReference);
    const operator: string = record._operator;
    // For `"` the operands are word spacing, character spacing, string; otherwise the string is first.
    const element: string = operator === '"' ? record._operands[2] : record._operands[0];
    if (this._mode === _TextProcessingMode.textLineExtraction) {
        const split: any = this._parser._getSplitText(element, font, record._splitText); // eslint-disable-line
        const parsed: any = this._getTextElementsFromTjOperator(split.decodedList, font, textState, page); // eslint-disable-line
        // `_getTextElementsFromTjOperator` yields nothing for an empty decoded list; there is no line to add.
        if (parsed) {
            this._setTextLineCollection(parsed.tempString, font, textState, page, parsed.extractedText);
        }
        return;
    }
    if (this._mode === _TextProcessingMode.textExtraction) {
        this._extractTextElement(element, font, record._splitText);
        if (operator === "'" || operator === '"') { // eslint-disable-line
            this._resultantText += '\r\n';
        }
        return;
    }
    if (this._mode !== _TextProcessingMode.redaction || !this._isContainsRedactionText) {
        return;
    }
    const glyphs: TextGlyph[] = [];
    const split: any = this._parser._getSplitText(element, font, record._splitText, true); // eslint-disable-line
    const elements: any = this._getTextElementsFromTjOperator(split.decodedList, font, textState, page, glyphs, // eslint-disable-line
                                                              split.inputType);
    if (!elements) {
        // Empty decoded list: nothing to redact, the caller keeps the original record.
        return;
    }
    const updatedText: string = this._redaction._replacedText(elements.textGlyphs, elements.encodedText, element,
                                                              elements.decodedText);
    // Compare against the string operand itself (operand 2 for `"`), not always operand 0.
    const isChangeOperator: boolean = updatedText !== element;
    if (!isChangeOperator) {
        this._isNotUpdated = true;
    }
    return { updatedText, isChangeOperator };
}
96
/**
 * Closes the word in progress (if any) and appends a new `TextLine` built from the collected words.
 *
 * @param text Text of the word in progress; an empty string means there is no word to close.
 * @param currentFont Font whose name and style are copied onto the word and the line.
 * @param textState Text state supplying the line's font size.
 * @param page Page supplying the line's page index.
 * @param extractedText Full text of the line.
 */
_setTextLineCollection(text: string, currentFont: _FontStructure, textState: _TextState, page: PdfPage, extractedText: string): void {
    if (text !== '') {
        const word: TextWord = new TextWord();
        word._text = text;
        word._glyphs = this._textGlyph;
        // The word bounds are the bounds of a path holding every glyph rectangle.
        const glyphOutline: PdfPath = new PdfPath();
        for (const glyph of this._textGlyph) {
            const rect = glyph._bounds;
            glyphOutline.addRectangle(rect[0], rect[1], rect[2], rect[3]);
        }
        word._bounds = glyphOutline._getBounds();
        word._fontName = currentFont._name;
        word._fontStyle = currentFont._fontStyle;
        word._fontSize = this._fontSize;
        this._textWord.push(word);
        this._height = 0;
    }
    this._width = 0;
    this._textGlyph = [];
    const line: TextLine = new TextLine();
    line._text = extractedText;
    line._wordCollection = this._textWord;
    line._fontName = currentFont._name;
    line._fontStyle = currentFont._fontStyle;
    line._fontSize = textState._fontSize;
    line._pageIndex = page._pageIndex;
    // The line bounds are the bounds of a path holding every word rectangle.
    const wordOutline: PdfPath = new PdfPath();
    for (const word of this._textWord) {
        const rect = word._bounds;
        wordOutline.addRectangle(rect[0], rect[1], rect[2], rect[3]);
    }
    line._bounds = wordOutline._getBounds();
    this._textLine.push(line);
}
138
/**
 * Processes a `TJ` record (text shown with individual positioning) according to the active mode.
 *
 * @param record Record being processed.
 * @param textState Current text state.
 * @param currentFont Not read; the font is always resolved from `fontCollection` and `textState`.
 * @param page Page that owns the record.
 * @param fontCollection Fonts available to the content stream.
 * @returns The replacement text and whether it differs from the original operand. Outside
 * redaction mode, or when the text object is not flagged for redaction, this is
 * `{ updatedText: '', isChangeOperator: false }`.
 */
_processTJOperator(record: _PdfRecord, textState: _TextState, currentFont: _FontStructure, page: PdfPage, fontCollection:
Map<string, _FontStructure>): { updatedText: string, isChangeOperator: boolean } {
    // One lookup is enough: every repeated call used the same arguments.
    const font: _FontStructure = this._parser._getTextFont(fontCollection, textState, this._crossReference);
    const element: string = record._operands[0];
    let updatedText: string = '';
    let isChangeOperator: boolean = false;
    if (this._mode === _TextProcessingMode.textLineExtraction) {
        // NOTE(review): only `element[0]` is handed to the splitter, as in the original - confirm the operand shape.
        const split: any = this._parser._getSplitText(element[0], font, record._splitText); // eslint-disable-line
        const parsed: any = this._getTextElementsFromTJOperator(split.decodedList, font, textState, page); // eslint-disable-line
        this._setTextLineCollection(parsed.tempString, font, textState, page, parsed.extractedText);
    } else if (this._mode === _TextProcessingMode.textExtraction) {
        this._extractTextElement(element, font, record._splitText);
        // Kept for callers outside this file; the record loop in this class only routes `TJ` here.
        if (record._operator === "'") { //eslint-disable-line
            this._resultantText += '\r\n';
        }
    } else if (this._mode === _TextProcessingMode.redaction && this._isContainsRedactionText) {
        const glyphs: TextGlyph[] = [];
        const split: any = this._parser._getSplitText(element[0], font, record._splitText, true); // eslint-disable-line
        const elements: any = this._getTextElementsFromTJOperator(split.decodedList, font, textState, page, glyphs, // eslint-disable-line
                                                                  split.inputType);
        // The TJ helper returns its encoded strings under the key `encodeText`.
        updatedText = this._redaction._replacedText(elements.textGlyphs, elements.encodeText, element, elements.decodedText);
        if (updatedText === element) {
            this._isNotUpdated = true;
        } else {
            isChangeOperator = true;
        }
    }
    return { updatedText, isChangeOperator };
}
178
/**
 * Processes the `'` operator: performs a carriage return on the text state, then shows the
 * string through `_processTjOperator`.
 *
 * @param record Record being processed.
 * @param textState Current text state.
 * @param currentFont Forwarded to `_processTjOperator`.
 * @param page Page that owns the record.
 * @param fontCollection Fonts available to the content stream.
 * @returns The result of `_processTjOperator` when it produced one; otherwise nothing.
 */
_processSingleQuoteOperator(record: _PdfRecord, textState: _TextState, currentFont: _FontStructure, page: PdfPage, fontCollection:
Map<string, _FontStructure>): { updatedText: string, isChangeOperator: boolean } | void {
    textState._carriageReturn();
    const outcome: any = this._processTjOperator(record, textState, currentFont, page, fontCollection); // eslint-disable-line
    if (outcome === null || typeof outcome !== 'object') {
        return;
    }
    return { updatedText: outcome.updatedText, isChangeOperator: outcome.isChangeOperator };
}
187
/**
 * Processes the `"` operator: stores operand 0 as word spacing and operand 1 as character
 * spacing, performs a carriage return, then shows the string through `_processTjOperator`.
 *
 * @param record Record being processed.
 * @param textState Current text state; its spacing values are updated.
 * @param currentFont Forwarded to `_processTjOperator`.
 * @param page Page that owns the record.
 * @param fontCollection Fonts available to the content stream.
 * @returns The result of `_processTjOperator` when it produced one; otherwise nothing.
 */
_processDoubleQuoteOperator(record: _PdfRecord, textState: _TextState, currentFont: _FontStructure, page: PdfPage, fontCollection:
Map<string, _FontStructure>): { updatedText: string, isChangeOperator: boolean } | void {
    const operands: string[] = record._operands;
    textState._wordSpacing = Number(operands[0]);
    textState._charSpacing = Number(operands[1]);
    textState._carriageReturn();
    const outcome: any = this._processTjOperator(record, textState, currentFont, page, fontCollection); // eslint-disable-line
    if (outcome === null || typeof outcome !== 'object') {
        return;
    }
    return { updatedText: outcome.updatedText, isChangeOperator: outcome.isChangeOperator };
}
198
/**
 * Converts the operands of a colour operator handled by the record loop into an RGB triple.
 *
 * `g` carries a single gray level and `k` carries four CMYK components; reading three operands
 * for them (as for `rg` / `RG`) produced `NaN` entries or wrong colours.
 *
 * @param operator Colour operator: `rg`, `RG`, `g` or `k`.
 * @param operands Operands of the record.
 * @returns `[red, green, blue]` in the same numeric range as the operands.
 */
_toRgbColor(operator: string, operands: string[]): number[] {
    if (operator === 'g') {
        const gray: number = Number(operands[0]);
        return [gray, gray, gray];
    }
    if (operator === 'k') {
        // DeviceCMYK to DeviceRGB: component = 1 - min(1, colourant + black).
        const black: number = Number(operands[3]);
        return [
            1 - Math.min(1, Number(operands[0]) + black),
            1 - Math.min(1, Number(operands[1]) + black),
            1 - Math.min(1, Number(operands[2]) + black)
        ];
    }
    return [Number(operands[0]), Number(operands[1]), Number(operands[2])];
}
/**
 * Walks a list of content-stream records and processes each operator for the active mode.
 *
 * Every record is first passed to the parser's `_processCommand`. In redaction mode each record
 * that did not change the loop index is then handed to the redactor's `_optimizeContent`
 * together with the replacement text (empty when the operator was not changed).
 *
 * The loop index is modified in two places: `re` records are replaced in place by the records
 * returned from `_processRectangle` and re-visited, and `m` may skip ahead to the index
 * returned by `_findRedactPath`.
 *
 * @param recordCollection Records to process; may be modified in place by the `re` case.
 * @param page Page that owns the records.
 * @param fontCollection Fonts available to the content stream.
 * @param xObjectCollection External objects addressable through `Do`.
 * @param graphicState Graphic state updated while the records are processed.
 * @returns The rewritten content stream in redaction mode, the accumulated text in
 * text-extraction mode, the collected lines in text-line-extraction mode, otherwise nothing.
 */
_processRecordCollection(recordCollection: _PdfRecord[], page: PdfPage, fontCollection: Map<string, _FontStructure>,
xObjectCollection: Map<string, any>, graphicState: _GraphicState): _PdfContentStream | void | string | TextLine[] { // eslint-disable-line
    let textState: _TextState;
    let updatedText: string = '';
    let parser: _PdfShapeParser;
    let skipUntil: number = -1;
    const stream: _PdfContentStream = new _PdfContentStream([]);
    for (let i: number = 0; i < recordCollection.length; i++) {
        const record: _PdfRecord = recordCollection[i];
        const token: string = record._operator;
        const element: string[] = record._operands;
        this._parser._processCommand(token, element, graphicState);
        textState = graphicState._state;
        let isChangeOperator: boolean = false;
        // Never assigned here: the text handlers resolve the font themselves.
        let currentFont: _FontStructure;
        const currentIndex: number = i;
        switch (token) {
        case 'Tm':
            if (this._mode !== _TextProcessingMode.textExtraction) {
                this._parser._setTextMatrix(element, textState);
            }
            if (this._mode === _TextProcessingMode.redaction) {
                const x: number = textState._textMatrix[4];
                const y: number = textState._textMatrix[5];
                if (this._parser._isFoundText(x, y, page, this._redaction._redactionRegion)) {
                    this._isContainsRedactionText = true;
                }
                if (recordCollection.length !== i + 1 && !this._isContainsRedactionText) {
                    this._isContainsRedactionText = true;
                }
                if (!this._isContainsRedactionText && page.size[1] === y) {
                    this._isContainsRedactionText = true;
                }
            }
            break;
        case 'cm':
        {
            if (this._mode === _TextProcessingMode.redaction) {
                const x: number = parseFloat(element[4]);
                const y: number = parseFloat(element[5]);
                if (this._parser._isFoundText(x, y, page, this._redaction._redactionRegion)) {
                    this._isContainsRedactionText = true;
                }
            }
        }
            break;
        case 'BT':
            if (this._mode !== _TextProcessingMode.textExtraction) {
                this._parser._beginText(textState, this._identityMatrix);
            }
            break;
        case 'ET':
            if (this._mode === _TextProcessingMode.textExtraction) {
                this._resultantText += '\r\n';
            } else if (this._mode === _TextProcessingMode.redaction) {
                // The redaction flag and the running offsets are scoped to one text object.
                this._isContainsRedactionText = false;
                this._xPosition = 0;
                this._yPosition = 0;
            }
            break;
        case 'Tf':
            this._parser._setFont(element, textState);
            break;
        case 'Tc':
            if (this._mode !== _TextProcessingMode.textExtraction) {
                this._parser._setCharSpacing(element, textState);
            }
            break;
        case 'Tw':
            if (this._mode !== _TextProcessingMode.textExtraction) {
                this._parser._setWordSpacing(element, textState);
            }
            break;
        case 'Tz':
            if (this._mode !== _TextProcessingMode.textExtraction) {
                this._parser._setTextHorizontalScale(element, textState);
            }
            break;
        case 'TL':
            if (this._mode !== _TextProcessingMode.textExtraction) {
                this._parser._updateTextLeading(element, textState);
            }
            break;
        case 'Td':
            if (this._mode !== _TextProcessingMode.textExtraction) {
                this._parser._moveTextPlacement(element, textState);
            }
            if (this._mode === _TextProcessingMode.redaction) {
                this._xPosition = this._xPosition + parseFloat(element[0]);
                this._yPosition = this._yPosition - parseFloat(element[1]);
                if (this._parser._isFoundText(this._xPosition, this._yPosition, page, this._redaction._redactionRegion)) {
                    this._isContainsRedactionText = true;
                }
                if (recordCollection.length !== i + 1 && !this._isContainsRedactionText) {
                    // Also flag the text object when the next record shows text.
                    const temp: string = recordCollection[i + 1]._operator;
                    if (temp === 'Tj' || temp === 'TJ' || temp === '"' || temp === "'") { // eslint-disable-line
                        this._isContainsRedactionText = true;
                    }
                }
            }
            break;
        case 'TD':
            if (this._mode !== _TextProcessingMode.textExtraction) {
                this._parser._moveTextPlacementAndSetLeading(element, textState);
            }
            if (this._mode === _TextProcessingMode.redaction) {
                this._xPosition = this._xPosition + parseFloat(element[0]);
                this._yPosition = this._yPosition - parseFloat(element[1]);
                if (this._parser._isFoundText(this._xPosition, this._yPosition, page, this._redaction._redactionRegion)) {
                    this._isContainsRedactionText = true;
                }
                if (recordCollection.length !== i + 1 && !this._isContainsRedactionText) {
                    // Also flag the text object when the next record shows text.
                    const temp: string = recordCollection[i + 1]._operator;
                    if (temp === 'Tj' || temp === 'TJ' || temp === '"' || temp === "'") { // eslint-disable-line
                        this._isContainsRedactionText = true;
                    }
                }
            }
            break;
        case 'Ts':
            if (this._mode !== _TextProcessingMode.textExtraction) {
                this._parser._setTextRise(element, textState);
            }
            break;
        case 'Tj':
        {
            const result: any = this._processTjOperator(record, textState, currentFont, page, fontCollection); // eslint-disable-line
            if (record._operands) {
                if (typeof result === 'object' && result !== null) {
                    updatedText = result.updatedText;
                    isChangeOperator = result.isChangeOperator;
                }
            }
            break;
        }
        case 'TJ':
        {
            const result: any = this._processTJOperator(record, textState, currentFont, page, fontCollection); // eslint-disable-line
            if (typeof result === 'object' && result !== null) {
                updatedText = result.updatedText;
                isChangeOperator = result.isChangeOperator;
            }
            break;
        }
        case "'": // eslint-disable-line
        {
            const result: any = this._processSingleQuoteOperator(record, textState, currentFont, page, // eslint-disable-line
                                                                 fontCollection);
            if (typeof result === 'object' && result !== null) {
                updatedText = result.updatedText;
                isChangeOperator = result.isChangeOperator;
            }
            break;
        }
        case '"':
        {
            const result: any = this._processDoubleQuoteOperator(record, textState, currentFont, page, // eslint-disable-line
                                                                 fontCollection);
            if (typeof result === 'object' && result !== null) {
                updatedText = result.updatedText;
                isChangeOperator = result.isChangeOperator;
            }
            break;
        }
        case 'T*':
            if (this._mode === _TextProcessingMode.textExtraction) {
                this._resultantText += '\r\n';
            } else {
                this._parser._setNewLineWithLeading(textState);
            }
            break;
        case 'Do':
        {
            const xobject: string = element[0].replace('/', '');
            if (xObjectCollection.has(xobject)) {
                const base: any = xObjectCollection.get(xobject); //eslint-disable-line
                if (base) {
                    if (this._mode === _TextProcessingMode.textExtraction || this._mode === _TextProcessingMode.textLineExtraction) {
                        _getXObject(element, page, xObjectCollection, this, this._mode, graphicState);
                    } else if (this._mode === _TextProcessingMode.redaction) {
                        const pdfStream: any = _getXObject(element, page, xObjectCollection, this, this._mode, graphicState); // eslint-disable-line
                        // Reuse the original dictionary for the rewritten stream: drop the stale
                        // length and filter entries, then register the stream under the same reference.
                        delete base.dictionary._map.Length;
                        delete base.dictionary._map.Filter;
                        base.dictionary.update('Length', pdfStream.length);
                        pdfStream.dictionary = base.dictionary;
                        pdfStream.dictionary._updated = true;
                        const objectId: any = base.dictionary.objId; // eslint-disable-line
                        const strParts: string[] = objectId.split(' ');
                        const reference: _PdfReference = _PdfReference.get(Number(strParts[0]), Number(strParts[1]));
                        this._document._crossReference._cacheMap.set(reference, pdfStream);
                    }
                }
            }
            break;
        }
        case 'RG':
        case 'k':
        case 'g':
        case 'rg':
            // Operand count depends on the colour space; convert to RGB per operator.
            textState._textColor = this._toRgbColor(token, element);
            break;
        case 're':
        {
            parser = new _PdfShapeParser();
            const records: _PdfRecord[] = parser._processRectangle(recordCollection, i, element);
            if (record && records.length > 0) {
                // Replace the rectangle with the returned records and step back so the
                // first replacement record is visited on the next iteration.
                recordCollection.splice(i--, 1, ...records);
            }
            break;
        }
        case 'm':
            parser = new _PdfShapeParser();
            skipUntil = parser._findRedactPath(recordCollection, i, page, this._redaction, this._mode, stream);
            if (skipUntil !== -1) {
                i = skipUntil;
            }
            break;
        }
        // Records that moved the index (`re` replacement, `m` skip) are not written here.
        if (this._mode === _TextProcessingMode.redaction && currentIndex === i) {
            if (!isChangeOperator) {
                updatedText = '';
            }
            this._redaction._optimizeContent(recordCollection, i, updatedText, stream);
        }
    }
    if (this._mode === _TextProcessingMode.redaction) {
        stream.write('\r\n');
        return stream;
    } else if (this._mode === _TextProcessingMode.textExtraction) {
        return this._resultantText;
    } else if (this._mode === _TextProcessingMode.textLineExtraction) {
        return this._textLine;
    }
    return;
}
440
/**
 * Decodes a string operand and appends the result to `_resultantText`.
 *
 * @param elements Encoded string operand.
 * @param currentFont Font used for decoding.
 * @param inputText Split text of the record, forwarded to `_decodeEncodedText`.
 */
_extractTextElement(elements: string, currentFont: _FontStructure, inputText: string[]): void {
    this._resultantText += _decodeEncodedText(elements, currentFont, inputText);
}
444
/**
 * Runs the first decoded string of a single-string text operator through the text parser.
 *
 * Clears the collected words first. When `textGlyphs` is supplied (the redaction call path) the
 * glyph-aware variant of `_getTextContentItem` is used.
 *
 * @param decodedList Decoded strings of the operand; only the first entry is processed.
 * @param currentFont Font used for the text.
 * @param textState Current text state.
 * @param page Page that owns the text.
 * @param textGlyphs Glyph list to fill; selects the glyph-aware path when provided.
 * @param inputType Per-entry source strings; an entry other than `' '` is split as a hex string.
 * @returns `{ textGlyphs, decodedText, encodedText }` on the glyph-aware path,
 * `{ tempString, extractedText }` otherwise, or `undefined` when `decodedList` is empty.
 */
_getTextElementsFromTjOperator(decodedList: string[], currentFont: _FontStructure, textState: _TextState, page: PdfPage,
textGlyphs?: TextGlyph[], inputType?: string[]): any {// eslint-disable-line
    this._textWord = [];
    if (!(decodedList.length > 0)) {
        return undefined;
    }
    const startRect: { x: number, y: number, width: number, height: number } = { x: 0, y: 0, width: 0, height: 0 };
    if (typeof textGlyphs === 'undefined') {
        const item: any = this._parser._getTextContentItem(currentFont, decodedList[0], 0, textState, page, '', startRect, '', this); //eslint-disable-line
        const tempString: string = item.tempString;
        const extractedText: string = item.extractedText;
        this._fontSize = item.fontSize;
        return { tempString, extractedText };
    }
    let hex: string[] = [];
    if (inputType[0] !== ' ') {
        hex = this._parser._splitHexString(inputType[0]);
    }
    const collected: string[] = [];
    const item: any = this._parser._getTextContentItem(currentFont, decodedList[0], 0, textState, page, '', startRect, '', this, textGlyphs, hex, 0, collected); // eslint-disable-line
    const decodedText: string[] = ['(' + item.extractedText + ')'];
    const encodedText: string[] = item.encodedText;
    return { textGlyphs, decodedText, encodedText };
}
474
/**
 * Runs the decoded entries of a `TJ` operand through the text parser.
 *
 * Entries that convert to a number are treated as spacing adjustments: the most recent text
 * entry is processed with that adjustment scaled by the font size. Any other entry becomes the
 * pending text. After the loop the pending text is processed once more with no adjustment.
 *
 * @param decodedList Decoded entries of the operand (text and numeric adjustments).
 * @param currentFont Font used for the text; `_vertical` selects the sign of the adjustment.
 * @param textState Current text state.
 * @param page Page that owns the text.
 * @param textGlyphs Glyph list to fill; selects the glyph-aware path while it is defined.
 * @param inputType Per-entry source strings; an entry other than `' '` is split as a hex string.
 * @returns `{ textGlyphs, decodedText, encodeText }` on the glyph-aware path, otherwise
 * `{ tempString, extractedText }`.
 */
_getTextElementsFromTJOperator(decodedList: string[], currentFont: _FontStructure, textState: _TextState, page: PdfPage,
textGlyphs?: TextGlyph[], inputType?: string[]): any { //eslint-disable-line
    this._textWord = [];
    const direction: number = currentFont._vertical ? 1 : -1;
    const spaceFactor: number = (direction * textState._fontSize) / 1000;
    const decodedText: string[] = [];
    let encodedText: string[] = [];
    let previousRect: { x: number, y: number, width: number, height: number } = { x: 0, y: 0, width: 0, height: 0 };
    let pendingText: string = '';
    let tempString: string = '';
    let extractedText: string = '';
    let hex: string[] = [];
    let index: number = 0;
    for (let j: number = 0; j < decodedList.length; j++) {
        const word: string = decodedList[j];
        const spacing: number = Number(word);
        if (Number.isNaN(spacing)) {
            // Not a number: remember it as the text the next adjustment applies to.
            pendingText = word;
            continue;
        }
        // `textGlyphs` is reassigned from the parser result, so it is re-checked on every entry.
        if (typeof textGlyphs === 'undefined') {
            const item: any = this._parser._getTextContentItem(currentFont, pendingText, spacing * spaceFactor, textState, page, tempString, previousRect, extractedText, this); // eslint-disable-line
            tempString = item.tempString;
            extractedText = item.extractedText;
            this._fontSize = item.fontSize;
            previousRect = item.previousRect;
            continue;
        }
        if (j > 0 && inputType[j - 1] !== ' ') {
            hex = this._parser._splitHexString(inputType[j - 1]);
        }
        const item: any = this._parser._getTextContentItem(currentFont, pendingText, spacing * spaceFactor, textState, page, tempString, previousRect, extractedText, this, textGlyphs, hex, index, encodedText); // eslint-disable-line
        textGlyphs = item.textGlyphs;
        encodedText = item.encodedText;
        index = item.index;
        // Rebuild the operand: the processed text in parentheses followed by the adjustment.
        decodedText.push('(' + item.extractedText + ')', word);
    }
    if (typeof textGlyphs === 'undefined') {
        const last: any = this._parser._getTextContentItem(currentFont, pendingText, 0, textState, page, // eslint-disable-line
                                                           tempString, previousRect, extractedText, this);
        tempString = last.tempString;
        extractedText = last.extractedText;
        this._fontSize = last.fontSize;
        return { tempString, extractedText };
    }
    const lastType: string = inputType[decodedList.length - 1];
    if (lastType !== ' ') {
        hex = this._parser._splitHexString(lastType);
    }
    const last: any = this._parser._getTextContentItem(currentFont, pendingText, 0, textState, page, // eslint-disable-line
                                                       tempString, previousRect, extractedText, this, textGlyphs,
                                                       hex, index, encodedText);
    decodedText.push('(' + last.extractedText + ')');
    const encodeText: string[] = last.encodedText;
    return { textGlyphs, decodedText, encodeText };
}
544
/**
 * Adds one glyph to the word in progress and closes words at whitespace or at large gaps.
 *
 * - Whitespace glyph: the word in progress (if any) is closed, the whitespace is added as a word
 *   of its own, the accumulated extents are reset and `previousRect` becomes `null`.
 * - Other glyph with a usable `previousRect`: when the gap to the previous glyph exceeds a
 *   threshold derived from the glyph height, the word in progress is closed first.
 * - Other glyphs are then appended to the word in progress.
 *
 * @param glyph Text of the glyph.
 * @param tempString Text of the word in progress.
 * @param fontName Font name stored on created glyphs and words.
 * @param fontStyle Font style stored on created glyphs and words.
 * @param page Page that owns the glyph; its rotation selects the gap axis.
 * @param rotation Text rotation; `90` is handled like a page rotated by 270 degrees.
 * @param textColor Colour stored on the created glyph.
 * @param fontSize Font size stored on created glyphs and words.
 * @param textBounds Bounds of the glyph.
 * @param previousRect Bounds of the previous glyph; `null` or omitted disables the gap check.
 * @returns The updated `tempString` and `previousRect`.
 */
_splitWords(glyph: string, tempString: string, fontName: string, fontStyle: PdfFontStyle , page: PdfPage,
rotation?: number, textColor?: number[], fontSize?: number, textBounds?:
{ x: number, y: number, width: number, height: number },
previousRect?: { x: number, y: number, width: number, height: number }): any { //eslint-disable-line
    const isSpace: boolean = /\s/.test(glyph);
    const currentRect: any = textBounds; //eslint-disable-line
    // Closes a word: its bounds are the bounds of a path holding every glyph rectangle.
    const pushWord = (text: string, glyphs: TextGlyph[]): void => {
        const word: TextWord = new TextWord();
        word._text = text;
        word._glyphs = glyphs;
        const outline: PdfPath = new PdfPath();
        for (const item of glyphs) {
            const rect = item._bounds;
            outline.addRectangle(rect[0], rect[1], rect[2], rect[3]);
        }
        word._bounds = outline._getBounds();
        word._fontName = fontName;
        word._fontStyle = fontStyle;
        word._fontSize = fontSize;
        this._textWord.push(word);
    };
    // Builds the glyph record for the current character.
    const createGlyph = (): TextGlyph => {
        const created: TextGlyph = new TextGlyph();
        created._text = glyph;
        created._bounds = [currentRect.x, currentRect.y, currentRect.width, currentRect.height];
        created._fontName = fontName;
        created._fontStyle = fontStyle;
        created._fontSize = fontSize;
        created._color = textColor;
        // Whitespace and non-whitespace glyphs now agree: the flag follows the page rotation.
        created._isRotated = page.rotation !== PdfRotationAngle.angle0;
        return created;
    };
    if (isSpace) {
        if (tempString) {
            pushWord(tempString, this._textGlyph);
            this._textGlyph = [];
            tempString = '';
        }
        this._textGlyph.push(createGlyph());
        pushWord(glyph, this._textGlyph);
        this._width = 0;
        this._height = 0;
        this._textGlyph = [];
        previousRect = null;
        return { tempString, previousRect };
    }
    // `previousRect` is optional, so treat an omitted value like `null`.
    if (previousRect != null && previousRect.width > 0) {
        // Gap threshold: 7% of the glyph height, at least 2; 2.5 when the minimum applies and the gap is positive.
        let spacingFactor: number = currentRect.height * 0.07;
        if (spacingFactor < 2) {
            spacingFactor = 2;
        }
        let difference: number;
        if (page.rotation === PdfRotationAngle.angle90) {
            difference = previousRect.y + previousRect.height - currentRect.y;
        } else if (page.rotation === PdfRotationAngle.angle270 || rotation === 90) {
            difference = currentRect.y + currentRect.height - previousRect.y;
        } else if (page.rotation === PdfRotationAngle.angle180) {
            difference = currentRect.x + currentRect.width - previousRect.x;
        } else {
            difference = previousRect.x + previousRect.width - currentRect.x;
        }
        if (difference > 0 && spacingFactor === 2) {
            spacingFactor = 2.5;
        }
        if (Math.abs(difference) > spacingFactor) {
            pushWord(tempString, this._textGlyph);
            this._width = 0;
            this._height = 0;
            this._textGlyph = [];
            tempString = '';
            previousRect = { x: 0, y: 0, width: 0, height: 0 };
        }
    }
    this._textGlyph.push(createGlyph());
    if (page.rotation === PdfRotationAngle.angle90 || page.rotation === PdfRotationAngle.angle270 || rotation === 90) {
        this._height += currentRect.height;
    } else {
        this._width += currentRect.width;
    }
    tempString += glyph;
    return { tempString, previousRect };
}
661
- }
@@ -1,6 +0,0 @@
1
/**
 * Ambient declaration of the processing modes understood by the content parser helper.
 */
export declare enum _TextProcessingMode {
    /** Plain text extraction. */
    textExtraction = 0,
    /** Text layout. */
    textLayOut = 1,
    /** Redaction of page content. */
    redaction = 2,
    /** Extraction of text lines. */
    textLineExtraction = 3
}
@@ -1,6 +0,0 @@
1
/**
 * Processing modes understood by the content parser helper.
 *
 * The numeric values are written out so they stay fixed if members are reordered.
 */
export enum _TextProcessingMode {
    /** Plain text extraction. */
    textExtraction = 0,
    /** Text layout. */
    textLayOut = 1,
    /** Redaction of page content. */
    redaction = 2,
    /** Extraction of text lines. */
    textLineExtraction = 3
}