@syncfusion/ej2-pdf-data-extract 31.1.17 → 31.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/dist/ej2-pdf-data-extract.umd.min.js +1 -1
  2. package/dist/global/ej2-pdf-data-extract.min.js +1 -1
  3. package/dist/global/index.d.ts +1 -1
  4. package/package.json +12 -44
  5. package/dist/ts/index.d.ts +0 -23
  6. package/dist/ts/index.ts +0 -23
  7. package/dist/ts/pdf-data-extract/core/content-parser-helper.d.ts +0 -62
  8. package/dist/ts/pdf-data-extract/core/content-parser-helper.ts +0 -661
  9. package/dist/ts/pdf-data-extract/core/enum.d.ts +0 -6
  10. package/dist/ts/pdf-data-extract/core/enum.ts +0 -6
  11. package/dist/ts/pdf-data-extract/core/graphic-state.d.ts +0 -33
  12. package/dist/ts/pdf-data-extract/core/graphic-state.ts +0 -106
  13. package/dist/ts/pdf-data-extract/core/pdf-data-extractor.d.ts +0 -210
  14. package/dist/ts/pdf-data-extract/core/pdf-data-extractor.ts +0 -998
  15. package/dist/ts/pdf-data-extract/core/pdf-text-parser.d.ts +0 -63
  16. package/dist/ts/pdf-data-extract/core/pdf-text-parser.ts +0 -498
  17. package/dist/ts/pdf-data-extract/core/redaction/index.d.ts +0 -7
  18. package/dist/ts/pdf-data-extract/core/redaction/index.ts +0 -7
  19. package/dist/ts/pdf-data-extract/core/redaction/pdf-path-segment.d.ts +0 -24
  20. package/dist/ts/pdf-data-extract/core/redaction/pdf-path-segment.ts +0 -59
  21. package/dist/ts/pdf-data-extract/core/redaction/pdf-redaction-processor.d.ts +0 -55
  22. package/dist/ts/pdf-data-extract/core/redaction/pdf-redaction-processor.ts +0 -590
  23. package/dist/ts/pdf-data-extract/core/redaction/pdf-redaction-region.d.ts +0 -281
  24. package/dist/ts/pdf-data-extract/core/redaction/pdf-redaction-region.ts +0 -342
  25. package/dist/ts/pdf-data-extract/core/redaction/pdf-redactor.d.ts +0 -172
  26. package/dist/ts/pdf-data-extract/core/redaction/pdf-redactor.ts +0 -414
  27. package/dist/ts/pdf-data-extract/core/redaction/pdf-shape-redaction.d.ts +0 -45
  28. package/dist/ts/pdf-data-extract/core/redaction/pdf-shape-redaction.ts +0 -281
  29. package/dist/ts/pdf-data-extract/core/redaction/shape-parser-helper.d.ts +0 -32
  30. package/dist/ts/pdf-data-extract/core/redaction/shape-parser-helper.ts +0 -479
  31. package/dist/ts/pdf-data-extract/core/redaction/text-glyph-mapper.d.ts +0 -12
  32. package/dist/ts/pdf-data-extract/core/redaction/text-glyph-mapper.ts +0 -153
  33. package/dist/ts/pdf-data-extract/core/text-extraction/binary-cmap-reader.d.ts +0 -24
  34. package/dist/ts/pdf-data-extract/core/text-extraction/binary-cmap-reader.ts +0 -281
  35. package/dist/ts/pdf-data-extract/core/text-extraction/cmap.d.ts +0 -50
  36. package/dist/ts/pdf-data-extract/core/text-extraction/cmap.ts +0 -565
  37. package/dist/ts/pdf-data-extract/core/text-extraction/compact-font-parser.d.ts +0 -191
  38. package/dist/ts/pdf-data-extract/core/text-extraction/compact-font-parser.ts +0 -1928
  39. package/dist/ts/pdf-data-extract/core/text-extraction/encoding-utils.d.ts +0 -102
  40. package/dist/ts/pdf-data-extract/core/text-extraction/encoding-utils.ts +0 -5780
  41. package/dist/ts/pdf-data-extract/core/text-extraction/font-structure.d.ts +0 -167
  42. package/dist/ts/pdf-data-extract/core/text-extraction/font-structure.ts +0 -1842
  43. package/dist/ts/pdf-data-extract/core/text-extraction/font-tables.d.ts +0 -5
  44. package/dist/ts/pdf-data-extract/core/text-extraction/font-tables.ts +0 -16
  45. package/dist/ts/pdf-data-extract/core/text-extraction/font-utils.d.ts +0 -18
  46. package/dist/ts/pdf-data-extract/core/text-extraction/font-utils.ts +0 -630
  47. package/dist/ts/pdf-data-extract/core/text-extraction/glyph.d.ts +0 -93
  48. package/dist/ts/pdf-data-extract/core/text-extraction/glyph.ts +0 -622
  49. package/dist/ts/pdf-data-extract/core/text-extraction/index.d.ts +0 -10
  50. package/dist/ts/pdf-data-extract/core/text-extraction/index.ts +0 -10
  51. package/dist/ts/pdf-data-extract/core/text-extraction/matrix-helper.d.ts +0 -38
  52. package/dist/ts/pdf-data-extract/core/text-extraction/matrix-helper.ts +0 -150
  53. package/dist/ts/pdf-data-extract/core/text-extraction/metrics.d.ts +0 -16
  54. package/dist/ts/pdf-data-extract/core/text-extraction/metrics.ts +0 -2938
  55. package/dist/ts/pdf-data-extract/core/text-structure.d.ts +0 -628
  56. package/dist/ts/pdf-data-extract/core/text-structure.ts +0 -668
  57. package/dist/ts/pdf-data-extract/core/utils.d.ts +0 -99
  58. package/dist/ts/pdf-data-extract/core/utils.ts +0 -626
  59. package/dist/ts/pdf-data-extract/index.d.ts +0 -23
  60. package/dist/ts/pdf-data-extract/index.ts +0 -23
@@ -1,33 +0,0 @@
1
- export declare class _TextState {
2
- _ctm: number[];
3
- _fontName: string;
4
- _fontSize: number;
5
- _font: any;
6
- _fontMatrix: number[];
7
- _textMatrix: number[];
8
- _textLineMatrix: number[];
9
- _charSpacing: number;
10
- _wordSpacing: number;
11
- _leading: number;
12
- _textHScale: number;
13
- _textRise: number;
14
- _identityMatrix: number[];
15
- _fontIdentityMatrix: number[];
16
- _textColor: number[];
17
- constructor();
18
- _setTextMatrix(a: number, b: number, c: number, d: number, e: number, f: number): void;
19
- _setTextLineMatrix(a: number, b: number, c: number, d: number, e: number, f: number): void;
20
- _translateTextMatrix(x: number, y: number): void;
21
- _translateTextLineMatrix(x: number, y: number): void;
22
- _carriageReturn(): void;
23
- _clone(): any;
24
- }
25
- export declare class _GraphicState {
26
- _state: _TextState;
27
- _stateStack: any;
28
- constructor(currentState?: _TextState);
29
- _save(): void;
30
- _restore(): void;
31
- _transform(args: number[]): void;
32
- _transformMatrix(m1: number[], m2: number[]): number[];
33
- }
@@ -1,106 +0,0 @@
1
- export class _TextState {
2
- _ctm: number[];
3
- _fontName: string;
4
- _fontSize: number;
5
- _font: any; //eslint-disable-line
6
- _fontMatrix: number[];
7
- _textMatrix: number[];
8
- _textLineMatrix: number[];
9
- _charSpacing: number;
10
- _wordSpacing: number;
11
- _leading: number;
12
- _textHScale: number;
13
- _textRise: number;
14
- _identityMatrix: number[] = [1, 0, 0, 1, 0, 0];
15
- _fontIdentityMatrix: number[] = [0.001, 0, 0, 0.001, 0, 0];
16
- _textColor: number[] = [];
17
- constructor() {
18
- this._ctm = this._identityMatrix;
19
- this._fontName = null;
20
- this._fontSize = 0;
21
- this._font = null;
22
- this._fontMatrix = this._fontIdentityMatrix;
23
- this._textMatrix = this._identityMatrix.slice();
24
- this._textLineMatrix = this._identityMatrix.slice();
25
- this._charSpacing = 0;
26
- this._wordSpacing = 0;
27
- this._leading = 0;
28
- this._textHScale = 1;
29
- this._textRise = 0;
30
- }
31
- _setTextMatrix(a: number, b: number, c: number, d: number, e: number, f: number): void {
32
- const matrix: number[] = this._textMatrix;
33
- matrix[0] = a;
34
- matrix[1] = b;
35
- matrix[2] = c;
36
- matrix[3] = d;
37
- matrix[4] = e;
38
- matrix[5] = f;
39
- }
40
- _setTextLineMatrix(a: number, b: number, c: number, d: number, e: number, f: number): void {
41
- const matrix: number[] = this._textLineMatrix;
42
- matrix[0] = a;
43
- matrix[1] = b;
44
- matrix[2] = c;
45
- matrix[3] = d;
46
- matrix[4] = e;
47
- matrix[5] = f;
48
- }
49
- _translateTextMatrix(x: number, y: number): void {
50
- const matrix: number[] = this._textMatrix;
51
- matrix[4] = matrix[0] * x + matrix[2] * y + matrix[4];
52
- matrix[5] = matrix[1] * x + matrix[3] * y + matrix[5];
53
- }
54
- _translateTextLineMatrix(x: number, y: number): void {
55
- const matrix: number[] = this._textLineMatrix;
56
- matrix[4] = matrix[0] * x + matrix[2] * y + matrix[4];
57
- matrix[5] = matrix[1] * x + matrix[3] * y + matrix[5];
58
- }
59
- _carriageReturn(): void {
60
- this._translateTextLineMatrix(0, -this._leading);
61
- this._textMatrix = this._textLineMatrix.slice();
62
- }
63
- _clone(): any { //eslint-disable-line
64
- const clone: any = Object.create(this); //eslint-disable-line
65
- clone._textMatrix = this._textMatrix.slice();
66
- clone._textLineMatrix = this._textLineMatrix.slice();
67
- clone._fontMatrix = this._fontMatrix.slice();
68
- return clone;
69
- }
70
- }
71
- export class _GraphicState {
72
- _state: _TextState ;
73
- _stateStack: any; //eslint-disable-line
74
- constructor(currentState?: _TextState) {
75
- if (!currentState) {
76
- this._state = new _TextState();
77
- } else {
78
- this._state = currentState;
79
- }
80
- this._stateStack = [];
81
- }
82
- _save(): void {
83
- const oldState: _TextState = this._state;
84
- this._stateStack.push(this._state);
85
- this._state = oldState._clone();
86
- }
87
- _restore(): void {
88
- const prev: _TextState = this._stateStack.pop();
89
- if (prev) {
90
- this._state = prev;
91
- }
92
- }
93
- _transform(args: number[]): void {
94
- this._state._ctm = this._transformMatrix(this._state._ctm, args);
95
- }
96
- _transformMatrix(m1: number[], m2: number[]): number[] {
97
- return [
98
- m1[0] * m2[0] + m1[2] * m2[1],
99
- m1[1] * m2[0] + m1[3] * m2[1],
100
- m1[0] * m2[2] + m1[2] * m2[3],
101
- m1[1] * m2[2] + m1[3] * m2[3],
102
- m1[0] * m2[4] + m1[2] * m2[5] + m1[4],
103
- m1[1] * m2[4] + m1[3] * m2[5] + m1[5]
104
- ];
105
- }
106
- }
@@ -1,210 +0,0 @@
1
- import { _MatrixHelper, _TransformationStack } from './text-extraction/matrix-helper';
2
- import { TextGlyph, TextLine, TextWord } from './text-structure';
3
- import { _PdfContentParserHelper } from './content-parser-helper';
4
- import { _GraphicState } from './graphic-state';
5
- import { _FontStructure } from './text-extraction';
6
- import { _PdfCrossReference, _PdfRecord, PdfDocument, PdfFontStyle, PdfPage } from '@syncfusion/ej2-pdf';
7
- import { _PdfTextParser } from './pdf-text-parser';
8
- /**
9
- * Represents a utility for extracting data from a PDF document.
10
- * ```typescript
11
- * // Load an existing PDF document
12
- * let document: PdfDocument = new PdfDocument(data, password);
13
- * // Initialize a new instance of the `PdfDataExtractor` class
14
- * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
15
- * // Extract `TextLine` from the PDF document.
16
- * let textLines: Array<TextLine> = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount-1});
17
- * // Save the document
18
- * document.save('output.pdf');
19
- * // Destroy the document
20
- * document.destroy();
21
- * ```
22
- */
23
- export declare class PdfDataExtractor {
24
- _document: PdfDocument;
25
- _rotation: number;
26
- _textMatrix: _MatrixHelper;
27
- _textLeading: number;
28
- _textColor: number[];
29
- _textLineMatrix: _MatrixHelper;
30
- _extractedText: string;
31
- _hasLeading: boolean;
32
- _hasNoSpacing: boolean;
33
- _textLines: TextLine[];
34
- _transformations: _TransformationStack;
35
- _identityMatrix: number[];
36
- _currentLocation: number[];
37
- _currentFont: string;
38
- _tempBoundingRectangle: {
39
- x: number;
40
- y: number;
41
- width: number;
42
- height: number;
43
- };
44
- _boundingRectangle: {
45
- x: number;
46
- y: number;
47
- width: number;
48
- height: number;
49
- };
50
- _previousRect: {
51
- x: number;
52
- y: number;
53
- width: number;
54
- height: number;
55
- };
56
- _fontSize: number;
57
- _textHorizontalScaling: number;
58
- _previousTextMatrix: _MatrixHelper;
59
- _previousFontSize: number;
60
- _previousExtractText: string;
61
- _arise: number;
62
- _isTextMatrix: boolean;
63
- _currentTextMatrix: _MatrixHelper;
64
- _text: string;
65
- _hasTj: boolean;
66
- _hasTm: boolean;
67
- _hasET: boolean;
68
- _characterSpacing: number;
69
- _wordSpacing: number;
70
- _hasBeginMarkedContent: boolean;
71
- _differenceX: number;
72
- _textScale: number;
73
- _textRise: number;
74
- _width: number;
75
- _height: number;
76
- _crossReference: _PdfCrossReference;
77
- _resultantText: string;
78
- _currentExtractedText: string;
79
- _initialTransForm: _MatrixHelper;
80
- _textGlyph: TextGlyph[];
81
- _textWord: TextWord[];
82
- _textLine: TextLine[];
83
- _textExtraction: string[];
84
- _fontCollection: Map<string, _FontStructure>;
85
- _ctm: _MatrixHelper;
86
- _objects: _MatrixHelper[];
87
- _isLayout: boolean;
88
- _isRotatePage: boolean;
89
- _isExtractTextLines: boolean;
90
- _contentParser: _PdfContentParserHelper;
91
- _parser: _PdfTextParser;
92
- /**
93
- * Initialize a new instance of the `PdfDataExtractor` class
94
- *
95
- * @param {PdfDocument} document PDF document
96
- * ```typescript
97
- * // Load an existing PDF document
98
- * let document: PdfDocument = new PdfDocument(data1);
99
- * // Initialize a new instance of the `PdfDataExtractor` class
100
- * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
101
- * // Extracts text from the PDF Page based on its layout
102
- * let text: string = extractor.extractText({isLayout: true});
103
- * // Save the output PDF
104
- * document.save('Output.pdf');
105
- * // Destroy the document
106
- * document.destroy();
107
- * ```
108
- */
109
- constructor(document: PdfDocument);
110
- /**
111
- * Extract text from the PDF document
112
- *
113
- * @returns {string} The extracted text
114
- *
115
- * ```typescript
116
- * // Load an existing PDF document
117
- * let document: PdfDocument = new PdfDocument(data1);
118
- * // Initialize a new instance of the `PdfDataExtractor` class
119
- * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
120
- * // Extract text content from the PDF document.
121
- * let text: string = extractor.extractText();
122
- * // Save the output PDF
123
- * document.save('Output.pdf');
124
- * // Destroy the document
125
- * document.destroy();
126
- * ```
127
- */
128
- extractText(): string;
129
- /**
130
- * Extract text from the page range specified by the start and end page indexes
131
- *
132
- * @param {object} options Options specifying the page range to extract from (`startPageIndex`, `endPageIndex`) and whether to extract the text based on its layout (`isLayout`).
133
- * @returns {string} The extracted text
134
- *
135
- * ```typescript
136
- * // Load an existing PDF document
137
- * let document: PdfDocument = new PdfDocument(data1);
138
- * // Initialize a new instance of the `PdfDataExtractor` class
139
- * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
140
- * // Extract text content from the PDF document.
141
- * let text: string = extractor.extractText({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
142
- * // Save the output PDF
143
- * document.save('Output.pdf');
144
- * // Destroy the document
145
- * document.destroy();
146
- * ```
147
- */
148
- extractText(options: {
149
- isLayout?: boolean;
150
- startPageIndex?: number;
151
- endPageIndex?: number;
152
- }): string;
153
- _renderTextAsLayOut(recordCollection: _PdfRecord[], page: PdfPage, fontCollection: Map<string, _FontStructure>, xObjectCollection: Map<string, any>): any;
154
- _renderText(page: PdfPage, fontCollection: Map<string, _FontStructure>, xObjectCollection: Map<string, any>, graphicState: _GraphicState): any;
155
- _setTextLeading(textLeading: number): void;
156
- _moveToNextLine(tx: number, ty: number, textLineMatrix: _MatrixHelper): void;
157
- _updateTextMatrix(tj: number): _MatrixHelper;
158
- _updateTextLineMatrix(char: string, width: number): void;
159
- _renderTextElementFromTJ(elements: string[], page: PdfPage, fontCollection: Map<string, _FontStructure>): string;
160
- _getTextHeight(font: _FontStructure, textMatrix: _MatrixHelper): number;
161
- _transform(m1: number[], m2: number[]): number[];
162
- _buildTextContentStream(elements: string[], page: PdfPage, fontCollection: Map<string, _FontStructure>): void;
163
- _getTextWidth(text: string, extraSpacing: number, currentFont: _FontStructure, page: PdfPage, tempString: string): string;
164
- _splitWords(glyph: string, tempString: string, fontName: string, fontStyle: PdfFontStyle, page: PdfPage, rotation?: number, textColor?: number[]): string;
165
- _getTextRenderingMatrix(): _MatrixHelper;
166
- _renderFont(fontElements: string[]): void;
167
- /**
168
- * Extract `TextLine` collection from the PDF document.
169
- *
170
- * @returns {TextLine[]} The extracted textLines
171
- *
172
- * ```typescript
173
- * // Load an existing PDF document
174
- * let document: PdfDocument = new PdfDocument(data1);
175
- * // Initialize a new instance of the `PdfDataExtractor` class
176
- * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
177
- * // Extract `TextLine` from the PDF document.
178
- * let textCollection: TextLine[] = extractor.extractTextLines();
179
- * // Save the output PDF
180
- * document.save('Output.pdf');
181
- * // Destroy the document
182
- * document.destroy();
183
- * ```
184
- */
185
- extractTextLines(): TextLine[];
186
- /**
187
- * Extract `TextLine` from the PDF document.
188
- *
189
- * @param {object} options The options to specify the page range to be selected.
190
- * @returns {TextLine[]} The extracted textLines
191
- *
192
- * ```typescript
193
- * // Load an existing PDF document
194
- * let document: PdfDocument = new PdfDocument(data1);
195
- * // Initialize a new instance of the `PdfDataExtractor` class
196
- * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
197
- * // Extract `TextLine` from the PDF document.
198
- * let textCollection: TextLine[] = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount - 1});
199
- * // Save the output PDF
200
- * document.save('Output.pdf');
201
- * // Destroy the document
202
- * document.destroy();
203
- * ```
204
- */
205
- extractTextLines(options: {
206
- startPageIndex?: number;
207
- endPageIndex?: number;
208
- }): TextLine[];
209
- _processPages(startIndex: number, endIndex: number): void;
210
- }