@syncfusion/ej2-pdf-data-extract 30.1.42 → 30.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ej2-pdf-data-extract.umd.min.js +1 -2
- package/dist/global/ej2-pdf-data-extract.min.js +1 -2
- package/dist/global/index.d.ts +1 -2
- package/package.json +14 -46
- package/dist/ts/index.d.ts +0 -20
- package/dist/ts/index.ts +0 -20
- package/dist/ts/pdf-data-extract/core/content-parser-helper.d.ts +0 -62
- package/dist/ts/pdf-data-extract/core/content-parser-helper.ts +0 -640
- package/dist/ts/pdf-data-extract/core/enum.d.ts +0 -6
- package/dist/ts/pdf-data-extract/core/enum.ts +0 -6
- package/dist/ts/pdf-data-extract/core/graphic-state.d.ts +0 -33
- package/dist/ts/pdf-data-extract/core/graphic-state.ts +0 -106
- package/dist/ts/pdf-data-extract/core/pdf-data-extractor.d.ts +0 -210
- package/dist/ts/pdf-data-extract/core/pdf-data-extractor.ts +0 -977
- package/dist/ts/pdf-data-extract/core/pdf-text-parser.d.ts +0 -67
- package/dist/ts/pdf-data-extract/core/pdf-text-parser.ts +0 -495
- package/dist/ts/pdf-data-extract/core/redaction/index.d.ts +0 -4
- package/dist/ts/pdf-data-extract/core/redaction/index.ts +0 -4
- package/dist/ts/pdf-data-extract/core/redaction/pdf-redaction-processor.d.ts +0 -55
- package/dist/ts/pdf-data-extract/core/redaction/pdf-redaction-processor.ts +0 -592
- package/dist/ts/pdf-data-extract/core/redaction/pdf-redaction-region.d.ts +0 -281
- package/dist/ts/pdf-data-extract/core/redaction/pdf-redaction-region.ts +0 -342
- package/dist/ts/pdf-data-extract/core/redaction/pdf-redactor.d.ts +0 -129
- package/dist/ts/pdf-data-extract/core/redaction/pdf-redactor.ts +0 -322
- package/dist/ts/pdf-data-extract/core/redaction/text-glyph-mapper.d.ts +0 -12
- package/dist/ts/pdf-data-extract/core/redaction/text-glyph-mapper.ts +0 -153
- package/dist/ts/pdf-data-extract/core/text-extraction/binary-cmap-reader.d.ts +0 -24
- package/dist/ts/pdf-data-extract/core/text-extraction/binary-cmap-reader.ts +0 -281
- package/dist/ts/pdf-data-extract/core/text-extraction/cmap.d.ts +0 -50
- package/dist/ts/pdf-data-extract/core/text-extraction/cmap.ts +0 -565
- package/dist/ts/pdf-data-extract/core/text-extraction/compact-font-parser.d.ts +0 -191
- package/dist/ts/pdf-data-extract/core/text-extraction/compact-font-parser.ts +0 -1928
- package/dist/ts/pdf-data-extract/core/text-extraction/encoding-utils.d.ts +0 -102
- package/dist/ts/pdf-data-extract/core/text-extraction/encoding-utils.ts +0 -5780
- package/dist/ts/pdf-data-extract/core/text-extraction/font-structure.d.ts +0 -167
- package/dist/ts/pdf-data-extract/core/text-extraction/font-structure.ts +0 -1842
- package/dist/ts/pdf-data-extract/core/text-extraction/font-tables.d.ts +0 -5
- package/dist/ts/pdf-data-extract/core/text-extraction/font-tables.ts +0 -16
- package/dist/ts/pdf-data-extract/core/text-extraction/font-utils.d.ts +0 -18
- package/dist/ts/pdf-data-extract/core/text-extraction/font-utils.ts +0 -630
- package/dist/ts/pdf-data-extract/core/text-extraction/glyph.d.ts +0 -93
- package/dist/ts/pdf-data-extract/core/text-extraction/glyph.ts +0 -622
- package/dist/ts/pdf-data-extract/core/text-extraction/index.d.ts +0 -10
- package/dist/ts/pdf-data-extract/core/text-extraction/index.ts +0 -10
- package/dist/ts/pdf-data-extract/core/text-extraction/matrix-helper.d.ts +0 -38
- package/dist/ts/pdf-data-extract/core/text-extraction/matrix-helper.ts +0 -150
- package/dist/ts/pdf-data-extract/core/text-extraction/metrics.d.ts +0 -16
- package/dist/ts/pdf-data-extract/core/text-extraction/metrics.ts +0 -2938
- package/dist/ts/pdf-data-extract/core/text-structure.d.ts +0 -628
- package/dist/ts/pdf-data-extract/core/text-structure.ts +0 -668
- package/dist/ts/pdf-data-extract/core/utils.d.ts +0 -99
- package/dist/ts/pdf-data-extract/core/utils.ts +0 -626
- package/dist/ts/pdf-data-extract/index.d.ts +0 -20
- package/dist/ts/pdf-data-extract/index.ts +0 -20
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
/**
 * Type declaration for `_TextState`.
 *
 * Declares the matrices, font fields, spacing values and colour carried by a
 * text state, together with the helpers that update or copy them. Only the
 * shapes are described here; the method bodies are not part of this declaration.
 */
export declare class _TextState {
    // Matrices, each declared as a plain array of numbers.
    _ctm: number[];
    // Font selection.
    _fontName: string;
    _fontSize: number;
    _font: any;
    _fontMatrix: number[];
    _textMatrix: number[];
    _textLineMatrix: number[];
    // Spacing and layout scalars.
    _charSpacing: number;
    _wordSpacing: number;
    _leading: number;
    _textHScale: number;
    _textRise: number;
    // Template matrices and colour components.
    _identityMatrix: number[];
    _fontIdentityMatrix: number[];
    _textColor: number[];
    constructor();
    /**
     * Sets the text matrix from six components.
     *
     * @param {number} a First matrix component.
     * @param {number} b Second matrix component.
     * @param {number} c Third matrix component.
     * @param {number} d Fourth matrix component.
     * @param {number} e Fifth matrix component.
     * @param {number} f Sixth matrix component.
     * @returns {void} Nothing.
     */
    _setTextMatrix(a: number, b: number, c: number, d: number, e: number, f: number): void;
    /**
     * Sets the text line matrix from six components.
     *
     * @param {number} a First matrix component.
     * @param {number} b Second matrix component.
     * @param {number} c Third matrix component.
     * @param {number} d Fourth matrix component.
     * @param {number} e Fifth matrix component.
     * @param {number} f Sixth matrix component.
     * @returns {void} Nothing.
     */
    _setTextLineMatrix(a: number, b: number, c: number, d: number, e: number, f: number): void;
    /**
     * Translates the text matrix.
     *
     * @param {number} x Horizontal offset.
     * @param {number} y Vertical offset.
     * @returns {void} Nothing.
     */
    _translateTextMatrix(x: number, y: number): void;
    /**
     * Translates the text line matrix.
     *
     * @param {number} x Horizontal offset.
     * @param {number} y Vertical offset.
     * @returns {void} Nothing.
     */
    _translateTextLineMatrix(x: number, y: number): void;
    /**
     * Performs a carriage return on the text state.
     *
     * @returns {void} Nothing.
     */
    _carriageReturn(): void;
    /**
     * Produces a copy of this state (declared as `any`).
     *
     * @returns {any} The copied state.
     */
    _clone(): any;
}
|
|
25
|
-
/**
 * Type declaration for `_GraphicState`.
 *
 * Declares a current `_TextState`, a stack of states, and the save / restore /
 * transform helpers. Only the shapes are described here; the method bodies are
 * not part of this declaration.
 */
export declare class _GraphicState {
    // Currently active state.
    _state: _TextState;
    // Stack used by `_save` / `_restore` (declared as `any`).
    _stateStack: any;
    /**
     * Creates a graphic state.
     *
     * @param {_TextState} currentState Optional state to start from.
     */
    constructor(currentState?: _TextState);
    /**
     * Saves the current state.
     *
     * @returns {void} Nothing.
     */
    _save(): void;
    /**
     * Restores a previously saved state.
     *
     * @returns {void} Nothing.
     */
    _restore(): void;
    /**
     * Applies a matrix to the current state.
     *
     * @param {number[]} args Matrix components.
     * @returns {void} Nothing.
     */
    _transform(args: number[]): void;
    /**
     * Combines two matrices.
     *
     * @param {number[]} m1 First matrix.
     * @param {number[]} m2 Second matrix.
     * @returns {number[]} The combined matrix.
     */
    _transformMatrix(m1: number[], m2: number[]): number[];
}
|
|
@@ -1,106 +0,0 @@
|
|
|
1
|
-
/**
 * Mutable text state: transformation matrices, font selection, spacing values
 * and text colour.
 *
 * All matrices are six-element arrays `[a, b, c, d, e, f]`. Every matrix owned
 * by an instance is a private copy, so writing into one state never alters the
 * identity templates or another (saved) state.
 */
export class _TextState {
    _ctm: number[];
    _fontName: string;
    _fontSize: number;
    _font: any; //eslint-disable-line
    _fontMatrix: number[];
    _textMatrix: number[];
    _textLineMatrix: number[];
    _charSpacing: number;
    _wordSpacing: number;
    _leading: number;
    _textHScale: number;
    _textRise: number;
    _identityMatrix: number[] = [1, 0, 0, 1, 0, 0];
    _fontIdentityMatrix: number[] = [0.001, 0, 0, 0.001, 0, 0];
    _textColor: number[] = [];
    /**
     * Initializes every field to its default: identity matrices, no font,
     * zero spacing / leading / rise and a horizontal scale of 1.
     */
    constructor() {
        // Copy the templates instead of aliasing them, so an in-place write to
        // `_ctm` or `_fontMatrix` cannot corrupt `_identityMatrix` /
        // `_fontIdentityMatrix` (the text matrices below were already copied).
        this._ctm = this._identityMatrix.slice();
        this._fontName = null;
        this._fontSize = 0;
        this._font = null;
        this._fontMatrix = this._fontIdentityMatrix.slice();
        this._textMatrix = this._identityMatrix.slice();
        this._textLineMatrix = this._identityMatrix.slice();
        this._charSpacing = 0;
        this._wordSpacing = 0;
        this._leading = 0;
        this._textHScale = 1;
        this._textRise = 0;
    }
    /**
     * Overwrites the six components of the text matrix in place (the array
     * instance is kept, only its elements change).
     *
     * @param {number} a Component at index 0.
     * @param {number} b Component at index 1.
     * @param {number} c Component at index 2.
     * @param {number} d Component at index 3.
     * @param {number} e Component at index 4.
     * @param {number} f Component at index 5.
     * @returns {void} Nothing.
     */
    _setTextMatrix(a: number, b: number, c: number, d: number, e: number, f: number): void {
        const matrix: number[] = this._textMatrix;
        matrix[0] = a;
        matrix[1] = b;
        matrix[2] = c;
        matrix[3] = d;
        matrix[4] = e;
        matrix[5] = f;
    }
    /**
     * Overwrites the six components of the text line matrix in place (the
     * array instance is kept, only its elements change).
     *
     * @param {number} a Component at index 0.
     * @param {number} b Component at index 1.
     * @param {number} c Component at index 2.
     * @param {number} d Component at index 3.
     * @param {number} e Component at index 4.
     * @param {number} f Component at index 5.
     * @returns {void} Nothing.
     */
    _setTextLineMatrix(a: number, b: number, c: number, d: number, e: number, f: number): void {
        const matrix: number[] = this._textLineMatrix;
        matrix[0] = a;
        matrix[1] = b;
        matrix[2] = c;
        matrix[3] = d;
        matrix[4] = e;
        matrix[5] = f;
    }
    /**
     * Moves the text matrix by `(x, y)` expressed in its own coordinate
     * system: only the translation components (indices 4 and 5) change.
     *
     * @param {number} x Offset along the first axis.
     * @param {number} y Offset along the second axis.
     * @returns {void} Nothing.
     */
    _translateTextMatrix(x: number, y: number): void {
        const matrix: number[] = this._textMatrix;
        matrix[4] = matrix[0] * x + matrix[2] * y + matrix[4];
        matrix[5] = matrix[1] * x + matrix[3] * y + matrix[5];
    }
    /**
     * Moves the text line matrix by `(x, y)` expressed in its own coordinate
     * system: only the translation components (indices 4 and 5) change.
     *
     * @param {number} x Offset along the first axis.
     * @param {number} y Offset along the second axis.
     * @returns {void} Nothing.
     */
    _translateTextLineMatrix(x: number, y: number): void {
        const matrix: number[] = this._textLineMatrix;
        matrix[4] = matrix[0] * x + matrix[2] * y + matrix[4];
        matrix[5] = matrix[1] * x + matrix[3] * y + matrix[5];
    }
    /**
     * Moves the text line matrix down by the current leading and restarts the
     * text matrix from a copy of the result.
     *
     * @returns {void} Nothing.
     */
    _carriageReturn(): void {
        this._translateTextLineMatrix(0, -this._leading);
        this._textMatrix = this._textLineMatrix.slice();
    }
    /**
     * Creates a state that inherits every field from this one through the
     * prototype chain and owns private copies of all array-valued fields.
     *
     * @returns {any} The new state; it is an `instanceof _TextState`.
     */
    _clone(): any { //eslint-disable-line
        const clone: any = Object.create(this); //eslint-disable-line
        clone._textMatrix = this._textMatrix.slice();
        clone._textLineMatrix = this._textLineMatrix.slice();
        clone._fontMatrix = this._fontMatrix.slice();
        // `_ctm` and `_textColor` used to stay shared with the source state, so
        // an in-place write on the clone leaked into the state that gets
        // restored later. Copy them too; the guards keep a null/undefined value
        // (assignable because the fields are public) from throwing here.
        clone._ctm = this._ctm ? this._ctm.slice() : this._ctm;
        clone._textColor = this._textColor ? this._textColor.slice() : this._textColor;
        return clone;
    }
}
|
|
71
|
-
/**
 * Holds the active `_TextState` together with a stack of previously saved
 * states, and offers save / restore / transform operations on them.
 */
export class _GraphicState {
    _state: _TextState;
    _stateStack: any; //eslint-disable-line
    /**
     * Starts from the supplied state, or from a fresh `_TextState` when none
     * (or a falsy value) is given. The stack of saved states starts empty.
     *
     * @param {_TextState} currentState Optional state to adopt as the active one.
     */
    constructor(currentState?: _TextState) {
        this._state = currentState ? currentState : new _TextState();
        this._stateStack = [];
    }
    /**
     * Pushes the active state onto the stack and continues with a clone of it.
     *
     * @returns {void} Nothing.
     */
    _save(): void {
        const snapshot: _TextState = this._state;
        this._stateStack.push(snapshot);
        this._state = snapshot._clone();
    }
    /**
     * Pops the most recently saved state and makes it active. When nothing
     * was saved the active state is left as it is.
     *
     * @returns {void} Nothing.
     */
    _restore(): void {
        const previous: _TextState = this._stateStack.pop();
        if (!previous) {
            return;
        }
        this._state = previous;
    }
    /**
     * Replaces the active state's `_ctm` with the product of the current
     * `_ctm` and `args` (see `_transformMatrix`).
     *
     * @param {number[]} args Six-component matrix to apply.
     * @returns {void} Nothing.
     */
    _transform(args: number[]): void {
        const state: _TextState = this._state;
        state._ctm = this._transformMatrix(state._ctm, args);
    }
    /**
     * Multiplies two six-component matrices `[a, b, c, d, e, f]` and returns
     * the result as a new array; neither input is modified.
     *
     * @param {number[]} m1 Left-hand matrix.
     * @param {number[]} m2 Right-hand matrix.
     * @returns {number[]} The six components of the product.
     */
    _transformMatrix(m1: number[], m2: number[]): number[] {
        const a1: number = m1[0];
        const b1: number = m1[1];
        const c1: number = m1[2];
        const d1: number = m1[3];
        const a2: number = m2[0];
        const b2: number = m2[1];
        const c2: number = m2[2];
        const d2: number = m2[3];
        const e2: number = m2[4];
        const f2: number = m2[5];
        const product: number[] = [];
        product.push(a1 * a2 + c1 * b2);
        product.push(b1 * a2 + d1 * b2);
        product.push(a1 * c2 + c1 * d2);
        product.push(b1 * c2 + d1 * d2);
        product.push(a1 * e2 + c1 * f2 + m1[4]);
        product.push(b1 * e2 + d1 * f2 + m1[5]);
        return product;
    }
}
|
|
@@ -1,210 +0,0 @@
|
|
|
1
|
-
import { _MatrixHelper, _TransformationStack } from './text-extraction/matrix-helper';
|
|
2
|
-
import { TextGlyph, TextLine, TextWord } from './text-structure';
|
|
3
|
-
import { _PdfContentParserHelper } from './content-parser-helper';
|
|
4
|
-
import { _GraphicState } from './graphic-state';
|
|
5
|
-
import { _FontStructure } from './text-extraction';
|
|
6
|
-
import { _PdfCrossReference, _PdfRecord, PdfDocument, PdfFontStyle, PdfPage } from '@syncfusion/ej2-pdf';
|
|
7
|
-
import { _PdfTextParser } from './pdf-text-parser';
|
|
8
|
-
/**
 * Represents a utility for extracting data from a PDF document.
 * ```typescript
 * // Load an existing PDF document
 * let document: PdfDocument = new PdfDocument(data, password);
 * // Initialize a new instance of the `PdfDataExtractor` class
 * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
 * // Extract `TextLine` from the PDF document.
 * let textLines: Array<TextLine> = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
 * // Save the document
 * document.save('output.pdf');
 * // Destroy the document
 * document.destroy();
 * ```
 */
export declare class PdfDataExtractor {
    _document: PdfDocument;
    _rotation: number;
    _textMatrix: _MatrixHelper;
    _textLeading: number;
    _textColor: number[];
    _textLineMatrix: _MatrixHelper;
    _extractedText: string;
    _hasLeading: boolean;
    _hasNoSpacing: boolean;
    _textLines: TextLine[];
    _transformations: _TransformationStack;
    _identityMatrix: number[];
    _currentLocation: number[];
    _currentFont: string;
    _tempBoundingRectangle: {
        x: number;
        y: number;
        width: number;
        height: number;
    };
    _boundingRectangle: {
        x: number;
        y: number;
        width: number;
        height: number;
    };
    _previousRect: {
        x: number;
        y: number;
        width: number;
        height: number;
    };
    _fontSize: number;
    _textHorizontalScaling: number;
    _previousTextMatrix: _MatrixHelper;
    _previousFontSize: number;
    _previousExtractText: string;
    _arise: number;
    _isTextMatrix: boolean;
    _currentTextMatrix: _MatrixHelper;
    _text: string;
    _hasTj: boolean;
    _hasTm: boolean;
    _hasET: boolean;
    _characterSpacing: number;
    _wordSpacing: number;
    _hasBeginMarkedContent: boolean;
    _differenceX: number;
    _textScale: number;
    _textRise: number;
    _width: number;
    _height: number;
    _crossReference: _PdfCrossReference;
    _resultantText: string;
    _currentExtractedText: string;
    _initialTransForm: _MatrixHelper;
    _textGlyph: TextGlyph[];
    _textWord: TextWord[];
    _textLine: TextLine[];
    _textExtraction: string[];
    _fontCollection: Map<string, _FontStructure>;
    _ctm: _MatrixHelper;
    _objects: _MatrixHelper[];
    _isLayout: boolean;
    _isRotatePage: boolean;
    _isExtractTextLines: boolean;
    _contentParser: _PdfContentParserHelper;
    _parser: _PdfTextParser;
    /**
     * Initialize a new instance of the `PdfDataExtractor` class
     *
     * ```typescript
     * // Load an existing PDF document
     * let document: PdfDocument = new PdfDocument(data1);
     * // Initialize a new instance of the `PdfDataExtractor` class
     * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
     * // Extract text from the PDF document based on its layout
     * let text: string = extractor.extractText({ isLayout: true });
     * // Save the output PDF
     * document.save('Output.pdf');
     * // Destroy the document
     * document.destroy();
     * ```
     *
     * @param {PdfDocument} document PDF document
     */
    constructor(document: PdfDocument);
    /**
     * Extract text from the PDF document
     *
     * @returns {string} The extracted text
     *
     * ```typescript
     * // Load an existing PDF document
     * let document: PdfDocument = new PdfDocument(data1);
     * // Initialize a new instance of the `PdfDataExtractor` class
     * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
     * // Extract text content from the PDF document.
     * let text: string = extractor.extractText();
     * // Save the output PDF
     * document.save('Output.pdf');
     * // Destroy the document
     * document.destroy();
     * ```
     */
    extractText(): string;
    /**
     * Extract text from the page range given by a start and an end page index,
     * optionally based on the page layout
     *
     * @param {object} options Options to specify the page range to be selected and to extract the text.
     * @returns {string} The extracted text
     *
     * ```typescript
     * // Load an existing PDF document
     * let document: PdfDocument = new PdfDocument(data1);
     * // Initialize a new instance of the `PdfDataExtractor` class
     * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
     * // Extract text content from the PDF document.
     * let text: string = extractor.extractText({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
     * // Save the output PDF
     * document.save('Output.pdf');
     * // Destroy the document
     * document.destroy();
     * ```
     */
    extractText(options: {
        isLayout?: boolean;
        startPageIndex?: number;
        endPageIndex?: number;
    }): string;
    _renderTextAsLayOut(recordCollection: _PdfRecord[], page: PdfPage, fontCollection: Map<string, _FontStructure>, xObjectCollection: Map<string, any>): any;
    _renderText(page: PdfPage, fontCollection: Map<string, _FontStructure>, xObjectCollection: Map<string, any>, graphicState: _GraphicState): any;
    _setTextLeading(textLeading: number): void;
    _moveToNextLine(tx: number, ty: number, textLineMatrix: _MatrixHelper): void;
    _updateTextMatrix(tj: number): _MatrixHelper;
    _updateTextLineMatrix(char: string, width: number): void;
    _renderTextElementFromTJ(elements: string[], page: PdfPage, fontCollection: Map<string, _FontStructure>): string;
    _getTextHeight(font: _FontStructure, textMatrix: _MatrixHelper): number;
    _transform(m1: number[], m2: number[]): number[];
    _buildTextContentStream(elements: string[], page: PdfPage, fontCollection: Map<string, _FontStructure>): void;
    _getTextWidth(text: string, extraSpacing: number, currentFont: _FontStructure, page: PdfPage, tempString: string): string;
    _splitWords(glyph: string, tempString: string, fontName: string, fontStyle: PdfFontStyle, page: PdfPage, rotation?: number, textColor?: number[]): string;
    _getTextRenderingMatrix(): _MatrixHelper;
    _renderFont(fontElements: string[]): void;
    /**
     * Extract `TextLine` collection from the PDF document.
     *
     * @returns {TextLine[]} The extracted text lines
     *
     * ```typescript
     * // Load an existing PDF document
     * let document: PdfDocument = new PdfDocument(data1);
     * // Initialize a new instance of the `PdfDataExtractor` class
     * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
     * // Extract `TextLine` from the PDF document.
     * let textCollection: TextLine[] = extractor.extractTextLines();
     * // Save the output PDF
     * document.save('Output.pdf');
     * // Destroy the document
     * document.destroy();
     * ```
     */
    extractTextLines(): TextLine[];
    /**
     * Extract `TextLine` collection from the page range given by a start and an end page index.
     *
     * @param {object} options The options to specify the page range to be selected.
     * @returns {TextLine[]} The extracted text lines
     *
     * ```typescript
     * // Load an existing PDF document
     * let document: PdfDocument = new PdfDocument(data1);
     * // Initialize a new instance of the `PdfDataExtractor` class
     * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
     * // Extract `TextLine` from the PDF document.
     * let textCollection: TextLine[] = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
     * // Save the output PDF
     * document.save('Output.pdf');
     * // Destroy the document
     * document.destroy();
     * ```
     */
    extractTextLines(options: {
        startPageIndex?: number;
        endPageIndex?: number;
    }): TextLine[];
    _processPages(startIndex: number, endIndex: number): void;
}
|