@syncfusion/ej2-pdf-data-extract 30.1.41 → 30.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ej2-pdf-data-extract.umd.min.js +1 -2
- package/dist/global/ej2-pdf-data-extract.min.js +1 -2
- package/dist/global/index.d.ts +1 -2
- package/package.json +14 -46
- package/dist/ts/index.d.ts +0 -20
- package/dist/ts/index.ts +0 -20
- package/dist/ts/pdf-data-extract/core/content-parser-helper.d.ts +0 -62
- package/dist/ts/pdf-data-extract/core/content-parser-helper.ts +0 -640
- package/dist/ts/pdf-data-extract/core/enum.d.ts +0 -6
- package/dist/ts/pdf-data-extract/core/enum.ts +0 -6
- package/dist/ts/pdf-data-extract/core/graphic-state.d.ts +0 -33
- package/dist/ts/pdf-data-extract/core/graphic-state.ts +0 -106
- package/dist/ts/pdf-data-extract/core/pdf-data-extractor.d.ts +0 -210
- package/dist/ts/pdf-data-extract/core/pdf-data-extractor.ts +0 -977
- package/dist/ts/pdf-data-extract/core/pdf-text-parser.d.ts +0 -67
- package/dist/ts/pdf-data-extract/core/pdf-text-parser.ts +0 -495
- package/dist/ts/pdf-data-extract/core/redaction/index.d.ts +0 -4
- package/dist/ts/pdf-data-extract/core/redaction/index.ts +0 -4
- package/dist/ts/pdf-data-extract/core/redaction/pdf-redaction-processor.d.ts +0 -55
- package/dist/ts/pdf-data-extract/core/redaction/pdf-redaction-processor.ts +0 -592
- package/dist/ts/pdf-data-extract/core/redaction/pdf-redaction-region.d.ts +0 -281
- package/dist/ts/pdf-data-extract/core/redaction/pdf-redaction-region.ts +0 -342
- package/dist/ts/pdf-data-extract/core/redaction/pdf-redactor.d.ts +0 -129
- package/dist/ts/pdf-data-extract/core/redaction/pdf-redactor.ts +0 -322
- package/dist/ts/pdf-data-extract/core/redaction/text-glyph-mapper.d.ts +0 -12
- package/dist/ts/pdf-data-extract/core/redaction/text-glyph-mapper.ts +0 -153
- package/dist/ts/pdf-data-extract/core/text-extraction/binary-cmap-reader.d.ts +0 -24
- package/dist/ts/pdf-data-extract/core/text-extraction/binary-cmap-reader.ts +0 -281
- package/dist/ts/pdf-data-extract/core/text-extraction/cmap.d.ts +0 -50
- package/dist/ts/pdf-data-extract/core/text-extraction/cmap.ts +0 -565
- package/dist/ts/pdf-data-extract/core/text-extraction/compact-font-parser.d.ts +0 -191
- package/dist/ts/pdf-data-extract/core/text-extraction/compact-font-parser.ts +0 -1928
- package/dist/ts/pdf-data-extract/core/text-extraction/encoding-utils.d.ts +0 -102
- package/dist/ts/pdf-data-extract/core/text-extraction/encoding-utils.ts +0 -5780
- package/dist/ts/pdf-data-extract/core/text-extraction/font-structure.d.ts +0 -167
- package/dist/ts/pdf-data-extract/core/text-extraction/font-structure.ts +0 -1842
- package/dist/ts/pdf-data-extract/core/text-extraction/font-tables.d.ts +0 -5
- package/dist/ts/pdf-data-extract/core/text-extraction/font-tables.ts +0 -16
- package/dist/ts/pdf-data-extract/core/text-extraction/font-utils.d.ts +0 -18
- package/dist/ts/pdf-data-extract/core/text-extraction/font-utils.ts +0 -630
- package/dist/ts/pdf-data-extract/core/text-extraction/glyph.d.ts +0 -93
- package/dist/ts/pdf-data-extract/core/text-extraction/glyph.ts +0 -622
- package/dist/ts/pdf-data-extract/core/text-extraction/index.d.ts +0 -10
- package/dist/ts/pdf-data-extract/core/text-extraction/index.ts +0 -10
- package/dist/ts/pdf-data-extract/core/text-extraction/matrix-helper.d.ts +0 -38
- package/dist/ts/pdf-data-extract/core/text-extraction/matrix-helper.ts +0 -150
- package/dist/ts/pdf-data-extract/core/text-extraction/metrics.d.ts +0 -16
- package/dist/ts/pdf-data-extract/core/text-extraction/metrics.ts +0 -2938
- package/dist/ts/pdf-data-extract/core/text-structure.d.ts +0 -628
- package/dist/ts/pdf-data-extract/core/text-structure.ts +0 -668
- package/dist/ts/pdf-data-extract/core/utils.d.ts +0 -99
- package/dist/ts/pdf-data-extract/core/utils.ts +0 -626
- package/dist/ts/pdf-data-extract/index.d.ts +0 -20
- package/dist/ts/pdf-data-extract/index.ts +0 -20
|
@@ -1,668 +0,0 @@
|
|
|
1
|
-
import { PdfFontStyle } from '@syncfusion/ej2-pdf';
|
|
2
|
-
/**
 * Represents a single line of text extracted from a PDF page, exposing its text,
 * words, font information, bounds and page index through read-only accessors.
 * ```typescript
 * // Load an existing PDF document
 * let document: PdfDocument = new PdfDocument(data);
 * // Initialize a new instance of the `PdfDataExtractor` class
 * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
 * // Extract the text lines from every page of the PDF document
 * let textLines: Array<TextLine> = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
 * textLines.forEach((textLine: TextLine) => {
 *     // Gets the bounds of the text line
 *     let lineBounds: number[] = textLine.bounds;
 *     // Gets the text of the line
 *     let line: string = textLine.text;
 *     // Gets the index of the page the line was extracted from
 *     let pageIndex: number = textLine.pageIndex;
 *     // Gets the words that make up the line
 *     let words: TextWord[] = textLine.words;
 *     // Gets the name of the font used for the line
 *     let fontName: string = textLine.fontName;
 *     // Gets the font style used for the line
 *     let fontStyle: PdfFontStyle = textLine.fontStyle;
 *     // Gets the font size used for the line
 *     let fontSize: number = textLine.fontSize;
 * });
 * // Save the document
 * document.save('output.pdf');
 * // Destroy the document
 * document.destroy();
 * ```
 */
export class TextLine {
    // Backing fields for the public accessors below. Only `_wordCollection`
    // has an initializer, so the other accessors return `undefined` until
    // the corresponding field is assigned.
    _text: string;
    _wordCollection: TextWord[] = [];
    _fontName: string;
    _fontSize: number;
    _fontStyle: PdfFontStyle;
    _bounds: number[];
    _pageIndex: number;
    /**
     * Gets the text of this line as extracted from the PDF page.
     *
     * @returns {string} The single line of extracted text.
     *
     * ```typescript
     * // Load an existing PDF document and create a data extractor for it
     * let document: PdfDocument = new PdfDocument(data);
     * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
     * // Extract the text lines from every page of the document
     * let textLines: Array<TextLine> = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
     * textLines.forEach((textLine: TextLine) => {
     *     // Gets the text of the line
     *     let line: string = textLine.text;
     * });
     * // Save and destroy the document
     * document.save('output.pdf');
     * document.destroy();
     * ```
     */
    get text(): string {
        return this._text;
    }
    /**
     * Gets the collection of words that belong to this line.
     * The stored array itself is returned, not a copy.
     *
     * @returns {TextWord[]} The words extracted for this line.
     *
     * ```typescript
     * // Load an existing PDF document and create a data extractor for it
     * let document: PdfDocument = new PdfDocument(data);
     * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
     * // Extract the text lines from every page of the document
     * let textLines: Array<TextLine> = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
     * textLines.forEach((textLine: TextLine) => {
     *     // Gets the words that make up the line
     *     let words: TextWord[] = textLine.words;
     * });
     * // Save and destroy the document
     * document.save('output.pdf');
     * document.destroy();
     * ```
     */
    get words(): TextWord[] {
        return this._wordCollection;
    }
    /**
     * Gets the name of the font used for this line of text.
     *
     * @returns {string} The font name of the line.
     *
     * ```typescript
     * // Load an existing PDF document and create a data extractor for it
     * let document: PdfDocument = new PdfDocument(data);
     * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
     * // Extract the text lines from every page of the document
     * let textLines: Array<TextLine> = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
     * textLines.forEach((textLine: TextLine) => {
     *     // Gets the name of the font used for the line
     *     let fontName: string = textLine.fontName;
     * });
     * // Save and destroy the document
     * document.save('output.pdf');
     * document.destroy();
     * ```
     */
    get fontName(): string {
        return this._fontName;
    }
    /**
     * Gets the font size used for this line of text.
     *
     * @returns {number} The font size of the line.
     *
     * ```typescript
     * // Load an existing PDF document and create a data extractor for it
     * let document: PdfDocument = new PdfDocument(data);
     * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
     * // Extract the text lines from every page of the document
     * let textLines: Array<TextLine> = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
     * textLines.forEach((textLine: TextLine) => {
     *     // Gets the font size used for the line
     *     let fontSize: number = textLine.fontSize;
     * });
     * // Save and destroy the document
     * document.save('output.pdf');
     * document.destroy();
     * ```
     */
    get fontSize(): number {
        return this._fontSize;
    }
    /**
     * Gets the font style used for this line of text.
     *
     * @returns {PdfFontStyle} The font style of the line.
     *
     * ```typescript
     * // Load an existing PDF document and create a data extractor for it
     * let document: PdfDocument = new PdfDocument(data);
     * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
     * // Extract the text lines from every page of the document
     * let textLines: Array<TextLine> = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
     * textLines.forEach((textLine: TextLine) => {
     *     // Gets the font style used for the line
     *     let fontStyle: PdfFontStyle = textLine.fontStyle;
     * });
     * // Save and destroy the document
     * document.save('output.pdf');
     * document.destroy();
     * ```
     */
    get fontStyle(): PdfFontStyle {
        return this._fontStyle;
    }
    /**
     * Gets the bounds of this text line.
     * The stored array itself is returned, not a copy.
     *
     * @returns {number[]} The bounds of the text line.
     *
     * ```typescript
     * // Load an existing PDF document and create a data extractor for it
     * let document: PdfDocument = new PdfDocument(data);
     * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
     * // Extract the text lines from every page of the document
     * let textLines: Array<TextLine> = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
     * textLines.forEach((textLine: TextLine) => {
     *     // Gets the bounds of the text line
     *     let lineBounds: number[] = textLine.bounds;
     * });
     * // Save and destroy the document
     * document.save('output.pdf');
     * document.destroy();
     * ```
     */
    get bounds(): number[] {
        return this._bounds;
    }
    /**
     * Gets the index of the page from which this text line was extracted.
     *
     * @returns {number} The page index of the text line.
     *
     * ```typescript
     * // Load an existing PDF document and create a data extractor for it
     * let document: PdfDocument = new PdfDocument(data);
     * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
     * // Extract the text lines from every page of the document
     * let textLines: Array<TextLine> = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
     * textLines.forEach((textLine: TextLine) => {
     *     // Gets the page index of the text line
     *     let pageIndex: number = textLine.pageIndex;
     * });
     * // Save and destroy the document
     * document.save('output.pdf');
     * document.destroy();
     * ```
     */
    get pageIndex(): number {
        return this._pageIndex;
    }
}
|
|
216
|
-
/**
 * Represents a single word of text extracted from a PDF page, exposing its text,
 * glyphs, font information and bounds through read-only accessors.
 * ```typescript
 * // Load an existing PDF document
 * let document: PdfDocument = new PdfDocument(data);
 * // Initialize a new instance of the `PdfDataExtractor` class
 * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
 * // Extract the text lines from every page of the PDF document
 * let textLines: Array<TextLine> = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
 * textLines.forEach((textLine: TextLine) => {
 *     textLine.words.forEach((textWord: TextWord) => {
 *         // Gets the bounds of the text word
 *         let wordBounds: number[] = textWord.bounds;
 *         // Gets the text of the word
 *         let word: string = textWord.text;
 *         // Gets the glyphs that make up the word
 *         let glyphs: TextGlyph[] = textWord.glyphs;
 *         // Gets the name of the font used for the word
 *         let wordFontName: string = textWord.fontName;
 *         // Gets the style of the font used for the word
 *         let wordFontStyle: PdfFontStyle = textWord.fontStyle;
 *         // Gets the size of the font used for the word
 *         let wordFontSize: number = textWord.fontSize;
 *     });
 * });
 * // Save the document
 * document.save('output.pdf');
 * // Destroy the document
 * document.destroy();
 * ```
 */
export class TextWord {
    // Backing fields for the public accessors below. Only `_glyphs` has an
    // initializer, so the other accessors return `undefined` until the
    // corresponding field is assigned.
    _text: string;
    _bounds: number[];
    _glyphs: TextGlyph[] = [];
    _fontName: string;
    _fontSize: number;
    _fontStyle: PdfFontStyle;
    // Not read by any accessor in this class.
    // NOTE(review): presumably used by the extraction code elsewhere - confirm.
    _words: string;
    /**
     * Gets the text of this word as extracted from the PDF page.
     *
     * @returns {string} The single word of extracted text.
     *
     * ```typescript
     * // Load an existing PDF document and create a data extractor for it
     * let document: PdfDocument = new PdfDocument(data);
     * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
     * // Extract the text lines from every page of the document
     * let textLines: Array<TextLine> = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
     * textLines.forEach((textLine: TextLine) => {
     *     textLine.words.forEach((textWord: TextWord) => {
     *         // Gets the text of the word
     *         let word: string = textWord.text;
     *     });
     * });
     * // Save and destroy the document
     * document.save('output.pdf');
     * document.destroy();
     * ```
     */
    get text(): string {
        return this._text;
    }
    /**
     * Gets the collection of glyphs that belong to this word.
     * The stored array itself is returned, not a copy.
     *
     * @returns {TextGlyph[]} The glyphs extracted for this word.
     *
     * ```typescript
     * // Load an existing PDF document and create a data extractor for it
     * let document: PdfDocument = new PdfDocument(data);
     * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
     * // Extract the text lines from every page of the document
     * let textLines: Array<TextLine> = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
     * textLines.forEach((textLine: TextLine) => {
     *     textLine.words.forEach((textWord: TextWord) => {
     *         // Gets the glyphs that make up the word
     *         let glyphs: TextGlyph[] = textWord.glyphs;
     *     });
     * });
     * // Save and destroy the document
     * document.save('output.pdf');
     * document.destroy();
     * ```
     */
    get glyphs(): TextGlyph[] {
        return this._glyphs;
    }
    /**
     * Gets the name of the font used for this word.
     *
     * @returns {string} The font name of the word.
     *
     * ```typescript
     * // Load an existing PDF document and create a data extractor for it
     * let document: PdfDocument = new PdfDocument(data);
     * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
     * // Extract the text lines from every page of the document
     * let textLines: Array<TextLine> = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
     * textLines.forEach((textLine: TextLine) => {
     *     textLine.words.forEach((textWord: TextWord) => {
     *         // Gets the name of the font used for the word
     *         let wordFontName: string = textWord.fontName;
     *     });
     * });
     * // Save and destroy the document
     * document.save('output.pdf');
     * document.destroy();
     * ```
     */
    get fontName(): string {
        return this._fontName;
    }
    /**
     * Gets the size of the font used for this word.
     *
     * @returns {number} The font size of the word.
     *
     * ```typescript
     * // Load an existing PDF document and create a data extractor for it
     * let document: PdfDocument = new PdfDocument(data);
     * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
     * // Extract the text lines from every page of the document
     * let textLines: Array<TextLine> = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
     * textLines.forEach((textLine: TextLine) => {
     *     textLine.words.forEach((textWord: TextWord) => {
     *         // Gets the size of the font used for the word
     *         let wordFontSize: number = textWord.fontSize;
     *     });
     * });
     * // Save and destroy the document
     * document.save('output.pdf');
     * document.destroy();
     * ```
     */
    get fontSize(): number {
        return this._fontSize;
    }
    /**
     * Gets the style of the font used for this word.
     *
     * @returns {PdfFontStyle} The font style of the word.
     *
     * ```typescript
     * // Load an existing PDF document and create a data extractor for it
     * let document: PdfDocument = new PdfDocument(data);
     * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
     * // Extract the text lines from every page of the document
     * let textLines: Array<TextLine> = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
     * textLines.forEach((textLine: TextLine) => {
     *     textLine.words.forEach((textWord: TextWord) => {
     *         // Gets the style of the font used for the word
     *         let wordFontStyle: PdfFontStyle = textWord.fontStyle;
     *     });
     * });
     * // Save and destroy the document
     * document.save('output.pdf');
     * document.destroy();
     * ```
     */
    get fontStyle(): PdfFontStyle {
        return this._fontStyle;
    }
    /**
     * Gets the bounds of this text word.
     * The stored array itself is returned, not a copy.
     *
     * @returns {number[]} The bounds of the text word.
     *
     * ```typescript
     * // Load an existing PDF document and create a data extractor for it
     * let document: PdfDocument = new PdfDocument(data);
     * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
     * // Extract the text lines from every page of the document
     * let textLines: Array<TextLine> = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
     * textLines.forEach((textLine: TextLine) => {
     *     textLine.words.forEach((textWord: TextWord) => {
     *         // Gets the bounds of the text word
     *         let wordBounds: number[] = textWord.bounds;
     *     });
     * });
     * // Save and destroy the document
     * document.save('output.pdf');
     * document.destroy();
     * ```
     */
    get bounds(): number[] {
        return this._bounds;
    }
}
|
|
418
|
-
/**
 * Represents a single glyph of text extracted from a PDF page, exposing its
 * character, font information, bounds, color and rotation flag through
 * read-only accessors.
 * ```typescript
 * // Load an existing PDF document
 * let document: PdfDocument = new PdfDocument(data);
 * // Initialize a new instance of the `PdfDataExtractor` class
 * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
 * // Extract the text lines from every page of the PDF document
 * let textLines: Array<TextLine> = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
 * textLines.forEach((textLine: TextLine) => {
 *     textLine.words.forEach((textWord: TextWord) => {
 *         textWord.glyphs.forEach((textGlyph: TextGlyph) => {
 *             // Gets the bounds of the text glyph
 *             let glyphBounds: number[] = textGlyph.bounds;
 *             // Gets the single character of the glyph
 *             let character: string = textGlyph.text;
 *             // Gets the font size used for the character
 *             let fontSize: number = textGlyph.fontSize;
 *             // Gets the name of the font used for the character
 *             let fontName: string = textGlyph.fontName;
 *             // Gets the font style used for the character
 *             let fontStyle: PdfFontStyle = textGlyph.fontStyle;
 *             // Gets the text color of the glyph
 *             let color: number[] = textGlyph.color;
 *             // Gets the value indicating whether the glyph is rotated or not
 *             let isRotated: boolean = textGlyph.isRotated;
 *         });
 *     });
 * });
 * // Save the document
 * document.save('output.pdf');
 * // Destroy the document
 * document.destroy();
 * ```
 */
export class TextGlyph {
    // Backing fields for the public accessors below, mixed with internal
    // state (`_width`, `_isHex`, `_charSpacing`, `_wordSpacing`, `_isReplace`)
    // that no accessor in this class exposes.
    // NOTE(review): the internal fields are presumably maintained by the
    // extraction code elsewhere - confirm against callers.
    _text: string;
    _width: number;
    _fontName: string;
    _isHex: boolean = false;
    _charSpacing: number;
    _wordSpacing: number;
    _fontSize: number;
    _isReplace: boolean = false;
    _fontStyle: PdfFontStyle;
    _bounds: number[];
    _color: number[];
    _isRotated: boolean;
    /**
     * Gets the single character represented by this glyph.
     *
     * @returns {string} The single character of extracted text.
     *
     * ```typescript
     * // Load an existing PDF document and create a data extractor for it
     * let document: PdfDocument = new PdfDocument(data);
     * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
     * // Extract the text lines from every page of the document
     * let textLines: Array<TextLine> = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
     * textLines.forEach((textLine: TextLine) => {
     *     textLine.words.forEach((textWord: TextWord) => {
     *         textWord.glyphs.forEach((textGlyph: TextGlyph) => {
     *             // Gets the single character of the glyph
     *             let character: string = textGlyph.text;
     *         });
     *     });
     * });
     * // Save and destroy the document
     * document.save('output.pdf');
     * document.destroy();
     * ```
     */
    get text(): string {
        return this._text;
    }
    /**
     * Gets the name of the font used for this character.
     *
     * @returns {string} The font name of the glyph.
     *
     * ```typescript
     * // Load an existing PDF document and create a data extractor for it
     * let document: PdfDocument = new PdfDocument(data);
     * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
     * // Extract the text lines from every page of the document
     * let textLines: Array<TextLine> = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
     * textLines.forEach((textLine: TextLine) => {
     *     textLine.words.forEach((textWord: TextWord) => {
     *         textWord.glyphs.forEach((textGlyph: TextGlyph) => {
     *             // Gets the name of the font used for the character
     *             let fontName: string = textGlyph.fontName;
     *         });
     *     });
     * });
     * // Save and destroy the document
     * document.save('output.pdf');
     * document.destroy();
     * ```
     */
    get fontName(): string {
        return this._fontName;
    }
    /**
     * Gets the font size used for this character.
     *
     * @returns {number} The font size of the glyph.
     *
     * ```typescript
     * // Load an existing PDF document and create a data extractor for it
     * let document: PdfDocument = new PdfDocument(data);
     * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
     * // Extract the text lines from every page of the document
     * let textLines: Array<TextLine> = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
     * textLines.forEach((textLine: TextLine) => {
     *     textLine.words.forEach((textWord: TextWord) => {
     *         textWord.glyphs.forEach((textGlyph: TextGlyph) => {
     *             // Gets the font size used for the character
     *             let fontSize: number = textGlyph.fontSize;
     *         });
     *     });
     * });
     * // Save and destroy the document
     * document.save('output.pdf');
     * document.destroy();
     * ```
     */
    get fontSize(): number {
        return this._fontSize;
    }
    /**
     * Gets the font style used for this character.
     *
     * @returns {PdfFontStyle} The font style of the glyph.
     *
     * ```typescript
     * // Load an existing PDF document and create a data extractor for it
     * let document: PdfDocument = new PdfDocument(data);
     * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
     * // Extract the text lines from every page of the document
     * let textLines: Array<TextLine> = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
     * textLines.forEach((textLine: TextLine) => {
     *     textLine.words.forEach((textWord: TextWord) => {
     *         textWord.glyphs.forEach((textGlyph: TextGlyph) => {
     *             // Gets the font style used for the character
     *             let fontStyle: PdfFontStyle = textGlyph.fontStyle;
     *         });
     *     });
     * });
     * // Save and destroy the document
     * document.save('output.pdf');
     * document.destroy();
     * ```
     */
    get fontStyle(): PdfFontStyle {
        return this._fontStyle;
    }
    /**
     * Gets the bounds of this text glyph.
     * The stored array itself is returned, not a copy.
     *
     * @returns {number[]} The bounds of the text glyph.
     *
     * ```typescript
     * // Load an existing PDF document and create a data extractor for it
     * let document: PdfDocument = new PdfDocument(data);
     * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
     * // Extract the text lines from every page of the document
     * let textLines: Array<TextLine> = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
     * textLines.forEach((textLine: TextLine) => {
     *     textLine.words.forEach((textWord: TextWord) => {
     *         textWord.glyphs.forEach((textGlyph: TextGlyph) => {
     *             // Gets the bounds of the text glyph
     *             let glyphBounds: number[] = textGlyph.bounds;
     *         });
     *     });
     * });
     * // Save and destroy the document
     * document.save('output.pdf');
     * document.destroy();
     * ```
     */
    get bounds(): number[] {
        return this._bounds;
    }
    /**
     * Gets the color of this text glyph.
     * The stored array itself is returned, not a copy.
     *
     * @returns {number[]} Text color as an array of numbers.
     *
     * ```typescript
     * // Load an existing PDF document and create a data extractor for it
     * let document: PdfDocument = new PdfDocument(data);
     * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
     * // Extract the text lines from every page of the document
     * let textLines: Array<TextLine> = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
     * textLines.forEach((textLine: TextLine) => {
     *     textLine.words.forEach((textWord: TextWord) => {
     *         textWord.glyphs.forEach((textGlyph: TextGlyph) => {
     *             // Gets the text color of the glyph
     *             let color: number[] = textGlyph.color;
     *         });
     *     });
     * });
     * // Save and destroy the document
     * document.save('output.pdf');
     * document.destroy();
     * ```
     */
    get color(): number[] {
        return this._color;
    }
    /**
     * Gets the value indicating whether this glyph is rotated or not.
     *
     * @returns {boolean} The rotated value of the text glyph.
     *
     * ```typescript
     * // Load an existing PDF document and create a data extractor for it
     * let document: PdfDocument = new PdfDocument(data);
     * let extractor: PdfDataExtractor = new PdfDataExtractor(document);
     * // Extract the text lines from every page of the document
     * let textLines: Array<TextLine> = extractor.extractTextLines({ startPageIndex: 0, endPageIndex: document.pageCount - 1 });
     * textLines.forEach((textLine: TextLine) => {
     *     textLine.words.forEach((textWord: TextWord) => {
     *         textWord.glyphs.forEach((textGlyph: TextGlyph) => {
     *             // Gets the value indicating whether the glyph is rotated or not
     *             let isRotated: boolean = textGlyph.isRotated;
     *         });
     *     });
     * });
     * // Save and destroy the document
     * document.save('output.pdf');
     * document.destroy();
     * ```
     */
    get isRotated(): boolean {
        return this._isRotated;
    }
}
|