@pdftron/pdfnet-node-samples 9.4.2 → 9.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +18 -18
- package/readme.md +12 -12
- package/samples/AddImageTest/AddImageTest.js +1 -1
- package/samples/AddImageTest/NODEJS/AddImageTest.js +116 -0
- package/samples/AddImageTest/NODEJS/RunTest.sh +2 -0
- package/samples/AdvancedImagingTest/AdvancedImagingTest.js +1 -1
- package/samples/AdvancedImagingTest/NODEJS/AdvancedImagingTest.js +78 -0
- package/samples/AdvancedImagingTest/NODEJS/RunTest.sh +2 -0
- package/samples/AnnotationTest/AnnotationTest.js +1 -1
- package/samples/AnnotationTest/NODEJS/AnnotationTest.js +642 -0
- package/samples/AnnotationTest/NODEJS/RunTest.sh +2 -0
- package/samples/BookmarkTest/BookmarkTest.js +1 -1
- package/samples/BookmarkTest/NODEJS/BookmarkTest.js +220 -0
- package/samples/BookmarkTest/NODEJS/RunTest.sh +2 -0
- package/samples/CAD2PDFTest/CAD2PDFTest.js +79 -79
- package/samples/CAD2PDFTest/NODEJS/CAD2PDFTest.js +80 -0
- package/samples/CAD2PDFTest/NODEJS/RunTest.sh +2 -0
- package/samples/ContentReplacerTest/ContentReplacerTest.js +1 -1
- package/samples/ContentReplacerTest/NODEJS/ContentReplacerTest.js +75 -0
- package/samples/ContentReplacerTest/NODEJS/RunTest.sh +2 -0
- package/samples/ConvertTest/ConvertTest.js +270 -256
- package/samples/ConvertTest/NODEJS/ConvertTest.js +270 -0
- package/samples/ConvertTest/NODEJS/RunTest.sh +2 -0
- package/samples/DataExtractionTest/DataExtractionTest.js +180 -0
- package/samples/DataExtractionTest/NODEJS/DataExtractionTest.js +180 -0
- package/samples/DataExtractionTest/NODEJS/RunTest.sh +2 -0
- package/samples/DigitalSignaturesTest/DigitalSignaturesTest.js +526 -526
- package/samples/DigitalSignaturesTest/NODEJS/DigitalSignaturesTest.js +527 -0
- package/samples/DigitalSignaturesTest/NODEJS/RunTest.sh +2 -0
- package/samples/ElementBuilderTest/ElementBuilderTest.js +1 -1
- package/samples/ElementBuilderTest/NODEJS/ElementBuilderTest.js +514 -0
- package/samples/ElementBuilderTest/NODEJS/RunTest.sh +2 -0
- package/samples/ElementEditTest/ElementEditTest.js +1 -1
- package/samples/ElementEditTest/NODEJS/ElementEditTest.js +111 -0
- package/samples/ElementEditTest/NODEJS/RunTest.sh +2 -0
- package/samples/ElementReaderAdvTest/ElementReaderAdvTest.js +1 -1
- package/samples/ElementReaderAdvTest/NODEJS/ElementReaderAdvTest.js +305 -0
- package/samples/ElementReaderAdvTest/NODEJS/RunTest.sh +2 -0
- package/samples/ElementReaderTest/ElementReaderTest.js +1 -1
- package/samples/ElementReaderTest/NODEJS/ElementReaderTest.js +77 -0
- package/samples/ElementReaderTest/NODEJS/RunTest.sh +2 -0
- package/samples/EncTest/EncTest.js +1 -1
- package/samples/EncTest/NODEJS/EncTest.js +176 -0
- package/samples/EncTest/NODEJS/RunTest.sh +2 -0
- package/samples/FDFTest/FDFTest.js +218 -218
- package/samples/FDFTest/NODEJS/FDFTest.js +219 -0
- package/samples/FDFTest/NODEJS/RunTest.sh +2 -0
- package/samples/HTML2PDFTest/HTML2PDFTest.js +165 -165
- package/samples/HTML2PDFTest/NODEJS/HTML2PDFTest.js +166 -0
- package/samples/HTML2PDFTest/NODEJS/RunTest.sh +2 -0
- package/samples/HighlightsTest/HighlightsTest.js +1 -1
- package/samples/HighlightsTest/NODEJS/HighlightsTest.js +97 -0
- package/samples/HighlightsTest/NODEJS/RunTest.sh +2 -0
- package/samples/ImageExtractTest/ImageExtractTest.js +129 -129
- package/samples/ImageExtractTest/NODEJS/ImageExtractTest.js +130 -0
- package/samples/ImageExtractTest/NODEJS/RunTest.sh +2 -0
- package/samples/ImpositionTest/ImpositionTest.js +86 -86
- package/samples/ImpositionTest/NODEJS/ImpositionTest.js +87 -0
- package/samples/ImpositionTest/NODEJS/RunTest.sh +2 -0
- package/samples/InteractiveFormsTest/InteractiveFormsTest.js +381 -381
- package/samples/InteractiveFormsTest/NODEJS/InteractiveFormsTest.js +382 -0
- package/samples/InteractiveFormsTest/NODEJS/RunTest.sh +2 -0
- package/samples/JBIG2Test/JBIG2Test.js +88 -88
- package/samples/JBIG2Test/NODEJS/JBIG2Test.js +89 -0
- package/samples/JBIG2Test/NODEJS/RunTest.sh +2 -0
- package/samples/LicenseKey/NODEJS/LicenseKey.js +11 -0
- package/samples/LogicalStructureTest/LogicalStructureTest.js +1 -1
- package/samples/LogicalStructureTest/NODEJS/LogicalStructureTest.js +251 -0
- package/samples/LogicalStructureTest/NODEJS/RunTest.sh +2 -0
- package/samples/OCRTest/NODEJS/OCRTest.js +235 -0
- package/samples/OCRTest/NODEJS/RunTest.sh +2 -0
- package/samples/OCRTest/OCRTest.js +234 -234
- package/samples/OfficeTemplateTest/NODEJS/OfficeTemplateTest.js +79 -0
- package/samples/OfficeTemplateTest/NODEJS/RunTest.sh +2 -0
- package/samples/OfficeTemplateTest/OfficeTemplateTest.js +79 -77
- package/samples/OfficeToPDFTest/NODEJS/OfficeToPDFTest.js +125 -0
- package/samples/OfficeToPDFTest/NODEJS/RunTest.sh +2 -0
- package/samples/OfficeToPDFTest/OfficeToPDFTest.js +125 -125
- package/samples/OptimizerTest/NODEJS/OptimizerTest.js +192 -0
- package/samples/OptimizerTest/NODEJS/RunTest.sh +2 -0
- package/samples/OptimizerTest/OptimizerTest.js +191 -191
- package/samples/PDF2HtmlTest/NODEJS/PDF2HtmlTest.js +123 -0
- package/samples/PDF2HtmlTest/NODEJS/RunTest.sh +2 -0
- package/samples/PDF2HtmlTest/PDF2HtmlTest.js +1 -1
- package/samples/PDF2OfficeTest/NODEJS/PDF2OfficeTest.js +158 -0
- package/samples/PDF2OfficeTest/NODEJS/RunTest.sh +2 -0
- package/samples/PDF2OfficeTest/PDF2OfficeTest.js +1 -1
- package/samples/PDFATest/NODEJS/PDFATest.js +85 -0
- package/samples/PDFATest/NODEJS/RunTest.sh +2 -0
- package/samples/PDFATest/PDFATest.js +1 -1
- package/samples/PDFDocMemoryTest/NODEJS/PDFDocMemoryTest.js +85 -0
- package/samples/PDFDocMemoryTest/NODEJS/RunTest.sh +2 -0
- package/samples/PDFDocMemoryTest/PDFDocMemoryTest.js +84 -84
- package/samples/PDFDrawTest/NODEJS/PDFDrawTest.js +306 -0
- package/samples/PDFDrawTest/NODEJS/RunTest.sh +2 -0
- package/samples/PDFDrawTest/PDFDrawTest.js +1 -1
- package/samples/PDFLayersTest/NODEJS/PDFLayersTest.js +295 -0
- package/samples/PDFLayersTest/NODEJS/RunTest.sh +2 -0
- package/samples/PDFLayersTest/PDFLayersTest.js +1 -1
- package/samples/PDFPackageTest/NODEJS/PDFPackageTest.js +112 -0
- package/samples/PDFPackageTest/NODEJS/RunTest.sh +2 -0
- package/samples/PDFPackageTest/PDFPackageTest.js +111 -111
- package/samples/PDFPageTest/NODEJS/PDFPageTest.js +190 -0
- package/samples/PDFPageTest/NODEJS/RunTest.sh +2 -0
- package/samples/PDFPageTest/PDFPageTest.js +1 -1
- package/samples/PDFRedactTest/NODEJS/PDFRedactTest.js +75 -0
- package/samples/PDFRedactTest/NODEJS/RunTest.sh +2 -0
- package/samples/PDFRedactTest/PDFRedactTest.js +1 -1
- package/samples/PageLabelsTest/NODEJS/PageLabelsTest.js +139 -0
- package/samples/PageLabelsTest/NODEJS/RunTest.sh +2 -0
- package/samples/PageLabelsTest/PageLabelsTest.js +138 -138
- package/samples/PatternTest/NODEJS/PatternTest.js +227 -0
- package/samples/PatternTest/NODEJS/RunTest.sh +2 -0
- package/samples/PatternTest/PatternTest.js +226 -226
- package/samples/RectTest/NODEJS/RectTest.js +41 -0
- package/samples/RectTest/NODEJS/RunTest.sh +2 -0
- package/samples/RectTest/RectTest.js +1 -1
- package/samples/SDFTest/NODEJS/RunTest.sh +2 -0
- package/samples/SDFTest/NODEJS/SDFTest.js +88 -0
- package/samples/SDFTest/SDFTest.js +2 -3
- package/samples/StamperTest/NODEJS/RunTest.sh +2 -0
- package/samples/StamperTest/NODEJS/StamperTest.js +256 -0
- package/samples/StamperTest/StamperTest.js +1 -1
- package/samples/TestFiles/financial.pdf +0 -0
- package/samples/TestFiles/formfields-scanned.pdf +0 -0
- package/samples/TestFiles/formfields.pdf +0 -0
- package/samples/TestFiles/table.pdf +0 -0
- package/samples/TestFiles/tiger.svg +378 -0
- package/samples/TextExtractTest/NODEJS/RunTest.sh +2 -0
- package/samples/TextExtractTest/NODEJS/TextExtractTest.js +287 -0
- package/samples/TextExtractTest/TextExtractTest.js +1 -1
- package/samples/TextSearchTest/NODEJS/RunTest.sh +2 -0
- package/samples/TextSearchTest/NODEJS/TextSearchTest.js +122 -0
- package/samples/TextSearchTest/TextSearchTest.js +1 -1
- package/samples/U3DTest/NODEJS/RunTest.sh +2 -0
- package/samples/U3DTest/NODEJS/U3DTest.js +105 -0
- package/samples/U3DTest/U3DTest.js +104 -104
- package/samples/UndoRedoTest/NODEJS/RunTest.sh +2 -0
- package/samples/UndoRedoTest/NODEJS/UndoRedoTest.js +101 -0
- package/samples/UndoRedoTest/UndoRedoTest.js +1 -1
- package/samples/UnicodeWriteTest/NODEJS/RunTest.sh +2 -0
- package/samples/UnicodeWriteTest/NODEJS/UnicodeWriteTest.js +174 -0
- package/samples/UnicodeWriteTest/UnicodeWriteTest.js +1 -1
- package/samples/WebViewerConvertTest/NODEJS/RunTest.sh +2 -0
- package/samples/WebViewerConvertTest/NODEJS/WebViewerConvertTest.js +136 -0
- package/samples/WebViewerConvertTest/WebViewerConvertTest.js +135 -135
- package/samples/runall.bat +12 -12
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
//---------------------------------------------------------------------------------------
|
|
2
|
+
// Copyright (c) 2001-2021 by PDFTron Systems Inc. All Rights Reserved.
|
|
3
|
+
// Consult legal.txt regarding legal and license information.
|
|
4
|
+
//---------------------------------------------------------------------------------------
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
const { PDFNet } = require('../../../lib/pdfnet.js');
|
|
8
|
+
const PDFTronLicense = require('../../LicenseKey/NODEJS/LicenseKey');
|
|
9
|
+
|
|
10
|
+
((exports) => {
|
|
11
|
+
|
|
12
|
+
exports.runTextExtractTest = async () => {
|
|
13
|
+
// A utility method used to dump all text content in the console window.
|
|
14
|
+
/**
 * Walks every element yielded by `reader` and prints text-related content
 * to the console.
 *
 * Elements of type `e_form` are entered with `reader.formBegin()`, walked
 * recursively with this same function, and left again with `reader.end()`.
 *
 * @param {object} reader - Element reader exposing `next()`, `formBegin()` and `end()`.
 * @returns {Promise<void>} Resolves once `reader.next()` yields `null`.
 */
const dumpAllText = async (reader) => {
  const { Type } = PDFNet.Element;
  let element;
  while ((element = await reader.next()) !== null) {
    switch (await element.getType()) {
      case Type.e_text_begin:
        console.log('\n--> Text Block Begin');
        break;
      case Type.e_text_end:
        console.log('\n--> Text Block End');
        break;
      case Type.e_text: {
        const bbox = await element.getBBox();
        console.log('\n--> BBox: ' + bbox.x1.toFixed(2) + ', ' + bbox.y1.toFixed(2) + ', ' + bbox.x2.toFixed(2) + ', ' + bbox.y2.toFixed(2) + '\n');
        console.log(await element.getTextString());
        break;
      }
      case Type.e_text_new_line:
        console.log('\n--> New Line');
        break;
      case Type.e_form:
        // Await both calls: the nested walk must not start reading before the
        // reader has entered the form, and a rejection must reach the caller
        // instead of becoming an unhandled floating promise.
        await reader.formBegin();
        await dumpAllText(reader);
        await reader.end();
        break;
      default:
        // Other element types carry no text and are skipped.
        break;
    }
  }
};
|
|
43
|
+
|
|
44
|
+
// helper method for ReadTextFromRect
|
|
45
|
+
/**
 * Collects the text of every `e_text` element whose bounding box intersects
 * `pos`, descending recursively into `e_form` elements.
 *
 * @param {object} reader - Element reader exposing `next()`, `formBegin()` and `end()`.
 * @param {object} pos - Rectangle passed as second argument to `bbox.intersectRect`.
 * @param {string} srchStr - Text accumulated so far; new matches are appended to it.
 * @returns {Promise<string>} `srchStr` followed by each matching text string and a '\n'.
 */
const rectTextSearch = async (reader, pos, srchStr) => {
  const { Type } = PDFNet.Element;
  let collected = srchStr;
  let element;
  while ((element = await reader.next()) !== null) {
    switch (await element.getType()) {
      case Type.e_text: {
        const bbox = await element.getBBox();
        // NOTE(review): intersectRect presumably also stores the intersection
        // in bbox; only its boolean result is used here — confirm against the API.
        if (await bbox.intersectRect(bbox, pos)) {
          collected += (await element.getTextString()) + '\n';
        }
        break;
      }
      case Type.e_form:
        await reader.formBegin();
        // The recursive call already returns the accumulated text plus the
        // form's matches, so assign it. Appending with += duplicated
        // everything collected before the form.
        collected = await rectTextSearch(reader, pos, collected);
        await reader.end();
        break;
      default:
        // e_text_new_line and all other element types contribute nothing.
        break;
    }
  }
  return collected;
};
|
|
69
|
+
|
|
70
|
+
/**
 * Returns the text on `page` that `rectTextSearch` finds inside `pos`.
 *
 * @param {object} page - Page handed to `reader.beginOnPage` (default parameters).
 * @param {object} pos - Selection rectangle forwarded to `rectTextSearch`.
 * @param {object} reader - Element reader used for the scan.
 * @returns {Promise<string>} The text returned by `rectTextSearch`, starting from an empty string.
 */
const readTextFromRect = async (page, pos, reader) => {
  // Await begin/end so the scan runs strictly between them and any
  // rejection reaches the caller instead of floating.
  await reader.beginOnPage(page); // uses default parameters.
  const srchStr = '' + (await rectTextSearch(reader, pos, ''));
  await reader.end();
  return srchStr;
};
|
|
77
|
+
|
|
78
|
+
/**
 * Formats a number as an upper-case hexadecimal string of exactly two characters.
 *
 * A one-digit hex form is left-padded with '0'; a longer form keeps only
 * its last two characters.
 *
 * @param {number} num - Value to format.
 * @returns {string} Two-character upper-case hex string.
 */
const twoDigitHex = function (num) {
  const hexStr = num.toString(16).toUpperCase();
  // slice(-2) gives the same result as the deprecated substr(-2).
  return ('0' + hexStr).slice(-2);
};
|
|
82
|
+
|
|
83
|
+
/**
 * Builds the ` style="..."` attribute text describing a text style.
 *
 * Reads the colour components at indices 0, 1 and 2, the font name, the
 * font size and the serif flag from `s`, in that order.
 *
 * @param {object} s - Style exposing `getColor()`, `getFontName()`, `getFontSize()` and `isSerif()`.
 * @returns {Promise<string>} Attribute text beginning with a space.
 */
const printStyle = async (s) => {
  const color = await s.getColor();
  const channels = [];
  for (const index of [0, 1, 2]) {
    channels.push(twoDigitHex(await color.get(index)));
  }
  const hexColor = channels.join('');

  const family = await s.getFontName();
  const size = await s.getFontSize();

  // NOTE(review): a style reporting isSerif() === true is labelled
  // "sans-serif" here; kept as-is so the output does not change — confirm intent.
  let serifPart = ' ';
  if (await s.isSerif()) {
    serifPart = ' sans-serif; ';
  }

  return ` style="font-family:${family}; font-size:${size};${serifPart}color:#${hexColor};"`;
};
|
|
95
|
+
|
|
96
|
+
/**
 * Runs the text-extraction examples selected by the `example*` switches
 * against the sample document and prints their output to the console.
 *
 * Examples 1-4 use a TextExtractor on page 1; example 5 uses an
 * ElementReader through `dumpAllText` and `readTextFromRect`.
 *
 * @returns {Promise<number>} 0 on success, 1 when page 1 is missing or an example threw.
 */
const main = async () => {
  let ret = 0;

  // Relative path to the folder containing test files.
  const inputPath = '../../TestFiles/';
  const inputFilename = 'newsletter.pdf'; // addimage.pdf, newsletter.pdf

  // Switches selecting which examples run.
  const example1Basic = false;
  const example2XML = false;
  const example3Wordlist = false;
  const example4Advanced = true;
  const example5LowLevel = false;

  // Formats a rectangle as "x1, y1, x2, y2" with two decimals each.
  const formatBox = (r) => [r.x1, r.y1, r.x2, r.y2].map((v) => v.toFixed(2)).join(', ');

  try {
    await PDFNet.startDeallocateStack();
    const doc = await PDFNet.PDFDoc.createFromFilePath(inputPath + inputFilename);
    await doc.initSecurityHandler();

    const page = await doc.getPage(1);

    if (page.id === '0') {
      console.log('Page not found.');
      // Close the deallocate stack opened above before leaving early.
      await PDFNet.endDeallocateStack();
      return 1;
    }

    const txt = await PDFNet.TextExtractor.create();
    await txt.begin(page);

    // Example 1. Get all text on the page in a single string.
    // Words will be separated with space or new line characters.
    if (example1Basic) {
      const wordCount = await txt.getWordCount();
      console.log('Word Count: ' + wordCount);
      const text = await txt.getAsText();
      console.log('\n\n- GetAsText --------------------------');
      console.log(text);
      console.log('-----------------------------------------------------------');
    }

    // Example 2. Get XML logical structure for the page.
    if (example2XML) {
      const flags = PDFNet.TextExtractor.XMLOutputFlags;
      const text = await txt.getAsXML(flags.e_words_as_elements | flags.e_output_bbox | flags.e_output_style_info);
      console.log('\n\n- GetAsXML --------------------------\n' + text);
      console.log('-----------------------------------------------------------');
    }

    // Example 3. Extract words one by one.
    if (example3Wordlist) {
      for (let line = await txt.getFirstLine(); await line.isValid(); line = await line.getNextLine()) {
        for (let word = await line.getFirstWord(); await word.isValid(); word = await word.getNextWord()) {
          console.log(await word.getString());
        }
      }
      console.log('-----------------------------------------------------------');
    }

    // Example 4. A more advanced text extraction example.
    // The output is XML structure containing paragraphs, lines, words,
    // as well as style and positioning information.
    if (example4Advanced) {
      // -1 means "no <Flow> / <Para> element is currently open".
      let curFlowID = -1;
      let curParaID = -1;

      console.log('<PDFText>');

      // For each line on the page...
      for (let line = await txt.getFirstLine(); await line.isValid(); line = await line.getNextLine()) {
        if ((await line.getNumWords()) === 0) {
          continue;
        }

        // A new flow closes any open paragraph and flow before opening its own.
        const flowID = await line.getFlowID();
        if (curFlowID !== flowID) {
          if (curFlowID !== -1) {
            if (curParaID !== -1) {
              curParaID = -1;
              console.log('</Para>');
            }
            console.log('</Flow>');
          }
          curFlowID = flowID;
          console.log('<Flow id="' + curFlowID + '">');
        }

        const paraID = await line.getParagraphID();
        if (curParaID !== paraID) {
          if (curParaID !== -1) {
            console.log('</Para>');
          }
          curParaID = paraID;
          console.log('<Para id="' + curParaID + '">');
        }

        const lineBox = await line.getBBox();
        const lineStyle = await line.getStyle();
        let lineTag = '<Line box="' + formatBox(lineBox) + '"';
        lineTag += await printStyle(lineStyle);
        const currentLineNum = await line.getCurrentNum();
        lineTag += ' cur_num="' + currentLineNum + '">';
        console.log(lineTag);

        // For each word in the line...
        for (let word = await line.getFirstWord(); await word.isValid(); word = await word.getNextWord()) {
          // output bounding box for the word
          const wordBox = await word.getBBox();
          const currentNum = await word.getCurrentNum();
          let wordTag = '<Word box="' + formatBox(wordBox) + '" cur_num="' + currentNum + '"';
          const sz = await word.getStringLen();
          if (sz === 0) {
            continue;
          }
          // if the word style is different from the parent style, output the new style
          const sty = await word.getStyle();
          if (!(await sty.compare(lineStyle))) {
            wordTag += await printStyle(sty);
          }
          wordTag += '>' + (await word.getString()) + '</Word>';
          console.log(wordTag);
        }
        console.log('</Line>');
      }

      if (curFlowID !== -1) {
        if (curParaID !== -1) {
          curParaID = -1;
          console.log('</Para>');
        }
        console.log('</Flow>');
      }
      console.log('</PDFText>');
    }
    await PDFNet.endDeallocateStack();
  } catch (err) {
    console.log(err);
    console.log(err.stack);
    ret = 1;
  }

  if (example5LowLevel) {
    try {
      await PDFNet.startDeallocateStack();
      const doc = await PDFNet.PDFDoc.createFromFilePath(inputPath + inputFilename);
      await doc.initSecurityHandler();

      // Example 1. Extract all text content from the document
      const reader = await PDFNet.ElementReader.create();
      const itr = await doc.getPageIterator(1);

      // Read every page
      for (; await itr.hasNext(); await itr.next()) {
        const page = await itr.current();
        await reader.beginOnPage(page);
        await dumpAllText(reader);
        await reader.end();
      }

      // Example 2. Extract text content based on the
      // selection rectangle.
      console.log('\n----------------------------------------------------');
      console.log('Extract text based on the selection rectangle.');
      console.log('----------------------------------------------------');

      const firstPage = await (await doc.getPageIterator()).current();
      let s1 = await readTextFromRect(firstPage, (await PDFNet.Rect.init(27, 392, 563, 534)), reader);
      console.log('\nField 1: ' + s1);

      s1 = await readTextFromRect(firstPage, (await PDFNet.Rect.init(28, 551, 106, 623)), reader);
      console.log('Field 2: ' + s1);

      s1 = await readTextFromRect(firstPage, (await PDFNet.Rect.init(208, 550, 387, 621)), reader);
      console.log('Field 3: ' + s1);

      // ...
      console.log('Done');
      await PDFNet.endDeallocateStack();
    } catch (err) {
      console.log(err.stack);
      ret = 1;
    }
  }

  // Report the status so a failure in either section is visible to the caller.
  return ret;
};
|
|
282
|
+
PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function (error) { console.log('Error: ' + JSON.stringify(error)); }).then(function () { return PDFNet.shutdown(); });
|
|
283
|
+
};
|
|
284
|
+
exports.runTextExtractTest();
|
|
285
|
+
})(exports);
|
|
286
|
+
// eslint-disable-next-line spaced-comment
|
|
287
|
+
//# sourceURL=TextExtractTest.js
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
//---------------------------------------------------------------------------------------
|
|
2
|
-
// Copyright (c) 2001-
|
|
2
|
+
// Copyright (c) 2001-2023 by PDFTron Systems Inc. All Rights Reserved.
|
|
3
3
|
// Consult legal.txt regarding legal and license information.
|
|
4
4
|
//---------------------------------------------------------------------------------------
|
|
5
5
|
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
//---------------------------------------------------------------------------------------
|
|
2
|
+
// Copyright (c) 2001-2021 by PDFTron Systems Inc. All Rights Reserved.
|
|
3
|
+
// Consult legal.txt regarding legal and license information.
|
|
4
|
+
//---------------------------------------------------------------------------------------
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
const { PDFNet } = require('../../../lib/pdfnet.js');
|
|
8
|
+
const PDFTronLicense = require('../../LicenseKey/NODEJS/LicenseKey');
|
|
9
|
+
|
|
10
|
+
((exports) => {
|
|
11
|
+
|
|
12
|
+
exports.runTextSearchTest = () => {
|
|
13
|
+
|
|
14
|
+
/**
 * Searches the sample document in four steps, changing the search pattern
 * and mode after each hit:
 *   0. find 'joHn sMiTh' with whole-word and page-stop modes,
 *   1. find a 4-4-4-4 digit card number and print its highlight pages,
 *   2. find a 4-6-5 digit (AMEX) number,
 *   3. search upward for a two-word name, add a link annotation over each
 *      highlight quad, and save the document.
 *
 * Errors are logged to the console and not rethrown.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
  // Relative path to the folder containing test files.
  const inputURL = '../../TestFiles/';
  const inputFilename = 'credit card numbers.pdf'; // addimage.pdf, newsletter.pdf

  try {
    const { Mode, ResultCode } = PDFNet.TextSearch;

    const doc = await PDFNet.PDFDoc.createFromFilePath(inputURL + inputFilename);
    await doc.initSecurityHandler();

    const txtSearch = await PDFNet.TextSearch.create();
    // Mode values are bit flags, so combine them with bitwise OR.
    let mode = Mode.e_whole_word | Mode.e_page_stop; // Uses both whole word and page stop
    let pattern = 'joHn sMiTh';

    // searches for the "pattern" in the document while following the inputted modes.
    await txtSearch.begin(doc, pattern, mode);

    let step = 0;

    // call Run() iteratively to find all matching instances of the word 'joHn sMiTh'
    /* eslint-disable-next-line no-constant-condition */
    while (true) {
      const result = await txtSearch.run();

      if (result.code === ResultCode.e_found) {
        if (step === 0) { // Step 0: found "John Smith"
          // note that, here, 'ambient_str' and 'highlights' are not written to,
          // as 'e_ambient_string' and 'e_highlight' are not set.
          console.log(result.out_str + "'s credit card number is: ");

          // now switch to using regular expressions to find John's credit card number
          mode = await txtSearch.getMode();
          mode |= Mode.e_reg_expression | Mode.e_highlight;
          await txtSearch.setMode(mode);
          pattern = '\\d{4}-\\d{4}-\\d{4}-\\d{4}'; // or "(\\d{4}-){3}\\d{4}"
          await txtSearch.setPattern(pattern);

          ++step;
        } else if (step === 1) {
          // step 1: found John's credit card number
          console.log(' ' + result.out_str);
          // note that, here, 'hlts' is written to, as 'e_highlight' has been set.
          // output the highlight info of the credit card number.
          const hlts = result.highlights;
          await hlts.begin(doc);
          while ((await hlts.hasNext())) {
            const highlightPageNum = await hlts.getCurrentPageNumber();
            console.log('The current highlight is from page: ' + highlightPageNum);
            await hlts.next();
          }
          // see if there is an AMEX card number
          pattern = '\\d{4}-\\d{6}-\\d{5}';
          await txtSearch.setPattern(pattern);

          ++step;
        } else if (step === 2) {
          // found an AMEX card number
          console.log('\nThere is an AMEX card number:\n ' + result.out_str);

          // change mode to find the owner of the credit card; supposedly, the owner's
          // name precedes the number
          mode = await txtSearch.getMode();
          mode |= Mode.e_search_up;
          await txtSearch.setMode(mode);
          pattern = '[A-z]++ [A-z]++';
          await txtSearch.setPattern(pattern);

          ++step;
        } else if (step === 3) {
          // found the owner's name of the AMEX card
          console.log("Is the owner's name:\n " + result.out_str + '?');

          // add a link annotation based on the location of the found instance
          const hlts = result.highlights;
          await hlts.begin(doc);
          while ((await hlts.hasNext())) {
            const curPage = await doc.getPage((await hlts.getCurrentPageNumber()));
            const quadArr = await hlts.getCurrentQuads();
            for (const currQuad of quadArr) {
              // Axis-aligned bounding box of the quad's four corner points.
              const x1 = Math.min(currQuad.p1x, currQuad.p2x, currQuad.p3x, currQuad.p4x);
              const x2 = Math.max(currQuad.p1x, currQuad.p2x, currQuad.p3x, currQuad.p4x);
              const y1 = Math.min(currQuad.p1y, currQuad.p2y, currQuad.p3y, currQuad.p4y);
              const y2 = Math.max(currQuad.p1y, currQuad.p2y, currQuad.p3y, currQuad.p4y);

              const hyperLink = await PDFNet.LinkAnnot.create(doc, (await PDFNet.Rect.init(x1, y1, x2, y2)));
              await hyperLink.setAction((await PDFNet.Action.createURI(doc, 'http://www.pdftron.com')));
              await curPage.annotPushBack(hyperLink);
            }
            // Await so the iterator has advanced before hasNext() is asked again.
            await hlts.next();
          }
          await doc.save('../../TestFiles/Output/credit card numbers_linked.pdf', PDFNet.SDFDoc.SaveOptions.e_linearized);
          break;
        }
      } else if (result.code === ResultCode.e_page) {
        // you can update your UI here, if needed
        console.log('page end');
      } else if (result.code === ResultCode.e_done) {
        break;
      }
    }
  } catch (err) {
    console.log(err);
  }
};
|
|
117
|
+
PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function(error){console.log('Error: ' + JSON.stringify(error));}).then(function(){return PDFNet.shutdown();});
|
|
118
|
+
};
|
|
119
|
+
exports.runTextSearchTest();
|
|
120
|
+
})(exports);
|
|
121
|
+
// eslint-disable-next-line spaced-comment
|
|
122
|
+
//# sourceURL=TextSearchTest.js
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
//---------------------------------------------------------------------------------------
|
|
2
|
-
// Copyright (c) 2001-
|
|
2
|
+
// Copyright (c) 2001-2023 by PDFTron Systems Inc. All Rights Reserved.
|
|
3
3
|
// Consult legal.txt regarding legal and license information.
|
|
4
4
|
//---------------------------------------------------------------------------------------
|
|
5
5
|
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
//---------------------------------------------------------------------------------------
|
|
2
|
+
// Copyright (c) 2001-2021 by PDFTron Systems Inc. All Rights Reserved.
|
|
3
|
+
// Consult legal.txt regarding legal and license information.
|
|
4
|
+
//---------------------------------------------------------------------------------------
|
|
5
|
+
|
|
6
|
+
const { PDFNet } = require('../../../lib/pdfnet.js');
|
|
7
|
+
const PDFTronLicense = require('../../LicenseKey/NODEJS/LicenseKey');
|
|
8
|
+
|
|
9
|
+
((exports) => {
|
|
10
|
+
'use strict';
|
|
11
|
+
|
|
12
|
+
exports.runU3DTest = () => {
|
|
13
|
+
const input_path = '../../TestFiles/';
|
|
14
|
+
|
|
15
|
+
/**
 * Builds a 3D annotation dictionary backed by the U3D file `dice.u3d` and
 * appends it to `annots`.
 *
 * The dictionary receives an activation dictionary ('3DA'), the embedded
 * U3D stream ('3DD'), an initial view ('3DV') and a normal appearance
 * stream ('AP' / 'N') drawn from `dice.jpg`.
 *
 * @param {object} doc - Document that owns the created objects.
 * @param {object} annots - Array object the annotation dictionary is pushed onto.
 * @returns {Promise<void>}
 */
const create3DAnnotation = async (doc, annots) => {
  // ---------------------------------------------------------------------------------
  // Create a 3D annotation based on U3D content. PDF 1.6 introduces the capability
  // for collections of three-dimensional objects, such as those used by CAD software,
  // to be embedded in PDF files.
  // Every call is awaited so the objects are fully populated before the
  // caller saves the document, and so failures reach the caller's try/catch.
  const link_3D = await doc.createIndirectDict();
  await link_3D.putName('Subtype', '3D');

  // Annotation location on the page
  const link_3D_rect = await PDFNet.Rect.init(25, 180, 585, 643);
  await link_3D.putRect('Rect', link_3D_rect.x1, link_3D_rect.y1,
    link_3D_rect.x2, link_3D_rect.y2);
  await annots.pushBack(link_3D);

  // The 3DA entry is an activation dictionary (see Table 9.34 in the PDF Reference Manual)
  // that determines how the state of the annotation and its associated artwork can change.
  const activation_dict_3D = await link_3D.putDict('3DA');

  // Set the annotation so that it is activated as soon as the page containing the
  // annotation is opened. Other options are: PV (page view) and XA (explicit) activation.
  await activation_dict_3D.putName('A', 'PO');

  // Embed U3D Streams (3D Model/Artwork).
  const u3d_file = await PDFNet.Filter.createMappedFileFromUString(input_path + 'dice.u3d');
  const u3d_reader = await PDFNet.FilterReader.create(u3d_file);

  // To embed 3D stream without compression, you can omit the second parameter in CreateIndirectStream.
  const flateEncode = await PDFNet.Filter.createFlateEncode();
  const u3d_data_dict = await doc.createIndirectStreamFromFilter(u3d_reader, flateEncode);
  await u3d_data_dict.putName('Subtype', 'U3D');
  await link_3D.put('3DD', u3d_data_dict);

  // Set the initial view of the 3D artwork that should be used when the annotation is activated.
  const view3D_dict = await link_3D.putDict('3DV');
  await view3D_dict.putString('IN', 'Unnamed');
  await view3D_dict.putString('XN', 'Default');
  await view3D_dict.putName('MS', 'M');
  await view3D_dict.putNumber('CO', 27.5);

  // A 12-element 3D transformation matrix that specifies a position and orientation
  // of the camera in world coordinates.
  const tr3d = await view3D_dict.putArray('C2W');
  const cameraToWorld = [
    1, 0, 0,
    0, 0, -1,
    0, 1, 0,
    0, -27.5, 0,
  ];
  for (const value of cameraToWorld) {
    await tr3d.pushBackNumber(value);
  }

  // Create annotation appearance stream, a thumbnail which is used during printing or
  // in PDF processors that do not understand 3D data.
  const ap_dict = await link_3D.putDict('AP');

  const builder = await PDFNet.ElementBuilder.create();
  const writer = await PDFNet.ElementWriter.create();

  await writer.begin(doc);

  const thumb_pathname = input_path + 'dice.jpg';
  const image = await PDFNet.Image.createFromFile(doc, thumb_pathname);
  // The thumbnail is scaled to the annotation rectangle's size.
  const rectWidth = await link_3D_rect.width();
  const rectHeight = await link_3D_rect.height();
  await writer.writePlacedElement(await builder.createImageScaled(image, 0.0, 0.0, rectWidth, rectHeight));

  const normal_ap_stream = await writer.end();
  await normal_ap_stream.putName('Subtype', 'Form');
  await normal_ap_stream.putRect('BBox', 0, 0, rectWidth, rectHeight);
  await ap_dict.put('N', normal_ap_stream);
};
|
|
80
|
+
|
|
81
|
+
/**
 * Creates a one-page document, attaches an 'Annots' array to the page,
 * fills it through `create3DAnnotation`, and saves the result as
 * `dice_u3d.pdf` in the output folder.
 *
 * Errors are logged to the console and not rethrown.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
  const output_path = '../../TestFiles/Output/';

  try {
    const doc = await PDFNet.PDFDoc.create();
    const page = await doc.pageCreate();
    await doc.pagePushBack(page);

    const annots = await doc.createIndirectArray();
    const pageObj = await page.getSDFObj();
    await pageObj.put('Annots', annots);

    await create3DAnnotation(doc, annots);

    // Await the save: otherwise 'Done' is printed, and the caller's cleanup
    // may run, while the file is still being written, and a save failure
    // would escape this try/catch.
    await doc.save(output_path + 'dice_u3d.pdf', PDFNet.SDFDoc.SaveOptions.e_linearized);
    console.log('Done');
  } catch (err) {
    console.log(err);
  }
};
|
|
98
|
+
PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function(error) {
|
|
99
|
+
console.log('Error: ' + JSON.stringify(error));
|
|
100
|
+
}).then(function(){ return PDFNet.shutdown(); });
|
|
101
|
+
};
|
|
102
|
+
exports.runU3DTest();
|
|
103
|
+
})(exports);
|
|
104
|
+
// eslint-disable-next-line spaced-comment
|
|
105
|
+
//# sourceURL=U3DTest.js
|