@pdftron/pdfnet-node-samples 10.7.0 → 10.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +21 -20
- package/readme.md +12 -12
- package/samples/AddImageTest/AddImageTest.js +115 -115
- package/samples/AdvancedImagingTest/AdvancedImagingTest.js +78 -78
- package/samples/AnnotationTest/AnnotationTest.js +641 -641
- package/samples/BookmarkTest/BookmarkTest.js +219 -219
- package/samples/CAD2PDFTest/CAD2PDFTest.js +79 -79
- package/samples/ContentReplacerTest/ContentReplacerTest.js +75 -75
- package/samples/ConvertPrintTest/ConvertPrintTest.js +153 -153
- package/samples/ConvertTest/ConvertTest.js +203 -203
- package/samples/DataExtractionTest/DataExtractionTest.js +214 -214
- package/samples/DigitalSignaturesTest/DigitalSignaturesTest.js +526 -526
- package/samples/DocumentCreationTest/DocumentCreationTest.js +409 -409
- package/samples/ElementBuilderTest/ElementBuilderTest.js +513 -513
- package/samples/ElementEditTest/ElementEditTest.js +110 -110
- package/samples/ElementReaderAdvTest/ElementReaderAdvTest.js +305 -305
- package/samples/ElementReaderTest/ElementReaderTest.js +77 -77
- package/samples/EncTest/EncTest.js +175 -175
- package/samples/FDFTest/FDFTest.js +218 -218
- package/samples/HTML2PDFTest/HTML2PDFTest.js +164 -164
- package/samples/HighlightsTest/HighlightsTest.js +97 -97
- package/samples/ImageExtractTest/ImageExtractTest.js +129 -129
- package/samples/ImpositionTest/ImpositionTest.js +86 -86
- package/samples/InteractiveFormsTest/InteractiveFormsTest.js +381 -381
- package/samples/JBIG2Test/JBIG2Test.js +88 -88
- package/samples/LicenseKey/LicenseKey.js +11 -11
- package/samples/LogicalStructureTest/LogicalStructureTest.js +250 -250
- package/samples/OCRTest/OCRTest.js +235 -235
- package/samples/OfficeTemplateTest/OfficeTemplateTest.js +79 -79
- package/samples/OfficeToPDFTest/OfficeToPDFTest.js +125 -125
- package/samples/OptimizerTest/OptimizerTest.js +191 -191
- package/samples/PDF2HtmlTest/PDF2HtmlTest.js +123 -123
- package/samples/PDF2OfficeTest/PDF2OfficeTest.js +158 -158
- package/samples/PDFATest/PDFATest.js +85 -85
- package/samples/PDFDocMemoryTest/PDFDocMemoryTest.js +84 -84
- package/samples/PDFDrawTest/PDFDrawTest.js +305 -305
- package/samples/PDFLayersTest/PDFLayersTest.js +294 -294
- package/samples/PDFPackageTest/PDFPackageTest.js +111 -111
- package/samples/PDFPageTest/PDFPageTest.js +189 -189
- package/samples/PDFRedactTest/PDFRedactTest.js +74 -74
- package/samples/PageLabelsTest/PageLabelsTest.js +138 -138
- package/samples/PatternTest/PatternTest.js +226 -226
- package/samples/RectTest/RectTest.js +40 -40
- package/samples/SDFTest/SDFTest.js +87 -87
- package/samples/StamperTest/StamperTest.js +255 -255
- package/samples/TestFiles/Misc-Fixed.pfa +1166 -1166
- package/samples/TestFiles/SHA-2 Root USERTrust RSA CA Sectigo timestamping.crt +34 -34
- package/samples/TestFiles/form1_annots.xfdf +33 -33
- package/samples/TestFiles/form1_data.xfdf +139 -139
- package/samples/TestFiles/my_stream.txt +2310 -2310
- package/samples/TestFiles/tiger.svg +378 -378
- package/samples/TextExtractTest/TextExtractTest.js +286 -286
- package/samples/TextSearchTest/TextSearchTest.js +121 -121
- package/samples/U3DTest/U3DTest.js +104 -104
- package/samples/UndoRedoTest/UndoRedoTest.js +101 -101
- package/samples/UnicodeWriteTest/UnicodeWriteTest.js +173 -173
- package/samples/WebViewerConvertTest/WebViewerConvertTest.js +135 -135
- package/samples/runall.bat +12 -12
- package/samples/runall.sh +15 -15
|
@@ -1,214 +1,214 @@
|
|
|
1
|
-
//---------------------------------------------------------------------------------------
|
|
2
|
-
// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
|
|
3
|
-
// Consult legal.txt regarding legal and license information.
|
|
4
|
-
//---------------------------------------------------------------------------------------
|
|
5
|
-
|
|
6
|
-
//---------------------------------------------------------------------------------------
|
|
7
|
-
// The Data Extraction suite is an optional PDFNet add-on collection that can be used to
|
|
8
|
-
// extract various types of data from PDF documents.
|
|
9
|
-
//
|
|
10
|
-
// The Apryse SDK Data Extraction suite can be downloaded from http://www.pdftron.com/
|
|
11
|
-
//---------------------------------------------------------------------------------------
|
|
12
|
-
|
|
13
|
-
const fs = require('fs');
|
|
14
|
-
const { PDFNet } = require('@pdftron/pdfnet-node');
|
|
15
|
-
const PDFTronLicense = require('../LicenseKey/LicenseKey');
|
|
16
|
-
|
|
17
|
-
((exports) => {
|
|
18
|
-
'use strict';
|
|
19
|
-
|
|
20
|
-
exports.runDataExtractionTest = () => {
|
|
21
|
-
|
|
22
|
-
const main = async () => {
|
|
23
|
-
|
|
24
|
-
const inputPath = '../TestFiles/';
|
|
25
|
-
const outputPath = '../TestFiles/Output/';
|
|
26
|
-
|
|
27
|
-
//////////////////////////////////////////////////////////////////////////
|
|
28
|
-
|
|
29
|
-
await PDFNet.addResourceSearchPath('../../lib/');
|
|
30
|
-
|
|
31
|
-
//////////////////////////////////////////////////////////////////////////
|
|
32
|
-
// The following sample illustrates how to extract tables from PDF documents.
|
|
33
|
-
//////////////////////////////////////////////////////////////////////////
|
|
34
|
-
|
|
35
|
-
// Test if the add-on is installed
|
|
36
|
-
if (!await PDFNet.DataExtractionModule.isModuleAvailable(PDFNet.DataExtractionModule.DataExtractionEngine.e_Tabular)) {
|
|
37
|
-
console.log('\nUnable to run Data Extraction: Apryse SDK Tabular Data module not available.');
|
|
38
|
-
console.log('---------------------------------------------------------------');
|
|
39
|
-
console.log('The Data Extraction suite is an optional add-on, available for download');
|
|
40
|
-
console.log('at https://docs.apryse.com/documentation/core/info/modules/. If you have already');
|
|
41
|
-
console.log('downloaded this module, ensure that the SDK is able to find the required files');
|
|
42
|
-
console.log('using the PDFNet.addResourceSearchPath() function.\n');
|
|
43
|
-
}
|
|
44
|
-
else
|
|
45
|
-
{
|
|
46
|
-
try {
|
|
47
|
-
// Extract tabular data as a JSON file
|
|
48
|
-
console.log('Extract tabular data as a JSON file');
|
|
49
|
-
|
|
50
|
-
let outputFile = outputPath + 'table.json';
|
|
51
|
-
await PDFNet.DataExtractionModule.extractData(inputPath + 'table.pdf', outputFile, PDFNet.DataExtractionModule.DataExtractionEngine.e_Tabular);
|
|
52
|
-
|
|
53
|
-
console.log('Result saved in ' + outputFile);
|
|
54
|
-
|
|
55
|
-
///////////////////////////////////////////////////////
|
|
56
|
-
// Extract tabular data as a JSON string
|
|
57
|
-
console.log('Extract tabular data as a JSON string');
|
|
58
|
-
|
|
59
|
-
outputFile = outputPath + 'financial.json';
|
|
60
|
-
const json = await PDFNet.DataExtractionModule.extractDataAsString(inputPath + 'financial.pdf', PDFNet.DataExtractionModule.DataExtractionEngine.e_Tabular);
|
|
61
|
-
fs.writeFileSync(outputFile, json);
|
|
62
|
-
|
|
63
|
-
console.log('Result saved in ' + outputFile);
|
|
64
|
-
|
|
65
|
-
///////////////////////////////////////////////////////
|
|
66
|
-
// Extract tabular data as an XLSX file
|
|
67
|
-
console.log('Extract tabular data as an XLSX file');
|
|
68
|
-
|
|
69
|
-
outputFile = outputPath + 'table.xlsx';
|
|
70
|
-
await PDFNet.DataExtractionModule.extractToXLSX(inputPath + 'table.pdf', outputFile);
|
|
71
|
-
|
|
72
|
-
console.log('Result saved in ' + outputFile);
|
|
73
|
-
|
|
74
|
-
///////////////////////////////////////////////////////
|
|
75
|
-
// Extract tabular data as an XLSX stream (also known as filter)
|
|
76
|
-
console.log('Extract tabular data as an XLSX stream');
|
|
77
|
-
|
|
78
|
-
outputFile = outputPath + 'financial.xlsx';
|
|
79
|
-
const outputXlsxStream = await PDFNet.Filter.createMemoryFilter(0, false);
|
|
80
|
-
const options = new PDFNet.DataExtractionModule.DataExtractionOptions();
|
|
81
|
-
options.setPages("1"); // page 1
|
|
82
|
-
await PDFNet.DataExtractionModule.extractToXLSXWithFilter(inputPath + 'financial.pdf', outputXlsxStream, options);
|
|
83
|
-
outputXlsxStream.memoryFilterSetAsInputFilter();
|
|
84
|
-
outputXlsxStream.writeToFile(outputFile, false);
|
|
85
|
-
|
|
86
|
-
console.log('Result saved in ' + outputFile);
|
|
87
|
-
} catch (err) {
|
|
88
|
-
console.log(err);
|
|
89
|
-
}
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
//////////////////////////////////////////////////////////////////////////
|
|
93
|
-
// The following sample illustrates how to extract document structure from PDF documents.
|
|
94
|
-
//////////////////////////////////////////////////////////////////////////
|
|
95
|
-
|
|
96
|
-
// Test if the add-on is installed
|
|
97
|
-
if (!await PDFNet.DataExtractionModule.isModuleAvailable(PDFNet.DataExtractionModule.DataExtractionEngine.e_DocStructure)) {
|
|
98
|
-
console.log('\nUnable to run Data Extraction: Apryse SDK Structured Output module not available.');
|
|
99
|
-
console.log('---------------------------------------------------------------');
|
|
100
|
-
console.log('The Data Extraction suite is an optional add-on, available for download');
|
|
101
|
-
console.log('at https://docs.apryse.com/documentation/core/info/modules/. If you have already');
|
|
102
|
-
console.log('downloaded this module, ensure that the SDK is able to find the required files');
|
|
103
|
-
console.log('using the PDFNet.addResourceSearchPath() function.\n');
|
|
104
|
-
}
|
|
105
|
-
else
|
|
106
|
-
{
|
|
107
|
-
try {
|
|
108
|
-
// Extract document structure as a JSON file
|
|
109
|
-
console.log('Extract document structure as a JSON file');
|
|
110
|
-
|
|
111
|
-
let outputFile = outputPath + 'paragraphs_and_tables.json';
|
|
112
|
-
await PDFNet.DataExtractionModule.extractData(inputPath + 'paragraphs_and_tables.pdf', outputFile, PDFNet.DataExtractionModule.DataExtractionEngine.e_DocStructure);
|
|
113
|
-
|
|
114
|
-
console.log('Result saved in ' + outputFile);
|
|
115
|
-
|
|
116
|
-
///////////////////////////////////////////////////////
|
|
117
|
-
// Extract document structure as a JSON string
|
|
118
|
-
console.log('Extract document structure as a JSON string');
|
|
119
|
-
|
|
120
|
-
outputFile = outputPath + 'tagged.json';
|
|
121
|
-
const json = await PDFNet.DataExtractionModule.extractDataAsString(inputPath + 'tagged.pdf', PDFNet.DataExtractionModule.DataExtractionEngine.e_DocStructure);
|
|
122
|
-
fs.writeFileSync(outputFile, json);
|
|
123
|
-
|
|
124
|
-
console.log('Result saved in ' + outputFile);
|
|
125
|
-
} catch (err) {
|
|
126
|
-
console.log(err);
|
|
127
|
-
}
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
//////////////////////////////////////////////////////////////////////////
|
|
131
|
-
// The following sample illustrates how to extract form fields from PDF documents.
|
|
132
|
-
//////////////////////////////////////////////////////////////////////////
|
|
133
|
-
|
|
134
|
-
// Test if the add-on is installed
|
|
135
|
-
if (!await PDFNet.DataExtractionModule.isModuleAvailable(PDFNet.DataExtractionModule.DataExtractionEngine.e_Form)) {
|
|
136
|
-
console.log('\nUnable to run Data Extraction: Apryse SDK AIFormFieldExtractor module not available.');
|
|
137
|
-
console.log('---------------------------------------------------------------');
|
|
138
|
-
console.log('The Data Extraction suite is an optional add-on, available for download');
|
|
139
|
-
console.log('at https://docs.apryse.com/documentation/core/info/modules/. If you have already');
|
|
140
|
-
console.log('downloaded this module, ensure that the SDK is able to find the required files');
|
|
141
|
-
console.log('using the PDFNet.addResourceSearchPath() function.\n');
|
|
142
|
-
}
|
|
143
|
-
else
|
|
144
|
-
{
|
|
145
|
-
try {
|
|
146
|
-
// Extract form fields as a JSON file
|
|
147
|
-
console.log('Extract form fields as a JSON file');
|
|
148
|
-
|
|
149
|
-
let outputFile = outputPath + 'formfields-scanned.json';
|
|
150
|
-
await PDFNet.DataExtractionModule.extractData(inputPath + 'formfields-scanned.pdf', outputFile, PDFNet.DataExtractionModule.DataExtractionEngine.e_Form);
|
|
151
|
-
|
|
152
|
-
console.log('Result saved in ' + outputFile);
|
|
153
|
-
|
|
154
|
-
///////////////////////////////////////////////////////
|
|
155
|
-
// Extract form fields as a JSON string
|
|
156
|
-
console.log('Extract form fields as a JSON string');
|
|
157
|
-
|
|
158
|
-
outputFile = outputPath + 'formfields.json';
|
|
159
|
-
const json = await PDFNet.DataExtractionModule.extractDataAsString(inputPath + 'formfields.pdf', PDFNet.DataExtractionModule.DataExtractionEngine.e_Form);
|
|
160
|
-
fs.writeFileSync(outputFile, json);
|
|
161
|
-
|
|
162
|
-
console.log('Result saved in ' + outputFile);
|
|
163
|
-
|
|
164
|
-
//////////////////////////////////////////////////////////////////////////
|
|
165
|
-
// Detect and add form fields to a PDF document.
|
|
166
|
-
// Document already has form fields, and this sample will update to new found fields.
|
|
167
|
-
{
|
|
168
|
-
console.log('Detect and add form fields in a PDF file, keep new fields');
|
|
169
|
-
|
|
170
|
-
const doc = await PDFNet.PDFDoc.createFromFilePath(inputPath + "formfields-scanned-withfields.pdf");
|
|
171
|
-
|
|
172
|
-
await PDFNet.DataExtractionModule.detectAndAddFormFieldsToPDF(doc);
|
|
173
|
-
outputFile = outputPath + 'formfields-scanned-fields-new.pdf';
|
|
174
|
-
await doc.save(outputFile, PDFNet.SDFDoc.SaveOptions.e_linearized);
|
|
175
|
-
|
|
176
|
-
console.log('Result saved in ' + outputFile);
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
//////////////////////////////////////////////////////////////////////////
|
|
180
|
-
// Detect and add form fields to a PDF document.
|
|
181
|
-
// Document already has form fields, and this sample will keep the original fields.
|
|
182
|
-
{
|
|
183
|
-
console.log('Detect and add form fields in a PDF file, keep old fields');
|
|
184
|
-
|
|
185
|
-
const doc = await PDFNet.PDFDoc.createFromFilePath(inputPath + "formfields-scanned-withfields.pdf");
|
|
186
|
-
|
|
187
|
-
const options = new PDFNet.DataExtractionModule.DataExtractionOptions();
|
|
188
|
-
options.setOverlappingFormFieldBehavior('KeepOld');
|
|
189
|
-
|
|
190
|
-
await PDFNet.DataExtractionModule.detectAndAddFormFieldsToPDF(doc, options);
|
|
191
|
-
outputFile = outputPath + 'formfields-scanned-fields-old.pdf';
|
|
192
|
-
await doc.save(outputFile, PDFNet.SDFDoc.SaveOptions.e_linearized);
|
|
193
|
-
}
|
|
194
|
-
|
|
195
|
-
console.log('Result saved in ' + outputFile);
|
|
196
|
-
|
|
197
|
-
} catch (err) {
|
|
198
|
-
console.log(err);
|
|
199
|
-
}
|
|
200
|
-
}
|
|
201
|
-
|
|
202
|
-
//////////////////////////////////////////////////////////////////////////
|
|
203
|
-
|
|
204
|
-
console.log('Done.');
|
|
205
|
-
};
|
|
206
|
-
|
|
207
|
-
PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function (error) {
|
|
208
|
-
console.log('Error: ' + JSON.stringify(error));
|
|
209
|
-
}).then(function () { return PDFNet.shutdown(); });
|
|
210
|
-
};
|
|
211
|
-
exports.runDataExtractionTest();
|
|
212
|
-
})(exports);
|
|
213
|
-
// eslint-disable-next-line spaced-comment
|
|
214
|
-
//# sourceURL=DataExtractionTest.js
|
|
1
|
+
//---------------------------------------------------------------------------------------
|
|
2
|
+
// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
|
|
3
|
+
// Consult legal.txt regarding legal and license information.
|
|
4
|
+
//---------------------------------------------------------------------------------------
|
|
5
|
+
|
|
6
|
+
//---------------------------------------------------------------------------------------
|
|
7
|
+
// The Data Extraction suite is an optional PDFNet add-on collection that can be used to
|
|
8
|
+
// extract various types of data from PDF documents.
|
|
9
|
+
//
|
|
10
|
+
// The Apryse SDK Data Extraction suite can be downloaded from http://www.pdftron.com/
|
|
11
|
+
//---------------------------------------------------------------------------------------
|
|
12
|
+
|
|
13
|
+
const fs = require('fs');
|
|
14
|
+
const { PDFNet } = require('@pdftron/pdfnet-node');
|
|
15
|
+
const PDFTronLicense = require('../LicenseKey/LicenseKey');
|
|
16
|
+
|
|
17
|
+
((exports) => {
|
|
18
|
+
'use strict';
|
|
19
|
+
|
|
20
|
+
exports.runDataExtractionTest = () => {
|
|
21
|
+
|
|
22
|
+
const main = async () => {
|
|
23
|
+
|
|
24
|
+
const inputPath = '../TestFiles/';
|
|
25
|
+
const outputPath = '../TestFiles/Output/';
|
|
26
|
+
|
|
27
|
+
//////////////////////////////////////////////////////////////////////////
|
|
28
|
+
|
|
29
|
+
await PDFNet.addResourceSearchPath('../../lib/');
|
|
30
|
+
|
|
31
|
+
//////////////////////////////////////////////////////////////////////////
|
|
32
|
+
// The following sample illustrates how to extract tables from PDF documents.
|
|
33
|
+
//////////////////////////////////////////////////////////////////////////
|
|
34
|
+
|
|
35
|
+
// Test if the add-on is installed
|
|
36
|
+
if (!await PDFNet.DataExtractionModule.isModuleAvailable(PDFNet.DataExtractionModule.DataExtractionEngine.e_Tabular)) {
|
|
37
|
+
console.log('\nUnable to run Data Extraction: Apryse SDK Tabular Data module not available.');
|
|
38
|
+
console.log('---------------------------------------------------------------');
|
|
39
|
+
console.log('The Data Extraction suite is an optional add-on, available for download');
|
|
40
|
+
console.log('at https://docs.apryse.com/documentation/core/info/modules/. If you have already');
|
|
41
|
+
console.log('downloaded this module, ensure that the SDK is able to find the required files');
|
|
42
|
+
console.log('using the PDFNet.addResourceSearchPath() function.\n');
|
|
43
|
+
}
|
|
44
|
+
else
|
|
45
|
+
{
|
|
46
|
+
try {
|
|
47
|
+
// Extract tabular data as a JSON file
|
|
48
|
+
console.log('Extract tabular data as a JSON file');
|
|
49
|
+
|
|
50
|
+
let outputFile = outputPath + 'table.json';
|
|
51
|
+
await PDFNet.DataExtractionModule.extractData(inputPath + 'table.pdf', outputFile, PDFNet.DataExtractionModule.DataExtractionEngine.e_Tabular);
|
|
52
|
+
|
|
53
|
+
console.log('Result saved in ' + outputFile);
|
|
54
|
+
|
|
55
|
+
///////////////////////////////////////////////////////
|
|
56
|
+
// Extract tabular data as a JSON string
|
|
57
|
+
console.log('Extract tabular data as a JSON string');
|
|
58
|
+
|
|
59
|
+
outputFile = outputPath + 'financial.json';
|
|
60
|
+
const json = await PDFNet.DataExtractionModule.extractDataAsString(inputPath + 'financial.pdf', PDFNet.DataExtractionModule.DataExtractionEngine.e_Tabular);
|
|
61
|
+
fs.writeFileSync(outputFile, json);
|
|
62
|
+
|
|
63
|
+
console.log('Result saved in ' + outputFile);
|
|
64
|
+
|
|
65
|
+
///////////////////////////////////////////////////////
|
|
66
|
+
// Extract tabular data as an XLSX file
|
|
67
|
+
console.log('Extract tabular data as an XLSX file');
|
|
68
|
+
|
|
69
|
+
outputFile = outputPath + 'table.xlsx';
|
|
70
|
+
await PDFNet.DataExtractionModule.extractToXLSX(inputPath + 'table.pdf', outputFile);
|
|
71
|
+
|
|
72
|
+
console.log('Result saved in ' + outputFile);
|
|
73
|
+
|
|
74
|
+
///////////////////////////////////////////////////////
|
|
75
|
+
// Extract tabular data as an XLSX stream (also known as filter)
|
|
76
|
+
console.log('Extract tabular data as an XLSX stream');
|
|
77
|
+
|
|
78
|
+
outputFile = outputPath + 'financial.xlsx';
|
|
79
|
+
const outputXlsxStream = await PDFNet.Filter.createMemoryFilter(0, false);
|
|
80
|
+
const options = new PDFNet.DataExtractionModule.DataExtractionOptions();
|
|
81
|
+
options.setPages("1"); // page 1
|
|
82
|
+
await PDFNet.DataExtractionModule.extractToXLSXWithFilter(inputPath + 'financial.pdf', outputXlsxStream, options);
|
|
83
|
+
outputXlsxStream.memoryFilterSetAsInputFilter();
|
|
84
|
+
outputXlsxStream.writeToFile(outputFile, false);
|
|
85
|
+
|
|
86
|
+
console.log('Result saved in ' + outputFile);
|
|
87
|
+
} catch (err) {
|
|
88
|
+
console.log(err);
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
//////////////////////////////////////////////////////////////////////////
|
|
93
|
+
// The following sample illustrates how to extract document structure from PDF documents.
|
|
94
|
+
//////////////////////////////////////////////////////////////////////////
|
|
95
|
+
|
|
96
|
+
// Test if the add-on is installed
|
|
97
|
+
if (!await PDFNet.DataExtractionModule.isModuleAvailable(PDFNet.DataExtractionModule.DataExtractionEngine.e_DocStructure)) {
|
|
98
|
+
console.log('\nUnable to run Data Extraction: Apryse SDK Structured Output module not available.');
|
|
99
|
+
console.log('---------------------------------------------------------------');
|
|
100
|
+
console.log('The Data Extraction suite is an optional add-on, available for download');
|
|
101
|
+
console.log('at https://docs.apryse.com/documentation/core/info/modules/. If you have already');
|
|
102
|
+
console.log('downloaded this module, ensure that the SDK is able to find the required files');
|
|
103
|
+
console.log('using the PDFNet.addResourceSearchPath() function.\n');
|
|
104
|
+
}
|
|
105
|
+
else
|
|
106
|
+
{
|
|
107
|
+
try {
|
|
108
|
+
// Extract document structure as a JSON file
|
|
109
|
+
console.log('Extract document structure as a JSON file');
|
|
110
|
+
|
|
111
|
+
let outputFile = outputPath + 'paragraphs_and_tables.json';
|
|
112
|
+
await PDFNet.DataExtractionModule.extractData(inputPath + 'paragraphs_and_tables.pdf', outputFile, PDFNet.DataExtractionModule.DataExtractionEngine.e_DocStructure);
|
|
113
|
+
|
|
114
|
+
console.log('Result saved in ' + outputFile);
|
|
115
|
+
|
|
116
|
+
///////////////////////////////////////////////////////
|
|
117
|
+
// Extract document structure as a JSON string
|
|
118
|
+
console.log('Extract document structure as a JSON string');
|
|
119
|
+
|
|
120
|
+
outputFile = outputPath + 'tagged.json';
|
|
121
|
+
const json = await PDFNet.DataExtractionModule.extractDataAsString(inputPath + 'tagged.pdf', PDFNet.DataExtractionModule.DataExtractionEngine.e_DocStructure);
|
|
122
|
+
fs.writeFileSync(outputFile, json);
|
|
123
|
+
|
|
124
|
+
console.log('Result saved in ' + outputFile);
|
|
125
|
+
} catch (err) {
|
|
126
|
+
console.log(err);
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
//////////////////////////////////////////////////////////////////////////
|
|
131
|
+
// The following sample illustrates how to extract form fields from PDF documents.
|
|
132
|
+
//////////////////////////////////////////////////////////////////////////
|
|
133
|
+
|
|
134
|
+
// Test if the add-on is installed
|
|
135
|
+
if (!await PDFNet.DataExtractionModule.isModuleAvailable(PDFNet.DataExtractionModule.DataExtractionEngine.e_Form)) {
|
|
136
|
+
console.log('\nUnable to run Data Extraction: Apryse SDK AIFormFieldExtractor module not available.');
|
|
137
|
+
console.log('---------------------------------------------------------------');
|
|
138
|
+
console.log('The Data Extraction suite is an optional add-on, available for download');
|
|
139
|
+
console.log('at https://docs.apryse.com/documentation/core/info/modules/. If you have already');
|
|
140
|
+
console.log('downloaded this module, ensure that the SDK is able to find the required files');
|
|
141
|
+
console.log('using the PDFNet.addResourceSearchPath() function.\n');
|
|
142
|
+
}
|
|
143
|
+
else
|
|
144
|
+
{
|
|
145
|
+
try {
|
|
146
|
+
// Extract form fields as a JSON file
|
|
147
|
+
console.log('Extract form fields as a JSON file');
|
|
148
|
+
|
|
149
|
+
let outputFile = outputPath + 'formfields-scanned.json';
|
|
150
|
+
await PDFNet.DataExtractionModule.extractData(inputPath + 'formfields-scanned.pdf', outputFile, PDFNet.DataExtractionModule.DataExtractionEngine.e_Form);
|
|
151
|
+
|
|
152
|
+
console.log('Result saved in ' + outputFile);
|
|
153
|
+
|
|
154
|
+
///////////////////////////////////////////////////////
|
|
155
|
+
// Extract form fields as a JSON string
|
|
156
|
+
console.log('Extract form fields as a JSON string');
|
|
157
|
+
|
|
158
|
+
outputFile = outputPath + 'formfields.json';
|
|
159
|
+
const json = await PDFNet.DataExtractionModule.extractDataAsString(inputPath + 'formfields.pdf', PDFNet.DataExtractionModule.DataExtractionEngine.e_Form);
|
|
160
|
+
fs.writeFileSync(outputFile, json);
|
|
161
|
+
|
|
162
|
+
console.log('Result saved in ' + outputFile);
|
|
163
|
+
|
|
164
|
+
//////////////////////////////////////////////////////////////////////////
|
|
165
|
+
// Detect and add form fields to a PDF document.
|
|
166
|
+
// Document already has form fields, and this sample will update to new found fields.
|
|
167
|
+
{
|
|
168
|
+
console.log('Detect and add form fields in a PDF file, keep new fields');
|
|
169
|
+
|
|
170
|
+
const doc = await PDFNet.PDFDoc.createFromFilePath(inputPath + "formfields-scanned-withfields.pdf");
|
|
171
|
+
|
|
172
|
+
await PDFNet.DataExtractionModule.detectAndAddFormFieldsToPDF(doc);
|
|
173
|
+
outputFile = outputPath + 'formfields-scanned-fields-new.pdf';
|
|
174
|
+
await doc.save(outputFile, PDFNet.SDFDoc.SaveOptions.e_linearized);
|
|
175
|
+
|
|
176
|
+
console.log('Result saved in ' + outputFile);
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
//////////////////////////////////////////////////////////////////////////
|
|
180
|
+
// Detect and add form fields to a PDF document.
|
|
181
|
+
// Document already has form fields, and this sample will keep the original fields.
|
|
182
|
+
{
|
|
183
|
+
console.log('Detect and add form fields in a PDF file, keep old fields');
|
|
184
|
+
|
|
185
|
+
const doc = await PDFNet.PDFDoc.createFromFilePath(inputPath + "formfields-scanned-withfields.pdf");
|
|
186
|
+
|
|
187
|
+
const options = new PDFNet.DataExtractionModule.DataExtractionOptions();
|
|
188
|
+
options.setOverlappingFormFieldBehavior('KeepOld');
|
|
189
|
+
|
|
190
|
+
await PDFNet.DataExtractionModule.detectAndAddFormFieldsToPDF(doc, options);
|
|
191
|
+
outputFile = outputPath + 'formfields-scanned-fields-old.pdf';
|
|
192
|
+
await doc.save(outputFile, PDFNet.SDFDoc.SaveOptions.e_linearized);
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
console.log('Result saved in ' + outputFile);
|
|
196
|
+
|
|
197
|
+
} catch (err) {
|
|
198
|
+
console.log(err);
|
|
199
|
+
}
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
//////////////////////////////////////////////////////////////////////////
|
|
203
|
+
|
|
204
|
+
console.log('Done.');
|
|
205
|
+
};
|
|
206
|
+
|
|
207
|
+
PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function (error) {
|
|
208
|
+
console.log('Error: ' + JSON.stringify(error));
|
|
209
|
+
}).then(function () { return PDFNet.shutdown(); });
|
|
210
|
+
};
|
|
211
|
+
exports.runDataExtractionTest();
|
|
212
|
+
})(exports);
|
|
213
|
+
// eslint-disable-next-line spaced-comment
|
|
214
|
+
//# sourceURL=DataExtractionTest.js
|