@pdftron/pdfnet-node-samples 9.1.0-beta → 9.2.0-2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +18 -18
- package/readme.md +5 -3
- package/samples/AddImageTest/AddImageTest.js +115 -115
- package/samples/AdvancedImagingTest/AdvancedImagingTest.js +64 -64
- package/samples/AnnotationTest/AnnotationTest.js +641 -641
- package/samples/BookmarkTest/BookmarkTest.js +219 -219
- package/samples/CAD2PDFTest/CAD2PDFTest.js +3 -3
- package/samples/ContentReplacerTest/ContentReplacerTest.js +75 -75
- package/samples/ConvertTest/ConvertTest.js +2 -2
- package/samples/DigitalSignaturesTest/DigitalSignaturesTest.js +2 -2
- package/samples/ElementBuilderTest/ElementBuilderTest.js +513 -513
- package/samples/ElementEditTest/ElementEditTest.js +110 -110
- package/samples/ElementReaderAdvTest/ElementReaderAdvTest.js +305 -305
- package/samples/ElementReaderTest/ElementReaderTest.js +77 -77
- package/samples/EncTest/EncTest.js +175 -175
- package/samples/FDFTest/FDFTest.js +2 -2
- package/samples/HTML2PDFTest/HTML2PDFTest.js +54 -44
- package/samples/ImageExtractTest/ImageExtractTest.js +2 -2
- package/samples/ImpositionTest/ImpositionTest.js +2 -2
- package/samples/InteractiveFormsTest/InteractiveFormsTest.js +2 -2
- package/samples/JBIG2Test/JBIG2Test.js +3 -3
- package/samples/LicenseKey/LicenseKey.js +11 -11
- package/samples/LogicalStructureTest/LogicalStructureTest.js +250 -250
- package/samples/OCRTest/OCRTest.js +2 -2
- package/samples/OfficeTemplateTest/OfficeTemplateTest.js +76 -0
- package/samples/OfficeToPDFTest/OfficeToPDFTest.js +2 -2
- package/samples/OptimizerTest/OptimizerTest.js +2 -2
- package/samples/PDF2HtmlTest/PDF2HtmlTest.js +123 -117
- package/samples/PDF2OfficeTest/PDF2OfficeTest.js +158 -0
- package/samples/PDFATest/PDFATest.js +85 -85
- package/samples/PDFDocMemoryTest/PDFDocMemoryTest.js +2 -2
- package/samples/PDFDrawTest/PDFDrawTest.js +305 -305
- package/samples/PDFLayersTest/PDFLayersTest.js +294 -294
- package/samples/PDFPackageTest/PDFPackageTest.js +3 -3
- package/samples/PDFPageTest/PDFPageTest.js +189 -189
- package/samples/PDFRedactTest/PDFRedactTest.js +74 -74
- package/samples/PageLabelsTest/PageLabelsTest.js +2 -2
- package/samples/PatternTest/PatternTest.js +2 -2
- package/samples/RectTest/RectTest.js +40 -40
- package/samples/SDFTest/SDFTest.js +88 -88
- package/samples/StamperTest/StamperTest.js +255 -255
- package/samples/TestFiles/Misc-Fixed.pfa +1166 -1166
- package/samples/TestFiles/SHA-2 Root USERTrust RSA CA Sectigo timestamping.crt +34 -34
- package/samples/TestFiles/form1_annots.xfdf +33 -33
- package/samples/TestFiles/form1_data.xfdf +139 -139
- package/samples/TestFiles/my_stream.txt +2310 -2310
- package/samples/TextExtractTest/TextExtractTest.js +286 -286
- package/samples/TextSearchTest/TextSearchTest.js +121 -121
- package/samples/U3DTest/U3DTest.js +2 -2
- package/samples/UndoRedoTest/UndoRedoTest.js +101 -101
- package/samples/UnicodeWriteTest/UnicodeWriteTest.js +173 -173
- package/samples/WebViewerConvertTest/WebViewerConvertTest.js +2 -2
- package/samples/runall.bat +3 -4
- package/samples/runall.sh +4 -3
- package/samples/AddImageTest/RunTest.bat +0 -2
- package/samples/AddImageTest/RunTest.sh +0 -2
- package/samples/AdvancedImagingTest/RunTest.bat +0 -2
- package/samples/AdvancedImagingTest/RunTest.sh +0 -2
- package/samples/AnnotationTest/RunTest.bat +0 -2
- package/samples/AnnotationTest/RunTest.sh +0 -2
- package/samples/BookmarkTest/RunTest.bat +0 -2
- package/samples/BookmarkTest/RunTest.sh +0 -2
- package/samples/CAD2PDFTest/RunTest.bat +0 -2
- package/samples/CAD2PDFTest/RunTest.sh +0 -2
- package/samples/ContentReplacerTest/RunTest.bat +0 -2
- package/samples/ContentReplacerTest/RunTest.sh +0 -2
- package/samples/ConvertTest/RunTest.bat +0 -2
- package/samples/ConvertTest/RunTest.sh +0 -2
- package/samples/DigitalSignaturesTest/RunTest.bat +0 -2
- package/samples/DigitalSignaturesTest/RunTest.sh +0 -2
- package/samples/ElementBuilderTest/RunTest.bat +0 -2
- package/samples/ElementBuilderTest/RunTest.sh +0 -2
- package/samples/ElementEditTest/RunTest.bat +0 -2
- package/samples/ElementEditTest/RunTest.sh +0 -2
- package/samples/ElementReaderAdvTest/RunTest.bat +0 -2
- package/samples/ElementReaderAdvTest/RunTest.sh +0 -2
- package/samples/ElementReaderTest/RunTest.bat +0 -2
- package/samples/ElementReaderTest/RunTest.sh +0 -2
- package/samples/EncTest/RunTest.bat +0 -2
- package/samples/EncTest/RunTest.sh +0 -2
- package/samples/FDFTest/RunTest.bat +0 -2
- package/samples/FDFTest/RunTest.sh +0 -2
- package/samples/HTML2PDFTest/RunTest.bat +0 -2
- package/samples/HTML2PDFTest/RunTest.sh +0 -2
- package/samples/ImageExtractTest/RunTest.bat +0 -2
- package/samples/ImageExtractTest/RunTest.sh +0 -2
- package/samples/ImpositionTest/RunTest.bat +0 -2
- package/samples/ImpositionTest/RunTest.sh +0 -2
- package/samples/InteractiveFormsTest/RunTest.bat +0 -2
- package/samples/InteractiveFormsTest/RunTest.sh +0 -2
- package/samples/JBIG2Test/RunTest.bat +0 -2
- package/samples/JBIG2Test/RunTest.sh +0 -2
- package/samples/LogicalStructureTest/RunTest.bat +0 -2
- package/samples/LogicalStructureTest/RunTest.sh +0 -2
- package/samples/OCRTest/RunTest.bat +0 -2
- package/samples/OCRTest/RunTest.sh +0 -2
- package/samples/OfficeToPDFTest/RunTest.bat +0 -2
- package/samples/OfficeToPDFTest/RunTest.sh +0 -2
- package/samples/OptimizerTest/RunTest.bat +0 -2
- package/samples/OptimizerTest/RunTest.sh +0 -2
- package/samples/PDF2HtmlTest/RunTest.bat +0 -2
- package/samples/PDF2HtmlTest/RunTest.sh +0 -2
- package/samples/PDF2WordTest/PDF2WordTest.js +0 -85
- package/samples/PDF2WordTest/RunTest.bat +0 -2
- package/samples/PDF2WordTest/RunTest.sh +0 -2
- package/samples/PDFATest/RunTest.bat +0 -2
- package/samples/PDFATest/RunTest.sh +0 -2
- package/samples/PDFDocMemoryTest/RunTest.bat +0 -2
- package/samples/PDFDocMemoryTest/RunTest.sh +0 -2
- package/samples/PDFDrawTest/RunTest.bat +0 -2
- package/samples/PDFDrawTest/RunTest.sh +0 -2
- package/samples/PDFLayersTest/RunTest.bat +0 -2
- package/samples/PDFLayersTest/RunTest.sh +0 -2
- package/samples/PDFPackageTest/RunTest.bat +0 -2
- package/samples/PDFPackageTest/RunTest.sh +0 -2
- package/samples/PDFPageTest/RunTest.bat +0 -2
- package/samples/PDFPageTest/RunTest.sh +0 -2
- package/samples/PDFRedactTest/RunTest.bat +0 -2
- package/samples/PDFRedactTest/RunTest.sh +0 -2
- package/samples/PageLabelsTest/RunTest.bat +0 -2
- package/samples/PageLabelsTest/RunTest.sh +0 -2
- package/samples/PatternTest/RunTest.bat +0 -2
- package/samples/PatternTest/RunTest.sh +0 -2
- package/samples/RectTest/RunTest.bat +0 -2
- package/samples/RectTest/RunTest.sh +0 -2
- package/samples/SDFTest/RunTest.bat +0 -2
- package/samples/SDFTest/RunTest.sh +0 -2
- package/samples/StamperTest/RunTest.bat +0 -2
- package/samples/StamperTest/RunTest.sh +0 -2
- package/samples/TextExtractTest/RunTest.bat +0 -2
- package/samples/TextExtractTest/RunTest.sh +0 -2
- package/samples/TextSearchTest/RunTest.bat +0 -2
- package/samples/TextSearchTest/RunTest.sh +0 -2
- package/samples/U3DTest/RunTest.bat +0 -2
- package/samples/U3DTest/RunTest.sh +0 -2
- package/samples/UndoRedoTest/RunTest.bat +0 -2
- package/samples/UndoRedoTest/RunTest.sh +0 -2
- package/samples/UnicodeWriteTest/RunTest.bat +0 -2
- package/samples/UnicodeWriteTest/RunTest.sh +0 -2
- package/samples/WebViewerConvertTest/RunTest.bat +0 -2
- package/samples/WebViewerConvertTest/RunTest.sh +0 -2
|
@@ -1,251 +1,251 @@
|
|
|
1
|
-
//---------------------------------------------------------------------------------------
|
|
2
|
-
// Copyright (c) 2001-2021 by PDFTron Systems Inc. All Rights Reserved.
|
|
3
|
-
// Consult legal.txt regarding legal and license information.
|
|
4
|
-
//---------------------------------------------------------------------------------------
|
|
5
|
-
|
|
6
|
-
//---------------------------------------------------------------------------------------
|
|
7
|
-
// This sample explores the structure and content of a tagged PDF document and dumps
|
|
8
|
-
// the structure information to the console window.
|
|
9
|
-
//
|
|
10
|
-
// In tagged PDF documents StructTree acts as a central repository for information
|
|
11
|
-
// related to a PDF document's logical structure. The tree consists of StructElement-s
|
|
12
|
-
// and ContentItem-s which are leaf nodes of the structure tree.
|
|
13
|
-
//
|
|
14
|
-
// The sample can be extended to access and extract the marked-content elements such
|
|
15
|
-
// as text and images.
|
|
16
|
-
//---------------------------------------------------------------------------------------
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
const { PDFNet } = require('@pdftron/pdfnet-node');
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
((exports) => {
|
|
23
|
-
|
|
24
|
-
exports.runLogicalStructureTest = () => {
|
|
25
|
-
|
|
26
|
-
// Flushes the text accumulated in printState.str to the console, then resets
// printState.str to the indentation prefix for the next line.
//
// printState - shared { str } accumulator mutated in place.
// indent     - number of indentation units to seed the next line with.
const printAndIndent = (printState, indent) => {
  console.log(printState.str);

  // One indentation unit per level; fractional levels round up, exactly like
  // a counting loop with an `i < indent` bound would.
  const prefix = indent > 0 ? ' '.repeat(Math.ceil(indent)) : '';
  printState.str = prefix;
};
|
|
35
|
-
|
|
36
|
-
// Used in code snippet 1.
|
|
37
|
-
// Recursively describes a structure element and all of its kids (code
// snippet 1). Text is appended to printState.str and flushed line by line
// through printAndIndent.
//
// element    - structure element wrapper; invalid elements are ignored.
// indent     - nesting depth of this element; its kids print one level deeper.
// printState - shared { str } accumulator.
const processStructElement = async (element, indent, printState) => {
  if (!(await element.isValid())) {
    return;
  }

  // Print out the type and title info, if any. The post-increment means the
  // element itself uses the caller's depth and everything below uses depth + 1.
  printAndIndent(printState, indent++);
  printState.str += 'Type: ' + (await element.getType());
  if (await element.hasTitle()) {
    printState.str += '. Title: ' + (await element.getTitle());
  }

  const num = await element.getNumKids();
  for (let i = 0; i < num; ++i) {
    // Check if the kid is a leaf node (i.e. it is a ContentItem).
    if (await element.isContentItem(i)) {
      const cont = await element.getAsContentItem(i);
      const type = await cont.getType();
      const page = await cont.getPage();

      printAndIndent(printState, indent);
      printState.str += 'Content Item. Part of page #' + (await page.getIndex());

      printAndIndent(printState, indent);
      switch (type) {
        case PDFNet.ContentItem.Type.e_MCID:
        case PDFNet.ContentItem.Type.e_MCR:
          printState.str += 'MCID: ' + (await cont.getMCID());
          break;
        case PDFNet.ContentItem.Type.e_OBJR: {
          printState.str += 'OBJR ';
          const refObj = await cont.getRefObj();
          if (refObj) {
            // getObjNum() is awaited like every other wrapper call in this
            // file; concatenating it directly would print a pending Promise.
            printState.str += '- Referenced Object#: ' + (await refObj.getObjNum());
          }
          break;
        }
        default:
          break;
      }
    } else { // the kid is another StructElement node.
      await processStructElement(await element.getAsStructElem(i), indent, printState);
    }
  }
};
|
|
85
|
-
|
|
86
|
-
// Used in code snippet 2.
|
|
87
|
-
// Reads every element the reader yields (code snippet 2) and, for path, text
// and form XObject elements, appends a label plus the parent structure
// element's details to printState.str. All other element types are skipped.
//
// reader     - element reader already positioned on a page; next() is awaited
//              until it yields a falsy value.
// printState - shared { str } accumulator.
const processElements = async (reader, printState) => {
  let element;
  while ((element = await reader.next())) { // Read page contents
    // In this sample we process only paths, text and form XObjects, but the
    // code can be extended to handle any element type.
    const type = await element.getType();
    switch (type) {
      case PDFNet.Element.Type.e_path: // Process path ...
        printState.str += '\nPATH: ';
        break;
      case PDFNet.Element.Type.e_text: // Process text ...
        printState.str += '\nTEXT: ' + (await element.getTextString()) + '\n';
        break;
      case PDFNet.Element.Type.e_form: // Process form XObjects
        // Previously unreachable: the guard around this switch tested e_path
        // twice and never e_form. To descend into the form, call
        // reader.formBegin(), recurse with processElements, then reader.end().
        printState.str += '\nFORM XObject: ';
        break;
      default:
        continue; // not a reported element type
    }

    // Check if the element is associated with any structural element.
    // Content items are leaf nodes of the structure tree.
    const structParent = await element.getParentStructElement();
    if (await structParent.isValid()) {
      // Print out the parent structural element's type, title, and object number.
      printState.str += ' Type: ' + (await structParent.getType()) + ', MCID: ' + (await element.getStructMCID());
      if (await structParent.hasTitle()) {
        printState.str += '. Title: ' + (await structParent.getTitle());
      }
      printState.str += ', Obj#: ' + (await (await structParent.getSDFObj()).getObjNum());
    }
  }
};
|
|
123
|
-
|
|
124
|
-
// Used in code snippet 3.
|
|
125
|
-
// Collects the text of marked-content elements on one page (code snippet 3).
// Text strings are concatenated per marked-content ID into mcidPageMap.
//
// reader      - element reader already positioned on a page.
// mcidPageMap - plain object keyed by MCID, filled in place.
const processElements2 = async (reader, mcidPageMap) => {
  // Only text is handled here; paths, images or any other Element type could
  // be collected the same way.
  for (let current = await reader.next(); current; current = await reader.next()) {
    const mcid = await current.getStructMCID();
    if (!(mcid >= 0)) {
      continue; // element carries no marked-content ID
    }
    if ((await current.getType()) !== PDFNet.Element.Type.e_text) {
      continue;
    }

    const text = await current.getTextString();
    mcidPageMap[mcid] = (mcid in mcidPageMap) ? mcidPageMap[mcid] + text : text;
  }
};
|
|
141
|
-
|
|
142
|
-
// Used in code snippet 3.
|
|
143
|
-
// Emits an 'XML style' dump of a structure element (code snippet 3): an
// opening tag, the text of its MCID content items, its nested structure
// elements, and a closing tag.
//
// element    - structure element wrapper; invalid elements are ignored.
// mcidDocMap - object keyed by page index whose values map MCID -> text.
// indent     - nesting depth of this element.
// printState - shared { str } accumulator flushed through printAndIndent.
const processStructElement2 = async (element, mcidDocMap, indent, printState) => {
  const usable = await element.isValid();
  if (!usable) {
    return;
  }

  // Opening tag with the type and, if present, the title.
  printAndIndent(printState, indent);
  const tagName = await element.getType();
  const titleAttr = (await element.hasTitle())
    ? ' title="' + (await element.getTitle()) + '"'
    : '';
  printState.str += '<' + tagName + titleAttr + '>';

  const kidCount = await element.getNumKids();
  for (let k = 0; k < kidCount; ++k) {
    if (!(await element.isContentItem(k))) {
      // The kid is another StructElement node: recurse one level deeper.
      await processStructElement2(await element.getAsStructElem(k), mcidDocMap, indent + 1, printState);
      continue;
    }

    const item = await element.getAsContentItem(k);
    if ((await item.getType()) !== PDFNet.ContentItem.Type.e_MCID) {
      continue;
    }

    const pageIndex = await (await item.getPage()).getIndex();
    const pageTexts = mcidDocMap[pageIndex];
    if (!pageTexts) {
      continue;
    }

    const mcid = await item.getMCID();
    if (mcid in pageTexts) {
      printState.str += pageTexts[mcid];
    }
  }

  // Closing tag on its own line.
  printAndIndent(printState, indent);
  printState.str += '</' + (await element.getType()) + '>';
};
|
|
178
|
-
|
|
179
|
-
// Runs the three logical-structure samples against tagged.pdf and saves the
// document to the Output folder. Any error raised along the way is logged
// rather than rethrown.
const main = async () => {
  // Relative path to the folder containing test files.
  const inputPath = '../TestFiles/';
  const printState = { str: '' };
  try { // Extract logical structure from a PDF document
    const doc = await PDFNet.PDFDoc.createFromFilePath(inputPath + 'tagged.pdf');
    // NOTE(review): the calls below were previously left floating; they are
    // awaited so a failure is caught by this try block and each step finishes
    // before the next starts. Awaiting is harmless if a call is synchronous.
    await doc.initSecurityHandler();

    console.log('____________________________________________________________');
    console.log('Sample 1 - Traverse logical structure tree...');
    let tree = await doc.getStructTree();
    if (await tree.isValid()) {
      console.log('Document has a StructTree root.');
      for (let i = 0, numKids = await tree.getNumKids(); i < numKids; ++i) {
        // Recursively get structure info for all child elements.
        await processStructElement(await tree.getKid(i), 0, printState);
      }
    } else {
      console.log('This document does not contain any logical structure.');
    }
    printAndIndent(printState, 0);
    console.log('Done 1.');

    console.log('____________________________________________________________');
    console.log('Sample 2 - Get parent logical structure elements from');
    console.log('layout elements.');
    const reader = await PDFNet.ElementReader.create();
    for (let itr = await doc.getPageIterator(); await itr.hasNext(); await itr.next()) {
      await reader.beginOnPage(await itr.current());
      await processElements(reader, printState);
      await reader.end();
    }
    printAndIndent(printState, 0);
    console.log('Done 2.');

    console.log('____________________________________________________________');
    console.log("Sample 3 - 'XML style' extraction of PDF logical structure and page content.");
    {
      // Page index -> (MCID -> text), filled page by page before the tree walk.
      const mcidDocMap = {};
      for (let itr = await doc.getPageIterator(); await itr.hasNext(); await itr.next()) {
        const page = await itr.current();
        await reader.beginOnPage(page);
        const pageNum = await page.getIndex();
        const pageMcidMap = {};
        mcidDocMap[pageNum] = pageMcidMap;
        await processElements2(reader, pageMcidMap);
        await reader.end();
      }

      tree = await doc.getStructTree();
      if (await tree.isValid()) {
        for (let i = 0, numKids = await tree.getNumKids(); i < numKids; ++i) {
          await processStructElement2(await tree.getKid(i), mcidDocMap, 0, printState);
        }
      }
    }
    printAndIndent(printState, 0);
    console.log('Done 3.');
    await doc.save(inputPath + 'Output/LogicalStructure.pdf', 0);
  } catch (err) {
    console.log(err);
  }
};
|
|
245
|
-
|
|
246
|
-
PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function(error){console.log('Error: ' + JSON.stringify(error));}).then(function(){PDFNet.shutdown();});
|
|
247
|
-
};
|
|
248
|
-
exports.runLogicalStructureTest();
|
|
249
|
-
})(exports);
|
|
250
|
-
// eslint-disable-next-line spaced-comment
|
|
1
|
+
//---------------------------------------------------------------------------------------
|
|
2
|
+
// Copyright (c) 2001-2021 by PDFTron Systems Inc. All Rights Reserved.
|
|
3
|
+
// Consult legal.txt regarding legal and license information.
|
|
4
|
+
//---------------------------------------------------------------------------------------
|
|
5
|
+
|
|
6
|
+
//---------------------------------------------------------------------------------------
|
|
7
|
+
// This sample explores the structure and content of a tagged PDF document and dumps
|
|
8
|
+
// the structure information to the console window.
|
|
9
|
+
//
|
|
10
|
+
// In tagged PDF documents StructTree acts as a central repository for information
|
|
11
|
+
// related to a PDF document's logical structure. The tree consists of StructElement-s
|
|
12
|
+
// and ContentItem-s which are leaf nodes of the structure tree.
|
|
13
|
+
//
|
|
14
|
+
// The sample can be extended to access and extract the marked-content elements such
|
|
15
|
+
// as text and images.
|
|
16
|
+
//---------------------------------------------------------------------------------------
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
const { PDFNet } = require('@pdftron/pdfnet-node');
|
|
20
|
+
const PDFTronLicense = require('../LicenseKey/LicenseKey');
|
|
21
|
+
|
|
22
|
+
((exports) => {
|
|
23
|
+
|
|
24
|
+
exports.runLogicalStructureTest = () => {
|
|
25
|
+
|
|
26
|
+
// Flushes the text accumulated in printState.str to the console, then resets
// printState.str to the indentation prefix for the next line.
//
// printState - shared { str } accumulator mutated in place.
// indent     - number of indentation units to seed the next line with.
const printAndIndent = (printState, indent) => {
  console.log(printState.str);

  // One indentation unit per level; fractional levels round up, exactly like
  // a counting loop with an `i < indent` bound would.
  const prefix = indent > 0 ? ' '.repeat(Math.ceil(indent)) : '';
  printState.str = prefix;
};
|
|
35
|
+
|
|
36
|
+
// Used in code snippet 1.
|
|
37
|
+
// Recursively describes a structure element and all of its kids (code
// snippet 1). Text is appended to printState.str and flushed line by line
// through printAndIndent.
//
// element    - structure element wrapper; invalid elements are ignored.
// indent     - nesting depth of this element; its kids print one level deeper.
// printState - shared { str } accumulator.
const processStructElement = async (element, indent, printState) => {
  if (!(await element.isValid())) {
    return;
  }

  // Print out the type and title info, if any. The post-increment means the
  // element itself uses the caller's depth and everything below uses depth + 1.
  printAndIndent(printState, indent++);
  printState.str += 'Type: ' + (await element.getType());
  if (await element.hasTitle()) {
    printState.str += '. Title: ' + (await element.getTitle());
  }

  const num = await element.getNumKids();
  for (let i = 0; i < num; ++i) {
    // Check if the kid is a leaf node (i.e. it is a ContentItem).
    if (await element.isContentItem(i)) {
      const cont = await element.getAsContentItem(i);
      const type = await cont.getType();
      const page = await cont.getPage();

      printAndIndent(printState, indent);
      printState.str += 'Content Item. Part of page #' + (await page.getIndex());

      printAndIndent(printState, indent);
      switch (type) {
        case PDFNet.ContentItem.Type.e_MCID:
        case PDFNet.ContentItem.Type.e_MCR:
          printState.str += 'MCID: ' + (await cont.getMCID());
          break;
        case PDFNet.ContentItem.Type.e_OBJR: {
          printState.str += 'OBJR ';
          const refObj = await cont.getRefObj();
          if (refObj) {
            // getObjNum() is awaited like every other wrapper call in this
            // file; concatenating it directly would print a pending Promise.
            printState.str += '- Referenced Object#: ' + (await refObj.getObjNum());
          }
          break;
        }
        default:
          break;
      }
    } else { // the kid is another StructElement node.
      await processStructElement(await element.getAsStructElem(i), indent, printState);
    }
  }
};
|
|
85
|
+
|
|
86
|
+
// Used in code snippet 2.
|
|
87
|
+
// Reads every element the reader yields (code snippet 2) and, for path, text
// and form XObject elements, appends a label plus the parent structure
// element's details to printState.str. All other element types are skipped.
//
// reader     - element reader already positioned on a page; next() is awaited
//              until it yields a falsy value.
// printState - shared { str } accumulator.
const processElements = async (reader, printState) => {
  let element;
  while ((element = await reader.next())) { // Read page contents
    // In this sample we process only paths, text and form XObjects, but the
    // code can be extended to handle any element type.
    const type = await element.getType();
    switch (type) {
      case PDFNet.Element.Type.e_path: // Process path ...
        printState.str += '\nPATH: ';
        break;
      case PDFNet.Element.Type.e_text: // Process text ...
        printState.str += '\nTEXT: ' + (await element.getTextString()) + '\n';
        break;
      case PDFNet.Element.Type.e_form: // Process form XObjects
        // Previously unreachable: the guard around this switch tested e_path
        // twice and never e_form. To descend into the form, call
        // reader.formBegin(), recurse with processElements, then reader.end().
        printState.str += '\nFORM XObject: ';
        break;
      default:
        continue; // not a reported element type
    }

    // Check if the element is associated with any structural element.
    // Content items are leaf nodes of the structure tree.
    const structParent = await element.getParentStructElement();
    if (await structParent.isValid()) {
      // Print out the parent structural element's type, title, and object number.
      printState.str += ' Type: ' + (await structParent.getType()) + ', MCID: ' + (await element.getStructMCID());
      if (await structParent.hasTitle()) {
        printState.str += '. Title: ' + (await structParent.getTitle());
      }
      printState.str += ', Obj#: ' + (await (await structParent.getSDFObj()).getObjNum());
    }
  }
};
|
|
123
|
+
|
|
124
|
+
// Used in code snippet 3.
|
|
125
|
+
// Collects the text of marked-content elements on one page (code snippet 3).
// Text strings are concatenated per marked-content ID into mcidPageMap.
//
// reader      - element reader already positioned on a page.
// mcidPageMap - plain object keyed by MCID, filled in place.
const processElements2 = async (reader, mcidPageMap) => {
  // Only text is handled here; paths, images or any other Element type could
  // be collected the same way.
  for (let current = await reader.next(); current; current = await reader.next()) {
    const mcid = await current.getStructMCID();
    if (!(mcid >= 0)) {
      continue; // element carries no marked-content ID
    }
    if ((await current.getType()) !== PDFNet.Element.Type.e_text) {
      continue;
    }

    const text = await current.getTextString();
    mcidPageMap[mcid] = (mcid in mcidPageMap) ? mcidPageMap[mcid] + text : text;
  }
};
|
|
141
|
+
|
|
142
|
+
// Used in code snippet 3.
|
|
143
|
+
// Emits an 'XML style' dump of a structure element (code snippet 3): an
// opening tag, the text of its MCID content items, its nested structure
// elements, and a closing tag.
//
// element    - structure element wrapper; invalid elements are ignored.
// mcidDocMap - object keyed by page index whose values map MCID -> text.
// indent     - nesting depth of this element.
// printState - shared { str } accumulator flushed through printAndIndent.
const processStructElement2 = async (element, mcidDocMap, indent, printState) => {
  const usable = await element.isValid();
  if (!usable) {
    return;
  }

  // Opening tag with the type and, if present, the title.
  printAndIndent(printState, indent);
  const tagName = await element.getType();
  const titleAttr = (await element.hasTitle())
    ? ' title="' + (await element.getTitle()) + '"'
    : '';
  printState.str += '<' + tagName + titleAttr + '>';

  const kidCount = await element.getNumKids();
  for (let k = 0; k < kidCount; ++k) {
    if (!(await element.isContentItem(k))) {
      // The kid is another StructElement node: recurse one level deeper.
      await processStructElement2(await element.getAsStructElem(k), mcidDocMap, indent + 1, printState);
      continue;
    }

    const item = await element.getAsContentItem(k);
    if ((await item.getType()) !== PDFNet.ContentItem.Type.e_MCID) {
      continue;
    }

    const pageIndex = await (await item.getPage()).getIndex();
    const pageTexts = mcidDocMap[pageIndex];
    if (!pageTexts) {
      continue;
    }

    const mcid = await item.getMCID();
    if (mcid in pageTexts) {
      printState.str += pageTexts[mcid];
    }
  }

  // Closing tag on its own line.
  printAndIndent(printState, indent);
  printState.str += '</' + (await element.getType()) + '>';
};
|
|
178
|
+
|
|
179
|
+
// Runs the three logical-structure samples against tagged.pdf and saves the
// document to the Output folder. Any error raised along the way is logged
// rather than rethrown.
const main = async () => {
  // Relative path to the folder containing test files.
  const inputPath = '../TestFiles/';
  const printState = { str: '' };
  try { // Extract logical structure from a PDF document
    const doc = await PDFNet.PDFDoc.createFromFilePath(inputPath + 'tagged.pdf');
    // NOTE(review): the calls below were previously left floating; they are
    // awaited so a failure is caught by this try block and each step finishes
    // before the next starts. Awaiting is harmless if a call is synchronous.
    await doc.initSecurityHandler();

    console.log('____________________________________________________________');
    console.log('Sample 1 - Traverse logical structure tree...');
    let tree = await doc.getStructTree();
    if (await tree.isValid()) {
      console.log('Document has a StructTree root.');
      for (let i = 0, numKids = await tree.getNumKids(); i < numKids; ++i) {
        // Recursively get structure info for all child elements.
        await processStructElement(await tree.getKid(i), 0, printState);
      }
    } else {
      console.log('This document does not contain any logical structure.');
    }
    printAndIndent(printState, 0);
    console.log('Done 1.');

    console.log('____________________________________________________________');
    console.log('Sample 2 - Get parent logical structure elements from');
    console.log('layout elements.');
    const reader = await PDFNet.ElementReader.create();
    for (let itr = await doc.getPageIterator(); await itr.hasNext(); await itr.next()) {
      await reader.beginOnPage(await itr.current());
      await processElements(reader, printState);
      await reader.end();
    }
    printAndIndent(printState, 0);
    console.log('Done 2.');

    console.log('____________________________________________________________');
    console.log("Sample 3 - 'XML style' extraction of PDF logical structure and page content.");
    {
      // Page index -> (MCID -> text), filled page by page before the tree walk.
      const mcidDocMap = {};
      for (let itr = await doc.getPageIterator(); await itr.hasNext(); await itr.next()) {
        const page = await itr.current();
        await reader.beginOnPage(page);
        const pageNum = await page.getIndex();
        const pageMcidMap = {};
        mcidDocMap[pageNum] = pageMcidMap;
        await processElements2(reader, pageMcidMap);
        await reader.end();
      }

      tree = await doc.getStructTree();
      if (await tree.isValid()) {
        for (let i = 0, numKids = await tree.getNumKids(); i < numKids; ++i) {
          await processStructElement2(await tree.getKid(i), mcidDocMap, 0, printState);
        }
      }
    }
    printAndIndent(printState, 0);
    console.log('Done 3.');
    await doc.save(inputPath + 'Output/LogicalStructure.pdf', 0);
  } catch (err) {
    console.log(err);
  }
};
|
|
245
|
+
|
|
246
|
+
PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function(error){console.log('Error: ' + JSON.stringify(error));}).then(function(){return PDFNet.shutdown();});
|
|
247
|
+
};
|
|
248
|
+
exports.runLogicalStructureTest();
|
|
249
|
+
})(exports);
|
|
250
|
+
// eslint-disable-next-line spaced-comment
|
|
251
251
|
//# sourceURL=LogicalStructureTest.js
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
const { PDFNet } = require('@pdftron/pdfnet-node');
|
|
8
|
-
|
|
8
|
+
const PDFTronLicense = require('../LicenseKey/LicenseKey');
|
|
9
9
|
|
|
10
10
|
((exports) => {
|
|
11
11
|
'use strict';
|
|
@@ -242,7 +242,7 @@ var PDFTronLicense = require('../LicenseKey/LicenseKey');
|
|
|
242
242
|
};
|
|
243
243
|
PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function(error) {
|
|
244
244
|
console.log('Error: ' + JSON.stringify(error));
|
|
245
|
-
}).then(function(){ PDFNet.shutdown(); });
|
|
245
|
+
}).then(function(){ return PDFNet.shutdown(); });
|
|
246
246
|
};
|
|
247
247
|
exports.runOCRTest();
|
|
248
248
|
})(exports);
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
//---------------------------------------------------------------------------------------
|
|
2
|
+
// Copyright (c) 2001-2021 by PDFTron Systems Inc. All Rights Reserved.
|
|
3
|
+
// Consult legal.txt regarding legal and license information.
|
|
4
|
+
//---------------------------------------------------------------------------------------
|
|
5
|
+
|
|
6
|
+
//------------------------------------------------------------------------------
|
|
7
|
+
// The following sample illustrates how to use the PDF::Convert utility class
|
|
8
|
+
// to convert MS Office files to PDF and replace templated tags present in the document
|
|
9
|
+
// with content supplied via JSON.
|
|
10
|
+
//
|
|
11
|
+
// For a detailed specification of the template format and supported features,
|
|
12
|
+
// see: https://www.pdftron.com/documentation/core/guides/generate-via-template/data-model/
|
|
13
|
+
//
|
|
14
|
+
// This conversion is performed entirely within the PDFNet and has *no*
|
|
15
|
+
// external or system dependencies -- Conversion results will be
|
|
16
|
+
// the same whether on Windows, Linux or Android.
|
|
17
|
+
//
|
|
18
|
+
// Please contact us if you have any questions.
|
|
19
|
+
//------------------------------------------------------------------------------
|
|
20
|
+
|
|
21
|
+
const { PDFNet } = require('@pdftron/pdfnet-node');
|
|
22
|
+
const PDFTronLicense = require('../LicenseKey/LicenseKey');
|
|
23
|
+
|
|
24
|
+
((exports) => {
|
|
25
|
+
'use strict';
|
|
26
|
+
|
|
27
|
+
exports.runOfficeTemplateTest = () => {
|
|
28
|
+
|
|
29
|
+
const inputPath = '../TestFiles/';
|
|
30
|
+
const outputPath = inputPath + 'Output/';
|
|
31
|
+
const inputFilename = 'SYH_Letter.docx'
|
|
32
|
+
const outputFilename = 'SYH_Letter.pdf'
|
|
33
|
+
|
|
34
|
+
const main = async () => {
|
|
35
|
+
|
|
36
|
+
PDFNet.addResourceSearchPath('../Resources');
|
|
37
|
+
|
|
38
|
+
try {
|
|
39
|
+
const options = new PDFNet.Convert.OfficeToPDFOptions();
|
|
40
|
+
|
|
41
|
+
options.setTemplateParamsJson(JSON.stringify({
|
|
42
|
+
'dest_given_name': 'Janice N.',
|
|
43
|
+
'dest_street_address': "187 Duizelstraat",
|
|
44
|
+
'dest_surname': 'Symonds',
|
|
45
|
+
'dest_title': 'Ms.',
|
|
46
|
+
'land_location': '225 Parc St., Rochelle, QC ',
|
|
47
|
+
'lease_problem': 'According to the city records, the lease was initiated in September 2010 and never terminated',
|
|
48
|
+
'logo': { 'image_url': inputPath + 'logo_red.png', 'width' : 64, 'height': 64 },
|
|
49
|
+
'sender_name': 'Arnold Smith'
|
|
50
|
+
}));
|
|
51
|
+
|
|
52
|
+
// perform the conversion with template delimiters and content dictionary
|
|
53
|
+
const pdfdoc = await PDFNet.Convert.officeToPdfWithPath(inputPath + inputFilename, options);
|
|
54
|
+
|
|
55
|
+
// save the result
|
|
56
|
+
await pdfdoc.save(outputPath + outputFilename, PDFNet.SDFDoc.SaveOptions.e_linearized);
|
|
57
|
+
|
|
58
|
+
// And we're done!
|
|
59
|
+
console.log('Saved ' + outputFilename);
|
|
60
|
+
|
|
61
|
+
} catch (err) {
|
|
62
|
+
console.log(err);
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
console.log('Done.');
|
|
66
|
+
};
|
|
67
|
+
|
|
68
|
+
PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function (error) {
|
|
69
|
+
console.log('Error: ' + JSON.stringify(error));
|
|
70
|
+
}).then(function () { return PDFNet.shutdown(); });
|
|
71
|
+
|
|
72
|
+
};
|
|
73
|
+
exports.runOfficeTemplateTest();
|
|
74
|
+
})(exports);
|
|
75
|
+
// eslint-disable-next-line spaced-comment
|
|
76
|
+
//# sourceURL=OfficeTemplateTest.js
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
//------------------------------------------------------------------------------
|
|
16
16
|
|
|
17
17
|
const { PDFNet } = require('@pdftron/pdfnet-node');
|
|
18
|
-
|
|
18
|
+
const PDFTronLicense = require('../LicenseKey/LicenseKey');
|
|
19
19
|
|
|
20
20
|
((exports) => {
|
|
21
21
|
'use strict';
|
|
@@ -116,7 +116,7 @@ var PDFTronLicense = require('../LicenseKey/LicenseKey');
|
|
|
116
116
|
|
|
117
117
|
PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function (error) {
|
|
118
118
|
console.log('Error: ' + JSON.stringify(error));
|
|
119
|
-
}).then(function () { PDFNet.shutdown(); });
|
|
119
|
+
}).then(function () { return PDFNet.shutdown(); });
|
|
120
120
|
|
|
121
121
|
};
|
|
122
122
|
exports.runOfficeToPDF();
|
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
//---------------------------------------------------------------------------------------
|
|
35
35
|
|
|
36
36
|
const { PDFNet } = require('@pdftron/pdfnet-node');
|
|
37
|
-
|
|
37
|
+
const PDFTronLicense = require('../LicenseKey/LicenseKey');
|
|
38
38
|
|
|
39
39
|
((exports) => {
|
|
40
40
|
'use strict';
|
|
@@ -184,7 +184,7 @@ var PDFTronLicense = require('../LicenseKey/LicenseKey');
|
|
|
184
184
|
};
|
|
185
185
|
PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function (error) {
|
|
186
186
|
console.log('Error: ' + JSON.stringify(error));
|
|
187
|
-
}).then(function () { PDFNet.shutdown(); });
|
|
187
|
+
}).then(function () { return PDFNet.shutdown(); });
|
|
188
188
|
};
|
|
189
189
|
exports.runOptimizerTest();
|
|
190
190
|
})(exports);
|