@pdftron/pdfnet-node-samples 9.4.2 → 9.5.0-1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. package/package.json +18 -18
  2. package/readme.md +12 -12
  3. package/samples/runall.bat +12 -12
  4. package/samples/AddImageTest/AddImageTest.js +0 -116
  5. package/samples/AdvancedImagingTest/AdvancedImagingTest.js +0 -78
  6. package/samples/AnnotationTest/AnnotationTest.js +0 -642
  7. package/samples/BookmarkTest/BookmarkTest.js +0 -220
  8. package/samples/CAD2PDFTest/CAD2PDFTest.js +0 -80
  9. package/samples/ContentReplacerTest/ContentReplacerTest.js +0 -75
  10. package/samples/ConvertTest/ConvertTest.js +0 -256
  11. package/samples/DigitalSignaturesTest/DigitalSignaturesTest.js +0 -527
  12. package/samples/ElementBuilderTest/ElementBuilderTest.js +0 -514
  13. package/samples/ElementEditTest/ElementEditTest.js +0 -111
  14. package/samples/ElementReaderAdvTest/ElementReaderAdvTest.js +0 -305
  15. package/samples/ElementReaderTest/ElementReaderTest.js +0 -77
  16. package/samples/EncTest/EncTest.js +0 -176
  17. package/samples/FDFTest/FDFTest.js +0 -219
  18. package/samples/HTML2PDFTest/HTML2PDFTest.js +0 -166
  19. package/samples/HighlightsTest/HighlightsTest.js +0 -97
  20. package/samples/ImageExtractTest/ImageExtractTest.js +0 -130
  21. package/samples/ImpositionTest/ImpositionTest.js +0 -87
  22. package/samples/InteractiveFormsTest/InteractiveFormsTest.js +0 -382
  23. package/samples/JBIG2Test/JBIG2Test.js +0 -89
  24. package/samples/LicenseKey/LicenseKey.js +0 -11
  25. package/samples/LogicalStructureTest/LogicalStructureTest.js +0 -251
  26. package/samples/OCRTest/OCRTest.js +0 -235
  27. package/samples/OfficeTemplateTest/OfficeTemplateTest.js +0 -77
  28. package/samples/OfficeToPDFTest/OfficeToPDFTest.js +0 -125
  29. package/samples/OptimizerTest/OptimizerTest.js +0 -192
  30. package/samples/PDF2HtmlTest/PDF2HtmlTest.js +0 -123
  31. package/samples/PDF2OfficeTest/PDF2OfficeTest.js +0 -158
  32. package/samples/PDFATest/PDFATest.js +0 -85
  33. package/samples/PDFDocMemoryTest/PDFDocMemoryTest.js +0 -85
  34. package/samples/PDFDrawTest/PDFDrawTest.js +0 -306
  35. package/samples/PDFLayersTest/PDFLayersTest.js +0 -295
  36. package/samples/PDFPackageTest/PDFPackageTest.js +0 -112
  37. package/samples/PDFPageTest/PDFPageTest.js +0 -190
  38. package/samples/PDFRedactTest/PDFRedactTest.js +0 -75
  39. package/samples/PageLabelsTest/PageLabelsTest.js +0 -139
  40. package/samples/PatternTest/PatternTest.js +0 -227
  41. package/samples/RectTest/RectTest.js +0 -41
  42. package/samples/SDFTest/SDFTest.js +0 -89
  43. package/samples/StamperTest/StamperTest.js +0 -256
  44. package/samples/TestFiles/BusinessCardTemplate.pdf +0 -0
  45. package/samples/TestFiles/Fishermen.docx +0 -0
  46. package/samples/TestFiles/Font_licenses.txt +0 -140
  47. package/samples/TestFiles/GlobalSignRootForTST.cer +0 -0
  48. package/samples/TestFiles/License.txt +0 -1
  49. package/samples/TestFiles/Misc-Fixed.pfa +0 -1166
  50. package/samples/TestFiles/NotoSans_with_hindi.ttf +0 -0
  51. package/samples/TestFiles/Output/empty +0 -1
  52. package/samples/TestFiles/SHA-2 Root USERTrust RSA CA Sectigo timestamping.crt +0 -34
  53. package/samples/TestFiles/SYH_Letter.docx +0 -0
  54. package/samples/TestFiles/TigerText.pdf +0 -0
  55. package/samples/TestFiles/US061222892-a.pdf +0 -0
  56. package/samples/TestFiles/butterfly.png +0 -0
  57. package/samples/TestFiles/credit card numbers.pdf +0 -0
  58. package/samples/TestFiles/dice.jpg +0 -0
  59. package/samples/TestFiles/dice.u3d +0 -0
  60. package/samples/TestFiles/doc_to_sign.pdf +0 -0
  61. package/samples/TestFiles/factsheet_Arabic.docx +0 -0
  62. package/samples/TestFiles/fish.pdf +0 -0
  63. package/samples/TestFiles/font.ttf +0 -0
  64. package/samples/TestFiles/form1.pdf +1 -245
  65. package/samples/TestFiles/form1_annots.xfdf +0 -34
  66. package/samples/TestFiles/form1_data.fdf +0 -4
  67. package/samples/TestFiles/form1_data.xfdf +0 -140
  68. package/samples/TestFiles/grayscale.tif +0 -0
  69. package/samples/TestFiles/hindi_sample_utf16le.txt +0 -0
  70. package/samples/TestFiles/imagemask.dat +0 -32
  71. package/samples/TestFiles/logo_red.png +0 -0
  72. package/samples/TestFiles/lorem_ipsum.pdf +0 -0
  73. package/samples/TestFiles/multipage.tif +0 -0
  74. package/samples/TestFiles/my_stream.txt +0 -2310
  75. package/samples/TestFiles/newsletter.pdf +0 -0
  76. package/samples/TestFiles/newsletter.xod +0 -0
  77. package/samples/TestFiles/numbered.pdf +0 -0
  78. package/samples/TestFiles/op_blend_test.pdf +0 -1498
  79. package/samples/TestFiles/palm.jp2 +0 -0
  80. package/samples/TestFiles/paragraphs_and_tables.pdf +0 -0
  81. package/samples/TestFiles/pdfnet.gif +0 -0
  82. package/samples/TestFiles/pdftron.bmp +0 -0
  83. package/samples/TestFiles/pdftron.cer +0 -0
  84. package/samples/TestFiles/pdftron.pfx +0 -0
  85. package/samples/TestFiles/pdftron_smart_substitution.plugin +0 -0
  86. package/samples/TestFiles/peppers.jpg +0 -0
  87. package/samples/TestFiles/signature.jpg +0 -0
  88. package/samples/TestFiles/simple-emf.emf +0 -0
  89. package/samples/TestFiles/simple-excel_2007.xlsx +0 -0
  90. package/samples/TestFiles/simple-outlook.msg +0 -0
  91. package/samples/TestFiles/simple-powerpoint_2007.pptx +0 -0
  92. package/samples/TestFiles/simple-publisher.pub +0 -0
  93. package/samples/TestFiles/simple-rtf.rtf +0 -224
  94. package/samples/TestFiles/simple-text.txt +0 -61
  95. package/samples/TestFiles/simple-visio.vsd +0 -0
  96. package/samples/TestFiles/simple-webpage.html +0 -731
  97. package/samples/TestFiles/simple-webpage.mht +0 -6972
  98. package/samples/TestFiles/simple-webpage_files/colorschememapping.xml +0 -2
  99. package/samples/TestFiles/simple-webpage_files/filelist.xml +0 -14
  100. package/samples/TestFiles/simple-webpage_files/image001.gif +0 -0
  101. package/samples/TestFiles/simple-webpage_files/image002.png +0 -0
  102. package/samples/TestFiles/simple-webpage_files/image003.jpg +0 -0
  103. package/samples/TestFiles/simple-webpage_files/image004.emz +0 -0
  104. package/samples/TestFiles/simple-webpage_files/image005.gif +0 -0
  105. package/samples/TestFiles/simple-webpage_files/image006.png +0 -0
  106. package/samples/TestFiles/simple-webpage_files/image007.gif +0 -0
  107. package/samples/TestFiles/simple-webpage_files/oledata.mso +0 -0
  108. package/samples/TestFiles/simple-webpage_files/themedata.thmx +0 -0
  109. package/samples/TestFiles/simple-word_2007.docx +0 -0
  110. package/samples/TestFiles/simple-xps.xps +0 -0
  111. package/samples/TestFiles/tagged.pdf +0 -0
  112. package/samples/TestFiles/the_rime_of_the_ancient_mariner.docx +0 -0
  113. package/samples/TestFiles/tiger.pdf +0 -0
  114. package/samples/TestFiles/waiver.pdf +0 -0
  115. package/samples/TestFiles/waiver_withApprovalField.pdf +0 -0
  116. package/samples/TestFiles/waiver_withApprovalField_certified.pdf +1 -424
  117. package/samples/TestFiles/waiver_withApprovalField_certified_approved.pdf +1 -466
  118. package/samples/TextExtractTest/TextExtractTest.js +0 -287
  119. package/samples/TextSearchTest/TextSearchTest.js +0 -122
  120. package/samples/U3DTest/U3DTest.js +0 -105
  121. package/samples/UndoRedoTest/UndoRedoTest.js +0 -101
  122. package/samples/UnicodeWriteTest/UnicodeWriteTest.js +0 -174
  123. package/samples/WebViewerConvertTest/WebViewerConvertTest.js +0 -136
@@ -1,251 +0,0 @@
1
- //---------------------------------------------------------------------------------------
2
- // Copyright (c) 2001-2022 by PDFTron Systems Inc. All Rights Reserved.
3
- // Consult legal.txt regarding legal and license information.
4
- //---------------------------------------------------------------------------------------
5
-
6
- //---------------------------------------------------------------------------------------
7
- // This sample explores the structure and content of a tagged PDF document and dumps
8
- // the structure information to the console window.
9
- //
10
- // In tagged PDF documents StructTree acts as a central repository for information
11
- // related to a PDF document's logical structure. The tree consists of StructElement-s
12
- // and ContentItem-s which are leaf nodes of the structure tree.
13
- //
14
- // The sample can be extended to access and extract the marked-content elements such
15
- // as text and images.
16
- //---------------------------------------------------------------------------------------
17
-
18
-
19
- const { PDFNet } = require('@pdftron/pdfnet-node');
20
- const PDFTronLicense = require('../LicenseKey/LicenseKey');
21
-
22
- ((exports) => {
23
-
24
- exports.runLogicalStructureTest = () => {
25
-
26
- const printAndIndent = (printState, indent) => {
27
- console.log(printState.str);
28
-
29
- let indentStr = '';
30
- for (let i = 0; i < indent; ++i) {
31
- indentStr += ' ';
32
- }
33
- printState.str = indentStr;
34
- };
35
-
36
- // Used in code snippet 1.
37
- const processStructElement = async(element, indent, printState) => {
38
- if (!(await element.isValid())) {
39
- return;
40
- }
41
-
42
-
43
- // Print out the type and title info, if any.
44
- printAndIndent(printState, indent++);
45
- printState.str += 'Type: ' + (await element.getType());
46
- if (await element.hasTitle()) {
47
- printState.str += '. Title: ' + (await element.getTitle());
48
- }
49
-
50
- const num = await element.getNumKids();
51
- for (let i = 0; i < num; ++i) {
52
- // Check if the kid is a leaf node (i.e. it is a ContentItem).
53
- if (await element.isContentItem(i)) {
54
- const cont = await element.getAsContentItem(i);
55
- const type = await cont.getType();
56
-
57
- const page = await cont.getPage();
58
-
59
- printAndIndent(printState, indent);
60
- printState.str += 'Content Item. Part of page #' + (await page.getIndex());
61
-
62
- printAndIndent(printState, indent);
63
- switch (type) {
64
- case PDFNet.ContentItem.Type.e_MCID:
65
- case PDFNet.ContentItem.Type.e_MCR:
66
- printState.str += 'MCID: ' + (await cont.getMCID());
67
- break;
68
- case PDFNet.ContentItem.Type.e_OBJR:
69
- {
70
- printState.str += 'OBJR ';
71
- const refObj = await cont.getRefObj();
72
- if (refObj) {
73
- printState.str += '- Referenced Object#: ' + refObj.getObjNum();
74
- }
75
- }
76
- break;
77
- default:
78
- break;
79
- }
80
- } else { // the kid is another StructElement node.
81
- await processStructElement(await element.getAsStructElem(i), indent, printState);
82
- }
83
- }
84
- };
85
-
86
- // Used in code snippet 2.
87
- const processElements = async(reader, printState) => {
88
- let element;
89
- while (element = await reader.next()) { // Read page contents
90
- // In this sample we process only paths & text, but the code can be
91
- // extended to handle any element type.
92
- const type = await element.getType();
93
- if (type === PDFNet.Element.Type.e_path || type === PDFNet.Element.Type.e_text || type === PDFNet.Element.Type.e_path) {
94
- switch (type) {
95
- case PDFNet.Element.Type.e_path: // Process path ...
96
- printState.str += '\nPATH: ';
97
- break;
98
- case PDFNet.Element.Type.e_text: // Process text ...
99
- printState.str += '\nTEXT: ' + (await element.getTextString()) + '\n';
100
- break;
101
- case PDFNet.Element.Type.e_form: // Process form XObjects
102
- printState.str += '\nFORM XObject: ';
103
- // reader.formBegin();
104
- // await ProcessElements(reader);
105
- // reader.end();
106
- break;
107
- }
108
-
109
- // Check if the element is associated with any structural element.
110
- // Content items are leaf nodes of the structure tree.
111
- const structParent = await element.getParentStructElement();
112
- if (await structParent.isValid()) {
113
- // Print out the parent structural element's type, title, and object number.
114
- printState.str += ' Type: ' + (await structParent.getType()) + ', MCID: ' + (await element.getStructMCID());
115
- if (await structParent.hasTitle()) {
116
- printState.str += '. Title: ' + (await structParent.getTitle());
117
- }
118
- printState.str += ', Obj#: ' + (await (await structParent.getSDFObj()).getObjNum());
119
- }
120
- }
121
- }
122
- };
123
-
124
- // Used in code snippet 3.
125
- const processElements2 = async(reader, mcidPageMap) => {
126
- let element;
127
- while (element = await reader.next()) { // Read page contents
128
- // In this sample we process only text, but the code can be extended
129
- // to handle paths, images, or any other Element type.
130
- const mcid = await element.getStructMCID();
131
- if (mcid >= 0 && (await element.getType()) === PDFNet.Element.Type.e_text) {
132
- const val = await element.getTextString();
133
- if (mcid in mcidPageMap) {
134
- mcidPageMap[mcid] += val;
135
- } else {
136
- mcidPageMap[mcid] = val;
137
- }
138
- }
139
- }
140
- };
141
-
142
- // Used in code snippet 3.
143
- const processStructElement2 = async(element, mcidDocMap, indent, printState) => {
144
- if (!(await element.isValid())) {
145
- return;
146
- }
147
-
148
- // Print out the type and title info, if any.
149
- printAndIndent(printState, indent);
150
- printState.str += '<' + (await element.getType());
151
- if (await element.hasTitle()) {
152
- printState.str += ' title="' + (await element.getTitle()) + '"';
153
- }
154
- printState.str += '>';
155
-
156
- const num = await element.getNumKids();
157
- for (let i = 0; i < num; ++i) {
158
- if (await element.isContentItem(i)) {
159
- const cont = await element.getAsContentItem(i);
160
- if ((await cont.getType()) === PDFNet.ContentItem.Type.e_MCID) {
161
- const pageNum = await (await cont.getPage()).getIndex();
162
- const mcidPageMap = mcidDocMap[pageNum];
163
- if (mcidPageMap) {
164
- const mcid = await cont.getMCID();
165
- if (mcid in mcidPageMap) {
166
- printState.str += mcidPageMap[mcid];
167
- }
168
- }
169
- }
170
- } else { // the kid is another StructElement node.
171
- await processStructElement2(await element.getAsStructElem(i), mcidDocMap, indent + 1, printState);
172
- }
173
- }
174
-
175
- printAndIndent(printState, indent);
176
- printState.str += '</' + (await element.getType()) + '>';
177
- };
178
-
179
- const main = async() => {
180
- // Relative path to the folder containing test files.
181
- const inputPath = '../TestFiles/';
182
- const printState = { str: '' };
183
- try { // Extract logical structure from a PDF document
184
- const doc = await PDFNet.PDFDoc.createFromFilePath(inputPath + 'tagged.pdf');
185
- doc.initSecurityHandler();
186
-
187
- let reader = null;
188
- let tree = null;
189
-
190
- console.log('____________________________________________________________');
191
- console.log('Sample 1 - Traverse logical structure tree...');
192
- tree = await doc.getStructTree();
193
- if (await tree.isValid()) {
194
- console.log('Document has a StructTree root.');
195
- for (let i = 0, numKids = await tree.getNumKids(); i < numKids; ++i) {
196
- // Recursively get structure info for all child elements.
197
- await processStructElement(await tree.getKid(i), 0, printState);
198
- }
199
- } else {
200
- console.log('This document does not contain any logical structure.');
201
- }
202
- printAndIndent(printState, 0);
203
- console.log('Done 1.');
204
-
205
- console.log('____________________________________________________________');
206
- console.log('Sample 2 - Get parent logical structure elements from');
207
- console.log('layout elements.');
208
- reader = await PDFNet.ElementReader.create();
209
- for (let itr = await doc.getPageIterator(); await itr.hasNext(); itr.next()) {
210
- reader.beginOnPage(await itr.current());
211
- await processElements(reader, printState);
212
- reader.end();
213
- }
214
- printAndIndent(printState, 0);
215
- console.log('Done 2.');
216
-
217
- console.log('____________________________________________________________');
218
- console.log("Sample 3 - 'XML style' extraction of PDF logical structure and page content.");
219
- {
220
- const mcidDocMap = {};
221
- for (let itr = await doc.getPageIterator(); await itr.hasNext(); itr.next()) {
222
- const page = await itr.current();
223
- reader.beginOnPage(page);
224
- const pageNum = await page.getIndex();
225
- const pageMcidMap = {};
226
- mcidDocMap[pageNum] = pageMcidMap;
227
- await processElements2(reader, pageMcidMap);
228
- reader.end();
229
- }
230
-
231
- tree = await doc.getStructTree();
232
- if (await tree.isValid()) {
233
- for (let i = 0, numKids = await tree.getNumKids(); i < numKids; ++i) {
234
- await processStructElement2(await tree.getKid(i), mcidDocMap, 0, printState);
235
- }
236
- }
237
- }
238
- printAndIndent(printState, 0);
239
- console.log('Done 3.');
240
- await doc.save(inputPath + 'Output/LogicalStructure.pdf', 0);
241
- } catch (err) {
242
- console.log(err);
243
- }
244
- };
245
-
246
- PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function(error){console.log('Error: ' + JSON.stringify(error));}).then(function(){return PDFNet.shutdown();});
247
- };
248
- exports.runLogicalStructureTest();
249
- })(exports);
250
- // eslint-disable-next-line spaced-comment
251
- //# sourceURL=LogicalStructureTest.js
@@ -1,235 +0,0 @@
1
- //---------------------------------------------------------------------------------------
2
- // Copyright (c) 2001-2022 by PDFTron Systems Inc. All Rights Reserved.
3
- // Consult legal.txt regarding legal and license information.
4
- //---------------------------------------------------------------------------------------
5
-
6
-
7
- const { PDFNet } = require('@pdftron/pdfnet-node');
8
- const PDFTronLicense = require('../LicenseKey/LicenseKey');
9
-
10
- ((exports) => {
11
- 'use strict';
12
-
13
- //---------------------------------------------------------------------------------------
14
- // The following sample illustrates how to use OCR module
15
- //---------------------------------------------------------------------------------------
16
- exports.runOCRTest = () => {
17
- const main = async () => {
18
- try {
19
-
20
- PDFNet.addResourceSearchPath('../../lib/');
21
-
22
- const useIRIS = await PDFNet.OCRModule.isIRISModuleAvailable();
23
- if (!(await PDFNet.OCRModule.isModuleAvailable())) {
24
- console.log('\nUnable to run OCRTest: PDFTron SDK OCR module not available.');
25
- console.log('---------------------------------------------------------------');
26
- console.log('The OCR module is an optional add-on, available for download');
27
- console.log('at http://www.pdftron.com/. If you have already downloaded this');
28
- console.log('module, ensure that the SDK is able to find the required files');
29
- console.log('using the PDFNet.addResourceSearchPath() function.\n');
30
-
31
- return;
32
- }
33
-
34
- // Relative path to the folder containing test files.
35
- const input_path = '../TestFiles/OCR/';
36
- const output_path = '../TestFiles/Output/';
37
-
38
- //--------------------------------------------------------------------------------
39
- // Example 1) Process image without specifying options, default language - English - is used
40
- try {
41
-
42
- // A) Setup empty destination doc
43
- const doc = await PDFNet.PDFDoc.create();
44
-
45
- await doc.initSecurityHandler();
46
-
47
- const opts = new PDFNet.OCRModule.OCROptions();
48
- if(useIRIS) opts.setOCREngine('iris');
49
-
50
- // B) Run OCR on the .png with options
51
- await PDFNet.OCRModule.imageToPDF(doc, input_path + 'psychomachia_excerpt.png', opts);
52
-
53
- // C) check the result
54
- await doc.save(output_path + 'psychomachia_excerpt.pdf', 0);
55
-
56
- console.log('Example 1: psychomachia_excerpt.png');
57
-
58
- } catch (err) {
59
- console.log(err);
60
- }
61
-
62
- //--------------------------------------------------------------------------------
63
- // Example 2) Process document using multiple languages
64
- try {
65
- // A) Setup empty destination doc
66
- const doc = await PDFNet.PDFDoc.create();
67
- await doc.initSecurityHandler();
68
-
69
- // B) Setup options with multiple target languages, English will always be considered as secondary language
70
- const opts = new PDFNet.OCRModule.OCROptions();
71
- if(useIRIS) opts.setOCREngine('iris');
72
- opts.addLang('rus');
73
- opts.addLang('deu');
74
-
75
- // C) Run OCR on the .jpg with options
76
- await PDFNet.OCRModule.imageToPDF(doc, input_path + 'multi_lang.jpg', opts);
77
-
78
- // D) check the result
79
- await doc.save(output_path + 'multi_lang.pdf', 0);
80
-
81
- console.log('Example 2: multi_lang.jpg');
82
- } catch (err) {
83
- console.log(err);
84
- }
85
-
86
- //--------------------------------------------------------------------------------
87
- // Example 3) Process a .pdf specifying a language - German - and ignore zone comprising a sidebar image
88
- try {
89
- // A) Open the .pdf document
90
- const doc = await PDFNet.PDFDoc.createFromFilePath(input_path + 'german_kids_song.pdf');
91
- doc.initSecurityHandler();
92
-
93
- // B) Setup options with a single language and an ignore zone
94
- const opts = new PDFNet.OCRModule.OCROptions();
95
- if(useIRIS) opts.setOCREngine('iris');
96
- opts.addLang('deu');
97
-
98
- const ignore_zones = [];
99
- ignore_zones.push(new PDFNet.Rect(424, 163, 493, 730));
100
- opts.addIgnoreZonesForPage(ignore_zones, 1);
101
-
102
- // C) Run OCR on the .pdf with options
103
- await PDFNet.OCRModule.processPDF(doc, opts);
104
-
105
- // D) check the result
106
- await doc.save(output_path + 'german_kids_song.pdf', 0);
107
-
108
- console.log('Example 3: german_kids_song.pdf');
109
- } catch (err) {
110
- console.log(err);
111
- }
112
-
113
- //--------------------------------------------------------------------------------
114
- // Example 4) Process multipage tiff with text/ignore zones specified for each page, optionally provide English as the target language
115
- try {
116
- // A) Setup empty destination doc
117
- const doc = await PDFNet.PDFDoc.create();
118
- await doc.initSecurityHandler();
119
-
120
- // B) Setup options with a single language plus text/ignore zones
121
- const opts = new PDFNet.OCRModule.OCROptions();
122
- if(useIRIS) opts.setOCREngine('iris');
123
- opts.addLang('eng');
124
-
125
- var ignore_zones = [];
126
- // ignore signature box in the first 2 pages
127
- ignore_zones.push(new PDFNet.Rect(1492, 56, 2236, 432));
128
- opts.addIgnoreZonesForPage(ignore_zones, 1);
129
-
130
- ignore_zones = [];
131
- ignore_zones.push(new PDFNet.Rect(1492, 56, 2236, 432));
132
- opts.addIgnoreZonesForPage(ignore_zones, 2);
133
-
134
- // can use a combination of ignore and text boxes to focus on the page area of interest,
135
- // as ignore boxes are applied first, we remove the arrows before selecting part of the diagram
136
- ignore_zones = [];
137
- ignore_zones.push(new PDFNet.Rect(992, 1276, 1368, 1372));
138
- opts.addIgnoreZonesForPage(ignore_zones, 3);
139
-
140
-
141
- const text_zones = [];
142
- // we only have text zones selected in page 3
143
-
144
- // select horizontal BUFFER ZONE sign
145
- text_zones.push(new PDFNet.Rect(900, 2384, 1236, 2480));
146
- // select right vertical BUFFER ZONE sign
147
- text_zones.push(new PDFNet.Rect(1960, 1976, 2016, 2296));
148
- // select Lot No.
149
- text_zones.push(new PDFNet.Rect(696, 1028, 1196, 1128));
150
-
151
- // select part of the plan inside the BUFFER ZONE
152
- text_zones.push(new PDFNet.Rect(428, 1484, 1784, 2344));
153
- text_zones.push(new PDFNet.Rect(948, 1288, 1672, 1476));
154
- opts.addTextZonesForPage(text_zones, 3);
155
-
156
- // C) Run OCR on the .tif with options
157
- await PDFNet.OCRModule.imageToPDF(doc, input_path + 'bc_environment_protection.tif', opts);
158
-
159
- // D) check the result
160
- await doc.save(output_path + 'bc_environment_protection.pdf', 0);
161
-
162
- console.log('Example 4: bc_environment_protection.tif');
163
- } catch (err) {
164
- console.log(err);
165
- }
166
-
167
- //--------------------------------------------------------------------------------
168
- // Example 5) Alternative workflow for extracting OCR result JSON, postprocessing (e.g., removing words not in the dictionary or filtering
169
- // out special characters), and finally applying modified OCR JSON to the source PDF document
170
- try {
171
- // A) Open the .pdf document
172
- const doc = await PDFNet.PDFDoc.createFromFilePath(input_path + 'zero_value_test_no_text.pdf');
173
- await doc.initSecurityHandler();
174
-
175
- const opts = new PDFNet.OCRModule.OCROptions();
176
- if(useIRIS) opts.setOCREngine('iris');
177
-
178
- // B) Run OCR on the .pdf with default English language
179
- const json = await PDFNet.OCRModule.getOCRJsonFromPDF(doc, opts);
180
-
181
- // C) Post-processing step (whatever it might be)
182
- console.log('Have OCR result JSON, re-applying to PDF ');
183
-
184
- // D) Apply potentially modified OCR JSON to the PDF
185
- await PDFNet.OCRModule.applyOCRJsonToPDF(doc, json);
186
-
187
- // E) Check the result
188
- await doc.save(output_path + 'zero_value_test_no_text.pdf', 0);
189
-
190
- console.log('Example 5: extracting and applying OCR JSON from zero_value_test_no_text.pdf');
191
- } catch (err) {
192
- console.log(err);
193
- }
194
-
195
- //--------------------------------------------------------------------------------
196
- // Example 6) The postprocessing workflow also has an option of extracting OCR results in XML format, similar to the one used by TextExtractor
197
- try {
198
-
199
- // A) Setup empty destination doc
200
- const doc = await PDFNet.PDFDoc.create();
201
- await doc.initSecurityHandler();
202
-
203
- const opts = new PDFNet.OCRModule.OCROptions();
204
- if(useIRIS) opts.setOCREngine('iris');
205
-
206
- // B) Run OCR on the .tif with default English language, extracting OCR results in XML format. Note that
207
- // in the process we convert the source image into PDF. We reuse this PDF document later to add hidden text layer to it.
208
- const xml = await PDFNet.OCRModule.getOCRXmlFromImage(doc, input_path + 'physics.tif', opts);
209
-
210
- // C) Post-processing step (whatever it might be)
211
- console.log('Have OCR result XML, re-applying to PDF');
212
-
213
- // D) Apply potentially modified OCR XML to the PDF
214
- await PDFNet.OCRModule.applyOCRXmlToPDF(doc, xml);
215
-
216
- // E) Check the result
217
- await doc.save(output_path + 'physics.pdf', 0);
218
-
219
- console.log('Example 6: extracting and applying OCR XML from physics.tif');
220
- } catch (err) {
221
- console.log(err);
222
- }
223
- console.log('Done.');
224
- } catch (err) {
225
- console.log(err);
226
- }
227
- };
228
- PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function(error) {
229
- console.log('Error: ' + JSON.stringify(error));
230
- }).then(function(){ return PDFNet.shutdown(); });
231
- };
232
- exports.runOCRTest();
233
- })(exports);
234
- // eslint-disable-next-line spaced-comment
235
- //# sourceURL=OCRTest.js
@@ -1,77 +0,0 @@
1
- //---------------------------------------------------------------------------------------
2
- // Copyright (c) 2001-2022 by PDFTron Systems Inc. All Rights Reserved.
3
- // Consult legal.txt regarding legal and license information.
4
- //---------------------------------------------------------------------------------------
5
-
6
- //------------------------------------------------------------------------------
7
- // The following sample illustrates how to use the PDF::Convert utility class
8
- // to convert MS Office files to PDF and replace templated tags present in the document
9
- // with content supplied via json
10
- //
11
- // For a detailed specification of the template format and supported features,
12
- // see: https://www.pdftron.com/documentation/core/guides/generate-via-template/data-model/
13
- //
14
- // This conversion is performed entirely within the PDFNet and has *no*
15
- // external or system dependencies -- Conversion results will be
16
- // the same whether on Windows, Linux or Android.
17
- //
18
- // Please contact us if you have any questions.
19
- //------------------------------------------------------------------------------
20
-
21
- const { PDFNet } = require('@pdftron/pdfnet-node');
22
- const PDFTronLicense = require('../LicenseKey/LicenseKey');
23
-
24
- ((exports) => {
25
- 'use strict';
26
-
27
- exports.runOfficeTemplateTest = () => {
28
-
29
- const inputPath = '../TestFiles/';
30
- const outputPath = inputPath + 'Output/';
31
- const inputFilename = 'SYH_Letter.docx'
32
- const outputFilename = 'SYH_Letter.pdf'
33
-
34
- const main = async () => {
35
-
36
- PDFNet.addResourceSearchPath('../Resources');
37
-
38
- try {
39
- const options = new PDFNet.Convert.OfficeToPDFOptions();
40
-
41
- const json = JSON.stringify({
42
- 'dest_given_name': 'Janice N.',
43
- 'dest_street_address': "187 Duizelstraat",
44
- 'dest_surname': 'Symonds',
45
- 'dest_title': 'Ms.',
46
- 'land_location': '225 Parc St., Rochelle, QC ',
47
- 'lease_problem': 'According to the city records, the lease was initiated in September 2010 and never terminated',
48
- 'logo': { 'image_url': inputPath + 'logo_red.png', 'width' : 64, 'height': 64 },
49
- 'sender_name': 'Arnold Smith'
50
- });
51
-
52
- const templateDoc = await PDFNet.Convert.createOfficeTemplateWithPath(inputPath + inputFilename, options);
53
-
54
- const pdfdoc = await templateDoc.fillTemplateJson(json);
55
-
56
- // save the result
57
- await pdfdoc.save(outputPath + outputFilename, PDFNet.SDFDoc.SaveOptions.e_linearized);
58
-
59
- // And we're done!
60
- console.log('Saved ' + outputFilename);
61
-
62
- } catch (err) {
63
- console.log(err);
64
- }
65
-
66
- console.log('Done.');
67
- };
68
-
69
- PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function (error) {
70
- console.log('Error: ' + JSON.stringify(error));
71
- }).then(function () { return PDFNet.shutdown(); });
72
-
73
- };
74
- exports.runOfficeTemplateTest();
75
- })(exports);
76
- // eslint-disable-next-line spaced-comment
77
- //# sourceURL=OfficeTemplateTest.js
@@ -1,125 +0,0 @@
1
- //---------------------------------------------------------------------------------------
2
- // Copyright (c) 2001-2022 by PDFTron Systems Inc. All Rights Reserved.
3
- // Consult legal.txt regarding legal and license information.
4
- //---------------------------------------------------------------------------------------
5
-
6
- //------------------------------------------------------------------------------
7
- // The following sample illustrates how to use the PDF::Convert utility class
8
- // to convert MS Office files to PDF
9
- //
10
- // This conversion is performed entirely within the PDFNet and has *no*
11
- // external or system dependencies -- Conversion results will be
12
- // the same whether on Windows, Linux or Android.
13
- //
14
- // Please contact us if you have any questions.
15
- //------------------------------------------------------------------------------
16
-
17
- const { PDFNet } = require('@pdftron/pdfnet-node');
18
- const PDFTronLicense = require('../LicenseKey/LicenseKey');
19
-
20
- ((exports) => {
21
- 'use strict';
22
-
23
- exports.runOfficeToPDF = () => {
24
-
25
- const inputPath = '../TestFiles/';
26
- const outputPath = inputPath + 'Output/';
27
-
28
- const simpleDocxConvert = async (inputFilename, outputFilename) => {
29
- // perform the conversion with no optional parameters
30
- const pdfdoc = await PDFNet.Convert.officeToPdfWithPath(inputPath + inputFilename);
31
-
32
- // save the result
33
- await pdfdoc.save(outputPath + outputFilename, PDFNet.SDFDoc.SaveOptions.e_linearized);
34
-
35
- // And we're done!
36
- console.log('Saved ' + outputFilename);
37
- }
38
-
39
- const flexibleDocxConvert = async (inputFilename, outputFilename) => {
40
- // Start with a PDFDoc (the conversion destination)
41
- const pdfdoc = await PDFNet.PDFDoc.create();
42
- pdfdoc.initSecurityHandler();
43
-
44
- const options = new PDFNet.Convert.OfficeToPDFOptions();
45
-
46
- // set up smart font substitutions to improve conversion results
47
- // in situations where the original fonts are not available
48
- options.setSmartSubstitutionPluginPath(inputPath);
49
-
50
- // create a conversion object -- this sets things up but does not yet
51
- // perform any conversion logic.
52
- // in a multithreaded environment, this object can be used to monitor
53
- // the conversion progress and potentially cancel it as well
54
- const conversion = await PDFNet.Convert.streamingPdfConversionWithPdfAndPath(
55
- pdfdoc, inputPath + inputFilename, options);
56
-
57
- // Print the progress of the conversion.
58
- /*
59
- console.log('Status: ' + await conversion.getProgress() * 100 + '%, '
60
- + await conversion.getProgressLabel());
61
- */
62
-
63
- // actually perform the conversion
64
- // this particular method will not throw on conversion failure, but will
65
- // return an error status instead
66
-
67
- while (await conversion.getConversionStatus() === PDFNet.DocumentConversion.Result.e_Incomplete) {
68
- await conversion.convertNextPage();
69
- // print out the progress status as we go
70
- /*
71
- console.log('Status: ' + await conversion.getProgress() * 100 + '%, '
72
- + await conversion.getProgressLabel());
73
- */
74
- }
75
-
76
- if (await conversion.getConversionStatus() === PDFNet.DocumentConversion.Result.e_Success) {
77
- const num_warnings = await conversion.getNumWarnings();
78
-
79
- // print information about the conversion
80
- for (let i = 0; i < num_warnings; ++i) {
81
- console.log('Conversion Warning: ' + await conversion.getWarningString(i));
82
- }
83
-
84
- // save the result
85
- await pdfdoc.save(outputPath + outputFilename, PDFNet.SDFDoc.SaveOptions.e_linearized);
86
- // done
87
- console.log('Saved ' + outputFilename);
88
- }
89
- else {
90
- console.log('Encountered an error during conversion: '
91
- + await conversion.getErrorString());
92
- }
93
- }
94
-
95
-
96
- const main = async () => {
97
-
98
- PDFNet.addResourceSearchPath('../Resources');
99
-
100
- try {
101
- // first the one-line conversion function
102
- await simpleDocxConvert('Fishermen.docx', 'Fishermen.pdf');
103
-
104
- // then the more flexible line-by-line conversion API
105
- await flexibleDocxConvert('the_rime_of_the_ancient_mariner.docx',
106
- 'the_rime_of_the_ancient_mariner.pdf');
107
-
108
- // conversion of RTL content
109
- await flexibleDocxConvert('factsheet_Arabic.docx', 'factsheet_Arabic.pdf');
110
- } catch (err) {
111
- console.log(err);
112
- }
113
-
114
- console.log('Done.');
115
- };
116
-
117
- PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function (error) {
118
- console.log('Error: ' + JSON.stringify(error));
119
- }).then(function () { return PDFNet.shutdown(); });
120
-
121
- };
122
- exports.runOfficeToPDF();
123
- })(exports);
124
- // eslint-disable-next-line spaced-comment
125
- //# sourceURL=OfficeToPDFTest.js