@pdftron/pdfnet-node-samples 10.8.0 → 10.9.0-1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. package/{samples/AddImageTest → AddImageTest}/AddImageTest.js +115 -115
  2. package/{samples/AdvancedImagingTest → AdvancedImagingTest}/AdvancedImagingTest.js +78 -78
  3. package/{samples/AnnotationTest → AnnotationTest}/AnnotationTest.js +641 -641
  4. package/{samples/BookmarkTest → BookmarkTest}/BookmarkTest.js +219 -219
  5. package/{samples/CAD2PDFTest → CAD2PDFTest}/CAD2PDFTest.js +79 -79
  6. package/{samples/ContentReplacerTest → ContentReplacerTest}/ContentReplacerTest.js +75 -75
  7. package/{samples/ConvertPrintTest → ConvertPrintTest}/ConvertPrintTest.js +153 -153
  8. package/{samples/ConvertTest → ConvertTest}/ConvertTest.js +203 -203
  9. package/{samples/DataExtractionTest → DataExtractionTest}/DataExtractionTest.js +214 -214
  10. package/{samples/DigitalSignaturesTest → DigitalSignaturesTest}/DigitalSignaturesTest.js +621 -526
  11. package/{samples/DocumentCreationTest → DocumentCreationTest}/DocumentCreationTest.js +409 -409
  12. package/{samples/ElementBuilderTest → ElementBuilderTest}/ElementBuilderTest.js +513 -513
  13. package/{samples/ElementEditTest → ElementEditTest}/ElementEditTest.js +110 -110
  14. package/{samples/ElementReaderAdvTest → ElementReaderAdvTest}/ElementReaderAdvTest.js +305 -305
  15. package/{samples/ElementReaderTest → ElementReaderTest}/ElementReaderTest.js +77 -77
  16. package/{samples/EncTest → EncTest}/EncTest.js +175 -175
  17. package/{samples/FDFTest → FDFTest}/FDFTest.js +218 -218
  18. package/{samples/HTML2PDFTest → HTML2PDFTest}/HTML2PDFTest.js +164 -164
  19. package/{samples/HighlightsTest → HighlightsTest}/HighlightsTest.js +97 -97
  20. package/{samples/ImageExtractTest → ImageExtractTest}/ImageExtractTest.js +129 -129
  21. package/{samples/ImpositionTest → ImpositionTest}/ImpositionTest.js +86 -86
  22. package/{samples/InteractiveFormsTest → InteractiveFormsTest}/InteractiveFormsTest.js +381 -381
  23. package/{samples/JBIG2Test → JBIG2Test}/JBIG2Test.js +88 -88
  24. package/{samples/LicenseKey → LicenseKey}/LicenseKey.js +11 -11
  25. package/{samples/LogicalStructureTest → LogicalStructureTest}/LogicalStructureTest.js +250 -250
  26. package/{samples/OCRTest → OCRTest}/OCRTest.js +235 -235
  27. package/{samples/OfficeTemplateTest → OfficeTemplateTest}/OfficeTemplateTest.js +79 -79
  28. package/{samples/OfficeToPDFTest → OfficeToPDFTest}/OfficeToPDFTest.js +125 -125
  29. package/{samples/OptimizerTest → OptimizerTest}/OptimizerTest.js +191 -191
  30. package/{samples/PDF2HtmlTest → PDF2HtmlTest}/PDF2HtmlTest.js +123 -123
  31. package/{samples/PDF2OfficeTest → PDF2OfficeTest}/PDF2OfficeTest.js +158 -158
  32. package/{samples/PDFATest → PDFATest}/PDFATest.js +85 -85
  33. package/{samples/PDFDocMemoryTest → PDFDocMemoryTest}/PDFDocMemoryTest.js +84 -84
  34. package/{samples/PDFDrawTest → PDFDrawTest}/PDFDrawTest.js +305 -305
  35. package/{samples/PDFLayersTest → PDFLayersTest}/PDFLayersTest.js +294 -294
  36. package/{samples/PDFPackageTest → PDFPackageTest}/PDFPackageTest.js +111 -111
  37. package/{samples/PDFPageTest → PDFPageTest}/PDFPageTest.js +189 -189
  38. package/{samples/PDFRedactTest → PDFRedactTest}/PDFRedactTest.js +74 -74
  39. package/{samples/PageLabelsTest → PageLabelsTest}/PageLabelsTest.js +138 -138
  40. package/{samples/PatternTest → PatternTest}/PatternTest.js +226 -226
  41. package/{samples/RectTest → RectTest}/RectTest.js +40 -40
  42. package/{samples/SDFTest → SDFTest}/SDFTest.js +87 -87
  43. package/{samples/StamperTest → StamperTest}/StamperTest.js +255 -255
  44. package/{samples/TestFiles → TestFiles}/Misc-Fixed.pfa +1166 -1166
  45. package/{samples/TestFiles → TestFiles}/SHA-2 Root USERTrust RSA CA Sectigo timestamping.crt +34 -34
  46. package/{samples/TestFiles → TestFiles}/form1_annots.xfdf +33 -33
  47. package/{samples/TestFiles → TestFiles}/form1_data.xfdf +139 -139
  48. package/{samples/TestFiles → TestFiles}/my_stream.txt +2310 -2310
  49. package/{samples/TestFiles → TestFiles}/tiger.svg +378 -378
  50. package/{samples/TextExtractTest → TextExtractTest}/TextExtractTest.js +286 -286
  51. package/{samples/TextSearchTest → TextSearchTest}/TextSearchTest.js +121 -121
  52. package/{samples/U3DTest → U3DTest}/U3DTest.js +104 -104
  53. package/{samples/UndoRedoTest → UndoRedoTest}/UndoRedoTest.js +101 -101
  54. package/{samples/UnicodeWriteTest → UnicodeWriteTest}/UnicodeWriteTest.js +173 -173
  55. package/{samples/WebViewerConvertTest → WebViewerConvertTest}/WebViewerConvertTest.js +135 -135
  56. package/legal.txt +632 -0
  57. package/license.pdf +0 -0
  58. package/package.json +20 -21
  59. package/readme.md +38 -13
  60. package/{samples/runall.bat → runall.bat} +12 -12
  61. package/{samples/runall.sh → runall.sh} +15 -15
  62. /package/{samples/TestFiles → TestFiles}/BusinessCardTemplate.pdf +0 -0
  63. /package/{samples/TestFiles → TestFiles}/Fishermen.docx +0 -0
  64. /package/{samples/TestFiles → TestFiles}/Font_licenses.txt +0 -0
  65. /package/{samples/TestFiles → TestFiles}/GlobalSignRootForTST.cer +0 -0
  66. /package/{samples/TestFiles → TestFiles}/License.txt +0 -0
  67. /package/{samples/TestFiles → TestFiles}/NotoSans_with_hindi.ttf +0 -0
  68. /package/{samples/TestFiles → TestFiles}/Output/empty +0 -0
  69. /package/{samples/TestFiles → TestFiles}/SYH_Letter.docx +0 -0
  70. /package/{samples/TestFiles → TestFiles}/TigerText.pdf +0 -0
  71. /package/{samples/TestFiles → TestFiles}/US061222892-a.pdf +0 -0
  72. /package/{samples/TestFiles → TestFiles}/butterfly.png +0 -0
  73. /package/{samples/TestFiles → TestFiles}/credit card numbers.pdf +0 -0
  74. /package/{samples/TestFiles → TestFiles}/dice.jpg +0 -0
  75. /package/{samples/TestFiles → TestFiles}/dice.u3d +0 -0
  76. /package/{samples/TestFiles → TestFiles}/doc_to_sign.pdf +0 -0
  77. /package/{samples/TestFiles → TestFiles}/factsheet_Arabic.docx +0 -0
  78. /package/{samples/TestFiles → TestFiles}/financial.pdf +0 -0
  79. /package/{samples/TestFiles → TestFiles}/fish.pdf +0 -0
  80. /package/{samples/TestFiles → TestFiles}/font.ttf +0 -0
  81. /package/{samples/TestFiles → TestFiles}/form1.pdf +0 -0
  82. /package/{samples/TestFiles → TestFiles}/form1_data.fdf +0 -0
  83. /package/{samples/TestFiles → TestFiles}/formfields-scanned-withfields.pdf +0 -0
  84. /package/{samples/TestFiles → TestFiles}/formfields-scanned.pdf +0 -0
  85. /package/{samples/TestFiles → TestFiles}/formfields.pdf +0 -0
  86. /package/{samples/TestFiles → TestFiles}/grayscale.tif +0 -0
  87. /package/{samples/TestFiles → TestFiles}/hindi_sample_utf16le.txt +0 -0
  88. /package/{samples/TestFiles → TestFiles}/imagemask.dat +0 -0
  89. /package/{samples/TestFiles → TestFiles}/logo_red.png +0 -0
  90. /package/{samples/TestFiles → TestFiles}/lorem_ipsum.pdf +0 -0
  91. /package/{samples/TestFiles → TestFiles}/multipage.tif +0 -0
  92. /package/{samples/TestFiles → TestFiles}/newsletter.pdf +0 -0
  93. /package/{samples/TestFiles → TestFiles}/newsletter.xod +0 -0
  94. /package/{samples/TestFiles → TestFiles}/numbered.pdf +0 -0
  95. /package/{samples/TestFiles → TestFiles}/op_blend_test.pdf +0 -0
  96. /package/{samples/TestFiles → TestFiles}/palm.jp2 +0 -0
  97. /package/{samples/TestFiles → TestFiles}/paragraphs_and_tables.pdf +0 -0
  98. /package/{samples/TestFiles → TestFiles}/pdfnet.gif +0 -0
  99. /package/{samples/TestFiles → TestFiles}/pdftron.bmp +0 -0
  100. /package/{samples/TestFiles → TestFiles}/pdftron.cer +0 -0
  101. /package/{samples/TestFiles → TestFiles}/pdftron.pfx +0 -0
  102. /package/{samples/TestFiles → TestFiles}/pdftron_smart_substitution.plugin +0 -0
  103. /package/{samples/TestFiles → TestFiles}/peppers.jpg +0 -0
  104. /package/{samples/TestFiles → TestFiles}/signature.jpg +0 -0
  105. /package/{samples/TestFiles → TestFiles}/simple-emf.emf +0 -0
  106. /package/{samples/TestFiles → TestFiles}/simple-excel_2007.xlsx +0 -0
  107. /package/{samples/TestFiles → TestFiles}/simple-outlook.msg +0 -0
  108. /package/{samples/TestFiles → TestFiles}/simple-powerpoint_2007.pptx +0 -0
  109. /package/{samples/TestFiles → TestFiles}/simple-publisher.pub +0 -0
  110. /package/{samples/TestFiles → TestFiles}/simple-rtf.rtf +0 -0
  111. /package/{samples/TestFiles → TestFiles}/simple-text.txt +0 -0
  112. /package/{samples/TestFiles → TestFiles}/simple-visio.vsd +0 -0
  113. /package/{samples/TestFiles → TestFiles}/simple-webpage.html +0 -0
  114. /package/{samples/TestFiles → TestFiles}/simple-webpage.mht +0 -0
  115. /package/{samples/TestFiles → TestFiles}/simple-webpage_files/colorschememapping.xml +0 -0
  116. /package/{samples/TestFiles → TestFiles}/simple-webpage_files/filelist.xml +0 -0
  117. /package/{samples/TestFiles → TestFiles}/simple-webpage_files/image001.gif +0 -0
  118. /package/{samples/TestFiles → TestFiles}/simple-webpage_files/image002.png +0 -0
  119. /package/{samples/TestFiles → TestFiles}/simple-webpage_files/image003.jpg +0 -0
  120. /package/{samples/TestFiles → TestFiles}/simple-webpage_files/image004.emz +0 -0
  121. /package/{samples/TestFiles → TestFiles}/simple-webpage_files/image005.gif +0 -0
  122. /package/{samples/TestFiles → TestFiles}/simple-webpage_files/image006.png +0 -0
  123. /package/{samples/TestFiles → TestFiles}/simple-webpage_files/image007.gif +0 -0
  124. /package/{samples/TestFiles → TestFiles}/simple-webpage_files/oledata.mso +0 -0
  125. /package/{samples/TestFiles → TestFiles}/simple-webpage_files/themedata.thmx +0 -0
  126. /package/{samples/TestFiles → TestFiles}/simple-word_2007.docx +0 -0
  127. /package/{samples/TestFiles → TestFiles}/simple-xps.xps +0 -0
  128. /package/{samples/TestFiles → TestFiles}/table.pdf +0 -0
  129. /package/{samples/TestFiles → TestFiles}/tagged.pdf +0 -0
  130. /package/{samples/TestFiles → TestFiles}/the_rime_of_the_ancient_mariner.docx +0 -0
  131. /package/{samples/TestFiles → TestFiles}/tiger.pdf +0 -0
  132. /package/{samples/TestFiles → TestFiles}/waiver.pdf +0 -0
  133. /package/{samples/TestFiles → TestFiles}/waiver_withApprovalField.pdf +0 -0
  134. /package/{samples/TestFiles → TestFiles}/waiver_withApprovalField_certified.pdf +0 -0
  135. /package/{samples/TestFiles → TestFiles}/waiver_withApprovalField_certified_approved.pdf +0 -0
@@ -1,236 +1,236 @@
1
- //---------------------------------------------------------------------------------------
2
- // Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3
- // Consult legal.txt regarding legal and license information.
4
- //---------------------------------------------------------------------------------------
5
-
6
-
7
- const { PDFNet } = require('@pdftron/pdfnet-node');
8
- const PDFTronLicense = require('../LicenseKey/LicenseKey');
9
-
10
- ((exports) => {
11
- 'use strict';
12
-
13
- //---------------------------------------------------------------------------------------
14
- // The following sample illustrates how to use OCR module
15
- //---------------------------------------------------------------------------------------
16
- exports.runOCRTest = () => {
17
- const main = async () => {
18
- try {
19
-
20
- PDFNet.addResourceSearchPath('../../lib/');
21
-
22
- const useIRIS = await PDFNet.OCRModule.isIRISModuleAvailable();
23
- if (!(await PDFNet.OCRModule.isModuleAvailable())) {
24
- console.log('\nUnable to run OCRTest: Apryse SDK OCR module not available.');
25
- console.log('---------------------------------------------------------------');
26
- console.log('The OCR module is an optional add-on, available for download');
27
- console.log('at http://www.pdftron.com/. If you have already downloaded this');
28
- console.log('module, ensure that the SDK is able to find the required files');
29
- console.log('using the PDFNet.addResourceSearchPath() function.\n');
30
-
31
- return;
32
- }
33
-
34
- // Relative path to the folder containing test files.
35
- const input_path = '../TestFiles/OCR/';
36
- const output_path = '../TestFiles/Output/';
37
-
38
- //--------------------------------------------------------------------------------
39
- // Example 1) Process image without specifying options, default language - English - is used
40
- try {
41
-
42
- // A) Setup empty destination doc
43
- const doc = await PDFNet.PDFDoc.create();
44
-
45
- await doc.initSecurityHandler();
46
-
47
- const opts = new PDFNet.OCRModule.OCROptions();
48
- if(useIRIS) opts.setOCREngine('iris');
49
-
50
- // B) Run OCR on the .png with options
51
- await PDFNet.OCRModule.imageToPDF(doc, input_path + 'psychomachia_excerpt.png', opts);
52
-
53
- // C) check the result
54
- await doc.save(output_path + 'psychomachia_excerpt.pdf', 0);
55
-
56
- console.log('Example 1: psychomachia_excerpt.png');
57
-
58
- } catch (err) {
59
- console.log(err);
60
- }
61
-
62
- //--------------------------------------------------------------------------------
63
- // Example 2) Process document using multiple languages
64
- try {
65
- // A) Setup empty destination doc
66
- const doc = await PDFNet.PDFDoc.create();
67
- await doc.initSecurityHandler();
68
-
69
- // B) Setup options with multiple target languages, English will always be considered as secondary language
70
- const opts = new PDFNet.OCRModule.OCROptions();
71
- if(useIRIS) opts.setOCREngine('iris');
72
- opts.addLang('deu');
73
- opts.addLang('fra');
74
- opts.addLang('eng');
75
-
76
- // C) Run OCR on the .jpg with options
77
- await PDFNet.OCRModule.imageToPDF(doc, input_path + 'multi_lang.jpg', opts);
78
-
79
- // D) check the result
80
- await doc.save(output_path + 'multi_lang.pdf', 0);
81
-
82
- console.log('Example 2: multi_lang.jpg');
83
- } catch (err) {
84
- console.log(err);
85
- }
86
-
87
- //--------------------------------------------------------------------------------
88
- // Example 3) Process a .pdf specifying a language - German - and ignore zone comprising a sidebar image
89
- try {
90
- // A) Open the .pdf document
91
- const doc = await PDFNet.PDFDoc.createFromFilePath(input_path + 'german_kids_song.pdf');
92
- doc.initSecurityHandler();
93
-
94
- // B) Setup options with a single language and an ignore zone
95
- const opts = new PDFNet.OCRModule.OCROptions();
96
- if(useIRIS) opts.setOCREngine('iris');
97
- opts.addLang('deu');
98
-
99
- const ignore_zones = [];
100
- ignore_zones.push(new PDFNet.Rect(424, 163, 493, 730));
101
- opts.addIgnoreZonesForPage(ignore_zones, 1);
102
-
103
- // C) Run OCR on the .pdf with options
104
- await PDFNet.OCRModule.processPDF(doc, opts);
105
-
106
- // D) check the result
107
- await doc.save(output_path + 'german_kids_song.pdf', 0);
108
-
109
- console.log('Example 3: german_kids_song.pdf');
110
- } catch (err) {
111
- console.log(err);
112
- }
113
-
114
- //--------------------------------------------------------------------------------
115
- // Example 4) Process multipage tiff with text/ignore zones specified for each page, optionally provide English as the target language
116
- try {
117
- // A) Setup empty destination doc
118
- const doc = await PDFNet.PDFDoc.create();
119
- await doc.initSecurityHandler();
120
-
121
- // B) Setup options with a single language plus text/ignore zones
122
- const opts = new PDFNet.OCRModule.OCROptions();
123
- if(useIRIS) opts.setOCREngine('iris');
124
- opts.addLang('eng');
125
-
126
- var ignore_zones = [];
127
- // ignore signature box in the first 2 pages
128
- ignore_zones.push(new PDFNet.Rect(1492, 56, 2236, 432));
129
- opts.addIgnoreZonesForPage(ignore_zones, 1);
130
-
131
- ignore_zones = [];
132
- ignore_zones.push(new PDFNet.Rect(1492, 56, 2236, 432));
133
- opts.addIgnoreZonesForPage(ignore_zones, 2);
134
-
135
- // can use a combination of ignore and text boxes to focus on the page area of interest,
136
- // as ignore boxes are applied first, we remove the arrows before selecting part of the diagram
137
- ignore_zones = [];
138
- ignore_zones.push(new PDFNet.Rect(992, 1276, 1368, 1372));
139
- opts.addIgnoreZonesForPage(ignore_zones, 3);
140
-
141
-
142
- const text_zones = [];
143
- // we only have text zones selected in page 3
144
-
145
- // select horizontal BUFFER ZONE sign
146
- text_zones.push(new PDFNet.Rect(900, 2384, 1236, 2480));
147
- // select right vertical BUFFER ZONE sign
148
- text_zones.push(new PDFNet.Rect(1960, 1976, 2016, 2296));
149
- // select Lot No.
150
- text_zones.push(new PDFNet.Rect(696, 1028, 1196, 1128));
151
-
152
- // select part of the plan inside the BUFFER ZONE
153
- text_zones.push(new PDFNet.Rect(428, 1484, 1784, 2344));
154
- text_zones.push(new PDFNet.Rect(948, 1288, 1672, 1476));
155
- opts.addTextZonesForPage(text_zones, 3);
156
-
157
- // C) Run OCR on the .tif with options
158
- await PDFNet.OCRModule.imageToPDF(doc, input_path + 'bc_environment_protection.tif', opts);
159
-
160
- // D) check the result
161
- await doc.save(output_path + 'bc_environment_protection.pdf', 0);
162
-
163
- console.log('Example 4: bc_environment_protection.tif');
164
- } catch (err) {
165
- console.log(err);
166
- }
167
-
168
- //--------------------------------------------------------------------------------
169
- // Example 5) Alternative workflow for extracting OCR result JSON, postprocessing (e.g., removing words not in the dictionary or filtering special
170
- // out special characters), and finally applying modified OCR JSON to the source PDF document
171
- try {
172
- // A) Open the .pdf document
173
- const doc = await PDFNet.PDFDoc.createFromFilePath(input_path + 'zero_value_test_no_text.pdf');
174
- await doc.initSecurityHandler();
175
-
176
- const opts = new PDFNet.OCRModule.OCROptions();
177
- if(useIRIS) opts.setOCREngine('iris');
178
-
179
- // B) Run OCR on the .pdf with default English language
180
- const json = await PDFNet.OCRModule.getOCRJsonFromPDF(doc, opts);
181
-
182
- // C) Post-processing step (whatever it might be)
183
- console.log('Have OCR result JSON, re-applying to PDF ');
184
-
185
- // D) Apply potentially modified OCR JSON to the PDF
186
- await PDFNet.OCRModule.applyOCRJsonToPDF(doc, json);
187
-
188
- // E) Check the result
189
- await doc.save(output_path + 'zero_value_test_no_text.pdf', 0);
190
-
191
- console.log('Example 5: extracting and applying OCR JSON from zero_value_test_no_text.pdf');
192
- } catch (err) {
193
- console.log(err);
194
- }
195
-
196
- //--------------------------------------------------------------------------------
197
- // Example 6) The postprocessing workflow has also an option of extracting OCR results in XML format, similar to the one used by TextExtractor
198
- try {
199
-
200
- // A) Setup empty destination doc
201
- const doc = await PDFNet.PDFDoc.create();
202
- await doc.initSecurityHandler();
203
-
204
- const opts = new PDFNet.OCRModule.OCROptions();
205
- if(useIRIS) opts.setOCREngine('iris');
206
-
207
- // B) Run OCR on the .tif with default English language, extracting OCR results in XML format. Note that
208
- // in the process we convert the source image into PDF. We reuse this PDF document later to add hidden text layer to it.
209
- const xml = await PDFNet.OCRModule.getOCRXmlFromImage(doc, input_path + 'physics.tif', opts);
210
-
211
- // C) Post-processing step (whatever it might be)
212
- console.log('Have OCR result XML, re-applying to PDF');
213
-
214
- // D) Apply potentially modified OCR XML to the PDF
215
- await PDFNet.OCRModule.applyOCRXmlToPDF(doc, xml);
216
-
217
- // E) Check the result
218
- await doc.save(output_path + 'physics.pdf', 0);
219
-
220
- console.log('Example 6: extracting and applying OCR XML from physics.tif');
221
- } catch (err) {
222
- console.log(err);
223
- }
224
- console.log('Done.');
225
- } catch (err) {
226
- console.log(err);
227
- }
228
- };
229
- PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function(error) {
230
- console.log('Error: ' + JSON.stringify(error));
231
- }).then(function(){ return PDFNet.shutdown(); });
232
- };
233
- exports.runOCRTest();
234
- })(exports);
235
- // eslint-disable-next-line spaced-comment
1
+ //---------------------------------------------------------------------------------------
2
+ // Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3
+ // Consult legal.txt regarding legal and license information.
4
+ //---------------------------------------------------------------------------------------
5
+
6
+
7
+ const { PDFNet } = require('@pdftron/pdfnet-node');
8
+ const PDFTronLicense = require('../LicenseKey/LicenseKey');
9
+
10
+ ((exports) => {
11
+ 'use strict';
12
+
13
+ //---------------------------------------------------------------------------------------
14
+ // The following sample illustrates how to use OCR module
15
+ //---------------------------------------------------------------------------------------
16
+ exports.runOCRTest = () => {
17
+ const main = async () => {
18
+ try {
19
+
20
+ PDFNet.addResourceSearchPath('../../lib/');
21
+
22
+ const useIRIS = await PDFNet.OCRModule.isIRISModuleAvailable();
23
+ if (!(await PDFNet.OCRModule.isModuleAvailable())) {
24
+ console.log('\nUnable to run OCRTest: Apryse SDK OCR module not available.');
25
+ console.log('---------------------------------------------------------------');
26
+ console.log('The OCR module is an optional add-on, available for download');
27
+ console.log('at http://www.pdftron.com/. If you have already downloaded this');
28
+ console.log('module, ensure that the SDK is able to find the required files');
29
+ console.log('using the PDFNet.addResourceSearchPath() function.\n');
30
+
31
+ return;
32
+ }
33
+
34
+ // Relative path to the folder containing test files.
35
+ const input_path = '../TestFiles/OCR/';
36
+ const output_path = '../TestFiles/Output/';
37
+
38
+ //--------------------------------------------------------------------------------
39
+ // Example 1) Process image without specifying options, default language - English - is used
40
+ try {
41
+
42
+ // A) Setup empty destination doc
43
+ const doc = await PDFNet.PDFDoc.create();
44
+
45
+ await doc.initSecurityHandler();
46
+
47
+ const opts = new PDFNet.OCRModule.OCROptions();
48
+ if(useIRIS) opts.setOCREngine('iris');
49
+
50
+ // B) Run OCR on the .png with options
51
+ await PDFNet.OCRModule.imageToPDF(doc, input_path + 'psychomachia_excerpt.png', opts);
52
+
53
+ // C) check the result
54
+ await doc.save(output_path + 'psychomachia_excerpt.pdf', 0);
55
+
56
+ console.log('Example 1: psychomachia_excerpt.png');
57
+
58
+ } catch (err) {
59
+ console.log(err);
60
+ }
61
+
62
+ //--------------------------------------------------------------------------------
63
+ // Example 2) Process document using multiple languages
64
+ try {
65
+ // A) Setup empty destination doc
66
+ const doc = await PDFNet.PDFDoc.create();
67
+ await doc.initSecurityHandler();
68
+
69
+ // B) Setup options with multiple target languages, English will always be considered as secondary language
70
+ const opts = new PDFNet.OCRModule.OCROptions();
71
+ if(useIRIS) opts.setOCREngine('iris');
72
+ opts.addLang('deu');
73
+ opts.addLang('fra');
74
+ opts.addLang('eng');
75
+
76
+ // C) Run OCR on the .jpg with options
77
+ await PDFNet.OCRModule.imageToPDF(doc, input_path + 'multi_lang.jpg', opts);
78
+
79
+ // D) check the result
80
+ await doc.save(output_path + 'multi_lang.pdf', 0);
81
+
82
+ console.log('Example 2: multi_lang.jpg');
83
+ } catch (err) {
84
+ console.log(err);
85
+ }
86
+
87
+ //--------------------------------------------------------------------------------
88
+ // Example 3) Process a .pdf specifying a language - German - and ignore zone comprising a sidebar image
89
+ try {
90
+ // A) Open the .pdf document
91
+ const doc = await PDFNet.PDFDoc.createFromFilePath(input_path + 'german_kids_song.pdf');
92
+ doc.initSecurityHandler();
93
+
94
+ // B) Setup options with a single language and an ignore zone
95
+ const opts = new PDFNet.OCRModule.OCROptions();
96
+ if(useIRIS) opts.setOCREngine('iris');
97
+ opts.addLang('deu');
98
+
99
+ const ignore_zones = [];
100
+ ignore_zones.push(new PDFNet.Rect(424, 163, 493, 730));
101
+ opts.addIgnoreZonesForPage(ignore_zones, 1);
102
+
103
+ // C) Run OCR on the .pdf with options
104
+ await PDFNet.OCRModule.processPDF(doc, opts);
105
+
106
+ // D) check the result
107
+ await doc.save(output_path + 'german_kids_song.pdf', 0);
108
+
109
+ console.log('Example 3: german_kids_song.pdf');
110
+ } catch (err) {
111
+ console.log(err);
112
+ }
113
+
114
+ //--------------------------------------------------------------------------------
115
+ // Example 4) Process multipage tiff with text/ignore zones specified for each page, optionally provide English as the target language
116
+ try {
117
+ // A) Setup empty destination doc
118
+ const doc = await PDFNet.PDFDoc.create();
119
+ await doc.initSecurityHandler();
120
+
121
+ // B) Setup options with a single language plus text/ignore zones
122
+ const opts = new PDFNet.OCRModule.OCROptions();
123
+ if(useIRIS) opts.setOCREngine('iris');
124
+ opts.addLang('eng');
125
+
126
+ var ignore_zones = [];
127
+ // ignore signature box in the first 2 pages
128
+ ignore_zones.push(new PDFNet.Rect(1492, 56, 2236, 432));
129
+ opts.addIgnoreZonesForPage(ignore_zones, 1);
130
+
131
+ ignore_zones = [];
132
+ ignore_zones.push(new PDFNet.Rect(1492, 56, 2236, 432));
133
+ opts.addIgnoreZonesForPage(ignore_zones, 2);
134
+
135
+ // can use a combination of ignore and text boxes to focus on the page area of interest,
136
+ // as ignore boxes are applied first, we remove the arrows before selecting part of the diagram
137
+ ignore_zones = [];
138
+ ignore_zones.push(new PDFNet.Rect(992, 1276, 1368, 1372));
139
+ opts.addIgnoreZonesForPage(ignore_zones, 3);
140
+
141
+
142
+ const text_zones = [];
143
+ // we only have text zones selected in page 3
144
+
145
+ // select horizontal BUFFER ZONE sign
146
+ text_zones.push(new PDFNet.Rect(900, 2384, 1236, 2480));
147
+ // select right vertical BUFFER ZONE sign
148
+ text_zones.push(new PDFNet.Rect(1960, 1976, 2016, 2296));
149
+ // select Lot No.
150
+ text_zones.push(new PDFNet.Rect(696, 1028, 1196, 1128));
151
+
152
+ // select part of the plan inside the BUFFER ZONE
153
+ text_zones.push(new PDFNet.Rect(428, 1484, 1784, 2344));
154
+ text_zones.push(new PDFNet.Rect(948, 1288, 1672, 1476));
155
+ opts.addTextZonesForPage(text_zones, 3);
156
+
157
+ // C) Run OCR on the .tif with options
158
+ await PDFNet.OCRModule.imageToPDF(doc, input_path + 'bc_environment_protection.tif', opts);
159
+
160
+ // D) check the result
161
+ await doc.save(output_path + 'bc_environment_protection.pdf', 0);
162
+
163
+ console.log('Example 4: bc_environment_protection.tif');
164
+ } catch (err) {
165
+ console.log(err);
166
+ }
167
+
168
+ //--------------------------------------------------------------------------------
169
+ // Example 5) Alternative workflow for extracting OCR result JSON, postprocessing (e.g., removing words not in the dictionary or filtering special
170
+ // out special characters), and finally applying modified OCR JSON to the source PDF document
171
+ try {
172
+ // A) Open the .pdf document
173
+ const doc = await PDFNet.PDFDoc.createFromFilePath(input_path + 'zero_value_test_no_text.pdf');
174
+ await doc.initSecurityHandler();
175
+
176
+ const opts = new PDFNet.OCRModule.OCROptions();
177
+ if(useIRIS) opts.setOCREngine('iris');
178
+
179
+ // B) Run OCR on the .pdf with default English language
180
+ const json = await PDFNet.OCRModule.getOCRJsonFromPDF(doc, opts);
181
+
182
+ // C) Post-processing step (whatever it might be)
183
+ console.log('Have OCR result JSON, re-applying to PDF ');
184
+
185
+ // D) Apply potentially modified OCR JSON to the PDF
186
+ await PDFNet.OCRModule.applyOCRJsonToPDF(doc, json);
187
+
188
+ // E) Check the result
189
+ await doc.save(output_path + 'zero_value_test_no_text.pdf', 0);
190
+
191
+ console.log('Example 5: extracting and applying OCR JSON from zero_value_test_no_text.pdf');
192
+ } catch (err) {
193
+ console.log(err);
194
+ }
195
+
196
+ //--------------------------------------------------------------------------------
197
+ // Example 6) The postprocessing workflow has also an option of extracting OCR results in XML format, similar to the one used by TextExtractor
198
+ try {
199
+
200
+ // A) Setup empty destination doc
201
+ const doc = await PDFNet.PDFDoc.create();
202
+ await doc.initSecurityHandler();
203
+
204
+ const opts = new PDFNet.OCRModule.OCROptions();
205
+ if(useIRIS) opts.setOCREngine('iris');
206
+
207
+ // B) Run OCR on the .tif with default English language, extracting OCR results in XML format. Note that
208
+ // in the process we convert the source image into PDF. We reuse this PDF document later to add hidden text layer to it.
209
+ const xml = await PDFNet.OCRModule.getOCRXmlFromImage(doc, input_path + 'physics.tif', opts);
210
+
211
+ // C) Post-processing step (whatever it might be)
212
+ console.log('Have OCR result XML, re-applying to PDF');
213
+
214
+ // D) Apply potentially modified OCR XML to the PDF
215
+ await PDFNet.OCRModule.applyOCRXmlToPDF(doc, xml);
216
+
217
+ // E) Check the result
218
+ await doc.save(output_path + 'physics.pdf', 0);
219
+
220
+ console.log('Example 6: extracting and applying OCR XML from physics.tif');
221
+ } catch (err) {
222
+ console.log(err);
223
+ }
224
+ console.log('Done.');
225
+ } catch (err) {
226
+ console.log(err);
227
+ }
228
+ };
229
+ PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function(error) {
230
+ console.log('Error: ' + JSON.stringify(error));
231
+ }).then(function(){ return PDFNet.shutdown(); });
232
+ };
233
+ exports.runOCRTest();
234
+ })(exports);
235
+ // eslint-disable-next-line spaced-comment
236
236
  //# sourceURL=OCRTest.js
@@ -1,79 +1,79 @@
1
- //---------------------------------------------------------------------------------------
2
- // Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3
- // Consult legal.txt regarding legal and license information.
4
- //---------------------------------------------------------------------------------------
5
-
6
- //------------------------------------------------------------------------------
7
- // The following sample illustrates how to use the PDF::Convert utility class
8
- // to convert MS Office files to PDF and replace templated tags present in the document
9
- // with content supplied via json
10
- //
11
- // For a detailed specification of the template format and supported features,
12
- // see: https://docs.apryse.com/documentation/core/guides/generate-via-template/data-model/
13
- //
14
- // This conversion is performed entirely within the PDFNet and has *no*
15
- // external or system dependencies -- Conversion results will be
16
- // the same whether on Windows, Linux or Android.
17
- //
18
- // Please contact us if you have any questions.
19
- //------------------------------------------------------------------------------
20
-
21
- const { PDFNet } = require('@pdftron/pdfnet-node');
22
- const PDFTronLicense = require('../LicenseKey/LicenseKey');
23
-
24
- ((exports) => {
25
- 'use strict';
26
-
27
- exports.runOfficeTemplateTest = () => {
28
-
29
- const inputPath = '../TestFiles/';
30
- const outputPath = inputPath + 'Output/';
31
- const inputFilename = 'SYH_Letter.docx'
32
- const outputFilename = 'SYH_Letter.pdf'
33
-
34
- const main = async () => {
35
-
36
- PDFNet.addResourceSearchPath('../Resources');
37
-
38
- try {
39
- const options = new PDFNet.Convert.OfficeToPDFOptions();
40
-
41
- const json = JSON.stringify({
42
- 'dest_given_name': 'Janice N.',
43
- 'dest_street_address': "187 Duizelstraat",
44
- 'dest_surname': 'Symonds',
45
- 'dest_title': 'Ms.',
46
- 'land_location': '225 Parc St., Rochelle, QC ',
47
- 'lease_problem': 'According to the city records, the lease was initiated in September 2010 and never terminated',
48
- 'logo': { 'image_url': inputPath + 'logo_red.png', 'width' : 64, 'height': 64 },
49
- 'sender_name': 'Arnold Smith'
50
- });
51
-
52
- // Create a TemplateDocument object from an input office file.
53
- const templateDoc = await PDFNet.Convert.createOfficeTemplateWithPath(inputPath + inputFilename, options);
54
-
55
- // Fill the template with data from a JSON string, producing a PDF document.
56
- const pdfdoc = await templateDoc.fillTemplateJson(json);
57
-
58
- // Save the PDF to a file.
59
- await pdfdoc.save(outputPath + outputFilename, PDFNet.SDFDoc.SaveOptions.e_linearized);
60
-
61
- // And we're done!
62
- console.log('Saved ' + outputFilename);
63
-
64
- } catch (err) {
65
- console.log(err);
66
- }
67
-
68
- console.log('Done.');
69
- };
70
-
71
- PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function (error) {
72
- console.log('Error: ' + JSON.stringify(error));
73
- }).then(function () { return PDFNet.shutdown(); });
74
-
75
- };
76
- exports.runOfficeTemplateTest();
77
- })(exports);
78
- // eslint-disable-next-line spaced-comment
79
- //# sourceURL=OfficeTemplateTest.js
1
+ //---------------------------------------------------------------------------------------
2
+ // Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3
+ // Consult legal.txt regarding legal and license information.
4
+ //---------------------------------------------------------------------------------------
5
+
6
+ //------------------------------------------------------------------------------
7
+ // The following sample illustrates how to use the PDF::Convert utility class
8
+ // to convert MS Office files to PDF and replace templated tags present in the document
9
+ // with content supplied via json
10
+ //
11
+ // For a detailed specification of the template format and supported features,
12
+ // see: https://docs.apryse.com/documentation/core/guides/generate-via-template/data-model/
13
+ //
14
+ // This conversion is performed entirely within the PDFNet and has *no*
15
+ // external or system dependencies -- Conversion results will be
16
+ // the same whether on Windows, Linux or Android.
17
+ //
18
+ // Please contact us if you have any questions.
19
+ //------------------------------------------------------------------------------
20
+
21
+ const { PDFNet } = require('@pdftron/pdfnet-node');
22
+ const PDFTronLicense = require('../LicenseKey/LicenseKey');
23
+
24
+ ((exports) => {
25
+ 'use strict';
26
+
27
+ exports.runOfficeTemplateTest = () => {
28
+
29
+ const inputPath = '../TestFiles/';
30
+ const outputPath = inputPath + 'Output/';
31
+ const inputFilename = 'SYH_Letter.docx'
32
+ const outputFilename = 'SYH_Letter.pdf'
33
+
34
+ const main = async () => {
35
+
36
+ PDFNet.addResourceSearchPath('../Resources');
37
+
38
+ try {
39
+ const options = new PDFNet.Convert.OfficeToPDFOptions();
40
+
41
+ const json = JSON.stringify({
42
+ 'dest_given_name': 'Janice N.',
43
+ 'dest_street_address': "187 Duizelstraat",
44
+ 'dest_surname': 'Symonds',
45
+ 'dest_title': 'Ms.',
46
+ 'land_location': '225 Parc St., Rochelle, QC ',
47
+ 'lease_problem': 'According to the city records, the lease was initiated in September 2010 and never terminated',
48
+ 'logo': { 'image_url': inputPath + 'logo_red.png', 'width' : 64, 'height': 64 },
49
+ 'sender_name': 'Arnold Smith'
50
+ });
51
+
52
+ // Create a TemplateDocument object from an input office file.
53
+ const templateDoc = await PDFNet.Convert.createOfficeTemplateWithPath(inputPath + inputFilename, options);
54
+
55
+ // Fill the template with data from a JSON string, producing a PDF document.
56
+ const pdfdoc = await templateDoc.fillTemplateJson(json);
57
+
58
+ // Save the PDF to a file.
59
+ await pdfdoc.save(outputPath + outputFilename, PDFNet.SDFDoc.SaveOptions.e_linearized);
60
+
61
+ // And we're done!
62
+ console.log('Saved ' + outputFilename);
63
+
64
+ } catch (err) {
65
+ console.log(err);
66
+ }
67
+
68
+ console.log('Done.');
69
+ };
70
+
71
+ PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function (error) {
72
+ console.log('Error: ' + JSON.stringify(error));
73
+ }).then(function () { return PDFNet.shutdown(); });
74
+
75
+ };
76
+ exports.runOfficeTemplateTest();
77
+ })(exports);
78
+ // eslint-disable-next-line spaced-comment
79
+ //# sourceURL=OfficeTemplateTest.js