@pdftron/pdfnet-node-samples 10.8.0 → 10.9.0-1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. package/{samples/AddImageTest → AddImageTest}/AddImageTest.js +115 -115
  2. package/{samples/AdvancedImagingTest → AdvancedImagingTest}/AdvancedImagingTest.js +78 -78
  3. package/{samples/AnnotationTest → AnnotationTest}/AnnotationTest.js +641 -641
  4. package/{samples/BookmarkTest → BookmarkTest}/BookmarkTest.js +219 -219
  5. package/{samples/CAD2PDFTest → CAD2PDFTest}/CAD2PDFTest.js +79 -79
  6. package/{samples/ContentReplacerTest → ContentReplacerTest}/ContentReplacerTest.js +75 -75
  7. package/{samples/ConvertPrintTest → ConvertPrintTest}/ConvertPrintTest.js +153 -153
  8. package/{samples/ConvertTest → ConvertTest}/ConvertTest.js +203 -203
  9. package/{samples/DataExtractionTest → DataExtractionTest}/DataExtractionTest.js +214 -214
  10. package/{samples/DigitalSignaturesTest → DigitalSignaturesTest}/DigitalSignaturesTest.js +621 -526
  11. package/{samples/DocumentCreationTest → DocumentCreationTest}/DocumentCreationTest.js +409 -409
  12. package/{samples/ElementBuilderTest → ElementBuilderTest}/ElementBuilderTest.js +513 -513
  13. package/{samples/ElementEditTest → ElementEditTest}/ElementEditTest.js +110 -110
  14. package/{samples/ElementReaderAdvTest → ElementReaderAdvTest}/ElementReaderAdvTest.js +305 -305
  15. package/{samples/ElementReaderTest → ElementReaderTest}/ElementReaderTest.js +77 -77
  16. package/{samples/EncTest → EncTest}/EncTest.js +175 -175
  17. package/{samples/FDFTest → FDFTest}/FDFTest.js +218 -218
  18. package/{samples/HTML2PDFTest → HTML2PDFTest}/HTML2PDFTest.js +164 -164
  19. package/{samples/HighlightsTest → HighlightsTest}/HighlightsTest.js +97 -97
  20. package/{samples/ImageExtractTest → ImageExtractTest}/ImageExtractTest.js +129 -129
  21. package/{samples/ImpositionTest → ImpositionTest}/ImpositionTest.js +86 -86
  22. package/{samples/InteractiveFormsTest → InteractiveFormsTest}/InteractiveFormsTest.js +381 -381
  23. package/{samples/JBIG2Test → JBIG2Test}/JBIG2Test.js +88 -88
  24. package/{samples/LicenseKey → LicenseKey}/LicenseKey.js +11 -11
  25. package/{samples/LogicalStructureTest → LogicalStructureTest}/LogicalStructureTest.js +250 -250
  26. package/{samples/OCRTest → OCRTest}/OCRTest.js +235 -235
  27. package/{samples/OfficeTemplateTest → OfficeTemplateTest}/OfficeTemplateTest.js +79 -79
  28. package/{samples/OfficeToPDFTest → OfficeToPDFTest}/OfficeToPDFTest.js +125 -125
  29. package/{samples/OptimizerTest → OptimizerTest}/OptimizerTest.js +191 -191
  30. package/{samples/PDF2HtmlTest → PDF2HtmlTest}/PDF2HtmlTest.js +123 -123
  31. package/{samples/PDF2OfficeTest → PDF2OfficeTest}/PDF2OfficeTest.js +158 -158
  32. package/{samples/PDFATest → PDFATest}/PDFATest.js +85 -85
  33. package/{samples/PDFDocMemoryTest → PDFDocMemoryTest}/PDFDocMemoryTest.js +84 -84
  34. package/{samples/PDFDrawTest → PDFDrawTest}/PDFDrawTest.js +305 -305
  35. package/{samples/PDFLayersTest → PDFLayersTest}/PDFLayersTest.js +294 -294
  36. package/{samples/PDFPackageTest → PDFPackageTest}/PDFPackageTest.js +111 -111
  37. package/{samples/PDFPageTest → PDFPageTest}/PDFPageTest.js +189 -189
  38. package/{samples/PDFRedactTest → PDFRedactTest}/PDFRedactTest.js +74 -74
  39. package/{samples/PageLabelsTest → PageLabelsTest}/PageLabelsTest.js +138 -138
  40. package/{samples/PatternTest → PatternTest}/PatternTest.js +226 -226
  41. package/{samples/RectTest → RectTest}/RectTest.js +40 -40
  42. package/{samples/SDFTest → SDFTest}/SDFTest.js +87 -87
  43. package/{samples/StamperTest → StamperTest}/StamperTest.js +255 -255
  44. package/{samples/TestFiles → TestFiles}/Misc-Fixed.pfa +1166 -1166
  45. package/{samples/TestFiles → TestFiles}/SHA-2 Root USERTrust RSA CA Sectigo timestamping.crt +34 -34
  46. package/{samples/TestFiles → TestFiles}/form1_annots.xfdf +33 -33
  47. package/{samples/TestFiles → TestFiles}/form1_data.xfdf +139 -139
  48. package/{samples/TestFiles → TestFiles}/my_stream.txt +2310 -2310
  49. package/{samples/TestFiles → TestFiles}/tiger.svg +378 -378
  50. package/{samples/TextExtractTest → TextExtractTest}/TextExtractTest.js +286 -286
  51. package/{samples/TextSearchTest → TextSearchTest}/TextSearchTest.js +121 -121
  52. package/{samples/U3DTest → U3DTest}/U3DTest.js +104 -104
  53. package/{samples/UndoRedoTest → UndoRedoTest}/UndoRedoTest.js +101 -101
  54. package/{samples/UnicodeWriteTest → UnicodeWriteTest}/UnicodeWriteTest.js +173 -173
  55. package/{samples/WebViewerConvertTest → WebViewerConvertTest}/WebViewerConvertTest.js +135 -135
  56. package/legal.txt +632 -0
  57. package/license.pdf +0 -0
  58. package/package.json +20 -21
  59. package/readme.md +38 -13
  60. package/{samples/runall.bat → runall.bat} +12 -12
  61. package/{samples/runall.sh → runall.sh} +15 -15
  62. /package/{samples/TestFiles → TestFiles}/BusinessCardTemplate.pdf +0 -0
  63. /package/{samples/TestFiles → TestFiles}/Fishermen.docx +0 -0
  64. /package/{samples/TestFiles → TestFiles}/Font_licenses.txt +0 -0
  65. /package/{samples/TestFiles → TestFiles}/GlobalSignRootForTST.cer +0 -0
  66. /package/{samples/TestFiles → TestFiles}/License.txt +0 -0
  67. /package/{samples/TestFiles → TestFiles}/NotoSans_with_hindi.ttf +0 -0
  68. /package/{samples/TestFiles → TestFiles}/Output/empty +0 -0
  69. /package/{samples/TestFiles → TestFiles}/SYH_Letter.docx +0 -0
  70. /package/{samples/TestFiles → TestFiles}/TigerText.pdf +0 -0
  71. /package/{samples/TestFiles → TestFiles}/US061222892-a.pdf +0 -0
  72. /package/{samples/TestFiles → TestFiles}/butterfly.png +0 -0
  73. /package/{samples/TestFiles → TestFiles}/credit card numbers.pdf +0 -0
  74. /package/{samples/TestFiles → TestFiles}/dice.jpg +0 -0
  75. /package/{samples/TestFiles → TestFiles}/dice.u3d +0 -0
  76. /package/{samples/TestFiles → TestFiles}/doc_to_sign.pdf +0 -0
  77. /package/{samples/TestFiles → TestFiles}/factsheet_Arabic.docx +0 -0
  78. /package/{samples/TestFiles → TestFiles}/financial.pdf +0 -0
  79. /package/{samples/TestFiles → TestFiles}/fish.pdf +0 -0
  80. /package/{samples/TestFiles → TestFiles}/font.ttf +0 -0
  81. /package/{samples/TestFiles → TestFiles}/form1.pdf +0 -0
  82. /package/{samples/TestFiles → TestFiles}/form1_data.fdf +0 -0
  83. /package/{samples/TestFiles → TestFiles}/formfields-scanned-withfields.pdf +0 -0
  84. /package/{samples/TestFiles → TestFiles}/formfields-scanned.pdf +0 -0
  85. /package/{samples/TestFiles → TestFiles}/formfields.pdf +0 -0
  86. /package/{samples/TestFiles → TestFiles}/grayscale.tif +0 -0
  87. /package/{samples/TestFiles → TestFiles}/hindi_sample_utf16le.txt +0 -0
  88. /package/{samples/TestFiles → TestFiles}/imagemask.dat +0 -0
  89. /package/{samples/TestFiles → TestFiles}/logo_red.png +0 -0
  90. /package/{samples/TestFiles → TestFiles}/lorem_ipsum.pdf +0 -0
  91. /package/{samples/TestFiles → TestFiles}/multipage.tif +0 -0
  92. /package/{samples/TestFiles → TestFiles}/newsletter.pdf +0 -0
  93. /package/{samples/TestFiles → TestFiles}/newsletter.xod +0 -0
  94. /package/{samples/TestFiles → TestFiles}/numbered.pdf +0 -0
  95. /package/{samples/TestFiles → TestFiles}/op_blend_test.pdf +0 -0
  96. /package/{samples/TestFiles → TestFiles}/palm.jp2 +0 -0
  97. /package/{samples/TestFiles → TestFiles}/paragraphs_and_tables.pdf +0 -0
  98. /package/{samples/TestFiles → TestFiles}/pdfnet.gif +0 -0
  99. /package/{samples/TestFiles → TestFiles}/pdftron.bmp +0 -0
  100. /package/{samples/TestFiles → TestFiles}/pdftron.cer +0 -0
  101. /package/{samples/TestFiles → TestFiles}/pdftron.pfx +0 -0
  102. /package/{samples/TestFiles → TestFiles}/pdftron_smart_substitution.plugin +0 -0
  103. /package/{samples/TestFiles → TestFiles}/peppers.jpg +0 -0
  104. /package/{samples/TestFiles → TestFiles}/signature.jpg +0 -0
  105. /package/{samples/TestFiles → TestFiles}/simple-emf.emf +0 -0
  106. /package/{samples/TestFiles → TestFiles}/simple-excel_2007.xlsx +0 -0
  107. /package/{samples/TestFiles → TestFiles}/simple-outlook.msg +0 -0
  108. /package/{samples/TestFiles → TestFiles}/simple-powerpoint_2007.pptx +0 -0
  109. /package/{samples/TestFiles → TestFiles}/simple-publisher.pub +0 -0
  110. /package/{samples/TestFiles → TestFiles}/simple-rtf.rtf +0 -0
  111. /package/{samples/TestFiles → TestFiles}/simple-text.txt +0 -0
  112. /package/{samples/TestFiles → TestFiles}/simple-visio.vsd +0 -0
  113. /package/{samples/TestFiles → TestFiles}/simple-webpage.html +0 -0
  114. /package/{samples/TestFiles → TestFiles}/simple-webpage.mht +0 -0
  115. /package/{samples/TestFiles → TestFiles}/simple-webpage_files/colorschememapping.xml +0 -0
  116. /package/{samples/TestFiles → TestFiles}/simple-webpage_files/filelist.xml +0 -0
  117. /package/{samples/TestFiles → TestFiles}/simple-webpage_files/image001.gif +0 -0
  118. /package/{samples/TestFiles → TestFiles}/simple-webpage_files/image002.png +0 -0
  119. /package/{samples/TestFiles → TestFiles}/simple-webpage_files/image003.jpg +0 -0
  120. /package/{samples/TestFiles → TestFiles}/simple-webpage_files/image004.emz +0 -0
  121. /package/{samples/TestFiles → TestFiles}/simple-webpage_files/image005.gif +0 -0
  122. /package/{samples/TestFiles → TestFiles}/simple-webpage_files/image006.png +0 -0
  123. /package/{samples/TestFiles → TestFiles}/simple-webpage_files/image007.gif +0 -0
  124. /package/{samples/TestFiles → TestFiles}/simple-webpage_files/oledata.mso +0 -0
  125. /package/{samples/TestFiles → TestFiles}/simple-webpage_files/themedata.thmx +0 -0
  126. /package/{samples/TestFiles → TestFiles}/simple-word_2007.docx +0 -0
  127. /package/{samples/TestFiles → TestFiles}/simple-xps.xps +0 -0
  128. /package/{samples/TestFiles → TestFiles}/table.pdf +0 -0
  129. /package/{samples/TestFiles → TestFiles}/tagged.pdf +0 -0
  130. /package/{samples/TestFiles → TestFiles}/the_rime_of_the_ancient_mariner.docx +0 -0
  131. /package/{samples/TestFiles → TestFiles}/tiger.pdf +0 -0
  132. /package/{samples/TestFiles → TestFiles}/waiver.pdf +0 -0
  133. /package/{samples/TestFiles → TestFiles}/waiver_withApprovalField.pdf +0 -0
  134. /package/{samples/TestFiles → TestFiles}/waiver_withApprovalField_certified.pdf +0 -0
  135. /package/{samples/TestFiles → TestFiles}/waiver_withApprovalField_certified_approved.pdf +0 -0
@@ -1,214 +1,214 @@
1
- //---------------------------------------------------------------------------------------
2
- // Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3
- // Consult legal.txt regarding legal and license information.
4
- //---------------------------------------------------------------------------------------
5
-
6
- //---------------------------------------------------------------------------------------
7
- // The Data Extraction suite is an optional PDFNet add-on collection that can be used to
8
- // extract various types of data from PDF documents.
9
- //
10
- // The Apryse SDK Data Extraction suite can be downloaded from http://www.pdftron.com/
11
- //---------------------------------------------------------------------------------------
12
-
13
- const fs = require('fs');
14
- const { PDFNet } = require('@pdftron/pdfnet-node');
15
- const PDFTronLicense = require('../LicenseKey/LicenseKey');
16
-
17
- ((exports) => {
18
- 'use strict';
19
-
20
- exports.runDataExtractionTest = () => {
21
-
22
- const main = async () => {
23
-
24
- const inputPath = '../TestFiles/';
25
- const outputPath = '../TestFiles/Output/';
26
-
27
- //////////////////////////////////////////////////////////////////////////
28
-
29
- await PDFNet.addResourceSearchPath('../../lib/');
30
-
31
- //////////////////////////////////////////////////////////////////////////
32
- // The following sample illustrates how to extract tables from PDF documents.
33
- //////////////////////////////////////////////////////////////////////////
34
-
35
- // Test if the add-on is installed
36
- if (!await PDFNet.DataExtractionModule.isModuleAvailable(PDFNet.DataExtractionModule.DataExtractionEngine.e_Tabular)) {
37
- console.log('\nUnable to run Data Extraction: Apryse SDK Tabular Data module not available.');
38
- console.log('---------------------------------------------------------------');
39
- console.log('The Data Extraction suite is an optional add-on, available for download');
40
- console.log('at https://docs.apryse.com/documentation/core/info/modules/. If you have already');
41
- console.log('downloaded this module, ensure that the SDK is able to find the required files');
42
- console.log('using the PDFNet.addResourceSearchPath() function.\n');
43
- }
44
- else
45
- {
46
- try {
47
- // Extract tabular data as a JSON file
48
- console.log('Extract tabular data as a JSON file');
49
-
50
- let outputFile = outputPath + 'table.json';
51
- await PDFNet.DataExtractionModule.extractData(inputPath + 'table.pdf', outputFile, PDFNet.DataExtractionModule.DataExtractionEngine.e_Tabular);
52
-
53
- console.log('Result saved in ' + outputFile);
54
-
55
- ///////////////////////////////////////////////////////
56
- // Extract tabular data as a JSON string
57
- console.log('Extract tabular data as a JSON string');
58
-
59
- outputFile = outputPath + 'financial.json';
60
- const json = await PDFNet.DataExtractionModule.extractDataAsString(inputPath + 'financial.pdf', PDFNet.DataExtractionModule.DataExtractionEngine.e_Tabular);
61
- fs.writeFileSync(outputFile, json);
62
-
63
- console.log('Result saved in ' + outputFile);
64
-
65
- ///////////////////////////////////////////////////////
66
- // Extract tabular data as an XLSX file
67
- console.log('Extract tabular data as an XLSX file');
68
-
69
- outputFile = outputPath + 'table.xlsx';
70
- await PDFNet.DataExtractionModule.extractToXLSX(inputPath + 'table.pdf', outputFile);
71
-
72
- console.log('Result saved in ' + outputFile);
73
-
74
- ///////////////////////////////////////////////////////
75
- // Extract tabular data as an XLSX stream (also known as filter)
76
- console.log('Extract tabular data as an XLSX stream');
77
-
78
- outputFile = outputPath + 'financial.xlsx';
79
- const outputXlsxStream = await PDFNet.Filter.createMemoryFilter(0, false);
80
- const options = new PDFNet.DataExtractionModule.DataExtractionOptions();
81
- options.setPages("1"); // page 1
82
- await PDFNet.DataExtractionModule.extractToXLSXWithFilter(inputPath + 'financial.pdf', outputXlsxStream, options);
83
- outputXlsxStream.memoryFilterSetAsInputFilter();
84
- outputXlsxStream.writeToFile(outputFile, false);
85
-
86
- console.log('Result saved in ' + outputFile);
87
- } catch (err) {
88
- console.log(err);
89
- }
90
- }
91
-
92
- //////////////////////////////////////////////////////////////////////////
93
- // The following sample illustrates how to extract document structure from PDF documents.
94
- //////////////////////////////////////////////////////////////////////////
95
-
96
- // Test if the add-on is installed
97
- if (!await PDFNet.DataExtractionModule.isModuleAvailable(PDFNet.DataExtractionModule.DataExtractionEngine.e_DocStructure)) {
98
- console.log('\nUnable to run Data Extraction: Apryse SDK Structured Output module not available.');
99
- console.log('---------------------------------------------------------------');
100
- console.log('The Data Extraction suite is an optional add-on, available for download');
101
- console.log('at https://docs.apryse.com/documentation/core/info/modules/. If you have already');
102
- console.log('downloaded this module, ensure that the SDK is able to find the required files');
103
- console.log('using the PDFNet.addResourceSearchPath() function.\n');
104
- }
105
- else
106
- {
107
- try {
108
- // Extract document structure as a JSON file
109
- console.log('Extract document structure as a JSON file');
110
-
111
- let outputFile = outputPath + 'paragraphs_and_tables.json';
112
- await PDFNet.DataExtractionModule.extractData(inputPath + 'paragraphs_and_tables.pdf', outputFile, PDFNet.DataExtractionModule.DataExtractionEngine.e_DocStructure);
113
-
114
- console.log('Result saved in ' + outputFile);
115
-
116
- ///////////////////////////////////////////////////////
117
- // Extract document structure as a JSON string
118
- console.log('Extract document structure as a JSON string');
119
-
120
- outputFile = outputPath + 'tagged.json';
121
- const json = await PDFNet.DataExtractionModule.extractDataAsString(inputPath + 'tagged.pdf', PDFNet.DataExtractionModule.DataExtractionEngine.e_DocStructure);
122
- fs.writeFileSync(outputFile, json);
123
-
124
- console.log('Result saved in ' + outputFile);
125
- } catch (err) {
126
- console.log(err);
127
- }
128
- }
129
-
130
- //////////////////////////////////////////////////////////////////////////
131
- // The following sample illustrates how to extract form fields from PDF documents.
132
- //////////////////////////////////////////////////////////////////////////
133
-
134
- // Test if the add-on is installed
135
- if (!await PDFNet.DataExtractionModule.isModuleAvailable(PDFNet.DataExtractionModule.DataExtractionEngine.e_Form)) {
136
- console.log('\nUnable to run Data Extraction: Apryse SDK AIFormFieldExtractor module not available.');
137
- console.log('---------------------------------------------------------------');
138
- console.log('The Data Extraction suite is an optional add-on, available for download');
139
- console.log('at https://docs.apryse.com/documentation/core/info/modules/. If you have already');
140
- console.log('downloaded this module, ensure that the SDK is able to find the required files');
141
- console.log('using the PDFNet.addResourceSearchPath() function.\n');
142
- }
143
- else
144
- {
145
- try {
146
- // Extract form fields as a JSON file
147
- console.log('Extract form fields as a JSON file');
148
-
149
- let outputFile = outputPath + 'formfields-scanned.json';
150
- await PDFNet.DataExtractionModule.extractData(inputPath + 'formfields-scanned.pdf', outputFile, PDFNet.DataExtractionModule.DataExtractionEngine.e_Form);
151
-
152
- console.log('Result saved in ' + outputFile);
153
-
154
- ///////////////////////////////////////////////////////
155
- // Extract form fields as a JSON string
156
- console.log('Extract form fields as a JSON string');
157
-
158
- outputFile = outputPath + 'formfields.json';
159
- const json = await PDFNet.DataExtractionModule.extractDataAsString(inputPath + 'formfields.pdf', PDFNet.DataExtractionModule.DataExtractionEngine.e_Form);
160
- fs.writeFileSync(outputFile, json);
161
-
162
- console.log('Result saved in ' + outputFile);
163
-
164
- //////////////////////////////////////////////////////////////////////////
165
- // Detect and add form fields to a PDF document.
166
- // Document already has form fields, and this sample will update to new found fields.
167
- {
168
- console.log('Detect and add form fields in a PDF file, keep new fields');
169
-
170
- const doc = await PDFNet.PDFDoc.createFromFilePath(inputPath + "formfields-scanned-withfields.pdf");
171
-
172
- await PDFNet.DataExtractionModule.detectAndAddFormFieldsToPDF(doc);
173
- outputFile = outputPath + 'formfields-scanned-fields-new.pdf';
174
- await doc.save(outputFile, PDFNet.SDFDoc.SaveOptions.e_linearized);
175
-
176
- console.log('Result saved in ' + outputFile);
177
- }
178
-
179
- //////////////////////////////////////////////////////////////////////////
180
- // Detect and add form fields to a PDF document.
181
- // Document already has form fields, and this sample will keep the original fields.
182
- {
183
- console.log('Detect and add form fields in a PDF file, keep old fields');
184
-
185
- const doc = await PDFNet.PDFDoc.createFromFilePath(inputPath + "formfields-scanned-withfields.pdf");
186
-
187
- const options = new PDFNet.DataExtractionModule.DataExtractionOptions();
188
- options.setOverlappingFormFieldBehavior('KeepOld');
189
-
190
- await PDFNet.DataExtractionModule.detectAndAddFormFieldsToPDF(doc, options);
191
- outputFile = outputPath + 'formfields-scanned-fields-old.pdf';
192
- await doc.save(outputFile, PDFNet.SDFDoc.SaveOptions.e_linearized);
193
- }
194
-
195
- console.log('Result saved in ' + outputFile);
196
-
197
- } catch (err) {
198
- console.log(err);
199
- }
200
- }
201
-
202
- //////////////////////////////////////////////////////////////////////////
203
-
204
- console.log('Done.');
205
- };
206
-
207
- PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function (error) {
208
- console.log('Error: ' + JSON.stringify(error));
209
- }).then(function () { return PDFNet.shutdown(); });
210
- };
211
- exports.runDataExtractionTest();
212
- })(exports);
213
- // eslint-disable-next-line spaced-comment
214
- //# sourceURL=DataExtractionTest.js
1
+ //---------------------------------------------------------------------------------------
2
+ // Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3
+ // Consult legal.txt regarding legal and license information.
4
+ //---------------------------------------------------------------------------------------
5
+
6
+ //---------------------------------------------------------------------------------------
7
+ // The Data Extraction suite is an optional PDFNet add-on collection that can be used to
8
+ // extract various types of data from PDF documents.
9
+ //
10
+ // The Apryse SDK Data Extraction suite can be downloaded from http://www.pdftron.com/
11
+ //---------------------------------------------------------------------------------------
12
+
13
+ const fs = require('fs');
14
+ const { PDFNet } = require('@pdftron/pdfnet-node');
15
+ const PDFTronLicense = require('../LicenseKey/LicenseKey');
16
+
17
+ ((exports) => {
18
+ 'use strict';
19
+
20
+ exports.runDataExtractionTest = () => {
21
+
22
+ const main = async () => {
23
+
24
+ const inputPath = '../TestFiles/';
25
+ const outputPath = '../TestFiles/Output/';
26
+
27
+ //////////////////////////////////////////////////////////////////////////
28
+
29
+ await PDFNet.addResourceSearchPath('../../lib/');
30
+
31
+ //////////////////////////////////////////////////////////////////////////
32
+ // The following sample illustrates how to extract tables from PDF documents.
33
+ //////////////////////////////////////////////////////////////////////////
34
+
35
+ // Test if the add-on is installed
36
+ if (!await PDFNet.DataExtractionModule.isModuleAvailable(PDFNet.DataExtractionModule.DataExtractionEngine.e_Tabular)) {
37
+ console.log('\nUnable to run Data Extraction: Apryse SDK Tabular Data module not available.');
38
+ console.log('---------------------------------------------------------------');
39
+ console.log('The Data Extraction suite is an optional add-on, available for download');
40
+ console.log('at https://docs.apryse.com/documentation/core/info/modules/. If you have already');
41
+ console.log('downloaded this module, ensure that the SDK is able to find the required files');
42
+ console.log('using the PDFNet.addResourceSearchPath() function.\n');
43
+ }
44
+ else
45
+ {
46
+ try {
47
+ // Extract tabular data as a JSON file
48
+ console.log('Extract tabular data as a JSON file');
49
+
50
+ let outputFile = outputPath + 'table.json';
51
+ await PDFNet.DataExtractionModule.extractData(inputPath + 'table.pdf', outputFile, PDFNet.DataExtractionModule.DataExtractionEngine.e_Tabular);
52
+
53
+ console.log('Result saved in ' + outputFile);
54
+
55
+ ///////////////////////////////////////////////////////
56
+ // Extract tabular data as a JSON string
57
+ console.log('Extract tabular data as a JSON string');
58
+
59
+ outputFile = outputPath + 'financial.json';
60
+ const json = await PDFNet.DataExtractionModule.extractDataAsString(inputPath + 'financial.pdf', PDFNet.DataExtractionModule.DataExtractionEngine.e_Tabular);
61
+ fs.writeFileSync(outputFile, json);
62
+
63
+ console.log('Result saved in ' + outputFile);
64
+
65
+ ///////////////////////////////////////////////////////
66
+ // Extract tabular data as an XLSX file
67
+ console.log('Extract tabular data as an XLSX file');
68
+
69
+ outputFile = outputPath + 'table.xlsx';
70
+ await PDFNet.DataExtractionModule.extractToXLSX(inputPath + 'table.pdf', outputFile);
71
+
72
+ console.log('Result saved in ' + outputFile);
73
+
74
+ ///////////////////////////////////////////////////////
75
+ // Extract tabular data as an XLSX stream (also known as filter)
76
+ console.log('Extract tabular data as an XLSX stream');
77
+
78
+ outputFile = outputPath + 'financial.xlsx';
79
+ const outputXlsxStream = await PDFNet.Filter.createMemoryFilter(0, false);
80
+ const options = new PDFNet.DataExtractionModule.DataExtractionOptions();
81
+ options.setPages("1"); // page 1
82
+ await PDFNet.DataExtractionModule.extractToXLSXWithFilter(inputPath + 'financial.pdf', outputXlsxStream, options);
83
+ outputXlsxStream.memoryFilterSetAsInputFilter();
84
+ outputXlsxStream.writeToFile(outputFile, false);
85
+
86
+ console.log('Result saved in ' + outputFile);
87
+ } catch (err) {
88
+ console.log(err);
89
+ }
90
+ }
91
+
92
+ //////////////////////////////////////////////////////////////////////////
93
+ // The following sample illustrates how to extract document structure from PDF documents.
94
+ //////////////////////////////////////////////////////////////////////////
95
+
96
+ // Test if the add-on is installed
97
+ if (!await PDFNet.DataExtractionModule.isModuleAvailable(PDFNet.DataExtractionModule.DataExtractionEngine.e_DocStructure)) {
98
+ console.log('\nUnable to run Data Extraction: Apryse SDK Structured Output module not available.');
99
+ console.log('---------------------------------------------------------------');
100
+ console.log('The Data Extraction suite is an optional add-on, available for download');
101
+ console.log('at https://docs.apryse.com/documentation/core/info/modules/. If you have already');
102
+ console.log('downloaded this module, ensure that the SDK is able to find the required files');
103
+ console.log('using the PDFNet.addResourceSearchPath() function.\n');
104
+ }
105
+ else
106
+ {
107
+ try {
108
+ // Extract document structure as a JSON file
109
+ console.log('Extract document structure as a JSON file');
110
+
111
+ let outputFile = outputPath + 'paragraphs_and_tables.json';
112
+ await PDFNet.DataExtractionModule.extractData(inputPath + 'paragraphs_and_tables.pdf', outputFile, PDFNet.DataExtractionModule.DataExtractionEngine.e_DocStructure);
113
+
114
+ console.log('Result saved in ' + outputFile);
115
+
116
+ ///////////////////////////////////////////////////////
117
+ // Extract document structure as a JSON string
118
+ console.log('Extract document structure as a JSON string');
119
+
120
+ outputFile = outputPath + 'tagged.json';
121
+ const json = await PDFNet.DataExtractionModule.extractDataAsString(inputPath + 'tagged.pdf', PDFNet.DataExtractionModule.DataExtractionEngine.e_DocStructure);
122
+ fs.writeFileSync(outputFile, json);
123
+
124
+ console.log('Result saved in ' + outputFile);
125
+ } catch (err) {
126
+ console.log(err);
127
+ }
128
+ }
129
+
130
+ //////////////////////////////////////////////////////////////////////////
131
+ // The following sample illustrates how to extract form fields from PDF documents.
132
+ //////////////////////////////////////////////////////////////////////////
133
+
134
+ // Test if the add-on is installed
135
+ if (!await PDFNet.DataExtractionModule.isModuleAvailable(PDFNet.DataExtractionModule.DataExtractionEngine.e_Form)) {
136
+ console.log('\nUnable to run Data Extraction: Apryse SDK AIFormFieldExtractor module not available.');
137
+ console.log('---------------------------------------------------------------');
138
+ console.log('The Data Extraction suite is an optional add-on, available for download');
139
+ console.log('at https://docs.apryse.com/documentation/core/info/modules/. If you have already');
140
+ console.log('downloaded this module, ensure that the SDK is able to find the required files');
141
+ console.log('using the PDFNet.addResourceSearchPath() function.\n');
142
+ }
143
+ else
144
+ {
145
+ try {
146
+ // Extract form fields as a JSON file
147
+ console.log('Extract form fields as a JSON file');
148
+
149
+ let outputFile = outputPath + 'formfields-scanned.json';
150
+ await PDFNet.DataExtractionModule.extractData(inputPath + 'formfields-scanned.pdf', outputFile, PDFNet.DataExtractionModule.DataExtractionEngine.e_Form);
151
+
152
+ console.log('Result saved in ' + outputFile);
153
+
154
+ ///////////////////////////////////////////////////////
155
+ // Extract form fields as a JSON string
156
+ console.log('Extract form fields as a JSON string');
157
+
158
+ outputFile = outputPath + 'formfields.json';
159
+ const json = await PDFNet.DataExtractionModule.extractDataAsString(inputPath + 'formfields.pdf', PDFNet.DataExtractionModule.DataExtractionEngine.e_Form);
160
+ fs.writeFileSync(outputFile, json);
161
+
162
+ console.log('Result saved in ' + outputFile);
163
+
164
+ //////////////////////////////////////////////////////////////////////////
165
+ // Detect and add form fields to a PDF document.
166
+ // Document already has form fields, and this sample will update to new found fields.
167
+ {
168
+ console.log('Detect and add form fields in a PDF file, keep new fields');
169
+
170
+ const doc = await PDFNet.PDFDoc.createFromFilePath(inputPath + "formfields-scanned-withfields.pdf");
171
+
172
+ await PDFNet.DataExtractionModule.detectAndAddFormFieldsToPDF(doc);
173
+ outputFile = outputPath + 'formfields-scanned-fields-new.pdf';
174
+ await doc.save(outputFile, PDFNet.SDFDoc.SaveOptions.e_linearized);
175
+
176
+ console.log('Result saved in ' + outputFile);
177
+ }
178
+
179
+ //////////////////////////////////////////////////////////////////////////
180
+ // Detect and add form fields to a PDF document.
181
+ // Document already has form fields, and this sample will keep the original fields.
182
+ {
183
+ console.log('Detect and add form fields in a PDF file, keep old fields');
184
+
185
+ const doc = await PDFNet.PDFDoc.createFromFilePath(inputPath + "formfields-scanned-withfields.pdf");
186
+
187
+ const options = new PDFNet.DataExtractionModule.DataExtractionOptions();
188
+ options.setOverlappingFormFieldBehavior('KeepOld');
189
+
190
+ await PDFNet.DataExtractionModule.detectAndAddFormFieldsToPDF(doc, options);
191
+ outputFile = outputPath + 'formfields-scanned-fields-old.pdf';
192
+ await doc.save(outputFile, PDFNet.SDFDoc.SaveOptions.e_linearized);
193
+ }
194
+
195
+ console.log('Result saved in ' + outputFile);
196
+
197
+ } catch (err) {
198
+ console.log(err);
199
+ }
200
+ }
201
+
202
+ //////////////////////////////////////////////////////////////////////////
203
+
204
+ console.log('Done.');
205
+ };
206
+
207
+ PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function (error) {
208
+ console.log('Error: ' + JSON.stringify(error));
209
+ }).then(function () { return PDFNet.shutdown(); });
210
+ };
211
+ exports.runDataExtractionTest();
212
+ })(exports);
213
+ // eslint-disable-next-line spaced-comment
214
+ //# sourceURL=DataExtractionTest.js