@pdftron/data-extraction 10.1.1 → 10.2.0-1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/package.json +4 -4
  2. package/readme.md +39 -21
package/package.json CHANGED
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "@pdftron/data-extraction",
3
- "version": "10.1.1",
3
+ "version": "10.2.0-1",
4
4
  "main": "./lib/main.js",
5
5
  "binary": {
6
6
  "module_name": "ApryseIDP",
7
7
  "module_path": "./lib",
8
8
  "remote_path": "./downloads/PDFNetNode/Data-Extraction",
9
- "package_name": "data-extraction-{platform}-{arch}.tar.gz",
9
+ "package_name": "data-extraction-{version}-{platform}-{arch}.tar.gz",
10
10
  "host": "https://www.pdftron.com"
11
11
  },
12
12
  "scripts": {
@@ -17,10 +17,10 @@
17
17
  },
18
18
  "description": "The Apryse SDK Data Extraction Module.",
19
19
  "author": "Apryse Software Inc.",
20
- "license": "SEE LICENSE IN license.pdf",
20
+ "license": "Commercial",
21
21
  "homepage": "https://www.apryse.com",
22
22
  "dependencies": {
23
- "@pdftron/pdfnet-node": "10.1.1",
23
+ "@pdftron/pdfnet-node": "10.2.0",
24
24
  "@mapbox/node-pre-gyp": "^1.0.3"
25
25
  },
26
26
  "keywords": [
package/readme.md CHANGED
@@ -1,38 +1,56 @@
1
- ## @pdftron/pdfnet-node
1
+ ## @pdftron/data-extraction
2
2
 
3
- This package leverages the full power of PDFTron's native SDK for maximal performance and accuracy. In order to maintain consistency across platforms the Javascript API is used in the same manner as the PDFNet API available in PDFTron's Web platform. Since access to the filesystem is included in Node.js/Electron some additional APIs requiring filesystem access have also been included.
3
+ This package is meant to be used in conjunction with @pdftron/pdfnet-node to support IDP data extraction from Apryse. Follow this guide for more info on usage.
4
+ https://docs.apryse.com/documentation/core/guides/intelligent-data-extraction/
5
+
6
+ For further reading, check out our blog post on the project.
7
+ https://apryse.com/blog/introducing-automated-data-extraction-pdf-idp
4
8
 
5
9
  #### Supported platform, Node.js, and Electron versions
6
10
  This package depends on unmanaged add-on binaries, and the add-on binaries are not cross-platform. At the moment we have support for:
7
- * **OS**: Linux (excluding Alpine), Windows(x64), Mac
11
+ * **OS**: Linux (excluding Alpine), Windows(x64)
8
12
  * **Node.js version**: 8 - 18
9
13
  * **Electron version**: 6 - 19
10
14
 
11
15
  Installation will fail if your OS, Node.js or Electron version is not supported.
12
16
 
13
- To install for Electron, *runtime* and *target* options are needed. For example, For Electron 6, we need to run *npm i @pdftron/pdfnet-node --runtime=electron --target=6.0.0*. Note that we need to use *6.0.0* for all Electron 6 versions.
14
-
15
17
  #### Usage
16
- Here is a code snippet to demonstrate how to use this package.
18
+
19
+ Add the `@pdftron/data-extraction` package as a dependency in your `package.json`.
20
+
17
21
  ```javascript
18
- const { PDFNet } = require('@pdftron/pdfnet-node'); // you may need to set up NODE_PATH environment variable to make this work.
19
-
20
- const main = async() => {
21
- const doc = await PDFNet.PDFDoc.create();
22
- const page = await doc.pageCreate();
23
- doc.pagePushBack(page);
24
- doc.save('blank.pdf', PDFNet.SDFDoc.SaveOptions.e_linearized);
25
- };
26
-
27
- // add your own license key as the second parameter, e.g. in place of 'YOUR_LICENSE_KEY'.
28
- PDFNet.runWithCleanup(main, 'YOUR_LICENSE_KEY').catch(function(error) {
29
- console.log('Error: ' + JSON.stringify(error));
30
- }).then(function(){ return PDFNet.shutdown(); });
22
+ const { PDFNet } = require('@pdftron/pdfnet-node');
23
+ const licenseKey = "Insert license key here"
24
+ const inputFile = "Insert input file location here"
25
+
26
+ async function main() {
27
+ // Extract document structure as a JSON file
28
+ console.log('Extract document structure as a JSON file');
29
+
30
+ let outputFile = 'out/paragraphs_and_tables.json';
31
+ await PDFNet.DataExtractionModule.extractData(inputFile, outputFile, PDFNet.DataExtractionModule.DataExtractionEngine.e_DocStructure);
32
+
33
+ console.log('Result saved in ' + outputFile);
34
+
35
+ ///////////////////////////////////////////////////////
36
+ // Extract document structure as a JSON string
37
+ console.log('Extract document structure as a JSON string');
38
+
39
+ outputFile = 'out/tagged.json';
40
+ const json = await PDFNet.DataExtractionModule.extractDataAsString(inputFile, PDFNet.DataExtractionModule.DataExtractionEngine.e_DocStructure);
41
+
42
+ fs.writeFileSync(outputFile, json);
43
+ }
44
+
45
+ PDFNet.runWithCleanup(main, licenseKey).catch(function (error) {
46
+ console.log('Error: ' + JSON.stringify(error));
47
+ }).then(function () { return PDFNet.shutdown(); });;
48
+
31
49
  ```
32
50
 
33
- There are some code samples in the [@pdftron/pdfnet-node-samples](https://www.npmjs.com/package/@pdftron/pdfnet-node-samples) package.
51
+ A larger code sample can be found [here](https://docs.apryse.com/documentation/samples/node/js/DataExtractionTest/)
34
52
 
35
53
  To get started please see the documentation at https://www.pdftron.com/documentation/nodejs/get-started/integration.
36
54
 
37
55
  #### Licensing
38
- Please go to https://www.pdftron.com/pws/get-key to obtain a demo license or https://www.pdftron.com/form/contact-sales to obtain a production key. For further information, please visit https://www.pdftron.com/licensing.
56
+ Please go to https://docs.apryse.com/documentation/core/info/license/ to obtain a demo or production license.