@aws-sdk/client-comprehend 3.28.0 → 3.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,8 @@
1
1
  import { MetadataBearer as $MetadataBearer, SmithyException as __SmithyException } from "@aws-sdk/types";
2
+ export declare enum AugmentedManifestsDocumentTypeFormat {
3
+ PLAIN_TEXT_DOCUMENT = "PLAIN_TEXT_DOCUMENT",
4
+ SEMI_STRUCTURED_DOCUMENT = "SEMI_STRUCTURED_DOCUMENT"
5
+ }
2
6
  /**
3
7
  * <p>An augmented manifest file that provides training data for your custom model. An augmented
4
8
  * manifest file is a labeled dataset that is produced by Amazon SageMaker Ground Truth.</p>
@@ -19,6 +23,29 @@ export interface AugmentedManifestsListItem {
19
23
  * an individual job.</p>
20
24
  */
21
25
  AttributeNames: string[] | undefined;
26
+ /**
27
+ * <p>The S3 prefix to the annotation files that are referred to in the augmented manifest file.</p>
28
+ */
29
+ AnnotationDataS3Uri?: string;
30
+ /**
31
+ * <p>The S3 prefix to the source files (PDFs) that are referred to in the augmented manifest file.</p>
32
+ */
33
+ SourceDocumentsS3Uri?: string;
34
+ /**
35
+ * <p>The type of augmented manifest. PlainTextDocument or SemiStructuredDocument. If you don't specify, the default is PlainTextDocument. </p>
36
+ * <ul>
37
+ * <li>
38
+ * <p>
39
+ * <code>PLAIN_TEXT_DOCUMENT</code> A document type that represents any unicode text that is encoded in UTF-8.</p>
40
+ * </li>
41
+ * <li>
42
+ * <p>
43
+ * <code>SEMI_STRUCTURED_DOCUMENT</code> A document type with positional and structural context, like a PDF. For training with Amazon Comprehend, only PDFs are supported.
44
+ * For inference, Amazon Comprehend supports PDFs, DOCX and TXT.</p>
45
+ * </li>
46
+ * </ul>
47
+ */
48
+ DocumentType?: AugmentedManifestsDocumentTypeFormat | string;
22
49
  }
23
50
  export declare namespace AugmentedManifestsListItem {
24
51
  /**
@@ -1674,12 +1701,68 @@ export declare namespace DescribeDocumentClassificationJobRequest {
1674
1701
  */
1675
1702
  const filterSensitiveLog: (obj: DescribeDocumentClassificationJobRequest) => any;
1676
1703
  }
1704
+ export declare enum DocumentReadAction {
1705
+ TEXTRACT_ANALYZE_DOCUMENT = "TEXTRACT_ANALYZE_DOCUMENT",
1706
+ TEXTRACT_DETECT_DOCUMENT_TEXT = "TEXTRACT_DETECT_DOCUMENT_TEXT"
1707
+ }
1708
+ export declare enum DocumentReadMode {
1709
+ FORCE_DOCUMENT_READ_ACTION = "FORCE_DOCUMENT_READ_ACTION",
1710
+ SERVICE_DEFAULT = "SERVICE_DEFAULT"
1711
+ }
1712
+ export declare enum DocumentReadFeatureTypes {
1713
+ FORMS = "FORMS",
1714
+ TABLES = "TABLES"
1715
+ }
1716
+ /**
1717
+ * <p>The input properties that specify how inference documents are read.</p>
1718
+ */
1719
+ export interface DocumentReaderConfig {
1720
+ /**
1721
+ * <p>This enum field will start with two values which will apply to PDFs:</p>
1722
+ * <ul>
1723
+ * <li>
1724
+ * <p>
1725
+ * <code>TEXTRACT_DETECT_DOCUMENT_TEXT</code> - The service calls DetectDocumentText for PDF documents per page.</p>
1726
+ * </li>
1727
+ * <li>
1728
+ * <p>
1729
+ * <code>TEXTRACT_ANALYZE_DOCUMENT</code> - The service calls AnalyzeDocument for PDF documents per page.</p>
1730
+ * </li>
1731
+ * </ul>
1732
+ */
1733
+ DocumentReadAction: DocumentReadAction | string | undefined;
1734
+ /**
1735
+ * <p>This enum field provides two values:</p>
1736
+ * <ul>
1737
+ * <li>
1738
+ * <p>
1739
+ * <code>SERVICE_DEFAULT</code> - use service defaults for Document reading. For Digital PDF it would mean using an internal parser instead of Textract APIs</p>
1740
+ * </li>
1741
+ * <li>
1742
+ * <p>
1743
+ * <code>FORCE_DOCUMENT_READ_ACTION</code> - Always use specified action for DocumentReadAction, including Digital PDF.
1744
+ * </p>
1745
+ * </li>
1746
+ * </ul>
1747
+ */
1748
+ DocumentReadMode?: DocumentReadMode | string;
1749
+ /**
1750
+ * <p>Specifies how the text in an input file should be processed:</p>
1751
+ */
1752
+ FeatureTypes?: (DocumentReadFeatureTypes | string)[];
1753
+ }
1754
+ export declare namespace DocumentReaderConfig {
1755
+ /**
1756
+ * @internal
1757
+ */
1758
+ const filterSensitiveLog: (obj: DocumentReaderConfig) => any;
1759
+ }
1677
1760
  export declare enum InputFormat {
1678
1761
  ONE_DOC_PER_FILE = "ONE_DOC_PER_FILE",
1679
1762
  ONE_DOC_PER_LINE = "ONE_DOC_PER_LINE"
1680
1763
  }
1681
1764
  /**
1682
- * <p>The input properties for a topic detection job.</p>
1765
+ * <p>The input properties for an inference job.</p>
1683
1766
  */
1684
1767
  export interface InputDataConfig {
1685
1768
  /**
@@ -1709,6 +1792,12 @@ export interface InputDataConfig {
1709
1792
  * </ul>
1710
1793
  */
1711
1794
  InputFormat?: InputFormat | string;
1795
+ /**
1796
+ * <p>The document reader config field applies only for InputDataConfig of StartEntitiesDetectionJob. </p>
1797
+ * <p>Use DocumentReaderConfig to provide specifications about how you want your inference documents read.
1798
+ * Currently it applies for PDF documents in StartEntitiesDetectionJob custom inference.</p>
1799
+ */
1800
+ DocumentReaderConfig?: DocumentReaderConfig;
1712
1801
  }
1713
1802
  export declare namespace InputDataConfig {
1714
1803
  /**
@@ -1,4 +1,8 @@
1
1
  import { MetadataBearer as $MetadataBearer, SmithyException as __SmithyException } from "@aws-sdk/types";
2
+ export declare enum AugmentedManifestsDocumentTypeFormat {
3
+ PLAIN_TEXT_DOCUMENT = "PLAIN_TEXT_DOCUMENT",
4
+ SEMI_STRUCTURED_DOCUMENT = "SEMI_STRUCTURED_DOCUMENT"
5
+ }
2
6
  /**
3
7
  * <p>An augmented manifest file that provides training data for your custom model. An augmented
4
8
  * manifest file is a labeled dataset that is produced by Amazon SageMaker Ground Truth.</p>
@@ -19,6 +23,29 @@ export interface AugmentedManifestsListItem {
19
23
  * an individual job.</p>
20
24
  */
21
25
  AttributeNames: string[] | undefined;
26
+ /**
27
+ * <p>The S3 prefix to the annotation files that are referred to in the augmented manifest file.</p>
28
+ */
29
+ AnnotationDataS3Uri?: string;
30
+ /**
31
+ * <p>The S3 prefix to the source files (PDFs) that are referred to in the augmented manifest file.</p>
32
+ */
33
+ SourceDocumentsS3Uri?: string;
34
+ /**
35
+ * <p>The type of augmented manifest. PlainTextDocument or SemiStructuredDocument. If you don't specify, the default is PlainTextDocument. </p>
36
+ * <ul>
37
+ * <li>
38
+ * <p>
39
+ * <code>PLAIN_TEXT_DOCUMENT</code> A document type that represents any unicode text that is encoded in UTF-8.</p>
40
+ * </li>
41
+ * <li>
42
+ * <p>
43
+ * <code>SEMI_STRUCTURED_DOCUMENT</code> A document type with positional and structural context, like a PDF. For training with Amazon Comprehend, only PDFs are supported.
44
+ * For inference, Amazon Comprehend supports PDFs, DOCX and TXT.</p>
45
+ * </li>
46
+ * </ul>
47
+ */
48
+ DocumentType?: AugmentedManifestsDocumentTypeFormat | string;
22
49
  }
23
50
  export declare namespace AugmentedManifestsListItem {
24
51
  /**
@@ -1674,12 +1701,68 @@ export declare namespace DescribeDocumentClassificationJobRequest {
1674
1701
  */
1675
1702
  const filterSensitiveLog: (obj: DescribeDocumentClassificationJobRequest) => any;
1676
1703
  }
1704
+ export declare enum DocumentReadAction {
1705
+ TEXTRACT_ANALYZE_DOCUMENT = "TEXTRACT_ANALYZE_DOCUMENT",
1706
+ TEXTRACT_DETECT_DOCUMENT_TEXT = "TEXTRACT_DETECT_DOCUMENT_TEXT"
1707
+ }
1708
+ export declare enum DocumentReadMode {
1709
+ FORCE_DOCUMENT_READ_ACTION = "FORCE_DOCUMENT_READ_ACTION",
1710
+ SERVICE_DEFAULT = "SERVICE_DEFAULT"
1711
+ }
1712
+ export declare enum DocumentReadFeatureTypes {
1713
+ FORMS = "FORMS",
1714
+ TABLES = "TABLES"
1715
+ }
1716
+ /**
1717
+ * <p>The input properties that specify how inference documents are read.</p>
1718
+ */
1719
+ export interface DocumentReaderConfig {
1720
+ /**
1721
+ * <p>This enum field will start with two values which will apply to PDFs:</p>
1722
+ * <ul>
1723
+ * <li>
1724
+ * <p>
1725
+ * <code>TEXTRACT_DETECT_DOCUMENT_TEXT</code> - The service calls DetectDocumentText for PDF documents per page.</p>
1726
+ * </li>
1727
+ * <li>
1728
+ * <p>
1729
+ * <code>TEXTRACT_ANALYZE_DOCUMENT</code> - The service calls AnalyzeDocument for PDF documents per page.</p>
1730
+ * </li>
1731
+ * </ul>
1732
+ */
1733
+ DocumentReadAction: DocumentReadAction | string | undefined;
1734
+ /**
1735
+ * <p>This enum field provides two values:</p>
1736
+ * <ul>
1737
+ * <li>
1738
+ * <p>
1739
+ * <code>SERVICE_DEFAULT</code> - use service defaults for Document reading. For Digital PDF it would mean using an internal parser instead of Textract APIs</p>
1740
+ * </li>
1741
+ * <li>
1742
+ * <p>
1743
+ * <code>FORCE_DOCUMENT_READ_ACTION</code> - Always use specified action for DocumentReadAction, including Digital PDF.
1744
+ * </p>
1745
+ * </li>
1746
+ * </ul>
1747
+ */
1748
+ DocumentReadMode?: DocumentReadMode | string;
1749
+ /**
1750
+ * <p>Specifies how the text in an input file should be processed:</p>
1751
+ */
1752
+ FeatureTypes?: (DocumentReadFeatureTypes | string)[];
1753
+ }
1754
+ export declare namespace DocumentReaderConfig {
1755
+ /**
1756
+ * @internal
1757
+ */
1758
+ const filterSensitiveLog: (obj: DocumentReaderConfig) => any;
1759
+ }
1677
1760
  export declare enum InputFormat {
1678
1761
  ONE_DOC_PER_FILE = "ONE_DOC_PER_FILE",
1679
1762
  ONE_DOC_PER_LINE = "ONE_DOC_PER_LINE"
1680
1763
  }
1681
1764
  /**
1682
- * <p>The input properties for a topic detection job.</p>
1765
+ * <p>The input properties for an inference job.</p>
1683
1766
  */
1684
1767
  export interface InputDataConfig {
1685
1768
  /**
@@ -1709,6 +1792,12 @@ export interface InputDataConfig {
1709
1792
  * </ul>
1710
1793
  */
1711
1794
  InputFormat?: InputFormat | string;
1795
+ /**
1796
+ * <p>The document reader config field applies only for InputDataConfig of StartEntitiesDetectionJob. </p>
1797
+ * <p>Use DocumentReaderConfig to provide specifications about how you want your inference documents read.
1798
+ * Currently it applies for PDF documents in StartEntitiesDetectionJob custom inference.</p>
1799
+ */
1800
+ DocumentReaderConfig?: DocumentReaderConfig;
1712
1801
  }
1713
1802
  export declare namespace InputDataConfig {
1714
1803
  /**
@@ -1,6 +1,11 @@
1
1
  import { SENSITIVE_STRING } from "@aws-sdk/smithy-client";
2
2
  import { MetadataBearer as $MetadataBearer, SmithyException as __SmithyException } from "@aws-sdk/types";
3
3
 
4
+ export enum AugmentedManifestsDocumentTypeFormat {
5
+ PLAIN_TEXT_DOCUMENT = "PLAIN_TEXT_DOCUMENT",
6
+ SEMI_STRUCTURED_DOCUMENT = "SEMI_STRUCTURED_DOCUMENT",
7
+ }
8
+
4
9
  /**
5
10
  * <p>An augmented manifest file that provides training data for your custom model. An augmented
6
11
  * manifest file is a labeled dataset that is produced by Amazon SageMaker Ground Truth.</p>
@@ -22,6 +27,32 @@ export interface AugmentedManifestsListItem {
22
27
  * an individual job.</p>
23
28
  */
24
29
  AttributeNames: string[] | undefined;
30
+
31
+ /**
32
+ * <p>The S3 prefix to the annotation files that are referred to in the augmented manifest file.</p>
33
+ */
34
+ AnnotationDataS3Uri?: string;
35
+
36
+ /**
37
+ * <p>The S3 prefix to the source files (PDFs) that are referred to in the augmented manifest file.</p>
38
+ */
39
+ SourceDocumentsS3Uri?: string;
40
+
41
+ /**
42
+ * <p>The type of augmented manifest. PlainTextDocument or SemiStructuredDocument. If you don't specify, the default is PlainTextDocument. </p>
43
+ * <ul>
44
+ * <li>
45
+ * <p>
46
+ * <code>PLAIN_TEXT_DOCUMENT</code> A document type that represents any unicode text that is encoded in UTF-8.</p>
47
+ * </li>
48
+ * <li>
49
+ * <p>
50
+ * <code>SEMI_STRUCTURED_DOCUMENT</code> A document type with positional and structural context, like a PDF. For training with Amazon Comprehend, only PDFs are supported.
51
+ * For inference, Amazon Comprehend supports PDFs, DOCX and TXT.</p>
52
+ * </li>
53
+ * </ul>
54
+ */
55
+ DocumentType?: AugmentedManifestsDocumentTypeFormat | string;
25
56
  }
26
57
 
27
58
  export namespace AugmentedManifestsListItem {
@@ -2036,13 +2067,78 @@ export namespace DescribeDocumentClassificationJobRequest {
2036
2067
  });
2037
2068
  }
2038
2069
 
2070
+ export enum DocumentReadAction {
2071
+ TEXTRACT_ANALYZE_DOCUMENT = "TEXTRACT_ANALYZE_DOCUMENT",
2072
+ TEXTRACT_DETECT_DOCUMENT_TEXT = "TEXTRACT_DETECT_DOCUMENT_TEXT",
2073
+ }
2074
+
2075
+ export enum DocumentReadMode {
2076
+ FORCE_DOCUMENT_READ_ACTION = "FORCE_DOCUMENT_READ_ACTION",
2077
+ SERVICE_DEFAULT = "SERVICE_DEFAULT",
2078
+ }
2079
+
2080
+ export enum DocumentReadFeatureTypes {
2081
+ FORMS = "FORMS",
2082
+ TABLES = "TABLES",
2083
+ }
2084
+
2085
+ /**
2086
+ * <p>The input properties that specify how inference documents are read.</p>
2087
+ */
2088
+ export interface DocumentReaderConfig {
2089
+ /**
2090
+ * <p>This enum field will start with two values which will apply to PDFs:</p>
2091
+ * <ul>
2092
+ * <li>
2093
+ * <p>
2094
+ * <code>TEXTRACT_DETECT_DOCUMENT_TEXT</code> - The service calls DetectDocumentText for PDF documents per page.</p>
2095
+ * </li>
2096
+ * <li>
2097
+ * <p>
2098
+ * <code>TEXTRACT_ANALYZE_DOCUMENT</code> - The service calls AnalyzeDocument for PDF documents per page.</p>
2099
+ * </li>
2100
+ * </ul>
2101
+ */
2102
+ DocumentReadAction: DocumentReadAction | string | undefined;
2103
+
2104
+ /**
2105
+ * <p>This enum field provides two values:</p>
2106
+ * <ul>
2107
+ * <li>
2108
+ * <p>
2109
+ * <code>SERVICE_DEFAULT</code> - use service defaults for Document reading. For Digital PDF it would mean using an internal parser instead of Textract APIs</p>
2110
+ * </li>
2111
+ * <li>
2112
+ * <p>
2113
+ * <code>FORCE_DOCUMENT_READ_ACTION</code> - Always use specified action for DocumentReadAction, including Digital PDF.
2114
+ * </p>
2115
+ * </li>
2116
+ * </ul>
2117
+ */
2118
+ DocumentReadMode?: DocumentReadMode | string;
2119
+
2120
+ /**
2121
+ * <p>Specifies how the text in an input file should be processed:</p>
2122
+ */
2123
+ FeatureTypes?: (DocumentReadFeatureTypes | string)[];
2124
+ }
2125
+
2126
+ export namespace DocumentReaderConfig {
2127
+ /**
2128
+ * @internal
2129
+ */
2130
+ export const filterSensitiveLog = (obj: DocumentReaderConfig): any => ({
2131
+ ...obj,
2132
+ });
2133
+ }
2134
+
2039
2135
  export enum InputFormat {
2040
2136
  ONE_DOC_PER_FILE = "ONE_DOC_PER_FILE",
2041
2137
  ONE_DOC_PER_LINE = "ONE_DOC_PER_LINE",
2042
2138
  }
2043
2139
 
2044
2140
  /**
2045
- * <p>The input properties for a topic detection job.</p>
2141
+ * <p>The input properties for an inference job.</p>
2046
2142
  */
2047
2143
  export interface InputDataConfig {
2048
2144
  /**
@@ -2073,6 +2169,13 @@ export interface InputDataConfig {
2073
2169
  * </ul>
2074
2170
  */
2075
2171
  InputFormat?: InputFormat | string;
2172
+
2173
+ /**
2174
+ * <p>The document reader config field applies only for InputDataConfig of StartEntitiesDetectionJob. </p>
2175
+ * <p>Use DocumentReaderConfig to provide specifications about how you want your inference documents read.
2176
+ * Currently it applies for PDF documents in StartEntitiesDetectionJob custom inference.</p>
2177
+ */
2178
+ DocumentReaderConfig?: DocumentReaderConfig;
2076
2179
  }
2077
2180
 
2078
2181
  export namespace InputDataConfig {
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@aws-sdk/client-comprehend",
3
3
  "description": "AWS SDK for JavaScript Comprehend Client for Node.js, Browser and React Native",
4
- "version": "3.28.0",
4
+ "version": "3.32.0",
5
5
  "scripts": {
6
6
  "clean": "yarn remove-definitions && yarn remove-dist && yarn remove-documentation",
7
7
  "build-documentation": "yarn remove-documentation && typedoc ./",
@@ -27,39 +27,39 @@
27
27
  "dependencies": {
28
28
  "@aws-crypto/sha256-browser": "^1.0.0",
29
29
  "@aws-crypto/sha256-js": "^1.0.0",
30
- "@aws-sdk/client-sts": "3.28.0",
31
- "@aws-sdk/config-resolver": "3.28.0",
32
- "@aws-sdk/credential-provider-node": "3.28.0",
33
- "@aws-sdk/fetch-http-handler": "3.25.0",
34
- "@aws-sdk/hash-node": "3.25.0",
35
- "@aws-sdk/invalid-dependency": "3.25.0",
36
- "@aws-sdk/middleware-content-length": "3.25.0",
37
- "@aws-sdk/middleware-host-header": "3.25.0",
38
- "@aws-sdk/middleware-logger": "3.25.0",
39
- "@aws-sdk/middleware-retry": "3.28.0",
40
- "@aws-sdk/middleware-serde": "3.25.0",
41
- "@aws-sdk/middleware-signing": "3.28.0",
42
- "@aws-sdk/middleware-stack": "3.25.0",
43
- "@aws-sdk/middleware-user-agent": "3.25.0",
44
- "@aws-sdk/node-config-provider": "3.28.0",
45
- "@aws-sdk/node-http-handler": "3.25.0",
46
- "@aws-sdk/protocol-http": "3.25.0",
47
- "@aws-sdk/smithy-client": "3.28.0",
48
- "@aws-sdk/types": "3.25.0",
49
- "@aws-sdk/url-parser": "3.25.0",
50
- "@aws-sdk/util-base64-browser": "3.23.0",
51
- "@aws-sdk/util-base64-node": "3.23.0",
52
- "@aws-sdk/util-body-length-browser": "3.23.0",
53
- "@aws-sdk/util-body-length-node": "3.23.0",
54
- "@aws-sdk/util-user-agent-browser": "3.25.0",
55
- "@aws-sdk/util-user-agent-node": "3.28.0",
56
- "@aws-sdk/util-utf8-browser": "3.23.0",
57
- "@aws-sdk/util-utf8-node": "3.23.0",
30
+ "@aws-sdk/client-sts": "3.32.0",
31
+ "@aws-sdk/config-resolver": "3.32.0",
32
+ "@aws-sdk/credential-provider-node": "3.32.0",
33
+ "@aws-sdk/fetch-http-handler": "3.32.0",
34
+ "@aws-sdk/hash-node": "3.32.0",
35
+ "@aws-sdk/invalid-dependency": "3.32.0",
36
+ "@aws-sdk/middleware-content-length": "3.32.0",
37
+ "@aws-sdk/middleware-host-header": "3.32.0",
38
+ "@aws-sdk/middleware-logger": "3.32.0",
39
+ "@aws-sdk/middleware-retry": "3.32.0",
40
+ "@aws-sdk/middleware-serde": "3.32.0",
41
+ "@aws-sdk/middleware-signing": "3.32.0",
42
+ "@aws-sdk/middleware-stack": "3.32.0",
43
+ "@aws-sdk/middleware-user-agent": "3.32.0",
44
+ "@aws-sdk/node-config-provider": "3.32.0",
45
+ "@aws-sdk/node-http-handler": "3.32.0",
46
+ "@aws-sdk/protocol-http": "3.32.0",
47
+ "@aws-sdk/smithy-client": "3.32.0",
48
+ "@aws-sdk/types": "3.32.0",
49
+ "@aws-sdk/url-parser": "3.32.0",
50
+ "@aws-sdk/util-base64-browser": "3.32.0",
51
+ "@aws-sdk/util-base64-node": "3.32.0",
52
+ "@aws-sdk/util-body-length-browser": "3.32.0",
53
+ "@aws-sdk/util-body-length-node": "3.32.0",
54
+ "@aws-sdk/util-user-agent-browser": "3.32.0",
55
+ "@aws-sdk/util-user-agent-node": "3.32.0",
56
+ "@aws-sdk/util-utf8-browser": "3.32.0",
57
+ "@aws-sdk/util-utf8-node": "3.32.0",
58
58
  "tslib": "^2.3.0",
59
59
  "uuid": "^8.3.2"
60
60
  },
61
61
  "devDependencies": {
62
- "@aws-sdk/client-documentation-generator": "3.23.0",
62
+ "@aws-sdk/client-documentation-generator": "3.32.0",
63
63
  "@types/node": "^12.7.5",
64
64
  "@types/uuid": "^8.3.0",
65
65
  "downlevel-dts": "0.7.0",
@@ -67,7 +67,7 @@
67
67
  "rimraf": "^3.0.0",
68
68
  "ts-jest": "^26.4.1",
69
69
  "typedoc": "^0.19.2",
70
- "typescript": "~4.3.2"
70
+ "typescript": "~4.3.5"
71
71
  },
72
72
  "engines": {
73
73
  "node": ">=10.0.0"