@aws-sdk/client-comprehend 3.218.0 → 3.223.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -166,6 +166,38 @@ export declare class InternalServerException extends __BaseException {
166
166
  */
167
167
  constructor(opts: __ExceptionOptionType<InternalServerException, __BaseException>);
168
168
  }
169
+ export declare enum InvalidRequestDetailReason {
170
+ DOCUMENT_SIZE_EXCEEDED = "DOCUMENT_SIZE_EXCEEDED",
171
+ PAGE_LIMIT_EXCEEDED = "PAGE_LIMIT_EXCEEDED",
172
+ TEXTRACT_ACCESS_DENIED = "TEXTRACT_ACCESS_DENIED",
173
+ UNSUPPORTED_DOC_TYPE = "UNSUPPORTED_DOC_TYPE"
174
+ }
175
+ /**
176
+ * <p>Provides additional detail about why the request failed:</p>
177
+ * <ul>
178
+ * <li>
179
+ * <p>Document size is too large - Check the size of your file and resubmit the request.</p>
180
+ * </li>
181
+ * <li>
182
+ * <p>Document type is not supported - Check the file type and resubmit the request.</p>
183
+ * </li>
184
+ * <li>
185
+ * <p>Too many pages in the document - Check the number of pages in your file and resubmit the request.</p>
186
+ * </li>
187
+ * <li>
188
+ * <p>Access denied to Amazon Textract - Verify that your account has permission to use Amazon Textract API operations and resubmit the request.</p>
189
+ * </li>
190
+ * </ul>
191
+ */
192
+ export interface InvalidRequestDetail {
193
+ /**
194
+ * <p>Reason code is <code>INVALID_DOCUMENT</code>.</p>
195
+ */
196
+ Reason?: InvalidRequestDetailReason | string;
197
+ }
198
+ export declare enum InvalidRequestReason {
199
+ INVALID_DOCUMENT = "INVALID_DOCUMENT"
200
+ }
169
201
  /**
170
202
  * <p>The request is invalid.</p>
171
203
  */
@@ -173,6 +205,25 @@ export declare class InvalidRequestException extends __BaseException {
173
205
  readonly name: "InvalidRequestException";
174
206
  readonly $fault: "client";
175
207
  Message?: string;
208
+ Reason?: InvalidRequestReason | string;
209
+ /**
210
+ * <p>Provides additional detail about why the request failed:</p>
211
+ * <ul>
212
+ * <li>
213
+ * <p>Document size is too large - Check the size of your file and resubmit the request.</p>
214
+ * </li>
215
+ * <li>
216
+ * <p>Document type is not supported - Check the file type and resubmit the request.</p>
217
+ * </li>
218
+ * <li>
219
+ * <p>Too many pages in the document - Check the number of pages in your file and resubmit the request.</p>
220
+ * </li>
221
+ * <li>
222
+ * <p>Access denied to Amazon Textract - Verify that your account has permission to use Amazon Textract API operations and resubmit the request.</p>
223
+ * </li>
224
+ * </ul>
225
+ */
226
+ Detail?: InvalidRequestDetail;
176
227
  /**
177
228
  * @internal
178
229
  */
@@ -216,6 +267,44 @@ export interface BatchDetectEntitiesRequest {
216
267
  */
217
268
  LanguageCode: LanguageCode | string | undefined;
218
269
  }
270
+ /**
271
+ * <p>Nested block contained within a block.</p>
272
+ */
273
+ export interface ChildBlock {
274
+ /**
275
+ * <p>Unique identifier for the child block.</p>
276
+ */
277
+ ChildBlockId?: string;
278
+ /**
279
+ * <p>Offset of the start of the child block within its parent block.</p>
280
+ */
281
+ BeginOffset?: number;
282
+ /**
283
+ * <p>Offset of the end of the child block within its parent block.</p>
284
+ */
285
+ EndOffset?: number;
286
+ }
287
+ /**
288
+ * <p>A reference to a block. </p>
289
+ */
290
+ export interface BlockReference {
291
+ /**
292
+ * <p>Unique identifier for the block.</p>
293
+ */
294
+ BlockId?: string;
295
+ /**
296
+ * <p>Offset of the start of the block within its parent block.</p>
297
+ */
298
+ BeginOffset?: number;
299
+ /**
300
+ * <p>Offset of the end of the block within its parent block.</p>
301
+ */
302
+ EndOffset?: number;
303
+ /**
304
+ * <p>List of child blocks within this block.</p>
305
+ */
306
+ ChildBlocks?: ChildBlock[];
307
+ }
219
308
  export declare enum EntityType {
220
309
  COMMERCIAL_ITEM = "COMMERCIAL_ITEM",
221
310
  DATE = "DATE",
@@ -238,7 +327,10 @@ export interface Entity {
238
327
  */
239
328
  Score?: number;
240
329
  /**
241
- * <p>The entity's type.</p>
330
+ * <p>The entity type. For entity detection using the built-in model, this field contains one of the
331
+ * standard entity types listed below.</p>
332
+ * <p>For custom entity detection, this field contains one of the
333
+ * entity types that you specified when you trained your custom model.</p>
242
334
  */
243
335
  Type?: EntityType | string;
244
336
  /**
@@ -248,13 +340,19 @@ export interface Entity {
248
340
  /**
249
341
  * <p>The zero-based offset from the beginning of the source text to the first character in the
250
342
  * entity.</p>
343
+ * <p>This field is empty for non-text input.</p>
251
344
  */
252
345
  BeginOffset?: number;
253
346
  /**
254
347
  * <p>The zero-based offset from the beginning of the source text to the last character in the
255
348
  * entity.</p>
349
+ * <p>This field is empty for non-text input.</p>
256
350
  */
257
351
  EndOffset?: number;
352
+ /**
353
+ * <p>A reference to each block for this entity. This field is empty for plain-text input.</p>
354
+ */
355
+ BlockReferences?: BlockReference[];
258
356
  }
259
357
  /**
260
358
  * <p>The result of calling the operation. The
@@ -697,6 +795,122 @@ export interface BatchDetectTargetedSentimentResponse {
697
795
  */
698
796
  ErrorList: BatchItemError[] | undefined;
699
797
  }
798
+ export declare enum BlockType {
799
+ LINE = "LINE",
800
+ WORD = "WORD"
801
+ }
802
+ /**
803
+ * <p>The bounding box around the detected page
804
+ * or around an element on a document page.
805
+ * The left (x-coordinate) and top (y-coordinate) are coordinates that
806
+ * represent the top and left sides of the bounding box. Note that the upper-left
807
+ * corner of the image is the origin (0,0). </p>
808
+ * <p>For additional information, see <a href="https://docs.aws.amazon.com/textract/latest/dg/API_BoundingBox.html">BoundingBox</a> in the Amazon Textract API reference.</p>
809
+ */
810
+ export interface BoundingBox {
811
+ /**
812
+ * <p>The height of the bounding box as a ratio of the overall document page height.</p>
813
+ */
814
+ Height?: number;
815
+ /**
816
+ * <p>The left coordinate of the bounding box as a ratio of overall document page width.</p>
817
+ */
818
+ Left?: number;
819
+ /**
820
+ * <p>The top coordinate of the bounding box as a ratio of overall document page height.</p>
821
+ */
822
+ Top?: number;
823
+ /**
824
+ * <p>The width of the bounding box as a ratio of the overall document page width.</p>
825
+ */
826
+ Width?: number;
827
+ }
828
+ /**
829
+ * <p>The X and Y coordinates of a point on a document page.</p>
830
+ * <p>For additional information, see <a href="https://docs.aws.amazon.com/textract/latest/dg/API_Point.html">Point</a> in the Amazon Textract API reference.</p>
831
+ */
832
+ export interface Point {
833
+ /**
834
+ * <p>The value of the X coordinate for a point on a polygon</p>
835
+ */
836
+ X?: number;
837
+ /**
838
+ * <p>The value of the Y coordinate for a point on a polygon</p>
839
+ */
840
+ Y?: number;
841
+ }
842
+ /**
843
+ * <p>Information about the location of items on a document page.</p>
844
+ * <p>For additional information, see <a href="https://docs.aws.amazon.com/textract/latest/dg/API_Geometry.html">Geometry</a> in the Amazon Textract API reference.</p>
845
+ */
846
+ export interface Geometry {
847
+ /**
848
+ * <p>An axis-aligned coarse representation of the location of the recognized item on the
849
+ * document page.</p>
850
+ */
851
+ BoundingBox?: BoundingBox;
852
+ /**
853
+ * <p>Within the bounding box, a fine-grained polygon around the recognized item.</p>
854
+ */
855
+ Polygon?: Point[];
856
+ }
857
+ export declare enum RelationshipType {
858
+ CHILD = "CHILD"
859
+ }
860
+ /**
861
+ * <p>List of child blocks for the current block.</p>
862
+ */
863
+ export interface RelationshipsListItem {
864
+ /**
865
+ * <p>Identifiers of the child blocks.</p>
866
+ */
867
+ Ids?: string[];
868
+ /**
869
+ * <p>Only supported relationship is a child relationship.</p>
870
+ */
871
+ Type?: RelationshipType | string;
872
+ }
873
+ /**
874
+ * <p>Information about each word or line of text in the input document.</p>
875
+ * <p>For additional information, see <a href="https://docs.aws.amazon.com/textract/latest/dg/API_Block.html">Block</a> in the Amazon Textract API reference.</p>
876
+ */
877
+ export interface Block {
878
+ /**
879
+ * <p>Unique identifier for the block.</p>
880
+ */
881
+ Id?: string;
882
+ /**
883
+ * <p>The block represents a line of text or one word of text.</p>
884
+ * <ul>
885
+ * <li>
886
+ * <p>WORD - A word that's detected on a document page.
887
+ * A word is one or more ISO basic Latin script characters that aren't separated by spaces.</p>
888
+ * </li>
889
+ * <li>
890
+ * <p>LINE - A string of tab-delimited, contiguous words
891
+ * that are detected on a document page</p>
892
+ * </li>
893
+ * </ul>
894
+ */
895
+ BlockType?: BlockType | string;
896
+ /**
897
+ * <p>The word or line of text extracted from the block.</p>
898
+ */
899
+ Text?: string;
900
+ /**
901
+ * <p>Page number where the block appears.</p>
902
+ */
903
+ Page?: number;
904
+ /**
905
+ * <p>Co-ordinates of the rectangle or polygon that contains the text.</p>
906
+ */
907
+ Geometry?: Geometry;
908
+ /**
909
+ * <p>A list of child blocks of the current block.
910
+ * For example, a LINE object has child blocks for each WORD block that's part of the line of text. </p>
911
+ */
912
+ Relationships?: RelationshipsListItem[];
913
+ }
700
914
  /**
701
915
  * <p>Describes the result metrics for the test data associated with a document
702
916
  * classifier.</p>
@@ -779,15 +993,121 @@ export interface ClassifierMetadata {
779
993
  */
780
994
  EvaluationMetrics?: ClassifierEvaluationMetrics;
781
995
  }
996
+ export declare enum DocumentReadAction {
997
+ TEXTRACT_ANALYZE_DOCUMENT = "TEXTRACT_ANALYZE_DOCUMENT",
998
+ TEXTRACT_DETECT_DOCUMENT_TEXT = "TEXTRACT_DETECT_DOCUMENT_TEXT"
999
+ }
1000
+ export declare enum DocumentReadMode {
1001
+ FORCE_DOCUMENT_READ_ACTION = "FORCE_DOCUMENT_READ_ACTION",
1002
+ SERVICE_DEFAULT = "SERVICE_DEFAULT"
1003
+ }
1004
+ export declare enum DocumentReadFeatureTypes {
1005
+ FORMS = "FORMS",
1006
+ TABLES = "TABLES"
1007
+ }
1008
+ /**
1009
+ * <p>Provides configuration parameters to override the default actions for extracting text from PDF documents and image files. </p>
1010
+ * <p> By default, Amazon Comprehend performs the following actions to extract text from files, based on the input file type: </p>
1011
+ * <ul>
1012
+ * <li>
1013
+ * <p>
1014
+ * <b>Word files</b> - Amazon Comprehend parser extracts the text. </p>
1015
+ * </li>
1016
+ * <li>
1017
+ * <p>
1018
+ * <b>Digital PDF files</b> - Amazon Comprehend parser extracts the text. </p>
1019
+ * </li>
1020
+ * <li>
1021
+ * <p>
1022
+ * <b>Image files and scanned PDF files</b> - Amazon Comprehend uses the Amazon Textract <code>DetectDocumentText</code>
1023
+ * API to extract the text. </p>
1024
+ * </li>
1025
+ * </ul>
1026
+ * <p>
1027
+ * <code>DocumentReaderConfig</code> does not apply to plain text files or Word files.</p>
1028
+ * <p>
1029
+ * For image files and PDF documents, you can override these default actions using the fields listed below.
1030
+ * For more information, see <a href="https://docs.aws.amazon.com/comprehend/latest/dg/detecting-cer.html#detecting-cer-pdf">
1031
+ * Setting text extraction options</a>.
1032
+ * </p>
1033
+ */
1034
+ export interface DocumentReaderConfig {
1035
+ /**
1036
+ * <p>This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF files and image files.
1037
+ * Enter one of the following values:</p>
1038
+ * <ul>
1039
+ * <li>
1040
+ * <p>
1041
+ * <code>TEXTRACT_DETECT_DOCUMENT_TEXT</code> - The Amazon Comprehend service uses the <code>DetectDocumentText</code>
1042
+ * API operation. </p>
1043
+ * </li>
1044
+ * <li>
1045
+ * <p>
1046
+ * <code>TEXTRACT_ANALYZE_DOCUMENT</code> - The Amazon Comprehend service uses the <code>AnalyzeDocument</code>
1047
+ * API operation. </p>
1048
+ * </li>
1049
+ * </ul>
1050
+ */
1051
+ DocumentReadAction: DocumentReadAction | string | undefined;
1052
+ /**
1053
+ * <p>Determines the text extraction actions for PDF files. Enter one of the following values:</p>
1054
+ * <ul>
1055
+ * <li>
1056
+ * <p>
1057
+ * <code>SERVICE_DEFAULT</code> - use the Amazon Comprehend service defaults for PDF files.</p>
1058
+ * </li>
1059
+ * <li>
1060
+ * <p>
1061
+ * <code>FORCE_DOCUMENT_READ_ACTION</code> - Amazon Comprehend uses the Textract API specified by
1062
+ * DocumentReadAction for all PDF files, including digital PDF files. </p>
1063
+ * </li>
1064
+ * </ul>
1065
+ */
1066
+ DocumentReadMode?: DocumentReadMode | string;
1067
+ /**
1068
+ * <p>Specifies the type of Amazon Textract features to apply. If you chose <code>TEXTRACT_ANALYZE_DOCUMENT</code>
1069
+ * as the read action, you must specify one or both of the following values:</p>
1070
+ * <ul>
1071
+ * <li>
1072
+ * <p>
1073
+ * <code>TABLES</code> - Returns information about any tables that are detected in the input document. </p>
1074
+ * </li>
1075
+ * <li>
1076
+ * <p>
1077
+ * <code>FORMS</code> - Returns information and the data from any forms that are detected in the input document. </p>
1078
+ * </li>
1079
+ * </ul>
1080
+ */
1081
+ FeatureTypes?: (DocumentReadFeatureTypes | string)[];
1082
+ }
782
1083
  export interface ClassifyDocumentRequest {
783
1084
  /**
784
- * <p>The document text to be analyzed.</p>
1085
+ * <p>The document text to be analyzed. If you enter text using this parameter,
1086
+ * do not use the <code>Bytes</code> parameter.</p>
785
1087
  */
786
- Text: string | undefined;
1088
+ Text?: string;
787
1089
  /**
788
1090
  * <p>The Amazon Resource Name (ARN) of the endpoint. For information about endpoints, see <a href="https://docs.aws.amazon.com/comprehend/latest/dg/manage-endpoints.html">Managing endpoints</a>.</p>
789
1091
  */
790
1092
  EndpointArn: string | undefined;
1093
+ /**
1094
+ * <p>Use the <code>Bytes</code> parameter to input a text, PDF, Word or image file.
1095
+ * You can also use the <code>Bytes</code> parameter to input an Amazon Textract <code>DetectDocumentText</code>
1096
+ * or <code>AnalyzeDocument</code> output file.</p>
1097
+ * <p>Provide the input document as a sequence of base64-encoded bytes.
1098
+ * If your code uses an Amazon Web Services SDK to classify documents, the SDK may encode
1099
+ * the document file bytes for you. </p>
1100
+ * <p>The maximum length of this field depends on the input document type. For details, see
1101
+ * <a href="https://docs.aws.amazon.com/comprehend/latest/dg/idp-inputs-sync.html">
1102
+ * Inputs for real-time custom analysis</a> in the Comprehend Developer Guide. </p>
1103
+ * <p>If you use the <code>Bytes</code> parameter, do not use the <code>Text</code> parameter.</p>
1104
+ */
1105
+ Bytes?: Uint8Array;
1106
+ /**
1107
+ * <p>Provides configuration parameters to override the default actions for extracting text
1108
+ * from PDF documents and image files.</p>
1109
+ */
1110
+ DocumentReaderConfig?: DocumentReaderConfig;
791
1111
  }
792
1112
  /**
793
1113
  * <p>Specifies the class that categorizes the document being analyzed</p>
@@ -801,6 +1121,105 @@ export interface DocumentClass {
801
1121
  * <p>The confidence score that Amazon Comprehend has this class correctly attributed.</p>
802
1122
  */
803
1123
  Score?: number;
1124
+ /**
1125
+ * <p>Page number in the input document. This field is present
1126
+ * in the response only if your request includes the <code>Bytes</code> parameter. </p>
1127
+ */
1128
+ Page?: number;
1129
+ }
1130
+ /**
1131
+ * <p>Array of the number of characters extracted from each page.</p>
1132
+ */
1133
+ export interface ExtractedCharactersListItem {
1134
+ /**
1135
+ * <p>Page number.</p>
1136
+ */
1137
+ Page?: number;
1138
+ /**
1139
+ * <p>Number of characters extracted from each page.</p>
1140
+ */
1141
+ Count?: number;
1142
+ }
1143
+ /**
1144
+ * <p>Information about the document, discovered during text extraction.</p>
1145
+ */
1146
+ export interface DocumentMetadata {
1147
+ /**
1148
+ * <p>Number of pages in the document.</p>
1149
+ */
1150
+ Pages?: number;
1151
+ /**
1152
+ * <p>List of pages in the document, with the number of characters extracted from each page.</p>
1153
+ */
1154
+ ExtractedCharacters?: ExtractedCharactersListItem[];
1155
+ }
1156
+ export declare enum DocumentType {
1157
+ IMAGE = "IMAGE",
1158
+ MS_WORD = "MS_WORD",
1159
+ NATIVE_PDF = "NATIVE_PDF",
1160
+ PLAIN_TEXT = "PLAIN_TEXT",
1161
+ SCANNED_PDF = "SCANNED_PDF",
1162
+ TEXTRACT_ANALYZE_DOCUMENT_JSON = "TEXTRACT_ANALYZE_DOCUMENT_JSON",
1163
+ TEXTRACT_DETECT_DOCUMENT_TEXT_JSON = "TEXTRACT_DETECT_DOCUMENT_TEXT_JSON"
1164
+ }
1165
+ /**
1166
+ * <p>Document type for each page in the document.</p>
1167
+ */
1168
+ export interface DocumentTypeListItem {
1169
+ /**
1170
+ * <p>Page number.</p>
1171
+ */
1172
+ Page?: number;
1173
+ /**
1174
+ * <p>Document type.</p>
1175
+ */
1176
+ Type?: DocumentType | string;
1177
+ }
1178
+ export declare enum PageBasedErrorCode {
1179
+ INTERNAL_SERVER_ERROR = "INTERNAL_SERVER_ERROR",
1180
+ PAGE_CHARACTERS_EXCEEDED = "PAGE_CHARACTERS_EXCEEDED",
1181
+ PAGE_SIZE_EXCEEDED = "PAGE_SIZE_EXCEEDED",
1182
+ TEXTRACT_BAD_PAGE = "TEXTRACT_BAD_PAGE",
1183
+ TEXTRACT_PROVISIONED_THROUGHPUT_EXCEEDED = "TEXTRACT_PROVISIONED_THROUGHPUT_EXCEEDED"
1184
+ }
1185
+ /**
1186
+ * <p>Text extraction encountered one or more page-level errors in the input document.</p>
1187
+ * <p>The <code>ErrorCode</code> contains one of the following values:</p>
1188
+ * <ul>
1189
+ * <li>
1190
+ * <p>TEXTRACT_BAD_PAGE - Amazon Textract cannot read the page. For more information
1191
+ * about page limits in Amazon Textract, see <a href="https://docs.aws.amazon.com/textract/latest/dg/limits-document.html">
1192
+ * Page Quotas in Amazon Textract</a>.</p>
1193
+ * </li>
1194
+ * <li>
1195
+ * <p>TEXTRACT_PROVISIONED_THROUGHPUT_EXCEEDED - The number of requests exceeded your throughput limit.
1196
+ * For more information about throughput quotas in Amazon Textract, see <a href="https://docs.aws.amazon.com/textract/latest/dg/limits-quotas-explained.html">
1197
+ * Default quotas in Amazon Textract</a>.</p>
1198
+ * </li>
1199
+ * <li>
1200
+ * <p>PAGE_CHARACTERS_EXCEEDED - Too many text characters on the page (10,000 characters maximum).</p>
1201
+ * </li>
1202
+ * <li>
1203
+ * <p>PAGE_SIZE_EXCEEDED - The maximum page size is 10 MB.</p>
1204
+ * </li>
1205
+ * <li>
1206
+ * <p>INTERNAL_SERVER_ERROR - The request encountered a service issue. Try the API request again.</p>
1207
+ * </li>
1208
+ * </ul>
1209
+ */
1210
+ export interface ErrorsListItem {
1211
+ /**
1212
+ * <p>Page number where the error occurred.</p>
1213
+ */
1214
+ Page?: number;
1215
+ /**
1216
+ * <p>Error code for the cause of the error.</p>
1217
+ */
1218
+ ErrorCode?: PageBasedErrorCode | string;
1219
+ /**
1220
+ * <p>Text message explaining the reason for the error.</p>
1221
+ */
1222
+ ErrorMessage?: string;
804
1223
  }
805
1224
  /**
806
1225
  * <p>Specifies one of the label or labels that categorize the document being analyzed.</p>
@@ -814,6 +1233,11 @@ export interface DocumentLabel {
814
1233
  * <p>The confidence score that Amazon Comprehend has this label correctly attributed.</p>
815
1234
  */
816
1235
  Score?: number;
1236
+ /**
1237
+ * <p>Page number where the label occurs. This field is present
1238
+ * in the response only if your request includes the <code>Bytes</code> parameter. </p>
1239
+ */
1240
+ Page?: number;
817
1241
  }
818
1242
  export interface ClassifyDocumentResponse {
819
1243
  /**
@@ -830,6 +1254,21 @@ export interface ClassifyDocumentResponse {
830
1254
  * action movie, a science fiction movie, and a comedy, all at the same time. </p>
831
1255
  */
832
1256
  Labels?: DocumentLabel[];
1257
+ /**
1258
+ * <p>Extraction information about the document. This field is present
1259
+ * in the response only if your request includes the <code>Bytes</code> parameter. </p>
1260
+ */
1261
+ DocumentMetadata?: DocumentMetadata;
1262
+ /**
1263
+ * <p>The document type for each page in the input document. This field is present
1264
+ * in the response only if your request includes the <code>Bytes</code> parameter. </p>
1265
+ */
1266
+ DocumentType?: DocumentTypeListItem[];
1267
+ /**
1268
+ * <p>Page-level errors that the system detected while processing the input document.
1269
+ * The field is empty if the system encountered no errors.</p>
1270
+ */
1271
+ Errors?: ErrorsListItem[];
833
1272
  }
834
1273
  /**
835
1274
  * <p>The specified resource is not available. Check the resource and try your request
@@ -1480,9 +1919,11 @@ export interface CreateEntityRecognizerRequest {
1480
1919
  */
1481
1920
  ClientRequestToken?: string;
1482
1921
  /**
1483
- * <p> You can specify any of the following languages supported by Amazon Comprehend: English
1922
+ * <p> You can specify any of the following languages: English
1484
1923
  * ("en"), Spanish ("es"), French ("fr"), Italian ("it"), German ("de"), or Portuguese ("pt").
1485
- * All documents must be in the same language.</p>
1924
+ * If you plan to use this entity recognizer with PDF, Word, or image input files, you must
1925
+ * specify English as the language.
1926
+ * All training documents must be in the same language.</p>
1486
1927
  */
1487
1928
  LanguageCode: LanguageCode | string | undefined;
1488
1929
  /**
@@ -1592,61 +2033,9 @@ export interface DescribeDocumentClassificationJobRequest {
1592
2033
  */
1593
2034
  JobId: string | undefined;
1594
2035
  }
1595
- export declare enum DocumentReadAction {
1596
- TEXTRACT_ANALYZE_DOCUMENT = "TEXTRACT_ANALYZE_DOCUMENT",
1597
- TEXTRACT_DETECT_DOCUMENT_TEXT = "TEXTRACT_DETECT_DOCUMENT_TEXT"
1598
- }
1599
- export declare enum DocumentReadMode {
1600
- FORCE_DOCUMENT_READ_ACTION = "FORCE_DOCUMENT_READ_ACTION",
1601
- SERVICE_DEFAULT = "SERVICE_DEFAULT"
1602
- }
1603
- export declare enum DocumentReadFeatureTypes {
1604
- FORMS = "FORMS",
1605
- TABLES = "TABLES"
1606
- }
1607
- /**
1608
- * <p>The input properties for a topic detection job.</p>
1609
- */
1610
- export interface DocumentReaderConfig {
1611
- /**
1612
- * <p>This enum field will start with two values which will apply to PDFs:</p>
1613
- * <ul>
1614
- * <li>
1615
- * <p>
1616
- * <code>TEXTRACT_DETECT_DOCUMENT_TEXT</code> - The service calls DetectDocumentText
1617
- * for PDF documents per page.</p>
1618
- * </li>
1619
- * <li>
1620
- * <p>
1621
- * <code>TEXTRACT_ANALYZE_DOCUMENT</code> - The service calls AnalyzeDocument for PDF
1622
- * documents per page.</p>
1623
- * </li>
1624
- * </ul>
1625
- */
1626
- DocumentReadAction: DocumentReadAction | string | undefined;
1627
- /**
1628
- * <p>This enum field provides two values:</p>
1629
- * <ul>
1630
- * <li>
1631
- * <p>
1632
- * <code>SERVICE_DEFAULT</code> - use service defaults for Document reading. For
1633
- * Digital PDF it would mean using an internal parser instead of Textract APIs</p>
1634
- * </li>
1635
- * <li>
1636
- * <p>
1637
- * <code>FORCE_DOCUMENT_READ_ACTION</code> - Always use specified action for
1638
- * DocumentReadAction, including Digital PDF. </p>
1639
- * </li>
1640
- * </ul>
1641
- */
1642
- DocumentReadMode?: DocumentReadMode | string;
1643
- /**
1644
- * <p>Specifies how the text in an input file should be processed:</p>
1645
- */
1646
- FeatureTypes?: (DocumentReadFeatureTypes | string)[];
1647
- }
1648
2036
  /**
1649
- * <p>The input properties for an inference job.</p>
2037
+ * <p>The input properties for an inference job. The document reader config field applies
2038
+ * only to non-text inputs for custom analysis.</p>
1650
2039
  */
1651
2040
  export interface InputDataConfig {
1652
2041
  /**
@@ -1677,11 +2066,8 @@ export interface InputDataConfig {
1677
2066
  */
1678
2067
  InputFormat?: InputFormat | string;
1679
2068
  /**
1680
- * <p>The document reader config field applies only for InputDataConfig of
1681
- * StartEntitiesDetectionJob. </p>
1682
- * <p>Use DocumentReaderConfig to provide specifications about how you want your inference
1683
- * documents read. Currently it applies for PDF documents in StartEntitiesDetectionJob custom
1684
- * inference.</p>
2069
+ * <p>Provides configuration parameters to override the default actions for extracting text
2070
+ * from PDF documents and image files.</p>
1685
2071
  */
1686
2072
  DocumentReaderConfig?: DocumentReaderConfig;
1687
2073
  }
@@ -2770,7 +3156,7 @@ export interface DescribePiiEntitiesDetectionJobResponse {
2770
3156
  }
2771
3157
  export interface DescribeResourcePolicyRequest {
2772
3158
  /**
2773
- * <p>The Amazon Resource Name (ARN) of the policy to describe.</p>
3159
+ * <p>The Amazon Resource Name (ARN) of the custom model version that has the resource policy.</p>
2774
3160
  */
2775
3161
  ResourceArn: string | undefined;
2776
3162
  }
@@ -2941,7 +3327,8 @@ export interface TargetedSentimentDetectionJobProperties {
2941
3327
  */
2942
3328
  EndTime?: Date;
2943
3329
  /**
2944
- * <p>The input properties for an inference job.</p>
3330
+ * <p>The input properties for an inference job. The document reader config field applies
3331
+ * only to non-text inputs for custom analysis.</p>
2945
3332
  */
2946
3333
  InputDataConfig?: InputDataConfig;
2947
3334
  /**
@@ -3103,15 +3490,16 @@ export interface DetectDominantLanguageResponse {
3103
3490
  }
3104
3491
  export interface DetectEntitiesRequest {
3105
3492
  /**
3106
- * <p>A UTF-8 text string. The maximum string size is 100 KB.</p>
3493
+ * <p>A UTF-8 text string. The maximum string size is 100 KB. If you enter text using this parameter,
3494
+ * do not use the <code>Bytes</code> parameter.</p>
3107
3495
  */
3108
- Text: string | undefined;
3496
+ Text?: string;
3109
3497
  /**
3110
3498
  * <p>The language of the input documents. You can specify any of the primary languages
3111
- * supported by Amazon Comprehend. All documents must be in the same language.</p>
3112
- * <p>If your request includes the endpoint for a custom entity recognition model, Amazon
3499
+ * supported by Amazon Comprehend. If your request includes the endpoint for a custom entity recognition model, Amazon
3113
3500
  * Comprehend uses the language of your custom model, and it ignores any language code that you
3114
3501
  * specify here.</p>
3502
+ * <p>All input documents must be in the same language.</p>
3115
3503
  */
3116
3504
  LanguageCode?: LanguageCode | string;
3117
3505
  /**
@@ -3123,6 +3511,30 @@ export interface DetectEntitiesRequest {
3123
3511
  * <p>For information about endpoints, see <a href="https://docs.aws.amazon.com/comprehend/latest/dg/manage-endpoints.html">Managing endpoints</a>.</p>
3124
3512
  */
3125
3513
  EndpointArn?: string;
3514
+ /**
3515
+ * <p>This field applies only when you use a custom entity recognition model that
3516
+ * was trained with PDF annotations. For other cases,
3517
+ * enter your text input in the <code>Text</code> field.</p>
3518
+ * <p>
3519
+ * Use the <code>Bytes</code> parameter to input a text, PDF, Word or image file.
3520
+ * Using a plain-text file in the <code>Bytes</code> parameter is equivalent to using the
3521
+ * <code>Text</code> parameter (the <code>Entities</code> field in the response is identical).</p>
3522
+ * <p>You can also use the <code>Bytes</code> parameter to input an Amazon Textract <code>DetectDocumentText</code>
3523
+ * or <code>AnalyzeDocument</code> output file.</p>
3524
+ * <p>Provide the input document as a sequence of base64-encoded bytes.
3525
+ * If your code uses an Amazon Web Services SDK to detect entities, the SDK may encode
3526
+ * the document file bytes for you. </p>
3527
+ * <p>The maximum length of this field depends on the input document type. For details, see
3528
+ * <a href="https://docs.aws.amazon.com/comprehend/latest/dg/idp-inputs-sync.html">
3529
+ * Inputs for real-time custom analysis</a> in the Comprehend Developer Guide. </p>
3530
+ * <p>If you use the <code>Bytes</code> parameter, do not use the <code>Text</code> parameter.</p>
3531
+ */
3532
+ Bytes?: Uint8Array;
3533
+ /**
3534
+ * <p>Provides configuration parameters to override the default actions for extracting text
3535
+ * from PDF documents and image files.</p>
3536
+ */
3537
+ DocumentReaderConfig?: DocumentReaderConfig;
3126
3538
  }
3127
3539
  export interface DetectEntitiesResponse {
3128
3540
  /**
@@ -3136,6 +3548,29 @@ export interface DetectEntitiesResponse {
3136
3548
  * </p>
3137
3549
  */
3138
3550
  Entities?: Entity[];
3551
+ /**
3552
+ * <p>Information about the document, discovered during text extraction. This field is present
3553
+ * in the response only if your request used the <code>Bytes</code> parameter. </p>
3554
+ */
3555
+ DocumentMetadata?: DocumentMetadata;
3556
+ /**
3557
+ * <p>The document type for each page in the input document. This field is present
3558
+ * in the response only if your request used the <code>Bytes</code> parameter. </p>
3559
+ */
3560
+ DocumentType?: DocumentTypeListItem[];
3561
+ /**
3562
+ * <p>Information about each block of text in the input document.
3563
+ * Blocks are nested. A page block contains a block for each line of text,
3564
+ * which contains a block for each word. </p>
3565
+ * <p>The <code>Block</code> content for a Word input document does not include a <code>Geometry</code> field.</p>
3566
+ * <p>The <code>Blocks</code> field is not present in the response for plain-text inputs.</p>
3567
+ */
3568
+ Blocks?: Block[];
3569
+ /**
3570
+ * <p>Page-level errors that the system detected while processing the input document.
3571
+ * The field is empty if the system encountered no errors.</p>
3572
+ */
3573
+ Errors?: ErrorsListItem[];
3139
3574
  }
3140
3575
  export interface DetectKeyPhrasesRequest {
3141
3576
  /**
@@ -4729,7 +5164,8 @@ export interface StartSentimentDetectionJobResponse {
4729
5164
  }
4730
5165
  export interface StartTargetedSentimentDetectionJobRequest {
4731
5166
  /**
4732
- * <p>The input properties for an inference job.</p>
5167
+ * <p>The input properties for an inference job. The document reader config field applies
5168
+ * only to non-text inputs for custom analysis.</p>
4733
5169
  */
4734
5170
  InputDataConfig: InputDataConfig | undefined;
4735
5171
  /**
@@ -5163,10 +5599,22 @@ export declare const BatchDetectDominantLanguageItemResultFilterSensitiveLog: (o
5163
5599
  * @internal
5164
5600
  */
5165
5601
  export declare const BatchDetectDominantLanguageResponseFilterSensitiveLog: (obj: BatchDetectDominantLanguageResponse) => any;
5602
+ /**
5603
+ * @internal
5604
+ */
5605
+ export declare const InvalidRequestDetailFilterSensitiveLog: (obj: InvalidRequestDetail) => any;
5166
5606
  /**
5167
5607
  * @internal
5168
5608
  */
5169
5609
  export declare const BatchDetectEntitiesRequestFilterSensitiveLog: (obj: BatchDetectEntitiesRequest) => any;
5610
+ /**
5611
+ * @internal
5612
+ */
5613
+ export declare const ChildBlockFilterSensitiveLog: (obj: ChildBlock) => any;
5614
+ /**
5615
+ * @internal
5616
+ */
5617
+ export declare const BlockReferenceFilterSensitiveLog: (obj: BlockReference) => any;
5170
5618
  /**
5171
5619
  * @internal
5172
5620
  */
@@ -5255,6 +5703,26 @@ export declare const BatchDetectTargetedSentimentItemResultFilterSensitiveLog: (
5255
5703
  * @internal
5256
5704
  */
5257
5705
  export declare const BatchDetectTargetedSentimentResponseFilterSensitiveLog: (obj: BatchDetectTargetedSentimentResponse) => any;
5706
+ /**
5707
+ * @internal
5708
+ */
5709
+ export declare const BoundingBoxFilterSensitiveLog: (obj: BoundingBox) => any;
5710
+ /**
5711
+ * @internal
5712
+ */
5713
+ export declare const PointFilterSensitiveLog: (obj: Point) => any;
5714
+ /**
5715
+ * @internal
5716
+ */
5717
+ export declare const GeometryFilterSensitiveLog: (obj: Geometry) => any;
5718
+ /**
5719
+ * @internal
5720
+ */
5721
+ export declare const RelationshipsListItemFilterSensitiveLog: (obj: RelationshipsListItem) => any;
5722
+ /**
5723
+ * @internal
5724
+ */
5725
+ export declare const BlockFilterSensitiveLog: (obj: Block) => any;
5258
5726
  /**
5259
5727
  * @internal
5260
5728
  */
@@ -5263,6 +5731,10 @@ export declare const ClassifierEvaluationMetricsFilterSensitiveLog: (obj: Classi
5263
5731
  * @internal
5264
5732
  */
5265
5733
  export declare const ClassifierMetadataFilterSensitiveLog: (obj: ClassifierMetadata) => any;
5734
+ /**
5735
+ * @internal
5736
+ */
5737
+ export declare const DocumentReaderConfigFilterSensitiveLog: (obj: DocumentReaderConfig) => any;
5266
5738
  /**
5267
5739
  * @internal
5268
5740
  */
@@ -5271,6 +5743,22 @@ export declare const ClassifyDocumentRequestFilterSensitiveLog: (obj: ClassifyDo
5271
5743
  * @internal
5272
5744
  */
5273
5745
  export declare const DocumentClassFilterSensitiveLog: (obj: DocumentClass) => any;
5746
+ /**
5747
+ * @internal
5748
+ */
5749
+ export declare const ExtractedCharactersListItemFilterSensitiveLog: (obj: ExtractedCharactersListItem) => any;
5750
+ /**
5751
+ * @internal
5752
+ */
5753
+ export declare const DocumentMetadataFilterSensitiveLog: (obj: DocumentMetadata) => any;
5754
+ /**
5755
+ * @internal
5756
+ */
5757
+ export declare const DocumentTypeListItemFilterSensitiveLog: (obj: DocumentTypeListItem) => any;
5758
+ /**
5759
+ * @internal
5760
+ */
5761
+ export declare const ErrorsListItemFilterSensitiveLog: (obj: ErrorsListItem) => any;
5274
5762
  /**
5275
5763
  * @internal
5276
5764
  */
@@ -5387,10 +5875,6 @@ export declare const DeleteResourcePolicyResponseFilterSensitiveLog: (obj: Delet
5387
5875
  * @internal
5388
5876
  */
5389
5877
  export declare const DescribeDocumentClassificationJobRequestFilterSensitiveLog: (obj: DescribeDocumentClassificationJobRequest) => any;
5390
- /**
5391
- * @internal
5392
- */
5393
- export declare const DocumentReaderConfigFilterSensitiveLog: (obj: DocumentReaderConfig) => any;
5394
5878
  /**
5395
5879
  * @internal
5396
5880
  */