aws-sdk-comprehend 1.50.0 → 1.51.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e6b8c990eb8f22c9347bb33832a67cbb7decb7c3e2d3e26f5c55645c6f66112e
4
- data.tar.gz: 3480e954c55a8a1fa34a2fc713e7ef70fef60b6b788e4962037b186bad2941d5
3
+ metadata.gz: 6a9828628812834dcbad9932c47945231e7ba7f05b3948b59a349d0c7058f09f
4
+ data.tar.gz: 5141ec47d75e05488aa0fd4304c573d2e903b4072d7215f45aa30b1a42c33f80
5
5
  SHA512:
6
- metadata.gz: 77a1b79f708c26bd95b417c884e8c0fc4c3245e20b4eca2ee4f6cd7a65c9c712617ad6a5123a080ce72bab677cbc41b880103a360253ee84c35eb43e327db97b
7
- data.tar.gz: ae140ad7047606ac4e7db331f20aaf81825caa98909f8e39e235ea3d741405e854d38f3d39a04cfb478ec23a32e80e829e4c5df688685698b160ba2e2a380e17
6
+ metadata.gz: 1f350f13013eb12272326dc8aea2df7dbf74d5e2ee2810febfcfd2badd7ae7169f1a68cff10b8dde8a8bdce477064169bf61a26f3bbf4e5b57077cf0ddd2fc1b
7
+ data.tar.gz: 4d40609712cf892f49e69f14b20e7a4f4c781b4803336e0bc2803eb4839d5ff072e8143c3d31b74ae10a02b6055349a22374ee72b9123b25c2af5cc5ebcf472d
data/CHANGELOG.md CHANGED
@@ -1,6 +1,11 @@
1
1
  Unreleased Changes
2
2
  ------------------
3
3
 
4
+ 1.51.0 (2021-09-14)
5
+ ------------------
6
+
7
+ * Feature - Amazon Comprehend now lets you train custom entity recognition models on PDF and Word documents and run entity recognition on them. With PDF and Word formats, you can extract information from documents containing headers, lists and tables.
8
+
4
9
  1.50.0 (2021-09-01)
5
10
  ------------------
6
11
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.50.0
1
+ 1.51.0
@@ -761,6 +761,9 @@ module Aws::Comprehend
761
761
  # {
762
762
  # s3_uri: "S3Uri", # required
763
763
  # attribute_names: ["AttributeNamesListItem"], # required
764
+ # annotation_data_s3_uri: "S3Uri",
765
+ # source_documents_s3_uri: "S3Uri",
766
+ # document_type: "PLAIN_TEXT_DOCUMENT", # accepts PLAIN_TEXT_DOCUMENT, SEMI_STRUCTURED_DOCUMENT
764
767
  # },
765
768
  # ],
766
769
  # },
@@ -965,6 +968,9 @@ module Aws::Comprehend
965
968
  # {
966
969
  # s3_uri: "S3Uri", # required
967
970
  # attribute_names: ["AttributeNamesListItem"], # required
971
+ # annotation_data_s3_uri: "S3Uri",
972
+ # source_documents_s3_uri: "S3Uri",
973
+ # document_type: "PLAIN_TEXT_DOCUMENT", # accepts PLAIN_TEXT_DOCUMENT, SEMI_STRUCTURED_DOCUMENT
968
974
  # },
969
975
  # ],
970
976
  # },
@@ -1107,6 +1113,10 @@ module Aws::Comprehend
1107
1113
  # resp.document_classification_job_properties.document_classifier_arn #=> String
1108
1114
  # resp.document_classification_job_properties.input_data_config.s3_uri #=> String
1109
1115
  # resp.document_classification_job_properties.input_data_config.input_format #=> String, one of "ONE_DOC_PER_FILE", "ONE_DOC_PER_LINE"
1116
+ # resp.document_classification_job_properties.input_data_config.document_reader_config.document_read_action #=> String, one of "TEXTRACT_DETECT_DOCUMENT_TEXT", "TEXTRACT_ANALYZE_DOCUMENT"
1117
+ # resp.document_classification_job_properties.input_data_config.document_reader_config.document_read_mode #=> String, one of "SERVICE_DEFAULT", "FORCE_DOCUMENT_READ_ACTION"
1118
+ # resp.document_classification_job_properties.input_data_config.document_reader_config.feature_types #=> Array
1119
+ # resp.document_classification_job_properties.input_data_config.document_reader_config.feature_types[0] #=> String, one of "TABLES", "FORMS"
1110
1120
  # resp.document_classification_job_properties.output_data_config.s3_uri #=> String
1111
1121
  # resp.document_classification_job_properties.output_data_config.kms_key_id #=> String
1112
1122
  # resp.document_classification_job_properties.data_access_role_arn #=> String
@@ -1158,6 +1168,9 @@ module Aws::Comprehend
1158
1168
  # resp.document_classifier_properties.input_data_config.augmented_manifests[0].s3_uri #=> String
1159
1169
  # resp.document_classifier_properties.input_data_config.augmented_manifests[0].attribute_names #=> Array
1160
1170
  # resp.document_classifier_properties.input_data_config.augmented_manifests[0].attribute_names[0] #=> String
1171
+ # resp.document_classifier_properties.input_data_config.augmented_manifests[0].annotation_data_s3_uri #=> String
1172
+ # resp.document_classifier_properties.input_data_config.augmented_manifests[0].source_documents_s3_uri #=> String
1173
+ # resp.document_classifier_properties.input_data_config.augmented_manifests[0].document_type #=> String, one of "PLAIN_TEXT_DOCUMENT", "SEMI_STRUCTURED_DOCUMENT"
1161
1174
  # resp.document_classifier_properties.output_data_config.s3_uri #=> String
1162
1175
  # resp.document_classifier_properties.output_data_config.kms_key_id #=> String
1163
1176
  # resp.document_classifier_properties.classifier_metadata.number_of_labels #=> Integer
@@ -1217,6 +1230,10 @@ module Aws::Comprehend
1217
1230
  # resp.dominant_language_detection_job_properties.end_time #=> Time
1218
1231
  # resp.dominant_language_detection_job_properties.input_data_config.s3_uri #=> String
1219
1232
  # resp.dominant_language_detection_job_properties.input_data_config.input_format #=> String, one of "ONE_DOC_PER_FILE", "ONE_DOC_PER_LINE"
1233
+ # resp.dominant_language_detection_job_properties.input_data_config.document_reader_config.document_read_action #=> String, one of "TEXTRACT_DETECT_DOCUMENT_TEXT", "TEXTRACT_ANALYZE_DOCUMENT"
1234
+ # resp.dominant_language_detection_job_properties.input_data_config.document_reader_config.document_read_mode #=> String, one of "SERVICE_DEFAULT", "FORCE_DOCUMENT_READ_ACTION"
1235
+ # resp.dominant_language_detection_job_properties.input_data_config.document_reader_config.feature_types #=> Array
1236
+ # resp.dominant_language_detection_job_properties.input_data_config.document_reader_config.feature_types[0] #=> String, one of "TABLES", "FORMS"
1220
1237
  # resp.dominant_language_detection_job_properties.output_data_config.s3_uri #=> String
1221
1238
  # resp.dominant_language_detection_job_properties.output_data_config.kms_key_id #=> String
1222
1239
  # resp.dominant_language_detection_job_properties.data_access_role_arn #=> String
@@ -1301,6 +1318,10 @@ module Aws::Comprehend
1301
1318
  # resp.entities_detection_job_properties.entity_recognizer_arn #=> String
1302
1319
  # resp.entities_detection_job_properties.input_data_config.s3_uri #=> String
1303
1320
  # resp.entities_detection_job_properties.input_data_config.input_format #=> String, one of "ONE_DOC_PER_FILE", "ONE_DOC_PER_LINE"
1321
+ # resp.entities_detection_job_properties.input_data_config.document_reader_config.document_read_action #=> String, one of "TEXTRACT_DETECT_DOCUMENT_TEXT", "TEXTRACT_ANALYZE_DOCUMENT"
1322
+ # resp.entities_detection_job_properties.input_data_config.document_reader_config.document_read_mode #=> String, one of "SERVICE_DEFAULT", "FORCE_DOCUMENT_READ_ACTION"
1323
+ # resp.entities_detection_job_properties.input_data_config.document_reader_config.feature_types #=> Array
1324
+ # resp.entities_detection_job_properties.input_data_config.document_reader_config.feature_types[0] #=> String, one of "TABLES", "FORMS"
1304
1325
  # resp.entities_detection_job_properties.output_data_config.s3_uri #=> String
1305
1326
  # resp.entities_detection_job_properties.output_data_config.kms_key_id #=> String
1306
1327
  # resp.entities_detection_job_properties.language_code #=> String, one of "en", "es", "fr", "de", "it", "pt", "ar", "hi", "ja", "ko", "zh", "zh-TW"
@@ -1357,6 +1378,9 @@ module Aws::Comprehend
1357
1378
  # resp.entity_recognizer_properties.input_data_config.augmented_manifests[0].s3_uri #=> String
1358
1379
  # resp.entity_recognizer_properties.input_data_config.augmented_manifests[0].attribute_names #=> Array
1359
1380
  # resp.entity_recognizer_properties.input_data_config.augmented_manifests[0].attribute_names[0] #=> String
1381
+ # resp.entity_recognizer_properties.input_data_config.augmented_manifests[0].annotation_data_s3_uri #=> String
1382
+ # resp.entity_recognizer_properties.input_data_config.augmented_manifests[0].source_documents_s3_uri #=> String
1383
+ # resp.entity_recognizer_properties.input_data_config.augmented_manifests[0].document_type #=> String, one of "PLAIN_TEXT_DOCUMENT", "SEMI_STRUCTURED_DOCUMENT"
1360
1384
  # resp.entity_recognizer_properties.recognizer_metadata.number_of_trained_documents #=> Integer
1361
1385
  # resp.entity_recognizer_properties.recognizer_metadata.number_of_test_documents #=> Integer
1362
1386
  # resp.entity_recognizer_properties.recognizer_metadata.evaluation_metrics.precision #=> Float
@@ -1411,6 +1435,10 @@ module Aws::Comprehend
1411
1435
  # resp.events_detection_job_properties.end_time #=> Time
1412
1436
  # resp.events_detection_job_properties.input_data_config.s3_uri #=> String
1413
1437
  # resp.events_detection_job_properties.input_data_config.input_format #=> String, one of "ONE_DOC_PER_FILE", "ONE_DOC_PER_LINE"
1438
+ # resp.events_detection_job_properties.input_data_config.document_reader_config.document_read_action #=> String, one of "TEXTRACT_DETECT_DOCUMENT_TEXT", "TEXTRACT_ANALYZE_DOCUMENT"
1439
+ # resp.events_detection_job_properties.input_data_config.document_reader_config.document_read_mode #=> String, one of "SERVICE_DEFAULT", "FORCE_DOCUMENT_READ_ACTION"
1440
+ # resp.events_detection_job_properties.input_data_config.document_reader_config.feature_types #=> Array
1441
+ # resp.events_detection_job_properties.input_data_config.document_reader_config.feature_types[0] #=> String, one of "TABLES", "FORMS"
1414
1442
  # resp.events_detection_job_properties.output_data_config.s3_uri #=> String
1415
1443
  # resp.events_detection_job_properties.output_data_config.kms_key_id #=> String
1416
1444
  # resp.events_detection_job_properties.language_code #=> String, one of "en", "es", "fr", "de", "it", "pt", "ar", "hi", "ja", "ko", "zh", "zh-TW"
@@ -1455,6 +1483,10 @@ module Aws::Comprehend
1455
1483
  # resp.key_phrases_detection_job_properties.end_time #=> Time
1456
1484
  # resp.key_phrases_detection_job_properties.input_data_config.s3_uri #=> String
1457
1485
  # resp.key_phrases_detection_job_properties.input_data_config.input_format #=> String, one of "ONE_DOC_PER_FILE", "ONE_DOC_PER_LINE"
1486
+ # resp.key_phrases_detection_job_properties.input_data_config.document_reader_config.document_read_action #=> String, one of "TEXTRACT_DETECT_DOCUMENT_TEXT", "TEXTRACT_ANALYZE_DOCUMENT"
1487
+ # resp.key_phrases_detection_job_properties.input_data_config.document_reader_config.document_read_mode #=> String, one of "SERVICE_DEFAULT", "FORCE_DOCUMENT_READ_ACTION"
1488
+ # resp.key_phrases_detection_job_properties.input_data_config.document_reader_config.feature_types #=> Array
1489
+ # resp.key_phrases_detection_job_properties.input_data_config.document_reader_config.feature_types[0] #=> String, one of "TABLES", "FORMS"
1458
1490
  # resp.key_phrases_detection_job_properties.output_data_config.s3_uri #=> String
1459
1491
  # resp.key_phrases_detection_job_properties.output_data_config.kms_key_id #=> String
1460
1492
  # resp.key_phrases_detection_job_properties.language_code #=> String, one of "en", "es", "fr", "de", "it", "pt", "ar", "hi", "ja", "ko", "zh", "zh-TW"
@@ -1502,6 +1534,10 @@ module Aws::Comprehend
1502
1534
  # resp.pii_entities_detection_job_properties.end_time #=> Time
1503
1535
  # resp.pii_entities_detection_job_properties.input_data_config.s3_uri #=> String
1504
1536
  # resp.pii_entities_detection_job_properties.input_data_config.input_format #=> String, one of "ONE_DOC_PER_FILE", "ONE_DOC_PER_LINE"
1537
+ # resp.pii_entities_detection_job_properties.input_data_config.document_reader_config.document_read_action #=> String, one of "TEXTRACT_DETECT_DOCUMENT_TEXT", "TEXTRACT_ANALYZE_DOCUMENT"
1538
+ # resp.pii_entities_detection_job_properties.input_data_config.document_reader_config.document_read_mode #=> String, one of "SERVICE_DEFAULT", "FORCE_DOCUMENT_READ_ACTION"
1539
+ # resp.pii_entities_detection_job_properties.input_data_config.document_reader_config.feature_types #=> Array
1540
+ # resp.pii_entities_detection_job_properties.input_data_config.document_reader_config.feature_types[0] #=> String, one of "TABLES", "FORMS"
1505
1541
  # resp.pii_entities_detection_job_properties.output_data_config.s3_uri #=> String
1506
1542
  # resp.pii_entities_detection_job_properties.output_data_config.kms_key_id #=> String
1507
1543
  # resp.pii_entities_detection_job_properties.redaction_config.pii_entity_types #=> Array
@@ -1549,6 +1585,10 @@ module Aws::Comprehend
1549
1585
  # resp.sentiment_detection_job_properties.end_time #=> Time
1550
1586
  # resp.sentiment_detection_job_properties.input_data_config.s3_uri #=> String
1551
1587
  # resp.sentiment_detection_job_properties.input_data_config.input_format #=> String, one of "ONE_DOC_PER_FILE", "ONE_DOC_PER_LINE"
1588
+ # resp.sentiment_detection_job_properties.input_data_config.document_reader_config.document_read_action #=> String, one of "TEXTRACT_DETECT_DOCUMENT_TEXT", "TEXTRACT_ANALYZE_DOCUMENT"
1589
+ # resp.sentiment_detection_job_properties.input_data_config.document_reader_config.document_read_mode #=> String, one of "SERVICE_DEFAULT", "FORCE_DOCUMENT_READ_ACTION"
1590
+ # resp.sentiment_detection_job_properties.input_data_config.document_reader_config.feature_types #=> Array
1591
+ # resp.sentiment_detection_job_properties.input_data_config.document_reader_config.feature_types[0] #=> String, one of "TABLES", "FORMS"
1552
1592
  # resp.sentiment_detection_job_properties.output_data_config.s3_uri #=> String
1553
1593
  # resp.sentiment_detection_job_properties.output_data_config.kms_key_id #=> String
1554
1594
  # resp.sentiment_detection_job_properties.language_code #=> String, one of "en", "es", "fr", "de", "it", "pt", "ar", "hi", "ja", "ko", "zh", "zh-TW"
@@ -1595,6 +1635,10 @@ module Aws::Comprehend
1595
1635
  # resp.topics_detection_job_properties.end_time #=> Time
1596
1636
  # resp.topics_detection_job_properties.input_data_config.s3_uri #=> String
1597
1637
  # resp.topics_detection_job_properties.input_data_config.input_format #=> String, one of "ONE_DOC_PER_FILE", "ONE_DOC_PER_LINE"
1638
+ # resp.topics_detection_job_properties.input_data_config.document_reader_config.document_read_action #=> String, one of "TEXTRACT_DETECT_DOCUMENT_TEXT", "TEXTRACT_ANALYZE_DOCUMENT"
1639
+ # resp.topics_detection_job_properties.input_data_config.document_reader_config.document_read_mode #=> String, one of "SERVICE_DEFAULT", "FORCE_DOCUMENT_READ_ACTION"
1640
+ # resp.topics_detection_job_properties.input_data_config.document_reader_config.feature_types #=> Array
1641
+ # resp.topics_detection_job_properties.input_data_config.document_reader_config.feature_types[0] #=> String, one of "TABLES", "FORMS"
1598
1642
  # resp.topics_detection_job_properties.output_data_config.s3_uri #=> String
1599
1643
  # resp.topics_detection_job_properties.output_data_config.kms_key_id #=> String
1600
1644
  # resp.topics_detection_job_properties.number_of_topics #=> Integer
@@ -1916,6 +1960,10 @@ module Aws::Comprehend
1916
1960
  # resp.document_classification_job_properties_list[0].document_classifier_arn #=> String
1917
1961
  # resp.document_classification_job_properties_list[0].input_data_config.s3_uri #=> String
1918
1962
  # resp.document_classification_job_properties_list[0].input_data_config.input_format #=> String, one of "ONE_DOC_PER_FILE", "ONE_DOC_PER_LINE"
1963
+ # resp.document_classification_job_properties_list[0].input_data_config.document_reader_config.document_read_action #=> String, one of "TEXTRACT_DETECT_DOCUMENT_TEXT", "TEXTRACT_ANALYZE_DOCUMENT"
1964
+ # resp.document_classification_job_properties_list[0].input_data_config.document_reader_config.document_read_mode #=> String, one of "SERVICE_DEFAULT", "FORCE_DOCUMENT_READ_ACTION"
1965
+ # resp.document_classification_job_properties_list[0].input_data_config.document_reader_config.feature_types #=> Array
1966
+ # resp.document_classification_job_properties_list[0].input_data_config.document_reader_config.feature_types[0] #=> String, one of "TABLES", "FORMS"
1919
1967
  # resp.document_classification_job_properties_list[0].output_data_config.s3_uri #=> String
1920
1968
  # resp.document_classification_job_properties_list[0].output_data_config.kms_key_id #=> String
1921
1969
  # resp.document_classification_job_properties_list[0].data_access_role_arn #=> String
@@ -1986,6 +2034,9 @@ module Aws::Comprehend
1986
2034
  # resp.document_classifier_properties_list[0].input_data_config.augmented_manifests[0].s3_uri #=> String
1987
2035
  # resp.document_classifier_properties_list[0].input_data_config.augmented_manifests[0].attribute_names #=> Array
1988
2036
  # resp.document_classifier_properties_list[0].input_data_config.augmented_manifests[0].attribute_names[0] #=> String
2037
+ # resp.document_classifier_properties_list[0].input_data_config.augmented_manifests[0].annotation_data_s3_uri #=> String
2038
+ # resp.document_classifier_properties_list[0].input_data_config.augmented_manifests[0].source_documents_s3_uri #=> String
2039
+ # resp.document_classifier_properties_list[0].input_data_config.augmented_manifests[0].document_type #=> String, one of "PLAIN_TEXT_DOCUMENT", "SEMI_STRUCTURED_DOCUMENT"
1989
2040
  # resp.document_classifier_properties_list[0].output_data_config.s3_uri #=> String
1990
2041
  # resp.document_classifier_properties_list[0].output_data_config.kms_key_id #=> String
1991
2042
  # resp.document_classifier_properties_list[0].classifier_metadata.number_of_labels #=> Integer
@@ -2065,6 +2116,10 @@ module Aws::Comprehend
2065
2116
  # resp.dominant_language_detection_job_properties_list[0].end_time #=> Time
2066
2117
  # resp.dominant_language_detection_job_properties_list[0].input_data_config.s3_uri #=> String
2067
2118
  # resp.dominant_language_detection_job_properties_list[0].input_data_config.input_format #=> String, one of "ONE_DOC_PER_FILE", "ONE_DOC_PER_LINE"
2119
+ # resp.dominant_language_detection_job_properties_list[0].input_data_config.document_reader_config.document_read_action #=> String, one of "TEXTRACT_DETECT_DOCUMENT_TEXT", "TEXTRACT_ANALYZE_DOCUMENT"
2120
+ # resp.dominant_language_detection_job_properties_list[0].input_data_config.document_reader_config.document_read_mode #=> String, one of "SERVICE_DEFAULT", "FORCE_DOCUMENT_READ_ACTION"
2121
+ # resp.dominant_language_detection_job_properties_list[0].input_data_config.document_reader_config.feature_types #=> Array
2122
+ # resp.dominant_language_detection_job_properties_list[0].input_data_config.document_reader_config.feature_types[0] #=> String, one of "TABLES", "FORMS"
2068
2123
  # resp.dominant_language_detection_job_properties_list[0].output_data_config.s3_uri #=> String
2069
2124
  # resp.dominant_language_detection_job_properties_list[0].output_data_config.kms_key_id #=> String
2070
2125
  # resp.dominant_language_detection_job_properties_list[0].data_access_role_arn #=> String
@@ -2186,6 +2241,10 @@ module Aws::Comprehend
2186
2241
  # resp.entities_detection_job_properties_list[0].entity_recognizer_arn #=> String
2187
2242
  # resp.entities_detection_job_properties_list[0].input_data_config.s3_uri #=> String
2188
2243
  # resp.entities_detection_job_properties_list[0].input_data_config.input_format #=> String, one of "ONE_DOC_PER_FILE", "ONE_DOC_PER_LINE"
2244
+ # resp.entities_detection_job_properties_list[0].input_data_config.document_reader_config.document_read_action #=> String, one of "TEXTRACT_DETECT_DOCUMENT_TEXT", "TEXTRACT_ANALYZE_DOCUMENT"
2245
+ # resp.entities_detection_job_properties_list[0].input_data_config.document_reader_config.document_read_mode #=> String, one of "SERVICE_DEFAULT", "FORCE_DOCUMENT_READ_ACTION"
2246
+ # resp.entities_detection_job_properties_list[0].input_data_config.document_reader_config.feature_types #=> Array
2247
+ # resp.entities_detection_job_properties_list[0].input_data_config.document_reader_config.feature_types[0] #=> String, one of "TABLES", "FORMS"
2189
2248
  # resp.entities_detection_job_properties_list[0].output_data_config.s3_uri #=> String
2190
2249
  # resp.entities_detection_job_properties_list[0].output_data_config.kms_key_id #=> String
2191
2250
  # resp.entities_detection_job_properties_list[0].language_code #=> String, one of "en", "es", "fr", "de", "it", "pt", "ar", "hi", "ja", "ko", "zh", "zh-TW"
@@ -2267,6 +2326,9 @@ module Aws::Comprehend
2267
2326
  # resp.entity_recognizer_properties_list[0].input_data_config.augmented_manifests[0].s3_uri #=> String
2268
2327
  # resp.entity_recognizer_properties_list[0].input_data_config.augmented_manifests[0].attribute_names #=> Array
2269
2328
  # resp.entity_recognizer_properties_list[0].input_data_config.augmented_manifests[0].attribute_names[0] #=> String
2329
+ # resp.entity_recognizer_properties_list[0].input_data_config.augmented_manifests[0].annotation_data_s3_uri #=> String
2330
+ # resp.entity_recognizer_properties_list[0].input_data_config.augmented_manifests[0].source_documents_s3_uri #=> String
2331
+ # resp.entity_recognizer_properties_list[0].input_data_config.augmented_manifests[0].document_type #=> String, one of "PLAIN_TEXT_DOCUMENT", "SEMI_STRUCTURED_DOCUMENT"
2270
2332
  # resp.entity_recognizer_properties_list[0].recognizer_metadata.number_of_trained_documents #=> Integer
2271
2333
  # resp.entity_recognizer_properties_list[0].recognizer_metadata.number_of_test_documents #=> Integer
2272
2334
  # resp.entity_recognizer_properties_list[0].recognizer_metadata.evaluation_metrics.precision #=> Float
@@ -2341,6 +2403,10 @@ module Aws::Comprehend
2341
2403
  # resp.events_detection_job_properties_list[0].end_time #=> Time
2342
2404
  # resp.events_detection_job_properties_list[0].input_data_config.s3_uri #=> String
2343
2405
  # resp.events_detection_job_properties_list[0].input_data_config.input_format #=> String, one of "ONE_DOC_PER_FILE", "ONE_DOC_PER_LINE"
2406
+ # resp.events_detection_job_properties_list[0].input_data_config.document_reader_config.document_read_action #=> String, one of "TEXTRACT_DETECT_DOCUMENT_TEXT", "TEXTRACT_ANALYZE_DOCUMENT"
2407
+ # resp.events_detection_job_properties_list[0].input_data_config.document_reader_config.document_read_mode #=> String, one of "SERVICE_DEFAULT", "FORCE_DOCUMENT_READ_ACTION"
2408
+ # resp.events_detection_job_properties_list[0].input_data_config.document_reader_config.feature_types #=> Array
2409
+ # resp.events_detection_job_properties_list[0].input_data_config.document_reader_config.feature_types[0] #=> String, one of "TABLES", "FORMS"
2344
2410
  # resp.events_detection_job_properties_list[0].output_data_config.s3_uri #=> String
2345
2411
  # resp.events_detection_job_properties_list[0].output_data_config.kms_key_id #=> String
2346
2412
  # resp.events_detection_job_properties_list[0].language_code #=> String, one of "en", "es", "fr", "de", "it", "pt", "ar", "hi", "ja", "ko", "zh", "zh-TW"
@@ -2404,6 +2470,10 @@ module Aws::Comprehend
2404
2470
  # resp.key_phrases_detection_job_properties_list[0].end_time #=> Time
2405
2471
  # resp.key_phrases_detection_job_properties_list[0].input_data_config.s3_uri #=> String
2406
2472
  # resp.key_phrases_detection_job_properties_list[0].input_data_config.input_format #=> String, one of "ONE_DOC_PER_FILE", "ONE_DOC_PER_LINE"
2473
+ # resp.key_phrases_detection_job_properties_list[0].input_data_config.document_reader_config.document_read_action #=> String, one of "TEXTRACT_DETECT_DOCUMENT_TEXT", "TEXTRACT_ANALYZE_DOCUMENT"
2474
+ # resp.key_phrases_detection_job_properties_list[0].input_data_config.document_reader_config.document_read_mode #=> String, one of "SERVICE_DEFAULT", "FORCE_DOCUMENT_READ_ACTION"
2475
+ # resp.key_phrases_detection_job_properties_list[0].input_data_config.document_reader_config.feature_types #=> Array
2476
+ # resp.key_phrases_detection_job_properties_list[0].input_data_config.document_reader_config.feature_types[0] #=> String, one of "TABLES", "FORMS"
2407
2477
  # resp.key_phrases_detection_job_properties_list[0].output_data_config.s3_uri #=> String
2408
2478
  # resp.key_phrases_detection_job_properties_list[0].output_data_config.kms_key_id #=> String
2409
2479
  # resp.key_phrases_detection_job_properties_list[0].language_code #=> String, one of "en", "es", "fr", "de", "it", "pt", "ar", "hi", "ja", "ko", "zh", "zh-TW"
@@ -2467,6 +2537,10 @@ module Aws::Comprehend
2467
2537
  # resp.pii_entities_detection_job_properties_list[0].end_time #=> Time
2468
2538
  # resp.pii_entities_detection_job_properties_list[0].input_data_config.s3_uri #=> String
2469
2539
  # resp.pii_entities_detection_job_properties_list[0].input_data_config.input_format #=> String, one of "ONE_DOC_PER_FILE", "ONE_DOC_PER_LINE"
2540
+ # resp.pii_entities_detection_job_properties_list[0].input_data_config.document_reader_config.document_read_action #=> String, one of "TEXTRACT_DETECT_DOCUMENT_TEXT", "TEXTRACT_ANALYZE_DOCUMENT"
2541
+ # resp.pii_entities_detection_job_properties_list[0].input_data_config.document_reader_config.document_read_mode #=> String, one of "SERVICE_DEFAULT", "FORCE_DOCUMENT_READ_ACTION"
2542
+ # resp.pii_entities_detection_job_properties_list[0].input_data_config.document_reader_config.feature_types #=> Array
2543
+ # resp.pii_entities_detection_job_properties_list[0].input_data_config.document_reader_config.feature_types[0] #=> String, one of "TABLES", "FORMS"
2470
2544
  # resp.pii_entities_detection_job_properties_list[0].output_data_config.s3_uri #=> String
2471
2545
  # resp.pii_entities_detection_job_properties_list[0].output_data_config.kms_key_id #=> String
2472
2546
  # resp.pii_entities_detection_job_properties_list[0].redaction_config.pii_entity_types #=> Array
@@ -2533,6 +2607,10 @@ module Aws::Comprehend
2533
2607
  # resp.sentiment_detection_job_properties_list[0].end_time #=> Time
2534
2608
  # resp.sentiment_detection_job_properties_list[0].input_data_config.s3_uri #=> String
2535
2609
  # resp.sentiment_detection_job_properties_list[0].input_data_config.input_format #=> String, one of "ONE_DOC_PER_FILE", "ONE_DOC_PER_LINE"
2610
+ # resp.sentiment_detection_job_properties_list[0].input_data_config.document_reader_config.document_read_action #=> String, one of "TEXTRACT_DETECT_DOCUMENT_TEXT", "TEXTRACT_ANALYZE_DOCUMENT"
2611
+ # resp.sentiment_detection_job_properties_list[0].input_data_config.document_reader_config.document_read_mode #=> String, one of "SERVICE_DEFAULT", "FORCE_DOCUMENT_READ_ACTION"
2612
+ # resp.sentiment_detection_job_properties_list[0].input_data_config.document_reader_config.feature_types #=> Array
2613
+ # resp.sentiment_detection_job_properties_list[0].input_data_config.document_reader_config.feature_types[0] #=> String, one of "TABLES", "FORMS"
2536
2614
  # resp.sentiment_detection_job_properties_list[0].output_data_config.s3_uri #=> String
2537
2615
  # resp.sentiment_detection_job_properties_list[0].output_data_config.kms_key_id #=> String
2538
2616
  # resp.sentiment_detection_job_properties_list[0].language_code #=> String, one of "en", "es", "fr", "de", "it", "pt", "ar", "hi", "ja", "ko", "zh", "zh-TW"
@@ -2632,6 +2710,10 @@ module Aws::Comprehend
2632
2710
  # resp.topics_detection_job_properties_list[0].end_time #=> Time
2633
2711
  # resp.topics_detection_job_properties_list[0].input_data_config.s3_uri #=> String
2634
2712
  # resp.topics_detection_job_properties_list[0].input_data_config.input_format #=> String, one of "ONE_DOC_PER_FILE", "ONE_DOC_PER_LINE"
2713
+ # resp.topics_detection_job_properties_list[0].input_data_config.document_reader_config.document_read_action #=> String, one of "TEXTRACT_DETECT_DOCUMENT_TEXT", "TEXTRACT_ANALYZE_DOCUMENT"
2714
+ # resp.topics_detection_job_properties_list[0].input_data_config.document_reader_config.document_read_mode #=> String, one of "SERVICE_DEFAULT", "FORCE_DOCUMENT_READ_ACTION"
2715
+ # resp.topics_detection_job_properties_list[0].input_data_config.document_reader_config.feature_types #=> Array
2716
+ # resp.topics_detection_job_properties_list[0].input_data_config.document_reader_config.feature_types[0] #=> String, one of "TABLES", "FORMS"
2635
2717
  # resp.topics_detection_job_properties_list[0].output_data_config.s3_uri #=> String
2636
2718
  # resp.topics_detection_job_properties_list[0].output_data_config.kms_key_id #=> String
2637
2719
  # resp.topics_detection_job_properties_list[0].number_of_topics #=> Integer
@@ -2720,6 +2802,11 @@ module Aws::Comprehend
2720
2802
  # input_data_config: { # required
2721
2803
  # s3_uri: "S3Uri", # required
2722
2804
  # input_format: "ONE_DOC_PER_FILE", # accepts ONE_DOC_PER_FILE, ONE_DOC_PER_LINE
2805
+ # document_reader_config: {
2806
+ # document_read_action: "TEXTRACT_DETECT_DOCUMENT_TEXT", # required, accepts TEXTRACT_DETECT_DOCUMENT_TEXT, TEXTRACT_ANALYZE_DOCUMENT
2807
+ # document_read_mode: "SERVICE_DEFAULT", # accepts SERVICE_DEFAULT, FORCE_DOCUMENT_READ_ACTION
2808
+ # feature_types: ["TABLES"], # accepts TABLES, FORMS
2809
+ # },
2723
2810
  # },
2724
2811
  # output_data_config: { # required
2725
2812
  # s3_uri: "S3Uri", # required
@@ -2823,6 +2910,11 @@ module Aws::Comprehend
2823
2910
  # input_data_config: { # required
2824
2911
  # s3_uri: "S3Uri", # required
2825
2912
  # input_format: "ONE_DOC_PER_FILE", # accepts ONE_DOC_PER_FILE, ONE_DOC_PER_LINE
2913
+ # document_reader_config: {
2914
+ # document_read_action: "TEXTRACT_DETECT_DOCUMENT_TEXT", # required, accepts TEXTRACT_DETECT_DOCUMENT_TEXT, TEXTRACT_ANALYZE_DOCUMENT
2915
+ # document_read_mode: "SERVICE_DEFAULT", # accepts SERVICE_DEFAULT, FORCE_DOCUMENT_READ_ACTION
2916
+ # feature_types: ["TABLES"], # accepts TABLES, FORMS
2917
+ # },
2826
2918
  # },
2827
2919
  # output_data_config: { # required
2828
2920
  # s3_uri: "S3Uri", # required
@@ -2942,6 +3034,11 @@ module Aws::Comprehend
2942
3034
  # input_data_config: { # required
2943
3035
  # s3_uri: "S3Uri", # required
2944
3036
  # input_format: "ONE_DOC_PER_FILE", # accepts ONE_DOC_PER_FILE, ONE_DOC_PER_LINE
3037
+ # document_reader_config: {
3038
+ # document_read_action: "TEXTRACT_DETECT_DOCUMENT_TEXT", # required, accepts TEXTRACT_DETECT_DOCUMENT_TEXT, TEXTRACT_ANALYZE_DOCUMENT
3039
+ # document_read_mode: "SERVICE_DEFAULT", # accepts SERVICE_DEFAULT, FORCE_DOCUMENT_READ_ACTION
3040
+ # feature_types: ["TABLES"], # accepts TABLES, FORMS
3041
+ # },
2945
3042
  # },
2946
3043
  # output_data_config: { # required
2947
3044
  # s3_uri: "S3Uri", # required
@@ -3028,6 +3125,11 @@ module Aws::Comprehend
3028
3125
  # input_data_config: { # required
3029
3126
  # s3_uri: "S3Uri", # required
3030
3127
  # input_format: "ONE_DOC_PER_FILE", # accepts ONE_DOC_PER_FILE, ONE_DOC_PER_LINE
3128
+ # document_reader_config: {
3129
+ # document_read_action: "TEXTRACT_DETECT_DOCUMENT_TEXT", # required, accepts TEXTRACT_DETECT_DOCUMENT_TEXT, TEXTRACT_ANALYZE_DOCUMENT
3130
+ # document_read_mode: "SERVICE_DEFAULT", # accepts SERVICE_DEFAULT, FORCE_DOCUMENT_READ_ACTION
3131
+ # feature_types: ["TABLES"], # accepts TABLES, FORMS
3132
+ # },
3031
3133
  # },
3032
3134
  # output_data_config: { # required
3033
3135
  # s3_uri: "S3Uri", # required
@@ -3133,6 +3235,11 @@ module Aws::Comprehend
3133
3235
  # input_data_config: { # required
3134
3236
  # s3_uri: "S3Uri", # required
3135
3237
  # input_format: "ONE_DOC_PER_FILE", # accepts ONE_DOC_PER_FILE, ONE_DOC_PER_LINE
3238
+ # document_reader_config: {
3239
+ # document_read_action: "TEXTRACT_DETECT_DOCUMENT_TEXT", # required, accepts TEXTRACT_DETECT_DOCUMENT_TEXT, TEXTRACT_ANALYZE_DOCUMENT
3240
+ # document_read_mode: "SERVICE_DEFAULT", # accepts SERVICE_DEFAULT, FORCE_DOCUMENT_READ_ACTION
3241
+ # feature_types: ["TABLES"], # accepts TABLES, FORMS
3242
+ # },
3136
3243
  # },
3137
3244
  # output_data_config: { # required
3138
3245
  # s3_uri: "S3Uri", # required
@@ -3227,6 +3334,11 @@ module Aws::Comprehend
3227
3334
  # input_data_config: { # required
3228
3335
  # s3_uri: "S3Uri", # required
3229
3336
  # input_format: "ONE_DOC_PER_FILE", # accepts ONE_DOC_PER_FILE, ONE_DOC_PER_LINE
3337
+ # document_reader_config: {
3338
+ # document_read_action: "TEXTRACT_DETECT_DOCUMENT_TEXT", # required, accepts TEXTRACT_DETECT_DOCUMENT_TEXT, TEXTRACT_ANALYZE_DOCUMENT
3339
+ # document_read_mode: "SERVICE_DEFAULT", # accepts SERVICE_DEFAULT, FORCE_DOCUMENT_READ_ACTION
3340
+ # feature_types: ["TABLES"], # accepts TABLES, FORMS
3341
+ # },
3230
3342
  # },
3231
3343
  # output_data_config: { # required
3232
3344
  # s3_uri: "S3Uri", # required
@@ -3337,6 +3449,11 @@ module Aws::Comprehend
3337
3449
  # input_data_config: { # required
3338
3450
  # s3_uri: "S3Uri", # required
3339
3451
  # input_format: "ONE_DOC_PER_FILE", # accepts ONE_DOC_PER_FILE, ONE_DOC_PER_LINE
3452
+ # document_reader_config: {
3453
+ # document_read_action: "TEXTRACT_DETECT_DOCUMENT_TEXT", # required, accepts TEXTRACT_DETECT_DOCUMENT_TEXT, TEXTRACT_ANALYZE_DOCUMENT
3454
+ # document_read_mode: "SERVICE_DEFAULT", # accepts SERVICE_DEFAULT, FORCE_DOCUMENT_READ_ACTION
3455
+ # feature_types: ["TABLES"], # accepts TABLES, FORMS
3456
+ # },
3340
3457
  # },
3341
3458
  # output_data_config: { # required
3342
3459
  # s3_uri: "S3Uri", # required
@@ -3447,6 +3564,11 @@ module Aws::Comprehend
3447
3564
  # input_data_config: { # required
3448
3565
  # s3_uri: "S3Uri", # required
3449
3566
  # input_format: "ONE_DOC_PER_FILE", # accepts ONE_DOC_PER_FILE, ONE_DOC_PER_LINE
3567
+ # document_reader_config: {
3568
+ # document_read_action: "TEXTRACT_DETECT_DOCUMENT_TEXT", # required, accepts TEXTRACT_DETECT_DOCUMENT_TEXT, TEXTRACT_ANALYZE_DOCUMENT
3569
+ # document_read_mode: "SERVICE_DEFAULT", # accepts SERVICE_DEFAULT, FORCE_DOCUMENT_READ_ACTION
3570
+ # feature_types: ["TABLES"], # accepts TABLES, FORMS
3571
+ # },
3450
3572
  # },
3451
3573
  # output_data_config: { # required
3452
3574
  # s3_uri: "S3Uri", # required
@@ -3881,7 +4003,7 @@ module Aws::Comprehend
3881
4003
  params: params,
3882
4004
  config: config)
3883
4005
  context[:gem_name] = 'aws-sdk-comprehend'
3884
- context[:gem_version] = '1.50.0'
4006
+ context[:gem_version] = '1.51.0'
3885
4007
  Seahorse::Client::Request.new(handlers, context)
3886
4008
  end
3887
4009
 
@@ -16,6 +16,7 @@ module Aws::Comprehend
16
16
  AnyLengthString = Shapes::StringShape.new(name: 'AnyLengthString')
17
17
  AttributeNamesList = Shapes::ListShape.new(name: 'AttributeNamesList')
18
18
  AttributeNamesListItem = Shapes::StringShape.new(name: 'AttributeNamesListItem')
19
+ AugmentedManifestsDocumentTypeFormat = Shapes::StringShape.new(name: 'AugmentedManifestsDocumentTypeFormat')
19
20
  AugmentedManifestsListItem = Shapes::StructureShape.new(name: 'AugmentedManifestsListItem')
20
21
  BatchDetectDominantLanguageItemResult = Shapes::StructureShape.new(name: 'BatchDetectDominantLanguageItemResult')
21
22
  BatchDetectDominantLanguageRequest = Shapes::StructureShape.new(name: 'BatchDetectDominantLanguageRequest')
@@ -111,6 +112,10 @@ module Aws::Comprehend
111
112
  DocumentClassifierProperties = Shapes::StructureShape.new(name: 'DocumentClassifierProperties')
112
113
  DocumentClassifierPropertiesList = Shapes::ListShape.new(name: 'DocumentClassifierPropertiesList')
113
114
  DocumentLabel = Shapes::StructureShape.new(name: 'DocumentLabel')
115
+ DocumentReadAction = Shapes::StringShape.new(name: 'DocumentReadAction')
116
+ DocumentReadFeatureTypes = Shapes::StringShape.new(name: 'DocumentReadFeatureTypes')
117
+ DocumentReadMode = Shapes::StringShape.new(name: 'DocumentReadMode')
118
+ DocumentReaderConfig = Shapes::StructureShape.new(name: 'DocumentReaderConfig')
114
119
  DominantLanguage = Shapes::StructureShape.new(name: 'DominantLanguage')
115
120
  DominantLanguageDetectionJobFilter = Shapes::StructureShape.new(name: 'DominantLanguageDetectionJobFilter')
116
121
  DominantLanguageDetectionJobProperties = Shapes::StructureShape.new(name: 'DominantLanguageDetectionJobProperties')
@@ -192,6 +197,7 @@ module Aws::Comprehend
192
197
  ListOfDetectKeyPhrasesResult = Shapes::ListShape.new(name: 'ListOfDetectKeyPhrasesResult')
193
198
  ListOfDetectSentimentResult = Shapes::ListShape.new(name: 'ListOfDetectSentimentResult')
194
199
  ListOfDetectSyntaxResult = Shapes::ListShape.new(name: 'ListOfDetectSyntaxResult')
200
+ ListOfDocumentReadFeatureTypes = Shapes::ListShape.new(name: 'ListOfDocumentReadFeatureTypes')
195
201
  ListOfDominantLanguages = Shapes::ListShape.new(name: 'ListOfDominantLanguages')
196
202
  ListOfEntities = Shapes::ListShape.new(name: 'ListOfEntities')
197
203
  ListOfEntityLabels = Shapes::ListShape.new(name: 'ListOfEntityLabels')
@@ -300,6 +306,9 @@ module Aws::Comprehend
300
306
 
301
307
  AugmentedManifestsListItem.add_member(:s3_uri, Shapes::ShapeRef.new(shape: S3Uri, required: true, location_name: "S3Uri"))
302
308
  AugmentedManifestsListItem.add_member(:attribute_names, Shapes::ShapeRef.new(shape: AttributeNamesList, required: true, location_name: "AttributeNames"))
309
+ AugmentedManifestsListItem.add_member(:annotation_data_s3_uri, Shapes::ShapeRef.new(shape: S3Uri, location_name: "AnnotationDataS3Uri"))
310
+ AugmentedManifestsListItem.add_member(:source_documents_s3_uri, Shapes::ShapeRef.new(shape: S3Uri, location_name: "SourceDocumentsS3Uri"))
311
+ AugmentedManifestsListItem.add_member(:document_type, Shapes::ShapeRef.new(shape: AugmentedManifestsDocumentTypeFormat, location_name: "DocumentType"))
303
312
  AugmentedManifestsListItem.struct_class = Types::AugmentedManifestsListItem
304
313
 
305
314
  BatchDetectDominantLanguageItemResult.add_member(:index, Shapes::ShapeRef.new(shape: Integer, location_name: "Index"))
@@ -641,6 +650,11 @@ module Aws::Comprehend
641
650
  DocumentLabel.add_member(:score, Shapes::ShapeRef.new(shape: Float, location_name: "Score"))
642
651
  DocumentLabel.struct_class = Types::DocumentLabel
643
652
 
653
+ DocumentReaderConfig.add_member(:document_read_action, Shapes::ShapeRef.new(shape: DocumentReadAction, required: true, location_name: "DocumentReadAction"))
654
+ DocumentReaderConfig.add_member(:document_read_mode, Shapes::ShapeRef.new(shape: DocumentReadMode, location_name: "DocumentReadMode"))
655
+ DocumentReaderConfig.add_member(:feature_types, Shapes::ShapeRef.new(shape: ListOfDocumentReadFeatureTypes, location_name: "FeatureTypes"))
656
+ DocumentReaderConfig.struct_class = Types::DocumentReaderConfig
657
+
644
658
  DominantLanguage.add_member(:language_code, Shapes::ShapeRef.new(shape: String, location_name: "LanguageCode"))
645
659
  DominantLanguage.add_member(:score, Shapes::ShapeRef.new(shape: Float, location_name: "Score"))
646
660
  DominantLanguage.struct_class = Types::DominantLanguage
@@ -815,6 +829,7 @@ module Aws::Comprehend
815
829
 
816
830
  InputDataConfig.add_member(:s3_uri, Shapes::ShapeRef.new(shape: S3Uri, required: true, location_name: "S3Uri"))
817
831
  InputDataConfig.add_member(:input_format, Shapes::ShapeRef.new(shape: InputFormat, location_name: "InputFormat"))
832
+ InputDataConfig.add_member(:document_reader_config, Shapes::ShapeRef.new(shape: DocumentReaderConfig, location_name: "DocumentReaderConfig"))
818
833
  InputDataConfig.struct_class = Types::InputDataConfig
819
834
 
820
835
  InternalServerException.add_member(:message, Shapes::ShapeRef.new(shape: String, location_name: "Message"))
@@ -945,6 +960,8 @@ module Aws::Comprehend
945
960
 
946
961
  ListOfDetectSyntaxResult.member = Shapes::ShapeRef.new(shape: BatchDetectSyntaxItemResult)
947
962
 
963
+ ListOfDocumentReadFeatureTypes.member = Shapes::ShapeRef.new(shape: DocumentReadFeatureTypes)
964
+
948
965
  ListOfDominantLanguages.member = Shapes::ShapeRef.new(shape: DominantLanguage)
949
966
 
950
967
  ListOfEntities.member = Shapes::ShapeRef.new(shape: Entity)
@@ -20,6 +20,9 @@ module Aws::Comprehend
20
20
  # {
21
21
  # s3_uri: "S3Uri", # required
22
22
  # attribute_names: ["AttributeNamesListItem"], # required
23
+ # annotation_data_s3_uri: "S3Uri",
24
+ # source_documents_s3_uri: "S3Uri",
25
+ # document_type: "PLAIN_TEXT_DOCUMENT", # accepts PLAIN_TEXT_DOCUMENT, SEMI_STRUCTURED_DOCUMENT
23
26
  # }
24
27
  #
25
28
  # @!attribute [rw] s3_uri
@@ -42,11 +45,38 @@ module Aws::Comprehend
42
45
  # job.
43
46
  # @return [Array<String>]
44
47
  #
48
+ # @!attribute [rw] annotation_data_s3_uri
49
+ # The S3 prefix to the annotation files that are referred to in the
50
+ # augmented manifest file.
51
+ # @return [String]
52
+ #
53
+ # @!attribute [rw] source_documents_s3_uri
54
+ # The S3 prefix to the source files (PDFs) that are referred to in the
55
+ # augmented manifest file.
56
+ # @return [String]
57
+ #
58
+ # @!attribute [rw] document_type
59
+ # The type of augmented manifest. PlainTextDocument or
60
+ # SemiStructuredDocument. If you don't specify, the default is
61
+ # PlainTextDocument.
62
+ #
63
+ # * `PLAIN_TEXT_DOCUMENT` A document type that represents any unicode
64
+ # text that is encoded in UTF-8.
65
+ #
66
+ # * `SEMI_STRUCTURED_DOCUMENT` A document type with positional and
67
+ # structural context, like a PDF. For training with Amazon
68
+ # Comprehend, only PDFs are supported. For inference, Amazon
69
+ # Comprehend supports PDFs, DOCX and TXT.
70
+ # @return [String]
71
+ #
45
72
  # @see http://docs.aws.amazon.com/goto/WebAPI/comprehend-2017-11-27/AugmentedManifestsListItem AWS API Documentation
46
73
  #
47
74
  class AugmentedManifestsListItem < Struct.new(
48
75
  :s3_uri,
49
- :attribute_names)
76
+ :attribute_names,
77
+ :annotation_data_s3_uri,
78
+ :source_documents_s3_uri,
79
+ :document_type)
50
80
  SENSITIVE = []
51
81
  include Aws::Structure
52
82
  end
@@ -692,6 +722,9 @@ module Aws::Comprehend
692
722
  # {
693
723
  # s3_uri: "S3Uri", # required
694
724
  # attribute_names: ["AttributeNamesListItem"], # required
725
+ # annotation_data_s3_uri: "S3Uri",
726
+ # source_documents_s3_uri: "S3Uri",
727
+ # document_type: "PLAIN_TEXT_DOCUMENT", # accepts PLAIN_TEXT_DOCUMENT, SEMI_STRUCTURED_DOCUMENT
695
728
  # },
696
729
  # ],
697
730
  # },
@@ -939,6 +972,9 @@ module Aws::Comprehend
939
972
  # {
940
973
  # s3_uri: "S3Uri", # required
941
974
  # attribute_names: ["AttributeNamesListItem"], # required
975
+ # annotation_data_s3_uri: "S3Uri",
976
+ # source_documents_s3_uri: "S3Uri",
977
+ # document_type: "PLAIN_TEXT_DOCUMENT", # accepts PLAIN_TEXT_DOCUMENT, SEMI_STRUCTURED_DOCUMENT
942
978
  # },
943
979
  # ],
944
980
  # },
@@ -1996,6 +2032,9 @@ module Aws::Comprehend
1996
2032
  # {
1997
2033
  # s3_uri: "S3Uri", # required
1998
2034
  # attribute_names: ["AttributeNamesListItem"], # required
2035
+ # annotation_data_s3_uri: "S3Uri",
2036
+ # source_documents_s3_uri: "S3Uri",
2037
+ # document_type: "PLAIN_TEXT_DOCUMENT", # accepts PLAIN_TEXT_DOCUMENT, SEMI_STRUCTURED_DOCUMENT
1999
2038
  # },
2000
2039
  # ],
2001
2040
  # }
@@ -2264,6 +2303,52 @@ module Aws::Comprehend
2264
2303
  include Aws::Structure
2265
2304
  end
2266
2305
 
2306
+ # Specifies how input documents (currently PDFs) are read for an inference job.
2307
+ #
2308
+ # @note When making an API call, you may pass DocumentReaderConfig
2309
+ # data as a hash:
2310
+ #
2311
+ # {
2312
+ # document_read_action: "TEXTRACT_DETECT_DOCUMENT_TEXT", # required, accepts TEXTRACT_DETECT_DOCUMENT_TEXT, TEXTRACT_ANALYZE_DOCUMENT
2313
+ # document_read_mode: "SERVICE_DEFAULT", # accepts SERVICE_DEFAULT, FORCE_DOCUMENT_READ_ACTION
2314
+ # feature_types: ["TABLES"], # accepts TABLES, FORMS
2315
+ # }
2316
+ #
2317
+ # @!attribute [rw] document_read_action
2318
+ # This enum field will start with two values which will apply to PDFs:
2319
+ #
2320
+ # * `TEXTRACT_DETECT_DOCUMENT_TEXT` - The service calls
2321
+ # DetectDocumentText for PDF documents per page.
2322
+ #
2323
+ # * `TEXTRACT_ANALYZE_DOCUMENT` - The service calls AnalyzeDocument
2324
+ # for PDF documents per page.
2325
+ # @return [String]
2326
+ #
2327
+ # @!attribute [rw] document_read_mode
2328
+ # This enum field provides two values:
2329
+ #
2330
+ # * `SERVICE_DEFAULT` - use service defaults for Document reading. For
2331
+ # Digital PDF it would mean using an internal parser instead of
2332
+ # Textract APIs
2333
+ #
2334
+ # * `FORCE_DOCUMENT_READ_ACTION` - Always use specified action for
2335
+ # DocumentReadAction, including Digital PDF.
2336
+ # @return [String]
2337
+ #
2338
+ # @!attribute [rw] feature_types
2339
+ # Specifies how the text in an input file should be processed.
2340
+ # @return [Array<String>]
2341
+ #
2342
+ # @see http://docs.aws.amazon.com/goto/WebAPI/comprehend-2017-11-27/DocumentReaderConfig AWS API Documentation
2343
+ #
2344
+ class DocumentReaderConfig < Struct.new(
2345
+ :document_read_action,
2346
+ :document_read_mode,
2347
+ :feature_types)
2348
+ SENSITIVE = []
2349
+ include Aws::Structure
2350
+ end
2351
+
2267
2352
  # Returns the code for the dominant language in the input text and the
2268
2353
  # level of confidence that Amazon Comprehend has in the accuracy of the
2269
2354
  # detection.
@@ -2931,6 +3016,9 @@ module Aws::Comprehend
2931
3016
  # {
2932
3017
  # s3_uri: "S3Uri", # required
2933
3018
  # attribute_names: ["AttributeNamesListItem"], # required
3019
+ # annotation_data_s3_uri: "S3Uri",
3020
+ # source_documents_s3_uri: "S3Uri",
3021
+ # document_type: "PLAIN_TEXT_DOCUMENT", # accepts PLAIN_TEXT_DOCUMENT, SEMI_STRUCTURED_DOCUMENT
2934
3022
  # },
2935
3023
  # ],
2936
3024
  # }
@@ -3368,7 +3456,7 @@ module Aws::Comprehend
3368
3456
  include Aws::Structure
3369
3457
  end
3370
3458
 
3371
- # The input properties for a topic detection job.
3459
+ # The input properties for an inference job.
3372
3460
  #
3373
3461
  # @note When making an API call, you may pass InputDataConfig
3374
3462
  # data as a hash:
@@ -3376,6 +3464,11 @@ module Aws::Comprehend
3376
3464
  # {
3377
3465
  # s3_uri: "S3Uri", # required
3378
3466
  # input_format: "ONE_DOC_PER_FILE", # accepts ONE_DOC_PER_FILE, ONE_DOC_PER_LINE
3467
+ # document_reader_config: {
3468
+ # document_read_action: "TEXTRACT_DETECT_DOCUMENT_TEXT", # required, accepts TEXTRACT_DETECT_DOCUMENT_TEXT, TEXTRACT_ANALYZE_DOCUMENT
3469
+ # document_read_mode: "SERVICE_DEFAULT", # accepts SERVICE_DEFAULT, FORCE_DOCUMENT_READ_ACTION
3470
+ # feature_types: ["TABLES"], # accepts TABLES, FORMS
3471
+ # },
3379
3472
  # }
3380
3473
  #
3381
3474
  # @!attribute [rw] s3_uri
@@ -3402,11 +3495,21 @@ module Aws::Comprehend
3402
3495
  # documents, such as text messages.
3403
3496
  # @return [String]
3404
3497
  #
3498
+ # @!attribute [rw] document_reader_config
3499
+ # The document reader config field applies only for InputDataConfig of
3500
+ # StartEntitiesDetectionJob.
3501
+ #
3502
+ # Use DocumentReaderConfig to provide specifications about how you
3503
+ # want your inference documents read. Currently it applies for PDF
3504
+ # documents in StartEntitiesDetectionJob custom inference.
3505
+ # @return [Types::DocumentReaderConfig]
3506
+ #
3405
3507
  # @see http://docs.aws.amazon.com/goto/WebAPI/comprehend-2017-11-27/InputDataConfig AWS API Documentation
3406
3508
  #
3407
3509
  class InputDataConfig < Struct.new(
3408
3510
  :s3_uri,
3409
- :input_format)
3511
+ :input_format,
3512
+ :document_reader_config)
3410
3513
  SENSITIVE = []
3411
3514
  include Aws::Structure
3412
3515
  end
@@ -4877,6 +4980,11 @@ module Aws::Comprehend
4877
4980
  # input_data_config: { # required
4878
4981
  # s3_uri: "S3Uri", # required
4879
4982
  # input_format: "ONE_DOC_PER_FILE", # accepts ONE_DOC_PER_FILE, ONE_DOC_PER_LINE
4983
+ # document_reader_config: {
4984
+ # document_read_action: "TEXTRACT_DETECT_DOCUMENT_TEXT", # required, accepts TEXTRACT_DETECT_DOCUMENT_TEXT, TEXTRACT_ANALYZE_DOCUMENT
4985
+ # document_read_mode: "SERVICE_DEFAULT", # accepts SERVICE_DEFAULT, FORCE_DOCUMENT_READ_ACTION
4986
+ # feature_types: ["TABLES"], # accepts TABLES, FORMS
4987
+ # },
4880
4988
  # },
4881
4989
  # output_data_config: { # required
4882
4990
  # s3_uri: "S3Uri", # required
@@ -5026,6 +5134,11 @@ module Aws::Comprehend
5026
5134
  # input_data_config: { # required
5027
5135
  # s3_uri: "S3Uri", # required
5028
5136
  # input_format: "ONE_DOC_PER_FILE", # accepts ONE_DOC_PER_FILE, ONE_DOC_PER_LINE
5137
+ # document_reader_config: {
5138
+ # document_read_action: "TEXTRACT_DETECT_DOCUMENT_TEXT", # required, accepts TEXTRACT_DETECT_DOCUMENT_TEXT, TEXTRACT_ANALYZE_DOCUMENT
5139
+ # document_read_mode: "SERVICE_DEFAULT", # accepts SERVICE_DEFAULT, FORCE_DOCUMENT_READ_ACTION
5140
+ # feature_types: ["TABLES"], # accepts TABLES, FORMS
5141
+ # },
5029
5142
  # },
5030
5143
  # output_data_config: { # required
5031
5144
  # s3_uri: "S3Uri", # required
@@ -5173,6 +5286,11 @@ module Aws::Comprehend
5173
5286
  # input_data_config: { # required
5174
5287
  # s3_uri: "S3Uri", # required
5175
5288
  # input_format: "ONE_DOC_PER_FILE", # accepts ONE_DOC_PER_FILE, ONE_DOC_PER_LINE
5289
+ # document_reader_config: {
5290
+ # document_read_action: "TEXTRACT_DETECT_DOCUMENT_TEXT", # required, accepts TEXTRACT_DETECT_DOCUMENT_TEXT, TEXTRACT_ANALYZE_DOCUMENT
5291
+ # document_read_mode: "SERVICE_DEFAULT", # accepts SERVICE_DEFAULT, FORCE_DOCUMENT_READ_ACTION
5292
+ # feature_types: ["TABLES"], # accepts TABLES, FORMS
5293
+ # },
5176
5294
  # },
5177
5295
  # output_data_config: { # required
5178
5296
  # s3_uri: "S3Uri", # required
@@ -5342,6 +5460,11 @@ module Aws::Comprehend
5342
5460
  # input_data_config: { # required
5343
5461
  # s3_uri: "S3Uri", # required
5344
5462
  # input_format: "ONE_DOC_PER_FILE", # accepts ONE_DOC_PER_FILE, ONE_DOC_PER_LINE
5463
+ # document_reader_config: {
5464
+ # document_read_action: "TEXTRACT_DETECT_DOCUMENT_TEXT", # required, accepts TEXTRACT_DETECT_DOCUMENT_TEXT, TEXTRACT_ANALYZE_DOCUMENT
5465
+ # document_read_mode: "SERVICE_DEFAULT", # accepts SERVICE_DEFAULT, FORCE_DOCUMENT_READ_ACTION
5466
+ # feature_types: ["TABLES"], # accepts TABLES, FORMS
5467
+ # },
5345
5468
  # },
5346
5469
  # output_data_config: { # required
5347
5470
  # s3_uri: "S3Uri", # required
@@ -5455,6 +5578,11 @@ module Aws::Comprehend
5455
5578
  # input_data_config: { # required
5456
5579
  # s3_uri: "S3Uri", # required
5457
5580
  # input_format: "ONE_DOC_PER_FILE", # accepts ONE_DOC_PER_FILE, ONE_DOC_PER_LINE
5581
+ # document_reader_config: {
5582
+ # document_read_action: "TEXTRACT_DETECT_DOCUMENT_TEXT", # required, accepts TEXTRACT_DETECT_DOCUMENT_TEXT, TEXTRACT_ANALYZE_DOCUMENT
5583
+ # document_read_mode: "SERVICE_DEFAULT", # accepts SERVICE_DEFAULT, FORCE_DOCUMENT_READ_ACTION
5584
+ # feature_types: ["TABLES"], # accepts TABLES, FORMS
5585
+ # },
5458
5586
  # },
5459
5587
  # output_data_config: { # required
5460
5588
  # s3_uri: "S3Uri", # required
@@ -5609,6 +5737,11 @@ module Aws::Comprehend
5609
5737
  # input_data_config: { # required
5610
5738
  # s3_uri: "S3Uri", # required
5611
5739
  # input_format: "ONE_DOC_PER_FILE", # accepts ONE_DOC_PER_FILE, ONE_DOC_PER_LINE
5740
+ # document_reader_config: {
5741
+ # document_read_action: "TEXTRACT_DETECT_DOCUMENT_TEXT", # required, accepts TEXTRACT_DETECT_DOCUMENT_TEXT, TEXTRACT_ANALYZE_DOCUMENT
5742
+ # document_read_mode: "SERVICE_DEFAULT", # accepts SERVICE_DEFAULT, FORCE_DOCUMENT_READ_ACTION
5743
+ # feature_types: ["TABLES"], # accepts TABLES, FORMS
5744
+ # },
5612
5745
  # },
5613
5746
  # output_data_config: { # required
5614
5747
  # s3_uri: "S3Uri", # required
@@ -5737,6 +5870,11 @@ module Aws::Comprehend
5737
5870
  # input_data_config: { # required
5738
5871
  # s3_uri: "S3Uri", # required
5739
5872
  # input_format: "ONE_DOC_PER_FILE", # accepts ONE_DOC_PER_FILE, ONE_DOC_PER_LINE
5873
+ # document_reader_config: {
5874
+ # document_read_action: "TEXTRACT_DETECT_DOCUMENT_TEXT", # required, accepts TEXTRACT_DETECT_DOCUMENT_TEXT, TEXTRACT_ANALYZE_DOCUMENT
5875
+ # document_read_mode: "SERVICE_DEFAULT", # accepts SERVICE_DEFAULT, FORCE_DOCUMENT_READ_ACTION
5876
+ # feature_types: ["TABLES"], # accepts TABLES, FORMS
5877
+ # },
5740
5878
  # },
5741
5879
  # output_data_config: { # required
5742
5880
  # s3_uri: "S3Uri", # required
@@ -5891,6 +6029,11 @@ module Aws::Comprehend
5891
6029
  # input_data_config: { # required
5892
6030
  # s3_uri: "S3Uri", # required
5893
6031
  # input_format: "ONE_DOC_PER_FILE", # accepts ONE_DOC_PER_FILE, ONE_DOC_PER_LINE
6032
+ # document_reader_config: {
6033
+ # document_read_action: "TEXTRACT_DETECT_DOCUMENT_TEXT", # required, accepts TEXTRACT_DETECT_DOCUMENT_TEXT, TEXTRACT_ANALYZE_DOCUMENT
6034
+ # document_read_mode: "SERVICE_DEFAULT", # accepts SERVICE_DEFAULT, FORCE_DOCUMENT_READ_ACTION
6035
+ # feature_types: ["TABLES"], # accepts TABLES, FORMS
6036
+ # },
5894
6037
  # },
5895
6038
  # output_data_config: { # required
5896
6039
  # s3_uri: "S3Uri", # required
@@ -48,6 +48,6 @@ require_relative 'aws-sdk-comprehend/customizations'
48
48
  # @!group service
49
49
  module Aws::Comprehend
50
50
 
51
- GEM_VERSION = '1.50.0'
51
+ GEM_VERSION = '1.51.0'
52
52
 
53
53
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aws-sdk-comprehend
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.50.0
4
+ version: 1.51.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Amazon Web Services
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-09-01 00:00:00.000000000 Z
11
+ date: 2021-09-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: aws-sdk-core