aws-sdk-comprehend 1.63.0 → 1.64.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d212d21125ecdc6ca982ffe33cd1d5a15def0f2dde020e50d72933597899a1c6
4
- data.tar.gz: e62eef5a460d224bd305d092f13240bd797fb7b8dcc51f7324adcd84b0badcee
3
+ metadata.gz: 507fc32d925b872b49c92b211c8805dbccf93cc7ffb8a69694186e4d5a9f95e3
4
+ data.tar.gz: ae354adc275dc7f59c4ebafa47f30e8f1ebe52991fec2db70986a4e1d81da4cd
5
5
  SHA512:
6
- metadata.gz: ef957369421573536c7594b798e30d7ca24ca6c6508456646487097c0c631703f4ef69faa6dbc7fde543225028a9472f48bf005bf652222e2faf41b744fba7ce
7
- data.tar.gz: 1a85f0719dfca01cbb6a7a8df95e48b8ebe71068bee62031e317f045a55c7d7a7c148b66a52b6007cc1ed10a8df979bf104c102e4944edebb84399e23e9b73e6
6
+ metadata.gz: 3bb3dfded4676a82b0120fa5999476004be7f5ab7b32f0f0c398e809bf6506c902061900d48edc3d4eba32e5d6321dcc0c1057a4f9a352a03fd0055871ed692b
7
+ data.tar.gz: 554775175c7e435e8872a4feab3d1d3f8a07df6ec79fd9b2379acf65b07c99979c5fcd305876915aa56336df28ad6b572771da7320eb727b5dd1f3427cdcf927
data/CHANGELOG.md CHANGED
@@ -1,6 +1,11 @@
1
1
  Unreleased Changes
2
2
  ------------------
3
3
 
4
+ 1.64.0 (2022-12-01)
5
+ ------------------
6
+
7
+ * Feature - Comprehend now supports semi-structured documents (such as PDF files or image files) as inputs for custom analysis using the synchronous APIs (ClassifyDocument and DetectEntities).
8
+
4
9
  1.63.0 (2022-10-25)
5
10
  ------------------
6
11
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.63.0
1
+ 1.64.0
@@ -464,6 +464,14 @@ module Aws::Comprehend
464
464
  # resp.result_list[0].entities[0].text #=> String
465
465
  # resp.result_list[0].entities[0].begin_offset #=> Integer
466
466
  # resp.result_list[0].entities[0].end_offset #=> Integer
467
+ # resp.result_list[0].entities[0].block_references #=> Array
468
+ # resp.result_list[0].entities[0].block_references[0].block_id #=> String
469
+ # resp.result_list[0].entities[0].block_references[0].begin_offset #=> Integer
470
+ # resp.result_list[0].entities[0].block_references[0].end_offset #=> Integer
471
+ # resp.result_list[0].entities[0].block_references[0].child_blocks #=> Array
472
+ # resp.result_list[0].entities[0].block_references[0].child_blocks[0].child_block_id #=> String
473
+ # resp.result_list[0].entities[0].block_references[0].child_blocks[0].begin_offset #=> Integer
474
+ # resp.result_list[0].entities[0].block_references[0].child_blocks[0].end_offset #=> Integer
467
475
  # resp.error_list #=> Array
468
476
  # resp.error_list[0].index #=> Integer
469
477
  # resp.error_list[0].error_code #=> String
@@ -705,8 +713,25 @@ module Aws::Comprehend
705
713
  # document in real-time, using a previously created and trained custom
706
714
  # model and an endpoint.
707
715
  #
708
- # @option params [required, String] :text
709
- # The document text to be analyzed.
716
+ # You can input plain text or you can upload a single-page input
717
+ # document (text, PDF, Word, or image).
718
+ #
719
+ # If the system detects errors while processing a page in the input
720
+ # document, the API response includes an entry in `Errors` that
721
+ # describes the errors.
722
+ #
723
+ # If the system detects a document-level error in your input document,
724
+ # the API returns an `InvalidRequestException` error response. For
725
+ # details about this exception, see [Errors in semi-structured
726
+ # documents][1] in the Comprehend Developer Guide.
727
+ #
728
+ #
729
+ #
730
+ # [1]: https://docs.aws.amazon.com/comprehend/latest/dg/idp-inputs-sync-err.html
731
+ #
732
+ # @option params [String] :text
733
+ # The document text to be analyzed. If you enter text using this
734
+ # parameter, do not use the `Bytes` parameter.
710
735
  #
711
736
  # @option params [required, String] :endpoint_arn
712
737
  # The Amazon Resource Name (ARN) of the endpoint. For information
@@ -716,16 +741,48 @@ module Aws::Comprehend
716
741
  #
717
742
  # [1]: https://docs.aws.amazon.com/comprehend/latest/dg/manage-endpoints.html
718
743
  #
744
+ # @option params [String, StringIO, File] :bytes
745
+ # Use the `Bytes` parameter to input a text, PDF, Word or image file.
746
+ # You can also use the `Bytes` parameter to input an Amazon Textract
747
+ # `DetectDocumentText` or `AnalyzeDocument` output file.
748
+ #
749
+ # Provide the input document as a sequence of base64-encoded bytes. If
750
+ # your code uses an Amazon Web Services SDK to classify documents, the
751
+ # SDK may encode the document file bytes for you.
752
+ #
753
+ # The maximum length of this field depends on the input document type.
754
+ # For details, see [Inputs for real-time custom analysis][1] in the
755
+ # Comprehend Developer Guide.
756
+ #
757
+ # If you use the `Bytes` parameter, do not use the `Text` parameter.
758
+ #
759
+ #
760
+ #
761
+ # [1]: https://docs.aws.amazon.com/comprehend/latest/dg/idp-inputs-sync.html
762
+ #
763
+ # @option params [Types::DocumentReaderConfig] :document_reader_config
764
+ # Provides configuration parameters to override the default actions for
765
+ # extracting text from PDF documents and image files.
766
+ #
719
767
  # @return [Types::ClassifyDocumentResponse] Returns a {Seahorse::Client::Response response} object which responds to the following methods:
720
768
  #
721
769
  # * {Types::ClassifyDocumentResponse#classes #classes} => Array<Types::DocumentClass>
722
770
  # * {Types::ClassifyDocumentResponse#labels #labels} => Array<Types::DocumentLabel>
771
+ # * {Types::ClassifyDocumentResponse#document_metadata #document_metadata} => Types::DocumentMetadata
772
+ # * {Types::ClassifyDocumentResponse#document_type #document_type} => Array<Types::DocumentTypeListItem>
773
+ # * {Types::ClassifyDocumentResponse#errors #errors} => Array<Types::ErrorsListItem>
723
774
  #
724
775
  # @example Request syntax with placeholder values
725
776
  #
726
777
  # resp = client.classify_document({
727
- # text: "CustomerInputString", # required
778
+ # text: "CustomerInputString",
728
779
  # endpoint_arn: "DocumentClassifierEndpointArn", # required
780
+ # bytes: "data",
781
+ # document_reader_config: {
782
+ # document_read_action: "TEXTRACT_DETECT_DOCUMENT_TEXT", # required, accepts TEXTRACT_DETECT_DOCUMENT_TEXT, TEXTRACT_ANALYZE_DOCUMENT
783
+ # document_read_mode: "SERVICE_DEFAULT", # accepts SERVICE_DEFAULT, FORCE_DOCUMENT_READ_ACTION
784
+ # feature_types: ["TABLES"], # accepts TABLES, FORMS
785
+ # },
729
786
  # })
730
787
  #
731
788
  # @example Response structure
@@ -733,9 +790,22 @@ module Aws::Comprehend
733
790
  # resp.classes #=> Array
734
791
  # resp.classes[0].name #=> String
735
792
  # resp.classes[0].score #=> Float
793
+ # resp.classes[0].page #=> Integer
736
794
  # resp.labels #=> Array
737
795
  # resp.labels[0].name #=> String
738
796
  # resp.labels[0].score #=> Float
797
+ # resp.labels[0].page #=> Integer
798
+ # resp.document_metadata.pages #=> Integer
799
+ # resp.document_metadata.extracted_characters #=> Array
800
+ # resp.document_metadata.extracted_characters[0].page #=> Integer
801
+ # resp.document_metadata.extracted_characters[0].count #=> Integer
802
+ # resp.document_type #=> Array
803
+ # resp.document_type[0].page #=> Integer
804
+ # resp.document_type[0].type #=> String, one of "NATIVE_PDF", "SCANNED_PDF", "MS_WORD", "IMAGE", "PLAIN_TEXT", "TEXTRACT_DETECT_DOCUMENT_TEXT_JSON", "TEXTRACT_ANALYZE_DOCUMENT_JSON"
805
+ # resp.errors #=> Array
806
+ # resp.errors[0].page #=> Integer
807
+ # resp.errors[0].error_code #=> String, one of "TEXTRACT_BAD_PAGE", "TEXTRACT_PROVISIONED_THROUGHPUT_EXCEEDED", "PAGE_CHARACTERS_EXCEEDED", "PAGE_SIZE_EXCEEDED", "INTERNAL_SERVER_ERROR"
808
+ # resp.errors[0].error_message #=> String
739
809
  #
740
810
  # @see http://docs.aws.amazon.com/goto/WebAPI/comprehend-2017-11-27/ClassifyDocument AWS API Documentation
741
811
  #
@@ -1067,10 +1137,11 @@ module Aws::Comprehend
1067
1137
  # not need to pass this option.**
1068
1138
  #
1069
1139
  # @option params [required, String] :language_code
1070
- # You can specify any of the following languages supported by Amazon
1071
- # Comprehend: English ("en"), Spanish ("es"), French ("fr"),
1072
- # Italian ("it"), German ("de"), or Portuguese ("pt"). All
1073
- # documents must be in the same language.
1140
+ # You can specify any of the following languages: English ("en"),
1141
+ # Spanish ("es"), French ("fr"), Italian ("it"), German ("de"),
1142
+ # or Portuguese ("pt"). If you plan to use this entity recognizer with
1143
+ # PDF, Word, or image input files, you must specify English as the
1144
+ # language. All training documents must be in the same language.
1074
1145
  #
1075
1146
  # @option params [String] :volume_kms_key_id
1076
1147
  # ID for the AWS Key Management Service (KMS) key that Amazon Comprehend
@@ -1802,7 +1873,8 @@ module Aws::Comprehend
1802
1873
  # custom model, including the JSON body of the policy.
1803
1874
  #
1804
1875
  # @option params [required, String] :resource_arn
1805
- # The Amazon Resource Name (ARN) of the policy to describe.
1876
+ # The Amazon Resource Name (ARN) of the custom model version that has
1877
+ # the resource policy.
1806
1878
  #
1807
1879
  # @return [Types::DescribeResourcePolicyResponse] Returns a {Seahorse::Client::Response response} object which responds to the following methods:
1808
1880
  #
@@ -2022,25 +2094,44 @@ module Aws::Comprehend
2022
2094
  req.send_request(options)
2023
2095
  end
2024
2096
 
2025
- # Inspects text for named entities, and returns information about them.
2026
- # For more information, about named entities, see [Entities][1] in the
2027
- # Comprehend Developer Guide.
2097
+ # Detects named entities in input text when you use the pre-trained
2098
+ # model. Detects custom entities if you have a custom entity recognition
2099
+ # model.
2100
+ #
2101
+ # When detecting named entities using the pre-trained model, use plain
2102
+ # text as the input. For more information about named entities, see
2103
+ # [Entities][1] in the Comprehend Developer Guide.
2104
+ #
2105
+ # When you use a custom entity recognition model, you can input plain
2106
+ # text or you can upload a single-page input document (text, PDF, Word,
2107
+ # or image).
2108
+ #
2109
+ # If the system detects errors while processing a page in the input
2110
+ # document, the API response includes an entry in `Errors` for each
2111
+ # error.
2112
+ #
2113
+ # If the system detects a document-level error in your input document,
2114
+ # the API returns an `InvalidRequestException` error response. For
2115
+ # details about this exception, see [Errors in semi-structured
2116
+ # documents][2] in the Comprehend Developer Guide.
2028
2117
  #
2029
2118
  #
2030
2119
  #
2031
2120
  # [1]: https://docs.aws.amazon.com/comprehend/latest/dg/how-entities.html
2121
+ # [2]: https://docs.aws.amazon.com/comprehend/latest/dg/idp-inputs-sync-err.html
2032
2122
  #
2033
- # @option params [required, String] :text
2034
- # A UTF-8 text string. The maximum string size is 100 KB.
2123
+ # @option params [String] :text
2124
+ # A UTF-8 text string. The maximum string size is 100 KB. If you enter
2125
+ # text using this parameter, do not use the `Bytes` parameter.
2035
2126
  #
2036
2127
  # @option params [String] :language_code
2037
2128
  # The language of the input documents. You can specify any of the
2038
- # primary languages supported by Amazon Comprehend. All documents must
2039
- # be in the same language.
2129
+ # primary languages supported by Amazon Comprehend. If your request
2130
+ # includes the endpoint for a custom entity recognition model, Amazon
2131
+ # Comprehend uses the language of your custom model, and it ignores any
2132
+ # language code that you specify here.
2040
2133
  #
2041
- # If your request includes the endpoint for a custom entity recognition
2042
- # model, Amazon Comprehend uses the language of your custom model, and
2043
- # it ignores any language code that you specify here.
2134
+ # All input documents must be in the same language.
2044
2135
  #
2045
2136
  # @option params [String] :endpoint_arn
2046
2137
  # The Amazon Resource Name of an endpoint that is associated with a
@@ -2058,16 +2149,57 @@ module Aws::Comprehend
2058
2149
  #
2059
2150
  # [1]: https://docs.aws.amazon.com/comprehend/latest/dg/manage-endpoints.html
2060
2151
  #
2152
+ # @option params [String, StringIO, File] :bytes
2153
+ # This field applies only when you use a custom entity recognition model
2154
+ # that was trained with PDF annotations. For other cases, enter your
2155
+ # text input in the `Text` field.
2156
+ #
2157
+ # Use the `Bytes` parameter to input a text, PDF, Word or image file.
2158
+ # Using a plain-text file in the `Bytes` parameter is equivalent to
2159
+ # using the `Text` parameter (the `Entities` field in the response is
2160
+ # identical).
2161
+ #
2162
+ # You can also use the `Bytes` parameter to input an Amazon Textract
2163
+ # `DetectDocumentText` or `AnalyzeDocument` output file.
2164
+ #
2165
+ # Provide the input document as a sequence of base64-encoded bytes. If
2166
+ # your code uses an Amazon Web Services SDK to detect entities, the SDK
2167
+ # may encode the document file bytes for you.
2168
+ #
2169
+ # The maximum length of this field depends on the input document type.
2170
+ # For details, see [Inputs for real-time custom analysis][1] in the
2171
+ # Comprehend Developer Guide.
2172
+ #
2173
+ # If you use the `Bytes` parameter, do not use the `Text` parameter.
2174
+ #
2175
+ #
2176
+ #
2177
+ # [1]: https://docs.aws.amazon.com/comprehend/latest/dg/idp-inputs-sync.html
2178
+ #
2179
+ # @option params [Types::DocumentReaderConfig] :document_reader_config
2180
+ # Provides configuration parameters to override the default actions for
2181
+ # extracting text from PDF documents and image files.
2182
+ #
2061
2183
  # @return [Types::DetectEntitiesResponse] Returns a {Seahorse::Client::Response response} object which responds to the following methods:
2062
2184
  #
2063
2185
  # * {Types::DetectEntitiesResponse#entities #entities} => Array<Types::Entity>
2186
+ # * {Types::DetectEntitiesResponse#document_metadata #document_metadata} => Types::DocumentMetadata
2187
+ # * {Types::DetectEntitiesResponse#document_type #document_type} => Array<Types::DocumentTypeListItem>
2188
+ # * {Types::DetectEntitiesResponse#blocks #blocks} => Array<Types::Block>
2189
+ # * {Types::DetectEntitiesResponse#errors #errors} => Array<Types::ErrorsListItem>
2064
2190
  #
2065
2191
  # @example Request syntax with placeholder values
2066
2192
  #
2067
2193
  # resp = client.detect_entities({
2068
- # text: "CustomerInputString", # required
2194
+ # text: "CustomerInputString",
2069
2195
  # language_code: "en", # accepts en, es, fr, de, it, pt, ar, hi, ja, ko, zh, zh-TW
2070
2196
  # endpoint_arn: "EntityRecognizerEndpointArn",
2197
+ # bytes: "data",
2198
+ # document_reader_config: {
2199
+ # document_read_action: "TEXTRACT_DETECT_DOCUMENT_TEXT", # required, accepts TEXTRACT_DETECT_DOCUMENT_TEXT, TEXTRACT_ANALYZE_DOCUMENT
2200
+ # document_read_mode: "SERVICE_DEFAULT", # accepts SERVICE_DEFAULT, FORCE_DOCUMENT_READ_ACTION
2201
+ # feature_types: ["TABLES"], # accepts TABLES, FORMS
2202
+ # },
2071
2203
  # })
2072
2204
  #
2073
2205
  # @example Response structure
@@ -2078,6 +2210,41 @@ module Aws::Comprehend
2078
2210
  # resp.entities[0].text #=> String
2079
2211
  # resp.entities[0].begin_offset #=> Integer
2080
2212
  # resp.entities[0].end_offset #=> Integer
2213
+ # resp.entities[0].block_references #=> Array
2214
+ # resp.entities[0].block_references[0].block_id #=> String
2215
+ # resp.entities[0].block_references[0].begin_offset #=> Integer
2216
+ # resp.entities[0].block_references[0].end_offset #=> Integer
2217
+ # resp.entities[0].block_references[0].child_blocks #=> Array
2218
+ # resp.entities[0].block_references[0].child_blocks[0].child_block_id #=> String
2219
+ # resp.entities[0].block_references[0].child_blocks[0].begin_offset #=> Integer
2220
+ # resp.entities[0].block_references[0].child_blocks[0].end_offset #=> Integer
2221
+ # resp.document_metadata.pages #=> Integer
2222
+ # resp.document_metadata.extracted_characters #=> Array
2223
+ # resp.document_metadata.extracted_characters[0].page #=> Integer
2224
+ # resp.document_metadata.extracted_characters[0].count #=> Integer
2225
+ # resp.document_type #=> Array
2226
+ # resp.document_type[0].page #=> Integer
2227
+ # resp.document_type[0].type #=> String, one of "NATIVE_PDF", "SCANNED_PDF", "MS_WORD", "IMAGE", "PLAIN_TEXT", "TEXTRACT_DETECT_DOCUMENT_TEXT_JSON", "TEXTRACT_ANALYZE_DOCUMENT_JSON"
2228
+ # resp.blocks #=> Array
2229
+ # resp.blocks[0].id #=> String
2230
+ # resp.blocks[0].block_type #=> String, one of "LINE", "WORD"
2231
+ # resp.blocks[0].text #=> String
2232
+ # resp.blocks[0].page #=> Integer
2233
+ # resp.blocks[0].geometry.bounding_box.height #=> Float
2234
+ # resp.blocks[0].geometry.bounding_box.left #=> Float
2235
+ # resp.blocks[0].geometry.bounding_box.top #=> Float
2236
+ # resp.blocks[0].geometry.bounding_box.width #=> Float
2237
+ # resp.blocks[0].geometry.polygon #=> Array
2238
+ # resp.blocks[0].geometry.polygon[0].x #=> Float
2239
+ # resp.blocks[0].geometry.polygon[0].y #=> Float
2240
+ # resp.blocks[0].relationships #=> Array
2241
+ # resp.blocks[0].relationships[0].ids #=> Array
2242
+ # resp.blocks[0].relationships[0].ids[0] #=> String
2243
+ # resp.blocks[0].relationships[0].type #=> String, one of "CHILD"
2244
+ # resp.errors #=> Array
2245
+ # resp.errors[0].page #=> Integer
2246
+ # resp.errors[0].error_code #=> String, one of "TEXTRACT_BAD_PAGE", "TEXTRACT_PROVISIONED_THROUGHPUT_EXCEEDED", "PAGE_CHARACTERS_EXCEEDED", "PAGE_SIZE_EXCEEDED", "INTERNAL_SERVER_ERROR"
2247
+ # resp.errors[0].error_message #=> String
2081
2248
  #
2082
2249
  # @see http://docs.aws.amazon.com/goto/WebAPI/comprehend-2017-11-27/DetectEntities AWS API Documentation
2083
2250
  #
@@ -2692,6 +2859,8 @@ module Aws::Comprehend
2692
2859
  # * {Types::ListEndpointsResponse#endpoint_properties_list #endpoint_properties_list} => Array<Types::EndpointProperties>
2693
2860
  # * {Types::ListEndpointsResponse#next_token #next_token} => String
2694
2861
  #
2862
+ # The returned {Seahorse::Client::Response response} is a pageable response and is Enumerable. For details on usage see {Aws::PageableResponse PageableResponse}.
2863
+ #
2695
2864
  # @example Request syntax with placeholder values
2696
2865
  #
2697
2866
  # resp = client.list_endpoints({
@@ -3098,6 +3267,8 @@ module Aws::Comprehend
3098
3267
  # * {Types::ListPiiEntitiesDetectionJobsResponse#pii_entities_detection_job_properties_list #pii_entities_detection_job_properties_list} => Array<Types::PiiEntitiesDetectionJobProperties>
3099
3268
  # * {Types::ListPiiEntitiesDetectionJobsResponse#next_token #next_token} => String
3100
3269
  #
3270
+ # The returned {Seahorse::Client::Response response} is a pageable response and is Enumerable. For details on usage see {Aws::PageableResponse PageableResponse}.
3271
+ #
3101
3272
  # @example Request syntax with placeholder values
3102
3273
  #
3103
3274
  # resp = client.list_pii_entities_detection_jobs({
@@ -4208,7 +4379,8 @@ module Aws::Comprehend
4208
4379
  # job.
4209
4380
  #
4210
4381
  # @option params [required, Types::InputDataConfig] :input_data_config
4211
- # The input properties for an inference job.
4382
+ # The input properties for an inference job. The document reader config
4383
+ # field applies only to non-text inputs for custom analysis.
4212
4384
  #
4213
4385
  # @option params [required, Types::OutputDataConfig] :output_data_config
4214
4386
  # Specifies where to send the output files.
@@ -4885,7 +5057,7 @@ module Aws::Comprehend
4885
5057
  params: params,
4886
5058
  config: config)
4887
5059
  context[:gem_name] = 'aws-sdk-comprehend'
4888
- context[:gem_version] = '1.63.0'
5060
+ context[:gem_version] = '1.64.0'
4889
5061
  Seahorse::Client::Request.new(handlers, context)
4890
5062
  end
4891
5063