aws-sdk-textract 1.7.0 → 1.12.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/aws-sdk-textract.rb +1 -1
- data/lib/aws-sdk-textract/client.rb +120 -69
- data/lib/aws-sdk-textract/client_api.rb +41 -0
- data/lib/aws-sdk-textract/errors.rb +26 -0
- data/lib/aws-sdk-textract/types.rb +280 -108
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7a71006801113001e73f63228fe2b5d656ca109c
|
4
|
+
data.tar.gz: e30581e97885a5a4dce46db639aa58f73291766d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: da014f69acfee4fa46bb6e23ee4c0988d8e7cbfc3529dfbee53a992d1baeaf90d1f18bd01a9afa31c75bbc592e0e3112bf673cd99c287a400de984c2ebabc871
|
7
|
+
data.tar.gz: 4e87f85c57a41e611c818f4afc5b36c73422c1d5617153ab3175742e2f2be4811500d6702b24522d3988c077488f5902653be37ca7b9bf02cb4c1aed06482088
|
data/lib/aws-sdk-textract.rb
CHANGED
@@ -116,6 +116,10 @@ module Aws::Textract
|
|
116
116
|
# Allows you to provide an identifier for this client which will be attached to
|
117
117
|
# all generated client side metrics. Defaults to an empty string.
|
118
118
|
#
|
119
|
+
# @option options [String] :client_side_monitoring_host ("127.0.0.1")
|
120
|
+
# Allows you to specify the DNS hostname or IPv4 or IPv6 address that the client
|
121
|
+
# side monitoring agent is running on, where client metrics will be published via UDP.
|
122
|
+
#
|
119
123
|
# @option options [Integer] :client_side_monitoring_port (31000)
|
120
124
|
# Required for publishing client metrics. The port that the client side monitoring
|
121
125
|
# agent is running on, where client metrics will be published via UDP.
|
@@ -264,27 +268,30 @@ module Aws::Textract
|
|
264
268
|
#
|
265
269
|
# The types of information returned are as follows:
|
266
270
|
#
|
267
|
-
# *
|
268
|
-
#
|
269
|
-
#
|
270
|
-
#
|
271
|
-
#
|
271
|
+
# * Form data (key-value pairs). The related information is returned in
|
272
|
+
# two Block objects, each of type `KEY_VALUE_SET`\: a KEY `Block`
|
273
|
+
# object and a VALUE `Block` object. For example, *Name: Ana Silva
|
274
|
+
# Carolina* contains a key and value. *Name:* is the key. *Ana Silva
|
275
|
+
# Carolina* is the value.
|
272
276
|
#
|
273
|
-
# * Table and table cell data. A TABLE Block object contains
|
274
|
-
# about a detected table. A CELL Block object is
|
275
|
-
# cell in a table.
|
277
|
+
# * Table and table cell data. A TABLE `Block` object contains
|
278
|
+
# information about a detected table. A CELL `Block` object is
|
279
|
+
# returned for each cell in a table.
|
276
280
|
#
|
277
|
-
# *
|
278
|
-
#
|
279
|
-
#
|
281
|
+
# * Lines and words of text. A LINE `Block` object contains one or more
|
282
|
+
# WORD `Block` objects. All lines and words that are detected in the
|
283
|
+
# document are returned (including text that doesn't have a
|
284
|
+
# relationship with the value of `FeatureTypes`).
|
280
285
|
#
|
281
|
-
#
|
282
|
-
#
|
286
|
+
# Selection elements such as check boxes and option buttons (radio
|
287
|
+
# buttons) can be detected in form data and in tables. A
|
288
|
+
# SELECTION\_ELEMENT `Block` object contains information about a
|
289
|
+
# selection element, including the selection status.
|
283
290
|
#
|
284
291
|
# You can choose which type of analysis to perform by specifying the
|
285
292
|
# `FeatureTypes` list.
|
286
293
|
#
|
287
|
-
# The output is returned in a list of `
|
294
|
+
# The output is returned in a list of `Block` objects.
|
288
295
|
#
|
289
296
|
# `AnalyzeDocument` is a synchronous operation. To analyze documents
|
290
297
|
# asynchronously, use StartDocumentAnalysis.
|
@@ -298,22 +305,30 @@ module Aws::Textract
|
|
298
305
|
# @option params [required, Types::Document] :document
|
299
306
|
# The input document as base64-encoded bytes or an Amazon S3 object. If
|
300
307
|
# you use the AWS CLI to call Amazon Textract operations, you can't
|
301
|
-
# pass image bytes. The document must be an image in
|
308
|
+
# pass image bytes. The document must be an image in JPEG or PNG format.
|
302
309
|
#
|
303
|
-
# If you
|
304
|
-
# need to base64-encode image bytes passed using the `Bytes`
|
310
|
+
# If you're using an AWS SDK to call Amazon Textract, you might not
|
311
|
+
# need to base64-encode image bytes that are passed using the `Bytes`
|
312
|
+
# field.
|
305
313
|
#
|
306
314
|
# @option params [required, Array<String>] :feature_types
|
307
315
|
# A list of the types of analysis to perform. Add TABLES to the list to
|
308
|
-
# return information about the tables detected in the input
|
309
|
-
# Add FORMS to return detected
|
310
|
-
#
|
311
|
-
#
|
316
|
+
# return information about the tables that are detected in the input
|
317
|
+
# document. Add FORMS to return detected form data. To perform both
|
318
|
+
# types of analysis, add TABLES and FORMS to `FeatureTypes`. All lines
|
319
|
+
# and words detected in the document are included in the response
|
320
|
+
# (including text that isn't related to the value of `FeatureTypes`).
|
321
|
+
#
|
322
|
+
# @option params [Types::HumanLoopConfig] :human_loop_config
|
323
|
+
# Sets the configuration for the human in the loop workflow for
|
324
|
+
# analyzing documents.
|
312
325
|
#
|
313
326
|
# @return [Types::AnalyzeDocumentResponse] Returns a {Seahorse::Client::Response response} object which responds to the following methods:
|
314
327
|
#
|
315
328
|
# * {Types::AnalyzeDocumentResponse#document_metadata #document_metadata} => Types::DocumentMetadata
|
316
329
|
# * {Types::AnalyzeDocumentResponse#blocks #blocks} => Array<Types::Block>
|
330
|
+
# * {Types::AnalyzeDocumentResponse#human_loop_activation_output #human_loop_activation_output} => Types::HumanLoopActivationOutput
|
331
|
+
# * {Types::AnalyzeDocumentResponse#analyze_document_model_version #analyze_document_model_version} => String
|
317
332
|
#
|
318
333
|
# @example Request syntax with placeholder values
|
319
334
|
#
|
@@ -327,6 +342,13 @@ module Aws::Textract
|
|
327
342
|
# },
|
328
343
|
# },
|
329
344
|
# feature_types: ["TABLES"], # required, accepts TABLES, FORMS
|
345
|
+
# human_loop_config: {
|
346
|
+
# human_loop_name: "HumanLoopName", # required
|
347
|
+
# flow_definition_arn: "FlowDefinitionArn", # required
|
348
|
+
# data_attributes: {
|
349
|
+
# content_classifiers: ["FreeOfPersonallyIdentifiableInformation"], # accepts FreeOfPersonallyIdentifiableInformation, FreeOfAdultContent
|
350
|
+
# },
|
351
|
+
# },
|
330
352
|
# })
|
331
353
|
#
|
332
354
|
# @example Response structure
|
@@ -356,6 +378,11 @@ module Aws::Textract
|
|
356
378
|
# resp.blocks[0].entity_types[0] #=> String, one of "KEY", "VALUE"
|
357
379
|
# resp.blocks[0].selection_status #=> String, one of "SELECTED", "NOT_SELECTED"
|
358
380
|
# resp.blocks[0].page #=> Integer
|
381
|
+
# resp.human_loop_activation_output.human_loop_arn #=> String
|
382
|
+
# resp.human_loop_activation_output.human_loop_activation_reasons #=> Array
|
383
|
+
# resp.human_loop_activation_output.human_loop_activation_reasons[0] #=> String
|
384
|
+
# resp.human_loop_activation_output.human_loop_activation_conditions_evaluation_results #=> String
|
385
|
+
# resp.analyze_document_model_version #=> String
|
359
386
|
#
|
360
387
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/AnalyzeDocument AWS API Documentation
|
361
388
|
#
|
@@ -368,7 +395,7 @@ module Aws::Textract
|
|
368
395
|
|
369
396
|
# Detects text in the input document. Amazon Textract can detect lines
|
370
397
|
# of text and the words that make up a line of text. The input document
|
371
|
-
# must be an image in
|
398
|
+
# must be an image in JPEG or PNG format. `DetectDocumentText` returns
|
372
399
|
# the detected text in an array of Block objects.
|
373
400
|
#
|
374
401
|
# Each document page has as an associated `Block` of type PAGE. Each
|
@@ -389,15 +416,17 @@ module Aws::Textract
|
|
389
416
|
# @option params [required, Types::Document] :document
|
390
417
|
# The input document as base64-encoded bytes or an Amazon S3 object. If
|
391
418
|
# you use the AWS CLI to call Amazon Textract operations, you can't
|
392
|
-
# pass image bytes. The document must be an image in
|
419
|
+
# pass image bytes. The document must be an image in JPEG or PNG format.
|
393
420
|
#
|
394
|
-
# If you
|
395
|
-
# need to base64-encode image bytes passed using the `Bytes`
|
421
|
+
# If you're using an AWS SDK to call Amazon Textract, you might not
|
422
|
+
# need to base64-encode image bytes that are passed using the `Bytes`
|
423
|
+
# field.
|
396
424
|
#
|
397
425
|
# @return [Types::DetectDocumentTextResponse] Returns a {Seahorse::Client::Response response} object which responds to the following methods:
|
398
426
|
#
|
399
427
|
# * {Types::DetectDocumentTextResponse#document_metadata #document_metadata} => Types::DocumentMetadata
|
400
428
|
# * {Types::DetectDocumentTextResponse#blocks #blocks} => Array<Types::Block>
|
429
|
+
# * {Types::DetectDocumentTextResponse#detect_document_text_model_version #detect_document_text_model_version} => String
|
401
430
|
#
|
402
431
|
# @example Request syntax with placeholder values
|
403
432
|
#
|
@@ -439,6 +468,7 @@ module Aws::Textract
|
|
439
468
|
# resp.blocks[0].entity_types[0] #=> String, one of "KEY", "VALUE"
|
440
469
|
# resp.blocks[0].selection_status #=> String, one of "SELECTED", "NOT_SELECTED"
|
441
470
|
# resp.blocks[0].page #=> Integer
|
471
|
+
# resp.detect_document_text_model_version #=> String
|
442
472
|
#
|
443
473
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/DetectDocumentText AWS API Documentation
|
444
474
|
#
|
@@ -465,30 +495,34 @@ module Aws::Textract
|
|
465
495
|
# `GetDocumentAnalysis` returns an array of Block objects. The following
|
466
496
|
# types of information are returned:
|
467
497
|
#
|
468
|
-
# *
|
469
|
-
#
|
470
|
-
#
|
471
|
-
#
|
472
|
-
#
|
498
|
+
# * Form data (key-value pairs). The related information is returned in
|
499
|
+
# two Block objects, each of type `KEY_VALUE_SET`\: a KEY `Block`
|
500
|
+
# object and a VALUE `Block` object. For example, *Name: Ana Silva
|
501
|
+
# Carolina* contains a key and value. *Name:* is the key. *Ana Silva
|
502
|
+
# Carolina* is the value.
|
473
503
|
#
|
474
|
-
# * Table and table cell data. A TABLE Block object contains
|
475
|
-
# about a detected table. A CELL Block object is
|
476
|
-
# cell in a table.
|
504
|
+
# * Table and table cell data. A TABLE `Block` object contains
|
505
|
+
# information about a detected table. A CELL `Block` object is
|
506
|
+
# returned for each cell in a table.
|
477
507
|
#
|
478
|
-
# *
|
479
|
-
#
|
480
|
-
#
|
508
|
+
# * Lines and words of text. A LINE `Block` object contains one or more
|
509
|
+
# WORD `Block` objects. All lines and words that are detected in the
|
510
|
+
# document are returned (including text that doesn't have a
|
511
|
+
# relationship with the value of the `StartDocumentAnalysis`
|
512
|
+
# `FeatureTypes` input parameter).
|
481
513
|
#
|
482
|
-
#
|
483
|
-
#
|
514
|
+
# Selection elements such as check boxes and option buttons (radio
|
515
|
+
# buttons) can be detected in form data and in tables. A
|
516
|
+
# SELECTION\_ELEMENT `Block` object contains information about a
|
517
|
+
# selection element, including the selection status.
|
484
518
|
#
|
485
|
-
# Use the `MaxResults` parameter to limit the number of blocks
|
486
|
-
# If there are more results than specified in `MaxResults`,
|
487
|
-
# `NextToken` in the operation response contains a
|
488
|
-
# getting the next set of results. To get the next
|
489
|
-
# `GetDocumentAnalysis`, and populate the
|
490
|
-
# with the token value that's returned
|
491
|
-
# `GetDocumentAnalysis`.
|
519
|
+
# Use the `MaxResults` parameter to limit the number of blocks that are
|
520
|
+
# returned. If there are more results than specified in `MaxResults`,
|
521
|
+
# the value of `NextToken` in the operation response contains a
|
522
|
+
# pagination token for getting the next set of results. To get the next
|
523
|
+
# page of results, call `GetDocumentAnalysis`, and populate the
|
524
|
+
# `NextToken` request parameter with the token value that's returned
|
525
|
+
# from the previous call to `GetDocumentAnalysis`.
|
492
526
|
#
|
493
527
|
# For more information, see [Document Text Analysis][1].
|
494
528
|
#
|
@@ -498,7 +532,8 @@ module Aws::Textract
|
|
498
532
|
#
|
499
533
|
# @option params [required, String] :job_id
|
500
534
|
# A unique identifier for the text-detection job. The `JobId` is
|
501
|
-
# returned from `StartDocumentAnalysis`.
|
535
|
+
# returned from `StartDocumentAnalysis`. A `JobId` value is only valid
|
536
|
+
# for 7 days.
|
502
537
|
#
|
503
538
|
# @option params [Integer] :max_results
|
504
539
|
# The maximum number of results to return per paginated call. The
|
@@ -520,6 +555,7 @@ module Aws::Textract
|
|
520
555
|
# * {Types::GetDocumentAnalysisResponse#blocks #blocks} => Array<Types::Block>
|
521
556
|
# * {Types::GetDocumentAnalysisResponse#warnings #warnings} => Array<Types::Warning>
|
522
557
|
# * {Types::GetDocumentAnalysisResponse#status_message #status_message} => String
|
558
|
+
# * {Types::GetDocumentAnalysisResponse#analyze_document_model_version #analyze_document_model_version} => String
|
523
559
|
#
|
524
560
|
# @example Request syntax with placeholder values
|
525
561
|
#
|
@@ -563,6 +599,7 @@ module Aws::Textract
|
|
563
599
|
# resp.warnings[0].pages #=> Array
|
564
600
|
# resp.warnings[0].pages[0] #=> Integer
|
565
601
|
# resp.status_message #=> String
|
602
|
+
# resp.analyze_document_model_version #=> String
|
566
603
|
#
|
567
604
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/GetDocumentAnalysis AWS API Documentation
|
568
605
|
#
|
@@ -612,7 +649,8 @@ module Aws::Textract
|
|
612
649
|
#
|
613
650
|
# @option params [required, String] :job_id
|
614
651
|
# A unique identifier for the text detection job. The `JobId` is
|
615
|
-
# returned from `StartDocumentTextDetection`.
|
652
|
+
# returned from `StartDocumentTextDetection`. A `JobId` value is only
|
653
|
+
# valid for 7 days.
|
616
654
|
#
|
617
655
|
# @option params [Integer] :max_results
|
618
656
|
# The maximum number of results to return per paginated call. The
|
@@ -634,6 +672,7 @@ module Aws::Textract
|
|
634
672
|
# * {Types::GetDocumentTextDetectionResponse#blocks #blocks} => Array<Types::Block>
|
635
673
|
# * {Types::GetDocumentTextDetectionResponse#warnings #warnings} => Array<Types::Warning>
|
636
674
|
# * {Types::GetDocumentTextDetectionResponse#status_message #status_message} => String
|
675
|
+
# * {Types::GetDocumentTextDetectionResponse#detect_document_text_model_version #detect_document_text_model_version} => String
|
637
676
|
#
|
638
677
|
# @example Request syntax with placeholder values
|
639
678
|
#
|
@@ -677,6 +716,7 @@ module Aws::Textract
|
|
677
716
|
# resp.warnings[0].pages #=> Array
|
678
717
|
# resp.warnings[0].pages[0] #=> Integer
|
679
718
|
# resp.status_message #=> String
|
719
|
+
# resp.detect_document_text_model_version #=> String
|
680
720
|
#
|
681
721
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/GetDocumentTextDetection AWS API Documentation
|
682
722
|
#
|
@@ -687,14 +727,14 @@ module Aws::Textract
|
|
687
727
|
req.send_request(options)
|
688
728
|
end
|
689
729
|
|
690
|
-
# Starts asynchronous analysis of an input document for
|
691
|
-
# between detected items such as key
|
692
|
-
# selection elements.
|
730
|
+
# Starts the asynchronous analysis of an input document for
|
731
|
+
# relationships between detected items such as key-value pairs, tables,
|
732
|
+
# and selection elements.
|
693
733
|
#
|
694
|
-
# `StartDocumentAnalysis` can analyze text in documents that are in
|
695
|
-
# PNG, and PDF format. The documents are stored in an Amazon S3
|
696
|
-
# Use DocumentLocation to specify the bucket name and file name
|
697
|
-
# document.
|
734
|
+
# `StartDocumentAnalysis` can analyze text in documents that are in
|
735
|
+
# JPEG, PNG, and PDF format. The documents are stored in an Amazon S3
|
736
|
+
# bucket. Use DocumentLocation to specify the bucket name and file name
|
737
|
+
# of the document.
|
698
738
|
#
|
699
739
|
# `StartDocumentAnalysis` returns a job identifier (`JobId`) that you
|
700
740
|
# use to get the results of the operation. When text analysis is
|
@@ -718,22 +758,27 @@ module Aws::Textract
|
|
718
758
|
# @option params [required, Array<String>] :feature_types
|
719
759
|
# A list of the types of analysis to perform. Add TABLES to the list to
|
720
760
|
# return information about the tables that are detected in the input
|
721
|
-
# document. Add FORMS to return detected
|
722
|
-
#
|
723
|
-
#
|
724
|
-
#
|
761
|
+
# document. Add FORMS to return detected form data. To perform both
|
762
|
+
# types of analysis, add TABLES and FORMS to `FeatureTypes`. All lines
|
763
|
+
# and words detected in the document are included in the response
|
764
|
+
# (including text that isn't related to the value of `FeatureTypes`).
|
725
765
|
#
|
726
766
|
# @option params [String] :client_request_token
|
727
767
|
# The idempotent token that you use to identify the start request. If
|
728
768
|
# you use the same token with multiple `StartDocumentAnalysis` requests,
|
729
769
|
# the same `JobId` is returned. Use `ClientRequestToken` to prevent the
|
730
|
-
# same job from being accidentally started more than once.
|
770
|
+
# same job from being accidentally started more than once. For more
|
771
|
+
# information, see [Calling Amazon Textract Asynchronous Operations][1].
|
772
|
+
#
|
773
|
+
#
|
774
|
+
#
|
775
|
+
# [1]: https://docs.aws.amazon.com/textract/latest/dg/api-async.html
|
731
776
|
#
|
732
777
|
# @option params [String] :job_tag
|
733
|
-
# An identifier you specify that's included in the completion
|
734
|
-
# notification
|
735
|
-
#
|
736
|
-
#
|
778
|
+
# An identifier that you specify that's included in the completion
|
779
|
+
# notification published to the Amazon SNS topic. For example, you can
|
780
|
+
# use `JobTag` to identify the type of document that the completion
|
781
|
+
# notification corresponds to (such as a tax form or a receipt).
|
737
782
|
#
|
738
783
|
# @option params [Types::NotificationChannel] :notification_channel
|
739
784
|
# The Amazon SNS topic ARN that you want Amazon Textract to publish the
|
@@ -780,7 +825,7 @@ module Aws::Textract
|
|
780
825
|
# text.
|
781
826
|
#
|
782
827
|
# `StartDocumentTextDetection` can analyze text in documents that are in
|
783
|
-
#
|
828
|
+
# JPEG, PNG, and PDF format. The documents are stored in an Amazon S3
|
784
829
|
# bucket. Use DocumentLocation to specify the bucket name and file name
|
785
830
|
# of the document.
|
786
831
|
#
|
@@ -808,12 +853,18 @@ module Aws::Textract
|
|
808
853
|
# you use the same token with multiple `StartDocumentTextDetection`
|
809
854
|
# requests, the same `JobId` is returned. Use `ClientRequestToken` to
|
810
855
|
# prevent the same job from being accidentally started more than once.
|
856
|
+
# For more information, see [Calling Amazon Textract Asynchronous
|
857
|
+
# Operations][1].
|
858
|
+
#
|
859
|
+
#
|
860
|
+
#
|
861
|
+
# [1]: https://docs.aws.amazon.com/textract/latest/dg/api-async.html
|
811
862
|
#
|
812
863
|
# @option params [String] :job_tag
|
813
|
-
# An identifier you specify that's included in the completion
|
814
|
-
# notification
|
815
|
-
#
|
816
|
-
#
|
864
|
+
# An identifier that you specify that's included in the completion
|
865
|
+
# notification published to the Amazon SNS topic. For example, you can
|
866
|
+
# use `JobTag` to identify the type of document that the completion
|
867
|
+
# notification corresponds to (such as a tax form or a receipt).
|
817
868
|
#
|
818
869
|
# @option params [Types::NotificationChannel] :notification_channel
|
819
870
|
# The Amazon SNS topic ARN that you want Amazon Textract to publish the
|
@@ -867,7 +918,7 @@ module Aws::Textract
|
|
867
918
|
params: params,
|
868
919
|
config: config)
|
869
920
|
context[:gem_name] = 'aws-sdk-textract'
|
870
|
-
context[:gem_version] = '1.
|
921
|
+
context[:gem_version] = '1.12.0'
|
871
922
|
Seahorse::Client::Request.new(handlers, context)
|
872
923
|
end
|
873
924
|
|
@@ -20,6 +20,8 @@ module Aws::Textract
|
|
20
20
|
BlockType = Shapes::StringShape.new(name: 'BlockType')
|
21
21
|
BoundingBox = Shapes::StructureShape.new(name: 'BoundingBox')
|
22
22
|
ClientRequestToken = Shapes::StringShape.new(name: 'ClientRequestToken')
|
23
|
+
ContentClassifier = Shapes::StringShape.new(name: 'ContentClassifier')
|
24
|
+
ContentClassifiers = Shapes::ListShape.new(name: 'ContentClassifiers')
|
23
25
|
DetectDocumentTextRequest = Shapes::StructureShape.new(name: 'DetectDocumentTextRequest')
|
24
26
|
DetectDocumentTextResponse = Shapes::StructureShape.new(name: 'DetectDocumentTextResponse')
|
25
27
|
Document = Shapes::StructureShape.new(name: 'Document')
|
@@ -32,11 +34,21 @@ module Aws::Textract
|
|
32
34
|
FeatureType = Shapes::StringShape.new(name: 'FeatureType')
|
33
35
|
FeatureTypes = Shapes::ListShape.new(name: 'FeatureTypes')
|
34
36
|
Float = Shapes::FloatShape.new(name: 'Float')
|
37
|
+
FlowDefinitionArn = Shapes::StringShape.new(name: 'FlowDefinitionArn')
|
35
38
|
Geometry = Shapes::StructureShape.new(name: 'Geometry')
|
36
39
|
GetDocumentAnalysisRequest = Shapes::StructureShape.new(name: 'GetDocumentAnalysisRequest')
|
37
40
|
GetDocumentAnalysisResponse = Shapes::StructureShape.new(name: 'GetDocumentAnalysisResponse')
|
38
41
|
GetDocumentTextDetectionRequest = Shapes::StructureShape.new(name: 'GetDocumentTextDetectionRequest')
|
39
42
|
GetDocumentTextDetectionResponse = Shapes::StructureShape.new(name: 'GetDocumentTextDetectionResponse')
|
43
|
+
HumanLoopActivationConditionsEvaluationResults = Shapes::StringShape.new(name: 'HumanLoopActivationConditionsEvaluationResults')
|
44
|
+
HumanLoopActivationOutput = Shapes::StructureShape.new(name: 'HumanLoopActivationOutput')
|
45
|
+
HumanLoopActivationReason = Shapes::StringShape.new(name: 'HumanLoopActivationReason')
|
46
|
+
HumanLoopActivationReasons = Shapes::ListShape.new(name: 'HumanLoopActivationReasons')
|
47
|
+
HumanLoopArn = Shapes::StringShape.new(name: 'HumanLoopArn')
|
48
|
+
HumanLoopConfig = Shapes::StructureShape.new(name: 'HumanLoopConfig')
|
49
|
+
HumanLoopDataAttributes = Shapes::StructureShape.new(name: 'HumanLoopDataAttributes')
|
50
|
+
HumanLoopName = Shapes::StringShape.new(name: 'HumanLoopName')
|
51
|
+
HumanLoopQuotaExceededException = Shapes::StructureShape.new(name: 'HumanLoopQuotaExceededException')
|
40
52
|
IdList = Shapes::ListShape.new(name: 'IdList')
|
41
53
|
IdempotentParameterMismatchException = Shapes::StructureShape.new(name: 'IdempotentParameterMismatchException')
|
42
54
|
ImageBlob = Shapes::BlobShape.new(name: 'ImageBlob')
|
@@ -81,10 +93,13 @@ module Aws::Textract
|
|
81
93
|
|
82
94
|
AnalyzeDocumentRequest.add_member(:document, Shapes::ShapeRef.new(shape: Document, required: true, location_name: "Document"))
|
83
95
|
AnalyzeDocumentRequest.add_member(:feature_types, Shapes::ShapeRef.new(shape: FeatureTypes, required: true, location_name: "FeatureTypes"))
|
96
|
+
AnalyzeDocumentRequest.add_member(:human_loop_config, Shapes::ShapeRef.new(shape: HumanLoopConfig, location_name: "HumanLoopConfig"))
|
84
97
|
AnalyzeDocumentRequest.struct_class = Types::AnalyzeDocumentRequest
|
85
98
|
|
86
99
|
AnalyzeDocumentResponse.add_member(:document_metadata, Shapes::ShapeRef.new(shape: DocumentMetadata, location_name: "DocumentMetadata"))
|
87
100
|
AnalyzeDocumentResponse.add_member(:blocks, Shapes::ShapeRef.new(shape: BlockList, location_name: "Blocks"))
|
101
|
+
AnalyzeDocumentResponse.add_member(:human_loop_activation_output, Shapes::ShapeRef.new(shape: HumanLoopActivationOutput, location_name: "HumanLoopActivationOutput"))
|
102
|
+
AnalyzeDocumentResponse.add_member(:analyze_document_model_version, Shapes::ShapeRef.new(shape: String, location_name: "AnalyzeDocumentModelVersion"))
|
88
103
|
AnalyzeDocumentResponse.struct_class = Types::AnalyzeDocumentResponse
|
89
104
|
|
90
105
|
Block.add_member(:block_type, Shapes::ShapeRef.new(shape: BlockType, location_name: "BlockType"))
|
@@ -110,11 +125,14 @@ module Aws::Textract
|
|
110
125
|
BoundingBox.add_member(:top, Shapes::ShapeRef.new(shape: Float, location_name: "Top"))
|
111
126
|
BoundingBox.struct_class = Types::BoundingBox
|
112
127
|
|
128
|
+
ContentClassifiers.member = Shapes::ShapeRef.new(shape: ContentClassifier)
|
129
|
+
|
113
130
|
DetectDocumentTextRequest.add_member(:document, Shapes::ShapeRef.new(shape: Document, required: true, location_name: "Document"))
|
114
131
|
DetectDocumentTextRequest.struct_class = Types::DetectDocumentTextRequest
|
115
132
|
|
116
133
|
DetectDocumentTextResponse.add_member(:document_metadata, Shapes::ShapeRef.new(shape: DocumentMetadata, location_name: "DocumentMetadata"))
|
117
134
|
DetectDocumentTextResponse.add_member(:blocks, Shapes::ShapeRef.new(shape: BlockList, location_name: "Blocks"))
|
135
|
+
DetectDocumentTextResponse.add_member(:detect_document_text_model_version, Shapes::ShapeRef.new(shape: String, location_name: "DetectDocumentTextModelVersion"))
|
118
136
|
DetectDocumentTextResponse.struct_class = Types::DetectDocumentTextResponse
|
119
137
|
|
120
138
|
Document.add_member(:bytes, Shapes::ShapeRef.new(shape: ImageBlob, location_name: "Bytes"))
|
@@ -146,6 +164,7 @@ module Aws::Textract
|
|
146
164
|
GetDocumentAnalysisResponse.add_member(:blocks, Shapes::ShapeRef.new(shape: BlockList, location_name: "Blocks"))
|
147
165
|
GetDocumentAnalysisResponse.add_member(:warnings, Shapes::ShapeRef.new(shape: Warnings, location_name: "Warnings"))
|
148
166
|
GetDocumentAnalysisResponse.add_member(:status_message, Shapes::ShapeRef.new(shape: StatusMessage, location_name: "StatusMessage"))
|
167
|
+
GetDocumentAnalysisResponse.add_member(:analyze_document_model_version, Shapes::ShapeRef.new(shape: String, location_name: "AnalyzeDocumentModelVersion"))
|
149
168
|
GetDocumentAnalysisResponse.struct_class = Types::GetDocumentAnalysisResponse
|
150
169
|
|
151
170
|
GetDocumentTextDetectionRequest.add_member(:job_id, Shapes::ShapeRef.new(shape: JobId, required: true, location_name: "JobId"))
|
@@ -159,8 +178,29 @@ module Aws::Textract
|
|
159
178
|
GetDocumentTextDetectionResponse.add_member(:blocks, Shapes::ShapeRef.new(shape: BlockList, location_name: "Blocks"))
|
160
179
|
GetDocumentTextDetectionResponse.add_member(:warnings, Shapes::ShapeRef.new(shape: Warnings, location_name: "Warnings"))
|
161
180
|
GetDocumentTextDetectionResponse.add_member(:status_message, Shapes::ShapeRef.new(shape: StatusMessage, location_name: "StatusMessage"))
|
181
|
+
GetDocumentTextDetectionResponse.add_member(:detect_document_text_model_version, Shapes::ShapeRef.new(shape: String, location_name: "DetectDocumentTextModelVersion"))
|
162
182
|
GetDocumentTextDetectionResponse.struct_class = Types::GetDocumentTextDetectionResponse
|
163
183
|
|
184
|
+
HumanLoopActivationOutput.add_member(:human_loop_arn, Shapes::ShapeRef.new(shape: HumanLoopArn, location_name: "HumanLoopArn"))
|
185
|
+
HumanLoopActivationOutput.add_member(:human_loop_activation_reasons, Shapes::ShapeRef.new(shape: HumanLoopActivationReasons, location_name: "HumanLoopActivationReasons"))
|
186
|
+
HumanLoopActivationOutput.add_member(:human_loop_activation_conditions_evaluation_results, Shapes::ShapeRef.new(shape: HumanLoopActivationConditionsEvaluationResults, location_name: "HumanLoopActivationConditionsEvaluationResults", metadata: {"jsonvalue"=>true}))
|
187
|
+
HumanLoopActivationOutput.struct_class = Types::HumanLoopActivationOutput
|
188
|
+
|
189
|
+
HumanLoopActivationReasons.member = Shapes::ShapeRef.new(shape: HumanLoopActivationReason)
|
190
|
+
|
191
|
+
HumanLoopConfig.add_member(:human_loop_name, Shapes::ShapeRef.new(shape: HumanLoopName, required: true, location_name: "HumanLoopName"))
|
192
|
+
HumanLoopConfig.add_member(:flow_definition_arn, Shapes::ShapeRef.new(shape: FlowDefinitionArn, required: true, location_name: "FlowDefinitionArn"))
|
193
|
+
HumanLoopConfig.add_member(:data_attributes, Shapes::ShapeRef.new(shape: HumanLoopDataAttributes, location_name: "DataAttributes"))
|
194
|
+
HumanLoopConfig.struct_class = Types::HumanLoopConfig
|
195
|
+
|
196
|
+
HumanLoopDataAttributes.add_member(:content_classifiers, Shapes::ShapeRef.new(shape: ContentClassifiers, location_name: "ContentClassifiers"))
|
197
|
+
HumanLoopDataAttributes.struct_class = Types::HumanLoopDataAttributes
|
198
|
+
|
199
|
+
HumanLoopQuotaExceededException.add_member(:resource_type, Shapes::ShapeRef.new(shape: String, location_name: "ResourceType"))
|
200
|
+
HumanLoopQuotaExceededException.add_member(:quota_code, Shapes::ShapeRef.new(shape: String, location_name: "QuotaCode"))
|
201
|
+
HumanLoopQuotaExceededException.add_member(:service_code, Shapes::ShapeRef.new(shape: String, location_name: "ServiceCode"))
|
202
|
+
HumanLoopQuotaExceededException.struct_class = Types::HumanLoopQuotaExceededException
|
203
|
+
|
164
204
|
IdList.member = Shapes::ShapeRef.new(shape: NonEmptyString)
|
165
205
|
|
166
206
|
NotificationChannel.add_member(:sns_topic_arn, Shapes::ShapeRef.new(shape: SNSTopicArn, required: true, location_name: "SNSTopicArn"))
|
@@ -244,6 +284,7 @@ module Aws::Textract
|
|
244
284
|
o.errors << Shapes::ShapeRef.new(shape: ProvisionedThroughputExceededException)
|
245
285
|
o.errors << Shapes::ShapeRef.new(shape: InternalServerError)
|
246
286
|
o.errors << Shapes::ShapeRef.new(shape: ThrottlingException)
|
287
|
+
o.errors << Shapes::ShapeRef.new(shape: HumanLoopQuotaExceededException)
|
247
288
|
end)
|
248
289
|
|
249
290
|
api.add_operation(:detect_document_text, Seahorse::Model::Operation.new.tap do |o|
|
@@ -10,5 +10,31 @@ module Aws::Textract
|
|
10
10
|
|
11
11
|
extend Aws::Errors::DynamicErrors
|
12
12
|
|
13
|
+
class HumanLoopQuotaExceededException < ServiceError
|
14
|
+
|
15
|
+
# @param [Seahorse::Client::RequestContext] context
|
16
|
+
# @param [String] message
|
17
|
+
# @param [Aws::Textract::Types::HumanLoopQuotaExceededException] data
|
18
|
+
def initialize(context, message, data = Aws::EmptyStructure.new)
|
19
|
+
super(context, message, data)
|
20
|
+
end
|
21
|
+
|
22
|
+
# @return [String]
|
23
|
+
def resource_type
|
24
|
+
@data[:resource_type]
|
25
|
+
end
|
26
|
+
|
27
|
+
# @return [String]
|
28
|
+
def quota_code
|
29
|
+
@data[:quota_code]
|
30
|
+
end
|
31
|
+
|
32
|
+
# @return [String]
|
33
|
+
def service_code
|
34
|
+
@data[:service_code]
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
|
13
39
|
end
|
14
40
|
end
|
@@ -21,31 +21,47 @@ module Aws::Textract
|
|
21
21
|
# },
|
22
22
|
# },
|
23
23
|
# feature_types: ["TABLES"], # required, accepts TABLES, FORMS
|
24
|
+
# human_loop_config: {
|
25
|
+
# human_loop_name: "HumanLoopName", # required
|
26
|
+
# flow_definition_arn: "FlowDefinitionArn", # required
|
27
|
+
# data_attributes: {
|
28
|
+
# content_classifiers: ["FreeOfPersonallyIdentifiableInformation"], # accepts FreeOfPersonallyIdentifiableInformation, FreeOfAdultContent
|
29
|
+
# },
|
30
|
+
# },
|
24
31
|
# }
|
25
32
|
#
|
26
33
|
# @!attribute [rw] document
|
27
34
|
# The input document as base64-encoded bytes or an Amazon S3 object.
|
28
35
|
# If you use the AWS CLI to call Amazon Textract operations, you
|
29
|
-
# can't pass image bytes. The document must be an image in
|
30
|
-
# format.
|
36
|
+
# can't pass image bytes. The document must be an image in JPEG or
|
37
|
+
# PNG format.
|
31
38
|
#
|
32
|
-
# If you
|
33
|
-
# need to base64-encode image bytes passed using the `Bytes`
|
39
|
+
# If you're using an AWS SDK to call Amazon Textract, you might not
|
40
|
+
# need to base64-encode image bytes that are passed using the `Bytes`
|
41
|
+
# field.
|
34
42
|
# @return [Types::Document]
|
35
43
|
#
|
36
44
|
# @!attribute [rw] feature_types
|
37
45
|
# A list of the types of analysis to perform. Add TABLES to the list
|
38
|
-
# to return information about the tables detected in the
|
39
|
-
# document. Add FORMS to return detected
|
40
|
-
#
|
41
|
-
#
|
46
|
+
# to return information about the tables that are detected in the
|
47
|
+
# input document. Add FORMS to return detected form data. To perform
|
48
|
+
# both types of analysis, add TABLES and FORMS to `FeatureTypes`. All
|
49
|
+
# lines and words detected in the document are included in the
|
50
|
+
# response (including text that isn't related to the value of
|
51
|
+
# `FeatureTypes`).
|
42
52
|
# @return [Array<String>]
|
43
53
|
#
|
54
|
+
# @!attribute [rw] human_loop_config
|
55
|
+
# Sets the configuration for the human in the loop workflow for
|
56
|
+
# analyzing documents.
|
57
|
+
# @return [Types::HumanLoopConfig]
|
58
|
+
#
|
44
59
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/AnalyzeDocumentRequest AWS API Documentation
|
45
60
|
#
|
46
61
|
class AnalyzeDocumentRequest < Struct.new(
|
47
62
|
:document,
|
48
|
-
:feature_types
|
63
|
+
:feature_types,
|
64
|
+
:human_loop_config)
|
49
65
|
include Aws::Structure
|
50
66
|
end
|
51
67
|
|
@@ -55,24 +71,34 @@ module Aws::Textract
|
|
55
71
|
# @return [Types::DocumentMetadata]
|
56
72
|
#
|
57
73
|
# @!attribute [rw] blocks
|
58
|
-
# The
|
74
|
+
# The items that are detected and analyzed by `AnalyzeDocument`.
|
59
75
|
# @return [Array<Types::Block>]
|
60
76
|
#
|
77
|
+
# @!attribute [rw] human_loop_activation_output
|
78
|
+
# Shows the results of the human in the loop evaluation.
|
79
|
+
# @return [Types::HumanLoopActivationOutput]
|
80
|
+
#
|
81
|
+
# @!attribute [rw] analyze_document_model_version
|
82
|
+
# The version of the model used to analyze the document.
|
83
|
+
# @return [String]
|
84
|
+
#
|
61
85
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/AnalyzeDocumentResponse AWS API Documentation
|
62
86
|
#
|
63
87
|
class AnalyzeDocumentResponse < Struct.new(
|
64
88
|
:document_metadata,
|
65
|
-
:blocks
|
89
|
+
:blocks,
|
90
|
+
:human_loop_activation_output,
|
91
|
+
:analyze_document_model_version)
|
66
92
|
include Aws::Structure
|
67
93
|
end
|
68
94
|
|
69
95
|
# A `Block` represents items that are recognized in a document within a
|
70
96
|
# group of pixels close to each other. The information returned in a
|
71
|
-
# `Block` depends on the type of operation. In
|
72
|
-
# (for example DetectDocumentText), you get information about
|
73
|
-
# detected words and lines of text. In text analysis (for example
|
97
|
+
# `Block` object depends on the type of operation. In text detection for
|
98
|
+
# documents (for example DetectDocumentText), you get information about
|
99
|
+
# the detected words and lines of text. In text analysis (for example
|
74
100
|
# AnalyzeDocument), you can also get information about the fields,
|
75
|
-
# tables and selection elements that are detected in the document.
|
101
|
+
# tables, and selection elements that are detected in the document.
|
76
102
|
#
|
77
103
|
# An array of `Block` objects is returned by both synchronous and
|
78
104
|
# asynchronous operations. In synchronous operations, such as
|
@@ -87,51 +113,51 @@ module Aws::Textract
|
|
87
113
|
# [1]: https://docs.aws.amazon.com/textract/latest/dg/how-it-works.html
|
88
114
|
#
|
89
115
|
# @!attribute [rw] block_type
|
90
|
-
# The type of text that's recognized
|
91
|
-
#
|
116
|
+
# The type of text item that's recognized. In operations for text
|
117
|
+
# detection, the following types are returned:
|
92
118
|
#
|
93
|
-
# * *PAGE* - Contains a list of the LINE Block objects that are
|
119
|
+
# * *PAGE* - Contains a list of the LINE `Block` objects that are
|
94
120
|
# detected on a document page.
|
95
121
|
#
|
96
122
|
# * *WORD* - A word detected on a document page. A word is one or more
|
97
123
|
# ISO basic Latin script characters that aren't separated by
|
98
124
|
# spaces.
|
99
125
|
#
|
100
|
-
# * *LINE* - A string of tab-delimited, contiguous words that
|
126
|
+
# * *LINE* - A string of tab-delimited, contiguous words that are
|
101
127
|
# detected on a document page.
|
102
128
|
#
|
103
129
|
# In text analysis operations, the following types are returned:
|
104
130
|
#
|
105
|
-
# * *PAGE* - Contains a list of child Block objects that are
|
106
|
-
# on a document page.
|
131
|
+
# * *PAGE* - Contains a list of child `Block` objects that are
|
132
|
+
# detected on a document page.
|
107
133
|
#
|
108
|
-
# * *KEY\_VALUE\_SET* - Stores the KEY and VALUE Block objects for
|
109
|
-
#
|
110
|
-
# field to determine if a KEY\_VALUE\_SET object is a
|
111
|
-
# object or a VALUE Block object.
|
134
|
+
# * *KEY\_VALUE\_SET* - Stores the KEY and VALUE `Block` objects for
|
135
|
+
# linked text that's detected on a document page. Use the
|
136
|
+
# `EntityType` field to determine if a KEY\_VALUE\_SET object is a
|
137
|
+
# KEY `Block` object or a VALUE `Block` object.
|
112
138
|
#
|
113
|
-
# * *WORD* - A word detected on a document page. A word is one
|
114
|
-
# ISO basic Latin script characters that aren't separated
|
115
|
-
#
|
139
|
+
# * *WORD* - A word that's detected on a document page. A word is one
|
140
|
+
# or more ISO basic Latin script characters that aren't separated
|
141
|
+
# by spaces.
|
116
142
|
#
|
117
|
-
# * *LINE* - A string of tab-delimited, contiguous words that
|
143
|
+
# * *LINE* - A string of tab-delimited, contiguous words that are
|
118
144
|
# detected on a document page.
|
119
145
|
#
|
120
146
|
# * *TABLE* - A table that's detected on a document page. A table is
|
121
|
-
#
|
122
|
-
# cell span of
|
147
|
+
# grid-based information with two or more rows or columns, with a
|
148
|
+
# cell span of one row and one column each.
|
123
149
|
#
|
124
150
|
# * *CELL* - A cell within a detected table. The cell is the parent of
|
125
151
|
# the block that contains the text in the cell.
|
126
152
|
#
|
127
|
-
# * *SELECTION\_ELEMENT* - A
|
128
|
-
# or
|
129
|
-
# `SelectionStatus` to determine the
|
130
|
-
# element.
|
153
|
+
# * *SELECTION\_ELEMENT* - A selection element such as an option
|
154
|
+
# button (radio button) or a check box that's detected on a
|
155
|
+
# document page. Use the value of `SelectionStatus` to determine the
|
156
|
+
# status of the selection element.
|
131
157
|
# @return [String]
|
132
158
|
#
|
133
159
|
# @!attribute [rw] confidence
|
134
|
-
# The confidence that Amazon Textract has in the accuracy of the
|
160
|
+
# The confidence score that Amazon Textract has in the accuracy of the
|
135
161
|
# recognized text and the accuracy of the geometry points around the
|
136
162
|
# recognized text.
|
137
163
|
# @return [Float]
|
@@ -153,13 +179,17 @@ module Aws::Textract
|
|
153
179
|
# @return [Integer]
|
154
180
|
#
|
155
181
|
# @!attribute [rw] row_span
|
156
|
-
# The number of rows that a table spans.
|
157
|
-
#
|
182
|
+
# The number of rows that a table cell spans. Currently this value is
|
183
|
+
# always 1, even if the number of rows spanned is greater than 1.
|
184
|
+
# `RowSpan` isn't returned by `DetectDocumentText` and
|
185
|
+
# `GetDocumentTextDetection`.
|
158
186
|
# @return [Integer]
|
159
187
|
#
|
160
188
|
# @!attribute [rw] column_span
|
161
|
-
# The number of columns that a table cell spans.
|
162
|
-
#
|
189
|
+
# The number of columns that a table cell spans. Currently this value
|
190
|
+
# is always 1, even if the number of columns spanned is greater than
|
191
|
+
# 1. `ColumnSpan` isn't returned by `DetectDocumentText` and
|
192
|
+
# `GetDocumentTextDetection`.
|
163
193
|
# @return [Integer]
|
164
194
|
#
|
165
195
|
# @!attribute [rw] geometry
|
@@ -174,7 +204,7 @@ module Aws::Textract
|
|
174
204
|
# @return [String]
|
175
205
|
#
|
176
206
|
# @!attribute [rw] relationships
|
177
|
-
# A list of child blocks of the current block. For example a LINE
|
207
|
+
# A list of child blocks of the current block. For example, a LINE
|
178
208
|
# object has child blocks for each WORD block that's part of the line
|
179
209
|
# of text. There aren't Relationship objects in the list for
|
180
210
|
# relationships that don't exist, such as when the current block has
|
@@ -197,18 +227,18 @@ module Aws::Textract
|
|
197
227
|
# @return [Array<String>]
|
198
228
|
#
|
199
229
|
# @!attribute [rw] selection_status
|
200
|
-
# The selection status of a
|
201
|
-
# or
|
230
|
+
# The selection status of a selection element, such as an option
|
231
|
+
# button or check box.
|
202
232
|
# @return [String]
|
203
233
|
#
|
204
234
|
# @!attribute [rw] page
|
205
|
-
# The page
|
235
|
+
# The page on which a block was detected. `Page` is returned by
|
206
236
|
# asynchronous operations. Page values greater than 1 are only
|
207
|
-
# returned for
|
208
|
-
# image (
|
209
|
-
#
|
210
|
-
#
|
211
|
-
#
|
237
|
+
# returned for multipage documents that are in PDF format. A scanned
|
238
|
+
# image (JPEG/PNG), even if it contains multiple document pages, is
|
239
|
+
# considered to be a single-page document. The value of `Page` is
|
240
|
+
# always 1. Synchronous operations don't return `Page` because every
|
241
|
+
# input document is considered to be a single-page document.
|
212
242
|
# @return [Integer]
|
213
243
|
#
|
214
244
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/Block AWS API Documentation
|
@@ -230,11 +260,11 @@ module Aws::Textract
|
|
230
260
|
include Aws::Structure
|
231
261
|
end
|
232
262
|
|
233
|
-
# The bounding box around the
|
234
|
-
# table cell on a document page. The `left`
|
235
|
-
# (y-coordinate) are coordinates that represent
|
236
|
-
# of the bounding box. Note that the upper-left
|
237
|
-
# the origin (0,0).
|
263
|
+
# The bounding box around the detected page, text, key-value pair,
|
264
|
+
# table, table cell, or selection element on a document page. The `left`
|
265
|
+
# (x-coordinate) and `top` (y-coordinate) are coordinates that represent
|
266
|
+
# the top and left sides of the bounding box. Note that the upper-left
|
267
|
+
# corner of the image is the origin (0,0).
|
238
268
|
#
|
239
269
|
# The `top` and `left` values returned are ratios of the overall
|
240
270
|
# document page size. For example, if the input image is 700 x 200
|
@@ -294,11 +324,12 @@ module Aws::Textract
|
|
294
324
|
# @!attribute [rw] document
|
295
325
|
# The input document as base64-encoded bytes or an Amazon S3 object.
|
296
326
|
# If you use the AWS CLI to call Amazon Textract operations, you
|
297
|
-
# can't pass image bytes. The document must be an image in
|
298
|
-
# format.
|
327
|
+
# can't pass image bytes. The document must be an image in JPEG or
|
328
|
+
# PNG format.
|
299
329
|
#
|
300
|
-
# If you
|
301
|
-
# need to base64-encode image bytes passed using the `Bytes`
|
330
|
+
# If you're using an AWS SDK to call Amazon Textract, you might not
|
331
|
+
# need to base64-encode image bytes that are passed using the `Bytes`
|
332
|
+
# field.
|
302
333
|
# @return [Types::Document]
|
303
334
|
#
|
304
335
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/DetectDocumentTextRequest AWS API Documentation
|
@@ -309,20 +340,24 @@ module Aws::Textract
|
|
309
340
|
end
|
310
341
|
|
311
342
|
# @!attribute [rw] document_metadata
|
312
|
-
# Metadata about the document.
|
313
|
-
# detected in the document.
|
343
|
+
# Metadata about the document. It contains the number of pages that
|
344
|
+
# are detected in the document.
|
314
345
|
# @return [Types::DocumentMetadata]
|
315
346
|
#
|
316
347
|
# @!attribute [rw] blocks
|
317
|
-
# An array of Block objects
|
318
|
-
# document.
|
348
|
+
# An array of `Block` objects that contain the text that's detected
|
349
|
+
# in the document.
|
319
350
|
# @return [Array<Types::Block>]
|
320
351
|
#
|
352
|
+
# @!attribute [rw] detect_document_text_model_version
|
353
|
+
# @return [String]
|
354
|
+
#
|
321
355
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/DetectDocumentTextResponse AWS API Documentation
|
322
356
|
#
|
323
357
|
class DetectDocumentTextResponse < Struct.new(
|
324
358
|
:document_metadata,
|
325
|
-
:blocks
|
359
|
+
:blocks,
|
360
|
+
:detect_document_text_model_version)
|
326
361
|
include Aws::Structure
|
327
362
|
end
|
328
363
|
|
@@ -363,17 +398,17 @@ module Aws::Textract
|
|
363
398
|
# }
|
364
399
|
#
|
365
400
|
# @!attribute [rw] bytes
|
366
|
-
# A blob of
|
401
|
+
# A blob of base64-encoded document bytes. The maximum size of a
|
367
402
|
# document that's provided in a blob of bytes is 5 MB. The document
|
368
|
-
# bytes must be in PNG or
|
403
|
+
# bytes must be in PNG or JPEG format.
|
369
404
|
#
|
370
|
-
# If you
|
405
|
+
# If you're using an AWS SDK to call Amazon Textract, you might not
|
371
406
|
# need to base64-encode image bytes passed using the `Bytes` field.
|
372
407
|
# @return [String]
|
373
408
|
#
|
374
409
|
# @!attribute [rw] s3_object
|
375
410
|
# Identifies an S3 object as the document source. The maximum size of
|
376
|
-
# a document stored in an S3 bucket is 5 MB.
|
411
|
+
# a document that's stored in an S3 bucket is 5 MB.
|
377
412
|
# @return [Types::S3Object]
|
378
413
|
#
|
379
414
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/Document AWS API Documentation
|
@@ -387,7 +422,7 @@ module Aws::Textract
|
|
387
422
|
# The Amazon S3 bucket that contains the document to be processed. It's
|
388
423
|
# used by asynchronous operations such as StartDocumentTextDetection.
|
389
424
|
#
|
390
|
-
# The input document can be an image file in
|
425
|
+
# The input document can be an image file in JPEG or PNG format. It can
|
391
426
|
# also be a file in PDF format.
|
392
427
|
#
|
393
428
|
# @note When making an API call, you may pass DocumentLocation
|
@@ -415,7 +450,7 @@ module Aws::Textract
|
|
415
450
|
# Information about the input document.
|
416
451
|
#
|
417
452
|
# @!attribute [rw] pages
|
418
|
-
# The number of pages detected in the document.
|
453
|
+
# The number of pages that are detected in the document.
|
419
454
|
# @return [Integer]
|
420
455
|
#
|
421
456
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/DocumentMetadata AWS API Documentation
|
@@ -425,17 +460,18 @@ module Aws::Textract
|
|
425
460
|
include Aws::Structure
|
426
461
|
end
|
427
462
|
|
428
|
-
# Information about where
|
429
|
-
#
|
463
|
+
# Information about where the following items are located on a document
|
464
|
+
# page: detected page, text, key-value pairs, tables, table cells, and
|
465
|
+
# selection elements.
|
430
466
|
#
|
431
467
|
# @!attribute [rw] bounding_box
|
432
468
|
# An axis-aligned coarse representation of the location of the
|
433
|
-
# recognized
|
469
|
+
# recognized item on the document page.
|
434
470
|
# @return [Types::BoundingBox]
|
435
471
|
#
|
436
472
|
# @!attribute [rw] polygon
|
437
473
|
# Within the bounding box, a fine-grained polygon around the
|
438
|
-
# recognized
|
474
|
+
# recognized item.
|
439
475
|
# @return [Array<Types::Point>]
|
440
476
|
#
|
441
477
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/Geometry AWS API Documentation
|
@@ -457,7 +493,8 @@ module Aws::Textract
|
|
457
493
|
#
|
458
494
|
# @!attribute [rw] job_id
|
459
495
|
# A unique identifier for the text-detection job. The `JobId` is
|
460
|
-
# returned from `StartDocumentAnalysis`.
|
496
|
+
# returned from `StartDocumentAnalysis`. A `JobId` value is only valid
|
497
|
+
# for 7 days.
|
461
498
|
# @return [String]
|
462
499
|
#
|
463
500
|
# @!attribute [rw] max_results
|
@@ -500,16 +537,19 @@ module Aws::Textract
|
|
500
537
|
# @return [String]
|
501
538
|
#
|
502
539
|
# @!attribute [rw] blocks
|
503
|
-
# The results of the text
|
540
|
+
# The results of the text-analysis operation.
|
504
541
|
# @return [Array<Types::Block>]
|
505
542
|
#
|
506
543
|
# @!attribute [rw] warnings
|
507
|
-
# A list of warnings that occurred during the document
|
544
|
+
# A list of warnings that occurred during the document-analysis
|
508
545
|
# operation.
|
509
546
|
# @return [Array<Types::Warning>]
|
510
547
|
#
|
511
548
|
# @!attribute [rw] status_message
|
512
|
-
# The current status of an asynchronous document
|
549
|
+
# The current status of an asynchronous document-analysis operation.
|
550
|
+
# @return [String]
|
551
|
+
#
|
552
|
+
# @!attribute [rw] analyze_document_model_version
|
513
553
|
# @return [String]
|
514
554
|
#
|
515
555
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/GetDocumentAnalysisResponse AWS API Documentation
|
@@ -520,7 +560,8 @@ module Aws::Textract
|
|
520
560
|
:next_token,
|
521
561
|
:blocks,
|
522
562
|
:warnings,
|
523
|
-
:status_message
|
563
|
+
:status_message,
|
564
|
+
:analyze_document_model_version)
|
524
565
|
include Aws::Structure
|
525
566
|
end
|
526
567
|
|
@@ -535,7 +576,8 @@ module Aws::Textract
|
|
535
576
|
#
|
536
577
|
# @!attribute [rw] job_id
|
537
578
|
# A unique identifier for the text detection job. The `JobId` is
|
538
|
-
# returned from `StartDocumentTextDetection`.
|
579
|
+
# returned from `StartDocumentTextDetection`. A `JobId` value is only
|
580
|
+
# valid for 7 days.
|
539
581
|
# @return [String]
|
540
582
|
#
|
541
583
|
# @!attribute [rw] max_results
|
@@ -582,13 +624,16 @@ module Aws::Textract
|
|
582
624
|
# @return [Array<Types::Block>]
|
583
625
|
#
|
584
626
|
# @!attribute [rw] warnings
|
585
|
-
# A list of warnings that occurred during the
|
586
|
-
#
|
627
|
+
# A list of warnings that occurred during the text-detection operation
|
628
|
+
# for the document.
|
587
629
|
# @return [Array<Types::Warning>]
|
588
630
|
#
|
589
631
|
# @!attribute [rw] status_message
|
590
|
-
# The current status of an asynchronous
|
591
|
-
#
|
632
|
+
# The current status of an asynchronous text-detection operation for
|
633
|
+
# the document.
|
634
|
+
# @return [String]
|
635
|
+
#
|
636
|
+
# @!attribute [rw] detect_document_text_model_version
|
592
637
|
# @return [String]
|
593
638
|
#
|
594
639
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/GetDocumentTextDetectionResponse AWS API Documentation
|
@@ -599,7 +644,114 @@ module Aws::Textract
|
|
599
644
|
:next_token,
|
600
645
|
:blocks,
|
601
646
|
:warnings,
|
602
|
-
:status_message
|
647
|
+
:status_message,
|
648
|
+
:detect_document_text_model_version)
|
649
|
+
include Aws::Structure
|
650
|
+
end
|
651
|
+
|
652
|
+
# Shows the results of the human in the loop evaluation. If there is no
|
653
|
+
# HumanLoopArn, the input did not trigger human review.
|
654
|
+
#
|
655
|
+
# @!attribute [rw] human_loop_arn
|
656
|
+
# The Amazon Resource Name (ARN) of the HumanLoop created.
|
657
|
+
# @return [String]
|
658
|
+
#
|
659
|
+
# @!attribute [rw] human_loop_activation_reasons
|
660
|
+
# Shows if and why human review was needed.
|
661
|
+
# @return [Array<String>]
|
662
|
+
#
|
663
|
+
# @!attribute [rw] human_loop_activation_conditions_evaluation_results
|
664
|
+
# Shows the result of condition evaluations, including those
|
665
|
+
# conditions which activated a human review.
|
666
|
+
# @return [String]
|
667
|
+
#
|
668
|
+
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/HumanLoopActivationOutput AWS API Documentation
|
669
|
+
#
|
670
|
+
class HumanLoopActivationOutput < Struct.new(
|
671
|
+
:human_loop_arn,
|
672
|
+
:human_loop_activation_reasons,
|
673
|
+
:human_loop_activation_conditions_evaluation_results)
|
674
|
+
include Aws::Structure
|
675
|
+
end
|
676
|
+
|
677
|
+
# Sets up the human review workflow the document will be sent to if one
|
678
|
+
# of the conditions is met. You can also set certain attributes of the
|
679
|
+
# image before review.
|
680
|
+
#
|
681
|
+
# @note When making an API call, you may pass HumanLoopConfig
|
682
|
+
# data as a hash:
|
683
|
+
#
|
684
|
+
# {
|
685
|
+
# human_loop_name: "HumanLoopName", # required
|
686
|
+
# flow_definition_arn: "FlowDefinitionArn", # required
|
687
|
+
# data_attributes: {
|
688
|
+
# content_classifiers: ["FreeOfPersonallyIdentifiableInformation"], # accepts FreeOfPersonallyIdentifiableInformation, FreeOfAdultContent
|
689
|
+
# },
|
690
|
+
# }
|
691
|
+
#
|
692
|
+
# @!attribute [rw] human_loop_name
|
693
|
+
# The name of the human workflow used for this image. This should be
|
694
|
+
# kept unique within a region.
|
695
|
+
# @return [String]
|
696
|
+
#
|
697
|
+
# @!attribute [rw] flow_definition_arn
|
698
|
+
# The Amazon Resource Name (ARN) of the flow definition.
|
699
|
+
# @return [String]
|
700
|
+
#
|
701
|
+
# @!attribute [rw] data_attributes
|
702
|
+
# Sets attributes of the input data.
|
703
|
+
# @return [Types::HumanLoopDataAttributes]
|
704
|
+
#
|
705
|
+
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/HumanLoopConfig AWS API Documentation
|
706
|
+
#
|
707
|
+
class HumanLoopConfig < Struct.new(
|
708
|
+
:human_loop_name,
|
709
|
+
:flow_definition_arn,
|
710
|
+
:data_attributes)
|
711
|
+
include Aws::Structure
|
712
|
+
end
|
713
|
+
|
714
|
+
# Allows you to set attributes of the image. Currently, you can declare
|
715
|
+
# an image as free of personally identifiable information and adult
|
716
|
+
# content.
|
717
|
+
#
|
718
|
+
# @note When making an API call, you may pass HumanLoopDataAttributes
|
719
|
+
# data as a hash:
|
720
|
+
#
|
721
|
+
# {
|
722
|
+
# content_classifiers: ["FreeOfPersonallyIdentifiableInformation"], # accepts FreeOfPersonallyIdentifiableInformation, FreeOfAdultContent
|
723
|
+
# }
|
724
|
+
#
|
725
|
+
# @!attribute [rw] content_classifiers
|
726
|
+
# Sets whether the input image is free of personally identifiable
|
727
|
+
# information or adult content.
|
728
|
+
# @return [Array<String>]
|
729
|
+
#
|
730
|
+
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/HumanLoopDataAttributes AWS API Documentation
|
731
|
+
#
|
732
|
+
class HumanLoopDataAttributes < Struct.new(
|
733
|
+
:content_classifiers)
|
734
|
+
include Aws::Structure
|
735
|
+
end
|
736
|
+
|
737
|
+
# Indicates you have exceeded the maximum number of active human in the
|
738
|
+
# loop workflows available
|
739
|
+
#
|
740
|
+
# @!attribute [rw] resource_type
|
741
|
+
# @return [String]
|
742
|
+
#
|
743
|
+
# @!attribute [rw] quota_code
|
744
|
+
# @return [String]
|
745
|
+
#
|
746
|
+
# @!attribute [rw] service_code
|
747
|
+
# @return [String]
|
748
|
+
#
|
749
|
+
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/HumanLoopQuotaExceededException AWS API Documentation
|
750
|
+
#
|
751
|
+
class HumanLoopQuotaExceededException < Struct.new(
|
752
|
+
:resource_type,
|
753
|
+
:quota_code,
|
754
|
+
:service_code)
|
603
755
|
include Aws::Structure
|
604
756
|
end
|
605
757
|
|
@@ -634,10 +786,10 @@ module Aws::Textract
|
|
634
786
|
end
|
635
787
|
|
636
788
|
# The X and Y coordinates of a point on a document page. The X and Y
|
637
|
-
# values returned are ratios of the overall document page size.
|
638
|
-
# example, if the input document is 700 x 200 and the operation
|
639
|
-
# X=0.5 and Y=0.25, then the point is at the (350,50) pixel
|
640
|
-
# on the document page.
|
789
|
+
# values that are returned are ratios of the overall document page size.
|
790
|
+
# For example, if the input document is 700 x 200 and the operation
|
791
|
+
# returns X=0.5 and Y=0.25, then the point is at the (350,50) pixel
|
792
|
+
# coordinate on the document page.
|
641
793
|
#
|
642
794
|
# An array of `Point` objects, `Polygon`, is returned by
|
643
795
|
# DetectDocumentText. `Polygon` represents a fine-grained polygon around
|
@@ -669,7 +821,11 @@ module Aws::Textract
|
|
669
821
|
#
|
670
822
|
# @!attribute [rw] type
|
671
823
|
# The type of relationship that the blocks in the IDs array have with
|
672
|
-
# the current block. The relationship can be `VALUE` or `CHILD`.
|
824
|
+
# the current block. The relationship can be `VALUE` or `CHILD`. A
|
825
|
+
# relationship of type VALUE is a list that contains the ID of the
|
826
|
+
# VALUE block that's associated with the KEY of a key-value pair. A
|
827
|
+
# relationship of type CHILD is a list of IDs that identify WORD
|
828
|
+
# blocks.
|
673
829
|
# @return [String]
|
674
830
|
#
|
675
831
|
# @!attribute [rw] ids
|
@@ -707,8 +863,9 @@ module Aws::Textract
|
|
707
863
|
# @return [String]
|
708
864
|
#
|
709
865
|
# @!attribute [rw] name
|
710
|
-
# The file name of the input document.
|
711
|
-
# or
|
866
|
+
# The file name of the input document. Synchronous operations can use
|
867
|
+
# image files that are in JPEG or PNG format. Asynchronous operations
|
868
|
+
# also support PDF format files.
|
712
869
|
# @return [String]
|
713
870
|
#
|
714
871
|
# @!attribute [rw] version
|
@@ -752,11 +909,11 @@ module Aws::Textract
|
|
752
909
|
# @!attribute [rw] feature_types
|
753
910
|
# A list of the types of analysis to perform. Add TABLES to the list
|
754
911
|
# to return information about the tables that are detected in the
|
755
|
-
# input document. Add FORMS to return detected
|
756
|
-
#
|
757
|
-
#
|
758
|
-
# (
|
759
|
-
#
|
912
|
+
# input document. Add FORMS to return detected form data. To perform
|
913
|
+
# both types of analysis, add TABLES and FORMS to `FeatureTypes`. All
|
914
|
+
# lines and words detected in the document are included in the
|
915
|
+
# response (including text that isn't related to the value of
|
916
|
+
# `FeatureTypes`).
|
760
917
|
# @return [Array<String>]
|
761
918
|
#
|
762
919
|
# @!attribute [rw] client_request_token
|
@@ -764,13 +921,19 @@ module Aws::Textract
|
|
764
921
|
# you use the same token with multiple `StartDocumentAnalysis`
|
765
922
|
# requests, the same `JobId` is returned. Use `ClientRequestToken` to
|
766
923
|
# prevent the same job from being accidentally started more than once.
|
924
|
+
# For more information, see [Calling Amazon Textract Asynchronous
|
925
|
+
# Operations][1].
|
926
|
+
#
|
927
|
+
#
|
928
|
+
#
|
929
|
+
# [1]: https://docs.aws.amazon.com/textract/latest/dg/api-async.html
|
767
930
|
# @return [String]
|
768
931
|
#
|
769
932
|
# @!attribute [rw] job_tag
|
770
|
-
# An identifier you specify that's included in the completion
|
771
|
-
# notification
|
772
|
-
#
|
773
|
-
#
|
933
|
+
# An identifier that you specify that's included in the completion
|
934
|
+
# notification published to the Amazon SNS topic. For example, you can
|
935
|
+
# use `JobTag` to identify the type of document that the completion
|
936
|
+
# notification corresponds to (such as a tax form or a receipt).
|
774
937
|
# @return [String]
|
775
938
|
#
|
776
939
|
# @!attribute [rw] notification_channel
|
@@ -791,7 +954,8 @@ module Aws::Textract
|
|
791
954
|
|
792
955
|
# @!attribute [rw] job_id
|
793
956
|
# The identifier for the document text detection job. Use `JobId` to
|
794
|
-
# identify the job in a subsequent call to `GetDocumentAnalysis`.
|
957
|
+
# identify the job in a subsequent call to `GetDocumentAnalysis`. A
|
958
|
+
# `JobId` value is only valid for 7 days.
|
795
959
|
# @return [String]
|
796
960
|
#
|
797
961
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/StartDocumentAnalysisResponse AWS API Documentation
|
@@ -829,13 +993,19 @@ module Aws::Textract
|
|
829
993
|
# you use the same token with multiple `StartDocumentTextDetection`
|
830
994
|
# requests, the same `JobId` is returned. Use `ClientRequestToken` to
|
831
995
|
# prevent the same job from being accidentally started more than once.
|
996
|
+
# For more information, see [Calling Amazon Textract Asynchronous
|
997
|
+
# Operations][1].
|
998
|
+
#
|
999
|
+
#
|
1000
|
+
#
|
1001
|
+
# [1]: https://docs.aws.amazon.com/textract/latest/dg/api-async.html
|
832
1002
|
# @return [String]
|
833
1003
|
#
|
834
1004
|
# @!attribute [rw] job_tag
|
835
|
-
# An identifier you specify that's included in the completion
|
836
|
-
# notification
|
837
|
-
#
|
838
|
-
#
|
1005
|
+
# An identifier that you specify that's included in the completion
|
1006
|
+
# notification published to the Amazon SNS topic. For example, you can
|
1007
|
+
# use `JobTag` to identify the type of document that the completion
|
1008
|
+
# notification corresponds to (such as a tax form or a receipt).
|
839
1009
|
# @return [String]
|
840
1010
|
#
|
841
1011
|
# @!attribute [rw] notification_channel
|
@@ -854,8 +1024,10 @@ module Aws::Textract
|
|
854
1024
|
end
|
855
1025
|
|
856
1026
|
# @!attribute [rw] job_id
|
857
|
-
# The identifier
|
858
|
-
# identify the job in a subsequent call to
|
1027
|
+
# The identifier of the text detection job for the document. Use
|
1028
|
+
# `JobId` to identify the job in a subsequent call to
|
1029
|
+
# `GetDocumentTextDetection`. A `JobId` value is only valid for 7
|
1030
|
+
# days.
|
859
1031
|
# @return [String]
|
860
1032
|
#
|
861
1033
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/StartDocumentTextDetectionResponse AWS API Documentation
|
@@ -866,7 +1038,7 @@ module Aws::Textract
|
|
866
1038
|
end
|
867
1039
|
|
868
1040
|
# A warning about an issue that occurred during asynchronous text
|
869
|
-
# analysis (StartDocumentAnalysis) or asynchronous document
|
1041
|
+
# analysis (StartDocumentAnalysis) or asynchronous document text
|
870
1042
|
# detection (StartDocumentTextDetection).
|
871
1043
|
#
|
872
1044
|
# @!attribute [rw] error_code
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aws-sdk-textract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.7.0
|
4
|
+
version: 1.12.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Amazon Web Services
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-12-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: aws-sdk-core
|
@@ -19,7 +19,7 @@ dependencies:
|
|
19
19
|
version: '3'
|
20
20
|
- - ">="
|
21
21
|
- !ruby/object:Gem::Version
|
22
|
-
version: 3.
|
22
|
+
version: 3.71.0
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -29,7 +29,7 @@ dependencies:
|
|
29
29
|
version: '3'
|
30
30
|
- - ">="
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: 3.
|
32
|
+
version: 3.71.0
|
33
33
|
- !ruby/object:Gem::Dependency
|
34
34
|
name: aws-sigv4
|
35
35
|
requirement: !ruby/object:Gem::Requirement
|