aws-sdk-textract 1.11.0 → 1.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/aws-sdk-textract.rb +1 -1
- data/lib/aws-sdk-textract/client.rb +119 -72
- data/lib/aws-sdk-textract/client_api.rb +41 -0
- data/lib/aws-sdk-textract/errors.rb +26 -0
- data/lib/aws-sdk-textract/types.rb +280 -108
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7a71006801113001e73f63228fe2b5d656ca109c
|
4
|
+
data.tar.gz: e30581e97885a5a4dce46db639aa58f73291766d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: da014f69acfee4fa46bb6e23ee4c0988d8e7cbfc3529dfbee53a992d1baeaf90d1f18bd01a9afa31c75bbc592e0e3112bf673cd99c287a400de984c2ebabc871
|
7
|
+
data.tar.gz: 4e87f85c57a41e611c818f4afc5b36c73422c1d5617153ab3175742e2f2be4811500d6702b24522d3988c077488f5902653be37ca7b9bf02cb4c1aed06482088
|
data/lib/aws-sdk-textract.rb
CHANGED
@@ -268,27 +268,30 @@ module Aws::Textract
|
|
268
268
|
#
|
269
269
|
# The types of information returned are as follows:
|
270
270
|
#
|
271
|
-
# *
|
272
|
-
#
|
273
|
-
#
|
274
|
-
#
|
275
|
-
#
|
276
|
-
#
|
277
|
-
# * Table and table cell data. A TABLE Block object contains
|
278
|
-
# about a detected table. A CELL Block object is
|
279
|
-
# cell in a table.
|
280
|
-
#
|
281
|
-
# *
|
282
|
-
#
|
283
|
-
#
|
284
|
-
#
|
285
|
-
#
|
286
|
-
#
|
271
|
+
# * Form data (key-value pairs). The related information is returned in
|
272
|
+
# two Block objects, each of type `KEY_VALUE_SET`\: a KEY `Block`
|
273
|
+
# object and a VALUE `Block` object. For example, *Name: Ana Silva
|
274
|
+
# Carolina* contains a key and value. *Name:* is the key. *Ana Silva
|
275
|
+
# Carolina* is the value.
|
276
|
+
#
|
277
|
+
# * Table and table cell data. A TABLE `Block` object contains
|
278
|
+
# information about a detected table. A CELL `Block` object is
|
279
|
+
# returned for each cell in a table.
|
280
|
+
#
|
281
|
+
# * Lines and words of text. A LINE `Block` object contains one or more
|
282
|
+
# WORD `Block` objects. All lines and words that are detected in the
|
283
|
+
# document are returned (including text that doesn't have a
|
284
|
+
# relationship with the value of `FeatureTypes`).
|
285
|
+
#
|
286
|
+
# Selection elements such as check boxes and option buttons (radio
|
287
|
+
# buttons) can be detected in form data and in tables. A
|
288
|
+
# SELECTION\_ELEMENT `Block` object contains information about a
|
289
|
+
# selection element, including the selection status.
|
287
290
|
#
|
288
291
|
# You can choose which type of analysis to perform by specifying the
|
289
292
|
# `FeatureTypes` list.
|
290
293
|
#
|
291
|
-
# The output is returned in a list of `
|
294
|
+
# The output is returned in a list of `Block` objects.
|
292
295
|
#
|
293
296
|
# `AnalyzeDocument` is a synchronous operation. To analyze documents
|
294
297
|
# asynchronously, use StartDocumentAnalysis.
|
@@ -302,22 +305,30 @@ module Aws::Textract
|
|
302
305
|
# @option params [required, Types::Document] :document
|
303
306
|
# The input document as base64-encoded bytes or an Amazon S3 object. If
|
304
307
|
# you use the AWS CLI to call Amazon Textract operations, you can't
|
305
|
-
# pass image bytes. The document must be an image in
|
308
|
+
# pass image bytes. The document must be an image in JPEG or PNG format.
|
306
309
|
#
|
307
|
-
# If you
|
308
|
-
# need to base64-encode image bytes passed using the `Bytes`
|
310
|
+
# If you're using an AWS SDK to call Amazon Textract, you might not
|
311
|
+
# need to base64-encode image bytes that are passed using the `Bytes`
|
312
|
+
# field.
|
309
313
|
#
|
310
314
|
# @option params [required, Array<String>] :feature_types
|
311
315
|
# A list of the types of analysis to perform. Add TABLES to the list to
|
312
|
-
# return information about the tables detected in the input
|
313
|
-
# Add FORMS to return detected
|
314
|
-
#
|
315
|
-
#
|
316
|
+
# return information about the tables that are detected in the input
|
317
|
+
# document. Add FORMS to return detected form data. To perform both
|
318
|
+
# types of analysis, add TABLES and FORMS to `FeatureTypes`. All lines
|
319
|
+
# and words detected in the document are included in the response
|
320
|
+
# (including text that isn't related to the value of `FeatureTypes`).
|
321
|
+
#
|
322
|
+
# @option params [Types::HumanLoopConfig] :human_loop_config
|
323
|
+
# Sets the configuration for the human in the loop workflow for
|
324
|
+
# analyzing documents.
|
316
325
|
#
|
317
326
|
# @return [Types::AnalyzeDocumentResponse] Returns a {Seahorse::Client::Response response} object which responds to the following methods:
|
318
327
|
#
|
319
328
|
# * {Types::AnalyzeDocumentResponse#document_metadata #document_metadata} => Types::DocumentMetadata
|
320
329
|
# * {Types::AnalyzeDocumentResponse#blocks #blocks} => Array<Types::Block>
|
330
|
+
# * {Types::AnalyzeDocumentResponse#human_loop_activation_output #human_loop_activation_output} => Types::HumanLoopActivationOutput
|
331
|
+
# * {Types::AnalyzeDocumentResponse#analyze_document_model_version #analyze_document_model_version} => String
|
321
332
|
#
|
322
333
|
# @example Request syntax with placeholder values
|
323
334
|
#
|
@@ -331,6 +342,13 @@ module Aws::Textract
|
|
331
342
|
# },
|
332
343
|
# },
|
333
344
|
# feature_types: ["TABLES"], # required, accepts TABLES, FORMS
|
345
|
+
# human_loop_config: {
|
346
|
+
# human_loop_name: "HumanLoopName", # required
|
347
|
+
# flow_definition_arn: "FlowDefinitionArn", # required
|
348
|
+
# data_attributes: {
|
349
|
+
# content_classifiers: ["FreeOfPersonallyIdentifiableInformation"], # accepts FreeOfPersonallyIdentifiableInformation, FreeOfAdultContent
|
350
|
+
# },
|
351
|
+
# },
|
334
352
|
# })
|
335
353
|
#
|
336
354
|
# @example Response structure
|
@@ -360,6 +378,11 @@ module Aws::Textract
|
|
360
378
|
# resp.blocks[0].entity_types[0] #=> String, one of "KEY", "VALUE"
|
361
379
|
# resp.blocks[0].selection_status #=> String, one of "SELECTED", "NOT_SELECTED"
|
362
380
|
# resp.blocks[0].page #=> Integer
|
381
|
+
# resp.human_loop_activation_output.human_loop_arn #=> String
|
382
|
+
# resp.human_loop_activation_output.human_loop_activation_reasons #=> Array
|
383
|
+
# resp.human_loop_activation_output.human_loop_activation_reasons[0] #=> String
|
384
|
+
# resp.human_loop_activation_output.human_loop_activation_conditions_evaluation_results #=> String
|
385
|
+
# resp.analyze_document_model_version #=> String
|
363
386
|
#
|
364
387
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/AnalyzeDocument AWS API Documentation
|
365
388
|
#
|
@@ -372,7 +395,7 @@ module Aws::Textract
|
|
372
395
|
|
373
396
|
# Detects text in the input document. Amazon Textract can detect lines
|
374
397
|
# of text and the words that make up a line of text. The input document
|
375
|
-
# must be an image in
|
398
|
+
# must be an image in JPEG or PNG format. `DetectDocumentText` returns
|
376
399
|
# the detected text in an array of Block objects.
|
377
400
|
#
|
378
401
|
# Each document page has an associated `Block` of type PAGE. Each
|
@@ -393,15 +416,17 @@ module Aws::Textract
|
|
393
416
|
# @option params [required, Types::Document] :document
|
394
417
|
# The input document as base64-encoded bytes or an Amazon S3 object. If
|
395
418
|
# you use the AWS CLI to call Amazon Textract operations, you can't
|
396
|
-
# pass image bytes. The document must be an image in
|
419
|
+
# pass image bytes. The document must be an image in JPEG or PNG format.
|
397
420
|
#
|
398
|
-
# If you
|
399
|
-
# need to base64-encode image bytes passed using the `Bytes`
|
421
|
+
# If you're using an AWS SDK to call Amazon Textract, you might not
|
422
|
+
# need to base64-encode image bytes that are passed using the `Bytes`
|
423
|
+
# field.
|
400
424
|
#
|
401
425
|
# @return [Types::DetectDocumentTextResponse] Returns a {Seahorse::Client::Response response} object which responds to the following methods:
|
402
426
|
#
|
403
427
|
# * {Types::DetectDocumentTextResponse#document_metadata #document_metadata} => Types::DocumentMetadata
|
404
428
|
# * {Types::DetectDocumentTextResponse#blocks #blocks} => Array<Types::Block>
|
429
|
+
# * {Types::DetectDocumentTextResponse#detect_document_text_model_version #detect_document_text_model_version} => String
|
405
430
|
#
|
406
431
|
# @example Request syntax with placeholder values
|
407
432
|
#
|
@@ -443,6 +468,7 @@ module Aws::Textract
|
|
443
468
|
# resp.blocks[0].entity_types[0] #=> String, one of "KEY", "VALUE"
|
444
469
|
# resp.blocks[0].selection_status #=> String, one of "SELECTED", "NOT_SELECTED"
|
445
470
|
# resp.blocks[0].page #=> Integer
|
471
|
+
# resp.detect_document_text_model_version #=> String
|
446
472
|
#
|
447
473
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/DetectDocumentText AWS API Documentation
|
448
474
|
#
|
@@ -469,30 +495,34 @@ module Aws::Textract
|
|
469
495
|
# `GetDocumentAnalysis` returns an array of Block objects. The following
|
470
496
|
# types of information are returned:
|
471
497
|
#
|
472
|
-
# *
|
473
|
-
#
|
474
|
-
#
|
475
|
-
#
|
476
|
-
#
|
498
|
+
# * Form data (key-value pairs). The related information is returned in
|
499
|
+
# two Block objects, each of type `KEY_VALUE_SET`\: a KEY `Block`
|
500
|
+
# object and a VALUE `Block` object. For example, *Name: Ana Silva
|
501
|
+
# Carolina* contains a key and value. *Name:* is the key. *Ana Silva
|
502
|
+
# Carolina* is the value.
|
477
503
|
#
|
478
|
-
# * Table and table cell data. A TABLE Block object contains
|
479
|
-
# about a detected table. A CELL Block object is
|
480
|
-
# cell in a table.
|
504
|
+
# * Table and table cell data. A TABLE `Block` object contains
|
505
|
+
# information about a detected table. A CELL `Block` object is
|
506
|
+
# returned for each cell in a table.
|
481
507
|
#
|
482
|
-
# *
|
483
|
-
#
|
484
|
-
#
|
508
|
+
# * Lines and words of text. A LINE `Block` object contains one or more
|
509
|
+
# WORD `Block` objects. All lines and words that are detected in the
|
510
|
+
# document are returned (including text that doesn't have a
|
511
|
+
# relationship with the value of the `StartDocumentAnalysis`
|
512
|
+
# `FeatureTypes` input parameter).
|
485
513
|
#
|
486
|
-
#
|
487
|
-
#
|
514
|
+
# Selection elements such as check boxes and option buttons (radio
|
515
|
+
# buttons) can be detected in form data and in tables. A
|
516
|
+
# SELECTION\_ELEMENT `Block` object contains information about a
|
517
|
+
# selection element, including the selection status.
|
488
518
|
#
|
489
|
-
# Use the `MaxResults` parameter to limit the number of blocks
|
490
|
-
# If there are more results than specified in `MaxResults`,
|
491
|
-
# `NextToken` in the operation response contains a
|
492
|
-
# getting the next set of results. To get the next
|
493
|
-
# `GetDocumentAnalysis`, and populate the
|
494
|
-
# with the token value that's returned
|
495
|
-
# `GetDocumentAnalysis`.
|
519
|
+
# Use the `MaxResults` parameter to limit the number of blocks that are
|
520
|
+
# returned. If there are more results than specified in `MaxResults`,
|
521
|
+
# the value of `NextToken` in the operation response contains a
|
522
|
+
# pagination token for getting the next set of results. To get the next
|
523
|
+
# page of results, call `GetDocumentAnalysis`, and populate the
|
524
|
+
# `NextToken` request parameter with the token value that's returned
|
525
|
+
# from the previous call to `GetDocumentAnalysis`.
|
496
526
|
#
|
497
527
|
# For more information, see [Document Text Analysis][1].
|
498
528
|
#
|
@@ -502,7 +532,8 @@ module Aws::Textract
|
|
502
532
|
#
|
503
533
|
# @option params [required, String] :job_id
|
504
534
|
# A unique identifier for the text-detection job. The `JobId` is
|
505
|
-
# returned from `StartDocumentAnalysis`.
|
535
|
+
# returned from `StartDocumentAnalysis`. A `JobId` value is only valid
|
536
|
+
# for 7 days.
|
506
537
|
#
|
507
538
|
# @option params [Integer] :max_results
|
508
539
|
# The maximum number of results to return per paginated call. The
|
@@ -524,6 +555,7 @@ module Aws::Textract
|
|
524
555
|
# * {Types::GetDocumentAnalysisResponse#blocks #blocks} => Array<Types::Block>
|
525
556
|
# * {Types::GetDocumentAnalysisResponse#warnings #warnings} => Array<Types::Warning>
|
526
557
|
# * {Types::GetDocumentAnalysisResponse#status_message #status_message} => String
|
558
|
+
# * {Types::GetDocumentAnalysisResponse#analyze_document_model_version #analyze_document_model_version} => String
|
527
559
|
#
|
528
560
|
# @example Request syntax with placeholder values
|
529
561
|
#
|
@@ -567,6 +599,7 @@ module Aws::Textract
|
|
567
599
|
# resp.warnings[0].pages #=> Array
|
568
600
|
# resp.warnings[0].pages[0] #=> Integer
|
569
601
|
# resp.status_message #=> String
|
602
|
+
# resp.analyze_document_model_version #=> String
|
570
603
|
#
|
571
604
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/GetDocumentAnalysis AWS API Documentation
|
572
605
|
#
|
@@ -616,7 +649,8 @@ module Aws::Textract
|
|
616
649
|
#
|
617
650
|
# @option params [required, String] :job_id
|
618
651
|
# A unique identifier for the text detection job. The `JobId` is
|
619
|
-
# returned from `StartDocumentTextDetection`.
|
652
|
+
# returned from `StartDocumentTextDetection`. A `JobId` value is only
|
653
|
+
# valid for 7 days.
|
620
654
|
#
|
621
655
|
# @option params [Integer] :max_results
|
622
656
|
# The maximum number of results to return per paginated call. The
|
@@ -638,6 +672,7 @@ module Aws::Textract
|
|
638
672
|
# * {Types::GetDocumentTextDetectionResponse#blocks #blocks} => Array<Types::Block>
|
639
673
|
# * {Types::GetDocumentTextDetectionResponse#warnings #warnings} => Array<Types::Warning>
|
640
674
|
# * {Types::GetDocumentTextDetectionResponse#status_message #status_message} => String
|
675
|
+
# * {Types::GetDocumentTextDetectionResponse#detect_document_text_model_version #detect_document_text_model_version} => String
|
641
676
|
#
|
642
677
|
# @example Request syntax with placeholder values
|
643
678
|
#
|
@@ -681,6 +716,7 @@ module Aws::Textract
|
|
681
716
|
# resp.warnings[0].pages #=> Array
|
682
717
|
# resp.warnings[0].pages[0] #=> Integer
|
683
718
|
# resp.status_message #=> String
|
719
|
+
# resp.detect_document_text_model_version #=> String
|
684
720
|
#
|
685
721
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/GetDocumentTextDetection AWS API Documentation
|
686
722
|
#
|
@@ -691,14 +727,14 @@ module Aws::Textract
|
|
691
727
|
req.send_request(options)
|
692
728
|
end
|
693
729
|
|
694
|
-
# Starts asynchronous analysis of an input document for
|
695
|
-
# between detected items such as key
|
696
|
-
# selection elements.
|
730
|
+
# Starts the asynchronous analysis of an input document for
|
731
|
+
# relationships between detected items such as key-value pairs, tables,
|
732
|
+
# and selection elements.
|
697
733
|
#
|
698
|
-
# `StartDocumentAnalysis` can analyze text in documents that are in
|
699
|
-
# PNG, and PDF format. The documents are stored in an Amazon S3
|
700
|
-
# Use DocumentLocation to specify the bucket name and file name
|
701
|
-
# document.
|
734
|
+
# `StartDocumentAnalysis` can analyze text in documents that are in
|
735
|
+
# JPEG, PNG, and PDF format. The documents are stored in an Amazon S3
|
736
|
+
# bucket. Use DocumentLocation to specify the bucket name and file name
|
737
|
+
# of the document.
|
702
738
|
#
|
703
739
|
# `StartDocumentAnalysis` returns a job identifier (`JobId`) that you
|
704
740
|
# use to get the results of the operation. When text analysis is
|
@@ -722,22 +758,27 @@ module Aws::Textract
|
|
722
758
|
# @option params [required, Array<String>] :feature_types
|
723
759
|
# A list of the types of analysis to perform. Add TABLES to the list to
|
724
760
|
# return information about the tables that are detected in the input
|
725
|
-
# document. Add FORMS to return detected
|
726
|
-
#
|
727
|
-
#
|
728
|
-
#
|
761
|
+
# document. Add FORMS to return detected form data. To perform both
|
762
|
+
# types of analysis, add TABLES and FORMS to `FeatureTypes`. All lines
|
763
|
+
# and words detected in the document are included in the response
|
764
|
+
# (including text that isn't related to the value of `FeatureTypes`).
|
729
765
|
#
|
730
766
|
# @option params [String] :client_request_token
|
731
767
|
# The idempotent token that you use to identify the start request. If
|
732
768
|
# you use the same token with multiple `StartDocumentAnalysis` requests,
|
733
769
|
# the same `JobId` is returned. Use `ClientRequestToken` to prevent the
|
734
|
-
# same job from being accidentally started more than once.
|
770
|
+
# same job from being accidentally started more than once. For more
|
771
|
+
# information, see [Calling Amazon Textract Asynchronous Operations][1].
|
772
|
+
#
|
773
|
+
#
|
774
|
+
#
|
775
|
+
# [1]: https://docs.aws.amazon.com/textract/latest/dg/api-async.html
|
735
776
|
#
|
736
777
|
# @option params [String] :job_tag
|
737
|
-
# An identifier you specify that's included in the completion
|
738
|
-
# notification
|
739
|
-
#
|
740
|
-
#
|
778
|
+
# An identifier that you specify that's included in the completion
|
779
|
+
# notification published to the Amazon SNS topic. For example, you can
|
780
|
+
# use `JobTag` to identify the type of document that the completion
|
781
|
+
# notification corresponds to (such as a tax form or a receipt).
|
741
782
|
#
|
742
783
|
# @option params [Types::NotificationChannel] :notification_channel
|
743
784
|
# The Amazon SNS topic ARN that you want Amazon Textract to publish the
|
@@ -784,7 +825,7 @@ module Aws::Textract
|
|
784
825
|
# text.
|
785
826
|
#
|
786
827
|
# `StartDocumentTextDetection` can analyze text in documents that are in
|
787
|
-
#
|
828
|
+
# JPEG, PNG, and PDF format. The documents are stored in an Amazon S3
|
788
829
|
# bucket. Use DocumentLocation to specify the bucket name and file name
|
789
830
|
# of the document.
|
790
831
|
#
|
@@ -812,12 +853,18 @@ module Aws::Textract
|
|
812
853
|
# you use the same token with multiple `StartDocumentTextDetection`
|
813
854
|
# requests, the same `JobId` is returned. Use `ClientRequestToken` to
|
814
855
|
# prevent the same job from being accidentally started more than once.
|
856
|
+
# For more information, see [Calling Amazon Textract Asynchronous
|
857
|
+
# Operations][1].
|
858
|
+
#
|
859
|
+
#
|
860
|
+
#
|
861
|
+
# [1]: https://docs.aws.amazon.com/textract/latest/dg/api-async.html
|
815
862
|
#
|
816
863
|
# @option params [String] :job_tag
|
817
|
-
# An identifier you specify that's included in the completion
|
818
|
-
# notification
|
819
|
-
#
|
820
|
-
#
|
864
|
+
# An identifier that you specify that's included in the completion
|
865
|
+
# notification published to the Amazon SNS topic. For example, you can
|
866
|
+
# use `JobTag` to identify the type of document that the completion
|
867
|
+
# notification corresponds to (such as a tax form or a receipt).
|
821
868
|
#
|
822
869
|
# @option params [Types::NotificationChannel] :notification_channel
|
823
870
|
# The Amazon SNS topic ARN that you want Amazon Textract to publish the
|
@@ -871,7 +918,7 @@ module Aws::Textract
|
|
871
918
|
params: params,
|
872
919
|
config: config)
|
873
920
|
context[:gem_name] = 'aws-sdk-textract'
|
874
|
-
context[:gem_version] = '1.11.0'
|
921
|
+
context[:gem_version] = '1.12.0'
|
875
922
|
Seahorse::Client::Request.new(handlers, context)
|
876
923
|
end
|
877
924
|
|
@@ -20,6 +20,8 @@ module Aws::Textract
|
|
20
20
|
BlockType = Shapes::StringShape.new(name: 'BlockType')
|
21
21
|
BoundingBox = Shapes::StructureShape.new(name: 'BoundingBox')
|
22
22
|
ClientRequestToken = Shapes::StringShape.new(name: 'ClientRequestToken')
|
23
|
+
ContentClassifier = Shapes::StringShape.new(name: 'ContentClassifier')
|
24
|
+
ContentClassifiers = Shapes::ListShape.new(name: 'ContentClassifiers')
|
23
25
|
DetectDocumentTextRequest = Shapes::StructureShape.new(name: 'DetectDocumentTextRequest')
|
24
26
|
DetectDocumentTextResponse = Shapes::StructureShape.new(name: 'DetectDocumentTextResponse')
|
25
27
|
Document = Shapes::StructureShape.new(name: 'Document')
|
@@ -32,11 +34,21 @@ module Aws::Textract
|
|
32
34
|
FeatureType = Shapes::StringShape.new(name: 'FeatureType')
|
33
35
|
FeatureTypes = Shapes::ListShape.new(name: 'FeatureTypes')
|
34
36
|
Float = Shapes::FloatShape.new(name: 'Float')
|
37
|
+
FlowDefinitionArn = Shapes::StringShape.new(name: 'FlowDefinitionArn')
|
35
38
|
Geometry = Shapes::StructureShape.new(name: 'Geometry')
|
36
39
|
GetDocumentAnalysisRequest = Shapes::StructureShape.new(name: 'GetDocumentAnalysisRequest')
|
37
40
|
GetDocumentAnalysisResponse = Shapes::StructureShape.new(name: 'GetDocumentAnalysisResponse')
|
38
41
|
GetDocumentTextDetectionRequest = Shapes::StructureShape.new(name: 'GetDocumentTextDetectionRequest')
|
39
42
|
GetDocumentTextDetectionResponse = Shapes::StructureShape.new(name: 'GetDocumentTextDetectionResponse')
|
43
|
+
HumanLoopActivationConditionsEvaluationResults = Shapes::StringShape.new(name: 'HumanLoopActivationConditionsEvaluationResults')
|
44
|
+
HumanLoopActivationOutput = Shapes::StructureShape.new(name: 'HumanLoopActivationOutput')
|
45
|
+
HumanLoopActivationReason = Shapes::StringShape.new(name: 'HumanLoopActivationReason')
|
46
|
+
HumanLoopActivationReasons = Shapes::ListShape.new(name: 'HumanLoopActivationReasons')
|
47
|
+
HumanLoopArn = Shapes::StringShape.new(name: 'HumanLoopArn')
|
48
|
+
HumanLoopConfig = Shapes::StructureShape.new(name: 'HumanLoopConfig')
|
49
|
+
HumanLoopDataAttributes = Shapes::StructureShape.new(name: 'HumanLoopDataAttributes')
|
50
|
+
HumanLoopName = Shapes::StringShape.new(name: 'HumanLoopName')
|
51
|
+
HumanLoopQuotaExceededException = Shapes::StructureShape.new(name: 'HumanLoopQuotaExceededException')
|
40
52
|
IdList = Shapes::ListShape.new(name: 'IdList')
|
41
53
|
IdempotentParameterMismatchException = Shapes::StructureShape.new(name: 'IdempotentParameterMismatchException')
|
42
54
|
ImageBlob = Shapes::BlobShape.new(name: 'ImageBlob')
|
@@ -81,10 +93,13 @@ module Aws::Textract
|
|
81
93
|
|
82
94
|
AnalyzeDocumentRequest.add_member(:document, Shapes::ShapeRef.new(shape: Document, required: true, location_name: "Document"))
|
83
95
|
AnalyzeDocumentRequest.add_member(:feature_types, Shapes::ShapeRef.new(shape: FeatureTypes, required: true, location_name: "FeatureTypes"))
|
96
|
+
AnalyzeDocumentRequest.add_member(:human_loop_config, Shapes::ShapeRef.new(shape: HumanLoopConfig, location_name: "HumanLoopConfig"))
|
84
97
|
AnalyzeDocumentRequest.struct_class = Types::AnalyzeDocumentRequest
|
85
98
|
|
86
99
|
AnalyzeDocumentResponse.add_member(:document_metadata, Shapes::ShapeRef.new(shape: DocumentMetadata, location_name: "DocumentMetadata"))
|
87
100
|
AnalyzeDocumentResponse.add_member(:blocks, Shapes::ShapeRef.new(shape: BlockList, location_name: "Blocks"))
|
101
|
+
AnalyzeDocumentResponse.add_member(:human_loop_activation_output, Shapes::ShapeRef.new(shape: HumanLoopActivationOutput, location_name: "HumanLoopActivationOutput"))
|
102
|
+
AnalyzeDocumentResponse.add_member(:analyze_document_model_version, Shapes::ShapeRef.new(shape: String, location_name: "AnalyzeDocumentModelVersion"))
|
88
103
|
AnalyzeDocumentResponse.struct_class = Types::AnalyzeDocumentResponse
|
89
104
|
|
90
105
|
Block.add_member(:block_type, Shapes::ShapeRef.new(shape: BlockType, location_name: "BlockType"))
|
@@ -110,11 +125,14 @@ module Aws::Textract
|
|
110
125
|
BoundingBox.add_member(:top, Shapes::ShapeRef.new(shape: Float, location_name: "Top"))
|
111
126
|
BoundingBox.struct_class = Types::BoundingBox
|
112
127
|
|
128
|
+
ContentClassifiers.member = Shapes::ShapeRef.new(shape: ContentClassifier)
|
129
|
+
|
113
130
|
DetectDocumentTextRequest.add_member(:document, Shapes::ShapeRef.new(shape: Document, required: true, location_name: "Document"))
|
114
131
|
DetectDocumentTextRequest.struct_class = Types::DetectDocumentTextRequest
|
115
132
|
|
116
133
|
DetectDocumentTextResponse.add_member(:document_metadata, Shapes::ShapeRef.new(shape: DocumentMetadata, location_name: "DocumentMetadata"))
|
117
134
|
DetectDocumentTextResponse.add_member(:blocks, Shapes::ShapeRef.new(shape: BlockList, location_name: "Blocks"))
|
135
|
+
DetectDocumentTextResponse.add_member(:detect_document_text_model_version, Shapes::ShapeRef.new(shape: String, location_name: "DetectDocumentTextModelVersion"))
|
118
136
|
DetectDocumentTextResponse.struct_class = Types::DetectDocumentTextResponse
|
119
137
|
|
120
138
|
Document.add_member(:bytes, Shapes::ShapeRef.new(shape: ImageBlob, location_name: "Bytes"))
|
@@ -146,6 +164,7 @@ module Aws::Textract
|
|
146
164
|
GetDocumentAnalysisResponse.add_member(:blocks, Shapes::ShapeRef.new(shape: BlockList, location_name: "Blocks"))
|
147
165
|
GetDocumentAnalysisResponse.add_member(:warnings, Shapes::ShapeRef.new(shape: Warnings, location_name: "Warnings"))
|
148
166
|
GetDocumentAnalysisResponse.add_member(:status_message, Shapes::ShapeRef.new(shape: StatusMessage, location_name: "StatusMessage"))
|
167
|
+
GetDocumentAnalysisResponse.add_member(:analyze_document_model_version, Shapes::ShapeRef.new(shape: String, location_name: "AnalyzeDocumentModelVersion"))
|
149
168
|
GetDocumentAnalysisResponse.struct_class = Types::GetDocumentAnalysisResponse
|
150
169
|
|
151
170
|
GetDocumentTextDetectionRequest.add_member(:job_id, Shapes::ShapeRef.new(shape: JobId, required: true, location_name: "JobId"))
|
@@ -159,8 +178,29 @@ module Aws::Textract
|
|
159
178
|
GetDocumentTextDetectionResponse.add_member(:blocks, Shapes::ShapeRef.new(shape: BlockList, location_name: "Blocks"))
|
160
179
|
GetDocumentTextDetectionResponse.add_member(:warnings, Shapes::ShapeRef.new(shape: Warnings, location_name: "Warnings"))
|
161
180
|
GetDocumentTextDetectionResponse.add_member(:status_message, Shapes::ShapeRef.new(shape: StatusMessage, location_name: "StatusMessage"))
|
181
|
+
GetDocumentTextDetectionResponse.add_member(:detect_document_text_model_version, Shapes::ShapeRef.new(shape: String, location_name: "DetectDocumentTextModelVersion"))
|
162
182
|
GetDocumentTextDetectionResponse.struct_class = Types::GetDocumentTextDetectionResponse
|
163
183
|
|
184
|
+
HumanLoopActivationOutput.add_member(:human_loop_arn, Shapes::ShapeRef.new(shape: HumanLoopArn, location_name: "HumanLoopArn"))
|
185
|
+
HumanLoopActivationOutput.add_member(:human_loop_activation_reasons, Shapes::ShapeRef.new(shape: HumanLoopActivationReasons, location_name: "HumanLoopActivationReasons"))
|
186
|
+
HumanLoopActivationOutput.add_member(:human_loop_activation_conditions_evaluation_results, Shapes::ShapeRef.new(shape: HumanLoopActivationConditionsEvaluationResults, location_name: "HumanLoopActivationConditionsEvaluationResults", metadata: {"jsonvalue"=>true}))
|
187
|
+
HumanLoopActivationOutput.struct_class = Types::HumanLoopActivationOutput
|
188
|
+
|
189
|
+
HumanLoopActivationReasons.member = Shapes::ShapeRef.new(shape: HumanLoopActivationReason)
|
190
|
+
|
191
|
+
HumanLoopConfig.add_member(:human_loop_name, Shapes::ShapeRef.new(shape: HumanLoopName, required: true, location_name: "HumanLoopName"))
|
192
|
+
HumanLoopConfig.add_member(:flow_definition_arn, Shapes::ShapeRef.new(shape: FlowDefinitionArn, required: true, location_name: "FlowDefinitionArn"))
|
193
|
+
HumanLoopConfig.add_member(:data_attributes, Shapes::ShapeRef.new(shape: HumanLoopDataAttributes, location_name: "DataAttributes"))
|
194
|
+
HumanLoopConfig.struct_class = Types::HumanLoopConfig
|
195
|
+
|
196
|
+
HumanLoopDataAttributes.add_member(:content_classifiers, Shapes::ShapeRef.new(shape: ContentClassifiers, location_name: "ContentClassifiers"))
|
197
|
+
HumanLoopDataAttributes.struct_class = Types::HumanLoopDataAttributes
|
198
|
+
|
199
|
+
HumanLoopQuotaExceededException.add_member(:resource_type, Shapes::ShapeRef.new(shape: String, location_name: "ResourceType"))
|
200
|
+
HumanLoopQuotaExceededException.add_member(:quota_code, Shapes::ShapeRef.new(shape: String, location_name: "QuotaCode"))
|
201
|
+
HumanLoopQuotaExceededException.add_member(:service_code, Shapes::ShapeRef.new(shape: String, location_name: "ServiceCode"))
|
202
|
+
HumanLoopQuotaExceededException.struct_class = Types::HumanLoopQuotaExceededException
|
203
|
+
|
164
204
|
IdList.member = Shapes::ShapeRef.new(shape: NonEmptyString)
|
165
205
|
|
166
206
|
NotificationChannel.add_member(:sns_topic_arn, Shapes::ShapeRef.new(shape: SNSTopicArn, required: true, location_name: "SNSTopicArn"))
|
@@ -244,6 +284,7 @@ module Aws::Textract
|
|
244
284
|
o.errors << Shapes::ShapeRef.new(shape: ProvisionedThroughputExceededException)
|
245
285
|
o.errors << Shapes::ShapeRef.new(shape: InternalServerError)
|
246
286
|
o.errors << Shapes::ShapeRef.new(shape: ThrottlingException)
|
287
|
+
o.errors << Shapes::ShapeRef.new(shape: HumanLoopQuotaExceededException)
|
247
288
|
end)
|
248
289
|
|
249
290
|
api.add_operation(:detect_document_text, Seahorse::Model::Operation.new.tap do |o|
|
@@ -10,5 +10,31 @@ module Aws::Textract
|
|
10
10
|
|
11
11
|
extend Aws::Errors::DynamicErrors
|
12
12
|
|
13
|
+
class HumanLoopQuotaExceededException < ServiceError
|
14
|
+
|
15
|
+
# @param [Seahorse::Client::RequestContext] context
|
16
|
+
# @param [String] message
|
17
|
+
# @param [Aws::Textract::Types::HumanLoopQuotaExceededException] data
|
18
|
+
def initialize(context, message, data = Aws::EmptyStructure.new)
|
19
|
+
super(context, message, data)
|
20
|
+
end
|
21
|
+
|
22
|
+
# @return [String]
|
23
|
+
def resource_type
|
24
|
+
@data[:resource_type]
|
25
|
+
end
|
26
|
+
|
27
|
+
# @return [String]
|
28
|
+
def quota_code
|
29
|
+
@data[:quota_code]
|
30
|
+
end
|
31
|
+
|
32
|
+
# @return [String]
|
33
|
+
def service_code
|
34
|
+
@data[:service_code]
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
|
13
39
|
end
|
14
40
|
end
|
@@ -21,31 +21,47 @@ module Aws::Textract
|
|
21
21
|
# },
|
22
22
|
# },
|
23
23
|
# feature_types: ["TABLES"], # required, accepts TABLES, FORMS
|
24
|
+
# human_loop_config: {
|
25
|
+
# human_loop_name: "HumanLoopName", # required
|
26
|
+
# flow_definition_arn: "FlowDefinitionArn", # required
|
27
|
+
# data_attributes: {
|
28
|
+
# content_classifiers: ["FreeOfPersonallyIdentifiableInformation"], # accepts FreeOfPersonallyIdentifiableInformation, FreeOfAdultContent
|
29
|
+
# },
|
30
|
+
# },
|
24
31
|
# }
|
25
32
|
#
|
26
33
|
# @!attribute [rw] document
|
27
34
|
# The input document as base64-encoded bytes or an Amazon S3 object.
|
28
35
|
# If you use the AWS CLI to call Amazon Textract operations, you
|
29
|
-
# can't pass image bytes. The document must be an image in
|
30
|
-
# format.
|
36
|
+
# can't pass image bytes. The document must be an image in JPEG or
|
37
|
+
# PNG format.
|
31
38
|
#
|
32
|
-
# If you
|
33
|
-
# need to base64-encode image bytes passed using the `Bytes`
|
39
|
+
# If you're using an AWS SDK to call Amazon Textract, you might not
|
40
|
+
# need to base64-encode image bytes that are passed using the `Bytes`
|
41
|
+
# field.
|
34
42
|
# @return [Types::Document]
|
35
43
|
#
|
36
44
|
# @!attribute [rw] feature_types
|
37
45
|
# A list of the types of analysis to perform. Add TABLES to the list
|
38
|
-
# to return information about the tables detected in the
|
39
|
-
# document. Add FORMS to return detected
|
40
|
-
#
|
41
|
-
#
|
46
|
+
# to return information about the tables that are detected in the
|
47
|
+
# input document. Add FORMS to return detected form data. To perform
|
48
|
+
# both types of analysis, add TABLES and FORMS to `FeatureTypes`. All
|
49
|
+
# lines and words detected in the document are included in the
|
50
|
+
# response (including text that isn't related to the value of
|
51
|
+
# `FeatureTypes`).
|
42
52
|
# @return [Array<String>]
|
43
53
|
#
|
54
|
+
# @!attribute [rw] human_loop_config
|
55
|
+
# Sets the configuration for the human in the loop workflow for
|
56
|
+
# analyzing documents.
|
57
|
+
# @return [Types::HumanLoopConfig]
|
58
|
+
#
|
44
59
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/AnalyzeDocumentRequest AWS API Documentation
|
45
60
|
#
|
46
61
|
class AnalyzeDocumentRequest < Struct.new(
|
47
62
|
:document,
|
48
|
-
:feature_types
|
63
|
+
:feature_types,
|
64
|
+
:human_loop_config)
|
49
65
|
include Aws::Structure
|
50
66
|
end
|
51
67
|
|
@@ -55,24 +71,34 @@ module Aws::Textract
|
|
55
71
|
# @return [Types::DocumentMetadata]
|
56
72
|
#
|
57
73
|
# @!attribute [rw] blocks
|
58
|
-
# The
|
74
|
+
# The items that are detected and analyzed by `AnalyzeDocument`.
|
59
75
|
# @return [Array<Types::Block>]
|
60
76
|
#
|
77
|
+
# @!attribute [rw] human_loop_activation_output
|
78
|
+
# Shows the results of the human in the loop evaluation.
|
79
|
+
# @return [Types::HumanLoopActivationOutput]
|
80
|
+
#
|
81
|
+
# @!attribute [rw] analyze_document_model_version
|
82
|
+
# The version of the model used to analyze the document.
|
83
|
+
# @return [String]
|
84
|
+
#
|
61
85
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/AnalyzeDocumentResponse AWS API Documentation
|
62
86
|
#
|
63
87
|
class AnalyzeDocumentResponse < Struct.new(
|
64
88
|
:document_metadata,
|
65
|
-
:blocks
|
89
|
+
:blocks,
|
90
|
+
:human_loop_activation_output,
|
91
|
+
:analyze_document_model_version)
|
66
92
|
include Aws::Structure
|
67
93
|
end
|
68
94
|
|
69
95
|
# A `Block` represents items that are recognized in a document within a
|
70
96
|
# group of pixels close to each other. The information returned in a
|
71
|
-
# `Block` depends on the type of operation. In
|
72
|
-
# (for example DetectDocumentText), you get information about
|
73
|
-
# detected words and lines of text. In text analysis (for example
|
97
|
+
# `Block` object depends on the type of operation. In text detection for
|
98
|
+
# documents (for example DetectDocumentText), you get information about
|
99
|
+
# the detected words and lines of text. In text analysis (for example
|
74
100
|
# AnalyzeDocument), you can also get information about the fields,
|
75
|
-
# tables and selection elements that are detected in the document.
|
101
|
+
# tables, and selection elements that are detected in the document.
|
76
102
|
#
|
77
103
|
# An array of `Block` objects is returned by both synchronous and
|
78
104
|
# asynchronous operations. In synchronous operations, such as
|
@@ -87,51 +113,51 @@ module Aws::Textract
|
|
87
113
|
# [1]: https://docs.aws.amazon.com/textract/latest/dg/how-it-works.html
|
88
114
|
#
|
89
115
|
# @!attribute [rw] block_type
|
90
|
-
# The type of text that's recognized
|
91
|
-
#
|
116
|
+
# The type of text item that's recognized. In operations for text
|
117
|
+
# detection, the following types are returned:
|
92
118
|
#
|
93
|
-
# * *PAGE* - Contains a list of the LINE Block objects that are
|
119
|
+
# * *PAGE* - Contains a list of the LINE `Block` objects that are
|
94
120
|
# detected on a document page.
|
95
121
|
#
|
96
122
|
# * *WORD* - A word detected on a document page. A word is one or more
|
97
123
|
# ISO basic Latin script characters that aren't separated by
|
98
124
|
# spaces.
|
99
125
|
#
|
100
|
-
# * *LINE* - A string of tab-delimited, contiguous words that
|
126
|
+
# * *LINE* - A string of tab-delimited, contiguous words that are
|
101
127
|
# detected on a document page.
|
102
128
|
#
|
103
129
|
# In text analysis operations, the following types are returned:
|
104
130
|
#
|
105
|
-
# * *PAGE* - Contains a list of child Block objects that are
|
106
|
-
# on a document page.
|
131
|
+
# * *PAGE* - Contains a list of child `Block` objects that are
|
132
|
+
# detected on a document page.
|
107
133
|
#
|
108
|
-
# * *KEY\_VALUE\_SET* - Stores the KEY and VALUE Block objects for
|
109
|
-
#
|
110
|
-
# field to determine if a KEY\_VALUE\_SET object is a
|
111
|
-
# object or a VALUE Block object.
|
134
|
+
# * *KEY\_VALUE\_SET* - Stores the KEY and VALUE `Block` objects for
|
135
|
+
# linked text that's detected on a document page. Use the
|
136
|
+
# `EntityType` field to determine if a KEY\_VALUE\_SET object is a
|
137
|
+
# KEY `Block` object or a VALUE `Block` object.
|
112
138
|
#
|
113
|
-
# * *WORD* - A word detected on a document page. A word is one
|
114
|
-
# ISO basic Latin script characters that aren't separated
|
115
|
-
#
|
139
|
+
# * *WORD* - A word that's detected on a document page. A word is one
|
140
|
+
# or more ISO basic Latin script characters that aren't separated
|
141
|
+
# by spaces.
|
116
142
|
#
|
117
|
-
# * *LINE* - A string of tab-delimited, contiguous words that
|
143
|
+
# * *LINE* - A string of tab-delimited, contiguous words that are
|
118
144
|
# detected on a document page.
|
119
145
|
#
|
120
146
|
# * *TABLE* - A table that's detected on a document page. A table is
|
121
|
-
#
|
122
|
-
# cell span of
|
147
|
+
# grid-based information with two or more rows or columns, with a
|
148
|
+
# cell span of one row and one column each.
|
123
149
|
#
|
124
150
|
# * *CELL* - A cell within a detected table. The cell is the parent of
|
125
151
|
# the block that contains the text in the cell.
|
126
152
|
#
|
127
|
-
# * *SELECTION\_ELEMENT* - A
|
128
|
-
# or
|
129
|
-
# `SelectionStatus` to determine the
|
130
|
-
# element.
|
153
|
+
# * *SELECTION\_ELEMENT* - A selection element such as an option
|
154
|
+
# button (radio button) or a check box that's detected on a
|
155
|
+
# document page. Use the value of `SelectionStatus` to determine the
|
156
|
+
# status of the selection element.
|
131
157
|
# @return [String]
|
132
158
|
#
|
133
159
|
# @!attribute [rw] confidence
|
134
|
-
# The confidence that Amazon Textract has in the accuracy of the
|
160
|
+
# The confidence score that Amazon Textract has in the accuracy of the
|
135
161
|
# recognized text and the accuracy of the geometry points around the
|
136
162
|
# recognized text.
|
137
163
|
# @return [Float]
|
@@ -153,13 +179,17 @@ module Aws::Textract
|
|
153
179
|
# @return [Integer]
|
154
180
|
#
|
155
181
|
# @!attribute [rw] row_span
|
156
|
-
# The number of rows that a table spans.
|
157
|
-
#
|
182
|
+
# The number of rows that a table cell spans. Currently this value is
|
183
|
+
# always 1, even if the number of rows spanned is greater than 1.
|
184
|
+
# `RowSpan` isn't returned by `DetectDocumentText` and
|
185
|
+
# `GetDocumentTextDetection`.
|
158
186
|
# @return [Integer]
|
159
187
|
#
|
160
188
|
# @!attribute [rw] column_span
|
161
|
-
# The number of columns that a table cell spans.
|
162
|
-
#
|
189
|
+
# The number of columns that a table cell spans. Currently this value
|
190
|
+
# is always 1, even if the number of columns spanned is greater than
|
191
|
+
# 1. `ColumnSpan` isn't returned by `DetectDocumentText` and
|
192
|
+
# `GetDocumentTextDetection`.
|
163
193
|
# @return [Integer]
|
164
194
|
#
|
165
195
|
# @!attribute [rw] geometry
|
@@ -174,7 +204,7 @@ module Aws::Textract
|
|
174
204
|
# @return [String]
|
175
205
|
#
|
176
206
|
# @!attribute [rw] relationships
|
177
|
-
# A list of child blocks of the current block. For example a LINE
|
207
|
+
# A list of child blocks of the current block. For example, a LINE
|
178
208
|
# object has child blocks for each WORD block that's part of the line
|
179
209
|
# of text. There aren't Relationship objects in the list for
|
180
210
|
# relationships that don't exist, such as when the current block has
|
@@ -197,18 +227,18 @@ module Aws::Textract
|
|
197
227
|
# @return [Array<String>]
|
198
228
|
#
|
199
229
|
# @!attribute [rw] selection_status
|
200
|
-
# The selection status of a
|
201
|
-
# or
|
230
|
+
# The selection status of a selection element, such as an option
|
231
|
+
# button or check box.
|
202
232
|
# @return [String]
|
203
233
|
#
|
204
234
|
# @!attribute [rw] page
|
205
|
-
# The page
|
235
|
+
# The page on which a block was detected. `Page` is returned by
|
206
236
|
# asynchronous operations. Page values greater than 1 are only
|
207
|
-
# returned for
|
208
|
-
# image (
|
209
|
-
#
|
210
|
-
#
|
211
|
-
#
|
237
|
+
# returned for multipage documents that are in PDF format. A scanned
|
238
|
+
# image (JPEG/PNG), even if it contains multiple document pages, is
|
239
|
+
# considered to be a single-page document. The value of `Page` is
|
240
|
+
# always 1. Synchronous operations don't return `Page` because every
|
241
|
+
# input document is considered to be a single-page document.
|
212
242
|
# @return [Integer]
|
213
243
|
#
|
214
244
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/Block AWS API Documentation
|
@@ -230,11 +260,11 @@ module Aws::Textract
|
|
230
260
|
include Aws::Structure
|
231
261
|
end
|
232
262
|
|
233
|
-
# The bounding box around the
|
234
|
-
# table cell on a document page. The `left`
|
235
|
-
# (y-coordinate) are coordinates that represent
|
236
|
-
# of the bounding box. Note that the upper-left
|
237
|
-
# the origin (0,0).
|
263
|
+
# The bounding box around the detected page, text, key-value pair,
|
264
|
+
# table, table cell, or selection element on a document page. The `left`
|
265
|
+
# (x-coordinate) and `top` (y-coordinate) are coordinates that represent
|
266
|
+
# the top and left sides of the bounding box. Note that the upper-left
|
267
|
+
# corner of the image is the origin (0,0).
|
238
268
|
#
|
239
269
|
# The `top` and `left` values returned are ratios of the overall
|
240
270
|
# document page size. For example, if the input image is 700 x 200
|
@@ -294,11 +324,12 @@ module Aws::Textract
|
|
294
324
|
# @!attribute [rw] document
|
295
325
|
# The input document as base64-encoded bytes or an Amazon S3 object.
|
296
326
|
# If you use the AWS CLI to call Amazon Textract operations, you
|
297
|
-
# can't pass image bytes. The document must be an image in
|
298
|
-
# format.
|
327
|
+
# can't pass image bytes. The document must be an image in JPEG or
|
328
|
+
# PNG format.
|
299
329
|
#
|
300
|
-
# If you
|
301
|
-
# need to base64-encode image bytes passed using the `Bytes`
|
330
|
+
# If you're using an AWS SDK to call Amazon Textract, you might not
|
331
|
+
# need to base64-encode image bytes that are passed using the `Bytes`
|
332
|
+
# field.
|
302
333
|
# @return [Types::Document]
|
303
334
|
#
|
304
335
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/DetectDocumentTextRequest AWS API Documentation
|
@@ -309,20 +340,24 @@ module Aws::Textract
|
|
309
340
|
end
|
310
341
|
|
311
342
|
# @!attribute [rw] document_metadata
|
312
|
-
# Metadata about the document.
|
313
|
-
# detected in the document.
|
343
|
+
# Metadata about the document. It contains the number of pages that
|
344
|
+
# are detected in the document.
|
314
345
|
# @return [Types::DocumentMetadata]
|
315
346
|
#
|
316
347
|
# @!attribute [rw] blocks
|
317
|
-
# An array of Block objects
|
318
|
-
# document.
|
348
|
+
# An array of `Block` objects that contain the text that's detected
|
349
|
+
# in the document.
|
319
350
|
# @return [Array<Types::Block>]
|
320
351
|
#
|
352
|
+
# @!attribute [rw] detect_document_text_model_version
|
353
|
+
# @return [String]
|
354
|
+
#
|
321
355
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/DetectDocumentTextResponse AWS API Documentation
|
322
356
|
#
|
323
357
|
class DetectDocumentTextResponse < Struct.new(
|
324
358
|
:document_metadata,
|
325
|
-
:blocks
|
359
|
+
:blocks,
|
360
|
+
:detect_document_text_model_version)
|
326
361
|
include Aws::Structure
|
327
362
|
end
|
328
363
|
|
@@ -363,17 +398,17 @@ module Aws::Textract
|
|
363
398
|
# }
|
364
399
|
#
|
365
400
|
# @!attribute [rw] bytes
|
366
|
-
# A blob of
|
401
|
+
# A blob of base64-encoded document bytes. The maximum size of a
|
367
402
|
# document that's provided in a blob of bytes is 5 MB. The document
|
368
|
-
# bytes must be in PNG or
|
403
|
+
# bytes must be in PNG or JPEG format.
|
369
404
|
#
|
370
|
-
# If you
|
405
|
+
# If you're using an AWS SDK to call Amazon Textract, you might not
|
371
406
|
# need to base64-encode image bytes passed using the `Bytes` field.
|
372
407
|
# @return [String]
|
373
408
|
#
|
374
409
|
# @!attribute [rw] s3_object
|
375
410
|
# Identifies an S3 object as the document source. The maximum size of
|
376
|
-
# a document stored in an S3 bucket is 5 MB.
|
411
|
+
# a document that's stored in an S3 bucket is 5 MB.
|
377
412
|
# @return [Types::S3Object]
|
378
413
|
#
|
379
414
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/Document AWS API Documentation
|
@@ -387,7 +422,7 @@ module Aws::Textract
|
|
387
422
|
# The Amazon S3 bucket that contains the document to be processed. It's
|
388
423
|
# used by asynchronous operations such as StartDocumentTextDetection.
|
389
424
|
#
|
390
|
-
# The input document can be an image file in
|
425
|
+
# The input document can be an image file in JPEG or PNG format. It can
|
391
426
|
# also be a file in PDF format.
|
392
427
|
#
|
393
428
|
# @note When making an API call, you may pass DocumentLocation
|
@@ -415,7 +450,7 @@ module Aws::Textract
|
|
415
450
|
# Information about the input document.
|
416
451
|
#
|
417
452
|
# @!attribute [rw] pages
|
418
|
-
# The number of pages detected in the document.
|
453
|
+
# The number of pages that are detected in the document.
|
419
454
|
# @return [Integer]
|
420
455
|
#
|
421
456
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/DocumentMetadata AWS API Documentation
|
@@ -425,17 +460,18 @@ module Aws::Textract
|
|
425
460
|
include Aws::Structure
|
426
461
|
end
|
427
462
|
|
428
|
-
# Information about where
|
429
|
-
#
|
463
|
+
# Information about where the following items are located on a document
|
464
|
+
# page: detected page, text, key-value pairs, tables, table cells, and
|
465
|
+
# selection elements.
|
430
466
|
#
|
431
467
|
# @!attribute [rw] bounding_box
|
432
468
|
# An axis-aligned coarse representation of the location of the
|
433
|
-
# recognized
|
469
|
+
# recognized item on the document page.
|
434
470
|
# @return [Types::BoundingBox]
|
435
471
|
#
|
436
472
|
# @!attribute [rw] polygon
|
437
473
|
# Within the bounding box, a fine-grained polygon around the
|
438
|
-
# recognized
|
474
|
+
# recognized item.
|
439
475
|
# @return [Array<Types::Point>]
|
440
476
|
#
|
441
477
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/Geometry AWS API Documentation
|
@@ -457,7 +493,8 @@ module Aws::Textract
|
|
457
493
|
#
|
458
494
|
# @!attribute [rw] job_id
|
459
495
|
# A unique identifier for the document-analysis job. The `JobId` is
|
460
|
-
# returned from `StartDocumentAnalysis`.
|
496
|
+
# returned from `StartDocumentAnalysis`. A `JobId` value is only valid
|
497
|
+
# for 7 days.
|
461
498
|
# @return [String]
|
462
499
|
#
|
463
500
|
# @!attribute [rw] max_results
|
@@ -500,16 +537,19 @@ module Aws::Textract
|
|
500
537
|
# @return [String]
|
501
538
|
#
|
502
539
|
# @!attribute [rw] blocks
|
503
|
-
# The results of the text
|
540
|
+
# The results of the text-analysis operation.
|
504
541
|
# @return [Array<Types::Block>]
|
505
542
|
#
|
506
543
|
# @!attribute [rw] warnings
|
507
|
-
# A list of warnings that occurred during the document
|
544
|
+
# A list of warnings that occurred during the document-analysis
|
508
545
|
# operation.
|
509
546
|
# @return [Array<Types::Warning>]
|
510
547
|
#
|
511
548
|
# @!attribute [rw] status_message
|
512
|
-
# The current status of an asynchronous document
|
549
|
+
# The current status of an asynchronous document-analysis operation.
|
550
|
+
# @return [String]
|
551
|
+
#
|
552
|
+
# @!attribute [rw] analyze_document_model_version
|
513
553
|
# @return [String]
|
514
554
|
#
|
515
555
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/GetDocumentAnalysisResponse AWS API Documentation
|
@@ -520,7 +560,8 @@ module Aws::Textract
|
|
520
560
|
:next_token,
|
521
561
|
:blocks,
|
522
562
|
:warnings,
|
523
|
-
:status_message
|
563
|
+
:status_message,
|
564
|
+
:analyze_document_model_version)
|
524
565
|
include Aws::Structure
|
525
566
|
end
|
526
567
|
|
@@ -535,7 +576,8 @@ module Aws::Textract
|
|
535
576
|
#
|
536
577
|
# @!attribute [rw] job_id
|
537
578
|
# A unique identifier for the text detection job. The `JobId` is
|
538
|
-
# returned from `StartDocumentTextDetection`.
|
579
|
+
# returned from `StartDocumentTextDetection`. A `JobId` value is only
|
580
|
+
# valid for 7 days.
|
539
581
|
# @return [String]
|
540
582
|
#
|
541
583
|
# @!attribute [rw] max_results
|
@@ -582,13 +624,16 @@ module Aws::Textract
|
|
582
624
|
# @return [Array<Types::Block>]
|
583
625
|
#
|
584
626
|
# @!attribute [rw] warnings
|
585
|
-
# A list of warnings that occurred during the
|
586
|
-
#
|
627
|
+
# A list of warnings that occurred during the text-detection operation
|
628
|
+
# for the document.
|
587
629
|
# @return [Array<Types::Warning>]
|
588
630
|
#
|
589
631
|
# @!attribute [rw] status_message
|
590
|
-
# The current status of an asynchronous
|
591
|
-
#
|
632
|
+
# The current status of an asynchronous text-detection operation for
|
633
|
+
# the document.
|
634
|
+
# @return [String]
|
635
|
+
#
|
636
|
+
# @!attribute [rw] detect_document_text_model_version
|
592
637
|
# @return [String]
|
593
638
|
#
|
594
639
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/GetDocumentTextDetectionResponse AWS API Documentation
|
@@ -599,7 +644,114 @@ module Aws::Textract
|
|
599
644
|
:next_token,
|
600
645
|
:blocks,
|
601
646
|
:warnings,
|
602
|
-
:status_message
|
647
|
+
:status_message,
|
648
|
+
:detect_document_text_model_version)
|
649
|
+
include Aws::Structure
|
650
|
+
end
|
651
|
+
|
652
|
+
# Shows the results of the human in the loop evaluation. If there is no
|
653
|
+
# HumanLoopArn, the input did not trigger human review.
|
654
|
+
#
|
655
|
+
# @!attribute [rw] human_loop_arn
|
656
|
+
# The Amazon Resource Name (ARN) of the HumanLoop created.
|
657
|
+
# @return [String]
|
658
|
+
#
|
659
|
+
# @!attribute [rw] human_loop_activation_reasons
|
660
|
+
# Shows if and why human review was needed.
|
661
|
+
# @return [Array<String>]
|
662
|
+
#
|
663
|
+
# @!attribute [rw] human_loop_activation_conditions_evaluation_results
|
664
|
+
# Shows the result of condition evaluations, including those
|
665
|
+
# conditions which activated a human review.
|
666
|
+
# @return [String]
|
667
|
+
#
|
668
|
+
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/HumanLoopActivationOutput AWS API Documentation
|
669
|
+
#
|
670
|
+
class HumanLoopActivationOutput < Struct.new(
|
671
|
+
:human_loop_arn,
|
672
|
+
:human_loop_activation_reasons,
|
673
|
+
:human_loop_activation_conditions_evaluation_results)
|
674
|
+
include Aws::Structure
|
675
|
+
end
|
676
|
+
|
677
|
+
# Sets up the human review workflow the document will be sent to if one
|
678
|
+
# of the conditions is met. You can also set certain attributes of the
|
679
|
+
# image before review.
|
680
|
+
#
|
681
|
+
# @note When making an API call, you may pass HumanLoopConfig
|
682
|
+
# data as a hash:
|
683
|
+
#
|
684
|
+
# {
|
685
|
+
# human_loop_name: "HumanLoopName", # required
|
686
|
+
# flow_definition_arn: "FlowDefinitionArn", # required
|
687
|
+
# data_attributes: {
|
688
|
+
# content_classifiers: ["FreeOfPersonallyIdentifiableInformation"], # accepts FreeOfPersonallyIdentifiableInformation, FreeOfAdultContent
|
689
|
+
# },
|
690
|
+
# }
|
691
|
+
#
|
692
|
+
# @!attribute [rw] human_loop_name
|
693
|
+
# The name of the human workflow used for this image. This should be
|
694
|
+
# kept unique within a region.
|
695
|
+
# @return [String]
|
696
|
+
#
|
697
|
+
# @!attribute [rw] flow_definition_arn
|
698
|
+
# The Amazon Resource Name (ARN) of the flow definition.
|
699
|
+
# @return [String]
|
700
|
+
#
|
701
|
+
# @!attribute [rw] data_attributes
|
702
|
+
# Sets attributes of the input data.
|
703
|
+
# @return [Types::HumanLoopDataAttributes]
|
704
|
+
#
|
705
|
+
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/HumanLoopConfig AWS API Documentation
|
706
|
+
#
|
707
|
+
class HumanLoopConfig < Struct.new(
|
708
|
+
:human_loop_name,
|
709
|
+
:flow_definition_arn,
|
710
|
+
:data_attributes)
|
711
|
+
include Aws::Structure
|
712
|
+
end
|
713
|
+
|
714
|
+
# Allows you to set attributes of the image. Currently, you can declare
|
715
|
+
# an image as free of personally identifiable information and adult
|
716
|
+
# content.
|
717
|
+
#
|
718
|
+
# @note When making an API call, you may pass HumanLoopDataAttributes
|
719
|
+
# data as a hash:
|
720
|
+
#
|
721
|
+
# {
|
722
|
+
# content_classifiers: ["FreeOfPersonallyIdentifiableInformation"], # accepts FreeOfPersonallyIdentifiableInformation, FreeOfAdultContent
|
723
|
+
# }
|
724
|
+
#
|
725
|
+
# @!attribute [rw] content_classifiers
|
726
|
+
# Sets whether the input image is free of personally identifiable
|
727
|
+
# information or adult content.
|
728
|
+
# @return [Array<String>]
|
729
|
+
#
|
730
|
+
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/HumanLoopDataAttributes AWS API Documentation
|
731
|
+
#
|
732
|
+
class HumanLoopDataAttributes < Struct.new(
|
733
|
+
:content_classifiers)
|
734
|
+
include Aws::Structure
|
735
|
+
end
|
736
|
+
|
737
|
+
# Indicates you have exceeded the maximum number of active human in the
|
738
|
+
# loop workflows available.
|
739
|
+
#
|
740
|
+
# @!attribute [rw] resource_type
|
741
|
+
# @return [String]
|
742
|
+
#
|
743
|
+
# @!attribute [rw] quota_code
|
744
|
+
# @return [String]
|
745
|
+
#
|
746
|
+
# @!attribute [rw] service_code
|
747
|
+
# @return [String]
|
748
|
+
#
|
749
|
+
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/HumanLoopQuotaExceededException AWS API Documentation
|
750
|
+
#
|
751
|
+
class HumanLoopQuotaExceededException < Struct.new(
|
752
|
+
:resource_type,
|
753
|
+
:quota_code,
|
754
|
+
:service_code)
|
603
755
|
include Aws::Structure
|
604
756
|
end
|
605
757
|
|
@@ -634,10 +786,10 @@ module Aws::Textract
|
|
634
786
|
end
|
635
787
|
|
636
788
|
# The X and Y coordinates of a point on a document page. The X and Y
|
637
|
-
# values returned are ratios of the overall document page size.
|
638
|
-
# example, if the input document is 700 x 200 and the operation
|
639
|
-
# X=0.5 and Y=0.25, then the point is at the (350,50) pixel
|
640
|
-
# on the document page.
|
789
|
+
# values that are returned are ratios of the overall document page size.
|
790
|
+
# For example, if the input document is 700 x 200 and the operation
|
791
|
+
# returns X=0.5 and Y=0.25, then the point is at the (350,50) pixel
|
792
|
+
# coordinate on the document page.
|
641
793
|
#
|
642
794
|
# An array of `Point` objects, `Polygon`, is returned by
|
643
795
|
# DetectDocumentText. `Polygon` represents a fine-grained polygon around
|
@@ -669,7 +821,11 @@ module Aws::Textract
|
|
669
821
|
#
|
670
822
|
# @!attribute [rw] type
|
671
823
|
# The type of relationship that the blocks in the IDs array have with
|
672
|
-
# the current block. The relationship can be `VALUE` or `CHILD`.
|
824
|
+
# the current block. The relationship can be `VALUE` or `CHILD`. A
|
825
|
+
# relationship of type VALUE is a list that contains the ID of the
|
826
|
+
# VALUE block that's associated with the KEY of a key-value pair. A
|
827
|
+
# relationship of type CHILD is a list of IDs that identify WORD
|
828
|
+
# blocks.
|
673
829
|
# @return [String]
|
674
830
|
#
|
675
831
|
# @!attribute [rw] ids
|
@@ -707,8 +863,9 @@ module Aws::Textract
|
|
707
863
|
# @return [String]
|
708
864
|
#
|
709
865
|
# @!attribute [rw] name
|
710
|
-
# The file name of the input document.
|
711
|
-
# or
|
866
|
+
# The file name of the input document. Synchronous operations can use
|
867
|
+
# image files that are in JPEG or PNG format. Asynchronous operations
|
868
|
+
# also support PDF format files.
|
712
869
|
# @return [String]
|
713
870
|
#
|
714
871
|
# @!attribute [rw] version
|
@@ -752,11 +909,11 @@ module Aws::Textract
|
|
752
909
|
# @!attribute [rw] feature_types
|
753
910
|
# A list of the types of analysis to perform. Add TABLES to the list
|
754
911
|
# to return information about the tables that are detected in the
|
755
|
-
# input document. Add FORMS to return detected
|
756
|
-
#
|
757
|
-
#
|
758
|
-
# (
|
759
|
-
#
|
912
|
+
# input document. Add FORMS to return detected form data. To perform
|
913
|
+
# both types of analysis, add TABLES and FORMS to `FeatureTypes`. All
|
914
|
+
# lines and words detected in the document are included in the
|
915
|
+
# response (including text that isn't related to the value of
|
916
|
+
# `FeatureTypes`).
|
760
917
|
# @return [Array<String>]
|
761
918
|
#
|
762
919
|
# @!attribute [rw] client_request_token
|
@@ -764,13 +921,19 @@ module Aws::Textract
|
|
764
921
|
# you use the same token with multiple `StartDocumentAnalysis`
|
765
922
|
# requests, the same `JobId` is returned. Use `ClientRequestToken` to
|
766
923
|
# prevent the same job from being accidentally started more than once.
|
924
|
+
# For more information, see [Calling Amazon Textract Asynchronous
|
925
|
+
# Operations][1].
|
926
|
+
#
|
927
|
+
#
|
928
|
+
#
|
929
|
+
# [1]: https://docs.aws.amazon.com/textract/latest/dg/api-async.html
|
767
930
|
# @return [String]
|
768
931
|
#
|
769
932
|
# @!attribute [rw] job_tag
|
770
|
-
# An identifier you specify that's included in the completion
|
771
|
-
# notification
|
772
|
-
#
|
773
|
-
#
|
933
|
+
# An identifier that you specify that's included in the completion
|
934
|
+
# notification published to the Amazon SNS topic. For example, you can
|
935
|
+
# use `JobTag` to identify the type of document that the completion
|
936
|
+
# notification corresponds to (such as a tax form or a receipt).
|
774
937
|
# @return [String]
|
775
938
|
#
|
776
939
|
# @!attribute [rw] notification_channel
|
@@ -791,7 +954,8 @@ module Aws::Textract
|
|
791
954
|
|
792
955
|
# @!attribute [rw] job_id
|
793
956
|
# The identifier for the document analysis job. Use `JobId` to
|
794
|
-
# identify the job in a subsequent call to `GetDocumentAnalysis`.
|
957
|
+
# identify the job in a subsequent call to `GetDocumentAnalysis`. A
|
958
|
+
# `JobId` value is only valid for 7 days.
|
795
959
|
# @return [String]
|
796
960
|
#
|
797
961
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/StartDocumentAnalysisResponse AWS API Documentation
|
@@ -829,13 +993,19 @@ module Aws::Textract
|
|
829
993
|
# you use the same token with multiple `StartDocumentTextDetection`
|
830
994
|
# requests, the same `JobId` is returned. Use `ClientRequestToken` to
|
831
995
|
# prevent the same job from being accidentally started more than once.
|
996
|
+
# For more information, see [Calling Amazon Textract Asynchronous
|
997
|
+
# Operations][1].
|
998
|
+
#
|
999
|
+
#
|
1000
|
+
#
|
1001
|
+
# [1]: https://docs.aws.amazon.com/textract/latest/dg/api-async.html
|
832
1002
|
# @return [String]
|
833
1003
|
#
|
834
1004
|
# @!attribute [rw] job_tag
|
835
|
-
# An identifier you specify that's included in the completion
|
836
|
-
# notification
|
837
|
-
#
|
838
|
-
#
|
1005
|
+
# An identifier that you specify that's included in the completion
|
1006
|
+
# notification published to the Amazon SNS topic. For example, you can
|
1007
|
+
# use `JobTag` to identify the type of document that the completion
|
1008
|
+
# notification corresponds to (such as a tax form or a receipt).
|
839
1009
|
# @return [String]
|
840
1010
|
#
|
841
1011
|
# @!attribute [rw] notification_channel
|
@@ -854,8 +1024,10 @@ module Aws::Textract
|
|
854
1024
|
end
|
855
1025
|
|
856
1026
|
# @!attribute [rw] job_id
|
857
|
-
# The identifier
|
858
|
-
# identify the job in a subsequent call to
|
1027
|
+
# The identifier of the text detection job for the document. Use
|
1028
|
+
# `JobId` to identify the job in a subsequent call to
|
1029
|
+
# `GetDocumentTextDetection`. A `JobId` value is only valid for 7
|
1030
|
+
# days.
|
859
1031
|
# @return [String]
|
860
1032
|
#
|
861
1033
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/StartDocumentTextDetectionResponse AWS API Documentation
|
@@ -866,7 +1038,7 @@ module Aws::Textract
|
|
866
1038
|
end
|
867
1039
|
|
868
1040
|
# A warning about an issue that occurred during asynchronous text
|
869
|
-
# analysis (StartDocumentAnalysis) or asynchronous document
|
1041
|
+
# analysis (StartDocumentAnalysis) or asynchronous document text
|
870
1042
|
# detection (StartDocumentTextDetection).
|
871
1043
|
#
|
872
1044
|
# @!attribute [rw] error_code
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aws-sdk-textract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.12.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Amazon Web Services
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-12-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: aws-sdk-core
|