aws-sdk-textract 1.3.0 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e4cca400ed79e0db32dc117da71bcf9e64c8ea80
4
- data.tar.gz: 15250eeb7f588474ab18bc9db8b2709745663476
3
+ metadata.gz: bb5796435f32cf464a826d8a950bfd4530ce0653
4
+ data.tar.gz: '099358286d8dee94332f1e18da3028c18abd5d8b'
5
5
  SHA512:
6
- metadata.gz: a4e8418f8541deee215b9eb5838d8e3e5648ed5dd3b3315447c5fd11a84ec010c14932be840808528a1317a1f18dd1c55e8199a815fa3b170405c456303af9ba
7
- data.tar.gz: be85b0fc679517abc96e7cfba7905bece767552a3108117252c6b60a759bf044da86a0b5a1e07529682d320433fb9f79b509ab16a767abd705669f44dbd8339e
6
+ metadata.gz: 0f45a24e21f46c9f0c44a619e0e2b272288c42fe68484cfa997f280cbb3e8bf35db471565e620154e6f95898f821da77500554801e40b628488333632961adb5
7
+ data.tar.gz: da14d00c06ecc20b0b246c7a7b5235c2582460356a4d4c1d58f6cda494de28cb7d76e24e7d0462692d6fd9dc1e6bbae417e64bfa28db714cd867ad735e3fd554
@@ -42,6 +42,6 @@ require_relative 'aws-sdk-textract/customizations'
42
42
  # @service
43
43
  module Aws::Textract
44
44
 
45
- GEM_VERSION = '1.3.0'
45
+ GEM_VERSION = '1.4.0'
46
46
 
47
47
  end
@@ -209,40 +209,98 @@ module Aws::Textract
209
209
  # When `true`, request parameters are validated before
210
210
  # sending the request.
211
211
  #
212
+ # @option options [URI::HTTP,String] :http_proxy A proxy to send
213
+ # requests through. Formatted like 'http://proxy.com:123'.
214
+ #
215
+ # @option options [Float] :http_open_timeout (15) The number of
216
+ # seconds to wait when opening a HTTP session before raising a
217
+ # `Timeout::Error`.
218
+ #
219
+ # @option options [Integer] :http_read_timeout (60) The default
220
+ # number of seconds to wait for response data. This value can
221
+ # safely be set
222
+ # per-request on the session yielded by {#session_for}.
223
+ #
224
+ # @option options [Float] :http_idle_timeout (5) The number of
225
+ # seconds a connection is allowed to sit idle before it is
226
+ # considered stale. Stale connections are closed and removed
227
+ # from the pool before making a request.
228
+ #
229
+ # @option options [Float] :http_continue_timeout (1) The number of
230
+ # seconds to wait for a 100-continue response before sending the
231
+ # request body. This option has no effect unless the request has
232
+ # "Expect" header set to "100-continue". Defaults to `nil` which
233
+ # disables this behaviour. This value can safely be set per
234
+ # request on the session yielded by {#session_for}.
235
+ #
236
+ # @option options [Boolean] :http_wire_trace (false) When `true`,
237
+ # HTTP debug output will be sent to the `:logger`.
238
+ #
239
+ # @option options [Boolean] :ssl_verify_peer (true) When `true`,
240
+ # SSL peer certificates are verified when establishing a
241
+ # connection.
242
+ #
243
+ # @option options [String] :ssl_ca_bundle Full path to the SSL
244
+ # certificate authority bundle file that should be used when
245
+ # verifying peer certificates. If you do not pass
246
+ # `:ssl_ca_bundle` or `:ssl_ca_directory` the system default
247
+ # will be used if available.
248
+ #
249
+ # @option options [String] :ssl_ca_directory Full path of the
250
+ # directory that contains the unbundled SSL certificate
251
+ # authority files for verifying peer certificates. If you do
252
+ # not pass `:ssl_ca_bundle` or `:ssl_ca_directory` the
253
+ # system default will be used if available.
254
+ #
212
255
  def initialize(*args)
213
256
  super
214
257
  end
215
258
 
216
259
  # @!group API Operations
217
260
 
218
- # Analyzes an input document for relationships in the detected text and
219
- # tables.
261
+ # Analyzes an input document for relationships between detected items.
220
262
  #
221
- # Two types of information are returned:
263
+ # The types of information returned are as follows:
222
264
  #
223
265
  # * Words and lines that are related to nearby lines and words. The
224
- # related information is returned in two Block objects: a KEY Block
225
- # object and a VALUE Block object. For example, *Name: Ana Silva
226
- # Carolina* contains a key and value. *Name:* is the key. *Ana Silva
227
- # Carolina* is the value.
266
+ # related information is returned in two Block objects each of type
267
+ # `KEY_VALUE_SET`\: a KEY Block object and a VALUE Block object. For
268
+ # example, *Name: Ana Silva Carolina* contains a key and value.
269
+ # *Name:* is the key. *Ana Silva Carolina* is the value.
270
+ #
271
+ # * Table and table cell data. A TABLE Block object contains information
272
+ # about a detected table. A CELL Block object is returned for each
273
+ # cell in a table.
274
+ #
275
+ # * Selectable elements such as checkboxes and radio buttons. A
276
+ # SELECTION\_ELEMENT Block object contains information about a
277
+ # selectable element.
228
278
  #
229
- # * Table and table cell data. A TABLE Block contains information about
230
- # a detected table. A CELL block is returned for each cell in a table.
279
+ # * Lines and words of text. A LINE Block object contains one or more
280
+ # WORD Block objects.
231
281
  #
232
282
  # You can choose which type of analysis to perform by specifying the
233
283
  # `FeatureTypes` list.
234
284
  #
235
- # The output is returned in a list of `BLOCK` objects (Blocks). For more
236
- # information, see how-it-works-analyzing.
285
+ # The output is returned in a list of `BLOCK` objects.
237
286
  #
238
287
  # `AnalyzeDocument` is a synchronous operation. To analyze documents
239
288
  # asynchronously, use StartDocumentAnalysis.
240
289
  #
290
+ # For more information, see [Document Text Analysis][1].
291
+ #
292
+ #
293
+ #
294
+ # [1]: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-analyzing.html
295
+ #
241
296
  # @option params [required, Types::Document] :document
242
297
  # The input document as base64-encoded bytes or an Amazon S3 object. If
243
298
  # you use the AWS CLI to call Amazon Textract operations, you can't
244
299
  # pass image bytes. The document must be an image in JPG or PNG format.
245
300
  #
301
+ # If you are using an AWS SDK to call Amazon Textract, you might not
302
+ # need to base64-encode image bytes passed using the `Bytes` field.
303
+ #
246
304
  # @option params [required, Array<String>] :feature_types
247
305
  # A list of the types of analysis to perform. Add TABLES to the list to
248
306
  # return information about the tables detected in the input document.
@@ -273,7 +331,7 @@ module Aws::Textract
273
331
  #
274
332
  # resp.document_metadata.pages #=> Integer
275
333
  # resp.blocks #=> Array
276
- # resp.blocks[0].block_type #=> String, one of "KEY_VALUE_SET", "PAGE", "LINE", "WORD", "TABLE", "CELL"
334
+ # resp.blocks[0].block_type #=> String, one of "KEY_VALUE_SET", "PAGE", "LINE", "WORD", "TABLE", "CELL", "SELECTION_ELEMENT"
277
335
  # resp.blocks[0].confidence #=> Float
278
336
  # resp.blocks[0].text #=> String
279
337
  # resp.blocks[0].row_index #=> Integer
@@ -294,6 +352,7 @@ module Aws::Textract
294
352
  # resp.blocks[0].relationships[0].ids[0] #=> String
295
353
  # resp.blocks[0].entity_types #=> Array
296
354
  # resp.blocks[0].entity_types[0] #=> String, one of "KEY", "VALUE"
355
+ # resp.blocks[0].selection_status #=> String, one of "SELECTED", "NOT_SELECTED"
297
356
  # resp.blocks[0].page #=> Integer
298
357
  #
299
358
  # @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/AnalyzeDocument AWS API Documentation
@@ -308,17 +367,31 @@ module Aws::Textract
308
367
  # Detects text in the input document. Amazon Textract can detect lines
309
368
  # of text and the words that make up a line of text. The input document
310
369
  # must be an image in JPG or PNG format. `DetectDocumentText` returns
311
- # the detected text in an array of Block objects. For more information,
312
- # see how-it-works-detecting.
370
+ # the detected text in an array of Block objects.
371
+ #
372
+ # Each document page has an associated `Block` of type PAGE. Each
373
+ # PAGE `Block` object is the parent of LINE `Block` objects that
374
+ # represent the lines of detected text on a page. A LINE `Block` object
375
+ # is a parent for each word that makes up the line. Words are
376
+ # represented by `Block` objects of type WORD.
313
377
  #
314
378
  # `DetectDocumentText` is a synchronous operation. To analyze documents
315
379
  # asynchronously, use StartDocumentTextDetection.
316
380
  #
381
+ # For more information, see [Document Text Detection][1].
382
+ #
383
+ #
384
+ #
385
+ # [1]: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-detecting.html
386
+ #
317
387
  # @option params [required, Types::Document] :document
318
388
  # The input document as base64-encoded bytes or an Amazon S3 object. If
319
389
  # you use the AWS CLI to call Amazon Textract operations, you can't
320
390
  # pass image bytes. The document must be an image in JPG or PNG format.
321
391
  #
392
+ # If you are using an AWS SDK to call Amazon Textract, you might not
393
+ # need to base64-encode image bytes passed using the `Bytes` field.
394
+ #
322
395
  # @return [Types::DetectDocumentTextResponse] Returns a {Seahorse::Client::Response response} object which responds to the following methods:
323
396
  #
324
397
  # * {Types::DetectDocumentTextResponse#document_metadata #document_metadata} => Types::DocumentMetadata
@@ -341,7 +414,7 @@ module Aws::Textract
341
414
  #
342
415
  # resp.document_metadata.pages #=> Integer
343
416
  # resp.blocks #=> Array
344
- # resp.blocks[0].block_type #=> String, one of "KEY_VALUE_SET", "PAGE", "LINE", "WORD", "TABLE", "CELL"
417
+ # resp.blocks[0].block_type #=> String, one of "KEY_VALUE_SET", "PAGE", "LINE", "WORD", "TABLE", "CELL", "SELECTION_ELEMENT"
345
418
  # resp.blocks[0].confidence #=> Float
346
419
  # resp.blocks[0].text #=> String
347
420
  # resp.blocks[0].row_index #=> Integer
@@ -362,6 +435,7 @@ module Aws::Textract
362
435
  # resp.blocks[0].relationships[0].ids[0] #=> String
363
436
  # resp.blocks[0].entity_types #=> Array
364
437
  # resp.blocks[0].entity_types[0] #=> String, one of "KEY", "VALUE"
438
+ # resp.blocks[0].selection_status #=> String, one of "SELECTED", "NOT_SELECTED"
365
439
  # resp.blocks[0].page #=> Integer
366
440
  #
367
441
  # @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/DetectDocumentText AWS API Documentation
@@ -374,7 +448,7 @@ module Aws::Textract
374
448
  end
375
449
 
376
450
  # Gets the results for an Amazon Textract asynchronous operation that
377
- # analyzes text in a document image.
451
+ # analyzes text in a document.
378
452
  #
379
453
  # You start asynchronous text analysis by calling StartDocumentAnalysis,
380
454
  # which returns a job identifier (`JobId`). When the text analysis
@@ -386,8 +460,25 @@ module Aws::Textract
386
460
  # `GetDocumentAnalysis`, and pass the job identifier (`JobId`) from the
387
461
  # initial call to `StartDocumentAnalysis`.
388
462
  #
389
- # `GetDocumentAnalysis` returns an array of Block objects. For more
390
- # information, see how-it-works-analyzing.
463
+ # `GetDocumentAnalysis` returns an array of Block objects. The following
464
+ # types of information are returned:
465
+ #
466
+ # * Words and lines that are related to nearby lines and words. The
467
+ # related information is returned in two Block objects each of type
468
+ # `KEY_VALUE_SET`\: a KEY Block object and a VALUE Block object. For
469
+ # example, *Name: Ana Silva Carolina* contains a key and value.
470
+ # *Name:* is the key. *Ana Silva Carolina* is the value.
471
+ #
472
+ # * Table and table cell data. A TABLE Block object contains information
473
+ # about a detected table. A CELL Block object is returned for each
474
+ # cell in a table.
475
+ #
476
+ # * Selectable elements such as checkboxes and radio buttons. A
477
+ # SELECTION\_ELEMENT Block object contains information about a
478
+ # selectable element.
479
+ #
480
+ # * Lines and words of text. A LINE Block object contains one or more
481
+ # WORD Block objects.
391
482
  #
392
483
  # Use the `MaxResults` parameter to limit the number of blocks returned.
393
484
  # If there are more results than specified in `MaxResults`, the value of
@@ -397,6 +488,12 @@ module Aws::Textract
397
488
  # with the token value that's returned from the previous call to
398
489
  # `GetDocumentAnalysis`.
399
490
  #
491
+ # For more information, see [Document Text Analysis][1].
492
+ #
493
+ #
494
+ #
495
+ # [1]: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-analyzing.html
496
+ #
400
497
  # @option params [required, String] :job_id
401
498
  # A unique identifier for the text-detection job. The `JobId` is
402
499
  # returned from `StartDocumentAnalysis`.
@@ -436,7 +533,7 @@ module Aws::Textract
436
533
  # resp.job_status #=> String, one of "IN_PROGRESS", "SUCCEEDED", "FAILED", "PARTIAL_SUCCESS"
437
534
  # resp.next_token #=> String
438
535
  # resp.blocks #=> Array
439
- # resp.blocks[0].block_type #=> String, one of "KEY_VALUE_SET", "PAGE", "LINE", "WORD", "TABLE", "CELL"
536
+ # resp.blocks[0].block_type #=> String, one of "KEY_VALUE_SET", "PAGE", "LINE", "WORD", "TABLE", "CELL", "SELECTION_ELEMENT"
440
537
  # resp.blocks[0].confidence #=> Float
441
538
  # resp.blocks[0].text #=> String
442
539
  # resp.blocks[0].row_index #=> Integer
@@ -457,6 +554,7 @@ module Aws::Textract
457
554
  # resp.blocks[0].relationships[0].ids[0] #=> String
458
555
  # resp.blocks[0].entity_types #=> Array
459
556
  # resp.blocks[0].entity_types[0] #=> String, one of "KEY", "VALUE"
557
+ # resp.blocks[0].selection_status #=> String, one of "SELECTED", "NOT_SELECTED"
460
558
  # resp.blocks[0].page #=> Integer
461
559
  # resp.warnings #=> Array
462
560
  # resp.warnings[0].error_code #=> String
@@ -474,8 +572,8 @@ module Aws::Textract
474
572
  end
475
573
 
476
574
  # Gets the results for an Amazon Textract asynchronous operation that
477
- # detects text in a document image. Amazon Textract can detect lines of
478
- # text and the words that make up a line of text.
575
+ # detects text in a document. Amazon Textract can detect lines of text
576
+ # and the words that make up a line of text.
479
577
  #
480
578
  # You start asynchronous text detection by calling
481
579
  # StartDocumentTextDetection, which returns a job identifier (`JobId`).
@@ -488,8 +586,13 @@ module Aws::Textract
488
586
  # pass the job identifier (`JobId`) from the initial call to
489
587
  # `StartDocumentTextDetection`.
490
588
  #
491
- # `GetDocumentTextDetection` returns an array of Block objects. For more
492
- # information, see how-it-works-detecting.
589
+ # `GetDocumentTextDetection` returns an array of Block objects.
590
+ #
591
+ # Each document page has an associated `Block` of type PAGE. Each
592
+ # PAGE `Block` object is the parent of LINE `Block` objects that
593
+ # represent the lines of detected text on a page. A LINE `Block` object
594
+ # is a parent for each word that makes up the line. Words are
595
+ # represented by `Block` objects of type WORD.
493
596
  #
494
597
  # Use the MaxResults parameter to limit the number of blocks that are
495
598
  # returned. If there are more results than specified in `MaxResults`,
@@ -499,8 +602,11 @@ module Aws::Textract
499
602
  # `NextToken` request parameter with the token value that's returned
500
603
  # from the previous call to `GetDocumentTextDetection`.
501
604
  #
502
- # For more information, see Document Text Detection in the Amazon
503
- # Textract Developer Guide.
605
+ # For more information, see [Document Text Detection][1].
606
+ #
607
+ #
608
+ #
609
+ # [1]: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-detecting.html
504
610
  #
505
611
  # @option params [required, String] :job_id
506
612
  # A unique identifier for the text detection job. The `JobId` is
@@ -541,7 +647,7 @@ module Aws::Textract
541
647
  # resp.job_status #=> String, one of "IN_PROGRESS", "SUCCEEDED", "FAILED", "PARTIAL_SUCCESS"
542
648
  # resp.next_token #=> String
543
649
  # resp.blocks #=> Array
544
- # resp.blocks[0].block_type #=> String, one of "KEY_VALUE_SET", "PAGE", "LINE", "WORD", "TABLE", "CELL"
650
+ # resp.blocks[0].block_type #=> String, one of "KEY_VALUE_SET", "PAGE", "LINE", "WORD", "TABLE", "CELL", "SELECTION_ELEMENT"
545
651
  # resp.blocks[0].confidence #=> Float
546
652
  # resp.blocks[0].text #=> String
547
653
  # resp.blocks[0].row_index #=> Integer
@@ -562,6 +668,7 @@ module Aws::Textract
562
668
  # resp.blocks[0].relationships[0].ids[0] #=> String
563
669
  # resp.blocks[0].entity_types #=> Array
564
670
  # resp.blocks[0].entity_types[0] #=> String, one of "KEY", "VALUE"
671
+ # resp.blocks[0].selection_status #=> String, one of "SELECTED", "NOT_SELECTED"
565
672
  # resp.blocks[0].page #=> Integer
566
673
  # resp.warnings #=> Array
567
674
  # resp.warnings[0].error_code #=> String
@@ -578,22 +685,14 @@ module Aws::Textract
578
685
  req.send_request(options)
579
686
  end
580
687
 
581
- # Starts asynchronous analysis of text for relationships in the text and
582
- # tables that are detected in a document. Amazon Textract returns for
583
- # two types of information:
584
- #
585
- # * Words and lines that are related to nearby lines and words. The
586
- # related information is returned in two Block objects: A KEY Block
587
- # object and a VALUE Block object. For example, *Name: Ana Silva
588
- # Carolina* contains a key and value. *Name:* is the key. *Ana Silva
589
- # Carolina* is the value.
688
+ # Starts asynchronous analysis of an input document for relationships
689
+ # between detected items such as key and value pairs, tables, and
690
+ # selection elements.
590
691
  #
591
- # * Table and table cell data. A TABLE block contains information about
592
- # a detected table. A CELL block is returned for each cell in a table.
593
- #
594
- # Amazon Textract can analyze text in document images and PDF files that
595
- # are stored in an Amazon S3 bucket. Use DocumentLocation to specify the
596
- # bucket name and file name of the document image.
692
+ # `StartDocumentAnalysis` can analyze text in documents that are in JPG,
693
+ # PNG, and PDF format. The documents are stored in an Amazon S3 bucket.
694
+ # Use DocumentLocation to specify the bucket name and file name of the
695
+ # document.
597
696
  #
598
697
  # `StartDocumentAnalysis` returns a job identifier (`JobId`) that you
599
698
  # use to get the results of the operation. When text analysis is
@@ -605,6 +704,12 @@ module Aws::Textract
605
704
  # the job identifier (`JobId`) from the initial call to
606
705
  # `StartDocumentAnalysis`.
607
706
  #
707
+ # For more information, see [Document Text Analysis][1].
708
+ #
709
+ #
710
+ #
711
+ # [1]: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-analyzing.html
712
+ #
608
713
  # @option params [required, Types::DocumentLocation] :document_location
609
714
  # The location of the document to be processed.
610
715
  #
@@ -613,7 +718,8 @@ module Aws::Textract
613
718
  # return information about the tables that are detected in the input
614
719
  # document. Add FORMS to return detected fields and the associated text.
615
720
  # To perform both types of analysis, add TABLES and FORMS to
616
- # `FeatureTypes`.
721
+ # `FeatureTypes`. All selectable elements (`SELECTION_ELEMENT`) that are
722
+ # detected are returned, whatever the value of `FeatureTypes`.
617
723
  #
618
724
  # @option params [String] :client_request_token
619
725
  # The idempotent token that you use to identify the start request. If
@@ -622,8 +728,10 @@ module Aws::Textract
622
728
  # same job from being accidentally started more than once.
623
729
  #
624
730
  # @option params [String] :job_tag
625
- # The unique identifier you specify to identify the job in the
626
- # completion status that's published to the Amazon SNS topic.
731
+ # An identifier you specify that's included in the completion
732
+ # notification that's published to the Amazon SNS topic. For example,
733
+ # you can use `JobTag` to identify the type of document, such as a tax
734
+ # form or a receipt, that the completion notification corresponds to.
627
735
  #
628
736
  # @option params [Types::NotificationChannel] :notification_channel
629
737
  # The Amazon SNS topic ARN that you want Amazon Textract to publish the
@@ -669,9 +777,10 @@ module Aws::Textract
669
777
  # Textract can detect lines of text and the words that make up a line of
670
778
  # text.
671
779
  #
672
- # Amazon Textract can detect text in document images and PDF files that
673
- # are stored in an Amazon S3 bucket. Use DocumentLocation to specify the
674
- # bucket name and the file name of the document image.
780
+ # `StartDocumentTextDetection` can analyze text in documents that are in
781
+ # JPG, PNG, and PDF format. The documents are stored in an Amazon S3
782
+ # bucket. Use DocumentLocation to specify the bucket name and file name
783
+ # of the document.
675
784
  #
676
785
  # `StartDocumentTextDetection` returns a job identifier (`JobId`) that you use
677
786
  # to get the results of the operation. When text detection is finished,
@@ -683,8 +792,11 @@ module Aws::Textract
683
792
  # pass the job identifier (`JobId`) from the initial call to
684
793
  # `StartDocumentTextDetection`.
685
794
  #
686
- # For more information, see Document Text Detection in the Amazon
687
- # Textract Developer Guide.
795
+ # For more information, see [Document Text Detection][1].
796
+ #
797
+ #
798
+ #
799
+ # [1]: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-detecting.html
688
800
  #
689
801
  # @option params [required, Types::DocumentLocation] :document_location
690
802
  # The location of the document to be processed.
@@ -696,9 +808,10 @@ module Aws::Textract
696
808
  # prevent the same job from being accidentally started more than once.
697
809
  #
698
810
  # @option params [String] :job_tag
699
- # A unique identifier you specify to identify the job in the completion
700
- # status that's published to the Amazon Simple Notification Service
701
- # (Amazon SNS) topic.
811
+ # An identifier you specify that's included in the completion
812
+ # notification that's published to the Amazon SNS topic. For example,
813
+ # you can use `JobTag` to identify the type of document, such as a tax
814
+ # form or a receipt, that the completion notification corresponds to.
702
815
  #
703
816
  # @option params [Types::NotificationChannel] :notification_channel
704
817
  # The Amazon SNS topic ARN that you want Amazon Textract to publish the
@@ -752,7 +865,7 @@ module Aws::Textract
752
865
  params: params,
753
866
  config: config)
754
867
  context[:gem_name] = 'aws-sdk-textract'
755
- context[:gem_version] = '1.3.0'
868
+ context[:gem_version] = '1.4.0'
756
869
  Seahorse::Client::Request.new(handlers, context)
757
870
  end
758
871
 
@@ -66,6 +66,7 @@ module Aws::Textract
66
66
  S3ObjectName = Shapes::StringShape.new(name: 'S3ObjectName')
67
67
  S3ObjectVersion = Shapes::StringShape.new(name: 'S3ObjectVersion')
68
68
  SNSTopicArn = Shapes::StringShape.new(name: 'SNSTopicArn')
69
+ SelectionStatus = Shapes::StringShape.new(name: 'SelectionStatus')
69
70
  StartDocumentAnalysisRequest = Shapes::StructureShape.new(name: 'StartDocumentAnalysisRequest')
70
71
  StartDocumentAnalysisResponse = Shapes::StructureShape.new(name: 'StartDocumentAnalysisResponse')
71
72
  StartDocumentTextDetectionRequest = Shapes::StructureShape.new(name: 'StartDocumentTextDetectionRequest')
@@ -97,6 +98,7 @@ module Aws::Textract
97
98
  Block.add_member(:id, Shapes::ShapeRef.new(shape: NonEmptyString, location_name: "Id"))
98
99
  Block.add_member(:relationships, Shapes::ShapeRef.new(shape: RelationshipList, location_name: "Relationships"))
99
100
  Block.add_member(:entity_types, Shapes::ShapeRef.new(shape: EntityTypes, location_name: "EntityTypes"))
101
+ Block.add_member(:selection_status, Shapes::ShapeRef.new(shape: SelectionStatus, location_name: "SelectionStatus"))
100
102
  Block.add_member(:page, Shapes::ShapeRef.new(shape: UInteger, location_name: "Page"))
101
103
  Block.struct_class = Types::Block
102
104
 
@@ -28,6 +28,9 @@ module Aws::Textract
28
28
  # If you use the AWS CLI to call Amazon Textract operations, you
29
29
  # can't pass image bytes. The document must be an image in JPG or PNG
30
30
  # format.
31
+ #
32
+ # If you are using an AWS SDK to call Amazon Textract, you might not
33
+ # need to base64-encode image bytes passed using the `Bytes` field.
31
34
  # @return [Types::Document]
32
35
  #
33
36
  # @!attribute [rw] feature_types
@@ -63,13 +66,13 @@ module Aws::Textract
63
66
  include Aws::Structure
64
67
  end
65
68
 
66
- # A `Block` represents text that's recognized in a document within a
69
+ # A `Block` represents items that are recognized in a document within a
67
70
  # group of pixels close to each other. The information returned in a
68
71
  # `Block` depends on the type of operation. In document-text detection
69
72
  # (for example DetectDocumentText), you get information about the
70
73
  # detected words and lines of text. In text analysis (for example
71
- # AnalyzeDocument), you can get information about the fields and tables
72
- # that are detected in the document.
74
+ # AnalyzeDocument), you can also get information about the fields,
75
+ # tables and selection elements that are detected in the document.
73
76
  #
74
77
  # An array of `Block` objects is returned by both synchronous and
75
78
  # asynchronous operations. In synchronous operations, such as
@@ -77,37 +80,54 @@ module Aws::Textract
77
80
  # results. In asynchronous operations, such as GetDocumentAnalysis, the
78
81
  # array is returned over one or more responses.
79
82
  #
83
+ # For more information, see [How Amazon Textract Works][1].
84
+ #
85
+ #
86
+ #
87
+ # [1]: https://docs.aws.amazon.com/textract/latest/dg/how-it-works.html
88
+ #
80
89
  # @!attribute [rw] block_type
81
90
  # The type of text that's recognized in a block. In text-detection
82
91
  # operations, the following types are returned:
83
92
  #
84
93
  # * *PAGE* - Contains a list of the LINE Block objects that are
85
- # detected on a specific page.
94
+ # detected on a document page.
86
95
  #
87
- # * *WORD* - One or more ISO basic Latin script characters that
88
- # aren't separated by spaces.
96
+ # * *WORD* - A word detected on a document page. A word is one or more
97
+ # ISO basic Latin script characters that aren't separated by
98
+ # spaces.
89
99
  #
90
- # * *LINE* - A string of equally spaced words.
100
+ # * *LINE* - A string of tab-delimited, contiguous words that's
101
+ # detected on a document page.
91
102
  #
92
103
  # In text analysis operations, the following types are returned:
93
104
  #
94
105
  # * *PAGE* - Contains a list of child Block objects that are detected
95
- # on a specific page.
106
+ # on a document page.
96
107
  #
97
108
  # * *KEY\_VALUE\_SET* - Stores the KEY and VALUE Block objects for a
98
- # field that's detected in a document. Use the `EntityType` field
99
- # to determine if a KEY\_VALUE\_SET object is a KEY Block object or
100
- # a VALUE Block object.
109
+ # field that's detected on a document page. Use the `EntityType`
110
+ # field to determine if a KEY\_VALUE\_SET object is a KEY Block
111
+ # object or a VALUE Block object.
101
112
  #
102
- # * *WORD* - One or more ISO basic Latin script characters that
103
- # aren't separated by spaces.
113
+ # * *WORD* - A word detected on a document page. A word is one or more
114
+ # ISO basic Latin script characters that aren't separated by
115
+ # spaces.
104
116
  #
105
- # * *LINE* - A string of tab-delimited, contiguous words.
117
+ # * *LINE* - A string of tab-delimited, contiguous words that's
118
+ # detected on a document page.
106
119
  #
107
- # * *TABLE* - A table that's detected in the document.
120
+ # * *TABLE* - A table that's detected on a document page. A table is
121
+ # any grid-based information with 2 or more rows or columns with a
122
+ # cell span of 1 row and 1 column each.
108
123
  #
109
124
  # * *CELL* - A cell within a detected table. The cell is the parent of
110
125
  # the block that contains the text in the cell.
126
+ #
127
+ # * *SELECTION\_ELEMENT* - A selectable element such as a radio button
128
+ # or checkbox that's detected on a document page. Use the value of
129
+ # `SelectionStatus` to determine the status of the selection
130
+ # element.
111
131
  # @return [String]
112
132
  #
113
133
  # @!attribute [rw] confidence
@@ -176,8 +196,19 @@ module Aws::Textract
176
196
  # `GetDocumentTextDetection`.
177
197
  # @return [Array<String>]
178
198
  #
199
+ # @!attribute [rw] selection_status
200
+ # The selection status of a selectable element such as a radio button
201
+ # or checkbox.
202
+ # @return [String]
203
+ #
179
204
  # @!attribute [rw] page
180
- # The page in which a block was detected.
205
+ # The page in which a block was detected. `Page` is returned by
206
+ # asynchronous operations. Page values greater than 1 are only
207
+ # returned for multi-page documents that are in PDF format. A scanned
208
+ # image (JPG/PNG), even if it contains multiple document pages, is
209
+ # always considered to be a single-page document and the value of
210
+ # `Page` is always 1. Synchronous operations don't return `Page` as
211
+ # every input document is considered to be a single-page document.
181
212
  # @return [Integer]
182
213
  #
183
214
  # @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/Block AWS API Documentation
@@ -194,6 +225,7 @@ module Aws::Textract
194
225
  :id,
195
226
  :relationships,
196
227
  :entity_types,
228
+ :selection_status,
197
229
  :page)
198
230
  include Aws::Structure
199
231
  end
@@ -264,6 +296,9 @@ module Aws::Textract
264
296
  # If you use the AWS CLI to call Amazon Textract operations, you
265
297
  # can't pass image bytes. The document must be an image in JPG or PNG
266
298
  # format.
299
+ #
300
+ # If you are using an AWS SDK to call Amazon Textract, you might not
301
+ # need to base64-encode image bytes passed using the `Bytes` field.
267
302
  # @return [Types::Document]
268
303
  #
269
304
  # @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/DetectDocumentTextRequest AWS API Documentation
@@ -305,7 +340,7 @@ module Aws::Textract
305
340
  # bucket don't need to be base64 encoded.
306
341
  #
307
342
  # The AWS Region for the S3 bucket that contains the S3 object must
308
- # match the Region that you use for Amazon Textract operations.
343
+ # match the AWS Region that you use for Amazon Textract operations.
309
344
  #
310
345
  # If you use the AWS CLI to call Amazon Textract operations, passing
311
346
  # image bytes using the Bytes property isn't supported. You must first
@@ -328,8 +363,12 @@ module Aws::Textract
328
363
  # }
329
364
  #
330
365
  # @!attribute [rw] bytes
331
- # A blob of documents bytes. The maximum size of a document that's
332
- # provided in a blob of bytes is 5 MB.
366
+ # A blob of base64-encoded document bytes. The maximum size of a
367
+ # document that's provided in a blob of bytes is 5 MB. The document
368
+ # bytes must be in PNG or JPG format.
369
+ #
370
+ # If you are using an AWS SDK to call Amazon Textract, you might not
371
+ # need to base64-encode image bytes passed using the `Bytes` field.
333
372
  # @return [String]
334
373
  #
335
374
  # @!attribute [rw] s3_object
@@ -715,7 +754,9 @@ module Aws::Textract
715
754
  # to return information about the tables that are detected in the
716
755
  # input document. Add FORMS to return detected fields and the
717
756
  # associated text. To perform both types of analysis, add TABLES and
718
- # FORMS to `FeatureTypes`.
757
+ # FORMS to `FeatureTypes`. All selectable elements
758
+ # (`SELECTION_ELEMENT`) that are detected are returned, whatever the
759
+ # value of `FeatureTypes`.
719
760
  # @return [Array<String>]
720
761
  #
721
762
  # @!attribute [rw] client_request_token
@@ -726,8 +767,10 @@ module Aws::Textract
726
767
  # @return [String]
727
768
  #
728
769
  # @!attribute [rw] job_tag
729
- # The unique identifier you specify to identify the job in the
730
- # completion status that's published to the Amazon SNS topic.
770
+ # An identifier you specify that's included in the completion
771
+ # notification that's published to the Amazon SNS topic. For example,
772
+ # you can use `JobTag` to identify the type of document, such as a tax
773
+ # form or a receipt, that the completion notification corresponds to.
731
774
  # @return [String]
732
775
  #
733
776
  # @!attribute [rw] notification_channel
@@ -747,7 +790,7 @@ module Aws::Textract
747
790
  end
748
791
 
749
792
  # @!attribute [rw] job_id
750
- # The identifier for the document text-detection job. Use `JobId` to
793
+ # The identifier for the document text detection job. Use `JobId` to
751
794
  # identify the job in a subsequent call to `GetDocumentAnalysis`.
752
795
  # @return [String]
753
796
  #
@@ -789,9 +832,10 @@ module Aws::Textract
789
832
  # @return [String]
790
833
  #
791
834
  # @!attribute [rw] job_tag
792
- # A unique identifier you specify to identify the job in the
793
- # completion status that's published to the Amazon Simple
794
- # Notification Service (Amazon SNS) topic.
835
+ # An identifier you specify that's included in the completion
836
+ # notification that's published to the Amazon SNS topic. For example,
837
+ # you can use `JobTag` to identify the type of document, such as a tax
838
+ # form or a receipt, that the completion notification corresponds to.
795
839
  # @return [String]
796
840
  #
797
841
  # @!attribute [rw] notification_channel
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aws-sdk-textract
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.0
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Amazon Web Services
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-03-21 00:00:00.000000000 Z
11
+ date: 2019-04-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: aws-sdk-core
@@ -59,7 +59,7 @@ files:
59
59
  - lib/aws-sdk-textract/errors.rb
60
60
  - lib/aws-sdk-textract/resource.rb
61
61
  - lib/aws-sdk-textract/types.rb
62
- homepage: http://github.com/aws/aws-sdk-ruby
62
+ homepage: https://github.com/aws/aws-sdk-ruby
63
63
  licenses:
64
64
  - Apache-2.0
65
65
  metadata: