aws-sdk-textract 1.3.0 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/aws-sdk-textract.rb +1 -1
- data/lib/aws-sdk-textract/client.rb +166 -53
- data/lib/aws-sdk-textract/client_api.rb +2 -0
- data/lib/aws-sdk-textract/types.rb +70 -26
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bb5796435f32cf464a826d8a950bfd4530ce0653
|
4
|
+
data.tar.gz: '099358286d8dee94332f1e18da3028c18abd5d8b'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0f45a24e21f46c9f0c44a619e0e2b272288c42fe68484cfa997f280cbb3e8bf35db471565e620154e6f95898f821da77500554801e40b628488333632961adb5
|
7
|
+
data.tar.gz: da14d00c06ecc20b0b246c7a7b5235c2582460356a4d4c1d58f6cda494de28cb7d76e24e7d0462692d6fd9dc1e6bbae417e64bfa28db714cd867ad735e3fd554
|
data/lib/aws-sdk-textract.rb
CHANGED
@@ -209,40 +209,98 @@ module Aws::Textract
|
|
209
209
|
# When `true`, request parameters are validated before
|
210
210
|
# sending the request.
|
211
211
|
#
|
212
|
+
# @option options [URI::HTTP,String] :http_proxy A proxy to send
|
213
|
+
# requests through. Formatted like 'http://proxy.com:123'.
|
214
|
+
#
|
215
|
+
# @option options [Float] :http_open_timeout (15) The number of
|
216
|
+
# seconds to wait when opening a HTTP session before raising a
|
217
|
+
# `Timeout::Error`.
|
218
|
+
#
|
219
|
+
# @option options [Integer] :http_read_timeout (60) The default
|
220
|
+
# number of seconds to wait for response data. This value can
|
221
|
+
# safely be set
|
222
|
+
# per-request on the session yielded by {#session_for}.
|
223
|
+
#
|
224
|
+
# @option options [Float] :http_idle_timeout (5) The number of
|
225
|
+
# seconds a connection is allowed to sit idle before it is
|
226
|
+
# considered stale. Stale connections are closed and removed
|
227
|
+
# from the pool before making a request.
|
228
|
+
#
|
229
|
+
# @option options [Float] :http_continue_timeout (1) The number of
|
230
|
+
# seconds to wait for a 100-continue response before sending the
|
231
|
+
# request body. This option has no effect unless the request has
|
232
|
+
# "Expect" header set to "100-continue". Defaults to `nil` which
|
233
|
+
# disables this behaviour. This value can safely be set per
|
234
|
+
# request on the session yielded by {#session_for}.
|
235
|
+
#
|
236
|
+
# @option options [Boolean] :http_wire_trace (false) When `true`,
|
237
|
+
# HTTP debug output will be sent to the `:logger`.
|
238
|
+
#
|
239
|
+
# @option options [Boolean] :ssl_verify_peer (true) When `true`,
|
240
|
+
# SSL peer certificates are verified when establishing a
|
241
|
+
# connection.
|
242
|
+
#
|
243
|
+
# @option options [String] :ssl_ca_bundle Full path to the SSL
|
244
|
+
# certificate authority bundle file that should be used when
|
245
|
+
# verifying peer certificates. If you do not pass
|
246
|
+
# `:ssl_ca_bundle` or `:ssl_ca_directory` the system default
|
247
|
+
# will be used if available.
|
248
|
+
#
|
249
|
+
# @option options [String] :ssl_ca_directory Full path of the
|
250
|
+
# directory that contains the unbundled SSL certificate
|
251
|
+
# authority files for verifying peer certificates. If you do
|
252
|
+
# not pass `:ssl_ca_bundle` or `:ssl_ca_directory` the
|
253
|
+
# system default will be used if available.
|
254
|
+
#
|
212
255
|
def initialize(*args)
|
213
256
|
super
|
214
257
|
end
|
215
258
|
|
216
259
|
# @!group API Operations
|
217
260
|
|
218
|
-
# Analyzes an input document for relationships
|
219
|
-
# tables.
|
261
|
+
# Analyzes an input document for relationships between detected items.
|
220
262
|
#
|
221
|
-
#
|
263
|
+
# The types of information returned are as follows:
|
222
264
|
#
|
223
265
|
# * Words and lines that are related to nearby lines and words. The
|
224
|
-
# related information is returned in two Block objects
|
225
|
-
# object and a VALUE Block object. For
|
226
|
-
# Carolina* contains a key and value.
|
227
|
-
# Carolina* is the value.
|
266
|
+
# related information is returned in two Block objects each of type
|
267
|
+
# `KEY_VALUE_SET`\: a KEY Block object and a VALUE Block object. For
|
268
|
+
# example, *Name: Ana Silva Carolina* contains a key and value.
|
269
|
+
# *Name:* is the key. *Ana Silva Carolina* is the value.
|
270
|
+
#
|
271
|
+
# * Table and table cell data. A TABLE Block object contains information
|
272
|
+
# about a detected table. A CELL Block object is returned for each
|
273
|
+
# cell in a table.
|
274
|
+
#
|
275
|
+
# * Selectable elements such as checkboxes and radio buttons. A
|
276
|
+
# SELECTION\_ELEMENT Block object contains information about a
|
277
|
+
# selectable element.
|
228
278
|
#
|
229
|
-
# *
|
230
|
-
#
|
279
|
+
# * Lines and words of text. A LINE Block object contains one or more
|
280
|
+
# WORD Block objects.
|
231
281
|
#
|
232
282
|
# You can choose which type of analysis to perform by specifying the
|
233
283
|
# `FeatureTypes` list.
|
234
284
|
#
|
235
|
-
# The output is returned in a list of `BLOCK` objects
|
236
|
-
# information, see how-it-works-analyzing.
|
285
|
+
# The output is returned in a list of `BLOCK` objects.
|
237
286
|
#
|
238
287
|
# `AnalyzeDocument` is a synchronous operation. To analyze documents
|
239
288
|
# asynchronously, use StartDocumentAnalysis.
|
240
289
|
#
|
290
|
+
# For more information, see [Document Text Analysis][1].
|
291
|
+
#
|
292
|
+
#
|
293
|
+
#
|
294
|
+
# [1]: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-analyzing.html
|
295
|
+
#
|
241
296
|
# @option params [required, Types::Document] :document
|
242
297
|
# The input document as base64-encoded bytes or an Amazon S3 object. If
|
243
298
|
# you use the AWS CLI to call Amazon Textract operations, you can't
|
244
299
|
# pass image bytes. The document must be an image in JPG or PNG format.
|
245
300
|
#
|
301
|
+
# If you are using an AWS SDK to call Amazon Textract, you might not
|
302
|
+
# need to base64-encode image bytes passed using the `Bytes` field.
|
303
|
+
#
|
246
304
|
# @option params [required, Array<String>] :feature_types
|
247
305
|
# A list of the types of analysis to perform. Add TABLES to the list to
|
248
306
|
# return information about the tables detected in the input document.
|
@@ -273,7 +331,7 @@ module Aws::Textract
|
|
273
331
|
#
|
274
332
|
# resp.document_metadata.pages #=> Integer
|
275
333
|
# resp.blocks #=> Array
|
276
|
-
# resp.blocks[0].block_type #=> String, one of "KEY_VALUE_SET", "PAGE", "LINE", "WORD", "TABLE", "CELL"
|
334
|
+
# resp.blocks[0].block_type #=> String, one of "KEY_VALUE_SET", "PAGE", "LINE", "WORD", "TABLE", "CELL", "SELECTION_ELEMENT"
|
277
335
|
# resp.blocks[0].confidence #=> Float
|
278
336
|
# resp.blocks[0].text #=> String
|
279
337
|
# resp.blocks[0].row_index #=> Integer
|
@@ -294,6 +352,7 @@ module Aws::Textract
|
|
294
352
|
# resp.blocks[0].relationships[0].ids[0] #=> String
|
295
353
|
# resp.blocks[0].entity_types #=> Array
|
296
354
|
# resp.blocks[0].entity_types[0] #=> String, one of "KEY", "VALUE"
|
355
|
+
# resp.blocks[0].selection_status #=> String, one of "SELECTED", "NOT_SELECTED"
|
297
356
|
# resp.blocks[0].page #=> Integer
|
298
357
|
#
|
299
358
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/AnalyzeDocument AWS API Documentation
|
@@ -308,17 +367,31 @@ module Aws::Textract
|
|
308
367
|
# Detects text in the input document. Amazon Textract can detect lines
|
309
368
|
# of text and the words that make up a line of text. The input document
|
310
369
|
# must be an image in JPG or PNG format. `DetectDocumentText` returns
|
311
|
-
# the detected text in an array of Block objects.
|
312
|
-
#
|
370
|
+
# the detected text in an array of Block objects.
|
371
|
+
#
|
372
|
+
# Each document page has an associated `Block` of type PAGE. Each
|
373
|
+
# PAGE `Block` object is the parent of LINE `Block` objects that
|
374
|
+
# represent the lines of detected text on a page. A LINE `Block` object
|
375
|
+
# is a parent for each word that makes up the line. Words are
|
376
|
+
# represented by `Block` objects of type WORD.
|
313
377
|
#
|
314
378
|
# `DetectDocumentText` is a synchronous operation. To analyze documents
|
315
379
|
# asynchronously, use StartDocumentTextDetection.
|
316
380
|
#
|
381
|
+
# For more information, see [Document Text Detection][1].
|
382
|
+
#
|
383
|
+
#
|
384
|
+
#
|
385
|
+
# [1]: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-detecting.html
|
386
|
+
#
|
317
387
|
# @option params [required, Types::Document] :document
|
318
388
|
# The input document as base64-encoded bytes or an Amazon S3 object. If
|
319
389
|
# you use the AWS CLI to call Amazon Textract operations, you can't
|
320
390
|
# pass image bytes. The document must be an image in JPG or PNG format.
|
321
391
|
#
|
392
|
+
# If you are using an AWS SDK to call Amazon Textract, you might not
|
393
|
+
# need to base64-encode image bytes passed using the `Bytes` field.
|
394
|
+
#
|
322
395
|
# @return [Types::DetectDocumentTextResponse] Returns a {Seahorse::Client::Response response} object which responds to the following methods:
|
323
396
|
#
|
324
397
|
# * {Types::DetectDocumentTextResponse#document_metadata #document_metadata} => Types::DocumentMetadata
|
@@ -341,7 +414,7 @@ module Aws::Textract
|
|
341
414
|
#
|
342
415
|
# resp.document_metadata.pages #=> Integer
|
343
416
|
# resp.blocks #=> Array
|
344
|
-
# resp.blocks[0].block_type #=> String, one of "KEY_VALUE_SET", "PAGE", "LINE", "WORD", "TABLE", "CELL"
|
417
|
+
# resp.blocks[0].block_type #=> String, one of "KEY_VALUE_SET", "PAGE", "LINE", "WORD", "TABLE", "CELL", "SELECTION_ELEMENT"
|
345
418
|
# resp.blocks[0].confidence #=> Float
|
346
419
|
# resp.blocks[0].text #=> String
|
347
420
|
# resp.blocks[0].row_index #=> Integer
|
@@ -362,6 +435,7 @@ module Aws::Textract
|
|
362
435
|
# resp.blocks[0].relationships[0].ids[0] #=> String
|
363
436
|
# resp.blocks[0].entity_types #=> Array
|
364
437
|
# resp.blocks[0].entity_types[0] #=> String, one of "KEY", "VALUE"
|
438
|
+
# resp.blocks[0].selection_status #=> String, one of "SELECTED", "NOT_SELECTED"
|
365
439
|
# resp.blocks[0].page #=> Integer
|
366
440
|
#
|
367
441
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/DetectDocumentText AWS API Documentation
|
@@ -374,7 +448,7 @@ module Aws::Textract
|
|
374
448
|
end
|
375
449
|
|
376
450
|
# Gets the results for an Amazon Textract asynchronous operation that
|
377
|
-
# analyzes text in a document
|
451
|
+
# analyzes text in a document.
|
378
452
|
#
|
379
453
|
# You start asynchronous text analysis by calling StartDocumentAnalysis,
|
380
454
|
# which returns a job identifier (`JobId`). When the text analysis
|
@@ -386,8 +460,25 @@ module Aws::Textract
|
|
386
460
|
# `GetDocumentAnalysis`, and pass the job identifier (`JobId`) from the
|
387
461
|
# initial call to `StartDocumentAnalysis`.
|
388
462
|
#
|
389
|
-
# `GetDocumentAnalysis` returns an array of Block objects.
|
390
|
-
# information
|
463
|
+
# `GetDocumentAnalysis` returns an array of Block objects. The following
|
464
|
+
# types of information are returned:
|
465
|
+
#
|
466
|
+
# * Words and lines that are related to nearby lines and words. The
|
467
|
+
# related information is returned in two Block objects each of type
|
468
|
+
# `KEY_VALUE_SET`\: a KEY Block object and a VALUE Block object. For
|
469
|
+
# example, *Name: Ana Silva Carolina* contains a key and value.
|
470
|
+
# *Name:* is the key. *Ana Silva Carolina* is the value.
|
471
|
+
#
|
472
|
+
# * Table and table cell data. A TABLE Block object contains information
|
473
|
+
# about a detected table. A CELL Block object is returned for each
|
474
|
+
# cell in a table.
|
475
|
+
#
|
476
|
+
# * Selectable elements such as checkboxes and radio buttons. A
|
477
|
+
# SELECTION\_ELEMENT Block object contains information about a
|
478
|
+
# selectable element.
|
479
|
+
#
|
480
|
+
# * Lines and words of text. A LINE Block object contains one or more
|
481
|
+
# WORD Block objects.
|
391
482
|
#
|
392
483
|
# Use the `MaxResults` parameter to limit the number of blocks returned.
|
393
484
|
# If there are more results than specified in `MaxResults`, the value of
|
@@ -397,6 +488,12 @@ module Aws::Textract
|
|
397
488
|
# with the token value that's returned from the previous call to
|
398
489
|
# `GetDocumentAnalysis`.
|
399
490
|
#
|
491
|
+
# For more information, see [Document Text Analysis][1].
|
492
|
+
#
|
493
|
+
#
|
494
|
+
#
|
495
|
+
# [1]: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-analyzing.html
|
496
|
+
#
|
400
497
|
# @option params [required, String] :job_id
|
401
498
|
# A unique identifier for the text-detection job. The `JobId` is
|
402
499
|
# returned from `StartDocumentAnalysis`.
|
@@ -436,7 +533,7 @@ module Aws::Textract
|
|
436
533
|
# resp.job_status #=> String, one of "IN_PROGRESS", "SUCCEEDED", "FAILED", "PARTIAL_SUCCESS"
|
437
534
|
# resp.next_token #=> String
|
438
535
|
# resp.blocks #=> Array
|
439
|
-
# resp.blocks[0].block_type #=> String, one of "KEY_VALUE_SET", "PAGE", "LINE", "WORD", "TABLE", "CELL"
|
536
|
+
# resp.blocks[0].block_type #=> String, one of "KEY_VALUE_SET", "PAGE", "LINE", "WORD", "TABLE", "CELL", "SELECTION_ELEMENT"
|
440
537
|
# resp.blocks[0].confidence #=> Float
|
441
538
|
# resp.blocks[0].text #=> String
|
442
539
|
# resp.blocks[0].row_index #=> Integer
|
@@ -457,6 +554,7 @@ module Aws::Textract
|
|
457
554
|
# resp.blocks[0].relationships[0].ids[0] #=> String
|
458
555
|
# resp.blocks[0].entity_types #=> Array
|
459
556
|
# resp.blocks[0].entity_types[0] #=> String, one of "KEY", "VALUE"
|
557
|
+
# resp.blocks[0].selection_status #=> String, one of "SELECTED", "NOT_SELECTED"
|
460
558
|
# resp.blocks[0].page #=> Integer
|
461
559
|
# resp.warnings #=> Array
|
462
560
|
# resp.warnings[0].error_code #=> String
|
@@ -474,8 +572,8 @@ module Aws::Textract
|
|
474
572
|
end
|
475
573
|
|
476
574
|
# Gets the results for an Amazon Textract asynchronous operation that
|
477
|
-
# detects text in a document
|
478
|
-
#
|
575
|
+
# detects text in a document. Amazon Textract can detect lines of text
|
576
|
+
# and the words that make up a line of text.
|
479
577
|
#
|
480
578
|
# You start asynchronous text detection by calling
|
481
579
|
# StartDocumentTextDetection, which returns a job identifier (`JobId`).
|
@@ -488,8 +586,13 @@ module Aws::Textract
|
|
488
586
|
# pass the job identifier (`JobId`) from the initial call to
|
489
587
|
# `StartDocumentTextDetection`.
|
490
588
|
#
|
491
|
-
# `GetDocumentTextDetection` returns an array of Block objects.
|
492
|
-
#
|
589
|
+
# `GetDocumentTextDetection` returns an array of Block objects.
|
590
|
+
#
|
591
|
+
# Each document page has an associated `Block` of type PAGE. Each
|
592
|
+
# PAGE `Block` object is the parent of LINE `Block` objects that
|
593
|
+
# represent the lines of detected text on a page. A LINE `Block` object
|
594
|
+
# is a parent for each word that makes up the line. Words are
|
595
|
+
# represented by `Block` objects of type WORD.
|
493
596
|
#
|
494
597
|
# Use the MaxResults parameter to limit the number of blocks that are
|
495
598
|
# returned. If there are more results than specified in `MaxResults`,
|
@@ -499,8 +602,11 @@ module Aws::Textract
|
|
499
602
|
# `NextToken` request parameter with the token value that's returned
|
500
603
|
# from the previous call to `GetDocumentTextDetection`.
|
501
604
|
#
|
502
|
-
# For more information, see Document Text Detection
|
503
|
-
#
|
605
|
+
# For more information, see [Document Text Detection][1].
|
606
|
+
#
|
607
|
+
#
|
608
|
+
#
|
609
|
+
# [1]: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-detecting.html
|
504
610
|
#
|
505
611
|
# @option params [required, String] :job_id
|
506
612
|
# A unique identifier for the text detection job. The `JobId` is
|
@@ -541,7 +647,7 @@ module Aws::Textract
|
|
541
647
|
# resp.job_status #=> String, one of "IN_PROGRESS", "SUCCEEDED", "FAILED", "PARTIAL_SUCCESS"
|
542
648
|
# resp.next_token #=> String
|
543
649
|
# resp.blocks #=> Array
|
544
|
-
# resp.blocks[0].block_type #=> String, one of "KEY_VALUE_SET", "PAGE", "LINE", "WORD", "TABLE", "CELL"
|
650
|
+
# resp.blocks[0].block_type #=> String, one of "KEY_VALUE_SET", "PAGE", "LINE", "WORD", "TABLE", "CELL", "SELECTION_ELEMENT"
|
545
651
|
# resp.blocks[0].confidence #=> Float
|
546
652
|
# resp.blocks[0].text #=> String
|
547
653
|
# resp.blocks[0].row_index #=> Integer
|
@@ -562,6 +668,7 @@ module Aws::Textract
|
|
562
668
|
# resp.blocks[0].relationships[0].ids[0] #=> String
|
563
669
|
# resp.blocks[0].entity_types #=> Array
|
564
670
|
# resp.blocks[0].entity_types[0] #=> String, one of "KEY", "VALUE"
|
671
|
+
# resp.blocks[0].selection_status #=> String, one of "SELECTED", "NOT_SELECTED"
|
565
672
|
# resp.blocks[0].page #=> Integer
|
566
673
|
# resp.warnings #=> Array
|
567
674
|
# resp.warnings[0].error_code #=> String
|
@@ -578,22 +685,14 @@ module Aws::Textract
|
|
578
685
|
req.send_request(options)
|
579
686
|
end
|
580
687
|
|
581
|
-
# Starts asynchronous analysis of
|
582
|
-
#
|
583
|
-
#
|
584
|
-
#
|
585
|
-
# * Words and lines that are related to nearby lines and words. The
|
586
|
-
# related information is returned in two Block objects: A KEY Block
|
587
|
-
# object and a VALUE Block object. For example, *Name: Ana Silva
|
588
|
-
# Carolina* contains a key and value. *Name:* is the key. *Ana Silva
|
589
|
-
# Carolina* is the value.
|
688
|
+
# Starts asynchronous analysis of an input document for relationships
|
689
|
+
# between detected items such as key and value pairs, tables, and
|
690
|
+
# selection elements.
|
590
691
|
#
|
591
|
-
#
|
592
|
-
#
|
593
|
-
#
|
594
|
-
#
|
595
|
-
# are stored in an Amazon S3 bucket. Use DocumentLocation to specify the
|
596
|
-
# bucket name and file name of the document image.
|
692
|
+
# `StartDocumentAnalysis` can analyze text in documents that are in JPG,
|
693
|
+
# PNG, and PDF format. The documents are stored in an Amazon S3 bucket.
|
694
|
+
# Use DocumentLocation to specify the bucket name and file name of the
|
695
|
+
# document.
|
597
696
|
#
|
598
697
|
# `StartDocumentAnalysis` returns a job identifier (`JobId`) that you
|
599
698
|
# use to get the results of the operation. When text analysis is
|
@@ -605,6 +704,12 @@ module Aws::Textract
|
|
605
704
|
# the job identifier (`JobId`) from the initial call to
|
606
705
|
# `StartDocumentAnalysis`.
|
607
706
|
#
|
707
|
+
# For more information, see [Document Text Analysis][1].
|
708
|
+
#
|
709
|
+
#
|
710
|
+
#
|
711
|
+
# [1]: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-analyzing.html
|
712
|
+
#
|
608
713
|
# @option params [required, Types::DocumentLocation] :document_location
|
609
714
|
# The location of the document to be processed.
|
610
715
|
#
|
@@ -613,7 +718,8 @@ module Aws::Textract
|
|
613
718
|
# return information about the tables that are detected in the input
|
614
719
|
# document. Add FORMS to return detected fields and the associated text.
|
615
720
|
# To perform both types of analysis, add TABLES and FORMS to
|
616
|
-
# `FeatureTypes`.
|
721
|
+
# `FeatureTypes`. All selectable elements (`SELECTION_ELEMENT`) that are
|
722
|
+
# detected are returned, whatever the value of `FeatureTypes`.
|
617
723
|
#
|
618
724
|
# @option params [String] :client_request_token
|
619
725
|
# The idempotent token that you use to identify the start request. If
|
@@ -622,8 +728,10 @@ module Aws::Textract
|
|
622
728
|
# same job from being accidentally started more than once.
|
623
729
|
#
|
624
730
|
# @option params [String] :job_tag
|
625
|
-
#
|
626
|
-
#
|
731
|
+
# An identifier you specify that's included in the completion
|
732
|
+
# notification that's published to the Amazon SNS topic. For example,
|
733
|
+
# you can use `JobTag` to identify the type of document, such as a tax
|
734
|
+
# form or a receipt, that the completion notification corresponds to.
|
627
735
|
#
|
628
736
|
# @option params [Types::NotificationChannel] :notification_channel
|
629
737
|
# The Amazon SNS topic ARN that you want Amazon Textract to publish the
|
@@ -669,9 +777,10 @@ module Aws::Textract
|
|
669
777
|
# Textract can detect lines of text and the words that make up a line of
|
670
778
|
# text.
|
671
779
|
#
|
672
|
-
#
|
673
|
-
# are stored in an Amazon S3
|
674
|
-
# bucket
|
780
|
+
# `StartDocumentTextDetection` can analyze text in documents that are in
|
781
|
+
# JPG, PNG, and PDF format. The documents are stored in an Amazon S3
|
782
|
+
# bucket. Use DocumentLocation to specify the bucket name and file name
|
783
|
+
# of the document.
|
675
784
|
#
|
676
785
|
# `StartDocumentTextDetection` returns a job identifier (`JobId`) that you use
|
677
786
|
# to get the results of the operation. When text detection is finished,
|
@@ -683,8 +792,11 @@ module Aws::Textract
|
|
683
792
|
# pass the job identifier (`JobId`) from the initial call to
|
684
793
|
# `StartDocumentTextDetection`.
|
685
794
|
#
|
686
|
-
# For more information, see Document Text Detection
|
687
|
-
#
|
795
|
+
# For more information, see [Document Text Detection][1].
|
796
|
+
#
|
797
|
+
#
|
798
|
+
#
|
799
|
+
# [1]: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-detecting.html
|
688
800
|
#
|
689
801
|
# @option params [required, Types::DocumentLocation] :document_location
|
690
802
|
# The location of the document to be processed.
|
@@ -696,9 +808,10 @@ module Aws::Textract
|
|
696
808
|
# prevent the same job from being accidentally started more than once.
|
697
809
|
#
|
698
810
|
# @option params [String] :job_tag
|
699
|
-
#
|
700
|
-
#
|
701
|
-
#
|
811
|
+
# An identifier you specify that's included in the completion
|
812
|
+
# notification that's published to the Amazon SNS topic. For example,
|
813
|
+
# you can use `JobTag` to identify the type of document, such as a tax
|
814
|
+
# form or a receipt, that the completion notification corresponds to.
|
702
815
|
#
|
703
816
|
# @option params [Types::NotificationChannel] :notification_channel
|
704
817
|
# The Amazon SNS topic ARN that you want Amazon Textract to publish the
|
@@ -752,7 +865,7 @@ module Aws::Textract
|
|
752
865
|
params: params,
|
753
866
|
config: config)
|
754
867
|
context[:gem_name] = 'aws-sdk-textract'
|
755
|
-
context[:gem_version] = '1.
|
868
|
+
context[:gem_version] = '1.4.0'
|
756
869
|
Seahorse::Client::Request.new(handlers, context)
|
757
870
|
end
|
758
871
|
|
@@ -66,6 +66,7 @@ module Aws::Textract
|
|
66
66
|
S3ObjectName = Shapes::StringShape.new(name: 'S3ObjectName')
|
67
67
|
S3ObjectVersion = Shapes::StringShape.new(name: 'S3ObjectVersion')
|
68
68
|
SNSTopicArn = Shapes::StringShape.new(name: 'SNSTopicArn')
|
69
|
+
SelectionStatus = Shapes::StringShape.new(name: 'SelectionStatus')
|
69
70
|
StartDocumentAnalysisRequest = Shapes::StructureShape.new(name: 'StartDocumentAnalysisRequest')
|
70
71
|
StartDocumentAnalysisResponse = Shapes::StructureShape.new(name: 'StartDocumentAnalysisResponse')
|
71
72
|
StartDocumentTextDetectionRequest = Shapes::StructureShape.new(name: 'StartDocumentTextDetectionRequest')
|
@@ -97,6 +98,7 @@ module Aws::Textract
|
|
97
98
|
Block.add_member(:id, Shapes::ShapeRef.new(shape: NonEmptyString, location_name: "Id"))
|
98
99
|
Block.add_member(:relationships, Shapes::ShapeRef.new(shape: RelationshipList, location_name: "Relationships"))
|
99
100
|
Block.add_member(:entity_types, Shapes::ShapeRef.new(shape: EntityTypes, location_name: "EntityTypes"))
|
101
|
+
Block.add_member(:selection_status, Shapes::ShapeRef.new(shape: SelectionStatus, location_name: "SelectionStatus"))
|
100
102
|
Block.add_member(:page, Shapes::ShapeRef.new(shape: UInteger, location_name: "Page"))
|
101
103
|
Block.struct_class = Types::Block
|
102
104
|
|
@@ -28,6 +28,9 @@ module Aws::Textract
|
|
28
28
|
# If you use the AWS CLI to call Amazon Textract operations, you
|
29
29
|
# can't pass image bytes. The document must be an image in JPG or PNG
|
30
30
|
# format.
|
31
|
+
#
|
32
|
+
# If you are using an AWS SDK to call Amazon Textract, you might not
|
33
|
+
# need to base64-encode image bytes passed using the `Bytes` field.
|
31
34
|
# @return [Types::Document]
|
32
35
|
#
|
33
36
|
# @!attribute [rw] feature_types
|
@@ -63,13 +66,13 @@ module Aws::Textract
|
|
63
66
|
include Aws::Structure
|
64
67
|
end
|
65
68
|
|
66
|
-
# A `Block` represents
|
69
|
+
# A `Block` represents items that are recognized in a document within a
|
67
70
|
# group of pixels close to each other. The information returned in a
|
68
71
|
# `Block` depends on the type of operation. In document-text detection
|
69
72
|
# (for example DetectDocumentText), you get information about the
|
70
73
|
# detected words and lines of text. In text analysis (for example
|
71
|
-
# AnalyzeDocument), you can get information about the fields
|
72
|
-
# that are detected in the document.
|
74
|
+
# AnalyzeDocument), you can also get information about the fields,
|
75
|
+
# tables and selection elements that are detected in the document.
|
73
76
|
#
|
74
77
|
# An array of `Block` objects is returned by both synchronous and
|
75
78
|
# asynchronous operations. In synchronous operations, such as
|
@@ -77,37 +80,54 @@ module Aws::Textract
|
|
77
80
|
# results. In asynchronous operations, such as GetDocumentAnalysis, the
|
78
81
|
# array is returned over one or more responses.
|
79
82
|
#
|
83
|
+
# For more information, see [How Amazon Textract Works][1].
|
84
|
+
#
|
85
|
+
#
|
86
|
+
#
|
87
|
+
# [1]: https://docs.aws.amazon.com/textract/latest/dg/how-it-works.html
|
88
|
+
#
|
80
89
|
# @!attribute [rw] block_type
|
81
90
|
# The type of text that's recognized in a block. In text-detection
|
82
91
|
# operations, the following types are returned:
|
83
92
|
#
|
84
93
|
# * *PAGE* - Contains a list of the LINE Block objects that are
|
85
|
-
# detected on a
|
94
|
+
# detected on a document page.
|
86
95
|
#
|
87
|
-
# * *WORD* -
|
88
|
-
# aren't separated by
|
96
|
+
# * *WORD* - A word detected on a document page. A word is one or more
|
97
|
+
# ISO basic Latin script characters that aren't separated by
|
98
|
+
# spaces.
|
89
99
|
#
|
90
|
-
# * *LINE* - A string of
|
100
|
+
# * *LINE* - A string of tab-delimited, contiguous words that's
|
101
|
+
# detected on a document page.
|
91
102
|
#
|
92
103
|
# In text analysis operations, the following types are returned:
|
93
104
|
#
|
94
105
|
# * *PAGE* - Contains a list of child Block objects that are detected
|
95
|
-
# on a
|
106
|
+
# on a document page.
|
96
107
|
#
|
97
108
|
# * *KEY\_VALUE\_SET* - Stores the KEY and VALUE Block objects for a
|
98
|
-
# field that's detected
|
99
|
-
# to determine if a KEY\_VALUE\_SET object is a KEY Block
|
100
|
-
# a VALUE Block object.
|
109
|
+
# field that's detected on a document page. Use the `EntityType`
|
110
|
+
# field to determine if a KEY\_VALUE\_SET object is a KEY Block
|
111
|
+
# object or a VALUE Block object.
|
101
112
|
#
|
102
|
-
# * *WORD* -
|
103
|
-
# aren't separated by spaces
|
113
|
+
# * *WORD* - A word detected on a document page. A word is one or more
|
114
|
+
# ISO basic Latin script characters that aren't separated by spaces
|
115
|
+
# that's detected on a document page.
|
104
116
|
#
|
105
|
-
# * *LINE* - A string of tab-delimited, contiguous words
|
117
|
+
# * *LINE* - A string of tab-delimited, contiguous words that's
|
118
|
+
# detected on a document page.
|
106
119
|
#
|
107
|
-
# * *TABLE* - A table that's detected
|
120
|
+
# * *TABLE* - A table that's detected on a document page. A table is
|
121
|
+
# any grid-based information with 2 or more rows or columns with a
|
122
|
+
# cell span of 1 row and 1 column each.
|
108
123
|
#
|
109
124
|
# * *CELL* - A cell within a detected table. The cell is the parent of
|
110
125
|
# the block that contains the text in the cell.
|
126
|
+
#
|
127
|
+
# * *SELECTION\_ELEMENT* - A selectable element such as a radio button
|
128
|
+
# or checkbox that's detected on a document page. Use the value of
|
129
|
+
# `SelectionStatus` to determine the status of the selection
|
130
|
+
# element.
|
111
131
|
# @return [String]
|
112
132
|
#
|
113
133
|
# @!attribute [rw] confidence
|
@@ -176,8 +196,19 @@ module Aws::Textract
|
|
176
196
|
# `GetDocumentTextDetection`.
|
177
197
|
# @return [Array<String>]
|
178
198
|
#
|
199
|
+
# @!attribute [rw] selection_status
|
200
|
+
# The selection status of a selectable element such as a radio button
|
201
|
+
# or checkbox.
|
202
|
+
# @return [String]
|
203
|
+
#
|
179
204
|
# @!attribute [rw] page
|
180
|
-
# The page in which a block was detected.
|
205
|
+
# The page in which a block was detected. `Page` is returned by
|
206
|
+
# asynchronous operations. Page values greater than 1 are only
|
207
|
+
# returned for multi-page documents that are in PDF format. A scanned
|
208
|
+
# image (JPG/PNG), even if it contains multiple document pages, is
|
209
|
+
# always considered to be a single-page document and the value of
|
210
|
+
# `Page` is always 1. Synchronous operations don't return `Page` as
|
211
|
+
# every input document is considered to be a single-page document.
|
181
212
|
# @return [Integer]
|
182
213
|
#
|
183
214
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/Block AWS API Documentation
|
@@ -194,6 +225,7 @@ module Aws::Textract
|
|
194
225
|
:id,
|
195
226
|
:relationships,
|
196
227
|
:entity_types,
|
228
|
+
:selection_status,
|
197
229
|
:page)
|
198
230
|
include Aws::Structure
|
199
231
|
end
|
@@ -264,6 +296,9 @@ module Aws::Textract
|
|
264
296
|
# If you use the AWS CLI to call Amazon Textract operations, you
|
265
297
|
# can't pass image bytes. The document must be an image in JPG or PNG
|
266
298
|
# format.
|
299
|
+
#
|
300
|
+
# If you are using an AWS SDK to call Amazon Textract, you might not
|
301
|
+
# need to base64-encode image bytes passed using the `Bytes` field.
|
267
302
|
# @return [Types::Document]
|
268
303
|
#
|
269
304
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/DetectDocumentTextRequest AWS API Documentation
|
@@ -305,7 +340,7 @@ module Aws::Textract
|
|
305
340
|
# bucket don't need to be base64 encoded.
|
306
341
|
#
|
307
342
|
# The AWS Region for the S3 bucket that contains the S3 object must
|
308
|
-
# match the Region that you use for Amazon Textract operations.
|
343
|
+
# match the AWS Region that you use for Amazon Textract operations.
|
309
344
|
#
|
310
345
|
# If you use the AWS CLI to call Amazon Textract operations, passing
|
311
346
|
# image bytes using the Bytes property isn't supported. You must first
|
@@ -328,8 +363,12 @@ module Aws::Textract
|
|
328
363
|
# }
|
329
364
|
#
|
330
365
|
# @!attribute [rw] bytes
|
331
|
-
# A blob of documents bytes. The maximum size of a
|
332
|
-
# provided in a blob of bytes is 5 MB.
|
366
|
+
# A blob of base-64 encoded document bytes. The maximum size of a
|
367
|
+
# document that's provided in a blob of bytes is 5 MB. The document
|
368
|
+
# bytes must be in PNG or JPG format.
|
369
|
+
#
|
370
|
+
# If you are using an AWS SDK to call Amazon Textract, you might not
|
371
|
+
# need to base64-encode image bytes passed using the `Bytes` field.
|
333
372
|
# @return [String]
|
334
373
|
#
|
335
374
|
# @!attribute [rw] s3_object
|
@@ -715,7 +754,9 @@ module Aws::Textract
|
|
715
754
|
# to return information about the tables that are detected in the
|
716
755
|
# input document. Add FORMS to return detected fields and the
|
717
756
|
# associated text. To perform both types of analysis, add TABLES and
|
718
|
-
# FORMS to `FeatureTypes`.
|
757
|
+
# FORMS to `FeatureTypes`. All selectable elements
|
758
|
+
# (`SELECTION_ELEMENT`) that are detected are returned, whatever the
|
759
|
+
# value of `FeatureTypes`.
|
719
760
|
# @return [Array<String>]
|
720
761
|
#
|
721
762
|
# @!attribute [rw] client_request_token
|
@@ -726,8 +767,10 @@ module Aws::Textract
|
|
726
767
|
# @return [String]
|
727
768
|
#
|
728
769
|
# @!attribute [rw] job_tag
|
729
|
-
#
|
730
|
-
#
|
770
|
+
# An identifier you specify that's included in the completion
|
771
|
+
# notification that's published to the Amazon SNS topic. For example,
|
772
|
+
# you can use `JobTag` to identify the type of document, such as a tax
|
773
|
+
# form or a receipt, that the completion notification corresponds to.
|
731
774
|
# @return [String]
|
732
775
|
#
|
733
776
|
# @!attribute [rw] notification_channel
|
@@ -747,7 +790,7 @@ module Aws::Textract
|
|
747
790
|
end
|
748
791
|
|
749
792
|
# @!attribute [rw] job_id
|
750
|
-
# The identifier for the document text
|
793
|
+
# The identifier for the document text detection job. Use `JobId` to
|
751
794
|
# identify the job in a subsequent call to `GetDocumentAnalysis`.
|
752
795
|
# @return [String]
|
753
796
|
#
|
@@ -789,9 +832,10 @@ module Aws::Textract
|
|
789
832
|
# @return [String]
|
790
833
|
#
|
791
834
|
# @!attribute [rw] job_tag
|
792
|
-
#
|
793
|
-
#
|
794
|
-
#
|
835
|
+
# An identifier you specify that's included in the completion
|
836
|
+
# notification that's published to the Amazon SNS topic. For example,
|
837
|
+
# you can use `JobTag` to identify the type of document, such as a tax
|
838
|
+
# form or a receipt, that the completion notification corresponds to.
|
795
839
|
# @return [String]
|
796
840
|
#
|
797
841
|
# @!attribute [rw] notification_channel
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aws-sdk-textract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Amazon Web Services
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-04-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: aws-sdk-core
|
@@ -59,7 +59,7 @@ files:
|
|
59
59
|
- lib/aws-sdk-textract/errors.rb
|
60
60
|
- lib/aws-sdk-textract/resource.rb
|
61
61
|
- lib/aws-sdk-textract/types.rb
|
62
|
-
homepage:
|
62
|
+
homepage: https://github.com/aws/aws-sdk-ruby
|
63
63
|
licenses:
|
64
64
|
- Apache-2.0
|
65
65
|
metadata:
|