aws-sdk-textract 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/aws-sdk-textract.rb +1 -1
- data/lib/aws-sdk-textract/client.rb +166 -53
- data/lib/aws-sdk-textract/client_api.rb +2 -0
- data/lib/aws-sdk-textract/types.rb +70 -26
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bb5796435f32cf464a826d8a950bfd4530ce0653
|
4
|
+
data.tar.gz: '099358286d8dee94332f1e18da3028c18abd5d8b'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0f45a24e21f46c9f0c44a619e0e2b272288c42fe68484cfa997f280cbb3e8bf35db471565e620154e6f95898f821da77500554801e40b628488333632961adb5
|
7
|
+
data.tar.gz: da14d00c06ecc20b0b246c7a7b5235c2582460356a4d4c1d58f6cda494de28cb7d76e24e7d0462692d6fd9dc1e6bbae417e64bfa28db714cd867ad735e3fd554
|
data/lib/aws-sdk-textract.rb
CHANGED
@@ -209,40 +209,98 @@ module Aws::Textract
|
|
209
209
|
# When `true`, request parameters are validated before
|
210
210
|
# sending the request.
|
211
211
|
#
|
212
|
+
# @option options [URI::HTTP,String] :http_proxy A proxy to send
|
213
|
+
# requests through. Formatted like 'http://proxy.com:123'.
|
214
|
+
#
|
215
|
+
# @option options [Float] :http_open_timeout (15) The number of
|
216
|
+
# seconds to wait when opening a HTTP session before raising a
|
217
|
+
# `Timeout::Error`.
|
218
|
+
#
|
219
|
+
# @option options [Integer] :http_read_timeout (60) The default
|
220
|
+
# number of seconds to wait for response data. This value can
|
221
|
+
# safely be set
|
222
|
+
# per-request on the session yielded by {#session_for}.
|
223
|
+
#
|
224
|
+
# @option options [Float] :http_idle_timeout (5) The number of
|
225
|
+
# seconds a connection is allowed to sit idle before it is
|
226
|
+
# considered stale. Stale connections are closed and removed
|
227
|
+
# from the pool before making a request.
|
228
|
+
#
|
229
|
+
# @option options [Float] :http_continue_timeout (1) The number of
|
230
|
+
# seconds to wait for a 100-continue response before sending the
|
231
|
+
# request body. This option has no effect unless the request has
|
232
|
+
# "Expect" header set to "100-continue". Defaults to `nil` which
|
233
|
+
# disables this behaviour. This value can safely be set per
|
234
|
+
# request on the session yielded by {#session_for}.
|
235
|
+
#
|
236
|
+
# @option options [Boolean] :http_wire_trace (false) When `true`,
|
237
|
+
# HTTP debug output will be sent to the `:logger`.
|
238
|
+
#
|
239
|
+
# @option options [Boolean] :ssl_verify_peer (true) When `true`,
|
240
|
+
# SSL peer certificates are verified when establishing a
|
241
|
+
# connection.
|
242
|
+
#
|
243
|
+
# @option options [String] :ssl_ca_bundle Full path to the SSL
|
244
|
+
# certificate authority bundle file that should be used when
|
245
|
+
# verifying peer certificates. If you do not pass
|
246
|
+
# `:ssl_ca_bundle` or `:ssl_ca_directory` the system default
|
247
|
+
# will be used if available.
|
248
|
+
#
|
249
|
+
# @option options [String] :ssl_ca_directory Full path of the
|
250
|
+
# directory that contains the unbundled SSL certificate
|
251
|
+
# authority files for verifying peer certificates. If you do
|
252
|
+
# not pass `:ssl_ca_bundle` or `:ssl_ca_directory` the
|
253
|
+
# system default will be used if available.
|
254
|
+
#
|
212
255
|
def initialize(*args)
|
213
256
|
super
|
214
257
|
end
|
215
258
|
|
216
259
|
# @!group API Operations
|
217
260
|
|
218
|
-
# Analyzes an input document for relationships
|
219
|
-
# tables.
|
261
|
+
# Analyzes an input document for relationships between detected items.
|
220
262
|
#
|
221
|
-
#
|
263
|
+
# The types of information returned are as follows:
|
222
264
|
#
|
223
265
|
# * Words and lines that are related to nearby lines and words. The
|
224
|
-
# related information is returned in two Block objects
|
225
|
-
# object and a VALUE Block object. For
|
226
|
-
# Carolina* contains a key and value.
|
227
|
-
# Carolina* is the value.
|
266
|
+
# related information is returned in two Block objects each of type
|
267
|
+
# `KEY_VALUE_SET`\: a KEY Block object and a VALUE Block object. For
|
268
|
+
# example, *Name: Ana Silva Carolina* contains a key and value.
|
269
|
+
# *Name:* is the key. *Ana Silva Carolina* is the value.
|
270
|
+
#
|
271
|
+
# * Table and table cell data. A TABLE Block object contains information
|
272
|
+
# about a detected table. A CELL Block object is returned for each
|
273
|
+
# cell in a table.
|
274
|
+
#
|
275
|
+
# * Selectable elements such as checkboxes and radio buttons. A
|
276
|
+
# SELECTION\_ELEMENT Block object contains information about a
|
277
|
+
# selectable element.
|
228
278
|
#
|
229
|
-
# *
|
230
|
-
#
|
279
|
+
# * Lines and words of text. A LINE Block object contains one or more
|
280
|
+
# WORD Block objects.
|
231
281
|
#
|
232
282
|
# You can choose which type of analysis to perform by specifying the
|
233
283
|
# `FeatureTypes` list.
|
234
284
|
#
|
235
|
-
# The output is returned in a list of `BLOCK` objects
|
236
|
-
# information, see how-it-works-analyzing.
|
285
|
+
# The output is returned in a list of `BLOCK` objects.
|
237
286
|
#
|
238
287
|
# `AnalyzeDocument` is a synchronous operation. To analyze documents
|
239
288
|
# asynchronously, use StartDocumentAnalysis.
|
240
289
|
#
|
290
|
+
# For more information, see [Document Text Analysis][1].
|
291
|
+
#
|
292
|
+
#
|
293
|
+
#
|
294
|
+
# [1]: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-analyzing.html
|
295
|
+
#
|
241
296
|
# @option params [required, Types::Document] :document
|
242
297
|
# The input document as base64-encoded bytes or an Amazon S3 object. If
|
243
298
|
# you use the AWS CLI to call Amazon Textract operations, you can't
|
244
299
|
# pass image bytes. The document must be an image in JPG or PNG format.
|
245
300
|
#
|
301
|
+
# If you are using an AWS SDK to call Amazon Textract, you might not
|
302
|
+
# need to base64-encode image bytes passed using the `Bytes` field.
|
303
|
+
#
|
246
304
|
# @option params [required, Array<String>] :feature_types
|
247
305
|
# A list of the types of analysis to perform. Add TABLES to the list to
|
248
306
|
# return information about the tables detected in the input document.
|
@@ -273,7 +331,7 @@ module Aws::Textract
|
|
273
331
|
#
|
274
332
|
# resp.document_metadata.pages #=> Integer
|
275
333
|
# resp.blocks #=> Array
|
276
|
-
# resp.blocks[0].block_type #=> String, one of "KEY_VALUE_SET", "PAGE", "LINE", "WORD", "TABLE", "CELL"
|
334
|
+
# resp.blocks[0].block_type #=> String, one of "KEY_VALUE_SET", "PAGE", "LINE", "WORD", "TABLE", "CELL", "SELECTION_ELEMENT"
|
277
335
|
# resp.blocks[0].confidence #=> Float
|
278
336
|
# resp.blocks[0].text #=> String
|
279
337
|
# resp.blocks[0].row_index #=> Integer
|
@@ -294,6 +352,7 @@ module Aws::Textract
|
|
294
352
|
# resp.blocks[0].relationships[0].ids[0] #=> String
|
295
353
|
# resp.blocks[0].entity_types #=> Array
|
296
354
|
# resp.blocks[0].entity_types[0] #=> String, one of "KEY", "VALUE"
|
355
|
+
# resp.blocks[0].selection_status #=> String, one of "SELECTED", "NOT_SELECTED"
|
297
356
|
# resp.blocks[0].page #=> Integer
|
298
357
|
#
|
299
358
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/AnalyzeDocument AWS API Documentation
|
@@ -308,17 +367,31 @@ module Aws::Textract
|
|
308
367
|
# Detects text in the input document. Amazon Textract can detect lines
|
309
368
|
# of text and the words that make up a line of text. The input document
|
310
369
|
# must be an image in JPG or PNG format. `DetectDocumentText` returns
|
311
|
-
# the detected text in an array of Block objects.
|
312
|
-
#
|
370
|
+
# the detected text in an array of Block objects.
|
371
|
+
#
|
372
|
+
# Each document page has an associated `Block` of type PAGE. Each
|
373
|
+
# PAGE `Block` object is the parent of LINE `Block` objects that
|
374
|
+
# represent the lines of detected text on a page. A LINE `Block` object
|
375
|
+
# is a parent for each word that makes up the line. Words are
|
376
|
+
# represented by `Block` objects of type WORD.
|
313
377
|
#
|
314
378
|
# `DetectDocumentText` is a synchronous operation. To analyze documents
|
315
379
|
# asynchronously, use StartDocumentTextDetection.
|
316
380
|
#
|
381
|
+
# For more information, see [Document Text Detection][1].
|
382
|
+
#
|
383
|
+
#
|
384
|
+
#
|
385
|
+
# [1]: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-detecting.html
|
386
|
+
#
|
317
387
|
# @option params [required, Types::Document] :document
|
318
388
|
# The input document as base64-encoded bytes or an Amazon S3 object. If
|
319
389
|
# you use the AWS CLI to call Amazon Textract operations, you can't
|
320
390
|
# pass image bytes. The document must be an image in JPG or PNG format.
|
321
391
|
#
|
392
|
+
# If you are using an AWS SDK to call Amazon Textract, you might not
|
393
|
+
# need to base64-encode image bytes passed using the `Bytes` field.
|
394
|
+
#
|
322
395
|
# @return [Types::DetectDocumentTextResponse] Returns a {Seahorse::Client::Response response} object which responds to the following methods:
|
323
396
|
#
|
324
397
|
# * {Types::DetectDocumentTextResponse#document_metadata #document_metadata} => Types::DocumentMetadata
|
@@ -341,7 +414,7 @@ module Aws::Textract
|
|
341
414
|
#
|
342
415
|
# resp.document_metadata.pages #=> Integer
|
343
416
|
# resp.blocks #=> Array
|
344
|
-
# resp.blocks[0].block_type #=> String, one of "KEY_VALUE_SET", "PAGE", "LINE", "WORD", "TABLE", "CELL"
|
417
|
+
# resp.blocks[0].block_type #=> String, one of "KEY_VALUE_SET", "PAGE", "LINE", "WORD", "TABLE", "CELL", "SELECTION_ELEMENT"
|
345
418
|
# resp.blocks[0].confidence #=> Float
|
346
419
|
# resp.blocks[0].text #=> String
|
347
420
|
# resp.blocks[0].row_index #=> Integer
|
@@ -362,6 +435,7 @@ module Aws::Textract
|
|
362
435
|
# resp.blocks[0].relationships[0].ids[0] #=> String
|
363
436
|
# resp.blocks[0].entity_types #=> Array
|
364
437
|
# resp.blocks[0].entity_types[0] #=> String, one of "KEY", "VALUE"
|
438
|
+
# resp.blocks[0].selection_status #=> String, one of "SELECTED", "NOT_SELECTED"
|
365
439
|
# resp.blocks[0].page #=> Integer
|
366
440
|
#
|
367
441
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/DetectDocumentText AWS API Documentation
|
@@ -374,7 +448,7 @@ module Aws::Textract
|
|
374
448
|
end
|
375
449
|
|
376
450
|
# Gets the results for an Amazon Textract asynchronous operation that
|
377
|
-
# analyzes text in a document
|
451
|
+
# analyzes text in a document.
|
378
452
|
#
|
379
453
|
# You start asynchronous text analysis by calling StartDocumentAnalysis,
|
380
454
|
# which returns a job identifier (`JobId`). When the text analysis
|
@@ -386,8 +460,25 @@ module Aws::Textract
|
|
386
460
|
# `GetDocumentAnalysis`, and pass the job identifier (`JobId`) from the
|
387
461
|
# initial call to `StartDocumentAnalysis`.
|
388
462
|
#
|
389
|
-
# `GetDocumentAnalysis` returns an array of Block objects.
|
390
|
-
# information
|
463
|
+
# `GetDocumentAnalysis` returns an array of Block objects. The following
|
464
|
+
# types of information are returned:
|
465
|
+
#
|
466
|
+
# * Words and lines that are related to nearby lines and words. The
|
467
|
+
# related information is returned in two Block objects each of type
|
468
|
+
# `KEY_VALUE_SET`\: a KEY Block object and a VALUE Block object. For
|
469
|
+
# example, *Name: Ana Silva Carolina* contains a key and value.
|
470
|
+
# *Name:* is the key. *Ana Silva Carolina* is the value.
|
471
|
+
#
|
472
|
+
# * Table and table cell data. A TABLE Block object contains information
|
473
|
+
# about a detected table. A CELL Block object is returned for each
|
474
|
+
# cell in a table.
|
475
|
+
#
|
476
|
+
# * Selectable elements such as checkboxes and radio buttons. A
|
477
|
+
# SELECTION\_ELEMENT Block object contains information about a
|
478
|
+
# selectable element.
|
479
|
+
#
|
480
|
+
# * Lines and words of text. A LINE Block object contains one or more
|
481
|
+
# WORD Block objects.
|
391
482
|
#
|
392
483
|
# Use the `MaxResults` parameter to limit the number of blocks returned.
|
393
484
|
# If there are more results than specified in `MaxResults`, the value of
|
@@ -397,6 +488,12 @@ module Aws::Textract
|
|
397
488
|
# with the token value that's returned from the previous call to
|
398
489
|
# `GetDocumentAnalysis`.
|
399
490
|
#
|
491
|
+
# For more information, see [Document Text Analysis][1].
|
492
|
+
#
|
493
|
+
#
|
494
|
+
#
|
495
|
+
# [1]: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-analyzing.html
|
496
|
+
#
|
400
497
|
# @option params [required, String] :job_id
|
401
498
|
# A unique identifier for the text-detection job. The `JobId` is
|
402
499
|
# returned from `StartDocumentAnalysis`.
|
@@ -436,7 +533,7 @@ module Aws::Textract
|
|
436
533
|
# resp.job_status #=> String, one of "IN_PROGRESS", "SUCCEEDED", "FAILED", "PARTIAL_SUCCESS"
|
437
534
|
# resp.next_token #=> String
|
438
535
|
# resp.blocks #=> Array
|
439
|
-
# resp.blocks[0].block_type #=> String, one of "KEY_VALUE_SET", "PAGE", "LINE", "WORD", "TABLE", "CELL"
|
536
|
+
# resp.blocks[0].block_type #=> String, one of "KEY_VALUE_SET", "PAGE", "LINE", "WORD", "TABLE", "CELL", "SELECTION_ELEMENT"
|
440
537
|
# resp.blocks[0].confidence #=> Float
|
441
538
|
# resp.blocks[0].text #=> String
|
442
539
|
# resp.blocks[0].row_index #=> Integer
|
@@ -457,6 +554,7 @@ module Aws::Textract
|
|
457
554
|
# resp.blocks[0].relationships[0].ids[0] #=> String
|
458
555
|
# resp.blocks[0].entity_types #=> Array
|
459
556
|
# resp.blocks[0].entity_types[0] #=> String, one of "KEY", "VALUE"
|
557
|
+
# resp.blocks[0].selection_status #=> String, one of "SELECTED", "NOT_SELECTED"
|
460
558
|
# resp.blocks[0].page #=> Integer
|
461
559
|
# resp.warnings #=> Array
|
462
560
|
# resp.warnings[0].error_code #=> String
|
@@ -474,8 +572,8 @@ module Aws::Textract
|
|
474
572
|
end
|
475
573
|
|
476
574
|
# Gets the results for an Amazon Textract asynchronous operation that
|
477
|
-
# detects text in a document
|
478
|
-
#
|
575
|
+
# detects text in a document. Amazon Textract can detect lines of text
|
576
|
+
# and the words that make up a line of text.
|
479
577
|
#
|
480
578
|
# You start asynchronous text detection by calling
|
481
579
|
# StartDocumentTextDetection, which returns a job identifier (`JobId`).
|
@@ -488,8 +586,13 @@ module Aws::Textract
|
|
488
586
|
# pass the job identifier (`JobId`) from the initial call to
|
489
587
|
# `StartDocumentTextDetection`.
|
490
588
|
#
|
491
|
-
# `GetDocumentTextDetection` returns an array of Block objects.
|
492
|
-
#
|
589
|
+
# `GetDocumentTextDetection` returns an array of Block objects.
|
590
|
+
#
|
591
|
+
# Each document page has an associated `Block` of type PAGE. Each
|
592
|
+
# PAGE `Block` object is the parent of LINE `Block` objects that
|
593
|
+
# represent the lines of detected text on a page. A LINE `Block` object
|
594
|
+
# is a parent for each word that makes up the line. Words are
|
595
|
+
# represented by `Block` objects of type WORD.
|
493
596
|
#
|
494
597
|
# Use the MaxResults parameter to limit the number of blocks that are
|
495
598
|
# returned. If there are more results than specified in `MaxResults`,
|
@@ -499,8 +602,11 @@ module Aws::Textract
|
|
499
602
|
# `NextToken` request parameter with the token value that's returned
|
500
603
|
# from the previous call to `GetDocumentTextDetection`.
|
501
604
|
#
|
502
|
-
# For more information, see Document Text Detection
|
503
|
-
#
|
605
|
+
# For more information, see [Document Text Detection][1].
|
606
|
+
#
|
607
|
+
#
|
608
|
+
#
|
609
|
+
# [1]: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-detecting.html
|
504
610
|
#
|
505
611
|
# @option params [required, String] :job_id
|
506
612
|
# A unique identifier for the text detection job. The `JobId` is
|
@@ -541,7 +647,7 @@ module Aws::Textract
|
|
541
647
|
# resp.job_status #=> String, one of "IN_PROGRESS", "SUCCEEDED", "FAILED", "PARTIAL_SUCCESS"
|
542
648
|
# resp.next_token #=> String
|
543
649
|
# resp.blocks #=> Array
|
544
|
-
# resp.blocks[0].block_type #=> String, one of "KEY_VALUE_SET", "PAGE", "LINE", "WORD", "TABLE", "CELL"
|
650
|
+
# resp.blocks[0].block_type #=> String, one of "KEY_VALUE_SET", "PAGE", "LINE", "WORD", "TABLE", "CELL", "SELECTION_ELEMENT"
|
545
651
|
# resp.blocks[0].confidence #=> Float
|
546
652
|
# resp.blocks[0].text #=> String
|
547
653
|
# resp.blocks[0].row_index #=> Integer
|
@@ -562,6 +668,7 @@ module Aws::Textract
|
|
562
668
|
# resp.blocks[0].relationships[0].ids[0] #=> String
|
563
669
|
# resp.blocks[0].entity_types #=> Array
|
564
670
|
# resp.blocks[0].entity_types[0] #=> String, one of "KEY", "VALUE"
|
671
|
+
# resp.blocks[0].selection_status #=> String, one of "SELECTED", "NOT_SELECTED"
|
565
672
|
# resp.blocks[0].page #=> Integer
|
566
673
|
# resp.warnings #=> Array
|
567
674
|
# resp.warnings[0].error_code #=> String
|
@@ -578,22 +685,14 @@ module Aws::Textract
|
|
578
685
|
req.send_request(options)
|
579
686
|
end
|
580
687
|
|
581
|
-
# Starts asynchronous analysis of
|
582
|
-
#
|
583
|
-
#
|
584
|
-
#
|
585
|
-
# * Words and lines that are related to nearby lines and words. The
|
586
|
-
# related information is returned in two Block objects: A KEY Block
|
587
|
-
# object and a VALUE Block object. For example, *Name: Ana Silva
|
588
|
-
# Carolina* contains a key and value. *Name:* is the key. *Ana Silva
|
589
|
-
# Carolina* is the value.
|
688
|
+
# Starts asynchronous analysis of an input document for relationships
|
689
|
+
# between detected items such as key and value pairs, tables, and
|
690
|
+
# selection elements.
|
590
691
|
#
|
591
|
-
#
|
592
|
-
#
|
593
|
-
#
|
594
|
-
#
|
595
|
-
# are stored in an Amazon S3 bucket. Use DocumentLocation to specify the
|
596
|
-
# bucket name and file name of the document image.
|
692
|
+
# `StartDocumentAnalysis` can analyze text in documents that are in JPG,
|
693
|
+
# PNG, and PDF format. The documents are stored in an Amazon S3 bucket.
|
694
|
+
# Use DocumentLocation to specify the bucket name and file name of the
|
695
|
+
# document.
|
597
696
|
#
|
598
697
|
# `StartDocumentAnalysis` returns a job identifier (`JobId`) that you
|
599
698
|
# use to get the results of the operation. When text analysis is
|
@@ -605,6 +704,12 @@ module Aws::Textract
|
|
605
704
|
# the job identifier (`JobId`) from the initial call to
|
606
705
|
# `StartDocumentAnalysis`.
|
607
706
|
#
|
707
|
+
# For more information, see [Document Text Analysis][1].
|
708
|
+
#
|
709
|
+
#
|
710
|
+
#
|
711
|
+
# [1]: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-analyzing.html
|
712
|
+
#
|
608
713
|
# @option params [required, Types::DocumentLocation] :document_location
|
609
714
|
# The location of the document to be processed.
|
610
715
|
#
|
@@ -613,7 +718,8 @@ module Aws::Textract
|
|
613
718
|
# return information about the tables that are detected in the input
|
614
719
|
# document. Add FORMS to return detected fields and the associated text.
|
615
720
|
# To perform both types of analysis, add TABLES and FORMS to
|
616
|
-
# `FeatureTypes`.
|
721
|
+
# `FeatureTypes`. All selectable elements (`SELECTION_ELEMENT`) that are
|
722
|
+
# detected are returned, whatever the value of `FeatureTypes`.
|
617
723
|
#
|
618
724
|
# @option params [String] :client_request_token
|
619
725
|
# The idempotent token that you use to identify the start request. If
|
@@ -622,8 +728,10 @@ module Aws::Textract
|
|
622
728
|
# same job from being accidentally started more than once.
|
623
729
|
#
|
624
730
|
# @option params [String] :job_tag
|
625
|
-
#
|
626
|
-
#
|
731
|
+
# An identifier you specify that's included in the completion
|
732
|
+
# notification that's published to the Amazon SNS topic. For example,
|
733
|
+
# you can use `JobTag` to identify the type of document, such as a tax
|
734
|
+
# form or a receipt, that the completion notification corresponds to.
|
627
735
|
#
|
628
736
|
# @option params [Types::NotificationChannel] :notification_channel
|
629
737
|
# The Amazon SNS topic ARN that you want Amazon Textract to publish the
|
@@ -669,9 +777,10 @@ module Aws::Textract
|
|
669
777
|
# Textract can detect lines of text and the words that make up a line of
|
670
778
|
# text.
|
671
779
|
#
|
672
|
-
#
|
673
|
-
# are stored in an Amazon S3
|
674
|
-
# bucket
|
780
|
+
# `StartDocumentTextDetection` can analyze text in documents that are in
|
781
|
+
# JPG, PNG, and PDF format. The documents are stored in an Amazon S3
|
782
|
+
# bucket. Use DocumentLocation to specify the bucket name and file name
|
783
|
+
# of the document.
|
675
784
|
#
|
676
785
|
# `StartDocumentTextDetection` returns a job identifier (`JobId`) that you use
|
677
786
|
# to get the results of the operation. When text detection is finished,
|
@@ -683,8 +792,11 @@ module Aws::Textract
|
|
683
792
|
# pass the job identifier (`JobId`) from the initial call to
|
684
793
|
# `StartDocumentTextDetection`.
|
685
794
|
#
|
686
|
-
# For more information, see Document Text Detection
|
687
|
-
#
|
795
|
+
# For more information, see [Document Text Detection][1].
|
796
|
+
#
|
797
|
+
#
|
798
|
+
#
|
799
|
+
# [1]: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-detecting.html
|
688
800
|
#
|
689
801
|
# @option params [required, Types::DocumentLocation] :document_location
|
690
802
|
# The location of the document to be processed.
|
@@ -696,9 +808,10 @@ module Aws::Textract
|
|
696
808
|
# prevent the same job from being accidentally started more than once.
|
697
809
|
#
|
698
810
|
# @option params [String] :job_tag
|
699
|
-
#
|
700
|
-
#
|
701
|
-
#
|
811
|
+
# An identifier you specify that's included in the completion
|
812
|
+
# notification that's published to the Amazon SNS topic. For example,
|
813
|
+
# you can use `JobTag` to identify the type of document, such as a tax
|
814
|
+
# form or a receipt, that the completion notification corresponds to.
|
702
815
|
#
|
703
816
|
# @option params [Types::NotificationChannel] :notification_channel
|
704
817
|
# The Amazon SNS topic ARN that you want Amazon Textract to publish the
|
@@ -752,7 +865,7 @@ module Aws::Textract
|
|
752
865
|
params: params,
|
753
866
|
config: config)
|
754
867
|
context[:gem_name] = 'aws-sdk-textract'
|
755
|
-
context[:gem_version] = '1.3.0'
|
868
|
+
context[:gem_version] = '1.4.0'
|
756
869
|
Seahorse::Client::Request.new(handlers, context)
|
757
870
|
end
|
758
871
|
|
@@ -66,6 +66,7 @@ module Aws::Textract
|
|
66
66
|
S3ObjectName = Shapes::StringShape.new(name: 'S3ObjectName')
|
67
67
|
S3ObjectVersion = Shapes::StringShape.new(name: 'S3ObjectVersion')
|
68
68
|
SNSTopicArn = Shapes::StringShape.new(name: 'SNSTopicArn')
|
69
|
+
SelectionStatus = Shapes::StringShape.new(name: 'SelectionStatus')
|
69
70
|
StartDocumentAnalysisRequest = Shapes::StructureShape.new(name: 'StartDocumentAnalysisRequest')
|
70
71
|
StartDocumentAnalysisResponse = Shapes::StructureShape.new(name: 'StartDocumentAnalysisResponse')
|
71
72
|
StartDocumentTextDetectionRequest = Shapes::StructureShape.new(name: 'StartDocumentTextDetectionRequest')
|
@@ -97,6 +98,7 @@ module Aws::Textract
|
|
97
98
|
Block.add_member(:id, Shapes::ShapeRef.new(shape: NonEmptyString, location_name: "Id"))
|
98
99
|
Block.add_member(:relationships, Shapes::ShapeRef.new(shape: RelationshipList, location_name: "Relationships"))
|
99
100
|
Block.add_member(:entity_types, Shapes::ShapeRef.new(shape: EntityTypes, location_name: "EntityTypes"))
|
101
|
+
Block.add_member(:selection_status, Shapes::ShapeRef.new(shape: SelectionStatus, location_name: "SelectionStatus"))
|
100
102
|
Block.add_member(:page, Shapes::ShapeRef.new(shape: UInteger, location_name: "Page"))
|
101
103
|
Block.struct_class = Types::Block
|
102
104
|
|
@@ -28,6 +28,9 @@ module Aws::Textract
|
|
28
28
|
# If you use the AWS CLI to call Amazon Textract operations, you
|
29
29
|
# can't pass image bytes. The document must be an image in JPG or PNG
|
30
30
|
# format.
|
31
|
+
#
|
32
|
+
# If you are using an AWS SDK to call Amazon Textract, you might not
|
33
|
+
# need to base64-encode image bytes passed using the `Bytes` field.
|
31
34
|
# @return [Types::Document]
|
32
35
|
#
|
33
36
|
# @!attribute [rw] feature_types
|
@@ -63,13 +66,13 @@ module Aws::Textract
|
|
63
66
|
include Aws::Structure
|
64
67
|
end
|
65
68
|
|
66
|
-
# A `Block` represents
|
69
|
+
# A `Block` represents items that are recognized in a document within a
|
67
70
|
# group of pixels close to each other. The information returned in a
|
68
71
|
# `Block` depends on the type of operation. In document-text detection
|
69
72
|
# (for example DetectDocumentText), you get information about the
|
70
73
|
# detected words and lines of text. In text analysis (for example
|
71
|
-
# AnalyzeDocument), you can get information about the fields
|
72
|
-
# that are detected in the document.
|
74
|
+
# AnalyzeDocument), you can also get information about the fields,
|
75
|
+
# tables and selection elements that are detected in the document.
|
73
76
|
#
|
74
77
|
# An array of `Block` objects is returned by both synchronous and
|
75
78
|
# asynchronous operations. In synchronous operations, such as
|
@@ -77,37 +80,54 @@ module Aws::Textract
|
|
77
80
|
# results. In asynchronous operations, such as GetDocumentAnalysis, the
|
78
81
|
# array is returned over one or more responses.
|
79
82
|
#
|
83
|
+
# For more information, see [How Amazon Textract Works][1].
|
84
|
+
#
|
85
|
+
#
|
86
|
+
#
|
87
|
+
# [1]: https://docs.aws.amazon.com/textract/latest/dg/how-it-works.html
|
88
|
+
#
|
80
89
|
# @!attribute [rw] block_type
|
81
90
|
# The type of text that's recognized in a block. In text-detection
|
82
91
|
# operations, the following types are returned:
|
83
92
|
#
|
84
93
|
# * *PAGE* - Contains a list of the LINE Block objects that are
|
85
|
-
# detected on a
|
94
|
+
# detected on a document page.
|
86
95
|
#
|
87
|
-
# * *WORD* -
|
88
|
-
# aren't separated by
|
96
|
+
# * *WORD* - A word detected on a document page. A word is one or more
|
97
|
+
# ISO basic Latin script characters that aren't separated by
|
98
|
+
# spaces.
|
89
99
|
#
|
90
|
-
# * *LINE* - A string of
|
100
|
+
# * *LINE* - A string of tab-delimited, contiguous words that's
|
101
|
+
# detected on a document page.
|
91
102
|
#
|
92
103
|
# In text analysis operations, the following types are returned:
|
93
104
|
#
|
94
105
|
# * *PAGE* - Contains a list of child Block objects that are detected
|
95
|
-
# on a
|
106
|
+
# on a document page.
|
96
107
|
#
|
97
108
|
# * *KEY\_VALUE\_SET* - Stores the KEY and VALUE Block objects for a
|
98
|
-
# field that's detected
|
99
|
-
# to determine if a KEY\_VALUE\_SET object is a KEY Block
|
100
|
-
# a VALUE Block object.
|
109
|
+
# field that's detected on a document page. Use the `EntityType`
|
110
|
+
# field to determine if a KEY\_VALUE\_SET object is a KEY Block
|
111
|
+
# object or a VALUE Block object.
|
101
112
|
#
|
102
|
-
# * *WORD* -
|
103
|
-
# aren't separated by spaces
|
113
|
+
# * *WORD* - A word detected on a document page. A word is one or more
|
114
|
+
# ISO basic Latin script characters that aren't separated by spaces
|
115
|
+
# that's detected on a document page.
|
104
116
|
#
|
105
|
-
# * *LINE* - A string of tab-delimited, contiguous words
|
117
|
+
# * *LINE* - A string of tab-delimited, contiguous words that's
|
118
|
+
# detected on a document page.
|
106
119
|
#
|
107
|
-
# * *TABLE* - A table that's detected
|
120
|
+
# * *TABLE* - A table that's detected on a document page. A table is
|
121
|
+
# any grid-based information with 2 or more rows or columns with a
|
122
|
+
# cell span of 1 row and 1 column each.
|
108
123
|
#
|
109
124
|
# * *CELL* - A cell within a detected table. The cell is the parent of
|
110
125
|
# the block that contains the text in the cell.
|
126
|
+
#
|
127
|
+
# * *SELECTION\_ELEMENT* - A selectable element such as a radio button
|
128
|
+
# or checkbox that's detected on a document page. Use the value of
|
129
|
+
# `SelectionStatus` to determine the status of the selection
|
130
|
+
# element.
|
111
131
|
# @return [String]
|
112
132
|
#
|
113
133
|
# @!attribute [rw] confidence
|
@@ -176,8 +196,19 @@ module Aws::Textract
|
|
176
196
|
# `GetDocumentTextDetection`.
|
177
197
|
# @return [Array<String>]
|
178
198
|
#
|
199
|
+
# @!attribute [rw] selection_status
|
200
|
+
# The selection status of a selectable element such as a radio button
|
201
|
+
# or checkbox.
|
202
|
+
# @return [String]
|
203
|
+
#
|
179
204
|
# @!attribute [rw] page
|
180
|
-
# The page in which a block was detected.
|
205
|
+
# The page in which a block was detected. `Page` is returned by
|
206
|
+
# asynchronous operations. Page values greater than 1 are only
|
207
|
+
# returned for multi-page documents that are in PDF format. A scanned
|
208
|
+
# image (JPG/PNG), even if it contains multiple document pages, is
|
209
|
+
# always considered to be a single-page document and the value of
|
210
|
+
# `Page` is always 1. Synchronous operations don't return `Page` as
|
211
|
+
# every input document is considered to be a single-page document.
|
181
212
|
# @return [Integer]
|
182
213
|
#
|
183
214
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/Block AWS API Documentation
|
@@ -194,6 +225,7 @@ module Aws::Textract
|
|
194
225
|
:id,
|
195
226
|
:relationships,
|
196
227
|
:entity_types,
|
228
|
+
:selection_status,
|
197
229
|
:page)
|
198
230
|
include Aws::Structure
|
199
231
|
end
|
@@ -264,6 +296,9 @@ module Aws::Textract
|
|
264
296
|
# If you use the AWS CLI to call Amazon Textract operations, you
|
265
297
|
# can't pass image bytes. The document must be an image in JPG or PNG
|
266
298
|
# format.
|
299
|
+
#
|
300
|
+
# If you are using an AWS SDK to call Amazon Textract, you might not
|
301
|
+
# need to base64-encode image bytes passed using the `Bytes` field.
|
267
302
|
# @return [Types::Document]
|
268
303
|
#
|
269
304
|
# @see http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/DetectDocumentTextRequest AWS API Documentation
|
@@ -305,7 +340,7 @@ module Aws::Textract
|
|
305
340
|
# bucket don't need to be base64 encoded.
|
306
341
|
#
|
307
342
|
# The AWS Region for the S3 bucket that contains the S3 object must
|
308
|
-
# match the Region that you use for Amazon Textract operations.
|
343
|
+
# match the AWS Region that you use for Amazon Textract operations.
|
309
344
|
#
|
310
345
|
# If you use the AWS CLI to call Amazon Textract operations, passing
|
311
346
|
# image bytes using the Bytes property isn't supported. You must first
|
@@ -328,8 +363,12 @@ module Aws::Textract
|
|
328
363
|
# }
|
329
364
|
#
|
330
365
|
# @!attribute [rw] bytes
|
331
|
-
# A blob of documents bytes. The maximum size of a
|
332
|
-
# provided in a blob of bytes is 5 MB.
|
366
|
+
# A blob of base64-encoded document bytes. The maximum size of a
|
367
|
+
# document that's provided in a blob of bytes is 5 MB. The document
|
368
|
+
# bytes must be in PNG or JPG format.
|
369
|
+
#
|
370
|
+
# If you are using an AWS SDK to call Amazon Textract, you might not
|
371
|
+
# need to base64-encode image bytes passed using the `Bytes` field.
|
333
372
|
# @return [String]
|
334
373
|
#
|
335
374
|
# @!attribute [rw] s3_object
|
@@ -715,7 +754,9 @@ module Aws::Textract
|
|
715
754
|
# to return information about the tables that are detected in the
|
716
755
|
# input document. Add FORMS to return detected fields and the
|
717
756
|
# associated text. To perform both types of analysis, add TABLES and
|
718
|
-
# FORMS to `FeatureTypes`.
|
757
|
+
# FORMS to `FeatureTypes`. All selectable elements
|
758
|
+
# (`SELECTION_ELEMENT`) that are detected are returned, whatever the
|
759
|
+
# value of `FeatureTypes`.
|
719
760
|
# @return [Array<String>]
|
720
761
|
#
|
721
762
|
# @!attribute [rw] client_request_token
|
@@ -726,8 +767,10 @@ module Aws::Textract
|
|
726
767
|
# @return [String]
|
727
768
|
#
|
728
769
|
# @!attribute [rw] job_tag
|
729
|
-
#
|
730
|
-
#
|
770
|
+
# An identifier you specify that's included in the completion
|
771
|
+
# notification that's published to the Amazon SNS topic. For example,
|
772
|
+
# you can use `JobTag` to identify the type of document, such as a tax
|
773
|
+
# form or a receipt, that the completion notification corresponds to.
|
731
774
|
# @return [String]
|
732
775
|
#
|
733
776
|
# @!attribute [rw] notification_channel
|
@@ -747,7 +790,7 @@ module Aws::Textract
|
|
747
790
|
end
|
748
791
|
|
749
792
|
# @!attribute [rw] job_id
|
750
|
-
# The identifier for the document text
|
793
|
+
# The identifier for the document text detection job. Use `JobId` to
|
751
794
|
# identify the job in a subsequent call to `GetDocumentAnalysis`.
|
752
795
|
# @return [String]
|
753
796
|
#
|
@@ -789,9 +832,10 @@ module Aws::Textract
|
|
789
832
|
# @return [String]
|
790
833
|
#
|
791
834
|
# @!attribute [rw] job_tag
|
792
|
-
#
|
793
|
-
#
|
794
|
-
#
|
835
|
+
# An identifier you specify that's included in the completion
|
836
|
+
# notification that's published to the Amazon SNS topic. For example,
|
837
|
+
# you can use `JobTag` to identify the type of document, such as a tax
|
838
|
+
# form or a receipt, that the completion notification corresponds to.
|
795
839
|
# @return [String]
|
796
840
|
#
|
797
841
|
# @!attribute [rw] notification_channel
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aws-sdk-textract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.0
|
4
|
+
version: 1.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Amazon Web Services
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-04-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: aws-sdk-core
|
@@ -59,7 +59,7 @@ files:
|
|
59
59
|
- lib/aws-sdk-textract/errors.rb
|
60
60
|
- lib/aws-sdk-textract/resource.rb
|
61
61
|
- lib/aws-sdk-textract/types.rb
|
62
|
-
homepage:
|
62
|
+
homepage: https://github.com/aws/aws-sdk-ruby
|
63
63
|
licenses:
|
64
64
|
- Apache-2.0
|
65
65
|
metadata:
|