google-cloud-document_ai-v1beta3 0.15.0 → 0.17.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/google/cloud/document_ai/v1beta3/document_processor_service/client.rb +145 -31
- data/lib/google/cloud/document_ai/v1beta3/document_processor_service/paths.rb +19 -0
- data/lib/google/cloud/document_ai/v1beta3/document_processor_service.rb +1 -1
- data/lib/google/cloud/document_ai/v1beta3/version.rb +1 -1
- data/lib/google/cloud/document_ai/v1beta3.rb +2 -2
- data/lib/google/cloud/documentai/v1beta3/document_io_pb.rb +4 -0
- data/lib/google/cloud/documentai/v1beta3/document_processor_service_pb.rb +10 -0
- data/lib/google/cloud/documentai/v1beta3/document_processor_service_services_pb.rb +6 -2
- data/lib/google/cloud/documentai/v1beta3/processor_type_pb.rb +1 -0
- data/proto_docs/google/cloud/documentai/v1beta3/document.rb +145 -79
- data/proto_docs/google/cloud/documentai/v1beta3/document_io.rb +10 -0
- data/proto_docs/google/cloud/documentai/v1beta3/document_processor_service.rb +68 -27
- data/proto_docs/google/cloud/documentai/v1beta3/document_schema.rb +1 -1
- data/proto_docs/google/cloud/documentai/v1beta3/processor.rb +2 -1
- data/proto_docs/google/cloud/documentai/v1beta3/processor_type.rb +3 -0
- metadata +7 -7
@@ -47,22 +47,27 @@ module Google
|
|
47
47
|
# Optional. UTF-8 encoded text in reading order from the document.
|
48
48
|
# @!attribute [rw] text_styles
|
49
49
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Style>]
|
50
|
-
#
|
50
|
+
# Styles for the
|
51
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#text Document.text}.
|
51
52
|
# @!attribute [rw] pages
|
52
53
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Page>]
|
53
|
-
# Visual page layout for the
|
54
|
+
# Visual page layout for the
|
55
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document Document}.
|
54
56
|
# @!attribute [rw] entities
|
55
57
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Entity>]
|
56
|
-
# A list of entities detected on
|
57
|
-
#
|
58
|
+
# A list of entities detected on
|
59
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#text Document.text}. For
|
60
|
+
# document shards, entities in this list may cross shard boundaries.
|
58
61
|
# @!attribute [rw] entity_relations
|
59
62
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::EntityRelation>]
|
60
|
-
# Placeholder. Relationship among
|
63
|
+
# Placeholder. Relationship among
|
64
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#entities Document.entities}.
|
61
65
|
# @!attribute [rw] text_changes
|
62
66
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::TextChange>]
|
63
|
-
# Placeholder. A list of text corrections made to
|
64
|
-
#
|
65
|
-
#
|
67
|
+
# Placeholder. A list of text corrections made to
|
68
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#text Document.text}. This is
|
69
|
+
# usually used for annotating corrections to OCR mistakes. Text changes for
|
70
|
+
# a given revision may not overlap with each other.
|
66
71
|
# @!attribute [rw] shard_info
|
67
72
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::ShardInfo]
|
68
73
|
# Information about the sharding if this document is a sharded part of a larger
|
@@ -88,8 +93,9 @@ module Google
|
|
88
93
|
# Total number of shards.
|
89
94
|
# @!attribute [rw] text_offset
|
90
95
|
# @return [::Integer]
|
91
|
-
# The index of the first character in
|
92
|
-
#
|
96
|
+
# The index of the first character in
|
97
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#text Document.text} in the
|
98
|
+
# overall document global text.
|
93
99
|
class ShardInfo
|
94
100
|
include ::Google::Protobuf::MessageExts
|
95
101
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -99,7 +105,8 @@ module Google
|
|
99
105
|
# conventions as much as possible.
|
100
106
|
# @!attribute [rw] text_anchor
|
101
107
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::TextAnchor]
|
102
|
-
# Text anchor indexing into the
|
108
|
+
# Text anchor indexing into the
|
109
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#text Document.text}.
|
103
110
|
# @!attribute [rw] color
|
104
111
|
# @return [::Google::Type::Color]
|
105
112
|
# Text color.
|
@@ -146,9 +153,11 @@ module Google
|
|
146
153
|
# A page in a {::Google::Cloud::DocumentAI::V1beta3::Document Document}.
|
147
154
|
# @!attribute [rw] page_number
|
148
155
|
# @return [::Integer]
|
149
|
-
# 1-based index for current
|
150
|
-
#
|
151
|
-
#
|
156
|
+
# 1-based index for current
|
157
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page Page} in a parent
|
158
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document Document}. Useful when a page
|
159
|
+
# is taken out of a {::Google::Cloud::DocumentAI::V1beta3::Document Document}
|
160
|
+
# for individual processing.
|
152
161
|
# @!attribute [rw] image
|
153
162
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Image]
|
154
163
|
# Rendered image for this page. This image is preprocessed to remove any
|
@@ -157,13 +166,15 @@ module Google
|
|
157
166
|
# @!attribute [rw] transforms
|
158
167
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Page::Matrix>]
|
159
168
|
# Transformation matrices that were applied to the original document image
|
160
|
-
# to produce
|
169
|
+
# to produce
|
170
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page#image Page.image}.
|
161
171
|
# @!attribute [rw] dimension
|
162
172
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Dimension]
|
163
173
|
# Physical dimension of the page.
|
164
174
|
# @!attribute [rw] layout
|
165
175
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout]
|
166
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for the
|
176
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for the
|
177
|
+
# page.
|
167
178
|
# @!attribute [rw] detected_languages
|
168
179
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Page::DetectedLanguage>]
|
169
180
|
# A list of detected languages together with confidence.
|
@@ -267,18 +278,23 @@ module Google
|
|
267
278
|
# Visual element describing a layout unit on a page.
|
268
279
|
# @!attribute [rw] text_anchor
|
269
280
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::TextAnchor]
|
270
|
-
# Text anchor indexing into the
|
281
|
+
# Text anchor indexing into the
|
282
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#text Document.text}.
|
271
283
|
# @!attribute [rw] confidence
|
272
284
|
# @return [::Float]
|
273
|
-
# Confidence of the current
|
274
|
-
#
|
275
|
-
#
|
285
|
+
# Confidence of the current
|
286
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} within
|
287
|
+
# context of the object this layout is for. e.g. confidence can be for a
|
288
|
+
# single token, a table, a visual element, etc. depending on context.
|
289
|
+
# Range `[0, 1]`.
|
276
290
|
# @!attribute [rw] bounding_poly
|
277
291
|
# @return [::Google::Cloud::DocumentAI::V1beta3::BoundingPoly]
|
278
|
-
# The bounding polygon for the
|
292
|
+
# The bounding polygon for the
|
293
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout}.
|
279
294
|
# @!attribute [rw] orientation
|
280
295
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout::Orientation]
|
281
|
-
# Detected orientation for the
|
296
|
+
# Detected orientation for the
|
297
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout}.
|
282
298
|
class Layout
|
283
299
|
include ::Google::Protobuf::MessageExts
|
284
300
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -309,7 +325,8 @@ module Google
|
|
309
325
|
# common line-spacing and orientation.
|
310
326
|
# @!attribute [rw] layout
|
311
327
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout]
|
312
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
328
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
329
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Block Block}.
|
313
330
|
# @!attribute [rw] detected_languages
|
314
331
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Page::DetectedLanguage>]
|
315
332
|
# A list of detected languages together with confidence.
|
@@ -324,7 +341,8 @@ module Google
|
|
324
341
|
# A collection of lines that a human would perceive as a paragraph.
|
325
342
|
# @!attribute [rw] layout
|
326
343
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout]
|
327
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
344
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
345
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Paragraph Paragraph}.
|
328
346
|
# @!attribute [rw] detected_languages
|
329
347
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Page::DetectedLanguage>]
|
330
348
|
# A list of detected languages together with confidence.
|
@@ -340,7 +358,8 @@ module Google
|
|
340
358
|
# Does not cross column boundaries, can be horizontal, vertical, etc.
|
341
359
|
# @!attribute [rw] layout
|
342
360
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout]
|
343
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
361
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
362
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Line Line}.
|
344
363
|
# @!attribute [rw] detected_languages
|
345
364
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Page::DetectedLanguage>]
|
346
365
|
# A list of detected languages together with confidence.
|
@@ -355,10 +374,12 @@ module Google
|
|
355
374
|
# A detected token.
|
356
375
|
# @!attribute [rw] layout
|
357
376
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout]
|
358
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
377
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
378
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Token Token}.
|
359
379
|
# @!attribute [rw] detected_break
|
360
380
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Token::DetectedBreak]
|
361
|
-
# Detected break at the end of a
|
381
|
+
# Detected break at the end of a
|
382
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Token Token}.
|
362
383
|
# @!attribute [rw] detected_languages
|
363
384
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Page::DetectedLanguage>]
|
364
385
|
# A list of detected languages together with confidence.
|
@@ -369,7 +390,8 @@ module Google
|
|
369
390
|
include ::Google::Protobuf::MessageExts
|
370
391
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
371
392
|
|
372
|
-
# Detected break at the end of a
|
393
|
+
# Detected break at the end of a
|
394
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Token Token}.
|
373
395
|
# @!attribute [rw] type
|
374
396
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Token::DetectedBreak::Type]
|
375
397
|
# Detected break type.
|
@@ -397,7 +419,8 @@ module Google
|
|
397
419
|
# A detected symbol.
|
398
420
|
# @!attribute [rw] layout
|
399
421
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout]
|
400
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
422
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
423
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Symbol Symbol}.
|
401
424
|
# @!attribute [rw] detected_languages
|
402
425
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Page::DetectedLanguage>]
|
403
426
|
# A list of detected languages together with confidence.
|
@@ -410,10 +433,12 @@ module Google
|
|
410
433
|
# page.
|
411
434
|
# @!attribute [rw] layout
|
412
435
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout]
|
413
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
436
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
437
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::VisualElement VisualElement}.
|
414
438
|
# @!attribute [rw] type
|
415
439
|
# @return [::String]
|
416
|
-
# Type of the
|
440
|
+
# Type of the
|
441
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::VisualElement VisualElement}.
|
417
442
|
# @!attribute [rw] detected_languages
|
418
443
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Page::DetectedLanguage>]
|
419
444
|
# A list of detected languages together with confidence.
|
@@ -425,7 +450,8 @@ module Google
|
|
425
450
|
# A table representation similar to HTML table structure.
|
426
451
|
# @!attribute [rw] layout
|
427
452
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout]
|
428
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
453
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
454
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Table Table}.
|
429
455
|
# @!attribute [rw] header_rows
|
430
456
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Page::Table::TableRow>]
|
431
457
|
# Header rows of the table.
|
@@ -454,7 +480,8 @@ module Google
|
|
454
480
|
# A cell representation inside the table.
|
455
481
|
# @!attribute [rw] layout
|
456
482
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout]
|
457
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
483
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
484
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Table::TableCell TableCell}.
|
458
485
|
# @!attribute [rw] row_span
|
459
486
|
# @return [::Integer]
|
460
487
|
# How many rows this cell spans.
|
@@ -473,11 +500,14 @@ module Google
|
|
473
500
|
# A form field detected on the page.
|
474
501
|
# @!attribute [rw] field_name
|
475
502
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout]
|
476
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for the
|
477
|
-
#
|
503
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for the
|
504
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::FormField FormField}
|
505
|
+
# name. e.g. `Address`, `Email`, `Grand total`, `Phone number`, etc.
|
478
506
|
# @!attribute [rw] field_value
|
479
507
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout]
|
480
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for the
|
508
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for the
|
509
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::FormField FormField}
|
510
|
+
# value.
|
481
511
|
# @!attribute [rw] name_detected_languages
|
482
512
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Page::DetectedLanguage>]
|
483
513
|
# A list of detected languages for name together with confidence.
|
@@ -488,9 +518,10 @@ module Google
|
|
488
518
|
# @return [::String]
|
489
519
|
# If the value is non-textual, this field represents the type. Current
|
490
520
|
# valid values are:
|
491
|
-
#
|
492
|
-
# -
|
493
|
-
# -
|
521
|
+
#
|
522
|
+
# - blank (this indicates the `field_value` is normal text)
|
523
|
+
# - `unfilled_checkbox`
|
524
|
+
# - `filled_checkbox`
|
494
525
|
# @!attribute [rw] corrected_key_text
|
495
526
|
# @return [::String]
|
496
527
|
# Created for Labeling UI to export key text.
|
@@ -512,10 +543,12 @@ module Google
|
|
512
543
|
# A detected barcode.
|
513
544
|
# @!attribute [rw] layout
|
514
545
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout]
|
515
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
546
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
547
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::DetectedBarcode DetectedBarcode}.
|
516
548
|
# @!attribute [rw] barcode
|
517
549
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Barcode]
|
518
|
-
# Detailed barcode information of the
|
550
|
+
# Detailed barcode information of the
|
551
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::DetectedBarcode DetectedBarcode}.
|
519
552
|
class DetectedBarcode
|
520
553
|
include ::Google::Protobuf::MessageExts
|
521
554
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -576,7 +609,8 @@ module Google
|
|
576
609
|
# @!attribute [rw] text_anchor
|
577
610
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::TextAnchor]
|
578
611
|
# Optional. Provenance of the entity.
|
579
|
-
# Text anchor indexing into the
|
612
|
+
# Text anchor indexing into the
|
613
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#text Document.text}.
|
580
614
|
# @!attribute [rw] type
|
581
615
|
# @return [::String]
|
582
616
|
# Required. Entity type from a schema e.g. `Address`.
|
@@ -591,28 +625,29 @@ module Google
|
|
591
625
|
# Optional. Confidence of detected Schema entity. Range `[0, 1]`.
|
592
626
|
# @!attribute [rw] page_anchor
|
593
627
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::PageAnchor]
|
594
|
-
# Optional. Represents the provenance of this entity wrt. the location on
|
595
|
-
# page where it was found.
|
628
|
+
# Optional. Represents the provenance of this entity with respect to the location on
|
629
|
+
# the page where it was found.
|
596
630
|
# @!attribute [rw] id
|
597
631
|
# @return [::String]
|
598
632
|
# Optional. Canonical id. This will be a unique value in the entity list
|
599
633
|
# for this document.
|
600
634
|
# @!attribute [rw] normalized_value
|
601
635
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Entity::NormalizedValue]
|
602
|
-
# Optional. Normalized entity value. Absent if the extracted value could
|
603
|
-
# converted or the type (e.g. address) is not supported for certain
|
636
|
+
# Optional. Normalized entity value. Absent if the extracted value could
|
637
|
+
# not be converted or the type (e.g. address) is not supported for certain
|
604
638
|
# parsers. This field is also only populated for certain supported document
|
605
639
|
# types.
|
606
640
|
# @!attribute [rw] properties
|
607
641
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Entity>]
|
608
|
-
# Optional. Entities can be nested to form a hierarchical data structure
|
609
|
-
# the content in the document.
|
642
|
+
# Optional. Entities can be nested to form a hierarchical data structure
|
643
|
+
# representing the content in the document.
|
610
644
|
# @!attribute [rw] provenance
|
611
645
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Provenance]
|
612
646
|
# Optional. The history of this annotation.
|
613
647
|
# @!attribute [rw] redacted
|
614
648
|
# @return [::Boolean]
|
615
|
-
# Optional. Whether the entity will be redacted for de-identification
|
649
|
+
# Optional. Whether the entity will be redacted for de-identification
|
650
|
+
# purposes.
|
616
651
|
class Entity
|
617
652
|
include ::Google::Protobuf::MessageExts
|
618
653
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -649,8 +684,8 @@ module Google
|
|
649
684
|
# Optional. An optional field to store a normalized string.
|
650
685
|
# For some entity types, one of respective `structured_value` fields may
|
651
686
|
# also be populated. Also not all the types of `structured_value` will be
|
652
|
-
# normalized. For example, some processors may not generate float
|
653
|
-
# or
|
687
|
+
# normalized. For example, some processors may not generate `float`
|
688
|
+
# or `integer` normalized text by default.
|
654
689
|
#
|
655
690
|
# Below are sample formats mapped to structured values.
|
656
691
|
#
|
@@ -663,7 +698,8 @@ module Google
|
|
663
698
|
end
|
664
699
|
end
|
665
700
|
|
666
|
-
# Relationship between
|
701
|
+
# Relationship between
|
702
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Entity Entities}.
|
667
703
|
# @!attribute [rw] subject_id
|
668
704
|
# @return [::String]
|
669
705
|
# Subject entity id.
|
@@ -678,10 +714,12 @@ module Google
|
|
678
714
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
679
715
|
end
|
680
716
|
|
681
|
-
# Text reference indexing into the
|
717
|
+
# Text reference indexing into the
|
718
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#text Document.text}.
|
682
719
|
# @!attribute [rw] text_segments
|
683
720
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::TextAnchor::TextSegment>]
|
684
|
-
# The text segments from the
|
721
|
+
# The text segments from the
|
722
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#text Document.text}.
|
685
723
|
# @!attribute [rw] content
|
686
724
|
# @return [::String]
|
687
725
|
# Contains the content of the text span so that users do
|
@@ -691,15 +729,20 @@ module Google
|
|
691
729
|
include ::Google::Protobuf::MessageExts
|
692
730
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
693
731
|
|
694
|
-
# A text segment in the
|
695
|
-
#
|
696
|
-
#
|
732
|
+
# A text segment in the
|
733
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#text Document.text}. The
|
734
|
+
# indices may be out of bounds, which indicates that the text extends into
|
735
|
+
# another document shard for large sharded documents. See
|
736
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::ShardInfo#text_offset ShardInfo.text_offset}.
|
697
737
|
# @!attribute [rw] start_index
|
698
738
|
# @return [::Integer]
|
699
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::TextAnchor::TextSegment TextSegment}
|
739
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::TextAnchor::TextSegment TextSegment}
|
740
|
+
# start UTF-8 char index in the
|
741
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#text Document.text}.
|
700
742
|
# @!attribute [rw] end_index
|
701
743
|
# @return [::Integer]
|
702
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::TextAnchor::TextSegment TextSegment}
|
744
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::TextAnchor::TextSegment TextSegment}
|
745
|
+
# half open end UTF-8 char index in the
|
703
746
|
# {::Google::Cloud::DocumentAI::V1beta3::Document#text Document.text}.
|
704
747
|
class TextSegment
|
705
748
|
include ::Google::Protobuf::MessageExts
|
@@ -707,8 +750,9 @@ module Google
|
|
707
750
|
end
|
708
751
|
end
|
709
752
|
|
710
|
-
# Referencing the visual context of the entity in the
|
711
|
-
#
|
753
|
+
# Referencing the visual context of the entity in the
|
754
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#pages Document.pages}. Page
|
755
|
+
# anchors can be cross-page, consist of multiple bounding polygons and
|
712
756
|
# optionally reference specific layout element types.
|
713
757
|
# @!attribute [rw] page_refs
|
714
758
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::PageAnchor::PageRef>]
|
@@ -720,22 +764,29 @@ module Google
|
|
720
764
|
# Represents a weak reference to a page element within a document.
|
721
765
|
# @!attribute [rw] page
|
722
766
|
# @return [::Integer]
|
723
|
-
# Required. Index into the
|
724
|
-
#
|
725
|
-
#
|
767
|
+
# Required. Index into the
|
768
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#pages Document.pages}
|
769
|
+
# element, for example using
|
770
|
+
# `[Document.pages][page_refs.page]` to locate the related page element.
|
771
|
+
# This field is skipped when its value is the default `0`. See
|
726
772
|
# https://developers.google.com/protocol-buffers/docs/proto3#json.
|
727
773
|
# @!attribute [rw] layout_type
|
728
774
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::PageAnchor::PageRef::LayoutType]
|
729
|
-
# Optional. The type of the layout element that is being referenced if
|
775
|
+
# Optional. The type of the layout element that is being referenced if
|
776
|
+
# any.
|
730
777
|
# @!attribute [rw] layout_id
|
731
778
|
# @return [::String]
|
732
|
-
# Optional. Deprecated. Use
|
779
|
+
# Optional. Deprecated. Use
|
780
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::PageAnchor::PageRef#bounding_poly PageRef.bounding_poly}
|
781
|
+
# instead.
|
733
782
|
# @!attribute [rw] bounding_poly
|
734
783
|
# @return [::Google::Cloud::DocumentAI::V1beta3::BoundingPoly]
|
735
|
-
# Optional. Identifies the bounding polygon of a layout element on the
|
784
|
+
# Optional. Identifies the bounding polygon of a layout element on the
|
785
|
+
# page.
|
736
786
|
# @!attribute [rw] confidence
|
737
787
|
# @return [::Float]
|
738
|
-
# Optional. Confidence of detected page element, if applicable. Range
|
788
|
+
# Optional. Confidence of detected page element, if applicable. Range
|
789
|
+
# `[0, 1]`.
|
739
790
|
class PageRef
|
740
791
|
include ::Google::Protobuf::MessageExts
|
741
792
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -745,25 +796,39 @@ module Google
|
|
745
796
|
# Layout Unspecified.
|
746
797
|
LAYOUT_TYPE_UNSPECIFIED = 0
|
747
798
|
|
748
|
-
# References a
|
799
|
+
# References a
|
800
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page#blocks Page.blocks}
|
801
|
+
# element.
|
749
802
|
BLOCK = 1
|
750
803
|
|
751
|
-
# References a
|
804
|
+
# References a
|
805
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page#paragraphs Page.paragraphs}
|
806
|
+
# element.
|
752
807
|
PARAGRAPH = 2
|
753
808
|
|
754
|
-
# References a
|
809
|
+
# References a
|
810
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page#lines Page.lines}
|
811
|
+
# element.
|
755
812
|
LINE = 3
|
756
813
|
|
757
|
-
# References a
|
814
|
+
# References a
|
815
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page#tokens Page.tokens}
|
816
|
+
# element.
|
758
817
|
TOKEN = 4
|
759
818
|
|
760
|
-
# References a
|
819
|
+
# References a
|
820
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page#visual_elements Page.visual_elements}
|
821
|
+
# element.
|
761
822
|
VISUAL_ELEMENT = 5
|
762
823
|
|
763
|
-
# References a
|
824
|
+
# References a
|
825
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page#tables Page.tables}
|
826
|
+
# element.
|
764
827
|
TABLE = 6
|
765
828
|
|
766
|
-
# References a
|
829
|
+
# References a
|
830
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page#form_fields Page.form_fields}
|
831
|
+
# element.
|
767
832
|
FORM_FIELD = 7
|
768
833
|
end
|
769
834
|
end
|
@@ -882,10 +947,11 @@ module Google
|
|
882
947
|
# @!attribute [rw] text_anchor
|
883
948
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::TextAnchor]
|
884
949
|
# Provenance of the correction.
|
885
|
-
# Text anchor indexing into the
|
886
|
-
#
|
887
|
-
#
|
888
|
-
#
|
950
|
+
# Text anchor indexing into the
|
951
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#text Document.text}. There
|
952
|
+
# can only be a single `TextAnchor.text_segments` element. If the start
|
953
|
+
# and end index of the text segment are the same, the text change is
|
954
|
+
# inserted before that index.
|
889
955
|
# @!attribute [rw] changed_text
|
890
956
|
# @return [::String]
|
891
957
|
# The text that replaces the text identified in the `text_anchor`.
|
@@ -114,6 +114,16 @@ module Google
|
|
114
114
|
end
|
115
115
|
end
|
116
116
|
end
|
117
|
+
|
118
|
+
# Config for Document OCR.
|
119
|
+
# @!attribute [rw] enable_native_pdf_parsing
|
120
|
+
# @return [::Boolean]
|
121
|
+
# Enables special handling for PDFs with existing text information. Results
|
122
|
+
# in better text extraction quality in such PDF inputs.
|
123
|
+
class OcrConfig
|
124
|
+
include ::Google::Protobuf::MessageExts
|
125
|
+
extend ::Google::Protobuf::MessageExts::ClassMethods
|
126
|
+
end
|
117
127
|
end
|
118
128
|
end
|
119
129
|
end
|