google-cloud-document_ai-v1 0.21.0 → 0.22.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -52,16 +52,19 @@ module Google
52
52
  # Visual page layout for the {::Google::Cloud::DocumentAI::V1::Document Document}.
53
53
  # @!attribute [rw] entities
54
54
  # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Entity>]
55
- # A list of entities detected on {::Google::Cloud::DocumentAI::V1::Document#text Document.text}. For document shards,
56
- # entities in this list may cross shard boundaries.
55
+ # A list of entities detected on
56
+ # {::Google::Cloud::DocumentAI::V1::Document#text Document.text}. For document
57
+ # shards, entities in this list may cross shard boundaries.
57
58
  # @!attribute [rw] entity_relations
58
59
  # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::EntityRelation>]
59
- # Placeholder. Relationship among {::Google::Cloud::DocumentAI::V1::Document#entities Document.entities}.
60
+ # Placeholder. Relationship among
61
+ # {::Google::Cloud::DocumentAI::V1::Document#entities Document.entities}.
60
62
  # @!attribute [rw] text_changes
61
63
  # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::TextChange>]
62
- # Placeholder. A list of text corrections made to {::Google::Cloud::DocumentAI::V1::Document#text Document.text}. This
63
- # is usually used for annotating corrections to OCR mistakes. Text changes
64
- # for a given revision may not overlap with each other.
64
+ # Placeholder. A list of text corrections made to
65
+ # {::Google::Cloud::DocumentAI::V1::Document#text Document.text}. This is usually
66
+ # used for annotating corrections to OCR mistakes. Text changes for a given
67
+ # revision may not overlap with each other.
65
68
  # @!attribute [rw] shard_info
66
69
  # @return [::Google::Cloud::DocumentAI::V1::Document::ShardInfo]
67
70
  # Information about the sharding if this document is a sharded part of a larger
@@ -72,6 +75,12 @@ module Google
72
75
  # @!attribute [rw] revisions
73
76
  # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Revision>]
74
77
  # Placeholder. Revision history of this document.
78
+ # @!attribute [rw] document_layout
79
+ # @return [::Google::Cloud::DocumentAI::V1::Document::DocumentLayout]
80
+ # Parsed layout of the document.
81
+ # @!attribute [rw] chunked_document
82
+ # @return [::Google::Cloud::DocumentAI::V1::Document::ChunkedDocument]
83
+ # Document chunked based on chunking config.
75
84
  class Document
76
85
  include ::Google::Protobuf::MessageExts
77
86
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -87,7 +96,8 @@ module Google
87
96
  # Total number of shards.
88
97
  # @!attribute [rw] text_offset
89
98
  # @return [::Integer]
90
- # The index of the first character in {::Google::Cloud::DocumentAI::V1::Document#text Document.text} in the overall
99
+ # The index of the first character in
100
+ # {::Google::Cloud::DocumentAI::V1::Document#text Document.text} in the overall
91
101
  # document global text.
92
102
  class ShardInfo
93
103
  include ::Google::Protobuf::MessageExts
@@ -98,7 +108,8 @@ module Google
98
108
  # conventions as much as possible.
99
109
  # @!attribute [rw] text_anchor
100
110
  # @return [::Google::Cloud::DocumentAI::V1::Document::TextAnchor]
101
- # Text anchor indexing into the {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
111
+ # Text anchor indexing into the
112
+ # {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
102
113
  # @!attribute [rw] color
103
114
  # @return [::Google::Type::Color]
104
115
  # Text color.
@@ -147,9 +158,11 @@ module Google
147
158
  # A page in a {::Google::Cloud::DocumentAI::V1::Document Document}.
148
159
  # @!attribute [rw] page_number
149
160
  # @return [::Integer]
150
- # 1-based index for current {::Google::Cloud::DocumentAI::V1::Document::Page Page} in a parent {::Google::Cloud::DocumentAI::V1::Document Document}.
151
- # Useful when a page is taken out of a {::Google::Cloud::DocumentAI::V1::Document Document} for individual
152
- # processing.
161
+ # 1-based index for current
162
+ # {::Google::Cloud::DocumentAI::V1::Document::Page Page} in a parent
163
+ # {::Google::Cloud::DocumentAI::V1::Document Document}. Useful when a page is
164
+ # taken out of a {::Google::Cloud::DocumentAI::V1::Document Document} for
165
+ # individual processing.
153
166
  # @!attribute [rw] image
154
167
  # @return [::Google::Cloud::DocumentAI::V1::Document::Page::Image]
155
168
  # Rendered image for this page. This image is preprocessed to remove any
@@ -271,18 +284,23 @@ module Google
271
284
  # Visual element describing a layout unit on a page.
272
285
  # @!attribute [rw] text_anchor
273
286
  # @return [::Google::Cloud::DocumentAI::V1::Document::TextAnchor]
274
- # Text anchor indexing into the {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
287
+ # Text anchor indexing into the
288
+ # {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
275
289
  # @!attribute [rw] confidence
276
290
  # @return [::Float]
277
- # Confidence of the current {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} within context of the object this
278
- # layout is for. e.g. confidence can be for a single token, a table,
279
- # a visual element, etc. depending on context. Range `[0, 1]`.
291
+ # Confidence of the current
292
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} within
293
+ # context of the object this layout is for. e.g. confidence can be for a
294
+ # single token, a table, a visual element, etc. depending on context.
295
+ # Range `[0, 1]`.
280
296
  # @!attribute [rw] bounding_poly
281
297
  # @return [::Google::Cloud::DocumentAI::V1::BoundingPoly]
282
- # The bounding polygon for the {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout}.
298
+ # The bounding polygon for the
299
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout}.
283
300
  # @!attribute [rw] orientation
284
301
  # @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout::Orientation]
285
- # Detected orientation for the {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout}.
302
+ # Detected orientation for the
303
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout}.
286
304
  class Layout
287
305
  include ::Google::Protobuf::MessageExts
288
306
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -313,7 +331,8 @@ module Google
313
331
  # common line-spacing and orientation.
314
332
  # @!attribute [rw] layout
315
333
  # @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
316
- # {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for {::Google::Cloud::DocumentAI::V1::Document::Page::Block Block}.
334
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
335
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Block Block}.
317
336
  # @!attribute [rw] detected_languages
318
337
  # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::DetectedLanguage>]
319
338
  # A list of detected languages together with confidence.
@@ -329,7 +348,8 @@ module Google
329
348
  # A collection of lines that a human would perceive as a paragraph.
330
349
  # @!attribute [rw] layout
331
350
  # @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
332
- # {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for {::Google::Cloud::DocumentAI::V1::Document::Page::Paragraph Paragraph}.
351
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
352
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Paragraph Paragraph}.
333
353
  # @!attribute [rw] detected_languages
334
354
  # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::DetectedLanguage>]
335
355
  # A list of detected languages together with confidence.
@@ -346,7 +366,8 @@ module Google
346
366
  # Does not cross column boundaries, can be horizontal, vertical, etc.
347
367
  # @!attribute [rw] layout
348
368
  # @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
349
- # {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for {::Google::Cloud::DocumentAI::V1::Document::Page::Line Line}.
369
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
370
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Line Line}.
350
371
  # @!attribute [rw] detected_languages
351
372
  # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::DetectedLanguage>]
352
373
  # A list of detected languages together with confidence.
@@ -362,10 +383,12 @@ module Google
362
383
  # A detected token.
363
384
  # @!attribute [rw] layout
364
385
  # @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
365
- # {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for {::Google::Cloud::DocumentAI::V1::Document::Page::Token Token}.
386
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
387
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Token Token}.
366
388
  # @!attribute [rw] detected_break
367
389
  # @return [::Google::Cloud::DocumentAI::V1::Document::Page::Token::DetectedBreak]
368
- # Detected break at the end of a {::Google::Cloud::DocumentAI::V1::Document::Page::Token Token}.
390
+ # Detected break at the end of a
391
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Token Token}.
369
392
  # @!attribute [rw] detected_languages
370
393
  # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::DetectedLanguage>]
371
394
  # A list of detected languages together with confidence.
@@ -380,7 +403,8 @@ module Google
380
403
  include ::Google::Protobuf::MessageExts
381
404
  extend ::Google::Protobuf::MessageExts::ClassMethods
382
405
 
383
- # Detected break at the end of a {::Google::Cloud::DocumentAI::V1::Document::Page::Token Token}.
406
+ # Detected break at the end of a
407
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Token Token}.
384
408
  # @!attribute [rw] type
385
409
  # @return [::Google::Cloud::DocumentAI::V1::Document::Page::Token::DetectedBreak::Type]
386
410
  # Detected break type.
@@ -410,8 +434,9 @@ module Google
410
434
  # Font size in points (`1` point is `¹⁄₇₂` inches).
411
435
  # @!attribute [rw] pixel_font_size
412
436
  # @return [::Float]
413
- # Font size in pixels, equal to _unrounded {::Google::Cloud::DocumentAI::V1::Document::Page::Token::StyleInfo#font_size font_size}_ *
414
- # _resolution_ ÷ `72.0`.
437
+ # Font size in pixels, equal to _unrounded
438
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Token::StyleInfo#font_size font_size}_
439
+ # * _resolution_ ÷ `72.0`.
415
440
  # @!attribute [rw] letter_spacing
416
441
  # @return [::Float]
417
442
  # Letter spacing in points.
@@ -420,8 +445,9 @@ module Google
420
445
  # Name or style of the font.
421
446
  # @!attribute [rw] bold
422
447
  # @return [::Boolean]
423
- # Whether the text is bold (equivalent to {::Google::Cloud::DocumentAI::V1::Document::Page::Token::StyleInfo#font_weight font_weight} is at least
424
- # `700`).
448
+ # Whether the text is bold (equivalent to
449
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Token::StyleInfo#font_weight font_weight}
450
+ # is at least `700`).
425
451
  # @!attribute [rw] italic
426
452
  # @return [::Boolean]
427
453
  # Whether the text is italic.
@@ -430,16 +456,16 @@ module Google
430
456
  # Whether the text is underlined.
431
457
  # @!attribute [rw] strikeout
432
458
  # @return [::Boolean]
433
- # Whether the text is strikethrough.
459
+ # Whether the text is strikethrough. This feature is not supported yet.
434
460
  # @!attribute [rw] subscript
435
461
  # @return [::Boolean]
436
- # Whether the text is a subscript.
462
+ # Whether the text is a subscript. This feature is not supported yet.
437
463
  # @!attribute [rw] superscript
438
464
  # @return [::Boolean]
439
- # Whether the text is a superscript.
465
+ # Whether the text is a superscript. This feature is not supported yet.
440
466
  # @!attribute [rw] smallcaps
441
467
  # @return [::Boolean]
442
- # Whether the text is in small caps.
468
+ # Whether the text is in small caps. This feature is not supported yet.
443
469
  # @!attribute [rw] font_weight
444
470
  # @return [::Integer]
445
471
  # TrueType weight on a scale `100` (thin) to `1000` (ultra-heavy).
@@ -462,7 +488,8 @@ module Google
462
488
  # A detected symbol.
463
489
  # @!attribute [rw] layout
464
490
  # @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
465
- # {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for {::Google::Cloud::DocumentAI::V1::Document::Page::Symbol Symbol}.
491
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
492
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Symbol Symbol}.
466
493
  # @!attribute [rw] detected_languages
467
494
  # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::DetectedLanguage>]
468
495
  # A list of detected languages together with confidence.
@@ -475,10 +502,12 @@ module Google
475
502
  # page.
476
503
  # @!attribute [rw] layout
477
504
  # @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
478
- # {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for {::Google::Cloud::DocumentAI::V1::Document::Page::VisualElement VisualElement}.
505
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
506
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::VisualElement VisualElement}.
479
507
  # @!attribute [rw] type
480
508
  # @return [::String]
481
- # Type of the {::Google::Cloud::DocumentAI::V1::Document::Page::VisualElement VisualElement}.
509
+ # Type of the
510
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::VisualElement VisualElement}.
482
511
  # @!attribute [rw] detected_languages
483
512
  # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::DetectedLanguage>]
484
513
  # A list of detected languages together with confidence.
@@ -490,7 +519,8 @@ module Google
490
519
  # A table representation similar to HTML table structure.
491
520
  # @!attribute [rw] layout
492
521
  # @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
493
- # {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for {::Google::Cloud::DocumentAI::V1::Document::Page::Table Table}.
522
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
523
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Table Table}.
494
524
  # @!attribute [rw] header_rows
495
525
  # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::Table::TableRow>]
496
526
  # Header rows of the table.
@@ -520,7 +550,8 @@ module Google
520
550
  # A cell representation inside the table.
521
551
  # @!attribute [rw] layout
522
552
  # @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
523
- # {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for {::Google::Cloud::DocumentAI::V1::Document::Page::Table::TableCell TableCell}.
553
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
554
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Table::TableCell TableCell}.
524
555
  # @!attribute [rw] row_span
525
556
  # @return [::Integer]
526
557
  # How many rows this cell spans.
@@ -539,11 +570,13 @@ module Google
539
570
  # A form field detected on the page.
540
571
  # @!attribute [rw] field_name
541
572
  # @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
542
- # {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for the {::Google::Cloud::DocumentAI::V1::Document::Page::FormField FormField} name. e.g. `Address`, `Email`,
543
- # `Grand total`, `Phone number`, etc.
573
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for the
574
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::FormField FormField} name.
575
+ # e.g. `Address`, `Email`, `Grand total`, `Phone number`, etc.
544
576
  # @!attribute [rw] field_value
545
577
  # @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
546
- # {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for the {::Google::Cloud::DocumentAI::V1::Document::Page::FormField FormField} value.
578
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for the
579
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::FormField FormField} value.
547
580
  # @!attribute [rw] name_detected_languages
548
581
  # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::DetectedLanguage>]
549
582
  # A list of detected languages for name together with confidence.
@@ -579,10 +612,12 @@ module Google
579
612
  # A detected barcode.
580
613
  # @!attribute [rw] layout
581
614
  # @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
582
- # {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for {::Google::Cloud::DocumentAI::V1::Document::Page::DetectedBarcode DetectedBarcode}.
615
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
616
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::DetectedBarcode DetectedBarcode}.
583
617
  # @!attribute [rw] barcode
584
618
  # @return [::Google::Cloud::DocumentAI::V1::Barcode]
585
- # Detailed barcode information of the {::Google::Cloud::DocumentAI::V1::Document::Page::DetectedBarcode DetectedBarcode}.
619
+ # Detailed barcode information of the
620
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::DetectedBarcode DetectedBarcode}.
586
621
  class DetectedBarcode
587
622
  include ::Google::Protobuf::MessageExts
588
623
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -643,7 +678,8 @@ module Google
643
678
  # @!attribute [rw] text_anchor
644
679
  # @return [::Google::Cloud::DocumentAI::V1::Document::TextAnchor]
645
680
  # Optional. Provenance of the entity.
646
- # Text anchor indexing into the {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
681
+ # Text anchor indexing into the
682
+ # {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
647
683
  # @!attribute [rw] type
648
684
  # @return [::String]
649
685
  # Required. Entity type from a schema e.g. `Address`.
@@ -658,28 +694,29 @@ module Google
658
694
  # Optional. Confidence of detected Schema entity. Range `[0, 1]`.
659
695
  # @!attribute [rw] page_anchor
660
696
  # @return [::Google::Cloud::DocumentAI::V1::Document::PageAnchor]
661
- # Optional. Represents the provenance of this entity wrt. the location on the
662
- # page where it was found.
697
+ # Optional. Represents the provenance of this entity wrt. the location on
698
+ # the page where it was found.
663
699
  # @!attribute [rw] id
664
700
  # @return [::String]
665
701
  # Optional. Canonical id. This will be a unique value in the entity list
666
702
  # for this document.
667
703
  # @!attribute [rw] normalized_value
668
704
  # @return [::Google::Cloud::DocumentAI::V1::Document::Entity::NormalizedValue]
669
- # Optional. Normalized entity value. Absent if the extracted value could not be
670
- # converted or the type (e.g. address) is not supported for certain
705
+ # Optional. Normalized entity value. Absent if the extracted value could
706
+ # not be converted or the type (e.g. address) is not supported for certain
671
707
  # parsers. This field is also only populated for certain supported document
672
708
  # types.
673
709
  # @!attribute [rw] properties
674
710
  # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Entity>]
675
- # Optional. Entities can be nested to form a hierarchical data structure representing
676
- # the content in the document.
711
+ # Optional. Entities can be nested to form a hierarchical data structure
712
+ # representing the content in the document.
677
713
  # @!attribute [rw] provenance
678
714
  # @return [::Google::Cloud::DocumentAI::V1::Document::Provenance]
679
715
  # Optional. The history of this annotation.
680
716
  # @!attribute [rw] redacted
681
717
  # @return [::Boolean]
682
- # Optional. Whether the entity will be redacted for de-identification purposes.
718
+ # Optional. Whether the entity will be redacted for de-identification
719
+ # purposes.
683
720
  class Entity
684
721
  include ::Google::Protobuf::MessageExts
685
722
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -730,7 +767,8 @@ module Google
730
767
  end
731
768
  end
732
769
 
733
- # Relationship between {::Google::Cloud::DocumentAI::V1::Document::Entity Entities}.
770
+ # Relationship between
771
+ # {::Google::Cloud::DocumentAI::V1::Document::Entity Entities}.
734
772
  # @!attribute [rw] subject_id
735
773
  # @return [::String]
736
774
  # Subject entity id.
@@ -745,10 +783,12 @@ module Google
745
783
  extend ::Google::Protobuf::MessageExts::ClassMethods
746
784
  end
747
785
 
748
- # Text reference indexing into the {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
786
+ # Text reference indexing into the
787
+ # {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
749
788
  # @!attribute [rw] text_segments
750
789
  # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::TextAnchor::TextSegment>]
751
- # The text segments from the {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
790
+ # The text segments from the
791
+ # {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
752
792
  # @!attribute [rw] content
753
793
  # @return [::String]
754
794
  # Contains the content of the text span so that users do
@@ -758,15 +798,20 @@ module Google
758
798
  include ::Google::Protobuf::MessageExts
759
799
  extend ::Google::Protobuf::MessageExts::ClassMethods
760
800
 
761
- # A text segment in the {::Google::Cloud::DocumentAI::V1::Document#text Document.text}. The indices may be out of bounds
762
- # which indicate that the text extends into another document shard for
763
- # large sharded documents. See {::Google::Cloud::DocumentAI::V1::Document::ShardInfo#text_offset ShardInfo.text_offset}
801
+ # A text segment in the
802
+ # {::Google::Cloud::DocumentAI::V1::Document#text Document.text}. The indices
803
+ # may be out of bounds, which indicates that the text extends into another
804
+ # document shard for large sharded documents. See
805
+ # {::Google::Cloud::DocumentAI::V1::Document::ShardInfo#text_offset ShardInfo.text_offset}
764
806
  # @!attribute [rw] start_index
765
807
  # @return [::Integer]
766
- # {::Google::Cloud::DocumentAI::V1::Document::TextAnchor::TextSegment TextSegment} start UTF-8 char index in the {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
808
+ # {::Google::Cloud::DocumentAI::V1::Document::TextAnchor::TextSegment TextSegment}
809
+ # start UTF-8 char index in the
810
+ # {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
767
811
  # @!attribute [rw] end_index
768
812
  # @return [::Integer]
769
- # {::Google::Cloud::DocumentAI::V1::Document::TextAnchor::TextSegment TextSegment} half open end UTF-8 char index in the
813
+ # {::Google::Cloud::DocumentAI::V1::Document::TextAnchor::TextSegment TextSegment}
814
+ # half open end UTF-8 char index in the
770
815
  # {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
771
816
  class TextSegment
772
817
  include ::Google::Protobuf::MessageExts
@@ -774,9 +819,10 @@ module Google
774
819
  end
775
820
  end
776
821
 
777
- # Referencing the visual context of the entity in the {::Google::Cloud::DocumentAI::V1::Document#pages Document.pages}.
778
- # Page anchors can be cross-page, consist of multiple bounding polygons and
779
- # optionally reference specific layout element types.
822
+ # Referencing the visual context of the entity in the
823
+ # {::Google::Cloud::DocumentAI::V1::Document#pages Document.pages}. Page anchors
824
+ # can be cross-page, consist of multiple bounding polygons and optionally
825
+ # reference specific layout element types.
780
826
  # @!attribute [rw] page_refs
781
827
  # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::PageAnchor::PageRef>]
782
828
  # One or more references to visual page elements
@@ -787,25 +833,31 @@ module Google
787
833
  # Represents a weak reference to a page element within a document.
788
834
  # @!attribute [rw] page
789
835
  # @return [::Integer]
790
- # Required. Index into the {::Google::Cloud::DocumentAI::V1::Document#pages Document.pages} element, for example using
836
+ # Required. Index into the
837
+ # {::Google::Cloud::DocumentAI::V1::Document#pages Document.pages} element,
838
+ # for example using
791
839
  # `[Document.pages][page_refs.page]` to locate the related page element.
792
840
  # This field is skipped when its value is the default `0`. See
793
841
  # https://developers.google.com/protocol-buffers/docs/proto3#json.
794
842
  # @!attribute [rw] layout_type
795
843
  # @return [::Google::Cloud::DocumentAI::V1::Document::PageAnchor::PageRef::LayoutType]
796
- # Optional. The type of the layout element that is being referenced if any.
844
+ # Optional. The type of the layout element that is being referenced if
845
+ # any.
797
846
  # @!attribute [rw] layout_id
798
847
  # @deprecated This field is deprecated and may be removed in the next major version update.
799
848
  # @return [::String]
800
- # Optional. Deprecated. Use {::Google::Cloud::DocumentAI::V1::Document::PageAnchor::PageRef#bounding_poly PageRef.bounding_poly} instead.
849
+ # Optional. Deprecated. Use
850
+ # {::Google::Cloud::DocumentAI::V1::Document::PageAnchor::PageRef#bounding_poly PageRef.bounding_poly}
851
+ # instead.
801
852
  # @!attribute [rw] bounding_poly
802
853
  # @return [::Google::Cloud::DocumentAI::V1::BoundingPoly]
803
- # Optional. Identifies the bounding polygon of a layout element on the page.
804
- # If `layout_type` is set, the bounding polygon must be exactly the same
805
- # to the layout element it's referring to.
854
+ # Optional. Identifies the bounding polygon of a layout element on the
855
+ # page. If `layout_type` is set, the bounding polygon must be exactly the
856
+ # same as the layout element it's referring to.
806
857
  # @!attribute [rw] confidence
807
858
  # @return [::Float]
808
- # Optional. Confidence of detected page element, if applicable. Range `[0, 1]`.
859
+ # Optional. Confidence of detected page element, if applicable. Range
860
+ # `[0, 1]`.
809
861
  class PageRef
810
862
  include ::Google::Protobuf::MessageExts
811
863
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -815,25 +867,38 @@ module Google
815
867
  # Layout Unspecified.
816
868
  LAYOUT_TYPE_UNSPECIFIED = 0
817
869
 
818
- # References a {::Google::Cloud::DocumentAI::V1::Document::Page#blocks Page.blocks} element.
870
+ # References a
871
+ # {::Google::Cloud::DocumentAI::V1::Document::Page#blocks Page.blocks}
872
+ # element.
819
873
  BLOCK = 1
820
874
 
821
- # References a {::Google::Cloud::DocumentAI::V1::Document::Page#paragraphs Page.paragraphs} element.
875
+ # References a
876
+ # {::Google::Cloud::DocumentAI::V1::Document::Page#paragraphs Page.paragraphs}
877
+ # element.
822
878
  PARAGRAPH = 2
823
879
 
824
- # References a {::Google::Cloud::DocumentAI::V1::Document::Page#lines Page.lines} element.
880
+ # References a
881
+ # {::Google::Cloud::DocumentAI::V1::Document::Page#lines Page.lines} element.
825
882
  LINE = 3
826
883
 
827
- # References a {::Google::Cloud::DocumentAI::V1::Document::Page#tokens Page.tokens} element.
884
+ # References a
885
+ # {::Google::Cloud::DocumentAI::V1::Document::Page#tokens Page.tokens}
886
+ # element.
828
887
  TOKEN = 4
829
888
 
830
- # References a {::Google::Cloud::DocumentAI::V1::Document::Page#visual_elements Page.visual_elements} element.
889
+ # References a
890
+ # {::Google::Cloud::DocumentAI::V1::Document::Page#visual_elements Page.visual_elements}
891
+ # element.
831
892
  VISUAL_ELEMENT = 5
832
893
 
833
- # References a {::Google::Cloud::DocumentAI::V1::Document::Page#tables Page.tables} element.
894
+ # References a
895
+ # {::Google::Cloud::DocumentAI::V1::Document::Page#tables Page.tables}
896
+ # element.
834
897
  TABLE = 6
835
898
 
836
- # References a {::Google::Cloud::DocumentAI::V1::Document::Page#form_fields Page.form_fields} element.
899
+ # References a
900
+ # {::Google::Cloud::DocumentAI::V1::Document::Page#form_fields Page.form_fields}
901
+ # element.
837
902
  FORM_FIELD = 7
838
903
  end
839
904
  end
@@ -965,8 +1030,9 @@ module Google
965
1030
  # @!attribute [rw] text_anchor
966
1031
  # @return [::Google::Cloud::DocumentAI::V1::Document::TextAnchor]
967
1032
  # Provenance of the correction.
968
- # Text anchor indexing into the {::Google::Cloud::DocumentAI::V1::Document#text Document.text}. There can only be a
969
- # single `TextAnchor.text_segments` element. If the start and
1033
+ # Text anchor indexing into the
1034
+ # {::Google::Cloud::DocumentAI::V1::Document#text Document.text}. There can
1035
+ # only be a single `TextAnchor.text_segments` element. If the start and
970
1036
  # end index of the text segment are the same, the text change is inserted
971
1037
  # before that index.
972
1038
  # @!attribute [rw] changed_text
@@ -980,6 +1046,200 @@ module Google
980
1046
  include ::Google::Protobuf::MessageExts
981
1047
  extend ::Google::Protobuf::MessageExts::ClassMethods
982
1048
  end
1049
+
1050
+ # Represents the parsed layout of a document as a collection of blocks that
1051
+ # the document is divided into.
1052
+ # @!attribute [rw] blocks
1053
+ # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::DocumentLayout::DocumentLayoutBlock>]
1054
+ # List of blocks in the document.
1055
+ class DocumentLayout
1056
+ include ::Google::Protobuf::MessageExts
1057
+ extend ::Google::Protobuf::MessageExts::ClassMethods
1058
+
1059
+ # Represents a block. A block could be one of the various types (text,
1060
+ # table, list) supported.
1061
+ # @!attribute [rw] text_block
1062
+ # @return [::Google::Cloud::DocumentAI::V1::Document::DocumentLayout::DocumentLayoutBlock::LayoutTextBlock]
1063
+ # Block consisting of text content.
1064
+ # @!attribute [rw] table_block
1065
+ # @return [::Google::Cloud::DocumentAI::V1::Document::DocumentLayout::DocumentLayoutBlock::LayoutTableBlock]
1066
+ # Block consisting of table content/structure.
1067
+ # @!attribute [rw] list_block
1068
+ # @return [::Google::Cloud::DocumentAI::V1::Document::DocumentLayout::DocumentLayoutBlock::LayoutListBlock]
1069
+ # Block consisting of list content/structure.
1070
+ # @!attribute [rw] block_id
1071
+ # @return [::String]
1072
+ # ID of the block.
1073
+ # @!attribute [rw] page_span
1074
+ # @return [::Google::Cloud::DocumentAI::V1::Document::DocumentLayout::DocumentLayoutBlock::LayoutPageSpan]
1075
+ # Page span of the block.
1076
+ class DocumentLayoutBlock
1077
+ include ::Google::Protobuf::MessageExts
1078
+ extend ::Google::Protobuf::MessageExts::ClassMethods
1079
+
1080
+ # Represents where the block starts and ends in the document.
1081
+ # @!attribute [rw] page_start
1082
+ # @return [::Integer]
1083
+ # Page where block starts in the document.
1084
+ # @!attribute [rw] page_end
1085
+ # @return [::Integer]
1086
+ # Page where block ends in the document.
1087
+ class LayoutPageSpan
1088
+ include ::Google::Protobuf::MessageExts
1089
+ extend ::Google::Protobuf::MessageExts::ClassMethods
1090
+ end
1091
+
1092
+ # Represents a text type block.
1093
+ # @!attribute [rw] text
1094
+ # @return [::String]
1095
+ # Text content stored in the block.
1096
+ # @!attribute [rw] type
1097
+ # @return [::String]
1098
+ # Type of the text in the block. Available options are: `paragraph`,
1099
+ # `subtitle`, `heading-1`, `heading-2`, `heading-3`, `heading-4`,
1100
+ # `heading-5`, `header`, `footer`.
1101
+ # @!attribute [rw] blocks
1102
+ # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::DocumentLayout::DocumentLayoutBlock>]
1103
+ # A text block could further have child blocks.
1104
+ # Repeated blocks support further hierarchies and nested blocks.
1105
+ class LayoutTextBlock
1106
+ include ::Google::Protobuf::MessageExts
1107
+ extend ::Google::Protobuf::MessageExts::ClassMethods
1108
+ end
1109
+
1110
+ # Represents a table type block.
1111
+ # @!attribute [rw] header_rows
1112
+ # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::DocumentLayout::DocumentLayoutBlock::LayoutTableRow>]
1113
+ # Header rows at the top of the table.
1114
+ # @!attribute [rw] body_rows
1115
+ # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::DocumentLayout::DocumentLayoutBlock::LayoutTableRow>]
1116
+ # Body rows containing main table content.
1117
+ # @!attribute [rw] caption
1118
+ # @return [::String]
1119
+ # Table caption/title.
1120
+ class LayoutTableBlock
1121
+ include ::Google::Protobuf::MessageExts
1122
+ extend ::Google::Protobuf::MessageExts::ClassMethods
1123
+ end
1124
+
1125
+ # Represents a row in a table.
1126
+ # @!attribute [rw] cells
1127
+ # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::DocumentLayout::DocumentLayoutBlock::LayoutTableCell>]
1128
+ # A table row is a list of table cells.
1129
+ class LayoutTableRow
1130
+ include ::Google::Protobuf::MessageExts
1131
+ extend ::Google::Protobuf::MessageExts::ClassMethods
1132
+ end
1133
+
1134
+ # Represents a cell in a table row.
1135
+ # @!attribute [rw] blocks
1136
+ # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::DocumentLayout::DocumentLayoutBlock>]
1137
+ # A table cell is a list of blocks.
1138
+ # Repeated blocks support further hierarchies and nested blocks.
1139
+ # @!attribute [rw] row_span
1140
+ # @return [::Integer]
1141
+ # How many rows this cell spans.
1142
+ # @!attribute [rw] col_span
1143
+ # @return [::Integer]
1144
+ # How many columns this cell spans.
1145
+ class LayoutTableCell
1146
+ include ::Google::Protobuf::MessageExts
1147
+ extend ::Google::Protobuf::MessageExts::ClassMethods
1148
+ end
1149
+
1150
+ # Represents a list type block.
1151
+ # @!attribute [rw] list_entries
1152
+ # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::DocumentLayout::DocumentLayoutBlock::LayoutListEntry>]
1153
+ # List entries that constitute a list block.
1154
+ # @!attribute [rw] type
1155
+ # @return [::String]
1156
+ # Type of the list_entries (if any). Available options are `ordered`
1157
+ # and `unordered`.
1158
+ class LayoutListBlock
1159
+ include ::Google::Protobuf::MessageExts
1160
+ extend ::Google::Protobuf::MessageExts::ClassMethods
1161
+ end
1162
+
1163
+ # Represents an entry in the list.
1164
+ # @!attribute [rw] blocks
1165
+ # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::DocumentLayout::DocumentLayoutBlock>]
1166
+ # A list entry is a list of blocks.
1167
+ # Repeated blocks support further hierarchies and nested blocks.
1168
+ class LayoutListEntry
1169
+ include ::Google::Protobuf::MessageExts
1170
+ extend ::Google::Protobuf::MessageExts::ClassMethods
1171
+ end
1172
+ end
1173
+ end
1174
+
1175
+ # Represents the chunks that the document is divided into.
1176
+ # @!attribute [rw] chunks
1177
+ # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::ChunkedDocument::Chunk>]
1178
+ # List of chunks.
1179
+ class ChunkedDocument
1180
+ include ::Google::Protobuf::MessageExts
1181
+ extend ::Google::Protobuf::MessageExts::ClassMethods
1182
+
1183
+ # Represents a chunk.
1184
+ # @!attribute [rw] chunk_id
1185
+ # @return [::String]
1186
+ # ID of the chunk.
1187
+ # @!attribute [rw] source_block_ids
1188
+ # @return [::Array<::String>]
1189
+ # Unused.
1190
+ # @!attribute [rw] content
1191
+ # @return [::String]
1192
+ # Text content of the chunk.
1193
+ # @!attribute [rw] page_span
1194
+ # @return [::Google::Cloud::DocumentAI::V1::Document::ChunkedDocument::Chunk::ChunkPageSpan]
1195
+ # Page span of the chunk.
1196
+ # @!attribute [rw] page_headers
1197
+ # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::ChunkedDocument::Chunk::ChunkPageHeader>]
1198
+ # Page headers associated with the chunk.
1199
+ # @!attribute [rw] page_footers
1200
+ # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::ChunkedDocument::Chunk::ChunkPageFooter>]
1201
+ # Page footers associated with the chunk.
1202
+ class Chunk
1203
+ include ::Google::Protobuf::MessageExts
1204
+ extend ::Google::Protobuf::MessageExts::ClassMethods
1205
+
1206
+ # Represents where the chunk starts and ends in the document.
1207
+ # @!attribute [rw] page_start
1208
+ # @return [::Integer]
1209
+ # Page where chunk starts in the document.
1210
+ # @!attribute [rw] page_end
1211
+ # @return [::Integer]
1212
+ # Page where chunk ends in the document.
1213
+ class ChunkPageSpan
1214
+ include ::Google::Protobuf::MessageExts
1215
+ extend ::Google::Protobuf::MessageExts::ClassMethods
1216
+ end
1217
+
1218
+ # Represents the page header associated with the chunk.
1219
+ # @!attribute [rw] text
1220
+ # @return [::String]
1221
+ # Header in text format.
1222
+ # @!attribute [rw] page_span
1223
+ # @return [::Google::Cloud::DocumentAI::V1::Document::ChunkedDocument::Chunk::ChunkPageSpan]
1224
+ # Page span of the header.
1225
+ class ChunkPageHeader
1226
+ include ::Google::Protobuf::MessageExts
1227
+ extend ::Google::Protobuf::MessageExts::ClassMethods
1228
+ end
1229
+
1230
+ # Represents the page footer associated with the chunk.
1231
+ # @!attribute [rw] text
1232
+ # @return [::String]
1233
+ # Footer in text format.
1234
+ # @!attribute [rw] page_span
1235
+ # @return [::Google::Cloud::DocumentAI::V1::Document::ChunkedDocument::Chunk::ChunkPageSpan]
1236
+ # Page span of the footer.
1237
+ class ChunkPageFooter
1238
+ include ::Google::Protobuf::MessageExts
1239
+ extend ::Google::Protobuf::MessageExts::ClassMethods
1240
+ end
1241
+ end
1242
+ end
983
1243
  end
984
1244
  end
985
1245
  end