google-cloud-document_ai-v1 0.21.0 → 0.22.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/google/cloud/document_ai/v1/document_processor_service/client.rb +106 -56
- data/lib/google/cloud/document_ai/v1/document_processor_service/operations.rb +13 -5
- data/lib/google/cloud/document_ai/v1/document_processor_service/rest/client.rb +106 -56
- data/lib/google/cloud/document_ai/v1/document_processor_service/rest/operations.rb +11 -4
- data/lib/google/cloud/document_ai/v1/version.rb +1 -1
- data/lib/google/cloud/documentai/v1/document_pb.rb +15 -1
- data/lib/google/cloud/documentai/v1/document_processor_service_services_pb.rb +9 -5
- data/lib/google/cloud/documentai/v1/processor_pb.rb +1 -1
- data/proto_docs/google/cloud/documentai/v1/document.rb +336 -76
- data/proto_docs/google/cloud/documentai/v1/document_io.rb +3 -1
- data/proto_docs/google/cloud/documentai/v1/document_processor_service.rb +127 -76
- data/proto_docs/google/cloud/documentai/v1/processor.rb +17 -4
- metadata +2 -2
@@ -52,16 +52,19 @@ module Google
|
|
52
52
|
# Visual page layout for the {::Google::Cloud::DocumentAI::V1::Document Document}.
|
53
53
|
# @!attribute [rw] entities
|
54
54
|
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Entity>]
|
55
|
-
# A list of entities detected on
|
56
|
-
#
|
55
|
+
# A list of entities detected on
|
56
|
+
# {::Google::Cloud::DocumentAI::V1::Document#text Document.text}. For document
|
57
|
+
# shards, entities in this list may cross shard boundaries.
|
57
58
|
# @!attribute [rw] entity_relations
|
58
59
|
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::EntityRelation>]
|
59
|
-
# Placeholder. Relationship among
|
60
|
+
# Placeholder. Relationship among
|
61
|
+
# {::Google::Cloud::DocumentAI::V1::Document#entities Document.entities}.
|
60
62
|
# @!attribute [rw] text_changes
|
61
63
|
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::TextChange>]
|
62
|
-
# Placeholder. A list of text corrections made to
|
63
|
-
#
|
64
|
-
# for
|
64
|
+
# Placeholder. A list of text corrections made to
|
65
|
+
# {::Google::Cloud::DocumentAI::V1::Document#text Document.text}. This is usually
|
66
|
+
# used for annotating corrections to OCR mistakes. Text changes for a given
|
67
|
+
# revision may not overlap with each other.
|
65
68
|
# @!attribute [rw] shard_info
|
66
69
|
# @return [::Google::Cloud::DocumentAI::V1::Document::ShardInfo]
|
67
70
|
# Information about the sharding if this document is a sharded part of a larger
|
@@ -72,6 +75,12 @@ module Google
|
|
72
75
|
# @!attribute [rw] revisions
|
73
76
|
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Revision>]
|
74
77
|
# Placeholder. Revision history of this document.
|
78
|
+
# @!attribute [rw] document_layout
|
79
|
+
# @return [::Google::Cloud::DocumentAI::V1::Document::DocumentLayout]
|
80
|
+
# Parsed layout of the document.
|
81
|
+
# @!attribute [rw] chunked_document
|
82
|
+
# @return [::Google::Cloud::DocumentAI::V1::Document::ChunkedDocument]
|
83
|
+
# Document chunked based on chunking config.
|
75
84
|
class Document
|
76
85
|
include ::Google::Protobuf::MessageExts
|
77
86
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -87,7 +96,8 @@ module Google
|
|
87
96
|
# Total number of shards.
|
88
97
|
# @!attribute [rw] text_offset
|
89
98
|
# @return [::Integer]
|
90
|
-
# The index of the first character in
|
99
|
+
# The index of the first character in
|
100
|
+
# {::Google::Cloud::DocumentAI::V1::Document#text Document.text} in the overall
|
91
101
|
# document global text.
|
92
102
|
class ShardInfo
|
93
103
|
include ::Google::Protobuf::MessageExts
|
@@ -98,7 +108,8 @@ module Google
|
|
98
108
|
# conventions as much as possible.
|
99
109
|
# @!attribute [rw] text_anchor
|
100
110
|
# @return [::Google::Cloud::DocumentAI::V1::Document::TextAnchor]
|
101
|
-
# Text anchor indexing into the
|
111
|
+
# Text anchor indexing into the
|
112
|
+
# {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
|
102
113
|
# @!attribute [rw] color
|
103
114
|
# @return [::Google::Type::Color]
|
104
115
|
# Text color.
|
@@ -147,9 +158,11 @@ module Google
|
|
147
158
|
# A page in a {::Google::Cloud::DocumentAI::V1::Document Document}.
|
148
159
|
# @!attribute [rw] page_number
|
149
160
|
# @return [::Integer]
|
150
|
-
# 1-based index for current
|
151
|
-
#
|
152
|
-
#
|
161
|
+
# 1-based index for current
|
162
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page Page} in a parent
|
163
|
+
# {::Google::Cloud::DocumentAI::V1::Document Document}. Useful when a page is
|
164
|
+
# taken out of a {::Google::Cloud::DocumentAI::V1::Document Document} for
|
165
|
+
# individual processing.
|
153
166
|
# @!attribute [rw] image
|
154
167
|
# @return [::Google::Cloud::DocumentAI::V1::Document::Page::Image]
|
155
168
|
# Rendered image for this page. This image is preprocessed to remove any
|
@@ -271,18 +284,23 @@ module Google
|
|
271
284
|
# Visual element describing a layout unit on a page.
|
272
285
|
# @!attribute [rw] text_anchor
|
273
286
|
# @return [::Google::Cloud::DocumentAI::V1::Document::TextAnchor]
|
274
|
-
# Text anchor indexing into the
|
287
|
+
# Text anchor indexing into the
|
288
|
+
# {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
|
275
289
|
# @!attribute [rw] confidence
|
276
290
|
# @return [::Float]
|
277
|
-
# Confidence of the current
|
278
|
-
#
|
279
|
-
#
|
291
|
+
# Confidence of the current
|
292
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} within
|
293
|
+
# context of the object this layout is for. e.g. confidence can be for a
|
294
|
+
# single token, a table, a visual element, etc. depending on context.
|
295
|
+
# Range `[0, 1]`.
|
280
296
|
# @!attribute [rw] bounding_poly
|
281
297
|
# @return [::Google::Cloud::DocumentAI::V1::BoundingPoly]
|
282
|
-
# The bounding polygon for the
|
298
|
+
# The bounding polygon for the
|
299
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout}.
|
283
300
|
# @!attribute [rw] orientation
|
284
301
|
# @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout::Orientation]
|
285
|
-
# Detected orientation for the
|
302
|
+
# Detected orientation for the
|
303
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout}.
|
286
304
|
class Layout
|
287
305
|
include ::Google::Protobuf::MessageExts
|
288
306
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -313,7 +331,8 @@ module Google
|
|
313
331
|
# common line-spacing and orientation.
|
314
332
|
# @!attribute [rw] layout
|
315
333
|
# @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
|
316
|
-
# {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
|
334
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
|
335
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Block Block}.
|
317
336
|
# @!attribute [rw] detected_languages
|
318
337
|
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::DetectedLanguage>]
|
319
338
|
# A list of detected languages together with confidence.
|
@@ -329,7 +348,8 @@ module Google
|
|
329
348
|
# A collection of lines that a human would perceive as a paragraph.
|
330
349
|
# @!attribute [rw] layout
|
331
350
|
# @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
|
332
|
-
# {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
|
351
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
|
352
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Paragraph Paragraph}.
|
333
353
|
# @!attribute [rw] detected_languages
|
334
354
|
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::DetectedLanguage>]
|
335
355
|
# A list of detected languages together with confidence.
|
@@ -346,7 +366,8 @@ module Google
|
|
346
366
|
# Does not cross column boundaries, can be horizontal, vertical, etc.
|
347
367
|
# @!attribute [rw] layout
|
348
368
|
# @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
|
349
|
-
# {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
|
369
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
|
370
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Line Line}.
|
350
371
|
# @!attribute [rw] detected_languages
|
351
372
|
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::DetectedLanguage>]
|
352
373
|
# A list of detected languages together with confidence.
|
@@ -362,10 +383,12 @@ module Google
|
|
362
383
|
# A detected token.
|
363
384
|
# @!attribute [rw] layout
|
364
385
|
# @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
|
365
|
-
# {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
|
386
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
|
387
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Token Token}.
|
366
388
|
# @!attribute [rw] detected_break
|
367
389
|
# @return [::Google::Cloud::DocumentAI::V1::Document::Page::Token::DetectedBreak]
|
368
|
-
# Detected break at the end of a
|
390
|
+
# Detected break at the end of a
|
391
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Token Token}.
|
369
392
|
# @!attribute [rw] detected_languages
|
370
393
|
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::DetectedLanguage>]
|
371
394
|
# A list of detected languages together with confidence.
|
@@ -380,7 +403,8 @@ module Google
|
|
380
403
|
include ::Google::Protobuf::MessageExts
|
381
404
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
382
405
|
|
383
|
-
# Detected break at the end of a
|
406
|
+
# Detected break at the end of a
|
407
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Token Token}.
|
384
408
|
# @!attribute [rw] type
|
385
409
|
# @return [::Google::Cloud::DocumentAI::V1::Document::Page::Token::DetectedBreak::Type]
|
386
410
|
# Detected break type.
|
@@ -410,8 +434,9 @@ module Google
|
|
410
434
|
# Font size in points (`1` point is `¹⁄₇₂` inches).
|
411
435
|
# @!attribute [rw] pixel_font_size
|
412
436
|
# @return [::Float]
|
413
|
-
# Font size in pixels, equal to _unrounded
|
414
|
-
#
|
437
|
+
# Font size in pixels, equal to _unrounded
|
438
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Token::StyleInfo#font_size font_size}_
|
439
|
+
# * _resolution_ ÷ `72.0`.
|
415
440
|
# @!attribute [rw] letter_spacing
|
416
441
|
# @return [::Float]
|
417
442
|
# Letter spacing in points.
|
@@ -420,8 +445,9 @@ module Google
|
|
420
445
|
# Name or style of the font.
|
421
446
|
# @!attribute [rw] bold
|
422
447
|
# @return [::Boolean]
|
423
|
-
# Whether the text is bold (equivalent to
|
424
|
-
#
|
448
|
+
# Whether the text is bold (equivalent to
|
449
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Token::StyleInfo#font_weight font_weight}
|
450
|
+
# is at least `700`).
|
425
451
|
# @!attribute [rw] italic
|
426
452
|
# @return [::Boolean]
|
427
453
|
# Whether the text is italic.
|
@@ -430,16 +456,16 @@ module Google
|
|
430
456
|
# Whether the text is underlined.
|
431
457
|
# @!attribute [rw] strikeout
|
432
458
|
# @return [::Boolean]
|
433
|
-
# Whether the text is strikethrough.
|
459
|
+
# Whether the text is strikethrough. This feature is not supported yet.
|
434
460
|
# @!attribute [rw] subscript
|
435
461
|
# @return [::Boolean]
|
436
|
-
# Whether the text is a subscript.
|
462
|
+
# Whether the text is a subscript. This feature is not supported yet.
|
437
463
|
# @!attribute [rw] superscript
|
438
464
|
# @return [::Boolean]
|
439
|
-
# Whether the text is a superscript.
|
465
|
+
# Whether the text is a superscript. This feature is not supported yet.
|
440
466
|
# @!attribute [rw] smallcaps
|
441
467
|
# @return [::Boolean]
|
442
|
-
# Whether the text is in small caps.
|
468
|
+
# Whether the text is in small caps. This feature is not supported yet.
|
443
469
|
# @!attribute [rw] font_weight
|
444
470
|
# @return [::Integer]
|
445
471
|
# TrueType weight on a scale `100` (thin) to `1000` (ultra-heavy).
|
@@ -462,7 +488,8 @@ module Google
|
|
462
488
|
# A detected symbol.
|
463
489
|
# @!attribute [rw] layout
|
464
490
|
# @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
|
465
|
-
# {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
|
491
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
|
492
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Symbol Symbol}.
|
466
493
|
# @!attribute [rw] detected_languages
|
467
494
|
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::DetectedLanguage>]
|
468
495
|
# A list of detected languages together with confidence.
|
@@ -475,10 +502,12 @@ module Google
|
|
475
502
|
# page.
|
476
503
|
# @!attribute [rw] layout
|
477
504
|
# @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
|
478
|
-
# {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
|
505
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
|
506
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::VisualElement VisualElement}.
|
479
507
|
# @!attribute [rw] type
|
480
508
|
# @return [::String]
|
481
|
-
# Type of the
|
509
|
+
# Type of the
|
510
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::VisualElement VisualElement}.
|
482
511
|
# @!attribute [rw] detected_languages
|
483
512
|
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::DetectedLanguage>]
|
484
513
|
# A list of detected languages together with confidence.
|
@@ -490,7 +519,8 @@ module Google
|
|
490
519
|
# A table representation similar to HTML table structure.
|
491
520
|
# @!attribute [rw] layout
|
492
521
|
# @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
|
493
|
-
# {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
|
522
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
|
523
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Table Table}.
|
494
524
|
# @!attribute [rw] header_rows
|
495
525
|
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::Table::TableRow>]
|
496
526
|
# Header rows of the table.
|
@@ -520,7 +550,8 @@ module Google
|
|
520
550
|
# A cell representation inside the table.
|
521
551
|
# @!attribute [rw] layout
|
522
552
|
# @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
|
523
|
-
# {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
|
553
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
|
554
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Table::TableCell TableCell}.
|
524
555
|
# @!attribute [rw] row_span
|
525
556
|
# @return [::Integer]
|
526
557
|
# How many rows this cell spans.
|
@@ -539,11 +570,13 @@ module Google
|
|
539
570
|
# A form field detected on the page.
|
540
571
|
# @!attribute [rw] field_name
|
541
572
|
# @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
|
542
|
-
# {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for the
|
543
|
-
#
|
573
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for the
|
574
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::FormField FormField} name.
|
575
|
+
# e.g. `Address`, `Email`, `Grand total`, `Phone number`, etc.
|
544
576
|
# @!attribute [rw] field_value
|
545
577
|
# @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
|
546
|
-
# {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for the
|
578
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for the
|
579
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::FormField FormField} value.
|
547
580
|
# @!attribute [rw] name_detected_languages
|
548
581
|
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::DetectedLanguage>]
|
549
582
|
# A list of detected languages for name together with confidence.
|
@@ -579,10 +612,12 @@ module Google
|
|
579
612
|
# A detected barcode.
|
580
613
|
# @!attribute [rw] layout
|
581
614
|
# @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
|
582
|
-
# {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
|
615
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
|
616
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::DetectedBarcode DetectedBarcode}.
|
583
617
|
# @!attribute [rw] barcode
|
584
618
|
# @return [::Google::Cloud::DocumentAI::V1::Barcode]
|
585
|
-
# Detailed barcode information of the
|
619
|
+
# Detailed barcode information of the
|
620
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::DetectedBarcode DetectedBarcode}.
|
586
621
|
class DetectedBarcode
|
587
622
|
include ::Google::Protobuf::MessageExts
|
588
623
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -643,7 +678,8 @@ module Google
|
|
643
678
|
# @!attribute [rw] text_anchor
|
644
679
|
# @return [::Google::Cloud::DocumentAI::V1::Document::TextAnchor]
|
645
680
|
# Optional. Provenance of the entity.
|
646
|
-
# Text anchor indexing into the
|
681
|
+
# Text anchor indexing into the
|
682
|
+
# {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
|
647
683
|
# @!attribute [rw] type
|
648
684
|
# @return [::String]
|
649
685
|
# Required. Entity type from a schema e.g. `Address`.
|
@@ -658,28 +694,29 @@ module Google
|
|
658
694
|
# Optional. Confidence of detected Schema entity. Range `[0, 1]`.
|
659
695
|
# @!attribute [rw] page_anchor
|
660
696
|
# @return [::Google::Cloud::DocumentAI::V1::Document::PageAnchor]
|
661
|
-
# Optional. Represents the provenance of this entity wrt. the location on
|
662
|
-
# page where it was found.
|
697
|
+
# Optional. Represents the provenance of this entity wrt. the location on
|
698
|
+
# the page where it was found.
|
663
699
|
# @!attribute [rw] id
|
664
700
|
# @return [::String]
|
665
701
|
# Optional. Canonical id. This will be a unique value in the entity list
|
666
702
|
# for this document.
|
667
703
|
# @!attribute [rw] normalized_value
|
668
704
|
# @return [::Google::Cloud::DocumentAI::V1::Document::Entity::NormalizedValue]
|
669
|
-
# Optional. Normalized entity value. Absent if the extracted value could
|
670
|
-
# converted or the type (e.g. address) is not supported for certain
|
705
|
+
# Optional. Normalized entity value. Absent if the extracted value could
|
706
|
+
# not be converted or the type (e.g. address) is not supported for certain
|
671
707
|
# parsers. This field is also only populated for certain supported document
|
672
708
|
# types.
|
673
709
|
# @!attribute [rw] properties
|
674
710
|
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Entity>]
|
675
|
-
# Optional. Entities can be nested to form a hierarchical data structure
|
676
|
-
# the content in the document.
|
711
|
+
# Optional. Entities can be nested to form a hierarchical data structure
|
712
|
+
# representing the content in the document.
|
677
713
|
# @!attribute [rw] provenance
|
678
714
|
# @return [::Google::Cloud::DocumentAI::V1::Document::Provenance]
|
679
715
|
# Optional. The history of this annotation.
|
680
716
|
# @!attribute [rw] redacted
|
681
717
|
# @return [::Boolean]
|
682
|
-
# Optional. Whether the entity will be redacted for de-identification
|
718
|
+
# Optional. Whether the entity will be redacted for de-identification
|
719
|
+
# purposes.
|
683
720
|
class Entity
|
684
721
|
include ::Google::Protobuf::MessageExts
|
685
722
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -730,7 +767,8 @@ module Google
|
|
730
767
|
end
|
731
768
|
end
|
732
769
|
|
733
|
-
# Relationship between
|
770
|
+
# Relationship between
|
771
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Entity Entities}.
|
734
772
|
# @!attribute [rw] subject_id
|
735
773
|
# @return [::String]
|
736
774
|
# Subject entity id.
|
@@ -745,10 +783,12 @@ module Google
|
|
745
783
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
746
784
|
end
|
747
785
|
|
748
|
-
# Text reference indexing into the
|
786
|
+
# Text reference indexing into the
|
787
|
+
# {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
|
749
788
|
# @!attribute [rw] text_segments
|
750
789
|
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::TextAnchor::TextSegment>]
|
751
|
-
# The text segments from the
|
790
|
+
# The text segments from the
|
791
|
+
# {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
|
752
792
|
# @!attribute [rw] content
|
753
793
|
# @return [::String]
|
754
794
|
# Contains the content of the text span so that users do
|
@@ -758,15 +798,20 @@ module Google
|
|
758
798
|
include ::Google::Protobuf::MessageExts
|
759
799
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
760
800
|
|
761
|
-
# A text segment in the
|
762
|
-
#
|
763
|
-
#
|
801
|
+
# A text segment in the
|
802
|
+
# {::Google::Cloud::DocumentAI::V1::Document#text Document.text}. The indices
|
803
|
+
# may be out of bounds, which indicates that the text extends into another
|
804
|
+
# document shard for large sharded documents. See
|
805
|
+
# {::Google::Cloud::DocumentAI::V1::Document::ShardInfo#text_offset ShardInfo.text_offset}.
|
764
806
|
# @!attribute [rw] start_index
|
765
807
|
# @return [::Integer]
|
766
|
-
# {::Google::Cloud::DocumentAI::V1::Document::TextAnchor::TextSegment TextSegment}
|
808
|
+
# {::Google::Cloud::DocumentAI::V1::Document::TextAnchor::TextSegment TextSegment}
|
809
|
+
# start UTF-8 char index in the
|
810
|
+
# {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
|
767
811
|
# @!attribute [rw] end_index
|
768
812
|
# @return [::Integer]
|
769
|
-
# {::Google::Cloud::DocumentAI::V1::Document::TextAnchor::TextSegment TextSegment}
|
813
|
+
# {::Google::Cloud::DocumentAI::V1::Document::TextAnchor::TextSegment TextSegment}
|
814
|
+
# half open end UTF-8 char index in the
|
770
815
|
# {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
|
771
816
|
class TextSegment
|
772
817
|
include ::Google::Protobuf::MessageExts
|
@@ -774,9 +819,10 @@ module Google
|
|
774
819
|
end
|
775
820
|
end
|
776
821
|
|
777
|
-
# Referencing the visual context of the entity in the
|
778
|
-
# Page anchors
|
779
|
-
#
|
822
|
+
# Referencing the visual context of the entity in the
|
823
|
+
# {::Google::Cloud::DocumentAI::V1::Document#pages Document.pages}. Page anchors
|
824
|
+
# can be cross-page, consist of multiple bounding polygons and optionally
|
825
|
+
# reference specific layout element types.
|
780
826
|
# @!attribute [rw] page_refs
|
781
827
|
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::PageAnchor::PageRef>]
|
782
828
|
# One or more references to visual page elements
|
@@ -787,25 +833,31 @@ module Google
|
|
787
833
|
# Represents a weak reference to a page element within a document.
|
788
834
|
# @!attribute [rw] page
|
789
835
|
# @return [::Integer]
|
790
|
-
# Required. Index into the
|
836
|
+
# Required. Index into the
|
837
|
+
# {::Google::Cloud::DocumentAI::V1::Document#pages Document.pages} element,
|
838
|
+
# for example using
|
791
839
|
# `[Document.pages][page_refs.page]` to locate the related page element.
|
792
840
|
# This field is skipped when its value is the default `0`. See
|
793
841
|
# https://developers.google.com/protocol-buffers/docs/proto3#json.
|
794
842
|
# @!attribute [rw] layout_type
|
795
843
|
# @return [::Google::Cloud::DocumentAI::V1::Document::PageAnchor::PageRef::LayoutType]
|
796
|
-
# Optional. The type of the layout element that is being referenced if
|
844
|
+
# Optional. The type of the layout element that is being referenced if
|
845
|
+
# any.
|
797
846
|
# @!attribute [rw] layout_id
|
798
847
|
# @deprecated This field is deprecated and may be removed in the next major version update.
|
799
848
|
# @return [::String]
|
800
|
-
# Optional. Deprecated. Use
|
849
|
+
# Optional. Deprecated. Use
|
850
|
+
# {::Google::Cloud::DocumentAI::V1::Document::PageAnchor::PageRef#bounding_poly PageRef.bounding_poly}
|
851
|
+
# instead.
|
801
852
|
# @!attribute [rw] bounding_poly
|
802
853
|
# @return [::Google::Cloud::DocumentAI::V1::BoundingPoly]
|
803
|
-
# Optional. Identifies the bounding polygon of a layout element on the
|
804
|
-
# If `layout_type` is set, the bounding polygon must be exactly the
|
805
|
-
# to the layout element it's referring to.
|
854
|
+
# Optional. Identifies the bounding polygon of a layout element on the
|
855
|
+
# page. If `layout_type` is set, the bounding polygon must be exactly the
|
856
|
+
# same as the layout element it's referring to.
|
806
857
|
# @!attribute [rw] confidence
|
807
858
|
# @return [::Float]
|
808
|
-
# Optional. Confidence of detected page element, if applicable. Range
|
859
|
+
# Optional. Confidence of detected page element, if applicable. Range
|
860
|
+
# `[0, 1]`.
|
809
861
|
class PageRef
|
810
862
|
include ::Google::Protobuf::MessageExts
|
811
863
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -815,25 +867,38 @@ module Google
|
|
815
867
|
# Layout Unspecified.
|
816
868
|
LAYOUT_TYPE_UNSPECIFIED = 0
|
817
869
|
|
818
|
-
# References a
|
870
|
+
# References a
|
871
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page#blocks Page.blocks}
|
872
|
+
# element.
|
819
873
|
BLOCK = 1
|
820
874
|
|
821
|
-
# References a
|
875
|
+
# References a
|
876
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page#paragraphs Page.paragraphs}
|
877
|
+
# element.
|
822
878
|
PARAGRAPH = 2
|
823
879
|
|
824
|
-
# References a
|
880
|
+
# References a
|
881
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page#lines Page.lines} element.
|
825
882
|
LINE = 3
|
826
883
|
|
827
|
-
# References a
|
884
|
+
# References a
|
885
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page#tokens Page.tokens}
|
886
|
+
# element.
|
828
887
|
TOKEN = 4
|
829
888
|
|
830
|
-
# References a
|
889
|
+
# References a
|
890
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page#visual_elements Page.visual_elements}
|
891
|
+
# element.
|
831
892
|
VISUAL_ELEMENT = 5
|
832
893
|
|
833
|
-
# References a
|
894
|
+
# References a
|
895
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page#tables Page.tables}
|
896
|
+
# element.
|
834
897
|
TABLE = 6
|
835
898
|
|
836
|
-
# References a
|
899
|
+
# References a
|
900
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page#form_fields Page.form_fields}
|
901
|
+
# element.
|
837
902
|
FORM_FIELD = 7
|
838
903
|
end
|
839
904
|
end
|
@@ -965,8 +1030,9 @@ module Google
|
|
965
1030
|
# @!attribute [rw] text_anchor
|
966
1031
|
# @return [::Google::Cloud::DocumentAI::V1::Document::TextAnchor]
|
967
1032
|
# Provenance of the correction.
|
968
|
-
# Text anchor indexing into the
|
969
|
-
#
|
1033
|
+
# Text anchor indexing into the
|
1034
|
+
# {::Google::Cloud::DocumentAI::V1::Document#text Document.text}. There can
|
1035
|
+
# only be a single `TextAnchor.text_segments` element. If the start and
|
970
1036
|
# end index of the text segment are the same, the text change is inserted
|
971
1037
|
# before that index.
|
972
1038
|
# @!attribute [rw] changed_text
|
@@ -980,6 +1046,200 @@ module Google
|
|
980
1046
|
include ::Google::Protobuf::MessageExts
|
981
1047
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
982
1048
|
end
|
1049
|
+
|
1050
|
+
# Represents the parsed layout of a document as a collection of blocks that
|
1051
|
+
# the document is divided into.
|
1052
|
+
# @!attribute [rw] blocks
|
1053
|
+
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::DocumentLayout::DocumentLayoutBlock>]
|
1054
|
+
# List of blocks in the document.
|
1055
|
+
class DocumentLayout
|
1056
|
+
include ::Google::Protobuf::MessageExts
|
1057
|
+
extend ::Google::Protobuf::MessageExts::ClassMethods
|
1058
|
+
|
1059
|
+
# Represents a block. A block could be one of the various types (text,
|
1060
|
+
# table, list) supported.
|
1061
|
+
# @!attribute [rw] text_block
|
1062
|
+
# @return [::Google::Cloud::DocumentAI::V1::Document::DocumentLayout::DocumentLayoutBlock::LayoutTextBlock]
|
1063
|
+
# Block consisting of text content.
|
1064
|
+
# @!attribute [rw] table_block
|
1065
|
+
# @return [::Google::Cloud::DocumentAI::V1::Document::DocumentLayout::DocumentLayoutBlock::LayoutTableBlock]
|
1066
|
+
# Block consisting of table content/structure.
|
1067
|
+
# @!attribute [rw] list_block
|
1068
|
+
# @return [::Google::Cloud::DocumentAI::V1::Document::DocumentLayout::DocumentLayoutBlock::LayoutListBlock]
|
1069
|
+
# Block consisting of list content/structure.
|
1070
|
+
# @!attribute [rw] block_id
|
1071
|
+
# @return [::String]
|
1072
|
+
# ID of the block.
|
1073
|
+
# @!attribute [rw] page_span
|
1074
|
+
# @return [::Google::Cloud::DocumentAI::V1::Document::DocumentLayout::DocumentLayoutBlock::LayoutPageSpan]
|
1075
|
+
# Page span of the block.
|
1076
|
+
class DocumentLayoutBlock
|
1077
|
+
include ::Google::Protobuf::MessageExts
|
1078
|
+
extend ::Google::Protobuf::MessageExts::ClassMethods
|
1079
|
+
|
1080
|
+
# Represents where the block starts and ends in the document.
|
1081
|
+
# @!attribute [rw] page_start
|
1082
|
+
# @return [::Integer]
|
1083
|
+
# Page where block starts in the document.
|
1084
|
+
# @!attribute [rw] page_end
|
1085
|
+
# @return [::Integer]
|
1086
|
+
# Page where block ends in the document.
|
1087
|
+
class LayoutPageSpan
|
1088
|
+
include ::Google::Protobuf::MessageExts
|
1089
|
+
extend ::Google::Protobuf::MessageExts::ClassMethods
|
1090
|
+
end
|
1091
|
+
|
1092
|
+
# Represents a text type block.
|
1093
|
+
# @!attribute [rw] text
|
1094
|
+
# @return [::String]
|
1095
|
+
# Text content stored in the block.
|
1096
|
+
# @!attribute [rw] type
|
1097
|
+
# @return [::String]
|
1098
|
+
# Type of the text in the block. Available options are: `paragraph`,
|
1099
|
+
# `subtitle`, `heading-1`, `heading-2`, `heading-3`, `heading-4`,
|
1100
|
+
# `heading-5`, `header`, `footer`.
|
1101
|
+
# @!attribute [rw] blocks
|
1102
|
+
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::DocumentLayout::DocumentLayoutBlock>]
|
1103
|
+
# A text block could further have child blocks.
|
1104
|
+
# Repeated blocks support further hierarchies and nested blocks.
|
1105
|
+
class LayoutTextBlock
|
1106
|
+
include ::Google::Protobuf::MessageExts
|
1107
|
+
extend ::Google::Protobuf::MessageExts::ClassMethods
|
1108
|
+
end
|
1109
|
+
|
1110
|
+
# Represents a table type block.
|
1111
|
+
# @!attribute [rw] header_rows
|
1112
|
+
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::DocumentLayout::DocumentLayoutBlock::LayoutTableRow>]
|
1113
|
+
# Header rows at the top of the table.
|
1114
|
+
# @!attribute [rw] body_rows
|
1115
|
+
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::DocumentLayout::DocumentLayoutBlock::LayoutTableRow>]
|
1116
|
+
# Body rows containing main table content.
|
1117
|
+
# @!attribute [rw] caption
|
1118
|
+
# @return [::String]
|
1119
|
+
# Table caption/title.
|
1120
|
+
class LayoutTableBlock
|
1121
|
+
include ::Google::Protobuf::MessageExts
|
1122
|
+
extend ::Google::Protobuf::MessageExts::ClassMethods
|
1123
|
+
end
|
1124
|
+
|
1125
|
+
# Represents a row in a table.
|
1126
|
+
# @!attribute [rw] cells
|
1127
|
+
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::DocumentLayout::DocumentLayoutBlock::LayoutTableCell>]
|
1128
|
+
# A table row is a list of table cells.
|
1129
|
+
class LayoutTableRow
|
1130
|
+
include ::Google::Protobuf::MessageExts
|
1131
|
+
extend ::Google::Protobuf::MessageExts::ClassMethods
|
1132
|
+
end
|
1133
|
+
|
1134
|
+
# Represents a cell in a table row.
|
1135
|
+
# @!attribute [rw] blocks
|
1136
|
+
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::DocumentLayout::DocumentLayoutBlock>]
|
1137
|
+
# A table cell is a list of blocks.
|
1138
|
+
# Repeated blocks support further hierarchies and nested blocks.
|
1139
|
+
# @!attribute [rw] row_span
|
1140
|
+
# @return [::Integer]
|
1141
|
+
# How many rows this cell spans.
|
1142
|
+
# @!attribute [rw] col_span
|
1143
|
+
# @return [::Integer]
|
1144
|
+
# How many columns this cell spans.
|
1145
|
+
class LayoutTableCell
|
1146
|
+
include ::Google::Protobuf::MessageExts
|
1147
|
+
extend ::Google::Protobuf::MessageExts::ClassMethods
|
1148
|
+
end
|
1149
|
+
|
1150
|
+
# Represents a list type block.
|
1151
|
+
# @!attribute [rw] list_entries
|
1152
|
+
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::DocumentLayout::DocumentLayoutBlock::LayoutListEntry>]
|
1153
|
+
# List entries that constitute a list block.
|
1154
|
+
# @!attribute [rw] type
|
1155
|
+
# @return [::String]
|
1156
|
+
# Type of the list_entries (if any exist). Available options are `ordered`
|
1157
|
+
# and `unordered`.
|
1158
|
+
class LayoutListBlock
|
1159
|
+
include ::Google::Protobuf::MessageExts
|
1160
|
+
extend ::Google::Protobuf::MessageExts::ClassMethods
|
1161
|
+
end
|
1162
|
+
|
1163
|
+
# Represents an entry in the list.
|
1164
|
+
# @!attribute [rw] blocks
|
1165
|
+
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::DocumentLayout::DocumentLayoutBlock>]
|
1166
|
+
# A list entry is a list of blocks.
|
1167
|
+
# Repeated blocks support further hierarchies and nested blocks.
|
1168
|
+
class LayoutListEntry
|
1169
|
+
include ::Google::Protobuf::MessageExts
|
1170
|
+
extend ::Google::Protobuf::MessageExts::ClassMethods
|
1171
|
+
end
|
1172
|
+
end
|
1173
|
+
end
|
1174
|
+
|
1175
|
+
# Represents the chunks that the document is divided into.
|
1176
|
+
# @!attribute [rw] chunks
|
1177
|
+
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::ChunkedDocument::Chunk>]
|
1178
|
+
# List of chunks.
|
1179
|
+
class ChunkedDocument
|
1180
|
+
include ::Google::Protobuf::MessageExts
|
1181
|
+
extend ::Google::Protobuf::MessageExts::ClassMethods
|
1182
|
+
|
1183
|
+
# Represents a chunk.
|
1184
|
+
# @!attribute [rw] chunk_id
|
1185
|
+
# @return [::String]
|
1186
|
+
# ID of the chunk.
|
1187
|
+
# @!attribute [rw] source_block_ids
|
1188
|
+
# @return [::Array<::String>]
|
1189
|
+
# Unused.
|
1190
|
+
# @!attribute [rw] content
|
1191
|
+
# @return [::String]
|
1192
|
+
# Text content of the chunk.
|
1193
|
+
# @!attribute [rw] page_span
|
1194
|
+
# @return [::Google::Cloud::DocumentAI::V1::Document::ChunkedDocument::Chunk::ChunkPageSpan]
|
1195
|
+
# Page span of the chunk.
|
1196
|
+
# @!attribute [rw] page_headers
|
1197
|
+
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::ChunkedDocument::Chunk::ChunkPageHeader>]
|
1198
|
+
# Page headers associated with the chunk.
|
1199
|
+
# @!attribute [rw] page_footers
|
1200
|
+
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::ChunkedDocument::Chunk::ChunkPageFooter>]
|
1201
|
+
# Page footers associated with the chunk.
|
1202
|
+
class Chunk
|
1203
|
+
include ::Google::Protobuf::MessageExts
|
1204
|
+
extend ::Google::Protobuf::MessageExts::ClassMethods
|
1205
|
+
|
1206
|
+
# Represents where the chunk starts and ends in the document.
|
1207
|
+
# @!attribute [rw] page_start
|
1208
|
+
# @return [::Integer]
|
1209
|
+
# Page where chunk starts in the document.
|
1210
|
+
# @!attribute [rw] page_end
|
1211
|
+
# @return [::Integer]
|
1212
|
+
# Page where chunk ends in the document.
|
1213
|
+
class ChunkPageSpan
|
1214
|
+
include ::Google::Protobuf::MessageExts
|
1215
|
+
extend ::Google::Protobuf::MessageExts::ClassMethods
|
1216
|
+
end
|
1217
|
+
|
1218
|
+
# Represents the page header associated with the chunk.
|
1219
|
+
# @!attribute [rw] text
|
1220
|
+
# @return [::String]
|
1221
|
+
# Header in text format.
|
1222
|
+
# @!attribute [rw] page_span
|
1223
|
+
# @return [::Google::Cloud::DocumentAI::V1::Document::ChunkedDocument::Chunk::ChunkPageSpan]
|
1224
|
+
# Page span of the header.
|
1225
|
+
class ChunkPageHeader
|
1226
|
+
include ::Google::Protobuf::MessageExts
|
1227
|
+
extend ::Google::Protobuf::MessageExts::ClassMethods
|
1228
|
+
end
|
1229
|
+
|
1230
|
+
# Represents the page footer associated with the chunk.
|
1231
|
+
# @!attribute [rw] text
|
1232
|
+
# @return [::String]
|
1233
|
+
# Footer in text format.
|
1234
|
+
# @!attribute [rw] page_span
|
1235
|
+
# @return [::Google::Cloud::DocumentAI::V1::Document::ChunkedDocument::Chunk::ChunkPageSpan]
|
1236
|
+
# Page span of the footer.
|
1237
|
+
class ChunkPageFooter
|
1238
|
+
include ::Google::Protobuf::MessageExts
|
1239
|
+
extend ::Google::Protobuf::MessageExts::ClassMethods
|
1240
|
+
end
|
1241
|
+
end
|
1242
|
+
end
|
983
1243
|
end
|
984
1244
|
end
|
985
1245
|
end
|