google-cloud-document_ai-v1beta3 0.15.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/google/cloud/document_ai/v1beta3/document_processor_service/client.rb +145 -31
- data/lib/google/cloud/document_ai/v1beta3/document_processor_service/paths.rb +19 -0
- data/lib/google/cloud/document_ai/v1beta3/document_processor_service.rb +1 -1
- data/lib/google/cloud/document_ai/v1beta3/version.rb +1 -1
- data/lib/google/cloud/document_ai/v1beta3.rb +2 -2
- data/lib/google/cloud/documentai/v1beta3/document_io_pb.rb +4 -0
- data/lib/google/cloud/documentai/v1beta3/document_processor_service_pb.rb +10 -0
- data/lib/google/cloud/documentai/v1beta3/document_processor_service_services_pb.rb +6 -2
- data/lib/google/cloud/documentai/v1beta3/processor_type_pb.rb +1 -0
- data/proto_docs/google/cloud/documentai/v1beta3/document.rb +145 -79
- data/proto_docs/google/cloud/documentai/v1beta3/document_io.rb +10 -0
- data/proto_docs/google/cloud/documentai/v1beta3/document_processor_service.rb +68 -27
- data/proto_docs/google/cloud/documentai/v1beta3/document_schema.rb +1 -1
- data/proto_docs/google/cloud/documentai/v1beta3/processor.rb +2 -1
- data/proto_docs/google/cloud/documentai/v1beta3/processor_type.rb +3 -0
- metadata +7 -7
@@ -47,22 +47,27 @@ module Google
|
|
47
47
|
# Optional. UTF-8 encoded text in reading order from the document.
|
48
48
|
# @!attribute [rw] text_styles
|
49
49
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Style>]
|
50
|
-
#
|
50
|
+
# Styles for the
|
51
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#text Document.text}.
|
51
52
|
# @!attribute [rw] pages
|
52
53
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Page>]
|
53
|
-
# Visual page layout for the
|
54
|
+
# Visual page layout for the
|
55
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document Document}.
|
54
56
|
# @!attribute [rw] entities
|
55
57
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Entity>]
|
56
|
-
# A list of entities detected on
|
57
|
-
#
|
58
|
+
# A list of entities detected on
|
59
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#text Document.text}. For
|
60
|
+
# document shards, entities in this list may cross shard boundaries.
|
58
61
|
# @!attribute [rw] entity_relations
|
59
62
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::EntityRelation>]
|
60
|
-
# Placeholder. Relationship among
|
63
|
+
# Placeholder. Relationship among
|
64
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#entities Document.entities}.
|
61
65
|
# @!attribute [rw] text_changes
|
62
66
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::TextChange>]
|
63
|
-
# Placeholder. A list of text corrections made to
|
64
|
-
#
|
65
|
-
#
|
67
|
+
# Placeholder. A list of text corrections made to
|
68
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#text Document.text}. This is
|
69
|
+
# usually used for annotating corrections to OCR mistakes. Text changes for
|
70
|
+
# a given revision may not overlap with each other.
|
66
71
|
# @!attribute [rw] shard_info
|
67
72
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::ShardInfo]
|
68
73
|
# Information about the sharding if this document is sharded part of a larger
|
@@ -88,8 +93,9 @@ module Google
|
|
88
93
|
# Total number of shards.
|
89
94
|
# @!attribute [rw] text_offset
|
90
95
|
# @return [::Integer]
|
91
|
-
# The index of the first character in
|
92
|
-
#
|
96
|
+
# The index of the first character in
|
97
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#text Document.text} in the
|
98
|
+
# overall document global text.
|
93
99
|
class ShardInfo
|
94
100
|
include ::Google::Protobuf::MessageExts
|
95
101
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -99,7 +105,8 @@ module Google
|
|
99
105
|
# conventions as much as possible.
|
100
106
|
# @!attribute [rw] text_anchor
|
101
107
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::TextAnchor]
|
102
|
-
# Text anchor indexing into the
|
108
|
+
# Text anchor indexing into the
|
109
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#text Document.text}.
|
103
110
|
# @!attribute [rw] color
|
104
111
|
# @return [::Google::Type::Color]
|
105
112
|
# Text color.
|
@@ -146,9 +153,11 @@ module Google
|
|
146
153
|
# A page in a {::Google::Cloud::DocumentAI::V1beta3::Document Document}.
|
147
154
|
# @!attribute [rw] page_number
|
148
155
|
# @return [::Integer]
|
149
|
-
# 1-based index for current
|
150
|
-
#
|
151
|
-
#
|
156
|
+
# 1-based index for current
|
157
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page Page} in a parent
|
158
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document Document}. Useful when a page
|
159
|
+
# is taken out of a {::Google::Cloud::DocumentAI::V1beta3::Document Document}
|
160
|
+
# for individual processing.
|
152
161
|
# @!attribute [rw] image
|
153
162
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Image]
|
154
163
|
# Rendered image for this page. This image is preprocessed to remove any
|
@@ -157,13 +166,15 @@ module Google
|
|
157
166
|
# @!attribute [rw] transforms
|
158
167
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Page::Matrix>]
|
159
168
|
# Transformation matrices that were applied to the original document image
|
160
|
-
# to produce
|
169
|
+
# to produce
|
170
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page#image Page.image}.
|
161
171
|
# @!attribute [rw] dimension
|
162
172
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Dimension]
|
163
173
|
# Physical dimension of the page.
|
164
174
|
# @!attribute [rw] layout
|
165
175
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout]
|
166
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for the
|
176
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for the
|
177
|
+
# page.
|
167
178
|
# @!attribute [rw] detected_languages
|
168
179
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Page::DetectedLanguage>]
|
169
180
|
# A list of detected languages together with confidence.
|
@@ -267,18 +278,23 @@ module Google
|
|
267
278
|
# Visual element describing a layout unit on a page.
|
268
279
|
# @!attribute [rw] text_anchor
|
269
280
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::TextAnchor]
|
270
|
-
# Text anchor indexing into the
|
281
|
+
# Text anchor indexing into the
|
282
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#text Document.text}.
|
271
283
|
# @!attribute [rw] confidence
|
272
284
|
# @return [::Float]
|
273
|
-
# Confidence of the current
|
274
|
-
#
|
275
|
-
#
|
285
|
+
# Confidence of the current
|
286
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} within
|
287
|
+
# context of the object this layout is for. e.g. confidence can be for a
|
288
|
+
# single token, a table, a visual element, etc. depending on context.
|
289
|
+
# Range `[0, 1]`.
|
276
290
|
# @!attribute [rw] bounding_poly
|
277
291
|
# @return [::Google::Cloud::DocumentAI::V1beta3::BoundingPoly]
|
278
|
-
# The bounding polygon for the
|
292
|
+
# The bounding polygon for the
|
293
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout}.
|
279
294
|
# @!attribute [rw] orientation
|
280
295
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout::Orientation]
|
281
|
-
# Detected orientation for the
|
296
|
+
# Detected orientation for the
|
297
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout}.
|
282
298
|
class Layout
|
283
299
|
include ::Google::Protobuf::MessageExts
|
284
300
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -309,7 +325,8 @@ module Google
|
|
309
325
|
# common line-spacing and orientation.
|
310
326
|
# @!attribute [rw] layout
|
311
327
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout]
|
312
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
328
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
329
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Block Block}.
|
313
330
|
# @!attribute [rw] detected_languages
|
314
331
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Page::DetectedLanguage>]
|
315
332
|
# A list of detected languages together with confidence.
|
@@ -324,7 +341,8 @@ module Google
|
|
324
341
|
# A collection of lines that a human would perceive as a paragraph.
|
325
342
|
# @!attribute [rw] layout
|
326
343
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout]
|
327
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
344
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
345
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Paragraph Paragraph}.
|
328
346
|
# @!attribute [rw] detected_languages
|
329
347
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Page::DetectedLanguage>]
|
330
348
|
# A list of detected languages together with confidence.
|
@@ -340,7 +358,8 @@ module Google
|
|
340
358
|
# Does not cross column boundaries, can be horizontal, vertical, etc.
|
341
359
|
# @!attribute [rw] layout
|
342
360
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout]
|
343
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
361
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
362
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Line Line}.
|
344
363
|
# @!attribute [rw] detected_languages
|
345
364
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Page::DetectedLanguage>]
|
346
365
|
# A list of detected languages together with confidence.
|
@@ -355,10 +374,12 @@ module Google
|
|
355
374
|
# A detected token.
|
356
375
|
# @!attribute [rw] layout
|
357
376
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout]
|
358
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
377
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
378
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Token Token}.
|
359
379
|
# @!attribute [rw] detected_break
|
360
380
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Token::DetectedBreak]
|
361
|
-
# Detected break at the end of a
|
381
|
+
# Detected break at the end of a
|
382
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Token Token}.
|
362
383
|
# @!attribute [rw] detected_languages
|
363
384
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Page::DetectedLanguage>]
|
364
385
|
# A list of detected languages together with confidence.
|
@@ -369,7 +390,8 @@ module Google
|
|
369
390
|
include ::Google::Protobuf::MessageExts
|
370
391
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
371
392
|
|
372
|
-
# Detected break at the end of a
|
393
|
+
# Detected break at the end of a
|
394
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Token Token}.
|
373
395
|
# @!attribute [rw] type
|
374
396
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Token::DetectedBreak::Type]
|
375
397
|
# Detected break type.
|
@@ -397,7 +419,8 @@ module Google
|
|
397
419
|
# A detected symbol.
|
398
420
|
# @!attribute [rw] layout
|
399
421
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout]
|
400
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
422
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
423
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Symbol Symbol}.
|
401
424
|
# @!attribute [rw] detected_languages
|
402
425
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Page::DetectedLanguage>]
|
403
426
|
# A list of detected languages together with confidence.
|
@@ -410,10 +433,12 @@ module Google
|
|
410
433
|
# page.
|
411
434
|
# @!attribute [rw] layout
|
412
435
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout]
|
413
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
436
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
437
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::VisualElement VisualElement}.
|
414
438
|
# @!attribute [rw] type
|
415
439
|
# @return [::String]
|
416
|
-
# Type of the
|
440
|
+
# Type of the
|
441
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::VisualElement VisualElement}.
|
417
442
|
# @!attribute [rw] detected_languages
|
418
443
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Page::DetectedLanguage>]
|
419
444
|
# A list of detected languages together with confidence.
|
@@ -425,7 +450,8 @@ module Google
|
|
425
450
|
# A table representation similar to HTML table structure.
|
426
451
|
# @!attribute [rw] layout
|
427
452
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout]
|
428
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
453
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
454
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Table Table}.
|
429
455
|
# @!attribute [rw] header_rows
|
430
456
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Page::Table::TableRow>]
|
431
457
|
# Header rows of the table.
|
@@ -454,7 +480,8 @@ module Google
|
|
454
480
|
# A cell representation inside the table.
|
455
481
|
# @!attribute [rw] layout
|
456
482
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout]
|
457
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
483
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
484
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Table::TableCell TableCell}.
|
458
485
|
# @!attribute [rw] row_span
|
459
486
|
# @return [::Integer]
|
460
487
|
# How many rows this cell spans.
|
@@ -473,11 +500,14 @@ module Google
|
|
473
500
|
# A form field detected on the page.
|
474
501
|
# @!attribute [rw] field_name
|
475
502
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout]
|
476
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for the
|
477
|
-
#
|
503
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for the
|
504
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::FormField FormField}
|
505
|
+
# name. e.g. `Address`, `Email`, `Grand total`, `Phone number`, etc.
|
478
506
|
# @!attribute [rw] field_value
|
479
507
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout]
|
480
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for the
|
508
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for the
|
509
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::FormField FormField}
|
510
|
+
# value.
|
481
511
|
# @!attribute [rw] name_detected_languages
|
482
512
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Page::DetectedLanguage>]
|
483
513
|
# A list of detected languages for name together with confidence.
|
@@ -488,9 +518,10 @@ module Google
|
|
488
518
|
# @return [::String]
|
489
519
|
# If the value is non-textual, this field represents the type. Current
|
490
520
|
# valid values are:
|
491
|
-
#
|
492
|
-
# -
|
493
|
-
# -
|
521
|
+
#
|
522
|
+
# - blank (this indicates the `field_value` is normal text)
|
523
|
+
# - `unfilled_checkbox`
|
524
|
+
# - `filled_checkbox`
|
494
525
|
# @!attribute [rw] corrected_key_text
|
495
526
|
# @return [::String]
|
496
527
|
# Created for Labeling UI to export key text.
|
@@ -512,10 +543,12 @@ module Google
|
|
512
543
|
# A detected barcode.
|
513
544
|
# @!attribute [rw] layout
|
514
545
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout]
|
515
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
546
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::Layout Layout} for
|
547
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::DetectedBarcode DetectedBarcode}.
|
516
548
|
# @!attribute [rw] barcode
|
517
549
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Barcode]
|
518
|
-
# Detailed barcode information of the
|
550
|
+
# Detailed barcode information of the
|
551
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page::DetectedBarcode DetectedBarcode}.
|
519
552
|
class DetectedBarcode
|
520
553
|
include ::Google::Protobuf::MessageExts
|
521
554
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -576,7 +609,8 @@ module Google
|
|
576
609
|
# @!attribute [rw] text_anchor
|
577
610
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::TextAnchor]
|
578
611
|
# Optional. Provenance of the entity.
|
579
|
-
# Text anchor indexing into the
|
612
|
+
# Text anchor indexing into the
|
613
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#text Document.text}.
|
580
614
|
# @!attribute [rw] type
|
581
615
|
# @return [::String]
|
582
616
|
# Required. Entity type from a schema e.g. `Address`.
|
@@ -591,28 +625,29 @@ module Google
|
|
591
625
|
# Optional. Confidence of detected Schema entity. Range `[0, 1]`.
|
592
626
|
# @!attribute [rw] page_anchor
|
593
627
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::PageAnchor]
|
594
|
-
# Optional. Represents the provenance of this entity wrt. the location on
|
595
|
-
# page where it was found.
|
628
|
+
# Optional. Represents the provenance of this entity wrt. the location on
|
629
|
+
# the page where it was found.
|
596
630
|
# @!attribute [rw] id
|
597
631
|
# @return [::String]
|
598
632
|
# Optional. Canonical id. This will be a unique value in the entity list
|
599
633
|
# for this document.
|
600
634
|
# @!attribute [rw] normalized_value
|
601
635
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Entity::NormalizedValue]
|
602
|
-
# Optional. Normalized entity value. Absent if the extracted value could
|
603
|
-
# converted or the type (e.g. address) is not supported for certain
|
636
|
+
# Optional. Normalized entity value. Absent if the extracted value could
|
637
|
+
# not be converted or the type (e.g. address) is not supported for certain
|
604
638
|
# parsers. This field is also only populated for certain supported document
|
605
639
|
# types.
|
606
640
|
# @!attribute [rw] properties
|
607
641
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::Entity>]
|
608
|
-
# Optional. Entities can be nested to form a hierarchical data structure
|
609
|
-
# the content in the document.
|
642
|
+
# Optional. Entities can be nested to form a hierarchical data structure
|
643
|
+
# representing the content in the document.
|
610
644
|
# @!attribute [rw] provenance
|
611
645
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::Provenance]
|
612
646
|
# Optional. The history of this annotation.
|
613
647
|
# @!attribute [rw] redacted
|
614
648
|
# @return [::Boolean]
|
615
|
-
# Optional. Whether the entity will be redacted for de-identification
|
649
|
+
# Optional. Whether the entity will be redacted for de-identification
|
650
|
+
# purposes.
|
616
651
|
class Entity
|
617
652
|
include ::Google::Protobuf::MessageExts
|
618
653
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -649,8 +684,8 @@ module Google
|
|
649
684
|
# Optional. An optional field to store a normalized string.
|
650
685
|
# For some entity types, one of respective `structured_value` fields may
|
651
686
|
# also be populated. Also not all the types of `structured_value` will be
|
652
|
-
# normalized. For example, some processors may not generate float
|
653
|
-
# or
|
687
|
+
# normalized. For example, some processors may not generate `float`
|
688
|
+
# or `integer` normalized text by default.
|
654
689
|
#
|
655
690
|
# Below are sample formats mapped to structured values.
|
656
691
|
#
|
@@ -663,7 +698,8 @@ module Google
|
|
663
698
|
end
|
664
699
|
end
|
665
700
|
|
666
|
-
# Relationship between
|
701
|
+
# Relationship between
|
702
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Entity Entities}.
|
667
703
|
# @!attribute [rw] subject_id
|
668
704
|
# @return [::String]
|
669
705
|
# Subject entity id.
|
@@ -678,10 +714,12 @@ module Google
|
|
678
714
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
679
715
|
end
|
680
716
|
|
681
|
-
# Text reference indexing into the
|
717
|
+
# Text reference indexing into the
|
718
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#text Document.text}.
|
682
719
|
# @!attribute [rw] text_segments
|
683
720
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::TextAnchor::TextSegment>]
|
684
|
-
# The text segments from the
|
721
|
+
# The text segments from the
|
722
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#text Document.text}.
|
685
723
|
# @!attribute [rw] content
|
686
724
|
# @return [::String]
|
687
725
|
# Contains the content of the text span so that users do
|
@@ -691,15 +729,20 @@ module Google
|
|
691
729
|
include ::Google::Protobuf::MessageExts
|
692
730
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
693
731
|
|
694
|
-
# A text segment in the
|
695
|
-
#
|
696
|
-
#
|
732
|
+
# A text segment in the
|
733
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#text Document.text}. The
|
734
|
+
# indices may be out of bounds which indicate that the text extends into
|
735
|
+
# another document shard for large sharded documents. See
|
736
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::ShardInfo#text_offset ShardInfo.text_offset}
|
697
737
|
# @!attribute [rw] start_index
|
698
738
|
# @return [::Integer]
|
699
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::TextAnchor::TextSegment TextSegment}
|
739
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::TextAnchor::TextSegment TextSegment}
|
740
|
+
# start UTF-8 char index in the
|
741
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#text Document.text}.
|
700
742
|
# @!attribute [rw] end_index
|
701
743
|
# @return [::Integer]
|
702
|
-
# {::Google::Cloud::DocumentAI::V1beta3::Document::TextAnchor::TextSegment TextSegment}
|
744
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::TextAnchor::TextSegment TextSegment}
|
745
|
+
# half open end UTF-8 char index in the
|
703
746
|
# {::Google::Cloud::DocumentAI::V1beta3::Document#text Document.text}.
|
704
747
|
class TextSegment
|
705
748
|
include ::Google::Protobuf::MessageExts
|
@@ -707,8 +750,9 @@ module Google
|
|
707
750
|
end
|
708
751
|
end
|
709
752
|
|
710
|
-
# Referencing the visual context of the entity in the
|
711
|
-
#
|
753
|
+
# Referencing the visual context of the entity in the
|
754
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#pages Document.pages}. Page
|
755
|
+
# anchors can be cross-page, consist of multiple bounding polygons and
|
712
756
|
# optionally reference specific layout element types.
|
713
757
|
# @!attribute [rw] page_refs
|
714
758
|
# @return [::Array<::Google::Cloud::DocumentAI::V1beta3::Document::PageAnchor::PageRef>]
|
@@ -720,22 +764,29 @@ module Google
|
|
720
764
|
# Represents a weak reference to a page element within a document.
|
721
765
|
# @!attribute [rw] page
|
722
766
|
# @return [::Integer]
|
723
|
-
# Required. Index into the
|
724
|
-
#
|
725
|
-
#
|
767
|
+
# Required. Index into the
|
768
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#pages Document.pages}
|
769
|
+
# element, for example using
|
770
|
+
# `[Document.pages][page_refs.page]` to locate the related page element.
|
771
|
+
# This field is skipped when its value is the default `0`. See
|
726
772
|
# https://developers.google.com/protocol-buffers/docs/proto3#json.
|
727
773
|
# @!attribute [rw] layout_type
|
728
774
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::PageAnchor::PageRef::LayoutType]
|
729
|
-
# Optional. The type of the layout element that is being referenced if
|
775
|
+
# Optional. The type of the layout element that is being referenced if
|
776
|
+
# any.
|
730
777
|
# @!attribute [rw] layout_id
|
731
778
|
# @return [::String]
|
732
|
-
# Optional. Deprecated. Use
|
779
|
+
# Optional. Deprecated. Use
|
780
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::PageAnchor::PageRef#bounding_poly PageRef.bounding_poly}
|
781
|
+
# instead.
|
733
782
|
# @!attribute [rw] bounding_poly
|
734
783
|
# @return [::Google::Cloud::DocumentAI::V1beta3::BoundingPoly]
|
735
|
-
# Optional. Identifies the bounding polygon of a layout element on the
|
784
|
+
# Optional. Identifies the bounding polygon of a layout element on the
|
785
|
+
# page.
|
736
786
|
# @!attribute [rw] confidence
|
737
787
|
# @return [::Float]
|
738
|
-
# Optional. Confidence of detected page element, if applicable. Range
|
788
|
+
# Optional. Confidence of detected page element, if applicable. Range
|
789
|
+
# `[0, 1]`.
|
739
790
|
class PageRef
|
740
791
|
include ::Google::Protobuf::MessageExts
|
741
792
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -745,25 +796,39 @@ module Google
|
|
745
796
|
# Layout Unspecified.
|
746
797
|
LAYOUT_TYPE_UNSPECIFIED = 0
|
747
798
|
|
748
|
-
# References a
|
799
|
+
# References a
|
800
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page#blocks Page.blocks}
|
801
|
+
# element.
|
749
802
|
BLOCK = 1
|
750
803
|
|
751
|
-
# References a
|
804
|
+
# References a
|
805
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page#paragraphs Page.paragraphs}
|
806
|
+
# element.
|
752
807
|
PARAGRAPH = 2
|
753
808
|
|
754
|
-
# References a
|
809
|
+
# References a
|
810
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page#lines Page.lines}
|
811
|
+
# element.
|
755
812
|
LINE = 3
|
756
813
|
|
757
|
-
# References a
|
814
|
+
# References a
|
815
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page#tokens Page.tokens}
|
816
|
+
# element.
|
758
817
|
TOKEN = 4
|
759
818
|
|
760
|
-
# References a
|
819
|
+
# References a
|
820
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page#visual_elements Page.visual_elements}
|
821
|
+
# element.
|
761
822
|
VISUAL_ELEMENT = 5
|
762
823
|
|
763
|
-
# References a
|
824
|
+
# References a
|
825
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page#tables Page.tables}
|
826
|
+
# element.
|
764
827
|
TABLE = 6
|
765
828
|
|
766
|
-
# References a
|
829
|
+
# References a
|
830
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document::Page#form_fields Page.form_fields}
|
831
|
+
# element.
|
767
832
|
FORM_FIELD = 7
|
768
833
|
end
|
769
834
|
end
|
@@ -882,10 +947,11 @@ module Google
|
|
882
947
|
# @!attribute [rw] text_anchor
|
883
948
|
# @return [::Google::Cloud::DocumentAI::V1beta3::Document::TextAnchor]
|
884
949
|
# Provenance of the correction.
|
885
|
-
# Text anchor indexing into the
|
886
|
-
#
|
887
|
-
#
|
888
|
-
#
|
950
|
+
# Text anchor indexing into the
|
951
|
+
# {::Google::Cloud::DocumentAI::V1beta3::Document#text Document.text}. There
|
952
|
+
# can only be a single `TextAnchor.text_segments` element. If the start
|
953
|
+
# and end index of the text segment are the same, the text change is
|
954
|
+
# inserted before that index.
|
889
955
|
# @!attribute [rw] changed_text
|
890
956
|
# @return [::String]
|
891
957
|
# The text that replaces the text identified in the `text_anchor`.
|
@@ -114,6 +114,16 @@ module Google
|
|
114
114
|
end
|
115
115
|
end
|
116
116
|
end
|
117
|
+
|
118
|
+
# Config for Document OCR.
|
119
|
+
# @!attribute [rw] enable_native_pdf_parsing
|
120
|
+
# @return [::Boolean]
|
121
|
+
# Enables special handling for PDFs with existing text information. Results
|
122
|
+
# in better text extraction quality in such PDF inputs.
|
123
|
+
class OcrConfig
|
124
|
+
include ::Google::Protobuf::MessageExts
|
125
|
+
extend ::Google::Protobuf::MessageExts::ClassMethods
|
126
|
+
end
|
117
127
|
end
|
118
128
|
end
|
119
129
|
end
|