google-cloud-document_ai-v1 0.12.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/google/cloud/document_ai/v1/document_processor_service/client.rb +66 -40
- data/lib/google/cloud/document_ai/v1/document_processor_service/operations.rb +10 -1
- data/lib/google/cloud/document_ai/v1/document_processor_service/rest/client.rb +501 -39
- data/lib/google/cloud/document_ai/v1/document_processor_service/rest/operations.rb +75 -0
- data/lib/google/cloud/document_ai/v1/document_processor_service/rest.rb +1 -1
- data/lib/google/cloud/document_ai/v1/document_processor_service.rb +1 -1
- data/lib/google/cloud/document_ai/v1/version.rb +1 -1
- data/lib/google/cloud/documentai/v1/document_io_pb.rb +3 -1
- data/lib/google/cloud/documentai/v1/document_pb.rb +2 -1
- data/lib/google/cloud/documentai/v1/document_processor_service_pb.rb +5 -2
- data/lib/google/cloud/documentai/v1/document_processor_service_services_pb.rb +8 -6
- data/lib/google/cloud/documentai/v1/processor_pb.rb +1 -1
- data/proto_docs/google/api/field_behavior.rb +14 -0
- data/proto_docs/google/cloud/documentai/v1/document.rb +88 -27
- data/proto_docs/google/cloud/documentai/v1/document_io.rb +47 -0
- data/proto_docs/google/cloud/documentai/v1/document_processor_service.rb +230 -98
- data/proto_docs/google/cloud/documentai/v1/document_schema.rb +6 -7
- data/proto_docs/google/cloud/documentai/v1/processor.rb +12 -10
- data/proto_docs/google/cloud/documentai/v1/processor_type.rb +3 -2
- metadata +5 -5
@@ -28,10 +28,9 @@ module Google
|
|
28
28
|
# @!attribute [rw] uri
|
29
29
|
# @return [::String]
|
30
30
|
# Optional. Currently supports Google Cloud Storage URI of the form
|
31
|
-
#
|
32
|
-
#
|
33
|
-
#
|
34
|
-
# info.
|
31
|
+
# `gs://bucket_name/object_name`. Object versioning is not supported.
|
32
|
+
# For more information, refer to [Google Cloud Storage Request
|
33
|
+
# URIs](https://cloud.google.com/storage/docs/reference-uris).
|
35
34
|
# @!attribute [rw] content
|
36
35
|
# @return [::String]
|
37
36
|
# Optional. Inline document content, represented as a stream of bytes.
|
@@ -39,9 +38,8 @@ module Google
|
|
39
38
|
# representation, whereas JSON representations use base64.
|
40
39
|
# @!attribute [rw] mime_type
|
41
40
|
# @return [::String]
|
42
|
-
# An IANA published
|
43
|
-
#
|
44
|
-
# https://www.iana.org/assignments/media-types/media-types.xhtml.
|
41
|
+
# An IANA published [media type (MIME
|
42
|
+
# type)](https://www.iana.org/assignments/media-types/media-types.xhtml).
|
45
43
|
# @!attribute [rw] text
|
46
44
|
# @return [::String]
|
47
45
|
# Optional. UTF-8 encoded text in reading order from the document.
|
@@ -113,17 +111,18 @@ module Google
|
|
113
111
|
# Text background color.
|
114
112
|
# @!attribute [rw] font_weight
|
115
113
|
# @return [::String]
|
116
|
-
# Font weight.
|
117
|
-
#
|
114
|
+
# [Font weight](https://www.w3schools.com/cssref/pr_font_weight.asp).
|
115
|
+
# Possible values are `normal`, `bold`, `bolder`, and `lighter`.
|
118
116
|
# @!attribute [rw] text_style
|
119
117
|
# @return [::String]
|
120
|
-
# Text style.
|
121
|
-
#
|
118
|
+
# [Text style](https://www.w3schools.com/cssref/pr_font_font-style.asp).
|
119
|
+
# Possible values are `normal`, `italic`, and `oblique`.
|
122
120
|
# @!attribute [rw] text_decoration
|
123
121
|
# @return [::String]
|
124
|
-
# Text
|
125
|
-
#
|
126
|
-
#
|
122
|
+
# [Text
|
123
|
+
# decoration](https://www.w3schools.com/cssref/pr_text_text-decoration.asp).
|
124
|
+
# Follows CSS standard. `<text-decoration-line>` `<text-decoration-color>`
|
125
|
+
# `<text-decoration-style>`
|
127
126
|
# @!attribute [rw] font_size
|
128
127
|
# @return [::Google::Cloud::DocumentAI::V1::Document::Style::FontSize]
|
129
128
|
# Font size.
|
@@ -141,7 +140,8 @@ module Google
|
|
141
140
|
# Font size for the text.
|
142
141
|
# @!attribute [rw] unit
|
143
142
|
# @return [::String]
|
144
|
-
# Unit for the font size. Follows CSS naming (in
|
143
|
+
# Unit for the font size. Follows CSS naming (such as `in`, `px`, and
|
144
|
+
# `pt`).
|
145
145
|
class FontSize
|
146
146
|
include ::Google::Protobuf::MessageExts
|
147
147
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -208,7 +208,7 @@ module Google
|
|
208
208
|
# A list of detected barcodes.
|
209
209
|
# @!attribute [rw] image_quality_scores
|
210
210
|
# @return [::Google::Cloud::DocumentAI::V1::Document::Page::ImageQualityScores]
|
211
|
-
# Image
|
211
|
+
# Image quality scores.
|
212
212
|
# @!attribute [rw] provenance
|
213
213
|
# @return [::Google::Cloud::DocumentAI::V1::Document::Provenance]
|
214
214
|
# The history of this page.
|
@@ -237,7 +237,9 @@ module Google
|
|
237
237
|
# Raw byte content of the image.
|
238
238
|
# @!attribute [rw] mime_type
|
239
239
|
# @return [::String]
|
240
|
-
# Encoding
|
240
|
+
# Encoding [media type (MIME
|
241
|
+
# type)](https://www.iana.org/assignments/media-types/media-types.xhtml)
|
242
|
+
# for the image.
|
241
243
|
# @!attribute [rw] width
|
242
244
|
# @return [::Integer]
|
243
245
|
# Width of the image in pixels.
|
@@ -382,6 +384,9 @@ module Google
|
|
382
384
|
# @!attribute [rw] provenance
|
383
385
|
# @return [::Google::Cloud::DocumentAI::V1::Document::Provenance]
|
384
386
|
# The history of this annotation.
|
387
|
+
# @!attribute [rw] style_info
|
388
|
+
# @return [::Google::Cloud::DocumentAI::V1::Document::Page::Token::StyleInfo]
|
389
|
+
# Text style attributes.
|
385
390
|
class Token
|
386
391
|
include ::Google::Protobuf::MessageExts
|
387
392
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -410,6 +415,62 @@ module Google
|
|
410
415
|
HYPHEN = 3
|
411
416
|
end
|
412
417
|
end
|
418
|
+
|
419
|
+
# Font and other text style attributes.
|
420
|
+
# @!attribute [rw] font_size
|
421
|
+
# @return [::Integer]
|
422
|
+
# Font size in points (`1` point is `¹⁄₇₂` inches).
|
423
|
+
# @!attribute [rw] pixel_font_size
|
424
|
+
# @return [::Float]
|
425
|
+
# Font size in pixels, equal to _unrounded
|
426
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Token::StyleInfo#font_size font_size}_
|
427
|
+
# * _resolution_ ÷ `72.0`.
|
428
|
+
# @!attribute [rw] letter_spacing
|
429
|
+
# @return [::Float]
|
430
|
+
# Letter spacing in points.
|
431
|
+
# @!attribute [rw] font_type
|
432
|
+
# @return [::String]
|
433
|
+
# Name or style of the font.
|
434
|
+
# @!attribute [rw] bold
|
435
|
+
# @return [::Boolean]
|
436
|
+
# Whether the text is bold (equivalent to
|
437
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Token::StyleInfo#font_weight font_weight}
|
438
|
+
# being at least `700`).
|
439
|
+
# @!attribute [rw] italic
|
440
|
+
# @return [::Boolean]
|
441
|
+
# Whether the text is italic.
|
442
|
+
# @!attribute [rw] underlined
|
443
|
+
# @return [::Boolean]
|
444
|
+
# Whether the text is underlined.
|
445
|
+
# @!attribute [rw] strikeout
|
446
|
+
# @return [::Boolean]
|
447
|
+
# Whether the text is struck through.
|
448
|
+
# @!attribute [rw] subscript
|
449
|
+
# @return [::Boolean]
|
450
|
+
# Whether the text is a subscript.
|
451
|
+
# @!attribute [rw] superscript
|
452
|
+
# @return [::Boolean]
|
453
|
+
# Whether the text is a superscript.
|
454
|
+
# @!attribute [rw] smallcaps
|
455
|
+
# @return [::Boolean]
|
456
|
+
# Whether the text is in small caps.
|
457
|
+
# @!attribute [rw] font_weight
|
458
|
+
# @return [::Integer]
|
459
|
+
# TrueType weight on a scale `100` (thin) to `1000` (ultra-heavy).
|
460
|
+
# Normal is `400`, bold is `700`.
|
461
|
+
# @!attribute [rw] handwritten
|
462
|
+
# @return [::Boolean]
|
463
|
+
# Whether the text is handwritten.
|
464
|
+
# @!attribute [rw] text_color
|
465
|
+
# @return [::Google::Type::Color]
|
466
|
+
# Color of the text.
|
467
|
+
# @!attribute [rw] background_color
|
468
|
+
# @return [::Google::Type::Color]
|
469
|
+
# Color of the background.
|
470
|
+
class StyleInfo
|
471
|
+
include ::Google::Protobuf::MessageExts
|
472
|
+
extend ::Google::Protobuf::MessageExts::ClassMethods
|
473
|
+
end
|
413
474
|
end
|
414
475
|
|
415
476
|
# A detected symbol.
|
@@ -552,9 +613,9 @@ module Google
|
|
552
613
|
# Detected language for a structural component.
|
553
614
|
# @!attribute [rw] language_code
|
554
615
|
# @return [::String]
|
555
|
-
# The BCP-47 language
|
556
|
-
#
|
557
|
-
#
|
616
|
+
# The [BCP-47 language
|
617
|
+
# code](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier),
|
618
|
+
# such as `en-US` or `sr-Latn`.
|
558
619
|
# @!attribute [rw] confidence
|
559
620
|
# @return [::Float]
|
560
621
|
# Confidence of detected language. Range `[0, 1]`.
|
@@ -563,10 +624,10 @@ module Google
|
|
563
624
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
564
625
|
end
|
565
626
|
|
566
|
-
# Image
|
627
|
+
# Image quality scores for the page image.
|
567
628
|
# @!attribute [rw] quality_score
|
568
629
|
# @return [::Float]
|
569
|
-
# The overall quality score. Range `[0, 1]` where 1 is perfect quality.
|
630
|
+
# The overall quality score. Range `[0, 1]` where `1` is perfect quality.
|
570
631
|
# @!attribute [rw] detected_defects
|
571
632
|
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::ImageQualityScores::DetectedDefect>]
|
572
633
|
# A list of detected defects.
|
@@ -589,8 +650,8 @@ module Google
|
|
589
650
|
# - `quality/defect_glare`
|
590
651
|
# @!attribute [rw] confidence
|
591
652
|
# @return [::Float]
|
592
|
-
# Confidence of detected defect. Range `[0, 1]` where 1 indicates
|
593
|
-
# strong confidence
|
653
|
+
# Confidence of detected defect. Range `[0, 1]` where `1` indicates
|
654
|
+
# strong confidence that the defect exists.
|
594
655
|
class DetectedDefect
|
595
656
|
include ::Google::Protobuf::MessageExts
|
596
657
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -877,9 +938,9 @@ module Google
|
|
877
938
|
REMOVE = 2
|
878
939
|
|
879
940
|
# Updates any fields within the given provenance scope of the message. It
|
880
|
-
#
|
881
|
-
#
|
882
|
-
#
|
941
|
+
# overwrites the fields rather than replacing them. Use this when you
|
942
|
+
# want to update a field value of an entity without also updating all the
|
943
|
+
# child properties.
|
883
944
|
UPDATE = 7
|
884
945
|
|
885
946
|
# Currently unused. Replace an element identified by `parent`.
|
@@ -114,6 +114,53 @@ module Google
|
|
114
114
|
end
|
115
115
|
end
|
116
116
|
end
|
117
|
+
|
118
|
+
# Config for Document OCR.
|
119
|
+
# @!attribute [rw] hints
|
120
|
+
# @return [::Google::Cloud::DocumentAI::V1::OcrConfig::Hints]
|
121
|
+
# Hints for the OCR model.
|
122
|
+
# @!attribute [rw] enable_native_pdf_parsing
|
123
|
+
# @return [::Boolean]
|
124
|
+
# Enables special handling for PDFs with existing text information. Results
|
125
|
+
# in better text extraction quality in such PDF inputs.
|
126
|
+
# @!attribute [rw] enable_image_quality_scores
|
127
|
+
# @return [::Boolean]
|
128
|
+
# Enables intelligent document quality scores after OCR. Can help with
|
129
|
+
# diagnosing why OCR responses are of poor quality for a given input.
|
130
|
+
# Adds additional latency comparable to regular OCR to the process call.
|
131
|
+
# @!attribute [rw] advanced_ocr_options
|
132
|
+
# @return [::Array<::String>]
|
133
|
+
# A list of advanced OCR options to further fine-tune OCR behavior. Current
|
134
|
+
# valid values are:
|
135
|
+
#
|
136
|
+
# - `legacy_layout`: a heuristics layout detection algorithm, which serves as
|
137
|
+
# an alternative to the current ML-based layout detection algorithm.
|
138
|
+
# Customers can choose the most suitable layout algorithm based on their
|
139
|
+
# situation.
|
140
|
+
# @!attribute [rw] enable_symbol
|
141
|
+
# @return [::Boolean]
|
142
|
+
# Includes symbol level OCR information if set to true.
|
143
|
+
# @!attribute [rw] compute_style_info
|
144
|
+
# @return [::Boolean]
|
145
|
+
# Turns on the font ID model and returns font style information.
|
146
|
+
class OcrConfig
|
147
|
+
include ::Google::Protobuf::MessageExts
|
148
|
+
extend ::Google::Protobuf::MessageExts::ClassMethods
|
149
|
+
|
150
|
+
# Hints for the OCR engine.
|
151
|
+
# @!attribute [rw] language_hints
|
152
|
+
# @return [::Array<::String>]
|
153
|
+
# List of BCP-47 language codes to use for OCR. In most cases, not
|
154
|
+
# specifying it yields the best results since it enables automatic language
|
155
|
+
# detection. For languages based on the Latin alphabet, setting hints is
|
156
|
+
# not needed. In rare cases, when the language of the text in the
|
157
|
+
# image is known, setting a hint will help get better results (although it
|
158
|
+
# will be a significant hindrance if the hint is wrong).
|
159
|
+
class Hints
|
160
|
+
include ::Google::Protobuf::MessageExts
|
161
|
+
extend ::Google::Protobuf::MessageExts::ClassMethods
|
162
|
+
end
|
163
|
+
end
|
117
164
|
end
|
118
165
|
end
|
119
166
|
end
|