google-cloud-document_ai-v1 0.12.0 → 0.14.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/google/cloud/document_ai/v1/document_processor_service/client.rb +66 -40
- data/lib/google/cloud/document_ai/v1/document_processor_service/operations.rb +10 -1
- data/lib/google/cloud/document_ai/v1/document_processor_service/rest/client.rb +501 -39
- data/lib/google/cloud/document_ai/v1/document_processor_service/rest/operations.rb +75 -0
- data/lib/google/cloud/document_ai/v1/document_processor_service/rest.rb +1 -1
- data/lib/google/cloud/document_ai/v1/document_processor_service.rb +1 -1
- data/lib/google/cloud/document_ai/v1/version.rb +1 -1
- data/lib/google/cloud/documentai/v1/document_io_pb.rb +3 -1
- data/lib/google/cloud/documentai/v1/document_pb.rb +2 -1
- data/lib/google/cloud/documentai/v1/document_processor_service_pb.rb +5 -2
- data/lib/google/cloud/documentai/v1/document_processor_service_services_pb.rb +8 -6
- data/lib/google/cloud/documentai/v1/processor_pb.rb +1 -1
- data/proto_docs/google/api/field_behavior.rb +14 -0
- data/proto_docs/google/cloud/documentai/v1/document.rb +88 -27
- data/proto_docs/google/cloud/documentai/v1/document_io.rb +47 -0
- data/proto_docs/google/cloud/documentai/v1/document_processor_service.rb +230 -98
- data/proto_docs/google/cloud/documentai/v1/document_schema.rb +6 -7
- data/proto_docs/google/cloud/documentai/v1/processor.rb +12 -10
- data/proto_docs/google/cloud/documentai/v1/processor_type.rb +3 -2
- metadata +5 -5
@@ -28,10 +28,9 @@ module Google
|
|
28
28
|
# @!attribute [rw] uri
|
29
29
|
# @return [::String]
|
30
30
|
# Optional. Currently supports Google Cloud Storage URI of the form
|
31
|
-
#
|
32
|
-
#
|
33
|
-
#
|
34
|
-
# info.
|
31
|
+
# `gs://bucket_name/object_name`. Object versioning is not supported.
|
32
|
+
# For more information, refer to [Google Cloud Storage Request
|
33
|
+
# URIs](https://cloud.google.com/storage/docs/reference-uris).
|
35
34
|
# @!attribute [rw] content
|
36
35
|
# @return [::String]
|
37
36
|
# Optional. Inline document content, represented as a stream of bytes.
|
@@ -39,9 +38,8 @@ module Google
|
|
39
38
|
# representation, whereas JSON representations use base64.
|
40
39
|
# @!attribute [rw] mime_type
|
41
40
|
# @return [::String]
|
42
|
-
# An IANA published
|
43
|
-
#
|
44
|
-
# https://www.iana.org/assignments/media-types/media-types.xhtml.
|
41
|
+
# An IANA published [media type (MIME
|
42
|
+
# type)](https://www.iana.org/assignments/media-types/media-types.xhtml).
|
45
43
|
# @!attribute [rw] text
|
46
44
|
# @return [::String]
|
47
45
|
# Optional. UTF-8 encoded text in reading order from the document.
|
@@ -113,17 +111,18 @@ module Google
|
|
113
111
|
# Text background color.
|
114
112
|
# @!attribute [rw] font_weight
|
115
113
|
# @return [::String]
|
116
|
-
# Font weight.
|
117
|
-
#
|
114
|
+
# [Font weight](https://www.w3schools.com/cssref/pr_font_weight.asp).
|
115
|
+
# Possible values are `normal`, `bold`, `bolder`, and `lighter`.
|
118
116
|
# @!attribute [rw] text_style
|
119
117
|
# @return [::String]
|
120
|
-
# Text style.
|
121
|
-
#
|
118
|
+
# [Text style](https://www.w3schools.com/cssref/pr_font_font-style.asp).
|
119
|
+
# Possible values are `normal`, `italic`, and `oblique`.
|
122
120
|
# @!attribute [rw] text_decoration
|
123
121
|
# @return [::String]
|
124
|
-
# Text
|
125
|
-
#
|
126
|
-
#
|
122
|
+
# [Text
|
123
|
+
# decoration](https://www.w3schools.com/cssref/pr_text_text-decoration.asp).
|
124
|
+
# Follows CSS standard. <text-decoration-line> <text-decoration-color>
|
125
|
+
# <text-decoration-style>
|
127
126
|
# @!attribute [rw] font_size
|
128
127
|
# @return [::Google::Cloud::DocumentAI::V1::Document::Style::FontSize]
|
129
128
|
# Font size.
|
@@ -141,7 +140,8 @@ module Google
|
|
141
140
|
# Font size for the text.
|
142
141
|
# @!attribute [rw] unit
|
143
142
|
# @return [::String]
|
144
|
-
# Unit for the font size. Follows CSS naming (in
|
143
|
+
# Unit for the font size. Follows CSS naming (such as `in`, `px`, and
|
144
|
+
# `pt`).
|
145
145
|
class FontSize
|
146
146
|
include ::Google::Protobuf::MessageExts
|
147
147
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -208,7 +208,7 @@ module Google
|
|
208
208
|
# A list of detected barcodes.
|
209
209
|
# @!attribute [rw] image_quality_scores
|
210
210
|
# @return [::Google::Cloud::DocumentAI::V1::Document::Page::ImageQualityScores]
|
211
|
-
# Image
|
211
|
+
# Image quality scores.
|
212
212
|
# @!attribute [rw] provenance
|
213
213
|
# @return [::Google::Cloud::DocumentAI::V1::Document::Provenance]
|
214
214
|
# The history of this page.
|
@@ -237,7 +237,9 @@ module Google
|
|
237
237
|
# Raw byte content of the image.
|
238
238
|
# @!attribute [rw] mime_type
|
239
239
|
# @return [::String]
|
240
|
-
# Encoding
|
240
|
+
# Encoding [media type (MIME
|
241
|
+
# type)](https://www.iana.org/assignments/media-types/media-types.xhtml)
|
242
|
+
# for the image.
|
241
243
|
# @!attribute [rw] width
|
242
244
|
# @return [::Integer]
|
243
245
|
# Width of the image in pixels.
|
@@ -382,6 +384,9 @@ module Google
|
|
382
384
|
# @!attribute [rw] provenance
|
383
385
|
# @return [::Google::Cloud::DocumentAI::V1::Document::Provenance]
|
384
386
|
# The history of this annotation.
|
387
|
+
# @!attribute [rw] style_info
|
388
|
+
# @return [::Google::Cloud::DocumentAI::V1::Document::Page::Token::StyleInfo]
|
389
|
+
# Text style attributes.
|
385
390
|
class Token
|
386
391
|
include ::Google::Protobuf::MessageExts
|
387
392
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -410,6 +415,62 @@ module Google
|
|
410
415
|
HYPHEN = 3
|
411
416
|
end
|
412
417
|
end
|
418
|
+
|
419
|
+
# Font and other text style attributes.
|
420
|
+
# @!attribute [rw] font_size
|
421
|
+
# @return [::Integer]
|
422
|
+
# Font size in points (`1` point is `¹⁄₇₂` inches).
|
423
|
+
# @!attribute [rw] pixel_font_size
|
424
|
+
# @return [::Float]
|
425
|
+
# Font size in pixels, equal to _unrounded
|
426
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Token::StyleInfo#font_size font_size}_
|
427
|
+
# * _resolution_ ÷ `72.0`.
|
428
|
+
# @!attribute [rw] letter_spacing
|
429
|
+
# @return [::Float]
|
430
|
+
# Letter spacing in points.
|
431
|
+
# @!attribute [rw] font_type
|
432
|
+
# @return [::String]
|
433
|
+
# Name or style of the font.
|
434
|
+
# @!attribute [rw] bold
|
435
|
+
# @return [::Boolean]
|
436
|
+
# Whether the text is bold (equivalent to
|
437
|
+
# {::Google::Cloud::DocumentAI::V1::Document::Page::Token::StyleInfo#font_weight font_weight}
|
438
|
+
# is at least `700`).
|
439
|
+
# @!attribute [rw] italic
|
440
|
+
# @return [::Boolean]
|
441
|
+
# Whether the text is italic.
|
442
|
+
# @!attribute [rw] underlined
|
443
|
+
# @return [::Boolean]
|
444
|
+
# Whether the text is underlined.
|
445
|
+
# @!attribute [rw] strikeout
|
446
|
+
# @return [::Boolean]
|
447
|
+
# Whether the text is struck through.
|
448
|
+
# @!attribute [rw] subscript
|
449
|
+
# @return [::Boolean]
|
450
|
+
# Whether the text is a subscript.
|
451
|
+
# @!attribute [rw] superscript
|
452
|
+
# @return [::Boolean]
|
453
|
+
# Whether the text is a superscript.
|
454
|
+
# @!attribute [rw] smallcaps
|
455
|
+
# @return [::Boolean]
|
456
|
+
# Whether the text is in small caps.
|
457
|
+
# @!attribute [rw] font_weight
|
458
|
+
# @return [::Integer]
|
459
|
+
# TrueType weight on a scale `100` (thin) to `1000` (ultra-heavy).
|
460
|
+
# Normal is `400`, bold is `700`.
|
461
|
+
# @!attribute [rw] handwritten
|
462
|
+
# @return [::Boolean]
|
463
|
+
# Whether the text is handwritten.
|
464
|
+
# @!attribute [rw] text_color
|
465
|
+
# @return [::Google::Type::Color]
|
466
|
+
# Color of the text.
|
467
|
+
# @!attribute [rw] background_color
|
468
|
+
# @return [::Google::Type::Color]
|
469
|
+
# Color of the background.
|
470
|
+
class StyleInfo
|
471
|
+
include ::Google::Protobuf::MessageExts
|
472
|
+
extend ::Google::Protobuf::MessageExts::ClassMethods
|
473
|
+
end
|
413
474
|
end
|
414
475
|
|
415
476
|
# A detected symbol.
|
@@ -552,9 +613,9 @@ module Google
|
|
552
613
|
# Detected language for a structural component.
|
553
614
|
# @!attribute [rw] language_code
|
554
615
|
# @return [::String]
|
555
|
-
# The BCP-47 language
|
556
|
-
#
|
557
|
-
#
|
616
|
+
# The [BCP-47 language
|
617
|
+
# code](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier),
|
618
|
+
# such as `en-US` or `sr-Latn`.
|
558
619
|
# @!attribute [rw] confidence
|
559
620
|
# @return [::Float]
|
560
621
|
# Confidence of detected language. Range `[0, 1]`.
|
@@ -563,10 +624,10 @@ module Google
|
|
563
624
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
564
625
|
end
|
565
626
|
|
566
|
-
# Image
|
627
|
+
# Image quality scores for the page image.
|
567
628
|
# @!attribute [rw] quality_score
|
568
629
|
# @return [::Float]
|
569
|
-
# The overall quality score. Range `[0, 1]` where 1 is perfect quality.
|
630
|
+
# The overall quality score. Range `[0, 1]` where `1` is perfect quality.
|
570
631
|
# @!attribute [rw] detected_defects
|
571
632
|
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::ImageQualityScores::DetectedDefect>]
|
572
633
|
# A list of detected defects.
|
@@ -589,8 +650,8 @@ module Google
|
|
589
650
|
# - `quality/defect_glare`
|
590
651
|
# @!attribute [rw] confidence
|
591
652
|
# @return [::Float]
|
592
|
-
# Confidence of detected defect. Range `[0, 1]` where 1 indicates
|
593
|
-
# strong confidence
|
653
|
+
# Confidence of detected defect. Range `[0, 1]` where `1` indicates
|
654
|
+
# strong confidence that the defect exists.
|
594
655
|
class DetectedDefect
|
595
656
|
include ::Google::Protobuf::MessageExts
|
596
657
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -877,9 +938,9 @@ module Google
|
|
877
938
|
REMOVE = 2
|
878
939
|
|
879
940
|
# Updates any fields within the given provenance scope of the message. It
|
880
|
-
#
|
881
|
-
#
|
882
|
-
#
|
941
|
+
# overwrites the fields rather than replacing them. Use this when you
|
942
|
+
# want to update a field value of an entity without also updating all the
|
943
|
+
# child properties.
|
883
944
|
UPDATE = 7
|
884
945
|
|
885
946
|
# Currently unused. Replace an element identified by `parent`.
|
@@ -114,6 +114,53 @@ module Google
|
|
114
114
|
end
|
115
115
|
end
|
116
116
|
end
|
117
|
+
|
118
|
+
# Config for Document OCR.
|
119
|
+
# @!attribute [rw] hints
|
120
|
+
# @return [::Google::Cloud::DocumentAI::V1::OcrConfig::Hints]
|
121
|
+
# Hints for the OCR model.
|
122
|
+
# @!attribute [rw] enable_native_pdf_parsing
|
123
|
+
# @return [::Boolean]
|
124
|
+
# Enables special handling for PDFs with existing text information. Results
|
125
|
+
# in better text extraction quality in such PDF inputs.
|
126
|
+
# @!attribute [rw] enable_image_quality_scores
|
127
|
+
# @return [::Boolean]
|
128
|
+
# Enables intelligent document quality scores after OCR. Can help with
|
129
|
+
# diagnosing why OCR responses are of poor quality for a given input.
|
130
|
+
# Adds additional latency comparable to regular OCR to the process call.
|
131
|
+
# @!attribute [rw] advanced_ocr_options
|
132
|
+
# @return [::Array<::String>]
|
133
|
+
# A list of advanced OCR options to further fine-tune OCR behavior. Current
|
134
|
+
# valid values are:
|
135
|
+
#
|
136
|
+
# - `legacy_layout`: a heuristics-based layout detection algorithm, which serves as
|
137
|
+
# an alternative to the current ML-based layout detection algorithm.
|
138
|
+
# Customers can choose the best suitable layout algorithm based on their
|
139
|
+
# situation.
|
140
|
+
# @!attribute [rw] enable_symbol
|
141
|
+
# @return [::Boolean]
|
142
|
+
# Includes symbol level OCR information if set to true.
|
143
|
+
# @!attribute [rw] compute_style_info
|
144
|
+
# @return [::Boolean]
|
145
|
+
# Turns on the font identification model and returns font style information.
|
146
|
+
class OcrConfig
|
147
|
+
include ::Google::Protobuf::MessageExts
|
148
|
+
extend ::Google::Protobuf::MessageExts::ClassMethods
|
149
|
+
|
150
|
+
# Hints for the OCR engine.
|
151
|
+
# @!attribute [rw] language_hints
|
152
|
+
# @return [::Array<::String>]
|
153
|
+
# List of BCP-47 language codes to use for OCR. In most cases, not
|
154
|
+
# specifying it yields the best results since it enables automatic language
|
155
|
+
# detection. For languages based on the Latin alphabet, setting hints is
|
156
|
+
# not needed. In rare cases, when the language of the text in the
|
157
|
+
# image is known, setting a hint will help get better results (although it
|
158
|
+
# will be a significant hindrance if the hint is wrong).
|
159
|
+
class Hints
|
160
|
+
include ::Google::Protobuf::MessageExts
|
161
|
+
extend ::Google::Protobuf::MessageExts::ClassMethods
|
162
|
+
end
|
163
|
+
end
|
117
164
|
end
|
118
165
|
end
|
119
166
|
end
|