google-cloud-document_ai-v1 0.12.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,10 +28,9 @@ module Google
28
28
  # @!attribute [rw] uri
29
29
  # @return [::String]
30
30
  # Optional. Currently supports Google Cloud Storage URI of the form
31
- # `gs://bucket_name/object_name`. Object versioning is not supported.
32
- # See [Google Cloud Storage Request
33
- # URIs](https://cloud.google.com/storage/docs/reference-uris) for more
34
- # info.
31
+ # `gs://bucket_name/object_name`. Object versioning is not supported.
32
+ # For more information, refer to [Google Cloud Storage Request
33
+ # URIs](https://cloud.google.com/storage/docs/reference-uris).
35
34
  # @!attribute [rw] content
36
35
  # @return [::String]
37
36
  # Optional. Inline document content, represented as a stream of bytes.
@@ -39,9 +38,8 @@ module Google
39
38
  # representation, whereas JSON representations use base64.
40
39
  # @!attribute [rw] mime_type
41
40
  # @return [::String]
42
- # An IANA published MIME type (also referred to as media type). For more
43
- # information, see
44
- # https://www.iana.org/assignments/media-types/media-types.xhtml.
41
+ # An IANA published [media type (MIME
42
+ # type)](https://www.iana.org/assignments/media-types/media-types.xhtml).
45
43
  # @!attribute [rw] text
46
44
  # @return [::String]
47
45
  # Optional. UTF-8 encoded text in reading order from the document.
@@ -113,17 +111,18 @@ module Google
113
111
  # Text background color.
114
112
  # @!attribute [rw] font_weight
115
113
  # @return [::String]
116
- # Font weight. Possible values are normal, bold, bolder, and lighter.
117
- # https://www.w3schools.com/cssref/pr_font_weight.asp
114
+ # [Font weight](https://www.w3schools.com/cssref/pr_font_weight.asp).
115
+ # Possible values are `normal`, `bold`, `bolder`, and `lighter`.
118
116
  # @!attribute [rw] text_style
119
117
  # @return [::String]
120
- # Text style. Possible values are normal, italic, and oblique.
121
- # https://www.w3schools.com/cssref/pr_font_font-style.asp
118
+ # [Text style](https://www.w3schools.com/cssref/pr_font_font-style.asp).
119
+ # Possible values are `normal`, `italic`, and `oblique`.
122
120
  # @!attribute [rw] text_decoration
123
121
  # @return [::String]
124
- # Text decoration. Follows CSS standard.
125
- # <text-decoration-line> <text-decoration-color> <text-decoration-style>
126
- # https://www.w3schools.com/cssref/pr_text_text-decoration.asp
122
+ # [Text
123
+ # decoration](https://www.w3schools.com/cssref/pr_text_text-decoration.asp).
124
+ # Follows CSS standard. `<text-decoration-line>` `<text-decoration-color>`
125
+ # `<text-decoration-style>`
127
126
  # @!attribute [rw] font_size
128
127
  # @return [::Google::Cloud::DocumentAI::V1::Document::Style::FontSize]
129
128
  # Font size.
@@ -141,7 +140,8 @@ module Google
141
140
  # Font size for the text.
142
141
  # @!attribute [rw] unit
143
142
  # @return [::String]
144
- # Unit for the font size. Follows CSS naming (in, px, pt, etc.).
143
+ # Unit for the font size. Follows CSS naming (such as `in`, `px`, and
144
+ # `pt`).
145
145
  class FontSize
146
146
  include ::Google::Protobuf::MessageExts
147
147
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -208,7 +208,7 @@ module Google
208
208
  # A list of detected barcodes.
209
209
  # @!attribute [rw] image_quality_scores
210
210
  # @return [::Google::Cloud::DocumentAI::V1::Document::Page::ImageQualityScores]
211
- # Image Quality Scores.
211
+ # Image quality scores.
212
212
  # @!attribute [rw] provenance
213
213
  # @return [::Google::Cloud::DocumentAI::V1::Document::Provenance]
214
214
  # The history of this page.
@@ -237,7 +237,9 @@ module Google
237
237
  # Raw byte content of the image.
238
238
  # @!attribute [rw] mime_type
239
239
  # @return [::String]
240
- # Encoding mime type for the image.
240
+ # Encoding [media type (MIME
241
+ # type)](https://www.iana.org/assignments/media-types/media-types.xhtml)
242
+ # for the image.
241
243
  # @!attribute [rw] width
242
244
  # @return [::Integer]
243
245
  # Width of the image in pixels.
@@ -382,6 +384,9 @@ module Google
382
384
  # @!attribute [rw] provenance
383
385
  # @return [::Google::Cloud::DocumentAI::V1::Document::Provenance]
384
386
  # The history of this annotation.
387
+ # @!attribute [rw] style_info
388
+ # @return [::Google::Cloud::DocumentAI::V1::Document::Page::Token::StyleInfo]
389
+ # Text style attributes.
385
390
  class Token
386
391
  include ::Google::Protobuf::MessageExts
387
392
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -410,6 +415,62 @@ module Google
410
415
  HYPHEN = 3
411
416
  end
412
417
  end
418
+
419
+ # Font and other text style attributes.
420
+ # @!attribute [rw] font_size
421
+ # @return [::Integer]
422
+ # Font size in points (`1` point is `¹⁄₇₂` inches).
423
+ # @!attribute [rw] pixel_font_size
424
+ # @return [::Float]
425
+ # Font size in pixels, equal to _unrounded
426
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Token::StyleInfo#font_size font_size}_
427
+ # × _resolution_ ÷ `72.0`.
428
+ # @!attribute [rw] letter_spacing
429
+ # @return [::Float]
430
+ # Letter spacing in points.
431
+ # @!attribute [rw] font_type
432
+ # @return [::String]
433
+ # Name or style of the font.
434
+ # @!attribute [rw] bold
435
+ # @return [::Boolean]
436
+ # Whether the text is bold (equivalent to
437
+ # {::Google::Cloud::DocumentAI::V1::Document::Page::Token::StyleInfo#font_weight font_weight}
438
+ # being at least `700`).
439
+ # @!attribute [rw] italic
440
+ # @return [::Boolean]
441
+ # Whether the text is italic.
442
+ # @!attribute [rw] underlined
443
+ # @return [::Boolean]
444
+ # Whether the text is underlined.
445
+ # @!attribute [rw] strikeout
446
+ # @return [::Boolean]
447
+ # Whether the text is struck through.
448
+ # @!attribute [rw] subscript
449
+ # @return [::Boolean]
450
+ # Whether the text is a subscript.
451
+ # @!attribute [rw] superscript
452
+ # @return [::Boolean]
453
+ # Whether the text is a superscript.
454
+ # @!attribute [rw] smallcaps
455
+ # @return [::Boolean]
456
+ # Whether the text is in small caps.
457
+ # @!attribute [rw] font_weight
458
+ # @return [::Integer]
459
+ # TrueType weight on a scale from `100` (thin) to `1000` (ultra-heavy).
460
+ # Normal is `400`, bold is `700`.
461
+ # @!attribute [rw] handwritten
462
+ # @return [::Boolean]
463
+ # Whether the text is handwritten.
464
+ # @!attribute [rw] text_color
465
+ # @return [::Google::Type::Color]
466
+ # Color of the text.
467
+ # @!attribute [rw] background_color
468
+ # @return [::Google::Type::Color]
469
+ # Color of the background.
470
+ class StyleInfo
471
+ include ::Google::Protobuf::MessageExts
472
+ extend ::Google::Protobuf::MessageExts::ClassMethods
473
+ end
413
474
  end
414
475
 
415
476
  # A detected symbol.
@@ -552,9 +613,9 @@ module Google
552
613
  # Detected language for a structural component.
553
614
  # @!attribute [rw] language_code
554
615
  # @return [::String]
555
- # The BCP-47 language code, such as `en-US` or `sr-Latn`. For more
556
- # information, see
557
- # https://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
616
+ # The [BCP-47 language
617
+ # code](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier),
618
+ # such as `en-US` or `sr-Latn`.
558
619
  # @!attribute [rw] confidence
559
620
  # @return [::Float]
560
621
  # Confidence of detected language. Range `[0, 1]`.
@@ -563,10 +624,10 @@ module Google
563
624
  extend ::Google::Protobuf::MessageExts::ClassMethods
564
625
  end
565
626
 
566
- # Image Quality Scores for the page image
627
+ # Image quality scores for the page image.
567
628
  # @!attribute [rw] quality_score
568
629
  # @return [::Float]
569
- # The overall quality score. Range `[0, 1]` where 1 is perfect quality.
630
+ # The overall quality score. Range `[0, 1]` where `1` is perfect quality.
570
631
  # @!attribute [rw] detected_defects
571
632
  # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::ImageQualityScores::DetectedDefect>]
572
633
  # A list of detected defects.
@@ -589,8 +650,8 @@ module Google
589
650
  # - `quality/defect_glare`
590
651
  # @!attribute [rw] confidence
591
652
  # @return [::Float]
592
- # Confidence of detected defect. Range `[0, 1]` where 1 indicates
593
- # strong confidence of that the defect exists.
653
+ # Confidence of detected defect. Range `[0, 1]` where `1` indicates
654
+ # strong confidence that the defect exists.
594
655
  class DetectedDefect
595
656
  include ::Google::Protobuf::MessageExts
596
657
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -877,9 +938,9 @@ module Google
877
938
  REMOVE = 2
878
939
 
879
940
  # Updates any fields within the given provenance scope of the message. It
880
- # 'overwrites' the fields rather than replacing them. This is
881
- # especially relevant when we just want to update a field value of an
882
- # entity without also affecting all the child properties.
941
+ # overwrites the fields rather than replacing them. Use this when you
942
+ # want to update a field value of an entity without also updating all the
943
+ # child properties.
883
944
  UPDATE = 7
884
945
 
885
946
  # Currently unused. Replace an element identified by `parent`.
@@ -114,6 +114,53 @@ module Google
114
114
  end
115
115
  end
116
116
  end
117
+
118
+ # Config for Document OCR.
119
+ # @!attribute [rw] hints
120
+ # @return [::Google::Cloud::DocumentAI::V1::OcrConfig::Hints]
121
+ # Hints for the OCR model.
122
+ # @!attribute [rw] enable_native_pdf_parsing
123
+ # @return [::Boolean]
124
+ # Enables special handling for PDFs with existing text information. Results
125
+ # in better text extraction quality in such PDF inputs.
126
+ # @!attribute [rw] enable_image_quality_scores
127
+ # @return [::Boolean]
128
+ # Enables intelligent document quality scores after OCR. Can help with
129
+ # diagnosing why OCR responses are of poor quality for a given input.
130
+ # Adds latency comparable to regular OCR to the process call.
131
+ # @!attribute [rw] advanced_ocr_options
132
+ # @return [::Array<::String>]
133
+ # A list of advanced OCR options to further fine-tune OCR behavior. Current
134
+ # valid values are:
135
+ #
136
+ # - `legacy_layout`: a heuristics-based layout detection algorithm, which serves as
137
+ # an alternative to the current ML-based layout detection algorithm.
138
+ # Customers can choose the most suitable layout algorithm based on their
139
+ # situation.
140
+ # @!attribute [rw] enable_symbol
141
+ # @return [::Boolean]
142
+ # Includes symbol-level OCR information if set to true.
143
+ # @!attribute [rw] compute_style_info
144
+ # @return [::Boolean]
145
+ # Turns on the font identification model and returns font style information.
146
+ class OcrConfig
147
+ include ::Google::Protobuf::MessageExts
148
+ extend ::Google::Protobuf::MessageExts::ClassMethods
149
+
150
+ # Hints for the OCR engine.
151
+ # @!attribute [rw] language_hints
152
+ # @return [::Array<::String>]
153
+ # List of BCP-47 language codes to use for OCR. In most cases, not
154
+ # specifying it yields the best results since it enables automatic language
155
+ # detection. For languages based on the Latin alphabet, setting hints is
156
+ # not needed. In rare cases, when the language of the text in the
157
+ # image is known, setting a hint will help get better results (although it
158
+ # will be a significant hindrance if the hint is wrong).
159
+ class Hints
160
+ include ::Google::Protobuf::MessageExts
161
+ extend ::Google::Protobuf::MessageExts::ClassMethods
162
+ end
163
+ end
117
164
  end
118
165
  end
119
166
  end