google-cloud-document_ai-v1beta3 0.24.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -106,6 +106,44 @@ module Google
106
106
  end
107
107
  end
108
108
 
109
+ # Document Identifier.
110
+ # @!attribute [rw] gcs_managed_doc_id
111
+ # @return [::Google::Cloud::DocumentAI::V1beta3::DocumentId::GCSManagedDocumentId]
112
+ # A document id within user-managed Cloud Storage.
113
+ # @!attribute [rw] unmanaged_doc_id
114
+ # @return [::Google::Cloud::DocumentAI::V1beta3::DocumentId::UnmanagedDocumentId]
115
+ # A document id within an unmanaged dataset.
116
+ # @!attribute [rw] revision_ref
117
+ # @return [::Google::Cloud::DocumentAI::V1beta3::RevisionRef]
118
+ # Points to a specific revision of the document if set.
119
+ class DocumentId
120
+ include ::Google::Protobuf::MessageExts
121
+ extend ::Google::Protobuf::MessageExts::ClassMethods
122
+
123
+ # Identifies a document uniquely within the scope of a dataset in the
124
+ # user-managed Cloud Storage option.
125
+ # @!attribute [rw] gcs_uri
126
+ # @return [::String]
127
+ # Required. The Cloud Storage URI where the actual document is stored.
128
+ # @!attribute [rw] cw_doc_id
129
+ # @return [::String]
130
+ # Id of the document (indexed) managed by Content Warehouse.
131
+ class GCSManagedDocumentId
132
+ include ::Google::Protobuf::MessageExts
133
+ extend ::Google::Protobuf::MessageExts::ClassMethods
134
+ end
135
+
136
+ # Identifies a document uniquely within the scope of a dataset in unmanaged
137
+ # option.
138
+ # @!attribute [rw] doc_id
139
+ # @return [::String]
140
+ # Required. The id of the document.
141
+ class UnmanagedDocumentId
142
+ include ::Google::Protobuf::MessageExts
143
+ extend ::Google::Protobuf::MessageExts::ClassMethods
144
+ end
145
+ end
146
+
109
147
  # Dataset Schema.
110
148
  # @!attribute [rw] name
111
149
  # @return [::String]
@@ -119,6 +157,30 @@ module Google
119
157
  include ::Google::Protobuf::MessageExts
120
158
  extend ::Google::Protobuf::MessageExts::ClassMethods
121
159
  end
160
+
161
+ # Dataset documents that the batch operation will be applied to.
162
+ # @!attribute [rw] individual_document_ids
163
+ # @return [::Google::Cloud::DocumentAI::V1beta3::BatchDatasetDocuments::IndividualDocumentIds]
164
+ # Document identifiers.
165
+ # @!attribute [rw] filter
166
+ # @return [::String]
167
+ # A filter matching the documents.
168
+ # Follows the same format and restriction as
169
+ # [google.cloud.documentai.master.ListDocumentsRequest.filter].
170
+ class BatchDatasetDocuments
171
+ include ::Google::Protobuf::MessageExts
172
+ extend ::Google::Protobuf::MessageExts::ClassMethods
173
+
174
+ # List of individual DocumentIds.
175
+ # @!attribute [rw] document_ids
176
+ # @return [::Array<::Google::Cloud::DocumentAI::V1beta3::DocumentId>]
177
+ # Required. List of Document IDs indicating where the actual documents are
178
+ # stored.
179
+ class IndividualDocumentIds
180
+ include ::Google::Protobuf::MessageExts
181
+ extend ::Google::Protobuf::MessageExts::ClassMethods
182
+ end
183
+ end
122
184
  end
123
185
  end
124
186
  end
@@ -1033,6 +1033,38 @@ module Google
1033
1033
  extend ::Google::Protobuf::MessageExts::ClassMethods
1034
1034
  end
1035
1035
  end
1036
+
1037
+ # The revision reference specifies which revision on the document to read.
1038
+ # @!attribute [rw] revision_case
1039
+ # @return [::Google::Cloud::DocumentAI::V1beta3::RevisionRef::RevisionCase]
1040
+ # Reads the revision by the predefined case.
1041
+ # @!attribute [rw] revision_id
1042
+ # @return [::String]
1043
+ # Reads the revision given by the id.
1044
+ # @!attribute [rw] latest_processor_version
1045
+ # @return [::String]
1046
+ # Reads the revision generated by the processor version.
1047
+ # The format takes the full resource name of processor version.
1048
+ # `projects/{project}/locations/{location}/processors/{processor}/processorVersions/{processorVersion}`
1049
+ class RevisionRef
1050
+ include ::Google::Protobuf::MessageExts
1051
+ extend ::Google::Protobuf::MessageExts::ClassMethods
1052
+
1053
+ # Some predefined revision cases.
1054
+ module RevisionCase
1055
+ # Unspecified case, falls back to reading the LATEST_HUMAN_REVIEW.
1056
+ REVISION_CASE_UNSPECIFIED = 0
1057
+
1058
+ # The latest revision made by a human.
1059
+ LATEST_HUMAN_REVIEW = 1
1060
+
1061
+ # The latest revision based on timestamp.
1062
+ LATEST_TIMESTAMP = 2
1063
+
1064
+ # The first (OCR) revision.
1065
+ BASE_OCR_REVISION = 3
1066
+ end
1067
+ end
1036
1068
  end
1037
1069
  end
1038
1070
  end
@@ -40,6 +40,9 @@ module Google
40
40
  # @!attribute [rw] raw_document
41
41
  # @return [::Google::Cloud::DocumentAI::V1beta3::RawDocument]
42
42
  # A raw document content (bytes).
43
+ # @!attribute [rw] gcs_document
44
+ # @return [::Google::Cloud::DocumentAI::V1beta3::GcsDocument]
45
+ # A raw document on Google Cloud Storage.
43
46
  # @!attribute [rw] name
44
47
  # @return [::String]
45
48
  # Required. The resource name of the
@@ -41,6 +41,179 @@ module Google
41
41
  extend ::Google::Protobuf::MessageExts::ClassMethods
42
42
  end
43
43
 
44
+ # @!attribute [rw] dataset
45
+ # @return [::String]
46
+ # Required. The dataset resource name.
47
+ # Format:
48
+ # projects/\\{project}/locations/\\{location}/processors/\\{processor}/dataset
49
+ # @!attribute [rw] batch_documents_import_configs
50
+ # @return [::Array<::Google::Cloud::DocumentAI::V1beta3::ImportDocumentsRequest::BatchDocumentsImportConfig>]
51
+ # Required. The Cloud Storage uri containing raw documents that must be
52
+ # imported.
53
+ class ImportDocumentsRequest
54
+ include ::Google::Protobuf::MessageExts
55
+ extend ::Google::Protobuf::MessageExts::ClassMethods
56
+
57
+ # Config for importing documents.
58
+ # Each batch can have its own dataset split type.
59
+ # @!attribute [rw] dataset_split
60
+ # @return [::Google::Cloud::DocumentAI::V1beta3::DatasetSplitType]
61
+ # Target dataset split where the documents must be stored.
62
+ # @!attribute [rw] auto_split_config
63
+ # @return [::Google::Cloud::DocumentAI::V1beta3::ImportDocumentsRequest::BatchDocumentsImportConfig::AutoSplitConfig]
64
+ # If set, documents will be automatically split into training and test
65
+ # split category with the specified ratio.
66
+ # @!attribute [rw] batch_input_config
67
+ # @return [::Google::Cloud::DocumentAI::V1beta3::BatchDocumentsInputConfig]
68
+ # The common config to specify a set of documents used as input.
69
+ class BatchDocumentsImportConfig
70
+ include ::Google::Protobuf::MessageExts
71
+ extend ::Google::Protobuf::MessageExts::ClassMethods
72
+
73
+ # The config for auto-split.
74
+ # @!attribute [rw] training_split_ratio
75
+ # @return [::Float]
76
+ # Ratio of training dataset split.
77
+ class AutoSplitConfig
78
+ include ::Google::Protobuf::MessageExts
79
+ extend ::Google::Protobuf::MessageExts::ClassMethods
80
+ end
81
+ end
82
+ end
83
+
84
+ # Response of the import document operation.
85
+ class ImportDocumentsResponse
86
+ include ::Google::Protobuf::MessageExts
87
+ extend ::Google::Protobuf::MessageExts::ClassMethods
88
+ end
89
+
90
+ # Metadata of the import document operation.
91
+ # @!attribute [rw] common_metadata
92
+ # @return [::Google::Cloud::DocumentAI::V1beta3::CommonOperationMetadata]
93
+ # The basic metadata of the long running operation.
94
+ # @!attribute [rw] individual_import_statuses
95
+ # @return [::Array<::Google::Cloud::DocumentAI::V1beta3::ImportDocumentsMetadata::IndividualImportStatus>]
96
+ # The list of response details of each document.
97
+ # @!attribute [rw] import_config_validation_results
98
+ # @return [::Array<::Google::Cloud::DocumentAI::V1beta3::ImportDocumentsMetadata::ImportConfigValidationResult>]
99
+ # Validation statuses of the batch documents import config.
100
+ # @!attribute [rw] total_document_count
101
+ # @return [::Integer]
102
+ # Total number of the documents that are qualified for importing.
103
+ class ImportDocumentsMetadata
104
+ include ::Google::Protobuf::MessageExts
105
+ extend ::Google::Protobuf::MessageExts::ClassMethods
106
+
107
+ # The status of each individual document in the import process.
108
+ # @!attribute [rw] input_gcs_source
109
+ # @return [::String]
110
+ # The source Cloud Storage URI of the document.
111
+ # @!attribute [rw] status
112
+ # @return [::Google::Rpc::Status]
113
+ # The status of the importing of the document.
114
+ # @!attribute [rw] output_document_id
115
+ # @return [::Google::Cloud::DocumentAI::V1beta3::DocumentId]
116
+ # The document id of imported document if it was successful, otherwise
117
+ # empty.
118
+ class IndividualImportStatus
119
+ include ::Google::Protobuf::MessageExts
120
+ extend ::Google::Protobuf::MessageExts::ClassMethods
121
+ end
122
+
123
+ # The validation status of each import config. Status is set to errors if
124
+ # there are no documents to import in the import_config, or OK if the
125
+ # operation will try to proceed with at least one document.
126
+ # @!attribute [rw] input_gcs_source
127
+ # @return [::String]
128
+ # The source Cloud Storage URI specified in the import config.
129
+ # @!attribute [rw] status
130
+ # @return [::Google::Rpc::Status]
131
+ # The validation status of import config.
132
+ class ImportConfigValidationResult
133
+ include ::Google::Protobuf::MessageExts
134
+ extend ::Google::Protobuf::MessageExts::ClassMethods
135
+ end
136
+ end
137
+
138
+ # @!attribute [rw] dataset
139
+ # @return [::String]
140
+ # Required. The resource name of the dataset that the document belongs to.
141
+ # Format:
142
+ # projects/\\{project}/locations/\\{location}/processors/\\{processor}/dataset
143
+ # @!attribute [rw] document_id
144
+ # @return [::Google::Cloud::DocumentAI::V1beta3::DocumentId]
145
+ # Required. Document identifier.
146
+ # @!attribute [rw] read_mask
147
+ # @return [::Google::Protobuf::FieldMask]
148
+ # If set, only fields listed here will be returned. Otherwise, all fields
149
+ # will be returned by default.
150
+ # @!attribute [rw] page_range
151
+ # @return [::Google::Cloud::DocumentAI::V1beta3::DocumentPageRange]
152
+ # List of pages for which the fields specified in the `read_mask` must
153
+ # be served.
154
+ class GetDocumentRequest
155
+ include ::Google::Protobuf::MessageExts
156
+ extend ::Google::Protobuf::MessageExts::ClassMethods
157
+ end
158
+
159
+ # @!attribute [rw] document
160
+ # @return [::Google::Cloud::DocumentAI::V1beta3::Document]
161
+ class GetDocumentResponse
162
+ include ::Google::Protobuf::MessageExts
163
+ extend ::Google::Protobuf::MessageExts::ClassMethods
164
+ end
165
+
166
+ # @!attribute [rw] dataset
167
+ # @return [::String]
168
+ # Required. The dataset resource name.
169
+ # Format:
170
+ # projects/\\{project}/locations/\\{location}/processors/\\{processor}/dataset
171
+ # @!attribute [rw] dataset_documents
172
+ # @return [::Google::Cloud::DocumentAI::V1beta3::BatchDatasetDocuments]
173
+ # Required. Dataset documents input. If given `filter`, all documents
174
+ # satisfying the filter will be deleted. If given documentIds, a maximum of
175
+ # 50 documents can be deleted in a batch. The request will be rejected if
176
+ # more than 50 document_ids are provided.
177
+ class BatchDeleteDocumentsRequest
178
+ include ::Google::Protobuf::MessageExts
179
+ extend ::Google::Protobuf::MessageExts::ClassMethods
180
+ end
181
+
182
+ # Response of the delete documents operation.
183
+ class BatchDeleteDocumentsResponse
184
+ include ::Google::Protobuf::MessageExts
185
+ extend ::Google::Protobuf::MessageExts::ClassMethods
186
+ end
187
+
188
+ # @!attribute [rw] common_metadata
189
+ # @return [::Google::Cloud::DocumentAI::V1beta3::CommonOperationMetadata]
190
+ # The basic metadata of the long running operation.
191
+ # @!attribute [rw] individual_batch_delete_statuses
192
+ # @return [::Array<::Google::Cloud::DocumentAI::V1beta3::BatchDeleteDocumentsMetadata::IndividualBatchDeleteStatus>]
193
+ # The list of response details of each document.
194
+ # @!attribute [rw] total_document_count
195
+ # @return [::Integer]
196
+ # Total number of documents being deleted from the dataset.
197
+ # @!attribute [rw] error_document_count
198
+ # @return [::Integer]
199
+ # Total number of documents that failed to be deleted in storage.
200
+ class BatchDeleteDocumentsMetadata
201
+ include ::Google::Protobuf::MessageExts
202
+ extend ::Google::Protobuf::MessageExts::ClassMethods
203
+
204
+ # The status of each individual document in the batch delete process.
205
+ # @!attribute [rw] document_id
206
+ # @return [::Google::Cloud::DocumentAI::V1beta3::DocumentId]
207
+ # The document id of the document.
208
+ # @!attribute [rw] status
209
+ # @return [::Google::Rpc::Status]
210
+ # The status of deleting the document in storage.
211
+ class IndividualBatchDeleteStatus
212
+ include ::Google::Protobuf::MessageExts
213
+ extend ::Google::Protobuf::MessageExts::ClassMethods
214
+ end
215
+ end
216
+
44
217
  # Request for `GetDatasetSchema`.
45
218
  # @!attribute [rw] name
46
219
  # @return [::String]
@@ -67,6 +240,35 @@ module Google
67
240
  include ::Google::Protobuf::MessageExts
68
241
  extend ::Google::Protobuf::MessageExts::ClassMethods
69
242
  end
243
+
244
+ # Range of pages present in a document.
245
+ # @!attribute [rw] start
246
+ # @return [::Integer]
247
+ # First page number (one-based index) to be returned.
248
+ # @!attribute [rw] end
249
+ # @return [::Integer]
250
+ # Last page number (one-based index) to be returned.
251
+ class DocumentPageRange
252
+ include ::Google::Protobuf::MessageExts
253
+ extend ::Google::Protobuf::MessageExts::ClassMethods
254
+ end
255
+
256
+ # Documents belonging to a dataset will be split into different groups
257
+ # referred to as splits: train, test.
258
+ module DatasetSplitType
259
+ # Default value if the enum is not set.
260
+ # go/protodosdonts#do-include-an-unspecified-value-in-an-enum
261
+ DATASET_SPLIT_TYPE_UNSPECIFIED = 0
262
+
263
+ # Identifies the train documents.
264
+ DATASET_SPLIT_TRAIN = 1
265
+
266
+ # Identifies the test documents.
267
+ DATASET_SPLIT_TEST = 2
268
+
269
+ # Identifies the unassigned documents.
270
+ DATASET_SPLIT_UNASSIGNED = 3
271
+ end
70
272
  end
71
273
  end
72
274
  end
@@ -51,9 +51,9 @@ module Google
51
51
  # @!attribute [rw] kms_key_version_name
52
52
  # @return [::String]
53
53
  # The KMS key version with which data is encrypted.
54
- # @!attribute [rw] google_managed
54
+ # @!attribute [r] google_managed
55
55
  # @return [::Boolean]
56
- # Denotes that this `ProcessorVersion` is managed by Google.
56
+ # Output only. Denotes that this `ProcessorVersion` is managed by Google.
57
57
  # @!attribute [rw] deprecation_info
58
58
  # @return [::Google::Cloud::DocumentAI::V1beta3::ProcessorVersion::DeprecationInfo]
59
59
  # If set, information about the eventual deprecation of this version.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: google-cloud-document_ai-v1beta3
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.24.0
4
+ version: 0.25.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Google LLC
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-06-20 00:00:00.000000000 Z
11
+ date: 2023-07-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: gapic-common