google-cloud-automl-v1 0.1.0

Files changed (71)
  1. checksums.yaml +7 -0
  2. data/.yardopts +12 -0
  3. data/AUTHENTICATION.md +169 -0
  4. data/LICENSE.md +203 -0
  5. data/README.md +71 -0
  6. data/lib/google/cloud/automl/v1/annotation_payload_pb.rb +37 -0
  7. data/lib/google/cloud/automl/v1/annotation_spec_pb.rb +26 -0
  8. data/lib/google/cloud/automl/v1/automl/client.rb +1851 -0
  9. data/lib/google/cloud/automl/v1/automl/credentials.rb +51 -0
  10. data/lib/google/cloud/automl/v1/automl/operations.rb +564 -0
  11. data/lib/google/cloud/automl/v1/automl/paths.rb +130 -0
  12. data/lib/google/cloud/automl/v1/automl.rb +63 -0
  13. data/lib/google/cloud/automl/v1/classification_pb.rb +65 -0
  14. data/lib/google/cloud/automl/v1/data_items_pb.rb +87 -0
  15. data/lib/google/cloud/automl/v1/dataset_pb.rb +42 -0
  16. data/lib/google/cloud/automl/v1/detection_pb.rb +45 -0
  17. data/lib/google/cloud/automl/v1/geometry_pb.rb +28 -0
  18. data/lib/google/cloud/automl/v1/image_pb.rb +57 -0
  19. data/lib/google/cloud/automl/v1/io_pb.rb +65 -0
  20. data/lib/google/cloud/automl/v1/model_evaluation_pb.rb +41 -0
  21. data/lib/google/cloud/automl/v1/model_pb.rb +49 -0
  22. data/lib/google/cloud/automl/v1/operations_pb.rb +88 -0
  23. data/lib/google/cloud/automl/v1/prediction_service/client.rb +634 -0
  24. data/lib/google/cloud/automl/v1/prediction_service/credentials.rb +51 -0
  25. data/lib/google/cloud/automl/v1/prediction_service/operations.rb +564 -0
  26. data/lib/google/cloud/automl/v1/prediction_service/paths.rb +52 -0
  27. data/lib/google/cloud/automl/v1/prediction_service.rb +53 -0
  28. data/lib/google/cloud/automl/v1/prediction_service_pb.rb +50 -0
  29. data/lib/google/cloud/automl/v1/prediction_service_services_pb.rb +103 -0
  30. data/lib/google/cloud/automl/v1/service_pb.rb +139 -0
  31. data/lib/google/cloud/automl/v1/service_services_pb.rb +137 -0
  32. data/lib/google/cloud/automl/v1/text_extraction_pb.rb +39 -0
  33. data/lib/google/cloud/automl/v1/text_pb.rb +41 -0
  34. data/lib/google/cloud/automl/v1/text_segment_pb.rb +25 -0
  35. data/lib/google/cloud/automl/v1/text_sentiment_pb.rb +35 -0
  36. data/lib/google/cloud/automl/v1/translation_pb.rb +41 -0
  37. data/lib/google/cloud/automl/v1/version.rb +28 -0
  38. data/lib/google/cloud/automl/v1.rb +36 -0
  39. data/lib/google/cloud/common_resources_pb.rb +15 -0
  40. data/lib/google-cloud-automl-v1.rb +21 -0
  41. data/proto_docs/README.md +4 -0
  42. data/proto_docs/google/api/field_behavior.rb +59 -0
  43. data/proto_docs/google/api/resource.rb +247 -0
  44. data/proto_docs/google/cloud/automl/v1/annotation_payload.rb +60 -0
  45. data/proto_docs/google/cloud/automl/v1/annotation_spec.rb +46 -0
  46. data/proto_docs/google/cloud/automl/v1/classification.rb +191 -0
  47. data/proto_docs/google/cloud/automl/v1/data_items.rb +197 -0
  48. data/proto_docs/google/cloud/automl/v1/dataset.rb +94 -0
  49. data/proto_docs/google/cloud/automl/v1/detection.rb +97 -0
  50. data/proto_docs/google/cloud/automl/v1/geometry.rb +53 -0
  51. data/proto_docs/google/cloud/automl/v1/image.rb +217 -0
  52. data/proto_docs/google/cloud/automl/v1/io.rb +1402 -0
  53. data/proto_docs/google/cloud/automl/v1/model.rb +109 -0
  54. data/proto_docs/google/cloud/automl/v1/model_evaluation.rb +94 -0
  55. data/proto_docs/google/cloud/automl/v1/operations.rb +180 -0
  56. data/proto_docs/google/cloud/automl/v1/prediction_service.rb +271 -0
  57. data/proto_docs/google/cloud/automl/v1/service.rb +322 -0
  58. data/proto_docs/google/cloud/automl/v1/text.rb +76 -0
  59. data/proto_docs/google/cloud/automl/v1/text_extraction.rb +73 -0
  60. data/proto_docs/google/cloud/automl/v1/text_segment.rb +44 -0
  61. data/proto_docs/google/cloud/automl/v1/text_sentiment.rb +82 -0
  62. data/proto_docs/google/cloud/automl/v1/translation.rb +79 -0
  63. data/proto_docs/google/longrunning/operations.rb +150 -0
  64. data/proto_docs/google/protobuf/any.rb +138 -0
  65. data/proto_docs/google/protobuf/duration.rb +98 -0
  66. data/proto_docs/google/protobuf/empty.rb +36 -0
  67. data/proto_docs/google/protobuf/field_mask.rb +229 -0
  68. data/proto_docs/google/protobuf/struct.rb +96 -0
  69. data/proto_docs/google/protobuf/timestamp.rb +120 -0
  70. data/proto_docs/google/rpc/status.rb +46 -0
  71. metadata +227 -0
@@ -0,0 +1,1402 @@
+ # frozen_string_literal: true
+
+ # Copyright 2020 Google LLC
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     https://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ # Auto-generated by gapic-generator-ruby. DO NOT EDIT!
+
+
+ module Google
+   module Cloud
+     module AutoML
+       module V1
+         # Input configuration for {Google::Cloud::AutoML::V1::AutoML::Client#import_data AutoMl.ImportData} action.
+         #
+         # The format of the input depends on the `dataset_metadata` of the Dataset
+         # into which the import is happening. As input source the
+         # {Google::Cloud::AutoML::V1::InputConfig#gcs_source gcs_source}
+         # is expected, unless specified otherwise. Additionally, any input .CSV file
+         # by itself must be 100MB or smaller, unless specified otherwise.
+         # If an "example" file (that is, image, video etc.) with identical content
+         # (even if it had a different `GCS_FILE_PATH`) is mentioned multiple times,
+         # then its label, bounding boxes etc. are appended. The same file should
+         # always be provided with the same `ML_USE` and `GCS_FILE_PATH`; if it is
+         # not, these values are nondeterministically selected from the given ones.
+         #
+         # The formats are represented in EBNF with commas being literal and with
+         # non-terminal symbols defined near the end of this comment. The formats are:
+         #
+         # <h4>AutoML Vision</h4>
+         #
+         # <div class="ds-selector-tabs"><section><h5>Classification</h5>
+         #
+         # See [Preparing your training
+         # data](https://cloud.google.com/vision/automl/docs/prepare) for more
+         # information.
+         #
+         # CSV file(s) with each line in format:
+         #
+         #     ML_USE,GCS_FILE_PATH,LABEL,LABEL,...
+         #
+         # * `ML_USE` - Identifies the data set that the current row (file) applies
+         #   to. This value can be one of the following:
+         #   * `TRAIN` - Rows in this file are used to train the model.
+         #   * `TEST` - Rows in this file are used to test the model during training.
+         #   * `UNASSIGNED` - Rows in this file are not categorized. They are
+         #     automatically divided into train and test data: 80% for training and
+         #     20% for testing.
+         #
+         # * `GCS_FILE_PATH` - The Google Cloud Storage location of an image of up to
+         #   30MB in size. Supported extensions: .JPEG, .GIF, .PNG, .WEBP, .BMP,
+         #   .TIFF, .ICO.
+         #
+         # * `LABEL` - A label that identifies the object in the image.
+         #
+         # For the `MULTICLASS` classification type, at most one `LABEL` is allowed
+         # per image. If an image has not yet been labeled, then it should be
+         # mentioned just once with no `LABEL`.
+         #
+         # Some sample rows:
+         #
+         #     TRAIN,gs://folder/image1.jpg,daisy
+         #     TEST,gs://folder/image2.jpg,dandelion,tulip,rose
+         #     UNASSIGNED,gs://folder/image3.jpg,daisy
+         #     UNASSIGNED,gs://folder/image4.jpg
+         #
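+         # As a usage illustration only (not part of the format spec): a minimal
+         # Ruby sketch that imports a CSV like the one above into an existing
+         # dataset. The project, location, dataset ID, and bucket path are
+         # hypothetical.
+         #
+         #     require "google/cloud/automl/v1"
+         #
+         #     client = Google::Cloud::AutoML::V1::AutoML::Client.new
+         #     dataset = client.dataset_path project:  "my-project",
+         #                                   location: "us-central1",
+         #                                   dataset:  "my-dataset-id"
+         #     operation = client.import_data(
+         #       name:         dataset,
+         #       input_config: { gcs_source: { input_uris: ["gs://folder/vision.csv"] } }
+         #     )
+         #     operation.wait_until_done! # import runs as a long-running operation
+         #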
+         # </section><section><h5>Object Detection</h5>
+         #
+         # See [Preparing your training
+         # data](https://cloud.google.com/vision/automl/object-detection/docs/prepare)
+         # for more information.
+         #
+         # CSV file(s) with each line in format:
+         #
+         #     ML_USE,GCS_FILE_PATH,[LABEL],(BOUNDING_BOX | ,,,,,,,)
+         #
+         # * `ML_USE` - Identifies the data set that the current row (file) applies
+         #   to. This value can be one of the following:
+         #   * `TRAIN` - Rows in this file are used to train the model.
+         #   * `TEST` - Rows in this file are used to test the model during training.
+         #   * `UNASSIGNED` - Rows in this file are not categorized. They are
+         #     automatically divided into train and test data: 80% for training and
+         #     20% for testing.
+         #
+         # * `GCS_FILE_PATH` - The Google Cloud Storage location of an image of up to
+         #   30MB in size. Supported extensions: .JPEG, .GIF, .PNG. Each image
+         #   is assumed to be exhaustively labeled.
+         #
+         # * `LABEL` - A label that identifies the object in the image specified by
+         #   the `BOUNDING_BOX`.
+         #
+         # * `BOUNDING_BOX` - The vertices of an object in the example image.
+         #   The minimum allowed `BOUNDING_BOX` edge length is 0.01, and no more than
+         #   500 `BOUNDING_BOX` instances per image are allowed (one `BOUNDING_BOX`
+         #   per line). If an image has no objects of interest, then it should be
+         #   mentioned just once with no `LABEL` and ",,,,,,," in place of the
+         #   `BOUNDING_BOX`.
+         #
+         # **Four sample rows:**
+         #
+         #     TRAIN,gs://folder/image1.png,car,0.1,0.1,,,0.3,0.3,,
+         #     TRAIN,gs://folder/image1.png,bike,.7,.6,,,.8,.9,,
+         #     UNASSIGNED,gs://folder/im2.png,car,0.1,0.1,0.2,0.1,0.2,0.3,0.1,0.3
+         #     TEST,gs://folder/im3.png,,,,,,,,,
+         # </section>
+         # </div>
+         #
+         # <h4>AutoML Video Intelligence</h4>
+         #
+         # <div class="ds-selector-tabs"><section><h5>Classification</h5>
+         #
+         # See [Preparing your training
+         # data](https://cloud.google.com/video-intelligence/automl/docs/prepare) for
+         # more information.
+         #
+         # CSV file(s) with each line in format:
+         #
+         #     ML_USE,GCS_FILE_PATH
+         #
+         # For `ML_USE`, do not use `VALIDATE`.
+         #
+         # `GCS_FILE_PATH` is the path to another .csv file that describes training
+         # examples for a given `ML_USE`, using the following row format:
+         #
+         #     GCS_FILE_PATH,(LABEL,TIME_SEGMENT_START,TIME_SEGMENT_END | ,,)
+         #
+         # Here `GCS_FILE_PATH` leads to a video of up to 50GB in size and up
+         # to 3h in duration. Supported extensions: .MOV, .MPEG4, .MP4, .AVI.
+         #
+         # `TIME_SEGMENT_START` and `TIME_SEGMENT_END` must be within the
+         # length of the video, and the end time must be after the start time. Any
+         # segment of a video which has one or more labels on it is considered a
+         # hard negative for all other labels. Any segment with no labels on
+         # it is considered to be unknown. If a whole video is unknown, then
+         # it should be mentioned just once with ",," in place of `LABEL,
+         # TIME_SEGMENT_START,TIME_SEGMENT_END`.
+         #
+         # Sample top level CSV file:
+         #
+         #     TRAIN,gs://folder/train_videos.csv
+         #     TEST,gs://folder/test_videos.csv
+         #     UNASSIGNED,gs://folder/other_videos.csv
+         #
+         # Sample rows of a CSV file for a particular `ML_USE`:
+         #
+         #     gs://folder/video1.avi,car,120,180.000021
+         #     gs://folder/video1.avi,bike,150,180.000021
+         #     gs://folder/vid2.avi,car,0,60.5
+         #     gs://folder/vid3.avi,,,
+         #
+         # </section><section><h5>Object Tracking</h5>
+         #
+         # See [Preparing your training
+         # data](/video-intelligence/automl/object-tracking/docs/prepare) for more
+         # information.
+         #
+         # CSV file(s) with each line in format:
+         #
+         #     ML_USE,GCS_FILE_PATH
+         #
+         # For `ML_USE`, do not use `VALIDATE`.
+         #
+         # `GCS_FILE_PATH` is the path to another .csv file that describes training
+         # examples for a given `ML_USE`, using the following row format:
+         #
+         #     GCS_FILE_PATH,LABEL,[INSTANCE_ID],TIMESTAMP,BOUNDING_BOX
+         #
+         # or
+         #
+         #     GCS_FILE_PATH,,,,,,,,,,
+         #
+         # Here `GCS_FILE_PATH` leads to a video of up to 50GB in size and up
+         # to 3h in duration. Supported extensions: .MOV, .MPEG4, .MP4, .AVI.
+         # Providing `INSTANCE_ID`s can help to obtain a better model. When
+         # a specific labeled entity leaves the video frame and shows up
+         # afterwards, it is not required, albeit preferable, that the same
+         # `INSTANCE_ID` is given to it.
+         #
+         # `TIMESTAMP` must be within the length of the video; the
+         # `BOUNDING_BOX` is assumed to be drawn on the video frame closest
+         # to the `TIMESTAMP`. Any frame mentioned by a `TIMESTAMP` is expected
+         # to be exhaustively labeled, and no more than 500 `BOUNDING_BOX`-es per
+         # frame are allowed. If a whole video is unknown, then it should be
+         # mentioned just once with ",,,,,,,,,," in place of `LABEL,
+         # [INSTANCE_ID],TIMESTAMP,BOUNDING_BOX`.
+         #
+         # Sample top level CSV file:
+         #
+         #     TRAIN,gs://folder/train_videos.csv
+         #     TEST,gs://folder/test_videos.csv
+         #     UNASSIGNED,gs://folder/other_videos.csv
+         #
+         # Seven sample rows of a CSV file for a particular `ML_USE`:
+         #
+         #     gs://folder/video1.avi,car,1,12.10,0.8,0.8,0.9,0.8,0.9,0.9,0.8,0.9
+         #     gs://folder/video1.avi,car,1,12.90,0.4,0.8,0.5,0.8,0.5,0.9,0.4,0.9
+         #     gs://folder/video1.avi,car,2,12.10,.4,.2,.5,.2,.5,.3,.4,.3
+         #     gs://folder/video1.avi,car,2,12.90,.8,.2,,,.9,.3,,
+         #     gs://folder/video1.avi,bike,,12.50,.45,.45,,,.55,.55,,
+         #     gs://folder/video2.avi,car,1,0,.1,.9,,,.9,.1,,
+         #     gs://folder/video2.avi,,,,,,,,,,,
+         # </section>
+         # </div>
+         #
+         # <h4>AutoML Natural Language</h4>
+         #
+         # <div class="ds-selector-tabs"><section><h5>Entity Extraction</h5>
+         #
+         # See [Preparing your training
+         # data](/natural-language/automl/entity-analysis/docs/prepare) for more
+         # information.
+         #
+         # One or more CSV file(s) with each line in the following format:
+         #
+         #     ML_USE,GCS_FILE_PATH
+         #
+         # * `ML_USE` - Identifies the data set that the current row (file) applies
+         #   to. This value can be one of the following:
+         #   * `TRAIN` - Rows in this file are used to train the model.
+         #   * `TEST` - Rows in this file are used to test the model during training.
+         #   * `UNASSIGNED` - Rows in this file are not categorized. They are
+         #     automatically divided into train and test data: 80% for training and
+         #     20% for testing.
+         #
+         # * `GCS_FILE_PATH` - Identifies a JSON Lines (.JSONL) file stored in
+         #   Google Cloud Storage that contains in-line text as documents
+         #   for model training.
+         #
+         # After the training data set has been determined from the `TRAIN` and
+         # `UNASSIGNED` CSV files, the training data is divided into train and
+         # validation data sets: 70% for training and 30% for validation.
+         #
+         # For example:
+         #
+         #     TRAIN,gs://folder/file1.jsonl
+         #     VALIDATE,gs://folder/file2.jsonl
+         #     TEST,gs://folder/file3.jsonl
+         #
+         # **In-line JSONL files**
+         #
+         # In-line .JSONL files contain, per line, a JSON document that wraps a
+         # {Google::Cloud::AutoML::V1::TextSnippet `text_snippet`} field followed by
+         # one or more {Google::Cloud::AutoML::V1::AnnotationPayload `annotations`}
+         # fields, which have `display_name` and `text_extraction` fields to describe
+         # the entity from the text snippet. Multiple JSON documents can be separated
+         # using line breaks (\n).
+         #
+         # The supplied text must be annotated exhaustively. For example, if you
+         # include the text "horse", but do not label it as "animal",
+         # then "horse" is assumed to not be an "animal".
+         #
+         # Any given text snippet content must have 30,000 characters or
+         # less, and also be UTF-8 NFC encoded. ASCII is accepted as it is
+         # UTF-8 NFC encoded.
+         #
+         # For example:
+         #
+         #     {
+         #       "text_snippet": {
+         #         "content": "dog car cat"
+         #       },
+         #       "annotations": [
+         #         {
+         #           "display_name": "animal",
+         #           "text_extraction": {
+         #             "text_segment": {"start_offset": 0, "end_offset": 2}
+         #           }
+         #         },
+         #         {
+         #           "display_name": "vehicle",
+         #           "text_extraction": {
+         #             "text_segment": {"start_offset": 4, "end_offset": 6}
+         #           }
+         #         },
+         #         {
+         #           "display_name": "animal",
+         #           "text_extraction": {
+         #             "text_segment": {"start_offset": 8, "end_offset": 10}
+         #           }
+         #         }
+         #       ]
+         #     }\n
+         #     {
+         #       "text_snippet": {
+         #         "content": "This dog is good."
+         #       },
+         #       "annotations": [
+         #         {
+         #           "display_name": "animal",
+         #           "text_extraction": {
+         #             "text_segment": {"start_offset": 5, "end_offset": 7}
+         #           }
+         #         }
+         #       ]
+         #     }
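+         #
+         # The JSONL above can be produced with the Ruby standard library alone.
+         # A minimal, illustrative sketch (the file name and labels are just the
+         # ones from the example, not API requirements):
+         #
+         #     require "json"
+         #
+         #     rows = [
+         #       { text_snippet: { content: "dog car cat" },
+         #         annotations: [
+         #           { display_name: "animal",
+         #             text_extraction: { text_segment: { start_offset: 0, end_offset: 2 } } }
+         #         ] }
+         #     ]
+         #     File.open("entities.jsonl", "w") do |f|
+         #       rows.each { |row| f.puts row.to_json } # one JSON document per line
+         #     end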
+         #
+         # **JSONL files that reference documents**
+         #
+         # .JSONL files contain, per line, a JSON document that wraps an
+         # `input_config` that contains the path to a source document.
+         # Multiple JSON documents can be separated using line breaks (\n).
+         #
+         # Supported document extensions: .PDF, .TIF, .TIFF
+         #
+         # For example:
+         #
+         #     {
+         #       "document": {
+         #         "input_config": {
+         #           "gcs_source": { "input_uris": [ "gs://folder/document1.pdf" ]
+         #           }
+         #         }
+         #       }
+         #     }\n
+         #     {
+         #       "document": {
+         #         "input_config": {
+         #           "gcs_source": { "input_uris": [ "gs://folder/document2.tif" ]
+         #           }
+         #         }
+         #       }
+         #     }
+         #
+         # **In-line JSONL files with document layout information**
+         #
+         # **Note:** You can only annotate documents using the UI. The format described
+         # below applies to annotated documents exported using the UI or `exportData`.
+         #
+         # In-line .JSONL files for documents contain, per line, a JSON document
+         # that wraps a `document` field that provides the textual content of the
+         # document and the layout information.
+         #
+         # For example:
+         #
+         #     {
+         #       "document": {
+         #         "document_text": {
+         #           "content": "dog car cat"
+         #         },
+         #         "layout": [
+         #           {
+         #             "text_segment": {
+         #               "start_offset": 0,
+         #               "end_offset": 11
+         #             },
+         #             "page_number": 1,
+         #             "bounding_poly": {
+         #               "normalized_vertices": [
+         #                 {"x": 0.1, "y": 0.1},
+         #                 {"x": 0.1, "y": 0.3},
+         #                 {"x": 0.3, "y": 0.3},
+         #                 {"x": 0.3, "y": 0.1}
+         #               ]
+         #             },
+         #             "text_segment_type": TOKEN
+         #           }
+         #         ],
+         #         "document_dimensions": {
+         #           "width": 8.27,
+         #           "height": 11.69,
+         #           "unit": INCH
+         #         },
+         #         "page_count": 3
+         #       },
+         #       "annotations": [
+         #         {
+         #           "display_name": "animal",
+         #           "text_extraction": {
+         #             "text_segment": {"start_offset": 0, "end_offset": 3}
+         #           }
+         #         },
+         #         {
+         #           "display_name": "vehicle",
+         #           "text_extraction": {
+         #             "text_segment": {"start_offset": 4, "end_offset": 7}
+         #           }
+         #         },
+         #         {
+         #           "display_name": "animal",
+         #           "text_extraction": {
+         #             "text_segment": {"start_offset": 8, "end_offset": 11}
+         #           }
+         #         }
+         #       ]
+         #
+         # </section><section><h5>Classification</h5>
+         #
+         # See [Preparing your training
+         # data](https://cloud.google.com/natural-language/automl/docs/prepare) for
+         # more information.
+         #
+         # One or more CSV file(s) with each line in the following format:
+         #
+         #     ML_USE,(TEXT_SNIPPET | GCS_FILE_PATH),LABEL,LABEL,...
+         #
+         # * `ML_USE` - Identifies the data set that the current row (file) applies
+         #   to. This value can be one of the following:
+         #   * `TRAIN` - Rows in this file are used to train the model.
+         #   * `TEST` - Rows in this file are used to test the model during training.
+         #   * `UNASSIGNED` - Rows in this file are not categorized. They are
+         #     automatically divided into train and test data: 80% for training and
+         #     20% for testing.
+         #
+         # * `TEXT_SNIPPET` and `GCS_FILE_PATH` are distinguished by a pattern. If
+         #   the column content is a valid Google Cloud Storage file path, that is,
+         #   prefixed by "gs://", it is treated as a `GCS_FILE_PATH`. Otherwise, if
+         #   the content is enclosed in double quotes (""), it is treated as a
+         #   `TEXT_SNIPPET`. For `GCS_FILE_PATH`, the path must lead to a
+         #   file with a supported extension and UTF-8 encoding, for example,
+         #   "gs://folder/content.txt"; AutoML imports the file content
+         #   as a text snippet. For `TEXT_SNIPPET`, AutoML imports the column content
+         #   excluding quotes. In both cases, the content must be 10MB or
+         #   less in size. For zip files, the size of each file inside the zip must be
+         #   10MB or less in size.
+         #
+         # For the `MULTICLASS` classification type, at most one `LABEL` is allowed.
+         #
+         # The `ML_USE` and `LABEL` columns are optional.
+         # Supported file extensions: .TXT, .PDF, .TIF, .TIFF, .ZIP
+         #
+         # A maximum of 100 unique labels are allowed per CSV row.
+         #
+         # Sample rows:
+         #
+         #     TRAIN,"They have bad food and very rude",RudeService,BadFood
+         #     gs://folder/content.txt,SlowService
+         #     TEST,gs://folder/document.pdf
+         #     VALIDATE,gs://folder/text_files.zip,BadFood
+         #
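+         # Illustrative only: at prediction time a single snippet is sent through
+         # the PredictionService client rather than a CSV import. A hedged sketch
+         # (project, location, and model IDs hypothetical):
+         #
+         #     require "google/cloud/automl/v1"
+         #
+         #     prediction = Google::Cloud::AutoML::V1::PredictionService::Client.new
+         #     model = prediction.model_path project:  "my-project",
+         #                                   location: "us-central1",
+         #                                   model:    "my-model-id"
+         #     response = prediction.predict(
+         #       name:    model,
+         #       payload: { text_snippet: { content: "They have bad food", mime_type: "text/plain" } }
+         #     )
+         #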
+         # </section><section><h5>Sentiment Analysis</h5>
+         #
+         # See [Preparing your training
+         # data](https://cloud.google.com/natural-language/automl/docs/prepare) for
+         # more information.
+         #
+         # CSV file(s) with each line in format:
+         #
+         #     ML_USE,(TEXT_SNIPPET | GCS_FILE_PATH),SENTIMENT
+         #
+         # * `ML_USE` - Identifies the data set that the current row (file) applies
+         #   to. This value can be one of the following:
+         #   * `TRAIN` - Rows in this file are used to train the model.
+         #   * `TEST` - Rows in this file are used to test the model during training.
+         #   * `UNASSIGNED` - Rows in this file are not categorized. They are
+         #     automatically divided into train and test data: 80% for training and
+         #     20% for testing.
+         #
+         # * `TEXT_SNIPPET` and `GCS_FILE_PATH` are distinguished by a pattern. If
+         #   the column content is a valid Google Cloud Storage file path, that is,
+         #   prefixed by "gs://", it is treated as a `GCS_FILE_PATH`. Otherwise, if
+         #   the content is enclosed in double quotes (""), it is treated as a
+         #   `TEXT_SNIPPET`. For `GCS_FILE_PATH`, the path must lead to a
+         #   file with a supported extension and UTF-8 encoding, for example,
+         #   "gs://folder/content.txt"; AutoML imports the file content
+         #   as a text snippet. For `TEXT_SNIPPET`, AutoML imports the column content
+         #   excluding quotes. In both cases, the content must be 128kB or
+         #   less in size. For zip files, the size of each file inside the zip must be
+         #   128kB or less in size.
+         #
+         # The `ML_USE` and `SENTIMENT` columns are optional.
+         # Supported file extensions: .TXT, .PDF, .TIF, .TIFF, .ZIP
+         #
+         # * `SENTIMENT` - An integer between 0 and
+         #   Dataset.text_sentiment_dataset_metadata.sentiment_max
+         #   (inclusive). Describes the ordinal of the sentiment - higher
+         #   value means a more positive sentiment. All the values are
+         #   completely relative, i.e. neither 0 needs to mean a negative or
+         #   neutral sentiment nor sentiment_max needs to mean a positive one -
+         #   it is just required that 0 is the least positive sentiment
+         #   in the data, and sentiment_max is the most positive one.
+         #   The SENTIMENT shouldn't be confused with "score" or "magnitude"
+         #   from the previous Natural Language Sentiment Analysis API.
+         #   All SENTIMENT values between 0 and sentiment_max must be
+         #   represented in the imported data. On prediction the same 0 to
+         #   sentiment_max range will be used. The difference between
+         #   neighboring sentiment values need not be uniform, e.g. 1 and
+         #   2 may be similar whereas the difference between 2 and 3 may be
+         #   large.
+         #
+         # Sample rows:
+         #
+         #     TRAIN,"@freewrytin this is way too good for your product",2
+         #     gs://folder/content.txt,3
+         #     TEST,gs://folder/document.pdf
+         #     VALIDATE,gs://folder/text_files.zip,2
+         # </section>
+         # </div>
+         #
+         # <h4>AutoML Tables</h4><div class="ui-datasection-main"><section
+         # class="selected">
+         #
+         # See [Preparing your training
+         # data](https://cloud.google.com/automl-tables/docs/prepare) for more
+         # information.
+         #
+         # You can use either
+         # {Google::Cloud::AutoML::V1::InputConfig#gcs_source gcs_source} or
+         # [bigquery_source][google.cloud.automl.v1.InputConfig.bigquery_source].
+         # All input is concatenated into a single
+         # [primary_table_spec_id][google.cloud.automl.v1.TablesDatasetMetadata.primary_table_spec_id]
+         #
+         # **For gcs_source:**
+         #
+         # CSV file(s), where the first row of the first file is the header,
+         # containing unique column names. If the first row of a subsequent
+         # file is the same as the header, then it is also treated as a
+         # header. All other rows contain values for the corresponding
+         # columns.
+         #
+         # Each .CSV file by itself must be 10GB or smaller, and their total
+         # size must be 100GB or smaller.
+         #
+         # First three sample rows of a CSV file:
+         # <pre>
+         # "Id","First Name","Last Name","Dob","Addresses"
+         #
+         # "1","John","Doe","1968-01-22","[\\{"status":"current","address":"123_First_Avenue","city":"Seattle","state":"WA","zip":"11111","numberOfYears":"1"},\\{"status":"previous","address":"456_Main_Street","city":"Portland","state":"OR","zip":"22222","numberOfYears":"5"}]"
+         #
+         # "2","Jane","Doe","1980-10-16","[\\{"status":"current","address":"789_Any_Avenue","city":"Albany","state":"NY","zip":"33333","numberOfYears":"2"},\\{"status":"previous","address":"321_Main_Street","city":"Hoboken","state":"NJ","zip":"44444","numberOfYears":"3"}]"
+         # </pre>
+         #
+         # **For bigquery_source:**
+         #
+         # A URI of a BigQuery table. The user data size of the BigQuery
+         # table must be 100GB or smaller.
+         #
+         # An imported table must have between 2 and 1,000 columns, inclusive,
+         # and between 1000 and 100,000,000 rows, inclusive. At most 5
+         # import data operations can run in parallel.
+         #
+         # </section>
+         # </div>
+         #
+         # **Input field definitions:**
+         #
+         # `ML_USE`
+         # : ("TRAIN" | "VALIDATE" | "TEST" | "UNASSIGNED")
+         #   Describes how the given example (file) should be used for model
+         #   training. "UNASSIGNED" can be used when user has no preference.
+         #
+         # `GCS_FILE_PATH`
+         # : The path to a file on Google Cloud Storage. For example,
+         #   "gs://folder/image1.png".
+         #
+         # `LABEL`
+         # : A display name of an object on an image, video etc., e.g. "dog".
+         #   Must be up to 32 characters long and can consist only of ASCII
+         #   Latin letters A-Z and a-z, underscores (_), and ASCII digits 0-9.
+         #   For each label, an AnnotationSpec is created whose display_name
+         #   becomes the label; AnnotationSpecs are given back in predictions.
+         #
+         # `INSTANCE_ID`
+         # : A positive integer that identifies a specific instance of a
+         #   labeled entity on an example. Used e.g. to track two cars on
+         #   a video while being able to tell apart which one is which.
+         #
+         # `BOUNDING_BOX`
+         # : (`VERTEX,VERTEX,VERTEX,VERTEX` | `VERTEX,,,VERTEX,,`)
+         #   A rectangle parallel to the frame of the example (image,
+         #   video). If 4 vertices are given, they are connected by edges
+         #   in the order provided; if 2 are given, they are recognized
+         #   as diagonally opposite vertices of the rectangle.
+         #
+         # `VERTEX`
+         # : (`COORDINATE,COORDINATE`)
+         #   First coordinate is horizontal (x), the second is vertical (y).
+         #
+         # `COORDINATE`
+         # : A float in 0 to 1 range, relative to total length of
+         #   image or video in given dimension. For fractions the
+         #   leading non-decimal 0 can be omitted (i.e. 0.3 = .3).
+         #   Point 0,0 is in top left.
+         #
+         # `TIME_SEGMENT_START`
+         # : (`TIME_OFFSET`)
+         #   Expresses a beginning, inclusive, of a time segment
+         #   within an example that has a time dimension
+         #   (e.g. video).
+         #
+         # `TIME_SEGMENT_END`
+         # : (`TIME_OFFSET`)
+         #   Expresses an end, exclusive, of a time segment within
+         #   an example that has a time dimension (e.g. video).
+         #
+         # `TIME_OFFSET`
+         # : A number of seconds as measured from the start of an
+         #   example (e.g. video). Fractions are allowed, up to a
+         #   microsecond precision. "inf" is allowed, and it means the end
+         #   of the example.
+         #
+         # `TEXT_SNIPPET`
+         # : The content of a text snippet, UTF-8 encoded, enclosed within
+         #   double quotes ("").
+         #
+         # `DOCUMENT`
+         # : A field that provides the textual content of a document and the
+         #   layout information.
+         #
+         # **Errors:**
+         #
+         # If any of the provided CSV files can't be parsed, or if more than a
+         # certain percent of CSV rows cannot be processed, then the operation fails
+         # and nothing is imported. Regardless of overall success or failure, the
+         # per-row failures, up to a certain count cap, are listed in
+         # Operation.metadata.partial_failures.
+         # @!attribute [rw] gcs_source
+         #   @return [Google::Cloud::AutoML::V1::GcsSource]
+         #     The Google Cloud Storage location for the input content.
+         #     For {Google::Cloud::AutoML::V1::AutoML::Client#import_data AutoMl.ImportData}, `gcs_source` points to a CSV file with
+         #     a structure described in {Google::Cloud::AutoML::V1::InputConfig InputConfig}.
+         # @!attribute [rw] params
+         #   @return [Google::Protobuf::Map{String => String}]
+         #     Additional domain-specific parameters describing the semantic of the
+         #     imported data; any string must be up to 25000 characters long.
+         #
+         #     <h4>AutoML Tables</h4>
+         #
+         #     `schema_inference_version`
+         #     : (integer) This value must be supplied.
+         #       The version of the algorithm to use for the initial inference of
+         #       the column data types of the imported table. Allowed values: "1".
+         class InputConfig
+           include Google::Protobuf::MessageExts
+           extend Google::Protobuf::MessageExts::ClassMethods
+
+           # @!attribute [rw] key
+           #   @return [String]
+           # @!attribute [rw] value
+           #   @return [String]
+           class ParamsEntry
+             include Google::Protobuf::MessageExts
+             extend Google::Protobuf::MessageExts::ClassMethods
+           end
+         end
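+
+         # Illustrative sketch (not generated code): an `InputConfig` can also be
+         # built explicitly as a proto message. The Tables `params` entry shown is
+         # the `schema_inference_version` documented above; the bucket path is
+         # hypothetical.
+         #
+         #     input = Google::Cloud::AutoML::V1::InputConfig.new(
+         #       gcs_source: Google::Cloud::AutoML::V1::GcsSource.new(
+         #         input_uris: ["gs://folder/table.csv"]
+         #       ),
+         #       params: { "schema_inference_version" => "1" }
+         #     )
+         #     # `input` can then be passed as the `input_config` of an
+         #     # `import_data` call.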
+
+         # Input configuration for BatchPredict Action.
+         #
+         # The format of input depends on the ML problem of the model used for
+         # prediction. As input source the
+         # {Google::Cloud::AutoML::V1::InputConfig#gcs_source gcs_source}
+         # is expected, unless specified otherwise.
+         #
+         # The formats are represented in EBNF with commas being literal and with
+         # non-terminal symbols defined near the end of this comment. The formats
+         # are:
+         #
+         # <h4>AutoML Vision</h4>
+         # <div class="ds-selector-tabs"><section><h5>Classification</h5>
+         #
+         # One or more CSV files where each line is a single column:
+         #
+         #     GCS_FILE_PATH
+         #
+         # The Google Cloud Storage location of an image of up to
+         # 30MB in size. Supported extensions: .JPEG, .GIF, .PNG.
+         # This path is treated as the ID in the batch predict output.
+         #
+         # Sample rows:
+         #
+         #     gs://folder/image1.jpeg
+         #     gs://folder/image2.gif
+         #     gs://folder/image3.png
+         #
+         # </section><section><h5>Object Detection</h5>
+         #
+         # One or more CSV files where each line is a single column:
+         #
+         #     GCS_FILE_PATH
+         #
+         # The Google Cloud Storage location of an image of up to
+         # 30MB in size. Supported extensions: .JPEG, .GIF, .PNG.
+         # This path is treated as the ID in the batch predict output.
+         #
+         # Sample rows:
+         #
+         #     gs://folder/image1.jpeg
+         #     gs://folder/image2.gif
+         #     gs://folder/image3.png
+         # </section>
+         # </div>
+         #
+         # <h4>AutoML Video Intelligence</h4>
+         # <div class="ds-selector-tabs"><section><h5>Classification</h5>
+         #
+         # One or more CSV files where each line is a single column:
+         #
+         #     GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END
+         #
+         # `GCS_FILE_PATH` is the Google Cloud Storage location of video up to 50GB in
+         # size and up to 3h in duration.
+         # Supported extensions: .MOV, .MPEG4, .MP4, .AVI.
+         #
+         # `TIME_SEGMENT_START` and `TIME_SEGMENT_END` must be within the
+         # length of the video, and the end time must be after the start time.
+         #
+         # Sample rows:
+         #
+         #     gs://folder/video1.mp4,10,40
+         #     gs://folder/video1.mp4,20,60
+         #     gs://folder/vid2.mov,0,inf
+         #
+         # </section><section><h5>Object Tracking</h5>
+         #
+         # One or more CSV files where each line is a single column:
+         #
+         #     GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END
+         #
+         # `GCS_FILE_PATH` is the Google Cloud Storage location of video up to 50GB in
+         # size and up to 3h in duration.
+         # Supported extensions: .MOV, .MPEG4, .MP4, .AVI.
+         #
+         # `TIME_SEGMENT_START` and `TIME_SEGMENT_END` must be within the
+         # length of the video, and the end time must be after the start time.
+         #
+         # Sample rows:
+         #
+         #     gs://folder/video1.mp4,10,40
+         #     gs://folder/video1.mp4,20,60
+         #     gs://folder/vid2.mov,0,inf
+         # </section>
+         # </div>
+         #
+         # <h4>AutoML Natural Language</h4>
+         # <div class="ds-selector-tabs"><section><h5>Classification</h5>
+         #
+         # One or more CSV files where each line is a single column:
+         #
+         #     GCS_FILE_PATH
+         #
+         # `GCS_FILE_PATH` is the Google Cloud Storage location of a text file.
+         # Supported file extensions: .TXT, .PDF, .TIF, .TIFF
+         #
+         # Text files can be no larger than 10MB in size.
+         #
+         # Sample rows:
+         #
+         #     gs://folder/text1.txt
+         #     gs://folder/text2.pdf
+         #     gs://folder/text3.tif
+         #
+         # </section><section><h5>Sentiment Analysis</h5>
+         #
+         # One or more CSV files where each line is a single column:
+         #
+         #     GCS_FILE_PATH
+         #
+         # `GCS_FILE_PATH` is the Google Cloud Storage location of a text file.
+         # Supported file extensions: .TXT, .PDF, .TIF, .TIFF
+         #
+         # Text files can be no larger than 128kB in size.
+         #
+         # Sample rows:
+         #
+         #     gs://folder/text1.txt
+         #     gs://folder/text2.pdf
+         #     gs://folder/text3.tif
+         #
+         # </section><section><h5>Entity Extraction</h5>
+         #
+         # One or more JSONL (JSON Lines) files that either provide inline text or
+         # documents. You can only use one format, either inline text or documents,
+         # for a single call to [AutoMl.BatchPredict].
+         #
+         # Each JSONL file contains, per line, a proto that
+         # wraps a temporary user-assigned TextSnippet ID (string up to 2000
+         # characters long) called "id", a TextSnippet proto (in
+         # JSON representation) and zero or more TextFeature protos. Any given
+         # text snippet content must have 30,000 characters or less, and also
+         # be UTF-8 NFC encoded (ASCII already is). The IDs provided should be
+         # unique.
+         #
+         # Each document JSONL file contains, per line, a proto that wraps a Document
+         # proto with `input_config` set. Each document cannot exceed 2MB in size.
+         #
+         # Supported document extensions: .PDF, .TIF, .TIFF
+         #
+         # Each JSONL file must not exceed 100MB in size, and no more than 20
+         # JSONL files may be passed.
+         #
+         # Sample inline JSONL file (shown with artificial line
+         # breaks; actual line breaks are denoted by "\n"):
+         #
+         #     {
+         #       "id": "my_first_id",
+         #       "text_snippet": { "content": "dog car cat"},
+         #       "text_features": [
+         #         {
+         #           "text_segment": \\{"start_offset": 4, "end_offset": 6},
+         #           "structural_type": PARAGRAPH,
+         #           "bounding_poly": {
+         #             "normalized_vertices": [
+         #               \\{"x": 0.1, "y": 0.1},
+         #               \\{"x": 0.1, "y": 0.3},
+         #               \\{"x": 0.3, "y": 0.3},
+         #               \\{"x": 0.3, "y": 0.1}
+         #             ]
+         #           }
+         #         }
+         #       ]
+         #     }\n
+         #     {
+         #       "id": "2",
+         #       "text_snippet": {
+         #         "content": "Extended sample content",
+         #         "mime_type": "text/plain"
+         #       }
+         #     }
+         #
+         # Sample document JSONL file (shown with artificial line
+         # breaks; actual line breaks are denoted by "\n"):
+         #
+         #     {
+         #       "document": {
+         #         "input_config": {
+         #           "gcs_source": { "input_uris": [ "gs://folder/document1.pdf" ]
+         #           }
+         #         }
+         #       }
+         #     }\n
+         #     {
+         #       "document": {
+         #         "input_config": {
+         #           "gcs_source": { "input_uris": [ "gs://folder/document2.tif" ]
+         #           }
+         #         }
+         #       }
+         #     }
+         # </section>
+         # </div>
+         #
+         # <h4>AutoML Tables</h4><div class="ui-datasection-main"><section
+         # class="selected">
+         #
+         # See [Preparing your training
+         # data](https://cloud.google.com/automl-tables/docs/predict-batch) for more
+         # information.
+         #
+         # You can use either
+         # {Google::Cloud::AutoML::V1::BatchPredictInputConfig#gcs_source gcs_source}
+         # or
+         # [bigquery_source][BatchPredictInputConfig.bigquery_source].
+         #
+         # **For gcs_source:**
+         #
+         # CSV file(s), each by itself 10GB or smaller and total size must be
+         # 100GB or smaller, where the first file must have a header containing
+         # column names. If the first row of a subsequent file is the same as
+         # the header, then it is also treated as a header. All other rows
+         # contain values for the corresponding columns.
+         #
+         # The column names must contain the model's
+         # [input_feature_column_specs'][google.cloud.automl.v1.TablesModelMetadata.input_feature_column_specs]
+         # [display_name-s][google.cloud.automl.v1.ColumnSpec.display_name]
+         # (order doesn't matter). The columns corresponding to the model's
+         # input feature column specs must contain values compatible with the
+         # column spec's data types. Prediction on all the rows, i.e. the CSV
+         # lines, will be attempted.
+         #
+         # Sample rows from a CSV file:
+         # <pre>
+         # "First Name","Last Name","Dob","Addresses"
+         #
+         # "John","Doe","1968-01-22","[\\{"status":"current","address":"123_First_Avenue","city":"Seattle","state":"WA","zip":"11111","numberOfYears":"1"},\\{"status":"previous","address":"456_Main_Street","city":"Portland","state":"OR","zip":"22222","numberOfYears":"5"}]"
+         #
+         # "Jane","Doe","1980-10-16","[\\{"status":"current","address":"789_Any_Avenue","city":"Albany","state":"NY","zip":"33333","numberOfYears":"2"},\\{"status":"previous","address":"321_Main_Street","city":"Hoboken","state":"NJ","zip":"44444","numberOfYears":"3"}]"
+         # </pre>
+         #
+         # **For bigquery_source:**
+         #
+         # The URI of a BigQuery table. The user data size of the BigQuery
+         # table must be 100GB or smaller.
+         #
+         # The column names must contain the model's
+         # [input_feature_column_specs'][google.cloud.automl.v1.TablesModelMetadata.input_feature_column_specs]
+         # [display_name-s][google.cloud.automl.v1.ColumnSpec.display_name]
+         # (order doesn't matter). The columns corresponding to the model's
+         # input feature column specs must contain values compatible with the
+         # column spec's data types. Prediction on all the rows of the table
+         # will be attempted.
+         # </section>
+         # </div>
+         #
+         # **Input field definitions:**
+         #
+         # `GCS_FILE_PATH`
+         # : The path to a file on Google Cloud Storage. For example,
+         #   "gs://folder/video.avi".
+         #
+         # `TIME_SEGMENT_START`
+         # : (`TIME_OFFSET`)
+         #   Expresses a beginning, inclusive, of a time segment
+         #   within an example that has a time dimension
+         #   (e.g. video).
+         #
+         # `TIME_SEGMENT_END`
+         # : (`TIME_OFFSET`)
+         #   Expresses an end, exclusive, of a time segment within
+         #   an example that has a time dimension (e.g. video).
+         #
+         # `TIME_OFFSET`
+         # : A number of seconds as measured from the start of an
+         #   example (e.g. video). Fractions are allowed, up to a
+         #   microsecond precision. "inf" is allowed, and it means the end
+         #   of the example.
+         #
+         # **Errors:**
+         #
+         # If any of the provided CSV files can't be parsed, or if more than a
+         # certain percent of CSV rows cannot be processed, then the operation fails
+         # and prediction does not happen. Regardless of overall success or failure,
+         # the per-row failures, up to a certain count cap, will be listed in
+         # Operation.metadata.partial_failures.
+         # @!attribute [rw] gcs_source
+         #   @return [Google::Cloud::AutoML::V1::GcsSource]
+         #     Required. The Google Cloud Storage location for the input content.
+         class BatchPredictInputConfig
+           include Google::Protobuf::MessageExts
+           extend Google::Protobuf::MessageExts::ClassMethods
+         end
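+
+         # Illustrative only: a hedged sketch of a batch prediction call using the
+         # generated PredictionService client (model ID and bucket paths
+         # hypothetical):
+         #
+         #     require "google/cloud/automl/v1"
+         #
+         #     prediction = Google::Cloud::AutoML::V1::PredictionService::Client.new
+         #     model = prediction.model_path project:  "my-project",
+         #                                   location: "us-central1",
+         #                                   model:    "my-model-id"
+         #     operation = prediction.batch_predict(
+         #       name:          model,
+         #       input_config:  { gcs_source: { input_uris: ["gs://folder/inputs.csv"] } },
+         #       output_config: { gcs_destination: { output_uri_prefix: "gs://folder/out/" } }
+         #     )
+         #     operation.wait_until_done! # batch prediction is long-running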
+
+         # Input configuration of a {Google::Cloud::AutoML::V1::Document Document}.
+         # @!attribute [rw] gcs_source
+         #   @return [Google::Cloud::AutoML::V1::GcsSource]
+         #     The Google Cloud Storage location of the document file. Only a single
+         #     path should be given.
+         #
+         #     Max supported size: 512MB.
+         #
+         #     Supported extensions: .PDF.
+         class DocumentInputConfig
+           include Google::Protobuf::MessageExts
+           extend Google::Protobuf::MessageExts::ClassMethods
+         end
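+
+         # Illustrative only: a hedged sketch of sending a single PDF for online
+         # prediction through a `Document` payload that carries this
+         # `DocumentInputConfig` (the `prediction` client, `model` name, and path
+         # are hypothetical, as in the earlier sketches):
+         #
+         #     payload = {
+         #       document: {
+         #         input_config: {
+         #           gcs_source: { input_uris: ["gs://folder/document1.pdf"] }
+         #         }
+         #       }
+         #     }
+         #     response = prediction.predict name: model, payload: payload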
+
970
+ # * For Translation:
971
+ # CSV file `translation.csv`, with each line in format:
972
+ # ML_USE,GCS_FILE_PATH
973
+ # GCS_FILE_PATH leads to a .TSV file which describes examples that have
974
+ # given ML_USE, using the following row format per line:
975
+ # TEXT_SNIPPET (in source language) \t TEXT_SNIPPET (in target
976
+ # language)
977
+ #
978
+ # * For Tables:
979
+ # Output depends on whether the dataset was imported from Google Cloud
980
+ # Storage or BigQuery.
981
+ # Google Cloud Storage case:
982
+ #
983
+ # [gcs_destination][google.cloud.automl.v1p1beta.OutputConfig.gcs_destination]
984
+ # must be set. Exported are CSV file(s) `tables_1.csv`,
985
+ # `tables_2.csv`,...,`tables_N.csv` with each having as header line
986
+ # the table's column names, and all other lines contain values for
987
+ # the header columns.
988
+ # BigQuery case:
989
+ #
990
+ # [bigquery_destination][google.cloud.automl.v1p1beta.OutputConfig.bigquery_destination]
991
+ # pointing to a BigQuery project must be set. In the given project a
992
+ # new dataset will be created with name
993
+ #
994
+ # `export_data_<automl-dataset-display-name>_<timestamp-of-export-call>`
995
+ # where <automl-dataset-display-name> will be made
996
+ # BigQuery-dataset-name compatible (e.g. most special characters will
997
+ # become underscores), and timestamp will be in
998
+ # YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In that
999
+ # dataset a new table called `primary_table` will be created, and
1000
+ # filled with precisely the same data as this obtained on import.
1001
+ # @!attribute [rw] gcs_destination
1002
+ # @return [Google::Cloud::AutoML::V1::GcsDestination]
1003
+ # Required. The Google Cloud Storage location where the output is to be written to.
1004
+ # For Image Object Detection, Text Extraction, Video Classification and
1005
+ # Tables, in the given directory a new directory will be created with name:
1006
+ # export_data-<dataset-display-name>-<timestamp-of-export-call> where
1007
+ # timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. All export
1008
+ # output will be written into that directory.
1009
+ class OutputConfig
1010
+ include Google::Protobuf::MessageExts
1011
+ extend Google::Protobuf::MessageExts::ClassMethods
1012
+ end
1013
+
1014
+ # Output configuration for BatchPredict Action.
1015
+ #
1016
+ # As destination the
1017
+ #
1018
+ # {Google::Cloud::AutoML::V1::BatchPredictOutputConfig#gcs_destination gcs_destination}
1019
+ # must be set unless specified otherwise for a domain. If gcs_destination is
1020
+ # set then in the given directory a new directory is created. Its name
1021
+ # will be
1022
+ # "prediction-<model-display-name>-<timestamp-of-prediction-call>",
1023
+ # where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. The contents
1024
+ # of it depends on the ML problem the predictions are made for.
1025
+ #
1026
+ # * For Image Classification:
1027
+ # In the created directory files `image_classification_1.jsonl`,
1028
+ # `image_classification_2.jsonl`,...,`image_classification_N.jsonl`
1029
+ # will be created, where N may be 1, and depends on the
1030
+ # total number of the successfully predicted images and annotations.
1031
+ # A single image will be listed only once with all its annotations,
1032
+ # and its annotations will never be split across files.
1033
+ # Each .JSONL file will contain, per line, a JSON representation of a
1034
+ # proto that wraps image's "ID" : "<id_value>" followed by a list of
1035
+ # zero or more AnnotationPayload protos (called annotations), which
1036
+ # have classification detail populated.
1037
+ # If prediction for any image failed (partially or completely), then an
1038
+ # additional `errors_1.jsonl`, `errors_2.jsonl`,..., `errors_N.jsonl`
1039
+ # files will be created (N depends on total number of failed
1040
+ # predictions). These files will have a JSON representation of a proto
1041
+ # that wraps the same "ID" : "<id_value>" but here followed by
1042
+ # exactly one
1043
+ #
1044
+ # [`google.rpc.Status`](https:
1045
+ # //github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
1046
+ # containing only `code` and `message`fields.
1047
+ #
1048
+ # * For Image Object Detection:
1049
+ # In the created directory files `image_object_detection_1.jsonl`,
1050
+ # `image_object_detection_2.jsonl`,...,`image_object_detection_N.jsonl`
1051
+ # will be created, where N may be 1, and depends on the
1052
+ # total number of the successfully predicted images and annotations.
1053
+ # Each .JSONL file will contain, per line, a JSON representation of a
1054
+ # proto that wraps image's "ID" : "<id_value>" followed by a list of
1055
+ # zero or more AnnotationPayload protos (called annotations), which
1056
+ # have image_object_detection detail populated. A single image will
1057
+ # be listed only once with all its annotations, and its annotations
1058
+ # will never be split across files.
1059
+ # If prediction for any image failed (partially or completely), then
1060
+ # additional `errors_1.jsonl`, `errors_2.jsonl`,..., `errors_N.jsonl`
1061
+ # files will be created (N depends on total number of failed
1062
+ # predictions). These files will have a JSON representation of a proto
1063
+ # that wraps the same "ID" : "<id_value>" but here followed by
1064
+ # exactly one
1065
+ #
1066
+ # [`google.rpc.Status`](https:
1067
+ # //github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
1068
+ # containing only `code` and `message`fields.
1069
+ # * For Video Classification:
1070
+ # In the created directory a video_classification.csv file, and a .JSON
1071
+ # file per each video classification requested in the input (i.e. each
1072
+ # line in given CSV(s)), will be created.
1073
+ #
1074
+ # The format of video_classification.csv is:
1075
+ #
1076
+ # GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END,JSON_FILE_NAME,STATUS
1077
+ # where:
1078
+ # GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END = matches 1 to 1
1079
+ # the prediction input lines (i.e. video_classification.csv has
1080
+ # precisely the same number of lines as the prediction input had.)
1081
+ # JSON_FILE_NAME = Name of .JSON file in the output directory, which
1082
+ # contains prediction responses for the video time segment.
1083
+ # STATUS = "OK" if prediction completed successfully, or an error code
1084
+ # with message otherwise. If STATUS is not "OK" then the .JSON file
1085
+ # for that line may not exist or be empty.
1086
+ #
1087
+ # Each .JSON file, assuming STATUS is "OK", will contain a list of
1088
+ # AnnotationPayload protos in JSON format, which are the predictions
1089
+ # for the video time segment the file is assigned to in the
1090
+ # video_classification.csv. All AnnotationPayload protos will have
1091
+ # video_classification field set, and will be sorted by
1092
+ # video_classification.type field (note that the returned types are
1093
+ # governed by `classifaction_types` parameter in
1094
+ # [PredictService.BatchPredictRequest.params][]).
1095
+ #
1096
+ # * For Video Object Tracking:
1097
+ # In the created directory a video_object_tracking.csv file will be
1098
+ # created, and multiple files video_object_trackinng_1.json,
1099
+ # video_object_trackinng_2.json,..., video_object_trackinng_N.json,
1100
+ # where N is the number of requests in the input (i.e. the number of
1101
+ # lines in given CSV(s)).
1102
+ #
1103
+ # The format of video_object_tracking.csv is:
1104
+ #
1105
+ # GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END,JSON_FILE_NAME,STATUS
1106
+ # where:
1107
+ # GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END = matches 1 to 1
1108
+ # the prediction input lines (i.e. video_object_tracking.csv has
1109
+ # precisely the same number of lines as the prediction input had.)
1110
+ # JSON_FILE_NAME = Name of .JSON file in the output directory, which
1111
+ # contains prediction responses for the video time segment.
1112
+ # STATUS = "OK" if prediction completed successfully, or an error
1113
+ # code with message otherwise. If STATUS is not "OK" then the .JSON
1114
+ # file for that line may not exist or be empty.
1115
+ #
1116
+ # Each .JSON file, assuming STATUS is "OK", will contain a list of
1117
+ # AnnotationPayload protos in JSON format, which are the predictions
1118
+ # for each frame of the video time segment the file is assigned to in
1119
+ # video_object_tracking.csv. All AnnotationPayload protos will have
1120
+ # video_object_tracking field set.
1121
+ # * For Text Classification:
1122
+ # In the created directory files `text_classification_1.jsonl`,
1123
+ # `text_classification_2.jsonl`,...,`text_classification_N.jsonl`
1124
+ # will be created, where N may be 1, and depends on the
1125
+ # total number of inputs and annotations found.
1126
+ #
1127
+ # Each .JSONL file will contain, per line, a JSON representation of a
1128
+ # proto that wraps input text file (or document) in
1129
+ # the text snippet (or document) proto and a list of
1130
+ # zero or more AnnotationPayload protos (called annotations), which
1131
+ # have classification detail populated. A single text file (or
1132
+ # document) will be listed only once with all its annotations, and its
1133
+ # annotations will never be split across files.
1134
+ #
1135
+ # If prediction for any input file (or document) failed (partially or
1136
+ # completely), then additional `errors_1.jsonl`, `errors_2.jsonl`,...,
1137
+ # `errors_N.jsonl` files will be created (N depends on total number of
1138
+ # failed predictions). These files will have a JSON representation of a
1139
+ # proto that wraps input file followed by exactly one
1140
+ #
1141
+ # [`google.rpc.Status`](https:
1142
+ # //github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
1143
+ # containing only `code` and `message`.
1144
+ #
1145
+ # * For Text Sentiment:
1146
+ # In the created directory files `text_sentiment_1.jsonl`,
1147
+ # `text_sentiment_2.jsonl`,...,`text_sentiment_N.jsonl`
1148
+ # will be created, where N may be 1, and depends on the
1149
+ # total number of inputs and annotations found.
1150
+ #
1151
+ # Each .JSONL file will contain, per line, a JSON representation of a
1152
+ # proto that wraps input text file (or document) in
1153
+ # the text snippet (or document) proto and a list of
1154
+ # zero or more AnnotationPayload protos (called annotations), which
1155
+ # have text_sentiment detail populated. A single text file (or
1156
+ # document) will be listed only once with all its annotations, and its
1157
+ # annotations will never be split across files.
1158
+ #
1159
+ # If prediction for any input file (or document) failed (partially or
1160
+ # completely), then additional `errors_1.jsonl`, `errors_2.jsonl`,...,
1161
+ # `errors_N.jsonl` files will be created (N depends on total number of
1162
+ # failed predictions). These files will have a JSON representation of a
1163
+ # proto that wraps input file followed by exactly one
1164
+ #
1165
+ # [`google.rpc.Status`](https:
1166
+ # //github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
1167
+ # containing only `code` and `message`.
1168
+ #
1169
+ # * For Text Extraction:
1170
+ # In the created directory files `text_extraction_1.jsonl`,
1171
+ # `text_extraction_2.jsonl`,...,`text_extraction_N.jsonl`
1172
+ # will be created, where N may be 1, and depends on the
1173
+ # total number of inputs and annotations found.
1174
+ # The contents of these .JSONL file(s) depend on whether the input
1175
+ # used inline text, or documents.
1176
+ # If input was inline, then each .JSONL file will contain, per line,
1177
+ # a JSON representation of a proto that wraps given in request text
1178
+ # snippet's "id" (if specified), followed by input text snippet,
1179
+ # and a list of zero or more
1180
+ # AnnotationPayload protos (called annotations), which have
1181
+ # text_extraction detail populated. A single text snippet will be
1182
+ # listed only once with all its annotations, and its annotations will
1183
+ # never be split across files.
1184
+ # If input used documents, then each .JSONL file will contain, per
1185
+ # line, a JSON representation of a proto that wraps given in request
1186
+ # document proto, followed by its OCR-ed representation in the form
1187
+ # of a text snippet, finally followed by a list of zero or more
1188
+ # AnnotationPayload protos (called annotations), which have
1189
+ # text_extraction detail populated and refer, via their indices, to
1190
+ # the OCR-ed text snippet. A single document (and its text snippet)
1191
+ # will be listed only once with all its annotations, and its
1192
+ # annotations will never be split across files.
1193
+ # If prediction for any text snippet failed (partially or completely),
1194
+ # then additional `errors_1.jsonl`, `errors_2.jsonl`,...,
1195
+ # `errors_N.jsonl` files will be created (N depends on total number of
1196
+ # failed predictions). These files will have a JSON representation of a
1197
+ # proto that wraps either the "id" : "<id_value>" (in case of inline)
1198
+ # or the document proto (in case of document) but here followed by
1199
+ # exactly one
1200
+ #
1201
+ # [`google.rpc.Status`](https:
1202
+ # //github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
1203
+ # containing only `code` and `message`.
1204
+ #
+ # * For Tables:
+ # Output depends on whether
+ # [gcs_destination][google.cloud.automl.v1p1beta.BatchPredictOutputConfig.gcs_destination]
+ # or
+ # [bigquery_destination][google.cloud.automl.v1p1beta.BatchPredictOutputConfig.bigquery_destination]
+ # is set (either is allowed).
+ # Google Cloud Storage case:
+ # In the created directory, files `tables_1.csv`, `tables_2.csv`,...,
+ # `tables_N.csv` will be created, where N may be 1 and depends on
+ # the total number of successfully predicted rows.
+ # For all CLASSIFICATION
+ # [prediction_type-s][google.cloud.automl.v1p1beta.TablesModelMetadata.prediction_type]:
+ # Each .csv file will contain a header listing all columns'
+ # [display_name-s][google.cloud.automl.v1p1beta.ColumnSpec.display_name]
+ # given on input, followed by M target column names in the format of
+ # "<[target_column_specs][google.cloud.automl.v1p1beta.TablesModelMetadata.target_column_spec]
+ # [display_name][google.cloud.automl.v1p1beta.ColumnSpec.display_name]>_<target
+ # value>_score", where M is the number of distinct target values,
+ # i.e. the number of distinct values in the target column of the table
+ # used to train the model. Subsequent lines will contain the
+ # respective values of successfully predicted rows, with the last,
+ # i.e. the target, columns holding the corresponding prediction
+ # [scores][google.cloud.automl.v1p1beta.TablesAnnotation.score].
+ # For REGRESSION and FORECASTING
+ # [prediction_type-s][google.cloud.automl.v1p1beta.TablesModelMetadata.prediction_type]:
+ # Each .csv file will contain a header listing all columns'
+ # [display_name-s][google.cloud.automl.v1p1beta.ColumnSpec.display_name]
+ # given on input, followed by the predicted target column with name
+ # in the format of
+ # "predicted_<[target_column_specs][google.cloud.automl.v1p1beta.TablesModelMetadata.target_column_spec]
+ # [display_name][google.cloud.automl.v1p1beta.ColumnSpec.display_name]>".
+ # Subsequent lines will contain the respective values of
+ # successfully predicted rows, with the last, i.e. the target,
+ # column holding the predicted target value.
+ # If prediction for any rows failed, then additional
+ # `errors_1.csv`, `errors_2.csv`,..., `errors_N.csv` files will be
+ # created (N depends on the total number of failed rows). These files
+ # will have an analogous format to `tables_*.csv`, but always with a
+ # single target column holding a
+ # [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
+ # represented as a JSON string, and containing only `code` and
+ # `message`. A CSV-reading sketch follows below.
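+ #
+ # A minimal sketch (not part of the official docs) of picking the
+ # highest-scoring target value from a CLASSIFICATION `tables_*.csv`
+ # file, relying on the "_score" column-name suffix described above:
+ #
+ #     require "csv"
+ #
+ #     CSV.foreach "tables_1.csv", headers: true do |row|
+ #       score_columns = row.headers.select { |h| h.end_with? "_score" }
+ #       best = score_columns.max_by { |h| row[h].to_f }
+ #       puts "best target column: #{best} (score #{row[best]})"
+ #     end
+ #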
+ # BigQuery case:
+ # [bigquery_destination][google.cloud.automl.v1p1beta.OutputConfig.bigquery_destination]
+ # pointing to a BigQuery project must be set. In the given project a
+ # new dataset will be created with name
+ # `prediction_<model-display-name>_<timestamp-of-prediction-call>`,
+ # where <model-display-name> will be made
+ # BigQuery-dataset-name compatible (e.g. most special characters will
+ # become underscores), and the timestamp will be in
+ # YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset
+ # two tables will be created: `predictions` and `errors`.
+ # The `predictions` table's column names will be the input columns'
+ # [display_name-s][google.cloud.automl.v1p1beta.ColumnSpec.display_name]
+ # followed by the target column with name in the format of
+ # "predicted_<[target_column_specs][google.cloud.automl.v1p1beta.TablesModelMetadata.target_column_spec]
+ # [display_name][google.cloud.automl.v1p1beta.ColumnSpec.display_name]>".
+ # The input feature columns will contain the respective values of
+ # successfully predicted rows, with the target column holding an
+ # ARRAY of
+ # [AnnotationPayloads][google.cloud.automl.v1p1beta.AnnotationPayload],
+ # represented as STRUCT-s, containing
+ # [TablesAnnotation][google.cloud.automl.v1p1beta.TablesAnnotation].
+ # The `errors` table contains rows for which the prediction has
+ # failed; it has analogous input columns, while the target column name
+ # is in the format of
+ # "errors_<[target_column_specs][google.cloud.automl.v1p1beta.TablesModelMetadata.target_column_spec]
+ # [display_name][google.cloud.automl.v1p1beta.ColumnSpec.display_name]>",
+ # and as a value holds a
+ # [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
+ # represented as a STRUCT, and containing only `code` and `message`.
+ # A sketch of reading the `errors` table follows below.
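+ #
+ # A minimal sketch (not part of the official docs) of listing failed
+ # rows via the google-cloud-bigquery gem; the project and dataset
+ # names are hypothetical, with the dataset name following the
+ # `prediction_<model-display-name>_<timestamp>` pattern above:
+ #
+ #     require "google/cloud/bigquery"
+ #
+ #     bigquery = Google::Cloud::Bigquery.new project_id: "my-project"
+ #     dataset  = bigquery.dataset "prediction_my_model_2020_01_01T00_00_00_000Z"
+ #     dataset.table("errors").data.each do |row|
+ #       # the errors_* STRUCT value carries only `code` and `message`
+ #       puts row
+ #     end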
+ # @!attribute [rw] gcs_destination
+ # @return [Google::Cloud::AutoML::V1::GcsDestination]
+ # Required. The Google Cloud Storage location of the directory where the output is to
+ # be written.
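+ #
+ # A minimal construction sketch; the output URI is a hypothetical
+ # example, not part of the API docs:
+ #
+ #     require "google/cloud/automl/v1"
+ #
+ #     output_config = Google::Cloud::AutoML::V1::BatchPredictOutputConfig.new(
+ #       gcs_destination: Google::Cloud::AutoML::V1::GcsDestination.new(
+ #         output_uri_prefix: "gs://my-bucket/batch-predict-output"
+ #       )
+ #     )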
+ class BatchPredictOutputConfig
+ include Google::Protobuf::MessageExts
+ extend Google::Protobuf::MessageExts::ClassMethods
+ end
+
+ # Output configuration for ModelExport Action.
+ # @!attribute [rw] gcs_destination
+ # @return [Google::Cloud::AutoML::V1::GcsDestination]
+ # Required. The Google Cloud Storage location where the model is to be written.
+ # This location may only be set for the following model formats:
+ # "tflite", "edgetpu_tflite", "tf_saved_model", "tf_js", "core_ml".
+ #
+ # Under the directory given as the destination a new one with name
+ # "model-export-<model-display-name>-<timestamp-of-export-call>",
+ # where the timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format,
+ # will be created. Inside it, the model and any of its supporting
+ # files will be written.
+ # @!attribute [rw] model_format
+ # @return [String]
+ # The format in which the model must be exported. The available, and default,
+ # formats depend on the problem and model type (if a given problem and type
+ # combination doesn't have a format listed, its models are not
+ # exportable):
+ #
+ # * For Image Classification mobile-low-latency-1, mobile-versatile-1,
+ # mobile-high-accuracy-1:
+ # "tflite" (default), "edgetpu_tflite", "tf_saved_model", "tf_js",
+ # "docker".
+ #
+ # * For Image Classification mobile-core-ml-low-latency-1,
+ # mobile-core-ml-versatile-1, mobile-core-ml-high-accuracy-1:
+ # "core_ml" (default).
+ #
+ # * For Image Object Detection mobile-low-latency-1, mobile-versatile-1,
+ # mobile-high-accuracy-1:
+ # "tflite", "tf_saved_model", "tf_js".
+ #
+ # Formats description:
+ #
+ # * tflite - Used for Android mobile devices.
+ # * edgetpu_tflite - Used for [Edge TPU](https://cloud.google.com/edge-tpu/)
+ # devices.
+ # * tf_saved_model - A TensorFlow model in SavedModel format.
+ # * tf_js - A [TensorFlow.js](https://www.tensorflow.org/js) model that can
+ # be used in the browser and in Node.js using JavaScript.
+ # * docker - Used for Docker containers. Use the params field to customize
+ # the container. The container is verified to work correctly on the
+ # Ubuntu 16.04 operating system. See more at the
+ # [containers quickstart](https://cloud.google.com/vision/automl/docs/containers-gcs-quickstart).
+ # * core_ml - Used for iOS mobile devices.
+ # @!attribute [rw] params
+ # @return [Google::Protobuf::Map{String => String}]
+ # Additional model-type- and format-specific parameters describing the
+ # requirements for the model files to be exported; any string must be
+ # at most 25000 characters long.
+ #
+ # * For `docker` format:
+ # `cpu_architecture` - (string) "x86_64" (default).
+ # `gpu_architecture` - (string) "none" (default), "nvidia".
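+ #
+ # A minimal construction sketch for a "docker" export; the bucket name
+ # is a hypothetical example, and the params keys are the ones listed
+ # above:
+ #
+ #     require "google/cloud/automl/v1"
+ #
+ #     export_config = Google::Cloud::AutoML::V1::ModelExportOutputConfig.new(
+ #       gcs_destination: Google::Cloud::AutoML::V1::GcsDestination.new(
+ #         output_uri_prefix: "gs://my-bucket/model-exports"
+ #       ),
+ #       model_format: "docker",
+ #       params: { "cpu_architecture" => "x86_64", "gpu_architecture" => "nvidia" }
+ #     )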
+ class ModelExportOutputConfig
+ include Google::Protobuf::MessageExts
+ extend Google::Protobuf::MessageExts::ClassMethods
+
+ # @!attribute [rw] key
+ # @return [String]
+ # @!attribute [rw] value
+ # @return [String]
+ class ParamsEntry
+ include Google::Protobuf::MessageExts
+ extend Google::Protobuf::MessageExts::ClassMethods
+ end
+ end
+
+ # The Google Cloud Storage location for the input content.
+ # @!attribute [rw] input_uris
+ # @return [Array<String>]
+ # Required. Google Cloud Storage URIs to input files, up to 2000
+ # characters long. Accepted forms:
+ # * Full object path, e.g. gs://bucket/directory/object.csv
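+ #
+ # A minimal construction sketch, using the accepted form shown above
+ # (the bucket and object names are placeholders):
+ #
+ #     require "google/cloud/automl/v1"
+ #
+ #     source = Google::Cloud::AutoML::V1::GcsSource.new(
+ #       input_uris: ["gs://bucket/directory/object.csv"]
+ #     )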
+ class GcsSource
+ include Google::Protobuf::MessageExts
+ extend Google::Protobuf::MessageExts::ClassMethods
+ end
+
+ # The Google Cloud Storage location where the output is to be written.
+ # @!attribute [rw] output_uri_prefix
+ # @return [String]
+ # Required. Google Cloud Storage URI to the output directory, up to 2000
+ # characters long.
+ # Accepted forms:
+ # * Prefix path: gs://bucket/directory
+ # The requesting user must have write permission to the bucket.
+ # The directory is created if it doesn't exist.
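+ #
+ # A minimal construction sketch, using the prefix-path form shown
+ # above (the bucket and directory names are placeholders):
+ #
+ #     require "google/cloud/automl/v1"
+ #
+ #     destination = Google::Cloud::AutoML::V1::GcsDestination.new(
+ #       output_uri_prefix: "gs://bucket/directory"
+ #     )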
+ class GcsDestination
+ include Google::Protobuf::MessageExts
+ extend Google::Protobuf::MessageExts::ClassMethods
+ end
+ end
+ end
+ end
+ end