google-cloud-automl 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/lib/google/cloud/automl.rb +2 -2
- data/lib/google/cloud/automl/v1beta1.rb +2 -2
- data/lib/google/cloud/automl/v1beta1/automl_client.rb +4 -20
- data/lib/google/cloud/automl/v1beta1/classification_pb.rb +1 -0
- data/lib/google/cloud/automl/v1beta1/data_items_pb.rb +40 -1
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/classification.rb +9 -0
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/column_spec.rb +1 -6
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/data_items.rb +125 -9
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/data_types.rb +0 -8
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/detection.rb +4 -8
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/image.rb +10 -3
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/io.rb +257 -125
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/model.rb +4 -8
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/model_evaluation.rb +5 -9
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/prediction_service.rb +56 -6
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/service.rb +3 -18
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/table_spec.rb +4 -0
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/tables.rb +26 -30
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/text.rb +2 -4
- data/lib/google/cloud/automl/v1beta1/prediction_service_client.rb +40 -7
- data/lib/google/cloud/automl/v1beta1/prediction_service_pb.rb +3 -1
- data/lib/google/cloud/automl/v1beta1/prediction_service_services_pb.rb +10 -5
- data/lib/google/cloud/automl/v1beta1/service_services_pb.rb +1 -2
- data/lib/google/cloud/automl/v1beta1/table_spec_pb.rb +1 -0
- data/lib/google/cloud/automl/v1beta1/tables_pb.rb +0 -4
- data/lib/google/cloud/automl/version.rb +1 -1
- metadata +3 -3
@@ -115,9 +115,16 @@ module Google
|
|
115
115
|
# `train_cost` will be equal or less than this value. If further model
|
116
116
|
# training ceases to provide any improvements, it will stop without using
|
117
117
|
# full budget and the stop_reason will be `MODEL_CONVERGED`.
|
118
|
-
# Note, node_hour = actual_hour * number_of_nodes_invovled.
|
119
|
-
#
|
120
|
-
#
|
118
|
+
# Note, node_hour = actual_hour * number_of_nodes_involved.
|
119
|
+
# For model type `cloud-high-accuracy-1`(default) and `cloud-low-latency-1`,
|
120
|
+
# the train budget must be between 20,000 and 2,000,000 milli node hours,
|
121
|
+
# inclusive. The default value is 216,000 which represents one day in
|
122
|
+
# wall time.
|
123
|
+
# For model type `mobile-low-latency-1`, `mobile-versatile-1`,
|
124
|
+
# `mobile-high-accuracy-1`, `mobile-core-ml-low-latency-1`,
|
125
|
+
# `mobile-core-ml-versatile-1`, `mobile-core-ml-high-accuracy-1`, the train
|
126
|
+
# budget must be between 1,000 and 100,000 milli node hours, inclusive.
|
127
|
+
# The default value is 24,000 which represents one day in wall time.
|
121
128
|
# @!attribute [rw] train_cost_milli_node_hours
|
122
129
|
# @return [Integer]
|
123
130
|
# Output only. The actual train cost of creating this model, expressed in
|
@@ -24,10 +24,10 @@ module Google
|
|
24
24
|
# {Google::Cloud::AutoML::V1beta1::InputConfig#gcs_source gcs_source}
|
25
25
|
# is expected, unless specified otherwise. Additionally any input .CSV file
|
26
26
|
# by itself must be 100MB or smaller, unless specified otherwise.
|
27
|
-
# If an "example" file (
|
27
|
+
# If an "example" file (that is, image, video etc.) with identical content
|
28
28
|
# (even if it had different GCS_FILE_PATH) is mentioned multiple times, then
|
29
29
|
# its label, bounding boxes etc. are appended. The same file should be always
|
30
|
-
# provided with the same ML_USE and GCS_FILE_PATH, if it is not then
|
30
|
+
# provided with the same ML_USE and GCS_FILE_PATH, if it is not, then
|
31
31
|
# these values are nondeterministically selected from the given ones.
|
32
32
|
#
|
33
33
|
# The formats are represented in EBNF with commas being literal and with
|
@@ -57,12 +57,16 @@ module Google
|
|
57
57
|
# BOUNDING_BOX-es per image are allowed (one BOUNDING_BOX is defined
|
58
58
|
# per line). If an image has not yet been labeled, then it should be
|
59
59
|
# mentioned just once with no LABEL and the ",,,,,,," in place of the
|
60
|
+
# BOUNDING_BOX. For images which are known to not contain any
|
61
|
+
# bounding boxes, they should be labelled explicitly as
|
62
|
+
# "NEGATIVE_IMAGE", followed by ",,,,,,," in place of the
|
60
63
|
# BOUNDING_BOX.
|
61
|
-
#
|
64
|
+
# Sample rows:
|
62
65
|
# TRAIN,gs://folder/image1.png,car,0.1,0.1,,,0.3,0.3,,
|
63
66
|
# TRAIN,gs://folder/image1.png,bike,.7,.6,,,.8,.9,,
|
64
67
|
# UNASSIGNED,gs://folder/im2.png,car,0.1,0.1,0.2,0.1,0.2,0.3,0.1,0.3
|
65
68
|
# TEST,gs://folder/im3.png,,,,,,,,,
|
69
|
+
# TRAIN,gs://folder/im4.png,NEGATIVE_IMAGE,,,,,,,,,
|
66
70
|
#
|
67
71
|
# * For Video Classification:
|
68
72
|
# CSV file(s) with each line in format:
|
@@ -127,21 +131,26 @@ module Google
|
|
127
131
|
# * For Text Extraction:
|
128
132
|
# CSV file(s) with each line in format:
|
129
133
|
# ML_USE,GCS_FILE_PATH
|
130
|
-
# GCS_FILE_PATH leads to a .JSONL (
|
131
|
-
# imports text in-line or as documents.
|
134
|
+
# GCS_FILE_PATH leads to a .JSONL (that is, JSON Lines) file which
|
135
|
+
# either imports text in-line or as documents. Any given
|
136
|
+
# .JSONL file must be 100MB or smaller.
|
132
137
|
# The in-line .JSONL file contains, per line, a proto that wraps a
|
133
|
-
# TextSnippet proto (in json representation) followed by one or
|
134
|
-
#
|
135
|
-
# display_name and text_extraction detail populated.
|
136
|
-
#
|
137
|
-
#
|
138
|
-
#
|
139
|
-
#
|
140
|
-
#
|
141
|
-
#
|
142
|
-
#
|
143
|
-
#
|
144
|
-
#
|
138
|
+
# TextSnippet proto (in json representation) followed by one or more
|
139
|
+
# AnnotationPayload protos (called annotations), which have
|
140
|
+
# display_name and text_extraction detail populated. The given text
|
141
|
+
# is expected to be annotated exhaustively, for example, if you look
|
142
|
+
# for animals and text contains "dolphin" that is not labeled, then
|
143
|
+
# "dolphin" is assumed to not be an animal. Any given text snippet
|
144
|
+
# content must be 10KB or smaller, and also be UTF-8 NFC encoded
|
145
|
+
# (ASCII already is).
|
146
|
+
# The document .JSONL file contains, per line, a proto that wraps a
|
147
|
+
# Document proto. The Document proto must have either document_text
|
148
|
+
# or input_config set. In document_text case, the Document proto may
|
149
|
+
# also contain the spatial information of the document, including
|
150
|
+
# layout, document dimension and page number. In input_config case,
|
151
|
+
# only PDF documents are supported now, and each document may be up
|
152
|
+
# to 2MB large. Currently, annotations on documents cannot be
|
153
|
+
# specified at import.
|
145
154
|
# Three sample CSV rows:
|
146
155
|
# TRAIN,gs://folder/file1.jsonl
|
147
156
|
# VALIDATE,gs://folder/file2.jsonl
|
@@ -150,27 +159,61 @@ module Google
|
|
150
159
|
# with artificial line breaks, but the only actual line break is
|
151
160
|
# denoted by \n).:
|
152
161
|
# {
|
153
|
-
# "
|
154
|
-
# "content": "dog
|
155
|
-
#
|
162
|
+
# "document": {
|
163
|
+
# "document_text": {"content": "dog cat"}
|
164
|
+
# "layout": [
|
156
165
|
# {
|
157
|
-
# "
|
158
|
-
# "
|
159
|
-
# "
|
160
|
-
# }
|
166
|
+
# "text_segment": {
|
167
|
+
# "start_offset": 0,
|
168
|
+
# "end_offset": 3,
|
169
|
+
# },
|
170
|
+
# "page_number": 1,
|
171
|
+
# "bounding_poly": {
|
172
|
+
# "normalized_vertices": [
|
173
|
+
# {"x": 0.1, "y": 0.1},
|
174
|
+
# {"x": 0.1, "y": 0.3},
|
175
|
+
# {"x": 0.3, "y": 0.3},
|
176
|
+
# {"x": 0.3, "y": 0.1},
|
177
|
+
# ],
|
178
|
+
# },
|
179
|
+
# "text_segment_type": TOKEN,
|
161
180
|
# },
|
162
181
|
# {
|
163
|
-
# "
|
164
|
-
# "
|
165
|
-
# "
|
182
|
+
# "text_segment": {
|
183
|
+
# "start_offset": 4,
|
184
|
+
# "end_offset": 7,
|
185
|
+
# },
|
186
|
+
# "page_number": 1,
|
187
|
+
# "bounding_poly": {
|
188
|
+
# "normalized_vertices": [
|
189
|
+
# {"x": 0.4, "y": 0.1},
|
190
|
+
# {"x": 0.4, "y": 0.3},
|
191
|
+
# {"x": 0.8, "y": 0.3},
|
192
|
+
# {"x": 0.8, "y": 0.1},
|
193
|
+
# ],
|
194
|
+
# },
|
195
|
+
# "text_segment_type": TOKEN,
|
196
|
+
# }
|
197
|
+
#
|
198
|
+
# ],
|
199
|
+
# "document_dimensions": {
|
200
|
+
# "width": 8.27,
|
201
|
+
# "height": 11.69,
|
202
|
+
# "unit": INCH,
|
166
203
|
# }
|
204
|
+
# "page_count": 1,
|
167
205
|
# },
|
206
|
+
# "annotations": [
|
168
207
|
# {
|
169
208
|
# "display_name": "animal",
|
170
|
-
# "text_extraction": {
|
171
|
-
# "
|
172
|
-
# }
|
209
|
+
# "text_extraction": {"text_segment": {"start_offset": 0,
|
210
|
+
# "end_offset": 3}}
|
173
211
|
# },
|
212
|
+
# {
|
213
|
+
# "display_name": "animal",
|
214
|
+
# "text_extraction": {"text_segment": {"start_offset": 4,
|
215
|
+
# "end_offset": 7}}
|
216
|
+
# }
|
174
217
|
# ],
|
175
218
|
# }\n
|
176
219
|
# {
|
@@ -211,15 +254,15 @@ module Google
|
|
211
254
|
# TEXT_SNIPPET and GCS_FILE_PATH are distinguished by a pattern. If
|
212
255
|
# the column content is a valid gcs file path, i.e. prefixed by
|
213
256
|
# "gs://", it will be treated as a GCS_FILE_PATH, else if the content
|
214
|
-
# is enclosed within double quotes (""), it
|
215
|
-
#
|
216
|
-
# must lead to a .txt file with UTF-8 encoding,
|
217
|
-
# "gs://folder/content.txt", and the content in it
|
257
|
+
# is enclosed within double quotes (""), it is
|
258
|
+
# treated as a TEXT_SNIPPET. In the GCS_FILE_PATH case, the path
|
259
|
+
# must lead to a .txt file with UTF-8 encoding, for example,
|
260
|
+
# "gs://folder/content.txt", and the content in it is extracted
|
218
261
|
# as a text snippet. In TEXT_SNIPPET case, the column content
|
219
|
-
# excluding quotes
|
262
|
+
# excluding quotes is treated as the text snippet to be imported. In
|
220
263
|
# both cases, the text snippet/file size must be within 128kB.
|
221
264
|
# Maximum 100 unique labels are allowed per CSV row.
|
222
|
-
#
|
265
|
+
# Sample rows:
|
223
266
|
# TRAIN,"They have bad food and very rude",RudeService,BadFood
|
224
267
|
# TRAIN,gs://folder/content.txt,SlowService
|
225
268
|
# TEST,"Typically always bad service there.",RudeService
|
@@ -229,18 +272,18 @@ module Google
|
|
229
272
|
# CSV file(s) with each line in format:
|
230
273
|
# ML_USE,(TEXT_SNIPPET | GCS_FILE_PATH),SENTIMENT
|
231
274
|
# TEXT_SNIPPET and GCS_FILE_PATH are distinguished by a pattern. If
|
232
|
-
# the column content is a valid gcs file path,
|
233
|
-
# "gs://", it
|
234
|
-
#
|
235
|
-
# must lead to a .txt file with UTF-8 encoding,
|
236
|
-
# "gs://folder/content.txt", and the content in it
|
275
|
+
# the column content is a valid gcs file path, that is, prefixed by
|
276
|
+
# "gs://", it is treated as a GCS_FILE_PATH, otherwise it is treated
|
277
|
+
# as a TEXT_SNIPPET. In the GCS_FILE_PATH case, the path
|
278
|
+
# must lead to a .txt file with UTF-8 encoding, for example,
|
279
|
+
# "gs://folder/content.txt", and the content in it is extracted
|
237
280
|
# as a text snippet. In TEXT_SNIPPET case, the column content itself
|
238
|
-
#
|
281
|
+
# is treated as the text snippet to be imported. In both cases, the
|
239
282
|
# text snippet must be up to 500 characters long.
|
240
|
-
#
|
241
|
-
# TRAIN,"@freewrytin
|
242
|
-
# TRAIN,"I need
|
243
|
-
# TEST,"Thank
|
283
|
+
# Sample rows:
|
284
|
+
# TRAIN,"@freewrytin this is way too good for your product",2
|
285
|
+
# TRAIN,"I need this product so bad",3
|
286
|
+
# TEST,"Thank you for this product.",4
|
244
287
|
# VALIDATE,gs://folder/content.txt,2
|
245
288
|
#
|
246
289
|
# * For Tables:
|
@@ -248,7 +291,7 @@ module Google
|
|
248
291
|
# {Google::Cloud::AutoML::V1beta1::InputConfig#gcs_source gcs_source} or
|
249
292
|
#
|
250
293
|
# {Google::Cloud::AutoML::V1beta1::InputConfig#bigquery_source bigquery_source}
|
251
|
-
# can be used. All inputs
|
294
|
+
# can be used. All inputs are concatenated into a single
|
252
295
|
#
|
253
296
|
# {Google::Cloud::AutoML::V1beta1::TablesDatasetMetadata#primary_table_name primary_table}
|
254
297
|
# For gcs_source:
|
@@ -271,7 +314,6 @@ module Google
|
|
271
314
|
# An imported table must have between 2 and 1,000 columns, inclusive,
|
272
315
|
# and between 1000 and 100,000,000 rows, inclusive. There are at most 5
|
273
316
|
# import data running in parallel.
|
274
|
-
#
|
275
317
|
# Definitions:
|
276
318
|
# ML_USE = "TRAIN" | "VALIDATE" | "TEST" | "UNASSIGNED"
|
277
319
|
# Describes how the given example (file) should be used for model
|
@@ -330,7 +372,7 @@ module Google
|
|
330
372
|
# If any of the provided CSV files can't be parsed or if more than certain
|
331
373
|
# percent of CSV rows cannot be processed then the operation fails and
|
332
374
|
# nothing is imported. Regardless of overall success or failure the per-row
|
333
|
-
# failures, up to a certain count cap,
|
375
|
+
# failures, up to a certain count cap, are listed in
|
334
376
|
# Operation.metadata.partial_failures.
|
335
377
|
# @!attribute [rw] gcs_source
|
336
378
|
# @return [Google::Cloud::AutoML::V1beta1::GcsSource]
|
@@ -363,6 +405,28 @@ module Google
|
|
363
405
|
# The formats are represented in EBNF with commas being literal and with
|
364
406
|
# non-terminal symbols defined near the end of this comment. The formats
|
365
407
|
# are:
|
408
|
+
#
|
409
|
+
# * For Image Classification:
|
410
|
+
# CSV file(s) with each line having just a single column:
|
411
|
+
# GCS_FILE_PATH
|
412
|
+
# which leads to image of up to 30MB in size. Supported
|
413
|
+
# extensions: .JPEG, .GIF, .PNG. This path is treated as the ID in
|
414
|
+
# the Batch predict output.
|
415
|
+
# Three sample rows:
|
416
|
+
# gs://folder/image1.jpeg
|
417
|
+
# gs://folder/image2.gif
|
418
|
+
# gs://folder/image3.png
|
419
|
+
#
|
420
|
+
# * For Image Object Detection:
|
421
|
+
# CSV file(s) with each line having just a single column:
|
422
|
+
# GCS_FILE_PATH
|
423
|
+
# which leads to image of up to 30MB in size. Supported
|
424
|
+
# extensions: .JPEG, .GIF, .PNG. This path is treated as the ID in
|
425
|
+
# the Batch predict output.
|
426
|
+
# Three sample rows:
|
427
|
+
# gs://folder/image1.jpeg
|
428
|
+
# gs://folder/image2.gif
|
429
|
+
# gs://folder/image3.png
|
366
430
|
# * For Video Classification:
|
367
431
|
# CSV file(s) with each line in format:
|
368
432
|
# GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END
|
@@ -386,6 +450,28 @@ module Google
|
|
386
450
|
# gs://folder/video1.mp4,10,240
|
387
451
|
# gs://folder/video1.mp4,300,360
|
388
452
|
# gs://folder/vid2.mov,0,inf
|
453
|
+
# * For Text Classification:
|
454
|
+
# CSV file(s) with each line having just a single column:
|
455
|
+
# GCS_FILE_PATH | TEXT_SNIPPET
|
456
|
+
# Any given text file can have size up to 128kB.
|
457
|
+
# Any given text snippet content must have 60,000 characters or less.
|
458
|
+
# Three sample rows:
|
459
|
+
# gs://folder/text1.txt
|
460
|
+
# "Some text content to predict"
|
461
|
+
# gs://folder/text3.pdf
|
462
|
+
# Supported file extensions: .txt, .pdf
|
463
|
+
#
|
464
|
+
# * For Text Sentiment:
|
465
|
+
# CSV file(s) with each line having just a single column:
|
466
|
+
# GCS_FILE_PATH | TEXT_SNIPPET
|
467
|
+
# Any given text file can have size up to 128kB.
|
468
|
+
# Any given text snippet content must have 500 characters or less.
|
469
|
+
# Three sample rows:
|
470
|
+
# gs://folder/text1.txt
|
471
|
+
# "Some text content to predict"
|
472
|
+
# gs://folder/text3.pdf
|
473
|
+
# Supported file extensions: .txt, .pdf
|
474
|
+
#
|
389
475
|
# * For Text Extraction
|
390
476
|
# .JSONL (i.e. JSON Lines) file(s) which either provide text in-line or
|
391
477
|
# as documents (for a single BatchPredict call only one of the these
|
@@ -458,101 +544,51 @@ module Google
|
|
458
544
|
# 100GB or smaller, where first file must have a header containing
|
459
545
|
# column names. If the first row of a subsequent file is the same as
|
460
546
|
# the header, then it is also treated as a header. All other rows
|
461
|
-
# contain values for the corresponding columns.
|
462
|
-
#
|
463
|
-
#
|
464
|
-
# {Google::Cloud::AutoML::V1beta1::TablesModelMetadata#prediction_type prediction_type-s}:
|
465
|
-
# The column names must contain the model's
|
547
|
+
# contain values for the corresponding columns.
|
548
|
+
# The column names must contain the model's
|
466
549
|
#
|
467
550
|
# {Google::Cloud::AutoML::V1beta1::TablesModelMetadata#input_feature_column_specs input_feature_column_specs'}
|
468
551
|
#
|
469
552
|
# {Google::Cloud::AutoML::V1beta1::ColumnSpec#display_name display_name-s}
|
470
|
-
#
|
471
|
-
#
|
472
|
-
#
|
473
|
-
#
|
474
|
-
#
|
553
|
+
# (order doesn't matter). The columns corresponding to the model's
|
554
|
+
# input feature column specs must contain values compatible with the
|
555
|
+
# column spec's data types. Prediction on all the rows, i.e. the CSV
|
556
|
+
# lines, will be attempted. For FORECASTING
|
557
|
+
#
|
558
|
+
# {Google::Cloud::AutoML::V1beta1::TablesModelMetadata#prediction_type prediction_type}:
|
559
|
+
# all columns having
|
560
|
+
#
|
561
|
+
# {Google::Cloud::AutoML::V1beta1::ColumnSpec::ForecastingMetadata::ColumnType TIME_SERIES_AVAILABLE_PAST_ONLY}
|
562
|
+
# type will be ignored.
|
563
|
+
# First three sample rows of a CSV file:
|
475
564
|
# "First Name","Last Name","Dob","Addresses"
|
476
565
|
#
|
477
566
|
# "John","Doe","1968-01-22","[{"status":"current","address":"123_First_Avenue","city":"Seattle","state":"WA","zip":"11111","numberOfYears":"1"},{"status":"previous","address":"456_Main_Street","city":"Portland","state":"OR","zip":"22222","numberOfYears":"5"}]"
|
478
567
|
#
|
479
568
|
# "Jane","Doe","1980-10-16","[{"status":"current","address":"789_Any_Avenue","city":"Albany","state":"NY","zip":"33333","numberOfYears":"2"},{"status":"previous","address":"321_Main_Street","city":"Hoboken","state":"NJ","zip":"44444","numberOfYears":"3"}]}
|
480
|
-
# For FORECASTING
|
481
|
-
#
|
482
|
-
# {Google::Cloud::AutoML::V1beta1::TablesModelMetadata#prediction_type prediction_type}:
|
483
|
-
# The column names must contain the union of the model's
|
484
|
-
#
|
485
|
-
# {Google::Cloud::AutoML::V1beta1::TablesModelMetadata#input_feature_column_specs input_feature_column_specs'}
|
486
|
-
#
|
487
|
-
# {Google::Cloud::AutoML::V1beta1::ColumnSpec#display_name display_name-s}
|
488
|
-
# and
|
489
|
-
#
|
490
|
-
# {Google::Cloud::AutoML::V1beta1::TablesModelMetadata#target_column_spec target_column_specs'}
|
491
|
-
#
|
492
|
-
# {Google::Cloud::AutoML::V1beta1::ColumnSpec#display_name display_name}
|
493
|
-
# (order doesn't matter), with values compatible with these column
|
494
|
-
# specs data types, except as specified below.
|
495
|
-
# The input rows must contain not only the to-be-predicted rows
|
496
|
-
# but also the historical data rows, even if they would be
|
497
|
-
# identical as the ones on which the model has been trained.
|
498
|
-
# The historical rows must have non-NULL target column
|
499
|
-
# values. The to-be-predicted rows must have NULL values in the
|
500
|
-
# target column and all columns having
|
501
|
-
#
|
502
|
-
# {Google::Cloud::AutoML::V1beta1::ColumnSpec::ForecastingMetadata::ColumnType::KEY TIME_SERIES_AVAILABLE_PAST_ONLY}
|
503
|
-
# type, regardless if these columns are
|
504
|
-
# {Google::Cloud::AutoML::V1beta1::DataType#nullable nullable}.
|
505
|
-
# Prediction only on the to-be-predicted rows will be attempted.
|
506
|
-
# First four sample rows of a CSV file:
|
507
|
-
#
|
508
|
-
# "Year","City","OlympicsThatYear","Population","WaterUsedGigaGallons"
|
509
|
-
# "2000","NYC","true","8008278","452.7"
|
510
|
-
# "2001","NYC","false","8024963","432.2"
|
511
|
-
# "2002","NYC","true","",""
|
512
569
|
# BigQuery case:
|
513
570
|
# An URI of a BigQuery table. The user data size of the BigQuery
|
514
571
|
# table must be 100GB or smaller.
|
515
|
-
#
|
516
|
-
#
|
517
|
-
# {Google::Cloud::AutoML::V1beta1::TablesModelMetadata#prediction_type prediction_type-s}:
|
518
|
-
# The column names must contain the model's
|
572
|
+
# The column names must contain the model's
|
519
573
|
#
|
520
574
|
# {Google::Cloud::AutoML::V1beta1::TablesModelMetadata#input_feature_column_specs input_feature_column_specs'}
|
521
575
|
#
|
522
576
|
# {Google::Cloud::AutoML::V1beta1::ColumnSpec#display_name display_name-s}
|
523
|
-
#
|
524
|
-
#
|
525
|
-
#
|
526
|
-
#
|
527
|
-
# For FORECASTING
|
577
|
+
# (order doesn't matter). The columns corresponding to the model's
|
578
|
+
# input feature column specs must contain values compatible with the
|
579
|
+
# column spec's data types. Prediction on all the rows of the table
|
580
|
+
# will be attempted. For FORECASTING
|
528
581
|
#
|
529
582
|
# {Google::Cloud::AutoML::V1beta1::TablesModelMetadata#prediction_type prediction_type}:
|
530
|
-
#
|
531
|
-
#
|
532
|
-
# {Google::Cloud::AutoML::V1beta1::TablesModelMetadata#input_feature_column_specs input_feature_column_specs'}
|
533
|
-
#
|
534
|
-
# {Google::Cloud::AutoML::V1beta1::ColumnSpec#display_name display_name-s}
|
535
|
-
# and
|
583
|
+
# all columns having
|
536
584
|
#
|
537
|
-
# {Google::Cloud::AutoML::V1beta1::
|
538
|
-
#
|
539
|
-
# {Google::Cloud::AutoML::V1beta1::ColumnSpec#display_name display_name}
|
540
|
-
# (order doesn't matter), with values compatible with these column
|
541
|
-
# specs data types, except as specified below.
|
542
|
-
# The table's rows must contain not only the to-be-predicted rows
|
543
|
-
# but also the historical data rows, even if they would be
|
544
|
-
# identical as the ones on which the model has been trained.
|
545
|
-
# The historical rows must have non-NULL target column values.
|
546
|
-
# The to-be-predicted rows must have NULL values in the
|
547
|
-
# target column and all columns having
|
548
|
-
#
|
549
|
-
# {Google::Cloud::AutoML::V1beta1::ColumnSpec::ForecastingMetadata::ColumnType::KEY TIME_SERIES_AVAILABLE_PAST_ONLY}
|
550
|
-
# type, regardless if these columns are
|
551
|
-
# {Google::Cloud::AutoML::V1beta1::DataType#nullable nullable}.
|
552
|
-
# Prediction only on the to-be-predicted rows will be attempted.
|
585
|
+
# {Google::Cloud::AutoML::V1beta1::ColumnSpec::ForecastingMetadata::ColumnType TIME_SERIES_AVAILABLE_PAST_ONLY}
|
586
|
+
# type will be ignored.
|
553
587
|
#
|
554
588
|
# Definitions:
|
555
589
|
# GCS_FILE_PATH = A path to file on GCS, e.g. "gs://folder/video.avi".
|
590
|
+
# TEXT_SNIPPET = A content of a text snippet, UTF-8 encoded, enclosed within
|
591
|
+
# double quotes ("")
|
556
592
|
# TIME_SEGMENT_START = TIME_OFFSET
|
557
593
|
# Expresses a beginning, inclusive, of a time segment
|
558
594
|
# within an
|
@@ -638,11 +674,55 @@ module Google
|
|
638
674
|
#
|
639
675
|
# {Google::Cloud::AutoML::V1beta1::BatchPredictOutputConfig#gcs_destination gcs_destination}
|
640
676
|
# must be set unless specified otherwise for a domain. If gcs_destination is
|
641
|
-
# set then in the given directory a new directory
|
677
|
+
# set then in the given directory a new directory is created. Its name
|
642
678
|
# will be
|
643
679
|
# "prediction-<model-display-name>-<timestamp-of-prediction-call>",
|
644
680
|
# where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. The contents
|
645
681
|
# of it depend on the ML problem the predictions are made for.
|
682
|
+
#
|
683
|
+
# * For Image Classification:
|
684
|
+
# In the created directory files `image_classification_1.jsonl`,
|
685
|
+
# `image_classification_2.jsonl`,...,`image_classification_N.jsonl`
|
686
|
+
# will be created, where N may be 1, and depends on the
|
687
|
+
# total number of the successfully predicted images and annotations.
|
688
|
+
# A single image will be listed only once with all its annotations,
|
689
|
+
# and its annotations will never be split across files.
|
690
|
+
# Each .JSONL file will contain, per line, a JSON representation of a
|
691
|
+
# proto that wraps image's "ID" : "<id_value>" followed by a list of
|
692
|
+
# zero or more AnnotationPayload protos (called annotations), which
|
693
|
+
# have classification detail populated.
|
694
|
+
# If prediction for any image failed (partially or completely), then an
|
695
|
+
# additional `errors_1.jsonl`, `errors_2.jsonl`,..., `errors_N.jsonl`
|
696
|
+
# files will be created (N depends on total number of failed
|
697
|
+
# predictions). These files will have a JSON representation of a proto
|
698
|
+
# that wraps the same "ID" : "<id_value>" but here followed by
|
699
|
+
# exactly one
|
700
|
+
#
|
701
|
+
# [`google.rpc.Status`](https:
|
702
|
+
# //github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
|
703
|
+
# containing only `code` and `message` fields.
|
704
|
+
#
|
705
|
+
# * For Image Object Detection:
|
706
|
+
# In the created directory files `image_object_detection_1.jsonl`,
|
707
|
+
# `image_object_detection_2.jsonl`,...,`image_object_detection_N.jsonl`
|
708
|
+
# will be created, where N may be 1, and depends on the
|
709
|
+
# total number of the successfully predicted images and annotations.
|
710
|
+
# Each .JSONL file will contain, per line, a JSON representation of a
|
711
|
+
# proto that wraps image's "ID" : "<id_value>" followed by a list of
|
712
|
+
# zero or more AnnotationPayload protos (called annotations), which
|
713
|
+
# have image_object_detection detail populated. A single image will
|
714
|
+
# be listed only once with all its annotations, and its annotations
|
715
|
+
# will never be split across files.
|
716
|
+
# If prediction for any image failed (partially or completely), then
|
717
|
+
# additional `errors_1.jsonl`, `errors_2.jsonl`,..., `errors_N.jsonl`
|
718
|
+
# files will be created (N depends on total number of failed
|
719
|
+
# predictions). These files will have a JSON representation of a proto
|
720
|
+
# that wraps the same "ID" : "<id_value>" but here followed by
|
721
|
+
# exactly one
|
722
|
+
#
|
723
|
+
# [`google.rpc.Status`](https:
|
724
|
+
# //github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
|
725
|
+
# containing only `code` and `message` fields.
|
646
726
|
# * For Video Classification:
|
647
727
|
# In the created directory a video_classification.csv file, and a .JSON
|
648
728
|
# file per each video classification requested in the input (i.e. each
|
@@ -695,6 +775,54 @@ module Google
|
|
695
775
|
# for each frame of the video time segment the file is assigned to in
|
696
776
|
# video_object_tracking.csv. All AnnotationPayload protos will have
|
697
777
|
# video_object_tracking field set.
|
778
|
+
# * For Text Classification:
|
779
|
+
# In the created directory files `text_classification_1.jsonl`,
|
780
|
+
# `text_classification_2.jsonl`,...,`text_classification_N.jsonl`
|
781
|
+
# will be created, where N may be 1, and depends on the
|
782
|
+
# total number of inputs and annotations found.
|
783
|
+
#
|
784
|
+
# Each .JSONL file will contain, per line, a JSON representation of a
|
785
|
+
# proto that wraps input text snippet or input text file and a list of
|
786
|
+
# zero or more AnnotationPayload protos (called annotations), which
|
787
|
+
# have classification detail populated. A single text snippet or file
|
788
|
+
# will be listed only once with all its annotations, and its
|
789
|
+
# annotations will never be split across files.
|
790
|
+
#
|
791
|
+
# If prediction for any text snippet or file failed (partially or
|
792
|
+
# completely), then additional `errors_1.jsonl`, `errors_2.jsonl`,...,
|
793
|
+
# `errors_N.jsonl` files will be created (N depends on total number of
|
794
|
+
# failed predictions). These files will have a JSON representation of a
|
795
|
+
# proto that wraps input text snippet or input text file followed by
|
796
|
+
# exactly one
|
797
|
+
#
|
798
|
+
# [`google.rpc.Status`](https:
|
799
|
+
# //github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
|
800
|
+
# containing only `code` and `message`.
|
801
|
+
#
|
802
|
+
# * For Text Sentiment:
|
803
|
+
# In the created directory files `text_sentiment_1.jsonl`,
|
804
|
+
# `text_sentiment_2.jsonl`,...,`text_sentiment_N.jsonl`
|
805
|
+
# will be created, where N may be 1, and depends on the
|
806
|
+
# total number of inputs and annotations found.
|
807
|
+
#
|
808
|
+
# Each .JSONL file will contain, per line, a JSON representation of a
|
809
|
+
# proto that wraps input text snippet or input text file and a list of
|
810
|
+
# zero or more AnnotationPayload protos (called annotations), which
|
811
|
+
# have text_sentiment detail populated. A single text snippet or file
|
812
|
+
# will be listed only once with all its annotations, and its
|
813
|
+
# annotations will never be split across files.
|
814
|
+
#
|
815
|
+
# If prediction for any text snippet or file failed (partially or
|
816
|
+
# completely), then additional `errors_1.jsonl`, `errors_2.jsonl`,...,
|
817
|
+
# `errors_N.jsonl` files will be created (N depends on total number of
|
818
|
+
# failed predictions). These files will have a JSON representation of a
|
819
|
+
# proto that wraps input text snippet or input text file followed by
|
820
|
+
# exactly one
|
821
|
+
#
|
822
|
+
# [`google.rpc.Status`](https:
|
823
|
+
# //github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
|
824
|
+
# containing only `code` and `message`.
|
825
|
+
#
|
698
826
|
# * For Text Extraction:
|
699
827
|
# In the created directory files `text_extraction_1.jsonl`,
|
700
828
|
# `text_extraction_2.jsonl`,...,`text_extraction_N.jsonl`
|
@@ -704,7 +832,8 @@ module Google
|
|
704
832
|
# used inline text, or documents.
|
705
833
|
# If input was inline, then each .JSONL file will contain, per line,
|
706
834
|
# a JSON representation of a proto that wraps given in request text
|
707
|
-
# snippet's "id"
|
835
|
+
# snippet's "id" (if specified), followed by input text snippet,
|
836
|
+
# and a list of zero or more
|
708
837
|
# AnnotationPayload protos (called annotations), which have
|
709
838
|
# text_extraction detail populated. A single text snippet will be
|
710
839
|
# listed only once with all its annotations, and its annotations will
|
@@ -872,7 +1001,10 @@ module Google
|
|
872
1001
|
# * docker - Used for Docker containers. Use the params field to customize
|
873
1002
|
# the container. The container is verified to work correctly on
|
874
1003
|
# ubuntu 16.04 operating system. See more at
|
875
|
-
# [containers
|
1004
|
+
# [containers
|
1005
|
+
#
|
1006
|
+
# quickstart](https:
|
1007
|
+
# //cloud.google.com/vision/automl/docs/containers-gcs-quickstart)
|
876
1008
|
# * core_ml - Used for iOS mobile devices.
|
877
1009
|
# @!attribute [rw] params
|
878
1010
|
# @return [Hash{String => String}]
|