google-cloud-automl 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/lib/google/cloud/automl.rb +2 -2
- data/lib/google/cloud/automl/v1beta1.rb +2 -2
- data/lib/google/cloud/automl/v1beta1/automl_client.rb +4 -20
- data/lib/google/cloud/automl/v1beta1/classification_pb.rb +1 -0
- data/lib/google/cloud/automl/v1beta1/data_items_pb.rb +40 -1
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/classification.rb +9 -0
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/column_spec.rb +1 -6
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/data_items.rb +125 -9
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/data_types.rb +0 -8
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/detection.rb +4 -8
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/image.rb +10 -3
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/io.rb +257 -125
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/model.rb +4 -8
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/model_evaluation.rb +5 -9
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/prediction_service.rb +56 -6
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/service.rb +3 -18
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/table_spec.rb +4 -0
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/tables.rb +26 -30
- data/lib/google/cloud/automl/v1beta1/doc/google/cloud/automl/v1beta1/text.rb +2 -4
- data/lib/google/cloud/automl/v1beta1/prediction_service_client.rb +40 -7
- data/lib/google/cloud/automl/v1beta1/prediction_service_pb.rb +3 -1
- data/lib/google/cloud/automl/v1beta1/prediction_service_services_pb.rb +10 -5
- data/lib/google/cloud/automl/v1beta1/service_services_pb.rb +1 -2
- data/lib/google/cloud/automl/v1beta1/table_spec_pb.rb +1 -0
- data/lib/google/cloud/automl/v1beta1/tables_pb.rb +0 -4
- data/lib/google/cloud/automl/version.rb +1 -1
- metadata +3 -3
@@ -115,9 +115,16 @@ module Google
|
|
115
115
|
# `train_cost` will be equal or less than this value. If further model
|
116
116
|
# training ceases to provide any improvements, it will stop without using
|
117
117
|
# full budget and the stop_reason will be `MODEL_CONVERGED`.
|
118
|
-
# Note, node_hour = actual_hour * number_of_nodes_invovled.
|
119
|
-
#
|
120
|
-
#
|
118
|
+
# Note, node_hour = actual_hour * number_of_nodes_involved.
|
119
|
+
# For model type `cloud-high-accuracy-1`(default) and `cloud-low-latency-1`,
|
120
|
+
# the train budget must be between 20,000 and 2,000,000 milli node hours,
|
121
|
+
# inclusive. The default value is 216,000 which represents one day in
|
122
|
+
# wall time.
|
123
|
+
# For model type `mobile-low-latency-1`, `mobile-versatile-1`,
|
124
|
+
# `mobile-high-accuracy-1`, `mobile-core-ml-low-latency-1`,
|
125
|
+
# `mobile-core-ml-versatile-1`, `mobile-core-ml-high-accuracy-1`, the train
|
126
|
+
# budget must be between 1,000 and 100,000 milli node hours, inclusive.
|
127
|
+
# The default value is 24,000 which represents one day in wall time.
|
121
128
|
# @!attribute [rw] train_cost_milli_node_hours
|
122
129
|
# @return [Integer]
|
123
130
|
# Output only. The actual train cost of creating this model, expressed in
|
@@ -24,10 +24,10 @@ module Google
|
|
24
24
|
# {Google::Cloud::AutoML::V1beta1::InputConfig#gcs_source gcs_source}
|
25
25
|
# is expected, unless specified otherwise. Additionally any input .CSV file
|
26
26
|
# by itself must be 100MB or smaller, unless specified otherwise.
|
27
|
-
# If an "example" file (
|
27
|
+
# If an "example" file (that is, image, video etc.) with identical content
|
28
28
|
# (even if it had different GCS_FILE_PATH) is mentioned multiple times, then
|
29
29
|
# its label, bounding boxes etc. are appended. The same file should be always
|
30
|
-
# provided with the same ML_USE and GCS_FILE_PATH, if it is not then
|
30
|
+
# provided with the same ML_USE and GCS_FILE_PATH, if it is not, then
|
31
31
|
# these values are nondeterministically selected from the given ones.
|
32
32
|
#
|
33
33
|
# The formats are represented in EBNF with commas being literal and with
|
@@ -57,12 +57,16 @@ module Google
|
|
57
57
|
# BOUNDING_BOX-es per image are allowed (one BOUNDING_BOX is defined
|
58
58
|
# per line). If an image has not yet been labeled, then it should be
|
59
59
|
# mentioned just once with no LABEL and the ",,,,,,," in place of the
|
60
|
+
# BOUNDING_BOX. For images which are known to not contain any
|
61
|
+
# bounding boxes, they should be labeled explicitly as
|
62
|
+
# "NEGATIVE_IMAGE", followed by ",,,,,,," in place of the
|
60
63
|
# BOUNDING_BOX.
|
61
|
-
#
|
64
|
+
# Sample rows:
|
62
65
|
# TRAIN,gs://folder/image1.png,car,0.1,0.1,,,0.3,0.3,,
|
63
66
|
# TRAIN,gs://folder/image1.png,bike,.7,.6,,,.8,.9,,
|
64
67
|
# UNASSIGNED,gs://folder/im2.png,car,0.1,0.1,0.2,0.1,0.2,0.3,0.1,0.3
|
65
68
|
# TEST,gs://folder/im3.png,,,,,,,,,
|
69
|
+
# TRAIN,gs://folder/im4.png,NEGATIVE_IMAGE,,,,,,,,,
|
66
70
|
#
|
67
71
|
# * For Video Classification:
|
68
72
|
# CSV file(s) with each line in format:
|
@@ -127,21 +131,26 @@ module Google
|
|
127
131
|
# * For Text Extraction:
|
128
132
|
# CSV file(s) with each line in format:
|
129
133
|
# ML_USE,GCS_FILE_PATH
|
130
|
-
# GCS_FILE_PATH leads to a .JSONL (
|
131
|
-
# imports text in-line or as documents.
|
134
|
+
# GCS_FILE_PATH leads to a .JSONL (that is, JSON Lines) file which
|
135
|
+
# either imports text in-line or as documents. Any given
|
136
|
+
# .JSONL file must be 100MB or smaller.
|
132
137
|
# The in-line .JSONL file contains, per line, a proto that wraps a
|
133
|
-
# TextSnippet proto (in json representation) followed by one or
|
134
|
-
#
|
135
|
-
# display_name and text_extraction detail populated.
|
136
|
-
#
|
137
|
-
#
|
138
|
-
#
|
139
|
-
#
|
140
|
-
#
|
141
|
-
#
|
142
|
-
#
|
143
|
-
#
|
144
|
-
#
|
138
|
+
# TextSnippet proto (in json representation) followed by one or more
|
139
|
+
# AnnotationPayload protos (called annotations), which have
|
140
|
+
# display_name and text_extraction detail populated. The given text
|
141
|
+
# is expected to be annotated exhaustively, for example, if you look
|
142
|
+
# for animals and text contains "dolphin" that is not labeled, then
|
143
|
+
# "dolphin" is assumed to not be an animal. Any given text snippet
|
144
|
+
# content must be 10KB or smaller, and also be UTF-8 NFC encoded
|
145
|
+
# (ASCII already is).
|
146
|
+
# The document .JSONL file contains, per line, a proto that wraps a
|
147
|
+
# Document proto. The Document proto must have either document_text
|
148
|
+
# or input_config set. In document_text case, the Document proto may
|
149
|
+
# also contain the spatial information of the document, including
|
150
|
+
# layout, document dimension and page number. In input_config case,
|
151
|
+
# only PDF documents are supported now, and each document may be up
|
152
|
+
# to 2MB large. Currently, annotations on documents cannot be
|
153
|
+
# specified at import.
|
145
154
|
# Three sample CSV rows:
|
146
155
|
# TRAIN,gs://folder/file1.jsonl
|
147
156
|
# VALIDATE,gs://folder/file2.jsonl
|
@@ -150,27 +159,61 @@ module Google
|
|
150
159
|
# with artificial line breaks, but the only actual line break is
|
151
160
|
# denoted by \n).:
|
152
161
|
# {
|
153
|
-
# "
|
154
|
-
# "content": "dog
|
155
|
-
#
|
162
|
+
# "document": {
|
163
|
+
# "document_text": {"content": "dog cat"}
|
164
|
+
# "layout": [
|
156
165
|
# {
|
157
|
-
# "
|
158
|
-
# "
|
159
|
-
# "
|
160
|
-
# }
|
166
|
+
# "text_segment": {
|
167
|
+
# "start_offset": 0,
|
168
|
+
# "end_offset": 3,
|
169
|
+
# },
|
170
|
+
# "page_number": 1,
|
171
|
+
# "bounding_poly": {
|
172
|
+
# "normalized_vertices": [
|
173
|
+
# {"x": 0.1, "y": 0.1},
|
174
|
+
# {"x": 0.1, "y": 0.3},
|
175
|
+
# {"x": 0.3, "y": 0.3},
|
176
|
+
# {"x": 0.3, "y": 0.1},
|
177
|
+
# ],
|
178
|
+
# },
|
179
|
+
# "text_segment_type": TOKEN,
|
161
180
|
# },
|
162
181
|
# {
|
163
|
-
# "
|
164
|
-
# "
|
165
|
-
# "
|
182
|
+
# "text_segment": {
|
183
|
+
# "start_offset": 4,
|
184
|
+
# "end_offset": 7,
|
185
|
+
# },
|
186
|
+
# "page_number": 1,
|
187
|
+
# "bounding_poly": {
|
188
|
+
# "normalized_vertices": [
|
189
|
+
# {"x": 0.4, "y": 0.1},
|
190
|
+
# {"x": 0.4, "y": 0.3},
|
191
|
+
# {"x": 0.8, "y": 0.3},
|
192
|
+
# {"x": 0.8, "y": 0.1},
|
193
|
+
# ],
|
194
|
+
# },
|
195
|
+
# "text_segment_type": TOKEN,
|
196
|
+
# }
|
197
|
+
#
|
198
|
+
# ],
|
199
|
+
# "document_dimensions": {
|
200
|
+
# "width": 8.27,
|
201
|
+
# "height": 11.69,
|
202
|
+
# "unit": INCH,
|
166
203
|
# }
|
204
|
+
# "page_count": 1,
|
167
205
|
# },
|
206
|
+
# "annotations": [
|
168
207
|
# {
|
169
208
|
# "display_name": "animal",
|
170
|
-
# "text_extraction": {
|
171
|
-
# "
|
172
|
-
# }
|
209
|
+
# "text_extraction": {"text_segment": {"start_offset": 0,
|
210
|
+
# "end_offset": 3}}
|
173
211
|
# },
|
212
|
+
# {
|
213
|
+
# "display_name": "animal",
|
214
|
+
# "text_extraction": {"text_segment": {"start_offset": 4,
|
215
|
+
# "end_offset": 7}}
|
216
|
+
# }
|
174
217
|
# ],
|
175
218
|
# }\n
|
176
219
|
# {
|
@@ -211,15 +254,15 @@ module Google
|
|
211
254
|
# TEXT_SNIPPET and GCS_FILE_PATH are distinguished by a pattern. If
|
212
255
|
# the column content is a valid gcs file path, i.e. prefixed by
|
213
256
|
# "gs://", it will be treated as a GCS_FILE_PATH, else if the content
|
214
|
-
# is enclosed within double quotes (""), it
|
215
|
-
#
|
216
|
-
# must lead to a .txt file with UTF-8 encoding,
|
217
|
-
# "gs://folder/content.txt", and the content in it
|
257
|
+
# is enclosed within double quotes (""), it is
|
258
|
+
# treated as a TEXT_SNIPPET. In the GCS_FILE_PATH case, the path
|
259
|
+
# must lead to a .txt file with UTF-8 encoding, for example,
|
260
|
+
# "gs://folder/content.txt", and the content in it is extracted
|
218
261
|
# as a text snippet. In TEXT_SNIPPET case, the column content
|
219
|
-
# excluding quotes
|
262
|
+
# excluding quotes is treated as the text snippet to be imported. In
|
220
263
|
# both cases, the text snippet/file size must be within 128kB.
|
221
264
|
# Maximum 100 unique labels are allowed per CSV row.
|
222
|
-
#
|
265
|
+
# Sample rows:
|
223
266
|
# TRAIN,"They have bad food and very rude",RudeService,BadFood
|
224
267
|
# TRAIN,gs://folder/content.txt,SlowService
|
225
268
|
# TEST,"Typically always bad service there.",RudeService
|
@@ -229,18 +272,18 @@ module Google
|
|
229
272
|
# CSV file(s) with each line in format:
|
230
273
|
# ML_USE,(TEXT_SNIPPET | GCS_FILE_PATH),SENTIMENT
|
231
274
|
# TEXT_SNIPPET and GCS_FILE_PATH are distinguished by a pattern. If
|
232
|
-
# the column content is a valid gcs file path,
|
233
|
-
# "gs://", it
|
234
|
-
#
|
235
|
-
# must lead to a .txt file with UTF-8 encoding,
|
236
|
-
# "gs://folder/content.txt", and the content in it
|
275
|
+
# the column content is a valid gcs file path, that is, prefixed by
|
276
|
+
# "gs://", it is treated as a GCS_FILE_PATH, otherwise it is treated
|
277
|
+
# as a TEXT_SNIPPET. In the GCS_FILE_PATH case, the path
|
278
|
+
# must lead to a .txt file with UTF-8 encoding, for example,
|
279
|
+
# "gs://folder/content.txt", and the content in it is extracted
|
237
280
|
# as a text snippet. In TEXT_SNIPPET case, the column content itself
|
238
|
-
#
|
281
|
+
# is treated as the text snippet to be imported. In both cases, the
|
239
282
|
# text snippet must be up to 500 characters long.
|
240
|
-
#
|
241
|
-
# TRAIN,"@freewrytin
|
242
|
-
# TRAIN,"I need
|
243
|
-
# TEST,"Thank
|
283
|
+
# Sample rows:
|
284
|
+
# TRAIN,"@freewrytin this is way too good for your product",2
|
285
|
+
# TRAIN,"I need this product so bad",3
|
286
|
+
# TEST,"Thank you for this product.",4
|
244
287
|
# VALIDATE,gs://folder/content.txt,2
|
245
288
|
#
|
246
289
|
# * For Tables:
|
@@ -248,7 +291,7 @@ module Google
|
|
248
291
|
# {Google::Cloud::AutoML::V1beta1::InputConfig#gcs_source gcs_source} or
|
249
292
|
#
|
250
293
|
# {Google::Cloud::AutoML::V1beta1::InputConfig#bigquery_source bigquery_source}
|
251
|
-
# can be used. All inputs
|
294
|
+
# can be used. All inputs are concatenated into a single
|
252
295
|
#
|
253
296
|
# {Google::Cloud::AutoML::V1beta1::TablesDatasetMetadata#primary_table_name primary_table}
|
254
297
|
# For gcs_source:
|
@@ -271,7 +314,6 @@ module Google
|
|
271
314
|
# An imported table must have between 2 and 1,000 columns, inclusive,
|
272
315
|
# and between 1000 and 100,000,000 rows, inclusive. There are at most 5
|
273
316
|
# import data running in parallel.
|
274
|
-
#
|
275
317
|
# Definitions:
|
276
318
|
# ML_USE = "TRAIN" | "VALIDATE" | "TEST" | "UNASSIGNED"
|
277
319
|
# Describes how the given example (file) should be used for model
|
@@ -330,7 +372,7 @@ module Google
|
|
330
372
|
# If any of the provided CSV files can't be parsed or if more than certain
|
331
373
|
# percent of CSV rows cannot be processed then the operation fails and
|
332
374
|
# nothing is imported. Regardless of overall success or failure the per-row
|
333
|
-
# failures, up to a certain count cap,
|
375
|
+
# failures, up to a certain count cap, are listed in
|
334
376
|
# Operation.metadata.partial_failures.
|
335
377
|
# @!attribute [rw] gcs_source
|
336
378
|
# @return [Google::Cloud::AutoML::V1beta1::GcsSource]
|
@@ -363,6 +405,28 @@ module Google
|
|
363
405
|
# The formats are represented in EBNF with commas being literal and with
|
364
406
|
# non-terminal symbols defined near the end of this comment. The formats
|
365
407
|
# are:
|
408
|
+
#
|
409
|
+
# * For Image Classification:
|
410
|
+
# CSV file(s) with each line having just a single column:
|
411
|
+
# GCS_FILE_PATH
|
412
|
+
# which leads to an image of up to 30MB in size. Supported
|
413
|
+
# extensions: .JPEG, .GIF, .PNG. This path is treated as the ID in
|
414
|
+
# the Batch predict output.
|
415
|
+
# Three sample rows:
|
416
|
+
# gs://folder/image1.jpeg
|
417
|
+
# gs://folder/image2.gif
|
418
|
+
# gs://folder/image3.png
|
419
|
+
#
|
420
|
+
# * For Image Object Detection:
|
421
|
+
# CSV file(s) with each line having just a single column:
|
422
|
+
# GCS_FILE_PATH
|
423
|
+
# which leads to an image of up to 30MB in size. Supported
|
424
|
+
# extensions: .JPEG, .GIF, .PNG. This path is treated as the ID in
|
425
|
+
# the Batch predict output.
|
426
|
+
# Three sample rows:
|
427
|
+
# gs://folder/image1.jpeg
|
428
|
+
# gs://folder/image2.gif
|
429
|
+
# gs://folder/image3.png
|
366
430
|
# * For Video Classification:
|
367
431
|
# CSV file(s) with each line in format:
|
368
432
|
# GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END
|
@@ -386,6 +450,28 @@ module Google
|
|
386
450
|
# gs://folder/video1.mp4,10,240
|
387
451
|
# gs://folder/video1.mp4,300,360
|
388
452
|
# gs://folder/vid2.mov,0,inf
|
453
|
+
# * For Text Classification:
|
454
|
+
# CSV file(s) with each line having just a single column:
|
455
|
+
# GCS_FILE_PATH | TEXT_SNIPPET
|
456
|
+
# Any given text file can have size up to 128kB.
|
457
|
+
# Any given text snippet content must have 60,000 characters or less.
|
458
|
+
# Three sample rows:
|
459
|
+
# gs://folder/text1.txt
|
460
|
+
# "Some text content to predict"
|
461
|
+
# gs://folder/text3.pdf
|
462
|
+
# Supported file extensions: .txt, .pdf
|
463
|
+
#
|
464
|
+
# * For Text Sentiment:
|
465
|
+
# CSV file(s) with each line having just a single column:
|
466
|
+
# GCS_FILE_PATH | TEXT_SNIPPET
|
467
|
+
# Any given text file can have size up to 128kB.
|
468
|
+
# Any given text snippet content must have 500 characters or less.
|
469
|
+
# Three sample rows:
|
470
|
+
# gs://folder/text1.txt
|
471
|
+
# "Some text content to predict"
|
472
|
+
# gs://folder/text3.pdf
|
473
|
+
# Supported file extensions: .txt, .pdf
|
474
|
+
#
|
389
475
|
# * For Text Extraction
|
390
476
|
# .JSONL (i.e. JSON Lines) file(s) which either provide text in-line or
|
391
477
|
# as documents (for a single BatchPredict call only one of these
|
@@ -458,101 +544,51 @@ module Google
|
|
458
544
|
# 100GB or smaller, where first file must have a header containing
|
459
545
|
# column names. If the first row of a subsequent file is the same as
|
460
546
|
# the header, then it is also treated as a header. All other rows
|
461
|
-
# contain values for the corresponding columns.
|
462
|
-
#
|
463
|
-
#
|
464
|
-
# {Google::Cloud::AutoML::V1beta1::TablesModelMetadata#prediction_type prediction_type-s}:
|
465
|
-
# The column names must contain the model's
|
547
|
+
# contain values for the corresponding columns.
|
548
|
+
# The column names must contain the model's
|
466
549
|
#
|
467
550
|
# {Google::Cloud::AutoML::V1beta1::TablesModelMetadata#input_feature_column_specs input_feature_column_specs'}
|
468
551
|
#
|
469
552
|
# {Google::Cloud::AutoML::V1beta1::ColumnSpec#display_name display_name-s}
|
470
|
-
#
|
471
|
-
#
|
472
|
-
#
|
473
|
-
#
|
474
|
-
#
|
553
|
+
# (order doesn't matter). The columns corresponding to the model's
|
554
|
+
# input feature column specs must contain values compatible with the
|
555
|
+
# column spec's data types. Prediction on all the rows, i.e. the CSV
|
556
|
+
# lines, will be attempted. For FORECASTING
|
557
|
+
#
|
558
|
+
# {Google::Cloud::AutoML::V1beta1::TablesModelMetadata#prediction_type prediction_type}:
|
559
|
+
# all columns having
|
560
|
+
#
|
561
|
+
# {Google::Cloud::AutoML::V1beta1::ColumnSpec::ForecastingMetadata::ColumnType TIME_SERIES_AVAILABLE_PAST_ONLY}
|
562
|
+
# type will be ignored.
|
563
|
+
# First three sample rows of a CSV file:
|
475
564
|
# "First Name","Last Name","Dob","Addresses"
|
476
565
|
#
|
477
566
|
# "John","Doe","1968-01-22","[{"status":"current","address":"123_First_Avenue","city":"Seattle","state":"WA","zip":"11111","numberOfYears":"1"},{"status":"previous","address":"456_Main_Street","city":"Portland","state":"OR","zip":"22222","numberOfYears":"5"}]"
|
478
567
|
#
|
479
568
|
# "Jane","Doe","1980-10-16","[{"status":"current","address":"789_Any_Avenue","city":"Albany","state":"NY","zip":"33333","numberOfYears":"2"},{"status":"previous","address":"321_Main_Street","city":"Hoboken","state":"NJ","zip":"44444","numberOfYears":"3"}]"
|
480
|
-
# For FORECASTING
|
481
|
-
#
|
482
|
-
# {Google::Cloud::AutoML::V1beta1::TablesModelMetadata#prediction_type prediction_type}:
|
483
|
-
# The column names must contain the union of the model's
|
484
|
-
#
|
485
|
-
# {Google::Cloud::AutoML::V1beta1::TablesModelMetadata#input_feature_column_specs input_feature_column_specs'}
|
486
|
-
#
|
487
|
-
# {Google::Cloud::AutoML::V1beta1::ColumnSpec#display_name display_name-s}
|
488
|
-
# and
|
489
|
-
#
|
490
|
-
# {Google::Cloud::AutoML::V1beta1::TablesModelMetadata#target_column_spec target_column_specs'}
|
491
|
-
#
|
492
|
-
# {Google::Cloud::AutoML::V1beta1::ColumnSpec#display_name display_name}
|
493
|
-
# (order doesn't matter), with values compatible with these column
|
494
|
-
# specs data types, except as specified below.
|
495
|
-
# The input rows must contain not only the to-be-predicted rows
|
496
|
-
# but also the historical data rows, even if they would be
|
497
|
-
# identical as the ones on which the model has been trained.
|
498
|
-
# The historical rows must have non-NULL target column
|
499
|
-
# values. The to-be-predicted rows must have NULL values in the
|
500
|
-
# target column and all columns having
|
501
|
-
#
|
502
|
-
# {Google::Cloud::AutoML::V1beta1::ColumnSpec::ForecastingMetadata::ColumnType::KEY TIME_SERIES_AVAILABLE_PAST_ONLY}
|
503
|
-
# type, regardless if these columns are
|
504
|
-
# {Google::Cloud::AutoML::V1beta1::DataType#nullable nullable}.
|
505
|
-
# Prediction only on the to-be-predicted rows will be attempted.
|
506
|
-
# First four sample rows of a CSV file:
|
507
|
-
#
|
508
|
-
# "Year","City","OlympicsThatYear","Population","WaterUsedGigaGallons"
|
509
|
-
# "2000","NYC","true","8008278","452.7"
|
510
|
-
# "2001","NYC","false","8024963","432.2"
|
511
|
-
# "2002","NYC","true","",""
|
512
569
|
# BigQuery case:
|
513
570
|
# A URI of a BigQuery table. The user data size of the BigQuery
|
514
571
|
# table must be 100GB or smaller.
|
515
|
-
#
|
516
|
-
#
|
517
|
-
# {Google::Cloud::AutoML::V1beta1::TablesModelMetadata#prediction_type prediction_type-s}:
|
518
|
-
# The column names must contain the model's
|
572
|
+
# The column names must contain the model's
|
519
573
|
#
|
520
574
|
# {Google::Cloud::AutoML::V1beta1::TablesModelMetadata#input_feature_column_specs input_feature_column_specs'}
|
521
575
|
#
|
522
576
|
# {Google::Cloud::AutoML::V1beta1::ColumnSpec#display_name display_name-s}
|
523
|
-
#
|
524
|
-
#
|
525
|
-
#
|
526
|
-
#
|
527
|
-
# For FORECASTING
|
577
|
+
# (order doesn't matter). The columns corresponding to the model's
|
578
|
+
# input feature column specs must contain values compatible with the
|
579
|
+
# column spec's data types. Prediction on all the rows of the table
|
580
|
+
# will be attempted. For FORECASTING
|
528
581
|
#
|
529
582
|
# {Google::Cloud::AutoML::V1beta1::TablesModelMetadata#prediction_type prediction_type}:
|
530
|
-
#
|
531
|
-
#
|
532
|
-
# {Google::Cloud::AutoML::V1beta1::TablesModelMetadata#input_feature_column_specs input_feature_column_specs'}
|
533
|
-
#
|
534
|
-
# {Google::Cloud::AutoML::V1beta1::ColumnSpec#display_name display_name-s}
|
535
|
-
# and
|
583
|
+
# all columns having
|
536
584
|
#
|
537
|
-
# {Google::Cloud::AutoML::V1beta1::
|
538
|
-
#
|
539
|
-
# {Google::Cloud::AutoML::V1beta1::ColumnSpec#display_name display_name}
|
540
|
-
# (order doesn't matter), with values compatible with these column
|
541
|
-
# specs data types, except as specified below.
|
542
|
-
# The table's rows must contain not only the to-be-predicted rows
|
543
|
-
# but also the historical data rows, even if they would be
|
544
|
-
# identical as the ones on which the model has been trained.
|
545
|
-
# The historical rows must have non-NULL target column values.
|
546
|
-
# The to-be-predicted rows must have NULL values in the
|
547
|
-
# target column and all columns having
|
548
|
-
#
|
549
|
-
# {Google::Cloud::AutoML::V1beta1::ColumnSpec::ForecastingMetadata::ColumnType::KEY TIME_SERIES_AVAILABLE_PAST_ONLY}
|
550
|
-
# type, regardless if these columns are
|
551
|
-
# {Google::Cloud::AutoML::V1beta1::DataType#nullable nullable}.
|
552
|
-
# Prediction only on the to-be-predicted rows will be attempted.
|
585
|
+
# {Google::Cloud::AutoML::V1beta1::ColumnSpec::ForecastingMetadata::ColumnType TIME_SERIES_AVAILABLE_PAST_ONLY}
|
586
|
+
# type will be ignored.
|
553
587
|
#
|
554
588
|
# Definitions:
|
555
589
|
# GCS_FILE_PATH = A path to file on GCS, e.g. "gs://folder/video.avi".
|
590
|
+
# TEXT_SNIPPET = The content of a text snippet, UTF-8 encoded, enclosed within
|
591
|
+
# double quotes ("")
|
556
592
|
# TIME_SEGMENT_START = TIME_OFFSET
|
557
593
|
# Expresses a beginning, inclusive, of a time segment
|
558
594
|
# within an
|
@@ -638,11 +674,55 @@ module Google
|
|
638
674
|
#
|
639
675
|
# {Google::Cloud::AutoML::V1beta1::BatchPredictOutputConfig#gcs_destination gcs_destination}
|
640
676
|
# must be set unless specified otherwise for a domain. If gcs_destination is
|
641
|
-
# set then in the given directory a new directory
|
677
|
+
# set then in the given directory a new directory is created. Its name
|
642
678
|
# will be
|
643
679
|
# "prediction-<model-display-name>-<timestamp-of-prediction-call>",
|
644
680
|
# where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. The contents
|
645
681
|
# of it depend on the ML problem the predictions are made for.
|
682
|
+
#
|
683
|
+
# * For Image Classification:
|
684
|
+
# In the created directory files `image_classification_1.jsonl`,
|
685
|
+
# `image_classification_2.jsonl`,...,`image_classification_N.jsonl`
|
686
|
+
# will be created, where N may be 1, and depends on the
|
687
|
+
# total number of the successfully predicted images and annotations.
|
688
|
+
# A single image will be listed only once with all its annotations,
|
689
|
+
# and its annotations will never be split across files.
|
690
|
+
# Each .JSONL file will contain, per line, a JSON representation of a
|
691
|
+
# proto that wraps image's "ID" : "<id_value>" followed by a list of
|
692
|
+
# zero or more AnnotationPayload protos (called annotations), which
|
693
|
+
# have classification detail populated.
|
694
|
+
# If prediction for any image failed (partially or completely), then an
|
695
|
+
# additional `errors_1.jsonl`, `errors_2.jsonl`,..., `errors_N.jsonl`
|
696
|
+
# files will be created (N depends on total number of failed
|
697
|
+
# predictions). These files will have a JSON representation of a proto
|
698
|
+
# that wraps the same "ID" : "<id_value>" but here followed by
|
699
|
+
# exactly one
|
700
|
+
#
|
701
|
+
# [`google.rpc.Status`](https:
|
702
|
+
# //github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
|
703
|
+
# containing only `code` and `message` fields.
|
704
|
+
#
|
705
|
+
# * For Image Object Detection:
|
706
|
+
# In the created directory files `image_object_detection_1.jsonl`,
|
707
|
+
# `image_object_detection_2.jsonl`,...,`image_object_detection_N.jsonl`
|
708
|
+
# will be created, where N may be 1, and depends on the
|
709
|
+
# total number of the successfully predicted images and annotations.
|
710
|
+
# Each .JSONL file will contain, per line, a JSON representation of a
|
711
|
+
# proto that wraps image's "ID" : "<id_value>" followed by a list of
|
712
|
+
# zero or more AnnotationPayload protos (called annotations), which
|
713
|
+
# have image_object_detection detail populated. A single image will
|
714
|
+
# be listed only once with all its annotations, and its annotations
|
715
|
+
# will never be split across files.
|
716
|
+
# If prediction for any image failed (partially or completely), then
|
717
|
+
# additional `errors_1.jsonl`, `errors_2.jsonl`,..., `errors_N.jsonl`
|
718
|
+
# files will be created (N depends on total number of failed
|
719
|
+
# predictions). These files will have a JSON representation of a proto
|
720
|
+
# that wraps the same "ID" : "<id_value>" but here followed by
|
721
|
+
# exactly one
|
722
|
+
#
|
723
|
+
# [`google.rpc.Status`](https:
|
724
|
+
# //github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
|
725
|
+
# containing only `code` and `message` fields.
|
646
726
|
# * For Video Classification:
|
647
727
|
# In the created directory a video_classification.csv file, and a .JSON
|
648
728
|
# file per each video classification requested in the input (i.e. each
|
@@ -695,6 +775,54 @@ module Google
|
|
695
775
|
# for each frame of the video time segment the file is assigned to in
|
696
776
|
# video_object_tracking.csv. All AnnotationPayload protos will have
|
697
777
|
# video_object_tracking field set.
|
778
|
+
# * For Text Classification:
|
779
|
+
# In the created directory files `text_classification_1.jsonl`,
|
780
|
+
# `text_classification_2.jsonl`,...,`text_classification_N.jsonl`
|
781
|
+
# will be created, where N may be 1, and depends on the
|
782
|
+
# total number of inputs and annotations found.
|
783
|
+
#
|
784
|
+
# Each .JSONL file will contain, per line, a JSON representation of a
|
785
|
+
# proto that wraps input text snippet or input text file and a list of
|
786
|
+
# zero or more AnnotationPayload protos (called annotations), which
|
787
|
+
# have classification detail populated. A single text snippet or file
|
788
|
+
# will be listed only once with all its annotations, and its
|
789
|
+
# annotations will never be split across files.
|
790
|
+
#
|
791
|
+
# If prediction for any text snippet or file failed (partially or
|
792
|
+
# completely), then additional `errors_1.jsonl`, `errors_2.jsonl`,...,
|
793
|
+
# `errors_N.jsonl` files will be created (N depends on total number of
|
794
|
+
# failed predictions). These files will have a JSON representation of a
|
795
|
+
# proto that wraps input text snippet or input text file followed by
|
796
|
+
# exactly one
|
797
|
+
#
|
798
|
+
# [`google.rpc.Status`](https:
|
799
|
+
# //github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
|
800
|
+
# containing only `code` and `message`.
|
801
|
+
#
|
802
|
+
# * For Text Sentiment:
|
803
|
+
# In the created directory files `text_sentiment_1.jsonl`,
|
804
|
+
# `text_sentiment_2.jsonl`,...,`text_sentiment_N.jsonl`
|
805
|
+
# will be created, where N may be 1, and depends on the
|
806
|
+
# total number of inputs and annotations found.
|
807
|
+
#
|
808
|
+
# Each .JSONL file will contain, per line, a JSON representation of a
|
809
|
+
# proto that wraps input text snippet or input text file and a list of
|
810
|
+
# zero or more AnnotationPayload protos (called annotations), which
|
811
|
+
# have text_sentiment detail populated. A single text snippet or file
|
812
|
+
# will be listed only once with all its annotations, and its
|
813
|
+
# annotations will never be split across files.
|
814
|
+
#
|
815
|
+
# If prediction for any text snippet or file failed (partially or
|
816
|
+
# completely), then additional `errors_1.jsonl`, `errors_2.jsonl`,...,
|
817
|
+
# `errors_N.jsonl` files will be created (N depends on total number of
|
818
|
+
# failed predictions). These files will have a JSON representation of a
|
819
|
+
# proto that wraps input text snippet or input text file followed by
|
820
|
+
# exactly one
|
821
|
+
#
|
822
|
+
# [`google.rpc.Status`](https:
|
823
|
+
# //github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
|
824
|
+
# containing only `code` and `message`.
|
825
|
+
#
|
698
826
|
# * For Text Extraction:
|
699
827
|
# In the created directory files `text_extraction_1.jsonl`,
|
700
828
|
# `text_extraction_2.jsonl`,...,`text_extraction_N.jsonl`
|
@@ -704,7 +832,8 @@ module Google
|
|
704
832
|
# used inline text, or documents.
|
705
833
|
# If input was inline, then each .JSONL file will contain, per line,
|
706
834
|
# a JSON representation of a proto that wraps given in request text
|
707
|
-
# snippet's "id"
|
835
|
+
# snippet's "id" (if specified), followed by input text snippet,
|
836
|
+
# and a list of zero or more
|
708
837
|
# AnnotationPayload protos (called annotations), which have
|
709
838
|
# text_extraction detail populated. A single text snippet will be
|
710
839
|
# listed only once with all its annotations, and its annotations will
|
@@ -872,7 +1001,10 @@ module Google
|
|
872
1001
|
# * docker - Used for Docker containers. Use the params field to customize
|
873
1002
|
# the container. The container is verified to work correctly on
|
874
1003
|
# ubuntu 16.04 operating system. See more at
|
875
|
-
# [containers
|
1004
|
+
# [containers
|
1005
|
+
#
|
1006
|
+
# quickstart](https:
|
1007
|
+
# //cloud.google.com/vision/automl/docs/containers-gcs-quickstart)
|
876
1008
|
# * core_ml - Used for iOS mobile devices.
|
877
1009
|
# @!attribute [rw] params
|
878
1010
|
# @return [Hash{String => String}]
|