google-cloud-pipeline-components 2.15.0__py3-none-any.whl → 2.16.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (59)
  1. google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation_preprocessor/component.py +11 -1
  2. google_cloud_pipeline_components/_implementation/starry_net/dataprep/component.py +14 -0
  3. google_cloud_pipeline_components/_implementation/starry_net/get_training_artifacts/component.py +1 -1
  4. google_cloud_pipeline_components/_implementation/starry_net/train/component.py +11 -0
  5. google_cloud_pipeline_components/_implementation/starry_net/upload_decomposition_plots/component.py +6 -1
  6. google_cloud_pipeline_components/_implementation/starry_net/version.py +3 -3
  7. google_cloud_pipeline_components/container/preview/custom_job/remote_runner.py +31 -0
  8. google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py +1 -1
  9. google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py +2 -2
  10. google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py +2 -2
  11. google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml +42 -38
  12. google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml +42 -38
  13. google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml +42 -38
  14. google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml +42 -38
  15. google_cloud_pipeline_components/preview/automl/forecasting/utils.py +49 -7
  16. google_cloud_pipeline_components/preview/automl/tabular/auto_feature_engineering.py +1 -1
  17. google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml +45 -45
  18. google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml +47 -47
  19. google_cloud_pipeline_components/preview/automl/tabular/distillation_stage_feature_transform_engine.py +2 -2
  20. google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py +2 -2
  21. google_cloud_pipeline_components/preview/automl/tabular/feature_selection_pipeline.yaml +4 -4
  22. google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py +3 -3
  23. google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py +2 -2
  24. google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml +15 -15
  25. google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py +2 -2
  26. google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml +13 -13
  27. google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py +2 -2
  28. google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml +14 -14
  29. google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py +2 -2
  30. google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml +13 -13
  31. google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml +14 -14
  32. google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml +13 -13
  33. google_cloud_pipeline_components/preview/custom_job/utils.py +64 -15
  34. google_cloud_pipeline_components/preview/starry_net/component.py +60 -34
  35. google_cloud_pipeline_components/proto/template_metadata_pb2.py +21 -17
  36. google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml +10 -10
  37. google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml +31 -31
  38. google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml +13 -13
  39. google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py +3 -3
  40. google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml +14 -14
  41. google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml +43 -43
  42. google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py +2 -2
  43. google_cloud_pipeline_components/v1/automl/tabular/ensemble.py +2 -2
  44. google_cloud_pipeline_components/v1/automl/tabular/finalizer.py +1 -1
  45. google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py +1 -1
  46. google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py +1 -1
  47. google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py +2 -2
  48. google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py +2 -2
  49. google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py +1 -1
  50. google_cloud_pipeline_components/v1/automl/tabular/transform.py +2 -2
  51. google_cloud_pipeline_components/v1/custom_job/component.py +3 -0
  52. google_cloud_pipeline_components/v1/custom_job/utils.py +4 -0
  53. google_cloud_pipeline_components/v1/model_evaluation/evaluation_llm_text_generation_pipeline.py +1 -1
  54. google_cloud_pipeline_components/version.py +1 -1
  55. {google_cloud_pipeline_components-2.15.0.dist-info → google_cloud_pipeline_components-2.16.1.dist-info}/METADATA +10 -10
  56. {google_cloud_pipeline_components-2.15.0.dist-info → google_cloud_pipeline_components-2.16.1.dist-info}/RECORD +59 -59
  57. {google_cloud_pipeline_components-2.15.0.dist-info → google_cloud_pipeline_components-2.16.1.dist-info}/WHEEL +1 -1
  58. {google_cloud_pipeline_components-2.15.0.dist-info → google_cloud_pipeline_components-2.16.1.dist-info}/LICENSE +0 -0
  59. {google_cloud_pipeline_components-2.15.0.dist-info → google_cloud_pipeline_components-2.16.1.dist-info}/top_level.txt +0 -0
@@ -54,7 +54,7 @@ def create_custom_training_job_from_component(
     display_name: str = '',
     replica_count: int = 1,
     machine_type: str = 'n1-standard-4',
-    accelerator_type: str = '',
+    accelerator_type: str = 'ACCELERATOR_TYPE_UNSPECIFIED',
     accelerator_count: int = 1,
     boot_disk_type: str = 'pd-ssd',
     boot_disk_size_gb: int = 100,
@@ -83,9 +83,9 @@ def create_custom_training_job_from_component(
     replica_count: The count of instances in the cluster. One replica always counts towards the master in worker_pool_spec[0] and the remaining replicas will be allocated in worker_pool_spec[1]. See [more information.](https://cloud.google.com/vertex-ai/docs/training/distributed-training#configure_a_distributed_training_job)
     machine_type: The type of the machine to run the CustomJob. The default value is "n1-standard-4". See [more information](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types).
     accelerator_type: The type of accelerator(s) that may be attached to the machine per `accelerator_count`. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype).
-    accelerator_count: The number of accelerators to attach to the machine. Defaults to 1 if `accelerator_type` is set.
-    boot_disk_type: Type of the boot disk (default is "pd-ssd"). Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or "pd-standard" (Persistent Disk Hard Disk Drive). boot_disk_type is set as a static value and cannot be changed as a pipeline parameter.
-    boot_disk_size_gb: Size in GB of the boot disk (default is 100GB). `boot_disk_size_gb` is set as a static value and cannot be changed as a pipeline parameter.
+    accelerator_count: The number of accelerators to attach to the machine. Defaults to 1 if `accelerator_type` is set statically.
+    boot_disk_type: Type of the boot disk (default is "pd-ssd"). Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or "pd-standard" (Persistent Disk Hard Disk Drive).
+    boot_disk_size_gb: Size in GB of the boot disk (default is 100GB).
     timeout: The maximum job running time. The default is 7 days. A duration in seconds with up to nine fractional digits, terminated by 's', for example: "3.5s".
     restart_job_on_worker_restart: Restarts the entire CustomJob if a worker gets restarted. This feature can be used by distributed training jobs that are not resilient to workers leaving and joining a job.
     service_account: Sets the default service account for workload run-as account. The [service account](https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) running the pipeline submitting jobs must have act-as permission on this run-as account. If unspecified, the Vertex AI Custom Code [Service Agent](https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project.
@@ -94,11 +94,11 @@ def create_custom_training_job_from_component(
     tensorboard: The name of a Vertex AI TensorBoard resource to which this CustomJob will upload TensorBoard logs.
     enable_web_access: Whether you want Vertex AI to enable [interactive shell access](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell) to training containers. If `True`, you can access interactive shells at the URIs given by [CustomJob.web_access_uris][].
     reserved_ip_ranges: A list of names for the reserved IP ranges under the VPC network that can be used for this job. If set, we will deploy the job within the provided IP ranges. Otherwise, the job will be deployed to any IP ranges under the provided VPC network.
-    nfs_mounts: A list of [NfsMount](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec#NfsMount) resource specs in Json dict format. For more details about mounting NFS for CustomJob, see [Mount an NFS share for custom training](https://cloud.google.com/vertex-ai/docs/training/train-nfs-share).
+    nfs_mounts: A list of [NfsMount](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec#NfsMount) resource specs in Json dict format. For more details about mounting NFS for CustomJob, see [Mount an NFS share for custom training](https://cloud.google.com/vertex-ai/docs/training/train-nfs-share). `nfs_mounts` is set as a static value and cannot be changed as a pipeline parameter.
     base_output_directory: The Cloud Storage location to store the output of this CustomJob or HyperparameterTuningJob. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination).
     labels: The labels with user-defined metadata to organize the CustomJob. See [more information](https://goo.gl/xmQnxf).
     persistent_resource_id: The ID of the PersistentResource in the same Project and Location which to run. The default value is a placeholder that will be resolved to the PipelineJob [RuntimeConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.pipelineJobs#PipelineJob.RuntimeConfig)'s persistent resource id at runtime. However, if the PipelineJob doesn't set Persistent Resource as the job level runtime, the placedholder will be resolved to an empty string and the custom job will be run on demand. If the value is set explicitly, the custom job will runs in the specified persistent resource, in this case, please note the network and CMEK configs on the job should be consistent with those on the PersistentResource, otherwise, the job will be rejected. (This is a Preview feature not yet recommended for production workloads.)
-    env: Environment variables to be passed to the container. Takes the form `[{'name': '...', 'value': '...'}]`. Maximum limit is 100.
+    env: Environment variables to be passed to the container. Takes the form `[{'name': '...', 'value': '...'}]`. Maximum limit is 100. `env` is set as a static value and cannot be changed as a pipeline parameter.
 
   Returns:
     A KFP component with CustomJob specification applied.
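
The updated docstring distinguishes settings that stay build-time only (`nfs_mounts`, `env`) from the machine and disk settings that become runtime pipeline parameters in the hunks below. For reference, a hedged sketch of the static formats the docstring describes; the values are illustrative placeholders and the NfsMount field names follow the linked REST reference:

# Passed to create_custom_training_job_from_component when the component is
# built; these cannot be changed per pipeline run.
env = [
    {'name': 'TF_CPP_MIN_LOG_LEVEL', 'value': '2'},  # placeholder variable
]
nfs_mounts = [
    # server / path / mountPoint per the NfsMount REST spec; placeholder values.
    {'server': '10.0.0.2', 'path': '/exports/data', 'mountPoint': '/mnt/nfs'},
]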
@@ -148,7 +148,11 @@ def create_custom_training_job_from_component(
   )[0]['container']
 
   worker_pool_spec = {
-      'machine_spec': {'machine_type': machine_type},
+      'machine_spec': {
+          'machine_type': "{{$.inputs.parameters['machine_type']}}",
+          'accelerator_type': "{{$.inputs.parameters['accelerator_type']}}",
+          'accelerator_count': "{{$.inputs.parameters['accelerator_count']}}",
+      },
       'replica_count': 1,
       'container_spec': {
           'image_uri': user_component_container['image'],
@@ -160,15 +164,11 @@ def create_custom_training_job_from_component(
           ),
           'env': env or [],
       },
+      'disk_spec': {
+          'boot_disk_type': "{{$.inputs.parameters['boot_disk_type']}}",
+          'boot_disk_size_gb': "{{$.inputs.parameters['boot_disk_size_gb']}}",
+      },
   }
-  if accelerator_type:
-    worker_pool_spec['machine_spec']['accelerator_type'] = accelerator_type
-    worker_pool_spec['machine_spec']['accelerator_count'] = accelerator_count
-  if boot_disk_type:
-    worker_pool_spec['disk_spec'] = {
-        'boot_disk_type': boot_disk_type,
-        'boot_disk_size_gb': boot_disk_size_gb,
-    }
   if nfs_mounts:
     worker_pool_spec['nfs_mounts'] = nfs_mounts
 
@@ -210,6 +210,54 @@ def create_custom_training_job_from_component(
             'defaultValue'
         ] = default_value
 
+  # add workerPoolSpec parameters into the customjob component
+  cj_component_spec['inputDefinitions']['parameters']['machine_type'] = {
+      'parameterType': 'STRING',
+      'defaultValue': machine_type,
+      'isOptional': True,
+  }
+  cj_component_spec['inputDefinitions']['parameters']['accelerator_type'] = {
+      'parameterType': 'STRING',
+      'defaultValue': accelerator_type,
+      'isOptional': True,
+  }
+  cj_component_spec['inputDefinitions']['parameters']['accelerator_count'] = {
+      'parameterType': 'NUMBER_INTEGER',
+      'defaultValue': (
+          accelerator_count
+          if accelerator_type != 'ACCELERATOR_TYPE_UNSPECIFIED'
+          else 0
+      ),
+      'isOptional': True,
+  }
+  cj_component_spec['inputDefinitions']['parameters']['boot_disk_type'] = {
+      'parameterType': 'STRING',
+      'defaultValue': boot_disk_type,
+      'isOptional': True,
+  }
+  cj_component_spec['inputDefinitions']['parameters']['boot_disk_size_gb'] = {
+      'parameterType': 'NUMBER_INTEGER',
+      'defaultValue': boot_disk_size_gb,
+      'isOptional': True,
+  }
+
+  # check if user component has any input parameters that already exist in the
+  # custom job component
+  for param_name in user_component_spec.get('inputDefinitions', {}).get(
+      'parameters', {}
+  ):
+    if param_name in cj_component_spec['inputDefinitions']['parameters']:
+      raise ValueError(
+          f'Input parameter {param_name} already exists in the CustomJob component.' # pylint: disable=line-too-long
+      )
+  for param_name in user_component_spec.get('outputDefinitions', {}).get(
+      'parameters', {}
+  ):
+    if param_name in cj_component_spec['outputDefinitions']['parameters']:
+      raise ValueError(
+          f'Output parameter {param_name} already exists in the CustomJob component.' # pylint: disable=line-too-long
+      )
+
   # merge parameters from user component into the customjob component
   cj_component_spec['inputDefinitions']['parameters'].update(
       user_component_spec.get('inputDefinitions', {}).get('parameters', {})
@@ -217,6 +265,7 @@ def create_custom_training_job_from_component(
   cj_component_spec['outputDefinitions']['parameters'].update(
       user_component_spec.get('outputDefinitions', {}).get('parameters', {})
   )
+
   # use artifacts from user component
   ## assign artifacts, not update, since customjob has no artifact outputs
   cj_component_spec['inputDefinitions']['artifacts'] = user_component_spec.get(
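
Net effect of the preview custom_job/utils.py changes: `machine_type`, `accelerator_type`, `accelerator_count`, `boot_disk_type`, and `boot_disk_size_gb` are now declared as optional input parameters of the generated CustomJob component (defaulting to the values given at build time) and are referenced from `machine_spec`/`disk_spec` via parameter placeholders, so they can be overridden per pipeline run; a collision check rejects user components that already define parameters with those names. A minimal usage sketch under those assumptions; the `train_op` component, pipeline, and accelerator value below are illustrative, not part of this diff:

from kfp import dsl
from google_cloud_pipeline_components.preview.custom_job import utils


@dsl.component
def train_op(learning_rate: float):
  # Placeholder user training step.
  print(f'training with learning_rate={learning_rate}')


# The values passed here become the defaults of the new machine/disk inputs.
custom_train_op = utils.create_custom_training_job_from_component(
    train_op,
    display_name='train',
    machine_type='n1-standard-4',
    accelerator_type='NVIDIA_TESLA_T4',
    accelerator_count=1,
)


@dsl.pipeline(name='train-pipeline')
def train_pipeline(machine_type: str = 'n1-standard-8'):
  # In 2.16.x the wrapped component exposes machine_type (and the other
  # machine/disk settings) as optional inputs, so they can be set per run.
  custom_train_op(learning_rate=0.01, machine_type=machine_type)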
@@ -57,6 +57,8 @@ def starry_net( # pylint: disable=dangerous-default-value
     dataprep_target_column: str = '',
     dataprep_static_covariate_columns: List[str] = [],
     dataprep_previous_run_dir: str = '',
+    dataprep_nan_threshold: float = 0.2,
+    dataprep_zero_threshold: float = 0.2,
     trainer_machine_type: str = 'n1-standard-4',
     trainer_accelerator_type: str = 'NVIDIA_TESLA_V100',
     trainer_num_epochs: int = 50,
@@ -84,7 +86,16 @@ def starry_net( # pylint: disable=dangerous-default-value
     project: str = _placeholders.PROJECT_ID_PLACEHOLDER,
 ):
   # fmt: off
-  """Trains a STARRY-Net model.
+  """Starry Net is a state-of-the-art forecaster used internally by Google.
+
+  Starry Net is a glass-box neural network inspired by statistical time series
+  models, capable of cleaning step changes and spikes, modeling seasonality and
+  events, forecasting trend, and providing both point and prediction interval
+  forecasts in a single, lightweight model. Starry Net stands out among neural
+  network based forecasting models by providing the explainability,
+  interpretability and tunability of traditional statistical forecasters.
+  For example, it features time series feature decomposition and damped local
+  linear exponential smoothing model as the trend structure.
 
   Args:
     tensorboard_instance_id: The tensorboard instance ID. This must be in same
@@ -149,6 +160,13 @@ def starry_net( # pylint: disable=dangerous-default-value
     dataprep_previous_run_dir: The dataprep dir from a previous run. Use this
       to save time if you've already created TFRecords from your BigQuery
       dataset with the same dataprep parameters as this run.
+    dataprep_nan_threshold: Series having more nan / missing values than
+      nan_threshold (inclusive) in percentage for either backtest or forecast
+      will not be sampled in the training set (including missing due to
+      train_start and train_end). All existing nans are replaced by zeros.
+    dataprep_zero_threshold: Series having more 0.0 values than zero_threshold
+      (inclusive) in percentage for either backtest or forecast will not be
+      sampled in the training set.
     trainer_machine_type: The machine type for training. Must be compatible with
       trainer_accelerator_type.
     trainer_accelerator_type: The accelerator type for training.
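
As documented above, a series is excluded from training when its share of missing values or zeros reaches the corresponding threshold. A minimal illustrative sketch of that screening rule, not the library's implementation; it assumes the thresholds are fractions in [0, 1], the series is non-empty, and the comparison is inclusive, matching the 0.2 defaults:

import math


def keep_series(values, nan_threshold=0.2, zero_threshold=0.2):
  """Returns True if a series would be sampled under the documented rule."""
  n = len(values)
  nan_frac = sum(1 for v in values if v is None or math.isnan(v)) / n
  zero_frac = sum(1 for v in values if v == 0.0) / n
  # A series at or above either threshold is dropped ("inclusive").
  return nan_frac < nan_threshold and zero_frac < zero_threshold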
@@ -221,39 +239,6 @@ def starry_net( # pylint: disable=dangerous-default-value
       model_blocks=trainer_model_blocks,
       static_covariates=dataprep_static_covariate_columns,
   )
-  test_set_task = DataprepOp(
-      backcast_length=dataprep_backcast_length,
-      forecast_length=dataprep_forecast_length,
-      train_end_date=dataprep_train_end_date,
-      n_val_windows=dataprep_n_val_windows,
-      n_test_windows=dataprep_n_test_windows,
-      test_set_stride=dataprep_test_set_stride,
-      model_blocks=create_dataprep_args_task.outputs['model_blocks'],
-      bigquery_source=dataprep_bigquery_data_path,
-      ts_identifier_columns=create_dataprep_args_task.outputs[
-          'ts_identifier_columns'],
-      time_column=dataprep_time_column,
-      static_covariate_columns=create_dataprep_args_task.outputs[
-          'static_covariate_columns'],
-      target_column=dataprep_target_column,
-      machine_type=dataflow_machine_type,
-      docker_region=create_dataprep_args_task.outputs['docker_region'],
-      location=location,
-      project=project,
-      job_id=job_id,
-      job_name_prefix='test-set',
-      num_workers=dataflow_starting_replica_count,
-      max_num_workers=dataflow_max_replica_count,
-      disk_size_gb=dataflow_disk_size_gb,
-      test_set_only=True,
-      bigquery_output=dataprep_test_set_bigquery_dataset,
-      gcs_source=dataprep_csv_data_path,
-      gcs_static_covariate_source=dataprep_csv_static_covariates_path,
-      encryption_spec_key_name=encryption_spec_key_name
-  )
-  test_set_task.set_display_name('create-test-set')
-  set_test_set_task = SetTestSetOp(
-      dataprep_dir=test_set_task.outputs['dataprep_dir'])
   with dsl.If(create_dataprep_args_task.outputs['create_tf_records'] == True, # pylint: disable=singleton-comparison
               'create-tf-records'):
     create_tf_records_task = DataprepOp(
@@ -270,6 +255,7 @@ def starry_net( # pylint: disable=dangerous-default-value
         time_column=dataprep_time_column,
         static_covariate_columns=create_dataprep_args_task.outputs[
             'static_covariate_columns'],
+        static_covariates_vocab_path='',
         target_column=dataprep_target_column,
         machine_type=dataflow_machine_type,
        docker_region=create_dataprep_args_task.outputs['docker_region'],
@@ -282,6 +268,8 @@ def starry_net( # pylint: disable=dangerous-default-value
         disk_size_gb=dataflow_disk_size_gb,
         test_set_only=False,
         bigquery_output=dataprep_test_set_bigquery_dataset,
+        nan_threshold=dataprep_nan_threshold,
+        zero_threshold=dataprep_zero_threshold,
         gcs_source=dataprep_csv_data_path,
         gcs_static_covariate_source=dataprep_csv_static_covariates_path,
         encryption_spec_key_name=encryption_spec_key_name
@@ -303,6 +291,42 @@ def starry_net( # pylint: disable=dangerous-default-value
           'static_covariates_vocab_path'],
       set_tfrecord_args_this_run_task.outputs['static_covariates_vocab_path']
   )
+  test_set_task = DataprepOp(
+      backcast_length=dataprep_backcast_length,
+      forecast_length=dataprep_forecast_length,
+      train_end_date=dataprep_train_end_date,
+      n_val_windows=dataprep_n_val_windows,
+      n_test_windows=dataprep_n_test_windows,
+      test_set_stride=dataprep_test_set_stride,
+      model_blocks=create_dataprep_args_task.outputs['model_blocks'],
+      bigquery_source=dataprep_bigquery_data_path,
+      ts_identifier_columns=create_dataprep_args_task.outputs[
+          'ts_identifier_columns'],
+      time_column=dataprep_time_column,
+      static_covariate_columns=create_dataprep_args_task.outputs[
+          'static_covariate_columns'],
+      static_covariates_vocab_path=static_covariates_vocab_path,
+      target_column=dataprep_target_column,
+      machine_type=dataflow_machine_type,
+      docker_region=create_dataprep_args_task.outputs['docker_region'],
+      location=location,
+      project=project,
+      job_id=job_id,
+      job_name_prefix='test-set',
+      num_workers=dataflow_starting_replica_count,
+      max_num_workers=dataflow_max_replica_count,
+      disk_size_gb=dataflow_disk_size_gb,
+      test_set_only=True,
+      bigquery_output=dataprep_test_set_bigquery_dataset,
+      nan_threshold=dataprep_nan_threshold,
+      zero_threshold=dataprep_zero_threshold,
+      gcs_source=dataprep_csv_data_path,
+      gcs_static_covariate_source=dataprep_csv_static_covariates_path,
+      encryption_spec_key_name=encryption_spec_key_name
+  )
+  test_set_task.set_display_name('create-test-set')
+  set_test_set_task = SetTestSetOp(
+      dataprep_dir=test_set_task.outputs['dataprep_dir'])
   train_tf_record_patterns = dsl.OneOf(
       set_tfrecord_args_previous_run_task.outputs['train_tf_record_patterns'],
       set_tfrecord_args_this_run_task.outputs['train_tf_record_patterns']
@@ -330,6 +354,8 @@ def starry_net( # pylint: disable=dangerous-default-value
       n_val_windows=dataprep_n_val_windows,
       n_test_windows=dataprep_n_test_windows,
       test_set_stride=dataprep_test_set_stride,
+      nan_threshold=dataprep_nan_threshold,
+      zero_threshold=dataprep_zero_threshold,
      cleaning_activation_regularizer_coeff=trainer_cleaning_activation_regularizer_coeff,
       change_point_activation_regularizer_coeff=trainer_change_point_activation_regularizer_coeff,
       change_point_output_regularizer_coeff=trainer_change_point_output_regularizer_coeff,
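
In the pipeline body, the new thresholds are passed to both DataprepOp invocations and to the trainer, and the test-set Dataprep task now runs after the static-covariates vocabulary path is resolved so it can reuse that vocabulary. A hedged sketch of setting the thresholds at submission time; the import path, compile/submit calls, and the bucket and parameter values are assumed from standard KFP/Vertex usage, not taken from this diff:

from kfp import compiler
from google.cloud import aiplatform
from google_cloud_pipeline_components.preview.starry_net import component as starry_net_component

compiler.Compiler().compile(
    pipeline_func=starry_net_component.starry_net,
    package_path='starry_net_pipeline.yaml',
)

job = aiplatform.PipelineJob(
    display_name='starry-net-forecast',
    template_path='starry_net_pipeline.yaml',
    pipeline_root='gs://my-bucket/pipeline-root',  # placeholder bucket
    parameter_values={
        # New in 2.16.x: drop series with at least 10% missing values or zeros.
        'dataprep_nan_threshold': 0.1,
        'dataprep_zero_threshold': 0.1,
        # ...plus the required dataprep_*/trainer_* arguments for your data.
    },
)
job.submit()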
@@ -1,11 +1,13 @@
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler. DO NOT EDIT!
-# Protobuf Python Version: 0.20240110.0
+# NO CHECKED-IN PROTOBUF GENCODE
+# Protobuf Python Version: 0.20240806.0
 """Generated protocol buffer code."""
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
 from google.protobuf import symbol_database as _symbol_database
 from google.protobuf.internal import builder as _builder
+
 # @@protoc_insertion_point(imports)
 
 _sym_db = _symbol_database.Default()
@@ -71,14 +73,16 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
     b' \x03(\x0b\x32\x36.template_metadata.GoogleCloudServiceAccountValidation\x12O\n\x11quota_validations\x18\x02'
     b' \x03(\x0b\x32\x34.template_metadata.GoogleCloudProjectQuotaValidation\x12N\n\x0f\x61pi_validations\x18\x03'
     b' \x03(\x0b\x32\x35.template_metadata.GoogleCloudApiEnablementValidation\x12H\n\x0fgcs_validations\x18\x04'
-    b' \x03(\x0b\x32/.template_metadata.GoogleCloudStorageValidation"\x80\x01\n\x1cGoogleCloudStorageValidation\x12\x0f\n\x07gcs_uri\x18\x01'
+    b' \x03(\x0b\x32/.template_metadata.GoogleCloudStorageValidation"\x92\x01\n\x1cGoogleCloudStorageValidation\x12\x0f\n\x07gcs_uri\x18\x01'
     b' \x01(\t\x12\x10\n\x08is_input\x18\x02'
     b' \x01(\x08\x12\x1f\n\x17\x64\x65\x66\x61ult_service_account\x18\x03'
     b' \x01(\t\x12\x1c\n\x14override_placeholder\x18\x04'
-    b' \x01(\t"p\n!GoogleCloudProjectQuotaValidation\x12\x13\n\x0bmetric_name\x18\x01'
+    b' \x01(\t\x12\x10\n\x08gcs_uris\x18\x05'
+    b' \x03(\t"\x80\x01\n!GoogleCloudProjectQuotaValidation\x12\x13\n\x0bmetric_name\x18\x01'
     b' \x01(\t\x12\x15\n\x0bint64_value\x18\x02'
     b' \x01(\x03H\x00\x12\x16\n\x0c\x64ouble_value\x18\x03'
-    b' \x01(\x01H\x00\x42\x07\n\x05value"\x8d\x01\n#GoogleCloudServiceAccountValidation\x12\x1f\n\x17\x64\x65\x66\x61ult_principal_email\x18\x01'
+    b' \x01(\x01H\x00\x12\x0e\n\x06region\x18\x04'
+    b' \x01(\tB\x07\n\x05value"\x8d\x01\n#GoogleCloudServiceAccountValidation\x12\x1f\n\x17\x64\x65\x66\x61ult_principal_email\x18\x01'
     b' \x01(\t\x12\x1c\n\x14override_placeholder\x18\x02'
     b' \x01(\t\x12\x13\n\x0bpermissions\x18\x03'
     b' \x03(\t\x12\x12\n\nrole_names\x18\x04'
@@ -96,12 +100,12 @@ _builder.BuildTopDescriptorsAndMessages(
 if not _descriptor._USE_C_DESCRIPTORS:
   _globals['DESCRIPTOR']._loaded_options = None
   _globals['DESCRIPTOR']._serialized_options = b'P\001'
-  _globals['_SIZE']._serialized_start = 3092
-  _globals['_SIZE']._serialized_end = 3163
-  _globals['_CONTENTTYPE']._serialized_start = 3166
-  _globals['_CONTENTTYPE']._serialized_end = 3296
-  _globals['_URITYPE']._serialized_start = 3298
-  _globals['_URITYPE']._serialized_end = 3395
+  _globals['_SIZE']._serialized_start = 3127
+  _globals['_SIZE']._serialized_end = 3198
+  _globals['_CONTENTTYPE']._serialized_start = 3201
+  _globals['_CONTENTTYPE']._serialized_end = 3331
+  _globals['_URITYPE']._serialized_start = 3333
+  _globals['_URITYPE']._serialized_end = 3430
   _globals['_TEMPLATEMETADATA']._serialized_start = 164
   _globals['_TEMPLATEMETADATA']._serialized_end = 301
   _globals['_IOMETADATA']._serialized_start = 303
@@ -143,11 +147,11 @@ if not _descriptor._USE_C_DESCRIPTORS:
   _globals['_VALIDATIONITEMS']._serialized_start = 2308
   _globals['_VALIDATIONITEMS']._serialized_end = 2640
   _globals['_GOOGLECLOUDSTORAGEVALIDATION']._serialized_start = 2643
-  _globals['_GOOGLECLOUDSTORAGEVALIDATION']._serialized_end = 2771
-  _globals['_GOOGLECLOUDPROJECTQUOTAVALIDATION']._serialized_start = 2773
-  _globals['_GOOGLECLOUDPROJECTQUOTAVALIDATION']._serialized_end = 2885
-  _globals['_GOOGLECLOUDSERVICEACCOUNTVALIDATION']._serialized_start = 2888
-  _globals['_GOOGLECLOUDSERVICEACCOUNTVALIDATION']._serialized_end = 3029
-  _globals['_GOOGLECLOUDAPIENABLEMENTVALIDATION']._serialized_start = 3031
-  _globals['_GOOGLECLOUDAPIENABLEMENTVALIDATION']._serialized_end = 3090
+  _globals['_GOOGLECLOUDSTORAGEVALIDATION']._serialized_end = 2789
+  _globals['_GOOGLECLOUDPROJECTQUOTAVALIDATION']._serialized_start = 2792
+  _globals['_GOOGLECLOUDPROJECTQUOTAVALIDATION']._serialized_end = 2920
+  _globals['_GOOGLECLOUDSERVICEACCOUNTVALIDATION']._serialized_start = 2923
+  _globals['_GOOGLECLOUDSERVICEACCOUNTVALIDATION']._serialized_end = 3064
+  _globals['_GOOGLECLOUDAPIENABLEMENTVALIDATION']._serialized_start = 3066
+  _globals['_GOOGLECLOUDAPIENABLEMENTVALIDATION']._serialized_end = 3125
 # @@protoc_insertion_point(module_scope)
@@ -658,7 +658,7 @@ deploymentSpec:
           \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\
           \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \
           \ ref.project, ref.dataset_id)\n\n"
-        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
+        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240808_0625
     exec-bigquery-create-dataset-2:
       container:
         args:
@@ -693,7 +693,7 @@ deploymentSpec:
           \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\
           \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \
           \ ref.project, ref.dataset_id)\n\n"
-        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
+        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240808_0625
     exec-bigquery-delete-dataset-with-prefix:
      container:
         args:
@@ -727,7 +727,7 @@ deploymentSpec:
           \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\
           \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\
           \n"
-        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
+        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240808_0625
     exec-bigquery-query-job:
       container:
         args:
@@ -788,7 +788,7 @@ deploymentSpec:
           \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\
           \ if write_disposition:\n config['write_disposition'] = write_disposition\n\
           \ return config\n\n"
-        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
+        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240808_0625
     exec-get-first-valid:
       container:
         args:
@@ -812,7 +812,7 @@ deploymentSpec:
           \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\
           \n for value in json.loads(values):\n if value:\n return value\n\
           \ raise ValueError('No valid values.')\n\n"
-        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
+        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240808_0625
     exec-get-model-metadata:
       container:
         args:
@@ -851,7 +851,7 @@ deploymentSpec:
           \ 'forecast_horizon',\n ],\n )(\n options.time_series_timestamp_column,\n\
           \ options.time_series_id_column,\n options.time_series_data_column,\n\
           \ options.horizon,\n )\n\n"
-        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
+        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240808_0625
     exec-get-table-location:
       container:
         args:
@@ -887,7 +887,7 @@ deploymentSpec:
           \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\
           \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\
           \ return client.get_table(table).location\n\n"
-        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
+        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240808_0625
     exec-load-table-from-uri:
       container:
         args:
@@ -928,7 +928,7 @@ deploymentSpec:
           \ source_format=source_format)\n client.load_table_from_uri(\n source_uris=csv_list,\n\
           \ destination=destination,\n project=project,\n location=location,\n\
           \ job_config=job_config).result()\n return destination\n\n"
-        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
+        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240808_0625
     exec-maybe-replace-with-default:
       container:
         args:
@@ -950,7 +950,7 @@ deploymentSpec:
           \ *\n\ndef maybe_replace_with_default(value: str, default: str = '') ->\
           \ str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\
           \n return default if not value else value\n\n"
-        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
+        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240808_0625
     exec-validate-inputs:
       container:
         args:
@@ -1046,7 +1046,7 @@ deploymentSpec:
           \ raise ValueError(\n 'Granularity unit should be one of the\
           \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\
           \n"
-        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
+        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240808_0625
 pipelineInfo:
   description: Forecasts using a BQML ARIMA_PLUS model.
   name: automl-tabular-bqml-arima-prediction