google-cloud-pipeline-components 2.15.0__py3-none-any.whl → 2.16.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation_preprocessor/component.py +11 -1
- google_cloud_pipeline_components/_implementation/starry_net/dataprep/component.py +14 -0
- google_cloud_pipeline_components/_implementation/starry_net/get_training_artifacts/component.py +1 -1
- google_cloud_pipeline_components/_implementation/starry_net/train/component.py +11 -0
- google_cloud_pipeline_components/_implementation/starry_net/upload_decomposition_plots/component.py +6 -1
- google_cloud_pipeline_components/_implementation/starry_net/version.py +3 -3
- google_cloud_pipeline_components/container/preview/custom_job/remote_runner.py +31 -0
- google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py +1 -1
- google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py +2 -2
- google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py +2 -2
- google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml +42 -38
- google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml +42 -38
- google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml +42 -38
- google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml +42 -38
- google_cloud_pipeline_components/preview/automl/forecasting/utils.py +49 -7
- google_cloud_pipeline_components/preview/automl/tabular/auto_feature_engineering.py +1 -1
- google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml +45 -45
- google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml +47 -47
- google_cloud_pipeline_components/preview/automl/tabular/distillation_stage_feature_transform_engine.py +2 -2
- google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py +2 -2
- google_cloud_pipeline_components/preview/automl/tabular/feature_selection_pipeline.yaml +4 -4
- google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py +3 -3
- google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py +2 -2
- google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml +15 -15
- google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py +2 -2
- google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml +13 -13
- google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py +2 -2
- google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml +14 -14
- google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py +2 -2
- google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml +13 -13
- google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml +14 -14
- google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml +13 -13
- google_cloud_pipeline_components/preview/custom_job/utils.py +64 -15
- google_cloud_pipeline_components/preview/starry_net/component.py +60 -34
- google_cloud_pipeline_components/proto/template_metadata_pb2.py +21 -17
- google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml +10 -10
- google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml +31 -31
- google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml +13 -13
- google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py +3 -3
- google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml +14 -14
- google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml +43 -43
- google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py +2 -2
- google_cloud_pipeline_components/v1/automl/tabular/ensemble.py +2 -2
- google_cloud_pipeline_components/v1/automl/tabular/finalizer.py +1 -1
- google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py +1 -1
- google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py +1 -1
- google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py +2 -2
- google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py +2 -2
- google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py +1 -1
- google_cloud_pipeline_components/v1/automl/tabular/transform.py +2 -2
- google_cloud_pipeline_components/v1/custom_job/component.py +3 -0
- google_cloud_pipeline_components/v1/custom_job/utils.py +4 -0
- google_cloud_pipeline_components/v1/model_evaluation/evaluation_llm_text_generation_pipeline.py +1 -1
- google_cloud_pipeline_components/version.py +1 -1
- {google_cloud_pipeline_components-2.15.0.dist-info → google_cloud_pipeline_components-2.16.1.dist-info}/METADATA +10 -10
- {google_cloud_pipeline_components-2.15.0.dist-info → google_cloud_pipeline_components-2.16.1.dist-info}/RECORD +59 -59
- {google_cloud_pipeline_components-2.15.0.dist-info → google_cloud_pipeline_components-2.16.1.dist-info}/WHEEL +1 -1
- {google_cloud_pipeline_components-2.15.0.dist-info → google_cloud_pipeline_components-2.16.1.dist-info}/LICENSE +0 -0
- {google_cloud_pipeline_components-2.15.0.dist-info → google_cloud_pipeline_components-2.16.1.dist-info}/top_level.txt +0 -0
@@ -54,7 +54,7 @@ def create_custom_training_job_from_component(
     display_name: str = '',
     replica_count: int = 1,
     machine_type: str = 'n1-standard-4',
-    accelerator_type: str = '',
+    accelerator_type: str = 'ACCELERATOR_TYPE_UNSPECIFIED',
     accelerator_count: int = 1,
     boot_disk_type: str = 'pd-ssd',
     boot_disk_size_gb: int = 100,
@@ -83,9 +83,9 @@ def create_custom_training_job_from_component(
     replica_count: The count of instances in the cluster. One replica always counts towards the master in worker_pool_spec[0] and the remaining replicas will be allocated in worker_pool_spec[1]. See [more information.](https://cloud.google.com/vertex-ai/docs/training/distributed-training#configure_a_distributed_training_job)
     machine_type: The type of the machine to run the CustomJob. The default value is "n1-standard-4". See [more information](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types).
     accelerator_type: The type of accelerator(s) that may be attached to the machine per `accelerator_count`. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype).
-    accelerator_count: The number of accelerators to attach to the machine. Defaults to 1 if `accelerator_type` is set.
-    boot_disk_type: Type of the boot disk (default is "pd-ssd"). Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or "pd-standard" (Persistent Disk Hard Disk Drive).
-    boot_disk_size_gb: Size in GB of the boot disk (default is 100GB).
+    accelerator_count: The number of accelerators to attach to the machine. Defaults to 1 if `accelerator_type` is set statically.
+    boot_disk_type: Type of the boot disk (default is "pd-ssd"). Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or "pd-standard" (Persistent Disk Hard Disk Drive).
+    boot_disk_size_gb: Size in GB of the boot disk (default is 100GB).
     timeout: The maximum job running time. The default is 7 days. A duration in seconds with up to nine fractional digits, terminated by 's', for example: "3.5s".
     restart_job_on_worker_restart: Restarts the entire CustomJob if a worker gets restarted. This feature can be used by distributed training jobs that are not resilient to workers leaving and joining a job.
     service_account: Sets the default service account for workload run-as account. The [service account](https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) running the pipeline submitting jobs must have act-as permission on this run-as account. If unspecified, the Vertex AI Custom Code [Service Agent](https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project.
@@ -94,11 +94,11 @@ def create_custom_training_job_from_component(
     tensorboard: The name of a Vertex AI TensorBoard resource to which this CustomJob will upload TensorBoard logs.
     enable_web_access: Whether you want Vertex AI to enable [interactive shell access](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell) to training containers. If `True`, you can access interactive shells at the URIs given by [CustomJob.web_access_uris][].
     reserved_ip_ranges: A list of names for the reserved IP ranges under the VPC network that can be used for this job. If set, we will deploy the job within the provided IP ranges. Otherwise, the job will be deployed to any IP ranges under the provided VPC network.
-    nfs_mounts: A list of [NfsMount](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec#NfsMount) resource specs in Json dict format. For more details about mounting NFS for CustomJob, see [Mount an NFS share for custom training](https://cloud.google.com/vertex-ai/docs/training/train-nfs-share).
+    nfs_mounts: A list of [NfsMount](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec#NfsMount) resource specs in Json dict format. For more details about mounting NFS for CustomJob, see [Mount an NFS share for custom training](https://cloud.google.com/vertex-ai/docs/training/train-nfs-share). `nfs_mounts` is set as a static value and cannot be changed as a pipeline parameter.
     base_output_directory: The Cloud Storage location to store the output of this CustomJob or HyperparameterTuningJob. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination).
     labels: The labels with user-defined metadata to organize the CustomJob. See [more information](https://goo.gl/xmQnxf).
     persistent_resource_id: The ID of the PersistentResource in the same Project and Location which to run. The default value is a placeholder that will be resolved to the PipelineJob [RuntimeConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.pipelineJobs#PipelineJob.RuntimeConfig)'s persistent resource id at runtime. However, if the PipelineJob doesn't set Persistent Resource as the job level runtime, the placedholder will be resolved to an empty string and the custom job will be run on demand. If the value is set explicitly, the custom job will runs in the specified persistent resource, in this case, please note the network and CMEK configs on the job should be consistent with those on the PersistentResource, otherwise, the job will be rejected. (This is a Preview feature not yet recommended for production workloads.)
-    env: Environment variables to be passed to the container. Takes the form `[{'name': '...', 'value': '...'}]`. Maximum limit is 100.
+    env: Environment variables to be passed to the container. Takes the form `[{'name': '...', 'value': '...'}]`. Maximum limit is 100. `env` is set as a static value and cannot be changed as a pipeline parameter.
 
   Returns:
     A KFP component with CustomJob specification applied.
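The two hunks above change the wrapper so that machine shape becomes overridable at pipeline runtime rather than fixed when the component is built. A minimal sketch of how the 2.16.1 wrapper might be used; the `train_op` component and the example pipeline are illustrative assumptions, not code from this package:

```python
from kfp import dsl
from google_cloud_pipeline_components.preview.custom_job import utils


@dsl.component
def train_op(message: str) -> str:
    return message


# Wrap the user component in a CustomJob. The machine-shape arguments below
# become optional input parameters of the generated component in 2.16.1.
custom_train_op = utils.create_custom_training_job_from_component(
    train_op,
    display_name='example-custom-job',
    machine_type='n1-standard-4',
    accelerator_type='ACCELERATOR_TYPE_UNSPECIFIED',  # new default in 2.16.1
    boot_disk_type='pd-ssd',
    boot_disk_size_gb=100,
)


@dsl.pipeline(name='example-custom-job-pipeline')
def pipeline(machine_type: str = 'n1-standard-8'):
    # machine_type is wired from a pipeline parameter instead of being baked
    # in when the component was created.
    custom_train_op(message='hello world', machine_type=machine_type)
```

Note that `nfs_mounts` and `env` remain static values, so they still have to be fixed at component-creation time.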
@@ -148,7 +148,11 @@ def create_custom_training_job_from_component(
   )[0]['container']
 
   worker_pool_spec = {
-      'machine_spec': {
+      'machine_spec': {
+          'machine_type': "{{$.inputs.parameters['machine_type']}}",
+          'accelerator_type': "{{$.inputs.parameters['accelerator_type']}}",
+          'accelerator_count': "{{$.inputs.parameters['accelerator_count']}}",
+      },
       'replica_count': 1,
       'container_spec': {
           'image_uri': user_component_container['image'],
@@ -160,15 +164,11 @@ def create_custom_training_job_from_component(
           ),
           'env': env or [],
       },
+      'disk_spec': {
+          'boot_disk_type': "{{$.inputs.parameters['boot_disk_type']}}",
+          'boot_disk_size_gb': "{{$.inputs.parameters['boot_disk_size_gb']}}",
+      },
   }
-  if accelerator_type:
-    worker_pool_spec['machine_spec']['accelerator_type'] = accelerator_type
-    worker_pool_spec['machine_spec']['accelerator_count'] = accelerator_count
-  if boot_disk_type:
-    worker_pool_spec['disk_spec'] = {
-        'boot_disk_type': boot_disk_type,
-        'boot_disk_size_gb': boot_disk_size_gb,
-    }
   if nfs_mounts:
     worker_pool_spec['nfs_mounts'] = nfs_mounts
 
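Because the machine and disk fields are now KFP input placeholders, the numeric ones reach the launcher as strings and have to be coerced back to integers before the Vertex AI CustomJob API is called. The helper below is only a sketch of that idea (its name and placement are assumptions; it is not taken from the `remote_runner.py` change listed above):

```python
def cast_numeric_machine_fields(job_spec: dict) -> dict:
    """Sketch: coerce placeholder-resolved numeric fields back to int."""
    for pool in job_spec.get('worker_pool_specs', []):
        machine_spec = pool.get('machine_spec', {})
        if 'accelerator_count' in machine_spec:
            machine_spec['accelerator_count'] = int(machine_spec['accelerator_count'])
        disk_spec = pool.get('disk_spec', {})
        if 'boot_disk_size_gb' in disk_spec:
            disk_spec['boot_disk_size_gb'] = int(disk_spec['boot_disk_size_gb'])
    return job_spec
```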
@@ -210,6 +210,54 @@ def create_custom_training_job_from_component(
           'defaultValue'
       ] = default_value
 
+  # add workerPoolSpec parameters into the customjob component
+  cj_component_spec['inputDefinitions']['parameters']['machine_type'] = {
+      'parameterType': 'STRING',
+      'defaultValue': machine_type,
+      'isOptional': True,
+  }
+  cj_component_spec['inputDefinitions']['parameters']['accelerator_type'] = {
+      'parameterType': 'STRING',
+      'defaultValue': accelerator_type,
+      'isOptional': True,
+  }
+  cj_component_spec['inputDefinitions']['parameters']['accelerator_count'] = {
+      'parameterType': 'NUMBER_INTEGER',
+      'defaultValue': (
+          accelerator_count
+          if accelerator_type != 'ACCELERATOR_TYPE_UNSPECIFIED'
+          else 0
+      ),
+      'isOptional': True,
+  }
+  cj_component_spec['inputDefinitions']['parameters']['boot_disk_type'] = {
+      'parameterType': 'STRING',
+      'defaultValue': boot_disk_type,
+      'isOptional': True,
+  }
+  cj_component_spec['inputDefinitions']['parameters']['boot_disk_size_gb'] = {
+      'parameterType': 'NUMBER_INTEGER',
+      'defaultValue': boot_disk_size_gb,
+      'isOptional': True,
+  }
+
+  # check if user component has any input parameters that already exist in the
+  # custom job component
+  for param_name in user_component_spec.get('inputDefinitions', {}).get(
+      'parameters', {}
+  ):
+    if param_name in cj_component_spec['inputDefinitions']['parameters']:
+      raise ValueError(
+          f'Input parameter {param_name} already exists in the CustomJob component.'  # pylint: disable=line-too-long
+      )
+  for param_name in user_component_spec.get('outputDefinitions', {}).get(
+      'parameters', {}
+  ):
+    if param_name in cj_component_spec['outputDefinitions']['parameters']:
+      raise ValueError(
+          f'Output parameter {param_name} already exists in the CustomJob component.'  # pylint: disable=line-too-long
+      )
+
   # merge parameters from user component into the customjob component
   cj_component_spec['inputDefinitions']['parameters'].update(
       user_component_spec.get('inputDefinitions', {}).get('parameters', {})
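A side effect of injecting these five parameters is that a user component whose own inputs reuse one of the reserved names now fails fast instead of being silently overwritten. A small illustrative sketch (the `conflicting_op` component is an assumption):

```python
from kfp import dsl
from google_cloud_pipeline_components.preview.custom_job import utils


@dsl.component
def conflicting_op(machine_type: str) -> str:
    # An input named machine_type collides with the parameter the wrapper injects.
    return machine_type


try:
    utils.create_custom_training_job_from_component(conflicting_op)
except ValueError as err:
    # Expected per the new check:
    # "Input parameter machine_type already exists in the CustomJob component."
    print(err)
```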
@@ -217,6 +265,7 @@ def create_custom_training_job_from_component(
   cj_component_spec['outputDefinitions']['parameters'].update(
       user_component_spec.get('outputDefinitions', {}).get('parameters', {})
   )
+
   # use artifacts from user component
   ## assign artifacts, not update, since customjob has no artifact outputs
   cj_component_spec['inputDefinitions']['artifacts'] = user_component_spec.get(
@@ -57,6 +57,8 @@ def starry_net(  # pylint: disable=dangerous-default-value
     dataprep_target_column: str = '',
     dataprep_static_covariate_columns: List[str] = [],
     dataprep_previous_run_dir: str = '',
+    dataprep_nan_threshold: float = 0.2,
+    dataprep_zero_threshold: float = 0.2,
     trainer_machine_type: str = 'n1-standard-4',
     trainer_accelerator_type: str = 'NVIDIA_TESLA_V100',
     trainer_num_epochs: int = 50,
@@ -84,7 +86,16 @@ def starry_net(  # pylint: disable=dangerous-default-value
     project: str = _placeholders.PROJECT_ID_PLACEHOLDER,
 ):
   # fmt: off
-  """
+  """Starry Net is a state-of-the-art forecaster used internally by Google.
+
+  Starry Net is a glass-box neural network inspired by statistical time series
+  models, capable of cleaning step changes and spikes, modeling seasonality and
+  events, forecasting trend, and providing both point and prediction interval
+  forecasts in a single, lightweight model. Starry Net stands out among neural
+  network based forecasting models by providing the explainability,
+  interpretability and tunability of traditional statistical forecasters.
+  For example, it features time series feature decomposition and damped local
+  linear exponential smoothing model as the trend structure.
 
   Args:
     tensorboard_instance_id: The tensorboard instance ID. This must be in same
@@ -149,6 +160,13 @@ def starry_net(  # pylint: disable=dangerous-default-value
     dataprep_previous_run_dir: The dataprep dir from a previous run. Use this
       to save time if you've already created TFRecords from your BigQuery
       dataset with the same dataprep parameters as this run.
+    dataprep_nan_threshold: Series having more nan / missing values than
+      nan_threshold (inclusive) in percentage for either backtest or forecast
+      will not be sampled in the training set (including missing due to
+      train_start and train_end). All existing nans are replaced by zeros.
+    dataprep_zero_threshold: Series having more 0.0 values than zero_threshold
+      (inclusive) in percentage for either backtest or forecast will not be
+      sampled in the training set.
     trainer_machine_type: The machine type for training. Must be compatible with
       trainer_accelerator_type.
     trainer_accelerator_type: The accelerator type for training.
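The two new dataprep thresholds are plain pipeline parameters, so they can be set at compile or submission time. A minimal sketch, assuming the function is imported from the module path shown in this diff (the public alias may differ) and using illustrative threshold values:

```python
from kfp import compiler
from google_cloud_pipeline_components.preview.starry_net.component import starry_net

# Compile the Starry Net pipeline with custom series-filtering thresholds:
# drop series with more than 20% missing values or more than 50% zeros.
compiler.Compiler().compile(
    starry_net,
    package_path='starry_net_pipeline.yaml',
    pipeline_parameters={
        'dataprep_nan_threshold': 0.2,
        'dataprep_zero_threshold': 0.5,
    },
)
```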
@@ -221,39 +239,6 @@ def starry_net(  # pylint: disable=dangerous-default-value
       model_blocks=trainer_model_blocks,
       static_covariates=dataprep_static_covariate_columns,
   )
-  test_set_task = DataprepOp(
-      backcast_length=dataprep_backcast_length,
-      forecast_length=dataprep_forecast_length,
-      train_end_date=dataprep_train_end_date,
-      n_val_windows=dataprep_n_val_windows,
-      n_test_windows=dataprep_n_test_windows,
-      test_set_stride=dataprep_test_set_stride,
-      model_blocks=create_dataprep_args_task.outputs['model_blocks'],
-      bigquery_source=dataprep_bigquery_data_path,
-      ts_identifier_columns=create_dataprep_args_task.outputs[
-          'ts_identifier_columns'],
-      time_column=dataprep_time_column,
-      static_covariate_columns=create_dataprep_args_task.outputs[
-          'static_covariate_columns'],
-      target_column=dataprep_target_column,
-      machine_type=dataflow_machine_type,
-      docker_region=create_dataprep_args_task.outputs['docker_region'],
-      location=location,
-      project=project,
-      job_id=job_id,
-      job_name_prefix='test-set',
-      num_workers=dataflow_starting_replica_count,
-      max_num_workers=dataflow_max_replica_count,
-      disk_size_gb=dataflow_disk_size_gb,
-      test_set_only=True,
-      bigquery_output=dataprep_test_set_bigquery_dataset,
-      gcs_source=dataprep_csv_data_path,
-      gcs_static_covariate_source=dataprep_csv_static_covariates_path,
-      encryption_spec_key_name=encryption_spec_key_name
-  )
-  test_set_task.set_display_name('create-test-set')
-  set_test_set_task = SetTestSetOp(
-      dataprep_dir=test_set_task.outputs['dataprep_dir'])
   with dsl.If(create_dataprep_args_task.outputs['create_tf_records'] == True,  # pylint: disable=singleton-comparison
               'create-tf-records'):
     create_tf_records_task = DataprepOp(
@@ -270,6 +255,7 @@ def starry_net(  # pylint: disable=dangerous-default-value
         time_column=dataprep_time_column,
         static_covariate_columns=create_dataprep_args_task.outputs[
             'static_covariate_columns'],
+        static_covariates_vocab_path='',
         target_column=dataprep_target_column,
         machine_type=dataflow_machine_type,
         docker_region=create_dataprep_args_task.outputs['docker_region'],
@@ -282,6 +268,8 @@ def starry_net(  # pylint: disable=dangerous-default-value
         disk_size_gb=dataflow_disk_size_gb,
         test_set_only=False,
         bigquery_output=dataprep_test_set_bigquery_dataset,
+        nan_threshold=dataprep_nan_threshold,
+        zero_threshold=dataprep_zero_threshold,
         gcs_source=dataprep_csv_data_path,
         gcs_static_covariate_source=dataprep_csv_static_covariates_path,
         encryption_spec_key_name=encryption_spec_key_name
@@ -303,6 +291,42 @@ def starry_net(  # pylint: disable=dangerous-default-value
           'static_covariates_vocab_path'],
       set_tfrecord_args_this_run_task.outputs['static_covariates_vocab_path']
   )
+  test_set_task = DataprepOp(
+      backcast_length=dataprep_backcast_length,
+      forecast_length=dataprep_forecast_length,
+      train_end_date=dataprep_train_end_date,
+      n_val_windows=dataprep_n_val_windows,
+      n_test_windows=dataprep_n_test_windows,
+      test_set_stride=dataprep_test_set_stride,
+      model_blocks=create_dataprep_args_task.outputs['model_blocks'],
+      bigquery_source=dataprep_bigquery_data_path,
+      ts_identifier_columns=create_dataprep_args_task.outputs[
+          'ts_identifier_columns'],
+      time_column=dataprep_time_column,
+      static_covariate_columns=create_dataprep_args_task.outputs[
+          'static_covariate_columns'],
+      static_covariates_vocab_path=static_covariates_vocab_path,
+      target_column=dataprep_target_column,
+      machine_type=dataflow_machine_type,
+      docker_region=create_dataprep_args_task.outputs['docker_region'],
+      location=location,
+      project=project,
+      job_id=job_id,
+      job_name_prefix='test-set',
+      num_workers=dataflow_starting_replica_count,
+      max_num_workers=dataflow_max_replica_count,
+      disk_size_gb=dataflow_disk_size_gb,
+      test_set_only=True,
+      bigquery_output=dataprep_test_set_bigquery_dataset,
+      nan_threshold=dataprep_nan_threshold,
+      zero_threshold=dataprep_zero_threshold,
+      gcs_source=dataprep_csv_data_path,
+      gcs_static_covariate_source=dataprep_csv_static_covariates_path,
+      encryption_spec_key_name=encryption_spec_key_name
+  )
+  test_set_task.set_display_name('create-test-set')
+  set_test_set_task = SetTestSetOp(
+      dataprep_dir=test_set_task.outputs['dataprep_dir'])
   train_tf_record_patterns = dsl.OneOf(
       set_tfrecord_args_previous_run_task.outputs['train_tf_record_patterns'],
       set_tfrecord_args_this_run_task.outputs['train_tf_record_patterns']
@@ -330,6 +354,8 @@ def starry_net(  # pylint: disable=dangerous-default-value
       n_val_windows=dataprep_n_val_windows,
       n_test_windows=dataprep_n_test_windows,
       test_set_stride=dataprep_test_set_stride,
+      nan_threshold=dataprep_nan_threshold,
+      zero_threshold=dataprep_zero_threshold,
       cleaning_activation_regularizer_coeff=trainer_cleaning_activation_regularizer_coeff,
       change_point_activation_regularizer_coeff=trainer_change_point_activation_regularizer_coeff,
       change_point_output_regularizer_coeff=trainer_change_point_output_regularizer_coeff,
@@ -1,11 +1,13 @@
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler. DO NOT EDIT!
-#
+# NO CHECKED-IN PROTOBUF GENCODE
+# Protobuf Python Version: 0.20240806.0
 """Generated protocol buffer code."""
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
 from google.protobuf import symbol_database as _symbol_database
 from google.protobuf.internal import builder as _builder
+
 # @@protoc_insertion_point(imports)
 
 _sym_db = _symbol_database.Default()
@@ -71,14 +73,16 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
     b' \x03(\x0b\x32\x36.template_metadata.GoogleCloudServiceAccountValidation\x12O\n\x11quota_validations\x18\x02'
     b' \x03(\x0b\x32\x34.template_metadata.GoogleCloudProjectQuotaValidation\x12N\n\x0f\x61pi_validations\x18\x03'
     b' \x03(\x0b\x32\x35.template_metadata.GoogleCloudApiEnablementValidation\x12H\n\x0fgcs_validations\x18\x04'
-    b' \x03(\x0b\x32/.template_metadata.GoogleCloudStorageValidation"\
+    b' \x03(\x0b\x32/.template_metadata.GoogleCloudStorageValidation"\x92\x01\n\x1cGoogleCloudStorageValidation\x12\x0f\n\x07gcs_uri\x18\x01'
     b' \x01(\t\x12\x10\n\x08is_input\x18\x02'
     b' \x01(\x08\x12\x1f\n\x17\x64\x65\x66\x61ult_service_account\x18\x03'
     b' \x01(\t\x12\x1c\n\x14override_placeholder\x18\x04'
-    b' \x01(\t
+    b' \x01(\t\x12\x10\n\x08gcs_uris\x18\x05'
+    b' \x03(\t"\x80\x01\n!GoogleCloudProjectQuotaValidation\x12\x13\n\x0bmetric_name\x18\x01'
     b' \x01(\t\x12\x15\n\x0bint64_value\x18\x02'
     b' \x01(\x03H\x00\x12\x16\n\x0c\x64ouble_value\x18\x03'
-    b' \x01(\x01H\x00\
+    b' \x01(\x01H\x00\x12\x0e\n\x06region\x18\x04'
+    b' \x01(\tB\x07\n\x05value"\x8d\x01\n#GoogleCloudServiceAccountValidation\x12\x1f\n\x17\x64\x65\x66\x61ult_principal_email\x18\x01'
     b' \x01(\t\x12\x1c\n\x14override_placeholder\x18\x02'
     b' \x01(\t\x12\x13\n\x0bpermissions\x18\x03'
     b' \x03(\t\x12\x12\n\nrole_names\x18\x04'
@@ -96,12 +100,12 @@ _builder.BuildTopDescriptorsAndMessages(
 if not _descriptor._USE_C_DESCRIPTORS:
   _globals['DESCRIPTOR']._loaded_options = None
   _globals['DESCRIPTOR']._serialized_options = b'P\001'
-  _globals['_SIZE']._serialized_start =
-  _globals['_SIZE']._serialized_end =
-  _globals['_CONTENTTYPE']._serialized_start =
-  _globals['_CONTENTTYPE']._serialized_end =
-  _globals['_URITYPE']._serialized_start =
-  _globals['_URITYPE']._serialized_end =
+  _globals['_SIZE']._serialized_start = 3127
+  _globals['_SIZE']._serialized_end = 3198
+  _globals['_CONTENTTYPE']._serialized_start = 3201
+  _globals['_CONTENTTYPE']._serialized_end = 3331
+  _globals['_URITYPE']._serialized_start = 3333
+  _globals['_URITYPE']._serialized_end = 3430
   _globals['_TEMPLATEMETADATA']._serialized_start = 164
   _globals['_TEMPLATEMETADATA']._serialized_end = 301
   _globals['_IOMETADATA']._serialized_start = 303
@@ -143,11 +147,11 @@ if not _descriptor._USE_C_DESCRIPTORS:
   _globals['_VALIDATIONITEMS']._serialized_start = 2308
   _globals['_VALIDATIONITEMS']._serialized_end = 2640
   _globals['_GOOGLECLOUDSTORAGEVALIDATION']._serialized_start = 2643
-  _globals['_GOOGLECLOUDSTORAGEVALIDATION']._serialized_end =
-  _globals['_GOOGLECLOUDPROJECTQUOTAVALIDATION']._serialized_start =
-  _globals['_GOOGLECLOUDPROJECTQUOTAVALIDATION']._serialized_end =
-  _globals['_GOOGLECLOUDSERVICEACCOUNTVALIDATION']._serialized_start =
-  _globals['_GOOGLECLOUDSERVICEACCOUNTVALIDATION']._serialized_end =
-  _globals['_GOOGLECLOUDAPIENABLEMENTVALIDATION']._serialized_start =
-  _globals['_GOOGLECLOUDAPIENABLEMENTVALIDATION']._serialized_end =
+  _globals['_GOOGLECLOUDSTORAGEVALIDATION']._serialized_end = 2789
+  _globals['_GOOGLECLOUDPROJECTQUOTAVALIDATION']._serialized_start = 2792
+  _globals['_GOOGLECLOUDPROJECTQUOTAVALIDATION']._serialized_end = 2920
+  _globals['_GOOGLECLOUDSERVICEACCOUNTVALIDATION']._serialized_start = 2923
+  _globals['_GOOGLECLOUDSERVICEACCOUNTVALIDATION']._serialized_end = 3064
+  _globals['_GOOGLECLOUDAPIENABLEMENTVALIDATION']._serialized_start = 3066
+  _globals['_GOOGLECLOUDAPIENABLEMENTVALIDATION']._serialized_end = 3125
 # @@protoc_insertion_point(module_scope)
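The regenerated descriptor above adds a repeated `gcs_uris` field (tag 5) to `GoogleCloudStorageValidation` and a `region` field (tag 4) to `GoogleCloudProjectQuotaValidation`; the other field names are also readable from the serialized bytes. A small sketch of constructing these messages, treating the keyword usage as an assumption about the generated API and using made-up values:

```python
from google_cloud_pipeline_components.proto import template_metadata_pb2

storage_validation = template_metadata_pb2.GoogleCloudStorageValidation(
    gcs_uri='gs://example-bucket/data.csv',
    is_input=True,
    # New repeated string field (tag 5) in 2.16.1.
    gcs_uris=['gs://example-bucket/a.csv', 'gs://example-bucket/b.csv'],
)

quota_validation = template_metadata_pb2.GoogleCloudProjectQuotaValidation(
    metric_name='CPUS',
    int64_value=8,
    # New string field (tag 4) in 2.16.1.
    region='us-central1',
)
```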
@@ -658,7 +658,7 @@ deploymentSpec:
           \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\
           \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \
           \ ref.project, ref.dataset_id)\n\n"
-        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:
+        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240808_0625
     exec-bigquery-create-dataset-2:
       container:
         args:
@@ -693,7 +693,7 @@ deploymentSpec:
           \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\
           \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \
           \ ref.project, ref.dataset_id)\n\n"
-        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:
+        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240808_0625
     exec-bigquery-delete-dataset-with-prefix:
      container:
        args:
@@ -727,7 +727,7 @@ deploymentSpec:
           \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\
           \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\
           \n"
-        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:
+        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240808_0625
     exec-bigquery-query-job:
       container:
         args:
@@ -788,7 +788,7 @@ deploymentSpec:
           \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\
           \ if write_disposition:\n config['write_disposition'] = write_disposition\n\
           \ return config\n\n"
-        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:
+        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240808_0625
     exec-get-first-valid:
       container:
         args:
@@ -812,7 +812,7 @@ deploymentSpec:
           \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\
           \n for value in json.loads(values):\n if value:\n return value\n\
           \ raise ValueError('No valid values.')\n\n"
-        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:
+        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240808_0625
     exec-get-model-metadata:
      container:
        args:
@@ -851,7 +851,7 @@ deploymentSpec:
           \ 'forecast_horizon',\n ],\n )(\n options.time_series_timestamp_column,\n\
           \ options.time_series_id_column,\n options.time_series_data_column,\n\
           \ options.horizon,\n )\n\n"
-        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:
+        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240808_0625
     exec-get-table-location:
       container:
         args:
@@ -887,7 +887,7 @@ deploymentSpec:
           \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\
           \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\
           \ return client.get_table(table).location\n\n"
-        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:
+        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240808_0625
     exec-load-table-from-uri:
       container:
         args:
@@ -928,7 +928,7 @@ deploymentSpec:
           \ source_format=source_format)\n client.load_table_from_uri(\n source_uris=csv_list,\n\
           \ destination=destination,\n project=project,\n location=location,\n\
           \ job_config=job_config).result()\n return destination\n\n"
-        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:
+        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240808_0625
     exec-maybe-replace-with-default:
       container:
         args:
@@ -950,7 +950,7 @@ deploymentSpec:
           \ *\n\ndef maybe_replace_with_default(value: str, default: str = '') ->\
           \ str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\
           \n return default if not value else value\n\n"
-        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:
+        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240808_0625
     exec-validate-inputs:
       container:
         args:
@@ -1046,7 +1046,7 @@ deploymentSpec:
           \ raise ValueError(\n 'Granularity unit should be one of the\
           \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\
           \n"
-        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:
+        image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240808_0625
 pipelineInfo:
   description: Forecasts using a BQML ARIMA_PLUS model.
   name: automl-tabular-bqml-arima-prediction