google-cloud-pipeline-components 2.14.0__py3-none-any.whl → 2.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of google-cloud-pipeline-components might be problematic.

Files changed (64)
  1. google_cloud_pipeline_components/_implementation/llm/deployment_graph.py +10 -26
  2. google_cloud_pipeline_components/_implementation/llm/generated/refined_image_versions.py +1 -1
  3. google_cloud_pipeline_components/_implementation/llm/infer_preprocessor.py +109 -0
  4. google_cloud_pipeline_components/_implementation/llm/online_evaluation_pairwise.py +8 -0
  5. google_cloud_pipeline_components/_implementation/llm/reward_model_graph.py +5 -6
  6. google_cloud_pipeline_components/_implementation/llm/rlhf_preprocessor.py +24 -0
  7. google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py +0 -12
  8. google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding/evaluation_llm_embedding_pipeline.py +2 -1
  9. google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation_preprocessor/component.py +14 -0
  10. google_cloud_pipeline_components/_implementation/starry_net/__init__.py +41 -0
  11. google_cloud_pipeline_components/_implementation/{model_evaluation/import_evaluation → starry_net/dataprep}/__init__.py +1 -2
  12. google_cloud_pipeline_components/_implementation/starry_net/dataprep/component.py +159 -0
  13. google_cloud_pipeline_components/_implementation/starry_net/evaluation/__init__.py +13 -0
  14. google_cloud_pipeline_components/_implementation/starry_net/evaluation/component.py +23 -0
  15. google_cloud_pipeline_components/_implementation/starry_net/evaluation/evaluation.yaml +197 -0
  16. google_cloud_pipeline_components/_implementation/starry_net/get_training_artifacts/__init__.py +13 -0
  17. google_cloud_pipeline_components/_implementation/starry_net/get_training_artifacts/component.py +62 -0
  18. google_cloud_pipeline_components/_implementation/starry_net/maybe_set_tfrecord_args/__init__.py +13 -0
  19. google_cloud_pipeline_components/_implementation/starry_net/maybe_set_tfrecord_args/component.py +77 -0
  20. google_cloud_pipeline_components/_implementation/starry_net/set_dataprep_args/__init__.py +13 -0
  21. google_cloud_pipeline_components/_implementation/starry_net/set_dataprep_args/component.py +97 -0
  22. google_cloud_pipeline_components/_implementation/starry_net/set_eval_args/__init__.py +13 -0
  23. google_cloud_pipeline_components/_implementation/starry_net/set_eval_args/component.py +76 -0
  24. google_cloud_pipeline_components/_implementation/starry_net/set_test_set/__init__.py +13 -0
  25. google_cloud_pipeline_components/_implementation/starry_net/set_test_set/component.py +48 -0
  26. google_cloud_pipeline_components/_implementation/starry_net/set_tfrecord_args/__init__.py +13 -0
  27. google_cloud_pipeline_components/_implementation/starry_net/set_tfrecord_args/component.py +70 -0
  28. google_cloud_pipeline_components/_implementation/starry_net/set_train_args/__init__.py +13 -0
  29. google_cloud_pipeline_components/_implementation/starry_net/set_train_args/component.py +90 -0
  30. google_cloud_pipeline_components/_implementation/starry_net/train/__init__.py +13 -0
  31. google_cloud_pipeline_components/_implementation/starry_net/train/component.py +209 -0
  32. google_cloud_pipeline_components/_implementation/starry_net/upload_decomposition_plots/__init__.py +13 -0
  33. google_cloud_pipeline_components/_implementation/starry_net/upload_decomposition_plots/component.py +59 -0
  34. google_cloud_pipeline_components/_implementation/starry_net/upload_model/__init__.py +13 -0
  35. google_cloud_pipeline_components/_implementation/starry_net/upload_model/component.py +23 -0
  36. google_cloud_pipeline_components/_implementation/starry_net/upload_model/upload_model.yaml +37 -0
  37. google_cloud_pipeline_components/_implementation/starry_net/version.py +18 -0
  38. google_cloud_pipeline_components/container/utils/error_surfacing.py +45 -0
  39. google_cloud_pipeline_components/container/v1/model/get_model/remote_runner.py +36 -7
  40. google_cloud_pipeline_components/preview/llm/infer/component.py +22 -25
  41. google_cloud_pipeline_components/preview/llm/rlhf/component.py +15 -8
  42. google_cloud_pipeline_components/preview/model_evaluation/__init__.py +4 -1
  43. google_cloud_pipeline_components/{_implementation/model_evaluation/import_evaluation/component.py → preview/model_evaluation/model_evaluation_import_component.py} +4 -3
  44. google_cloud_pipeline_components/preview/starry_net/__init__.py +19 -0
  45. google_cloud_pipeline_components/preview/starry_net/component.py +443 -0
  46. google_cloud_pipeline_components/proto/task_error_pb2.py +32 -0
  47. google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml +13 -13
  48. google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py +10 -0
  49. google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml +4 -1
  50. google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py +8 -10
  51. google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py +2 -2
  52. google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py +2 -2
  53. google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py +2 -2
  54. google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py +2 -2
  55. google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py +2 -2
  56. google_cloud_pipeline_components/v1/model_evaluation/evaluation_llm_classification_pipeline.py +4 -2
  57. google_cloud_pipeline_components/v1/model_evaluation/evaluation_llm_text_generation_pipeline.py +8 -2
  58. google_cloud_pipeline_components/v1/model_evaluation/model_based_llm_evaluation/autosxs/autosxs_pipeline.py +1 -0
  59. google_cloud_pipeline_components/version.py +1 -1
  60. {google_cloud_pipeline_components-2.14.0.dist-info → google_cloud_pipeline_components-2.15.0.dist-info}/METADATA +17 -20
  61. {google_cloud_pipeline_components-2.14.0.dist-info → google_cloud_pipeline_components-2.15.0.dist-info}/RECORD +64 -32
  62. {google_cloud_pipeline_components-2.14.0.dist-info → google_cloud_pipeline_components-2.15.0.dist-info}/WHEEL +1 -1
  63. {google_cloud_pipeline_components-2.14.0.dist-info → google_cloud_pipeline_components-2.15.0.dist-info}/LICENSE +0 -0
  64. {google_cloud_pipeline_components-2.14.0.dist-info → google_cloud_pipeline_components-2.15.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,443 @@ google_cloud_pipeline_components/preview/starry_net/component.py (new file)
1
+ # Copyright 2024 The Kubeflow Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Defines the pipeline for Starry Net."""
15
+
16
+ from typing import List
17
+
18
+ # pylint: disable=g-importing-member
19
+ from google_cloud_pipeline_components import _placeholders
20
+ from google_cloud_pipeline_components._implementation.starry_net import DataprepOp
21
+ from google_cloud_pipeline_components._implementation.starry_net import EvaluationOp
22
+ from google_cloud_pipeline_components._implementation.starry_net import GetTrainingArtifactsOp
23
+ from google_cloud_pipeline_components._implementation.starry_net import MaybeSetTfrecordArgsOp
24
+ from google_cloud_pipeline_components._implementation.starry_net import SetDataprepArgsOp
25
+ from google_cloud_pipeline_components._implementation.starry_net import SetEvalArgsOp
26
+ from google_cloud_pipeline_components._implementation.starry_net import SetTestSetOp
27
+ from google_cloud_pipeline_components._implementation.starry_net import SetTfrecordArgsOp
28
+ from google_cloud_pipeline_components._implementation.starry_net import SetTrainArgsOp
29
+ from google_cloud_pipeline_components._implementation.starry_net import TrainOp
30
+ from google_cloud_pipeline_components._implementation.starry_net import UploadDecompositionPlotsOp
31
+ from google_cloud_pipeline_components._implementation.starry_net import UploadModelOp
32
+ from google_cloud_pipeline_components.preview.model_evaluation import model_evaluation_import_component
33
+ from google_cloud_pipeline_components.types import artifact_types
34
+ from google_cloud_pipeline_components.v1 import batch_predict_job
35
+ from kfp import dsl
36
+
37
+
38
+ @dsl.pipeline
39
+ def starry_net( # pylint: disable=dangerous-default-value
40
+ tensorboard_instance_id: str,
41
+ dataprep_backcast_length: int,
42
+ dataprep_forecast_length: int,
43
+ dataprep_train_end_date: str,
44
+ dataprep_n_val_windows: int,
45
+ dataprep_n_test_windows: int,
46
+ dataprep_test_set_stride: int,
47
+ dataprep_test_set_bigquery_dataset: str,
48
+ dataflow_machine_type: str = 'n1-standard-16',
49
+ dataflow_max_replica_count: int = 50,
50
+ dataflow_starting_replica_count: int = 1,
51
+ dataflow_disk_size_gb: int = 50,
52
+ dataprep_csv_data_path: str = '',
53
+ dataprep_csv_static_covariates_path: str = '',
54
+ dataprep_bigquery_data_path: str = '',
55
+ dataprep_ts_identifier_columns: List[str] = [],
56
+ dataprep_time_column: str = '',
57
+ dataprep_target_column: str = '',
58
+ dataprep_static_covariate_columns: List[str] = [],
59
+ dataprep_previous_run_dir: str = '',
60
+ trainer_machine_type: str = 'n1-standard-4',
61
+ trainer_accelerator_type: str = 'NVIDIA_TESLA_V100',
62
+ trainer_num_epochs: int = 50,
63
+ trainer_cleaning_activation_regularizer_coeff: float = 1e3,
64
+ trainer_change_point_activation_regularizer_coeff: float = 1e3,
65
+ trainer_change_point_output_regularizer_coeff: float = 1e3,
66
+ trainer_trend_alpha_upper_bound: float = 0.5,
67
+ trainer_trend_beta_upper_bound: float = 0.2,
68
+ trainer_trend_phi_lower_bound: float = 0.99,
69
+ trainer_trend_b_fixed_val: int = -1,
70
+ trainer_trend_b0_fixed_val: int = -1,
71
+ trainer_trend_phi_fixed_val: int = -1,
72
+ trainer_quantiles: List[float] = [],
73
+ trainer_model_blocks: List[str] = [
74
+ 'cleaning',
75
+ 'change_point',
76
+ 'trend',
77
+ 'day_of_week',
78
+ 'week_of_year',
79
+ 'residual',
80
+ ],
81
+ tensorboard_n_decomposition_plots: int = 25,
82
+ encryption_spec_key_name: str = '',
83
+ location: str = _placeholders.LOCATION_PLACEHOLDER,
84
+ project: str = _placeholders.PROJECT_ID_PLACEHOLDER,
85
+ ):
86
+ # fmt: off
87
+ """Trains a STARRY-Net model.
88
+
89
+ Args:
90
+ tensorboard_instance_id: The TensorBoard instance ID. This must be in the same
91
+ location as the pipeline job.
92
+ dataprep_backcast_length: The length of the context window to feed into the
93
+ model.
94
+ dataprep_forecast_length: The length of the forecast horizon used in the
95
+ loss function during training and during evaluation, so that the model is
96
+ optimized to produce forecasts from 0 to H.
97
+ dataprep_train_end_date: The last date of data to use in the training and
98
+ validation set. All dates after train_end_date are part of the test set.
99
+ If last_forecasted_date is equal to the final day forecasted in the test
100
+ set, then last_forecasted_date =
101
+ train_end_date + forecast_length + (n_test_windows * test_set_stride).
102
+ last_forecasted_date must be included in the dataset.
103
+ dataprep_n_val_windows: The number of windows to use for the val set. If 0,
104
+ no validation set is used.
105
+ dataprep_n_test_windows: The number of windows to use for the test set. Must
106
+ be >= 1. See note in dataprep_train_end_date.
107
+ dataprep_test_set_stride: The number of timestamps to roll forward
108
+ when constructing each window of the val and test sets. See note in
109
+ dataprep_train_end_date.
110
+ dataprep_test_set_bigquery_dataset: The BigQuery dataset where the test set
111
+ is saved in the format bq://project.dataset. This must be in the same
112
+ region or multi-region as the output or staging bucket of the pipeline and
113
+ the dataprep_bigquery_data_path, if using a BigQuery data source.
114
+ dataflow_machine_type: The type of machine to use for dataprep,
115
+ batch prediction, and evaluation jobs.
116
+ dataflow_max_replica_count: The maximum number of replicas for scaling the
117
+ dataprep, batch prediction, and evaluation jobs.
118
+ dataflow_starting_replica_count: The number of replicas with which to start the
119
+ dataprep, batch prediction, and evaluation jobs.
120
+ dataflow_disk_size_gb: The disk size of dataflow workers in GB for the
121
+ dataprep, batch prediction, and evaluation jobs.
122
+ dataprep_csv_data_path: The path to the training data csv in the format
123
+ gs://bucket_name/sub_dir/blob_name.csv. Each row of the csv represents
124
+ a time series, where the column names are the dates, and the index values are the
125
+ unique time series names.
126
+ dataprep_csv_static_covariates_path: The path to the static covariates csv.
127
+ Each row of the csv represents the static covariate values for the series,
128
+ where the column names are the static covariate names, and the
129
+ index values are the unique time series names. They must match the
130
+ index values of dataprep_csv_data_path. The column values must match
131
+ dataprep_static_covariate_columns.
132
+ dataprep_bigquery_data_path: The path to the training data on BigQuery in
133
+ the format bq://project.dataset.table_id. Set either this or
134
+ csv_data_path, but not both. This must be in the same region or multi-region as the
135
+ output or staging bucket of the pipeline and the
136
+ dataprep_test_set_bigquery_dataset.
137
+ dataprep_ts_identifier_columns: The list of ts_identifier columns from the
138
+ BigQuery data source. These columns are used to distinguish the different
139
+ time series, so that if multiple rows have identical ts_identifier
140
+ columns, the series is generated by summing the target columns for each
141
+ timestamp. This is only used if dataprep_bigquery_data_path is set.
142
+ dataprep_time_column: The time column from the BigQuery data source. This is
143
+ only used if dataprep_bigquery_data_path is set.
144
+ dataprep_target_column: The column to be forecasted from the BigQuery data
145
+ source. This is only used if dataprep_bigquery_data_path is set.
146
+ dataprep_static_covariate_columns: The list of strings of static covariate
147
+ names. This needs to be set if training with static covariates regardless
148
+ of whether you're using bigquery_data_path or csv_static_covariates_path.
149
+ dataprep_previous_run_dir: The dataprep dir from a previous run. Use this
150
+ to save time if you've already created TFRecords from your BigQuery
151
+ dataset with the same dataprep parameters as this run.
152
+ trainer_machine_type: The machine type for training. Must be compatible with
153
+ trainer_accelerator_type.
154
+ trainer_accelerator_type: The accelerator type for training.
155
+ trainer_num_epochs: The number of epochs to train for.
156
+ trainer_cleaning_activation_regularizer_coeff: The L1 regularization
157
+ coefficient for the anomaly detection activation in the cleaning block.
158
+ The larger the value, the less aggressive the cleaning, so fewer and only
159
+ the most extreme anomalies are detected. A rule of thumb is that this
160
+ value should be about the same scale as your series.
161
+ trainer_change_point_activation_regularizer_coeff: The L1 regularization
162
+ coefficient for the change point detection activation in the change point
163
+ block. The larger the value, the less aggressive the cleaning, so fewer
164
+ and only the most extreme change points are detected. A rule of thumb is
165
+ that this value should be set relative to the
166
+ trainer_change_point_output_regularizer_coeff to determine the sparsity
167
+ of the changes. If you want the model to detect many small step changes
168
+ this number should be smaller than the
169
+ trainer_change_point_output_regularizer_coeff. To detect fewer large step
170
+ changes, this number should be about equal to or larger than the
171
+ trainer_change_point_output_regularizer_coeff.
172
+ trainer_change_point_output_regularizer_coeff: The L2 regularization
173
+ penalty applied to the mean lag-one difference of the cleaned output of
174
+ the change point block. Intuitively,
175
+ trainer_change_point_activation_regularizer_coeff determines how many
176
+ steps to detect in the series, while this parameter determines how
177
+ aggressively to clean the detected steps. The higher this value, the more
178
+ aggressive the cleaning. A rule of thumb is that this value should be
179
+ about the same scale as your series.
180
+ trainer_trend_alpha_upper_bound: The upper bound for the data smoothing parameter
181
+ alpha in the trend block.
182
+ trainer_trend_beta_upper_bound: The upper bound for the trend smoothing parameter
183
+ beta in the trend block.
184
+ trainer_trend_phi_lower_bound: The lower bound for the damping parameter phi in the
185
+ trend block.
186
+ trainer_trend_b_fixed_val: The fixed value for long term trend parameter b
187
+ in the trend block. If set to anything other than -1, the trend block will
188
+ not learn to provide estimates but use the fixed value directly.
189
+ trainer_trend_b0_fixed_val: The fixed value for starting short-term trend
190
+ parameter b0 in the trend block. If set to anything other than -1, the
191
+ trend block will not learn to provide estimates but use the fixed value
192
+ directly.
193
+ trainer_trend_phi_fixed_val: The fixed value for the damping parameter phi
194
+ in the trend block. If set to anything other than -1, the trend block will
195
+ not learn to provide estimates but use the fixed value directly.
196
+ trainer_quantiles: The list of floats representing quantiles. Leave blank if
197
+ only training to produce point forecasts.
198
+ trainer_model_blocks: The list of model blocks to use in the order they will
199
+ appear in the model. Possible values are `cleaning`, `change_point`,
200
+ `trend`, `hour_of_week`, `day_of_week`, `day_of_year`, `week_of_year`,
201
+ `month_of_year`, `residual`.
202
+ tensorboard_n_decomposition_plots: How many decomposition plots from the
203
+ test set to save to TensorBoard.
204
+ encryption_spec_key_name: Customer-managed encryption key options for the
205
+ CustomJob. If this is set, then all resources created by the CustomJob
206
+ will be encrypted with the provided encryption key.
207
+ location: The location where the pipeline components are run.
208
+ project: The project where the pipeline is run. Defaults to current project.
209
+ """
210
+ job_id = dsl.PIPELINE_JOB_NAME_PLACEHOLDER
211
+ create_dataprep_args_task = SetDataprepArgsOp(
212
+ model_blocks=trainer_model_blocks,
213
+ ts_identifier_columns=dataprep_ts_identifier_columns,
214
+ static_covariate_columns=dataprep_static_covariate_columns,
215
+ csv_data_path=dataprep_csv_data_path,
216
+ previous_run_dir=dataprep_previous_run_dir,
217
+ location=location,
218
+ )
219
+ create_trainer_args_task = SetTrainArgsOp(
220
+ quantiles=trainer_quantiles,
221
+ model_blocks=trainer_model_blocks,
222
+ static_covariates=dataprep_static_covariate_columns,
223
+ )
224
+ test_set_task = DataprepOp(
225
+ backcast_length=dataprep_backcast_length,
226
+ forecast_length=dataprep_forecast_length,
227
+ train_end_date=dataprep_train_end_date,
228
+ n_val_windows=dataprep_n_val_windows,
229
+ n_test_windows=dataprep_n_test_windows,
230
+ test_set_stride=dataprep_test_set_stride,
231
+ model_blocks=create_dataprep_args_task.outputs['model_blocks'],
232
+ bigquery_source=dataprep_bigquery_data_path,
233
+ ts_identifier_columns=create_dataprep_args_task.outputs[
234
+ 'ts_identifier_columns'],
235
+ time_column=dataprep_time_column,
236
+ static_covariate_columns=create_dataprep_args_task.outputs[
237
+ 'static_covariate_columns'],
238
+ target_column=dataprep_target_column,
239
+ machine_type=dataflow_machine_type,
240
+ docker_region=create_dataprep_args_task.outputs['docker_region'],
241
+ location=location,
242
+ project=project,
243
+ job_id=job_id,
244
+ job_name_prefix='test-set',
245
+ num_workers=dataflow_starting_replica_count,
246
+ max_num_workers=dataflow_max_replica_count,
247
+ disk_size_gb=dataflow_disk_size_gb,
248
+ test_set_only=True,
249
+ bigquery_output=dataprep_test_set_bigquery_dataset,
250
+ gcs_source=dataprep_csv_data_path,
251
+ gcs_static_covariate_source=dataprep_csv_static_covariates_path,
252
+ encryption_spec_key_name=encryption_spec_key_name
253
+ )
254
+ test_set_task.set_display_name('create-test-set')
255
+ set_test_set_task = SetTestSetOp(
256
+ dataprep_dir=test_set_task.outputs['dataprep_dir'])
257
+ with dsl.If(create_dataprep_args_task.outputs['create_tf_records'] == True, # pylint: disable=singleton-comparison
258
+ 'create-tf-records'):
259
+ create_tf_records_task = DataprepOp(
260
+ backcast_length=dataprep_backcast_length,
261
+ forecast_length=dataprep_forecast_length,
262
+ train_end_date=dataprep_train_end_date,
263
+ n_val_windows=dataprep_n_val_windows,
264
+ n_test_windows=dataprep_n_test_windows,
265
+ test_set_stride=dataprep_test_set_stride,
266
+ model_blocks=create_dataprep_args_task.outputs['model_blocks'],
267
+ bigquery_source=dataprep_bigquery_data_path,
268
+ ts_identifier_columns=create_dataprep_args_task.outputs[
269
+ 'ts_identifier_columns'],
270
+ time_column=dataprep_time_column,
271
+ static_covariate_columns=create_dataprep_args_task.outputs[
272
+ 'static_covariate_columns'],
273
+ target_column=dataprep_target_column,
274
+ machine_type=dataflow_machine_type,
275
+ docker_region=create_dataprep_args_task.outputs['docker_region'],
276
+ location=location,
277
+ project=project,
278
+ job_id=job_id,
279
+ job_name_prefix='tf-records',
280
+ num_workers=dataflow_starting_replica_count,
281
+ max_num_workers=dataflow_max_replica_count,
282
+ disk_size_gb=dataflow_disk_size_gb,
283
+ test_set_only=False,
284
+ bigquery_output=dataprep_test_set_bigquery_dataset,
285
+ gcs_source=dataprep_csv_data_path,
286
+ gcs_static_covariate_source=dataprep_csv_static_covariates_path,
287
+ encryption_spec_key_name=encryption_spec_key_name
288
+ )
289
+ create_tf_records_task.set_display_name('create-tf-records')
290
+ set_tfrecord_args_this_run_task = (
291
+ SetTfrecordArgsOp(
292
+ dataprep_dir=create_tf_records_task.outputs['dataprep_dir'],
293
+ static_covariates=dataprep_static_covariate_columns))
294
+ with dsl.Else('skip-tf-record-generation'):
295
+ set_tfrecord_args_previous_run_task = (
296
+ MaybeSetTfrecordArgsOp(
297
+ dataprep_previous_run_dir=dataprep_previous_run_dir,
298
+ static_covariates=dataprep_static_covariate_columns))
299
+ set_tfrecord_args_previous_run_task.set_display_name(
300
+ 'set_tfrecord_args_previous_run')
301
+ static_covariates_vocab_path = dsl.OneOf(
302
+ set_tfrecord_args_previous_run_task.outputs[
303
+ 'static_covariates_vocab_path'],
304
+ set_tfrecord_args_this_run_task.outputs['static_covariates_vocab_path']
305
+ )
306
+ train_tf_record_patterns = dsl.OneOf(
307
+ set_tfrecord_args_previous_run_task.outputs['train_tf_record_patterns'],
308
+ set_tfrecord_args_this_run_task.outputs['train_tf_record_patterns']
309
+ )
310
+ val_tf_record_patterns = dsl.OneOf(
311
+ set_tfrecord_args_previous_run_task.outputs['val_tf_record_patterns'],
312
+ set_tfrecord_args_this_run_task.outputs['val_tf_record_patterns']
313
+ )
314
+ test_tf_record_patterns = dsl.OneOf(
315
+ set_tfrecord_args_previous_run_task.outputs['test_tf_record_patterns'],
316
+ set_tfrecord_args_this_run_task.outputs['test_tf_record_patterns']
317
+ )
318
+ trainer_task = TrainOp(
319
+ num_epochs=trainer_num_epochs,
320
+ backcast_length=dataprep_backcast_length,
321
+ forecast_length=dataprep_forecast_length,
322
+ train_end_date=dataprep_train_end_date,
323
+ csv_data_path=dataprep_csv_data_path,
324
+ csv_static_covariates_path=dataprep_csv_static_covariates_path,
325
+ static_covariates_vocab_path=static_covariates_vocab_path,
326
+ train_tf_record_patterns=train_tf_record_patterns,
327
+ val_tf_record_patterns=val_tf_record_patterns,
328
+ test_tf_record_patterns=test_tf_record_patterns,
329
+ n_decomposition_plots=tensorboard_n_decomposition_plots,
330
+ n_val_windows=dataprep_n_val_windows,
331
+ n_test_windows=dataprep_n_test_windows,
332
+ test_set_stride=dataprep_test_set_stride,
333
+ cleaning_activation_regularizer_coeff=trainer_cleaning_activation_regularizer_coeff,
334
+ change_point_activation_regularizer_coeff=trainer_change_point_activation_regularizer_coeff,
335
+ change_point_output_regularizer_coeff=trainer_change_point_output_regularizer_coeff,
336
+ alpha_upper_bound=trainer_trend_alpha_upper_bound,
337
+ beta_upper_bound=trainer_trend_beta_upper_bound,
338
+ phi_lower_bound=trainer_trend_phi_lower_bound,
339
+ b_fixed_val=trainer_trend_b_fixed_val,
340
+ b0_fixed_val=trainer_trend_b0_fixed_val,
341
+ phi_fixed_val=trainer_trend_phi_fixed_val,
342
+ quantiles=create_trainer_args_task.outputs['quantiles'],
343
+ use_static_covariates=create_trainer_args_task.outputs[
344
+ 'use_static_covariates'],
345
+ static_covariate_names=create_trainer_args_task.outputs[
346
+ 'static_covariate_names'],
347
+ model_blocks=create_trainer_args_task.outputs['model_blocks'],
348
+ freeze_point_forecasts=create_trainer_args_task.outputs[
349
+ 'freeze_point_forecasts'],
350
+ machine_type=trainer_machine_type,
351
+ accelerator_type=trainer_accelerator_type,
352
+ docker_region=create_dataprep_args_task.outputs['docker_region'],
353
+ location=location,
354
+ job_id=job_id,
355
+ project=project,
356
+ encryption_spec_key_name=encryption_spec_key_name
357
+ )
358
+ _ = UploadDecompositionPlotsOp(
359
+ project=project,
360
+ location=location,
361
+ tensorboard_id=tensorboard_instance_id,
362
+ display_name=job_id,
363
+ trainer_dir=trainer_task.outputs['trainer_dir'])
364
+ training_artifacts_task = GetTrainingArtifactsOp(
365
+ docker_region=create_dataprep_args_task.outputs['docker_region'],
366
+ trainer_dir=trainer_task.outputs['trainer_dir'])
367
+ model = dsl.importer(
368
+ artifact_uri=training_artifacts_task.outputs['artifact_uri'],
369
+ artifact_class=artifact_types.UnmanagedContainerModel,
370
+ metadata={
371
+ 'predictSchemata': {
372
+ 'instanceSchemaUri': training_artifacts_task.outputs[
373
+ 'instance_schema_uri'],
374
+ 'predictionSchemaUri': training_artifacts_task.outputs[
375
+ 'prediction_schema_uri'],
376
+ },
377
+ 'containerSpec': {
378
+ 'imageUri': training_artifacts_task.outputs['image_uri'],
379
+ 'healthRoute': '/health',
380
+ 'predictRoute': '/predict',
381
+ }
382
+ },
383
+ )
384
+ model.set_display_name('set-model')
385
+ upload_model_task = UploadModelOp(
386
+ project=project,
387
+ location=location,
388
+ display_name=job_id,
389
+ unmanaged_container_model=model.output,
390
+ encryption_spec_key_name=encryption_spec_key_name,
391
+ )
392
+ upload_model_task.set_display_name('upload-model')
393
+ batch_predict_task = batch_predict_job.ModelBatchPredictOp(
394
+ project=project,
395
+ location=location,
396
+ unmanaged_container_model=model.output,
397
+ job_display_name=f'batch-predict-{job_id}',
398
+ instances_format='bigquery',
399
+ predictions_format='bigquery',
400
+ bigquery_source_input_uri=set_test_set_task.outputs['uri'],
401
+ bigquery_destination_output_uri=dataprep_test_set_bigquery_dataset,
402
+ machine_type=dataflow_machine_type,
403
+ starting_replica_count=dataflow_starting_replica_count,
404
+ max_replica_count=dataflow_max_replica_count,
405
+ encryption_spec_key_name=encryption_spec_key_name,
406
+ generate_explanation=False,
407
+ )
408
+ batch_predict_task.set_display_name('run-batch-prediction')
409
+ set_eval_args_task = SetEvalArgsOp(
410
+ big_query_source=batch_predict_task.outputs['bigquery_output_table'],
411
+ quantiles=trainer_quantiles)
412
+ eval_task = EvaluationOp(
413
+ project=project,
414
+ location=location,
415
+ root_dir=test_set_task.outputs['dataprep_dir'],
416
+ target_field_name='HORIZON__x',
417
+ predictions_format='bigquery',
418
+ ground_truth_format='bigquery',
419
+ predictions_bigquery_source=batch_predict_task.outputs[
420
+ 'bigquery_output_table'],
421
+ ground_truth_bigquery_source=set_eval_args_task.outputs[
422
+ 'big_query_source'],
423
+ ground_truth_gcs_source=[],
424
+ forecasting_type=set_eval_args_task.outputs['forecasting_type'],
425
+ forecasting_quantiles=set_eval_args_task.outputs['quantiles'],
426
+ prediction_score_column=set_eval_args_task.outputs[
427
+ 'prediction_score_column'],
428
+ dataflow_service_account=_placeholders.SERVICE_ACCOUNT_PLACEHOLDER,
429
+ dataflow_machine_type=dataflow_machine_type,
430
+ dataflow_max_workers_num=dataflow_max_replica_count,
431
+ dataflow_workers_num=dataflow_starting_replica_count,
432
+ dataflow_disk_size=dataflow_disk_size_gb,
433
+ dataflow_use_public_ips=True,
434
+ encryption_spec_key_name=encryption_spec_key_name,
435
+ )
436
+ model_evaluation_import_component.model_evaluation_import(
437
+ forecasting_metrics=eval_task.outputs['evaluation_metrics'],
438
+ model=upload_model_task.outputs['model'],
439
+ dataset_type='bigquery',
440
+ dataset_path=set_test_set_task.outputs['uri'],
441
+ display_name=job_id,
442
+ problem_type='forecasting',
443
+ )
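The hunk above adds the full Starry Net pipeline definition. Below is a minimal sketch (not part of the diff) of compiling it with the KFP v2 SDK; the import path assumes the module layout shown in the file list above, and any other exported aliases in preview/starry_net/__init__.py would work equally well.

```python
# Minimal sketch: compile the new preview Starry Net pipeline to a pipeline
# spec with the KFP v2 compiler. Import path follows the file list above.
from kfp import compiler
from google_cloud_pipeline_components.preview.starry_net import component as starry_net_component

compiler.Compiler().compile(
    pipeline_func=starry_net_component.starry_net,
    package_path='starry_net_pipeline.yaml',
)
```

The compiled spec can then be submitted as a Vertex AI pipeline run, with the required dataprep_* arguments (backcast and forecast lengths, train end date, and the validation/test window settings) supplied as parameter values, matching the signature above.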
@@ -0,0 +1,32 @@ google_cloud_pipeline_components/proto/task_error_pb2.py (new file)
1
+ # -*- coding: utf-8 -*-
2
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
3
+ # NO CHECKED-IN PROTOBUF GENCODE
4
+ # Protobuf Python Version: 0.20240502.0
5
+ """Generated protocol buffer code."""
6
+ from google.protobuf import descriptor as _descriptor
7
+ from google.protobuf import descriptor_pool as _descriptor_pool
8
+ from google.protobuf import symbol_database as _symbol_database
9
+ from google.protobuf.internal import builder as _builder
10
+ # @@protoc_insertion_point(imports)
11
+
12
+ _sym_db = _symbol_database.Default()
13
+
14
+
15
+ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
16
+ b'\n\x13task_error.proto\x12\ntask_error""\n\tTaskError\x12\x15\n\rerror_message\x18\x01'
17
+ b' \x01(\tB\x02P\x01\x62\x06proto3'
18
+ )
19
+
20
+ _globals = globals()
21
+ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
22
+ _builder.BuildTopDescriptorsAndMessages(
23
+ DESCRIPTOR,
24
+ 'google_cloud_pipeline_components.google_cloud_pipeline_components.proto.task_error_pb2',
25
+ _globals,
26
+ )
27
+ if not _descriptor._USE_C_DESCRIPTORS:
28
+ _globals['DESCRIPTOR']._loaded_options = None
29
+ _globals['DESCRIPTOR']._serialized_options = b'P\001'
30
+ _globals['_TASKERROR']._serialized_start = 119
31
+ _globals['_TASKERROR']._serialized_end = 153
32
+ # @@protoc_insertion_point(module_scope)
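The new task_error_pb2 module defines a single TaskError message with an error_message string field, as encoded in the serialized descriptor above. A hedged sketch of constructing and round-tripping such a message:

```python
# Illustrative only: build and serialize the TaskError message generated above.
from google_cloud_pipeline_components.proto import task_error_pb2

err = task_error_pb2.TaskError(error_message='training task failed: see worker logs')
wire_bytes = err.SerializeToString()                  # protobuf wire format
restored = task_error_pb2.TaskError.FromString(wire_bytes)
assert restored.error_message == err.error_message
```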
@@ -1461,7 +1461,7 @@ deploymentSpec:
1461
1461
  \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\
1462
1462
  \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \
1463
1463
  \ ref.project, ref.dataset_id)\n\n"
1464
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
1464
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
1465
1465
  exec-bigquery-delete-dataset-with-prefix:
1466
1466
  container:
1467
1467
  args:
@@ -1495,7 +1495,7 @@ deploymentSpec:
1495
1495
  \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\
1496
1496
  \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\
1497
1497
  \n"
1498
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
1498
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
1499
1499
  exec-bigquery-query-job:
1500
1500
  container:
1501
1501
  args:
@@ -1583,7 +1583,7 @@ deploymentSpec:
1583
1583
  \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\
1584
1584
  \ if write_disposition:\n config['write_disposition'] = write_disposition\n\
1585
1585
  \ return config\n\n"
1586
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
1586
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
1587
1587
  exec-build-job-configuration-query-2:
1588
1588
  container:
1589
1589
  args:
@@ -1617,7 +1617,7 @@ deploymentSpec:
1617
1617
  \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\
1618
1618
  \ if write_disposition:\n config['write_disposition'] = write_disposition\n\
1619
1619
  \ return config\n\n"
1620
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
1620
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
1621
1621
  exec-get-first-valid:
1622
1622
  container:
1623
1623
  args:
@@ -1641,7 +1641,7 @@ deploymentSpec:
1641
1641
  \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\
1642
1642
  \n for value in json.loads(values):\n if value:\n return value\n\
1643
1643
  \ raise ValueError('No valid values.')\n\n"
1644
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
1644
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
1645
1645
  exec-get-table-location:
1646
1646
  container:
1647
1647
  args:
@@ -1677,7 +1677,7 @@ deploymentSpec:
1677
1677
  \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\
1678
1678
  \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\
1679
1679
  \ return client.get_table(table).location\n\n"
1680
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
1680
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
1681
1681
  exec-get-table-location-2:
1682
1682
  container:
1683
1683
  args:
@@ -1713,7 +1713,7 @@ deploymentSpec:
1713
1713
  \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\
1714
1714
  \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\
1715
1715
  \ return client.get_table(table).location\n\n"
1716
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
1716
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
1717
1717
  exec-load-table-from-uri:
1718
1718
  container:
1719
1719
  args:
@@ -1754,7 +1754,7 @@ deploymentSpec:
1754
1754
  \ source_format=source_format)\n client.load_table_from_uri(\n source_uris=csv_list,\n\
1755
1755
  \ destination=destination,\n project=project,\n location=location,\n\
1756
1756
  \ job_config=job_config).result()\n return destination\n\n"
1757
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
1757
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
1758
1758
  exec-make-vertex-model-artifact:
1759
1759
  container:
1760
1760
  args:
@@ -1778,7 +1778,7 @@ deploymentSpec:
1778
1778
  Creates a google.VertexModel artifact.\"\"\"\n vertex_model.metadata =\
1779
1779
  \ {'resourceName': model_resource_name}\n vertex_model.uri = (f'https://{location}-aiplatform.googleapis.com'\n\
1780
1780
  \ f'/v1/{model_resource_name}')\n\n"
1781
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
1781
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
1782
1782
  exec-maybe-replace-with-default:
1783
1783
  container:
1784
1784
  args:
@@ -1800,7 +1800,7 @@ deploymentSpec:
1800
1800
  \ *\n\ndef maybe_replace_with_default(value: str, default: str = '') ->\
1801
1801
  \ str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\
1802
1802
  \n return default if not value else value\n\n"
1803
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
1803
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
1804
1804
  exec-model-batch-predict:
1805
1805
  container:
1806
1806
  args:
@@ -1879,7 +1879,7 @@ deploymentSpec:
1879
1879
  \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\
1880
1880
  \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\
1881
1881
  \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n"
1882
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
1882
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
1883
1883
  exec-table-to-uri-2:
1884
1884
  container:
1885
1885
  args:
@@ -1909,7 +1909,7 @@ deploymentSpec:
1909
1909
  \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\
1910
1910
  \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\
1911
1911
  \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n"
1912
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
1912
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
1913
1913
  exec-validate-inputs:
1914
1914
  container:
1915
1915
  args:
@@ -2005,7 +2005,7 @@ deploymentSpec:
2005
2005
  \ raise ValueError(\n 'Granularity unit should be one of the\
2006
2006
  \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\
2007
2007
  \n"
2008
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
2008
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
2009
2009
  pipelineInfo:
2010
2010
  description: Creates a batch prediction using a Prophet model.
2011
2011
  name: prophet-predict
@@ -180,6 +180,16 @@ def prophet_trainer(
180
180
  '--dataflow_use_public_ips=',
181
181
  dataflow_use_public_ips,
182
182
  '", "',
183
+ '--dataflow_staging_dir=',
184
+ root_dir,
185
+ (
186
+ f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_staging", "'
187
+ ),
188
+ '--dataflow_tmp_dir=',
189
+ root_dir,
190
+ (
191
+ f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_tmp", "'
192
+ ),
183
193
  '--gcp_resources_path=',
184
194
  gcp_resources,
185
195
  '", "',
@@ -2418,7 +2418,10 @@ deploymentSpec:
2418
2418
  "\", \"", "--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}",
2419
2419
  "\", \"", "--dataflow_subnetwork=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}",
2420
2420
  "\", \"", "--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}",
2421
- "\", \"", "--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}",
2421
+ "\", \"", "--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}",
2422
+ "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", \"",
2423
+ "--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\",
2424
+ \"", "--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}",
2422
2425
  "\", \"", "--executor_input={{$.json_escape[1]}}\"]}}]}}"]}'
2423
2426
  command:
2424
2427
  - python3
@@ -1,4 +1,4 @@
1
- # Copyright 2023 The Kubeflow Authors. All Rights Reserved.
1
+ # Copyright 2024 The Kubeflow Authors. All Rights Reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -21,7 +21,7 @@ from google_cloud_pipeline_components._implementation.model_evaluation import Ev
21
21
  from google_cloud_pipeline_components._implementation.model_evaluation import EvaluationDatasetPreprocessorOp as DatasetPreprocessorOp
22
22
  from google_cloud_pipeline_components._implementation.model_evaluation import FeatureExtractorOp
23
23
  from google_cloud_pipeline_components._implementation.model_evaluation import ModelImportEvaluatedAnnotationOp
24
- from google_cloud_pipeline_components._implementation.model_evaluation import ModelImportEvaluationOp
24
+ from google_cloud_pipeline_components.preview.model_evaluation.model_evaluation_import_component import model_evaluation_import as ModelImportEvaluationOp
25
25
  from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp
26
26
  from google_cloud_pipeline_components.v1.dataset import GetVertexDatasetOp
27
27
  from google_cloud_pipeline_components.v1.model_evaluation.classification_component import model_evaluation_classification as ModelEvaluationClassificationOp
@@ -224,14 +224,12 @@ def vision_model_error_analysis_pipeline( # pylint: disable=dangerous-default-v
224
224
  )
225
225
 
226
226
  with dsl.Condition(
227
- (
228
- (
229
- test_dataset_resource_name == ''
230
- and training_dataset_resource_name == ''
231
- and test_dataset_annotation_set_name == ''
232
- and training_dataset_annotation_set_name == ''
233
- )
234
- ),
227
+ ((
228
+ test_dataset_resource_name == ''
229
+ and training_dataset_resource_name == ''
230
+ and test_dataset_annotation_set_name == ''
231
+ and training_dataset_annotation_set_name == ''
232
+ )),
235
233
  name='CustomDataset',
236
234
  ):
237
235
  dataset_preprocessor_task = DatasetPreprocessorOp(