google-cloud-pipeline-components 2.14.0__py3-none-any.whl → 2.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this version of google-cloud-pipeline-components has been flagged as potentially problematic.

Files changed (64)
  1. google_cloud_pipeline_components/_implementation/llm/deployment_graph.py +10 -26
  2. google_cloud_pipeline_components/_implementation/llm/generated/refined_image_versions.py +1 -1
  3. google_cloud_pipeline_components/_implementation/llm/infer_preprocessor.py +109 -0
  4. google_cloud_pipeline_components/_implementation/llm/online_evaluation_pairwise.py +8 -0
  5. google_cloud_pipeline_components/_implementation/llm/reward_model_graph.py +5 -6
  6. google_cloud_pipeline_components/_implementation/llm/rlhf_preprocessor.py +24 -0
  7. google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py +0 -12
  8. google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding/evaluation_llm_embedding_pipeline.py +2 -1
  9. google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation_preprocessor/component.py +14 -0
  10. google_cloud_pipeline_components/_implementation/starry_net/__init__.py +41 -0
  11. google_cloud_pipeline_components/_implementation/{model_evaluation/import_evaluation → starry_net/dataprep}/__init__.py +1 -2
  12. google_cloud_pipeline_components/_implementation/starry_net/dataprep/component.py +159 -0
  13. google_cloud_pipeline_components/_implementation/starry_net/evaluation/__init__.py +13 -0
  14. google_cloud_pipeline_components/_implementation/starry_net/evaluation/component.py +23 -0
  15. google_cloud_pipeline_components/_implementation/starry_net/evaluation/evaluation.yaml +197 -0
  16. google_cloud_pipeline_components/_implementation/starry_net/get_training_artifacts/__init__.py +13 -0
  17. google_cloud_pipeline_components/_implementation/starry_net/get_training_artifacts/component.py +62 -0
  18. google_cloud_pipeline_components/_implementation/starry_net/maybe_set_tfrecord_args/__init__.py +13 -0
  19. google_cloud_pipeline_components/_implementation/starry_net/maybe_set_tfrecord_args/component.py +77 -0
  20. google_cloud_pipeline_components/_implementation/starry_net/set_dataprep_args/__init__.py +13 -0
  21. google_cloud_pipeline_components/_implementation/starry_net/set_dataprep_args/component.py +97 -0
  22. google_cloud_pipeline_components/_implementation/starry_net/set_eval_args/__init__.py +13 -0
  23. google_cloud_pipeline_components/_implementation/starry_net/set_eval_args/component.py +76 -0
  24. google_cloud_pipeline_components/_implementation/starry_net/set_test_set/__init__.py +13 -0
  25. google_cloud_pipeline_components/_implementation/starry_net/set_test_set/component.py +48 -0
  26. google_cloud_pipeline_components/_implementation/starry_net/set_tfrecord_args/__init__.py +13 -0
  27. google_cloud_pipeline_components/_implementation/starry_net/set_tfrecord_args/component.py +70 -0
  28. google_cloud_pipeline_components/_implementation/starry_net/set_train_args/__init__.py +13 -0
  29. google_cloud_pipeline_components/_implementation/starry_net/set_train_args/component.py +90 -0
  30. google_cloud_pipeline_components/_implementation/starry_net/train/__init__.py +13 -0
  31. google_cloud_pipeline_components/_implementation/starry_net/train/component.py +209 -0
  32. google_cloud_pipeline_components/_implementation/starry_net/upload_decomposition_plots/__init__.py +13 -0
  33. google_cloud_pipeline_components/_implementation/starry_net/upload_decomposition_plots/component.py +59 -0
  34. google_cloud_pipeline_components/_implementation/starry_net/upload_model/__init__.py +13 -0
  35. google_cloud_pipeline_components/_implementation/starry_net/upload_model/component.py +23 -0
  36. google_cloud_pipeline_components/_implementation/starry_net/upload_model/upload_model.yaml +37 -0
  37. google_cloud_pipeline_components/_implementation/starry_net/version.py +18 -0
  38. google_cloud_pipeline_components/container/utils/error_surfacing.py +45 -0
  39. google_cloud_pipeline_components/container/v1/model/get_model/remote_runner.py +36 -7
  40. google_cloud_pipeline_components/preview/llm/infer/component.py +22 -25
  41. google_cloud_pipeline_components/preview/llm/rlhf/component.py +15 -8
  42. google_cloud_pipeline_components/preview/model_evaluation/__init__.py +4 -1
  43. google_cloud_pipeline_components/{_implementation/model_evaluation/import_evaluation/component.py → preview/model_evaluation/model_evaluation_import_component.py} +4 -3
  44. google_cloud_pipeline_components/preview/starry_net/__init__.py +19 -0
  45. google_cloud_pipeline_components/preview/starry_net/component.py +443 -0
  46. google_cloud_pipeline_components/proto/task_error_pb2.py +32 -0
  47. google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml +13 -13
  48. google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py +10 -0
  49. google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml +4 -1
  50. google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py +8 -10
  51. google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py +2 -2
  52. google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py +2 -2
  53. google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py +2 -2
  54. google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py +2 -2
  55. google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py +2 -2
  56. google_cloud_pipeline_components/v1/model_evaluation/evaluation_llm_classification_pipeline.py +4 -2
  57. google_cloud_pipeline_components/v1/model_evaluation/evaluation_llm_text_generation_pipeline.py +8 -2
  58. google_cloud_pipeline_components/v1/model_evaluation/model_based_llm_evaluation/autosxs/autosxs_pipeline.py +1 -0
  59. google_cloud_pipeline_components/version.py +1 -1
  60. {google_cloud_pipeline_components-2.14.0.dist-info → google_cloud_pipeline_components-2.15.0.dist-info}/METADATA +17 -20
  61. {google_cloud_pipeline_components-2.14.0.dist-info → google_cloud_pipeline_components-2.15.0.dist-info}/RECORD +64 -32
  62. {google_cloud_pipeline_components-2.14.0.dist-info → google_cloud_pipeline_components-2.15.0.dist-info}/WHEEL +1 -1
  63. {google_cloud_pipeline_components-2.14.0.dist-info → google_cloud_pipeline_components-2.15.0.dist-info}/LICENSE +0 -0
  64. {google_cloud_pipeline_components-2.14.0.dist-info → google_cloud_pipeline_components-2.15.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,159 @@
+ # Copyright 2024 The Kubeflow Authors. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #      http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Starry Net component for data preparation."""
+
+ from google_cloud_pipeline_components import utils
+ from google_cloud_pipeline_components._implementation.starry_net import version
+ from kfp import dsl
+
+
+ @dsl.container_component
+ def dataprep(
+     gcp_resources: dsl.OutputPath(str),
+     dataprep_dir: dsl.Output[dsl.Artifact],  # pytype: disable=unsupported-operands
+     backcast_length: int,
+     forecast_length: int,
+     train_end_date: str,
+     n_val_windows: int,
+     n_test_windows: int,
+     test_set_stride: int,
+     model_blocks: str,
+     bigquery_source: str,
+     ts_identifier_columns: str,
+     time_column: str,
+     static_covariate_columns: str,
+     target_column: str,
+     machine_type: str,
+     docker_region: str,
+     location: str,
+     project: str,
+     job_id: str,
+     job_name_prefix: str,
+     num_workers: int,
+     max_num_workers: int,
+     disk_size_gb: int,
+     test_set_only: bool,
+     bigquery_output: str,
+     gcs_source: str,
+     gcs_static_covariate_source: str,
+     encryption_spec_key_name: str,
+ ):
+   # fmt: off
+   """Runs Dataprep for training and evaluating a STARRY-Net model.
+
+   Args:
+     gcp_resources: Serialized JSON of ``gcp_resources`` which tracks the
+       CustomJob.
+     dataprep_dir: The GCS bucket path where all dataprep artifacts are saved.
+     backcast_length: The length of the input window to feed into the model.
+     forecast_length: The length of the forecast horizon.
+     train_end_date: The last date of data to use in the training set. All
+       subsequent dates are part of the test set.
+     n_val_windows: The number of windows to use for the validation set. If 0,
+       no validation set is used.
+     n_test_windows: The number of windows to use for the test set. Must be
+       >= 1.
+     test_set_stride: The number of timestamps to roll forward when
+       constructing the validation and test sets.
+     model_blocks: The stringified tuple of blocks to use in the order that
+       they appear in the model. Possible values are `cleaning`,
+       `change_point`, `trend`, `hour_of_week-hybrid`, `day_of_week-hybrid`,
+       `day_of_year-hybrid`, `week_of_year-hybrid`, `month_of_year-hybrid`,
+       `residual`, `quantile`.
+     bigquery_source: The BigQuery source of the data.
+     ts_identifier_columns: The columns that identify unique time series in
+       the BigQuery data source.
+     time_column: The column with timestamps in the BigQuery source.
+     static_covariate_columns: The names of the static covariates.
+     target_column: The target column in the BigQuery data source.
+     machine_type: The machine type of the Dataflow workers.
+     docker_region: The docker region, used to determine which image to use.
+     location: The location where the job is run.
+     project: The name of the project.
+     job_id: The pipeline job id.
+     job_name_prefix: The prefix of the Dataflow job name.
+     num_workers: The initial number of workers in the Dataflow job.
+     max_num_workers: The maximum number of workers in the Dataflow job.
+     disk_size_gb: The disk size of each Dataflow worker.
+     test_set_only: Whether to only create the test set BigQuery table or also
+       to create TFRecords for training and validation.
+     bigquery_output: The BigQuery dataset where the test set is written, in
+       the form bq://project.dataset.
+     gcs_source: The path to the CSV file of the data source.
+     gcs_static_covariate_source: The path to the CSV file of static
+       covariates.
+     encryption_spec_key_name: Customer-managed encryption key options for the
+       CustomJob. If this is set, then all resources created by the CustomJob
+       will be encrypted with the provided encryption key.
+
+   Returns:
+     gcp_resources: Serialized JSON of ``gcp_resources`` which tracks the
+       CustomJob.
+     dataprep_dir: The GCS bucket path where all dataprep artifacts are saved.
+   """
+   job_name = f'{job_name_prefix}-{job_id}'
+   payload = {
+       'display_name': job_name,
+       'encryption_spec': {
+           'kms_key_name': str(encryption_spec_key_name),
+       },
+       'job_spec': {
+           'worker_pool_specs': [{
+               'replica_count': '1',
+               'machine_spec': {
+                   'machine_type': str(machine_type),
+               },
+               'disk_spec': {
+                   'boot_disk_type': 'pd-ssd',
+                   'boot_disk_size_gb': 100,
+               },
+               'container_spec': {
+                   'image_uri': f'{docker_region}-docker.pkg.dev/vertex-ai-restricted/starryn/dataprep:captain_{version.DATAPREP_VERSION}',
+                   'args': [
+                       '--config=starryn/experiments/configs/vertex.py',
+                       f'--config.datasets.backcast_length={backcast_length}',
+                       f'--config.datasets.forecast_length={forecast_length}',
+                       f'--config.datasets.train_end_date={train_end_date}',
+                       f'--config.datasets.n_val_windows={n_val_windows}',
+                       f'--config.datasets.val_rolling_window_size={test_set_stride}',
+                       f'--config.datasets.n_test_windows={n_test_windows}',
+                       f'--config.datasets.test_rolling_window_size={test_set_stride}',
+                       f'--config.model.static_cov_names={static_covariate_columns}',
+                       f'--config.model.blocks_list={model_blocks}',
+                       f'--bigquery_source={bigquery_source}',
+                       f'--bigquery_output={bigquery_output}',
+                       f'--gcs_source={gcs_source}',
+                       f'--gcs_static_covariate_source={gcs_static_covariate_source}',
+                       f'--ts_identifier_columns={ts_identifier_columns}',
+                       f'--time_column={time_column}',
+                       f'--target_column={target_column}',
+                       f'--job_id={job_name}',
+                       f'--num_workers={num_workers}',
+                       f'--max_num_workers={max_num_workers}',
+                       f'--root_bucket={dataprep_dir.uri}',
+                       f'--disk_size={disk_size_gb}',
+                       f'--machine_type={machine_type}',
+                       f'--test_set_only={test_set_only}',
+                       f'--image_uri={docker_region}-docker.pkg.dev/vertex-ai-restricted/starryn/dataprep:replica_{version.DATAPREP_VERSION}',
+                   ],
+               },
+           }]
+       }
+   }
+   return utils.build_serverless_customjob_container_spec(
+       project=project,
+       location=location,
+       custom_job_payload=payload,
+       gcp_resources=gcp_resources,
+   )
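For orientation only (this snippet is not part of the diff), a minimal sketch of how the `dataprep` container component above could be wired into a KFP pipeline. The pipeline name, BigQuery table, column names, and parameter values are illustrative assumptions, not values shipped in this release:

from kfp import dsl

@dsl.pipeline(name='starry-net-dataprep-demo')  # assumed pipeline name
def demo_pipeline(project: str, location: str = 'us-central1'):
  # The component emits gcp_resources and dataprep_dir outputs; downstream
  # training and evaluation steps would consume dataprep_dir.
  dataprep_task = dataprep(
      backcast_length=28,
      forecast_length=7,
      train_end_date='2024-01-01',
      n_val_windows=2,
      n_test_windows=1,
      test_set_stride=7,
      model_blocks="('cleaning', 'trend', 'residual')",
      bigquery_source='bq://my-project.my_dataset.sales',  # assumed table
      ts_identifier_columns='store_id,item_id',             # assumed columns
      time_column='date',
      static_covariate_columns='()',
      target_column='sales',
      machine_type='n1-standard-4',
      docker_region='us',
      location=location,
      project=project,
      job_id=dsl.PIPELINE_JOB_ID_PLACEHOLDER,
      job_name_prefix='starry-dataprep',
      num_workers=1,
      max_num_workers=10,
      disk_size_gb=50,
      test_set_only=False,
      bigquery_output='bq://my-project.my_output_dataset',  # assumed dataset
      gcs_source='',
      gcs_static_covariate_source='',
      encryption_spec_key_name='',
  )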
@@ -0,0 +1,13 @@
+ # Copyright 2024 The Kubeflow Authors. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #      http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
@@ -0,0 +1,23 @@
+ # Copyright 2024 The Kubeflow Authors. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #      http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """StarryNet Evaluation Component."""
+
+ import os
+
+ from kfp import components
+
+ # TODO(b/346580764)
+ evaluation = components.load_component_from_file(
+     os.path.join(os.path.dirname(__file__), 'evaluation.yaml')
+ )
@@ -0,0 +1,197 @@
+ name: model_evaluation_forecasting
+ description: |
+   Computes a google.ForecastingMetrics Artifact, containing evaluation metrics given a model's prediction results.
+   Creates a dataflow job with Apache Beam and TFMA to compute evaluation metrics.
+   Supports point forecasting and quantile forecasting for tabular data.
+   Args:
+     project (str):
+       Project to run evaluation container.
+     location (Optional[str]):
+       Location for running the evaluation.
+       If not set, defaulted to `us-central1`.
+     root_dir (str):
+       The GCS directory for keeping staging files.
+       A random subdirectory will be created under the directory to keep job info for resuming
+       the job in case of failure.
+     predictions_format (Optional[str]):
+       The file format for the batch prediction results. `jsonl` is currently the only allowed
+       format.
+       If not set, defaulted to `jsonl`.
+     predictions_gcs_source (Optional[system.Artifact]):
+       An artifact with its URI pointing toward a GCS directory with prediction or explanation
+       files to be used for this evaluation.
+       For prediction results, the files should be named "prediction.results-*".
+       For explanation results, the files should be named "explanation.results-*".
+     predictions_bigquery_source (Optional[google.BQTable]):
+       BigQuery table with prediction or explanation data to be used for this evaluation.
+       For prediction results, the table column should be named "predicted_*".
+     ground_truth_format (Optional[str]):
+       Required for custom tabular and non-tabular data.
+       The file format for the ground truth files. `jsonl` is currently the only allowed format.
+       If not set, defaulted to `jsonl`.
+     ground_truth_gcs_source (Optional[Sequence[str]]):
+       Required for custom tabular and non-tabular data.
+       The GCS URIs representing where the ground truth is located.
+       Used to provide ground truth for each prediction instance when they are not part of the
+       batch prediction job's prediction instances.
+     ground_truth_bigquery_source (Optional[str]):
+       Required for custom tabular data.
+       The BigQuery table URI representing where the ground truth is located.
+       Used to provide ground truth for each prediction instance when they are not part of the
+       batch prediction job's prediction instances.
+     target_field_name (str):
+       The full name path of the features target field in the predictions file.
+       Formatted to be able to find nested columns, delimited by `.`.
+       Alternatively referred to as the ground truth (or ground_truth_column) field.
+     model (Optional[google.VertexModel]):
+       The Model used for the predictions job.
+       Must share the same ancestor Location.
+     prediction_score_column (Optional[str]):
+       Optional. The column name of the field containing batch prediction scores.
+       Formatted to be able to find nested columns, delimited by `.`.
+       If not set, defaulted to `prediction.value` for a `point` forecasting_type and
+       `prediction.quantile_predictions` for a `quantile` forecasting_type.
+     forecasting_type (Optional[str]):
+       Optional. If the problem_type is `forecasting`, then the forecasting type being addressed
+       by this regression evaluation run. `point` and `quantile` are the supported types.
+       If not set, defaulted to `point`.
+     forecasting_quantiles (Optional[Sequence[Float]]):
+       Required for a `quantile` forecasting_type.
+       The list of quantiles in the same order as they appear in the quantile prediction score
+       column. If one of the quantiles is set to `0.5f`, point evaluation will be set on that index.
+     example_weight_column (Optional[str]):
+       Optional. The column name of the field containing example weights.
+     point_evaluation_quantile (Optional[Float]):
+       Required for a `quantile` forecasting_type.
+       A quantile in the list of forecasting_quantiles that will be used for point evaluation
+       metrics.
+     dataflow_service_account (Optional[str]):
+       Optional. Service account to run the dataflow job.
+       If not set, dataflow will use the default worker service account.
+       For more details, see https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account
+     dataflow_disk_size (Optional[int]):
+       Optional. The disk size (in GB) of the machine executing the evaluation run.
+       If not set, defaulted to `50`.
+     dataflow_machine_type (Optional[str]):
+       Optional. The machine type executing the evaluation run.
+       If not set, defaulted to `n1-standard-4`.
+     dataflow_workers_num (Optional[int]):
+       Optional. The number of workers executing the evaluation run.
+       If not set, defaulted to `10`.
+     dataflow_max_workers_num (Optional[int]):
+       Optional. The max number of workers executing the evaluation run.
+       If not set, defaulted to `25`.
+     dataflow_subnetwork (Optional[str]):
+       Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be
+       used. More details:
+       https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications
+     dataflow_use_public_ips (Optional[bool]):
+       Specifies whether Dataflow workers use public IP addresses.
+     encryption_spec_key_name (Optional[str]):
+       Customer-managed encryption key.
+   Returns:
+     evaluation_metrics (google.ForecastingMetrics):
+       google.ForecastingMetrics artifact representing the forecasting evaluation metrics in GCS.
+ inputs:
+   - { name: project, type: String }
+   - { name: location, type: String, default: "us-central1" }
+   - { name: root_dir, type: system.Artifact }
+   - { name: predictions_format, type: String, default: "jsonl" }
+   - { name: predictions_gcs_source, type: Artifact, optional: True }
+   - { name: predictions_bigquery_source, type: google.BQTable, optional: True }
+   - { name: ground_truth_format, type: String, default: "jsonl" }
+   - { name: ground_truth_gcs_source, type: JsonArray, default: "[]" }
+   - { name: ground_truth_bigquery_source, type: String, default: "" }
+   - { name: target_field_name, type: String }
+   - { name: model, type: google.VertexModel, optional: True }
+   - { name: prediction_score_column, type: String, default: "" }
+   - { name: forecasting_type, type: String, default: "point" }
+   - { name: forecasting_quantiles, type: JsonArray, default: "[0.5]" }
+   - { name: example_weight_column, type: String, default: "" }
+   - { name: point_evaluation_quantile, type: Float, default: 0.5 }
+   - { name: dataflow_service_account, type: String, default: "" }
+   - { name: dataflow_disk_size, type: Integer, default: 50 }
+   - { name: dataflow_machine_type, type: String, default: "n1-standard-4" }
+   - { name: dataflow_workers_num, type: Integer, default: 1 }
+   - { name: dataflow_max_workers_num, type: Integer, default: 5 }
+   - { name: dataflow_subnetwork, type: String, default: "" }
+   - { name: dataflow_use_public_ips, type: Boolean, default: "true" }
+   - { name: encryption_spec_key_name, type: String, default: "" }
+ outputs:
+   - { name: evaluation_metrics, type: google.ForecastingMetrics }
+   - { name: gcp_resources, type: String }
+ implementation:
+   container:
+     image: gcr.io/ml-pipeline/model-evaluation:v0.9
+     command:
+       - python
+       - /main.py
+     args:
+       - --setup_file
+       - /setup.py
+       - --json_mode
+       - "true"
+       - --project_id
+       - { inputValue: project }
+       - --location
+       - { inputValue: location }
+       - --problem_type
+       - "forecasting"
+       - --forecasting_type
+       - { inputValue: forecasting_type }
+       - --forecasting_quantiles
+       - { inputValue: forecasting_quantiles }
+       - --point_evaluation_quantile
+       - { inputValue: point_evaluation_quantile }
+       - --batch_prediction_format
+       - { inputValue: predictions_format }
+       - if:
+           cond: { isPresent: predictions_gcs_source }
+           then:
+             - --batch_prediction_gcs_source
+             - "{{$.inputs.artifacts['predictions_gcs_source'].uri}}"
+       - if:
+           cond: { isPresent: predictions_bigquery_source }
+           then:
+             - --batch_prediction_bigquery_source
+             - "bq://{{$.inputs.artifacts['predictions_bigquery_source'].metadata['projectId']}}.{{$.inputs.artifacts['predictions_bigquery_source'].metadata['datasetId']}}.{{$.inputs.artifacts['predictions_bigquery_source'].metadata['tableId']}}"
+       - if:
+           cond: { isPresent: model }
+           then:
+             - --model_name
+             - "{{$.inputs.artifacts['model'].metadata['resourceName']}}"
+       - --ground_truth_format
+       - { inputValue: ground_truth_format }
+       - --ground_truth_gcs_source
+       - { inputValue: ground_truth_gcs_source }
+       - --ground_truth_bigquery_source
+       - { inputValue: ground_truth_bigquery_source }
+       - --root_dir
+       - "{{$.inputs.artifacts['root_dir'].uri}}"
+       - --target_field_name
+       - "instance.{{$.inputs.parameters['target_field_name']}}"
+       - --prediction_score_column
+       - { inputValue: prediction_score_column }
+       - --dataflow_job_prefix
+       - "evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}"
+       - --dataflow_service_account
+       - { inputValue: dataflow_service_account }
+       - --dataflow_disk_size
+       - { inputValue: dataflow_disk_size }
+       - --dataflow_machine_type
+       - { inputValue: dataflow_machine_type }
+       - --dataflow_workers_num
+       - { inputValue: dataflow_workers_num }
+       - --dataflow_max_workers_num
+       - { inputValue: dataflow_max_workers_num }
+       - --dataflow_subnetwork
+       - { inputValue: dataflow_subnetwork }
+       - --dataflow_use_public_ips
+       - { inputValue: dataflow_use_public_ips }
+       - --kms_key_name
+       - { inputValue: encryption_spec_key_name }
+       - --output_metrics_gcs_path
+       - { outputUri: evaluation_metrics }
+       - --gcp_resources
+       - { outputPath: gcp_resources }
+       - --executor_input
+       - "{{$}}"
@@ -0,0 +1,13 @@
+ # Copyright 2024 The Kubeflow Authors. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #      http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
@@ -0,0 +1,62 @@
+ # Copyright 2024 The Kubeflow Authors. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #      http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """StarryNet get training artifacts component."""
+
+ from typing import NamedTuple
+
+ from kfp import dsl
+
+
+ @dsl.component(packages_to_install=['tensorflow==2.11.0'])
+ def get_training_artifacts(
+     docker_region: str,
+     trainer_dir: dsl.InputPath(),
+ ) -> NamedTuple(
+     'TrainingArtifacts',
+     image_uri=str,
+     artifact_uri=str,
+     prediction_schema_uri=str,
+     instance_schema_uri=str,
+ ):
+   # fmt: off
+   """Gets the artifact URIs from the training job.
+
+   Args:
+     docker_region: The region from which the training docker image is pulled.
+     trainer_dir: The directory where training artifacts were stored.
+
+   Returns:
+     A NamedTuple containing the image_uri for the prediction server,
+     the artifact_uri with model artifacts, the prediction_schema_uri,
+     and the instance_schema_uri.
+   """
+   import os  # pylint: disable=g-import-not-at-top
+   import tensorflow as tf  # pylint: disable=g-import-not-at-top
+
+   with tf.io.gfile.GFile(os.path.join(trainer_dir, 'trainer.txt')) as f:
+     private_dir = f.read().strip()
+
+   outputs = NamedTuple(
+       'TrainingArtifacts',
+       image_uri=str,
+       artifact_uri=str,
+       prediction_schema_uri=str,
+       instance_schema_uri=str,
+   )
+   return outputs(
+       f'{docker_region}-docker.pkg.dev/vertex-ai/starryn/predictor:20240617_2142_RC00',  # pylint: disable=too-many-function-args
+       private_dir,  # pylint: disable=too-many-function-args
+       os.path.join(private_dir, 'predict_schema.yaml'),  # pylint: disable=too-many-function-args
+       os.path.join(private_dir, 'instance_schema.yaml'),  # pylint: disable=too-many-function-args
+   )
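A brief, hypothetical consumption sketch (not part of the diff): downstream steps typically read the four fields of get_training_artifacts from the task's outputs dictionary. The upstream task name, output key, and the upload_model call below are assumptions for illustration, not the pipeline's actual wiring:

artifacts_task = get_training_artifacts(
    docker_region='us',
    trainer_dir=train_task.outputs['trainer_dir'],  # assumed upstream output name
)
upload_task = upload_model(  # hypothetical downstream component and signature
    image_uri=artifacts_task.outputs['image_uri'],
    artifact_uri=artifacts_task.outputs['artifact_uri'],
    prediction_schema_uri=artifacts_task.outputs['prediction_schema_uri'],
    instance_schema_uri=artifacts_task.outputs['instance_schema_uri'],
)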
@@ -0,0 +1,13 @@
+ # Copyright 2024 The Kubeflow Authors. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #      http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
@@ -0,0 +1,77 @@
+ # Copyright 2024 The Kubeflow Authors. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #      http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Starry Net component to set TFRecord args if training with TFRecords."""
+
+ from typing import List, NamedTuple
+
+ from kfp import dsl
+
+
+ @dsl.component
+ def maybe_set_tfrecord_args(
+     dataprep_previous_run_dir: str,
+     static_covariates: List[str],
+ ) -> NamedTuple(
+     'TfrecordArgs',
+     static_covariates_vocab_path=str,
+     train_tf_record_patterns=str,
+     val_tf_record_patterns=str,
+     test_tf_record_patterns=str,
+ ):
+   # fmt: off
+   """Creates Trainer TFRecord args if training with TFRecords.
+
+   Args:
+     dataprep_previous_run_dir: The dataprep dir from a previous run. Use this
+       to save time if you've already created TFRecords from your BigQuery
+       dataset with the same dataprep parameters as this run.
+     static_covariates: The static covariates to train the model with.
+
+   Returns:
+     A NamedTuple containing the path to the static covariates vocabulary, and
+     the TFRecord patterns for the train, validation, and test sets.
+   """
+
+   outputs = NamedTuple(
+       'TfrecordArgs',
+       static_covariates_vocab_path=str,
+       train_tf_record_patterns=str,
+       val_tf_record_patterns=str,
+       test_tf_record_patterns=str,
+   )
+
+   if static_covariates and dataprep_previous_run_dir:
+     static_covariates_vocab_path = (
+         f'{dataprep_previous_run_dir}/static_covariate_vocab.json'
+     )
+   else:
+     static_covariates_vocab_path = ''
+   if dataprep_previous_run_dir:
+     train_tf_record_patterns = (
+         f"('{dataprep_previous_run_dir}/tf_records/train*',)"
+     )
+     val_tf_record_patterns = f"('{dataprep_previous_run_dir}/tf_records/val*',)"
+     test_tf_record_patterns = (
+         f"('{dataprep_previous_run_dir}/tf_records/test_path_for_plot*',)"
+     )
+   else:
+     train_tf_record_patterns = '()'
+     val_tf_record_patterns = '()'
+     test_tf_record_patterns = '()'
+   return outputs(
+       static_covariates_vocab_path,  # pylint: disable=too-many-function-args
+       train_tf_record_patterns,  # pylint: disable=too-many-function-args
+       val_tf_record_patterns,  # pylint: disable=too-many-function-args
+       test_tf_record_patterns,  # pylint: disable=too-many-function-args
+   )
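To see what this component produces, a small standalone check (not part of the diff); it calls the decorated function's underlying Python function directly, assuming kfp's lightweight-component wrapper exposes it as python_func, and the bucket path is made up:

args = maybe_set_tfrecord_args.python_func(
    dataprep_previous_run_dir='gs://my-bucket/dataprep/run-1',  # assumed path
    static_covariates=['region'],
)
# Expected values given the logic above:
# args.static_covariates_vocab_path ->
#     'gs://my-bucket/dataprep/run-1/static_covariate_vocab.json'
# args.train_tf_record_patterns ->
#     "('gs://my-bucket/dataprep/run-1/tf_records/train*',)"
# args.test_tf_record_patterns ->
#     "('gs://my-bucket/dataprep/run-1/tf_records/test_path_for_plot*',)"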
@@ -0,0 +1,13 @@
+ # Copyright 2024 The Kubeflow Authors. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #      http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
@@ -0,0 +1,97 @@
+ # Copyright 2024 The Kubeflow Authors. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #      http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """StarryNet Set Dataprep Args Component."""
+
+ from typing import List, NamedTuple
+
+ from kfp import dsl
+
+
+ @dsl.component
+ def set_dataprep_args(
+     model_blocks: List[str],
+     ts_identifier_columns: List[str],
+     static_covariate_columns: List[str],
+     csv_data_path: str,
+     previous_run_dir: str,
+     location: str,
+ ) -> NamedTuple(
+     'DataprepArgs',
+     model_blocks=str,
+     ts_identifier_columns=str,
+     static_covariate_columns=str,
+     create_tf_records=bool,
+     docker_region=str,
+ ):
+   # fmt: off
+   """Creates Dataprep args.
+
+   Args:
+     model_blocks: The list of model blocks to use in the order they will
+       appear in the model. Possible values are `cleaning`, `change_point`,
+       `trend`, `hour_of_week`, `day_of_week`, `day_of_year`, `week_of_year`,
+       `month_of_year`, `residual`.
+     ts_identifier_columns: The list of ts_identifier columns from the
+       BigQuery data source.
+     static_covariate_columns: The list of strings of static covariate names.
+     csv_data_path: The path to the training data csv in the format
+       gs://bucket_name/sub_dir/blob_name.csv.
+     previous_run_dir: The dataprep dir from a previous run. Use this to save
+       time if you've already created TFRecords from your BigQuery dataset
+       with the same dataprep parameters as this run.
+     location: The location where the pipeline is run.
+
+   Returns:
+     A NamedTuple containing the model blocks formatted as expected by the
+     dataprep job, the ts_identifier_columns formatted as expected by the
+     dataprep job, the static_covariate_columns formatted as expected by the
+     dataprep job, a boolean indicating whether to create tf records, and the
+     region of the dataprep docker image.
+   """
+   outputs = NamedTuple(
+       'DataprepArgs',
+       model_blocks=str,
+       ts_identifier_columns=str,
+       static_covariate_columns=str,
+       create_tf_records=bool,
+       docker_region=str,
+   )
+
+   def maybe_update_model_blocks(model_blocks: List[str]) -> List[str]:
+     return [f'{b}-hybrid' if '_of_' in b else b for b in model_blocks]
+
+   def create_name_tuple_from_list(input_list: List[str]) -> str:
+     if len(input_list) == 1:
+       return str(input_list).replace('[', '(').replace(']', ',)')
+     return str(input_list).replace('[', '(').replace(']', ')')
+
+   def set_docker_region(location: str) -> str:
+     if location.startswith('africa') or location.startswith('europe'):
+       return 'europe'
+     elif (
+         location.startswith('asia')
+         or location.startswith('australia')
+         or location.startswith('me')
+     ):
+       return 'asia'
+     else:
+       return 'us'
+
+   return outputs(
+       create_name_tuple_from_list(maybe_update_model_blocks(model_blocks)),  # pylint: disable=too-many-function-args
+       ','.join(ts_identifier_columns),  # pylint: disable=too-many-function-args
+       create_name_tuple_from_list(static_covariate_columns),  # pylint: disable=too-many-function-args
+       False if csv_data_path or previous_run_dir else True,  # pylint: disable=too-many-function-args
+       set_docker_region(location),  # pylint: disable=too-many-function-args
+   )
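As a sanity check on the formatting helpers defined inside set_dataprep_args, the following standalone snippet (not part of the diff) mirrors their logic so it runs without kfp; the example block and covariate names are arbitrary:

from typing import List


def maybe_update_model_blocks(model_blocks: List[str]) -> List[str]:
  # Same logic as the nested helper above: calendar blocks get a -hybrid suffix.
  return [f'{b}-hybrid' if '_of_' in b else b for b in model_blocks]


def create_name_tuple_from_list(input_list: List[str]) -> str:
  # Same logic as the nested helper above: render a Python-tuple-style string.
  if len(input_list) == 1:
    return str(input_list).replace('[', '(').replace(']', ',)')
  return str(input_list).replace('[', '(').replace(']', ')')


assert maybe_update_model_blocks(['cleaning', 'day_of_week', 'trend']) == [
    'cleaning', 'day_of_week-hybrid', 'trend']
assert create_name_tuple_from_list(['region']) == "('region',)"
assert create_name_tuple_from_list(['cleaning', 'trend']) == "('cleaning', 'trend')"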