google-cloud-pipeline-components 2.14.1__py3-none-any.whl → 2.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of google-cloud-pipeline-components might be problematic.
- google_cloud_pipeline_components/_implementation/llm/generated/refined_image_versions.py +1 -1
- google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation_preprocessor/component.py +24 -0
- google_cloud_pipeline_components/_implementation/starry_net/__init__.py +41 -0
- google_cloud_pipeline_components/_implementation/{model_evaluation/import_evaluation → starry_net/dataprep}/__init__.py +1 -2
- google_cloud_pipeline_components/_implementation/starry_net/dataprep/component.py +173 -0
- google_cloud_pipeline_components/_implementation/starry_net/evaluation/__init__.py +13 -0
- google_cloud_pipeline_components/_implementation/starry_net/evaluation/component.py +23 -0
- google_cloud_pipeline_components/_implementation/starry_net/evaluation/evaluation.yaml +197 -0
- google_cloud_pipeline_components/_implementation/starry_net/get_training_artifacts/__init__.py +13 -0
- google_cloud_pipeline_components/_implementation/starry_net/get_training_artifacts/component.py +62 -0
- google_cloud_pipeline_components/_implementation/starry_net/maybe_set_tfrecord_args/__init__.py +13 -0
- google_cloud_pipeline_components/_implementation/starry_net/maybe_set_tfrecord_args/component.py +77 -0
- google_cloud_pipeline_components/_implementation/starry_net/set_dataprep_args/__init__.py +13 -0
- google_cloud_pipeline_components/_implementation/starry_net/set_dataprep_args/component.py +97 -0
- google_cloud_pipeline_components/_implementation/starry_net/set_eval_args/__init__.py +13 -0
- google_cloud_pipeline_components/_implementation/starry_net/set_eval_args/component.py +76 -0
- google_cloud_pipeline_components/_implementation/starry_net/set_test_set/__init__.py +13 -0
- google_cloud_pipeline_components/_implementation/starry_net/set_test_set/component.py +48 -0
- google_cloud_pipeline_components/_implementation/starry_net/set_tfrecord_args/__init__.py +13 -0
- google_cloud_pipeline_components/_implementation/starry_net/set_tfrecord_args/component.py +70 -0
- google_cloud_pipeline_components/_implementation/starry_net/set_train_args/__init__.py +13 -0
- google_cloud_pipeline_components/_implementation/starry_net/set_train_args/component.py +90 -0
- google_cloud_pipeline_components/_implementation/starry_net/train/__init__.py +13 -0
- google_cloud_pipeline_components/_implementation/starry_net/train/component.py +220 -0
- google_cloud_pipeline_components/_implementation/starry_net/upload_decomposition_plots/__init__.py +13 -0
- google_cloud_pipeline_components/_implementation/starry_net/upload_decomposition_plots/component.py +64 -0
- google_cloud_pipeline_components/_implementation/starry_net/upload_model/__init__.py +13 -0
- google_cloud_pipeline_components/_implementation/starry_net/upload_model/component.py +23 -0
- google_cloud_pipeline_components/_implementation/starry_net/upload_model/upload_model.yaml +37 -0
- google_cloud_pipeline_components/_implementation/starry_net/version.py +18 -0
- google_cloud_pipeline_components/container/preview/custom_job/remote_runner.py +22 -0
- google_cloud_pipeline_components/container/utils/error_surfacing.py +45 -0
- google_cloud_pipeline_components/container/v1/model/get_model/remote_runner.py +36 -7
- google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py +1 -1
- google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py +2 -2
- google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py +2 -2
- google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml +38 -34
- google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml +38 -34
- google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml +38 -34
- google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml +38 -34
- google_cloud_pipeline_components/preview/automl/forecasting/utils.py +49 -7
- google_cloud_pipeline_components/preview/automl/tabular/auto_feature_engineering.py +1 -1
- google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml +39 -39
- google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml +41 -41
- google_cloud_pipeline_components/preview/automl/tabular/distillation_stage_feature_transform_engine.py +2 -2
- google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py +2 -2
- google_cloud_pipeline_components/preview/automl/tabular/feature_selection_pipeline.yaml +4 -4
- google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py +3 -3
- google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py +2 -2
- google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml +15 -15
- google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py +2 -2
- google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml +13 -13
- google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py +2 -2
- google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml +14 -14
- google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py +2 -2
- google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml +13 -13
- google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml +14 -14
- google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml +13 -13
- google_cloud_pipeline_components/preview/custom_job/utils.py +45 -6
- google_cloud_pipeline_components/preview/llm/rlhf/component.py +3 -6
- google_cloud_pipeline_components/preview/starry_net/__init__.py +19 -0
- google_cloud_pipeline_components/preview/starry_net/component.py +469 -0
- google_cloud_pipeline_components/proto/task_error_pb2.py +0 -1
- google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml +10 -10
- google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml +31 -31
- google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml +13 -13
- google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py +3 -3
- google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml +14 -14
- google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml +37 -37
- google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py +2 -2
- google_cloud_pipeline_components/v1/automl/tabular/ensemble.py +2 -2
- google_cloud_pipeline_components/v1/automl/tabular/finalizer.py +1 -1
- google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py +1 -1
- google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py +1 -1
- google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py +2 -2
- google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py +2 -2
- google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py +1 -1
- google_cloud_pipeline_components/v1/automl/tabular/transform.py +2 -2
- google_cloud_pipeline_components/v1/custom_job/component.py +3 -0
- google_cloud_pipeline_components/v1/custom_job/utils.py +4 -0
- google_cloud_pipeline_components/v1/model_evaluation/evaluation_llm_text_generation_pipeline.py +21 -0
- google_cloud_pipeline_components/version.py +1 -1
- {google_cloud_pipeline_components-2.14.1.dist-info → google_cloud_pipeline_components-2.16.0.dist-info}/METADATA +17 -20
- {google_cloud_pipeline_components-2.14.1.dist-info → google_cloud_pipeline_components-2.16.0.dist-info}/RECORD +87 -58
- {google_cloud_pipeline_components-2.14.1.dist-info → google_cloud_pipeline_components-2.16.0.dist-info}/WHEEL +1 -1
- google_cloud_pipeline_components/_implementation/model_evaluation/import_evaluation/component.py +0 -208
- {google_cloud_pipeline_components-2.14.1.dist-info → google_cloud_pipeline_components-2.16.0.dist-info}/LICENSE +0 -0
- {google_cloud_pipeline_components-2.14.1.dist-info → google_cloud_pipeline_components-2.16.0.dist-info}/top_level.txt +0 -0
google_cloud_pipeline_components/_implementation/starry_net/maybe_set_tfrecord_args/component.py
ADDED
@@ -0,0 +1,77 @@
# Copyright 2024 The Kubeflow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Starry Net component to set TFRecord args if training with TF Records."""

from typing import List, NamedTuple

from kfp import dsl


@dsl.component
def maybe_set_tfrecord_args(
    dataprep_previous_run_dir: str,
    static_covariates: List[str],
) -> NamedTuple(
    'TfrecordArgs',
    static_covariates_vocab_path=str,
    train_tf_record_patterns=str,
    val_tf_record_patterns=str,
    test_tf_record_patterns=str,
):
  # fmt: off
  """Creates Trainer TFRecord args if training with TF Records.

  Args:
    dataprep_previous_run_dir: The dataprep dir from a previous run. Use this
      to save time if you've already created TFRecords from your BigQuery
      dataset with the same dataprep parameters as this run.
    static_covariates: The static covariates to train the model with.

  Returns:
    A NamedTuple containing the path to the static covariates vocabulary, and
    the tf record patterns for the train, validation, and test sets.
  """
  outputs = NamedTuple(
      'TfrecordArgs',
      static_covariates_vocab_path=str,
      train_tf_record_patterns=str,
      val_tf_record_patterns=str,
      test_tf_record_patterns=str,
  )

  if static_covariates and dataprep_previous_run_dir:
    static_covariates_vocab_path = (
        f'{dataprep_previous_run_dir}/static_covariate_vocab.json'
    )
  else:
    static_covariates_vocab_path = ''
  if dataprep_previous_run_dir:
    train_tf_record_patterns = (
        f"('{dataprep_previous_run_dir}/tf_records/train*',)"
    )
    val_tf_record_patterns = f"('{dataprep_previous_run_dir}/tf_records/val*',)"
    test_tf_record_patterns = (
        f"('{dataprep_previous_run_dir}/tf_records/test_path_for_plot*',)"
    )
  else:
    train_tf_record_patterns = '()'
    val_tf_record_patterns = '()'
    test_tf_record_patterns = '()'
  return outputs(
      static_covariates_vocab_path,  # pylint: disable=too-many-function-args
      train_tf_record_patterns,  # pylint: disable=too-many-function-args
      val_tf_record_patterns,  # pylint: disable=too-many-function-args
      test_tf_record_patterns,  # pylint: disable=too-many-function-args
  )
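The component above only inspects whether a previous dataprep directory was supplied; everything else is string formatting. A minimal sketch of the same branching outside of KFP, using a hypothetical GCS path, is:

# Illustrative sketch only: mirrors the branching in maybe_set_tfrecord_args
# for a hypothetical previous-run directory; not part of the package.
def _tfrecord_args(previous_run_dir: str, static_covariates: list) -> dict:
  if previous_run_dir:
    return {
        'static_covariates_vocab_path': (
            f'{previous_run_dir}/static_covariate_vocab.json'
            if static_covariates else ''),
        'train_tf_record_patterns': f"('{previous_run_dir}/tf_records/train*',)",
        'val_tf_record_patterns': f"('{previous_run_dir}/tf_records/val*',)",
        'test_tf_record_patterns': (
            f"('{previous_run_dir}/tf_records/test_path_for_plot*',)"),
    }
  return {'static_covariates_vocab_path': '',
          'train_tf_record_patterns': '()',
          'val_tf_record_patterns': '()',
          'test_tf_record_patterns': '()'}

# With a previous run, patterns point at its tf_records subdirectory.
args = _tfrecord_args('gs://my-bucket/dataprep_run_1', ['store_id'])
assert args['train_tf_record_patterns'] == (
    "('gs://my-bucket/dataprep_run_1/tf_records/train*',)")
# Without one, every pattern is an empty tuple literal and the vocab path is ''.
assert _tfrecord_args('', [])['train_tf_record_patterns'] == '()'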

google_cloud_pipeline_components/_implementation/starry_net/set_dataprep_args/__init__.py
ADDED
@@ -0,0 +1,13 @@
# Copyright 2024 The Kubeflow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

google_cloud_pipeline_components/_implementation/starry_net/set_dataprep_args/component.py
ADDED
@@ -0,0 +1,97 @@
# Copyright 2024 The Kubeflow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""StarryNet Set Dataprep Args Component."""

from typing import List, NamedTuple

from kfp import dsl


@dsl.component
def set_dataprep_args(
    model_blocks: List[str],
    ts_identifier_columns: List[str],
    static_covariate_columns: List[str],
    csv_data_path: str,
    previous_run_dir: str,
    location: str,
) -> NamedTuple(
    'DataprepArgs',
    model_blocks=str,
    ts_identifier_columns=str,
    static_covariate_columns=str,
    create_tf_records=bool,
    docker_region=str,
):
  # fmt: off
  """Creates Dataprep args.

  Args:
    model_blocks: The list of model blocks to use in the order they will appear
      in the model. Possible values are `cleaning`, `change_point`, `trend`,
      `hour_of_week`, `day_of_week`, `day_of_year`, `week_of_year`,
      `month_of_year`, `residual`.
    ts_identifier_columns: The list of ts_identifier columns from the BigQuery
      data source.
    static_covariate_columns: The list of strings of static covariate names.
    csv_data_path: The path to the training data csv in the format
      gs://bucket_name/sub_dir/blob_name.csv.
    previous_run_dir: The dataprep dir from a previous run. Use this
      to save time if you've already created TFRecords from your BigQuery
      dataset with the same dataprep parameters as this run.
    location: The location where the pipeline is run.

  Returns:
    A NamedTuple containing the model blocks formatted as expected by the
    dataprep job, the ts_identifier_columns formatted as expected by the
    dataprep job, the static_covariate_columns formatted as expected by the
    dataprep job, a boolean indicating whether to create tf records, and the
    region of the dataprep docker image.
  """
  outputs = NamedTuple(
      'DataprepArgs',
      model_blocks=str,
      ts_identifier_columns=str,
      static_covariate_columns=str,
      create_tf_records=bool,
      docker_region=str,
  )

  def maybe_update_model_blocks(model_blocks: List[str]) -> List[str]:
    return [f'{b}-hybrid' if '_of_' in b else b for b in model_blocks]

  def create_name_tuple_from_list(input_list: List[str]) -> str:
    if len(input_list) == 1:
      return str(input_list).replace('[', '(').replace(']', ',)')
    return str(input_list).replace('[', '(').replace(']', ')')

  def set_docker_region(location: str) -> str:
    if location.startswith('africa') or location.startswith('europe'):
      return 'europe'
    elif (
        location.startswith('asia')
        or location.startswith('australia')
        or location.startswith('me')
    ):
      return 'asia'
    else:
      return 'us'

  return outputs(
      create_name_tuple_from_list(maybe_update_model_blocks(model_blocks)),  # pylint: disable=too-many-function-args
      ','.join(ts_identifier_columns),  # pylint: disable=too-many-function-args
      create_name_tuple_from_list(static_covariate_columns),  # pylint: disable=too-many-function-args
      False if csv_data_path or previous_run_dir else True,  # pylint: disable=too-many-function-args
      set_docker_region(location),  # pylint: disable=too-many-function-args
  )
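The helpers nested in set_dataprep_args do three things: suffix calendar blocks with `-hybrid`, render Python lists as tuple-literal strings, and pick a regional docker repository from the pipeline location. A small sketch with hypothetical sample values:

# Illustrative sketch only: shows what the set_dataprep_args helpers produce
# for sample inputs (the sample values are made up).
def _to_tuple_str(items):
  # Single-element lists keep a trailing comma so the string stays a valid tuple.
  if len(items) == 1:
    return str(items).replace('[', '(').replace(']', ',)')
  return str(items).replace('[', '(').replace(']', ')')

blocks = ['cleaning', 'trend', 'day_of_week', 'residual']
hybrid = [f'{b}-hybrid' if '_of_' in b else b for b in blocks]
assert hybrid == ['cleaning', 'trend', 'day_of_week-hybrid', 'residual']
assert _to_tuple_str(hybrid) == (
    "('cleaning', 'trend', 'day_of_week-hybrid', 'residual')")
assert _to_tuple_str(['store_id']) == "('store_id',)"

# The location prefix decides which regional docker image is used.
def _docker_region(location):
  if location.startswith(('africa', 'europe')):
    return 'europe'
  if location.startswith(('asia', 'australia', 'me')):
    return 'asia'
  return 'us'

assert _docker_region('europe-west4') == 'europe'
assert _docker_region('us-central1') == 'us'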

google_cloud_pipeline_components/_implementation/starry_net/set_eval_args/__init__.py
ADDED
@@ -0,0 +1,13 @@
# Copyright 2024 The Kubeflow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

google_cloud_pipeline_components/_implementation/starry_net/set_eval_args/component.py
ADDED
@@ -0,0 +1,76 @@
# Copyright 2024 The Kubeflow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Starry Net Set Eval Args Component."""

from typing import List, NamedTuple

from kfp import dsl


@dsl.component
def set_eval_args(
    big_query_source: dsl.Input[dsl.Artifact], quantiles: List[float]
) -> NamedTuple(
    'EvalArgs',
    big_query_source=str,
    forecasting_type=str,
    quantiles=List[float],
    prediction_score_column=str,
):
  # fmt: off
  """Creates Evaluation args.

  Args:
    big_query_source: The BQ Table containing the test set.
    quantiles: The quantiles the model was trained to output.

  Returns:
    A NamedTuple containing big_query_source as a string, the forecasting_type
    used for the evaluation step, quantiles in the format expected by the
    evaluation job, and the prediction_score_column used to evaluate.
  """
  outputs = NamedTuple(
      'EvalArgs',
      big_query_source=str,
      forecasting_type=str,
      quantiles=List[float],
      prediction_score_column=str)

  def set_forecasting_type_for_eval(quantiles: List[float]) -> str:
    if quantiles and quantiles[-1] != 0.5:
      return 'quantile'
    return 'point'

  def set_quantiles_for_eval(quantiles: List[float]) -> List[float]:
    updated_q = [q for q in quantiles if q != 0.5]
    if updated_q:
      updated_q = [0.5] + updated_q
    return updated_q

  def set_prediction_score_column(
      quantiles: List[float]) -> str:
    updated_q = [q for q in quantiles if q != 0.5]
    if updated_q:
      return 'predicted_x.quantile_predictions'
    return 'predicted_x.value'

  project_id = big_query_source.metadata['projectId']
  dataset_id = big_query_source.metadata['datasetId']
  table_id = big_query_source.metadata['tableId']
  return outputs(
      f'bq://{project_id}.{dataset_id}.{table_id}',  # pylint: disable=too-many-function-args
      set_forecasting_type_for_eval(quantiles),  # pylint: disable=too-many-function-args
      set_quantiles_for_eval(quantiles),  # pylint: disable=too-many-function-args
      set_prediction_score_column(quantiles),  # pylint: disable=too-many-function-args
  )
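The evaluation settings above are driven entirely by the quantile list. A sketch of the resulting values for a hypothetical configuration:

# Illustrative sketch only: how a hypothetical quantile list maps to the
# evaluation settings chosen by set_eval_args.
quantiles = [0.5, 0.1, 0.9]
non_median = [q for q in quantiles if q != 0.5]

forecasting_type = 'quantile' if quantiles and quantiles[-1] != 0.5 else 'point'
eval_quantiles = [0.5] + non_median if non_median else []
score_column = ('predicted_x.quantile_predictions' if non_median
                else 'predicted_x.value')

assert forecasting_type == 'quantile'
assert eval_quantiles == [0.5, 0.1, 0.9]
assert score_column == 'predicted_x.quantile_predictions'
# A pure point forecast ([] or [0.5]) falls back to 'point' and predicted_x.value.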

google_cloud_pipeline_components/_implementation/starry_net/set_test_set/__init__.py
ADDED
@@ -0,0 +1,13 @@
# Copyright 2024 The Kubeflow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

google_cloud_pipeline_components/_implementation/starry_net/set_test_set/component.py
ADDED
@@ -0,0 +1,48 @@
# Copyright 2024 The Kubeflow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Starry Net Set Test Set Component."""

from typing import NamedTuple

from kfp import dsl


@dsl.component(packages_to_install=['tensorflow==2.11.0'])
def set_test_set(
    dataprep_dir: dsl.InputPath(),
) -> NamedTuple('TestSetArtifact', uri=str, artifact=dsl.Artifact):
  # fmt: off
  """Creates test set artifact.

  Args:
    dataprep_dir: The bucket where dataprep artifacts are stored.

  Returns:
    The test set dsl.Artifact.
  """
  import os  # pylint: disable=g-import-not-at-top
  import json  # pylint: disable=g-import-not-at-top
  import tensorflow as tf  # pylint: disable=g-import-not-at-top

  with tf.io.gfile.GFile(
      os.path.join(dataprep_dir, 'big_query_test_set.json')
  ) as f:
    metadata = json.load(f)
  project = metadata['projectId']
  dataset = metadata['datasetId']
  table = metadata['tableId']
  output = NamedTuple('TestSetArtifact', uri=str, artifact=dsl.Artifact)
  uri = f'bq://{project}.{dataset}.{table}'
  artifact = dsl.Artifact(uri=uri, metadata=metadata)
  return output(uri, artifact)  # pylint: disable=too-many-function-args
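set_test_set only repackages BigQuery table coordinates written by the dataprep step into a bq:// URI. A sketch with made-up metadata:

# Illustrative sketch only: the big_query_test_set.json read above is expected
# to carry BigQuery table coordinates; these values are hypothetical.
import json

metadata = json.loads(
    '{"projectId": "my-project", "datasetId": "starry_net", '
    '"tableId": "test_set"}')
uri = f"bq://{metadata['projectId']}.{metadata['datasetId']}.{metadata['tableId']}"
assert uri == 'bq://my-project.starry_net.test_set'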

google_cloud_pipeline_components/_implementation/starry_net/set_tfrecord_args/__init__.py
ADDED
@@ -0,0 +1,13 @@
# Copyright 2024 The Kubeflow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

google_cloud_pipeline_components/_implementation/starry_net/set_tfrecord_args/component.py
ADDED
@@ -0,0 +1,70 @@
# Copyright 2024 The Kubeflow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Starry Net component to set TFRecord args."""

from typing import List, NamedTuple

from kfp import dsl


@dsl.component
def set_tfrecord_args(
    dataprep_dir: dsl.InputPath(),
    static_covariates: List[str],
) -> NamedTuple(
    'TfrecordArgs',
    static_covariates_vocab_path=str,
    train_tf_record_patterns=str,
    val_tf_record_patterns=str,
    test_tf_record_patterns=str,
):
  # fmt: off
  """Creates Trainer TFRecord args.

  Args:
    dataprep_dir: The dataprep directory where dataprep artifacts are stored.
    static_covariates: The static covariates to train the model with.

  Returns:
    A NamedTuple containing the path to the static covariates vocabulary, and
    the tf record patterns for the train, validation, and test sets.
  """
  outputs = NamedTuple(
      'TfrecordArgs',
      static_covariates_vocab_path=str,
      train_tf_record_patterns=str,
      val_tf_record_patterns=str,
      test_tf_record_patterns=str,
  )

  if static_covariates and dataprep_dir:
    static_covariates_vocab_path = f'{dataprep_dir}/static_covariate_vocab.json'
  else:
    static_covariates_vocab_path = ''
  if dataprep_dir:
    train_tf_record_patterns = f"('{dataprep_dir}/tf_records/train*',)"
    val_tf_record_patterns = f"('{dataprep_dir}/tf_records/val*',)"
    test_tf_record_patterns = (
        f"('{dataprep_dir}/tf_records/test_path_for_plot*',)")
  else:
    train_tf_record_patterns = '()'
    val_tf_record_patterns = '()'
    test_tf_record_patterns = '()'
  return outputs(
      static_covariates_vocab_path,  # pylint: disable=too-many-function-args
      train_tf_record_patterns,  # pylint: disable=too-many-function-args
      val_tf_record_patterns,  # pylint: disable=too-many-function-args
      test_tf_record_patterns,  # pylint: disable=too-many-function-args
  )
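Both TFRecord-args components emit their *_tf_record_patterns outputs as strings formatted like Python tuple literals. As a hedged aside (the trainer code that consumes them is not part of this diff), such strings can be recovered with the standard library's ast.literal_eval:

# Illustrative sketch only: parsing the tuple-literal pattern strings back
# into tuples; the path below is hypothetical.
import ast

pattern_str = "('gs://my-bucket/dataprep/tf_records/train*',)"
assert ast.literal_eval(pattern_str) == ('gs://my-bucket/dataprep/tf_records/train*',)
assert ast.literal_eval('()') == ()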

google_cloud_pipeline_components/_implementation/starry_net/set_train_args/__init__.py
ADDED
@@ -0,0 +1,13 @@
# Copyright 2024 The Kubeflow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

google_cloud_pipeline_components/_implementation/starry_net/set_train_args/component.py
ADDED
@@ -0,0 +1,90 @@
# Copyright 2024 The Kubeflow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Starry Net component to set training args."""

from typing import List, NamedTuple

from kfp import dsl


@dsl.component
def set_train_args(
    quantiles: List[float],
    model_blocks: List[str],
    static_covariates: List[str],
) -> NamedTuple(
    'TrainArgs',
    quantiles=str,
    use_static_covariates=bool,
    static_covariate_names=str,
    model_blocks=str,
    freeze_point_forecasts=bool,
):
  # fmt: off
  """Creates Trainer model args.

  Args:
    quantiles: The list of floats representing quantiles. Leave blank if
      only training to produce point forecasts.
    model_blocks: The list of model blocks to use in the order they will appear
      in the model. Possible values are `cleaning`, `change_point`, `trend`,
      `hour_of_week`, `day_of_week`, `day_of_year`, `week_of_year`,
      `month_of_year`, `residual`.
    static_covariates: The list of strings of static covariate names.

  Returns:
    A NamedTuple containing the quantiles formatted as expected by the train
    job, a bool indicating whether the job should train with static covariates,
    the model blocks formatted as expected by the train job, and a bool
    indicating whether or not to do two-pass training, first training for point
    forecasts and then quantiles.
  """
  outputs = NamedTuple(
      'TrainArgs',
      quantiles=str,
      use_static_covariates=bool,
      static_covariate_names=str,
      model_blocks=str,
      freeze_point_forecasts=bool,
  )

  def set_quantiles(input_list: List[float]) -> str:
    if not input_list or input_list[0] != 0.5:
      input_list = [0.5] + input_list
    if len(input_list) == 1:
      return str(input_list).replace('[', '(').replace(']', ',)')
    return str(input_list).replace('[', '(').replace(']', ')')

  def maybe_update_model_blocks(
      quantiles: List[float], model_blocks: List[str]) -> List[str]:
    updated_q = [q for q in quantiles if q != 0.5]
    model_blocks = [b for b in model_blocks if b != 'quantile']
    if updated_q:
      model_blocks.append('quantile')
    return [f'{b}-hybrid' if '_of_' in b else b for b in model_blocks]

  def create_name_tuple_from_list(input_list: List[str]) -> str:
    if len(input_list) == 1:
      return str(input_list).replace('[', '(').replace(']', ',)')
    return str(input_list).replace('[', '(').replace(']', ')')

  return outputs(
      set_quantiles(quantiles),  # pylint: disable=too-many-function-args
      True if static_covariates else False,  # pylint: disable=too-many-function-args
      create_name_tuple_from_list(static_covariates),  # pylint: disable=too-many-function-args
      create_name_tuple_from_list(  # pylint: disable=too-many-function-args
          maybe_update_model_blocks(quantiles, model_blocks)),
      True if quantiles and quantiles[-1] != 0.5 else False,  # pylint: disable=too-many-function-args
  )
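A sketch of what the train-arg helpers above yield for a hypothetical quantile configuration:

# Illustrative sketch only: sample inputs are made up.
quantiles = [0.1, 0.9]
blocks = ['cleaning', 'trend', 'day_of_week', 'residual']

# set_quantiles prepends the median when it is not already first.
q = quantiles if quantiles and quantiles[0] == 0.5 else [0.5] + quantiles
assert str(q).replace('[', '(').replace(']', ')') == '(0.5, 0.1, 0.9)'

# A 'quantile' block is appended and calendar blocks become '*-hybrid'.
non_median = [x for x in quantiles if x != 0.5]
updated = [b for b in blocks if b != 'quantile'] + (['quantile'] if non_median else [])
updated = [f'{b}-hybrid' if '_of_' in b else b for b in updated]
assert updated == ['cleaning', 'trend', 'day_of_week-hybrid', 'residual', 'quantile']

# Two-pass training (freeze_point_forecasts) is enabled only when the last
# requested quantile is not the median.
assert (True if quantiles and quantiles[-1] != 0.5 else False) is True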

google_cloud_pipeline_components/_implementation/starry_net/train/__init__.py
ADDED
@@ -0,0 +1,13 @@
# Copyright 2024 The Kubeflow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.