salesforce-data-customcode 0.1.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datacustomcode/__init__.py +20 -0
- datacustomcode/cli.py +215 -0
- datacustomcode/client.py +237 -0
- datacustomcode/cmd.py +105 -0
- datacustomcode/config.py +172 -0
- datacustomcode/config.yaml +19 -0
- datacustomcode/credentials.py +97 -0
- datacustomcode/deploy.py +468 -0
- datacustomcode/file/__init__.py +14 -0
- datacustomcode/file/base.py +19 -0
- datacustomcode/file/path/__init__.py +14 -0
- datacustomcode/file/path/default.py +171 -0
- datacustomcode/io/__init__.py +14 -0
- datacustomcode/io/base.py +28 -0
- datacustomcode/io/reader/__init__.py +14 -0
- datacustomcode/io/reader/base.py +34 -0
- datacustomcode/io/reader/query_api.py +172 -0
- datacustomcode/io/writer/__init__.py +14 -0
- datacustomcode/io/writer/base.py +49 -0
- datacustomcode/io/writer/csv.py +41 -0
- datacustomcode/io/writer/print.py +98 -0
- datacustomcode/mixin.py +94 -0
- datacustomcode/py.typed +0 -0
- datacustomcode/run.py +111 -0
- datacustomcode/scan.py +286 -0
- datacustomcode/spark/__init__.py +20 -0
- datacustomcode/spark/base.py +29 -0
- datacustomcode/spark/default.py +39 -0
- datacustomcode/template.py +36 -0
- datacustomcode/templates/.devcontainer/devcontainer.json +10 -0
- datacustomcode/templates/Dockerfile +18 -0
- datacustomcode/templates/Dockerfile.dependencies +11 -0
- datacustomcode/templates/README.md +0 -0
- datacustomcode/templates/account.ipynb +86 -0
- datacustomcode/templates/build_native_dependencies.sh +9 -0
- datacustomcode/templates/examples/employee_hierarchy/employee_data.csv +13 -0
- datacustomcode/templates/examples/employee_hierarchy/entrypoint.py +78 -0
- datacustomcode/templates/jupyterlab.sh +97 -0
- datacustomcode/templates/payload/config.json +1 -0
- datacustomcode/templates/payload/entrypoint.py +25 -0
- datacustomcode/templates/requirements-dev.txt +10 -0
- datacustomcode/templates/requirements.txt +1 -0
- datacustomcode/version.py +27 -0
- salesforce_data_customcode-0.1.15.dist-info/METADATA +340 -0
- salesforce_data_customcode-0.1.15.dist-info/RECORD +48 -0
- salesforce_data_customcode-0.1.15.dist-info/WHEEL +4 -0
- salesforce_data_customcode-0.1.15.dist-info/entry_points.txt +5 -0
- salesforce_data_customcode-0.1.15.dist-info/licenses/LICENSE.txt +206 -0
datacustomcode/__init__.py
ADDED
@@ -0,0 +1,20 @@
```python
# Copyright (c) 2025, Salesforce, Inc.
# SPDX-License-Identifier: Apache-2
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from datacustomcode.client import Client
from datacustomcode.io.reader.query_api import QueryAPIDataCloudReader
from datacustomcode.io.writer.print import PrintDataCloudWriter

__all__ = ["Client", "QueryAPIDataCloudReader", "PrintDataCloudWriter"]
```
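The top-level exports above are the intended entry point of the package. A minimal sketch of how they might be used, assuming the package (and pyspark) is installed and a resolvable config is present, as in a project created by `datacustomcode init`; `"my_dlo"` is a hypothetical object name:

```python
# A minimal sketch, assuming salesforce-data-customcode is installed and a
# valid config can be resolved; "my_dlo" is a hypothetical DLO name.
from datacustomcode import Client

client = Client()               # singleton; wires reader/writer from config
df = client.read_dlo("my_dlo")  # returns a PySpark DataFrame
df.show()
```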
datacustomcode/cli.py
ADDED
@@ -0,0 +1,215 @@
```python
# Copyright (c) 2025, Salesforce, Inc.
# SPDX-License-Identifier: Apache-2
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from importlib import metadata
import json
import os
import sys
from typing import List, Union

import click
from loguru import logger


@click.group()
@click.option("--debug", is_flag=True)
def cli(debug: bool):
    logger.remove()
    if debug:
        logger.configure(handlers=[{"sink": sys.stderr, "level": "DEBUG"}])
    else:
        logger.configure(handlers=[{"sink": sys.stderr, "level": "INFO"}])


@cli.command()
def version():
    """Display the current version of the package."""
    print(__name__)
    try:
        version = metadata.version("salesforce-data-customcode")
        click.echo(f"salesforce-data-customcode version: {version}")
    except metadata.PackageNotFoundError:
        click.echo("Version information not available")


@cli.command()
@click.option("--profile", default="default")
@click.option("--username", prompt=True)
@click.option("--password", prompt=True, hide_input=True)
@click.option("--client-id", prompt=True)
@click.option("--client-secret", prompt=True)
@click.option("--login-url", prompt=True)
def configure(
    username: str,
    password: str,
    client_id: str,
    client_secret: str,
    login_url: str,
    profile: str,
) -> None:
    from datacustomcode.credentials import Credentials

    Credentials(
        username=username,
        password=password,
        client_id=client_id,
        client_secret=client_secret,
        login_url=login_url,
    ).update_ini(profile=profile)


@cli.command()
@click.argument("path", default="payload")
@click.option("--network", default="default")
def zip(path: str, network: str):
    from datacustomcode.deploy import zip

    logger.debug("Zipping project")
    zip(path, network)


@cli.command()
@click.option("--path", default="payload")
@click.option("--name", required=True)
@click.option("--version", default="0.0.1")
@click.option("--description", default="Custom Data Transform Code")
@click.option("--profile", default="default")
@click.option("--network", default="default")
@click.option(
    "--cpu-size",
    default="CPU_2XL",
    help="""CPU size for deployment. Available options:

    \b
    CPU_L - Large CPU instance
    CPU_XL - X-Large CPU instance
    CPU_2XL - 2X-Large CPU instance [DEFAULT]
    CPU_4XL - 4X-Large CPU instance

    Choose based on your workload requirements.""",
)
def deploy(
    path: str,
    name: str,
    version: str,
    description: str,
    cpu_size: str,
    profile: str,
    network: str,
):
    from datacustomcode.credentials import Credentials
    from datacustomcode.deploy import TransformationJobMetadata, deploy_full

    logger.debug("Deploying project")

    # Validate compute type
    from datacustomcode.deploy import COMPUTE_TYPES

    if cpu_size not in COMPUTE_TYPES.keys():
        click.secho(
            f"Error: Invalid CPU size '{cpu_size}'. "
            f"Available options: {', '.join(COMPUTE_TYPES.keys())}",
            fg="red",
        )
        raise click.Abort()

    logger.debug(f"Deploying with CPU size: {cpu_size}")

    metadata = TransformationJobMetadata(
        name=name,
        version=version,
        description=description,
        computeType=COMPUTE_TYPES[cpu_size],
    )
    try:
        credentials = Credentials.from_available(profile=profile)
    except ValueError as e:
        click.secho(
            f"Error: {e}",
            fg="red",
        )
        raise click.Abort() from None
    deploy_full(path, metadata, credentials, network)


@cli.command()
@click.argument("directory", default=".")
def init(directory: str):
    from datacustomcode.scan import dc_config_json_from_file
    from datacustomcode.template import copy_template

    click.echo("Copying template to " + click.style(directory, fg="blue", bold=True))
    copy_template(directory)
    entrypoint_path = os.path.join(directory, "payload", "entrypoint.py")
    config_location = os.path.join(os.path.dirname(entrypoint_path), "config.json")
    config_json = dc_config_json_from_file(entrypoint_path)
    with open(config_location, "w") as f:
        json.dump(config_json, f, indent=2)

    click.echo(
        "Start developing by updating the code in "
        + click.style(entrypoint_path, fg="blue", bold=True)
    )
    click.echo(
        "You can run "
        + click.style(f"datacustomcode scan {entrypoint_path}", fg="blue", bold=True)
        + " to automatically update config.json when you make changes to your code"
    )


@cli.command()
@click.argument("filename")
@click.option("--config")
@click.option("--dry-run", is_flag=True)
@click.option(
    "--no-requirements", is_flag=True, help="Skip generating requirements.txt file"
)
def scan(filename: str, config: str, dry_run: bool, no_requirements: bool):
    from datacustomcode.scan import dc_config_json_from_file, write_requirements_file

    config_location = config or os.path.join(os.path.dirname(filename), "config.json")
    click.echo(
        "Dumping scan results to config file: "
        + click.style(config_location, fg="blue", bold=True)
    )
    click.echo("Scanning " + click.style(filename, fg="blue", bold=True) + "...")
    config_json = dc_config_json_from_file(filename)

    click.secho(json.dumps(config_json, indent=2), fg="yellow")
    if not dry_run:
        with open(config_location, "w") as f:
            json.dump(config_json, f, indent=2)

    if not no_requirements:
        requirements_path = write_requirements_file(filename)
        click.echo(
            "Generated requirements file: "
            + click.style(requirements_path, fg="blue", bold=True)
        )


@cli.command()
@click.argument("entrypoint")
@click.option("--config-file", default=None)
@click.option("--dependencies", default=[], multiple=True)
@click.option("--profile", default="default")
def run(
    entrypoint: str,
    config_file: Union[str, None],
    dependencies: List[str],
    profile: str,
):
    from datacustomcode.run import run_entrypoint

    run_entrypoint(entrypoint, config_file, dependencies, profile)
```
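Each subcommand above defers its `datacustomcode` imports into the function body, so heavy dependencies load only when that command actually runs. One way to exercise the group programmatically is click's standard test runner; a minimal sketch, assuming the package is installed:

```python
# A minimal sketch using click's CliRunner (click's own testing utility),
# assuming salesforce-data-customcode is installed in the environment.
from click.testing import CliRunner

from datacustomcode.cli import cli

runner = CliRunner()
result = runner.invoke(cli, ["version"])
# Output includes the stray print(__name__) line followed by the version line.
print(result.output)

# Equivalent to `datacustomcode scan payload/entrypoint.py --dry-run` run
# inside a project created by `datacustomcode init`.
result = runner.invoke(cli, ["scan", "payload/entrypoint.py", "--dry-run"])
```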
datacustomcode/client.py
ADDED
@@ -0,0 +1,237 @@
```python
# Copyright (c) 2025, Salesforce, Inc.
# SPDX-License-Identifier: Apache-2
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from enum import Enum
from typing import (
    TYPE_CHECKING,
    ClassVar,
    Optional,
)

from datacustomcode.config import config
from datacustomcode.file.path.default import DefaultFindFilePath
from datacustomcode.io.reader.base import BaseDataCloudReader
from datacustomcode.spark.default import DefaultSparkSessionProvider

if TYPE_CHECKING:
    from pathlib import Path

    from pyspark.sql import DataFrame as PySparkDataFrame

    from datacustomcode.io.reader.base import BaseDataCloudReader
    from datacustomcode.io.writer.base import BaseDataCloudWriter, WriteMode
    from datacustomcode.spark.base import BaseSparkSessionProvider


class DataCloudObjectType(Enum):
    DLO = "dlo"
    DMO = "dmo"


class DataCloudAccessLayerException(Exception):
    """Exception raised when mixing DMOs and DLOs is detected."""

    def __init__(
        self,
        data_layer_history: dict[DataCloudObjectType, set[str]],
        should_not_contain: DataCloudObjectType,
    ) -> None:
        self.data_layer_history = data_layer_history
        self.should_not_contain = should_not_contain

    def __str__(self) -> str:
        msg = (
            "Mixed use of DMOs and DLOs. "
            "You can only read from DMOs to write to DMOs "
            "and read from DLOs to write to DLOs. "
        )
        if self.should_not_contain is DataCloudObjectType.DLO:
            msg += (
                "You have read from the following DLOs: "
                f"{self.data_layer_history[DataCloudObjectType.DLO]} "
                f"and are attempting to write to DMO. "
            )
        else:
            msg += (
                "You have read from the following DMOs: "
                f"{self.data_layer_history[DataCloudObjectType.DMO]} "
                f"and are attempting to write to to a DLO. "
            )
        msg += "Restart to clear history."
        return msg


class Client:
    """Entrypoint for accessing DataCloud objects.

    This is the object used to access Data Cloud DLOs and DMOs. Accessing DLOs/DMOs
    are tracked and will throw an exception if they are mixed. In other words, you
    can read from DLOs and write to DLOs, read from DMOs and write to DMOs, but you
    cannot read from DLOs and write to DMOs or read from DMOs and write to DLOs.
    Furthermore you cannot mix during merging tables. This class is a singleton to
    prevent accidental mixing of DLOs and DMOs.

    You can provide custom readers and writers to the client for advanced use
    cases, but this is not recommended for testing as they may result in unexpected
    behavior once deployed to Data Cloud. By default, the client intercepts all
    read/write operations and mocks access to Data Cloud. For example, during
    writing, we print to the console instead of writing to Data Cloud.

    Args:
        finder: Find a file path
        reader: A custom reader to use for reading Data Cloud objects.
        writer: A custom writer to use for writing Data Cloud objects.

    Example:
        >>> client = Client()
        >>> file_path = client.find_file_path("data.csv")
        >>> dlo = client.read_dlo("my_dlo")
        >>> client.write_to_dmo("my_dmo", dlo)
    """

    _instance: ClassVar[Optional[Client]] = None
    _reader: BaseDataCloudReader
    _writer: BaseDataCloudWriter
    _file: DefaultFindFilePath
    _data_layer_history: dict[DataCloudObjectType, set[str]]

    def __new__(
        cls,
        reader: Optional[BaseDataCloudReader] = None,
        writer: Optional["BaseDataCloudWriter"] = None,
        spark_provider: Optional["BaseSparkSessionProvider"] = None,
    ) -> Client:
        if cls._instance is None:
            cls._instance = super().__new__(cls)

            # Initialize Readers and Writers from config
            # and/or provided reader and writer
            if reader is None or writer is None:
                # We need a spark because we will initialize readers and writers
                if config.spark_config is None:
                    raise ValueError(
                        "Spark config is required when reader/writer is not provided"
                    )

                provider: BaseSparkSessionProvider
                if spark_provider is not None:
                    provider = spark_provider
                elif config.spark_provider_config is not None:
                    provider = config.spark_provider_config.to_object()
                else:
                    provider = DefaultSparkSessionProvider()

                spark = provider.get_session(config.spark_config)

            if config.reader_config is None and reader is None:
                raise ValueError(
                    "Reader config is required when reader is not provided"
                )
            elif reader is None or (
                config.reader_config is not None and config.reader_config.force
            ):
                reader_init = config.reader_config.to_object(spark)  # type: ignore
            else:
                reader_init = reader
            if config.writer_config is None and writer is None:
                raise ValueError(
                    "Writer config is required when writer is not provided"
                )
            elif writer is None or (
                config.writer_config is not None and config.writer_config.force
            ):
                writer_init = config.writer_config.to_object(spark)  # type: ignore
            else:
                writer_init = writer
            cls._instance._reader = reader_init
            cls._instance._writer = writer_init
            cls._instance._file = DefaultFindFilePath()
            cls._instance._data_layer_history = {
                DataCloudObjectType.DLO: set(),
                DataCloudObjectType.DMO: set(),
            }
        elif (reader is not None or writer is not None) and cls._instance is not None:
            raise ValueError("Cannot set reader or writer after client is initialized")
        return cls._instance

    def read_dlo(self, name: str) -> PySparkDataFrame:
        """Read a DLO from Data Cloud.

        Args:
            name: The name of the DLO to read.

        Returns:
            A PySpark DataFrame containing the DLO data.
        """
        self._record_dlo_access(name)
        return self._reader.read_dlo(name)

    def read_dmo(self, name: str) -> PySparkDataFrame:
        """Read a DMO from Data Cloud.

        Args:
            name: The name of the DMO to read.

        Returns:
            A PySpark DataFrame containing the DMO data.
        """
        self._record_dmo_access(name)
        return self._reader.read_dmo(name)

    def write_to_dlo(
        self, name: str, dataframe: PySparkDataFrame, write_mode: WriteMode, **kwargs
    ) -> None:
        """Write a PySpark DataFrame to a DLO in Data Cloud.

        Args:
            name: The name of the DLO to write to.
            dataframe: The PySpark DataFrame to write.
            write_mode: The write mode to use for writing to the DLO.
        """
        self._validate_data_layer_history_does_not_contain(DataCloudObjectType.DMO)
        return self._writer.write_to_dlo(name, dataframe, write_mode, **kwargs)

    def write_to_dmo(
        self, name: str, dataframe: PySparkDataFrame, write_mode: WriteMode, **kwargs
    ) -> None:
        """Write a PySpark DataFrame to a DMO in Data Cloud.

        Args:
            name: The name of the DMO to write to.
            dataframe: The PySpark DataFrame to write.
            write_mode: The write mode to use for writing to the DMO.
        """
        self._validate_data_layer_history_does_not_contain(DataCloudObjectType.DLO)
        return self._writer.write_to_dmo(name, dataframe, write_mode, **kwargs)

    def find_file_path(self, file_name: str) -> Path:
        """Return a file path"""

        return self._file.find_file_path(file_name)

    def _validate_data_layer_history_does_not_contain(
        self, data_cloud_object_type: DataCloudObjectType
    ) -> None:
        if len(self._data_layer_history[data_cloud_object_type]) > 0:
            raise DataCloudAccessLayerException(
                self._data_layer_history, data_cloud_object_type
            )

    def _record_dlo_access(self, name: str) -> None:
        self._data_layer_history[DataCloudObjectType.DLO].add(name)

    def _record_dmo_access(self, name: str) -> None:
        self._data_layer_history[DataCloudObjectType.DMO].add(name)
```
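The mixing guard above reduces to two pieces of state per object type: a history set populated on every read, and a check that the *other* type's set is empty before a write. A minimal sketch of the resulting error message, built directly from the exception class so no Spark session or config is needed; `"account_dlo"` is a hypothetical name:

```python
# A minimal sketch of the DLO/DMO mixing guard's error output, assuming the
# datacustomcode package is importable; "account_dlo" is a hypothetical name.
from datacustomcode.client import (
    DataCloudAccessLayerException,
    DataCloudObjectType,
)

history = {
    DataCloudObjectType.DLO: {"account_dlo"},  # a DLO was read earlier
    DataCloudObjectType.DMO: set(),
}

# write_to_dmo() validates that no DLOs appear in the history; this mirrors
# the exception it raises when that rule is violated.
print(DataCloudAccessLayerException(history, DataCloudObjectType.DLO))
```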
datacustomcode/cmd.py
ADDED
@@ -0,0 +1,105 @@
```python
# Copyright (c) 2025, Salesforce, Inc.
# SPDX-License-Identifier: Apache-2
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module is shamelessly copied from conda to nicely wrap subprocess calls.
"""

from __future__ import annotations

import contextlib
import subprocess
from typing import Any, Union


def _force_bytes(exc: Any) -> bytes:
    with contextlib.suppress(TypeError):
        return bytes(exc)
    with contextlib.suppress(Exception):
        return str(exc).encode()
    return f"<unprintable {type(exc).__name__} object>".encode()


def _setdefault_kwargs(kwargs: dict[str, Any]) -> None:
    for arg in ("stdin", "stdout", "stderr"):
        kwargs.setdefault(arg, subprocess.PIPE)


def _oserror_to_output(e: OSError) -> tuple[int, bytes, None]:
    return 1, _force_bytes(e).rstrip(b"\n") + b"\n", None


class CalledProcessError(RuntimeError):
    """Nicely formatted subprocess call error."""

    def __init__(
        self,
        returncode: int,
        cmd: tuple[str, ...],
        stdout: bytes,
        stderr: Union[bytes, None],
    ) -> None:
        super().__init__(returncode, cmd, stdout, stderr)
        self.returncode = returncode
        self.cmd = cmd
        self.stdout = stdout
        self.stderr = stderr

    def __bytes__(self) -> bytes:
        def _indent_or_none(part: Union[bytes, None]) -> bytes:
            if part:
                return b"\n " + part.replace(b"\n", b"\n ").rstrip()
            else:
                return b" (none)"

        return b"".join(
            (
                f"command: {self.cmd!r}\n".encode(),
                f"return code: {self.returncode}\n".encode(),
                b"stdout:",
                self.stdout,
                b"\n",
                b"stderr:",
                _indent_or_none(self.stderr),
            )
        )

    def __str__(self) -> str:
        return self.__bytes__().decode()


def _cmd_output(
    *cmd: str,
    check: bool = True,
    **kwargs: Any,
) -> tuple[int, bytes, Union[bytes, None]]:
    _setdefault_kwargs(kwargs)
    try:
        kwargs.setdefault("shell", True)
        proc = subprocess.Popen(cmd, **kwargs)
    except OSError as e:
        returncode, stdout_b, stderr_b = _oserror_to_output(e)
    else:
        stdout_b, stderr_b = proc.communicate()
        returncode = proc.returncode
    if check and returncode:
        raise CalledProcessError(returncode, cmd, stdout_b, stderr_b)

    return returncode, stdout_b, stderr_b


def cmd_output(*cmd: str, **kwargs: Any) -> Union[str, None]:
    returncode, stdout_b, stderr_b = _cmd_output(*cmd, **kwargs)
    stdout = stdout_b.decode() if stdout_b is not None else None
    return stdout
```
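Because `_cmd_output` defaults `shell` to True and pipes all three standard streams, `cmd_output` behaves like a checked shell one-liner: it returns decoded stdout on success and raises the formatted `CalledProcessError` on a non-zero exit. A minimal usage sketch, assuming a POSIX shell is available:

```python
# A minimal usage sketch of datacustomcode.cmd, assuming a POSIX shell.
from datacustomcode.cmd import CalledProcessError, cmd_output

print(cmd_output("echo hello"))  # "hello\n" - the command's decoded stdout

try:
    cmd_output("exit 1")  # non-zero exit triggers the check
except CalledProcessError as exc:
    # str(exc) lists the command, return code, stdout, and stderr.
    print(exc)
```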