PyPI - salesforce-data-customcode - Versions diffs - 4.0.0__tar.gz → 4.0.2__tar.gz - Mend

salesforce-data-customcode 4.0.0 (tar.gz) → 4.0.2 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (97)

{salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: salesforce-data-customcode
-Version: 4.0.0
+Version: 4.0.2
 Summary: Data Cloud Custom Code SDK
 License-Expression: Apache-2.0
 License-File: LICENSE.txt

{salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/pyproject.toml RENAMED Viewed

@@ -18,7 +18,7 @@ license = "Apache-2.0"
 name = "salesforce-data-customcode"
 readme = "README.md"
 requires-python = ">=3.10,<3.12"
-version = "4.0.0"
+version = "4.0.2"
 [tool.black]
 exclude = '''

salesforce_data_customcode-4.0.2/src/datacustomcode/__init__.py ADDED Viewed

@@ -0,0 +1,47 @@
+# Copyright (c) 2025, Salesforce, Inc.
+# SPDX-License-Identifier: Apache-2
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+__all__ = [
+    "AuthType",
+    "Client",
+    "Credentials",
+    "PrintDataCloudWriter",
+    "QueryAPIDataCloudReader",
+]
+def __getattr__(name: str):
+    """Lazy import heavy dependencies."""
+    if name == "Client":
+        from datacustomcode.client import Client
+        return Client
+    elif name == "AuthType":
+        from datacustomcode.credentials import AuthType
+        return AuthType
+    elif name == "Credentials":
+        from datacustomcode.credentials import Credentials
+        return Credentials
+    elif name == "PrintDataCloudWriter":
+        from datacustomcode.io.writer.print import PrintDataCloudWriter
+        return PrintDataCloudWriter
+    elif name == "QueryAPIDataCloudReader":
+        from datacustomcode.io.reader.query_api import QueryAPIDataCloudReader
+        return QueryAPIDataCloudReader
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

{salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/client.py RENAMED Viewed

@@ -112,8 +112,8 @@ class Client:
     def __new__(
         cls,
         reader: Optional[BaseDataCloudReader] = None,
-        writer: Optional["BaseDataCloudWriter"] = None,
-        spark_provider: Optional["BaseSparkSessionProvider"] = None,
+        writer: Optional[BaseDataCloudWriter] = None,
+        spark_provider: Optional[BaseSparkSessionProvider] = None,
         code_type: str = "script",
     ) -> Client:

{salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/function/feature_types/chunking.py RENAMED Viewed

@@ -50,16 +50,16 @@ class ChunkType(str, Enum):
 class SearchIndexChunkingV1PrependField(BaseModel):
     """Field to prepend to chunk content"""
-    dmo_name: str = Field(
-        default="", description="Data Model Object name", examples=["udmo_1__dlm"]
+    dmo_name: Optional[str] = Field(
+        default=None, description="Data Model Object name", examples=["udmo_1__dlm"]
     )
-    field_name: str = Field(
-        default="",
+    field_name: Optional[str] = Field(
+        default=None,
         description="Field name to prepend",
         examples=["ResolvedFilePath__c"],
     )
-    value: str = Field(
-        default="",
+    value: Optional[str] = Field(
+        default=None,
         description="Field value to prepend",
         examples=["udlo_1__dll:quarterly_report.pdf"],
     )
@@ -67,20 +67,20 @@ class SearchIndexChunkingV1PrependField(BaseModel):
 class SearchIndexChunkingV1TranscriptField(BaseModel):
-    """Field to prepend to chunk content"""
+    """Transcript timing and speaker metadata for audio/video documents"""
-    speaker: str = Field(
-        default="",
+    speaker: Optional[str] = Field(
+        default=None,
         description="Speaker name for audio/video transcripts",
         examples=["Agent"],
     )
-    start_timestamp: str = Field(
-        default="",
+    start_timestamp: Optional[str] = Field(
+        default=None,
         description="Start timestamp in ISO8601 format: YYYY-MM-DDTHH:MM:SS.ffffff",
         examples=["2026-03-25T02:01:24.918000"],
     )
-    end_timestamp: str = Field(
-        default="",
+    end_timestamp: Optional[str] = Field(
+        default=None,
         description="End timestamp in ISO8601 format: YYYY-MM-DDTHH:MM:SS.ffffff",
         examples=["2026-03-25T02:01:30.500000"],
     )
@@ -88,44 +88,76 @@ class SearchIndexChunkingV1TranscriptField(BaseModel):
 class SearchIndexChunkingV1Metadata(BaseModel):
-    """Metadata for input documents"""
+    """Metadata for input documents."""
-    type: DocumentType = Field(
-        default=DocumentType.TEXT, description="Document type (text)", examples=["text"]
-    )
-    transcript_fields: SearchIndexChunkingV1TranscriptField = Field(
-        default_factory=SearchIndexChunkingV1TranscriptField,
+    type: Optional[DocumentType] = Field(
+        default=DocumentType.TEXT,
         description=(
-            "Transcript information. Will only be there in case of audio-video files"
+            "Document type of the chunk input. Currently only 'text' is supported."
         ),
+        examples=["text"],
     )
-    page_number: int = Field(
-        default=0,
-        description="Page number in the source document (0-based)",
+    page_number: Optional[int] = Field(
+        default=None,
+        description=("Page number in the source document (0-based). "),
         examples=[1],
     )
+    transcript_fields: Optional[SearchIndexChunkingV1TranscriptField] = Field(
+        default=None,
+        description=(
+            "Speaker and timestamp metadata for audio/video transcripts. "
+            "Optional — only present when the source document is a transcript."
+        ),
+    )
     text_as_html: Optional[str] = Field(
         default=None,
-        description="HTML representation of the document text",
+        description=("HTML representation of the chunk text, if available. "),
         examples=["<p>Online Remittance Instructions</p>"],
     )
-    source_dmo_fields: Dict[str, Union[str, int]] = Field(
-        default_factory=dict,
+    source_dmo_fields: Optional[Dict[str, Union[str, int, float]]] = Field(
+        default=None,
         description=(
-            "Source Data Model Object fields as key-value pairs "
-            "(values can be string or int)"
+            "Source Data Model Object fields as key-value pairs. "
+            "Values can be string, int, or float."
         ),
         examples=[
             {
                 "FilePath__c": "quarterly_report.pdf",
-                "Size__c": 1377454,
+                "Size__c": 1377454.0,
                 "ContentType__c": "pdf",
                 "LastModified__c": "2026-03-25T02:01:24.918000",
             }
         ],
     )
-    prepend: List[SearchIndexChunkingV1PrependField] = Field(
-        default_factory=list, description="List of fields to prepend to each chunk"
+    prepend: Optional[List[SearchIndexChunkingV1PrependField]] = Field(
+        default=None,
+        description=(
+            "List of DMO fields whose values are prepended to the chunk "
+            "text before indexing"
+        ),
+    )
+    image_base64: Optional[str] = Field(
+        default=None,
+        description=(
+            "Base64-encoded image data associated with this chunk. "
+            "Optional — only applicable for image-type document elements."
+        ),
+    )
+    image_mime_type: Optional[str] = Field(
+        default=None,
+        description=(
+            "MIME type of the associated image (e.g., 'image/png', 'image/jpeg'). "
+            "Optional — should be provided alongside image_base64 when present."
+        ),
+        examples=["image/png", "image/jpeg"],
+    )
+    image_type: Optional[str] = Field(
+        default=None,
+        description=(
+            "Semantic category of the image content"
+            "(e.g., 'diagram', 'screenshot', 'chart'). Optional."
+        ),
+        examples=["diagram", "screenshot"],
     )
     model_config = ConfigDict(extra="ignore")
@@ -143,9 +175,12 @@ class SearchIndexChunkingV1DocElement(BaseModel):
             )
         ],
     )
-    metadata: SearchIndexChunkingV1Metadata = Field(
-        default_factory=SearchIndexChunkingV1Metadata,
-        description="Source document metadata",
+    metadata: Optional[SearchIndexChunkingV1Metadata] = Field(
+        default=None,
+        description=(
+            "Source document metadata. Optional — may be absent if no "
+            "metadata is available for the document element."
+        ),
     )
     model_config = ConfigDict(extra="ignore")
@@ -159,21 +194,25 @@ class SearchIndexChunkingV1Output(BaseModel):
         examples=["Online Remittance Instructions"],
     )
     seq_no: int = Field(
-        default=0, description="Sequential chunk number (1-based)", ge=1, examples=[1]
-    )
-    chunk_id: str = Field(
-        default="",
-        description="Unique identifier for this chunk (UUID format)",
-        examples=["550e8400-e29b-41d4-a716-446655440000"],
+        default=0,
+        description=(
+            "Sequential order of this chunk within the output "
+            "Represents chunk ordering within the source document (1-based)."
+        ),
+        ge=1,
+        examples=[1],
     )
     chunk_type: ChunkType = Field(
         default=ChunkType.TEXT,
-        description="Type of chunk (e.g., 'text')",
+        description="Type of chunk. Fixed value — always 'text'.",
         examples=["text"],
     )
-    citations: Dict[str, str] = Field(
-        default_factory=dict,
-        description="Citation information as key-value pairs",
+    citations: Optional[Dict[str, str]] = Field(
+        default=None,
+        description=(
+            "Citation metadata associated with this chunk as key-value "
+            "pairs. Optional — defaults to None if no citations are present."
+        ),
         examples=[{"source": "quarterly_report.pdf"}],
     )
     model_config = ConfigDict(extra="ignore")
@@ -194,4 +233,3 @@ class SearchIndexChunkingV1Response(BaseModel):
     output: List[SearchIndexChunkingV1Output] = Field(
         default_factory=list, description="Flat list of chunks from all docs"
     )
-    model_config = ConfigDict(extra="ignore")

{salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/function/runtime.py RENAMED Viewed

@@ -21,7 +21,8 @@ from datacustomcode.einstein_predictions.base import EinsteinPredictions
 from datacustomcode.einstein_predictions_config import einstein_predictions_config
 from datacustomcode.file.path.default import DefaultFindFilePath
 from datacustomcode.function.base import BaseRuntime
-from datacustomcode.llm_gateway.default import DefaultLLMGateway
+from datacustomcode.llm_gateway.base import LLMGateway
+from datacustomcode.llm_gateway_config import llm_gateway_config
 class Runtime(BaseRuntime):
@@ -46,7 +47,7 @@ class Runtime(BaseRuntime):
                 raise RuntimeError(
                     "Runtime can only be instantiated once by the SDK.\n\n"
                     "Do not instantiate it yourself. Accept it as a parameter:\n\n"
-                    "  from datacustomcode.runtime.function.RunTime import Function\n"
+                    "  from datacustomcode.function.runtime import Runtime\n"
                     "  \n"
                     "  def function(request: dict, runtime: Runtime) -> dict:\n"
                     "      response = {...}\n"
@@ -65,13 +66,19 @@ class Runtime(BaseRuntime):
         super().__init__()
         # Initialize resources
-        self._llm_gateway = DefaultLLMGateway()
+        self._llm_gateway: Optional[LLMGateway] = None
         self._file = DefaultFindFilePath()
         self._einstein_predictions: Optional[EinsteinPredictions] = None
     @property
-    def llm_gateway(self) -> DefaultLLMGateway:
-        """Access LLM operations."""
+    def llm_gateway(self) -> LLMGateway:
+        if self._llm_gateway is None:
+            if llm_gateway_config.llm_gateway_config is None:
+                raise RuntimeError(
+                    "LLM Gateway is not configured. "
+                    "Add 'llm_gateway_config' section to config.yaml"
+                )
+            self._llm_gateway = llm_gateway_config.llm_gateway_config.to_object()
         return self._llm_gateway
     @property

{salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/function_utils.py RENAMED Viewed

@@ -16,6 +16,7 @@
 """Utilities for inspecting and working with function entrypoints."""
 import ast
+from enum import Enum
 import importlib.util
 import inspect
 import json
@@ -278,11 +279,17 @@ def _generate_model_sample_data(model_type):
         # Use examples if available
         if field_info.examples and len(field_info.examples) > 0:
             sample_data[field_name] = field_info.examples[0]
-        # Check if field has a real default value
-        elif field_info.default is not PydanticUndefined:
+        # If field has a non-None, non-empty default value, use it
+        elif (
+            field_info.default is not PydanticUndefined
+            and field_info.default is not None
+            and field_info.default != []
+            and field_info.default != {}
+        ):
             sample_data[field_name] = field_info.default
+        # For all other fields (including default_factory, None defaults,
+        # empty defaults), generate sample data
         else:
-            # Required field or field without default - generate sample
             sample_data[field_name] = generate_sample_value(
                 field_info.annotation, field_name
             )
@@ -301,6 +308,17 @@ def generate_sample_value(field_type, field_name: str):
     """
     origin = typing.get_origin(field_type)
+    # Handle Optional[T] (Union[T, None]) by unwrapping to T
+    if origin is typing.Union:
+        non_none_args = [
+            arg for arg in typing.get_args(field_type) if arg is not type(None)
+        ]
+        return (
+            generate_sample_value(non_none_args[0], field_name)
+            if non_none_args
+            else None
+        )
     if origin is list or field_type is list:
         args = typing.get_args(field_type)
         if args:
@@ -320,6 +338,10 @@ def generate_sample_value(field_type, field_name: str):
         return 1.0
     elif field_type is bool:
         return True
+    # Handle Enum types
+    elif isinstance(field_type, type) and issubclass(field_type, Enum):
+        # Return the first enum value
+        return next(iter(field_type)).value
     elif hasattr(field_type, "model_fields"):
         # Nested Pydantic model - use shared helper
         return _generate_model_sample_data(field_type)

{salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/io/reader/sf_cli.py RENAMED Viewed

@@ -23,7 +23,6 @@ from typing import (
     Union,
 )
-import pandas as pd
 import requests
 from datacustomcode.io.reader.base import BaseDataCloudReader
@@ -31,6 +30,7 @@ from datacustomcode.io.reader.utils import _pandas_to_spark_schema
 from datacustomcode.token_provider import SFCLITokenProvider
 if TYPE_CHECKING:
+    import pandas as pd
     from pyspark.sql import DataFrame as PySparkDataFrame, SparkSession
     from pyspark.sql.types import AtomicType, StructType
@@ -97,6 +97,8 @@ class SFCLIDataCloudReader(BaseDataCloudReader):
         Raises:
             RuntimeError: On HTTP errors or unexpected response shapes.
         """
+        import pandas as pd
         access_token, instance_url = self._get_token()
         url = f"{instance_url}/services/data/{API_VERSION}/ssot/query-sql"

{salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/io/reader/utils.py RENAMED Viewed

@@ -16,32 +16,32 @@ from __future__ import annotations
 from typing import TYPE_CHECKING
-import pandas.api.types as pd_types
-from pyspark.sql.types import (
-    BooleanType,
-    DoubleType,
-    LongType,
-    StringType,
-    StructField,
-    StructType,
-    TimestampType,
-)
 if TYPE_CHECKING:
     import pandas
-    from pyspark.sql.types import AtomicType
-PANDAS_TYPE_MAPPING = {
-    "object": StringType(),
-    "int64": LongType(),
-    "float64": DoubleType(),
-    "bool": BooleanType(),
-}
+    from pyspark.sql.types import AtomicType, StructType
 def _pandas_to_spark_schema(
     pandas_df: pandas.DataFrame, nullable: bool = True
 ) -> StructType:
+    import pandas.api.types as pd_types
+    from pyspark.sql.types import (
+        BooleanType,
+        DoubleType,
+        LongType,
+        StringType,
+        StructField,
+        StructType,
+        TimestampType,
+    )
+    PANDAS_TYPE_MAPPING = {
+        "object": StringType(),
+        "int64": LongType(),
+        "float64": DoubleType(),
+        "bool": BooleanType(),
+    }
     fields = []
     for column, dtype in pandas_df.dtypes.items():
         spark_type: AtomicType

{salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/io/writer/csv.py RENAMED Viewed

@@ -13,8 +13,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from __future__ import annotations
-from pyspark.sql import DataFrame as PySparkDataFrame
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from pyspark.sql import DataFrame as PySparkDataFrame
 from datacustomcode.io.writer.base import BaseDataCloudWriter, WriteMode

{salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/io/writer/print.py RENAMED Viewed

@@ -13,12 +13,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from __future__ import annotations
-from typing import Optional
+from typing import TYPE_CHECKING, Optional
-from pyspark.sql import DataFrame as PySparkDataFrame, SparkSession
+if TYPE_CHECKING:
+    from pyspark.sql import DataFrame as PySparkDataFrame, SparkSession
+    from datacustomcode.io.reader.query_api import QueryAPIDataCloudReader
-from datacustomcode.io.reader.query_api import QueryAPIDataCloudReader
 from datacustomcode.io.writer.base import BaseDataCloudWriter, WriteMode
@@ -61,6 +64,8 @@ class PrintDataCloudWriter(BaseDataCloudWriter):
             sf_cli_org: Optional SF CLI org alias or username. If provided,
                 credentials are fetched via `sf org display`.
         """
+        from datacustomcode.io.reader.query_api import QueryAPIDataCloudReader
         super().__init__(spark)
         if reader is None:
             self.reader = QueryAPIDataCloudReader(

salesforce_data_customcode-4.0.2/src/datacustomcode/llm_gateway/__init__.py ADDED Viewed

@@ -0,0 +1,22 @@
+# Copyright (c) 2025, Salesforce, Inc.
+# SPDX-License-Identifier: Apache-2
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from datacustomcode.llm_gateway.base import LLMGateway
+from datacustomcode.llm_gateway.default import DefaultLLMGateway
+__all__ = [
+    "DefaultLLMGateway",
+    "LLMGateway",
+]

{salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/mixin.py RENAMED Viewed

@@ -72,6 +72,35 @@ class UserExtendableNamedConfigMixin:
         Args:
             config_name: should match a subclass's ``CONFIG_NAME``.
         """
+        # First, check if already registered (from __init_subclass__)
+        if config_name in UserExtendableNamedConfigMixin._registered_config_names:
+            candidate = UserExtendableNamedConfigMixin._registered_config_names[
+                config_name
+            ]
+            # Verify it's actually a subclass of cls (respects hierarchy)
+            if candidate is cls or issubclass(candidate, cls):
+                return candidate
+        # If not found, try to trigger lazy import via __getattr__
+        # This handles the case where subclasses use lazy loading
+        try:
+            import datacustomcode
+            # Attempt to trigger __getattr__ by accessing the name
+            getattr(datacustomcode, config_name, None)
+        except (ImportError, AttributeError):
+            pass
+        # Check again after potential lazy import
+        if config_name in UserExtendableNamedConfigMixin._registered_config_names:
+            candidate = UserExtendableNamedConfigMixin._registered_config_names[
+                config_name
+            ]
+            # Verify it's actually a subclass of cls (respects hierarchy)
+            if candidate is cls or issubclass(candidate, cls):
+                return candidate
+        # Fallback to dynamic lookup (for user-added subclasses)
         subclass_config_name_map = {}
         for type_ in _get_all_subclass_descendants(cls):
             if name := getattr(type_, "CONFIG_NAME", ""):

{salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/spark/base.py RENAMED Viewed

@@ -25,5 +25,5 @@ if TYPE_CHECKING:
 class BaseSparkSessionProvider(UserExtendableNamedConfigMixin):
-    def get_session(self, spark_config: SparkConfig) -> "SparkSession":
+    def get_session(self, spark_config: SparkConfig) -> SparkSession:
         raise NotImplementedError

{salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/spark/default.py RENAMED Viewed

@@ -27,7 +27,7 @@ if TYPE_CHECKING:
 class DefaultSparkSessionProvider(BaseSparkSessionProvider):
     CONFIG_NAME = "DefaultSparkSessionProvider"
-    def get_session(self, spark_config: SparkConfig) -> "SparkSession":
+    def get_session(self, spark_config: SparkConfig) -> SparkSession:
         from pyspark.sql import SparkSession
         builder = SparkSession.builder

{salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/templates/function/chunking/payload/entrypoint.py RENAMED Viewed

@@ -1,5 +1,4 @@
 import logging
-import uuid
 from datacustomcode.function import Runtime
 from datacustomcode.function.feature_types.chunking import (
@@ -124,12 +123,11 @@ def function(
         for chunk_text in text_chunks:
             # Create citations from source_dmo_fields if available
             citations = {}
-            if metadata.source_dmo_fields:
+            if metadata and metadata.source_dmo_fields:
                 for key, value in metadata.source_dmo_fields.items():
                     citations[key] = str(value)
             chunk_output = SearchIndexChunkingV1Output(
-                chunk_id=str(uuid.uuid4()),
                 chunk_type=ChunkType.TEXT,
                 text=chunk_text.strip(),
                 seq_no=seq_no,

salesforce_data_customcode-4.0.2/src/datacustomcode/templates/function/example/chunking_with_llm/config.json ADDED Viewed

@@ -0,0 +1,3 @@
+{
+    "entryPoint": "entrypoint.py"
+}

salesforce_data_customcode-4.0.2/src/datacustomcode/templates/function/example/chunking_with_llm/entrypoint.py ADDED Viewed

@@ -0,0 +1,99 @@
+#!/usr/bin/env python3
+"""
+Sample Search Index Chunking Customer Function
+This function demonstrates the new signature-based invocation with Pydantic models:
+- Uses SearchIndexChunkingV1Request/Response (Pydantic models)
+- Requires Runtime parameter (for agentic capabilities)
+- Type-safe with direct field access (no wrappers)
+- Automatic validation and conversion
+"""
+import logging
+from datacustomcode.function.feature_types.chunking import (
+    ChunkType,
+    SearchIndexChunkingV1Output,
+    SearchIndexChunkingV1Request,
+    SearchIndexChunkingV1Response,
+)
+from datacustomcode.function.runtime import Runtime
+from datacustomcode.llm_gateway.types.generate_text_request_builder import (
+    GenerateTextRequestBuilder,
+)
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+def _load_prompt_template(runtime: Runtime) -> str:
+    """Load the chunking prompt template from file."""
+    prompt_file = runtime.file.find_file_path("chunking_prompt.txt")
+    with open(prompt_file, "r") as f:
+        _prompt_template_cache = f.read()
+    logger.info(f"Loaded prompt template from {prompt_file}")
+    return _prompt_template_cache
+def function(
+    request: SearchIndexChunkingV1Request, runtime: Runtime
+) -> SearchIndexChunkingV1Response:
+    """
+    Chunk documents for Search Index.
+    Args:
+        request: SearchIndexChunkingV1Request with input documents
+        runtime: Runtime instance for agentic capabilities (future use)
+    Returns:
+        SearchIndexChunkingV1Response with chunked output
+    """
+    logger.info(f"Received {len(request.input)} documents to chunk")
+    # Load prompt template (cached after first call)
+    prompt_template = _load_prompt_template(runtime)
+    chunks = []
+    chunk_id = 1
+    # Process each document
+    for doc_idx, doc in enumerate(request.input):
+        # Direct field access - no wrappers!
+        text = doc.text
+        # Use LLM to intelligently chunk the document
+        # This creates semantic chunks that preserve context and meaning
+        prompt = prompt_template.format(text=text)
+        builder = GenerateTextRequestBuilder()
+        llm_request = (
+            builder.set_model("sfdc_ai__DefaultGPT4Turbo").set_prompt(prompt).build()
+        )
+        response = runtime.llm_gateway.generate_text(llm_request)
+        if response.is_success:
+            # Parse LLM response to extract chunks
+            llm_chunks = response.text.split("---CHUNK---")
+            llm_chunks = [chunk.strip() for chunk in llm_chunks if chunk.strip()]
+            # Create chunk outputs
+            for chunk_text in llm_chunks:
+                chunk = SearchIndexChunkingV1Output(
+                    text=chunk_text,
+                    seq_no=chunk_id,
+                    chunk_type=ChunkType.TEXT,
+                    citations={},
+                )
+                chunks.append(chunk)
+                chunk_id += 1
+        else:
+            # LLM chunking failed - log error and raise exception
+            error_msg = (
+                f"LLM chunking failed for document {doc_idx + 1}: {response.error_code}"
+            )
+            logger.error(error_msg)
+            raise RuntimeError(error_msg)
+    # Return Pydantic response
+    return SearchIndexChunkingV1Response(output=chunks)

salesforce_data_customcode-4.0.2/src/datacustomcode/templates/function/example/chunking_with_llm/files/chunking_prompt.txt ADDED Viewed

@@ -0,0 +1,19 @@
+Analyze this document and break it into logical chunks for search/retrieval.
+Rules:
+1. Each chunk should be 150-300 words
+2. Break at semantic/topic boundaries (not mid-sentence)
+3. Each chunk should be self-contained (understandable alone)
+4. Preserve important context in each chunk
+Format your response as chunks separated by "---CHUNK---" markers.
+Document:
+{text}
+Output format:
+<chunk 1 text>
+---CHUNK---
+<chunk 2 text>
+---CHUNK---
+...

salesforce_data_customcode-4.0.2/src/datacustomcode/templates/function/example/chunking_with_llm/tests/test.json ADDED Viewed

@@ -0,0 +1,51 @@
+{
+    "input": [
+      {
+        "text": "Employee Stock Ownership Plan (ESOP) Guide\n\nWhat is an ESOP?\n\nAn Employee Stock Ownership Plan (ESOP) is a qualified retirement plan that invests primarily in the stock of the sponsoring employer. ESOPs are designed to provide employees with an ownership interest in the company, aligning their interests with those of shareholders. As the company grows and prospers, so does the value of the ESOP shares held by employees.\n\nHow ESOPs Work\n\nWhen you join a company with an ESOP, you become eligible to participate after meeting certain requirements, typically one year of service. The company makes contributions to your ESOP account, usually as a percentage of your compensation. These contributions are made in the form of company stock or cash that is used to purchase company stock. The shares are held in a trust account in your name.\n\nVesting Schedule and Ownership\n\nYour ownership of ESOP shares typically follows a vesting schedule. A common vesting schedule is 20% per year over five years, meaning you become fully vested after five years of service. Once vested, those shares belong to you even if you leave the company. If you leave before becoming fully vested, you forfeit the unvested portion.\n\nDistribution Rules and Tax Implications\n\nWhen you leave the company, retire, or meet other distribution trigger events, you are entitled to receive the value of your vested ESOP shares. Distributions typically begin in the year following your separation from service. You can choose to receive distributions in a lump sum or in installments over several years. The tax treatment depends on how you receive the distribution - rolling over to an IRA defers taxes, while direct distributions are taxed as ordinary income.\n\nSelling Your ESOP Shares\n\nFor privately held companies, the ESOP trust or the company itself typically repurchases your shares at fair market value, determined by an independent appraiser. 
For publicly traded companies, shares may be sold on the open market. The repurchase obligation ensures you can convert your ownership stake to cash when you leave the company, providing liquidity for what might otherwise be an illiquid investment.",
+        "metadata": {
+          "type": "text",
+          "page_number": 1,
+          "text_as_html": null,
+          "source_dmo_fields": {
+            "FilePath__c": "employee_handbook/esop_guide.pdf",
+            "Size__c": 2847521,
+            "ContentType__c": "pdf",
+            "LastModified__c": "2026-04-15T08:23:11.442000"
+          },
+          "prepend": [
+            {
+              "dmo_name": "udmo_1__dlm",
+              "field_name": "ResolvedFilePath__c",
+              "value": "udlo_1__dll:employee_handbook/esop_guide.pdf"
+            }
+          ]
+        }
+      },
+      {
+        "text": "Data Privacy and Security Policy\n\nIntroduction and Scope\n\nThis policy establishes the framework for protecting confidential and sensitive information within our organization. It applies to all employees, contractors, consultants, and third parties who have access to company systems or data. The policy covers all forms of information, whether stored electronically, on paper, or transmitted verbally. Compliance with this policy is mandatory and violations may result in disciplinary action up to and including termination of employment.\n\nData Classification Standards\n\nAll company data must be classified according to sensitivity level. Public data can be freely shared without risk to the organization. Internal data is intended for employees only and should not be shared externally without approval. Confidential data includes business plans, financial records, and employee information that could cause significant harm if disclosed. Restricted data includes trade secrets, personally identifiable information (PII), and regulated data that must comply with specific legal requirements like GDPR, HIPAA, or CCPA.\n\nAccess Control and Authentication Requirements\n\nAccess to company systems and data is granted on a need-to-know basis following the principle of least privilege. All users must authenticate using strong passwords that meet complexity requirements: minimum 12 characters, including uppercase, lowercase, numbers, and special characters. Multi-factor authentication (MFA) is required for all remote access and privileged accounts. Passwords must be changed every 90 days and cannot reuse the previous 12 passwords. Sharing of credentials is strictly prohibited.\n\nData Handling and Transmission Security\n\nWhen transmitting confidential or restricted data, encryption must be used. Email containing sensitive information should be encrypted using approved tools. File transfers must use secure protocols like SFTP or HTTPS. 
Physical documents containing sensitive information must be stored in locked cabinets when not in use and shredded when no longer needed. Laptops and mobile devices must use full-disk encryption and automatic screen locking after 5 minutes of inactivity.\n\nIncident Response and Reporting Obligations\n\nAny suspected or actual security incident must be reported immediately to the Information Security team. Incidents include unauthorized access attempts, malware infections, lost or stolen devices, or accidental disclosure of sensitive information. Do not attempt to investigate or remediate security incidents yourself. The Security team will coordinate the response, including containment, investigation, remediation, and required notifications. For data breaches involving PII, regulatory notification requirements may apply within 72 hours of discovery.",
+        "metadata": {
+          "type": "text",
+          "page_number": 1,
+          "source_dmo_fields": {
+            "FilePath__c": "policies/data_privacy_security.pdf",
+            "Size__c": 1923456,
+            "ContentType__c": "pdf",
+            "LastModified__c": "2026-03-01T14:52:33.127000"
+          }
+        }
+      },
+      {
+        "text": "Product Launch Strategy: CloudSync Pro Q2 2026\n\nExecutive Summary\n\nCloudSync Pro represents our entry into the enterprise data synchronization market, targeting organizations with hybrid cloud infrastructures. Our research indicates strong demand for real-time data replication across on-premise and cloud environments. The total addressable market is estimated at $4.2B globally, growing at 23% annually. We aim to capture 3% market share within 18 months, generating $126M in annual recurring revenue. This launch is critical to our strategic objective of expanding beyond SMB customers into enterprise accounts.\n\nTarget Customer Profile and Pain Points\n\nOur primary target is IT Directors and Cloud Architects at mid-to-large enterprises (1000+ employees) operating hybrid infrastructure. These customers struggle with data consistency across distributed systems, experiencing latency issues, sync failures, and compliance challenges. Current solutions require significant custom development and ongoing maintenance. Our research shows customers spend an average of $340K annually on data integration tools and engineering resources. They need a solution that reduces integration complexity while providing real-time synchronization guarantees.\n\nCompetitive Landscape and Differentiation\n\nThe market leaders are DataSync Enterprise (32% share) and ReplicaCloud (28% share), both offering batch-oriented synchronization with 5-15 minute latencies. Our key differentiator is true real-time replication with sub-second latency using change data capture (CDC) technology. Additionally, our pricing model is consumption-based ($0.02 per GB transferred) versus competitors' per-connector licensing ($5K-15K per connector annually). This makes CloudSync Pro 40% more cost-effective for high-volume use cases while eliminating the capacity planning burden.\n\nGo-to-Market Strategy and Channel Plan\n\nWe will launch through a hybrid direct and partner model. 
Direct sales will target Fortune 2000 accounts through our existing enterprise sales team, augmented with three new cloud specialist hires. Partner channels include cloud marketplaces (AWS, Azure, GCP) and system integrators. We are finalizing partnerships with Deloitte and Accenture to include CloudSync Pro in their cloud migration practices. Marketing will emphasize analyst relations (targeting Gartner Magic Quadrant inclusion), thought leadership content, and targeted account-based marketing campaigns.\n\nRevenue Model and Financial Projections\n\nPricing is based on data volume transferred: $0.02/GB for the first 100TB monthly, $0.015/GB for 100-500TB, and $0.01/GB above 500TB. Average customer is projected at 250TB monthly ($4,375 MRR, $52,500 ARR). We forecast 100 customers by end of Q4 2026, 300 by end of 2027. Year 1 revenue target is $2.1M, scaling to $15.6M in Year 2 and $47M in Year 3. Gross margins are expected at 78% at scale. Initial investment includes $3.2M in product development (already completed), $2.8M in go-to-market expenses, and $1.5M in infrastructure scaling over 18 months.",
+        "metadata": {
+          "type": "text",
+          "page_number": 1,
+          "source_dmo_fields": {
+            "FilePath__c": "product_strategy/cloudsync_launch_plan.pdf",
+            "Size__c": 3156789,
+            "ContentType__c": "pdf",
+            "LastModified__c": "2026-04-28T11:18:47.893000"
+          }
+        }
+      }
+    ]
+}

salesforce_data_customcode-4.0.0/src/datacustomcode/__init__.py DELETED Viewed

@@ -1,27 +0,0 @@
-# Copyright (c) 2025, Salesforce, Inc.
-# SPDX-License-Identifier: Apache-2
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from datacustomcode.client import Client
-from datacustomcode.credentials import AuthType, Credentials
-from datacustomcode.io.reader.query_api import QueryAPIDataCloudReader
-from datacustomcode.io.writer.print import PrintDataCloudWriter
-__all__ = [
-    "AuthType",
-    "Client",
-    "Credentials",
-    "PrintDataCloudWriter",
-    "QueryAPIDataCloudReader",
-]

salesforce_data_customcode-4.0.0/src/datacustomcode/proxy/client/__init__.py DELETED Viewed

@@ -1,14 +0,0 @@
-# Copyright (c) 2025, Salesforce, Inc.
-# SPDX-License-Identifier: Apache-2
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.