salesforce-data-customcode 1.1.0__tar.gz → 3.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/PKG-INFO +2 -2
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/pyproject.toml +4 -2
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/client.py +8 -22
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/config.py +3 -3
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/config.yaml +1 -0
- salesforce_data_customcode-3.0.0/src/datacustomcode/function/__init__.py +20 -0
- salesforce_data_customcode-3.0.0/src/datacustomcode/function/base.py +18 -0
- salesforce_data_customcode-3.0.0/src/datacustomcode/function/features_types/chunking.py +89 -0
- salesforce_data_customcode-3.0.0/src/datacustomcode/function/runtime.py +77 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/io/reader/base.py +0 -2
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/io/reader/query_api.py +26 -10
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/io/reader/sf_cli.py +15 -9
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/io/writer/print.py +1 -1
- salesforce_data_customcode-3.0.0/src/datacustomcode/llm_gateway/base.py +34 -0
- salesforce_data_customcode-3.0.0/src/datacustomcode/llm_gateway/default.py +33 -0
- salesforce_data_customcode-3.0.0/src/datacustomcode/llm_gateway/types/generate_text_request.py +46 -0
- salesforce_data_customcode-3.0.0/src/datacustomcode/llm_gateway/types/generate_text_request_builder.py +82 -0
- salesforce_data_customcode-3.0.0/src/datacustomcode/llm_gateway/types/generate_text_response.py +58 -0
- salesforce_data_customcode-3.0.0/src/datacustomcode/llm_gateway/types/generate_text_response_builder.py +37 -0
- salesforce_data_customcode-3.0.0/src/datacustomcode/proxy/__init__.py +14 -0
- salesforce_data_customcode-3.0.0/src/datacustomcode/proxy/client/__init__.py +14 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/templates/function/payload/entrypoint.py +16 -2
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/LICENSE.txt +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/README.md +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/__init__.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/auth.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/cli.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/cmd.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/credentials.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/deploy.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/file/__init__.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/file/base.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/file/path/__init__.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/file/path/default.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/io/__init__.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/io/base.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/io/reader/__init__.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/io/reader/utils.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/io/writer/__init__.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/io/writer/base.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/io/writer/csv.py +0 -0
- {salesforce_data_customcode-1.1.0/src/datacustomcode/proxy → salesforce_data_customcode-3.0.0/src/datacustomcode/llm_gateway}/__init__.py +0 -0
- {salesforce_data_customcode-1.1.0/src/datacustomcode/proxy/client → salesforce_data_customcode-3.0.0/src/datacustomcode/llm_gateway/types}/__init__.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/mixin.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/proxy/base.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/proxy/client/LocalProxyClientProvider.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/proxy/client/base.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/py.typed +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/run.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/scan.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/spark/__init__.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/spark/base.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/spark/default.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/template.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/templates/function/.devcontainer/devcontainer.json +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/templates/function/Dockerfile.dependencies +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/templates/function/README.md +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/templates/function/build_native_dependencies.sh +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/templates/function/payload/config.json +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/templates/function/requirements-dev.txt +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/templates/function/requirements.txt +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/templates/script/.devcontainer/devcontainer.json +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/templates/script/Dockerfile +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/templates/script/Dockerfile.dependencies +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/templates/script/README.md +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/templates/script/account.ipynb +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/templates/script/build_native_dependencies.sh +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/templates/script/examples/employee_hierarchy/employee_data.csv +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/templates/script/examples/employee_hierarchy/entrypoint.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/templates/script/jupyterlab.sh +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/templates/script/payload/config.json +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/templates/script/payload/entrypoint.py +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/templates/script/requirements-dev.txt +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/templates/script/requirements.txt +0 -0
- {salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/version.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: salesforce-data-customcode
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 3.0.0
|
|
4
4
|
Summary: Data Cloud Custom Code SDK
|
|
5
5
|
License-Expression: Apache-2.0
|
|
6
6
|
License-File: LICENSE.txt
|
|
@@ -15,7 +15,7 @@ Requires-Dist: click (>=8.1.8,<9.0.0)
|
|
|
15
15
|
Requires-Dist: loguru (>=0.7.3,<0.8.0)
|
|
16
16
|
Requires-Dist: numpy
|
|
17
17
|
Requires-Dist: pandas
|
|
18
|
-
Requires-Dist: pydantic (
|
|
18
|
+
Requires-Dist: pydantic (==2.13.1)
|
|
19
19
|
Requires-Dist: pyspark (==3.5.1)
|
|
20
20
|
Requires-Dist: pyyaml (>=6.0,<7.0)
|
|
21
21
|
Requires-Dist: salesforce-cdp-connector (>=1.0.19)
|
|
@@ -18,7 +18,7 @@ license = "Apache-2.0"
|
|
|
18
18
|
name = "salesforce-data-customcode"
|
|
19
19
|
readme = "README.md"
|
|
20
20
|
requires-python = ">=3.10,<3.12"
|
|
21
|
-
version = "1.1.0"
|
|
21
|
+
version = "3.0.0"
|
|
22
22
|
|
|
23
23
|
[tool.black]
|
|
24
24
|
exclude = '''
|
|
@@ -73,7 +73,9 @@ use_parentheses = true
|
|
|
73
73
|
|
|
74
74
|
[tool.mypy]
|
|
75
75
|
check_untyped_defs = false
|
|
76
|
+
explicit_package_bases = true
|
|
76
77
|
ignore_missing_imports = true
|
|
78
|
+
mypy_path = "src"
|
|
77
79
|
no_implicit_optional = true
|
|
78
80
|
plugins = [
|
|
79
81
|
'pydantic.mypy'
|
|
@@ -99,7 +101,7 @@ click = "^8.1.8"
|
|
|
99
101
|
loguru = "^0.7.3"
|
|
100
102
|
numpy = "*"
|
|
101
103
|
pandas = "*"
|
|
102
|
-
pydantic = "
|
|
104
|
+
pydantic = "2.13.1"
|
|
103
105
|
pyspark = "3.5.1"
|
|
104
106
|
python = ">=3.10,<3.12"
|
|
105
107
|
pyyaml = "^6.0"
|
{salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/client.py
RENAMED
|
@@ -119,8 +119,6 @@ class Client:
|
|
|
119
119
|
spark_provider: Optional["BaseSparkSessionProvider"] = None,
|
|
120
120
|
code_type: str = "script",
|
|
121
121
|
) -> Client:
|
|
122
|
-
if "function" in code_type:
|
|
123
|
-
return cls._new_function_client()
|
|
124
122
|
|
|
125
123
|
if cls._instance is None:
|
|
126
124
|
cls._instance = super().__new__(cls)
|
|
@@ -175,41 +173,29 @@ class Client:
|
|
|
175
173
|
raise ValueError("Cannot set reader or writer after client is initialized")
|
|
176
174
|
return cls._instance
|
|
177
175
|
|
|
178
|
-
|
|
179
|
-
def _new_function_client(cls) -> Client:
|
|
180
|
-
cls._instance = super().__new__(cls)
|
|
181
|
-
cls._instance._proxy = (
|
|
182
|
-
config.proxy_config.to_object() # type: ignore
|
|
183
|
-
if config.proxy_config is not None
|
|
184
|
-
else None
|
|
185
|
-
)
|
|
186
|
-
return cls._instance
|
|
187
|
-
|
|
188
|
-
def read_dlo(self, name: str, row_limit: int = 1000) -> PySparkDataFrame:
|
|
176
|
+
def read_dlo(self, name: str) -> PySparkDataFrame:
|
|
189
177
|
"""Read a DLO from Data Cloud.
|
|
190
178
|
|
|
191
179
|
Args:
|
|
192
180
|
name: The name of the DLO to read.
|
|
193
|
-
row_limit: Maximum number of rows to fetch (default: 1000).
|
|
194
181
|
|
|
195
182
|
Returns:
|
|
196
183
|
A PySpark DataFrame containing the DLO data.
|
|
197
184
|
"""
|
|
198
185
|
self._record_dlo_access(name)
|
|
199
|
-
return self._reader.read_dlo(name
|
|
186
|
+
return self._reader.read_dlo(name) # type: ignore[no-any-return]
|
|
200
187
|
|
|
201
|
-
def read_dmo(self, name: str, row_limit: int = 1000) -> PySparkDataFrame:
|
|
188
|
+
def read_dmo(self, name: str) -> PySparkDataFrame:
|
|
202
189
|
"""Read a DMO from Data Cloud.
|
|
203
190
|
|
|
204
191
|
Args:
|
|
205
192
|
name: The name of the DMO to read.
|
|
206
|
-
row_limit: Maximum number of rows to fetch (default: 1000).
|
|
207
193
|
|
|
208
194
|
Returns:
|
|
209
195
|
A PySpark DataFrame containing the DMO data.
|
|
210
196
|
"""
|
|
211
197
|
self._record_dmo_access(name)
|
|
212
|
-
return self._reader.read_dmo(name
|
|
198
|
+
return self._reader.read_dmo(name) # type: ignore[no-any-return]
|
|
213
199
|
|
|
214
200
|
def write_to_dlo(
|
|
215
201
|
self, name: str, dataframe: PySparkDataFrame, write_mode: WriteMode, **kwargs
|
|
@@ -222,7 +208,7 @@ class Client:
|
|
|
222
208
|
write_mode: The write mode to use for writing to the DLO.
|
|
223
209
|
"""
|
|
224
210
|
self._validate_data_layer_history_does_not_contain(DataCloudObjectType.DMO)
|
|
225
|
-
return self._writer.write_to_dlo(name, dataframe, write_mode, **kwargs)
|
|
211
|
+
return self._writer.write_to_dlo(name, dataframe, write_mode, **kwargs) # type: ignore[no-any-return]
|
|
226
212
|
|
|
227
213
|
def write_to_dmo(
|
|
228
214
|
self, name: str, dataframe: PySparkDataFrame, write_mode: WriteMode, **kwargs
|
|
@@ -235,17 +221,17 @@ class Client:
|
|
|
235
221
|
write_mode: The write mode to use for writing to the DMO.
|
|
236
222
|
"""
|
|
237
223
|
self._validate_data_layer_history_does_not_contain(DataCloudObjectType.DLO)
|
|
238
|
-
return self._writer.write_to_dmo(name, dataframe, write_mode, **kwargs)
|
|
224
|
+
return self._writer.write_to_dmo(name, dataframe, write_mode, **kwargs) # type: ignore[no-any-return]
|
|
239
225
|
|
|
240
226
|
def call_llm_gateway(self, LLM_MODEL_ID: str, prompt: str, maxTokens: int) -> str:
|
|
241
227
|
if self._proxy is None:
|
|
242
228
|
raise ValueError("No proxy configured; set proxy or proxy_config")
|
|
243
|
-
return self._proxy.call_llm_gateway(LLM_MODEL_ID, prompt, maxTokens)
|
|
229
|
+
return self._proxy.call_llm_gateway(LLM_MODEL_ID, prompt, maxTokens) # type: ignore[no-any-return]
|
|
244
230
|
|
|
245
231
|
def find_file_path(self, file_name: str) -> Path:
|
|
246
232
|
"""Return a file path"""
|
|
247
233
|
|
|
248
|
-
return self._file.find_file_path(file_name)
|
|
234
|
+
return self._file.find_file_path(file_name) # type: ignore[no-any-return]
|
|
249
235
|
|
|
250
236
|
def _validate_data_layer_history_does_not_contain(
|
|
251
237
|
self, data_cloud_object_type: DataCloudObjectType
|
{salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/config.py
RENAMED
|
@@ -36,10 +36,10 @@ import yaml
|
|
|
36
36
|
# This lets all readers and writers to be findable via config
|
|
37
37
|
from datacustomcode.io import * # noqa: F403
|
|
38
38
|
from datacustomcode.io.base import BaseDataAccessLayer
|
|
39
|
-
from datacustomcode.io.reader.base import BaseDataCloudReader # noqa:
|
|
40
|
-
from datacustomcode.io.writer.base import BaseDataCloudWriter # noqa:
|
|
39
|
+
from datacustomcode.io.reader.base import BaseDataCloudReader # noqa: TCH002
|
|
40
|
+
from datacustomcode.io.writer.base import BaseDataCloudWriter # noqa: TCH002
|
|
41
41
|
from datacustomcode.proxy.base import BaseProxyAccessLayer
|
|
42
|
-
from datacustomcode.proxy.client.base import BaseProxyClient # noqa:
|
|
42
|
+
from datacustomcode.proxy.client.base import BaseProxyClient # noqa: TCH002
|
|
43
43
|
from datacustomcode.spark.base import BaseSparkSessionProvider
|
|
44
44
|
|
|
45
45
|
DEFAULT_CONFIG_NAME = "config.yaml"
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Copyright (c) 2025, Salesforce, Inc.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
"""Function runtime for Data Cloud Custom Code."""
|
|
17
|
+
|
|
18
|
+
from datacustomcode.function.runtime import Runtime
|
|
19
|
+
|
|
20
|
+
__all__ = ["Runtime"]
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Copyright (c) 2025, Salesforce, Inc.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class BaseRuntime:
|
|
18
|
+
"""Base class for datacustomcode run time"""
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# Copyright (c) 2025, Salesforce, Inc.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
"""
|
|
17
|
+
Pydantic models for byoc-function-proto (uds_chunking.proto)
|
|
18
|
+
Auto-generated - validation rules from buf.validate
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from typing import (
|
|
22
|
+
Any,
|
|
23
|
+
Dict,
|
|
24
|
+
List,
|
|
25
|
+
Literal,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
from pydantic import BaseModel, Field
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class DocElement(BaseModel):
|
|
32
|
+
"""Document element to be chunked"""
|
|
33
|
+
|
|
34
|
+
text: str = Field(..., description="Text content to be chunked")
|
|
35
|
+
metadata: Dict[str, Any] = Field(
|
|
36
|
+
default_factory=dict, description="Source document metadata"
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class ChunkOutput(BaseModel):
|
|
41
|
+
"""Output chunk from the chunking process"""
|
|
42
|
+
|
|
43
|
+
chunk_id: str = Field(..., description="UUID for this chunk")
|
|
44
|
+
chunk_type: str = Field(..., description="Type: 'text'")
|
|
45
|
+
text: str = Field(..., description="Chunk text content")
|
|
46
|
+
seq_no: int = Field(..., description="Sequential chunk number (1-based)")
|
|
47
|
+
metadata: Dict[str, str] = Field(
|
|
48
|
+
default_factory=dict, description="Metadata from source (DMO fields)"
|
|
49
|
+
)
|
|
50
|
+
tag_metadata: Dict[str, Any] = Field(
|
|
51
|
+
default_factory=dict, description="Additional tags"
|
|
52
|
+
)
|
|
53
|
+
citations: Dict[str, Any] = Field(
|
|
54
|
+
default_factory=dict, description="Citation information"
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class StatusResponse(BaseModel):
|
|
59
|
+
"""Status response for operation"""
|
|
60
|
+
|
|
61
|
+
status_type: str = Field(..., description="'success' or 'error'")
|
|
62
|
+
status_message: str = Field(..., description="Human-readable status")
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class UdsChunkingV1BatchRequest(BaseModel):
|
|
66
|
+
"""Batch request for UDS chunking"""
|
|
67
|
+
|
|
68
|
+
version: Literal["v1"] = Field(
|
|
69
|
+
default="v1", description="API version, must be 'v1'"
|
|
70
|
+
)
|
|
71
|
+
input: List[DocElement] = Field(
|
|
72
|
+
..., min_length=1, description="List of documents (min 1)"
|
|
73
|
+
)
|
|
74
|
+
max_characters: int = Field(..., description="Max chars per chunk (default: 100)")
|
|
75
|
+
additional_params: Dict[str, Any] = Field(
|
|
76
|
+
default_factory=dict, description="Future extension point"
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class UdsChunkingV1BatchResponse(BaseModel):
|
|
81
|
+
"""Batch response for UDS chunking"""
|
|
82
|
+
|
|
83
|
+
version: Literal["v1"] = Field(
|
|
84
|
+
default="v1", description="API version, must be 'v1'"
|
|
85
|
+
)
|
|
86
|
+
output: List[ChunkOutput] = Field(
|
|
87
|
+
default_factory=list, description="Flat list of chunks from all docs"
|
|
88
|
+
)
|
|
89
|
+
status: StatusResponse = Field(..., description="Overall operation status")
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# Copyright (c) 2025, Salesforce, Inc.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
import threading
|
|
18
|
+
from typing import Optional
|
|
19
|
+
|
|
20
|
+
from datacustomcode.file.path.default import DefaultFindFilePath
|
|
21
|
+
from datacustomcode.function.base import BaseRuntime
|
|
22
|
+
from datacustomcode.llm_gateway.default import DefaultLLMGateway
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class Runtime(BaseRuntime):
|
|
26
|
+
"""Client for Function code type.
|
|
27
|
+
|
|
28
|
+
NOTE: Do not instantiate this class directly.
|
|
29
|
+
It will be provided to your function by the SDK:
|
|
30
|
+
|
|
31
|
+
def function(request: dict, runtime: RunTime) -> dict:
|
|
32
|
+
response = {...}
|
|
33
|
+
return response
|
|
34
|
+
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
_instance: Optional["Runtime"] = None
|
|
38
|
+
_lock = threading.Lock()
|
|
39
|
+
|
|
40
|
+
def __new__(cls):
|
|
41
|
+
"""Create singleton instance (thread-safe)."""
|
|
42
|
+
with cls._lock:
|
|
43
|
+
if cls._instance is not None:
|
|
44
|
+
raise RuntimeError(
|
|
45
|
+
"Runtime can only be instantiated once by the SDK.\n\n"
|
|
46
|
+
"Do not instantiate it yourself. Accept it as a parameter:\n\n"
|
|
47
|
+
" from datacustomcode.runtime.function.RunTime import Function\n"
|
|
48
|
+
" \n"
|
|
49
|
+
" def function(request: dict, runtime: Runtime) -> dict:\n"
|
|
50
|
+
" response = {...}\n"
|
|
51
|
+
" return response"
|
|
52
|
+
)
|
|
53
|
+
cls._instance = super().__new__(cls)
|
|
54
|
+
return cls._instance
|
|
55
|
+
|
|
56
|
+
def __init__(self) -> None:
|
|
57
|
+
# Prevent re-initialization
|
|
58
|
+
if hasattr(self, "_initialized"):
|
|
59
|
+
return
|
|
60
|
+
|
|
61
|
+
self._initialized = True
|
|
62
|
+
|
|
63
|
+
super().__init__()
|
|
64
|
+
|
|
65
|
+
# Initialize resources
|
|
66
|
+
self._llm_gateway = DefaultLLMGateway()
|
|
67
|
+
self._file = DefaultFindFilePath()
|
|
68
|
+
|
|
69
|
+
@property
|
|
70
|
+
def llm_gateway(self) -> DefaultLLMGateway:
|
|
71
|
+
"""Access LLM operations."""
|
|
72
|
+
return self._llm_gateway
|
|
73
|
+
|
|
74
|
+
@property
|
|
75
|
+
def file(self) -> DefaultFindFilePath:
|
|
76
|
+
"""Access file operations."""
|
|
77
|
+
return self._file
|
|
@@ -33,7 +33,6 @@ class BaseDataCloudReader(BaseDataAccessLayer):
|
|
|
33
33
|
self,
|
|
34
34
|
name: str,
|
|
35
35
|
schema: Union[AtomicType, StructType, str, None] = None,
|
|
36
|
-
row_limit: int = 1000,
|
|
37
36
|
) -> PySparkDataFrame: ...
|
|
38
37
|
|
|
39
38
|
@abstractmethod
|
|
@@ -41,5 +40,4 @@ class BaseDataCloudReader(BaseDataAccessLayer):
|
|
|
41
40
|
self,
|
|
42
41
|
name: str,
|
|
43
42
|
schema: Union[AtomicType, StructType, str, None] = None,
|
|
44
|
-
row_limit: int = 1000,
|
|
45
43
|
) -> PySparkDataFrame: ...
|
|
@@ -37,6 +37,7 @@ logger = logging.getLogger(__name__)
|
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
SQL_QUERY_TEMPLATE: Final = "SELECT * FROM {} LIMIT {}"
|
|
40
|
+
SQL_QUERY_TEMPLATE_NO_LIMIT: Final = "SELECT * FROM {}"
|
|
40
41
|
|
|
41
42
|
|
|
42
43
|
def create_cdp_connection(
|
|
@@ -122,6 +123,7 @@ class QueryAPIDataCloudReader(BaseDataCloudReader):
|
|
|
122
123
|
credentials_profile: str = "default",
|
|
123
124
|
dataspace: Optional[str] = None,
|
|
124
125
|
sf_cli_org: Optional[str] = None,
|
|
126
|
+
default_row_limit: Optional[int] = None,
|
|
125
127
|
) -> None:
|
|
126
128
|
"""Initialize QueryAPIDataCloudReader.
|
|
127
129
|
|
|
@@ -137,8 +139,12 @@ class QueryAPIDataCloudReader(BaseDataCloudReader):
|
|
|
137
139
|
reader delegates to :class:`SFCLIDataCloudReader` which calls
|
|
138
140
|
the Data Cloud REST API directly using the token obtained from
|
|
139
141
|
``sf org display``, bypassing the CDP token-exchange flow.
|
|
142
|
+
default_row_limit: Maximum number of rows to fetch automatically.
|
|
143
|
+
When ``None``, no limit is applied (all rows are returned).
|
|
144
|
+
Set via ``default_row_limit`` in ``config.yaml`` reader options.
|
|
140
145
|
"""
|
|
141
146
|
self.spark = spark
|
|
147
|
+
self._default_row_limit = default_row_limit
|
|
142
148
|
if sf_cli_org:
|
|
143
149
|
logger.debug(
|
|
144
150
|
f"Initializing QueryAPIDataCloudReader with SF CLI org '{sf_cli_org}'"
|
|
@@ -147,6 +153,7 @@ class QueryAPIDataCloudReader(BaseDataCloudReader):
|
|
|
147
153
|
spark=spark,
|
|
148
154
|
sf_cli_org=sf_cli_org,
|
|
149
155
|
dataspace=dataspace,
|
|
156
|
+
default_row_limit=default_row_limit,
|
|
150
157
|
)
|
|
151
158
|
self._conn = None
|
|
152
159
|
else:
|
|
@@ -158,19 +165,30 @@ class QueryAPIDataCloudReader(BaseDataCloudReader):
|
|
|
158
165
|
)
|
|
159
166
|
self._conn = create_cdp_connection(credentials, dataspace)
|
|
160
167
|
|
|
168
|
+
def _build_query(self, name: str) -> str:
|
|
169
|
+
"""Build a SQL query, applying the configured default row limit.
|
|
170
|
+
|
|
171
|
+
Args:
|
|
172
|
+
name: Object name to query.
|
|
173
|
+
|
|
174
|
+
Returns:
|
|
175
|
+
SQL query string.
|
|
176
|
+
"""
|
|
177
|
+
if self._default_row_limit is not None:
|
|
178
|
+
return SQL_QUERY_TEMPLATE.format(name, self._default_row_limit)
|
|
179
|
+
return SQL_QUERY_TEMPLATE_NO_LIMIT.format(name)
|
|
180
|
+
|
|
161
181
|
def read_dlo(
|
|
162
182
|
self,
|
|
163
183
|
name: str,
|
|
164
184
|
schema: Union[AtomicType, StructType, str, None] = None,
|
|
165
|
-
row_limit: int = 1000,
|
|
166
185
|
) -> PySparkDataFrame:
|
|
167
186
|
"""
|
|
168
|
-
Read a Data Lake Object (DLO) from the Data Cloud
|
|
187
|
+
Read a Data Lake Object (DLO) from the Data Cloud.
|
|
169
188
|
|
|
170
189
|
Args:
|
|
171
190
|
name (str): The name of the DLO.
|
|
172
191
|
schema (Optional[Union[AtomicType, StructType, str]]): Schema of the DLO.
|
|
173
|
-
row_limit (int): Maximum number of rows to fetch.
|
|
174
192
|
|
|
175
193
|
Returns:
|
|
176
194
|
PySparkDataFrame: The PySpark DataFrame.
|
|
@@ -179,9 +197,9 @@ class QueryAPIDataCloudReader(BaseDataCloudReader):
|
|
|
179
197
|
self, "_sf_cli_reader", None
|
|
180
198
|
)
|
|
181
199
|
if sf_cli_reader is not None:
|
|
182
|
-
return sf_cli_reader.read_dlo(name, schema
|
|
200
|
+
return sf_cli_reader.read_dlo(name, schema) # type: ignore[no-any-return]
|
|
183
201
|
|
|
184
|
-
query =
|
|
202
|
+
query = self._build_query(name)
|
|
185
203
|
|
|
186
204
|
assert self._conn is not None
|
|
187
205
|
pandas_df = self._conn.get_pandas_dataframe(query)
|
|
@@ -197,15 +215,13 @@ class QueryAPIDataCloudReader(BaseDataCloudReader):
|
|
|
197
215
|
self,
|
|
198
216
|
name: str,
|
|
199
217
|
schema: Union[AtomicType, StructType, str, None] = None,
|
|
200
|
-
row_limit: int = 1000,
|
|
201
218
|
) -> PySparkDataFrame:
|
|
202
219
|
"""
|
|
203
|
-
Read a Data Model Object (DMO) from the Data Cloud
|
|
220
|
+
Read a Data Model Object (DMO) from the Data Cloud.
|
|
204
221
|
|
|
205
222
|
Args:
|
|
206
223
|
name (str): The name of the DMO.
|
|
207
224
|
schema (Optional[Union[AtomicType, StructType, str]]): Schema of the DMO.
|
|
208
|
-
row_limit (int): Maximum number of rows to fetch.
|
|
209
225
|
|
|
210
226
|
Returns:
|
|
211
227
|
PySparkDataFrame: The PySpark DataFrame.
|
|
@@ -214,9 +230,9 @@ class QueryAPIDataCloudReader(BaseDataCloudReader):
|
|
|
214
230
|
self, "_sf_cli_reader", None
|
|
215
231
|
)
|
|
216
232
|
if sf_cli_reader is not None:
|
|
217
|
-
return sf_cli_reader.read_dmo(name, schema
|
|
233
|
+
return sf_cli_reader.read_dmo(name, schema) # type: ignore[no-any-return]
|
|
218
234
|
|
|
219
|
-
query =
|
|
235
|
+
query = self._build_query(name)
|
|
220
236
|
|
|
221
237
|
assert self._conn is not None
|
|
222
238
|
pandas_df = self._conn.get_pandas_dataframe(query)
|
|
@@ -55,6 +55,7 @@ class SFCLIDataCloudReader(BaseDataCloudReader):
|
|
|
55
55
|
spark: SparkSession,
|
|
56
56
|
sf_cli_org: str,
|
|
57
57
|
dataspace: Optional[str] = None,
|
|
58
|
+
default_row_limit: Optional[int] = None,
|
|
58
59
|
) -> None:
|
|
59
60
|
"""Initialize SFCLIDataCloudReader.
|
|
60
61
|
|
|
@@ -64,9 +65,13 @@ class SFCLIDataCloudReader(BaseDataCloudReader):
|
|
|
64
65
|
(e.g. the alias given to ``sf org login web --alias dev1``).
|
|
65
66
|
dataspace: Optional dataspace identifier. If ``None`` or
|
|
66
67
|
``"default"`` the query runs against the default dataspace.
|
|
68
|
+
default_row_limit: Maximum number of rows to fetch automatically.
|
|
69
|
+
When ``None``, no limit is applied (all rows are returned).
|
|
70
|
+
Set via ``default_row_limit`` in ``config.yaml`` reader options.
|
|
67
71
|
"""
|
|
68
72
|
self.spark = spark
|
|
69
73
|
self.sf_cli_org = sf_cli_org
|
|
74
|
+
self._default_row_limit = default_row_limit
|
|
70
75
|
self.dataspace = (
|
|
71
76
|
dataspace if dataspace and dataspace != "default" else "default"
|
|
72
77
|
)
|
|
@@ -132,12 +137,14 @@ class SFCLIDataCloudReader(BaseDataCloudReader):
|
|
|
132
137
|
logger.debug(f"Fetched token from SF CLI for org '{self.sf_cli_org}'")
|
|
133
138
|
return access_token, instance_url
|
|
134
139
|
|
|
135
|
-
def _execute_query(self, sql: str
|
|
140
|
+
def _execute_query(self, sql: str) -> pd.DataFrame:
|
|
136
141
|
"""Execute *sql* against the Data Cloud REST endpoint.
|
|
137
142
|
|
|
143
|
+
The configured ``default_row_limit`` is automatically appended as a
|
|
144
|
+
``LIMIT`` clause when set (typically for local development).
|
|
145
|
+
|
|
138
146
|
Args:
|
|
139
147
|
sql: Base SQL query (no ``LIMIT`` clause).
|
|
140
|
-
row_limit: Maximum rows to return.
|
|
141
148
|
|
|
142
149
|
Returns:
|
|
143
150
|
Pandas DataFrame with query results.
|
|
@@ -150,7 +157,10 @@ class SFCLIDataCloudReader(BaseDataCloudReader):
|
|
|
150
157
|
url = f"{instance_url}/services/data/{API_VERSION}/ssot/query-sql"
|
|
151
158
|
headers = {"Authorization": f"Bearer {access_token}"}
|
|
152
159
|
params = {"dataspace": self.dataspace}
|
|
153
|
-
|
|
160
|
+
if self._default_row_limit is not None:
|
|
161
|
+
body = {"sql": f"{sql} LIMIT {self._default_row_limit}"}
|
|
162
|
+
else:
|
|
163
|
+
body = {"sql": sql}
|
|
154
164
|
|
|
155
165
|
logger.debug(f"Executing Data Cloud query: {body['sql']}")
|
|
156
166
|
|
|
@@ -190,19 +200,17 @@ class SFCLIDataCloudReader(BaseDataCloudReader):
|
|
|
190
200
|
self,
|
|
191
201
|
name: str,
|
|
192
202
|
schema: Union[AtomicType, StructType, str, None] = None,
|
|
193
|
-
row_limit: int = 1000,
|
|
194
203
|
) -> PySparkDataFrame:
|
|
195
204
|
"""Read a Data Lake Object (DLO) from Data Cloud.
|
|
196
205
|
|
|
197
206
|
Args:
|
|
198
207
|
name: DLO name.
|
|
199
208
|
schema: Optional explicit schema.
|
|
200
|
-
row_limit: Maximum rows to fetch.
|
|
201
209
|
|
|
202
210
|
Returns:
|
|
203
211
|
PySpark DataFrame.
|
|
204
212
|
"""
|
|
205
|
-
pandas_df = self._execute_query(f"SELECT * FROM {name}"
|
|
213
|
+
pandas_df = self._execute_query(f"SELECT * FROM {name}")
|
|
206
214
|
if not schema:
|
|
207
215
|
schema = _pandas_to_spark_schema(pandas_df)
|
|
208
216
|
return self.spark.createDataFrame(pandas_df, schema)
|
|
@@ -211,19 +219,17 @@ class SFCLIDataCloudReader(BaseDataCloudReader):
|
|
|
211
219
|
self,
|
|
212
220
|
name: str,
|
|
213
221
|
schema: Union[AtomicType, StructType, str, None] = None,
|
|
214
|
-
row_limit: int = 1000,
|
|
215
222
|
) -> PySparkDataFrame:
|
|
216
223
|
"""Read a Data Model Object (DMO) from Data Cloud.
|
|
217
224
|
|
|
218
225
|
Args:
|
|
219
226
|
name: DMO name.
|
|
220
227
|
schema: Optional explicit schema.
|
|
221
|
-
row_limit: Maximum rows to fetch.
|
|
222
228
|
|
|
223
229
|
Returns:
|
|
224
230
|
PySpark DataFrame.
|
|
225
231
|
"""
|
|
226
|
-
pandas_df = self._execute_query(f"SELECT * FROM {name}"
|
|
232
|
+
pandas_df = self._execute_query(f"SELECT * FROM {name}")
|
|
227
233
|
if not schema:
|
|
228
234
|
schema = _pandas_to_spark_schema(pandas_df)
|
|
229
235
|
return self.spark.createDataFrame(pandas_df, schema)
|
|
@@ -90,7 +90,7 @@ class PrintDataCloudWriter(BaseDataCloudWriter):
|
|
|
90
90
|
schema.
|
|
91
91
|
"""
|
|
92
92
|
# Get DLO schema (no data, just schema)
|
|
93
|
-
dlo_df = self.reader.read_dlo(dlo_name
|
|
93
|
+
dlo_df = self.reader.read_dlo(dlo_name).limit(0)
|
|
94
94
|
dlo_columns = set(dlo_df.columns)
|
|
95
95
|
df_columns = set(dataframe.columns)
|
|
96
96
|
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Copyright (c) 2025, Salesforce, Inc.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from abc import ABC, abstractmethod
|
|
18
|
+
from typing import TYPE_CHECKING
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from datacustomcode.llm_gateway.types.generate_text_request import (
|
|
22
|
+
GenerateTextRequest,
|
|
23
|
+
)
|
|
24
|
+
from datacustomcode.llm_gateway.types.generate_text_response import (
|
|
25
|
+
GenerateTextResponse,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class LLMGateway(ABC):
    """Abstract interface for sending text-generation requests to an LLM.

    Concrete gateways subclass this and implement :meth:`generate_text`.
    Deriving from ``ABC`` is what makes ``@abstractmethod`` effective: without
    it the decorator is inert, the base class can be instantiated, and its
    ``generate_text`` silently returns ``None``.
    """

    def __init__(self) -> None:
        """Initialize the gateway; the base class holds no state."""

    @abstractmethod
    def generate_text(self, request: GenerateTextRequest) -> GenerateTextResponse:
        """Generate text for *request*.

        Args:
            request: The text-generation request to send.

        Returns:
            The gateway's response for the request.
        """
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Copyright (c) 2025, Salesforce, Inc.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
from datacustomcode.llm_gateway.base import LLMGateway
|
|
17
|
+
from datacustomcode.llm_gateway.types.generate_text_request import GenerateTextRequest
|
|
18
|
+
from datacustomcode.llm_gateway.types.generate_text_response import GenerateTextResponse
|
|
19
|
+
from datacustomcode.llm_gateway.types.generate_text_response_builder import (
|
|
20
|
+
GenerateTextResponseBuilder,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class DefaultLLMGateway(LLMGateway):
    """Gateway that answers every request with a fixed, canned response."""

    def generate_text(self, request: GenerateTextRequest) -> GenerateTextResponse:
        """Return a static successful response.

        Args:
            request: The text-generation request. It is not inspected; the
                same payload is returned for every call.

        Returns:
            A ``GenerateTextResponse`` built from a hard-coded ``v1`` payload
            with status code 200 and the generated text ``"Hello World"``.
        """
        generation = {"generatedText": "Hello World"}
        canned_payload = {
            "version": "v1",
            "status_code": 200,
            "data": {"generation": generation},
        }
        return GenerateTextResponseBuilder.build(canned_payload)
|
salesforce_data_customcode-3.0.0/src/datacustomcode/llm_gateway/types/generate_text_request.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# Copyright (c) 2025, Salesforce, Inc.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
from typing import (
|
|
17
|
+
Any,
|
|
18
|
+
Dict,
|
|
19
|
+
Literal,
|
|
20
|
+
Optional,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
from pydantic import (
|
|
24
|
+
BaseModel,
|
|
25
|
+
ConfigDict,
|
|
26
|
+
Field,
|
|
27
|
+
)
|
|
28
|
+
from pydantic.alias_generators import to_camel
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class GenerateTextRequest(BaseModel):
    # Request payload for an LLM text-generation call. Fields are declared in
    # snake_case and aliased to camelCase through ``to_camel``; because
    # ``populate_by_name`` is enabled, either spelling is accepted on input.

    model_config = ConfigDict(
        alias_generator=to_camel,
        populate_by_name=True,  # Allows both snake_case and camelCase input
        # ``model_name`` starts with pydantic's protected ``model_`` prefix.
        # Clearing the protected namespaces avoids the namespace-conflict
        # warning emitted by pydantic releases that protect that prefix.
        protected_namespaces=(),
    )

    # Only "v1" is accepted; any other value fails validation.
    version: Literal["v1"] = Field(
        default="v1", description="API version, must be 'v1'"
    )
    # Required and must be non-empty (min_length=1).
    model_name: str = Field(..., min_length=1, description="Name of the model to use")
    # Required; no length constraint is declared.
    prompt: str = Field(..., description="Input prompt")
    localization: Optional[Dict[str, Any]] = Field(
        default=None, description="Localization settings"
    )
    tags: Optional[Dict[str, Any]] = Field(default=None, description="Additional tags")
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# Copyright (c) 2025, Salesforce, Inc.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
from typing import (
|
|
17
|
+
Any,
|
|
18
|
+
Dict,
|
|
19
|
+
Optional,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
from datacustomcode.llm_gateway.types.generate_text_request import GenerateTextRequest
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class GenerateTextRequestBuilder:
    """Fluent builder that collects the fields of a ``GenerateTextRequest``.

    Every ``set_*`` method stores its argument and returns the builder so
    calls can be chained; ``build`` creates the request from the stored values.
    """

    def __init__(self) -> None:
        """Start with an empty prompt and model name and no optional fields."""
        self._localization: Optional[Dict[str, Any]] = None
        self._tags: Optional[Dict[str, Any]] = None
        self._model_name = ""
        self._prompt = ""

    def set_prompt(self, prompt: str) -> "GenerateTextRequestBuilder":
        """Store the input prompt and return the builder."""
        self._prompt = prompt
        return self

    def set_model(self, model_name: str) -> "GenerateTextRequestBuilder":
        """Store the model name and return the builder."""
        self._model_name = model_name
        return self

    def set_localization(
        self,
        localization: Optional[Dict[str, Any]] = None,
        locale: Optional[str] = None,
    ) -> "GenerateTextRequestBuilder":
        """Set localization from a full dict or from a single locale string.

        Args:
            localization: Full localization dict. When given it is stored
                as-is and ``locale`` is ignored.
            locale: Locale string used as the default locale, the only input
                locale (probability 1.0) and the only expected locale.

        Returns:
            self for method chaining

        Raises:
            ValueError: If both ``localization`` and ``locale`` are ``None``.
        """
        if localization is None and locale is None:
            raise ValueError("Must provide either localization or locale")

        if localization is not None:
            # An explicit dict always wins over the shorthand locale.
            self._localization = localization
            return self

        self._localization = {
            "defaultLocale": locale,
            "inputLocales": [{"locale": locale, "probability": 1.0}],
            "expectedLocales": [locale],
        }
        return self

    def set_tags(self, tags: Dict[str, Any]) -> "GenerateTextRequestBuilder":
        """Store the tags dict (by reference) and return the builder."""
        self._tags = tags
        return self

    def build(self) -> GenerateTextRequest:
        """Create a ``GenerateTextRequest`` from the values collected so far."""
        return GenerateTextRequest(
            prompt=self._prompt,
            model_name=self._model_name,
            localization=self._localization,
            tags=self._tags,
        )
|
salesforce_data_customcode-3.0.0/src/datacustomcode/llm_gateway/types/generate_text_response.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# Copyright (c) 2025, Salesforce, Inc.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
from typing import (
|
|
17
|
+
Any,
|
|
18
|
+
Dict,
|
|
19
|
+
Optional,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
from pydantic import BaseModel, Field
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class GenerateTextResponse(BaseModel):
    """Response from LLM text generation"""

    version: str = Field(default="v1", description="API version")
    status_code: int = Field(..., description="HTTP status code", ge=0)
    data: Optional[Dict[str, Any]] = Field(default=None, description="Response data")

    @property
    def is_success(self) -> bool:
        """Check if request succeeded (status code is exactly 200)."""
        return self.status_code == 200

    @property
    def is_error(self) -> bool:
        """Check if request failed (any status code other than 200)."""
        return not self.is_success

    @property
    def text(self) -> str:
        """Generated text (convenience property).

        Returns:
            ``data["generation"]["generatedText"]`` as a string for a
            successful response, or ``""`` when the response failed, has no
            data, or carries no generated text.
        """
        if self.is_success and self.data:
            generation = self.data.get("generation", {})
            if isinstance(generation, dict):
                text = generation.get("generatedText", "")
                return str(text) if text else ""
        return ""

    @property
    def error_code(self) -> str:
        """Error code (convenience property).

        Returns:
            ``""`` for a successful response. For a failed response, the
            ``errorCode`` entry of ``data`` as a string, falling back to the
            status code when ``data`` is missing, empty, or has no such entry.
        """
        if self.is_success:
            return ""
        # A failed response may arrive without a body; the status code is then
        # the only error information available, so report it instead of "".
        payload = self.data or {}
        return str(payload.get("errorCode", str(self.status_code)))
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Copyright (c) 2025, Salesforce, Inc.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
from typing import Any, Dict
|
|
17
|
+
|
|
18
|
+
from datacustomcode.llm_gateway.types.generate_text_response import GenerateTextResponse
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class GenerateTextResponseBuilder:
    """Helper for producing ``GenerateTextResponse`` objects.

    The setters store values on the instance and return it for chaining.
    ``build`` is static: it validates the dict it is handed and does not read
    the values stored by the setters.
    """

    def __init__(self):
        """Start with no status code or data and the API version "v1"."""
        self._data = None
        self._status_code = None
        self._version = "v1"  # Default API version string

    def set_status_code(self, status_code: int):
        """Store the status code and return the builder for chaining."""
        self._status_code = status_code
        return self

    def set_data(self, data: dict):
        """Store the response data (by reference) and return the builder."""
        self._data = data
        return self

    @staticmethod
    def build(response_dict: Dict[str, Any]) -> GenerateTextResponse:
        """Validate *response_dict* into a ``GenerateTextResponse``.

        Args:
            response_dict: Raw response mapping to validate.

        Returns:
            The model produced by ``GenerateTextResponse.model_validate``.
        """
        validated = GenerateTextResponse.model_validate(response_dict)
        return validated
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Copyright (c) 2025, Salesforce, Inc.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Copyright (c) 2025, Salesforce, Inc.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
@@ -2,6 +2,11 @@ import logging
|
|
|
2
2
|
from typing import List
|
|
3
3
|
from uuid import uuid4
|
|
4
4
|
|
|
5
|
+
from datacustomcode.function import Runtime
|
|
6
|
+
from datacustomcode.llm_gateway.types.generate_text_request_builder import (
|
|
7
|
+
GenerateTextRequestBuilder,
|
|
8
|
+
)
|
|
9
|
+
|
|
5
10
|
logger = logging.getLogger(__name__)
|
|
6
11
|
|
|
7
12
|
|
|
@@ -33,7 +38,7 @@ def chunk_text(text: str, chunk_size: int = 1000) -> List[str]:
|
|
|
33
38
|
return chunks
|
|
34
39
|
|
|
35
40
|
|
|
36
|
-
def function(request: dict) -> dict:
|
|
41
|
+
def function(request: dict, runtime: Runtime) -> dict:
|
|
37
42
|
logger.info("Inside Function")
|
|
38
43
|
logger.info(request)
|
|
39
44
|
|
|
@@ -41,6 +46,15 @@ def function(request: dict) -> dict:
|
|
|
41
46
|
output_chunks = []
|
|
42
47
|
current_seq_no = 1 # Start sequence number from 1
|
|
43
48
|
|
|
49
|
+
builder = GenerateTextRequestBuilder()
|
|
50
|
+
llm_request = builder.set_prompt("Hello").set_model("modelName").build()
|
|
51
|
+
llm_response = runtime.llm_gateway.generate_text(llm_request)
|
|
52
|
+
|
|
53
|
+
if llm_response.is_success:
|
|
54
|
+
print(llm_response.text)
|
|
55
|
+
else:
|
|
56
|
+
print(llm_response.error_code)
|
|
57
|
+
|
|
44
58
|
for item in items:
|
|
45
59
|
# Item is DocElement as dict
|
|
46
60
|
logger.info(f"Processing item: {item}")
|
|
@@ -107,7 +121,7 @@ if __name__ == "__main__":
|
|
|
107
121
|
}
|
|
108
122
|
|
|
109
123
|
# Run the function
|
|
110
|
-
result = function(test_request)
|
|
124
|
+
result = function(test_request, Runtime())
|
|
111
125
|
|
|
112
126
|
# Print the results in a more readable format
|
|
113
127
|
print("\nChunking Results:")
|
|
File without changes
|
|
File without changes
|
{salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/__init__.py
RENAMED
|
File without changes
|
{salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/auth.py
RENAMED
|
File without changes
|
{salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/cli.py
RENAMED
|
File without changes
|
{salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/cmd.py
RENAMED
|
File without changes
|
|
File without changes
|
{salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/deploy.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/io/base.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/mixin.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/py.typed
RENAMED
|
File without changes
|
{salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/run.py
RENAMED
|
File without changes
|
{salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/scan.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/template.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{salesforce_data_customcode-1.1.0 → salesforce_data_customcode-3.0.0}/src/datacustomcode/version.py
RENAMED
|
File without changes
|