salesforce-data-customcode 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datacustomcode/__init__.py +20 -0
- datacustomcode/cli.py +142 -0
- datacustomcode/client.py +227 -0
- datacustomcode/cmd.py +105 -0
- datacustomcode/config.py +149 -0
- datacustomcode/config.yaml +15 -0
- datacustomcode/credentials.py +97 -0
- datacustomcode/deploy.py +379 -0
- datacustomcode/io/__init__.py +14 -0
- datacustomcode/io/base.py +28 -0
- datacustomcode/io/reader/__init__.py +14 -0
- datacustomcode/io/reader/base.py +34 -0
- datacustomcode/io/reader/query_api.py +115 -0
- datacustomcode/io/writer/__init__.py +14 -0
- datacustomcode/io/writer/base.py +49 -0
- datacustomcode/io/writer/csv.py +41 -0
- datacustomcode/io/writer/print.py +33 -0
- datacustomcode/mixin.py +94 -0
- datacustomcode/py.typed +0 -0
- datacustomcode/run.py +47 -0
- datacustomcode/scan.py +153 -0
- datacustomcode/template.py +36 -0
- datacustomcode/templates/.devcontainer/devcontainer.json +10 -0
- datacustomcode/templates/Dockerfile +20 -0
- datacustomcode/templates/README.md +0 -0
- datacustomcode/templates/jupyterlab.sh +97 -0
- datacustomcode/templates/payload/config.json +1 -0
- datacustomcode/templates/payload/entrypoint.py +10 -0
- datacustomcode/templates/requirements-dev.txt +10 -0
- datacustomcode/templates/requirements.txt +1 -0
- salesforce_data_customcode-0.1.0.dist-info/LICENSE.txt +206 -0
- salesforce_data_customcode-0.1.0.dist-info/METADATA +159 -0
- salesforce_data_customcode-0.1.0.dist-info/RECORD +35 -0
- salesforce_data_customcode-0.1.0.dist-info/WHEEL +4 -0
- salesforce_data_customcode-0.1.0.dist-info/entry_points.txt +5 -0
datacustomcode/credentials.py
ADDED
@@ -0,0 +1,97 @@
+ # Copyright (c) 2025, Salesforce, Inc.
+ # SPDX-License-Identifier: Apache-2
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ from __future__ import annotations
+
+ import configparser
+ from dataclasses import dataclass
+ import os
+
+ from loguru import logger
+
+ ENV_CREDENTIALS = {
+     "username": "SFDC_USERNAME",
+     "password": "SFDC_PASSWORD",
+     "client_id": "SFDC_CLIENT_ID",
+     "client_secret": "SFDC_CLIENT_SECRET",
+     "login_url": "SFDC_LOGIN_URL",
+ }
+ INI_FILE = os.path.expanduser("~/.datacustomcode/credentials.ini")
+
+
+ @dataclass
+ class Credentials:
+     username: str
+     password: str
+     client_id: str
+     client_secret: str
+     login_url: str
+
+     @classmethod
+     def from_ini(
+         cls,
+         profile: str = "default",
+         ini_file: str = INI_FILE,
+     ) -> Credentials:
+         config = configparser.ConfigParser()
+         logger.debug(f"Reading {ini_file} for profile {profile}")
+         config.read(ini_file)
+         return cls(
+             username=config[profile]["username"],
+             password=config[profile]["password"],
+             client_id=config[profile]["client_id"],
+             client_secret=config[profile]["client_secret"],
+             login_url=config[profile]["login_url"],
+         )
+
+     @classmethod
+     def from_env(cls) -> Credentials:
+         try:
+             return cls(**{k: os.environ[v] for k, v in ENV_CREDENTIALS.items()})
+         except KeyError as exc:
+             raise ValueError(
+                 f"All of {ENV_CREDENTIALS.values()} must be set in environment."
+             ) from exc
+
+     @classmethod
+     def from_available(cls) -> Credentials:
+         if os.environ.get("SFDC_USERNAME"):
+             return cls.from_env()
+         if os.path.exists(INI_FILE):
+             return cls.from_ini()
+         raise ValueError(
+             "Credentials not found in env or ini file. "
+             "Run `datacustomcode configure` to create a credentials file."
+         )
+
+     def update_ini(self, profile: str = "default", ini_file: str = INI_FILE):
+         config = configparser.ConfigParser()
+
+         expanded_ini_file = os.path.expanduser(ini_file)
+         os.makedirs(os.path.dirname(expanded_ini_file), exist_ok=True)
+
+         if os.path.exists(expanded_ini_file):
+             config.read(expanded_ini_file)
+
+         if profile not in config:
+             config[profile] = {}
+
+         config[profile]["username"] = self.username
+         config[profile]["password"] = self.password
+         config[profile]["client_id"] = self.client_id
+         config[profile]["client_secret"] = self.client_secret
+         config[profile]["login_url"] = self.login_url
+
+         with open(expanded_ini_file, "w") as f:
+             config.write(f)
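Editor's note: a short usage sketch of the credentials API above, illustrative only; every credential value below is a placeholder. As the code shows, `Credentials.from_available()` prefers the `SFDC_*` environment variables and falls back to the INI file.

from datacustomcode.credentials import Credentials

# Write a "default" profile to ~/.datacustomcode/credentials.ini.
creds = Credentials(
    username="user@example.com",           # placeholder
    password="secret",                     # placeholder
    client_id="connected-app-client-id",   # placeholder
    client_secret="connected-app-secret",  # placeholder
    login_url="https://login.salesforce.com",
)
creds.update_ini()

# Later lookups check the SFDC_* environment variables first, then the
# INI file, and raise ValueError if neither source is present.
loaded = Credentials.from_available()
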
datacustomcode/deploy.py
ADDED
@@ -0,0 +1,379 @@
+ # Copyright (c) 2025, Salesforce, Inc.
+ # SPDX-License-Identifier: Apache-2
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ from __future__ import annotations
+
+ import copy
+ from html import unescape
+ import json
+ import os
+ import shutil
+ import tarfile
+ import tempfile
+ import time
+ from typing import (
+     TYPE_CHECKING,
+     Any,
+     Callable,
+     Dict,
+     List,
+     Union,
+ )
+
+ from loguru import logger
+ from pydantic import BaseModel
+ import requests
+
+ from datacustomcode.cmd import cmd_output
+ from datacustomcode.scan import scan_file
+
+ if TYPE_CHECKING:
+     from datacustomcode.credentials import Credentials
+
+ DATA_CUSTOM_CODE_PATH = "services/data/v63.0/ssot/data-custom-code"
+ DATA_TRANSFORMS_PATH = "services/data/v63.0/ssot/data-transforms"
+ AUTH_PATH = "services/oauth2/token"
+ WAIT_FOR_DEPLOYMENT_TIMEOUT = 3000  # seconds
+
+
+ class TransformationJobMetadata(BaseModel):
+     name: str
+     version: str
+     description: str
+
+
+ def _join_strip_url(*args: str) -> str:
+     return "/".join(arg.strip("/") for arg in args)
+
+
+ JSONValue = Union[
+     Dict[str, "JSONValue"], List["JSONValue"], str, int, float, bool, None
+ ]
+
+
+ def _make_api_call(
+     url: str,
+     method: str,
+     headers: Union[dict, None] = None,
+     token: Union[str, None] = None,
+     **kwargs,
+ ) -> dict[str, JSONValue]:
+     """Make a request to Data Cloud Custom Code API."""
+     headers = headers or {}
+     if token:
+         headers["Authorization"] = f"Bearer {token}"
+
+     logger.debug(f"Making API call: {method} {url}")
+     logger.debug(f"Headers: {headers}")
+     logger.debug(f"Request params: {kwargs}")
+
+     response = requests.request(method=method, url=url, headers=headers, **kwargs)
+     response.raise_for_status()
+     json_response = response.json()
+     assert isinstance(
+         json_response, dict
+     ), f"Unexpected response type: {type(json_response)}"
+     return json_response
+
+
+ class AccessTokenResponse(BaseModel):
+     access_token: str
+     instance_url: str
+
+
+ def _retrieve_access_token(credentials: Credentials) -> AccessTokenResponse:
+     """Get a token for the Salesforce API."""
+     logger.debug("Getting oauth token...")
+
+     url = f"{credentials.login_url.rstrip('/')}/{AUTH_PATH.lstrip('/')}"
+
+     data = {
+         "grant_type": "password",
+         "username": credentials.username,
+         "password": credentials.password,
+         "client_id": credentials.client_id,
+         "client_secret": credentials.client_secret,
+     }
+     response = _make_api_call(url, "POST", data=data)
+     return AccessTokenResponse(**response)
+
+
+ class CreateDeploymentResponse(BaseModel):
+     fileUploadUrl: str
+
+
+ def create_deployment(
+     access_token: AccessTokenResponse, metadata: TransformationJobMetadata
+ ) -> CreateDeploymentResponse:
+     """Create a custom code deployment in the DataCloud."""
+     url = _join_strip_url(access_token.instance_url, DATA_CUSTOM_CODE_PATH)
+     body = {
+         "label": metadata.name,
+         "name": metadata.name,
+         "description": metadata.description,
+         "version": metadata.version,
+         "computeType": "CPU_M",
+     }
+     logger.debug(f"Creating deployment {metadata.name}...")
+     try:
+         response = _make_api_call(
+             url, "POST", token=access_token.access_token, json=body
+         )
+         return CreateDeploymentResponse(**response)
+     except requests.HTTPError as exc:
+         if exc.response.status_code == 409:
+             raise ValueError(
+                 f"Deployment {metadata.name} exists. Please use a different name."
+             ) from exc
+         raise
+
+
+ DOCKER_IMAGE_NAME = "datacloud-custom-code"
+ DEPENDENCIES_ARCHIVE_NAME = "dependencies.tar.gz"
+ ZIP_FILE_NAME = "deployment.zip"
+
+
+ def prepare_dependency_archive(directory: str) -> None:
+     cmd = f"docker images -q {DOCKER_IMAGE_NAME}"
+     image_exists = cmd_output(cmd)
+
+     if not image_exists:
+         logger.debug("Building docker image...")
+         cmd = f"docker build -t {DOCKER_IMAGE_NAME} ."
+         cmd_output(cmd)
+
+     with tempfile.TemporaryDirectory() as temp_dir:
+         shutil.copy("requirements.txt", temp_dir)
+         cmd = (
+             f"docker run --rm "
+             f"-v {temp_dir}:/dependencies "
+             f"{DOCKER_IMAGE_NAME} "
+             f'/bin/bash -c "cd /dependencies && pip download -r requirements.txt"'
+         )
+         cmd_output(cmd)
+
+         archives_dir = os.path.join(directory, "archives")
+         os.makedirs(archives_dir, exist_ok=True)
+         archive_file = os.path.join(archives_dir, DEPENDENCIES_ARCHIVE_NAME)
+         with tarfile.open(archive_file, "w:gz") as tar:
+             for file in os.listdir(temp_dir):
+                 tar.add(os.path.join(temp_dir, file), arcname=file)
+
+         logger.debug(f"Dependencies downloaded and archived to {archive_file}")
+
+
+ def zip_and_upload_directory(directory: str, file_upload_url: str) -> None:
+     file_upload_url = unescape(file_upload_url)
+
+     logger.debug(f"Zipping directory... {directory}")
+     shutil.make_archive(ZIP_FILE_NAME.rstrip(".zip"), "zip", directory)
+
+     logger.debug(f"Uploading deployment to {file_upload_url}")
+     with open(ZIP_FILE_NAME, "rb") as zip_file:
+         response = requests.put(
+             file_upload_url, data=zip_file, headers={"Content-Type": "application/zip"}
+         )
+         response.raise_for_status()
+
+
+ class DeploymentsResponse(BaseModel):
+     deploymentStatus: str
+
+
+ def get_deployments(
+     access_token: AccessTokenResponse, metadata: TransformationJobMetadata
+ ) -> DeploymentsResponse:
+     """Get all custom code deployments from the DataCloud."""
+     url = _join_strip_url(
+         access_token.instance_url, DATA_CUSTOM_CODE_PATH, metadata.name
+     )
+     response = _make_api_call(url, "GET", token=access_token.access_token)
+     return DeploymentsResponse(**response)
+
+
+ def wait_for_deployment(
+     access_token: AccessTokenResponse,
+     metadata: TransformationJobMetadata,
+     callback: Union[Callable[[str], None], None] = None,
+ ) -> None:
+     """Wait for deployment to complete.
+
+     Args:
+         callback: Optional callback function that receives the deployment status
+     """
+     start_time = time.time()
+     logger.debug("Waiting for deployment to complete")
+
+     while True:
+         deployment_status = get_deployments(access_token, metadata)
+         status = deployment_status.deploymentStatus
+         if (time.time() - start_time) > WAIT_FOR_DEPLOYMENT_TIMEOUT:
+             raise TimeoutError("Deployment timed out.")
+
+         if callback:
+             callback(status)
+         if status == "Deployed":
+             logger.debug(
+                 f"Deployment completed. Elapsed time: {time.time() - start_time}"
+             )
+             break
+         time.sleep(1)
+
+
+ DATA_TRANSFORM_REQUEST_TEMPLATE: dict[str, Any] = {
+     "metadata": {
+         "dbt_schema_version": "https://schemas.getdbt.com/dbt/manifest/v8.json",
+         "dbt_version": "1.4.6",
+         "generated_at": "2023-04-25T18:54:11.375589Z",
+         "invocation_id": "d6c68c69-533a-4d54-861e-1493d6cd8092",
+         "env": {},
+         "project_id": "jaffle_shop",
+         "user_id": "1ca8403c-a1a5-43af-8b88-9265e948b9d2",
+         "send_anonymous_usage_stats": True,
+         "adapter_type": "spark",
+     },
+     "nodes": {
+         "model.dcexample.dim_listings_w_hosts": {
+             "name": "dim_listings_w_hosts",
+             "resource_type": "model",
+             "relation_name": "{OUTPUT_DLO}",
+             "config": {"materialized": "table"},
+             "compiled_code": "",
+             "depends_on": {"nodes": []},
+         }
+     },
+     "sources": {
+         "source.dcexample.listings": {
+             "name": "listings",
+             "resource_type": "source",
+             "relation_name": "{INPUT_DLO}",
+             "identifier": "{INPUT_DLO}",
+         }
+     },
+     "macros": {
+         "macro.dcexample.byoc": {
+             "name": "byoc_example",
+             "resource_type": "macro",
+             "path": "",
+             "original_file_path": "",
+             "unique_id": "unique id",
+             "macro_sql": "",
+             "supported_languages": None,
+             "arguments": [{"name": "{SCRIPT_NAME}", "type": "BYOC_SCRIPT"}],
+         }
+     },
+ }
+
+
+ class DataTransformConfig(BaseModel):
+     input: Union[str, list[str]]
+     output: Union[str, list[str]]
+
+
+ DATA_TRANSFORM_CONFIG_TEMPLATE: dict[str, Any] = {
+     "entryPoint": "entrypoint.py",
+     "dataspace": "default",
+     "permissions": {"read": {"dlo": ""}, "write": {"dlo": ""}},
+ }
+
+
+ def get_data_transform_config(directory: str) -> DataTransformConfig:
+     """Get the data transform config from the entrypoint.py file."""
+     entrypoint_file = os.path.join(directory, "entrypoint.py")
+     data_access_layer_calls = scan_file(entrypoint_file)
+     input_ = data_access_layer_calls.input_str
+     output = data_access_layer_calls.output_str
+     return DataTransformConfig(input=input_, output=output)
+
+
+ def create_data_transform_config(directory: str) -> None:
+     """Create a data transform config.json file in the directory."""
+     data_transform_config = get_data_transform_config(directory)
+     # deepcopy rather than .copy(): the template holds nested dicts, so a
+     # shallow copy would let the writes below mutate the module-level template
+     request_hydrated = copy.deepcopy(DATA_TRANSFORM_CONFIG_TEMPLATE)
+     request_hydrated["permissions"]["read"]["dlo"] = data_transform_config.input
+     request_hydrated["permissions"]["write"]["dlo"] = data_transform_config.output
+     logger.debug(f"Creating data transform config in {directory}")
+     with open(os.path.join(directory, "config.json"), "w") as config_file:
+         json.dump(request_hydrated, config_file, indent=4)
+
+
+ def create_data_transform(
+     directory: str,
+     access_token: AccessTokenResponse,
+     metadata: TransformationJobMetadata,
+ ) -> dict:
+     """Create a data transform in the DataCloud."""
+     script_name = metadata.name
+     data_transform_config = get_data_transform_config(directory)
+     request_hydrated = copy.deepcopy(DATA_TRANSFORM_REQUEST_TEMPLATE)
+     # The model node is the write target ("{OUTPUT_DLO}") and the source is
+     # the read target ("{INPUT_DLO}"), matching the template placeholders.
+     request_hydrated["nodes"]["model.dcexample.dim_listings_w_hosts"][
+         "relation_name"
+     ] = data_transform_config.output
+     request_hydrated["sources"]["source.dcexample.listings"][
+         "relation_name"
+     ] = data_transform_config.input
+     request_hydrated["sources"]["source.dcexample.listings"][
+         "identifier"
+     ] = data_transform_config.input
+     request_hydrated["macros"]["macro.dcexample.byoc"]["arguments"][0][
+         "name"
+     ] = script_name
+
+     body = {
+         "definition": {
+             "type": "DBT",
+             "manifest": request_hydrated,
+             "version": "56.0",
+         },
+         "label": f"{metadata.name}",
+         "name": f"{metadata.name}",
+         "type": "BATCH",
+     }
+
+     url = _join_strip_url(access_token.instance_url, DATA_TRANSFORMS_PATH)
+     response = _make_api_call(url, "POST", token=access_token.access_token, json=body)
+     return response
+
+
+ def deploy_full(
+     directory: str,
+     metadata: TransformationJobMetadata,
+     credentials: Credentials,
+     callback=None,
+ ) -> AccessTokenResponse:
+     """Deploy a data transform in the DataCloud."""
+     access_token = _retrieve_access_token(credentials)
+
+     # prepare payload
+     prepare_dependency_archive(directory)
+     create_data_transform_config(directory)
+
+     # create deployment and upload payload
+     deployment = create_deployment(access_token, metadata)
+     zip_and_upload_directory(directory, deployment.fileUploadUrl)
+     wait_for_deployment(access_token, metadata, callback)
+
+     # create data transform
+     create_data_transform(directory, access_token, metadata)
+     return access_token
+
+
+ def run_data_transform(
+     access_token: AccessTokenResponse, metadata: TransformationJobMetadata
+ ) -> dict:
+     logger.debug(f"Triggering data transform {metadata.name}")
+     url = _join_strip_url(
+         access_token.instance_url, DATA_TRANSFORMS_PATH, metadata.name, "actions", "run"
+     )
+     # authenticated like the other Data Cloud calls above
+     return _make_api_call(url, "POST", token=access_token.access_token)
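Editor's note: the steps above compose into a single call via `deploy_full`. A minimal driver sketch, assuming Docker is available, the current directory holds the payload (an `entrypoint.py` plus `requirements.txt`, as in `templates/payload`), and credentials are discoverable; the job name and version are placeholders.

from datacustomcode.credentials import Credentials
from datacustomcode.deploy import (
    TransformationJobMetadata,
    deploy_full,
    run_data_transform,
)

metadata = TransformationJobMetadata(
    name="my_transform",        # placeholder job name
    version="0.0.1",            # placeholder version
    description="example deployment",
)
credentials = Credentials.from_available()

# Builds the dependency archive, uploads the zipped payload, polls until
# the deployment reports "Deployed", then registers the data transform.
access_token = deploy_full(".", metadata, credentials, callback=print)
run_data_transform(access_token, metadata)
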
datacustomcode/io/__init__.py
ADDED
@@ -0,0 +1,14 @@
+ # Copyright (c) 2025, Salesforce, Inc.
+ # SPDX-License-Identifier: Apache-2
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
datacustomcode/io/base.py
ADDED
@@ -0,0 +1,28 @@
+ # Copyright (c) 2025, Salesforce, Inc.
+ # SPDX-License-Identifier: Apache-2
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ from __future__ import annotations
+
+ from abc import ABC
+ from typing import TYPE_CHECKING
+
+ from datacustomcode.mixin import UserExtendableNamedConfigMixin
+
+ if TYPE_CHECKING:
+     from pyspark.sql import SparkSession
+
+
+ class BaseDataAccessLayer(ABC, UserExtendableNamedConfigMixin):
+     def __init__(self, spark: SparkSession):
+         self.spark = spark
datacustomcode/io/reader/__init__.py
ADDED
@@ -0,0 +1,14 @@
+ # Copyright (c) 2025, Salesforce, Inc.
+ # SPDX-License-Identifier: Apache-2
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
datacustomcode/io/reader/base.py
ADDED
@@ -0,0 +1,34 @@
+ # Copyright (c) 2025, Salesforce, Inc.
+ # SPDX-License-Identifier: Apache-2
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ from __future__ import annotations
+
+ from abc import abstractmethod
+ from typing import TYPE_CHECKING
+
+ from datacustomcode.io.base import BaseDataAccessLayer
+
+ if TYPE_CHECKING:
+     from pyspark.sql import DataFrame as PySparkDataFrame, SparkSession
+
+
+ class BaseDataCloudReader(BaseDataAccessLayer):
+     def __init__(self, spark: SparkSession):
+         self.spark = spark
+
+     @abstractmethod
+     def read_dlo(self, name: str) -> PySparkDataFrame: ...
+
+     @abstractmethod
+     def read_dmo(self, name: str) -> PySparkDataFrame: ...
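Editor's note: concrete readers subclass `BaseDataCloudReader` and implement the two abstract methods. A minimal sketch of a hypothetical local-development reader; the CSV paths are invented, and the `CONFIG_NAME` attribute is assumed (by analogy with the reader in the next file) to be how `UserExtendableNamedConfigMixin` identifies implementations.

from pyspark.sql import DataFrame as PySparkDataFrame

from datacustomcode.io.reader.base import BaseDataCloudReader


class LocalCSVReader(BaseDataCloudReader):
    """Hypothetical reader that serves DLO/DMO reads from local CSV files."""

    CONFIG_NAME = "LocalCSVReader"

    def read_dlo(self, name: str) -> PySparkDataFrame:
        # e.g. ./data/dlo/<name>.csv (invented layout)
        return self.spark.read.csv(f"data/dlo/{name}.csv", header=True)

    def read_dmo(self, name: str) -> PySparkDataFrame:
        return self.spark.read.csv(f"data/dmo/{name}.csv", header=True)
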
datacustomcode/io/reader/query_api.py
ADDED
@@ -0,0 +1,115 @@
+ # Copyright (c) 2025, Salesforce, Inc.
+ # SPDX-License-Identifier: Apache-2
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ from __future__ import annotations
+
+ import logging
+ from typing import (
+     TYPE_CHECKING,
+     Final,
+     Union,
+ )
+
+ from pyspark.sql.types import (
+     BooleanType,
+     DoubleType,
+     LongType,
+     StringType,
+     StructField,
+     StructType,
+     TimestampType,
+ )
+ from salesforcecdpconnector.connection import SalesforceCDPConnection
+
+ from datacustomcode.credentials import Credentials
+ from datacustomcode.io.reader.base import BaseDataCloudReader
+
+ if TYPE_CHECKING:
+     import pandas
+     from pyspark.sql import DataFrame as PySparkDataFrame, SparkSession
+     from pyspark.sql.types import AtomicType
+
+ logger = logging.getLogger(__name__)
+
+
+ SQL_QUERY_TEMPLATE: Final = "SELECT * FROM {}"
+ PANDAS_TYPE_MAPPING = {
+     "object": StringType(),
+     "int64": LongType(),
+     "float64": DoubleType(),
+     "datetime64[ns]": TimestampType(),
+     "datetime64[ns, UTC]": TimestampType(),
+     "bool": BooleanType(),
+ }
+
+
+ def _pandas_to_spark_schema(
+     pandas_df: pandas.DataFrame, nullable: bool = True
+ ) -> StructType:
+     fields = []
+     for column, dtype in pandas_df.dtypes.items():
+         spark_type = PANDAS_TYPE_MAPPING.get(str(dtype), StringType())
+         fields.append(StructField(column, spark_type, nullable))
+     return StructType(fields)
+
+
+ class QueryAPIDataCloudReader(BaseDataCloudReader):
+     """DataCloud reader using Query API.
+
+     This reader emulates data access within Data Cloud by calling the Query API.
+     """
+
+     CONFIG_NAME = "QueryAPIDataCloudReader"
+
+     def __init__(self, spark: SparkSession) -> None:
+         self.spark = spark
+         credentials = Credentials.from_available()
+
+         self._conn = SalesforceCDPConnection(
+             credentials.login_url,
+             credentials.username,
+             credentials.password,
+             credentials.client_id,
+             credentials.client_secret,
+         )
+
+     def read_dlo(
+         self, name: str, schema: Union[AtomicType, StructType, str, None] = None
+     ) -> PySparkDataFrame:
+         """
+         Read a Data Lake Object (DLO) from the Data Cloud.
+
+         Args:
+             name (str): The name of the DLO.
+             schema (Optional[Union[AtomicType, StructType, str]]): Schema of the DLO.
+
+         Returns:
+             PySparkDataFrame: The PySpark DataFrame.
+         """
+         pandas_df = self._conn.get_pandas_dataframe(SQL_QUERY_TEMPLATE.format(name))
+         if not schema:
+             # auto infer schema
+             schema = _pandas_to_spark_schema(pandas_df)
+         spark_dataframe = self.spark.createDataFrame(pandas_df, schema)
+         return spark_dataframe
+
+     def read_dmo(
+         self, name: str, schema: Union[AtomicType, StructType, str, None] = None
+     ) -> PySparkDataFrame:
+         pandas_df = self._conn.get_pandas_dataframe(SQL_QUERY_TEMPLATE.format(name))
+         if not schema:
+             # auto infer schema
+             schema = _pandas_to_spark_schema(pandas_df)
+         spark_dataframe = self.spark.createDataFrame(pandas_df, schema)
+         return spark_dataframe
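Editor's note: a short usage sketch of `QueryAPIDataCloudReader`, assuming credentials are discoverable via `Credentials.from_available()` and that a DLO named `listings` exists (the name is a placeholder).

from pyspark.sql import SparkSession

from datacustomcode.io.reader.query_api import QueryAPIDataCloudReader

spark = SparkSession.builder.appName("query-api-example").getOrCreate()
reader = QueryAPIDataCloudReader(spark)

# Runs "SELECT * FROM listings" through the Query API, pulls the result
# into pandas, then converts it to a Spark DataFrame with a schema
# inferred from the pandas dtypes.
df = reader.read_dlo("listings")
df.show()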