salesforce-data-customcode 0.1.2__tar.gz → 0.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/PKG-INFO +69 -7
  2. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/README.md +68 -6
  3. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/pyproject.toml +1 -1
  4. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/cli.py +21 -2
  5. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/deploy.py +70 -15
  6. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/scan.py +134 -0
  7. salesforce_data_customcode-0.1.5/src/datacustomcode/templates/account.ipynb +86 -0
  8. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/templates/jupyterlab.sh +2 -2
  9. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/LICENSE.txt +0 -0
  10. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/__init__.py +0 -0
  11. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/client.py +0 -0
  12. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/cmd.py +0 -0
  13. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/config.py +0 -0
  14. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/config.yaml +0 -0
  15. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/credentials.py +0 -0
  16. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/io/__init__.py +0 -0
  17. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/io/base.py +0 -0
  18. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/io/reader/__init__.py +0 -0
  19. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/io/reader/base.py +0 -0
  20. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/io/reader/query_api.py +0 -0
  21. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/io/writer/__init__.py +0 -0
  22. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/io/writer/base.py +0 -0
  23. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/io/writer/csv.py +0 -0
  24. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/io/writer/print.py +0 -0
  25. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/mixin.py +0 -0
  26. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/py.typed +0 -0
  27. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/run.py +0 -0
  28. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/template.py +0 -0
  29. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/templates/.devcontainer/devcontainer.json +0 -0
  30. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/templates/Dockerfile +0 -0
  31. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/templates/README.md +0 -0
  32. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/templates/payload/config.json +0 -0
  33. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/templates/payload/entrypoint.py +0 -0
  34. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/templates/requirements-dev.txt +0 -0
  35. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/templates/requirements.txt +0 -0
  36. {salesforce_data_customcode-0.1.2 → salesforce_data_customcode-0.1.5}/src/datacustomcode/version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: salesforce-data-customcode
3
- Version: 0.1.2
3
+ Version: 0.1.5
4
4
  Summary: Data Cloud Custom Code SDK
5
5
  License: Apache-2.0
6
6
  Requires-Python: >=3.10,<3.12
@@ -31,6 +31,14 @@ More specifically, this codebase gives you ability to test code locally before p
31
31
 
32
32
  Use of this project with Salesforce is subject to the [TERMS OF USE](./TERMS_OF_USE.md)
33
33
 
34
+ ## Prerequisites
35
+
36
+ - Python 3.11 (If your system version is different, we recommend using [pyenv](https://github.com/pyenv/pyenv) to configure 3.11)
37
+ - [Azul Zulu OpenJDK 17.x](https://www.azul.com/downloads/?version=java-17-lts&package=jdk#zulu)
38
+ - Docker support like [Docker Desktop](https://docs.docker.com/desktop/)
39
+ - A Salesforce org with some DLOs or DMOs that contain data
40
+ - A [connected app](#creating-a-connected-app)
41
+
34
42
  ## Installation
35
43
  The SDK can be downloaded directly from PyPI with `pip`:
36
44
  ```
@@ -42,12 +50,16 @@ You can verify it was properly installed via CLI:
42
50
  datacustomcode version
43
51
  ```
44
52
 
45
- ## Development Setup
46
- We offer two built-in development interfaces: `devcontainers` and Jupyter, but you can set up any tool you would like manually.
53
+ ## Quick start
54
+ Ensure you have all the [prerequisites](#prerequisites) prepared on your machine.
47
55
 
48
- To get started, use the CLI to initialize a new development environment:
49
- ```
50
- datacustomcode init [DIRECTORY TO DUMP NEW REPO]
56
+ To get started, create a directory and initialize a new project with the CLI:
57
+ ```zsh
58
+ mkdir datacloud && cd datacloud
59
+ python3.11 -m venv .venv
60
+ source .venv/bin/activate
61
+ pip install salesforce-data-customcode
62
+ datacustomcode init my_package
51
63
  ```
52
64
 
53
65
  This will yield all necessary files to get started:
@@ -66,11 +78,33 @@ This will yield all necessary files to get started:
66
78
  * `Dockerfile` <span style="color:grey;font-style:italic;">(Do not update)</span> – Development container emulating the remote execution environment.
67
79
  * `requirements-dev.txt` <span style="color:grey;font-style:italic;">(Do not update)</span> – These are the dependencies for the development environment.
68
80
  * `jupyterlab.sh` <span style="color:grey;font-style:italic;">(Do not update)</span> – Helper script for setting up Jupyter.
69
- * `requirements.txt` – Here you define the requirements that you will need remotely
81
+ * `requirements.txt` – Here you define the requirements that you will need for your script.
70
82
  * `payload` – This folder will be compressed and deployed to the remote execution environment.
71
83
  * `config.json` – This config defines permissions on the back end and can be generated programmatically with the `scan` CLI command.
72
84
  * `entrypoint.py` – The script that defines the data transformation logic.
73
85
 
86
+ A functional entrypoint.py is provided so you can run once you've configured your connected app:
87
+ ```zsh
88
+ cd my_package
89
+ datacustomcode configure
90
+ datacustomcode run ./payload/entrypoint.py
91
+ ```
92
+
93
+ > [!IMPORTANT]
94
+ > The example entrypoint.py requires an `Account_Home__dll` DLO to be present. In order to deploy the script (next step), the output DLO (`Account_Home_copy__dll` in the example entrypoint.py) must also exist and be in the same dataspace as `Account_Home__dll`.
95
+
96
+ After modifying the `entrypoint.py` as needed, using any dependencies you add in the `.venv` virtual environment, you can run this script in Data Cloud:
97
+ ```zsh
98
+ datacustomcode scan ./payload/entrypoint.py
99
+ datacustomcode deploy --path ./payload --name my_custom_script
100
+ ```
101
+
102
+ > [!TIP]
103
+ > The `deploy` process can take several minutes. If you'd like more feedback on the underlying process, you can add `--debug` to the command like `datacustomcode --debug deploy --path ./payload --name my_custom_script`
104
+
105
+ You can now use the Salesforce Data Cloud UI to find the created Data Transform and use the `Run Now` button to run it.
106
+ Once the Data Transform run is successful, check the DLO your script is writing to and verify the correct records were added.
107
+
74
108
  ## API
75
109
 
76
110
  Your entry point script will define logic using the `Client` object which wraps data access layers.
@@ -157,3 +191,31 @@ Options:
157
191
  - `--config-file TEXT`: Path to configuration file
158
192
  - `--dependencies TEXT`: Additional dependencies (can be specified multiple times)
159
193
 
194
+ #### `datacustomcode zip`
195
+ Zip a transformation job in preparation to upload to Data Cloud.
196
+
197
+ Argument:
198
+ - `PATH`: Path to the code directory (default: "payload")
199
+
200
+ ## Prerequisite details
201
+
202
+ ### Creating a connected app
203
+
204
+ 1. Log in to Salesforce as an admin. In the top right corner, click the gear icon and go to `Setup`
205
+ 2. On the left-hand side, search for "App Manager" and select the `App Manager` entry underneath `Apps`
206
+ 3. Click on `New Connected App` in the upper right
207
+ 4. Fill in the required fields within the `Basic Information` section
208
+ 5. Under the `API (Enable OAuth Settings)` section:
209
+ 1. Click on the checkbox to Enable OAuth Settings.
210
+ 2. Provide a callback URL like http://localhost:55555/callback
211
+ 3. In the Selected OAuth Scopes, make sure that `refresh_token`, `api`, `cdp_query_api`, and `cdp_profile_api` are selected.
212
+ 4. Click on Save to save the connected app
213
+ 6. From the detail page that opens up afterwards, click the "Manage Consumer Details" button to find your client id and client secret
214
+ 7. Go back to `Setup`, then `OAuth and OpenID Connect Settings`, and enable the "Allow OAuth Username-Password Flows" option
215
+
216
+ You now have all fields necessary for the `datacustomcode configure` command.
217
+
218
+ ## Other docs
219
+
220
+ [Troubleshooting](./docs/troubleshooting.md)
221
+
@@ -8,6 +8,14 @@ More specifically, this codebase gives you ability to test code locally before p
8
8
 
9
9
  Use of this project with Salesforce is subject to the [TERMS OF USE](./TERMS_OF_USE.md)
10
10
 
11
+ ## Prerequisites
12
+
13
+ - Python 3.11 (If your system version is different, we recommend using [pyenv](https://github.com/pyenv/pyenv) to configure 3.11)
14
+ - [Azul Zulu OpenJDK 17.x](https://www.azul.com/downloads/?version=java-17-lts&package=jdk#zulu)
15
+ - Docker support like [Docker Desktop](https://docs.docker.com/desktop/)
16
+ - A Salesforce org with some DLOs or DMOs that contain data
17
+ - A [connected app](#creating-a-connected-app)
18
+
11
19
  ## Installation
12
20
  The SDK can be downloaded directly from PyPI with `pip`:
13
21
  ```
@@ -19,12 +27,16 @@ You can verify it was properly installed via CLI:
19
27
  datacustomcode version
20
28
  ```
21
29
 
22
- ## Development Setup
23
- We offer two built-in development interfaces: `devcontainers` and Jupyter, but you can set up any tool you would like manually.
30
+ ## Quick start
31
+ Ensure you have all the [prerequisites](#prerequisites) prepared on your machine.
24
32
 
25
- To get started, use the CLI to initialize a new development environment:
26
- ```
27
- datacustomcode init [DIRECTORY TO DUMP NEW REPO]
33
+ To get started, create a directory and initialize a new project with the CLI:
34
+ ```zsh
35
+ mkdir datacloud && cd datacloud
36
+ python3.11 -m venv .venv
37
+ source .venv/bin/activate
38
+ pip install salesforce-data-customcode
39
+ datacustomcode init my_package
28
40
  ```
29
41
 
30
42
  This will yield all necessary files to get started:
@@ -43,11 +55,33 @@ This will yield all necessary files to get started:
43
55
  * `Dockerfile` <span style="color:grey;font-style:italic;">(Do not update)</span> – Development container emulating the remote execution environment.
44
56
  * `requirements-dev.txt` <span style="color:grey;font-style:italic;">(Do not update)</span> – These are the dependencies for the development environment.
45
57
  * `jupyterlab.sh` <span style="color:grey;font-style:italic;">(Do not update)</span> – Helper script for setting up Jupyter.
46
- * `requirements.txt` – Here you define the requirements that you will need remotely
58
+ * `requirements.txt` – Here you define the requirements that you will need for your script.
47
59
  * `payload` – This folder will be compressed and deployed to the remote execution environment.
48
60
  * `config.json` – This config defines permissions on the back end and can be generated programmatically with the `scan` CLI command.
49
61
  * `entrypoint.py` – The script that defines the data transformation logic.
50
62
 
63
+ A functional entrypoint.py is provided so you can run once you've configured your connected app:
64
+ ```zsh
65
+ cd my_package
66
+ datacustomcode configure
67
+ datacustomcode run ./payload/entrypoint.py
68
+ ```
69
+
70
+ > [!IMPORTANT]
71
+ > The example entrypoint.py requires an `Account_Home__dll` DLO to be present. In order to deploy the script (next step), the output DLO (`Account_Home_copy__dll` in the example entrypoint.py) must also exist and be in the same dataspace as `Account_Home__dll`.
72
+
73
+ After modifying the `entrypoint.py` as needed, using any dependencies you add in the `.venv` virtual environment, you can run this script in Data Cloud:
74
+ ```zsh
75
+ datacustomcode scan ./payload/entrypoint.py
76
+ datacustomcode deploy --path ./payload --name my_custom_script
77
+ ```
78
+
79
+ > [!TIP]
80
+ > The `deploy` process can take several minutes. If you'd like more feedback on the underlying process, you can add `--debug` to the command like `datacustomcode --debug deploy --path ./payload --name my_custom_script`
81
+
82
+ You can now use the Salesforce Data Cloud UI to find the created Data Transform and use the `Run Now` button to run it.
83
+ Once the Data Transform run is successful, check the DLO your script is writing to and verify the correct records were added.
84
+
51
85
  ## API
52
86
 
53
87
  Your entry point script will define logic using the `Client` object which wraps data access layers.
@@ -133,3 +167,31 @@ Argument:
133
167
  Options:
134
168
  - `--config-file TEXT`: Path to configuration file
135
169
  - `--dependencies TEXT`: Additional dependencies (can be specified multiple times)
170
+
171
+ #### `datacustomcode zip`
172
+ Zip a transformation job in preparation to upload to Data Cloud.
173
+
174
+ Argument:
175
+ - `PATH`: Path to the code directory (default: "payload")
176
+
177
+ ## Prerequisite details
178
+
179
+ ### Creating a connected app
180
+
181
+ 1. Log in to Salesforce as an admin. In the top right corner, click the gear icon and go to `Setup`
182
+ 2. On the left-hand side, search for "App Manager" and select the `App Manager` entry underneath `Apps`
183
+ 3. Click on `New Connected App` in the upper right
184
+ 4. Fill in the required fields within the `Basic Information` section
185
+ 5. Under the `API (Enable OAuth Settings)` section:
186
+ 1. Click on the checkbox to Enable OAuth Settings.
187
+ 2. Provide a callback URL like http://localhost:55555/callback
188
+ 3. In the Selected OAuth Scopes, make sure that `refresh_token`, `api`, `cdp_query_api`, and `cdp_profile_api` are selected.
189
+ 4. Click on Save to save the connected app
190
+ 6. From the detail page that opens up afterwards, click the "Manage Consumer Details" button to find your client id and client secret
191
+ 7. Go back to `Setup`, then `OAuth and OpenID Connect Settings`, and enable the "Allow OAuth Username-Password Flows" option
192
+
193
+ You now have all fields necessary for the `datacustomcode configure` command.
194
+
195
+ ## Other docs
196
+
197
+ [Troubleshooting](./docs/troubleshooting.md)
@@ -18,7 +18,7 @@ license = "Apache-2.0"
18
18
  name = "salesforce-data-customcode"
19
19
  readme = "README.md"
20
20
  requires-python = ">=3.10,<3.12"
21
- version = "0.1.2"
21
+ version = "0.1.5"
22
22
 
23
23
  [tool.black]
24
24
  exclude = '''
@@ -69,6 +69,15 @@ def configure(
69
69
  ).update_ini(profile=profile)
70
70
 
71
71
 
72
@cli.command()
@click.argument("path", default="payload")
def zip(path: str):
    """Zip a transformation job in preparation to upload to Data Cloud.

    PATH is the directory to package (default: "payload").
    """
    # Imported lazily (like the other commands in this module) and under an
    # alias, so the deploy helper does not rebind this command's own name
    # inside its body.
    from datacustomcode.deploy import zip as zip_payload

    logger.debug("Zipping project")
    zip_payload(path)
79
+
80
+
72
81
  @cli.command()
73
82
  @click.option("--profile", default="default")
74
83
  @click.option("--path", default="payload")
@@ -127,8 +136,11 @@ def init(directory: str):
127
136
  @click.argument("filename")
128
137
  @click.option("--config")
129
138
  @click.option("--dry-run", is_flag=True)
130
- def scan(filename: str, config: str, dry_run: bool):
131
- from datacustomcode.scan import dc_config_json_from_file
139
+ @click.option(
140
+ "--no-requirements", is_flag=True, help="Skip generating requirements.txt file"
141
+ )
142
+ def scan(filename: str, config: str, dry_run: bool, no_requirements: bool):
143
+ from datacustomcode.scan import dc_config_json_from_file, write_requirements_file
132
144
 
133
145
  config_location = config or os.path.join(os.path.dirname(filename), "config.json")
134
146
  click.echo(
@@ -143,6 +155,13 @@ def scan(filename: str, config: str, dry_run: bool):
143
155
  with open(config_location, "w") as f:
144
156
  json.dump(config_json, f, indent=2)
145
157
 
158
+ if not no_requirements:
159
+ requirements_path = write_requirements_file(filename)
160
+ click.echo(
161
+ "Generated requirements file: "
162
+ + click.style(requirements_path, fg="blue", bold=True)
163
+ )
164
+
146
165
 
147
166
  @cli.command()
148
167
  @click.argument("entrypoint")
@@ -169,25 +169,14 @@ def prepare_dependency_archive(directory: str) -> None:
169
169
  archive_file = os.path.join(archives_dir, DEPENDENCIES_ARCHIVE_NAME)
170
170
  with tarfile.open(archive_file, "w:gz") as tar:
171
171
  for file in os.listdir(temp_dir):
172
+ # Exclude requirements.txt from the archive
173
+ if file == "requirements.txt":
174
+ continue
172
175
  tar.add(os.path.join(temp_dir, file), arcname=file)
173
176
 
174
177
  logger.debug(f"Dependencies downloaded and archived to {archive_file}")
175
178
 
176
179
 
177
- def zip_and_upload_directory(directory: str, file_upload_url: str) -> None:
178
- file_upload_url = unescape(file_upload_url)
179
-
180
- logger.debug(f"Zipping directory... {directory}")
181
- shutil.make_archive(ZIP_FILE_NAME.rstrip(".zip"), "zip", directory)
182
-
183
- logger.debug(f"Uploading deployment to {file_upload_url}")
184
- with open(ZIP_FILE_NAME, "rb") as zip_file:
185
- response = requests.put(
186
- file_upload_url, data=zip_file, headers={"Content-Type": "application/zip"}
187
- )
188
- response.raise_for_status()
189
-
190
-
191
180
  class DeploymentsResponse(BaseModel):
192
181
  deploymentStatus: str
193
182
 
@@ -325,6 +314,71 @@ def create_data_transform(
325
314
  return response
326
315
 
327
316
 
317
def has_nonempty_requirements_file(directory: str) -> bool:
    """
    Check whether the requirements.txt next to ``directory`` declares anything.

    The file is looked up in the *parent* of ``directory`` (i.e. as a sibling
    of the payload folder), not inside ``directory`` itself.

    Args:
        directory (str): The payload directory whose parent is searched for
            requirements.txt. A trailing path separator is tolerated.
    Returns:
        bool: True if requirements.txt exists and has at least one line that
            is neither blank nor a comment, False otherwise (including when
            the file cannot be read).
    """
    # Normalise first: os.path.dirname("payload/") is "payload", which would
    # make us look *inside* the payload directory instead of next to it.
    parent_dir = os.path.dirname(os.path.normpath(directory))
    requirements_path = os.path.join(parent_dir, "requirements.txt")

    if not os.path.isfile(requirements_path):
        return False

    try:
        with open(requirements_path, "r", encoding="utf-8") as f:
            # Non-empty means: some line is not blank and not a comment
            # (leading whitespace ignored).
            return any(
                bool(line.strip()) and not line.lstrip().startswith("#")
                for line in f
            )
    except (OSError, UnicodeError) as e:
        # Best effort: an unreadable file is treated as "no requirements".
        logger.error(f"Error reading requirements.txt: {e}")
        return False
341
+
342
+
343
def upload_zip(file_upload_url: str) -> None:
    """Send the archive at ``ZIP_FILE_NAME`` to ``file_upload_url`` via HTTP PUT.

    Args:
        file_upload_url: Destination URL; it is passed through ``unescape``
            before use (presumably to undo HTML entity escaping - confirm
            against the module's imports).

    Raises:
        Whatever ``response.raise_for_status()`` raises for an error response.
    """
    target_url = unescape(file_upload_url)
    request_headers = {"Content-Type": "application/zip"}
    with open(ZIP_FILE_NAME, "rb") as archive:
        response = requests.put(target_url, data=archive, headers=request_headers)
        response.raise_for_status()
350
+
351
+
352
def zip(
    directory: str,
):
    """Package ``directory`` into the ``ZIP_FILE_NAME`` archive.

    If a non-empty requirements.txt is found (see
    ``has_nonempty_requirements_file``), the dependency archive is prepared
    first so it is included in the zip. ``.DS_Store`` files are left out, and
    paths inside the archive are relative to ``directory``.

    Args:
        directory: Directory whose contents are zipped.
    """
    import zipfile

    # Prepare the dependency archive only if requirements.txt is non-empty.
    if has_nonempty_requirements_file(directory):
        prepare_dependency_archive(directory)
    else:
        logger.info(
            f"Skipping dependency archive: requirements.txt is missing or empty "
            f"in {directory}"
        )

    logger.debug(f"Zipping directory... {directory}")

    # The archive is created relative to the current working directory. If
    # that lies inside ``directory`` the walk would pick up the half-written
    # archive and add it to itself, so remember its location and skip it.
    archive_path = os.path.abspath(ZIP_FILE_NAME)

    with zipfile.ZipFile(ZIP_FILE_NAME, "w", zipfile.ZIP_DEFLATED) as zipf:
        for root, _dirs, files in os.walk(directory):
            for file in files:
                if file == ".DS_Store":
                    continue
                file_path = os.path.join(root, file)
                if os.path.abspath(file_path) == archive_path:
                    continue
                # Preserve the relative path structure in the zip file.
                arcname = os.path.relpath(file_path, directory)
                zipf.write(file_path, arcname)

    logger.debug(f"Created zip file: {ZIP_FILE_NAME}")
380
+
381
+
328
382
  def deploy_full(
329
383
  directory: str,
330
384
  metadata: TransformationJobMetadata,
@@ -340,7 +394,8 @@ def deploy_full(
340
394
 
341
395
  # create deployment and upload payload
342
396
  deployment = create_deployment(access_token, metadata)
343
- zip_and_upload_directory(directory, deployment.fileUploadUrl)
397
+ zip(directory)
398
+ upload_zip(deployment.fileUploadUrl)
344
399
  wait_for_deployment(access_token, metadata, callback)
345
400
 
346
401
  # create data transform
@@ -15,9 +15,12 @@
15
15
  from __future__ import annotations
16
16
 
17
17
  import ast
18
+ import os
18
19
  from typing import (
19
20
  Any,
21
+ ClassVar,
20
22
  Dict,
23
+ Set,
21
24
  Union,
22
25
  )
23
26
 
@@ -131,6 +134,137 @@ class ClientMethodVisitor(ast.NodeVisitor):
131
134
  )
132
135
 
133
136
 
137
+ class ImportVisitor(ast.NodeVisitor):
138
+ """AST Visitor that extracts external package imports from Python code."""
139
+
140
+ # Standard library modules that should be excluded from requirements
141
+ STANDARD_LIBS: ClassVar[set[str]] = {
142
+ "abc",
143
+ "argparse",
144
+ "ast",
145
+ "asyncio",
146
+ "base64",
147
+ "collections",
148
+ "configparser",
149
+ "contextlib",
150
+ "copy",
151
+ "csv",
152
+ "datetime",
153
+ "enum",
154
+ "functools",
155
+ "glob",
156
+ "hashlib",
157
+ "http",
158
+ "importlib",
159
+ "inspect",
160
+ "io",
161
+ "itertools",
162
+ "json",
163
+ "logging",
164
+ "math",
165
+ "os",
166
+ "pathlib",
167
+ "pickle",
168
+ "random",
169
+ "re",
170
+ "shutil",
171
+ "site",
172
+ "socket",
173
+ "sqlite3",
174
+ "string",
175
+ "subprocess",
176
+ "sys",
177
+ "tempfile",
178
+ "threading",
179
+ "time",
180
+ "traceback",
181
+ "typing",
182
+ "uuid",
183
+ "warnings",
184
+ "xml",
185
+ "zipfile",
186
+ }
187
+
188
+ # Additional packages to exclude from requirements.txt
189
+ EXCLUDED_PACKAGES: ClassVar[set[str]] = {
190
+ "datacustomcode", # Internal package
191
+ "pyspark", # Provided by the runtime environment
192
+ }
193
+
194
+ def __init__(self) -> None:
195
+ self.imports: Set[str] = set()
196
+
197
+ def visit_Import(self, node: ast.Import) -> None:
198
+ """Visit an import statement (e.g., import os, sys)."""
199
+ for name in node.names:
200
+ # Get the top-level package name
201
+ package = name.name.split(".")[0]
202
+ if (
203
+ package not in self.STANDARD_LIBS
204
+ and package not in self.EXCLUDED_PACKAGES
205
+ and not package.startswith("_")
206
+ ):
207
+ self.imports.add(package)
208
+ self.generic_visit(node)
209
+
210
+ def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
211
+ """Visit a from-import statement (e.g., from os import path)."""
212
+ if node.module is not None:
213
+ # Get the top-level package
214
+ package = node.module.split(".")[0]
215
+ if (
216
+ package not in self.STANDARD_LIBS
217
+ and package not in self.EXCLUDED_PACKAGES
218
+ and not package.startswith("_")
219
+ ):
220
+ self.imports.add(package)
221
+ self.generic_visit(node)
222
+
223
+
224
def scan_file_for_imports(file_path: str) -> Set[str]:
    """Scan a Python file for external package imports.

    Args:
        file_path: Path to the Python source file to analyse.

    Returns:
        The set of top-level package names collected by ``ImportVisitor``.

    Raises:
        OSError: If the file cannot be opened.
        SyntaxError: If the file is not valid Python.
    """
    # Python source defaults to UTF-8; do not depend on the platform's
    # locale encoding when reading it.
    with open(file_path, "r", encoding="utf-8") as f:
        code = f.read()
    # Passing the filename makes syntax errors point at the scanned file.
    tree = ast.parse(code, filename=file_path)
    visitor = ImportVisitor()
    visitor.visit(tree)
    return visitor.imports
232
+
233
+
234
def write_requirements_file(file_path: str) -> str:
    """
    Scan a Python file for imports and write them to requirements.txt.

    The requirements file lives in the parent directory of the scanned file's
    directory (or in "." when ``file_path`` has no directory component). An
    existing file is preserved line for line, including comments, option
    lines and version pins. Only packages not already listed there are
    appended, in sorted order.

    Args:
        file_path: Path to the Python file to scan

    Returns:
        Path to the generated requirements.txt file
    """
    import re

    imports = scan_file_for_imports(file_path)

    # Write requirements.txt in the parent directory of the Python file
    file_dir = os.path.dirname(file_path)
    parent_dir = os.path.dirname(file_dir) if file_dir else "."
    requirements_path = os.path.join(parent_dir, "requirements.txt")

    def canonical(name: str) -> str:
        # Case-insensitive comparison with "-", "_" and "." treated alike,
        # so "My_Pkg" and "my-pkg" count as the same requirement.
        return re.sub(r"[-_.]+", "-", name).lower()

    existing_lines = []
    if os.path.exists(requirements_path):
        with open(requirements_path, "r", encoding="utf-8") as f:
            existing_lines = [line.rstrip() for line in f]
    # Drop trailing blank lines so repeated runs do not grow the file.
    while existing_lines and not existing_lines[-1]:
        existing_lines.pop()

    # Collect the names already declared, ignoring comments and pip options
    # ("-r other.txt", "--index-url ...") and any version specifier or extras
    # that follow the name.
    declared = set()
    for line in existing_lines:
        entry = line.lstrip()
        if not entry or entry.startswith(("#", "-")):
            continue
        match = re.match(r"[A-Za-z0-9][A-Za-z0-9._-]*", entry)
        if match:
            declared.add(canonical(match.group(0)))

    # A bare "numpy" must not be added next to an existing "numpy==1.26".
    new_packages = sorted(pkg for pkg in imports if canonical(pkg) not in declared)

    with open(requirements_path, "w", encoding="utf-8") as f:
        f.writelines(f"{line}\n" for line in existing_lines + new_packages)

    return requirements_path
266
+
267
+
134
268
  def scan_file(file_path: str) -> DataAccessLayerCalls:
135
269
  """Scan a single Python file for Client read/write method calls."""
136
270
  with open(file_path, "r") as f:
@@ -0,0 +1,86 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "0",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "from datacustomcode.client import Client\n",
11
+ "from datacustomcode.io.writer.base import WriteMode\n",
12
+ "from pyspark.sql.functions import col, upper"
13
+ ]
14
+ },
15
+ {
16
+ "cell_type": "code",
17
+ "execution_count": null,
18
+ "id": "1",
19
+ "metadata": {},
20
+ "outputs": [],
21
+ "source": [
22
+ "client = Client()\n",
23
+ "\n",
24
+ "df = client.read_dlo(\"Account_Home__dll\")"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "code",
29
+ "execution_count": null,
30
+ "id": "2",
31
+ "metadata": {},
32
+ "outputs": [],
33
+ "source": [
34
+ "# Perform transformations on the DataFrame\n",
35
+ "df_upper1 = df.withColumn(\"Description__c\", upper(col(\"Description__c\")))\n",
36
+ "\n",
37
+ "# Drop specific columns related to relationships\n",
38
+ "df_upper1 = df_upper1.drop(\"KQ_ParentId__c\")\n",
39
+ "df_upper1 = df_upper1.drop(\"KQ_Id__c\")\n",
40
+ "\n",
41
+ "df_upper1.show()"
42
+ ]
43
+ },
44
+ {
45
+ "cell_type": "code",
46
+ "execution_count": null,
47
+ "id": "3",
48
+ "metadata": {},
49
+ "outputs": [],
50
+ "source": [
51
+ "# Save the transformed DataFrame\n",
52
+ "dlo_name = \"Account_Home_copy__dll\"\n",
53
+ "client.write_to_dlo(dlo_name, df_upper1, write_mode=WriteMode.APPEND)"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "execution_count": null,
59
+ "id": "4",
60
+ "metadata": {},
61
+ "outputs": [],
62
+ "source": []
63
+ }
64
+ ],
65
+ "metadata": {
66
+ "kernelspec": {
67
+ "display_name": "Python 3 (ipykernel)",
68
+ "language": "python",
69
+ "name": "python3"
70
+ },
71
+ "language_info": {
72
+ "codemirror_mode": {
73
+ "name": "ipython",
74
+ "version": 3
75
+ },
76
+ "file_extension": ".py",
77
+ "mimetype": "text/x-python",
78
+ "name": "python",
79
+ "nbconvert_exporter": "python",
80
+ "pygments_lexer": "ipython3",
81
+ "version": "3.11.11"
82
+ }
83
+ },
84
+ "nbformat": 4,
85
+ "nbformat_minor": 5
86
+ }
@@ -48,13 +48,13 @@ check_docker() {
48
48
  # Function to start Jupyter server
49
49
  start_jupyter() {
50
50
  echo "Building the docker image"
51
- docker build -t datacloud-byoc .
51
+ docker build -t datacloud-customcode .
52
52
 
53
53
  echo "Running the docker container"
54
54
  docker run -d --rm -p 8888:8888 \
55
55
  -v $(pwd):/workspace \
56
56
  --name jupyter-server \
57
- datacloud-byoc jupyter lab \
57
+ datacloud-customcode jupyter lab \
58
58
  --ip=0.0.0.0 \
59
59
  --port=8888 \
60
60
  --no-browser \