salesforce-data-customcode 0.1.25__tar.gz → 0.1.27__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/PKG-INFO +6 -1
  2. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/README.md +5 -0
  3. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/pyproject.toml +1 -1
  4. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/cmd.py +6 -2
  5. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/deploy.py +16 -14
  6. {salesforce_data_customcode-0.1.25/src/datacustomcode/templates/script → salesforce_data_customcode-0.1.27/src/datacustomcode/templates/function}/Dockerfile.dependencies +1 -1
  7. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/templates/function/payload/entrypoint.py +3 -3
  8. {salesforce_data_customcode-0.1.25/src/datacustomcode/templates/script → salesforce_data_customcode-0.1.27/src/datacustomcode/templates/function}/requirements-dev.txt +0 -7
  9. salesforce_data_customcode-0.1.25/src/datacustomcode/templates/script/Dockerfile +0 -18
  10. salesforce_data_customcode-0.1.25/src/datacustomcode/templates/script/account.ipynb +0 -86
  11. salesforce_data_customcode-0.1.25/src/datacustomcode/templates/script/examples/employee_hierarchy/employee_data.csv +0 -13
  12. salesforce_data_customcode-0.1.25/src/datacustomcode/templates/script/examples/employee_hierarchy/entrypoint.py +0 -78
  13. salesforce_data_customcode-0.1.25/src/datacustomcode/templates/script/jupyterlab.sh +0 -97
  14. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/LICENSE.txt +0 -0
  15. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/__init__.py +0 -0
  16. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/auth.py +0 -0
  17. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/cli.py +0 -0
  18. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/client.py +0 -0
  19. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/config.py +0 -0
  20. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/config.yaml +0 -0
  21. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/credentials.py +0 -0
  22. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/file/__init__.py +0 -0
  23. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/file/base.py +0 -0
  24. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/file/path/__init__.py +0 -0
  25. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/file/path/default.py +0 -0
  26. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/io/__init__.py +0 -0
  27. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/io/base.py +0 -0
  28. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/io/reader/__init__.py +0 -0
  29. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/io/reader/base.py +0 -0
  30. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/io/reader/query_api.py +0 -0
  31. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/io/reader/sf_cli.py +0 -0
  32. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/io/reader/utils.py +0 -0
  33. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/io/writer/__init__.py +0 -0
  34. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/io/writer/base.py +0 -0
  35. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/io/writer/csv.py +0 -0
  36. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/io/writer/print.py +0 -0
  37. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/mixin.py +0 -0
  38. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/proxy/__init__.py +0 -0
  39. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/proxy/base.py +0 -0
  40. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/proxy/client/LocalProxyClientProvider.py +0 -0
  41. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/proxy/client/__init__.py +0 -0
  42. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/proxy/client/base.py +0 -0
  43. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/py.typed +0 -0
  44. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/run.py +0 -0
  45. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/scan.py +0 -0
  46. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/spark/__init__.py +0 -0
  47. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/spark/base.py +0 -0
  48. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/spark/default.py +0 -0
  49. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/template.py +0 -0
  50. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/templates/function/.devcontainer/devcontainer.json +0 -0
  51. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/templates/function/README.md +0 -0
  52. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/templates/function/build_native_dependencies.sh +0 -0
  53. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/templates/function/payload/config.json +0 -0
  54. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/templates/function/requirements.txt +0 -0
  55. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/templates/script/.devcontainer/devcontainer.json +0 -0
  56. {salesforce_data_customcode-0.1.25/src/datacustomcode/templates/function → salesforce_data_customcode-0.1.27/src/datacustomcode/templates/script}/Dockerfile +0 -0
  57. {salesforce_data_customcode-0.1.25/src/datacustomcode/templates/function → salesforce_data_customcode-0.1.27/src/datacustomcode/templates/script}/Dockerfile.dependencies +0 -0
  58. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/templates/script/README.md +0 -0
  59. {salesforce_data_customcode-0.1.25/src/datacustomcode/templates/function → salesforce_data_customcode-0.1.27/src/datacustomcode/templates/script}/account.ipynb +0 -0
  60. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/templates/script/build_native_dependencies.sh +0 -0
  61. {salesforce_data_customcode-0.1.25/src/datacustomcode/templates/function → salesforce_data_customcode-0.1.27/src/datacustomcode/templates/script}/examples/employee_hierarchy/employee_data.csv +0 -0
  62. {salesforce_data_customcode-0.1.25/src/datacustomcode/templates/function → salesforce_data_customcode-0.1.27/src/datacustomcode/templates/script}/examples/employee_hierarchy/entrypoint.py +0 -0
  63. {salesforce_data_customcode-0.1.25/src/datacustomcode/templates/function → salesforce_data_customcode-0.1.27/src/datacustomcode/templates/script}/jupyterlab.sh +0 -0
  64. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/templates/script/payload/config.json +0 -0
  65. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/templates/script/payload/entrypoint.py +0 -0
  66. {salesforce_data_customcode-0.1.25/src/datacustomcode/templates/function → salesforce_data_customcode-0.1.27/src/datacustomcode/templates/script}/requirements-dev.txt +0 -0
  67. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/templates/script/requirements.txt +0 -0
  68. {salesforce_data_customcode-0.1.25 → salesforce_data_customcode-0.1.27}/src/datacustomcode/version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: salesforce-data-customcode
3
- Version: 0.1.25
3
+ Version: 0.1.27
4
4
  Summary: Data Cloud Custom Code SDK
5
5
  License-Expression: Apache-2.0
6
6
  License-File: LICENSE.txt
@@ -64,6 +64,8 @@ pip install salesforce-data-customcode
64
64
  datacustomcode init my_package
65
65
  ```
66
66
 
67
+ To create a package of type function, pass the parameter `--code-type=function` with the init command.
68
+
67
69
  This will yield all necessary files to get started:
68
70
  ```
69
71
  .
@@ -268,6 +270,8 @@ Initialize a new development environment with a code package template.
268
270
 
269
271
  Argument:
270
272
  - `DIRECTORY`: Directory to create project in (default: ".")
273
+ Options:
274
+ - `--code-type TEXT`: This can be either `function` or `script`. The default value is `script` if the argument is missing.
271
275
 
272
276
 
273
277
  #### `datacustomcode scan`
@@ -317,6 +321,7 @@ Options:
317
321
  - `--description TEXT`: Description of the transformation job (default: "")
318
322
  - `--network TEXT`: docker network (default: "default")
319
323
  - `--cpu-size TEXT`: CPU size for the deployment (default: `CPU_2XL`). Available options: CPU_L(Large), CPU_XL(Extra Large), CPU_2XL(2X Large), CPU_4XL(4X Large)
324
+ - `--function-invoke-opt TEXT`: Currently we support only `UnstructuredChunking` for functions.
320
325
 
321
326
 
322
327
  ## Docker usage
@@ -40,6 +40,8 @@ pip install salesforce-data-customcode
40
40
  datacustomcode init my_package
41
41
  ```
42
42
 
43
+ To create a package of type function, pass the parameter `--code-type=function` with the init command.
44
+
43
45
  This will yield all necessary files to get started:
44
46
  ```
45
47
  .
@@ -244,6 +246,8 @@ Initialize a new development environment with a code package template.
244
246
 
245
247
  Argument:
246
248
  - `DIRECTORY`: Directory to create project in (default: ".")
249
+ Options:
250
+ - `--code-type TEXT`: This can be either `function` or `script`. The default value is `script` if the argument is missing.
247
251
 
248
252
 
249
253
  #### `datacustomcode scan`
@@ -293,6 +297,7 @@ Options:
293
297
  - `--description TEXT`: Description of the transformation job (default: "")
294
298
  - `--network TEXT`: docker network (default: "default")
295
299
  - `--cpu-size TEXT`: CPU size for the deployment (default: `CPU_2XL`). Available options: CPU_L(Large), CPU_XL(Extra Large), CPU_2XL(2X Large), CPU_4XL(4X Large)
300
+ - `--function-invoke-opt TEXT`: Currently we support only `UnstructuredChunking` for functions.
296
301
 
297
302
 
298
303
  ## Docker usage
@@ -18,7 +18,7 @@ license = "Apache-2.0"
18
18
  name = "salesforce-data-customcode"
19
19
  readme = "README.md"
20
20
  requires-python = ">=3.10,<3.12"
21
- version = "0.1.25"
21
+ version = "0.1.27"
22
22
 
23
23
  [tool.black]
24
24
  exclude = '''
@@ -85,9 +85,13 @@ def _cmd_output(
85
85
  **kwargs: Any,
86
86
  ) -> tuple[int, bytes, Union[bytes, None]]:
87
87
  _setdefault_kwargs(kwargs)
88
+ kwargs.setdefault("shell", True)
89
+ # On Windows, Popen with shell=True and a sequence uses list2cmdline() which
90
+ # quotes the entire string, causing cmd.exe to fail. Joining to a plain string
91
+ # works correctly on both Unix (/bin/sh -c "...") and Windows (cmd.exe /c ...).
92
+ cmd_arg: Union[tuple[str, ...], str] = " ".join(cmd) if kwargs.get("shell") else cmd
88
93
  try:
89
- kwargs.setdefault("shell", True)
90
- proc = subprocess.Popen(cmd, **kwargs)
94
+ proc = subprocess.Popen(cmd_arg, **kwargs)
91
95
  except OSError as e:
92
96
  returncode, stdout_b, stderr_b = _oserror_to_output(e)
93
97
  else:
@@ -272,7 +272,7 @@ def create_deployment(
272
272
  raise
273
273
 
274
274
 
275
- PLATFORM_ENV_VAR = "DOCKER_DEFAULT_PLATFORM=linux/amd64"
275
+ PLATFORM_ENV = {"DOCKER_DEFAULT_PLATFORM": "linux/amd64"}
276
276
  DOCKER_IMAGE_NAME = "datacloud-custom-code-dependency-builder"
277
277
  DEPENDENCIES_ARCHIVE_NAME = "native_dependencies"
278
278
  DEPENDENCIES_ARCHIVE_FULL_NAME = f"{DEPENDENCIES_ARCHIVE_NAME}.tar.gz"
@@ -286,19 +286,25 @@ def prepare_dependency_archive(directory: str, docker_network: str) -> None:
286
286
  cmd = f"docker images -q {DOCKER_IMAGE_NAME}"
287
287
  image_exists = cmd_output(cmd)
288
288
 
289
+ docker_env = {**os.environ, **PLATFORM_ENV}
290
+
289
291
  if not image_exists:
290
292
  logger.info(f"Building docker image with docker network: {docker_network}...")
291
293
  cmd = docker_build_cmd(docker_network)
292
- cmd_output(cmd)
294
+ cmd_output(cmd, env=docker_env)
293
295
 
294
- with tempfile.TemporaryDirectory() as temp_dir:
296
+ # ignore_cleanup_errors=True: on Windows, Docker creates files inside the
297
+ # mounted volume whose permissions prevent the host from deleting them.
298
+ # The archive has already been copied out, so silently skipping leftover
299
+ # files is safe and avoids a fatal error on context-manager exit.
300
+ with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:
295
301
  logger.info(
296
302
  f"Building dependencies archive with docker network: {docker_network}"
297
303
  )
298
304
  shutil.copy("requirements.txt", temp_dir)
299
305
  shutil.copy("build_native_dependencies.sh", temp_dir)
300
306
  cmd = docker_run_cmd(docker_network, temp_dir)
301
- cmd_output(cmd)
307
+ cmd_output(cmd, env=docker_env)
302
308
  archives_temp_path = os.path.join(temp_dir, DEPENDENCIES_ARCHIVE_FULL_NAME)
303
309
  os.makedirs(os.path.dirname(DEPENDENCIES_ARCHIVE_PATH), exist_ok=True)
304
310
  shutil.copy(archives_temp_path, DEPENDENCIES_ARCHIVE_PATH)
@@ -307,10 +313,7 @@ def prepare_dependency_archive(directory: str, docker_network: str) -> None:
307
313
 
308
314
 
309
315
  def docker_build_cmd(network: str) -> str:
310
- cmd = (
311
- f"{PLATFORM_ENV_VAR} docker build -t {DOCKER_IMAGE_NAME} "
312
- f"--file Dockerfile.dependencies . "
313
- )
316
+ cmd = f"docker build -t {DOCKER_IMAGE_NAME} --file Dockerfile.dependencies . "
314
317
 
315
318
  if network != "default":
316
319
  cmd = cmd + f"--network {network}"
@@ -318,12 +321,11 @@ def docker_build_cmd(network: str) -> str:
318
321
  return cmd
319
322
 
320
323
 
321
- def docker_run_cmd(network: str, temp_dir) -> str:
322
- cmd = (
323
- f"{PLATFORM_ENV_VAR} docker run --rm "
324
- f"-v {temp_dir}:/workspace "
325
- f"{DOCKER_IMAGE_NAME} "
326
- )
324
+ def docker_run_cmd(network: str, temp_dir: str) -> str:
325
+ # Normalise path separators: Docker expects forward slashes even on Windows,
326
+ # and quoting handles paths that contain spaces.
327
+ docker_path = temp_dir.replace("\\", "/")
328
+ cmd = f'docker run --rm -v "{docker_path}:/workspace" {DOCKER_IMAGE_NAME} '
327
329
 
328
330
  if network != "default":
329
331
  cmd = cmd + f"--network {network} "
@@ -1,4 +1,4 @@
1
- FROM public.ecr.aws/emr-on-eks/spark/emr-7.3.0:latest
1
+ FROM python:3.11-slim
2
2
 
3
3
  USER root
4
4
 
@@ -33,8 +33,8 @@ def chunk_text(text: str, chunk_size: int = 1000) -> List[str]:
33
33
  return chunks
34
34
 
35
35
 
36
- def dc_function(request: dict) -> dict:
37
- logger.info("Inside DC Function")
36
+ def function(request: dict) -> dict:
37
+ logger.info("Inside Function")
38
38
  logger.info(request)
39
39
 
40
40
  items = request["input"]
@@ -107,7 +107,7 @@ if __name__ == "__main__":
107
107
  }
108
108
 
109
109
  # Run the function
110
- result = dc_function(test_request)
110
+ result = function(test_request)
111
111
 
112
112
  # Print the results in a more readable format
113
113
  print("\nChunking Results:")
@@ -1,10 +1,3 @@
1
1
  # Required packages for the project - Do not modify
2
- salesforce-cdp-connector>=1.0.16
3
- pyspark==3.5.1
4
- pandas
5
- numpy
6
2
  pydantic
7
- jupyterlab
8
- ipywidgets
9
- tqdm
10
3
  salesforce-data-customcode
@@ -1,18 +0,0 @@
1
- FROM public.ecr.aws/emr-on-eks/spark/emr-7.3.0:latest
2
-
3
- USER root
4
-
5
- # install from dev requirements.txt
6
- COPY requirements-dev.txt ./requirements-dev.txt
7
- RUN pip3.11 install --no-cache-dir -r requirements-dev.txt
8
-
9
- # Install from requirements.txt:
10
- COPY requirements.txt ./requirements.txt
11
- RUN pip3.11 install --no-cache-dir -r requirements.txt
12
-
13
- # Create workspace directory
14
- RUN mkdir /workspace
15
-
16
- # Set user and working directory
17
- USER hadoop:hadoop
18
- WORKDIR /workspace
@@ -1,86 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": null,
6
- "id": "0",
7
- "metadata": {},
8
- "outputs": [],
9
- "source": [
10
- "from datacustomcode.client import Client\n",
11
- "from datacustomcode.io.writer.base import WriteMode\n",
12
- "from pyspark.sql.functions import col, upper"
13
- ]
14
- },
15
- {
16
- "cell_type": "code",
17
- "execution_count": null,
18
- "id": "1",
19
- "metadata": {},
20
- "outputs": [],
21
- "source": [
22
- "client = Client()\n",
23
- "\n",
24
- "df = client.read_dlo(\"Account_std__dll\")"
25
- ]
26
- },
27
- {
28
- "cell_type": "code",
29
- "execution_count": null,
30
- "id": "2",
31
- "metadata": {},
32
- "outputs": [],
33
- "source": [
34
- "# Perform transformations on the DataFrame\n",
35
- "df_upper1 = df.withColumn(\"Description__c\", upper(col(\"Description__c\")))\n",
36
- "\n",
37
- "# Drop specific columns related to relationships\n",
38
- "df_upper1 = df_upper1.drop(\"SfdcOrganizationId__c\")\n",
39
- "df_upper1 = df_upper1.drop(\"KQ_Id__c\")\n",
40
- "\n",
41
- "df_upper1.show()"
42
- ]
43
- },
44
- {
45
- "cell_type": "code",
46
- "execution_count": null,
47
- "id": "3",
48
- "metadata": {},
49
- "outputs": [],
50
- "source": [
51
- "# Save the transformed DataFrame\n",
52
- "dlo_name = \"Account_std_copy__dll\"\n",
53
- "client.write_to_dlo(dlo_name, df_upper1, write_mode=WriteMode.APPEND)"
54
- ]
55
- },
56
- {
57
- "cell_type": "code",
58
- "execution_count": null,
59
- "id": "4",
60
- "metadata": {},
61
- "outputs": [],
62
- "source": []
63
- }
64
- ],
65
- "metadata": {
66
- "kernelspec": {
67
- "display_name": "Python 3 (ipykernel)",
68
- "language": "python",
69
- "name": "python3"
70
- },
71
- "language_info": {
72
- "codemirror_mode": {
73
- "name": "ipython",
74
- "version": 3
75
- },
76
- "file_extension": ".py",
77
- "mimetype": "text/x-python",
78
- "name": "python",
79
- "nbconvert_exporter": "python",
80
- "pygments_lexer": "ipython3",
81
- "version": "3.11.11"
82
- }
83
- },
84
- "nbformat": 4,
85
- "nbformat_minor": 5
86
- }
@@ -1,13 +0,0 @@
1
- id,name,position,manager_id
2
- 1,Alice,CEO,
3
- 2,Bob,VP Engineering,1
4
- 3,Charlie,VP Sales,1
5
- 4,David,Engineering Lead,2
6
- 5,Eve,Engineering Lead,2
7
- 6,Frank,Sales Manager,3
8
- 7,Grace,Software Eng.,4
9
- 8,Hannah,Software Eng.,4
10
- 9,Ian,Software Eng.,5
11
- 10,Jack,Sales Rep,6
12
- 11,Kelly,Sales Rep,6
13
- 12,Leo,Intern,7
@@ -1,78 +0,0 @@
1
- from pyspark.sql.functions import (
2
- col,
3
- concat_ws,
4
- lit,
5
- )
6
-
7
- from datacustomcode.client import Client
8
- from datacustomcode.io.writer.base import WriteMode
9
-
10
-
11
- def main():
12
- client = Client()
13
-
14
- employees = client.read_dlo("Employee__dll").persist()
15
- employees = employees.select("id__c", "manager_id__c", "name__c", "position__c")
16
- employees.show()
17
- employees_with_manager = (
18
- employees.alias("e")
19
- .join(
20
- employees.alias("m"),
21
- col("e.manager_id__c").cast("string") == col("m.id__c").cast("string"),
22
- "left",
23
- )
24
- .select(
25
- col("e.id__c"),
26
- col("e.name__c"),
27
- col("e.position__c"),
28
- col("e.manager_id__c"),
29
- col("m.name__c").alias("manager_name__c"),
30
- )
31
- .persist()
32
- )
33
-
34
- hierarchy_df = (
35
- employees_with_manager.filter(col("manager_id__c").isNull())
36
- .withColumn("hierarchy_level__c", lit(1))
37
- .withColumn("management_chain__c", col("name__c"))
38
- .persist()
39
- )
40
-
41
- current_level = 1
42
-
43
- while True:
44
- ewm = employees_with_manager.alias("ewm")
45
- hdf = hierarchy_df.filter(col("hierarchy_level__c") == current_level).alias(
46
- "hdf"
47
- )
48
-
49
- next_level_df = ewm.join(
50
- hdf,
51
- col("ewm.manager_id__c").cast("string") == col("hdf.id__c").cast("string"),
52
- "inner",
53
- ).select(
54
- col("ewm.id__c"),
55
- col("ewm.name__c"),
56
- col("ewm.position__c"),
57
- col("ewm.manager_id__c"),
58
- col("ewm.manager_name__c"),
59
- (col("hdf.hierarchy_level__c") + 1).alias("hierarchy_level__c"),
60
- concat_ws(" | ", col("hdf.management_chain__c"), col("ewm.name__c")).alias(
61
- "management_chain__c"
62
- ),
63
- )
64
-
65
- if next_level_df.isEmpty():
66
- break
67
-
68
- hierarchy_df = hierarchy_df.union(next_level_df).persist()
69
- current_level += 1
70
-
71
- hierarchy_df = hierarchy_df.orderBy("hierarchy_level__c", "manager_id__c", "id__c")
72
-
73
- dlo_name = "Employee_Hierarchy__dll"
74
- client.write_to_dlo(dlo_name, hierarchy_df, WriteMode.APPEND)
75
-
76
-
77
- if __name__ == "__main__":
78
- main()
@@ -1,97 +0,0 @@
1
- #!/bin/bash
2
-
3
- # Description: This script is used to start/stop the jupyter notebook in a docker container
4
-
5
- # Function to open browser based on OS
6
- open_browser() {
7
- local url=$1
8
- case "$(uname -s)" in
9
- Darwin*) # macOS
10
- open "$url"
11
- ;;
12
- Linux*) # Linux
13
- if command -v xdg-open &> /dev/null; then
14
- xdg-open "$url"
15
- elif command -v gnome-open &> /dev/null; then
16
- gnome-open "$url"
17
- else
18
- echo "Could not detect the web browser to use"
19
- return 1
20
- fi
21
- ;;
22
- CYGWIN*|MINGW32*|MSYS*|MINGW*) # Windows
23
- start "$url"
24
- ;;
25
- *)
26
- echo "Unknown operating system"
27
- return 1
28
- ;;
29
- esac
30
- }
31
-
32
- # Function to check if Docker is installed and running
33
- check_docker() {
34
- if ! command -v docker &> /dev/null; then
35
- echo "Docker is not installed"
36
- exit 1
37
- fi
38
- echo "Docker is installed"
39
- docker --version
40
-
41
- if ! docker info &> /dev/null; then
42
- echo "Docker daemon is not running"
43
- exit 1
44
- fi
45
- echo "Docker daemon is running"
46
- }
47
-
48
- # Function to start Jupyter server
49
- start_jupyter() {
50
- echo "Building the docker image"
51
- docker build -t datacloud-customcode .
52
-
53
- echo "Running the docker container"
54
- docker run -d --rm -p 8888:8888 \
55
- -v $(pwd):/workspace \
56
- --name jupyter-server \
57
- datacloud-customcode jupyter lab \
58
- --ip=0.0.0.0 \
59
- --port=8888 \
60
- --no-browser \
61
- --allow-root \
62
- --NotebookApp.token='' \
63
- --NotebookApp.password='' \
64
- --notebook-dir=/workspace
65
-
66
- sleep 3 # Wait for server to start
67
- open_browser "http://localhost:8888"
68
- }
69
-
70
- # Function to stop Jupyter server
71
- stop_jupyter() {
72
- echo "Stopping Jupyter server container..."
73
- if docker ps -q --filter "name=jupyter-server" | grep -q .; then
74
- docker stop jupyter-server
75
- echo "Jupyter server stopped successfully"
76
- else
77
- echo "No Jupyter server container running"
78
- fi
79
- }
80
-
81
- # Main script logic
82
- case "$1" in
83
- "start")
84
- check_docker
85
- start_jupyter
86
- ;;
87
- "stop")
88
- check_docker
89
- stop_jupyter
90
- ;;
91
- *)
92
- echo "Usage: $0 {start|stop}"
93
- echo " start - Start Jupyter server"
94
- echo " stop - Stop Jupyter server"
95
- exit 1
96
- ;;
97
- esac