salesforce-data-customcode 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datacustomcode/__init__.py +20 -0
- datacustomcode/cli.py +142 -0
- datacustomcode/client.py +227 -0
- datacustomcode/cmd.py +105 -0
- datacustomcode/config.py +149 -0
- datacustomcode/config.yaml +15 -0
- datacustomcode/credentials.py +97 -0
- datacustomcode/deploy.py +379 -0
- datacustomcode/io/__init__.py +14 -0
- datacustomcode/io/base.py +28 -0
- datacustomcode/io/reader/__init__.py +14 -0
- datacustomcode/io/reader/base.py +34 -0
- datacustomcode/io/reader/query_api.py +115 -0
- datacustomcode/io/writer/__init__.py +14 -0
- datacustomcode/io/writer/base.py +49 -0
- datacustomcode/io/writer/csv.py +41 -0
- datacustomcode/io/writer/print.py +33 -0
- datacustomcode/mixin.py +94 -0
- datacustomcode/py.typed +0 -0
- datacustomcode/run.py +47 -0
- datacustomcode/scan.py +153 -0
- datacustomcode/template.py +36 -0
- datacustomcode/templates/.devcontainer/devcontainer.json +10 -0
- datacustomcode/templates/Dockerfile +20 -0
- datacustomcode/templates/README.md +0 -0
- datacustomcode/templates/jupyterlab.sh +97 -0
- datacustomcode/templates/payload/config.json +1 -0
- datacustomcode/templates/payload/entrypoint.py +10 -0
- datacustomcode/templates/requirements-dev.txt +10 -0
- datacustomcode/templates/requirements.txt +1 -0
- salesforce_data_customcode-0.1.0.dist-info/LICENSE.txt +206 -0
- salesforce_data_customcode-0.1.0.dist-info/METADATA +159 -0
- salesforce_data_customcode-0.1.0.dist-info/RECORD +35 -0
- salesforce_data_customcode-0.1.0.dist-info/WHEEL +4 -0
- salesforce_data_customcode-0.1.0.dist-info/entry_points.txt +5 -0
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Copyright (c) 2025, Salesforce, Inc.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# Copyright (c) 2025, Salesforce, Inc.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from abc import abstractmethod
|
|
18
|
+
from enum import Enum
|
|
19
|
+
from typing import TYPE_CHECKING
|
|
20
|
+
|
|
21
|
+
from datacustomcode.io.base import BaseDataAccessLayer
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from pyspark.sql import DataFrame as PySparkDataFrame, SparkSession
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class WriteMode(str, Enum):
    """String-valued write modes accepted by the writers' ``write_to_*`` methods."""

    # The class mixes in ``str``, so every member is also a plain string and
    # compares equal to its literal value (``WriteMode.APPEND == "append"``).
    APPEND = "append"
    OVERWRITE = "overwrite"
    OVERWRITE_PARTITIONS = "overwrite_partitions"
    MERGE = "merge"
    MERGE_UPSERT_DELETE = "merge_upsert_delete"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class BaseDataCloudWriter(BaseDataAccessLayer):
    """Base class for Data Cloud writers.

    Keeps a reference to the Spark session and declares the two write
    operations, ``write_to_dlo`` and ``write_to_dmo``, that concrete writers
    implement.
    """

    def __init__(self, spark: SparkSession) -> None:
        """Store ``spark`` on the instance as ``self.spark``."""
        self.spark = spark

    @abstractmethod
    def write_to_dlo(
        self,
        name: str,
        dataframe: PySparkDataFrame,
        write_mode: WriteMode,
    ) -> None:
        """Write ``dataframe`` to the DLO called ``name`` using ``write_mode``."""

    @abstractmethod
    def write_to_dmo(
        self,
        name: str,
        dataframe: PySparkDataFrame,
        write_mode: WriteMode,
    ) -> None:
        """Write ``dataframe`` to the DMO called ``name`` using ``write_mode``."""
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Copyright (c) 2025, Salesforce, Inc.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
from pyspark.sql import DataFrame as PySparkDataFrame
|
|
18
|
+
|
|
19
|
+
from datacustomcode.io.writer.base import BaseDataCloudWriter, WriteMode
|
|
20
|
+
|
|
21
|
+
SUFFIX = ".csv"


class CSVDataCloudWriter(BaseDataCloudWriter):
    """Writer that saves dataframes as CSV through ``dataframe.write.csv``."""

    CONFIG_NAME = "CSVDataCloudWriter"

    def write_to_dlo(
        self,
        name: str,
        dataframe: PySparkDataFrame,
        write_mode: WriteMode,
    ) -> None:
        """Write ``dataframe`` as CSV to ``name``, appending ``.csv`` if missing.

        The suffix check is case-insensitive, so a name ending in ``.CSV`` is
        used unchanged.
        """
        target = name if name.lower().endswith(SUFFIX) else f"{name}{SUFFIX}"
        dataframe.write.csv(target, mode=write_mode)

    def write_to_dmo(
        self,
        name: str,
        dataframe: PySparkDataFrame,
        write_mode: WriteMode,
    ) -> None:
        """Write ``dataframe`` as CSV to ``name``, appending ``.csv`` if missing.

        The suffix check is case-insensitive, so a name ending in ``.CSV`` is
        used unchanged.
        """
        target = name if name.lower().endswith(SUFFIX) else f"{name}{SUFFIX}"
        dataframe.write.csv(target, mode=write_mode)
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Copyright (c) 2025, Salesforce, Inc.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
from pyspark.sql import DataFrame as PySparkDataFrame
|
|
18
|
+
|
|
19
|
+
from datacustomcode.io.writer.base import BaseDataCloudWriter, WriteMode
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class PrintDataCloudWriter(BaseDataCloudWriter):
    """Writer that displays the dataframe with ``show()`` instead of storing it."""

    CONFIG_NAME = "PrintDataCloudWriter"

    def write_to_dlo(
        self,
        name: str,
        dataframe: PySparkDataFrame,
        write_mode: WriteMode,
    ) -> None:
        """Show ``dataframe``; ``name`` and ``write_mode`` are not used."""
        dataframe.show()

    def write_to_dmo(
        self,
        name: str,
        dataframe: PySparkDataFrame,
        write_mode: WriteMode,
    ) -> None:
        """Show ``dataframe``; ``name`` and ``write_mode`` are not used."""
        dataframe.show()
|
datacustomcode/mixin.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# Copyright (c) 2025, Salesforce, Inc.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from typing import ClassVar, TypeVar
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _get_all_subclass_descendants(cls: type) -> list[type]:
    """Return ``cls`` followed by all of its direct and indirect subclasses.

    The hierarchy is walked depth-first through ``__subclasses__()`` and each
    class is reported exactly once, in the order it is first reached. A class
    reachable through several parents (diamond inheritance) is therefore not
    repeated.

    Args:
        cls: root of the hierarchy to walk; it is always the first element.

    Returns:
        The root class and every descendant, without duplicates.
    """
    ordered: list[type] = []
    seen: set[type] = set()
    pending = [cls]

    while pending:
        current = pending.pop()
        if current in seen:
            continue
        seen.add(current)
        ordered.append(current)
        # Push in reverse so the first-declared subclass is popped, and hence
        # visited, first (pre-order traversal).
        pending.extend(reversed(current.__subclasses__()))

    return ordered
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
_V = TypeVar("_V", bound="UserExtendableNamedConfigMixin")


class UserExtendableNamedConfigMixin:
    """Allows users to access extended classes by name in config.

    Our client code is text config driven. This means that if a user extends readers
    and writers, they wouldn't be able to add their own classes if they want to use a
    config. This allows them to name their subclasses and let them be found at runtime
    by the config driven execution.
    """

    # Name under which a concrete subclass can be selected from config.
    CONFIG_NAME: str

    # Registry shared by the whole hierarchy (it is always accessed through the
    # mixin class itself): maps each explicitly declared CONFIG_NAME to the
    # class that declared it.
    _registered_config_names: ClassVar[dict[str, type]] = {}

    def __init_subclass__(cls, **kwargs):
        """Validate CONFIG_NAME uniqueness across all subclasses.

        Only classes that declare their own, non-empty ``CONFIG_NAME`` in
        their class body are registered; classes that merely inherit one are
        skipped.

        Raises:
            TypeError: if another class already registered the same
                ``CONFIG_NAME``.
        """
        super().__init_subclass__(**kwargs)
        if "CONFIG_NAME" not in cls.__dict__:
            return
        if cls.CONFIG_NAME is None or cls.CONFIG_NAME == "":
            return

        registry = UserExtendableNamedConfigMixin._registered_config_names
        if cls.CONFIG_NAME in registry:
            existing_class = registry[cls.CONFIG_NAME]
            raise TypeError(
                f"Class {cls.__name__} has the same CONFIG_NAME ('{cls.CONFIG_NAME}') "
                f"as existing class {existing_class.__name__}. "
                f"Each concrete class must have a unique CONFIG_NAME."
            )
        registry[cls.CONFIG_NAME] = cls

    @classmethod
    def subclass_from_config_name(cls: type[_V], config_name: str) -> type[_V]:
        """Return the subclass whose ``CONFIG_NAME`` equals ``config_name``.

        The class itself is returned, not an instance of it. The lookup walks
        ``cls`` and its descendants on every call. This is and should stay
        dynamic because a user may interactively add subclasses through REPL
        systems like Jupyter.

        Args:
            config_name: should match a subclass's ``CONFIG_NAME``.

        Raises:
            KeyError: if no class in the hierarchy has that ``CONFIG_NAME``.
        """
        subclass_config_name_map = {}
        for type_ in _get_all_subclass_descendants(cls):
            if name := getattr(type_, "CONFIG_NAME", ""):
                subclass_config_name_map[name] = type_
        try:
            return subclass_config_name_map[config_name]
        except KeyError as exc:
            raise KeyError(
                "Passed config_name does not match any subclass CONFIG_NAME"
            ) from exc

    @classmethod
    def available_config_names(cls: type[_V]) -> list[str]:
        """Get all available config names from the subclasses.

        Each name is listed once, in the order it is first encountered, even
        when several classes expose it (for example a subclass that inherits
        its parent's ``CONFIG_NAME``).
        """
        names = (
            getattr(type_, "CONFIG_NAME", "")
            for type_ in _get_all_subclass_descendants(cls)
        )
        # dict.fromkeys removes duplicates while keeping first-seen order.
        return list(dict.fromkeys(name for name in names if name))
|
datacustomcode/py.typed
ADDED
|
File without changes
|
datacustomcode/run.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# Copyright (c) 2025, Salesforce, Inc.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
import importlib
|
|
16
|
+
import runpy
|
|
17
|
+
from typing import List, Union
|
|
18
|
+
|
|
19
|
+
from datacustomcode.config import config
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def run_entrypoint(
    entrypoint: str, config_file: Union[str, None], dependencies: List[str]
) -> None:
    """Run the entrypoint script with the given config and dependencies.

    Each dependency is first imported as a module. If that fails and the
    dependency contains a dot, it is retried as ``module.attribute``: the
    module part is imported and the attribute is looked up on it. Finally the
    entrypoint is executed with ``runpy`` as ``__main__``, seeded with this
    module's globals.

    Args:
        entrypoint: The entrypoint script to run.
        config_file: The config file to use.
        dependencies: The dependencies to import.

    Raises:
        ModuleNotFoundError: if a dependency (or its module part) cannot be
            imported.
        AttributeError: if the module part of a dotted dependency imports but
            lacks the named attribute.
    """
    if config_file:
        config.load(config_file)
    for dependency in dependencies:
        try:
            importlib.import_module(dependency)
        except ModuleNotFoundError as exc:
            module_name, dot, object_name = dependency.rpartition(".")
            if not dot:
                # Nothing to fall back to: re-raise the original error as-is
                # rather than chaining it to itself.
                raise
            try:
                module = importlib.import_module(module_name)
                getattr(module, object_name)
            except (ModuleNotFoundError, AttributeError) as inner_exc:
                raise inner_exc from exc
    runpy.run_path(entrypoint, init_globals=globals(), run_name="__main__")
|
datacustomcode/scan.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
# Copyright (c) 2025, Salesforce, Inc.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import ast
|
|
18
|
+
from typing import (
|
|
19
|
+
Dict,
|
|
20
|
+
List,
|
|
21
|
+
Union,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
import pydantic
|
|
25
|
+
|
|
26
|
+
DATA_ACCESS_METHODS = ["read_dlo", "read_dmo", "write_to_dlo", "write_to_dmo"]


class DataAccessLayerCalls(pydantic.BaseModel):
    """Object names passed to each Client read/write method in one file."""

    read_dlo: frozenset[str]
    read_dmo: frozenset[str]
    write_to_dlo: frozenset[str]
    write_to_dmo: frozenset[str]

    @pydantic.model_validator(mode="after")
    def validate_access_layer(self) -> DataAccessLayerCalls:
        """Reject combinations of reads and writes that are not allowed.

        The checks run in a fixed order and the first violated rule decides
        which ``ValueError`` is raised.
        """
        reads_dlo = bool(self.read_dlo)
        reads_dmo = bool(self.read_dmo)

        if reads_dlo and reads_dmo:
            raise ValueError("Cannot read from DLO and DMO in the same file.")
        if max(len(self.write_to_dlo), len(self.write_to_dmo)) > 1:
            raise ValueError(
                "Cannot write to more than one DLO or DMO in the same file."
            )
        if not (reads_dlo or reads_dmo):
            raise ValueError("Must read from at least one DLO or DMO.")
        if reads_dlo and self.write_to_dmo:
            raise ValueError("Cannot read from DLO and write to DMO in the same file.")
        if reads_dmo and self.write_to_dlo:
            raise ValueError("Cannot read from DMO and write to DLO in the same file.")
        return self

    @property
    def input_str(self) -> str:
        """One name from ``read_dlo`` if it is non-empty, else one from ``read_dmo``."""
        names = self.read_dlo if self.read_dlo else self.read_dmo
        return next(iter(names))

    @property
    def output_str(self) -> str:
        """One name from ``write_to_dlo`` if it is non-empty, else one from ``write_to_dmo``."""
        names = self.write_to_dlo if self.write_to_dlo else self.write_to_dmo
        return next(iter(names))
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class ClientMethodVisitor(ast.NodeVisitor):
    """AST Visitor that finds all instances of Client read/write method calls.

    A call is recorded when it has the form ``<name>.<method>(...)`` where
    ``<method>`` is one of ``read_dlo``, ``read_dmo``, ``write_to_dlo`` or
    ``write_to_dmo`` and the object name can be resolved to a string: either a
    string literal or a variable whose most recent assignment seen so far was
    a string literal. The object name is taken from the first positional
    argument or, when the call has no positional arguments, from the ``name``
    keyword argument.
    """

    def __init__(self) -> None:
        self._read_dlo_instances: set[str] = set()
        self._read_dmo_instances: set[str] = set()
        self._write_to_dlo_instances: set[str] = set()
        self._write_to_dmo_instances: set[str] = set()
        # Variable name -> string literal it was last assigned, or None when
        # the last assignment was anything other than a string literal.
        self.variable_values: Dict[str, Union[str, None]] = {}

    def visit_Assign(self, node: ast.Assign) -> None:
        """Track variable assignments that might be used as DLO/DMO names."""
        value = node.value
        is_str_literal = isinstance(value, ast.Constant) and isinstance(
            value.value, str
        )
        for target in node.targets:
            if isinstance(target, ast.Name):
                self.variable_values[target.id] = (
                    value.value if is_str_literal else None
                )
        self.generic_visit(node)

    def _instances_for(self, method_name: str) -> Union[set, None]:
        """Return the result set for ``method_name``, or None if it is not tracked."""
        return {
            "read_dlo": self._read_dlo_instances,
            "read_dmo": self._read_dmo_instances,
            "write_to_dlo": self._write_to_dlo_instances,
            "write_to_dmo": self._write_to_dmo_instances,
        }.get(method_name)

    @staticmethod
    def _name_argument(node: ast.Call) -> Union[ast.expr, None]:
        """Return the expression that supplies the object name of a call."""
        if node.args:
            return node.args[0]
        # No positional arguments: accept the name passed as ``name=...``.
        for keyword in node.keywords:
            if keyword.arg == "name":
                return keyword.value
        return None

    def visit_Call(self, node: ast.Call) -> None:
        """Visit a method call and check if it's a Client read/write method."""
        func = node.func
        if isinstance(func, ast.Attribute) and isinstance(func.value, ast.Name):
            instances = self._instances_for(func.attr)
            arg = self._name_argument(node) if instances is not None else None

            name = None
            if isinstance(arg, ast.Constant) and isinstance(arg.value, str):
                name = arg.value
            elif isinstance(arg, ast.Name) and arg.id in self.variable_values:
                name = self.variable_values[arg.id]

            # Unresolvable (None) and empty names are not recorded.
            if name:
                instances.add(name)
        self.generic_visit(node)

    def found(self) -> DataAccessLayerCalls:
        """Return everything collected so far as a ``DataAccessLayerCalls``."""
        return DataAccessLayerCalls(
            read_dlo=frozenset(self._read_dlo_instances),
            read_dmo=frozenset(self._read_dmo_instances),
            write_to_dlo=frozenset(self._write_to_dlo_instances),
            write_to_dmo=frozenset(self._write_to_dmo_instances),
        )
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def scan_file(file_path: str) -> DataAccessLayerCalls:
    """Scan a single Python file for Client read/write method calls.

    The file is read as bytes so that ``ast.parse`` decodes it by Python's
    own source-encoding rules (UTF-8 unless the file declares otherwise)
    rather than by the platform's locale encoding.

    Args:
        file_path: path of the Python source file to scan.

    Returns:
        The calls found in the file, as built by ``ClientMethodVisitor.found``.
    """
    with open(file_path, "rb") as f:
        source = f.read()
    # Passing the filename makes syntax errors point at the scanned file.
    tree = ast.parse(source, filename=file_path)
    visitor = ClientMethodVisitor()
    visitor.visit(tree)
    return visitor.found()
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def dc_config_json_from_file(file_path: str) -> dict:
    """Create a Data Cloud Custom Code config JSON from a script.

    The script is scanned with ``scan_file`` and the names it reads and
    writes become the ``permissions`` section of the returned config.

    Args:
        file_path: path of the entrypoint script to scan.

    Returns:
        A dict with the keys ``entryPoint`` (the script's file name without
        directories), ``dataspace`` (always ``"default"``) and
        ``permissions`` (``read`` / ``write`` mappings keyed by ``"dlo"`` or
        ``"dmo"``).
    """
    # Imported locally so the function does not rely on a module-level import.
    import os

    output = scan_file(file_path)

    read: Dict[str, List[str]] = {}
    if output.read_dlo:
        read["dlo"] = list(output.read_dlo)
    else:
        read["dmo"] = list(output.read_dmo)

    write: Dict[str, List[str]] = {}
    if output.write_to_dlo:
        write["dlo"] = list(output.write_to_dlo)
    else:
        write["dmo"] = list(output.write_to_dmo)

    config: Dict[str, Union[str, Dict[str, Dict[str, List[str]]]]] = {
        # basename understands the platform's path separators, so a Windows
        # path such as ``payload\entrypoint.py`` also yields just the file name.
        "entryPoint": os.path.basename(file_path),
        "dataspace": "default",
        "permissions": {
            "read": read,
            "write": write,
        },
    }
    return config
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Copyright (c) 2025, Salesforce, Inc.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
import os
|
|
16
|
+
import shutil
|
|
17
|
+
|
|
18
|
+
from loguru import logger
|
|
19
|
+
|
|
20
|
+
template_dir = os.path.join(os.path.dirname(__file__), "templates")


def copy_template(target_dir: str) -> None:
    """Copy every top-level entry of the bundled template into ``target_dir``.

    ``target_dir`` is created when it does not exist. Directories are copied
    recursively and merged into existing ones; files are copied with
    ``shutil.copy2``.
    """
    os.makedirs(target_dir, exist_ok=True)

    for entry_name in os.listdir(template_dir):
        source = os.path.join(template_dir, entry_name)
        destination = os.path.join(target_dir, entry_name)

        if not os.path.isdir(source):
            logger.debug(f"Copying file {source} to {destination}...")
            shutil.copy2(source, destination)
            continue

        logger.debug(f"Copying directory {source} to {destination}...")
        shutil.copytree(source, destination, dirs_exist_ok=True)
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
FROM public.ecr.aws/emr-on-eks/spark/emr-7.3.0:latest

# Package installation and directory creation below need root.
USER root

ENV ENVIRONMENT=Outside_DataCloud

# Install the development requirements
COPY requirements-dev.txt ./requirements-dev.txt
RUN pip3 install --no-cache-dir -r requirements-dev.txt

# Install the requirements of the custom code
COPY requirements.txt ./requirements.txt
RUN pip3 install --no-cache-dir -r requirements.txt

# Create the workspace directory and hand it to the runtime user, so the
# container can write to it even when no volume is mounted over it
RUN mkdir -p /workspace && chown hadoop:hadoop /workspace

# Set user and working directory
USER hadoop:hadoop
WORKDIR /workspace
|
|
File without changes
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
|
|
3
|
+
# Description: This script is used to start/stop the jupyter notebook in a docker container
|
|
4
|
+
|
|
5
|
+
# Open a URL in the default browser of the host operating system.
#   $1 - URL to open
# Returns 1 when no suitable opener is found or the OS is not recognised.
open_browser() {
    local url=$1
    local kernel
    kernel="$(uname -s)"

    case "$kernel" in
        Darwin*)
            # macOS
            open "$url"
            ;;
        Linux*)
            # Linux: use the first opener that is installed
            if command -v xdg-open > /dev/null 2>&1; then
                xdg-open "$url"
            elif command -v gnome-open > /dev/null 2>&1; then
                gnome-open "$url"
            else
                echo "Could not detect the web browser to use"
                return 1
            fi
            ;;
        CYGWIN*|MINGW32*|MSYS*|MINGW*)
            # Windows (Cygwin / MSYS / MinGW shells)
            start "$url"
            ;;
        *)
            echo "Unknown operating system"
            return 1
            ;;
    esac
}
|
|
31
|
+
|
|
32
|
+
# Verify that the docker CLI is installed and that the daemon answers.
# Exits the script with status 1 when either check fails.
check_docker() {
    command -v docker > /dev/null 2>&1 || {
        echo "Docker is not installed"
        exit 1
    }
    echo "Docker is installed"
    docker --version

    docker info > /dev/null 2>&1 || {
        echo "Docker daemon is not running"
        exit 1
    }
    echo "Docker daemon is running"
}
|
|
47
|
+
|
|
48
|
+
# Build the image, start JupyterLab in a detached container with the current
# directory mounted at /workspace, then open it in the browser.
# Returns 1 when the image cannot be built or the container cannot be started.
start_jupyter() {
    echo "Building the docker image"
    if ! docker build -t datacloud-byoc .; then
        echo "Failed to build the docker image"
        return 1
    fi

    echo "Running the docker container"
    # The server runs without token or password, so the port is published on
    # the loopback interface only; it stays reachable at http://localhost:8888
    # but not from other machines on the network.
    # "$(pwd)" is quoted so that paths containing spaces are mounted correctly.
    if ! docker run -d --rm -p 127.0.0.1:8888:8888 \
        -v "$(pwd)":/workspace \
        --name jupyter-server \
        datacloud-byoc jupyter lab \
        --ip=0.0.0.0 \
        --port=8888 \
        --no-browser \
        --allow-root \
        --NotebookApp.token='' \
        --NotebookApp.password='' \
        --notebook-dir=/workspace; then
        echo "Failed to start the Jupyter server container"
        return 1
    fi

    sleep 3  # Wait for server to start
    open_browser "http://localhost:8888"
}
|
|
69
|
+
|
|
70
|
+
# Stop the jupyter-server container if one is running.
# Returns 1 when the container is running but could not be stopped.
stop_jupyter() {
    echo "Stopping Jupyter server container..."
    if docker ps -q --filter "name=jupyter-server" | grep -q .; then
        # Only report success when docker actually stopped the container.
        if docker stop jupyter-server; then
            echo "Jupyter server stopped successfully"
        else
            echo "Failed to stop Jupyter server container"
            return 1
        fi
    else
        echo "No Jupyter server container running"
    fi
}
|
|
80
|
+
|
|
81
|
+
# Main script logic: dispatch on the first command-line argument
if [ "$1" = "start" ]; then
    check_docker
    start_jupyter
elif [ "$1" = "stop" ]; then
    check_docker
    stop_jupyter
else
    echo "Usage: $0 {start|stop}"
    echo "  start - Start Jupyter server"
    echo "  stop  - Stop Jupyter server"
    exit 1
fi
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Packages required for the custom code
|