demo_uc_setup 0.1.0__tar.gz
- demo_uc_setup-0.1.0/.gitignore +62 -0
- demo_uc_setup-0.1.0/.python-version +1 -0
- demo_uc_setup-0.1.0/PKG-INFO +105 -0
- demo_uc_setup-0.1.0/README.md +94 -0
- demo_uc_setup-0.1.0/configuration_approach.txt +38 -0
- demo_uc_setup-0.1.0/demo_uc_setup/__init__.py +0 -0
- demo_uc_setup-0.1.0/demo_uc_setup/common.py +49 -0
- demo_uc_setup-0.1.0/demo_uc_setup/config.py +23 -0
- demo_uc_setup-0.1.0/demo_uc_setup/unity_catalog_setup.py +82 -0
- demo_uc_setup-0.1.0/demo_uc_setup/unity_catalog_teardown.py +28 -0
- demo_uc_setup-0.1.0/main.py +31 -0
- demo_uc_setup-0.1.0/pyproject.toml +21 -0
- demo_uc_setup-0.1.0/setup.py +7 -0
- demo_uc_setup-0.1.0/teardown.py +7 -0
- demo_uc_setup-0.1.0/uv.lock +1186 -0
demo_uc_setup-0.1.0/.gitignore
@@ -0,0 +1,62 @@
+# Created by https://www.toptal.com/developers/gitignore/api/macos,visualstudiocode
+# Edit at https://www.toptal.com/developers/gitignore?templates=macos,visualstudiocode
+
+### macOS ###
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+### macOS Patch ###
+# iCloud generated files
+*.icloud
+
+### VisualStudioCode ###
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
+!.vscode/*.code-snippets
+
+# Local History for Visual Studio Code
+.history/
+
+# Built Visual Studio Code Extensions
+*.vsix
+
+### VisualStudioCode Patch ###
+# Ignore all local history of files
+.history
+.ionide
+
+# End of https://www.toptal.com/developers/gitignore/api/macos,visualstudiocode
+
+.databricks
+*.pdf
+__pycache__
+.env
+.venv
demo_uc_setup-0.1.0/.python-version
@@ -0,0 +1 @@
+3.11
demo_uc_setup-0.1.0/PKG-INFO
@@ -0,0 +1,105 @@
+Metadata-Version: 2.4
+Name: demo_uc_setup
+Version: 0.1.0
+Summary: A reusable task-based framework for managing Unity Catalog in Databricks
+Requires-Python: >=3.11
+Requires-Dist: databricks-connect>=16.1.1
+Requires-Dist: databricks-sdk>=0.44.1
+Requires-Dist: pydantic-settings>=2.8.0
+Requires-Dist: pydantic>=2.10.6
+Description-Content-Type: text/markdown
+
+# Databricks Unity Catalog Setup Demo
+
+A Python package that demonstrates automated setup and teardown of Databricks Unity Catalog resources using the Databricks SDK. This package provides a reusable framework for managing Unity Catalog resources programmatically, both from local environments and within Databricks notebooks.
+
+## Features
+
+- Automated creation of Unity Catalog resources:
+  - Catalogs
+  - Schemas
+  - Volumes
+- Configurable resource naming via environment variables
+- Support for both local execution and Databricks notebook execution
+- Type-safe configuration management using Pydantic
+- Clean teardown functionality
+
+## Prerequisites
+
+- Python 3.11+
+- A Databricks workspace with Unity Catalog enabled
+- Appropriate permissions to create/manage Unity Catalog resources
+
+## Installation
+
+```bash
+pip install demo-uc-setup
+```
+
+## Configuration
+
+The package uses environment variables for configuration. You can set these either in your environment or in a `.env` file:
+
+```env
+# Required for local execution (optional in Databricks notebooks)
+DATABRICKS_HOST=your-workspace-url
+DATABRICKS_TOKEN=your-pat-token
+
+# Optional - override default resource names
+DEMO_CATALOG_NAME=custom_catalog_name
+DEMO_SCHEMAS=["schema1", "schema2"]
+DEMO_VOLUME_NAME=custom_volume_name
+```
+
+## Usage
+
+### Local Execution
+
+```python
+from demo_uc_setup.unity_catalog_setup import UnityCatalogSetupTask
+from demo_uc_setup.unity_catalog_teardown import UnityCatalogTeardownTask
+
+# Setup Unity Catalog resources
+UnityCatalogSetupTask.entrypoint()
+
+# Teardown Unity Catalog resources
+UnityCatalogTeardownTask.entrypoint()
+```
+
+### Databricks Notebook Execution
+
+```python
+%pip install demo-uc-setup
+
+from demo_uc_setup.unity_catalog_setup import UnityCatalogSetupTask
+UnityCatalogSetupTask.entrypoint()
+```
+
+## Default Resource Names
+
+If not overridden via environment variables, the package will create:
+- A catalog named `demo_catalog`
+- Two schemas: `demo_schema_1` and `demo_schema_2`
+- A volume named `demo_volume` in each schema
+
+## Extending the Framework
+
+The package provides a reusable `Task` base class that can be extended for custom Unity Catalog operations:
+
+```python
+from demo_uc_setup.common import Task
+from demo_uc_setup.config import Config
+
+class CustomTask(Task[Config]):
+    def run(self):
+        self.logger.info("Starting custom task...")
+        # Your custom logic here
+```
+
+## Contributing
+
+Contributions are welcome! Please feel free to submit a Pull Request.
+
+## License
+
+This project is licensed under the MIT License - see the LICENSE file for details.
demo_uc_setup-0.1.0/README.md
@@ -0,0 +1,94 @@
+# Databricks Unity Catalog Setup Demo
+
+A Python package that demonstrates automated setup and teardown of Databricks Unity Catalog resources using the Databricks SDK. This package provides a reusable framework for managing Unity Catalog resources programmatically, both from local environments and within Databricks notebooks.
+
+## Features
+
+- Automated creation of Unity Catalog resources:
+  - Catalogs
+  - Schemas
+  - Volumes
+- Configurable resource naming via environment variables
+- Support for both local execution and Databricks notebook execution
+- Type-safe configuration management using Pydantic
+- Clean teardown functionality
+
+## Prerequisites
+
+- Python 3.11+
+- A Databricks workspace with Unity Catalog enabled
+- Appropriate permissions to create/manage Unity Catalog resources
+
+## Installation
+
+```bash
+pip install demo-uc-setup
+```
+
+## Configuration
+
+The package uses environment variables for configuration. You can set these either in your environment or in a `.env` file:
+
+```env
+# Required for local execution (optional in Databricks notebooks)
+DATABRICKS_HOST=your-workspace-url
+DATABRICKS_TOKEN=your-pat-token
+
+# Optional - override default resource names
+DEMO_CATALOG_NAME=custom_catalog_name
+DEMO_SCHEMAS=["schema1", "schema2"]
+DEMO_VOLUME_NAME=custom_volume_name
+```
+
+## Usage
+
+### Local Execution
+
+```python
+from demo_uc_setup.unity_catalog_setup import UnityCatalogSetupTask
+from demo_uc_setup.unity_catalog_teardown import UnityCatalogTeardownTask

+
+# Setup Unity Catalog resources
+UnityCatalogSetupTask.entrypoint()
+
+# Teardown Unity Catalog resources
+UnityCatalogTeardownTask.entrypoint()
+```
+
+### Databricks Notebook Execution
+
+```python
+%pip install demo-uc-setup
+
+from demo_uc_setup.unity_catalog_setup import UnityCatalogSetupTask
+UnityCatalogSetupTask.entrypoint()
+```
+
+## Default Resource Names
+
+If not overridden via environment variables, the package will create:
+- A catalog named `demo_catalog`
+- Two schemas: `demo_schema_1` and `demo_schema_2`
+- A volume named `demo_volume` in each schema
+
+## Extending the Framework
+
+The package provides a reusable `Task` base class that can be extended for custom Unity Catalog operations:
+
+```python
+from demo_uc_setup.common import Task
+from demo_uc_setup.config import Config
+
+class CustomTask(Task[Config]):
+    def run(self):
+        self.logger.info("Starting custom task...")
+        # Your custom logic here
+```
+
+## Contributing
+
+Contributions are welcome! Please feel free to submit a Pull Request.
+
+## License
+
+This project is licensed under the MIT License - see the LICENSE file for details.
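Taken together with the `main.py` shown later in this diff, the Usage section above can also be exercised from the command line. Assuming the environment variables from the Configuration section are set, a local run would presumably look like:

```bash
# Create the demo catalog, schemas, and volumes
python main.py setup

# Remove them again (the teardown deletes the catalog with force=True)
python main.py teardown
```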
demo_uc_setup-0.1.0/configuration_approach.txt
@@ -0,0 +1,38 @@
+# Pythonic Config Management Approach
+Type variables are a key concept in generic programming, allowing for the creation of flexible and reusable code that can work with multiple data types while maintaining type safety. In the context of Python configuration management, type variables play a crucial role in implementing generic classes and methods, as demonstrated in the common.py file of the chatten project, where they are used to create a versatile Task class that can work with different configuration types.
+
+## Pythonic Configuration with Pydantic
+Pydantic's BaseSettings class forms the foundation of a flexible, type-safe configuration management system in the chatten project. This approach centralizes configuration parameters, leveraging Pydantic's automatic type validation and easy integration with environment variables and command-line arguments[1][2]. The configuration can be seamlessly imported into various tasks, as demonstrated in the loader.py and indexer.py files, while the databricks.yml file showcases parameterization for cloud deployment[3]. This method offers a balance between flexibility and type safety, particularly suitable for Python projects in cloud environments like Databricks, though it may not be ideal for scenarios requiring language-agnostic configurations.
+
+Citations:
+[1] https://docs.pydantic.dev/latest/api/pydantic_settings/
+[2] https://docs.pydantic.dev/2.4/concepts/pydantic_settings/
+[3] https://dzone.com/articles/order-in-chaos-python-configuration-management-for
+
+## Task Class Structure Explained
+The `Task` class in `common.py` serves as a foundation for various tasks within the `chatten_rag` package, reducing boilerplate code and providing a reusable structure. It utilizes a generic type `T`, bound to the `Config` class, allowing each task to have its own specific configuration while maintaining type safety. The class initializes common components such as SparkSession, logger, and Databricks WorkspaceClient[1]. A key feature is the dynamic creation of configuration instances in the `__init__` method, where `self.config: T = self.config_class()` instantiates the task-specific configuration[1]. The `entrypoint` class method offers a standardized way to execute tasks, handling logging, configuration setup, and Spark environment initialization[1].
+
+Citations:
+[1] https://github.com/renardeinside/chatten/blob/main/packages/chatten_rag/chatten_rag/common.py
+
+## Understanding Type Variables in Python
+Type variables serve as placeholders for specific types in generic programming, allowing for the creation of flexible and reusable code. They are typically denoted by single uppercase letters like T, U, or V, and can represent any non-primitive type, including class types, interface types, array types, or even other type variables[1][2]. In Python, type variables are defined using the TypeVar construct from the typing module, enabling the specification of generic types in type hints[3]. In practice, type variables:
+
+* Enhance code reusability by enabling functions or classes to work with multiple types
+* Enable static type checking (for example with mypy), catching type errors before runtime
+* Reduce the need for explicit casts and isinstance checks where a value's type is tied to a type variable
+* Are distinct from type parameters, which are the formal declarations in class or method signatures[4]
+
+Citations:
+[1] https://stackoverflow.com/questions/42847287/what-is-type-variable-in-haskell-java
+[2] https://docs.oracle.com/javase/tutorial/java/generics/types.html
+[3] https://realpython.com/python-variables/
+[4] https://stackoverflow.com/questions/7075363/definition-of-type-variable-and-parameter
+
+## Example of Configuration Management in the Chatten Project
+The chatten project demonstrates an efficient approach to configuration management using Pydantic's BaseSettings. In the `config.py` file, a `Config` class is defined that inherits from `BaseSettings`, allowing for easy configuration sharing across multiple workflows and applications[1]. This class includes various settings such as database configurations, model parameters, and API keys, all with type annotations for improved safety and clarity.
+
+The configuration is then seamlessly integrated into task files like `loader.py` and `indexer.py`[1]. These tasks import the `Config` class and utilize its properties, demonstrating how easily the shared configuration can be referenced and used across different components of the project. This approach not only centralizes configuration management but also leverages Pydantic's built-in validation and environment variable integration, making it a flexible and maintainable solution for complex Python projects, particularly those deployed in cloud environments like Databricks.
+
+Citations:
+[1] https://gist.github.com/renardeinside
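To make the pattern these notes describe concrete, here is a minimal, self-contained sketch of a generic task bound to a Pydantic settings class. It mirrors the shape of `common.py` and `config.py` later in this diff, but `AppConfig` and `GreeterTask` are illustrative names, not part of the packaged code:

```python
import logging
from typing import Generic, TypeVar

from pydantic_settings import BaseSettings


class AppConfig(BaseSettings):
    # Each field can be overridden by an environment variable of the same name.
    greeting: str = "hello"


# T may be AppConfig or any subclass of it; the bound keeps type checkers honest.
T = TypeVar("T", bound=AppConfig)


class BaseTask(Generic[T]):
    def __init__(self, config_class: type[T]) -> None:
        self.config: T = config_class()  # instantiate the typed configuration
        self.logger = logging.getLogger(self.__class__.__name__)

    @classmethod
    def entrypoint(cls, *args, **kwargs) -> None:
        cls(*args, **kwargs).run()  # build an instance and run it

    def run(self) -> None:
        raise NotImplementedError("Subclasses implement run()")


class GreeterTask(BaseTask[AppConfig]):
    def __init__(self) -> None:
        super().__init__(AppConfig)

    def run(self) -> None:
        self.logger.info("%s from %s", self.config.greeting, self.__class__.__name__)


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    GreeterTask.entrypoint()  # logs "hello from GreeterTask" unless GREETING overrides it
```

Because `T` is bound to `AppConfig`, a type checker will flag a `BaseTask[str]` or a config class that does not derive from `AppConfig`, which is exactly the safety property the notes above emphasize.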
demo_uc_setup-0.1.0/demo_uc_setup/__init__.py
File without changes (empty file)
demo_uc_setup-0.1.0/demo_uc_setup/common.py
@@ -0,0 +1,49 @@
+import logging
+from typing import TypeVar, Generic
+from databricks.sdk import WorkspaceClient
+
+from demo_uc_setup.config import Config
+
+# Example of a type variable bound to our Config class
+T = TypeVar("T", bound=Config)
+
+class Task(Generic[T]):
+    """
+    A reusable Task base class that works both locally and in Databricks notebooks.
+    When running locally, requires databricks_host and databricks_token.
+    When running in a notebook, these parameters are optional.
+    """
+
+    def __init__(self, config_class: type[T]):
+        # Instantiate the typed configuration
+        self.config: T = config_class()
+        # Setup a basic logger
+        self.logger = logging.getLogger(self.__class__.__name__)
+        logging.basicConfig(level=logging.INFO)
+
+        # Create a Databricks workspace client with or without credentials
+        if self.config.databricks_host and self.config.databricks_token:
+            # Local execution with credentials
+            self.workspace_client = WorkspaceClient(
+                host=self.config.databricks_host,
+                token=self.config.databricks_token
+            )
+        else:
+            # Notebook execution - no credentials needed
+            self.workspace_client = WorkspaceClient()
+
+    @classmethod
+    def entrypoint(cls, *args, **kwargs):
+        """
+        Creates an instance of the task and runs it. If you
+        want a consistent run pattern, place it here.
+        """
+        instance = cls(*args, **kwargs)
+        instance.run()
+
+    def run(self):
+        """
+        The main entrypoint for the task's execution.
+        Override this in subclasses to implement custom logic.
+        """
+        self.logger.info("Base Task run method. Override in subclasses.")
demo_uc_setup-0.1.0/demo_uc_setup/config.py
@@ -0,0 +1,23 @@
+from pydantic_settings import BaseSettings
+from typing import Optional
+
+class Config(BaseSettings):
+    """
+    Configuration class using Pydantic BaseSettings.
+    By default, each field can be overridden by environment
+    variables matching the field name (in uppercase).
+    For example, DATABRICKS_HOST, DATABRICKS_TOKEN, etc.
+    """
+
+    # Databricks connection settings - optional for notebook execution
+    databricks_host: Optional[str] = None
+    databricks_token: Optional[str] = None
+
+    # Default names for Unity Catalog demo objects
+    demo_catalog_name: str = "demo_catalog"
+    demo_schemas: list[str] = ["demo_schema_1", "demo_schema_2"]  # List of schemas
+    demo_volume_name: str = "demo_volume"  # This could also be a list if needed
+
+    class Config:
+        env_file = ".env"  # or any custom file, if desired
+        env_file_encoding = "utf-8"
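A quick, hypothetical illustration of how this settings class picks up overrides; it relies on pydantic-settings' default behaviour of matching environment variables case-insensitively and parsing complex fields such as `list[str]` from JSON-encoded strings:

```python
import os

from demo_uc_setup.config import Config

# Scalar fields map to same-named environment variables (case-insensitive).
os.environ["DEMO_CATALOG_NAME"] = "my_catalog"
# List-valued fields are parsed from a JSON-encoded string.
os.environ["DEMO_SCHEMAS"] = '["bronze", "silver", "gold"]'

cfg = Config()
print(cfg.demo_catalog_name)  # my_catalog
print(cfg.demo_schemas)       # ['bronze', 'silver', 'gold']
print(cfg.demo_volume_name)   # demo_volume (default, nothing overrides it)
```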
demo_uc_setup-0.1.0/demo_uc_setup/unity_catalog_setup.py
@@ -0,0 +1,82 @@
+from typing import Type
+from databricks.sdk.service import catalog
+from demo_uc_setup.common import Task, T
+from demo_uc_setup.config import Config
+
+class UnityCatalogSetupTask(Task[Config]):
+    """
+    A task to ensure catalogs, schemas, and volumes exist
+    in the Databricks workspace. Uses typed config for
+    resource names, credentials, etc.
+    """
+
+    def __init__(self, config_class: Type[T] = Config):
+        super().__init__(config_class)
+
+    def run(self):
+        self.logger.info("Starting Unity Catalog setup...")
+
+        # 1) Ensure the catalog exists
+        catalog_name = self.config.demo_catalog_name
+        self.logger.info(f"Ensuring catalog '{catalog_name}'")
+        try:
+            self.workspace_client.catalogs.get(name=catalog_name)
+            self.logger.info(f"Catalog '{catalog_name}' already exists.")
+        except Exception:
+            self.logger.info(f"Catalog '{catalog_name}' not found; creating it.")
+            self.workspace_client.catalogs.create(
+                name=catalog_name,
+                comment="Demo Catalog for Databricks demos"
+            )
+
+        # 2) Ensure all schemas exist and create volumes within each schema
+        for schema_name in self.config.demo_schemas:
+            # Create schema
+            self.logger.info(f"Ensuring schema '{catalog_name}.{schema_name}'")
+            try:
+                self.workspace_client.schemas.get(
+                    name=schema_name,
+                    catalog_name=catalog_name
+                )
+                self.logger.info(f"Schema '{catalog_name}.{schema_name}' already exists.")
+            except Exception:
+                try:
+                    self.logger.info(f"Schema '{catalog_name}.{schema_name}' not found; creating it.")
+                    self.workspace_client.schemas.create(
+                        name=schema_name,
+                        catalog_name=catalog_name,
+                        comment=f"Demo Schema {schema_name} for Databricks demos"
+                    )
+                except Exception as e:
+                    if "already exists" in str(e):
+                        self.logger.info(f"Schema '{catalog_name}.{schema_name}' already exists (caught during creation).")
+                    else:
+                        raise e
+
+            # Create volume within this schema
+            volume_name = self.config.demo_volume_name
+            self.logger.info(f"Ensuring volume '{catalog_name}.{schema_name}.{volume_name}'")
+            try:
+                self.workspace_client.volumes.get(
+                    name=volume_name,
+                    catalog_name=catalog_name,
+                    schema_name=schema_name
+                )
+                self.logger.info(f"Volume '{catalog_name}.{schema_name}.{volume_name}' already exists.")
+            except Exception:
+                try:
+                    self.logger.info(f"Volume '{catalog_name}.{schema_name}.{volume_name}' not found; creating it.")
+                    self.workspace_client.volumes.create(
+                        name=volume_name,
+                        catalog_name=catalog_name,
+                        schema_name=schema_name,
+                        volume_type=catalog.VolumeType.MANAGED,
+                        comment=f"Demo Volume for schema {schema_name}"
+                    )
+                except Exception as e:
+                    if "already exists" in str(e):
+                        self.logger.info(f"Volume '{catalog_name}.{schema_name}.{volume_name}' already exists (caught during creation).")
+                    else:
+                        raise e
+
+        self.logger.info("Unity Catalog setup complete!")
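Since the constructor signature is `__init__(self, config_class: Type[T] = Config)` and `entrypoint()` forwards its arguments to the constructor, the task can be pointed at different resource names by passing a `Config` subclass. A hypothetical example (the `DevConfig` names are illustrative):

```python
from demo_uc_setup.config import Config
from demo_uc_setup.unity_catalog_setup import UnityCatalogSetupTask


class DevConfig(Config):
    # Override the packaged defaults; environment variables still win at runtime.
    demo_catalog_name: str = "dev_catalog"
    demo_schemas: list[str] = ["raw", "curated"]


# entrypoint() builds UnityCatalogSetupTask(config_class=DevConfig) and calls run(),
# so the catalog, schemas, and volumes are created under the dev names above.
UnityCatalogSetupTask.entrypoint(config_class=DevConfig)
```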
demo_uc_setup-0.1.0/demo_uc_setup/unity_catalog_teardown.py
@@ -0,0 +1,28 @@
+from typing import Type
+from databricks.sdk import WorkspaceClient
+from demo_uc_setup.common import Task, T
+from demo_uc_setup.config import Config
+
+class UnityCatalogTeardownTask(Task[Config]):
+    """
+    A task to delete (teardown) the Unity Catalog resources in
+    the configured Databricks workspace. Uses typed config for
+    resource names, credentials, etc.
+    """
+
+    def __init__(self, config_class: Type[T] = Config):
+        super().__init__(config_class)
+
+    def run(self):
+        self.logger.info("Starting teardown of Unity Catalog resources...")
+
+        catalog_name = self.config.demo_catalog_name
+        self.logger.info(f"Deleting catalog '{catalog_name}' and its dependencies (force=True).")
+
+        try:
+            self.workspace_client.catalogs.delete(name=catalog_name, force=True)
+            self.logger.info(f"Catalog '{catalog_name}' (and its contents) successfully deleted.")
+        except Exception as e:
+            self.logger.error(f"Failed to delete catalog '{catalog_name}'. Reason: {e}")
+
+        self.logger.info("Unity Catalog teardown complete!")
demo_uc_setup-0.1.0/main.py
@@ -0,0 +1,31 @@
+"""
+Main entrypoint to run the Unity Catalog resource creation using
+the Databricks Python SDK and the flexible Pydantic-based config.
+
+Usage:
+    python main.py setup     # Run setup
+    python main.py teardown  # Run teardown
+"""
+
+import sys
+from demo_uc_setup.unity_catalog_setup import UnityCatalogSetupTask
+from demo_uc_setup.unity_catalog_teardown import UnityCatalogTeardownTask
+
+def print_usage():
+    print("Usage: python main.py [setup|teardown]")
+    print("  setup    - Run Unity Catalog setup")
+    print("  teardown - Run Unity Catalog teardown")
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2 or sys.argv[1] not in ["setup", "teardown"]:
+        print_usage()
+        sys.exit(1)
+
+    if sys.argv[1] == "setup":
+        UnityCatalogSetupTask.entrypoint()
+    else:
+        UnityCatalogTeardownTask.entrypoint()
+
+# Option B) Or instantiate directly:
+# task = UnityCatalogSetupTask()
+# task.run()
demo_uc_setup-0.1.0/pyproject.toml
@@ -0,0 +1,21 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "demo_uc_setup"
+version = "0.1.0"
+description = "A reusable task-based framework for managing Unity Catalog in Databricks"
+readme = "README.md"
+requires-python = ">=3.11"
+dependencies = [
+    "databricks-connect>=16.1.1",
+    "databricks-sdk>=0.44.1",
+    "pydantic>=2.10.6",
+    "pydantic-settings>=2.8.0",
+]
+
+[dependency-groups]
+dev = [
+    "hatch>=1.14.0",
+]
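Given the `.python-version` (3.11) and `uv.lock` files in this release and the `[dependency-groups]` table above, a local development environment can presumably be bootstrapped with uv along these lines:

```bash
# Create a Python 3.11 virtual environment and install the locked
# runtime and dev dependencies from uv.lock
uv sync

# Run the package's entrypoints inside that environment
uv run python main.py setup
uv run python main.py teardown
```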