pydataframer-databricks 0.1.0.tar.gz → 0.1.2.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydataframer_databricks-0.1.2/PKG-INFO +46 -0
- pydataframer_databricks-0.1.2/README.md +31 -0
- {pydataframer_databricks-0.1.0 → pydataframer_databricks-0.1.2}/pydataframer_databricks/connectors.py +40 -42
- {pydataframer_databricks-0.1.0 → pydataframer_databricks-0.1.2}/pyproject.toml +1 -1
- {pydataframer_databricks-0.1.0 → pydataframer_databricks-0.1.2}/tests/test_connectors.py +3 -2
- pydataframer_databricks-0.1.0/PKG-INFO +0 -43
- pydataframer_databricks-0.1.0/README.md +0 -28
- {pydataframer_databricks-0.1.0 → pydataframer_databricks-0.1.2}/.github/workflows/python-publish.yml +0 -0
- {pydataframer_databricks-0.1.0 → pydataframer_databricks-0.1.2}/.gitignore +0 -0
- {pydataframer_databricks-0.1.0 → pydataframer_databricks-0.1.2}/pydataframer_databricks/__init__.py +0 -0
- {pydataframer_databricks-0.1.0 → pydataframer_databricks-0.1.2}/tests/__init__.py +0 -0
pydataframer_databricks-0.1.2/PKG-INFO
ADDED
@@ -0,0 +1,46 @@
+Metadata-Version: 2.4
+Name: pydataframer-databricks
+Version: 0.1.2
+Summary: Databricks connector for Dataframer
+Author-email: Dataframer <info@dataframer.ai>
+License: MIT
+Requires-Python: >=3.9
+Requires-Dist: databricks-sdk>=0.81.0
+Requires-Dist: databricks-sql-connector>=4.2.4
+Requires-Dist: pandas>=2.0.0
+Provides-Extra: dev
+Requires-Dist: pytest-cov>=4.1.0; extra == 'dev'
+Requires-Dist: pytest>=7.4.0; extra == 'dev'
+Description-Content-Type: text/markdown
+
+# pydataframer-databricks
+
+Databricks connector for Dataframer
+
+## Installation
+
+```bash
+pip install pydataframer-databricks
+```
+
+## Usage
+
+```python
+from pydataframer_databricks import DatabricksConnector, DatasetType, FileType
+
+databricks_connector = DatabricksConnector(dbutils, scope="dataframer")
+
+# Fetch sample data
+df = databricks_connector.fetch_sample_data(
+    num_items_to_select=100,
+    table_name="catalog.schema.table"
+)
+
+# Load generated data
+databricks_connector.load_generated_data(
+    table_name="catalog.schema.table",
+    downloaded_zip=downloaded_zip,
+    dataset_type=DatasetType.SINGLE_FILE,
+    file_type=FileType.CSV
+)
+```
pydataframer_databricks-0.1.2/README.md
ADDED
@@ -0,0 +1,31 @@
+# pydataframer-databricks
+
+Databricks connector for Dataframer
+
+## Installation
+
+```bash
+pip install pydataframer-databricks
+```
+
+## Usage
+
+```python
+from pydataframer_databricks import DatabricksConnector, DatasetType, FileType
+
+databricks_connector = DatabricksConnector(dbutils, scope="dataframer")
+
+# Fetch sample data
+df = databricks_connector.fetch_sample_data(
+    num_items_to_select=100,
+    table_name="catalog.schema.table"
+)
+
+# Load generated data
+databricks_connector.load_generated_data(
+    table_name="catalog.schema.table",
+    downloaded_zip=downloaded_zip,
+    dataset_type=DatasetType.SINGLE_FILE,
+    file_type=FileType.CSV
+)
+```
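The usage example assumes a secret scope named `dataframer` already exists. The package ships no provisioning helper; the sketch below is one hypothetical way to create such a scope with the bundled databricks-sdk dependency. Only the four key names are taken from the connectors.py diff below; the scope name, workspace URL, warehouse path, and credential values are placeholder assumptions.

```python
# Hypothetical provisioning sketch; not part of this package.
from databricks.sdk import WorkspaceClient

w = WorkspaceClient()  # picks up auth from the environment or ~/.databrickscfg
w.secrets.create_scope(scope="dataframer")
for key, value in {
    "DATABRICKS_SERVER_HOSTNAME": "adb-1234567890123456.7.azuredatabricks.net",
    "DATABRICKS_HTTP_PATH": "/sql/1.0/warehouses/abcdef1234567890",
    "DATABRICKS_CLIENT_ID": "<service-principal-client-id>",
    "DATABRICKS_CLIENT_SECRET": "<service-principal-secret>",
}.items():
    w.secrets.put_secret(scope="dataframer", key=key, string_value=value)
```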
{pydataframer_databricks-0.1.0 → pydataframer_databricks-0.1.2}/pydataframer_databricks/connectors.py
CHANGED
@@ -33,7 +33,7 @@ class DatabricksConnector:
 
     Examples
     --------
-    >>> databricks_connector = DatabricksConnector(dbutils)
+    >>> databricks_connector = DatabricksConnector(dbutils, scope="dataframer")
    >>> df = databricks_connector.fetch_sample_data(
     ...     num_items_to_select=25,
     ...     table_name="samples.bakehouse.media_customer_reviews"
@@ -41,7 +41,7 @@ class DatabricksConnector:
     >>> df.head()
     """
 
-    def __init__(self, dbutils):
+    def __init__(self, dbutils, scope):
         """
         Initialize the Databricks connector.
 
@@ -49,8 +49,11 @@ class DatabricksConnector:
         ----------
         dbutils : DBUtils
             The dbutils object from your Databricks notebook context.
+        scope : str
+            The Databricks secret scope name containing connection credentials.
         """
         self.dbutils = dbutils
+        self.scope = scope
 
     def get_connection(self):
         """
@@ -64,14 +67,14 @@ class DatabricksConnector:
         from databricks import sql
         from databricks.sdk.core import Config, oauth_service_principal
 
-        server_hostname = self.dbutils.secrets.get(
-        http_path = self.dbutils.secrets.get(
+        server_hostname = self.dbutils.secrets.get(self.scope, "DATABRICKS_SERVER_HOSTNAME")
+        http_path = self.dbutils.secrets.get(self.scope, "DATABRICKS_HTTP_PATH")
 
         def credential_provider():
             config = Config(
                 host=f"https://{server_hostname}",
-                client_id=self.dbutils.secrets.get(
-                client_secret=self.dbutils.secrets.get(
+                client_id=self.dbutils.secrets.get(self.scope, "DATABRICKS_CLIENT_ID"),
+                client_secret=self.dbutils.secrets.get(self.scope, "DATABRICKS_CLIENT_SECRET"),
             )
             return oauth_service_principal(config)
 
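The hunk above reads the connection secrets and builds the OAuth credential provider, but the `sql.connect` call that consumes them falls outside the visible context. A minimal standalone sketch of the same machine-to-machine OAuth pattern, assuming the provider is passed through databricks-sql-connector's `credentials_provider` parameter; hostname, HTTP path, and credentials are placeholders:

```python
# Standalone sketch of the OAuth service-principal flow assembled above.
from databricks import sql
from databricks.sdk.core import Config, oauth_service_principal

server_hostname = "adb-1234567890123456.7.azuredatabricks.net"
http_path = "/sql/1.0/warehouses/abcdef1234567890"

def credential_provider():
    config = Config(
        host=f"https://{server_hostname}",
        client_id="<service-principal-client-id>",
        client_secret="<service-principal-secret>",
    )
    return oauth_service_principal(config)

with sql.connect(
    server_hostname=server_hostname,
    http_path=http_path,
    credentials_provider=credential_provider,  # M2M OAuth instead of a PAT
) as connection:
    with connection.cursor() as cursor:
        cursor.execute("SELECT 1")
        print(cursor.fetchall())
```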
@@ -100,7 +103,7 @@ class DatabricksConnector:
 
         Examples
         --------
-        >>> databricks_connector = DatabricksConnector(dbutils)
+        >>> databricks_connector = DatabricksConnector(dbutils, scope="dataframer")
         >>> df = databricks_connector.fetch_sample_data(
         ...     num_items_to_select=25,
         ...     table_name="samples.bakehouse.media_customer_reviews"
@@ -146,7 +149,7 @@ class DatabricksConnector:
 
         Examples
         --------
-        >>> databricks_connector = DatabricksConnector(dbutils)
+        >>> databricks_connector = DatabricksConnector(dbutils, scope="dataframer")
         >>> with open("samples.zip", "rb") as f:
         ...     databricks_connector.load_generated_data(
         ...         table_name="my_catalog.my_schema.my_table",
@@ -200,42 +203,37 @@
             raise ValueError(f"Unsupported file_type: {file_type}. Supported: CSV, JSON, JSONL for SINGLE_FILE datasets")
 
         with self.get_connection() as connection:
-
-
-
-                f"`{col}` STRING" for col in pandas_df.columns
-            )
-
-            try:
-                cursor.execute(f"""
-                    CREATE OR REPLACE TABLE {table_name} (
-                        {columns_sql}
-                    )
-                """)
-            except Exception as e:
-                error_msg = f"Failed to create table `{table_name}`"
-                print(f"{error_msg}: {str(e)}")
-                print("Verify table name format (catalog.schema.table), permissions, and warehouse is running")
-                cursor.close()
-                raise RuntimeError(f"{error_msg}: {str(e)}") from e
-
-            insert_sql = f"""
-                INSERT INTO {table_name}
-                VALUES ({", ".join(["?"] * len(pandas_df.columns))})
-            """
-
-            try:
-                cursor.executemany(
-                    insert_sql,
-                    pandas_df.values.tolist()
+            with connection.cursor() as cursor:
+                columns_sql = ", ".join(
+                    f"`{col}` STRING" for col in pandas_df.columns
                 )
-            except Exception as e:
-                error_msg = f"Failed to insert data into table `{table_name}`"
-                print(f"{error_msg}: {str(e)} | Rows attempted: {len(pandas_df)}")
-                cursor.close()
-                raise RuntimeError(f"{error_msg}: {str(e)}") from e
 
-
+                try:
+                    cursor.execute(f"""
+                        CREATE OR REPLACE TABLE {table_name} (
+                            {columns_sql}
+                        )
+                    """)
+                except Exception as e:
+                    error_msg = f"Failed to create table `{table_name}`"
+                    print(f"{error_msg}: {str(e)}")
+                    print("Verify table name format (catalog.schema.table), permissions, and warehouse is running")
+                    raise RuntimeError(f"{error_msg}: {str(e)}") from e
+
+                insert_sql = f"""
+                    INSERT INTO {table_name}
+                    VALUES ({", ".join(["?"] * len(pandas_df.columns))})
+                """
+
+                try:
+                    cursor.executemany(
+                        insert_sql,
+                        pandas_df.values.tolist()
+                    )
+                except Exception as e:
+                    error_msg = f"Failed to insert data into table `{table_name}`"
+                    print(f"{error_msg}: {str(e)} | Rows attempted: {len(pandas_df)}")
+                    raise RuntimeError(f"{error_msg}: {str(e)}") from e
 
         print(f"✅ Table `{table_name}` saved successfully using Databricks SQL")
 
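The refactor wraps the cursor in a `with` block, which is why the `cursor.close()` calls disappear from both `except` branches. A short sketch of the equivalence, with a hypothetical table `t` and an assumed two-column row shape:

```python
# Sketch only: `t` is a hypothetical two-column STRING table.
def load_rows(connection, rows):
    """What `with connection.cursor() as cursor:` guarantees in 0.1.2."""
    cursor = connection.cursor()
    try:
        # `?` placeholders bind each row as parameters instead of
        # interpolating values into the SQL string.
        cursor.executemany("INSERT INTO t VALUES (?, ?)", rows)
    finally:
        # Runs on success and on every exception path, replacing the
        # per-branch cursor.close() calls removed in this release.
        cursor.close()
```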
{pydataframer_databricks-0.1.0 → pydataframer_databricks-0.1.2}/tests/test_connectors.py
CHANGED
@@ -26,13 +26,14 @@ class TestDatabricksConnector:
     def connector(self, mock_dbutils):
         """Create a DatabricksConnector instance with mocked dbutils"""
         from pydataframer_databricks import DatabricksConnector
-        return DatabricksConnector(mock_dbutils)
+        return DatabricksConnector(mock_dbutils, scope="dataframer")
 
     def test_init(self, mock_dbutils):
         """Test connector initialization"""
         from pydataframer_databricks import DatabricksConnector
-        connector = DatabricksConnector(mock_dbutils)
+        connector = DatabricksConnector(mock_dbutils, scope="dataframer")
         assert connector.dbutils == mock_dbutils
+        assert connector.scope == "dataframer"
 
     def test_get_connection(self, connector, mock_dbutils):
         """Test get_connection establishes connection with correct parameters"""
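The `mock_dbutils` fixture itself sits outside the visible hunk. A hypothetical sketch of what such a fixture might look like, mirroring the four secret keys that `get_connection` requests:

```python
# Hypothetical fixture; the real one is not shown in this diff.
from unittest.mock import MagicMock

import pytest

@pytest.fixture
def mock_dbutils():
    dbutils = MagicMock()
    dbutils.secrets.get.side_effect = lambda scope, key: {
        "DATABRICKS_SERVER_HOSTNAME": "test-host.cloud.databricks.com",
        "DATABRICKS_HTTP_PATH": "/sql/1.0/warehouses/test",
        "DATABRICKS_CLIENT_ID": "test-client-id",
        "DATABRICKS_CLIENT_SECRET": "test-secret",
    }[key]
    return dbutils
```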
pydataframer_databricks-0.1.0/PKG-INFO
DELETED
@@ -1,43 +0,0 @@
-Metadata-Version: 2.4
-Name: pydataframer-databricks
-Version: 0.1.0
-Summary: Databricks connector for Dataframer
-Author-email: Dataframer <info@dataframer.ai>
-License: MIT
-Requires-Python: >=3.9
-Requires-Dist: databricks-sdk>=0.81.0
-Requires-Dist: databricks-sql-connector>=4.2.4
-Requires-Dist: pandas>=2.0.0
-Provides-Extra: dev
-Requires-Dist: pytest-cov>=4.1.0; extra == 'dev'
-Requires-Dist: pytest>=7.4.0; extra == 'dev'
-Description-Content-Type: text/markdown
-
-# pydataframer-databricks
-
-Databricks connector package for Dataframer, providing seamless integration with Databricks SQL and data operations.
-
-## Installation
-
-```bash
-pip install pydataframer-databricks
-```
-
-## Building
-
-Requires [uv](https://docs.astral.sh/uv/) installed in your environment.
-
-```bash
-uv build
-```
-
-## Development
-
-```bash
-# Install with dev dependencies
-uv pip install -e ".[dev]"
-
-# Run tests
-pytest
-```
-
pydataframer_databricks-0.1.0/README.md
DELETED
@@ -1,28 +0,0 @@
-# pydataframer-databricks
-
-Databricks connector package for Dataframer, providing seamless integration with Databricks SQL and data operations.
-
-## Installation
-
-```bash
-pip install pydataframer-databricks
-```
-
-## Building
-
-Requires [uv](https://docs.astral.sh/uv/) installed in your environment.
-
-```bash
-uv build
-```
-
-## Development
-
-```bash
-# Install with dev dependencies
-uv pip install -e ".[dev]"
-
-# Run tests
-pytest
-```
-
{pydataframer_databricks-0.1.0 → pydataframer_databricks-0.1.2}/.github/workflows/python-publish.yml
RENAMED
File without changes

{pydataframer_databricks-0.1.0 → pydataframer_databricks-0.1.2}/.gitignore
RENAMED
File without changes

{pydataframer_databricks-0.1.0 → pydataframer_databricks-0.1.2}/pydataframer_databricks/__init__.py
RENAMED
File without changes

{pydataframer_databricks-0.1.0 → pydataframer_databricks-0.1.2}/tests/__init__.py
RENAMED
File without changes