pydataframer-databricks 0.1.0__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
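In short: the package version moves from 0.1.0 to 0.1.2, `DatabricksConnector.__init__` gains a required `scope` parameter naming the Databricks secret scope to read credentials from (0.1.0 hardcoded the scope `"dataframer"`), `load_generated_data` switches to a context-managed cursor, and the README drops its Building and Development sections. A minimal migration sketch, using only names that appear in the diff below:

```python
# 0.1.0: the secret scope was hardcoded internally
connector = DatabricksConnector(dbutils)

# 0.1.2: the scope is explicit; pass "dataframer" to keep the old behavior
connector = DatabricksConnector(dbutils, scope="dataframer")
```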
@@ -0,0 +1,46 @@
+ Metadata-Version: 2.4
+ Name: pydataframer-databricks
+ Version: 0.1.2
+ Summary: Databricks connector for Dataframer
+ Author-email: Dataframer <info@dataframer.ai>
+ License: MIT
+ Requires-Python: >=3.9
+ Requires-Dist: databricks-sdk>=0.81.0
+ Requires-Dist: databricks-sql-connector>=4.2.4
+ Requires-Dist: pandas>=2.0.0
+ Provides-Extra: dev
+ Requires-Dist: pytest-cov>=4.1.0; extra == 'dev'
+ Requires-Dist: pytest>=7.4.0; extra == 'dev'
+ Description-Content-Type: text/markdown
+
+ # pydataframer-databricks
+
+ Databricks connector for Dataframer
+
+ ## Installation
+
+ ```bash
+ pip install pydataframer-databricks
+ ```
+
+ ## Usage
+
+ ```python
+ from pydataframer_databricks import DatabricksConnector, DatasetType, FileType
+
+ databricks_connector = DatabricksConnector(dbutils, scope="dataframer")
+
+ # Fetch sample data
+ df = databricks_connector.fetch_sample_data(
+     num_items_to_select=100,
+     table_name="catalog.schema.table"
+ )
+
+ # Load generated data
+ databricks_connector.load_generated_data(
+     table_name="catalog.schema.table",
+     downloaded_zip=downloaded_zip,
+     dataset_type=DatasetType.SINGLE_FILE,
+     file_type=FileType.CSV
+ )
+ ```
@@ -0,0 +1,31 @@
+ # pydataframer-databricks
+
+ Databricks connector for Dataframer
+
+ ## Installation
+
+ ```bash
+ pip install pydataframer-databricks
+ ```
+
+ ## Usage
+
+ ```python
+ from pydataframer_databricks import DatabricksConnector, DatasetType, FileType
+
+ databricks_connector = DatabricksConnector(dbutils, scope="dataframer")
+
+ # Fetch sample data
+ df = databricks_connector.fetch_sample_data(
+     num_items_to_select=100,
+     table_name="catalog.schema.table"
+ )
+
+ # Load generated data
+ databricks_connector.load_generated_data(
+     table_name="catalog.schema.table",
+     downloaded_zip=downloaded_zip,
+     dataset_type=DatasetType.SINGLE_FILE,
+     file_type=FileType.CSV
+ )
+ ```
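A note on the usage block: `downloaded_zip` is assumed to be an open binary handle to the generated zip archive, matching the `load_generated_data` docstring example that appears later in this diff (`open("samples.zip", "rb")`). Spelled out:

```python
# Hedged sketch combining the README usage with the docstring example;
# "samples.zip" is a placeholder path.
with open("samples.zip", "rb") as downloaded_zip:
    databricks_connector.load_generated_data(
        table_name="catalog.schema.table",
        downloaded_zip=downloaded_zip,
        dataset_type=DatasetType.SINGLE_FILE,
        file_type=FileType.CSV,
    )
```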
@@ -33,7 +33,7 @@ class DatabricksConnector:

      Examples
      --------
-     >>> databricks_connector = DatabricksConnector(dbutils)
+     >>> databricks_connector = DatabricksConnector(dbutils, scope="dataframer")
      >>> df = databricks_connector.fetch_sample_data(
      ...     num_items_to_select=25,
      ...     table_name="samples.bakehouse.media_customer_reviews"
@@ -41,7 +41,7 @@ class DatabricksConnector:
      >>> df.head()
      """

-     def __init__(self, dbutils):
+     def __init__(self, dbutils, scope):
          """
          Initialize the Databricks connector.

@@ -49,8 +49,11 @@ class DatabricksConnector:
          ----------
          dbutils : DBUtils
              The dbutils object from your Databricks notebook context.
+         scope : str
+             The Databricks secret scope name containing connection credentials.
          """
          self.dbutils = dbutils
+         self.scope = scope

      def get_connection(self):
          """
@@ -64,14 +67,14 @@ class DatabricksConnector:
          from databricks import sql
          from databricks.sdk.core import Config, oauth_service_principal

-         server_hostname = self.dbutils.secrets.get("dataframer", "DATABRICKS_SERVER_HOSTNAME")
-         http_path = self.dbutils.secrets.get("dataframer", "DATABRICKS_HTTP_PATH")
+         server_hostname = self.dbutils.secrets.get(self.scope, "DATABRICKS_SERVER_HOSTNAME")
+         http_path = self.dbutils.secrets.get(self.scope, "DATABRICKS_HTTP_PATH")

          def credential_provider():
              config = Config(
                  host=f"https://{server_hostname}",
-                 client_id=self.dbutils.secrets.get("dataframer", "DATABRICKS_CLIENT_ID"),
-                 client_secret=self.dbutils.secrets.get("dataframer", "DATABRICKS_CLIENT_SECRET"),
+                 client_id=self.dbutils.secrets.get(self.scope, "DATABRICKS_CLIENT_ID"),
+                 client_secret=self.dbutils.secrets.get(self.scope, "DATABRICKS_CLIENT_SECRET"),
              )
              return oauth_service_principal(config)

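For context, the four secret keys read above must already exist in the configured scope before the connector can be used. A hypothetical one-time provisioning sketch using the databricks-sdk `WorkspaceClient` (a dependency the package already declares); the key names come from the diff, while the provisioning itself is an assumption, not part of this package:

```python
from databricks.sdk import WorkspaceClient

w = WorkspaceClient()  # picks up credentials from env vars or ~/.databrickscfg
w.secrets.create_scope(scope="dataframer")  # fails if the scope already exists

# Placeholder values; real ones identify your workspace, SQL warehouse,
# and an OAuth service principal.
secrets = {
    "DATABRICKS_SERVER_HOSTNAME": "dbc-xxxx.cloud.databricks.com",
    "DATABRICKS_HTTP_PATH": "/sql/1.0/warehouses/xxxx",
    "DATABRICKS_CLIENT_ID": "<service-principal-application-id>",
    "DATABRICKS_CLIENT_SECRET": "<service-principal-secret>",
}
for key, value in secrets.items():
    w.secrets.put_secret(scope="dataframer", key=key, string_value=value)
```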
@@ -100,7 +103,7 @@ class DatabricksConnector:

          Examples
          --------
-         >>> databricks_connector = DatabricksConnector(dbutils)
+         >>> databricks_connector = DatabricksConnector(dbutils, scope="dataframer")
          >>> df = databricks_connector.fetch_sample_data(
          ...     num_items_to_select=25,
          ...     table_name="samples.bakehouse.media_customer_reviews"
@@ -146,7 +149,7 @@ class DatabricksConnector:

          Examples
          --------
-         >>> databricks_connector = DatabricksConnector(dbutils)
+         >>> databricks_connector = DatabricksConnector(dbutils, scope="dataframer")
          >>> with open("samples.zip", "rb") as f:
          ...     databricks_connector.load_generated_data(
          ...         table_name="my_catalog.my_schema.my_table",
@@ -200,42 +203,37 @@ class DatabricksConnector:
              raise ValueError(f"Unsupported file_type: {file_type}. Supported: CSV, JSON, JSONL for SINGLE_FILE datasets")

          with self.get_connection() as connection:
-             cursor = connection.cursor()
-
-             columns_sql = ", ".join(
-                 f"`{col}` STRING" for col in pandas_df.columns
-             )
-
-             try:
-                 cursor.execute(f"""
-                     CREATE OR REPLACE TABLE {table_name} (
-                         {columns_sql}
-                     )
-                 """)
-             except Exception as e:
-                 error_msg = f"Failed to create table `{table_name}`"
-                 print(f"{error_msg}: {str(e)}")
-                 print("Verify table name format (catalog.schema.table), permissions, and warehouse is running")
-                 cursor.close()
-                 raise RuntimeError(f"{error_msg}: {str(e)}") from e
-
-             insert_sql = f"""
-                 INSERT INTO {table_name}
-                 VALUES ({", ".join(["?"] * len(pandas_df.columns))})
-             """
-
-             try:
-                 cursor.executemany(
-                     insert_sql,
-                     pandas_df.values.tolist()
+             with connection.cursor() as cursor:
+                 columns_sql = ", ".join(
+                     f"`{col}` STRING" for col in pandas_df.columns
                  )
-             except Exception as e:
-                 error_msg = f"Failed to insert data into table `{table_name}`"
-                 print(f"{error_msg}: {str(e)} | Rows attempted: {len(pandas_df)}")
-                 cursor.close()
-                 raise RuntimeError(f"{error_msg}: {str(e)}") from e

-             cursor.close()
+                 try:
+                     cursor.execute(f"""
+                         CREATE OR REPLACE TABLE {table_name} (
+                             {columns_sql}
+                         )
+                     """)
+                 except Exception as e:
+                     error_msg = f"Failed to create table `{table_name}`"
+                     print(f"{error_msg}: {str(e)}")
+                     print("Verify table name format (catalog.schema.table), permissions, and warehouse is running")
+                     raise RuntimeError(f"{error_msg}: {str(e)}") from e
+
+                 insert_sql = f"""
+                     INSERT INTO {table_name}
+                     VALUES ({", ".join(["?"] * len(pandas_df.columns))})
+                 """
+
+                 try:
+                     cursor.executemany(
+                         insert_sql,
+                         pandas_df.values.tolist()
+                     )
+                 except Exception as e:
+                     error_msg = f"Failed to insert data into table `{table_name}`"
+                     print(f"{error_msg}: {str(e)} | Rows attempted: {len(pandas_df)}")
+                     raise RuntimeError(f"{error_msg}: {str(e)}") from e

          print(f"✅ Table `{table_name}` saved successfully using Databricks SQL")

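The refactor above replaces three scattered `cursor.close()` calls with a context-managed cursor. Spelled out standalone, the two shapes are equivalent; a minimal sketch, assuming `connection` is a databricks-sql-connector connection as returned by `get_connection()`:

```python
# Old shape: close() must be repeated on every error path (and was).
cursor = connection.cursor()
try:
    cursor.execute("SELECT 1")
finally:
    cursor.close()  # easy to forget when a new error path is added

# New shape: the cursor is closed automatically, even when execute() raises.
with connection.cursor() as cursor:
    cursor.execute("SELECT 1")
```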
@@ -1,6 +1,6 @@
  [project]
  name = "pydataframer-databricks"
- version = "0.1.0"
+ version = "0.1.2"
  description = "Databricks connector for Dataframer"
  readme = "README.md"
  requires-python = ">=3.9"
@@ -26,13 +26,14 @@ class TestDatabricksConnector:
      def connector(self, mock_dbutils):
          """Create a DatabricksConnector instance with mocked dbutils"""
          from pydataframer_databricks import DatabricksConnector
-         return DatabricksConnector(mock_dbutils)
+         return DatabricksConnector(mock_dbutils, scope="dataframer")

      def test_init(self, mock_dbutils):
          """Test connector initialization"""
          from pydataframer_databricks import DatabricksConnector
-         connector = DatabricksConnector(mock_dbutils)
+         connector = DatabricksConnector(mock_dbutils, scope="dataframer")
          assert connector.dbutils == mock_dbutils
+         assert connector.scope == "dataframer"

      def test_get_connection(self, connector, mock_dbutils):
          """Test get_connection establishes connection with correct parameters"""
@@ -1,43 +0,0 @@
- Metadata-Version: 2.4
- Name: pydataframer-databricks
- Version: 0.1.0
- Summary: Databricks connector for Dataframer
- Author-email: Dataframer <info@dataframer.ai>
- License: MIT
- Requires-Python: >=3.9
- Requires-Dist: databricks-sdk>=0.81.0
- Requires-Dist: databricks-sql-connector>=4.2.4
- Requires-Dist: pandas>=2.0.0
- Provides-Extra: dev
- Requires-Dist: pytest-cov>=4.1.0; extra == 'dev'
- Requires-Dist: pytest>=7.4.0; extra == 'dev'
- Description-Content-Type: text/markdown
-
- # pydataframer-databricks
-
- Databricks connector package for Dataframer, providing seamless integration with Databricks SQL and data operations.
-
- ## Installation
-
- ```bash
- pip install pydataframer-databricks
- ```
-
- ## Building
-
- Requires [uv](https://docs.astral.sh/uv/) installed in your environment.
-
- ```bash
- uv build
- ```
-
- ## Development
-
- ```bash
- # Install with dev dependencies
- uv pip install -e ".[dev]"
-
- # Run tests
- pytest
- ```
-
@@ -1,28 +0,0 @@
- # pydataframer-databricks
-
- Databricks connector package for Dataframer, providing seamless integration with Databricks SQL and data operations.
-
- ## Installation
-
- ```bash
- pip install pydataframer-databricks
- ```
-
- ## Building
-
- Requires [uv](https://docs.astral.sh/uv/) installed in your environment.
-
- ```bash
- uv build
- ```
-
- ## Development
-
- ```bash
- # Install with dev dependencies
- uv pip install -e ".[dev]"
-
- # Run tests
- pytest
- ```
-