pydataframer_databricks-0.1.0-py3-none-any.whl → pydataframer_databricks-0.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/pydataframer_databricks/connectors.py
+++ b/pydataframer_databricks/connectors.py
@@ -33,7 +33,7 @@ class DatabricksConnector:
 
     Examples
     --------
-    >>> databricks_connector = DatabricksConnector(dbutils)
+    >>> databricks_connector = DatabricksConnector(dbutils, scope="dataframer")
    >>> df = databricks_connector.fetch_sample_data(
     ...     num_items_to_select=25,
     ...     table_name="samples.bakehouse.media_customer_reviews"
@@ -41,7 +41,7 @@ class DatabricksConnector:
     >>> df.head()
     """
 
-    def __init__(self, dbutils):
+    def __init__(self, dbutils, scope):
         """
         Initialize the Databricks connector.
 
@@ -49,8 +49,11 @@ class DatabricksConnector:
         ----------
         dbutils : DBUtils
             The dbutils object from your Databricks notebook context.
+        scope : str
+            The Databricks secret scope name containing connection credentials.
         """
         self.dbutils = dbutils
+        self.scope = scope
 
     def get_connection(self):
         """
@@ -64,14 +67,14 @@ class DatabricksConnector:
         from databricks import sql
         from databricks.sdk.core import Config, oauth_service_principal
 
-        server_hostname = self.dbutils.secrets.get("dataframer", "DATABRICKS_SERVER_HOSTNAME")
-        http_path = self.dbutils.secrets.get("dataframer", "DATABRICKS_HTTP_PATH")
+        server_hostname = self.dbutils.secrets.get(self.scope, "DATABRICKS_SERVER_HOSTNAME")
+        http_path = self.dbutils.secrets.get(self.scope, "DATABRICKS_HTTP_PATH")
 
         def credential_provider():
             config = Config(
                 host=f"https://{server_hostname}",
-                client_id=self.dbutils.secrets.get("dataframer", "DATABRICKS_CLIENT_ID"),
-                client_secret=self.dbutils.secrets.get("dataframer", "DATABRICKS_CLIENT_SECRET"),
+                client_id=self.dbutils.secrets.get(self.scope, "DATABRICKS_CLIENT_ID"),
+                client_secret=self.dbutils.secrets.get(self.scope, "DATABRICKS_CLIENT_SECRET"),
             )
             return oauth_service_principal(config)
 
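
The hunk cuts off before the connection is actually opened, but given the imports above, the natural continuation is to hand `credential_provider` to `sql.connect` via its `credentials_provider` parameter — the documented OAuth machine-to-machine pattern for databricks-sql-connector. A sketch of the presumed remainder of `get_connection` (not shown in the diff):

```python
# Presumed tail of get_connection(); the diff above does not show it.
# databricks-sql-connector accepts a zero-argument callable via
# credentials_provider for OAuth M2M authentication.
return sql.connect(
    server_hostname=server_hostname,
    http_path=http_path,
    credentials_provider=credential_provider,
)
```

This matches how the method is consumed later in the file (`with self.get_connection() as connection:`), so a connection object is the expected return value.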
@@ -100,7 +103,7 @@ class DatabricksConnector:
 
         Examples
         --------
-        >>> databricks_connector = DatabricksConnector(dbutils)
+        >>> databricks_connector = DatabricksConnector(dbutils, scope="dataframer")
         >>> df = databricks_connector.fetch_sample_data(
         ...     num_items_to_select=25,
         ...     table_name="samples.bakehouse.media_customer_reviews"
@@ -146,7 +149,7 @@ class DatabricksConnector:
 
         Examples
         --------
-        >>> databricks_connector = DatabricksConnector(dbutils)
+        >>> databricks_connector = DatabricksConnector(dbutils, scope="dataframer")
         >>> with open("samples.zip", "rb") as f:
         ...     databricks_connector.load_generated_data(
         ...         table_name="my_catalog.my_schema.my_table",
@@ -200,42 +203,37 @@ class DatabricksConnector:
             raise ValueError(f"Unsupported file_type: {file_type}. Supported: CSV, JSON, JSONL for SINGLE_FILE datasets")
 
         with self.get_connection() as connection:
-            cursor = connection.cursor()
-
-            columns_sql = ", ".join(
-                f"`{col}` STRING" for col in pandas_df.columns
-            )
-
-            try:
-                cursor.execute(f"""
-                    CREATE OR REPLACE TABLE {table_name} (
-                        {columns_sql}
-                    )
-                """)
-            except Exception as e:
-                error_msg = f"Failed to create table `{table_name}`"
-                print(f"{error_msg}: {str(e)}")
-                print("Verify table name format (catalog.schema.table), permissions, and warehouse is running")
-                cursor.close()
-                raise RuntimeError(f"{error_msg}: {str(e)}") from e
-
-            insert_sql = f"""
-                INSERT INTO {table_name}
-                VALUES ({", ".join(["?"] * len(pandas_df.columns))})
-            """
-
-            try:
-                cursor.executemany(
-                    insert_sql,
-                    pandas_df.values.tolist()
+            with connection.cursor() as cursor:
+                columns_sql = ", ".join(
+                    f"`{col}` STRING" for col in pandas_df.columns
                 )
-            except Exception as e:
-                error_msg = f"Failed to insert data into table `{table_name}`"
-                print(f"{error_msg}: {str(e)} | Rows attempted: {len(pandas_df)}")
-                cursor.close()
-                raise RuntimeError(f"{error_msg}: {str(e)}") from e
 
-            cursor.close()
+                try:
+                    cursor.execute(f"""
+                        CREATE OR REPLACE TABLE {table_name} (
+                            {columns_sql}
+                        )
+                    """)
+                except Exception as e:
+                    error_msg = f"Failed to create table `{table_name}`"
+                    print(f"{error_msg}: {str(e)}")
+                    print("Verify table name format (catalog.schema.table), permissions, and warehouse is running")
+                    raise RuntimeError(f"{error_msg}: {str(e)}") from e
+
+                insert_sql = f"""
+                    INSERT INTO {table_name}
+                    VALUES ({", ".join(["?"] * len(pandas_df.columns))})
+                """
+
+                try:
+                    cursor.executemany(
+                        insert_sql,
+                        pandas_df.values.tolist()
+                    )
+                except Exception as e:
+                    error_msg = f"Failed to insert data into table `{table_name}`"
+                    print(f"{error_msg}: {str(e)} | Rows attempted: {len(pandas_df)}")
+                    raise RuntimeError(f"{error_msg}: {str(e)}") from e
 
             print(f"✅ Table `{table_name}` saved successfully using Databricks SQL")
 
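
The substance of this hunk is housekeeping: 0.1.0 closed the cursor manually on three separate paths (both `except` blocks plus the happy path), while 0.1.2 lets `with connection.cursor() as cursor:` do it on every exit. Roughly, the context-manager form is equivalent to the following sketch (illustration only, not package code; `connection` is assumed in scope):

```python
# What the cursor context manager replaces: one close on every exit path.
cursor = connection.cursor()
try:
    cursor.execute(...)      # create table; may raise
    cursor.executemany(...)  # insert rows; may raise
finally:
    cursor.close()  # runs on success and on every exception path
```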
--- /dev/null
+++ b/pydataframer_databricks-0.1.2.dist-info/METADATA
@@ -0,0 +1,46 @@
+Metadata-Version: 2.4
+Name: pydataframer-databricks
+Version: 0.1.2
+Summary: Databricks connector for Dataframer
+Author-email: Dataframer <info@dataframer.ai>
+License: MIT
+Requires-Python: >=3.9
+Requires-Dist: databricks-sdk>=0.81.0
+Requires-Dist: databricks-sql-connector>=4.2.4
+Requires-Dist: pandas>=2.0.0
+Provides-Extra: dev
+Requires-Dist: pytest-cov>=4.1.0; extra == 'dev'
+Requires-Dist: pytest>=7.4.0; extra == 'dev'
+Description-Content-Type: text/markdown
+
+# pydataframer-databricks
+
+Databricks connector for Dataframer
+
+## Installation
+
+```bash
+pip install pydataframer-databricks
+```
+
+## Usage
+
+```python
+from pydataframer_databricks import DatabricksConnector, DatasetType, FileType
+
+databricks_connector = DatabricksConnector(dbutils, scope="dataframer")
+
+# Fetch sample data
+df = databricks_connector.fetch_sample_data(
+    num_items_to_select=100,
+    table_name="catalog.schema.table"
+)
+
+# Load generated data
+databricks_connector.load_generated_data(
+    table_name="catalog.schema.table",
+    downloaded_zip=downloaded_zip,
+    dataset_type=DatasetType.SINGLE_FILE,
+    file_type=FileType.CSV
+)
+```
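
Both the new README and the new constructor assume the secret scope is already populated. One way to do that from outside the workspace is the databricks-sdk, which is already a dependency of this package; the sketch below is a hedged setup example with placeholder values, not something the package itself provides:

```python
# One-time secret setup via databricks-sdk; all values are placeholders.
from databricks.sdk import WorkspaceClient

w = WorkspaceClient()  # auth resolved from env vars or a config profile
w.secrets.create_scope(scope="dataframer")

secrets = {
    "DATABRICKS_SERVER_HOSTNAME": "adb-1234567890123456.7.azuredatabricks.net",
    "DATABRICKS_HTTP_PATH": "/sql/1.0/warehouses/<warehouse-id>",
    "DATABRICKS_CLIENT_ID": "<service-principal-client-id>",
    "DATABRICKS_CLIENT_SECRET": "<service-principal-secret>",
}
for key, value in secrets.items():
    w.secrets.put_secret(scope="dataframer", key=key, string_value=value)
```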
--- /dev/null
+++ b/pydataframer_databricks-0.1.2.dist-info/RECORD
@@ -0,0 +1,5 @@
+pydataframer_databricks/__init__.py,sha256=piRrFtKpGGc2ctFDnMNblp5Whp6froRKXNeYkHnrw_o,214
+pydataframer_databricks/connectors.py,sha256=ZzRBuv-RGJeKoGaAtUTov_52yq2Tq_YKfOs5vFSQ9Zw,9389
+pydataframer_databricks-0.1.2.dist-info/METADATA,sha256=VN0LL1dH5UL0f9eWJnf15gSA-kvyVoXemJGkgbRa6BI,1113
+pydataframer_databricks-0.1.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+pydataframer_databricks-0.1.2.dist-info/RECORD,,
--- a/pydataframer_databricks-0.1.0.dist-info/METADATA
+++ /dev/null
@@ -1,43 +0,0 @@
-Metadata-Version: 2.4
-Name: pydataframer-databricks
-Version: 0.1.0
-Summary: Databricks connector for Dataframer
-Author-email: Dataframer <info@dataframer.ai>
-License: MIT
-Requires-Python: >=3.9
-Requires-Dist: databricks-sdk>=0.81.0
-Requires-Dist: databricks-sql-connector>=4.2.4
-Requires-Dist: pandas>=2.0.0
-Provides-Extra: dev
-Requires-Dist: pytest-cov>=4.1.0; extra == 'dev'
-Requires-Dist: pytest>=7.4.0; extra == 'dev'
-Description-Content-Type: text/markdown
-
-# pydataframer-databricks
-
-Databricks connector package for Dataframer, providing seamless integration with Databricks SQL and data operations.
-
-## Installation
-
-```bash
-pip install pydataframer-databricks
-```
-
-## Building
-
-Requires [uv](https://docs.astral.sh/uv/) installed in your environment.
-
-```bash
-uv build
-```
-
-## Development
-
-```bash
-# Install with dev dependencies
-uv pip install -e ".[dev]"
-
-# Run tests
-pytest
-```
-
--- a/pydataframer_databricks-0.1.0.dist-info/RECORD
+++ /dev/null
@@ -1,5 +0,0 @@
-pydataframer_databricks/__init__.py,sha256=piRrFtKpGGc2ctFDnMNblp5Whp6froRKXNeYkHnrw_o,214
-pydataframer_databricks/connectors.py,sha256=E4RlU30ADp0V27tuHWOai-7CM1YvmTInS_YonUpWMds,9191
-pydataframer_databricks-0.1.0.dist-info/METADATA,sha256=L-5a9ThsJYq_CTeAI22Zlo269NoXH3jRtyFOW_dPxAQ,891
-pydataframer_databricks-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-pydataframer_databricks-0.1.0.dist-info/RECORD,,