acryl-datahub 1.2.0.11rc5__py3-none-any.whl → 1.3.0rc5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

@@ -1,7 +1,7 @@
1
- acryl_datahub-1.2.0.11rc5.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1
+ acryl_datahub-1.3.0rc5.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
2
2
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
3
3
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
4
- datahub/_version.py,sha256=Uoww4PlvFNDBfp6ZJQjTb1JZU6f9boAYIIUAiM-zWns,324
4
+ datahub/_version.py,sha256=tQwOz8j8lgLnVKUe2eNZLleYAQzj2kRWOmo8yGoOMOk,321
5
5
  datahub/entrypoints.py,sha256=9Qf-37rNnTzbGlx8S75OCDazIclFp6zWNcCEL1zCZto,9015
6
6
  datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
7
7
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -334,7 +334,7 @@ datahub/ingestion/source/excel/report.py,sha256=oEkeI8J6is7zB9iz4RqASu_-Q5xl36lA
334
334
  datahub/ingestion/source/excel/source.py,sha256=w_vOz4UD7BcXBBDKoo81_6-QFeOPITuXqkfjIMHCQj4,23827
335
335
  datahub/ingestion/source/excel/util.py,sha256=YYmadYuCiT-4_MfQM0YSE7wuDcE0k8o2KrlOKM9Z6eI,406
336
336
  datahub/ingestion/source/fivetran/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
337
- datahub/ingestion/source/fivetran/config.py,sha256=vNmnQM3oekr2dOLPria-wjCLmp27bcYypIfoA6xx5k8,10290
337
+ datahub/ingestion/source/fivetran/config.py,sha256=6yriUMtTPMZUHqbZ9gzyFduPVt6CxzirdYSg4k-ziYI,10285
338
338
  datahub/ingestion/source/fivetran/data_classes.py,sha256=ecdUJH5BEze0yv-uFpKWPNaNmV1gORDA2XMFk0zhcBw,595
339
339
  datahub/ingestion/source/fivetran/fivetran.py,sha256=Up5wbLk7hBk9b0pqcHwW6b0H52UJj90cmLhn0QJeZ4g,14416
340
340
  datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=-ibtfgxFv08P5_X5PVqV4CocxAjRWmY858esQL5OaAQ,13697
@@ -568,7 +568,8 @@ datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=wsVD0SkGUwb-H9
568
568
  datahub/ingestion/source/tableau/tableau_validation.py,sha256=Hjbfc1AMIkGgzo5ffWXtNRjrxSxzHvw7-dYZDt4d3WE,1819
569
569
  datahub/ingestion/source/unity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
570
570
  datahub/ingestion/source/unity/analyze_profiler.py,sha256=2pqkFY30CfN4aHgFZZntjeG0hNhBytZJvXC13VfTc1I,4689
571
- datahub/ingestion/source/unity/config.py,sha256=A5lkm-koBDOnBSSCTzOvYlsSIxT-xbK3NcJMS6xJMaQ,20914
571
+ datahub/ingestion/source/unity/config.py,sha256=lHvr-PGVcZ0P_2e0RuwmfSRlQRJ81astx4hQZkNrX_k,18713
572
+ datahub/ingestion/source/unity/connection.py,sha256=iCsQhZ1vxzv1qQKTl_sFUZdmBLLIrNdu2X2V8hT7IGI,2441
572
573
  datahub/ingestion/source/unity/connection_test.py,sha256=Dwpz4AIc6ZDwq6pWmRCSCuDUgNjPP_bVAVJumgAAS4w,2661
573
574
  datahub/ingestion/source/unity/ge_profiler.py,sha256=NBRHZceq-f95iUn7u0h7cgcd9nAc48Aa-lmp_BqE0As,8409
574
575
  datahub/ingestion/source/unity/hive_metastore_proxy.py,sha256=IAWWJjaW0si_UF52Se2D7wmdYRY_afUG4QlVmQu6xaw,15351
@@ -1125,8 +1126,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
1125
1126
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
1126
1127
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
1127
1128
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
1128
- acryl_datahub-1.2.0.11rc5.dist-info/METADATA,sha256=Yb9MOwBUZG-CqJKSTyxscoMetKu6ZLIz6SNrrWEzmaI,184341
1129
- acryl_datahub-1.2.0.11rc5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1130
- acryl_datahub-1.2.0.11rc5.dist-info/entry_points.txt,sha256=pzsBoTx-D-iTcmpX8oCGCyzlHP2112EygUMzZWz56M8,10105
1131
- acryl_datahub-1.2.0.11rc5.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1132
- acryl_datahub-1.2.0.11rc5.dist-info/RECORD,,
1129
+ acryl_datahub-1.3.0rc5.dist-info/METADATA,sha256=k7aciCTdmq9GKnJBJOrO9nQZt5Os9gcongq5sM9F330,184426
1130
+ acryl_datahub-1.3.0rc5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1131
+ acryl_datahub-1.3.0rc5.dist-info/entry_points.txt,sha256=pzsBoTx-D-iTcmpX8oCGCyzlHP2112EygUMzZWz56M8,10105
1132
+ acryl_datahub-1.3.0rc5.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1133
+ acryl_datahub-1.3.0rc5.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "1.2.0.11rc5"
3
+ __version__ = "1.3.0rc5"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
datahub/ingestion/source/fivetran/config.py CHANGED
@@ -29,9 +29,7 @@ from datahub.ingestion.source.state.stale_entity_removal_handler import (
29
29
  from datahub.ingestion.source.state.stateful_ingestion_base import (
30
30
  StatefulIngestionConfigBase,
31
31
  )
32
- from datahub.ingestion.source.unity.config import (
33
- UnityCatalogConnectionConfig,
34
- )
32
+ from datahub.ingestion.source.unity.connection import UnityCatalogConnectionConfig
35
33
  from datahub.utilities.lossy_collections import LossyList
36
34
  from datahub.utilities.perf_timer import PerfTimer
37
35
 
datahub/ingestion/source/unity/config.py CHANGED
@@ -2,7 +2,6 @@ import logging
2
2
  import os
3
3
  from datetime import datetime, timedelta, timezone
4
4
  from typing import Any, Dict, List, Optional, Union
5
- from urllib.parse import urlparse
6
5
 
7
6
  import pydantic
8
7
  from pydantic import Field
@@ -20,10 +19,8 @@ from datahub.configuration.source_common import (
20
19
  )
21
20
  from datahub.configuration.validate_field_removal import pydantic_removed_field
22
21
  from datahub.configuration.validate_field_rename import pydantic_renamed_field
23
- from datahub.ingestion.source.ge_data_profiler import DATABRICKS
24
22
  from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig
25
23
  from datahub.ingestion.source.sql.sql_config import SQLCommonConfig
26
- from datahub.ingestion.source.sql.sqlalchemy_uri import make_sqlalchemy_uri
27
24
  from datahub.ingestion.source.state.stale_entity_removal_handler import (
28
25
  StatefulStaleMetadataRemovalConfig,
29
26
  )
@@ -31,6 +28,7 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
31
28
  StatefulIngestionConfigBase,
32
29
  StatefulProfilingConfigMixin,
33
30
  )
31
+ from datahub.ingestion.source.unity.connection import UnityCatalogConnectionConfig
34
32
  from datahub.ingestion.source.usage.usage_common import BaseUsageConfig
35
33
  from datahub.ingestion.source_config.operation_config import (
36
34
  OperationConfig,
@@ -132,55 +130,6 @@ class UnityCatalogGEProfilerConfig(UnityCatalogProfilerConfig, GEProfilingConfig
132
130
  )
133
131
 
134
132
 
135
- class UnityCatalogConnectionConfig(ConfigModel):
136
- """
137
- Configuration for connecting to Databricks Unity Catalog.
138
- Contains only connection-related fields that can be reused across different sources.
139
- """
140
-
141
- scheme: str = DATABRICKS
142
- token: str = pydantic.Field(description="Databricks personal access token")
143
- workspace_url: str = pydantic.Field(
144
- description="Databricks workspace url. e.g. https://my-workspace.cloud.databricks.com"
145
- )
146
- warehouse_id: Optional[str] = pydantic.Field(
147
- default=None,
148
- description=(
149
- "SQL Warehouse id, for running queries. Must be explicitly provided to enable SQL-based features. "
150
- "Required for the following features that need SQL access: "
151
- "1) Tag extraction (include_tags=True) - queries system.information_schema.tags "
152
- "2) Hive Metastore catalog (include_hive_metastore=True) - queries legacy hive_metastore catalog "
153
- "3) System table lineage (lineage_data_source=SYSTEM_TABLES) - queries system.access.table_lineage/column_lineage "
154
- "4) Data profiling (profiling.enabled=True) - runs SELECT/ANALYZE queries on tables. "
155
- "When warehouse_id is missing, these features will be automatically disabled (with warnings) to allow ingestion to continue."
156
- ),
157
- )
158
-
159
- extra_client_options: Dict[str, Any] = Field(
160
- default={},
161
- description="Additional options to pass to Databricks SQLAlchemy client.",
162
- )
163
-
164
- def __init__(self, **data: Any):
165
- super().__init__(**data)
166
-
167
- def get_sql_alchemy_url(self, database: Optional[str] = None) -> str:
168
- uri_opts = {"http_path": f"/sql/1.0/warehouses/{self.warehouse_id}"}
169
- if database:
170
- uri_opts["catalog"] = database
171
- return make_sqlalchemy_uri(
172
- scheme=self.scheme,
173
- username="token",
174
- password=self.token,
175
- at=urlparse(self.workspace_url).netloc,
176
- db=database,
177
- uri_opts=uri_opts,
178
- )
179
-
180
- def get_options(self) -> dict:
181
- return self.extra_client_options
182
-
183
-
184
133
  class UnityCatalogSourceConfig(
185
134
  UnityCatalogConnectionConfig,
186
135
  SQLCommonConfig,
datahub/ingestion/source/unity/connection.py ADDED
@@ -0,0 +1,61 @@
1
+ """Databricks Unity Catalog connection configuration."""
2
+
3
+ from typing import Any, Dict, Optional
4
+ from urllib.parse import urlparse
5
+
6
+ import pydantic
7
+ from pydantic import Field
8
+
9
+ from datahub.configuration.common import ConfigModel
10
+ from datahub.ingestion.source.sql.sqlalchemy_uri import make_sqlalchemy_uri
11
+
12
+ DATABRICKS = "databricks"
13
+
14
+
15
+ class UnityCatalogConnectionConfig(ConfigModel):
16
+ """
17
+ Configuration for connecting to Databricks Unity Catalog.
18
+ Contains only connection-related fields that can be reused across different sources.
19
+ """
20
+
21
+ scheme: str = DATABRICKS
22
+ token: str = pydantic.Field(description="Databricks personal access token")
23
+ workspace_url: str = pydantic.Field(
24
+ description="Databricks workspace url. e.g. https://my-workspace.cloud.databricks.com"
25
+ )
26
+ warehouse_id: Optional[str] = pydantic.Field(
27
+ default=None,
28
+ description=(
29
+ "SQL Warehouse id, for running queries. Must be explicitly provided to enable SQL-based features. "
30
+ "Required for the following features that need SQL access: "
31
+ "1) Tag extraction (include_tags=True) - queries system.information_schema.tags "
32
+ "2) Hive Metastore catalog (include_hive_metastore=True) - queries legacy hive_metastore catalog "
33
+ "3) System table lineage (lineage_data_source=SYSTEM_TABLES) - queries system.access.table_lineage/column_lineage "
34
+ "4) Data profiling (profiling.enabled=True) - runs SELECT/ANALYZE queries on tables. "
35
+ "When warehouse_id is missing, these features will be automatically disabled (with warnings) to allow ingestion to continue."
36
+ ),
37
+ )
38
+
39
+ extra_client_options: Dict[str, Any] = Field(
40
+ default={},
41
+ description="Additional options to pass to Databricks SQLAlchemy client.",
42
+ )
43
+
44
+ def __init__(self, **data: Any):
45
+ super().__init__(**data)
46
+
47
+ def get_sql_alchemy_url(self, database: Optional[str] = None) -> str:
48
+ uri_opts = {"http_path": f"/sql/1.0/warehouses/{self.warehouse_id}"}
49
+ if database:
50
+ uri_opts["catalog"] = database
51
+ return make_sqlalchemy_uri(
52
+ scheme=self.scheme,
53
+ username="token",
54
+ password=self.token,
55
+ at=urlparse(self.workspace_url).netloc,
56
+ db=database,
57
+ uri_opts=uri_opts,
58
+ )
59
+
60
+ def get_options(self) -> dict:
61
+ return self.extra_client_options