acryl-datahub 1.2.0.11rc5__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.2.0.11rc5.dist-info → acryl_datahub-1.3.0.dist-info}/METADATA +2603 -2601
- {acryl_datahub-1.2.0.11rc5.dist-info → acryl_datahub-1.3.0.dist-info}/RECORD +10 -9
- datahub/_version.py +1 -1
- datahub/ingestion/source/fivetran/config.py +1 -3
- datahub/ingestion/source/unity/config.py +1 -52
- datahub/ingestion/source/unity/connection.py +61 -0
- {acryl_datahub-1.2.0.11rc5.dist-info → acryl_datahub-1.3.0.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.11rc5.dist-info → acryl_datahub-1.3.0.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.11rc5.dist-info → acryl_datahub-1.3.0.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.11rc5.dist-info → acryl_datahub-1.3.0.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
acryl_datahub-1.2.0.11rc5.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
1
|
+
acryl_datahub-1.3.0.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
2
2
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
3
3
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
4
|
-
datahub/_version.py,sha256=
|
|
4
|
+
datahub/_version.py,sha256=9bBnOhXKK0Kz9kBFa9_js_7Pj_9YvV8did9BXYK34H8,318
|
|
5
5
|
datahub/entrypoints.py,sha256=9Qf-37rNnTzbGlx8S75OCDazIclFp6zWNcCEL1zCZto,9015
|
|
6
6
|
datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
|
|
7
7
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -334,7 +334,7 @@ datahub/ingestion/source/excel/report.py,sha256=oEkeI8J6is7zB9iz4RqASu_-Q5xl36lA
|
|
|
334
334
|
datahub/ingestion/source/excel/source.py,sha256=w_vOz4UD7BcXBBDKoo81_6-QFeOPITuXqkfjIMHCQj4,23827
|
|
335
335
|
datahub/ingestion/source/excel/util.py,sha256=YYmadYuCiT-4_MfQM0YSE7wuDcE0k8o2KrlOKM9Z6eI,406
|
|
336
336
|
datahub/ingestion/source/fivetran/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
337
|
-
datahub/ingestion/source/fivetran/config.py,sha256=
|
|
337
|
+
datahub/ingestion/source/fivetran/config.py,sha256=6yriUMtTPMZUHqbZ9gzyFduPVt6CxzirdYSg4k-ziYI,10285
|
|
338
338
|
datahub/ingestion/source/fivetran/data_classes.py,sha256=ecdUJH5BEze0yv-uFpKWPNaNmV1gORDA2XMFk0zhcBw,595
|
|
339
339
|
datahub/ingestion/source/fivetran/fivetran.py,sha256=Up5wbLk7hBk9b0pqcHwW6b0H52UJj90cmLhn0QJeZ4g,14416
|
|
340
340
|
datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=-ibtfgxFv08P5_X5PVqV4CocxAjRWmY858esQL5OaAQ,13697
|
|
@@ -568,7 +568,8 @@ datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=wsVD0SkGUwb-H9
|
|
|
568
568
|
datahub/ingestion/source/tableau/tableau_validation.py,sha256=Hjbfc1AMIkGgzo5ffWXtNRjrxSxzHvw7-dYZDt4d3WE,1819
|
|
569
569
|
datahub/ingestion/source/unity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
570
570
|
datahub/ingestion/source/unity/analyze_profiler.py,sha256=2pqkFY30CfN4aHgFZZntjeG0hNhBytZJvXC13VfTc1I,4689
|
|
571
|
-
datahub/ingestion/source/unity/config.py,sha256=
|
|
571
|
+
datahub/ingestion/source/unity/config.py,sha256=lHvr-PGVcZ0P_2e0RuwmfSRlQRJ81astx4hQZkNrX_k,18713
|
|
572
|
+
datahub/ingestion/source/unity/connection.py,sha256=iCsQhZ1vxzv1qQKTl_sFUZdmBLLIrNdu2X2V8hT7IGI,2441
|
|
572
573
|
datahub/ingestion/source/unity/connection_test.py,sha256=Dwpz4AIc6ZDwq6pWmRCSCuDUgNjPP_bVAVJumgAAS4w,2661
|
|
573
574
|
datahub/ingestion/source/unity/ge_profiler.py,sha256=NBRHZceq-f95iUn7u0h7cgcd9nAc48Aa-lmp_BqE0As,8409
|
|
574
575
|
datahub/ingestion/source/unity/hive_metastore_proxy.py,sha256=IAWWJjaW0si_UF52Se2D7wmdYRY_afUG4QlVmQu6xaw,15351
|
|
@@ -1125,8 +1126,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1125
1126
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1126
1127
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1127
1128
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1128
|
-
acryl_datahub-1.
|
|
1129
|
-
acryl_datahub-1.2.0.11rc5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
1130
|
-
acryl_datahub-1.2.0.11rc5.dist-info/entry_points.txt,sha256=pzsBoTx-D-iTcmpX8oCGCyzlHP2112EygUMzZWz56M8,10105
|
|
1131
|
-
acryl_datahub-1.2.0.11rc5.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1132
|
-
acryl_datahub-1.2.0.11rc5.dist-info/RECORD,,
|
|
1129
|
+
acryl_datahub-1.3.0.dist-info/METADATA,sha256=4oWnajuLBUpYiIu7a_J7r6ddL3EWumCUI_yiesFjvgk,184417
|
|
1130
|
+
acryl_datahub-1.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
1131
|
+
acryl_datahub-1.3.0.dist-info/entry_points.txt,sha256=pzsBoTx-D-iTcmpX8oCGCyzlHP2112EygUMzZWz56M8,10105
|
|
1132
|
+
acryl_datahub-1.3.0.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1133
|
+
acryl_datahub-1.3.0.dist-info/RECORD,,
|
datahub/_version.py
CHANGED
|
@@ -29,9 +29,7 @@ from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
|
|
29
29
|
from datahub.ingestion.source.state.stateful_ingestion_base import (
|
|
30
30
|
StatefulIngestionConfigBase,
|
|
31
31
|
)
|
|
32
|
-
from datahub.ingestion.source.unity.config import (
|
|
33
|
-
UnityCatalogConnectionConfig,
|
|
34
|
-
)
|
|
32
|
+
from datahub.ingestion.source.unity.connection import UnityCatalogConnectionConfig
|
|
35
33
|
from datahub.utilities.lossy_collections import LossyList
|
|
36
34
|
from datahub.utilities.perf_timer import PerfTimer
|
|
37
35
|
|
|
@@ -2,7 +2,6 @@ import logging
|
|
|
2
2
|
import os
|
|
3
3
|
from datetime import datetime, timedelta, timezone
|
|
4
4
|
from typing import Any, Dict, List, Optional, Union
|
|
5
|
-
from urllib.parse import urlparse
|
|
6
5
|
|
|
7
6
|
import pydantic
|
|
8
7
|
from pydantic import Field
|
|
@@ -20,10 +19,8 @@ from datahub.configuration.source_common import (
|
|
|
20
19
|
)
|
|
21
20
|
from datahub.configuration.validate_field_removal import pydantic_removed_field
|
|
22
21
|
from datahub.configuration.validate_field_rename import pydantic_renamed_field
|
|
23
|
-
from datahub.ingestion.source.ge_data_profiler import DATABRICKS
|
|
24
22
|
from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig
|
|
25
23
|
from datahub.ingestion.source.sql.sql_config import SQLCommonConfig
|
|
26
|
-
from datahub.ingestion.source.sql.sqlalchemy_uri import make_sqlalchemy_uri
|
|
27
24
|
from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
|
28
25
|
StatefulStaleMetadataRemovalConfig,
|
|
29
26
|
)
|
|
@@ -31,6 +28,7 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
|
|
|
31
28
|
StatefulIngestionConfigBase,
|
|
32
29
|
StatefulProfilingConfigMixin,
|
|
33
30
|
)
|
|
31
|
+
from datahub.ingestion.source.unity.connection import UnityCatalogConnectionConfig
|
|
34
32
|
from datahub.ingestion.source.usage.usage_common import BaseUsageConfig
|
|
35
33
|
from datahub.ingestion.source_config.operation_config import (
|
|
36
34
|
OperationConfig,
|
|
@@ -132,55 +130,6 @@ class UnityCatalogGEProfilerConfig(UnityCatalogProfilerConfig, GEProfilingConfig
|
|
|
132
130
|
)
|
|
133
131
|
|
|
134
132
|
|
|
135
|
-
class UnityCatalogConnectionConfig(ConfigModel):
|
|
136
|
-
"""
|
|
137
|
-
Configuration for connecting to Databricks Unity Catalog.
|
|
138
|
-
Contains only connection-related fields that can be reused across different sources.
|
|
139
|
-
"""
|
|
140
|
-
|
|
141
|
-
scheme: str = DATABRICKS
|
|
142
|
-
token: str = pydantic.Field(description="Databricks personal access token")
|
|
143
|
-
workspace_url: str = pydantic.Field(
|
|
144
|
-
description="Databricks workspace url. e.g. https://my-workspace.cloud.databricks.com"
|
|
145
|
-
)
|
|
146
|
-
warehouse_id: Optional[str] = pydantic.Field(
|
|
147
|
-
default=None,
|
|
148
|
-
description=(
|
|
149
|
-
"SQL Warehouse id, for running queries. Must be explicitly provided to enable SQL-based features. "
|
|
150
|
-
"Required for the following features that need SQL access: "
|
|
151
|
-
"1) Tag extraction (include_tags=True) - queries system.information_schema.tags "
|
|
152
|
-
"2) Hive Metastore catalog (include_hive_metastore=True) - queries legacy hive_metastore catalog "
|
|
153
|
-
"3) System table lineage (lineage_data_source=SYSTEM_TABLES) - queries system.access.table_lineage/column_lineage "
|
|
154
|
-
"4) Data profiling (profiling.enabled=True) - runs SELECT/ANALYZE queries on tables. "
|
|
155
|
-
"When warehouse_id is missing, these features will be automatically disabled (with warnings) to allow ingestion to continue."
|
|
156
|
-
),
|
|
157
|
-
)
|
|
158
|
-
|
|
159
|
-
extra_client_options: Dict[str, Any] = Field(
|
|
160
|
-
default={},
|
|
161
|
-
description="Additional options to pass to Databricks SQLAlchemy client.",
|
|
162
|
-
)
|
|
163
|
-
|
|
164
|
-
def __init__(self, **data: Any):
|
|
165
|
-
super().__init__(**data)
|
|
166
|
-
|
|
167
|
-
def get_sql_alchemy_url(self, database: Optional[str] = None) -> str:
|
|
168
|
-
uri_opts = {"http_path": f"/sql/1.0/warehouses/{self.warehouse_id}"}
|
|
169
|
-
if database:
|
|
170
|
-
uri_opts["catalog"] = database
|
|
171
|
-
return make_sqlalchemy_uri(
|
|
172
|
-
scheme=self.scheme,
|
|
173
|
-
username="token",
|
|
174
|
-
password=self.token,
|
|
175
|
-
at=urlparse(self.workspace_url).netloc,
|
|
176
|
-
db=database,
|
|
177
|
-
uri_opts=uri_opts,
|
|
178
|
-
)
|
|
179
|
-
|
|
180
|
-
def get_options(self) -> dict:
|
|
181
|
-
return self.extra_client_options
|
|
182
|
-
|
|
183
|
-
|
|
184
133
|
class UnityCatalogSourceConfig(
|
|
185
134
|
UnityCatalogConnectionConfig,
|
|
186
135
|
SQLCommonConfig,
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""Databricks Unity Catalog connection configuration."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, Optional
|
|
4
|
+
from urllib.parse import urlparse
|
|
5
|
+
|
|
6
|
+
import pydantic
|
|
7
|
+
from pydantic import Field
|
|
8
|
+
|
|
9
|
+
from datahub.configuration.common import ConfigModel
|
|
10
|
+
from datahub.ingestion.source.sql.sqlalchemy_uri import make_sqlalchemy_uri
|
|
11
|
+
|
|
12
|
+
DATABRICKS = "databricks"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class UnityCatalogConnectionConfig(ConfigModel):
|
|
16
|
+
"""
|
|
17
|
+
Configuration for connecting to Databricks Unity Catalog.
|
|
18
|
+
Contains only connection-related fields that can be reused across different sources.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
scheme: str = DATABRICKS
|
|
22
|
+
token: str = pydantic.Field(description="Databricks personal access token")
|
|
23
|
+
workspace_url: str = pydantic.Field(
|
|
24
|
+
description="Databricks workspace url. e.g. https://my-workspace.cloud.databricks.com"
|
|
25
|
+
)
|
|
26
|
+
warehouse_id: Optional[str] = pydantic.Field(
|
|
27
|
+
default=None,
|
|
28
|
+
description=(
|
|
29
|
+
"SQL Warehouse id, for running queries. Must be explicitly provided to enable SQL-based features. "
|
|
30
|
+
"Required for the following features that need SQL access: "
|
|
31
|
+
"1) Tag extraction (include_tags=True) - queries system.information_schema.tags "
|
|
32
|
+
"2) Hive Metastore catalog (include_hive_metastore=True) - queries legacy hive_metastore catalog "
|
|
33
|
+
"3) System table lineage (lineage_data_source=SYSTEM_TABLES) - queries system.access.table_lineage/column_lineage "
|
|
34
|
+
"4) Data profiling (profiling.enabled=True) - runs SELECT/ANALYZE queries on tables. "
|
|
35
|
+
"When warehouse_id is missing, these features will be automatically disabled (with warnings) to allow ingestion to continue."
|
|
36
|
+
),
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
extra_client_options: Dict[str, Any] = Field(
|
|
40
|
+
default={},
|
|
41
|
+
description="Additional options to pass to Databricks SQLAlchemy client.",
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
def __init__(self, **data: Any):
|
|
45
|
+
super().__init__(**data)
|
|
46
|
+
|
|
47
|
+
def get_sql_alchemy_url(self, database: Optional[str] = None) -> str:
|
|
48
|
+
uri_opts = {"http_path": f"/sql/1.0/warehouses/{self.warehouse_id}"}
|
|
49
|
+
if database:
|
|
50
|
+
uri_opts["catalog"] = database
|
|
51
|
+
return make_sqlalchemy_uri(
|
|
52
|
+
scheme=self.scheme,
|
|
53
|
+
username="token",
|
|
54
|
+
password=self.token,
|
|
55
|
+
at=urlparse(self.workspace_url).netloc,
|
|
56
|
+
db=database,
|
|
57
|
+
uri_opts=uri_opts,
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
def get_options(self) -> dict:
|
|
61
|
+
return self.extra_client_options
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|