acryl-datahub 1.0.0.3rc1__py3-none-any.whl → 1.0.0.3rc4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {acryl_datahub-1.0.0.3rc1.dist-info → acryl_datahub-1.0.0.3rc4.dist-info}/METADATA +2285 -2283
- {acryl_datahub-1.0.0.3rc1.dist-info → acryl_datahub-1.0.0.3rc4.dist-info}/RECORD +27 -26
- datahub/_version.py +1 -1
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/datajob.py +7 -4
- datahub/emitter/mcp.py +5 -1
- datahub/ingestion/run/pipeline.py +6 -4
- datahub/ingestion/source/common/subtypes.py +3 -0
- datahub/ingestion/source/mlflow.py +19 -6
- datahub/ingestion/source/powerbi/config.py +12 -0
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +153 -0
- datahub/ingestion/source/sigma/config.py +2 -3
- datahub/ingestion/source/snowflake/snowflake_config.py +1 -0
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/tableau/tableau.py +31 -6
- datahub/ingestion/source/vertexai/vertexai.py +316 -4
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +23 -2
- datahub/metadata/_schema_classes.py +1 -1
- datahub/metadata/schema.avsc +2 -1
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/testing/mcp_diff.py +15 -2
- {acryl_datahub-1.0.0.3rc1.dist-info → acryl_datahub-1.0.0.3rc4.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0.3rc1.dist-info → acryl_datahub-1.0.0.3rc4.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0.3rc1.dist-info → acryl_datahub-1.0.0.3rc4.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.0.0.3rc1.dist-info → acryl_datahub-1.0.0.3rc4.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/powerbi/m_query/pattern_handler.py

@@ -29,6 +29,12 @@ from datahub.ingestion.source.powerbi.m_query.data_classes import (
     Lineage,
     ReferencedTable,
 )
+from datahub.ingestion.source.powerbi.m_query.odbc import (
+    extract_dsn,
+    extract_platform,
+    extract_server,
+    normalize_platform_name,
+)
 from datahub.ingestion.source.powerbi.rest_api_wrapper.data_classes import Table
 from datahub.metadata.schema_classes import SchemaFieldDataTypeClass
 from datahub.sql_parsing.sqlglot_lineage import (
@@ -155,6 +161,7 @@ class AbstractLineage(ABC):
                 tree_function.token_values(arg_list)
             ),
         )
+        logger.debug(f"DB Details: {arguments}")

         if len(arguments) < 2:
             logger.debug(f"Expected minimum 2 arguments, but got {len(arguments)}")
@@ -940,6 +947,147 @@ class NativeQueryLineage(AbstractLineage):
         )


+class OdbcLineage(AbstractLineage):
+    def create_lineage(
+        self, data_access_func_detail: DataAccessFunctionDetail
+    ) -> Lineage:
+        logger.debug(
+            f"Processing {self.get_platform_pair().powerbi_data_platform_name} "
+            f"data-access function detail {data_access_func_detail}"
+        )
+
+        connect_string, _ = self.get_db_detail_from_argument(
+            data_access_func_detail.arg_list
+        )
+
+        if not connect_string:
+            self.reporter.warning(
+                title="Can not extract ODBC connect string",
+                message="Can not extract ODBC connect string from data access function. Skipping Lineage creation.",
+                context=f"table-name={self.table.full_name}, data-access-func-detail={data_access_func_detail}",
+            )
+            return Lineage.empty()
+
+        logger.debug(f"ODBC connect string: {connect_string}")
+        data_platform, powerbi_platform = extract_platform(connect_string)
+        server_name = extract_server(connect_string)
+
+        if not data_platform:
+            dsn = extract_dsn(connect_string)
+            if dsn:
+                logger.debug(f"Extracted DSN: {dsn}")
+                server_name = dsn
+            if dsn and self.config.dsn_to_platform_name:
+                logger.debug(f"Attempting to map DSN {dsn} to platform")
+                name = self.config.dsn_to_platform_name.get(dsn)
+                if name:
+                    logger.debug(f"Found DSN {dsn} mapped to platform {name}")
+                    data_platform, powerbi_platform = normalize_platform_name(name)
+
+        if not data_platform or not powerbi_platform:
+            self.reporter.warning(
+                title="Can not determine ODBC platform",
+                message="Can not determine platform from ODBC connect string. Skipping Lineage creation.",
+                context=f"table-name={self.table.full_name}, connect-string={connect_string}",
+            )
+            return Lineage.empty()
+
+        platform_pair: DataPlatformPair = self.create_platform_pair(
+            data_platform, powerbi_platform
+        )
+
+        if not server_name and self.config.server_to_platform_instance:
+            self.reporter.warning(
+                title="Can not determine ODBC server name",
+                message="Can not determine server name with server_to_platform_instance mapping. Skipping Lineage creation.",
+                context=f"table-name={self.table.full_name}",
+            )
+            return Lineage.empty()
+        elif not server_name:
+            server_name = "unknown"
+
+        database_name = None
+        schema_name = None
+        table_name = None
+        qualified_table_name = None
+
+        temp_accessor: Optional[IdentifierAccessor] = (
+            data_access_func_detail.identifier_accessor
+        )
+
+        while temp_accessor:
+            logger.debug(
+                f"identifier = {temp_accessor.identifier} items = {temp_accessor.items}"
+            )
+            if temp_accessor.items.get("Kind") == "Database":
+                database_name = temp_accessor.items["Name"]
+
+            if temp_accessor.items.get("Kind") == "Schema":
+                schema_name = temp_accessor.items["Name"]
+
+            if temp_accessor.items.get("Kind") == "Table":
+                table_name = temp_accessor.items["Name"]
+
+            if temp_accessor.next is not None:
+                temp_accessor = temp_accessor.next
+            else:
+                break
+
+        if (
+            database_name is not None
+            and schema_name is not None
+            and table_name is not None
+        ):
+            qualified_table_name = f"{database_name}.{schema_name}.{table_name}"
+        elif database_name is not None and table_name is not None:
+            qualified_table_name = f"{database_name}.{table_name}"
+
+        if not qualified_table_name:
+            self.reporter.warning(
+                title="Can not determine qualified table name",
+                message="Can not determine qualified table name for ODBC data source. Skipping Lineage creation.",
+                context=f"table-name={self.table.full_name}, data-platform={data_platform}",
+            )
+            logger.warning(
+                f"Can not determine qualified table name for ODBC data source {data_platform} "
+                f"table {self.table.full_name}."
+            )
+            return Lineage.empty()
+
+        logger.debug(
+            f"ODBC Platform {data_platform} found qualified table name {qualified_table_name}"
+        )
+
+        urn = make_urn(
+            config=self.config,
+            platform_instance_resolver=self.platform_instance_resolver,
+            data_platform_pair=platform_pair,
+            server=server_name,
+            qualified_table_name=qualified_table_name,
+        )
+
+        column_lineage = self.create_table_column_lineage(urn)
+
+        return Lineage(
+            upstreams=[
+                DataPlatformTable(
+                    data_platform_pair=platform_pair,
+                    urn=urn,
+                )
+            ],
+            column_lineage=column_lineage,
+        )
+
+    @staticmethod
+    def create_platform_pair(
+        data_platform: str, powerbi_platform: str
+    ) -> DataPlatformPair:
+        return DataPlatformPair(data_platform, powerbi_platform)
+
+    def get_platform_pair(self) -> DataPlatformPair:
+        return SupportedDataPlatform.ODBC.value
+
+
 class SupportedPattern(Enum):
     DATABRICKS_QUERY = (
         DatabricksLineage,
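The new helper module datahub/ingestion/source/powerbi/m_query/odbc.py (+185 lines) is not expanded in this diff. Judging only from the call sites in OdbcLineage above, here is a minimal sketch of the expected signatures and behavior; the key/value parsing and the driver-to-platform table are assumptions for illustration, not the shipped implementation:

from typing import Dict, Optional, Tuple

# Hypothetical driver-keyword -> (datahub_platform, powerbi_platform) table.
_DRIVER_PLATFORMS: Dict[str, Tuple[str, str]] = {
    "sql server": ("mssql", "Sql"),
    "postgresql": ("postgres", "PostgreSQL"),
}


def _params(connect_string: str) -> Dict[str, str]:
    # Split "Key=Value;Key=Value" ODBC pairs; keys are case-insensitive and
    # values may be wrapped in {braces}.
    out: Dict[str, str] = {}
    for part in connect_string.split(";"):
        key, sep, value = part.partition("=")
        if sep:
            out[key.strip().lower()] = value.strip().strip("{}")
    return out


def extract_platform(connect_string: str) -> Tuple[Optional[str], Optional[str]]:
    driver = _params(connect_string).get("driver", "")
    for keyword, pair in _DRIVER_PLATFORMS.items():
        if keyword in driver.lower():
            return pair
    return None, None


def extract_server(connect_string: str) -> Optional[str]:
    return _params(connect_string).get("server")


def extract_dsn(connect_string: str) -> Optional[str]:
    return _params(connect_string).get("dsn")

Under these assumptions, extract_platform("Driver={SQL Server};Server=db.internal;Database=sales") would yield a platform pair directly, while a DSN-only string such as "DSN=warehouse_dsn" returns no platform and falls through to extract_dsn and the dsn_to_platform_name lookup in OdbcLineage.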
@@ -991,6 +1139,11 @@ class SupportedPattern(Enum):
         FunctionName.NATIVE_QUERY,
     )

+    ODBC = (
+        OdbcLineage,
+        FunctionName.ODBC_DATA_ACCESS,
+    )
+
     def handler(self) -> Type[AbstractLineage]:
         return self.value[0]

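The matching config change (datahub/ingestion/source/powerbi/config.py, +12 lines) is also not expanded here, but OdbcLineage reads a dict-valued dsn_to_platform_name option alongside the existing server_to_platform_instance mapping. A hedged recipe fragment under those assumptions, with illustrative DSN and instance values:

# Illustrative PowerBI source config; the dsn_to_platform_name field name is
# inferred from the OdbcLineage code above, and all values are made up.
powerbi_config = {
    "dsn_to_platform_name": {
        # Resolve connect strings that only carry a DSN to a platform name
        # that normalize_platform_name can understand.
        "warehouse_dsn": "snowflake",
    },
    "server_to_platform_instance": {
        # Existing option: map the resolved server/DSN to a platform instance.
        "warehouse_dsn": {"platform_instance": "prod", "env": "PROD"},
    },
}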
datahub/ingestion/source/sigma/config.py

@@ -18,7 +18,6 @@ from datahub.ingestion.source.state.stale_entity_removal_handler import (
 from datahub.ingestion.source.state.stateful_ingestion_base import (
     StatefulIngestionConfigBase,
 )
-from datahub.utilities.lossy_collections import LossyDict

 logger = logging.getLogger(__name__)

@@ -81,8 +80,8 @@ class WorkspaceCounts(BaseModel):
 class SigmaWorkspaceEntityFilterReport(EntityFilterReport):
     type: str = "workspace"

-    workspace_counts: LossyDict[str, WorkspaceCounts] = Field(
-        default_factory=LossyDict,
+    workspace_counts: Dict[str, WorkspaceCounts] = Field(
+        default_factory=dict,
         description="Counts of workbooks, datasets, elements and pages in each workspace.",
     )

datahub/ingestion/source/snowflake/snowflake_config.py

@@ -301,6 +301,7 @@ class SnowflakeV2Config(
         default=AllowDenyPattern.allow_all(),
         description=(
             "List of regex patterns for structured properties to include in ingestion."
+            " Applied to tags with form `<database>.<schema>.<tag_name>`."
             " Only used if `extract_tags` and `extract_tags_as_structured_properties` are enabled."
         ),
     )
datahub/ingestion/source/snowflake/snowflake_tag.py

@@ -23,6 +23,7 @@ from datahub.metadata.com.linkedin.pegasus2avro.common import AuditStamp
 from datahub.metadata.com.linkedin.pegasus2avro.structured import (
     StructuredPropertyDefinition,
 )
+from datahub.metadata.schema_classes import ChangeTypeClass
 from datahub.metadata.urns import (
     ContainerUrn,
     DatasetUrn,
@@ -81,7 +82,7 @@ class SnowflakeTagExtractor(SnowflakeCommonMixin):
     def create_structured_property_templates(self) -> Iterable[MetadataWorkUnit]:
         for tag in self.data_dictionary.get_all_tags():
             if not self.config.structured_property_pattern.allowed(
-                tag.
+                tag._id_prefix_as_str()
             ):
                 continue
             if self.config.extract_tags_as_structured_properties:
@@ -111,6 +112,8 @@ class SnowflakeTagExtractor(SnowflakeCommonMixin):
         yield MetadataChangeProposalWrapper(
             entityUrn=urn,
             aspect=aspect,
+            changeType=ChangeTypeClass.CREATE,
+            headers={"If-None-Match": "*"},
         ).as_workunit()

     def _get_tags_on_object_with_propagation(
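With ChangeTypeClass.CREATE plus the If-None-Match: "*" header, the proposal becomes a conditional create: under standard HTTP conditional-request semantics, the write applies only when the aspect does not already exist, so repeated ingestion runs should not clobber structured-property templates that were edited after creation. A minimal sketch of the same pattern; the urn and aspect here are placeholders:

from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata.schema_classes import ChangeTypeClass, StatusClass

mcp = MetadataChangeProposalWrapper(
    entityUrn="urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.table,PROD)",
    aspect=StatusClass(removed=False),  # placeholder aspect for illustration
    changeType=ChangeTypeClass.CREATE,
    # Ask the server to apply the write only if no version of the aspect
    # exists yet (HTTP conditional-request semantics).
    headers={"If-None-Match": "*"},
)
workunit = mcp.as_workunit()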
datahub/ingestion/source/tableau/tableau.py

@@ -12,6 +12,7 @@ from typing import (
     Dict,
     Iterable,
     List,
+    Literal,
     Optional,
     Set,
     Tuple,
@@ -612,10 +613,14 @@ class TableauConfig(
         description="Configuration settings for ingesting Tableau groups and their capabilities as custom properties.",
     )

-    ingest_hidden_assets: bool = Field(
-        description=
+    ingest_hidden_assets: Union[List[Literal["worksheet", "dashboard"]], bool] = Field(
+        default=["worksheet", "dashboard"],
+        description=(
+            "When enabled, hidden worksheets and dashboards are ingested into Datahub."
+            " If a dashboard or worksheet is hidden in Tableau the luid is blank."
+            " A list of asset types can also be specified, to only ingest those hidden assets."
+            " Current options supported are 'worksheet' and 'dashboard'."
+        ),
     )

     tags_for_hidden_assets: List[str] = Field(
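A quick illustration of the widened field: both boolean forms keep working, and a list now restricts which hidden asset types are ingested. The config dicts below are illustrative values only:

# All of these validate against the widened annotation
# Union[List[Literal["worksheet", "dashboard"]], bool]:
ingest_all_hidden = {"ingest_hidden_assets": True}    # old boolean behavior
skip_all_hidden = {"ingest_hidden_assets": False}
only_hidden_dashboards = {"ingest_hidden_assets": ["dashboard"]}  # new list form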
@@ -1348,6 +1353,26 @@ class TableauSiteSource:
         # More info here: https://help.tableau.com/current/api/metadata_api/en-us/reference/view.doc.html
         return not dashboard_or_view.get(c.LUID)

+    def _should_ingest_worksheet(self, worksheet: Dict) -> bool:
+        return (
+            self.config.ingest_hidden_assets is True
+            or (
+                isinstance(self.config.ingest_hidden_assets, list)
+                and "worksheet" in self.config.ingest_hidden_assets
+            )
+            or not self._is_hidden_view(worksheet)
+        )
+
+    def _should_ingest_dashboard(self, dashboard: Dict) -> bool:
+        return (
+            self.config.ingest_hidden_assets is True
+            or (
+                isinstance(self.config.ingest_hidden_assets, list)
+                and "dashboard" in self.config.ingest_hidden_assets
+            )
+            or not self._is_hidden_view(dashboard)
+        )
+
     def get_connection_object_page(
         self,
         query: str,
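Note the trailing `or not self._is_hidden_view(...)` clause: assets that are not hidden are always ingested, and the config only gates hidden ones. A self-contained trace of the decision for a hidden worksheet, mirroring _should_ingest_worksheet above:

# Decision for a worksheet where _is_hidden_view(...) is True.
def ingest_hidden_worksheet(ingest_hidden_assets) -> bool:
    return ingest_hidden_assets is True or (
        isinstance(ingest_hidden_assets, list)
        and "worksheet" in ingest_hidden_assets
    )

assert ingest_hidden_worksheet(True)
assert ingest_hidden_worksheet(["worksheet", "dashboard"])
assert not ingest_hidden_worksheet(["dashboard"])  # counted as skipped
assert not ingest_hidden_worksheet(False)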
@@ -3059,7 +3084,7 @@ class TableauSiteSource:
             query_filter=sheets_filter,
             page_size=self.config.effective_sheet_page_size,
         ):
-            if self.
+            if self._should_ingest_worksheet(sheet):
                 yield from self.emit_sheets_as_charts(sheet, sheet.get(c.WORKBOOK))
             else:
                 self.report.num_hidden_assets_skipped += 1
@@ -3380,7 +3405,7 @@ class TableauSiteSource:
             query_filter=dashboards_filter,
             page_size=self.config.effective_dashboard_page_size,
         ):
-            if self.
+            if self._should_ingest_dashboard(dashboard):
                 yield from self.emit_dashboard(dashboard, dashboard.get(c.WORKBOOK))
             else:
                 self.report.num_hidden_assets_skipped += 1