acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub has been flagged as possibly problematic.
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.0.0.1.dist-info}/METADATA +2391 -2392
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.0.0.1.dist-info}/RECORD +105 -88
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.0.0.1.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.0.0.1.dist-info}/entry_points.txt +2 -1
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +1 -28
- datahub/cli/specific/dataset_cli.py +26 -10
- datahub/emitter/mce_builder.py +1 -3
- datahub/emitter/mcp_builder.py +8 -0
- datahub/emitter/request_helper.py +19 -14
- datahub/emitter/response_helper.py +25 -18
- datahub/emitter/rest_emitter.py +23 -7
- datahub/errors.py +8 -0
- datahub/ingestion/api/source.py +7 -2
- datahub/ingestion/api/source_helpers.py +14 -2
- datahub/ingestion/extractor/schema_util.py +1 -0
- datahub/ingestion/graph/client.py +26 -20
- datahub/ingestion/graph/filters.py +62 -17
- datahub/ingestion/sink/datahub_rest.py +2 -2
- datahub/ingestion/source/cassandra/cassandra.py +1 -10
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +6 -0
- datahub/ingestion/source/common/subtypes.py +17 -1
- datahub/ingestion/source/data_lake_common/path_spec.py +21 -1
- datahub/ingestion/source/dbt/dbt_common.py +6 -4
- datahub/ingestion/source/dbt/dbt_core.py +4 -6
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_source.py +96 -117
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
- datahub/ingestion/source/ge_data_profiler.py +11 -1
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +394 -0
- datahub/ingestion/source/hex/constants.py +3 -0
- datahub/ingestion/source/hex/hex.py +167 -0
- datahub/ingestion/source/hex/mapper.py +372 -0
- datahub/ingestion/source/hex/model.py +68 -0
- datahub/ingestion/source/iceberg/iceberg.py +193 -140
- datahub/ingestion/source/iceberg/iceberg_profiler.py +21 -18
- datahub/ingestion/source/mlflow.py +217 -8
- datahub/ingestion/source/mode.py +11 -1
- datahub/ingestion/source/openapi.py +69 -34
- datahub/ingestion/source/powerbi/config.py +31 -4
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +111 -10
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +41 -24
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -11
- datahub/ingestion/source/redshift/lineage_v2.py +9 -1
- datahub/ingestion/source/redshift/query.py +1 -1
- datahub/ingestion/source/s3/source.py +11 -0
- datahub/ingestion/source/sigma/config.py +3 -4
- datahub/ingestion/source/sigma/sigma.py +10 -6
- datahub/ingestion/source/slack/slack.py +399 -82
- datahub/ingestion/source/snowflake/constants.py +1 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +14 -1
- datahub/ingestion/source/snowflake/snowflake_query.py +17 -0
- datahub/ingestion/source/snowflake/snowflake_report.py +3 -0
- datahub/ingestion/source/snowflake/snowflake_schema.py +29 -0
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +112 -42
- datahub/ingestion/source/snowflake/snowflake_utils.py +25 -1
- datahub/ingestion/source/sql/mssql/job_models.py +15 -1
- datahub/ingestion/source/sql/mssql/source.py +8 -4
- datahub/ingestion/source/sql/oracle.py +51 -4
- datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +242 -0
- datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +1 -29
- datahub/ingestion/source/superset.py +291 -35
- datahub/ingestion/source/usage/usage_common.py +0 -65
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1055 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +68 -0
- datahub/metadata/_schema_classes.py +472 -1
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/schema.avsc +313 -2
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +14 -0
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +95 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/Deprecation.avsc +2 -0
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +32 -0
- datahub/metadata/schemas/QueryProperties.avsc +20 -0
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/sdk/__init__.py +1 -0
- datahub/sdk/dataset.py +122 -0
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +27 -3
- datahub/sdk/main_client.py +24 -1
- datahub/sdk/search_client.py +81 -8
- datahub/sdk/search_filters.py +94 -37
- datahub/sql_parsing/split_statements.py +17 -3
- datahub/sql_parsing/sql_parsing_aggregator.py +6 -0
- datahub/sql_parsing/tool_meta_extractor.py +27 -2
- datahub/testing/mcp_diff.py +1 -18
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/ingestion/source/vertexai.py +0 -697
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.0.0.1.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.0.0.1.dist-info}/top_level.txt +0 -0
datahub/ingestion/graph/filters.py

@@ -1,30 +1,58 @@
 import dataclasses
 import enum
-
+import warnings
+from typing import Dict, List, Literal, Optional, Union
+
+from typing_extensions import TypeAlias
 
 from datahub.emitter.mce_builder import (
     make_data_platform_urn,
     make_dataplatform_instance_urn,
 )
+from datahub.errors import SearchFilterWarning
 from datahub.utilities.urns.urn import guess_entity_type
 
-RawSearchFilterRule = Dict[str,
+RawSearchFilterRule: TypeAlias = Dict[str, Union[str, bool, List[str]]]
+
+# This is a list of OR filters, each of which is a list of AND filters.
+# This can be put directly into the orFilters parameter in GraphQL.
+RawSearchFilter: TypeAlias = List[Dict[Literal["and"], List[RawSearchFilterRule]]]
+
+# Mirrors our GraphQL enum: https://datahubproject.io/docs/graphql/enums#filteroperator
+FilterOperator: TypeAlias = Literal[
+    "CONTAIN",
+    "EQUAL",
+    "IEQUAL",
+    "IN",
+    "EXISTS",
+    "GREATER_THAN",
+    "GREATER_THAN_OR_EQUAL_TO",
+    "LESS_THAN",
+    "LESS_THAN_OR_EQUAL_TO",
+    "START_WITH",
+    "END_WITH",
+    "DESCENDANTS_INCL",
+    "ANCESTORS_INCL",
+    "RELATED_INCL",
+]
 
 
 @dataclasses.dataclass
 class SearchFilterRule:
     field: str
-    condition:
+    condition: FilterOperator
     values: List[str]
     negated: bool = False
 
     def to_raw(self) -> RawSearchFilterRule:
-        return {
+        rule: RawSearchFilterRule = {
             "field": self.field,
             "condition": self.condition,
             "values": self.values,
-            "negated": self.negated,
         }
+        if self.negated:
+            rule["negated"] = True
+        return rule
 
     def negate(self) -> "SearchFilterRule":
         return SearchFilterRule(
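For orientation, the reworked to_raw now omits the negated key unless it is set. A minimal sketch of the resulting behavior, using the FilterOperator literals introduced above; the behavior of negate() is assumed from its name:

from datahub.ingestion.graph.filters import SearchFilterRule

rule = SearchFilterRule(
    field="platform",
    condition="EQUAL",  # must be one of the FilterOperator literals
    values=["urn:li:dataPlatform:snowflake"],
)
# No "negated" key when the rule is not negated.
assert rule.to_raw() == {
    "field": "platform",
    "condition": "EQUAL",
    "values": ["urn:li:dataPlatform:snowflake"],
}
# negate() presumably flips the flag, so the key appears.
assert rule.negate().to_raw().get("negated") is True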
@@ -53,10 +81,10 @@ def generate_filter(
     platform_instance: Optional[str],
     env: Optional[str],
     container: Optional[str],
-    status: RemovedStatusFilter,
+    status: Optional[RemovedStatusFilter],
     extra_filters: Optional[List[RawSearchFilterRule]],
-    extra_or_filters: Optional[
-) ->
+    extra_or_filters: Optional[RawSearchFilter] = None,
+) -> RawSearchFilter:
     """
     Generate a search filter based on the provided parameters.
     :param platform: The platform to filter by.
@@ -85,15 +113,16 @@ def generate_filter(
         and_filters.append(_get_container_filter(container).to_raw())
 
     # Status filter.
-    status_filter = _get_status_filter(status)
-    if status_filter:
-        and_filters.append(status_filter.to_raw())
+    if status:
+        status_filter = _get_status_filter(status)
+        if status_filter:
+            and_filters.append(status_filter.to_raw())
 
     # Extra filters.
     if extra_filters:
         and_filters += extra_filters
 
-    or_filters:
+    or_filters: RawSearchFilter = [{"and": and_filters}]
 
     # Env filter
     if env:
@@ -107,11 +136,27 @@ def generate_filter(
 
     # Extra OR filters are distributed across the top level and lists.
     if extra_or_filters:
-
-
-        for extra_or_filter in extra_or_filters
-
-
+        new_or_filters: RawSearchFilter = []
+        for and_filter in or_filters:
+            for extra_or_filter in extra_or_filters:
+                if isinstance(extra_or_filter, dict) and "and" in extra_or_filter:
+                    new_or_filters.append(
+                        {"and": and_filter["and"] + extra_or_filter["and"]}
+                    )
+                else:
+                    # Hack for backwards compatibility.
+                    # We have some code that erroneously passed a List[RawSearchFilterRule]
+                    # instead of a List[Dict["and", List[RawSearchFilterRule]]].
+                    warnings.warn(
+                        "Passing a List[RawSearchFilterRule] to extra_or_filters is deprecated. "
+                        "Please pass a List[Dict[str, List[RawSearchFilterRule]]] instead.",
+                        SearchFilterWarning,
+                        stacklevel=3,
+                    )
+                    new_or_filters.append(
+                        {"and": and_filter["and"] + [extra_or_filter]}  # type: ignore
+                    )
+        or_filters = new_or_filters
 
     return or_filters
 
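The new distribution logic cross-joins each existing OR branch with every extra OR group, warning rather than failing on the legacy flat-rule form. A hedged sketch of calling generate_filter with the new RawSearchFilter shape; the leading platform parameter is assumed from the docstring, and the filter values are illustrative:

from datahub.ingestion.graph.filters import generate_filter

or_filters = generate_filter(
    platform="snowflake",
    platform_instance=None,
    env=None,
    container=None,
    status=None,  # now Optional: skip the status filter entirely
    extra_filters=None,
    extra_or_filters=[
        {"and": [{"field": "name", "condition": "CONTAIN", "values": ["orders"]}]},
        {"and": [{"field": "tags", "condition": "EXISTS", "values": []}]},
    ],
)
# One AND branch per extra OR group, each carrying the base platform rule.
assert len(or_filters) == 2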
datahub/ingestion/sink/datahub_rest.py

@@ -20,7 +20,7 @@ from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.emitter.mcp_builder import mcps_from_mce
 from datahub.emitter.rest_emitter import (
     BATCH_INGEST_MAX_PAYLOAD_LENGTH,
-
+    DEFAULT_REST_EMITTER_ENDPOINT,
     DEFAULT_REST_TRACE_MODE,
     DataHubRestEmitter,
     RestSinkEndpoint,
@@ -70,7 +70,7 @@ _DEFAULT_REST_SINK_MODE = pydantic.parse_obj_as(
 
 class DatahubRestSinkConfig(DatahubClientConfig):
     mode: RestSinkMode = _DEFAULT_REST_SINK_MODE
-    endpoint: RestSinkEndpoint =
+    endpoint: RestSinkEndpoint = DEFAULT_REST_EMITTER_ENDPOINT
     default_trace_mode: RestTraceMode = DEFAULT_REST_TRACE_MODE
 
     # These only apply in async modes.
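With the default now imported from the emitter module, a sink config picks it up automatically. A minimal sketch, assuming the server field comes from the DatahubClientConfig base class:

from datahub.ingestion.sink.datahub_rest import DatahubRestSinkConfig

# endpoint defaults to DEFAULT_REST_EMITTER_ENDPOINT; mode and trace mode
# likewise fall back to the module-level defaults shown above.
config = DatahubRestSinkConfig(server="http://localhost:8080")
print(config.endpoint, config.default_trace_mode)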
datahub/ingestion/source/cassandra/cassandra.py

@@ -123,16 +123,7 @@ class CassandraSource(StatefulIngestionSourceBase):
             ).workunit_processor,
         ]
 
-    def get_workunits_internal(
-        self,
-    ) -> Iterable[MetadataWorkUnit]:
-        for metadata in self._get_metadata():
-            if isinstance(metadata, MetadataWorkUnit):
-                yield metadata
-            else:
-                yield from metadata.as_workunits()
-
-    def _get_metadata(self) -> Iterable[Union[MetadataWorkUnit, Entity]]:
+    def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, Entity]]:
         if not self.cassandra_api.authenticate():
             return
         keyspaces: List[CassandraKeyspace] = self.cassandra_api.get_keyspaces()
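The removed dispatch loop has not vanished: given the paired datahub/ingestion/api/source_helpers.py change (+14 -2) in the file list, the framework now presumably converts yielded Entity objects to workunits centrally. A hypothetical helper mirroring the deleted loop (names are illustrative, not the actual source_helpers API):

from typing import Iterable

def auto_workunits(stream: Iterable) -> Iterable:
    # Mirrors the loop deleted from CassandraSource: pass MetadataWorkUnit
    # objects through, expand SDK Entity objects via as_workunits().
    for item in stream:
        if hasattr(item, "as_workunits"):
            yield from item.as_workunits()
        else:
            yield item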
datahub/ingestion/source/common/data_platforms.py (new file)

@@ -0,0 +1,23 @@
+# This is a pretty limited list, and is not really complete yet. Right now it's only used to allow
+# automatic platform mapping when generating lineage and we have a manual override, so
+# it being incomplete is ok. This should not be used for urn validation.
+KNOWN_VALID_PLATFORM_NAMES = [
+    "bigquery",
+    "cassandra",
+    "databricks",
+    "delta-lake",
+    "dbt",
+    "feast",
+    "file",
+    "gcs",
+    "hdfs",
+    "hive",
+    "mssql",
+    "mysql",
+    "oracle",
+    "postgres",
+    "redshift",
+    "s3",
+    "sagemaker",
+    "snowflake",
+]
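Per its comment, the list is a permissive allowlist for lineage platform overrides, not a URN validator. A trivial example of the membership check it supports:

from datahub.ingestion.source.common.data_platforms import KNOWN_VALID_PLATFORM_NAMES

def is_known_platform(name: str) -> bool:
    # Case-sensitive and incomplete by design: unknown names are not errors,
    # they just do not qualify for automatic platform mapping.
    return name in KNOWN_VALID_PLATFORM_NAMES

assert is_known_platform("snowflake")
assert not is_known_platform("Snowflake")  # no normalization is applied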
datahub/ingestion/source/common/gcp_credentials_config.py

@@ -51,3 +51,9 @@ class GCPCredential(ConfigModel):
             cred_json = json.dumps(configs, indent=4, separators=(",", ": "))
             fp.write(cred_json.encode())
         return fp.name
+
+    def to_dict(self, project_id: Optional[str] = None) -> Dict[str, str]:
+        configs = self.dict()
+        if project_id:
+            configs["project_id"] = project_id
+        return configs
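The new to_dict complements the existing write-to-tempfile path by returning the credential as a plain dict, optionally overriding the project id. A hedged usage sketch (the wrapper function and project id are placeholders):

from typing import Dict

from datahub.ingestion.source.common.gcp_credentials_config import GCPCredential

def credential_dict(credential: GCPCredential, project: str) -> Dict[str, str]:
    # to_dict copies the pydantic fields and stamps in the project_id override;
    # without the argument, the dict carries the credential's own fields only.
    return credential.to_dict(project_id=project)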
datahub/ingestion/source/common/subtypes.py

@@ -25,6 +25,7 @@ class DatasetSubTypes(StrEnum):
     NEO4J_NODE = "Neo4j Node"
     NEO4J_RELATIONSHIP = "Neo4j Relationship"
     SNOWFLAKE_STREAM = "Snowflake Stream"
+    API_ENDPOINT = "API Endpoint"
 
     # TODO: Create separate entity...
     NOTEBOOK = "Notebook"

@@ -44,6 +45,7 @@ class DatasetContainerSubTypes(StrEnum):
     GCS_BUCKET = "GCS bucket"
     ABS_CONTAINER = "ABS container"
     KEYSPACE = "Keyspace"  # Cassandra
+    NAMESPACE = "Namespace"  # Iceberg
 
 
 class BIContainerSubTypes(StrEnum):

@@ -68,7 +70,7 @@ class FlowContainerSubTypes(StrEnum):
 class JobContainerSubTypes(StrEnum):
     NIFI_PROCESS_GROUP = "Process Group"
     MSSQL_JOBSTEP = "Job Step"
-
+    STORED_PROCEDURE = "Stored Procedure"
 
 
 class BIAssetSubTypes(StrEnum):

@@ -93,7 +95,21 @@ class BIAssetSubTypes(StrEnum):
     SAC_STORY = "Story"
     SAC_APPLICATION = "Application"
 
+    # Hex
+    HEX_PROJECT = "Project"
+    HEX_COMPONENT = "Component"
+
 
 class MLAssetSubTypes(StrEnum):
     MLFLOW_TRAINING_RUN = "ML Training Run"
     MLFLOW_EXPERIMENT = "ML Experiment"
+    VERTEX_EXPERIMENT = "Experiment"
+    VERTEX_EXPERIMENT_RUN = "Experiment Run"
+    VERTEX_EXECUTION = "Execution"
+
+    VERTEX_MODEL = "ML Model"
+    VERTEX_MODEL_GROUP = "ML Model Group"
+    VERTEX_TRAINING_JOB = "Training Job"
+    VERTEX_ENDPOINT = "Endpoint"
+    VERTEX_DATASET = "Dataset"
+    VERTEX_PROJECT = "Project"
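These enum values are emitted through the SubTypes aspect. A short hedged example attaching the new API_ENDPOINT subtype to a dataset (the URN is illustrative):

from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.source.common.subtypes import DatasetSubTypes
from datahub.metadata.schema_classes import SubTypesClass

mcp = MetadataChangeProposalWrapper(
    entityUrn="urn:li:dataset:(urn:li:dataPlatform:openapi,petstore.pets,PROD)",
    aspect=SubTypesClass(typeNames=[DatasetSubTypes.API_ENDPOINT]),
)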
datahub/ingestion/source/data_lake_common/path_spec.py

@@ -11,7 +11,7 @@ from cached_property import cached_property
 from pydantic.fields import Field
 from wcmatch import pathlib
 
-from datahub.configuration.common import ConfigModel
+from datahub.configuration.common import AllowDenyPattern, ConfigModel
 from datahub.ingestion.source.aws.s3_util import is_s3_uri
 from datahub.ingestion.source.azure.abs_utils import is_abs_uri
 from datahub.ingestion.source.gcs.gcs_utils import is_gcs_uri

@@ -145,6 +145,11 @@ class PathSpec(ConfigModel):
         description="Include hidden folders in the traversal (folders starting with . or _",
     )
 
+    tables_filter_pattern: AllowDenyPattern = Field(
+        default=AllowDenyPattern.allow_all(),
+        description="The tables_filter_pattern configuration field uses regular expressions to filter the tables part of the Pathspec for ingestion, allowing fine-grained control over which tables are included or excluded based on specified patterns. The default setting allows all tables.",
+    )
+
     def is_path_hidden(self, path: str) -> bool:
         # Split the path into directories and filename
         dirs, filename = os.path.split(path)

@@ -177,6 +182,12 @@ class PathSpec(ConfigModel):
         ):
             return False
         logger.debug(f"{path} is not excluded")
+
+        table_name, _ = self.extract_table_name_and_path(path)
+        if not self.tables_filter_pattern.allowed(table_name):
+            return False
+        logger.debug(f"{path} is passed table name check")
+
         ext = os.path.splitext(path)[1].strip(".")
 
         if not ignore_ext:

@@ -218,6 +229,15 @@ class PathSpec(ConfigModel):
                 exclude_path.rstrip("/"), flags=pathlib.GLOBSTAR
             ):
                 return False
+
+        file_name_pattern = self.include.rsplit("/", 1)[1]
+        table_name, _ = self.extract_table_name_and_path(
+            os.path.join(path, file_name_pattern)
+        )
+        if not self.tables_filter_pattern.allowed(table_name):
+            return False
+        logger.debug(f"{path} is passed table name check")
+
         return True
 
     @classmethod
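Both new checks funnel through tables_filter_pattern, so a single AllowDenyPattern governs the file-level and directory-level traversal. A hedged configuration sketch; the include pattern is illustrative:

from datahub.configuration.common import AllowDenyPattern
from datahub.ingestion.source.data_lake_common.path_spec import PathSpec

path_spec = PathSpec(
    include="s3://my-bucket/data/{table}/*.parquet",
    tables_filter_pattern=AllowDenyPattern(allow=["orders.*"], deny=[".*_tmp$"]),
)
# Paths whose extracted table name fails the pattern are pruned during traversal.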
datahub/ingestion/source/dbt/dbt_common.py

@@ -4,7 +4,7 @@ from abc import abstractmethod
 from dataclasses import dataclass, field
 from datetime import datetime
 from enum import auto
-from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
+from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union
 
 import more_itertools
 import pydantic

@@ -849,7 +849,7 @@ class DBTSourceBase(StatefulIngestionSourceBase):
         test_nodes: List[DBTNode],
         extra_custom_props: Dict[str, str],
         all_nodes_map: Dict[str, DBTNode],
-    ) -> Iterable[
+    ) -> Iterable[MetadataChangeProposalWrapper]:
         for node in sorted(test_nodes, key=lambda n: n.dbt_name):
             upstreams = get_upstreams_for_test(
                 test_node=node,

@@ -902,7 +902,7 @@ class DBTSourceBase(StatefulIngestionSourceBase):
             yield MetadataChangeProposalWrapper(
                 entityUrn=assertion_urn,
                 aspect=self._make_data_platform_instance_aspect(),
-            ).as_workunit()
+            )
 
             yield make_assertion_from_test(
                 custom_props,

@@ -949,7 +949,9 @@ class DBTSourceBase(StatefulIngestionSourceBase):
             ),
         )
 
-    def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
+    def get_workunits_internal(
+        self,
+    ) -> Iterable[Union[MetadataWorkUnit, MetadataChangeProposalWrapper]]:
         if self.config.write_semantics == "PATCH":
             self.ctx.require_graph("Using dbt with write_semantics=PATCH")
 
datahub/ingestion/source/dbt/dbt_core.py

@@ -343,6 +343,9 @@ class DBTRunResult(BaseModel):
     def timing_map(self) -> Dict[str, DBTRunTiming]:
         return {x.name: x for x in self.timing if x.name}
 
+    def has_success_status(self) -> bool:
+        return self.status in ("pass", "success")
+
 
 class DBTRunMetadata(BaseModel):
     dbt_schema_version: str

@@ -355,12 +358,7 @@ def _parse_test_result(
     dbt_metadata: DBTRunMetadata,
     run_result: DBTRunResult,
 ) -> Optional[DBTTestResult]:
-    if run_result.
-        # This was probably a docs generate run result, so this isn't actually
-        # a test result.
-        return None
-
-    if run_result.status != "pass":
+    if not run_result.has_success_status():
         native_results = {"message": run_result.message or ""}
         if run_result.failures:
             native_results.update({"failures": str(run_result.failures)})
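dbt reports model-style invocations as "success" and tests as "pass"; the new helper folds the two into one check. A self-contained restatement of the predicate:

def has_success_status(status: str) -> bool:
    # Mirrors DBTRunResult.has_success_status / DBTTestResult.has_success_status.
    return status in ("pass", "success")

assert has_success_status("pass") and has_success_status("success")
assert not has_success_status("warn") and not has_success_status("error")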
datahub/ingestion/source/dbt/dbt_tests.py

@@ -6,7 +6,6 @@ from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Union
 
 from datahub.emitter import mce_builder
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.metadata.schema_classes import (
     AssertionInfoClass,
     AssertionResultClass,

@@ -43,6 +42,9 @@ class DBTTestResult:
 
     native_results: Dict[str, str]
 
+    def has_success_status(self) -> bool:
+        return self.status in ("pass", "success")
+
 
 def _get_name_for_relationship_test(kw_args: Dict[str, str]) -> Optional[str]:
     """

@@ -157,7 +159,7 @@ def make_assertion_from_test(
     node: "DBTNode",
     assertion_urn: str,
     upstream_urn: str,
-) ->
+) -> MetadataChangeProposalWrapper:
     assert node.test_info
     qualified_test_name = node.test_info.qualified_test_name
     column_name = node.test_info.column_name

@@ -231,7 +233,7 @@ def make_assertion_from_test(
     return MetadataChangeProposalWrapper(
         entityUrn=assertion_urn,
         aspect=assertion_info,
-    ).as_workunit()
+    )
 
 
 def make_assertion_result_from_test(

@@ -240,7 +242,7 @@ def make_assertion_result_from_test(
     assertion_urn: str,
     upstream_urn: str,
     test_warnings_are_errors: bool,
-) ->
+) -> MetadataChangeProposalWrapper:
     assertionResult = AssertionRunEventClass(
         timestampMillis=int(test_result.execution_time.timestamp() * 1000.0),
         assertionUrn=assertion_urn,

@@ -249,7 +251,7 @@ def make_assertion_result_from_test(
         result=AssertionResultClass(
             type=(
                 AssertionResultTypeClass.SUCCESS
-                if test_result.
+                if test_result.has_success_status()
                 or (not test_warnings_are_errors and test_result.status == "warn")
                 else AssertionResultTypeClass.FAILURE
             ),

@@ -261,4 +263,4 @@ def make_assertion_result_from_test(
     return MetadataChangeProposalWrapper(
         entityUrn=assertion_urn,
         aspect=assertionResult,
-    ).as_workunit()
+    )
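With the helper, the assertion outcome in make_assertion_result_from_test reduces to a small predicate. A plain restatement of the decision shown above:

def assertion_result_type(status: str, test_warnings_are_errors: bool) -> str:
    # SUCCESS when the test passed outright, or when it merely warned and
    # warnings are not being promoted to errors; FAILURE otherwise.
    if status in ("pass", "success") or (
        not test_warnings_are_errors and status == "warn"
    ):
        return "SUCCESS"
    return "FAILURE"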
datahub/ingestion/source/dremio/dremio_entities.py

@@ -294,7 +294,7 @@ class DremioContainer:
         )
 
 
-class
+class DremioSourceContainer(DremioContainer):
     subclass: str = "Dremio Source"
     dremio_source_type: str
     root_path: Optional[str]

@@ -337,7 +337,7 @@ class DremioCatalog:
         self.dremio_api = dremio_api
         self.edition = dremio_api.edition
         self.datasets: Deque[DremioDataset] = deque()
-        self.sources: Deque[
+        self.sources: Deque[DremioSourceContainer] = deque()
         self.spaces: Deque[DremioSpace] = deque()
         self.folders: Deque[DremioFolder] = deque()
         self.glossary_terms: Deque[DremioGlossaryTerm] = deque()

@@ -380,12 +380,13 @@ class DremioCatalog:
             container_type = container.get("container_type")
             if container_type == DremioEntityContainerType.SOURCE:
                 self.sources.append(
-
+                    DremioSourceContainer(
                         container_name=container.get("name"),
                         location_id=container.get("id"),
                         path=[],
                         api_operations=self.dremio_api,
-                        dremio_source_type=container.get("source_type")
+                        dremio_source_type=container.get("source_type")
+                        or "unknown",
                         root_path=container.get("root_path"),
                         database_name=container.get("database_name"),
                     )

@@ -426,7 +427,7 @@ class DremioCatalog:
         self.set_containers()
         return deque(itertools.chain(self.sources, self.spaces, self.folders))
 
-    def get_sources(self) -> Deque[
+    def get_sources(self) -> Deque[DremioSourceContainer]:
         self.set_containers()
         return self.sources