acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub might be problematic.

Files changed (106)
  1. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.0.0.1.dist-info}/METADATA +2391 -2392
  2. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.0.0.1.dist-info}/RECORD +105 -88
  3. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.0.0.1.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.0.0.1.dist-info}/entry_points.txt +2 -1
  5. datahub/_version.py +1 -1
  6. datahub/api/entities/dataset/dataset.py +1 -28
  7. datahub/cli/specific/dataset_cli.py +26 -10
  8. datahub/emitter/mce_builder.py +1 -3
  9. datahub/emitter/mcp_builder.py +8 -0
  10. datahub/emitter/request_helper.py +19 -14
  11. datahub/emitter/response_helper.py +25 -18
  12. datahub/emitter/rest_emitter.py +23 -7
  13. datahub/errors.py +8 -0
  14. datahub/ingestion/api/source.py +7 -2
  15. datahub/ingestion/api/source_helpers.py +14 -2
  16. datahub/ingestion/extractor/schema_util.py +1 -0
  17. datahub/ingestion/graph/client.py +26 -20
  18. datahub/ingestion/graph/filters.py +62 -17
  19. datahub/ingestion/sink/datahub_rest.py +2 -2
  20. datahub/ingestion/source/cassandra/cassandra.py +1 -10
  21. datahub/ingestion/source/common/data_platforms.py +23 -0
  22. datahub/ingestion/source/common/gcp_credentials_config.py +6 -0
  23. datahub/ingestion/source/common/subtypes.py +17 -1
  24. datahub/ingestion/source/data_lake_common/path_spec.py +21 -1
  25. datahub/ingestion/source/dbt/dbt_common.py +6 -4
  26. datahub/ingestion/source/dbt/dbt_core.py +4 -6
  27. datahub/ingestion/source/dbt/dbt_tests.py +8 -6
  28. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
  29. datahub/ingestion/source/dremio/dremio_entities.py +6 -5
  30. datahub/ingestion/source/dremio/dremio_source.py +96 -117
  31. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
  32. datahub/ingestion/source/ge_data_profiler.py +11 -1
  33. datahub/ingestion/source/hex/__init__.py +0 -0
  34. datahub/ingestion/source/hex/api.py +394 -0
  35. datahub/ingestion/source/hex/constants.py +3 -0
  36. datahub/ingestion/source/hex/hex.py +167 -0
  37. datahub/ingestion/source/hex/mapper.py +372 -0
  38. datahub/ingestion/source/hex/model.py +68 -0
  39. datahub/ingestion/source/iceberg/iceberg.py +193 -140
  40. datahub/ingestion/source/iceberg/iceberg_profiler.py +21 -18
  41. datahub/ingestion/source/mlflow.py +217 -8
  42. datahub/ingestion/source/mode.py +11 -1
  43. datahub/ingestion/source/openapi.py +69 -34
  44. datahub/ingestion/source/powerbi/config.py +31 -4
  45. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  46. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +111 -10
  47. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  48. datahub/ingestion/source/powerbi/powerbi.py +41 -24
  49. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -11
  50. datahub/ingestion/source/redshift/lineage_v2.py +9 -1
  51. datahub/ingestion/source/redshift/query.py +1 -1
  52. datahub/ingestion/source/s3/source.py +11 -0
  53. datahub/ingestion/source/sigma/config.py +3 -4
  54. datahub/ingestion/source/sigma/sigma.py +10 -6
  55. datahub/ingestion/source/slack/slack.py +399 -82
  56. datahub/ingestion/source/snowflake/constants.py +1 -0
  57. datahub/ingestion/source/snowflake/snowflake_config.py +14 -1
  58. datahub/ingestion/source/snowflake/snowflake_query.py +17 -0
  59. datahub/ingestion/source/snowflake/snowflake_report.py +3 -0
  60. datahub/ingestion/source/snowflake/snowflake_schema.py +29 -0
  61. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +112 -42
  62. datahub/ingestion/source/snowflake/snowflake_utils.py +25 -1
  63. datahub/ingestion/source/sql/mssql/job_models.py +15 -1
  64. datahub/ingestion/source/sql/mssql/source.py +8 -4
  65. datahub/ingestion/source/sql/oracle.py +51 -4
  66. datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
  67. datahub/ingestion/source/sql/stored_procedures/base.py +242 -0
  68. datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +1 -29
  69. datahub/ingestion/source/superset.py +291 -35
  70. datahub/ingestion/source/usage/usage_common.py +0 -65
  71. datahub/ingestion/source/vertexai/__init__.py +0 -0
  72. datahub/ingestion/source/vertexai/vertexai.py +1055 -0
  73. datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
  74. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +68 -0
  75. datahub/metadata/_schema_classes.py +472 -1
  76. datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
  77. datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
  78. datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
  79. datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
  80. datahub/metadata/schema.avsc +313 -2
  81. datahub/metadata/schemas/CorpUserEditableInfo.avsc +14 -0
  82. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  83. datahub/metadata/schemas/CorpUserSettings.avsc +95 -0
  84. datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
  85. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  86. datahub/metadata/schemas/Deprecation.avsc +2 -0
  87. datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
  88. datahub/metadata/schemas/MetadataChangeEvent.avsc +32 -0
  89. datahub/metadata/schemas/QueryProperties.avsc +20 -0
  90. datahub/metadata/schemas/Siblings.avsc +2 -0
  91. datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
  92. datahub/sdk/__init__.py +1 -0
  93. datahub/sdk/dataset.py +122 -0
  94. datahub/sdk/entity.py +99 -3
  95. datahub/sdk/entity_client.py +27 -3
  96. datahub/sdk/main_client.py +24 -1
  97. datahub/sdk/search_client.py +81 -8
  98. datahub/sdk/search_filters.py +94 -37
  99. datahub/sql_parsing/split_statements.py +17 -3
  100. datahub/sql_parsing/sql_parsing_aggregator.py +6 -0
  101. datahub/sql_parsing/tool_meta_extractor.py +27 -2
  102. datahub/testing/mcp_diff.py +1 -18
  103. datahub/utilities/threaded_iterator_executor.py +16 -3
  104. datahub/ingestion/source/vertexai.py +0 -697
  105. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.0.0.1.dist-info/licenses}/LICENSE +0 -0
  106. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.0.0.1.dist-info}/top_level.txt +0 -0
@@ -1,30 +1,58 @@
 import dataclasses
 import enum
-from typing import Any, Dict, List, Optional
+import warnings
+from typing import Dict, List, Literal, Optional, Union
+
+from typing_extensions import TypeAlias

 from datahub.emitter.mce_builder import (
     make_data_platform_urn,
     make_dataplatform_instance_urn,
 )
+from datahub.errors import SearchFilterWarning
 from datahub.utilities.urns.urn import guess_entity_type

-RawSearchFilterRule = Dict[str, Any]
+RawSearchFilterRule: TypeAlias = Dict[str, Union[str, bool, List[str]]]
+
+# This is a list of OR filters, each of which is a list of AND filters.
+# This can be put directly into the orFilters parameter in GraphQL.
+RawSearchFilter: TypeAlias = List[Dict[Literal["and"], List[RawSearchFilterRule]]]
+
+# Mirrors our GraphQL enum: https://datahubproject.io/docs/graphql/enums#filteroperator
+FilterOperator: TypeAlias = Literal[
+    "CONTAIN",
+    "EQUAL",
+    "IEQUAL",
+    "IN",
+    "EXISTS",
+    "GREATER_THAN",
+    "GREATER_THAN_OR_EQUAL_TO",
+    "LESS_THAN",
+    "LESS_THAN_OR_EQUAL_TO",
+    "START_WITH",
+    "END_WITH",
+    "DESCENDANTS_INCL",
+    "ANCESTORS_INCL",
+    "RELATED_INCL",
+]


 @dataclasses.dataclass
 class SearchFilterRule:
     field: str
-    condition: str  # TODO: convert to an enum
+    condition: FilterOperator
     values: List[str]
     negated: bool = False

     def to_raw(self) -> RawSearchFilterRule:
-        return {
+        rule: RawSearchFilterRule = {
             "field": self.field,
             "condition": self.condition,
             "values": self.values,
-            "negated": self.negated,
         }
+        if self.negated:
+            rule["negated"] = True
+        return rule

     def negate(self) -> "SearchFilterRule":
         return SearchFilterRule(
@@ -53,10 +81,10 @@ def generate_filter(
     platform_instance: Optional[str],
     env: Optional[str],
     container: Optional[str],
-    status: RemovedStatusFilter,
+    status: Optional[RemovedStatusFilter],
     extra_filters: Optional[List[RawSearchFilterRule]],
-    extra_or_filters: Optional[List[RawSearchFilterRule]] = None,
-) -> List[Dict[str, List[RawSearchFilterRule]]]:
+    extra_or_filters: Optional[RawSearchFilter] = None,
+) -> RawSearchFilter:
     """
     Generate a search filter based on the provided parameters.
     :param platform: The platform to filter by.
@@ -85,15 +113,16 @@
         and_filters.append(_get_container_filter(container).to_raw())

     # Status filter.
-    status_filter = _get_status_filter(status)
-    if status_filter:
-        and_filters.append(status_filter.to_raw())
+    if status:
+        status_filter = _get_status_filter(status)
+        if status_filter:
+            and_filters.append(status_filter.to_raw())

     # Extra filters.
     if extra_filters:
         and_filters += extra_filters

-    or_filters: List[Dict[str, List[RawSearchFilterRule]]] = [{"and": and_filters}]
+    or_filters: RawSearchFilter = [{"and": and_filters}]

     # Env filter
     if env:
@@ -107,11 +136,27 @@

     # Extra OR filters are distributed across the top level and lists.
     if extra_or_filters:
-        or_filters = [
-            {"and": and_filter["and"] + [extra_or_filter]}
-            for extra_or_filter in extra_or_filters
-            for and_filter in or_filters
-        ]
+        new_or_filters: RawSearchFilter = []
+        for and_filter in or_filters:
+            for extra_or_filter in extra_or_filters:
+                if isinstance(extra_or_filter, dict) and "and" in extra_or_filter:
+                    new_or_filters.append(
+                        {"and": and_filter["and"] + extra_or_filter["and"]}
+                    )
+                else:
+                    # Hack for backwards compatibility.
+                    # We have some code that erroneously passed a List[RawSearchFilterRule]
+                    # instead of a List[Dict["and", List[RawSearchFilterRule]]].
+                    warnings.warn(
+                        "Passing a List[RawSearchFilterRule] to extra_or_filters is deprecated. "
+                        "Please pass a List[Dict[str, List[RawSearchFilterRule]]] instead.",
+                        SearchFilterWarning,
+                        stacklevel=3,
+                    )
+                    new_or_filters.append(
+                        {"and": and_filter["and"] + [extra_or_filter]}  # type: ignore
+                    )
+        or_filters = new_or_filters

     return or_filters
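For orientation, a minimal usage sketch of the filter types introduced above, assuming they are exposed from datahub.ingestion.graph.filters as the file list suggests; the field names and values below are illustrative only and are not taken from this diff:

from datahub.ingestion.graph.filters import RawSearchFilter, SearchFilterRule

# Two AND-ed rules: platform equals Snowflake, and the entity is not soft-deleted.
platform_rule = SearchFilterRule(
    field="platform",
    condition="EQUAL",
    values=["urn:li:dataPlatform:snowflake"],
)
not_removed_rule = SearchFilterRule(
    field="removed",
    condition="EQUAL",
    values=["true"],
    negated=True,
)

# A RawSearchFilter is a list of OR branches, each carrying an AND list of raw rules,
# matching the orFilters shape the comment above says GraphQL expects.
or_filters: RawSearchFilter = [
    {"and": [platform_rule.to_raw(), not_removed_rule.to_raw()]},
]
# Note that to_raw() now emits the "negated" key only when it is True.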
 
@@ -20,7 +20,7 @@ from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.emitter.mcp_builder import mcps_from_mce
 from datahub.emitter.rest_emitter import (
     BATCH_INGEST_MAX_PAYLOAD_LENGTH,
-    DEFAULT_REST_SINK_ENDPOINT,
+    DEFAULT_REST_EMITTER_ENDPOINT,
     DEFAULT_REST_TRACE_MODE,
     DataHubRestEmitter,
     RestSinkEndpoint,
@@ -70,7 +70,7 @@ _DEFAULT_REST_SINK_MODE = pydantic.parse_obj_as(

 class DatahubRestSinkConfig(DatahubClientConfig):
     mode: RestSinkMode = _DEFAULT_REST_SINK_MODE
-    endpoint: RestSinkEndpoint = DEFAULT_REST_SINK_ENDPOINT
+    endpoint: RestSinkEndpoint = DEFAULT_REST_EMITTER_ENDPOINT
     default_trace_mode: RestTraceMode = DEFAULT_REST_TRACE_MODE

     # These only apply in async modes.
@@ -123,16 +123,7 @@ class CassandraSource(StatefulIngestionSourceBase):
             ).workunit_processor,
         ]

-    def get_workunits_internal(
-        self,
-    ) -> Iterable[MetadataWorkUnit]:
-        for metadata in self._get_metadata():
-            if isinstance(metadata, MetadataWorkUnit):
-                yield metadata
-            else:
-                yield from metadata.as_workunits()
-
-    def _get_metadata(self) -> Iterable[Union[MetadataWorkUnit, Entity]]:
+    def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, Entity]]:
         if not self.cassandra_api.authenticate():
             return
         keyspaces: List[CassandraKeyspace] = self.cassandra_api.get_keyspaces()
@@ -0,0 +1,23 @@
+# This is a pretty limited list, and is not really complete yet. Right now it's only used to allow
+# automatic platform mapping when generating lineage and we have a manual override, so
+# it being incomplete is ok. This should not be used for urn validation.
+KNOWN_VALID_PLATFORM_NAMES = [
+    "bigquery",
+    "cassandra",
+    "databricks",
+    "delta-lake",
+    "dbt",
+    "feast",
+    "file",
+    "gcs",
+    "hdfs",
+    "hive",
+    "mssql",
+    "mysql",
+    "oracle",
+    "postgres",
+    "redshift",
+    "s3",
+    "sagemaker",
+    "snowflake",
+]
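As a hedged illustration of the use case described in the comment above (manual platform overrides during lineage generation), a hypothetical helper might look like the following; resolve_platform_override is not part of the package, only KNOWN_VALID_PLATFORM_NAMES and make_data_platform_urn are:

from datahub.emitter.mce_builder import make_data_platform_urn
from datahub.ingestion.source.common.data_platforms import KNOWN_VALID_PLATFORM_NAMES


def resolve_platform_override(platform: str) -> str:
    # Hypothetical helper: map a manually supplied platform name to a platform urn
    # only when it is a name we recognize; otherwise return it unchanged.
    if platform.lower() in KNOWN_VALID_PLATFORM_NAMES:
        return make_data_platform_urn(platform.lower())
    return platform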
@@ -51,3 +51,9 @@ class GCPCredential(ConfigModel):
             cred_json = json.dumps(configs, indent=4, separators=(",", ": "))
             fp.write(cred_json.encode())
             return fp.name
+
+    def to_dict(self, project_id: Optional[str] = None) -> Dict[str, str]:
+        configs = self.dict()
+        if project_id:
+            configs["project_id"] = project_id
+        return configs
@@ -25,6 +25,7 @@ class DatasetSubTypes(StrEnum):
     NEO4J_NODE = "Neo4j Node"
     NEO4J_RELATIONSHIP = "Neo4j Relationship"
     SNOWFLAKE_STREAM = "Snowflake Stream"
+    API_ENDPOINT = "API Endpoint"

     # TODO: Create separate entity...
     NOTEBOOK = "Notebook"
@@ -44,6 +45,7 @@ class DatasetContainerSubTypes(StrEnum):
     GCS_BUCKET = "GCS bucket"
     ABS_CONTAINER = "ABS container"
     KEYSPACE = "Keyspace"  # Cassandra
+    NAMESPACE = "Namespace"  # Iceberg


 class BIContainerSubTypes(StrEnum):
@@ -68,7 +70,7 @@ class FlowContainerSubTypes(StrEnum):
 class JobContainerSubTypes(StrEnum):
     NIFI_PROCESS_GROUP = "Process Group"
     MSSQL_JOBSTEP = "Job Step"
-    MSSQL_STORED_PROCEDURE = "Stored Procedure"
+    STORED_PROCEDURE = "Stored Procedure"


 class BIAssetSubTypes(StrEnum):
@@ -93,7 +95,21 @@ class BIAssetSubTypes(StrEnum):
     SAC_STORY = "Story"
     SAC_APPLICATION = "Application"

+    # Hex
+    HEX_PROJECT = "Project"
+    HEX_COMPONENT = "Component"
+

 class MLAssetSubTypes(StrEnum):
     MLFLOW_TRAINING_RUN = "ML Training Run"
     MLFLOW_EXPERIMENT = "ML Experiment"
+    VERTEX_EXPERIMENT = "Experiment"
+    VERTEX_EXPERIMENT_RUN = "Experiment Run"
+    VERTEX_EXECUTION = "Execution"
+
+    VERTEX_MODEL = "ML Model"
+    VERTEX_MODEL_GROUP = "ML Model Group"
+    VERTEX_TRAINING_JOB = "Training Job"
+    VERTEX_ENDPOINT = "Endpoint"
+    VERTEX_DATASET = "Dataset"
+    VERTEX_PROJECT = "Project"
@@ -11,7 +11,7 @@ from cached_property import cached_property
 from pydantic.fields import Field
 from wcmatch import pathlib

-from datahub.configuration.common import ConfigModel
+from datahub.configuration.common import AllowDenyPattern, ConfigModel
 from datahub.ingestion.source.aws.s3_util import is_s3_uri
 from datahub.ingestion.source.azure.abs_utils import is_abs_uri
 from datahub.ingestion.source.gcs.gcs_utils import is_gcs_uri
@@ -145,6 +145,11 @@ class PathSpec(ConfigModel):
         description="Include hidden folders in the traversal (folders starting with . or _",
     )

+    tables_filter_pattern: AllowDenyPattern = Field(
+        default=AllowDenyPattern.allow_all(),
+        description="The tables_filter_pattern configuration field uses regular expressions to filter the tables part of the Pathspec for ingestion, allowing fine-grained control over which tables are included or excluded based on specified patterns. The default setting allows all tables.",
+    )
+
     def is_path_hidden(self, path: str) -> bool:
         # Split the path into directories and filename
         dirs, filename = os.path.split(path)
@@ -177,6 +182,12 @@ class PathSpec(ConfigModel):
         ):
             return False
         logger.debug(f"{path} is not excluded")
+
+        table_name, _ = self.extract_table_name_and_path(path)
+        if not self.tables_filter_pattern.allowed(table_name):
+            return False
+        logger.debug(f"{path} is passed table name check")
+
         ext = os.path.splitext(path)[1].strip(".")

         if not ignore_ext:
@@ -218,6 +229,15 @@ class PathSpec(ConfigModel):
                 exclude_path.rstrip("/"), flags=pathlib.GLOBSTAR
             ):
                 return False
+
+        file_name_pattern = self.include.rsplit("/", 1)[1]
+        table_name, _ = self.extract_table_name_and_path(
+            os.path.join(path, file_name_pattern)
+        )
+        if not self.tables_filter_pattern.allowed(table_name):
+            return False
+        logger.debug(f"{path} is passed table name check")
+
         return True

     @classmethod
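A minimal sketch of the new option in code, assuming PathSpec can be constructed directly with only include set; the bucket name and layout here are made up:

from datahub.configuration.common import AllowDenyPattern
from datahub.ingestion.source.data_lake_common.path_spec import PathSpec

# Keep every table except those whose extracted name starts with "tmp_".
path_spec = PathSpec(
    include="s3://my-bucket/data/{table}/*.parquet",
    tables_filter_pattern=AllowDenyPattern(deny=["tmp_.*"]),
)

assert path_spec.tables_filter_pattern.allowed("orders")
assert not path_spec.tables_filter_pattern.allowed("tmp_scratch")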
@@ -4,7 +4,7 @@ from abc import abstractmethod
 from dataclasses import dataclass, field
 from datetime import datetime
 from enum import auto
-from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
+from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union

 import more_itertools
 import pydantic
@@ -849,7 +849,7 @@ class DBTSourceBase(StatefulIngestionSourceBase):
         test_nodes: List[DBTNode],
         extra_custom_props: Dict[str, str],
         all_nodes_map: Dict[str, DBTNode],
-    ) -> Iterable[MetadataWorkUnit]:
+    ) -> Iterable[MetadataChangeProposalWrapper]:
         for node in sorted(test_nodes, key=lambda n: n.dbt_name):
             upstreams = get_upstreams_for_test(
                 test_node=node,
@@ -902,7 +902,7 @@ class DBTSourceBase(StatefulIngestionSourceBase):
             yield MetadataChangeProposalWrapper(
                 entityUrn=assertion_urn,
                 aspect=self._make_data_platform_instance_aspect(),
-            ).as_workunit()
+            )

             yield make_assertion_from_test(
                 custom_props,
@@ -949,7 +949,9 @@ class DBTSourceBase(StatefulIngestionSourceBase):
             ),
         )

-    def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
+    def get_workunits_internal(
+        self,
+    ) -> Iterable[Union[MetadataWorkUnit, MetadataChangeProposalWrapper]]:
         if self.config.write_semantics == "PATCH":
             self.ctx.require_graph("Using dbt with write_semantics=PATCH")

@@ -343,6 +343,9 @@ class DBTRunResult(BaseModel):
     def timing_map(self) -> Dict[str, DBTRunTiming]:
         return {x.name: x for x in self.timing if x.name}

+    def has_success_status(self) -> bool:
+        return self.status in ("pass", "success")
+

 class DBTRunMetadata(BaseModel):
     dbt_schema_version: str
@@ -355,12 +358,7 @@ def _parse_test_result(
     dbt_metadata: DBTRunMetadata,
     run_result: DBTRunResult,
 ) -> Optional[DBTTestResult]:
-    if run_result.status == "success":
-        # This was probably a docs generate run result, so this isn't actually
-        # a test result.
-        return None
-
-    if run_result.status != "pass":
+    if not run_result.has_success_status():
         native_results = {"message": run_result.message or ""}
         if run_result.failures:
             native_results.update({"failures": str(run_result.failures)})
@@ -6,7 +6,6 @@ from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Union

 from datahub.emitter import mce_builder
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.metadata.schema_classes import (
     AssertionInfoClass,
     AssertionResultClass,
@@ -43,6 +42,9 @@ class DBTTestResult:

     native_results: Dict[str, str]

+    def has_success_status(self) -> bool:
+        return self.status in ("pass", "success")
+

 def _get_name_for_relationship_test(kw_args: Dict[str, str]) -> Optional[str]:
     """
@@ -157,7 +159,7 @@ def make_assertion_from_test(
     node: "DBTNode",
     assertion_urn: str,
     upstream_urn: str,
-) -> MetadataWorkUnit:
+) -> MetadataChangeProposalWrapper:
     assert node.test_info
     qualified_test_name = node.test_info.qualified_test_name
     column_name = node.test_info.column_name
@@ -231,7 +233,7 @@ def make_assertion_from_test(
     return MetadataChangeProposalWrapper(
         entityUrn=assertion_urn,
         aspect=assertion_info,
-    ).as_workunit()
+    )


 def make_assertion_result_from_test(
@@ -240,7 +242,7 @@ def make_assertion_result_from_test(
     assertion_urn: str,
     upstream_urn: str,
     test_warnings_are_errors: bool,
-) -> MetadataWorkUnit:
+) -> MetadataChangeProposalWrapper:
     assertionResult = AssertionRunEventClass(
         timestampMillis=int(test_result.execution_time.timestamp() * 1000.0),
         assertionUrn=assertion_urn,
@@ -249,7 +251,7 @@ def make_assertion_result_from_test(
         result=AssertionResultClass(
             type=(
                 AssertionResultTypeClass.SUCCESS
-                if test_result.status == "pass"
+                if test_result.has_success_status()
                 or (not test_warnings_are_errors and test_result.status == "warn")
                 else AssertionResultTypeClass.FAILURE
             ),
@@ -261,4 +263,4 @@ def make_assertion_result_from_test(
     return MetadataChangeProposalWrapper(
         entityUrn=assertion_urn,
         aspect=assertionResult,
-    ).as_workunit()
+    )
@@ -66,7 +66,7 @@ class DremioToDataHubSourceTypeMapping:
     }

     @staticmethod
-    def get_datahub_source_type(dremio_source_type: str) -> str:
+    def get_datahub_platform(dremio_source_type: str) -> str:
        """
        Return the DataHub source type.
        """
@@ -294,7 +294,7 @@ class DremioContainer:
    )


-class DremioSource(DremioContainer):
+class DremioSourceContainer(DremioContainer):
    subclass: str = "Dremio Source"
    dremio_source_type: str
    root_path: Optional[str]
@@ -337,7 +337,7 @@ class DremioCatalog:
        self.dremio_api = dremio_api
        self.edition = dremio_api.edition
        self.datasets: Deque[DremioDataset] = deque()
-        self.sources: Deque[DremioSource] = deque()
+        self.sources: Deque[DremioSourceContainer] = deque()
        self.spaces: Deque[DremioSpace] = deque()
        self.folders: Deque[DremioFolder] = deque()
        self.glossary_terms: Deque[DremioGlossaryTerm] = deque()
@@ -380,12 +380,13 @@ class DremioCatalog:
            container_type = container.get("container_type")
            if container_type == DremioEntityContainerType.SOURCE:
                self.sources.append(
-                    DremioSource(
+                    DremioSourceContainer(
                        container_name=container.get("name"),
                        location_id=container.get("id"),
                        path=[],
                        api_operations=self.dremio_api,
-                        dremio_source_type=container.get("source_type"),
+                        dremio_source_type=container.get("source_type")
+                        or "unknown",
                        root_path=container.get("root_path"),
                        database_name=container.get("database_name"),
                    )
@@ -426,7 +427,7 @@ class DremioCatalog:
        self.set_containers()
        return deque(itertools.chain(self.sources, self.spaces, self.folders))

-    def get_sources(self) -> Deque[DremioSource]:
+    def get_sources(self) -> Deque[DremioSourceContainer]:
        self.set_containers()
        return self.sources