acryl-datahub 0.15.0.5rc7__py3-none-any.whl → 0.15.0.5rc8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0.5rc7.dist-info → acryl_datahub-0.15.0.5rc8.dist-info}/METADATA +2466 -2438
- {acryl_datahub-0.15.0.5rc7.dist-info → acryl_datahub-0.15.0.5rc8.dist-info}/RECORD +28 -25
- datahub/_version.py +1 -1
- datahub/cli/iceberg_cli.py +707 -0
- datahub/entrypoints.py +12 -0
- datahub/ingestion/source/aws/glue.py +3 -2
- datahub/ingestion/source/snowflake/snowflake_config.py +6 -0
- datahub/ingestion/source/snowflake/snowflake_query.py +11 -0
- datahub/ingestion/source/snowflake/snowflake_report.py +1 -0
- datahub/ingestion/source/snowflake/snowflake_schema.py +17 -0
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +18 -36
- datahub/ingestion/source/snowflake/snowflake_tag.py +57 -3
- datahub/ingestion/source/sql/mssql/job_models.py +37 -8
- datahub/ingestion/source/sql/mssql/source.py +17 -0
- datahub/ingestion/source/tableau/tableau.py +14 -12
- datahub/ingestion/source/tableau/tableau_common.py +1 -1
- datahub/metadata/_schema_classes.py +160 -2
- datahub/metadata/com/linkedin/pegasus2avro/dataplatforminstance/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataset/__init__.py +2 -0
- datahub/metadata/schema.avsc +91 -2
- datahub/metadata/schemas/DataPlatformInstanceKey.avsc +2 -1
- datahub/metadata/schemas/DatasetKey.avsc +2 -1
- datahub/metadata/schemas/IcebergCatalogInfo.avsc +28 -0
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +92 -0
- {acryl_datahub-0.15.0.5rc7.dist-info → acryl_datahub-0.15.0.5rc8.dist-info}/LICENSE +0 -0
- {acryl_datahub-0.15.0.5rc7.dist-info → acryl_datahub-0.15.0.5rc8.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.5rc7.dist-info → acryl_datahub-0.15.0.5rc8.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.5rc7.dist-info → acryl_datahub-0.15.0.5rc8.dist-info}/top_level.txt +0 -0
datahub/entrypoints.py
CHANGED
@@ -183,6 +183,18 @@ datahub.add_command(datacontract)
 datahub.add_command(assertions)
 datahub.add_command(container)
 
+try:
+    from datahub.cli.iceberg_cli import iceberg
+
+    datahub.add_command(iceberg)
+except ImportError as e:
+    logger.debug(f"Failed to load datahub iceberg command: {e}")
+    datahub.add_command(
+        make_shim_command(
+            "iceberg", "run `pip install 'acryl-datahub[iceberg-catalog]'`"
+        )
+    )
+
 try:
     from datahub.cli.lite_cli import lite
 
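The iceberg subcommand is registered lazily: if the optional `iceberg-catalog` extra is missing, the CLI falls back to a shim that tells the user how to install it. Below is a minimal sketch of that shim pattern, assuming click; this `make_shim_command` is a hypothetical reimplementation for illustration, not the helper that ships with datahub.

import click

def make_shim_command(name: str, suggestion: str) -> click.Command:
    # Hypothetical stand-in for datahub's real helper.
    @click.command(
        name=name,
        context_settings=dict(ignore_unknown_options=True, allow_extra_args=True),
    )
    def shim() -> None:
        """Placeholder that explains how to enable the real command."""
        raise click.UsageError(f"The `{name}` command is unavailable; {suggestion}.")

    return shim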
datahub/ingestion/source/aws/glue.py
CHANGED
@@ -113,6 +113,7 @@ from datahub.metadata.schema_classes import (
 )
 from datahub.utilities.delta import delta_type_to_hive_type
 from datahub.utilities.hive_schema_to_avro import get_schema_fields_for_hive_column
+from datahub.utilities.lossy_collections import LossyList
 
 logger = logging.getLogger(__name__)
 
@@ -220,7 +221,7 @@ class GlueSourceConfig(
 class GlueSourceReport(StaleEntityRemovalSourceReport):
     catalog_id: Optional[str] = None
     tables_scanned = 0
-    filtered:
+    filtered: LossyList[str] = dataclass_field(default_factory=LossyList)
     databases: EntityFilterReport = EntityFilterReport.field(type="database")
 
     num_job_script_location_missing: int = 0
@@ -746,7 +747,7 @@ class GlueSource(StatefulIngestionSourceBase):
             for tables in self.get_tables_from_database(database):
                 all_tables.append(tables)
         except Exception as e:
-            self.report.
+            self.report.warning(
                 message="Failed to get tables from database",
                 context=database["Name"],
                 exc=e,
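The report's `filtered` field becomes a LossyList so that a run which filters thousands of tables cannot bloat the ingestion report. As a rough sketch of the idea behind `datahub.utilities.lossy_collections.LossyList` (the shipped class is richer; this cap-and-count version is only illustrative):

from typing import Generic, List, TypeVar

T = TypeVar("T")

class CapAndCountList(Generic[T]):
    """Keeps the first `max_elements` items and counts the overflow."""

    def __init__(self, max_elements: int = 10) -> None:
        self.items: List[T] = []
        self.max_elements = max_elements
        self.dropped = 0

    def append(self, item: T) -> None:
        if len(self.items) < self.max_elements:
            self.items.append(item)
        else:
            self.dropped += 1  # sampled away, but still counted

    def __repr__(self) -> str:
        suffix = f" ... and {self.dropped} more" if self.dropped else ""
        return f"{self.items!r}{suffix}"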
datahub/ingestion/source/snowflake/snowflake_config.py
CHANGED
@@ -249,6 +249,12 @@ class SnowflakeV2Config(
         description="If enabled along with `extract_tags`, extracts snowflake's key-value tags as DataHub structured properties instead of DataHub tags.",
     )
 
+    structured_properties_template_cache_invalidation_interval: int = Field(
+        hidden_from_docs=True,
+        default=60,
+        description="Interval in seconds to invalidate the structured properties template cache.",
+    )
+
     include_external_url: bool = Field(
         default=True,
         description="Whether to populate Snowsight url for Snowflake Objects",
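The new interval is hidden from the docs but still configurable. A hedged recipe fragment showing where it sits next to the existing tag options; connection fields are omitted and the `extract_tags` value is an assumption:

snowflake_source_config = {
    "extract_tags": "without_lineage",  # assumed value for illustration
    "extract_tags_as_structured_properties": True,
    # New in this release; hidden from docs, defaults to 60 seconds.
    "structured_properties_template_cache_invalidation_interval": 60,
}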
datahub/ingestion/source/snowflake/snowflake_query.py
CHANGED
@@ -159,6 +159,17 @@ class SnowflakeQuery:
         and table_type in ('BASE TABLE', 'EXTERNAL TABLE', 'HYBRID TABLE')
         order by table_schema, table_name"""
 
+    @staticmethod
+    def get_all_tags():
+        return """
+        SELECT tag_database as "TAG_DATABASE",
+        tag_schema AS "TAG_SCHEMA",
+        tag_name AS "TAG_NAME",
+        FROM snowflake.account_usage.tag_references
+        GROUP BY TAG_DATABASE , TAG_SCHEMA, tag_name
+        ORDER BY TAG_DATABASE, TAG_SCHEMA, TAG_NAME ASC;
+        """
+
     @staticmethod
     def get_all_tags_on_object_with_propagation(
         db_name: str, quoted_identifier: str, domain: str
datahub/ingestion/source/snowflake/snowflake_report.py
CHANGED
@@ -114,6 +114,7 @@ class SnowflakeV2Report(
     num_tables_with_known_upstreams: int = 0
     num_upstream_lineage_edge_parsing_failed: int = 0
     num_secure_views_missing_definition: int = 0
+    num_structured_property_templates_created: int = 0
 
     data_dictionary_cache: Optional["SnowflakeDataDictionary"] = None
 
datahub/ingestion/source/snowflake/snowflake_schema.py
CHANGED
@@ -285,6 +285,23 @@ class SnowflakeDataDictionary(SupportsAsObj):
 
         return secure_view_definitions
 
+    def get_all_tags(self) -> List[SnowflakeTag]:
+        cur = self.connection.query(
+            SnowflakeQuery.get_all_tags(),
+        )
+
+        tags = [
+            SnowflakeTag(
+                database=tag["TAG_DATABASE"],
+                schema=tag["TAG_SCHEMA"],
+                name=tag["TAG_NAME"],
+                value="",
+            )
+            for tag in cur
+        ]
+
+        return tags
+
     @serialized_lru_cache(maxsize=1)
     def get_tables_for_database(
         self, db_name: str
datahub/ingestion/source/snowflake/snowflake_schema_gen.py
CHANGED
@@ -1,10 +1,10 @@
 import itertools
 import logging
+import time
 from typing import Dict, Iterable, List, Optional, Union
 
 from datahub.configuration.pattern_utils import is_schema_allowed
 from datahub.emitter.mce_builder import (
-    get_sys_time,
     make_data_platform_urn,
     make_dataset_urn_with_platform_instance,
     make_schema_field_urn,
@@ -74,7 +74,6 @@ from datahub.ingestion.source_report.ingestion_stage import (
     PROFILING,
 )
 from datahub.metadata.com.linkedin.pegasus2avro.common import (
-    AuditStamp,
     GlobalTags,
     Status,
     SubTypes,
@@ -101,15 +100,8 @@ from datahub.metadata.com.linkedin.pegasus2avro.schema import (
     StringType,
     TimeType,
 )
-from datahub.metadata.com.linkedin.pegasus2avro.structured import (
-    StructuredPropertyDefinition,
-)
 from datahub.metadata.com.linkedin.pegasus2avro.tag import TagProperties
 from datahub.metadata.urns import (
-    ContainerUrn,
-    DatasetUrn,
-    DataTypeUrn,
-    EntityTypeUrn,
     SchemaFieldUrn,
     StructuredPropertyUrn,
 )
@@ -191,7 +183,7 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
         self.domain_registry: Optional[DomainRegistry] = domain_registry
         self.classification_handler = ClassificationHandler(self.config, self.report)
         self.tag_extractor = SnowflakeTagExtractor(
-            config, self.data_dictionary, self.report
+            config, self.data_dictionary, self.report, identifiers
         )
         self.profiler: Optional[SnowflakeProfiler] = profiler
         self.snowsight_url_builder: Optional[SnowsightUrlBuilder] = (
@@ -217,6 +209,16 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
         return self.identifiers.snowflake_identifier(identifier)
 
     def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
+        if self.config.extract_tags_as_structured_properties:
+            logger.info("Creating structured property templates for tags")
+            yield from self.tag_extractor.create_structured_property_templates()
+            # We have to wait until cache invalidates to make sure the structured property template is available
+            logger.info(
+                f"Waiting for {self.config.structured_properties_template_cache_invalidation_interval} seconds for structured properties cache to invalidate"
+            )
+            time.sleep(
+                self.config.structured_properties_template_cache_invalidation_interval
+            )
         self.databases = []
         for database in self.get_databases() or []:
             self.report.report_entity_scanned(database.name, "database")
@@ -698,6 +700,7 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
 
     def _process_tag(self, tag: SnowflakeTag) -> Iterable[MetadataWorkUnit]:
         use_sp = self.config.extract_tags_as_structured_properties
+
         identifier = (
             self.snowflake_identifier(tag.structured_property_identifier())
             if use_sp
@@ -708,10 +711,11 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
             return
 
         self.report.report_tag_processed(identifier)
+
         if use_sp:
-
-
-
+            return
+
+        yield from self.gen_tag_workunits(tag)
 
     def _format_tags_as_structured_properties(
         self, tags: List[SnowflakeTag]
@@ -732,6 +736,7 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
         if table.tags:
             for tag in table.tags:
                 yield from self._process_tag(tag)
+
         for column_name in table.column_tags:
             for tag in table.column_tags[column_name]:
                 yield from self._process_tag(tag)
@@ -903,29 +908,6 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
             entityUrn=tag_urn, aspect=tag_properties_aspect
         ).as_workunit()
 
-    def gen_tag_as_structured_property_workunits(
-        self, tag: SnowflakeTag
-    ) -> Iterable[MetadataWorkUnit]:
-        identifier = self.snowflake_identifier(tag.structured_property_identifier())
-        urn = StructuredPropertyUrn(identifier).urn()
-        aspect = StructuredPropertyDefinition(
-            qualifiedName=identifier,
-            displayName=tag.name,
-            valueType=DataTypeUrn("datahub.string").urn(),
-            entityTypes=[
-                EntityTypeUrn(f"datahub.{ContainerUrn.ENTITY_TYPE}").urn(),
-                EntityTypeUrn(f"datahub.{DatasetUrn.ENTITY_TYPE}").urn(),
-                EntityTypeUrn(f"datahub.{SchemaFieldUrn.ENTITY_TYPE}").urn(),
-            ],
-            lastModified=AuditStamp(
-                time=get_sys_time(), actor="urn:li:corpuser:datahub"
-            ),
-        )
-        yield MetadataChangeProposalWrapper(
-            entityUrn=urn,
-            aspect=aspect,
-        ).as_workunit()
-
     def gen_column_tags_as_structured_properties(
         self, dataset_urn: str, table: Union[SnowflakeTable, SnowflakeView]
     ) -> Iterable[MetadataWorkUnit]:
datahub/ingestion/source/snowflake/snowflake_tag.py
CHANGED
@@ -1,6 +1,9 @@
 import logging
-from typing import Dict, List, Optional
+from typing import Dict, Iterable, List, Optional
 
+from datahub.emitter.mce_builder import get_sys_time
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.snowflake.constants import SnowflakeObjectDomain
 from datahub.ingestion.source.snowflake.snowflake_config import (
     SnowflakeV2Config,
@@ -12,7 +15,22 @@ from datahub.ingestion.source.snowflake.snowflake_schema import (
     SnowflakeTag,
     _SnowflakeTagCache,
 )
-from datahub.ingestion.source.snowflake.snowflake_utils import
+from datahub.ingestion.source.snowflake.snowflake_utils import (
+    SnowflakeCommonMixin,
+    SnowflakeIdentifierBuilder,
+)
+from datahub.metadata.com.linkedin.pegasus2avro.common import AuditStamp
+from datahub.metadata.com.linkedin.pegasus2avro.structured import (
+    StructuredPropertyDefinition,
+)
+from datahub.metadata.urns import (
+    ContainerUrn,
+    DatasetUrn,
+    DataTypeUrn,
+    EntityTypeUrn,
+    SchemaFieldUrn,
+    StructuredPropertyUrn,
+)
 
 logger: logging.Logger = logging.getLogger(__name__)
 
@@ -23,11 +41,12 @@ class SnowflakeTagExtractor(SnowflakeCommonMixin):
         config: SnowflakeV2Config,
         data_dictionary: SnowflakeDataDictionary,
         report: SnowflakeV2Report,
+        snowflake_identifiers: SnowflakeIdentifierBuilder,
     ) -> None:
         self.config = config
         self.data_dictionary = data_dictionary
         self.report = report
-
+        self.snowflake_identifiers = snowflake_identifiers
        self.tag_cache: Dict[str, _SnowflakeTagCache] = {}
 
     def _get_tags_on_object_without_propagation(
@@ -59,6 +78,41 @@ class SnowflakeTagExtractor(SnowflakeCommonMixin):
             raise ValueError(f"Unknown domain {domain}")
         return tags
 
+    def create_structured_property_templates(self) -> Iterable[MetadataWorkUnit]:
+        for tag in self.data_dictionary.get_all_tags():
+            if not self.config.structured_property_pattern.allowed(
+                tag.tag_identifier()
+            ):
+                continue
+            if self.config.extract_tags_as_structured_properties:
+                self.report.num_structured_property_templates_created += 1
+                yield from self.gen_tag_as_structured_property_workunits(tag)
+
+    def gen_tag_as_structured_property_workunits(
+        self, tag: SnowflakeTag
+    ) -> Iterable[MetadataWorkUnit]:
+        identifier = self.snowflake_identifiers.snowflake_identifier(
+            tag.structured_property_identifier()
+        )
+        urn = StructuredPropertyUrn(identifier).urn()
+        aspect = StructuredPropertyDefinition(
+            qualifiedName=identifier,
+            displayName=tag.name,
+            valueType=DataTypeUrn("datahub.string").urn(),
+            entityTypes=[
+                EntityTypeUrn(f"datahub.{ContainerUrn.ENTITY_TYPE}").urn(),
+                EntityTypeUrn(f"datahub.{DatasetUrn.ENTITY_TYPE}").urn(),
+                EntityTypeUrn(f"datahub.{SchemaFieldUrn.ENTITY_TYPE}").urn(),
+            ],
+            lastModified=AuditStamp(
+                time=get_sys_time(), actor="urn:li:corpuser:datahub"
+            ),
+        )
+        yield MetadataChangeProposalWrapper(
+            entityUrn=urn,
+            aspect=aspect,
+        ).as_workunit()
+
     def _get_tags_on_object_with_propagation(
         self,
         domain: str,
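Each template is keyed by a structured property urn derived from the tag's identifier. A small sketch of the resulting urn shape; the dotted identifier below is an assumption, since the exact output of `tag.structured_property_identifier()` is defined in snowflake_schema.py:

from datahub.metadata.urns import StructuredPropertyUrn

identifier = "snowflake.my_db.my_schema.pii"  # assumed identifier format
print(StructuredPropertyUrn(identifier).urn())
# urn:li:structuredProperty:snowflake.my_db.my_schema.pii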
datahub/ingestion/source/sql/mssql/job_models.py
CHANGED
@@ -7,7 +7,12 @@ from datahub.emitter.mce_builder import (
     make_data_platform_urn,
     make_dataplatform_instance_urn,
 )
+from datahub.emitter.mcp_builder import (
+    DatabaseKey,
+    SchemaKey,
+)
 from datahub.metadata.schema_classes import (
+    ContainerClass,
     DataFlowInfoClass,
     DataJobInfoClass,
     DataJobInputOutputClass,
@@ -171,11 +176,7 @@ class MSSQLDataJob:
             flow_id=self.entity.flow.formatted_name,
             job_id=self.entity.formatted_name,
             cluster=self.entity.flow.cluster,
-            platform_instance=
-                self.entity.flow.platform_instance
-                if self.entity.flow.platform_instance
-                else None
-            ),
+            platform_instance=self.entity.flow.platform_instance,
         )
 
     def add_property(
@@ -222,6 +223,26 @@ class MSSQLDataJob:
         )
         return None
 
+    @property
+    def as_container_aspect(self) -> ContainerClass:
+        key_args = dict(
+            platform=self.entity.flow.orchestrator,
+            instance=self.entity.flow.platform_instance,
+            env=self.entity.flow.env,
+            database=self.entity.flow.db,
+        )
+        container_key = (
+            SchemaKey(
+                schema=self.entity.schema,
+                **key_args,
+            )
+            if isinstance(self.entity, StoredProcedure)
+            else DatabaseKey(
+                **key_args,
+            )
+        )
+        return ContainerClass(container=container_key.as_urn())
+
 
 @dataclass
 class MSSQLDataFlow:
@@ -244,9 +265,7 @@ class MSSQLDataFlow:
             orchestrator=self.entity.orchestrator,
             flow_id=self.entity.formatted_name,
             cluster=self.entity.cluster,
-            platform_instance=
-                self.entity.platform_instance if self.entity.platform_instance else None
-            ),
+            platform_instance=self.entity.platform_instance,
         )
 
     @property
@@ -267,3 +286,13 @@ class MSSQLDataFlow:
             ),
         )
         return None
+
+    @property
+    def as_container_aspect(self) -> ContainerClass:
+        databaseKey = DatabaseKey(
+            platform=self.entity.orchestrator,
+            instance=self.entity.platform_instance,
+            env=self.entity.env,
+            database=self.entity.db,
+        )
+        return ContainerClass(container=databaseKey.as_urn())
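The new `as_container_aspect` properties attach jobs and flows to their database (or, for stored procedures, schema) container. A hedged sketch of how such a container urn is derived via `DatabaseKey`; all field values are placeholders:

from datahub.emitter.mcp_builder import DatabaseKey

database_key = DatabaseKey(
    platform="mssql",           # the flow's orchestrator, per the diff
    instance=None,              # platform_instance may be unset
    env="PROD",
    database="AdventureWorks",  # placeholder database name
)
print(database_key.as_urn())    # urn:li:container:<guid derived from the key fields>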
datahub/ingestion/source/sql/mssql/source.py
CHANGED
@@ -108,6 +108,10 @@ class SQLServerConfig(BasicSQLAlchemyConfig):
         default=True,
         description="Enable lineage extraction for stored procedures",
     )
+    include_containers_for_pipelines: bool = Field(
+        default=False,
+        description="Enable the container aspects ingestion for both pipelines and tasks. Note that this feature requires the corresponding model support in the backend, which was introduced in version 0.15.0.1.",
+    )
 
     @pydantic.validator("uri_args")
     def passwords_match(cls, v, values, **kwargs):
@@ -641,6 +645,12 @@ class SQLServerSource(SQLAlchemySource):
                 aspect=data_platform_instance_aspect,
             ).as_workunit()
 
+        if self.config.include_containers_for_pipelines:
+            yield MetadataChangeProposalWrapper(
+                entityUrn=data_job.urn,
+                aspect=data_job.as_container_aspect,
+            ).as_workunit()
+
         if include_lineage:
             yield MetadataChangeProposalWrapper(
                 entityUrn=data_job.urn,
@@ -683,6 +693,13 @@ class SQLServerSource(SQLAlchemySource):
                 entityUrn=data_flow.urn,
                 aspect=data_platform_instance_aspect,
             ).as_workunit()
+
+        if self.config.include_containers_for_pipelines:
+            yield MetadataChangeProposalWrapper(
+                entityUrn=data_flow.urn,
+                aspect=data_flow.as_container_aspect,
+            ).as_workunit()
+
         # TODO: Add SubType when it appear
 
     def get_inspectors(self) -> Iterable[Inspector]:
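A hedged source-config fragment enabling the new flag; connection details are placeholders, and the flag defaults to False because older backends lack the container model support noted in the field description above:

mssql_source_config = {
    "host_port": "localhost:1433",        # placeholder connection details
    "database": "AdventureWorks",
    "username": "datahub",
    "password": "example-password",
    "include_containers_for_pipelines": True,  # new in this release
}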
datahub/ingestion/source/tableau/tableau.py
CHANGED
@@ -2428,10 +2428,12 @@ class TableauSiteSource:
             ]
         ],
     ) -> Optional["SqlParsingResult"]:
-
-
-
-
+        database_field = datasource.get(c.DATABASE) or {}
+        database_id: Optional[str] = database_field.get(c.ID)
+        database_name: Optional[str] = database_field.get(c.NAME) or c.UNKNOWN.lower()
+        database_connection_type: Optional[str] = database_field.get(
+            c.CONNECTION_TYPE
+        ) or datasource.get(c.CONNECTION_TYPE)
 
         if (
             datasource.get(c.IS_UNSUPPORTED_CUSTOM_SQL) in (None, False)
@@ -2440,10 +2442,7 @@ class TableauSiteSource:
             logger.debug(f"datasource {datasource_urn} is not created from custom sql")
             return None
 
-        if
-            database_info.get(c.NAME) is None
-            or database_info.get(c.CONNECTION_TYPE) is None
-        ):
+        if database_connection_type is None:
             logger.debug(
                 f"database information is missing from datasource {datasource_urn}"
             )
@@ -2459,14 +2458,14 @@ class TableauSiteSource:
 
         logger.debug(f"Parsing sql={query}")
 
-        upstream_db =
+        upstream_db = database_name
 
         if func_overridden_info is not None:
             # Override the information as per configuration
             upstream_db, platform_instance, platform, _ = func_overridden_info(
-
-
-
+                database_connection_type,
+                database_name,
+                database_id,
                 self.config.platform_instance_map,
                 self.config.lineage_overrides,
                 self.config.database_hostname_to_platform_instance_map,
@@ -2534,6 +2533,9 @@ class TableauSiteSource:
             platform_instance=self.config.platform_instance,
             func_overridden_info=get_overridden_info,
         )
+        logger.debug(
+            f"_create_lineage_from_unsupported_csql parsed_result = {parsed_result}"
+        )
 
         if parsed_result is None:
             return
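The refactor funnels every database lookup through one `or`-fallback chain, which treats empty values the same as missing ones. A small illustration with a hypothetical datasource payload, using literal keys in place of the `c.*` constants:

datasource = {"database": {"id": "db-1", "name": ""}, "connectionType": "postgres"}

database_field = datasource.get("database") or {}
database_id = database_field.get("id")
database_name = database_field.get("name") or "unknown"  # empty string falls back too
database_connection_type = database_field.get("connectionType") or datasource.get(
    "connectionType"
)
print(database_id, database_name, database_connection_type)  # db-1 unknown postgres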