acryl-datahub 0.15.0rc3__py3-none-any.whl → 0.15.0rc5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0rc3.dist-info → acryl_datahub-0.15.0rc5.dist-info}/METADATA +2406 -2406
- {acryl_datahub-0.15.0rc3.dist-info → acryl_datahub-0.15.0rc5.dist-info}/RECORD +17 -16
- {acryl_datahub-0.15.0rc3.dist-info → acryl_datahub-0.15.0rc5.dist-info}/entry_points.txt +1 -1
- datahub/__init__.py +1 -1
- datahub/ingestion/source/feast.py +97 -6
- datahub/ingestion/source/iceberg/iceberg.py +12 -5
- datahub/ingestion/source/kafka/kafka.py +21 -8
- datahub/ingestion/source/powerbi/__init__.py +0 -1
- datahub/ingestion/source/powerbi/config.py +3 -3
- datahub/ingestion/source/powerbi/m_query/data_classes.py +34 -2
- datahub/ingestion/source/powerbi/m_query/parser.py +6 -3
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +920 -0
- datahub/ingestion/source/powerbi/m_query/resolver.py +16 -938
- datahub/ingestion/source/powerbi/m_query/validator.py +9 -3
- datahub/ingestion/source/powerbi/powerbi.py +12 -6
- {acryl_datahub-0.15.0rc3.dist-info → acryl_datahub-0.15.0rc5.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0rc3.dist-info → acryl_datahub-0.15.0rc5.dist-info}/top_level.txt +0 -0
{acryl_datahub-0.15.0rc3.dist-info → acryl_datahub-0.15.0rc5.dist-info}/RECORD
CHANGED

@@ -1,4 +1,4 @@
-datahub/__init__.py,sha256=
+datahub/__init__.py,sha256=c5YiGS9ajJPufFiwc_4_Bv9DF1Ha6s0H9dd-rtKRF3Y,574
 datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
 datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
 datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -187,7 +187,7 @@ datahub/ingestion/source/confluent_schema_registry.py,sha256=_h9D8bUXoaGcwgwB94d
 datahub/ingestion/source/csv_enricher.py,sha256=xjCbcsSMM8l_ASCRAnNsUGKuYMrD1lec19Waixub1EM,29498
 datahub/ingestion/source/demo_data.py,sha256=yzA_R-wfSX2WPz0i5ukYlscpmpb0Pt8D7EkhtKfftvo,1286
 datahub/ingestion/source/elastic_search.py,sha256=qFUVNzynTVJTabASTjGMu8Qhf9UpNbEtSBFjaPQjBJE,22641
-datahub/ingestion/source/feast.py,sha256=
+datahub/ingestion/source/feast.py,sha256=uZpeUkJsiNlvZcUkARiEuZT_3n6sbGc0yFzwqhtnefA,18103
 datahub/ingestion/source/file.py,sha256=pH-Qkjh5FQ2XvyYPE7Z8XEY4vUk_SUHxm8p8IxG12tU,15879
 datahub/ingestion/source/ge_data_profiler.py,sha256=JqTonv8y7Re4Rfn2YKOEaLufiiAOWKfK1XQvJfV5dvs,64126
 datahub/ingestion/source/ge_profiling_config.py,sha256=P-9pd20koFvpxeEL_pqFvKWWz-qnpZ6XkELUyBKr7is,10807
@@ -312,14 +312,14 @@ datahub/ingestion/source/git/git_import.py,sha256=5CT6vMDb0MDctCtShnxb3JVihULtvk
 datahub/ingestion/source/grafana/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/grafana/grafana_source.py,sha256=3pU3xodPgS5lmnjuQ_u7F0XPzD_Y8MnPlMxRJ86qz4g,4960
 datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/iceberg/iceberg.py,sha256=
+datahub/ingestion/source/iceberg/iceberg.py,sha256=fjqp3VBW5W5-54X_-ubkRZiAmdHvuMbxRbC4UYzEr4U,25900
 datahub/ingestion/source/iceberg/iceberg_common.py,sha256=TS3_ZYZ47Fe02CmzEo1z0pvy7yjXuG1VlwqNxa0U6pc,8506
 datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=hLT1Le_TEUoFXvsJSlrRB1qbTiTe-YVGCof5TFHMyd8,9908
 datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/identity/azure_ad.py,sha256=GdmJFD4UMsb5353Z7phXRf-YsXR2woGLRJwBXUkgXq0,28809
 datahub/ingestion/source/identity/okta.py,sha256=PnRokWLG8wSoNZlXJiRZiW6APTEHO09q4n2j_l6m3V0,30756
 datahub/ingestion/source/kafka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/kafka/kafka.py,sha256=
+datahub/ingestion/source/kafka/kafka.py,sha256=QUw8VCmqIhZJvUiFJmFmekFmy4nXCLD4EKJNC6jk6Y4,26092
 datahub/ingestion/source/kafka/kafka_connect.py,sha256=5KUlhn3876c41Z3kx5l4oJhbu0ekXZQRdxmu52vb_v8,55167
 datahub/ingestion/source/kafka/kafka_schema_registry_base.py,sha256=13XjSwqyVhH1CJUFHAbWdmmv_Rw0Ju_9HQdBmIzPNNA,566
 datahub/ingestion/source/looker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -347,18 +347,19 @@ datahub/ingestion/source/looker/view_upstream.py,sha256=k278-uwh8uspdREpjE_uqks4
 datahub/ingestion/source/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/metadata/business_glossary.py,sha256=eRVRpQI0ZX5OofS1BUhNihFOfWih70TIAkJM7zaMH80,17577
 datahub/ingestion/source/metadata/lineage.py,sha256=XiZGuY6k3O9qBmgo7AzosIndJHwrvEhapVLdRlDxCuc,9507
-datahub/ingestion/source/powerbi/__init__.py,sha256=
-datahub/ingestion/source/powerbi/config.py,sha256=
+datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+datahub/ingestion/source/powerbi/config.py,sha256=LV8BOm2zzF9t0RMwQVVUNB0bStzBPo8A6JkaW0xlgsQ,23241
 datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=AIU89lVPoCWlzc_RfUjDJwRQ11akPtnGpBTluBMCKio,2242
 datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule,sha256=PRWzuZMMhKdOVoAaE8csHvUFbZHxYe5meJHgrqlgiuw,19795
-datahub/ingestion/source/powerbi/powerbi.py,sha256=
+datahub/ingestion/source/powerbi/powerbi.py,sha256=7UsAEqaFlkWONcXJdQ2hotUYYn46ks6Fe71KXEMh7lI,54495
 datahub/ingestion/source/powerbi/m_query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=
+datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=s2Nckmr50hxae5gPFcIfpyLzYpaMH56Q9nsDsEgi_-k,2243
 datahub/ingestion/source/powerbi/m_query/native_sql_parser.py,sha256=zzKVDGeUM3Yv3-zNah4D6mSnr6jXsstNuLmzczcPQEE,3683
-datahub/ingestion/source/powerbi/m_query/parser.py,sha256=
-datahub/ingestion/source/powerbi/m_query/
+datahub/ingestion/source/powerbi/m_query/parser.py,sha256=pB1LGdb02Ryf8Pr8JPSgiOmLE6mEAgDbKodtKOOY6LU,5782
+datahub/ingestion/source/powerbi/m_query/pattern_handler.py,sha256=K9kyX-pMFjOOCAvedKXXce7xG-cHwTGQTrBT5GFXEIg,32865
+datahub/ingestion/source/powerbi/m_query/resolver.py,sha256=v2DJYT70Vtw9NyIwFecP7tHGcVxBWH9UqV7vbpYPhws,16985
 datahub/ingestion/source/powerbi/m_query/tree_function.py,sha256=i7HM9Oyj9XdJpNfG2lE8puxeuc47aSJ-5dPdBEcw2WU,6165
-datahub/ingestion/source/powerbi/m_query/validator.py,sha256=
+datahub/ingestion/source/powerbi/m_query/validator.py,sha256=crG-VZy2XPieiDliP9yVMgiFcc8b2xbZyDFEATXqEAQ,1155
 datahub/ingestion/source/powerbi/rest_api_wrapper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py,sha256=xqAsnNUCP44Wd1rE1m_phbKtNCMJTFJfOX4_2varadg,8298
 datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py,sha256=O2XTVBdXteIgQF8Lss_t2RhRSsRMmMyWrAoNonDMQFI,39604
@@ -971,8 +972,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
 datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
 datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
 datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
-acryl_datahub-0.15.
-acryl_datahub-0.15.
-acryl_datahub-0.15.
-acryl_datahub-0.15.
-acryl_datahub-0.15.
+acryl_datahub-0.15.0rc5.dist-info/METADATA,sha256=SiiSRUUBz-MJZHnnGpn6342w8hkW9COEktKjlJDQQuw,171117
+acryl_datahub-0.15.0rc5.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+acryl_datahub-0.15.0rc5.dist-info/entry_points.txt,sha256=3jOfMXB66r8zRDaqzRYpNc0tK-oUO-3tXlnGYDdVAmg,9440
+acryl_datahub-0.15.0rc5.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+acryl_datahub-0.15.0rc5.dist-info/RECORD,,
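Each RECORD row has the form path,sha256=<digest>,<size>, where the digest is the file's SHA-256 encoded as urlsafe base64 with the trailing padding stripped. A minimal Python sketch of how one row is produced (the path is illustrative):

    import base64
    import hashlib

    def record_row(path: str, data: bytes) -> str:
        # Wheel RECORD digests are urlsafe base64 of the raw SHA-256, minus '=' padding.
        digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
        return f"{path},sha256={digest.decode()},{len(data)}"

    # An empty file produces the 47DEQpj8... digest seen on the __init__.py stubs above.
    print(record_row("datahub/ingestion/source/powerbi/__init__.py", b""))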
{acryl_datahub-0.15.0rc3.dist-info → acryl_datahub-0.15.0rc5.dist-info}/entry_points.txt
CHANGED

@@ -73,7 +73,7 @@ okta = datahub.ingestion.source.identity.okta:OktaSource
 openapi = datahub.ingestion.source.openapi:OpenApiSource
 oracle = datahub.ingestion.source.sql.oracle:OracleSource
 postgres = datahub.ingestion.source.sql.postgres:PostgresSource
-powerbi = datahub.ingestion.source.powerbi:PowerBiDashboardSource
+powerbi = datahub.ingestion.source.powerbi.powerbi:PowerBiDashboardSource
 powerbi-report-server = datahub.ingestion.source.powerbi_report_server:PowerBiReportServerDashboardSource
 preset = datahub.ingestion.source.preset:PresetSource
 presto = datahub.ingestion.source.sql.presto:PrestoSource
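The powerbi entry point now targets the concrete datahub.ingestion.source.powerbi.powerbi module instead of the package __init__ (whose re-export is removed below). A sketch of how such a plugin entry point resolves at runtime, assuming DataHub registers its sources under the datahub.ingestion.source.plugins group:

    from importlib.metadata import entry_points

    # Assumption: this is the entry point group DataHub uses for source plugins.
    GROUP = "datahub.ingestion.source.plugins"

    # entry_points(group=...) requires Python 3.10+; older versions index the result by group key.
    (powerbi_ep,) = [ep for ep in entry_points(group=GROUP) if ep.name == "powerbi"]
    source_cls = powerbi_ep.load()  # imports the module and returns the named attribute
    print(source_cls.__name__)  # PowerBiDashboardSource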
datahub/__init__.py
CHANGED

datahub/ingestion/source/feast.py
CHANGED

@@ -42,10 +42,14 @@ from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import (
 from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
 from datahub.metadata.schema_classes import (
     BrowsePathsClass,
+    GlobalTagsClass,
     MLFeaturePropertiesClass,
     MLFeatureTablePropertiesClass,
     MLPrimaryKeyPropertiesClass,
+    OwnerClass,
+    OwnershipClass,
     StatusClass,
+    TagAssociationClass,
 )
 
 # FIXME: ValueType module cannot be used as a type
@@ -91,6 +95,24 @@ class FeastRepositorySourceConfig(ConfigModel):
     environment: str = Field(
         default=DEFAULT_ENV, description="Environment to use when constructing URNs"
     )
+    # owner_mappings example:
+    # This must be added to the recipe in order to extract owners, otherwise NO owners will be extracted
+    # owner_mappings:
+    #   - feast_owner_name: "<owner>"
+    #     datahub_owner_urn: "urn:li:corpGroup:<owner>"
+    #     datahub_ownership_type: "BUSINESS_OWNER"
+    owner_mappings: Optional[List[Dict[str, str]]] = Field(
+        default=None, description="Mapping of owner names to owner types"
+    )
+    enable_owner_extraction: bool = Field(
+        default=False,
+        description="If this is disabled, then we NEVER try to map owners. "
+        "If this is enabled, then owner_mappings is REQUIRED to extract ownership.",
+    )
+    enable_tag_extraction: bool = Field(
+        default=False,
+        description="If this is disabled, then we NEVER try to extract tags.",
+    )
 
 
 @platform_name("Feast")
@@ -215,10 +237,15 @@ class FeastRepositorySource(Source):
         """
 
         feature_view_name = f"{self.feature_store.project}.{feature_view.name}"
+        aspects = (
+            [StatusClass(removed=False)]
+            + self._get_tags(entity)
+            + self._get_owners(entity)
+        )
 
         entity_snapshot = MLPrimaryKeySnapshot(
             urn=builder.make_ml_primary_key_urn(feature_view_name, entity.name),
-            aspects=
+            aspects=aspects,
         )
 
         entity_snapshot.aspects.append(
@@ -243,10 +270,11 @@
         Generate an MLFeature work unit for a Feast feature.
         """
         feature_view_name = f"{self.feature_store.project}.{feature_view.name}"
+        aspects = [StatusClass(removed=False)] + self._get_tags(field)
 
         feature_snapshot = MLFeatureSnapshot(
             urn=builder.make_ml_feature_urn(feature_view_name, field.name),
-            aspects=
+            aspects=aspects,
        )
 
         feature_sources = []
@@ -295,13 +323,18 @@
         """
 
         feature_view_name = f"{self.feature_store.project}.{feature_view.name}"
+        aspects = (
+            [
+                BrowsePathsClass(paths=[f"/feast/{self.feature_store.project}"]),
+                StatusClass(removed=False),
+            ]
+            + self._get_tags(feature_view)
+            + self._get_owners(feature_view)
+        )
 
         feature_view_snapshot = MLFeatureTableSnapshot(
             urn=builder.make_ml_feature_table_urn("feast", feature_view_name),
-            aspects=
-                BrowsePathsClass(paths=[f"/feast/{self.feature_store.project}"]),
-                StatusClass(removed=False),
-            ],
+            aspects=aspects,
         )
 
         feature_view_snapshot.aspects.append(
@@ -360,6 +393,64 @@
 
         return MetadataWorkUnit(id=on_demand_feature_view_name, mce=mce)
 
+    # If a tag is specified in a Feast object, then the tag will be ingested into Datahub if enable_tag_extraction is
+    # True, otherwise NO tags will be ingested
+    def _get_tags(self, obj: Union[Entity, FeatureView, FeastField]) -> list:
+        """
+        Extracts tags from the given object and returns a list of aspects.
+        """
+        aspects: List[Union[GlobalTagsClass]] = []
+
+        # Extract tags
+        if self.source_config.enable_tag_extraction:
+            if obj.tags.get("name"):
+                tag_name: str = obj.tags["name"]
+                tag_association = TagAssociationClass(
+                    tag=builder.make_tag_urn(tag_name)
+                )
+                global_tags_aspect = GlobalTagsClass(tags=[tag_association])
+                aspects.append(global_tags_aspect)
+
+        return aspects
+
+    # If an owner is specified in a Feast object, it will only be ingested into Datahub if owner_mappings is specified
+    # and enable_owner_extraction is True in FeastRepositorySourceConfig, otherwise NO owners will be ingested
+    def _get_owners(self, obj: Union[Entity, FeatureView, FeastField]) -> list:
+        """
+        Extracts owners from the given object and returns a list of aspects.
+        """
+        aspects: List[Union[OwnershipClass]] = []
+
+        # Extract owner
+        if self.source_config.enable_owner_extraction:
+            owner = getattr(obj, "owner", None)
+            if owner:
+                # Create owner association, skipping if None
+                owner_association = self._create_owner_association(owner)
+                if owner_association:  # Only add valid owner associations
+                    owners_aspect = OwnershipClass(owners=[owner_association])
+                    aspects.append(owners_aspect)
+
+        return aspects
+
+    def _create_owner_association(self, owner: str) -> Optional[OwnerClass]:
+        """
+        Create an OwnerClass instance for the given owner using the owner mappings.
+        """
+        if self.source_config.owner_mappings is not None:
+            for mapping in self.source_config.owner_mappings:
+                if mapping["feast_owner_name"] == owner:
+                    ownership_type_class: str = mapping.get(
+                        "datahub_ownership_type", "TECHNICAL_OWNER"
+                    )
+                    datahub_owner_urn = mapping.get("datahub_owner_urn")
+                    if datahub_owner_urn:
+                        return OwnerClass(
+                            owner=datahub_owner_urn,
+                            type=ownership_type_class,
+                        )
+        return None
+
     @classmethod
     def create(cls, config_dict, ctx):
         config = FeastRepositorySourceConfig.parse_obj(config_dict)
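Putting the new Feast knobs together, a hedged sketch of a config that enables tag and owner extraction (the path value and the mapping entry are placeholders; both enable_* flags default to False):

    from datahub.ingestion.source.feast import FeastRepositorySourceConfig

    config = FeastRepositorySourceConfig.parse_obj(
        {
            "path": "/path/to/feature_repo",  # assumed existing field: the Feast repo path
            "enable_tag_extraction": True,
            "enable_owner_extraction": True,
            # Without owner_mappings, enable_owner_extraction alone ingests no owners.
            "owner_mappings": [
                {
                    "feast_owner_name": "data-team",
                    "datahub_owner_urn": "urn:li:corpGroup:data-team",
                    "datahub_ownership_type": "BUSINESS_OWNER",
                }
            ],
        }
    )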
datahub/ingestion/source/iceberg/iceberg.py
CHANGED

@@ -9,6 +9,7 @@ from pyiceberg.exceptions import (
     NoSuchIcebergTableError,
     NoSuchNamespaceError,
     NoSuchPropertyException,
+    NoSuchTableError,
 )
 from pyiceberg.schema import Schema, SchemaVisitorPerPrimitiveType, visit
 from pyiceberg.table import Table
@@ -104,7 +105,7 @@ logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(
 @capability(SourceCapability.DESCRIPTIONS, "Enabled by default.")
 @capability(
     SourceCapability.OWNERSHIP,
-    "
+    "Automatically ingests ownership information from table properties based on `user_ownership_property` and `group_ownership_property`",
 )
 @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
 class IcebergSource(StatefulIngestionSourceBase):
@@ -192,9 +193,7 @@ class IcebergSource(StatefulIngestionSourceBase):
                 table = thread_local.local_catalog.load_table(dataset_path)
                 time_taken = timer.elapsed_seconds()
                 self.report.report_table_load_time(time_taken)
-                LOGGER.debug(
-                    f"Loaded table: {table.identifier}, time taken: {time_taken}"
-                )
+                LOGGER.debug(f"Loaded table: {table.name()}, time taken: {time_taken}")
                 yield from self._create_iceberg_workunit(dataset_name, table)
             except NoSuchPropertyException as e:
                 self.report.report_warning(
@@ -206,12 +205,20 @@
                 )
             except NoSuchIcebergTableError as e:
                 self.report.report_warning(
-                    "
+                    "not-an-iceberg-table",
                     f"Failed to create workunit for {dataset_name}. {e}",
                 )
                 LOGGER.warning(
                     f"NoSuchIcebergTableError while processing table {dataset_path}, skipping it.",
                 )
+            except NoSuchTableError as e:
+                self.report.report_warning(
+                    "no-such-table",
+                    f"Failed to create workunit for {dataset_name}. {e}",
+                )
+                LOGGER.warning(
+                    f"NoSuchTableError while processing table {dataset_path}, skipping it.",
+                )
             except Exception as e:
                 self.report.report_failure("general", f"Failed to create workunit: {e}")
                 LOGGER.exception(
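The new except branch follows the usual pyiceberg pattern for tables that disappear between listing and loading; a minimal sketch (catalog name and table identifier are placeholders, and load_catalog needs real connection properties):

    from pyiceberg.catalog import load_catalog
    from pyiceberg.exceptions import NoSuchTableError

    catalog = load_catalog("default")  # placeholder; normally configured via pyiceberg.yaml
    try:
        table = catalog.load_table("db.orders")  # placeholder identifier
        print(f"Loaded table: {table.name()}")  # name() replaces the removed .identifier usage
    except NoSuchTableError as e:
        print(f"no-such-table: {e}")  # skip this table and keep ingesting the rest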
datahub/ingestion/source/kafka/kafka.py
CHANGED

@@ -148,7 +148,7 @@ def get_kafka_consumer(
 ) -> confluent_kafka.Consumer:
     consumer = confluent_kafka.Consumer(
         {
-            "group.id": "
+            "group.id": "datahub-kafka-ingestion",
             "bootstrap.servers": connection.bootstrap,
             **connection.consumer_config,
         }
@@ -164,6 +164,25 @@
     return consumer
 
 
+def get_kafka_admin_client(
+    connection: KafkaConsumerConnectionConfig,
+) -> AdminClient:
+    client = AdminClient(
+        {
+            "group.id": "datahub-kafka-ingestion",
+            "bootstrap.servers": connection.bootstrap,
+            **connection.consumer_config,
+        }
+    )
+    if CallableConsumerConfig.is_callable_config(connection.consumer_config):
+        # As per documentation, we need to explicitly call the poll method to make sure OAuth callback gets executed
+        # https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#kafka-client-configuration
+        logger.debug("Initiating polling for kafka admin client")
+        client.poll(timeout=30)
+        logger.debug("Initiated polling for kafka admin client")
+    return client
+
+
 @dataclass
 class KafkaSourceReport(StaleEntityRemovalSourceReport):
     topics_scanned: int = 0
@@ -278,13 +297,7 @@ class KafkaSource(StatefulIngestionSourceBase, TestableSource):
     def init_kafka_admin_client(self) -> None:
         try:
             # TODO: Do we require separate config than existing consumer_config ?
-            self.admin_client =
-                {
-                    "group.id": "test",
-                    "bootstrap.servers": self.source_config.connection.bootstrap,
-                    **self.source_config.connection.consumer_config,
-                }
-            )
+            self.admin_client = get_kafka_admin_client(self.source_config.connection)
         except Exception as e:
             logger.debug(e, exc_info=e)
             self.report.report_warning(
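A sketch of using the extracted helper on its own, assuming a reachable broker and the import path for KafkaConsumerConnectionConfig noted below:

    from datahub.configuration.kafka import KafkaConsumerConnectionConfig  # assumed import path
    from datahub.ingestion.source.kafka.kafka import get_kafka_admin_client

    connection = KafkaConsumerConnectionConfig(bootstrap="localhost:9092")
    admin = get_kafka_admin_client(connection)

    # AdminClient.list_topics() fetches cluster metadata, including topic names.
    metadata = admin.list_topics(timeout=10)
    print(sorted(metadata.topics))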
datahub/ingestion/source/powerbi/__init__.py
CHANGED

@@ -1 +0,0 @@
-from datahub.ingestion.source.powerbi.powerbi import PowerBiDashboardSource
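With the re-export gone, the class must be imported from the concrete module, matching the entry_points.txt change above:

    # Old import, no longer available:
    # from datahub.ingestion.source.powerbi import PowerBiDashboardSource
    from datahub.ingestion.source.powerbi.powerbi import PowerBiDashboardSource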
datahub/ingestion/source/powerbi/config.py
CHANGED

@@ -173,7 +173,7 @@ class SupportedDataPlatform(Enum):
         datahub_data_platform_name="redshift",
     )
 
-
+    DATABRICKS_SQL = DataPlatformPair(
         powerbi_data_platform_name="Databricks", datahub_data_platform_name="databricks"
     )
 
@@ -313,8 +313,8 @@ class PowerBiDashboardSourceConfig(
         " Note: This field works in conjunction with 'workspace_type_filter' and both must be considered when filtering workspaces.",
     )
 
-    # Dataset type mapping PowerBI support many type of data-sources. Here user
-    # DataSource
+    # Dataset type mapping PowerBI support many type of data-sources. Here user needs to define what type of PowerBI
+    # DataSource needs to be mapped to corresponding DataHub Platform DataSource. For example, PowerBI `Snowflake` is
     # mapped to DataHub `snowflake` PowerBI `PostgreSQL` is mapped to DataHub `postgres` and so on.
     dataset_type_mapping: Union[
         Dict[str, str], Dict[str, PlatformDetail]
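In its simple string-to-string form, the mapping the rewritten comment describes looks like this (platform names taken from the comment; the dict itself is illustrative):

    # PowerBI data source name -> DataHub platform name
    dataset_type_mapping = {
        "Snowflake": "snowflake",
        "PostgreSQL": "postgres",
    }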
datahub/ingestion/source/powerbi/m_query/data_classes.py
CHANGED

@@ -1,10 +1,14 @@
 import os
 from abc import ABC
 from dataclasses import dataclass
-from
+from enum import Enum
+from typing import Any, Dict, List, Optional
 
 from lark import Tree
 
+from datahub.ingestion.source.powerbi.config import DataPlatformPair
+from datahub.sql_parsing.sqlglot_lineage import ColumnLineageInfo
+
 TRACE_POWERBI_MQUERY_PARSER = os.getenv("DATAHUB_TRACE_POWERBI_MQUERY_PARSER", False)
 
 
@@ -30,7 +34,7 @@ class IdentifierAccessor(AbstractIdentifierAccessor):
 
     "[Schema="public",Item="order_date"]" is "items" in ItemSelector. Data of items varies as per DataSource
 
-    "public_order_date" is in "next" of ItemSelector. The "next" will be None if this identifier is leaf i.e
+    "public_order_date" is in "next" of ItemSelector. The "next" will be None if this identifier is leaf i.e., table
 
     """
 
@@ -53,3 +57,31 @@ class ReferencedTable:
     database: str
     schema: str
     table: str
+
+
+@dataclass
+class DataPlatformTable:
+    data_platform_pair: DataPlatformPair
+    urn: str
+
+
+@dataclass
+class Lineage:
+    upstreams: List[DataPlatformTable]
+    column_lineage: List[ColumnLineageInfo]
+
+    @staticmethod
+    def empty() -> "Lineage":
+        return Lineage(upstreams=[], column_lineage=[])
+
+
+class FunctionName(Enum):
+    NATIVE_QUERY = "Value.NativeQuery"
+    POSTGRESQL_DATA_ACCESS = "PostgreSQL.Database"
+    ORACLE_DATA_ACCESS = "Oracle.Database"
+    SNOWFLAKE_DATA_ACCESS = "Snowflake.Databases"
+    MSSQL_DATA_ACCESS = "Sql.Database"
+    DATABRICK_DATA_ACCESS = "Databricks.Catalogs"
+    GOOGLE_BIGQUERY_DATA_ACCESS = "GoogleBigQuery.Database"
+    AMAZON_REDSHIFT_DATA_ACCESS = "AmazonRedshift.Database"
+    DATABRICK_MULTI_CLOUD_DATA_ACCESS = "DatabricksMultiCloud.Catalogs"
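A hedged sketch of the new data classes in use; the DataPlatformPair constructor fields come from config.py above, and the URN is a made-up example:

    from datahub.ingestion.source.powerbi.config import DataPlatformPair
    from datahub.ingestion.source.powerbi.m_query.data_classes import (
        DataPlatformTable,
        FunctionName,
        Lineage,
    )

    # Empty result, as returned when no upstream is resolved.
    assert Lineage.empty().upstreams == []

    # A single-upstream lineage entry for a Snowflake table.
    lineage = Lineage(
        upstreams=[
            DataPlatformTable(
                data_platform_pair=DataPlatformPair(
                    powerbi_data_platform_name="Snowflake",
                    datahub_data_platform_name="snowflake",
                ),
                urn="urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.orders,PROD)",
            )
        ],
        column_lineage=[],
    )

    # The enum maps M-Query data-access function names to members.
    assert FunctionName("Snowflake.Databases") is FunctionName.SNOWFLAKE_DATA_ACCESS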
datahub/ingestion/source/powerbi/m_query/parser.py
CHANGED

@@ -7,6 +7,7 @@ from typing import Dict, List
 import lark
 from lark import Lark, Tree
 
+import datahub.ingestion.source.powerbi.m_query.data_classes
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.source.powerbi.config import (
     PowerBiDashboardSourceConfig,
@@ -65,7 +66,7 @@ def get_upstream_tables(
     ctx: PipelineContext,
     config: PowerBiDashboardSourceConfig,
     parameters: Dict[str, str] = {},
-) -> List[
+) -> List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage]:
     if table.expression is None:
         logger.debug(f"There is no M-Query expression in table {table.full_name}")
         return []
@@ -127,12 +128,14 @@
     reporter.m_query_parse_successes += 1
 
     try:
-        lineage: List[
+        lineage: List[
+            datahub.ingestion.source.powerbi.m_query.data_classes.Lineage
+        ] = resolver.MQueryResolver(
             table=table,
             parse_tree=parse_tree,
             reporter=reporter,
             parameters=parameters,
-        ).
+        ).resolve_to_lineage(
             ctx=ctx,
             config=config,
             platform_instance_resolver=platform_instance_resolver,
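get_upstream_tables now returns a list of Lineage objects rather than bare tables; a small sketch of the consumption pattern (the input list stands in for a real get_upstream_tables(...) call):

    from typing import List

    from datahub.ingestion.source.powerbi.m_query.data_classes import Lineage

    def upstream_urns(lineages: List[Lineage]) -> List[str]:
        # Flatten every resolved upstream table URN out of the lineage results.
        return [table.urn for lineage in lineages for table in lineage.upstreams]

    print(upstream_urns([Lineage.empty()]))  # []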