acryl-datahub 0.15.0rc2__py3-none-any.whl → 0.15.0rc4__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic; the original page links to more details.
- {acryl_datahub-0.15.0rc2.dist-info → acryl_datahub-0.15.0rc4.dist-info}/METADATA +2390 -2390
- {acryl_datahub-0.15.0rc2.dist-info → acryl_datahub-0.15.0rc4.dist-info}/RECORD +20 -19
- {acryl_datahub-0.15.0rc2.dist-info → acryl_datahub-0.15.0rc4.dist-info}/entry_points.txt +1 -1
- datahub/__init__.py +1 -1
- datahub/ingestion/graph/client.py +2 -0
- datahub/ingestion/run/pipeline.py +5 -4
- datahub/ingestion/source/feast.py +97 -6
- datahub/ingestion/source/gc/datahub_gc.py +22 -5
- datahub/ingestion/source/gc/dataprocess_cleanup.py +3 -1
- datahub/ingestion/source/powerbi/__init__.py +0 -1
- datahub/ingestion/source/powerbi/config.py +3 -3
- datahub/ingestion/source/powerbi/m_query/data_classes.py +34 -2
- datahub/ingestion/source/powerbi/m_query/parser.py +6 -3
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +920 -0
- datahub/ingestion/source/powerbi/m_query/resolver.py +16 -938
- datahub/ingestion/source/powerbi/m_query/validator.py +9 -3
- datahub/ingestion/source/powerbi/powerbi.py +12 -6
- datahub/telemetry/telemetry.py +23 -9
- {acryl_datahub-0.15.0rc2.dist-info → acryl_datahub-0.15.0rc4.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0rc2.dist-info → acryl_datahub-0.15.0rc4.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
datahub/__init__.py,sha256=
|
|
1
|
+
datahub/__init__.py,sha256=s_nIg7mDSc39CXTxls5vnfHHXg9rzRp55gtGWfhtJWM,574
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
3
|
datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
|
|
4
4
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -162,7 +162,7 @@ datahub/ingestion/glossary/classifier.py,sha256=daLxnVv_JlfB_jBOxH5LrU_xQRndrsGo
|
|
|
162
162
|
datahub/ingestion/glossary/classifier_registry.py,sha256=yFOYLQhDgCLqXYMG3L1BquXafeLcZDcmp8meyw6k9ts,307
|
|
163
163
|
datahub/ingestion/glossary/datahub_classifier.py,sha256=8VhwuLDhyOqqOr0jqAPIgorb4eAOnvTr4m13Y2Wy1-E,7515
|
|
164
164
|
datahub/ingestion/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
165
|
-
datahub/ingestion/graph/client.py,sha256
|
|
165
|
+
datahub/ingestion/graph/client.py,sha256=oBlM6RSo0SPFJ-yit2eFFOB3rOpnjKtQ83YNiWGd334,64584
|
|
166
166
|
datahub/ingestion/graph/config.py,sha256=3b_Gxa5wcBnphP63bBiAFdWS7PJhUHRE1WZL_q4Cw8k,749
|
|
167
167
|
datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
|
|
168
168
|
datahub/ingestion/graph/filters.py,sha256=UeUZQHoimavIYx-jXLA0WGkOUe10TaO8uEZkfa-QgNE,6188
|
|
@@ -172,7 +172,7 @@ datahub/ingestion/reporting/file_reporter.py,sha256=tiWukmMxHrTQI3rOAumsq6lRlw8T
|
|
|
172
172
|
datahub/ingestion/reporting/reporting_provider_registry.py,sha256=jTYSh3T4sensjnHQfPLiIcbA2dG8w0px9ghChAJjGdU,310
|
|
173
173
|
datahub/ingestion/run/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
174
174
|
datahub/ingestion/run/connection.py,sha256=dqS9Fp8byIJNydPmVgtjjjlPJguuUWuMuvGnpNbQdSs,1474
|
|
175
|
-
datahub/ingestion/run/pipeline.py,sha256=
|
|
175
|
+
datahub/ingestion/run/pipeline.py,sha256=8MNUC19h7AvxjlDJj3E_FZlY56SAUlYG0heIko2XK_g,30572
|
|
176
176
|
datahub/ingestion/run/pipeline_config.py,sha256=91Uvs76EGbCzZZbm819TT0L6pixf2tfI2_nHpnCoyS4,3948
|
|
177
177
|
datahub/ingestion/sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
178
178
|
datahub/ingestion/sink/blackhole.py,sha256=-jYcWo4i8q7312bCIoHrGr7nT9JdPvA7c4jvSc7YOgY,557
|
|
@@ -187,7 +187,7 @@ datahub/ingestion/source/confluent_schema_registry.py,sha256=_h9D8bUXoaGcwgwB94d
|
|
|
187
187
|
datahub/ingestion/source/csv_enricher.py,sha256=xjCbcsSMM8l_ASCRAnNsUGKuYMrD1lec19Waixub1EM,29498
|
|
188
188
|
datahub/ingestion/source/demo_data.py,sha256=yzA_R-wfSX2WPz0i5ukYlscpmpb0Pt8D7EkhtKfftvo,1286
|
|
189
189
|
datahub/ingestion/source/elastic_search.py,sha256=qFUVNzynTVJTabASTjGMu8Qhf9UpNbEtSBFjaPQjBJE,22641
|
|
190
|
-
datahub/ingestion/source/feast.py,sha256=
|
|
190
|
+
datahub/ingestion/source/feast.py,sha256=uZpeUkJsiNlvZcUkARiEuZT_3n6sbGc0yFzwqhtnefA,18103
|
|
191
191
|
datahub/ingestion/source/file.py,sha256=pH-Qkjh5FQ2XvyYPE7Z8XEY4vUk_SUHxm8p8IxG12tU,15879
|
|
192
192
|
datahub/ingestion/source/ge_data_profiler.py,sha256=JqTonv8y7Re4Rfn2YKOEaLufiiAOWKfK1XQvJfV5dvs,64126
|
|
193
193
|
datahub/ingestion/source/ge_profiling_config.py,sha256=P-9pd20koFvpxeEL_pqFvKWWz-qnpZ6XkELUyBKr7is,10807
|
|
@@ -300,8 +300,8 @@ datahub/ingestion/source/fivetran/fivetran.py,sha256=uKbM5czPz-6LOseoh1FwavWDIuL
|
|
|
300
300
|
datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=EAak3hJpe75WZSgz6wP_CyAT5Cian2N4a-lb8x1NKHk,12776
|
|
301
301
|
datahub/ingestion/source/fivetran/fivetran_query.py,sha256=vLrTj7e-0NxZ2U4bWTB57pih42WirqPlUvwtIRfStlQ,5275
|
|
302
302
|
datahub/ingestion/source/gc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
303
|
-
datahub/ingestion/source/gc/datahub_gc.py,sha256=
|
|
304
|
-
datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=
|
|
303
|
+
datahub/ingestion/source/gc/datahub_gc.py,sha256=f6Erj3KfD0Hx3ydwL5MUVCZgFzS9c6U2Pkr54JLIUOA,12394
|
|
304
|
+
datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=IEEHO6UvDWWK3W5siqFrk4J1zUKbL6TrKNUaXdNiEW4,14362
|
|
305
305
|
datahub/ingestion/source/gc/execution_request_cleanup.py,sha256=cHJmxz4NmA7VjTX2iGEo3wZ_SDrjC_rCQcnRxKgfUVI,8713
|
|
306
306
|
datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=qLgdr-Rrsba0z_Y-CaHT9d1zSgy2jzg6CXaCKoN2jFk,7360
|
|
307
307
|
datahub/ingestion/source/gcs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -347,18 +347,19 @@ datahub/ingestion/source/looker/view_upstream.py,sha256=k278-uwh8uspdREpjE_uqks4
|
|
|
347
347
|
datahub/ingestion/source/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
348
348
|
datahub/ingestion/source/metadata/business_glossary.py,sha256=eRVRpQI0ZX5OofS1BUhNihFOfWih70TIAkJM7zaMH80,17577
|
|
349
349
|
datahub/ingestion/source/metadata/lineage.py,sha256=XiZGuY6k3O9qBmgo7AzosIndJHwrvEhapVLdRlDxCuc,9507
|
|
350
|
-
datahub/ingestion/source/powerbi/__init__.py,sha256=
|
|
351
|
-
datahub/ingestion/source/powerbi/config.py,sha256=
|
|
350
|
+
datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
351
|
+
datahub/ingestion/source/powerbi/config.py,sha256=LV8BOm2zzF9t0RMwQVVUNB0bStzBPo8A6JkaW0xlgsQ,23241
|
|
352
352
|
datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=AIU89lVPoCWlzc_RfUjDJwRQ11akPtnGpBTluBMCKio,2242
|
|
353
353
|
datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule,sha256=PRWzuZMMhKdOVoAaE8csHvUFbZHxYe5meJHgrqlgiuw,19795
|
|
354
|
-
datahub/ingestion/source/powerbi/powerbi.py,sha256=
|
|
354
|
+
datahub/ingestion/source/powerbi/powerbi.py,sha256=7UsAEqaFlkWONcXJdQ2hotUYYn46ks6Fe71KXEMh7lI,54495
|
|
355
355
|
datahub/ingestion/source/powerbi/m_query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
356
|
-
datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=
|
|
356
|
+
datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=s2Nckmr50hxae5gPFcIfpyLzYpaMH56Q9nsDsEgi_-k,2243
|
|
357
357
|
datahub/ingestion/source/powerbi/m_query/native_sql_parser.py,sha256=zzKVDGeUM3Yv3-zNah4D6mSnr6jXsstNuLmzczcPQEE,3683
|
|
358
|
-
datahub/ingestion/source/powerbi/m_query/parser.py,sha256=
|
|
359
|
-
datahub/ingestion/source/powerbi/m_query/
|
|
358
|
+
datahub/ingestion/source/powerbi/m_query/parser.py,sha256=pB1LGdb02Ryf8Pr8JPSgiOmLE6mEAgDbKodtKOOY6LU,5782
|
|
359
|
+
datahub/ingestion/source/powerbi/m_query/pattern_handler.py,sha256=K9kyX-pMFjOOCAvedKXXce7xG-cHwTGQTrBT5GFXEIg,32865
|
|
360
|
+
datahub/ingestion/source/powerbi/m_query/resolver.py,sha256=v2DJYT70Vtw9NyIwFecP7tHGcVxBWH9UqV7vbpYPhws,16985
|
|
360
361
|
datahub/ingestion/source/powerbi/m_query/tree_function.py,sha256=i7HM9Oyj9XdJpNfG2lE8puxeuc47aSJ-5dPdBEcw2WU,6165
|
|
361
|
-
datahub/ingestion/source/powerbi/m_query/validator.py,sha256=
|
|
362
|
+
datahub/ingestion/source/powerbi/m_query/validator.py,sha256=crG-VZy2XPieiDliP9yVMgiFcc8b2xbZyDFEATXqEAQ,1155
|
|
362
363
|
datahub/ingestion/source/powerbi/rest_api_wrapper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
363
364
|
datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py,sha256=xqAsnNUCP44Wd1rE1m_phbKtNCMJTFJfOX4_2varadg,8298
|
|
364
365
|
datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py,sha256=O2XTVBdXteIgQF8Lss_t2RhRSsRMmMyWrAoNonDMQFI,39604
|
|
@@ -872,7 +873,7 @@ datahub/sql_parsing/sqlglot_utils.py,sha256=8MYzkyekhup3ihVStRPuwneWPNu17xhBg5SG
|
|
|
872
873
|
datahub/sql_parsing/tool_meta_extractor.py,sha256=pE-pkRKBfNTXEJkaQM9NlG807mc-X6OtetgskJySCs8,2908
|
|
873
874
|
datahub/telemetry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
874
875
|
datahub/telemetry/stats.py,sha256=YltbtC3fe6rl1kcxn1A-mSnVpECTPm5k-brrUt7QxTI,967
|
|
875
|
-
datahub/telemetry/telemetry.py,sha256=
|
|
876
|
+
datahub/telemetry/telemetry.py,sha256=gzla-QGNsynGg2FqFxiDDFQ0emG53MJ9lhOA2-UUg-Y,15047
|
|
876
877
|
datahub/testing/__init__.py,sha256=TywIuzGQvzJsNhI_PGD1RFk11M3RtGl9jIMtAVVHIkg,272
|
|
877
878
|
datahub/testing/check_imports.py,sha256=EKuJmgUA46uOrlaOy0fCvPB7j9POkpJ0ExhO_pT3YAk,1356
|
|
878
879
|
datahub/testing/check_sql_parser_result.py,sha256=f7U7IUSbfV4VACdNI857wPZ9tAZ9j6mXiXmcJNT_RzM,2671
|
|
@@ -971,8 +972,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
971
972
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
972
973
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
973
974
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
974
|
-
acryl_datahub-0.15.
|
|
975
|
-
acryl_datahub-0.15.
|
|
976
|
-
acryl_datahub-0.15.
|
|
977
|
-
acryl_datahub-0.15.
|
|
978
|
-
acryl_datahub-0.15.
|
|
975
|
+
acryl_datahub-0.15.0rc4.dist-info/METADATA,sha256=c65oplVIArqkkCm4xgv3OOJqeiK_9XKwiN5O-UPJ6Ss,171129
|
|
976
|
+
acryl_datahub-0.15.0rc4.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
977
|
+
acryl_datahub-0.15.0rc4.dist-info/entry_points.txt,sha256=3jOfMXB66r8zRDaqzRYpNc0tK-oUO-3tXlnGYDdVAmg,9440
|
|
978
|
+
acryl_datahub-0.15.0rc4.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
979
|
+
acryl_datahub-0.15.0rc4.dist-info/RECORD,,
|
|
@@ -73,7 +73,7 @@ okta = datahub.ingestion.source.identity.okta:OktaSource
|
|
|
73
73
|
openapi = datahub.ingestion.source.openapi:OpenApiSource
|
|
74
74
|
oracle = datahub.ingestion.source.sql.oracle:OracleSource
|
|
75
75
|
postgres = datahub.ingestion.source.sql.postgres:PostgresSource
|
|
76
|
-
powerbi = datahub.ingestion.source.powerbi:PowerBiDashboardSource
|
|
76
|
+
powerbi = datahub.ingestion.source.powerbi.powerbi:PowerBiDashboardSource
|
|
77
77
|
powerbi-report-server = datahub.ingestion.source.powerbi_report_server:PowerBiReportServerDashboardSource
|
|
78
78
|
preset = datahub.ingestion.source.preset:PresetSource
|
|
79
79
|
presto = datahub.ingestion.source.sql.presto:PrestoSource
|
datahub/__init__.py
CHANGED
|
@@ -67,6 +67,7 @@ from datahub.metadata.schema_classes import (
|
|
|
67
67
|
SystemMetadataClass,
|
|
68
68
|
TelemetryClientIdClass,
|
|
69
69
|
)
|
|
70
|
+
from datahub.telemetry.telemetry import telemetry_instance
|
|
70
71
|
from datahub.utilities.perf_timer import PerfTimer
|
|
71
72
|
from datahub.utilities.str_enum import StrEnum
|
|
72
73
|
from datahub.utilities.urns.urn import Urn, guess_entity_type
|
|
@@ -1819,4 +1820,5 @@ def get_default_graph() -> DataHubGraph:
|
|
|
1819
1820
|
graph_config = config_utils.load_client_config()
|
|
1820
1821
|
graph = DataHubGraph(graph_config)
|
|
1821
1822
|
graph.test_connection()
|
|
1823
|
+
telemetry_instance.set_context(server=graph)
|
|
1822
1824
|
return graph
|
|
@@ -44,7 +44,8 @@ from datahub.ingestion.transformer.system_metadata_transformer import (
|
|
|
44
44
|
)
|
|
45
45
|
from datahub.ingestion.transformer.transform_registry import transform_registry
|
|
46
46
|
from datahub.metadata.schema_classes import MetadataChangeProposalClass
|
|
47
|
-
from datahub.telemetry import stats
|
|
47
|
+
from datahub.telemetry import stats
|
|
48
|
+
from datahub.telemetry.telemetry import telemetry_instance
|
|
48
49
|
from datahub.utilities._custom_package_loader import model_version_name
|
|
49
50
|
from datahub.utilities.global_warning_util import (
|
|
50
51
|
clear_global_warnings,
|
|
@@ -273,8 +274,9 @@ class Pipeline:
|
|
|
273
274
|
if self.graph is None and isinstance(self.sink, DatahubRestSink):
|
|
274
275
|
with _add_init_error_context("setup default datahub client"):
|
|
275
276
|
self.graph = self.sink.emitter.to_graph()
|
|
277
|
+
self.graph.test_connection()
|
|
276
278
|
self.ctx.graph = self.graph
|
|
277
|
-
|
|
279
|
+
telemetry_instance.set_context(server=self.graph)
|
|
278
280
|
|
|
279
281
|
with set_graph_context(self.graph):
|
|
280
282
|
with _add_init_error_context("configure reporters"):
|
|
@@ -615,7 +617,7 @@ class Pipeline:
|
|
|
615
617
|
sink_warnings = len(self.sink.get_report().warnings)
|
|
616
618
|
global_warnings = len(get_global_warnings())
|
|
617
619
|
|
|
618
|
-
|
|
620
|
+
telemetry_instance.ping(
|
|
619
621
|
"ingest_stats",
|
|
620
622
|
{
|
|
621
623
|
"source_type": self.source_type,
|
|
@@ -637,7 +639,6 @@ class Pipeline:
|
|
|
637
639
|
),
|
|
638
640
|
"has_pipeline_name": bool(self.config.pipeline_name),
|
|
639
641
|
},
|
|
640
|
-
self.ctx.graph,
|
|
641
642
|
)
|
|
642
643
|
|
|
643
644
|
def _approx_all_vals(self, d: LossyList[Any]) -> int:
|
|
@@ -42,10 +42,14 @@ from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import (
|
|
|
42
42
|
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
|
|
43
43
|
from datahub.metadata.schema_classes import (
|
|
44
44
|
BrowsePathsClass,
|
|
45
|
+
GlobalTagsClass,
|
|
45
46
|
MLFeaturePropertiesClass,
|
|
46
47
|
MLFeatureTablePropertiesClass,
|
|
47
48
|
MLPrimaryKeyPropertiesClass,
|
|
49
|
+
OwnerClass,
|
|
50
|
+
OwnershipClass,
|
|
48
51
|
StatusClass,
|
|
52
|
+
TagAssociationClass,
|
|
49
53
|
)
|
|
50
54
|
|
|
51
55
|
# FIXME: ValueType module cannot be used as a type
|
|
@@ -91,6 +95,24 @@ class FeastRepositorySourceConfig(ConfigModel):
|
|
|
91
95
|
environment: str = Field(
|
|
92
96
|
default=DEFAULT_ENV, description="Environment to use when constructing URNs"
|
|
93
97
|
)
|
|
98
|
+
# owner_mappings example:
|
|
99
|
+
# This must be added to the recipe in order to extract owners, otherwise NO owners will be extracted
|
|
100
|
+
# owner_mappings:
|
|
101
|
+
# - feast_owner_name: "<owner>"
|
|
102
|
+
# datahub_owner_urn: "urn:li:corpGroup:<owner>"
|
|
103
|
+
# datahub_ownership_type: "BUSINESS_OWNER"
|
|
104
|
+
owner_mappings: Optional[List[Dict[str, str]]] = Field(
|
|
105
|
+
default=None, description="Mapping of owner names to owner types"
|
|
106
|
+
)
|
|
107
|
+
enable_owner_extraction: bool = Field(
|
|
108
|
+
default=False,
|
|
109
|
+
description="If this is disabled, then we NEVER try to map owners. "
|
|
110
|
+
"If this is enabled, then owner_mappings is REQUIRED to extract ownership.",
|
|
111
|
+
)
|
|
112
|
+
enable_tag_extraction: bool = Field(
|
|
113
|
+
default=False,
|
|
114
|
+
description="If this is disabled, then we NEVER try to extract tags.",
|
|
115
|
+
)
|
|
94
116
|
|
|
95
117
|
|
|
96
118
|
@platform_name("Feast")
|
|
@@ -215,10 +237,15 @@ class FeastRepositorySource(Source):
|
|
|
215
237
|
"""
|
|
216
238
|
|
|
217
239
|
feature_view_name = f"{self.feature_store.project}.{feature_view.name}"
|
|
240
|
+
aspects = (
|
|
241
|
+
[StatusClass(removed=False)]
|
|
242
|
+
+ self._get_tags(entity)
|
|
243
|
+
+ self._get_owners(entity)
|
|
244
|
+
)
|
|
218
245
|
|
|
219
246
|
entity_snapshot = MLPrimaryKeySnapshot(
|
|
220
247
|
urn=builder.make_ml_primary_key_urn(feature_view_name, entity.name),
|
|
221
|
-
aspects=
|
|
248
|
+
aspects=aspects,
|
|
222
249
|
)
|
|
223
250
|
|
|
224
251
|
entity_snapshot.aspects.append(
|
|
@@ -243,10 +270,11 @@ class FeastRepositorySource(Source):
|
|
|
243
270
|
Generate an MLFeature work unit for a Feast feature.
|
|
244
271
|
"""
|
|
245
272
|
feature_view_name = f"{self.feature_store.project}.{feature_view.name}"
|
|
273
|
+
aspects = [StatusClass(removed=False)] + self._get_tags(field)
|
|
246
274
|
|
|
247
275
|
feature_snapshot = MLFeatureSnapshot(
|
|
248
276
|
urn=builder.make_ml_feature_urn(feature_view_name, field.name),
|
|
249
|
-
aspects=
|
|
277
|
+
aspects=aspects,
|
|
250
278
|
)
|
|
251
279
|
|
|
252
280
|
feature_sources = []
|
|
@@ -295,13 +323,18 @@ class FeastRepositorySource(Source):
|
|
|
295
323
|
"""
|
|
296
324
|
|
|
297
325
|
feature_view_name = f"{self.feature_store.project}.{feature_view.name}"
|
|
326
|
+
aspects = (
|
|
327
|
+
[
|
|
328
|
+
BrowsePathsClass(paths=[f"/feast/{self.feature_store.project}"]),
|
|
329
|
+
StatusClass(removed=False),
|
|
330
|
+
]
|
|
331
|
+
+ self._get_tags(feature_view)
|
|
332
|
+
+ self._get_owners(feature_view)
|
|
333
|
+
)
|
|
298
334
|
|
|
299
335
|
feature_view_snapshot = MLFeatureTableSnapshot(
|
|
300
336
|
urn=builder.make_ml_feature_table_urn("feast", feature_view_name),
|
|
301
|
-
aspects=
|
|
302
|
-
BrowsePathsClass(paths=[f"/feast/{self.feature_store.project}"]),
|
|
303
|
-
StatusClass(removed=False),
|
|
304
|
-
],
|
|
337
|
+
aspects=aspects,
|
|
305
338
|
)
|
|
306
339
|
|
|
307
340
|
feature_view_snapshot.aspects.append(
|
|
@@ -360,6 +393,64 @@ class FeastRepositorySource(Source):
|
|
|
360
393
|
|
|
361
394
|
return MetadataWorkUnit(id=on_demand_feature_view_name, mce=mce)
|
|
362
395
|
|
|
396
|
+
# If a tag is specified in a Feast object, then the tag will be ingested into Datahub if enable_tag_extraction is
|
|
397
|
+
# True, otherwise NO tags will be ingested
|
|
398
|
+
def _get_tags(self, obj: Union[Entity, FeatureView, FeastField]) -> list:
|
|
399
|
+
"""
|
|
400
|
+
Extracts tags from the given object and returns a list of aspects.
|
|
401
|
+
"""
|
|
402
|
+
aspects: List[Union[GlobalTagsClass]] = []
|
|
403
|
+
|
|
404
|
+
# Extract tags
|
|
405
|
+
if self.source_config.enable_tag_extraction:
|
|
406
|
+
if obj.tags.get("name"):
|
|
407
|
+
tag_name: str = obj.tags["name"]
|
|
408
|
+
tag_association = TagAssociationClass(
|
|
409
|
+
tag=builder.make_tag_urn(tag_name)
|
|
410
|
+
)
|
|
411
|
+
global_tags_aspect = GlobalTagsClass(tags=[tag_association])
|
|
412
|
+
aspects.append(global_tags_aspect)
|
|
413
|
+
|
|
414
|
+
return aspects
|
|
415
|
+
|
|
416
|
+
# If an owner is specified in a Feast object, it will only be ingested into Datahub if owner_mappings is specified
|
|
417
|
+
# and enable_owner_extraction is True in FeastRepositorySourceConfig, otherwise NO owners will be ingested
|
|
418
|
+
def _get_owners(self, obj: Union[Entity, FeatureView, FeastField]) -> list:
|
|
419
|
+
"""
|
|
420
|
+
Extracts owners from the given object and returns a list of aspects.
|
|
421
|
+
"""
|
|
422
|
+
aspects: List[Union[OwnershipClass]] = []
|
|
423
|
+
|
|
424
|
+
# Extract owner
|
|
425
|
+
if self.source_config.enable_owner_extraction:
|
|
426
|
+
owner = getattr(obj, "owner", None)
|
|
427
|
+
if owner:
|
|
428
|
+
# Create owner association, skipping if None
|
|
429
|
+
owner_association = self._create_owner_association(owner)
|
|
430
|
+
if owner_association: # Only add valid owner associations
|
|
431
|
+
owners_aspect = OwnershipClass(owners=[owner_association])
|
|
432
|
+
aspects.append(owners_aspect)
|
|
433
|
+
|
|
434
|
+
return aspects
|
|
435
|
+
|
|
436
|
+
def _create_owner_association(self, owner: str) -> Optional[OwnerClass]:
|
|
437
|
+
"""
|
|
438
|
+
Create an OwnerClass instance for the given owner using the owner mappings.
|
|
439
|
+
"""
|
|
440
|
+
if self.source_config.owner_mappings is not None:
|
|
441
|
+
for mapping in self.source_config.owner_mappings:
|
|
442
|
+
if mapping["feast_owner_name"] == owner:
|
|
443
|
+
ownership_type_class: str = mapping.get(
|
|
444
|
+
"datahub_ownership_type", "TECHNICAL_OWNER"
|
|
445
|
+
)
|
|
446
|
+
datahub_owner_urn = mapping.get("datahub_owner_urn")
|
|
447
|
+
if datahub_owner_urn:
|
|
448
|
+
return OwnerClass(
|
|
449
|
+
owner=datahub_owner_urn,
|
|
450
|
+
type=ownership_type_class,
|
|
451
|
+
)
|
|
452
|
+
return None
|
|
453
|
+
|
|
363
454
|
@classmethod
|
|
364
455
|
def create(cls, config_dict, ctx):
|
|
365
456
|
config = FeastRepositorySourceConfig.parse_obj(config_dict)
|
|
@@ -144,15 +144,32 @@ class DataHubGcSource(Source):
|
|
|
144
144
|
self,
|
|
145
145
|
) -> Iterable[MetadataWorkUnit]:
|
|
146
146
|
if self.config.cleanup_expired_tokens:
|
|
147
|
-
|
|
147
|
+
try:
|
|
148
|
+
self.revoke_expired_tokens()
|
|
149
|
+
except Exception as e:
|
|
150
|
+
self.report.failure("While trying to cleanup expired token ", exc=e)
|
|
148
151
|
if self.config.truncate_indices:
|
|
149
|
-
|
|
152
|
+
try:
|
|
153
|
+
self.truncate_indices()
|
|
154
|
+
except Exception as e:
|
|
155
|
+
self.report.failure("While trying to truncate indices ", exc=e)
|
|
150
156
|
if self.dataprocess_cleanup:
|
|
151
|
-
|
|
157
|
+
try:
|
|
158
|
+
yield from self.dataprocess_cleanup.get_workunits_internal()
|
|
159
|
+
except Exception as e:
|
|
160
|
+
self.report.failure("While trying to cleanup data process ", exc=e)
|
|
152
161
|
if self.soft_deleted_entities_cleanup:
|
|
153
|
-
|
|
162
|
+
try:
|
|
163
|
+
self.soft_deleted_entities_cleanup.cleanup_soft_deleted_entities()
|
|
164
|
+
except Exception as e:
|
|
165
|
+
self.report.failure(
|
|
166
|
+
"While trying to cleanup soft deleted entities ", exc=e
|
|
167
|
+
)
|
|
154
168
|
if self.execution_request_cleanup:
|
|
155
|
-
|
|
169
|
+
try:
|
|
170
|
+
self.execution_request_cleanup.run()
|
|
171
|
+
except Exception as e:
|
|
172
|
+
self.report.failure("While trying to cleanup execution request ", exc=e)
|
|
156
173
|
yield from []
|
|
157
174
|
|
|
158
175
|
def truncate_indices(self) -> None:
|
|
@@ -404,7 +404,9 @@ class DataProcessCleanup:
|
|
|
404
404
|
try:
|
|
405
405
|
self.delete_dpi_from_datajobs(datajob_entity)
|
|
406
406
|
except Exception as e:
|
|
407
|
-
|
|
407
|
+
self.report.failure(
|
|
408
|
+
f"While trying to delete {datajob_entity} ", exc=e
|
|
409
|
+
)
|
|
408
410
|
if (
|
|
409
411
|
datajob_entity.total_runs == 0
|
|
410
412
|
and self.config.delete_empty_data_jobs
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
from datahub.ingestion.source.powerbi.powerbi import PowerBiDashboardSource
|
|
@@ -173,7 +173,7 @@ class SupportedDataPlatform(Enum):
|
|
|
173
173
|
datahub_data_platform_name="redshift",
|
|
174
174
|
)
|
|
175
175
|
|
|
176
|
-
|
|
176
|
+
DATABRICKS_SQL = DataPlatformPair(
|
|
177
177
|
powerbi_data_platform_name="Databricks", datahub_data_platform_name="databricks"
|
|
178
178
|
)
|
|
179
179
|
|
|
@@ -313,8 +313,8 @@ class PowerBiDashboardSourceConfig(
|
|
|
313
313
|
" Note: This field works in conjunction with 'workspace_type_filter' and both must be considered when filtering workspaces.",
|
|
314
314
|
)
|
|
315
315
|
|
|
316
|
-
# Dataset type mapping PowerBI support many type of data-sources. Here user
|
|
317
|
-
# DataSource
|
|
316
|
+
# Dataset type mapping PowerBI support many type of data-sources. Here user needs to define what type of PowerBI
|
|
317
|
+
# DataSource needs to be mapped to corresponding DataHub Platform DataSource. For example, PowerBI `Snowflake` is
|
|
318
318
|
# mapped to DataHub `snowflake` PowerBI `PostgreSQL` is mapped to DataHub `postgres` and so on.
|
|
319
319
|
dataset_type_mapping: Union[
|
|
320
320
|
Dict[str, str], Dict[str, PlatformDetail]
|
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from abc import ABC
|
|
3
3
|
from dataclasses import dataclass
|
|
4
|
-
from
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import Any, Dict, List, Optional
|
|
5
6
|
|
|
6
7
|
from lark import Tree
|
|
7
8
|
|
|
9
|
+
from datahub.ingestion.source.powerbi.config import DataPlatformPair
|
|
10
|
+
from datahub.sql_parsing.sqlglot_lineage import ColumnLineageInfo
|
|
11
|
+
|
|
8
12
|
TRACE_POWERBI_MQUERY_PARSER = os.getenv("DATAHUB_TRACE_POWERBI_MQUERY_PARSER", False)
|
|
9
13
|
|
|
10
14
|
|
|
@@ -30,7 +34,7 @@ class IdentifierAccessor(AbstractIdentifierAccessor):
|
|
|
30
34
|
|
|
31
35
|
"[Schema="public",Item="order_date"]" is "items" in ItemSelector. Data of items varies as per DataSource
|
|
32
36
|
|
|
33
|
-
"public_order_date" is in "next" of ItemSelector. The "next" will be None if this identifier is leaf i.e
|
|
37
|
+
"public_order_date" is in "next" of ItemSelector. The "next" will be None if this identifier is leaf i.e., table
|
|
34
38
|
|
|
35
39
|
"""
|
|
36
40
|
|
|
@@ -53,3 +57,31 @@ class ReferencedTable:
|
|
|
53
57
|
database: str
|
|
54
58
|
schema: str
|
|
55
59
|
table: str
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclass
|
|
63
|
+
class DataPlatformTable:
|
|
64
|
+
data_platform_pair: DataPlatformPair
|
|
65
|
+
urn: str
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@dataclass
|
|
69
|
+
class Lineage:
|
|
70
|
+
upstreams: List[DataPlatformTable]
|
|
71
|
+
column_lineage: List[ColumnLineageInfo]
|
|
72
|
+
|
|
73
|
+
@staticmethod
|
|
74
|
+
def empty() -> "Lineage":
|
|
75
|
+
return Lineage(upstreams=[], column_lineage=[])
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class FunctionName(Enum):
|
|
79
|
+
NATIVE_QUERY = "Value.NativeQuery"
|
|
80
|
+
POSTGRESQL_DATA_ACCESS = "PostgreSQL.Database"
|
|
81
|
+
ORACLE_DATA_ACCESS = "Oracle.Database"
|
|
82
|
+
SNOWFLAKE_DATA_ACCESS = "Snowflake.Databases"
|
|
83
|
+
MSSQL_DATA_ACCESS = "Sql.Database"
|
|
84
|
+
DATABRICK_DATA_ACCESS = "Databricks.Catalogs"
|
|
85
|
+
GOOGLE_BIGQUERY_DATA_ACCESS = "GoogleBigQuery.Database"
|
|
86
|
+
AMAZON_REDSHIFT_DATA_ACCESS = "AmazonRedshift.Database"
|
|
87
|
+
DATABRICK_MULTI_CLOUD_DATA_ACCESS = "DatabricksMultiCloud.Catalogs"
|
|
@@ -7,6 +7,7 @@ from typing import Dict, List
|
|
|
7
7
|
import lark
|
|
8
8
|
from lark import Lark, Tree
|
|
9
9
|
|
|
10
|
+
import datahub.ingestion.source.powerbi.m_query.data_classes
|
|
10
11
|
from datahub.ingestion.api.common import PipelineContext
|
|
11
12
|
from datahub.ingestion.source.powerbi.config import (
|
|
12
13
|
PowerBiDashboardSourceConfig,
|
|
@@ -65,7 +66,7 @@ def get_upstream_tables(
|
|
|
65
66
|
ctx: PipelineContext,
|
|
66
67
|
config: PowerBiDashboardSourceConfig,
|
|
67
68
|
parameters: Dict[str, str] = {},
|
|
68
|
-
) -> List[
|
|
69
|
+
) -> List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage]:
|
|
69
70
|
if table.expression is None:
|
|
70
71
|
logger.debug(f"There is no M-Query expression in table {table.full_name}")
|
|
71
72
|
return []
|
|
@@ -127,12 +128,14 @@ def get_upstream_tables(
|
|
|
127
128
|
reporter.m_query_parse_successes += 1
|
|
128
129
|
|
|
129
130
|
try:
|
|
130
|
-
lineage: List[
|
|
131
|
+
lineage: List[
|
|
132
|
+
datahub.ingestion.source.powerbi.m_query.data_classes.Lineage
|
|
133
|
+
] = resolver.MQueryResolver(
|
|
131
134
|
table=table,
|
|
132
135
|
parse_tree=parse_tree,
|
|
133
136
|
reporter=reporter,
|
|
134
137
|
parameters=parameters,
|
|
135
|
-
).
|
|
138
|
+
).resolve_to_lineage(
|
|
136
139
|
ctx=ctx,
|
|
137
140
|
config=config,
|
|
138
141
|
platform_instance_resolver=platform_instance_resolver,
|