acryl-datahub 0.15.0rc2__py3-none-any.whl → 0.15.0rc4__py3-none-any.whl

This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.

Potentially problematic release: this version of acryl-datahub might be problematic.

acryl_datahub-0.15.0rc4.dist-info/RECORD CHANGED

@@ -1,4 +1,4 @@
- datahub/__init__.py,sha256=KO7jEI6gL24DSnvpEGpIbDtk11L8-MExzwH49xBLwgc,574
+ datahub/__init__.py,sha256=s_nIg7mDSc39CXTxls5vnfHHXg9rzRp55gtGWfhtJWM,574
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
  datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -162,7 +162,7 @@ datahub/ingestion/glossary/classifier.py,sha256=daLxnVv_JlfB_jBOxH5LrU_xQRndrsGo
  datahub/ingestion/glossary/classifier_registry.py,sha256=yFOYLQhDgCLqXYMG3L1BquXafeLcZDcmp8meyw6k9ts,307
  datahub/ingestion/glossary/datahub_classifier.py,sha256=8VhwuLDhyOqqOr0jqAPIgorb4eAOnvTr4m13Y2Wy1-E,7515
  datahub/ingestion/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/graph/client.py,sha256=-SpQq0zWJ9hoeG9YhWUVZgPB97DD78AsgFJgpOsdAZ0,64476
+ datahub/ingestion/graph/client.py,sha256=oBlM6RSo0SPFJ-yit2eFFOB3rOpnjKtQ83YNiWGd334,64584
  datahub/ingestion/graph/config.py,sha256=3b_Gxa5wcBnphP63bBiAFdWS7PJhUHRE1WZL_q4Cw8k,749
  datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
  datahub/ingestion/graph/filters.py,sha256=UeUZQHoimavIYx-jXLA0WGkOUe10TaO8uEZkfa-QgNE,6188
@@ -172,7 +172,7 @@ datahub/ingestion/reporting/file_reporter.py,sha256=tiWukmMxHrTQI3rOAumsq6lRlw8T
  datahub/ingestion/reporting/reporting_provider_registry.py,sha256=jTYSh3T4sensjnHQfPLiIcbA2dG8w0px9ghChAJjGdU,310
  datahub/ingestion/run/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/run/connection.py,sha256=dqS9Fp8byIJNydPmVgtjjjlPJguuUWuMuvGnpNbQdSs,1474
- datahub/ingestion/run/pipeline.py,sha256=QV1i1TWCIH9gBDGe8Xs0JEbOqEUmWbhUhfx7gvrR7vc,30548
+ datahub/ingestion/run/pipeline.py,sha256=8MNUC19h7AvxjlDJj3E_FZlY56SAUlYG0heIko2XK_g,30572
  datahub/ingestion/run/pipeline_config.py,sha256=91Uvs76EGbCzZZbm819TT0L6pixf2tfI2_nHpnCoyS4,3948
  datahub/ingestion/sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/sink/blackhole.py,sha256=-jYcWo4i8q7312bCIoHrGr7nT9JdPvA7c4jvSc7YOgY,557
@@ -187,7 +187,7 @@ datahub/ingestion/source/confluent_schema_registry.py,sha256=_h9D8bUXoaGcwgwB94d
  datahub/ingestion/source/csv_enricher.py,sha256=xjCbcsSMM8l_ASCRAnNsUGKuYMrD1lec19Waixub1EM,29498
  datahub/ingestion/source/demo_data.py,sha256=yzA_R-wfSX2WPz0i5ukYlscpmpb0Pt8D7EkhtKfftvo,1286
  datahub/ingestion/source/elastic_search.py,sha256=qFUVNzynTVJTabASTjGMu8Qhf9UpNbEtSBFjaPQjBJE,22641
- datahub/ingestion/source/feast.py,sha256=NYaAjzLVRhmMKDawBwN0OL8AMyKDLsxOwEj3YFX0wIA,14244
+ datahub/ingestion/source/feast.py,sha256=uZpeUkJsiNlvZcUkARiEuZT_3n6sbGc0yFzwqhtnefA,18103
  datahub/ingestion/source/file.py,sha256=pH-Qkjh5FQ2XvyYPE7Z8XEY4vUk_SUHxm8p8IxG12tU,15879
  datahub/ingestion/source/ge_data_profiler.py,sha256=JqTonv8y7Re4Rfn2YKOEaLufiiAOWKfK1XQvJfV5dvs,64126
  datahub/ingestion/source/ge_profiling_config.py,sha256=P-9pd20koFvpxeEL_pqFvKWWz-qnpZ6XkELUyBKr7is,10807
@@ -300,8 +300,8 @@ datahub/ingestion/source/fivetran/fivetran.py,sha256=uKbM5czPz-6LOseoh1FwavWDIuL
  datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=EAak3hJpe75WZSgz6wP_CyAT5Cian2N4a-lb8x1NKHk,12776
  datahub/ingestion/source/fivetran/fivetran_query.py,sha256=vLrTj7e-0NxZ2U4bWTB57pih42WirqPlUvwtIRfStlQ,5275
  datahub/ingestion/source/gc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/source/gc/datahub_gc.py,sha256=p1LiiZJDMaEjWuhnT5t83ALWDEHcPqmoZX64fCBGYmQ,11645
- datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=Y4KluNVVSdUbE85jPba8oc_EKm8WmKJrIbAuTPnSzx0,14301
+ datahub/ingestion/source/gc/datahub_gc.py,sha256=f6Erj3KfD0Hx3ydwL5MUVCZgFzS9c6U2Pkr54JLIUOA,12394
+ datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=IEEHO6UvDWWK3W5siqFrk4J1zUKbL6TrKNUaXdNiEW4,14362
  datahub/ingestion/source/gc/execution_request_cleanup.py,sha256=cHJmxz4NmA7VjTX2iGEo3wZ_SDrjC_rCQcnRxKgfUVI,8713
  datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=qLgdr-Rrsba0z_Y-CaHT9d1zSgy2jzg6CXaCKoN2jFk,7360
  datahub/ingestion/source/gcs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -347,18 +347,19 @@ datahub/ingestion/source/looker/view_upstream.py,sha256=k278-uwh8uspdREpjE_uqks4
  datahub/ingestion/source/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/metadata/business_glossary.py,sha256=eRVRpQI0ZX5OofS1BUhNihFOfWih70TIAkJM7zaMH80,17577
  datahub/ingestion/source/metadata/lineage.py,sha256=XiZGuY6k3O9qBmgo7AzosIndJHwrvEhapVLdRlDxCuc,9507
- datahub/ingestion/source/powerbi/__init__.py,sha256=jYWYtaz95cyQAgEpZK1kxu1aKjOBAyDUlE77UdkCd6g,76
- datahub/ingestion/source/powerbi/config.py,sha256=T7E3YpfkLJlOJLr_xKfAVXhQRu_fkMqJbPtWp_v4k-8,23237
+ datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ datahub/ingestion/source/powerbi/config.py,sha256=LV8BOm2zzF9t0RMwQVVUNB0bStzBPo8A6JkaW0xlgsQ,23241
  datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=AIU89lVPoCWlzc_RfUjDJwRQ11akPtnGpBTluBMCKio,2242
  datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule,sha256=PRWzuZMMhKdOVoAaE8csHvUFbZHxYe5meJHgrqlgiuw,19795
- datahub/ingestion/source/powerbi/powerbi.py,sha256=AuJPYakZ0Uko8lQUkJkeKPj2mtNmOHndUSwcwPOteMU,54294
+ datahub/ingestion/source/powerbi/powerbi.py,sha256=7UsAEqaFlkWONcXJdQ2hotUYYn46ks6Fe71KXEMh7lI,54495
  datahub/ingestion/source/powerbi/m_query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=NLEF8uNQNhtOlMf42F7jHAd8C0TIwDCDOmMfLeRpyk8,1278
+ datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=s2Nckmr50hxae5gPFcIfpyLzYpaMH56Q9nsDsEgi_-k,2243
  datahub/ingestion/source/powerbi/m_query/native_sql_parser.py,sha256=zzKVDGeUM3Yv3-zNah4D6mSnr6jXsstNuLmzczcPQEE,3683
- datahub/ingestion/source/powerbi/m_query/parser.py,sha256=YZXE6L1SYsfoT_sXolrVOfmWQXZOaoN9_KQiA1cdGAQ,5626
- datahub/ingestion/source/powerbi/m_query/resolver.py,sha256=7lLdz8u3Xc2RIoNCwfwCJMj48naEQdNcbxPt-Vmt45w,50429
+ datahub/ingestion/source/powerbi/m_query/parser.py,sha256=pB1LGdb02Ryf8Pr8JPSgiOmLE6mEAgDbKodtKOOY6LU,5782
+ datahub/ingestion/source/powerbi/m_query/pattern_handler.py,sha256=K9kyX-pMFjOOCAvedKXXce7xG-cHwTGQTrBT5GFXEIg,32865
+ datahub/ingestion/source/powerbi/m_query/resolver.py,sha256=v2DJYT70Vtw9NyIwFecP7tHGcVxBWH9UqV7vbpYPhws,16985
  datahub/ingestion/source/powerbi/m_query/tree_function.py,sha256=i7HM9Oyj9XdJpNfG2lE8puxeuc47aSJ-5dPdBEcw2WU,6165
- datahub/ingestion/source/powerbi/m_query/validator.py,sha256=FFAwBl_WmV1SSrYhSbyq8FUsTD0vd0ncdysmkgdKxj8,1008
+ datahub/ingestion/source/powerbi/m_query/validator.py,sha256=crG-VZy2XPieiDliP9yVMgiFcc8b2xbZyDFEATXqEAQ,1155
  datahub/ingestion/source/powerbi/rest_api_wrapper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py,sha256=xqAsnNUCP44Wd1rE1m_phbKtNCMJTFJfOX4_2varadg,8298
  datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py,sha256=O2XTVBdXteIgQF8Lss_t2RhRSsRMmMyWrAoNonDMQFI,39604
@@ -872,7 +873,7 @@ datahub/sql_parsing/sqlglot_utils.py,sha256=8MYzkyekhup3ihVStRPuwneWPNu17xhBg5SG
  datahub/sql_parsing/tool_meta_extractor.py,sha256=pE-pkRKBfNTXEJkaQM9NlG807mc-X6OtetgskJySCs8,2908
  datahub/telemetry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/telemetry/stats.py,sha256=YltbtC3fe6rl1kcxn1A-mSnVpECTPm5k-brrUt7QxTI,967
- datahub/telemetry/telemetry.py,sha256=xXtvMVkX1YNu4Z0NUZnDAiIYP6c9mFujbGpd1wk3bgM,14763
+ datahub/telemetry/telemetry.py,sha256=gzla-QGNsynGg2FqFxiDDFQ0emG53MJ9lhOA2-UUg-Y,15047
  datahub/testing/__init__.py,sha256=TywIuzGQvzJsNhI_PGD1RFk11M3RtGl9jIMtAVVHIkg,272
  datahub/testing/check_imports.py,sha256=EKuJmgUA46uOrlaOy0fCvPB7j9POkpJ0ExhO_pT3YAk,1356
  datahub/testing/check_sql_parser_result.py,sha256=f7U7IUSbfV4VACdNI857wPZ9tAZ9j6mXiXmcJNT_RzM,2671
@@ -971,8 +972,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
- acryl_datahub-0.15.0rc2.dist-info/METADATA,sha256=2QoTlFob7rk0n-bosgUCem4HzsSH11SC3Volk6JhCck,171129
- acryl_datahub-0.15.0rc2.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
- acryl_datahub-0.15.0rc2.dist-info/entry_points.txt,sha256=VcQx0dnqaYLyeY_L5OaX7bLmmE-Il7TAXkxCKvEn2bA,9432
- acryl_datahub-0.15.0rc2.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
- acryl_datahub-0.15.0rc2.dist-info/RECORD,,
+ acryl_datahub-0.15.0rc4.dist-info/METADATA,sha256=c65oplVIArqkkCm4xgv3OOJqeiK_9XKwiN5O-UPJ6Ss,171129
+ acryl_datahub-0.15.0rc4.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+ acryl_datahub-0.15.0rc4.dist-info/entry_points.txt,sha256=3jOfMXB66r8zRDaqzRYpNc0tK-oUO-3tXlnGYDdVAmg,9440
+ acryl_datahub-0.15.0rc4.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+ acryl_datahub-0.15.0rc4.dist-info/RECORD,,
acryl_datahub-0.15.0rc4.dist-info/entry_points.txt CHANGED

@@ -73,7 +73,7 @@ okta = datahub.ingestion.source.identity.okta:OktaSource
  openapi = datahub.ingestion.source.openapi:OpenApiSource
  oracle = datahub.ingestion.source.sql.oracle:OracleSource
  postgres = datahub.ingestion.source.sql.postgres:PostgresSource
- powerbi = datahub.ingestion.source.powerbi:PowerBiDashboardSource
+ powerbi = datahub.ingestion.source.powerbi.powerbi:PowerBiDashboardSource
  powerbi-report-server = datahub.ingestion.source.powerbi_report_server:PowerBiReportServerDashboardSource
  preset = datahub.ingestion.source.preset:PresetSource
  presto = datahub.ingestion.source.sql.presto:PrestoSource
datahub/__init__.py CHANGED
@@ -3,7 +3,7 @@ import warnings
 
  # Published at https://pypi.org/project/acryl-datahub/.
  __package_name__ = "acryl-datahub"
- __version__ = "0.15.0rc2"
+ __version__ = "0.15.0rc4"
 
 
  def is_dev_mode() -> bool:
datahub/ingestion/graph/client.py CHANGED

@@ -67,6 +67,7 @@ from datahub.metadata.schema_classes import (
      SystemMetadataClass,
      TelemetryClientIdClass,
  )
+ from datahub.telemetry.telemetry import telemetry_instance
  from datahub.utilities.perf_timer import PerfTimer
  from datahub.utilities.str_enum import StrEnum
  from datahub.utilities.urns.urn import Urn, guess_entity_type
@@ -1819,4 +1820,5 @@ def get_default_graph() -> DataHubGraph:
      graph_config = config_utils.load_client_config()
      graph = DataHubGraph(graph_config)
      graph.test_connection()
+     telemetry_instance.set_context(server=graph)
      return graph
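
Taken together with the pipeline.py hunks below, this release renames the telemetry context API: `update_capture_exception_context` becomes `set_context`, and callers now import `telemetry_instance` directly. A minimal before/after sketch, assuming a connected `DataHubGraph` named `graph`:

    from datahub.telemetry.telemetry import telemetry_instance

    # 0.15.0rc2:
    # telemetry.telemetry_instance.update_capture_exception_context(server=graph)

    # 0.15.0rc4:
    telemetry_instance.set_context(server=graph)
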
datahub/ingestion/run/pipeline.py CHANGED

@@ -44,7 +44,8 @@ from datahub.ingestion.transformer.system_metadata_transformer import (
  )
  from datahub.ingestion.transformer.transform_registry import transform_registry
  from datahub.metadata.schema_classes import MetadataChangeProposalClass
- from datahub.telemetry import stats, telemetry
+ from datahub.telemetry import stats
+ from datahub.telemetry.telemetry import telemetry_instance
  from datahub.utilities._custom_package_loader import model_version_name
  from datahub.utilities.global_warning_util import (
      clear_global_warnings,
@@ -273,8 +274,9 @@ class Pipeline:
          if self.graph is None and isinstance(self.sink, DatahubRestSink):
              with _add_init_error_context("setup default datahub client"):
                  self.graph = self.sink.emitter.to_graph()
+                 self.graph.test_connection()
          self.ctx.graph = self.graph
-         telemetry.telemetry_instance.update_capture_exception_context(server=self.graph)
+         telemetry_instance.set_context(server=self.graph)
 
          with set_graph_context(self.graph):
              with _add_init_error_context("configure reporters"):
@@ -615,7 +617,7 @@
          sink_warnings = len(self.sink.get_report().warnings)
          global_warnings = len(get_global_warnings())
 
-         telemetry.telemetry_instance.ping(
+         telemetry_instance.ping(
              "ingest_stats",
              {
                  "source_type": self.source_type,
@@ -637,7 +639,6 @@
                  ),
                  "has_pipeline_name": bool(self.config.pipeline_name),
              },
-             self.ctx.graph,
          )
 
      def _approx_all_vals(self, d: LossyList[Any]) -> int:
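
With the server context registered once via `set_context`, `ping` no longer takes the graph as a trailing argument. A sketch of the new call shape; the property dict here is illustrative, not the pipeline's full stats payload:

    # 0.15.0rc2: telemetry.telemetry_instance.ping("ingest_stats", {...}, self.ctx.graph)
    # 0.15.0rc4: the graph argument is gone; server context comes from set_context().
    telemetry_instance.ping("ingest_stats", {"source_type": "demo-data"})
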
datahub/ingestion/source/feast.py CHANGED

@@ -42,10 +42,14 @@ from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import (
  from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
  from datahub.metadata.schema_classes import (
      BrowsePathsClass,
+     GlobalTagsClass,
      MLFeaturePropertiesClass,
      MLFeatureTablePropertiesClass,
      MLPrimaryKeyPropertiesClass,
+     OwnerClass,
+     OwnershipClass,
      StatusClass,
+     TagAssociationClass,
  )
 
  # FIXME: ValueType module cannot be used as a type
@@ -91,6 +95,24 @@ class FeastRepositorySourceConfig(ConfigModel):
      environment: str = Field(
          default=DEFAULT_ENV, description="Environment to use when constructing URNs"
      )
+     # owner_mappings example:
+     # This must be added to the recipe in order to extract owners, otherwise NO owners will be extracted
+     # owner_mappings:
+     #   - feast_owner_name: "<owner>"
+     #     datahub_owner_urn: "urn:li:corpGroup:<owner>"
+     #     datahub_ownership_type: "BUSINESS_OWNER"
+     owner_mappings: Optional[List[Dict[str, str]]] = Field(
+         default=None, description="Mapping of owner names to owner types"
+     )
+     enable_owner_extraction: bool = Field(
+         default=False,
+         description="If this is disabled, then we NEVER try to map owners. "
+         "If this is enabled, then owner_mappings is REQUIRED to extract ownership.",
+     )
+     enable_tag_extraction: bool = Field(
+         default=False,
+         description="If this is disabled, then we NEVER try to extract tags.",
+     )
 
 
  @platform_name("Feast")
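
A hypothetical recipe fragment exercising the new fields, shown as the Python dict the recipe YAML would deserialize to; only fields visible in this diff appear, and the source's other settings are elided:

    # Illustrative values only; both extraction flags default to False.
    feast_source_config = {
        "environment": "PROD",
        "enable_tag_extraction": True,
        "enable_owner_extraction": True,
        "owner_mappings": [
            {
                "feast_owner_name": "data-platform",
                "datahub_owner_urn": "urn:li:corpGroup:data-platform",
                "datahub_ownership_type": "BUSINESS_OWNER",
            }
        ],
    }
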
@@ -215,10 +237,15 @@
          """
 
          feature_view_name = f"{self.feature_store.project}.{feature_view.name}"
+         aspects = (
+             [StatusClass(removed=False)]
+             + self._get_tags(entity)
+             + self._get_owners(entity)
+         )
 
          entity_snapshot = MLPrimaryKeySnapshot(
              urn=builder.make_ml_primary_key_urn(feature_view_name, entity.name),
-             aspects=[StatusClass(removed=False)],
+             aspects=aspects,
          )
 
          entity_snapshot.aspects.append(
@@ -243,10 +270,11 @@
          Generate an MLFeature work unit for a Feast feature.
          """
          feature_view_name = f"{self.feature_store.project}.{feature_view.name}"
+         aspects = [StatusClass(removed=False)] + self._get_tags(field)
 
          feature_snapshot = MLFeatureSnapshot(
              urn=builder.make_ml_feature_urn(feature_view_name, field.name),
-             aspects=[StatusClass(removed=False)],
+             aspects=aspects,
          )
 
          feature_sources = []
@@ -295,13 +323,18 @@
          """
 
          feature_view_name = f"{self.feature_store.project}.{feature_view.name}"
+         aspects = (
+             [
+                 BrowsePathsClass(paths=[f"/feast/{self.feature_store.project}"]),
+                 StatusClass(removed=False),
+             ]
+             + self._get_tags(feature_view)
+             + self._get_owners(feature_view)
+         )
 
          feature_view_snapshot = MLFeatureTableSnapshot(
              urn=builder.make_ml_feature_table_urn("feast", feature_view_name),
-             aspects=[
-                 BrowsePathsClass(paths=[f"/feast/{self.feature_store.project}"]),
-                 StatusClass(removed=False),
-             ],
+             aspects=aspects,
          )
 
          feature_view_snapshot.aspects.append(
@@ -360,6 +393,64 @@
 
          return MetadataWorkUnit(id=on_demand_feature_view_name, mce=mce)
 
+     # If a tag is specified in a Feast object, then the tag will be ingested into Datahub if enable_tag_extraction is
+     # True, otherwise NO tags will be ingested
+     def _get_tags(self, obj: Union[Entity, FeatureView, FeastField]) -> list:
+         """
+         Extracts tags from the given object and returns a list of aspects.
+         """
+         aspects: List[Union[GlobalTagsClass]] = []
+
+         # Extract tags
+         if self.source_config.enable_tag_extraction:
+             if obj.tags.get("name"):
+                 tag_name: str = obj.tags["name"]
+                 tag_association = TagAssociationClass(
+                     tag=builder.make_tag_urn(tag_name)
+                 )
+                 global_tags_aspect = GlobalTagsClass(tags=[tag_association])
+                 aspects.append(global_tags_aspect)
+
+         return aspects
+
+     # If an owner is specified in a Feast object, it will only be ingested into Datahub if owner_mappings is specified
+     # and enable_owner_extraction is True in FeastRepositorySourceConfig, otherwise NO owners will be ingested
+     def _get_owners(self, obj: Union[Entity, FeatureView, FeastField]) -> list:
+         """
+         Extracts owners from the given object and returns a list of aspects.
+         """
+         aspects: List[Union[OwnershipClass]] = []
+
+         # Extract owner
+         if self.source_config.enable_owner_extraction:
+             owner = getattr(obj, "owner", None)
+             if owner:
+                 # Create owner association, skipping if None
+                 owner_association = self._create_owner_association(owner)
+                 if owner_association:  # Only add valid owner associations
+                     owners_aspect = OwnershipClass(owners=[owner_association])
+                     aspects.append(owners_aspect)
+
+         return aspects
+
+     def _create_owner_association(self, owner: str) -> Optional[OwnerClass]:
+         """
+         Create an OwnerClass instance for the given owner using the owner mappings.
+         """
+         if self.source_config.owner_mappings is not None:
+             for mapping in self.source_config.owner_mappings:
+                 if mapping["feast_owner_name"] == owner:
+                     ownership_type_class: str = mapping.get(
+                         "datahub_ownership_type", "TECHNICAL_OWNER"
+                     )
+                     datahub_owner_urn = mapping.get("datahub_owner_urn")
+                     if datahub_owner_urn:
+                         return OwnerClass(
+                             owner=datahub_owner_urn,
+                             type=ownership_type_class,
+                         )
+         return None
+
      @classmethod
      def create(cls, config_dict, ctx):
          config = FeastRepositorySourceConfig.parse_obj(config_dict)
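
To illustrate the extraction rules above: tags are read only from the Feast object's `tags["name"]` key, and an owner is emitted only when `enable_owner_extraction` is set and a matching `owner_mappings` entry supplies a `datahub_owner_urn`. A hedged sketch of the resulting aspects, with hypothetical values:

    # Hypothetical Feast object with tags={"name": "pii"} and owner="data-platform",
    # under the sample config above:
    # _get_tags(obj)   -> [GlobalTagsClass(tags=[TagAssociationClass(tag="urn:li:tag:pii")])]
    # _get_owners(obj) -> [OwnershipClass(owners=[OwnerClass(
    #                        owner="urn:li:corpGroup:data-platform", type="BUSINESS_OWNER")])]
    # With the default enable_tag_extraction=False / enable_owner_extraction=False,
    # both methods return [].
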
datahub/ingestion/source/gc/datahub_gc.py CHANGED

@@ -144,15 +144,32 @@ class DataHubGcSource(Source):
          self,
      ) -> Iterable[MetadataWorkUnit]:
          if self.config.cleanup_expired_tokens:
-             self.revoke_expired_tokens()
+             try:
+                 self.revoke_expired_tokens()
+             except Exception as e:
+                 self.report.failure("While trying to cleanup expired token ", exc=e)
          if self.config.truncate_indices:
-             self.truncate_indices()
+             try:
+                 self.truncate_indices()
+             except Exception as e:
+                 self.report.failure("While trying to truncate indices ", exc=e)
          if self.dataprocess_cleanup:
-             yield from self.dataprocess_cleanup.get_workunits_internal()
+             try:
+                 yield from self.dataprocess_cleanup.get_workunits_internal()
+             except Exception as e:
+                 self.report.failure("While trying to cleanup data process ", exc=e)
          if self.soft_deleted_entities_cleanup:
-             self.soft_deleted_entities_cleanup.cleanup_soft_deleted_entities()
+             try:
+                 self.soft_deleted_entities_cleanup.cleanup_soft_deleted_entities()
+             except Exception as e:
+                 self.report.failure(
+                     "While trying to cleanup soft deleted entities ", exc=e
+                 )
          if self.execution_request_cleanup:
-             self.execution_request_cleanup.run()
+             try:
+                 self.execution_request_cleanup.run()
+             except Exception as e:
+                 self.report.failure("While trying to cleanup execution request ", exc=e)
          yield from []
 
      def truncate_indices(self) -> None:
datahub/ingestion/source/gc/dataprocess_cleanup.py CHANGED

@@ -404,7 +404,9 @@ class DataProcessCleanup:
                  try:
                      self.delete_dpi_from_datajobs(datajob_entity)
                  except Exception as e:
-                     logger.error(f"While trying to delete {datajob_entity} got {e}")
+                     self.report.failure(
+                         f"While trying to delete {datajob_entity} ", exc=e
+                     )
                  if (
                      datajob_entity.total_runs == 0
                      and self.config.delete_empty_data_jobs
datahub/ingestion/source/powerbi/__init__.py CHANGED

@@ -1 +0,0 @@
- from datahub.ingestion.source.powerbi.powerbi import PowerBiDashboardSource
datahub/ingestion/source/powerbi/config.py CHANGED

@@ -173,7 +173,7 @@ class SupportedDataPlatform(Enum):
          datahub_data_platform_name="redshift",
      )
 
-     DATABRICK_SQL = DataPlatformPair(
+     DATABRICKS_SQL = DataPlatformPair(
          powerbi_data_platform_name="Databricks", datahub_data_platform_name="databricks"
      )
 
@@ -313,8 +313,8 @@ class PowerBiDashboardSourceConfig(
          " Note: This field works in conjunction with 'workspace_type_filter' and both must be considered when filtering workspaces.",
      )
 
-     # Dataset type mapping PowerBI support many type of data-sources. Here user need to define what type of PowerBI
-     # DataSource need to be mapped to corresponding DataHub Platform DataSource. For example PowerBI `Snowflake` is
+     # Dataset type mapping PowerBI support many type of data-sources. Here user needs to define what type of PowerBI
+     # DataSource needs to be mapped to corresponding DataHub Platform DataSource. For example, PowerBI `Snowflake` is
      # mapped to DataHub `snowflake` PowerBI `PostgreSQL` is mapped to DataHub `postgres` and so on.
      dataset_type_mapping: Union[
          Dict[str, str], Dict[str, PlatformDetail]
datahub/ingestion/source/powerbi/m_query/data_classes.py CHANGED

@@ -1,10 +1,14 @@
  import os
  from abc import ABC
  from dataclasses import dataclass
- from typing import Any, Dict, Optional
+ from enum import Enum
+ from typing import Any, Dict, List, Optional
 
  from lark import Tree
 
+ from datahub.ingestion.source.powerbi.config import DataPlatformPair
+ from datahub.sql_parsing.sqlglot_lineage import ColumnLineageInfo
+
  TRACE_POWERBI_MQUERY_PARSER = os.getenv("DATAHUB_TRACE_POWERBI_MQUERY_PARSER", False)
 
 
@@ -30,7 +34,7 @@ class IdentifierAccessor(AbstractIdentifierAccessor):
 
      "[Schema="public",Item="order_date"]" is "items" in ItemSelector. Data of items varies as per DataSource
 
-     "public_order_date" is in "next" of ItemSelector. The "next" will be None if this identifier is leaf i.e. table
+     "public_order_date" is in "next" of ItemSelector. The "next" will be None if this identifier is leaf i.e., table
 
      """
 
@@ -53,3 +57,31 @@ class ReferencedTable:
      database: str
      schema: str
      table: str
+
+
+ @dataclass
+ class DataPlatformTable:
+     data_platform_pair: DataPlatformPair
+     urn: str
+
+
+ @dataclass
+ class Lineage:
+     upstreams: List[DataPlatformTable]
+     column_lineage: List[ColumnLineageInfo]
+
+     @staticmethod
+     def empty() -> "Lineage":
+         return Lineage(upstreams=[], column_lineage=[])
+
+
+ class FunctionName(Enum):
+     NATIVE_QUERY = "Value.NativeQuery"
+     POSTGRESQL_DATA_ACCESS = "PostgreSQL.Database"
+     ORACLE_DATA_ACCESS = "Oracle.Database"
+     SNOWFLAKE_DATA_ACCESS = "Snowflake.Databases"
+     MSSQL_DATA_ACCESS = "Sql.Database"
+     DATABRICK_DATA_ACCESS = "Databricks.Catalogs"
+     GOOGLE_BIGQUERY_DATA_ACCESS = "GoogleBigQuery.Database"
+     AMAZON_REDSHIFT_DATA_ACCESS = "AmazonRedshift.Database"
+     DATABRICK_MULTI_CLOUD_DATA_ACCESS = "DatabricksMultiCloud.Catalogs"
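
These classes move the lineage containers out of resolver.py (whose size in the RECORD drops from ~50 KB to ~17 KB, with the new pattern_handler.py absorbing the rest). A minimal usage sketch of the new types, assuming only what this diff defines; the URN value is illustrative:

    from datahub.ingestion.source.powerbi.config import SupportedDataPlatform
    from datahub.ingestion.source.powerbi.m_query.data_classes import (
        DataPlatformTable,
        Lineage,
    )

    lineage = Lineage.empty()  # upstreams=[], column_lineage=[]
    lineage.upstreams.append(
        DataPlatformTable(
            data_platform_pair=SupportedDataPlatform.DATABRICKS_SQL.value,
            urn="urn:li:dataset:(urn:li:dataPlatform:databricks,db.schema.table,PROD)",
        )
    )
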
datahub/ingestion/source/powerbi/m_query/parser.py CHANGED

@@ -7,6 +7,7 @@ from typing import Dict, List
  import lark
  from lark import Lark, Tree
 
+ import datahub.ingestion.source.powerbi.m_query.data_classes
  from datahub.ingestion.api.common import PipelineContext
  from datahub.ingestion.source.powerbi.config import (
      PowerBiDashboardSourceConfig,
@@ -65,7 +66,7 @@ def get_upstream_tables(
      ctx: PipelineContext,
      config: PowerBiDashboardSourceConfig,
      parameters: Dict[str, str] = {},
- ) -> List[resolver.Lineage]:
+ ) -> List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage]:
      if table.expression is None:
          logger.debug(f"There is no M-Query expression in table {table.full_name}")
          return []
@@ -127,12 +128,14 @@
      reporter.m_query_parse_successes += 1
 
      try:
-         lineage: List[resolver.Lineage] = resolver.MQueryResolver(
+         lineage: List[
+             datahub.ingestion.source.powerbi.m_query.data_classes.Lineage
+         ] = resolver.MQueryResolver(
              table=table,
              parse_tree=parse_tree,
              reporter=reporter,
              parameters=parameters,
-         ).resolve_to_data_platform_table_list(
+         ).resolve_to_lineage(
              ctx=ctx,
              config=config,
              platform_instance_resolver=platform_instance_resolver,