acryl-datahub 0.15.0rc3-py3-none-any.whl → 0.15.0rc5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of acryl-datahub might be problematic.

acryl_datahub-0.15.0rc5.dist-info/RECORD CHANGED
@@ -1,4 +1,4 @@
-datahub/__init__.py,sha256=bjNaEX_th93Zf5oFOCSAaZbNy-DUb3y4CxlHrAvZOkI,574
+datahub/__init__.py,sha256=c5YiGS9ajJPufFiwc_4_Bv9DF1Ha6s0H9dd-rtKRF3Y,574
 datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
 datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
 datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -187,7 +187,7 @@ datahub/ingestion/source/confluent_schema_registry.py,sha256=_h9D8bUXoaGcwgwB94d
 datahub/ingestion/source/csv_enricher.py,sha256=xjCbcsSMM8l_ASCRAnNsUGKuYMrD1lec19Waixub1EM,29498
 datahub/ingestion/source/demo_data.py,sha256=yzA_R-wfSX2WPz0i5ukYlscpmpb0Pt8D7EkhtKfftvo,1286
 datahub/ingestion/source/elastic_search.py,sha256=qFUVNzynTVJTabASTjGMu8Qhf9UpNbEtSBFjaPQjBJE,22641
-datahub/ingestion/source/feast.py,sha256=NYaAjzLVRhmMKDawBwN0OL8AMyKDLsxOwEj3YFX0wIA,14244
+datahub/ingestion/source/feast.py,sha256=uZpeUkJsiNlvZcUkARiEuZT_3n6sbGc0yFzwqhtnefA,18103
 datahub/ingestion/source/file.py,sha256=pH-Qkjh5FQ2XvyYPE7Z8XEY4vUk_SUHxm8p8IxG12tU,15879
 datahub/ingestion/source/ge_data_profiler.py,sha256=JqTonv8y7Re4Rfn2YKOEaLufiiAOWKfK1XQvJfV5dvs,64126
 datahub/ingestion/source/ge_profiling_config.py,sha256=P-9pd20koFvpxeEL_pqFvKWWz-qnpZ6XkELUyBKr7is,10807
@@ -312,14 +312,14 @@ datahub/ingestion/source/git/git_import.py,sha256=5CT6vMDb0MDctCtShnxb3JVihULtvk
 datahub/ingestion/source/grafana/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/grafana/grafana_source.py,sha256=3pU3xodPgS5lmnjuQ_u7F0XPzD_Y8MnPlMxRJ86qz4g,4960
 datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/iceberg/iceberg.py,sha256=lfCettf4F_BjWt7Wh65AhXHerXPP5KLFEo_LCbEZZnI,25537
+datahub/ingestion/source/iceberg/iceberg.py,sha256=fjqp3VBW5W5-54X_-ubkRZiAmdHvuMbxRbC4UYzEr4U,25900
 datahub/ingestion/source/iceberg/iceberg_common.py,sha256=TS3_ZYZ47Fe02CmzEo1z0pvy7yjXuG1VlwqNxa0U6pc,8506
 datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=hLT1Le_TEUoFXvsJSlrRB1qbTiTe-YVGCof5TFHMyd8,9908
 datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/identity/azure_ad.py,sha256=GdmJFD4UMsb5353Z7phXRf-YsXR2woGLRJwBXUkgXq0,28809
 datahub/ingestion/source/identity/okta.py,sha256=PnRokWLG8wSoNZlXJiRZiW6APTEHO09q4n2j_l6m3V0,30756
 datahub/ingestion/source/kafka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/kafka/kafka.py,sha256=D54QXYoEQJYVb7oWyvLVCGbJunS8ZTGYlMMdaBqpmMs,25475
+datahub/ingestion/source/kafka/kafka.py,sha256=QUw8VCmqIhZJvUiFJmFmekFmy4nXCLD4EKJNC6jk6Y4,26092
 datahub/ingestion/source/kafka/kafka_connect.py,sha256=5KUlhn3876c41Z3kx5l4oJhbu0ekXZQRdxmu52vb_v8,55167
 datahub/ingestion/source/kafka/kafka_schema_registry_base.py,sha256=13XjSwqyVhH1CJUFHAbWdmmv_Rw0Ju_9HQdBmIzPNNA,566
 datahub/ingestion/source/looker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -347,18 +347,19 @@ datahub/ingestion/source/looker/view_upstream.py,sha256=k278-uwh8uspdREpjE_uqks4
 datahub/ingestion/source/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/metadata/business_glossary.py,sha256=eRVRpQI0ZX5OofS1BUhNihFOfWih70TIAkJM7zaMH80,17577
 datahub/ingestion/source/metadata/lineage.py,sha256=XiZGuY6k3O9qBmgo7AzosIndJHwrvEhapVLdRlDxCuc,9507
-datahub/ingestion/source/powerbi/__init__.py,sha256=jYWYtaz95cyQAgEpZK1kxu1aKjOBAyDUlE77UdkCd6g,76
-datahub/ingestion/source/powerbi/config.py,sha256=T7E3YpfkLJlOJLr_xKfAVXhQRu_fkMqJbPtWp_v4k-8,23237
+datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+datahub/ingestion/source/powerbi/config.py,sha256=LV8BOm2zzF9t0RMwQVVUNB0bStzBPo8A6JkaW0xlgsQ,23241
 datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=AIU89lVPoCWlzc_RfUjDJwRQ11akPtnGpBTluBMCKio,2242
 datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule,sha256=PRWzuZMMhKdOVoAaE8csHvUFbZHxYe5meJHgrqlgiuw,19795
-datahub/ingestion/source/powerbi/powerbi.py,sha256=AuJPYakZ0Uko8lQUkJkeKPj2mtNmOHndUSwcwPOteMU,54294
+datahub/ingestion/source/powerbi/powerbi.py,sha256=7UsAEqaFlkWONcXJdQ2hotUYYn46ks6Fe71KXEMh7lI,54495
 datahub/ingestion/source/powerbi/m_query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=NLEF8uNQNhtOlMf42F7jHAd8C0TIwDCDOmMfLeRpyk8,1278
+datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=s2Nckmr50hxae5gPFcIfpyLzYpaMH56Q9nsDsEgi_-k,2243
 datahub/ingestion/source/powerbi/m_query/native_sql_parser.py,sha256=zzKVDGeUM3Yv3-zNah4D6mSnr6jXsstNuLmzczcPQEE,3683
-datahub/ingestion/source/powerbi/m_query/parser.py,sha256=YZXE6L1SYsfoT_sXolrVOfmWQXZOaoN9_KQiA1cdGAQ,5626
-datahub/ingestion/source/powerbi/m_query/resolver.py,sha256=7lLdz8u3Xc2RIoNCwfwCJMj48naEQdNcbxPt-Vmt45w,50429
+datahub/ingestion/source/powerbi/m_query/parser.py,sha256=pB1LGdb02Ryf8Pr8JPSgiOmLE6mEAgDbKodtKOOY6LU,5782
+datahub/ingestion/source/powerbi/m_query/pattern_handler.py,sha256=K9kyX-pMFjOOCAvedKXXce7xG-cHwTGQTrBT5GFXEIg,32865
+datahub/ingestion/source/powerbi/m_query/resolver.py,sha256=v2DJYT70Vtw9NyIwFecP7tHGcVxBWH9UqV7vbpYPhws,16985
 datahub/ingestion/source/powerbi/m_query/tree_function.py,sha256=i7HM9Oyj9XdJpNfG2lE8puxeuc47aSJ-5dPdBEcw2WU,6165
-datahub/ingestion/source/powerbi/m_query/validator.py,sha256=FFAwBl_WmV1SSrYhSbyq8FUsTD0vd0ncdysmkgdKxj8,1008
+datahub/ingestion/source/powerbi/m_query/validator.py,sha256=crG-VZy2XPieiDliP9yVMgiFcc8b2xbZyDFEATXqEAQ,1155
 datahub/ingestion/source/powerbi/rest_api_wrapper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py,sha256=xqAsnNUCP44Wd1rE1m_phbKtNCMJTFJfOX4_2varadg,8298
 datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py,sha256=O2XTVBdXteIgQF8Lss_t2RhRSsRMmMyWrAoNonDMQFI,39604
@@ -971,8 +972,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
 datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
 datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
 datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
-acryl_datahub-0.15.0rc3.dist-info/METADATA,sha256=l71CWZc9fD8L_4p0NFRk9ruU6w4QEZgGJq3RObW_ljA,171129
-acryl_datahub-0.15.0rc3.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
-acryl_datahub-0.15.0rc3.dist-info/entry_points.txt,sha256=VcQx0dnqaYLyeY_L5OaX7bLmmE-Il7TAXkxCKvEn2bA,9432
-acryl_datahub-0.15.0rc3.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
-acryl_datahub-0.15.0rc3.dist-info/RECORD,,
+acryl_datahub-0.15.0rc5.dist-info/METADATA,sha256=SiiSRUUBz-MJZHnnGpn6342w8hkW9COEktKjlJDQQuw,171117
+acryl_datahub-0.15.0rc5.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+acryl_datahub-0.15.0rc5.dist-info/entry_points.txt,sha256=3jOfMXB66r8zRDaqzRYpNc0tK-oUO-3tXlnGYDdVAmg,9440
+acryl_datahub-0.15.0rc5.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+acryl_datahub-0.15.0rc5.dist-info/RECORD,,
acryl_datahub-0.15.0rc5.dist-info/entry_points.txt CHANGED
@@ -73,7 +73,7 @@ okta = datahub.ingestion.source.identity.okta:OktaSource
 openapi = datahub.ingestion.source.openapi:OpenApiSource
 oracle = datahub.ingestion.source.sql.oracle:OracleSource
 postgres = datahub.ingestion.source.sql.postgres:PostgresSource
-powerbi = datahub.ingestion.source.powerbi:PowerBiDashboardSource
+powerbi = datahub.ingestion.source.powerbi.powerbi:PowerBiDashboardSource
 powerbi-report-server = datahub.ingestion.source.powerbi_report_server:PowerBiReportServerDashboardSource
 preset = datahub.ingestion.source.preset:PresetSource
 presto = datahub.ingestion.source.sql.presto:PrestoSource
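
The powerbi entry point now targets the powerbi.powerbi module directly, matching the emptied powerbi/__init__.py shown further down. A minimal sketch of how such a registry entry resolves at runtime, assuming the group name datahub.ingestion.source and Python 3.10+ for the keyword form of entry_points:

```python
from importlib.metadata import entry_points

# Illustrative only: resolve the "powerbi" source plugin the way a plugin loader would.
(ep,) = entry_points(group="datahub.ingestion.source", name="powerbi")
source_cls = ep.load()  # datahub.ingestion.source.powerbi.powerbi:PowerBiDashboardSource
print(source_cls.__name__)  # PowerBiDashboardSource
```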
datahub/__init__.py CHANGED
@@ -3,7 +3,7 @@ import warnings
 
 # Published at https://pypi.org/project/acryl-datahub/.
 __package_name__ = "acryl-datahub"
-__version__ = "0.15.0rc3"
+__version__ = "0.15.0rc5"
 
 
 def is_dev_mode() -> bool:
datahub/ingestion/source/feast.py CHANGED
@@ -42,10 +42,14 @@ from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import (
 from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
 from datahub.metadata.schema_classes import (
     BrowsePathsClass,
+    GlobalTagsClass,
     MLFeaturePropertiesClass,
     MLFeatureTablePropertiesClass,
     MLPrimaryKeyPropertiesClass,
+    OwnerClass,
+    OwnershipClass,
     StatusClass,
+    TagAssociationClass,
 )
 
 # FIXME: ValueType module cannot be used as a type
@@ -91,6 +95,24 @@ class FeastRepositorySourceConfig(ConfigModel):
     environment: str = Field(
         default=DEFAULT_ENV, description="Environment to use when constructing URNs"
     )
+    # owner_mappings example:
+    # This must be added to the recipe in order to extract owners, otherwise NO owners will be extracted
+    # owner_mappings:
+    #   - feast_owner_name: "<owner>"
+    #     datahub_owner_urn: "urn:li:corpGroup:<owner>"
+    #     datahub_ownership_type: "BUSINESS_OWNER"
+    owner_mappings: Optional[List[Dict[str, str]]] = Field(
+        default=None, description="Mapping of owner names to owner types"
+    )
+    enable_owner_extraction: bool = Field(
+        default=False,
+        description="If this is disabled, then we NEVER try to map owners. "
+        "If this is enabled, then owner_mappings is REQUIRED to extract ownership.",
+    )
+    enable_tag_extraction: bool = Field(
+        default=False,
+        description="If this is disabled, then we NEVER try to extract tags.",
+    )
 
 
 @platform_name("Feast")
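
The three new fields gate all tag and owner extraction. A minimal sketch of enabling them when building the config programmatically; the owner name and URN are placeholders, and the path field is assumed from the source's existing config:

```python
from datahub.ingestion.source.feast import FeastRepositorySourceConfig

# Placeholder values throughout; "data-team" is a made-up Feast owner.
config = FeastRepositorySourceConfig.parse_obj(
    {
        "path": "/path/to/feast/repo",  # assumed pre-existing config field
        "enable_tag_extraction": True,
        "enable_owner_extraction": True,
        "owner_mappings": [
            {
                "feast_owner_name": "data-team",
                "datahub_owner_urn": "urn:li:corpGroup:data-team",
                "datahub_ownership_type": "BUSINESS_OWNER",
            }
        ],
    }
)
```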
@@ -215,10 +237,15 @@ class FeastRepositorySource(Source):
         """
 
         feature_view_name = f"{self.feature_store.project}.{feature_view.name}"
+        aspects = (
+            [StatusClass(removed=False)]
+            + self._get_tags(entity)
+            + self._get_owners(entity)
+        )
 
         entity_snapshot = MLPrimaryKeySnapshot(
             urn=builder.make_ml_primary_key_urn(feature_view_name, entity.name),
-            aspects=[StatusClass(removed=False)],
+            aspects=aspects,
         )
 
         entity_snapshot.aspects.append(
@@ -243,10 +270,11 @@
         Generate an MLFeature work unit for a Feast feature.
         """
         feature_view_name = f"{self.feature_store.project}.{feature_view.name}"
+        aspects = [StatusClass(removed=False)] + self._get_tags(field)
 
         feature_snapshot = MLFeatureSnapshot(
             urn=builder.make_ml_feature_urn(feature_view_name, field.name),
-            aspects=[StatusClass(removed=False)],
+            aspects=aspects,
         )
 
         feature_sources = []
@@ -295,13 +323,18 @@
         """
 
         feature_view_name = f"{self.feature_store.project}.{feature_view.name}"
+        aspects = (
+            [
+                BrowsePathsClass(paths=[f"/feast/{self.feature_store.project}"]),
+                StatusClass(removed=False),
+            ]
+            + self._get_tags(feature_view)
+            + self._get_owners(feature_view)
+        )
 
         feature_view_snapshot = MLFeatureTableSnapshot(
             urn=builder.make_ml_feature_table_urn("feast", feature_view_name),
-            aspects=[
-                BrowsePathsClass(paths=[f"/feast/{self.feature_store.project}"]),
-                StatusClass(removed=False),
-            ],
+            aspects=aspects,
         )
 
         feature_view_snapshot.aspects.append(
@@ -360,6 +393,64 @@
 
         return MetadataWorkUnit(id=on_demand_feature_view_name, mce=mce)
 
+    # If a tag is specified in a Feast object, then the tag will be ingested into Datahub if enable_tag_extraction is
+    # True, otherwise NO tags will be ingested
+    def _get_tags(self, obj: Union[Entity, FeatureView, FeastField]) -> list:
+        """
+        Extracts tags from the given object and returns a list of aspects.
+        """
+        aspects: List[Union[GlobalTagsClass]] = []
+
+        # Extract tags
+        if self.source_config.enable_tag_extraction:
+            if obj.tags.get("name"):
+                tag_name: str = obj.tags["name"]
+                tag_association = TagAssociationClass(
+                    tag=builder.make_tag_urn(tag_name)
+                )
+                global_tags_aspect = GlobalTagsClass(tags=[tag_association])
+                aspects.append(global_tags_aspect)
+
+        return aspects
+
+    # If an owner is specified in a Feast object, it will only be ingested into Datahub if owner_mappings is specified
+    # and enable_owner_extraction is True in FeastRepositorySourceConfig, otherwise NO owners will be ingested
+    def _get_owners(self, obj: Union[Entity, FeatureView, FeastField]) -> list:
+        """
+        Extracts owners from the given object and returns a list of aspects.
+        """
+        aspects: List[Union[OwnershipClass]] = []
+
+        # Extract owner
+        if self.source_config.enable_owner_extraction:
+            owner = getattr(obj, "owner", None)
+            if owner:
+                # Create owner association, skipping if None
+                owner_association = self._create_owner_association(owner)
+                if owner_association:  # Only add valid owner associations
+                    owners_aspect = OwnershipClass(owners=[owner_association])
+                    aspects.append(owners_aspect)
+
+        return aspects
+
+    def _create_owner_association(self, owner: str) -> Optional[OwnerClass]:
+        """
+        Create an OwnerClass instance for the given owner using the owner mappings.
+        """
+        if self.source_config.owner_mappings is not None:
+            for mapping in self.source_config.owner_mappings:
+                if mapping["feast_owner_name"] == owner:
+                    ownership_type_class: str = mapping.get(
+                        "datahub_ownership_type", "TECHNICAL_OWNER"
+                    )
+                    datahub_owner_urn = mapping.get("datahub_owner_urn")
+                    if datahub_owner_urn:
+                        return OwnerClass(
+                            owner=datahub_owner_urn,
+                            type=ownership_type_class,
+                        )
+        return None
+
     @classmethod
     def create(cls, config_dict, ctx):
         config = FeastRepositorySourceConfig.parse_obj(config_dict)
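
The owner lookup is a linear scan in which the first matching entry that carries a datahub_owner_urn wins, with datahub_ownership_type defaulting to TECHNICAL_OWNER. A standalone sketch of the same behavior; the sample mapping values are made up:

```python
from typing import Dict, List, Optional


def resolve_owner(
    owner: str, owner_mappings: Optional[List[Dict[str, str]]]
) -> Optional[Dict[str, str]]:
    """Mirror of _create_owner_association: first matching mapping with a URN wins;
    entries without a datahub_owner_urn are skipped entirely."""
    for mapping in owner_mappings or []:
        if mapping["feast_owner_name"] == owner:
            urn = mapping.get("datahub_owner_urn")
            if urn:
                return {
                    "owner": urn,
                    "type": mapping.get("datahub_ownership_type", "TECHNICAL_OWNER"),
                }
    return None


# Made-up mapping: no explicit type, so the default applies.
print(resolve_owner(
    "data-team",
    [{"feast_owner_name": "data-team", "datahub_owner_urn": "urn:li:corpGroup:data-team"}],
))
# -> {'owner': 'urn:li:corpGroup:data-team', 'type': 'TECHNICAL_OWNER'}
```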
datahub/ingestion/source/iceberg/iceberg.py CHANGED
@@ -9,6 +9,7 @@ from pyiceberg.exceptions import (
     NoSuchIcebergTableError,
     NoSuchNamespaceError,
     NoSuchPropertyException,
+    NoSuchTableError,
 )
 from pyiceberg.schema import Schema, SchemaVisitorPerPrimitiveType, visit
 from pyiceberg.table import Table
@@ -104,7 +105,7 @@ logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(
 @capability(SourceCapability.DESCRIPTIONS, "Enabled by default.")
 @capability(
     SourceCapability.OWNERSHIP,
-    "Optionally enabled via configuration by specifying which Iceberg table property holds user or group ownership.",
+    "Automatically ingests ownership information from table properties based on `user_ownership_property` and `group_ownership_property`",
 )
 @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
 class IcebergSource(StatefulIngestionSourceBase):
@@ -192,9 +193,7 @@ class IcebergSource(StatefulIngestionSourceBase):
             table = thread_local.local_catalog.load_table(dataset_path)
             time_taken = timer.elapsed_seconds()
             self.report.report_table_load_time(time_taken)
-            LOGGER.debug(
-                f"Loaded table: {table.identifier}, time taken: {time_taken}"
-            )
+            LOGGER.debug(f"Loaded table: {table.name()}, time taken: {time_taken}")
             yield from self._create_iceberg_workunit(dataset_name, table)
         except NoSuchPropertyException as e:
             self.report.report_warning(
@@ -206,12 +205,20 @@
             )
         except NoSuchIcebergTableError as e:
             self.report.report_warning(
-                "no-iceberg-table",
+                "not-an-iceberg-table",
                 f"Failed to create workunit for {dataset_name}. {e}",
             )
             LOGGER.warning(
                 f"NoSuchIcebergTableError while processing table {dataset_path}, skipping it.",
             )
+        except NoSuchTableError as e:
+            self.report.report_warning(
+                "no-such-table",
+                f"Failed to create workunit for {dataset_name}. {e}",
+            )
+            LOGGER.warning(
+                f"NoSuchTableError while processing table {dataset_path}, skipping it.",
+            )
         except Exception as e:
             self.report.report_failure("general", f"Failed to create workunit: {e}")
             LOGGER.exception(
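
With the extra except clause, tables that vanish between listing and loading are skipped with a warning instead of failing the whole run. A minimal sketch of the same guard in isolation, assuming a configured pyiceberg catalog named "demo" and a placeholder table identifier:

```python
from pyiceberg.catalog import load_catalog
from pyiceberg.exceptions import NoSuchTableError

catalog = load_catalog("demo")  # placeholder catalog name
try:
    table = catalog.load_table("db.orders")  # placeholder identifier
except NoSuchTableError:
    # The table was dropped (or is not visible to this catalog); skip it
    # rather than aborting the scan.
    table = None
```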
datahub/ingestion/source/kafka/kafka.py CHANGED
@@ -148,7 +148,7 @@ def get_kafka_consumer(
 ) -> confluent_kafka.Consumer:
     consumer = confluent_kafka.Consumer(
         {
-            "group.id": "test",
+            "group.id": "datahub-kafka-ingestion",
             "bootstrap.servers": connection.bootstrap,
             **connection.consumer_config,
         }
@@ -164,6 +164,25 @@
     return consumer
 
 
+def get_kafka_admin_client(
+    connection: KafkaConsumerConnectionConfig,
+) -> AdminClient:
+    client = AdminClient(
+        {
+            "group.id": "datahub-kafka-ingestion",
+            "bootstrap.servers": connection.bootstrap,
+            **connection.consumer_config,
+        }
+    )
+    if CallableConsumerConfig.is_callable_config(connection.consumer_config):
+        # As per documentation, we need to explicitly call the poll method to make sure OAuth callback gets executed
+        # https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#kafka-client-configuration
+        logger.debug("Initiating polling for kafka admin client")
+        client.poll(timeout=30)
+        logger.debug("Initiated polling for kafka admin client")
+    return client
+
+
 @dataclass
 class KafkaSourceReport(StaleEntityRemovalSourceReport):
     topics_scanned: int = 0
@@ -278,13 +297,7 @@ class KafkaSource(StatefulIngestionSourceBase, TestableSource):
     def init_kafka_admin_client(self) -> None:
         try:
             # TODO: Do we require separate config than existing consumer_config ?
-            self.admin_client = AdminClient(
-                {
-                    "group.id": "test",
-                    "bootstrap.servers": self.source_config.connection.bootstrap,
-                    **self.source_config.connection.consumer_config,
-                }
-            )
+            self.admin_client = get_kafka_admin_client(self.source_config.connection)
         except Exception as e:
             logger.debug(e, exc_info=e)
             self.report.report_warning(
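
Consumer and admin client now share one construction path and a stable, descriptive group.id instead of the old "test" placeholder. A standalone sketch of the config shape the helper builds; the broker address below is a placeholder:

```python
from confluent_kafka.admin import AdminClient

# Same shape of config as get_kafka_admin_client, with placeholder values.
admin_client = AdminClient(
    {
        "group.id": "datahub-kafka-ingestion",
        "bootstrap.servers": "localhost:9092",  # placeholder broker
    }
)
# e.g. enumerate topic metadata, as an ingestion scan would
metadata = admin_client.list_topics(timeout=10)
print(sorted(metadata.topics))
```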
datahub/ingestion/source/powerbi/__init__.py CHANGED
@@ -1 +0,0 @@
-from datahub.ingestion.source.powerbi.powerbi import PowerBiDashboardSource
datahub/ingestion/source/powerbi/config.py CHANGED
@@ -173,7 +173,7 @@ class SupportedDataPlatform(Enum):
         datahub_data_platform_name="redshift",
     )
 
-    DATABRICK_SQL = DataPlatformPair(
+    DATABRICKS_SQL = DataPlatformPair(
         powerbi_data_platform_name="Databricks", datahub_data_platform_name="databricks"
     )
 
@@ -313,8 +313,8 @@ class PowerBiDashboardSourceConfig(
         " Note: This field works in conjunction with 'workspace_type_filter' and both must be considered when filtering workspaces.",
     )
 
-    # Dataset type mapping PowerBI support many type of data-sources. Here user need to define what type of PowerBI
-    # DataSource need to be mapped to corresponding DataHub Platform DataSource. For example PowerBI `Snowflake` is
+    # Dataset type mapping PowerBI support many type of data-sources. Here user needs to define what type of PowerBI
+    # DataSource needs to be mapped to corresponding DataHub Platform DataSource. For example, PowerBI `Snowflake` is
     # mapped to DataHub `snowflake` PowerBI `PostgreSQL` is mapped to DataHub `postgres` and so on.
     dataset_type_mapping: Union[
         Dict[str, str], Dict[str, PlatformDetail]
datahub/ingestion/source/powerbi/m_query/data_classes.py CHANGED
@@ -1,10 +1,14 @@
 import os
 from abc import ABC
 from dataclasses import dataclass
-from typing import Any, Dict, Optional
+from enum import Enum
+from typing import Any, Dict, List, Optional
 
 from lark import Tree
 
+from datahub.ingestion.source.powerbi.config import DataPlatformPair
+from datahub.sql_parsing.sqlglot_lineage import ColumnLineageInfo
+
 TRACE_POWERBI_MQUERY_PARSER = os.getenv("DATAHUB_TRACE_POWERBI_MQUERY_PARSER", False)
 
 
@@ -30,7 +34,7 @@ class IdentifierAccessor(AbstractIdentifierAccessor):
 
     "[Schema="public",Item="order_date"]" is "items" in ItemSelector. Data of items varies as per DataSource
 
-    "public_order_date" is in "next" of ItemSelector. The "next" will be None if this identifier is leaf i.e. table
+    "public_order_date" is in "next" of ItemSelector. The "next" will be None if this identifier is leaf i.e., table
 
     """
 
@@ -53,3 +57,31 @@ class ReferencedTable:
     database: str
     schema: str
     table: str
+
+
+@dataclass
+class DataPlatformTable:
+    data_platform_pair: DataPlatformPair
+    urn: str
+
+
+@dataclass
+class Lineage:
+    upstreams: List[DataPlatformTable]
+    column_lineage: List[ColumnLineageInfo]
+
+    @staticmethod
+    def empty() -> "Lineage":
+        return Lineage(upstreams=[], column_lineage=[])
+
+
+class FunctionName(Enum):
+    NATIVE_QUERY = "Value.NativeQuery"
+    POSTGRESQL_DATA_ACCESS = "PostgreSQL.Database"
+    ORACLE_DATA_ACCESS = "Oracle.Database"
+    SNOWFLAKE_DATA_ACCESS = "Snowflake.Databases"
+    MSSQL_DATA_ACCESS = "Sql.Database"
+    DATABRICK_DATA_ACCESS = "Databricks.Catalogs"
+    GOOGLE_BIGQUERY_DATA_ACCESS = "GoogleBigQuery.Database"
+    AMAZON_REDSHIFT_DATA_ACCESS = "AmazonRedshift.Database"
+    DATABRICK_MULTI_CLOUD_DATA_ACCESS = "DatabricksMultiCloud.Catalogs"
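
These types move into data_classes.py so that resolver.py and the new pattern_handler.py can share them. A small illustration of how they compose; the platform pair and URN below are made-up values:

```python
from datahub.ingestion.source.powerbi.config import DataPlatformPair
from datahub.ingestion.source.powerbi.m_query.data_classes import (
    DataPlatformTable,
    Lineage,
)

# Placeholder platform pair and URN, for illustration only.
pair = DataPlatformPair(
    powerbi_data_platform_name="PostgreSQL",
    datahub_data_platform_name="postgres",
)
table = DataPlatformTable(
    data_platform_pair=pair,
    urn="urn:li:dataset:(urn:li:dataPlatform:postgres,db.public.orders,PROD)",
)
lineage = Lineage(upstreams=[table], column_lineage=[])
assert Lineage.empty().upstreams == []
```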
datahub/ingestion/source/powerbi/m_query/parser.py CHANGED
@@ -7,6 +7,7 @@ from typing import Dict, List
 import lark
 from lark import Lark, Tree
 
+import datahub.ingestion.source.powerbi.m_query.data_classes
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.source.powerbi.config import (
     PowerBiDashboardSourceConfig,
@@ -65,7 +66,7 @@ def get_upstream_tables(
     ctx: PipelineContext,
     config: PowerBiDashboardSourceConfig,
     parameters: Dict[str, str] = {},
-) -> List[resolver.Lineage]:
+) -> List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage]:
     if table.expression is None:
         logger.debug(f"There is no M-Query expression in table {table.full_name}")
         return []
@@ -127,12 +128,14 @@
     reporter.m_query_parse_successes += 1
 
     try:
-        lineage: List[resolver.Lineage] = resolver.MQueryResolver(
+        lineage: List[
+            datahub.ingestion.source.powerbi.m_query.data_classes.Lineage
+        ] = resolver.MQueryResolver(
             table=table,
             parse_tree=parse_tree,
             reporter=reporter,
             parameters=parameters,
-        ).resolve_to_data_platform_table_list(
+        ).resolve_to_lineage(
             ctx=ctx,
             config=config,
             platform_instance_resolver=platform_instance_resolver,
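
Callers of MQueryResolver therefore switch from resolve_to_data_platform_table_list to resolve_to_lineage; the keyword arguments are unchanged, and the return value is the list of data_classes.Lineage shown above. A before/after sketch with the resolver construction elided:

```python
# Before (0.15.0rc3):
#   tables = resolver.MQueryResolver(...).resolve_to_data_platform_table_list(
#       ctx=ctx, config=config, platform_instance_resolver=platform_instance_resolver
#   )
#
# After (0.15.0rc5): same keyword arguments, new name and Lineage return type.
#   lineage = resolver.MQueryResolver(...).resolve_to_lineage(
#       ctx=ctx, config=config, platform_instance_resolver=platform_instance_resolver
#   )
```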