acryl-datahub 1.2.0.8rc1__py3-none-any.whl → 1.2.0.8rc3__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of acryl-datahub might be problematic.

acryl_datahub-1.2.0.8rc1.dist-info/RECORD → acryl_datahub-1.2.0.8rc3.dist-info/RECORD RENAMED
@@ -1,7 +1,7 @@
- acryl_datahub-1.2.0.8rc1.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
+ acryl_datahub-1.2.0.8rc3.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
- datahub/_version.py,sha256=Rdij3ffZjrkKXarGFXcv2MZfNld3LEFCYwjv7W_kgqg,323
+ datahub/_version.py,sha256=JaJpqqBsZdjVnoJtUZG8gQFWP2yezj3I1JQe0wJwUy4,323
  datahub/entrypoints.py,sha256=9Qf-37rNnTzbGlx8S75OCDazIclFp6zWNcCEL1zCZto,9015
  datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -148,10 +148,10 @@ datahub/ingestion/api/incremental_properties_helper.py,sha256=KzdxdrQtaMV2XMHfPs
  datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py,sha256=3lLdkkxVqE9MVc26cdXImPeWy16az5BwgcorWxeBV50,1759
  datahub/ingestion/api/pipeline_run_listener.py,sha256=5uBP__LbMQxJ2utlf07cIzQINqPbUOKiZyOJta6a0og,713
  datahub/ingestion/api/registry.py,sha256=LbdZr89465Lj7ptQRVB4vI1JR1igWABvQFj9-WX63bI,7454
- datahub/ingestion/api/report.py,sha256=OuVZAgNkzSGkKhpOhpqebd9_bEsBCTeoWR1VcanPeD4,18509
+ datahub/ingestion/api/report.py,sha256=1w63Y2yN49IaDLZaIvXEjRU3yVb_9t3wzymSI-fumZM,18959
  datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
  datahub/ingestion/api/sink.py,sha256=GZt48PV56FAhNoma-V5EwwRZvezhb40YH_zprm8_Yo0,4961
- datahub/ingestion/api/source.py,sha256=uf0fNbiOy0bS_aKFOcNv6NvuZe0LSDIDdNza9hraP7s,21857
+ datahub/ingestion/api/source.py,sha256=JASs7WygVB6g-tcwtchaftzv3lNtlVM31lEa242pn44,21853
  datahub/ingestion/api/source_helpers.py,sha256=XT9y5HgfVeF52jrX39vlLn1SdXpLVyT2Su8oGNsddYo,21148
  datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188mmC8J8,582
  datahub/ingestion/api/workunit.py,sha256=e8n8RfSjHZZm2R4ShNH0UuMtUkMjyqqM2j2t7oL74lo,6327
@@ -193,7 +193,7 @@ datahub/ingestion/reporting/file_reporter.py,sha256=tiWukmMxHrTQI3rOAumsq6lRlw8T
  datahub/ingestion/reporting/reporting_provider_registry.py,sha256=jTYSh3T4sensjnHQfPLiIcbA2dG8w0px9ghChAJjGdU,310
  datahub/ingestion/run/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/run/connection.py,sha256=mngNzr5aRLUDa5Izqxa0xkdDEqEqcDuacWSKIlkdvPc,1483
- datahub/ingestion/run/pipeline.py,sha256=xKjH5jUTCwJ-rCAnOEXLmZqVLKz_qxcX2JJxcO5AGmE,32046
+ datahub/ingestion/run/pipeline.py,sha256=Cz1hxvUzFmxP3oIKRCaHVKDIqK8HBIuKfsxQz9mfUvE,32452
  datahub/ingestion/run/pipeline_config.py,sha256=joG1j9OlwJhb8zqv4TY6_FSzOaKOx6xsBu255A5lP8g,4101
  datahub/ingestion/run/sink_callback.py,sha256=xZAzaKkR0dcQP838pMJWsA52oaQXV5BiqXkpxEpJ_9U,2856
  datahub/ingestion/sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -366,7 +366,7 @@ datahub/ingestion/source/hex/mapper.py,sha256=IyDAE-TzZUji3ICI_9gkYC3dQN3gl6kERR
  datahub/ingestion/source/hex/model.py,sha256=eri4aRo1eXcE2SWjzCnPFMhzPTiJ8w8zC4GN7Lgpr74,1864
  datahub/ingestion/source/hex/query_fetcher.py,sha256=r9UvF_qwswkRlNY7AI8p46eqAYSxVtjVE2e7eO4XagA,13384
  datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/source/iceberg/iceberg.py,sha256=UWfI4sN5uO6f9KzxjY939a_BIkAnPf0ELCmFvf9KuYg,35427
+ datahub/ingestion/source/iceberg/iceberg.py,sha256=KM9IDdWxW2VwOv0Iv3sMV6a60FNkgDEMS8vZaFHTOyA,37040
  datahub/ingestion/source/iceberg/iceberg_common.py,sha256=CD_yHQ_wEgivyLQUTRO9BZJB29S7j5fUVllki-BPwUU,12292
  datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=9iwp2vpQTi4OMbIKoDZV5lAdvjMR0ls6Llpck9grJIE,9875
  datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -640,7 +640,7 @@ datahub/metadata/schema.avsc,sha256=HluHCVmYg7RpOaw9xUMigEJBxlHF5WLdNcqVBKPeoOU,
  datahub/metadata/schema_classes.py,sha256=tPT8iHCak4IsZi_oL0nirbPpI8ETTPTZzapqLRpeKU4,1326
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
- datahub/metadata/_urns/urn_defs.py,sha256=Rl2wjTuHxpOk4rDAfqUHeBxCxTFA2JGFg1oCc9yR10s,143154
+ datahub/metadata/_urns/urn_defs.py,sha256=tBlEg7f0jaIWVQfpgzTe2gjkthP4janfAwJO7yx6-cw,143257
  datahub/metadata/com/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
  datahub/metadata/com/linkedin/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
  datahub/metadata/com/linkedin/events/__init__.py,sha256=s_dR0plZF-rOxxIbE8ojekJqwiHzl2WYR-Z3kW6kKS0,298
@@ -1093,7 +1093,7 @@ datahub/utilities/urns/field_paths.py,sha256=ra-o_fMGkBRLgzMewAJN5-HqAyo3PIpXQ0K
  datahub/utilities/urns/notebook_urn.py,sha256=CHqGrV45ReVODlFx7js2WUxjcXxt8B63-xsBZpujmtY,73
  datahub/utilities/urns/structured_properties_urn.py,sha256=fjA1Ysg7IQSly8IVYx1R8HnwnojQz6jZWbqfk_XVvno,271
  datahub/utilities/urns/tag_urn.py,sha256=MqEJdIaCnAyjYe_8VdNnUjOVV4TS8xMlv4pRsy8wwXY,63
- datahub/utilities/urns/urn.py,sha256=B4nYxiFT8s5DLA2NJsWg0KoiUDp9UWg1nvL0j7Sx-h8,218
+ datahub/utilities/urns/urn.py,sha256=CDtOtVccQW2yj5MBNtR3aO1yEInTnAmSDMbGbjheGJY,1279
  datahub/utilities/urns/urn_iter.py,sha256=3LtmobKksKFbnNCUCjFxm8qqFLCPPHUW_Q3zc4PE5nY,4736
  datahub_provider/__init__.py,sha256=qyPbz00f8pgtLVyqHG1TSnTqBfXb1x-kUH10zOLoq2U,53
  datahub_provider/_airflow_compat.py,sha256=unmFDGP57xKHPIhkdw_qo1vW1GAYpZ1yCvCrkMdGJXM,98
@@ -1114,8 +1114,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
- acryl_datahub-1.2.0.8rc1.dist-info/METADATA,sha256=_TXC2AAKI66LHx6fTnBdVxsJBwedMdTKiIhnrBVSTQk,186651
- acryl_datahub-1.2.0.8rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- acryl_datahub-1.2.0.8rc1.dist-info/entry_points.txt,sha256=qopCAD6qrsijaZ9mTw3UlPCKsE00C3t9MbkkWow7pi4,9943
- acryl_datahub-1.2.0.8rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
- acryl_datahub-1.2.0.8rc1.dist-info/RECORD,,
+ acryl_datahub-1.2.0.8rc3.dist-info/METADATA,sha256=JHHJcxTDQE0vzyvrfaIxu2-B2bkridTSaWrk8K96Tdk,186651
+ acryl_datahub-1.2.0.8rc3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ acryl_datahub-1.2.0.8rc3.dist-info/entry_points.txt,sha256=qopCAD6qrsijaZ9mTw3UlPCKsE00C3t9MbkkWow7pi4,9943
+ acryl_datahub-1.2.0.8rc3.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+ acryl_datahub-1.2.0.8rc3.dist-info/RECORD,,

datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
  # Published at https://pypi.org/project/acryl-datahub/.
  __package_name__ = "acryl-datahub"
- __version__ = "1.2.0.8rc1"
+ __version__ = "1.2.0.8rc3"


  def is_dev_mode() -> bool:

datahub/ingestion/api/report.py CHANGED
@@ -29,6 +29,7 @@ from datahub.metadata.schema_classes import (
  )
  from datahub.utilities.file_backed_collections import FileBackedDict
  from datahub.utilities.lossy_collections import LossyList
+ from datahub.utilities.urns.urn import guess_platform_name

  logger = logging.getLogger(__name__)
  LogLevel = Literal["ERROR", "WARNING", "INFO", "DEBUG"]
@@ -41,6 +42,15 @@ class SupportsAsObj(Protocol):

  @dataclass
  class Report(SupportsAsObj):
+     def __post_init__(self) -> None:
+         self.platform: Optional[str] = None
+
+     def set_platform(self, platform: str) -> None:
+         self.platform = platform
+
+     def get_platform(self) -> Optional[str]:
+         return self.platform
+
      @staticmethod
      def to_str(some_val: Any) -> str:
          if isinstance(some_val, Enum):
@@ -213,6 +223,7 @@ class ExamplesReport(Report, Closeable):
      _lineage_aspects_seen: Set[str] = field(default_factory=set)

      def __post_init__(self) -> None:
+         super().__post_init__()
          self._file_based_dict = FileBackedDict(
              tablename="urn_aspects",
              extra_columns={
@@ -347,6 +358,9 @@ class ExamplesReport(Report, Closeable):
          aspectName: str,
          mcp: Union[MetadataChangeProposalClass, MetadataChangeProposalWrapper],
      ) -> None:
+         platform_name = guess_platform_name(urn)
+         if platform_name != self.get_platform():
+             return
          if is_lineage_aspect(entityType, aspectName):
              self._lineage_aspects_seen.add(aspectName)
          has_fine_grained_lineage = self._has_fine_grained_lineage(mcp)
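
The report.py hunks thread a platform through the reporting layer: Report gains a platform slot initialized in __post_init__, and ExamplesReport now drops any aspect whose URN resolves to a different platform. A minimal sketch of the accessor flow, using a hypothetical MyReport subclass (not part of this diff); note that a subclass defining its own __post_init__ must now call super().__post_init__(), as ExamplesReport does above:

# Hypothetical subclass, for illustration only.
from dataclasses import dataclass
from datahub.ingestion.api.report import Report

@dataclass
class MyReport(Report):
    def __post_init__(self) -> None:
        super().__post_init__()  # initializes self.platform = None

report = MyReport()
assert report.get_platform() is None
report.set_platform("snowflake")
assert report.get_platform() == "snowflake"
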
datahub/ingestion/api/source.py CHANGED
@@ -531,9 +531,9 @@ class Source(Closeable, metaclass=ABCMeta):
              auto_status_aspect,
              auto_materialize_referenced_tags_terms,
              partial(
-                 auto_fix_duplicate_schema_field_paths, platform=self._infer_platform()
+                 auto_fix_duplicate_schema_field_paths, platform=self.infer_platform()
              ),
-             partial(auto_fix_empty_field_paths, platform=self._infer_platform()),
+             partial(auto_fix_empty_field_paths, platform=self.infer_platform()),
              browse_path_processor,
              partial(auto_workunit_reporter, self.get_report()),
              auto_patch_last_modified,
@@ -583,7 +583,7 @@ class Source(Closeable, metaclass=ABCMeta):
      def close(self) -> None:
          self.get_report().close()

-     def _infer_platform(self) -> Optional[str]:
+     def infer_platform(self) -> Optional[str]:
          config = self.get_config()
          platform = (
              getattr(config, "platform_name", None)
@@ -598,7 +598,7 @@ class Source(Closeable, metaclass=ABCMeta):
      def _get_browse_path_processor(self, dry_run: bool) -> MetadataWorkUnitProcessor:
          config = self.get_config()

-         platform = self._infer_platform()
+         platform = self.infer_platform()
          env = getattr(config, "env", None)
          browse_path_drop_dirs = [
              platform,
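
Renaming _infer_platform to infer_platform makes platform inference part of the public Source surface, which is what lets Pipeline (next section) call it from outside the class. A hedged usage sketch, assuming source is any configured Source instance:

# infer_platform() reads the platform from the source's config
# (e.g. "platform_name") and returns None when it cannot be determined.
platform = source.infer_platform()
if platform is not None:
    source.get_report().set_platform(platform)
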
datahub/ingestion/run/pipeline.py CHANGED
@@ -440,7 +440,19 @@ class Pipeline:
              return True
          return False

+     def _set_platform(self) -> None:
+         platform = self.source.infer_platform()
+         if platform:
+             self.source.get_report().set_platform(platform)
+         else:
+             self.source.get_report().warning(
+                 message="Platform not found",
+                 title="Platform not found",
+                 context="Platform not found",
+             )
+
      def run(self) -> None:
+         self._set_platform()
          self._warn_old_cli_version()
          with self.exit_stack, self.inner_exit_stack:
              if self.config.flags.generate_memory_profiles:
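
With _set_platform() invoked at the top of run(), the report's platform is populated before any work units reach the ExamplesReport filter added earlier; if inference fails, a "Platform not found" warning is recorded instead. A sketch assuming a valid recipe dict (the recipe contents are illustrative, not taken from this diff; a real Iceberg source also needs catalog connection details):

from datahub.ingestion.run.pipeline import Pipeline

recipe = {
    "source": {"type": "iceberg", "config": {"env": "PROD"}},  # illustrative
    "sink": {"type": "console"},
}
pipeline = Pipeline.create(recipe)
pipeline.run()  # now calls _set_platform() before _warn_old_cli_version()
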
datahub/ingestion/source/iceberg/iceberg.py CHANGED
@@ -12,7 +12,7 @@ from pyiceberg.exceptions import (
      NoSuchNamespaceError,
      NoSuchPropertyException,
      NoSuchTableError,
-     ServerError,
+     RESTError,
  )
  from pyiceberg.schema import Schema, SchemaVisitorPerPrimitiveType, visit
  from pyiceberg.table import Table
@@ -154,6 +154,10 @@ class IcebergSource(StatefulIngestionSourceBase):
          self.report: IcebergSourceReport = IcebergSourceReport()
          self.config: IcebergSourceConfig = config
          self.ctx: PipelineContext = ctx
+         self.stamping_processor = AutoSystemMetadata(
+             self.ctx
+         )  # single instance used only when processing namespaces
+         self.namespaces: List[Tuple[Identifier, str]] = []

      @classmethod
      def create(cls, config_dict: Dict, ctx: PipelineContext) -> "IcebergSource":
@@ -196,9 +200,9 @@ class IcebergSource(StatefulIngestionSourceBase):
              auto_lowercase_dataset_urns,
              auto_materialize_referenced_tags_terms,
              partial(
-                 auto_fix_duplicate_schema_field_paths, platform=self._infer_platform()
+                 auto_fix_duplicate_schema_field_paths, platform=self.infer_platform()
              ),
-             partial(auto_fix_empty_field_paths, platform=self._infer_platform()),
+             partial(auto_fix_empty_field_paths, platform=self.infer_platform()),
              partial(auto_workunit_reporter, self.get_report()),
              auto_patch_last_modified,
              EnsureAspectSizeProcessor(self.get_report()).ensure_aspect_size,
@@ -246,6 +250,13 @@ class IcebergSource(StatefulIngestionSourceBase):
                  context=str(namespace),
                  exc=e,
              )
+         except RESTError as e:
+             self.report.warning(
+                 title="Iceberg REST Server Error",
+                 message="Iceberg REST Server returned error status when trying to list tables for a namespace, skipping it.",
+                 context=str(namespace),
+                 exc=e,
+             )
          except Exception as e:
              self.report.report_failure(
                  title="Error when processing a namespace",
@@ -322,10 +333,10 @@ class IcebergSource(StatefulIngestionSourceBase):
                  context=dataset_name,
                  exc=e,
              )
-         except ServerError as e:
+         except RESTError as e:
              self.report.warning(
                  title="Iceberg REST Server Error",
-                 message="Iceberg returned 500 HTTP status when trying to process a table, skipping it.",
+                 message="Iceberg REST Server returned error status when trying to process a table, skipping it.",
                  context=dataset_name,
                  exc=e,
              )
@@ -365,7 +376,7 @@ class IcebergSource(StatefulIngestionSourceBase):
          )

          try:
-             catalog = self.config.get_catalog()
+             self.catalog = self.config.get_catalog()
          except Exception as e:
              self.report.report_failure(
                  title="Failed to initialize catalog object",
@@ -375,33 +386,7 @@ class IcebergSource(StatefulIngestionSourceBase):
              return

          try:
-             stamping_processor = AutoSystemMetadata(self.ctx)
-             namespace_ids = self._get_namespaces(catalog)
-             namespaces: List[Tuple[Identifier, str]] = []
-             for namespace in namespace_ids:
-                 namespace_repr = ".".join(namespace)
-                 LOGGER.debug(f"Processing namespace {namespace_repr}")
-                 namespace_urn = make_container_urn(
-                     NamespaceKey(
-                         namespace=namespace_repr,
-                         platform=self.platform,
-                         instance=self.config.platform_instance,
-                         env=self.config.env,
-                     )
-                 )
-                 namespace_properties: Properties = catalog.load_namespace_properties(
-                     namespace
-                 )
-                 namespaces.append((namespace, namespace_urn))
-                 for aspect in self._create_iceberg_namespace_aspects(
-                     namespace, namespace_properties
-                 ):
-                     yield stamping_processor.stamp_wu(
-                         MetadataChangeProposalWrapper(
-                             entityUrn=namespace_urn, aspect=aspect
-                         ).as_workunit()
-                     )
-             LOGGER.debug("Namespaces ingestion completed")
+             yield from self._process_namespaces()
          except Exception as e:
              self.report.report_failure(
                  title="Failed to list namespaces",
@@ -415,13 +400,70 @@ class IcebergSource(StatefulIngestionSourceBase):
              args_list=[
                  (dataset_path, namespace_urn)
                  for dataset_path, namespace_urn in self._get_datasets(
-                     catalog, namespaces
+                     self.catalog, self.namespaces
                  )
              ],
              max_workers=self.config.processing_threads,
          ):
              yield wu

+     def _try_processing_namespace(
+         self, namespace: Identifier
+     ) -> Iterable[MetadataWorkUnit]:
+         namespace_repr = ".".join(namespace)
+         try:
+             LOGGER.debug(f"Processing namespace {namespace_repr}")
+             namespace_urn = make_container_urn(
+                 NamespaceKey(
+                     namespace=namespace_repr,
+                     platform=self.platform,
+                     instance=self.config.platform_instance,
+                     env=self.config.env,
+                 )
+             )
+
+             namespace_properties: Properties = self.catalog.load_namespace_properties(
+                 namespace
+             )
+             for aspect in self._create_iceberg_namespace_aspects(
+                 namespace, namespace_properties
+             ):
+                 yield self.stamping_processor.stamp_wu(
+                     MetadataChangeProposalWrapper(
+                         entityUrn=namespace_urn, aspect=aspect
+                     ).as_workunit()
+                 )
+             self.namespaces.append((namespace, namespace_urn))
+         except NoSuchNamespaceError as e:
+             self.report.report_warning(
+                 title="Failed to retrieve namespace properties",
+                 message="Couldn't find the namespace, was it deleted during the ingestion?",
+                 context=namespace_repr,
+                 exc=e,
+             )
+             return
+         except RESTError as e:
+             self.report.warning(
+                 title="Iceberg REST Server Error",
+                 message="Iceberg REST Server returned error status when trying to retrieve namespace properties, skipping it.",
+                 context=str(namespace),
+                 exc=e,
+             )
+         except Exception as e:
+             self.report.report_failure(
+                 title="Failed to process namespace",
+                 message="Unhandled exception happened during processing of the namespace",
+                 context=namespace_repr,
+                 exc=e,
+             )
+
+     def _process_namespaces(self) -> Iterable[MetadataWorkUnit]:
+         namespace_ids = self._get_namespaces(self.catalog)
+         for namespace in namespace_ids:
+             yield from self._try_processing_namespace(namespace)
+
+         LOGGER.debug("Namespaces ingestion completed")
+
      def _create_iceberg_table_aspects(
          self, dataset_name: str, table: Table, namespace_urn: str
      ) -> Iterable[_Aspect]:
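
The iceberg.py hunks widen the caught exception from pyiceberg's ServerError to its base class RESTError, so any REST catalog error status (not just HTTP 500) is downgraded to a warning and the offending namespace or table is skipped; the namespace loop is also factored out into _try_processing_namespace / _process_namespaces, with the catalog, namespace list, and stamping processor promoted to instance attributes. A sketch of the widened handler pattern, assuming catalog and namespace are in scope:

from pyiceberg.exceptions import NoSuchNamespaceError, RESTError

try:
    tables = catalog.list_tables(namespace)
except NoSuchNamespaceError:
    tables = []  # namespace vanished mid-ingestion: warn and skip
except RESTError:
    tables = []  # any REST error status, not just 500: warn and skip
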
datahub/metadata/_urns/urn_defs.py CHANGED
@@ -2904,6 +2904,10 @@ class DataJobUrn(_SpecificUrn):
      def get_data_flow_urn(self) -> "DataFlowUrn":
          return DataFlowUrn.from_string(self.flow)

+     @property
+     def orchestrator(self) -> str:
+         return self.get_data_flow_urn().orchestrator
+
      @deprecated(reason="Use .job_id instead")
      def get_job_id(self) -> str:
          return self.job_id
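
DataJobUrn now exposes the orchestrator of its parent flow directly, saving callers a trip through get_data_flow_urn(). For example (URN value illustrative):

from datahub.metadata.urns import DataJobUrn

job = DataJobUrn.from_string(
    "urn:li:dataJob:(urn:li:dataFlow:(airflow,my_dag,prod),my_task)"
)
assert job.get_data_flow_urn().orchestrator == "airflow"
assert job.orchestrator == "airflow"  # new shortcut property
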
datahub/utilities/urns/urn.py CHANGED
@@ -1,8 +1,47 @@
- from datahub.metadata.urns import Urn
+ from typing import Optional

- __all__ = ["Urn", "guess_entity_type"]
+ from datahub.metadata.urns import (
+     DataPlatformUrn,
+     Urn,
+ )
+
+ __all__ = ["Urn", "guess_entity_type", "guess_platform_name"]


  def guess_entity_type(urn: str) -> str:
      assert urn.startswith("urn:li:"), "urns must start with urn:li:"
      return urn.split(":")[2]
+
+
+ def guess_platform_name(urn: str) -> Optional[str]:
+     """Extract platform from URN using a mapping dictionary."""
+     urn_obj = Urn.from_string(urn)
+
+     try:
+         platform = None
+         try:
+             platform = urn_obj.platform  # type: ignore[attr-defined]
+             platform_name = DataPlatformUrn.from_string(
+                 platform
+             ).get_entity_id_as_string()
+             return platform_name
+         except AttributeError:
+             pass
+         try:
+             return urn_obj.orchestrator  # type: ignore[attr-defined]
+         except AttributeError:
+             pass
+         try:
+             return urn_obj.dashboard_tool  # type: ignore[attr-defined]
+         except AttributeError:
+             pass
+         try:
+             return urn_obj.ml_model_tool  # type: ignore[attr-defined]
+         except AttributeError:
+             pass
+
+         if platform is None:
+             return None
+     except AttributeError:
+         pass
+     return None
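
guess_platform_name probes, in order, the attributes through which different URN types expose a platform: platform (datasets and similar), orchestrator (data flows and, via the new property above, data jobs), dashboard_tool, and ml_model_tool, returning None for URN types that carry none of them. Example behavior (URNs illustrative):

from datahub.utilities.urns.urn import guess_platform_name

assert guess_platform_name(
    "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.table,PROD)"
) == "snowflake"
assert guess_platform_name("urn:li:corpuser:jdoe") is None  # no platform attribute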