acryl-datahub 1.2.0.8rc2__py3-none-any.whl → 1.2.0.8rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. More details are available on the original diff page (the link was not preserved in this extract).

@@ -1,7 +1,7 @@
1
- acryl_datahub-1.2.0.8rc2.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1
+ acryl_datahub-1.2.0.8rc3.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
2
2
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
3
3
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
4
- datahub/_version.py,sha256=61ZxWUlQVKM0CF2BBOi-9OpFZENqh_B4oxFCZYQSJBc,323
4
+ datahub/_version.py,sha256=JaJpqqBsZdjVnoJtUZG8gQFWP2yezj3I1JQe0wJwUy4,323
5
5
  datahub/entrypoints.py,sha256=9Qf-37rNnTzbGlx8S75OCDazIclFp6zWNcCEL1zCZto,9015
6
6
  datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
7
7
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -148,10 +148,10 @@ datahub/ingestion/api/incremental_properties_helper.py,sha256=KzdxdrQtaMV2XMHfPs
148
148
  datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py,sha256=3lLdkkxVqE9MVc26cdXImPeWy16az5BwgcorWxeBV50,1759
149
149
  datahub/ingestion/api/pipeline_run_listener.py,sha256=5uBP__LbMQxJ2utlf07cIzQINqPbUOKiZyOJta6a0og,713
150
150
  datahub/ingestion/api/registry.py,sha256=LbdZr89465Lj7ptQRVB4vI1JR1igWABvQFj9-WX63bI,7454
151
- datahub/ingestion/api/report.py,sha256=OuVZAgNkzSGkKhpOhpqebd9_bEsBCTeoWR1VcanPeD4,18509
151
+ datahub/ingestion/api/report.py,sha256=1w63Y2yN49IaDLZaIvXEjRU3yVb_9t3wzymSI-fumZM,18959
152
152
  datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
153
153
  datahub/ingestion/api/sink.py,sha256=GZt48PV56FAhNoma-V5EwwRZvezhb40YH_zprm8_Yo0,4961
154
- datahub/ingestion/api/source.py,sha256=uf0fNbiOy0bS_aKFOcNv6NvuZe0LSDIDdNza9hraP7s,21857
154
+ datahub/ingestion/api/source.py,sha256=JASs7WygVB6g-tcwtchaftzv3lNtlVM31lEa242pn44,21853
155
155
  datahub/ingestion/api/source_helpers.py,sha256=XT9y5HgfVeF52jrX39vlLn1SdXpLVyT2Su8oGNsddYo,21148
156
156
  datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188mmC8J8,582
157
157
  datahub/ingestion/api/workunit.py,sha256=e8n8RfSjHZZm2R4ShNH0UuMtUkMjyqqM2j2t7oL74lo,6327
@@ -193,7 +193,7 @@ datahub/ingestion/reporting/file_reporter.py,sha256=tiWukmMxHrTQI3rOAumsq6lRlw8T
193
193
  datahub/ingestion/reporting/reporting_provider_registry.py,sha256=jTYSh3T4sensjnHQfPLiIcbA2dG8w0px9ghChAJjGdU,310
194
194
  datahub/ingestion/run/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
195
195
  datahub/ingestion/run/connection.py,sha256=mngNzr5aRLUDa5Izqxa0xkdDEqEqcDuacWSKIlkdvPc,1483
196
- datahub/ingestion/run/pipeline.py,sha256=xKjH5jUTCwJ-rCAnOEXLmZqVLKz_qxcX2JJxcO5AGmE,32046
196
+ datahub/ingestion/run/pipeline.py,sha256=Cz1hxvUzFmxP3oIKRCaHVKDIqK8HBIuKfsxQz9mfUvE,32452
197
197
  datahub/ingestion/run/pipeline_config.py,sha256=joG1j9OlwJhb8zqv4TY6_FSzOaKOx6xsBu255A5lP8g,4101
198
198
  datahub/ingestion/run/sink_callback.py,sha256=xZAzaKkR0dcQP838pMJWsA52oaQXV5BiqXkpxEpJ_9U,2856
199
199
  datahub/ingestion/sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -366,7 +366,7 @@ datahub/ingestion/source/hex/mapper.py,sha256=IyDAE-TzZUji3ICI_9gkYC3dQN3gl6kERR
366
366
  datahub/ingestion/source/hex/model.py,sha256=eri4aRo1eXcE2SWjzCnPFMhzPTiJ8w8zC4GN7Lgpr74,1864
367
367
  datahub/ingestion/source/hex/query_fetcher.py,sha256=r9UvF_qwswkRlNY7AI8p46eqAYSxVtjVE2e7eO4XagA,13384
368
368
  datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
369
- datahub/ingestion/source/iceberg/iceberg.py,sha256=2E3mhvsIDSHDUd1Prb0nlZnGIsQLIuwNeFRxJPYyS-0,37042
369
+ datahub/ingestion/source/iceberg/iceberg.py,sha256=KM9IDdWxW2VwOv0Iv3sMV6a60FNkgDEMS8vZaFHTOyA,37040
370
370
  datahub/ingestion/source/iceberg/iceberg_common.py,sha256=CD_yHQ_wEgivyLQUTRO9BZJB29S7j5fUVllki-BPwUU,12292
371
371
  datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=9iwp2vpQTi4OMbIKoDZV5lAdvjMR0ls6Llpck9grJIE,9875
372
372
  datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -640,7 +640,7 @@ datahub/metadata/schema.avsc,sha256=HluHCVmYg7RpOaw9xUMigEJBxlHF5WLdNcqVBKPeoOU,
640
640
  datahub/metadata/schema_classes.py,sha256=tPT8iHCak4IsZi_oL0nirbPpI8ETTPTZzapqLRpeKU4,1326
641
641
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
642
642
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
643
- datahub/metadata/_urns/urn_defs.py,sha256=Rl2wjTuHxpOk4rDAfqUHeBxCxTFA2JGFg1oCc9yR10s,143154
643
+ datahub/metadata/_urns/urn_defs.py,sha256=tBlEg7f0jaIWVQfpgzTe2gjkthP4janfAwJO7yx6-cw,143257
644
644
  datahub/metadata/com/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
645
645
  datahub/metadata/com/linkedin/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
646
646
  datahub/metadata/com/linkedin/events/__init__.py,sha256=s_dR0plZF-rOxxIbE8ojekJqwiHzl2WYR-Z3kW6kKS0,298
@@ -1093,7 +1093,7 @@ datahub/utilities/urns/field_paths.py,sha256=ra-o_fMGkBRLgzMewAJN5-HqAyo3PIpXQ0K
1093
1093
  datahub/utilities/urns/notebook_urn.py,sha256=CHqGrV45ReVODlFx7js2WUxjcXxt8B63-xsBZpujmtY,73
1094
1094
  datahub/utilities/urns/structured_properties_urn.py,sha256=fjA1Ysg7IQSly8IVYx1R8HnwnojQz6jZWbqfk_XVvno,271
1095
1095
  datahub/utilities/urns/tag_urn.py,sha256=MqEJdIaCnAyjYe_8VdNnUjOVV4TS8xMlv4pRsy8wwXY,63
1096
- datahub/utilities/urns/urn.py,sha256=B4nYxiFT8s5DLA2NJsWg0KoiUDp9UWg1nvL0j7Sx-h8,218
1096
+ datahub/utilities/urns/urn.py,sha256=CDtOtVccQW2yj5MBNtR3aO1yEInTnAmSDMbGbjheGJY,1279
1097
1097
  datahub/utilities/urns/urn_iter.py,sha256=3LtmobKksKFbnNCUCjFxm8qqFLCPPHUW_Q3zc4PE5nY,4736
1098
1098
  datahub_provider/__init__.py,sha256=qyPbz00f8pgtLVyqHG1TSnTqBfXb1x-kUH10zOLoq2U,53
1099
1099
  datahub_provider/_airflow_compat.py,sha256=unmFDGP57xKHPIhkdw_qo1vW1GAYpZ1yCvCrkMdGJXM,98
@@ -1114,8 +1114,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
1114
1114
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
1115
1115
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
1116
1116
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
1117
- acryl_datahub-1.2.0.8rc2.dist-info/METADATA,sha256=Q8mmqp92zb_C5PbYaI7zQiAwkw9QrX0FUiCAGxtbzzg,186651
1118
- acryl_datahub-1.2.0.8rc2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1119
- acryl_datahub-1.2.0.8rc2.dist-info/entry_points.txt,sha256=qopCAD6qrsijaZ9mTw3UlPCKsE00C3t9MbkkWow7pi4,9943
1120
- acryl_datahub-1.2.0.8rc2.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1121
- acryl_datahub-1.2.0.8rc2.dist-info/RECORD,,
1117
+ acryl_datahub-1.2.0.8rc3.dist-info/METADATA,sha256=JHHJcxTDQE0vzyvrfaIxu2-B2bkridTSaWrk8K96Tdk,186651
1118
+ acryl_datahub-1.2.0.8rc3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1119
+ acryl_datahub-1.2.0.8rc3.dist-info/entry_points.txt,sha256=qopCAD6qrsijaZ9mTw3UlPCKsE00C3t9MbkkWow7pi4,9943
1120
+ acryl_datahub-1.2.0.8rc3.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1121
+ acryl_datahub-1.2.0.8rc3.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "1.2.0.8rc2"
3
+ __version__ = "1.2.0.8rc3"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
@@ -29,6 +29,7 @@ from datahub.metadata.schema_classes import (
29
29
  )
30
30
  from datahub.utilities.file_backed_collections import FileBackedDict
31
31
  from datahub.utilities.lossy_collections import LossyList
32
+ from datahub.utilities.urns.urn import guess_platform_name
32
33
 
33
34
  logger = logging.getLogger(__name__)
34
35
  LogLevel = Literal["ERROR", "WARNING", "INFO", "DEBUG"]
@@ -41,6 +42,15 @@ class SupportsAsObj(Protocol):
41
42
 
42
43
  @dataclass
43
44
  class Report(SupportsAsObj):
45
+ def __post_init__(self) -> None:
46
+ self.platform: Optional[str] = None
47
+
48
+ def set_platform(self, platform: str) -> None:
49
+ self.platform = platform
50
+
51
+ def get_platform(self) -> Optional[str]:
52
+ return self.platform
53
+
44
54
  @staticmethod
45
55
  def to_str(some_val: Any) -> str:
46
56
  if isinstance(some_val, Enum):
@@ -213,6 +223,7 @@ class ExamplesReport(Report, Closeable):
213
223
  _lineage_aspects_seen: Set[str] = field(default_factory=set)
214
224
 
215
225
  def __post_init__(self) -> None:
226
+ super().__post_init__()
216
227
  self._file_based_dict = FileBackedDict(
217
228
  tablename="urn_aspects",
218
229
  extra_columns={
@@ -347,6 +358,9 @@ class ExamplesReport(Report, Closeable):
347
358
  aspectName: str,
348
359
  mcp: Union[MetadataChangeProposalClass, MetadataChangeProposalWrapper],
349
360
  ) -> None:
361
+ platform_name = guess_platform_name(urn)
362
+ if platform_name != self.get_platform():
363
+ return
350
364
  if is_lineage_aspect(entityType, aspectName):
351
365
  self._lineage_aspects_seen.add(aspectName)
352
366
  has_fine_grained_lineage = self._has_fine_grained_lineage(mcp)
@@ -531,9 +531,9 @@ class Source(Closeable, metaclass=ABCMeta):
531
531
  auto_status_aspect,
532
532
  auto_materialize_referenced_tags_terms,
533
533
  partial(
534
- auto_fix_duplicate_schema_field_paths, platform=self._infer_platform()
534
+ auto_fix_duplicate_schema_field_paths, platform=self.infer_platform()
535
535
  ),
536
- partial(auto_fix_empty_field_paths, platform=self._infer_platform()),
536
+ partial(auto_fix_empty_field_paths, platform=self.infer_platform()),
537
537
  browse_path_processor,
538
538
  partial(auto_workunit_reporter, self.get_report()),
539
539
  auto_patch_last_modified,
@@ -583,7 +583,7 @@ class Source(Closeable, metaclass=ABCMeta):
583
583
  def close(self) -> None:
584
584
  self.get_report().close()
585
585
 
586
- def _infer_platform(self) -> Optional[str]:
586
+ def infer_platform(self) -> Optional[str]:
587
587
  config = self.get_config()
588
588
  platform = (
589
589
  getattr(config, "platform_name", None)
@@ -598,7 +598,7 @@ class Source(Closeable, metaclass=ABCMeta):
598
598
  def _get_browse_path_processor(self, dry_run: bool) -> MetadataWorkUnitProcessor:
599
599
  config = self.get_config()
600
600
 
601
- platform = self._infer_platform()
601
+ platform = self.infer_platform()
602
602
  env = getattr(config, "env", None)
603
603
  browse_path_drop_dirs = [
604
604
  platform,
@@ -440,7 +440,19 @@ class Pipeline:
440
440
  return True
441
441
  return False
442
442
 
443
+ def _set_platform(self) -> None:
444
+ platform = self.source.infer_platform()
445
+ if platform:
446
+ self.source.get_report().set_platform(platform)
447
+ else:
448
+ self.source.get_report().warning(
449
+ message="Platform not found",
450
+ title="Platform not found",
451
+ context="Platform not found",
452
+ )
453
+
443
454
  def run(self) -> None:
455
+ self._set_platform()
444
456
  self._warn_old_cli_version()
445
457
  with self.exit_stack, self.inner_exit_stack:
446
458
  if self.config.flags.generate_memory_profiles:
@@ -200,9 +200,9 @@ class IcebergSource(StatefulIngestionSourceBase):
200
200
  auto_lowercase_dataset_urns,
201
201
  auto_materialize_referenced_tags_terms,
202
202
  partial(
203
- auto_fix_duplicate_schema_field_paths, platform=self._infer_platform()
203
+ auto_fix_duplicate_schema_field_paths, platform=self.infer_platform()
204
204
  ),
205
- partial(auto_fix_empty_field_paths, platform=self._infer_platform()),
205
+ partial(auto_fix_empty_field_paths, platform=self.infer_platform()),
206
206
  partial(auto_workunit_reporter, self.get_report()),
207
207
  auto_patch_last_modified,
208
208
  EnsureAspectSizeProcessor(self.get_report()).ensure_aspect_size,
@@ -2904,6 +2904,10 @@ class DataJobUrn(_SpecificUrn):
2904
2904
  def get_data_flow_urn(self) -> "DataFlowUrn":
2905
2905
  return DataFlowUrn.from_string(self.flow)
2906
2906
 
2907
+ @property
2908
+ def orchestrator(self) -> str:
2909
+ return self.get_data_flow_urn().orchestrator
2910
+
2907
2911
  @deprecated(reason="Use .job_id instead")
2908
2912
  def get_job_id(self) -> str:
2909
2913
  return self.job_id
@@ -1,8 +1,47 @@
1
- from datahub.metadata.urns import Urn
1
+ from typing import Optional
2
2
 
3
- __all__ = ["Urn", "guess_entity_type"]
3
+ from datahub.metadata.urns import (
4
+ DataPlatformUrn,
5
+ Urn,
6
+ )
7
+
8
+ __all__ = ["Urn", "guess_entity_type", "guess_platform_name"]
4
9
 
5
10
 
6
11
  def guess_entity_type(urn: str) -> str:
7
12
  assert urn.startswith("urn:li:"), "urns must start with urn:li:"
8
13
  return urn.split(":")[2]
14
+
15
+
16
+ def guess_platform_name(urn: str) -> Optional[str]:
17
+ """Extract platform from URN using a mapping dictionary."""
18
+ urn_obj = Urn.from_string(urn)
19
+
20
+ try:
21
+ platform = None
22
+ try:
23
+ platform = urn_obj.platform # type: ignore[attr-defined]
24
+ platform_name = DataPlatformUrn.from_string(
25
+ platform
26
+ ).get_entity_id_as_string()
27
+ return platform_name
28
+ except AttributeError:
29
+ pass
30
+ try:
31
+ return urn_obj.orchestrator # type: ignore[attr-defined]
32
+ except AttributeError:
33
+ pass
34
+ try:
35
+ return urn_obj.dashboard_tool # type: ignore[attr-defined]
36
+ except AttributeError:
37
+ pass
38
+ try:
39
+ return urn_obj.ml_model_tool # type: ignore[attr-defined]
40
+ except AttributeError:
41
+ pass
42
+
43
+ if platform is None:
44
+ return None
45
+ except AttributeError:
46
+ pass
47
+ return None