acryl_datahub-0.15.0rc6-py3-none-any.whl → acryl_datahub-0.15.0rc8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (29)
  1. {acryl_datahub-0.15.0rc6.dist-info → acryl_datahub-0.15.0rc8.dist-info}/METADATA +2433 -2433
  2. {acryl_datahub-0.15.0rc6.dist-info → acryl_datahub-0.15.0rc8.dist-info}/RECORD +29 -29
  3. datahub/__init__.py +1 -1
  4. datahub/api/entities/structuredproperties/structuredproperties.py +1 -1
  5. datahub/cli/put_cli.py +1 -1
  6. datahub/cli/specific/dataproduct_cli.py +1 -1
  7. datahub/emitter/mcp_patch_builder.py +43 -0
  8. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +9 -4
  9. datahub/ingestion/source/aws/sagemaker_processors/models.py +30 -1
  10. datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
  11. datahub/ingestion/source/csv_enricher.py +1 -1
  12. datahub/ingestion/source/dremio/dremio_source.py +4 -2
  13. datahub/ingestion/source/elastic_search.py +1 -1
  14. datahub/ingestion/source/gc/dataprocess_cleanup.py +12 -7
  15. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +1 -1
  16. datahub/ingestion/source/ge_data_profiler.py +23 -1
  17. datahub/ingestion/source/sql/sql_types.py +14 -2
  18. datahub/ingestion/transformer/add_dataset_tags.py +1 -1
  19. datahub/ingestion/transformer/generic_aspect_transformer.py +1 -1
  20. datahub/integrations/assertion/common.py +1 -1
  21. datahub/lite/duckdb_lite.py +12 -17
  22. datahub/specific/chart.py +0 -39
  23. datahub/specific/dashboard.py +0 -39
  24. datahub/specific/datajob.py +3 -47
  25. datahub/utilities/urns/_urn_base.py +1 -1
  26. datahub/utilities/urns/structured_properties_urn.py +1 -1
  27. {acryl_datahub-0.15.0rc6.dist-info → acryl_datahub-0.15.0rc8.dist-info}/WHEEL +0 -0
  28. {acryl_datahub-0.15.0rc6.dist-info → acryl_datahub-0.15.0rc8.dist-info}/entry_points.txt +0 -0
  29. {acryl_datahub-0.15.0rc6.dist-info → acryl_datahub-0.15.0rc8.dist-info}/top_level.txt +0 -0
datahub/lite/duckdb_lite.py CHANGED
@@ -609,7 +609,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
609
609
  aspect_map, DataPlatformInstanceClass
610
610
  ) # type: ignore
611
611
 
612
- needs_platform = Urn.create_from_string(entity_urn).get_type() in [
612
+ needs_platform = Urn.from_string(entity_urn).get_type() in [
613
613
  "dataset",
614
614
  "container",
615
615
  "chart",
@@ -617,7 +617,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
617
617
  "dataFlow",
618
618
  "dataJob",
619
619
  ]
620
- entity_urn_parsed = Urn.create_from_string(entity_urn)
620
+ entity_urn_parsed = Urn.from_string(entity_urn)
621
621
  if entity_urn_parsed.get_type() in ["dataFlow", "dataJob"]:
622
622
  self.add_edge(
623
623
  entity_urn,
@@ -630,15 +630,12 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
630
630
  # this is a top-level entity
631
631
  if not dpi:
632
632
  logger.debug(f"No data platform instance for {entity_urn}")
633
- maybe_parent_urn = Urn.create_from_string(entity_urn).get_entity_id()[0]
633
+ maybe_parent_urn = Urn.from_string(entity_urn).get_entity_id()[0]
634
634
  needs_dpi = False
635
635
  if maybe_parent_urn.startswith(Urn.URN_PREFIX):
636
636
  parent_urn = maybe_parent_urn
637
- if (
638
- Urn.create_from_string(maybe_parent_urn).get_type()
639
- == "dataPlatform"
640
- ):
641
- data_platform_urn = DataPlatformUrn.create_from_string(
637
+ if Urn.from_string(maybe_parent_urn).get_type() == "dataPlatform":
638
+ data_platform_urn = DataPlatformUrn.from_string(
642
639
  maybe_parent_urn
643
640
  )
644
641
  needs_dpi = True
@@ -660,7 +657,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
660
657
  logger.error(f"Failed to generate edges entity {entity_urn}", e)
661
658
  parent_urn = str(data_platform_instance_urn)
662
659
  else:
663
- data_platform_urn = DataPlatformUrn.create_from_string(dpi.platform)
660
+ data_platform_urn = DataPlatformUrn.from_string(dpi.platform)
664
661
  data_platform_instance = dpi.instance or "default"
665
662
  data_platform_instance_urn = Urn(
666
663
  entity_type="dataPlatformInstance",
@@ -673,9 +670,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
673
670
  parent_urn = "__root__"
674
671
 
675
672
  types = (
676
- subtypes.typeNames
677
- if subtypes
678
- else [Urn.create_from_string(entity_urn).get_type()]
673
+ subtypes.typeNames if subtypes else [Urn.from_string(entity_urn).get_type()]
679
674
  )
680
675
  for t in types:
681
676
  type_urn = Urn(entity_type="systemNode", entity_id=[parent_urn, t])
@@ -686,7 +681,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
686
681
  def _create_edges_from_data_platform_instance(
687
682
  self, data_platform_instance_urn: Urn
688
683
  ) -> None:
689
- data_platform_urn = DataPlatformUrn.create_from_string(
684
+ data_platform_urn = DataPlatformUrn.from_string(
690
685
  data_platform_instance_urn.get_entity_id()[0]
691
686
  )
692
687
  data_platform_instances_urn = Urn(
@@ -735,7 +730,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
735
730
  if isinstance(aspect, DatasetPropertiesClass):
736
731
  dp: DatasetPropertiesClass = aspect
737
732
  if dp.name:
738
- specific_urn = DatasetUrn.create_from_string(entity_urn)
733
+ specific_urn = DatasetUrn.from_string(entity_urn)
739
734
  if (
740
735
  specific_urn.get_data_platform_urn().get_entity_id_as_string()
741
736
  == "looker"
@@ -755,7 +750,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
755
750
  self.add_edge(entity_urn, "name", cp.name, remove_existing=True)
756
751
  elif isinstance(aspect, DataPlatformInstanceClass):
757
752
  dpi: DataPlatformInstanceClass = aspect
758
- data_platform_urn = DataPlatformUrn.create_from_string(dpi.platform)
753
+ data_platform_urn = DataPlatformUrn.from_string(dpi.platform)
759
754
  data_platform_instance = dpi.instance or "default"
760
755
  data_platform_instance_urn = Urn(
761
756
  entity_type="dataPlatformInstance",
@@ -763,7 +758,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
763
758
  )
764
759
  self._create_edges_from_data_platform_instance(data_platform_instance_urn)
765
760
  elif isinstance(aspect, ChartInfoClass):
766
- urn = Urn.create_from_string(entity_urn)
761
+ urn = Urn.from_string(entity_urn)
767
762
  self.add_edge(
768
763
  entity_urn,
769
764
  "name",
@@ -771,7 +766,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
771
766
  remove_existing=True,
772
767
  )
773
768
  elif isinstance(aspect, DashboardInfoClass):
774
- urn = Urn.create_from_string(entity_urn)
769
+ urn = Urn.from_string(entity_urn)
775
770
  self.add_edge(
776
771
  entity_urn,
777
772
  "name",
datahub/specific/chart.py CHANGED
@@ -1,10 +1,8 @@
1
- import time
2
1
  from typing import Dict, List, Optional, Union
3
2
 
4
3
  from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
5
4
  from datahub.metadata.schema_classes import (
6
5
  AccessLevelClass,
7
- AuditStampClass,
8
6
  ChangeAuditStampsClass,
9
7
  ChartInfoClass as ChartInfo,
10
8
  ChartTypeClass,
@@ -47,43 +45,6 @@ class ChartPatchBuilder(MetadataPatchProposal):
47
45
  )
48
46
  self.ownership_patch_helper = OwnershipPatchHelper(self)
49
47
 
50
- def _mint_auditstamp(self, message: Optional[str] = None) -> AuditStampClass:
51
- """
52
- Creates an AuditStampClass instance with the current timestamp and other default values.
53
-
54
- Args:
55
- message: The message associated with the audit stamp (optional).
56
-
57
- Returns:
58
- An instance of AuditStampClass.
59
- """
60
- return AuditStampClass(
61
- time=int(time.time() * 1000.0),
62
- actor="urn:li:corpuser:datahub",
63
- message=message,
64
- )
65
-
66
- def _ensure_urn_type(
67
- self, entity_type: str, edges: List[Edge], context: str
68
- ) -> None:
69
- """
70
- Ensures that the destination URNs in the given edges have the specified entity type.
71
-
72
- Args:
73
- entity_type: The entity type to check against.
74
- edges: A list of Edge objects.
75
- context: The context or description of the operation.
76
-
77
- Raises:
78
- ValueError: If any of the destination URNs is not of the specified entity type.
79
- """
80
- for e in edges:
81
- urn = Urn.create_from_string(e.destinationUrn)
82
- if not urn.get_type() == entity_type:
83
- raise ValueError(
84
- f"{context}: {e.destinationUrn} is not of type {entity_type}"
85
- )
86
-
87
48
  def add_owner(self, owner: Owner) -> "ChartPatchBuilder":
88
49
  """
89
50
  Adds an owner to the ChartPatchBuilder.
datahub/specific/dashboard.py CHANGED
@@ -1,10 +1,8 @@
1
- import time
2
1
  from typing import Dict, List, Optional, Union
3
2
 
4
3
  from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
5
4
  from datahub.metadata.schema_classes import (
6
5
  AccessLevelClass,
7
- AuditStampClass,
8
6
  ChangeAuditStampsClass,
9
7
  DashboardInfoClass as DashboardInfo,
10
8
  EdgeClass as Edge,
@@ -46,43 +44,6 @@ class DashboardPatchBuilder(MetadataPatchProposal):
46
44
  )
47
45
  self.ownership_patch_helper = OwnershipPatchHelper(self)
48
46
 
49
- def _mint_auditstamp(self, message: Optional[str] = None) -> AuditStampClass:
50
- """
51
- Creates an AuditStampClass instance with the current timestamp and other default values.
52
-
53
- Args:
54
- message: The message associated with the audit stamp (optional).
55
-
56
- Returns:
57
- An instance of AuditStampClass.
58
- """
59
- return AuditStampClass(
60
- time=int(time.time() * 1000.0),
61
- actor="urn:li:corpuser:datahub",
62
- message=message,
63
- )
64
-
65
- def _ensure_urn_type(
66
- self, entity_type: str, edges: List[Edge], context: str
67
- ) -> None:
68
- """
69
- Ensures that the destination URNs in the given edges have the specified entity type.
70
-
71
- Args:
72
- entity_type: The entity type to check against.
73
- edges: A list of Edge objects.
74
- context: The context or description of the operation.
75
-
76
- Raises:
77
- ValueError: If any of the destination URNs is not of the specified entity type.
78
- """
79
- for e in edges:
80
- urn = Urn.create_from_string(e.destinationUrn)
81
- if not urn.get_type() == entity_type:
82
- raise ValueError(
83
- f"{context}: {e.destinationUrn} is not of type {entity_type}"
84
- )
85
-
86
47
  def add_owner(self, owner: Owner) -> "DashboardPatchBuilder":
87
48
  """
88
49
  Adds an owner to the DashboardPatchBuilder.
datahub/specific/datajob.py CHANGED
@@ -1,9 +1,7 @@
1
- import time
2
1
  from typing import Dict, List, Optional, Union
3
2
 
4
3
  from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
5
4
  from datahub.metadata.schema_classes import (
6
- AuditStampClass,
7
5
  DataJobInfoClass as DataJobInfo,
8
6
  DataJobInputOutputClass as DataJobInputOutput,
9
7
  EdgeClass as Edge,
@@ -16,10 +14,9 @@ from datahub.metadata.schema_classes import (
16
14
  SystemMetadataClass,
17
15
  TagAssociationClass as Tag,
18
16
  )
17
+ from datahub.metadata.urns import SchemaFieldUrn, TagUrn, Urn
19
18
  from datahub.specific.custom_properties import CustomPropertiesPatchHelper
20
19
  from datahub.specific.ownership import OwnershipPatchHelper
21
- from datahub.utilities.urns.tag_urn import TagUrn
22
- from datahub.utilities.urns.urn import Urn
23
20
 
24
21
 
25
22
  class DataJobPatchBuilder(MetadataPatchProposal):
@@ -45,43 +42,6 @@ class DataJobPatchBuilder(MetadataPatchProposal):
45
42
  )
46
43
  self.ownership_patch_helper = OwnershipPatchHelper(self)
47
44
 
48
- def _mint_auditstamp(self, message: Optional[str] = None) -> AuditStampClass:
49
- """
50
- Creates an AuditStampClass instance with the current timestamp and other default values.
51
-
52
- Args:
53
- message: The message associated with the audit stamp (optional).
54
-
55
- Returns:
56
- An instance of AuditStampClass.
57
- """
58
- return AuditStampClass(
59
- time=int(time.time() * 1000.0),
60
- actor="urn:li:corpuser:datahub",
61
- message=message,
62
- )
63
-
64
- def _ensure_urn_type(
65
- self, entity_type: str, edges: List[Edge], context: str
66
- ) -> None:
67
- """
68
- Ensures that the destination URNs in the given edges have the specified entity type.
69
-
70
- Args:
71
- entity_type: The entity type to check against.
72
- edges: A list of Edge objects.
73
- context: The context or description of the operation.
74
-
75
- Raises:
76
- ValueError: If any of the destination URNs is not of the specified entity type.
77
- """
78
- for e in edges:
79
- urn = Urn.create_from_string(e.destinationUrn)
80
- if not urn.get_type() == entity_type:
81
- raise ValueError(
82
- f"{context}: {e.destinationUrn} is not of type {entity_type}"
83
- )
84
-
85
45
  def add_owner(self, owner: Owner) -> "DataJobPatchBuilder":
86
46
  """
87
47
  Adds an owner to the DataJobPatchBuilder.
@@ -392,9 +352,7 @@ class DataJobPatchBuilder(MetadataPatchProposal):
392
352
  ValueError: If the input is not a Schema Field urn.
393
353
  """
394
354
  input_urn = str(input)
395
- urn = Urn.create_from_string(input_urn)
396
- if not urn.get_type() == "schemaField":
397
- raise ValueError(f"Input {input} is not a Schema Field urn")
355
+ assert SchemaFieldUrn.from_string(input_urn)
398
356
 
399
357
  self._add_patch(
400
358
  DataJobInputOutput.ASPECT_NAME,
@@ -466,9 +424,7 @@ class DataJobPatchBuilder(MetadataPatchProposal):
466
424
  ValueError: If the output is not a Schema Field urn.
467
425
  """
468
426
  output_urn = str(output)
469
- urn = Urn.create_from_string(output_urn)
470
- if not urn.get_type() == "schemaField":
471
- raise ValueError(f"Input {output} is not a Schema Field urn")
427
+ assert SchemaFieldUrn.from_string(output_urn)
472
428
 
473
429
  self._add_patch(
474
430
  DataJobInputOutput.ASPECT_NAME,
datahub/utilities/urns/_urn_base.py CHANGED
@@ -200,7 +200,7 @@ class Urn:
200
200
  @classmethod
201
201
  @deprecated(reason="no longer needed")
202
202
  def validate(cls, urn_str: str) -> None:
203
- Urn.create_from_string(urn_str)
203
+ Urn.from_string(urn_str)
204
204
 
205
205
  @staticmethod
206
206
  def url_encode(urn: str) -> str:
datahub/utilities/urns/structured_properties_urn.py CHANGED
@@ -4,4 +4,4 @@ __all__ = ["StructuredPropertyUrn", "make_structured_property_urn"]
4
4
 
5
5
 
6
6
  def make_structured_property_urn(structured_property_id: str) -> str:
7
- return str(StructuredPropertyUrn.create_from_string(structured_property_id))
7
+ return str(StructuredPropertyUrn.from_string(structured_property_id))