acryl-datahub 0.15.0rc5__py3-none-any.whl → 0.15.0rc7__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.

This version of acryl-datahub is flagged as a potentially problematic release.

Files changed (43)
  1. {acryl_datahub-0.15.0rc5.dist-info → acryl_datahub-0.15.0rc7.dist-info}/METADATA +2456 -2426
  2. {acryl_datahub-0.15.0rc5.dist-info → acryl_datahub-0.15.0rc7.dist-info}/RECORD +43 -41
  3. {acryl_datahub-0.15.0rc5.dist-info → acryl_datahub-0.15.0rc7.dist-info}/entry_points.txt +1 -0
  4. datahub/__init__.py +1 -1
  5. datahub/api/entities/structuredproperties/structuredproperties.py +1 -1
  6. datahub/cli/put_cli.py +1 -1
  7. datahub/cli/specific/dataproduct_cli.py +1 -1
  8. datahub/emitter/mcp_patch_builder.py +43 -0
  9. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +9 -4
  10. datahub/ingestion/source/aws/sagemaker_processors/models.py +30 -1
  11. datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
  12. datahub/ingestion/source/common/subtypes.py +2 -0
  13. datahub/ingestion/source/csv_enricher.py +1 -1
  14. datahub/ingestion/source/dbt/dbt_common.py +7 -61
  15. datahub/ingestion/source/dremio/dremio_api.py +11 -0
  16. datahub/ingestion/source/dremio/dremio_aspects.py +19 -15
  17. datahub/ingestion/source/dremio/dremio_config.py +5 -0
  18. datahub/ingestion/source/dremio/dremio_entities.py +4 -0
  19. datahub/ingestion/source/dremio/dremio_source.py +7 -2
  20. datahub/ingestion/source/elastic_search.py +1 -1
  21. datahub/ingestion/source/gc/dataprocess_cleanup.py +6 -1
  22. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +1 -1
  23. datahub/ingestion/source/ge_data_profiler.py +23 -1
  24. datahub/ingestion/source/neo4j/__init__.py +0 -0
  25. datahub/ingestion/source/neo4j/neo4j_source.py +331 -0
  26. datahub/ingestion/source/qlik_sense/data_classes.py +1 -0
  27. datahub/ingestion/source/redshift/redshift.py +1 -0
  28. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +1 -0
  29. datahub/ingestion/source/sql/athena.py +46 -22
  30. datahub/ingestion/source/sql/sql_types.py +85 -8
  31. datahub/ingestion/source/unity/proxy_types.py +1 -0
  32. datahub/ingestion/transformer/add_dataset_tags.py +1 -1
  33. datahub/ingestion/transformer/generic_aspect_transformer.py +1 -1
  34. datahub/integrations/assertion/common.py +1 -1
  35. datahub/lite/duckdb_lite.py +12 -17
  36. datahub/specific/chart.py +0 -39
  37. datahub/specific/dashboard.py +0 -39
  38. datahub/specific/datajob.py +3 -47
  39. datahub/utilities/urn_encoder.py +2 -1
  40. datahub/utilities/urns/_urn_base.py +1 -1
  41. datahub/utilities/urns/structured_properties_urn.py +1 -1
  42. {acryl_datahub-0.15.0rc5.dist-info → acryl_datahub-0.15.0rc7.dist-info}/WHEEL +0 -0
  43. {acryl_datahub-0.15.0rc5.dist-info → acryl_datahub-0.15.0rc7.dist-info}/top_level.txt +0 -0

datahub/ingestion/source/sql/athena.py CHANGED
@@ -26,6 +26,7 @@ from datahub.ingestion.api.decorators import (
      platform_name,
      support_status,
  )
+ from datahub.ingestion.api.source import StructuredLogLevel
  from datahub.ingestion.api.workunit import MetadataWorkUnit
  from datahub.ingestion.source.aws.s3_util import make_s3_urn
  from datahub.ingestion.source.common.subtypes import DatasetContainerSubTypes
@@ -35,6 +36,7 @@ from datahub.ingestion.source.sql.sql_common import (
      register_custom_type,
  )
  from datahub.ingestion.source.sql.sql_config import SQLCommonConfig, make_sqlalchemy_uri
+ from datahub.ingestion.source.sql.sql_report import SQLSourceReport
  from datahub.ingestion.source.sql.sql_utils import (
      add_table_to_schema_container,
      gen_database_container,
@@ -48,6 +50,15 @@ from datahub.utilities.sqlalchemy_type_converter import (
      get_schema_fields_for_sqlalchemy_column,
  )

+ try:
+     from typing_extensions import override
+ except ImportError:
+     _F = typing.TypeVar("_F", bound=typing.Callable[..., typing.Any])
+
+     def override(f: _F, /) -> _F: # noqa: F811
+         return f
+
+
  logger = logging.getLogger(__name__)

  assert STRUCT, "required type modules are not available"
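
The try/except block above is a compatibility shim: when `typing_extensions` is unavailable, `override` falls back to a no-op decorator, so the `@override` markers added later in this file stay harmless at runtime and are only enforced by type checkers. A minimal sketch with made-up class names:

    class _Base:
        def get_schema_names(self) -> list:
            return []

    class _Sub(_Base):
        @override  # a type checker flags this if the base method were renamed or removed
        def get_schema_names(self) -> list:
            return ["default"]
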
@@ -322,12 +333,15 @@ class AthenaSource(SQLAlchemySource):
      - Profiling when enabled.
      """

-     table_partition_cache: Dict[str, Dict[str, Partitionitem]] = {}
+     config: AthenaConfig
+     report: SQLSourceReport

      def __init__(self, config, ctx):
          super().__init__(config, ctx, "athena")
          self.cursor: Optional[BaseCursor] = None

+         self.table_partition_cache: Dict[str, Dict[str, Partitionitem]] = {}
+
      @classmethod
      def create(cls, config_dict, ctx):
          config = AthenaConfig.parse_obj(config_dict)
@@ -452,6 +466,7 @@ class AthenaSource(SQLAlchemySource):
          )

      # It seems like database/schema filter in the connection string does not work and this to work around that
+     @override
      def get_schema_names(self, inspector: Inspector) -> List[str]:
          athena_config = typing.cast(AthenaConfig, self.config)
          schemas = inspector.get_schema_names()
@@ -459,34 +474,42 @@ class AthenaSource(SQLAlchemySource):
              return [schema for schema in schemas if schema == athena_config.database]
          return schemas

-     # Overwrite to get partitions
+     @classmethod
+     def _casted_partition_key(cls, key: str) -> str:
+         # We need to cast the partition keys to a VARCHAR, since otherwise
+         # Athena may throw an error during concatenation / comparison.
+         return f"CAST({key} as VARCHAR)"
+
+     @override
      def get_partitions(
          self, inspector: Inspector, schema: str, table: str
-     ) -> List[str]:
-         partitions = []
-
-         athena_config = typing.cast(AthenaConfig, self.config)
-
-         if not athena_config.extract_partitions:
-             return []
+     ) -> Optional[List[str]]:
+         if not self.config.extract_partitions:
+             return None

          if not self.cursor:
-             return []
+             return None

          metadata: AthenaTableMetadata = self.cursor.get_table_metadata(
              table_name=table, schema_name=schema
          )

-         if metadata.partition_keys:
-             for key in metadata.partition_keys:
-                 if key.name:
-                     partitions.append(key.name)
-
-             if not partitions:
-                 return []
+         partitions = []
+         for key in metadata.partition_keys:
+             if key.name:
+                 partitions.append(key.name)
+         if not partitions:
+             return []

-             # We create an artiificaial concatenated partition key to be able to query max partition easier
-             part_concat = "|| '-' ||".join(partitions)
+         with self.report.report_exc(
+             message="Failed to extract partition details",
+             context=f"{schema}.{table}",
+             level=StructuredLogLevel.WARN,
+         ):
+             # We create an artifical concatenated partition key to be able to query max partition easier
+             part_concat = " || '-' || ".join(
+                 self._casted_partition_key(key) for key in partitions
+             )
              max_partition_query = f'select {",".join(partitions)} from "{schema}"."{table}$partitions" where {part_concat} = (select max({part_concat}) from "{schema}"."{table}$partitions")'
              ret = self.cursor.execute(max_partition_query)
              max_partition: Dict[str, str] = {}
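
As a concrete illustration of the reworked query construction (the table and partition-column names below are made up): for a table partitioned by `year` and `month`, the casting plus concatenation yields a max-partition query along these lines:

    partitions = ["year", "month"]

    # _casted_partition_key wraps each key, so mixed int/string partition columns
    # can be concatenated and compared without Athena raising a type error.
    part_concat = " || '-' || ".join(f"CAST({key} as VARCHAR)" for key in partitions)
    # part_concat == "CAST(year as VARCHAR) || '-' || CAST(month as VARCHAR)"

    max_partition_query = (
        f'select {",".join(partitions)} from "db"."tbl$partitions" '
        f'where {part_concat} = (select max({part_concat}) from "db"."tbl$partitions")'
    )
    # i.e. the newest partition is the row whose concatenated, VARCHAR-cast keys
    # equal the maximum of that same expression over the $partitions metadata table.
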
@@ -500,9 +523,8 @@ class AthenaSource(SQLAlchemySource):
                  partitions=partitions,
                  max_partition=max_partition,
              )
-             return partitions

-         return []
+         return partitions

      # Overwrite to modify the creation of schema fields
      def get_schema_fields_for_column(
@@ -551,7 +573,9 @@ class AthenaSource(SQLAlchemySource):
          if partition and partition.max_partition:
              max_partition_filters = []
              for key, value in partition.max_partition.items():
-                 max_partition_filters.append(f"CAST({key} as VARCHAR) = '{value}'")
+                 max_partition_filters.append(
+                     f"{self._casted_partition_key(key)} = '{value}'"
+                 )
              max_partition = str(partition.max_partition)
          return (
              max_partition,

datahub/ingestion/source/sql/sql_types.py CHANGED
@@ -1,5 +1,5 @@
  import re
- from typing import Any, Dict, ValuesView
+ from typing import Any, Dict, Optional, Type, Union, ValuesView

  from datahub.metadata.com.linkedin.pegasus2avro.schema import (
      ArrayType,
@@ -16,14 +16,28 @@ from datahub.metadata.com.linkedin.pegasus2avro.schema import (
      UnionType,
  )

- # these can be obtained by running `select format_type(oid, null),* from pg_type;`
- # we've omitted the types without a meaningful DataHub type (e.g. postgres-specific types, index vectors, etc.)
- # (run `\copy (select format_type(oid, null),* from pg_type) to 'pg_type.csv' csv header;` to get a CSV)
+ DATAHUB_FIELD_TYPE = Union[
+     ArrayType,
+     BooleanType,
+     BytesType,
+     DateType,
+     EnumType,
+     MapType,
+     NullType,
+     NumberType,
+     RecordType,
+     StringType,
+     TimeType,
+     UnionType,
+ ]

- # we map from format_type since this is what dbt uses
- # see https://github.com/fishtown-analytics/dbt/blob/master/plugins/postgres/dbt/include/postgres/macros/catalog.sql#L22

- # see https://www.npgsql.org/dev/types.html for helpful type annotations
+ # These can be obtained by running `select format_type(oid, null),* from pg_type;`
+ # We've omitted the types without a meaningful DataHub type (e.g. postgres-specific types, index vectors, etc.)
+ # (run `\copy (select format_type(oid, null),* from pg_type) to 'pg_type.csv' csv header;` to get a CSV)
+ # We map from format_type since this is what dbt uses.
+ # See https://github.com/fishtown-analytics/dbt/blob/master/plugins/postgres/dbt/include/postgres/macros/catalog.sql#L22
+ # See https://www.npgsql.org/dev/types.html for helpful type annotations
  POSTGRES_TYPES_MAP: Dict[str, Any] = {
      "boolean": BooleanType,
      "bytea": BytesType,
@@ -262,7 +276,6 @@ def resolve_vertica_modified_type(type_string: str) -> Any:
      return VERTICA_SQL_TYPES_MAP[type_string]


- # see https://docs.snowflake.com/en/sql-reference/intro-summary-data-types.html
  SNOWFLAKE_TYPES_MAP: Dict[str, Any] = {
      "NUMBER": NumberType,
      "DECIMAL": NumberType,
@@ -298,6 +311,18 @@ SNOWFLAKE_TYPES_MAP: Dict[str, Any] = {
      "GEOGRAPHY": None,
  }

+
+ def resolve_snowflake_modified_type(type_string: str) -> Any:
+     # Match types with precision and scale, e.g., 'DECIMAL(38,0)'
+     match = re.match(r"([a-zA-Z_]+)\(\d+,\s\d+\)", type_string)
+     if match:
+         modified_type_base = match.group(1)  # Extract the base type
+         return SNOWFLAKE_TYPES_MAP.get(modified_type_base, None)
+
+     # Fallback for types without precision/scale
+     return SNOWFLAKE_TYPES_MAP.get(type_string, None)
+
+
  # see https://github.com/googleapis/python-bigquery-sqlalchemy/blob/main/sqlalchemy_bigquery/_types.py#L32
  BIGQUERY_TYPES_MAP: Dict[str, Any] = {
      "STRING": StringType,
@@ -366,6 +391,7 @@ TRINO_SQL_TYPES_MAP: Dict[str, Any] = {
      "row": RecordType,
      "map": MapType,
      "array": ArrayType,
+     "json": RecordType,
  }

  # https://docs.aws.amazon.com/athena/latest/ug/data-types.html
@@ -430,3 +456,54 @@ VERTICA_SQL_TYPES_MAP: Dict[str, Any] = {
      "geography": None,
      "uuid": StringType,
  }
+
+
+ _merged_mapping = {
+     "boolean": BooleanType,
+     "date": DateType,
+     "time": TimeType,
+     "numeric": NumberType,
+     "text": StringType,
+     "timestamp with time zone": DateType,
+     "timestamp without time zone": DateType,
+     "integer": NumberType,
+     "float8": NumberType,
+     "struct": RecordType,
+     **POSTGRES_TYPES_MAP,
+     **SNOWFLAKE_TYPES_MAP,
+     **BIGQUERY_TYPES_MAP,
+     **SPARK_SQL_TYPES_MAP,
+     **TRINO_SQL_TYPES_MAP,
+     **ATHENA_SQL_TYPES_MAP,
+     **VERTICA_SQL_TYPES_MAP,
+ }
+
+
+ def resolve_sql_type(
+     column_type: Optional[str],
+     platform: Optional[str] = None,
+ ) -> Optional[DATAHUB_FIELD_TYPE]:
+     # In theory, we should use the platform-specific mapping where available.
+     # However, the types don't ever conflict, so the merged mapping is fine.
+     TypeClass: Optional[Type[DATAHUB_FIELD_TYPE]] = (
+         _merged_mapping.get(column_type) if column_type else None
+     )
+
+     if TypeClass is None and column_type:
+         # resolve a modified type
+         if platform == "trino":
+             TypeClass = resolve_trino_modified_type(column_type)
+         elif platform == "athena":
+             TypeClass = resolve_athena_modified_type(column_type)
+         elif platform == "postgres" or platform == "redshift":
+             # Redshift uses a variant of Postgres, so we can use the same logic.
+             TypeClass = resolve_postgres_modified_type(column_type)
+         elif platform == "vertica":
+             TypeClass = resolve_vertica_modified_type(column_type)
+         elif platform == "snowflake":
+             # Snowflake types are uppercase, so we check that.
+             TypeClass = resolve_snowflake_modified_type(column_type.upper())
+
+     if TypeClass:
+         return TypeClass()
+     return None
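
A usage sketch of the two new helpers, using only keys that appear in the maps above; note that the regex in `resolve_snowflake_modified_type` expects a space after the comma, and that `resolve_sql_type` returns an instance of the type class (or None) rather than the class itself:

    from datahub.ingestion.source.sql.sql_types import (
        resolve_snowflake_modified_type,
        resolve_sql_type,
    )

    resolve_snowflake_modified_type("NUMBER(38, 0)")  # regex strips "(38, 0)" -> NumberType class
    resolve_snowflake_modified_type("DECIMAL")        # direct map hit -> NumberType class

    resolve_sql_type("boolean")                  # -> BooleanType() via the merged mapping
    resolve_sql_type("json", platform="trino")   # -> RecordType(), using the new Trino entry
    resolve_sql_type("no_such_type")             # -> None
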

datahub/ingestion/source/unity/proxy_types.py CHANGED
@@ -33,6 +33,7 @@ from datahub.metadata.schema_classes import (

  logger = logging.getLogger(__name__)

+ # TODO: (maybe) Replace with standardized types in sql_types.py
  DATA_TYPE_REGISTRY: dict = {
      ColumnTypeName.BOOLEAN: BooleanTypeClass,
      ColumnTypeName.BYTE: BytesTypeClass,

datahub/ingestion/transformer/add_dataset_tags.py CHANGED
@@ -74,7 +74,7 @@ class AddDatasetTags(DatasetTagsTransformer):
          logger.debug("Generating tags")

          for tag_association in self.processed_tags.values():
-             tag_urn = TagUrn.create_from_string(tag_association.tag)
+             tag_urn = TagUrn.from_string(tag_association.tag)
              mcps.append(
                  MetadataChangeProposalWrapper(
                      entityUrn=tag_urn.urn(),
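
Several of the hunks that follow apply the same mechanical rename from the older `create_from_string` constructors to `from_string` on the urn classes; the new call parses the same urn strings. A small sketch (illustrative tag name):

    from datahub.metadata.urns import TagUrn

    tag_urn = TagUrn.from_string("urn:li:tag:pii")           # new spelling
    # tag_urn = TagUrn.create_from_string("urn:li:tag:pii")  # older spelling being replaced
    assert tag_urn.urn() == "urn:li:tag:pii"
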

datahub/ingestion/transformer/generic_aspect_transformer.py CHANGED
@@ -100,7 +100,7 @@ class GenericAspectTransformer(
              )
              if transformed_aspect:
                  # for end of stream records, we modify the workunit-id
-                 structured_urn = Urn.create_from_string(urn)
+                 structured_urn = Urn.from_string(urn)
                  simple_name = "-".join(structured_urn.get_entity_id())
                  record_metadata = envelope.metadata.copy()
                  record_metadata.update(

datahub/integrations/assertion/common.py CHANGED
@@ -42,7 +42,7 @@ def get_entity_name(assertion: BaseEntityAssertion) -> Tuple[str, str, str]:
      if qualified_name is not None:
          parts = qualified_name.split(".")
      else:
-         urn_id = Urn.create_from_string(assertion.entity).entity_ids[1]
+         urn_id = Urn.from_string(assertion.entity).entity_ids[1]
          parts = urn_id.split(".")
      if len(parts) > 3:
          parts = parts[-3:]

datahub/lite/duckdb_lite.py CHANGED
@@ -609,7 +609,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
              aspect_map, DataPlatformInstanceClass
          ) # type: ignore

-         needs_platform = Urn.create_from_string(entity_urn).get_type() in [
+         needs_platform = Urn.from_string(entity_urn).get_type() in [
              "dataset",
              "container",
              "chart",
@@ -617,7 +617,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
              "dataFlow",
              "dataJob",
          ]
-         entity_urn_parsed = Urn.create_from_string(entity_urn)
+         entity_urn_parsed = Urn.from_string(entity_urn)
          if entity_urn_parsed.get_type() in ["dataFlow", "dataJob"]:
              self.add_edge(
                  entity_urn,
@@ -630,15 +630,12 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
          # this is a top-level entity
          if not dpi:
              logger.debug(f"No data platform instance for {entity_urn}")
-             maybe_parent_urn = Urn.create_from_string(entity_urn).get_entity_id()[0]
+             maybe_parent_urn = Urn.from_string(entity_urn).get_entity_id()[0]
              needs_dpi = False
              if maybe_parent_urn.startswith(Urn.URN_PREFIX):
                  parent_urn = maybe_parent_urn
-                 if (
-                     Urn.create_from_string(maybe_parent_urn).get_type()
-                     == "dataPlatform"
-                 ):
-                     data_platform_urn = DataPlatformUrn.create_from_string(
+                 if Urn.from_string(maybe_parent_urn).get_type() == "dataPlatform":
+                     data_platform_urn = DataPlatformUrn.from_string(
                          maybe_parent_urn
                      )
                      needs_dpi = True
@@ -660,7 +657,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
                  logger.error(f"Failed to generate edges entity {entity_urn}", e)
              parent_urn = str(data_platform_instance_urn)
          else:
-             data_platform_urn = DataPlatformUrn.create_from_string(dpi.platform)
+             data_platform_urn = DataPlatformUrn.from_string(dpi.platform)
              data_platform_instance = dpi.instance or "default"
              data_platform_instance_urn = Urn(
                  entity_type="dataPlatformInstance",
@@ -673,9 +670,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
              parent_urn = "__root__"

          types = (
-             subtypes.typeNames
-             if subtypes
-             else [Urn.create_from_string(entity_urn).get_type()]
+             subtypes.typeNames if subtypes else [Urn.from_string(entity_urn).get_type()]
          )
          for t in types:
              type_urn = Urn(entity_type="systemNode", entity_id=[parent_urn, t])
@@ -686,7 +681,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
      def _create_edges_from_data_platform_instance(
          self, data_platform_instance_urn: Urn
      ) -> None:
-         data_platform_urn = DataPlatformUrn.create_from_string(
+         data_platform_urn = DataPlatformUrn.from_string(
              data_platform_instance_urn.get_entity_id()[0]
          )
          data_platform_instances_urn = Urn(
@@ -735,7 +730,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
          if isinstance(aspect, DatasetPropertiesClass):
              dp: DatasetPropertiesClass = aspect
              if dp.name:
-                 specific_urn = DatasetUrn.create_from_string(entity_urn)
+                 specific_urn = DatasetUrn.from_string(entity_urn)
                  if (
                      specific_urn.get_data_platform_urn().get_entity_id_as_string()
                      == "looker"
@@ -755,7 +750,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
              self.add_edge(entity_urn, "name", cp.name, remove_existing=True)
          elif isinstance(aspect, DataPlatformInstanceClass):
              dpi: DataPlatformInstanceClass = aspect
-             data_platform_urn = DataPlatformUrn.create_from_string(dpi.platform)
+             data_platform_urn = DataPlatformUrn.from_string(dpi.platform)
              data_platform_instance = dpi.instance or "default"
              data_platform_instance_urn = Urn(
                  entity_type="dataPlatformInstance",
@@ -763,7 +758,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
              )
              self._create_edges_from_data_platform_instance(data_platform_instance_urn)
          elif isinstance(aspect, ChartInfoClass):
-             urn = Urn.create_from_string(entity_urn)
+             urn = Urn.from_string(entity_urn)
              self.add_edge(
                  entity_urn,
                  "name",
@@ -771,7 +766,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
                  remove_existing=True,
              )
          elif isinstance(aspect, DashboardInfoClass):
-             urn = Urn.create_from_string(entity_urn)
+             urn = Urn.from_string(entity_urn)
              self.add_edge(
                  entity_urn,
                  "name",
datahub/specific/chart.py CHANGED
@@ -1,10 +1,8 @@
- import time
  from typing import Dict, List, Optional, Union

  from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
  from datahub.metadata.schema_classes import (
      AccessLevelClass,
-     AuditStampClass,
      ChangeAuditStampsClass,
      ChartInfoClass as ChartInfo,
      ChartTypeClass,
@@ -47,43 +45,6 @@ class ChartPatchBuilder(MetadataPatchProposal):
          )
          self.ownership_patch_helper = OwnershipPatchHelper(self)

-     def _mint_auditstamp(self, message: Optional[str] = None) -> AuditStampClass:
-         """
-         Creates an AuditStampClass instance with the current timestamp and other default values.
-
-         Args:
-             message: The message associated with the audit stamp (optional).
-
-         Returns:
-             An instance of AuditStampClass.
-         """
-         return AuditStampClass(
-             time=int(time.time() * 1000.0),
-             actor="urn:li:corpuser:datahub",
-             message=message,
-         )
-
-     def _ensure_urn_type(
-         self, entity_type: str, edges: List[Edge], context: str
-     ) -> None:
-         """
-         Ensures that the destination URNs in the given edges have the specified entity type.
-
-         Args:
-             entity_type: The entity type to check against.
-             edges: A list of Edge objects.
-             context: The context or description of the operation.
-
-         Raises:
-             ValueError: If any of the destination URNs is not of the specified entity type.
-         """
-         for e in edges:
-             urn = Urn.create_from_string(e.destinationUrn)
-             if not urn.get_type() == entity_type:
-                 raise ValueError(
-                     f"{context}: {e.destinationUrn} is not of type {entity_type}"
-                 )
-
      def add_owner(self, owner: Owner) -> "ChartPatchBuilder":
          """
          Adds an owner to the ChartPatchBuilder.
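
The `_mint_auditstamp` and `_ensure_urn_type` helpers deleted here are also deleted from dashboard.py and datajob.py below, while the file list shows datahub/emitter/mcp_patch_builder.py gaining 43 lines; the most plausible reading (not confirmed by the hunks shown) is that the helpers moved onto the shared MetadataPatchProposal base class, roughly as sketched below using the same bodies that were removed:

    # Hypothetical consolidation sketch; the authoritative code is in
    # datahub/emitter/mcp_patch_builder.py, which this diff does not show.
    import time
    from typing import List, Optional

    from datahub.metadata.schema_classes import AuditStampClass, EdgeClass as Edge
    from datahub.metadata.urns import Urn


    class MetadataPatchProposal:
        def _mint_auditstamp(self, message: Optional[str] = None) -> AuditStampClass:
            return AuditStampClass(
                time=int(time.time() * 1000.0),
                actor="urn:li:corpuser:datahub",
                message=message,
            )

        def _ensure_urn_type(
            self, entity_type: str, edges: List[Edge], context: str
        ) -> None:
            for e in edges:
                urn = Urn.from_string(e.destinationUrn)
                if not urn.get_type() == entity_type:
                    raise ValueError(
                        f"{context}: {e.destinationUrn} is not of type {entity_type}"
                    )
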

datahub/specific/dashboard.py CHANGED
@@ -1,10 +1,8 @@
- import time
  from typing import Dict, List, Optional, Union

  from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
  from datahub.metadata.schema_classes import (
      AccessLevelClass,
-     AuditStampClass,
      ChangeAuditStampsClass,
      DashboardInfoClass as DashboardInfo,
      EdgeClass as Edge,
@@ -46,43 +44,6 @@ class DashboardPatchBuilder(MetadataPatchProposal):
          )
          self.ownership_patch_helper = OwnershipPatchHelper(self)

-     def _mint_auditstamp(self, message: Optional[str] = None) -> AuditStampClass:
-         """
-         Creates an AuditStampClass instance with the current timestamp and other default values.
-
-         Args:
-             message: The message associated with the audit stamp (optional).
-
-         Returns:
-             An instance of AuditStampClass.
-         """
-         return AuditStampClass(
-             time=int(time.time() * 1000.0),
-             actor="urn:li:corpuser:datahub",
-             message=message,
-         )
-
-     def _ensure_urn_type(
-         self, entity_type: str, edges: List[Edge], context: str
-     ) -> None:
-         """
-         Ensures that the destination URNs in the given edges have the specified entity type.
-
-         Args:
-             entity_type: The entity type to check against.
-             edges: A list of Edge objects.
-             context: The context or description of the operation.
-
-         Raises:
-             ValueError: If any of the destination URNs is not of the specified entity type.
-         """
-         for e in edges:
-             urn = Urn.create_from_string(e.destinationUrn)
-             if not urn.get_type() == entity_type:
-                 raise ValueError(
-                     f"{context}: {e.destinationUrn} is not of type {entity_type}"
-                 )
-
      def add_owner(self, owner: Owner) -> "DashboardPatchBuilder":
          """
          Adds an owner to the DashboardPatchBuilder.

datahub/specific/datajob.py CHANGED
@@ -1,9 +1,7 @@
- import time
  from typing import Dict, List, Optional, Union

  from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
  from datahub.metadata.schema_classes import (
-     AuditStampClass,
      DataJobInfoClass as DataJobInfo,
      DataJobInputOutputClass as DataJobInputOutput,
      EdgeClass as Edge,
@@ -16,10 +14,9 @@ from datahub.metadata.schema_classes import (
      SystemMetadataClass,
      TagAssociationClass as Tag,
  )
+ from datahub.metadata.urns import SchemaFieldUrn, TagUrn, Urn
  from datahub.specific.custom_properties import CustomPropertiesPatchHelper
  from datahub.specific.ownership import OwnershipPatchHelper
- from datahub.utilities.urns.tag_urn import TagUrn
- from datahub.utilities.urns.urn import Urn


  class DataJobPatchBuilder(MetadataPatchProposal):
@@ -45,43 +42,6 @@ class DataJobPatchBuilder(MetadataPatchProposal):
          )
          self.ownership_patch_helper = OwnershipPatchHelper(self)

-     def _mint_auditstamp(self, message: Optional[str] = None) -> AuditStampClass:
-         """
-         Creates an AuditStampClass instance with the current timestamp and other default values.
-
-         Args:
-             message: The message associated with the audit stamp (optional).
-
-         Returns:
-             An instance of AuditStampClass.
-         """
-         return AuditStampClass(
-             time=int(time.time() * 1000.0),
-             actor="urn:li:corpuser:datahub",
-             message=message,
-         )
-
-     def _ensure_urn_type(
-         self, entity_type: str, edges: List[Edge], context: str
-     ) -> None:
-         """
-         Ensures that the destination URNs in the given edges have the specified entity type.
-
-         Args:
-             entity_type: The entity type to check against.
-             edges: A list of Edge objects.
-             context: The context or description of the operation.
-
-         Raises:
-             ValueError: If any of the destination URNs is not of the specified entity type.
-         """
-         for e in edges:
-             urn = Urn.create_from_string(e.destinationUrn)
-             if not urn.get_type() == entity_type:
-                 raise ValueError(
-                     f"{context}: {e.destinationUrn} is not of type {entity_type}"
-                 )
-
      def add_owner(self, owner: Owner) -> "DataJobPatchBuilder":
          """
          Adds an owner to the DataJobPatchBuilder.
@@ -392,9 +352,7 @@ class DataJobPatchBuilder(MetadataPatchProposal):
              ValueError: If the input is not a Schema Field urn.
          """
          input_urn = str(input)
-         urn = Urn.create_from_string(input_urn)
-         if not urn.get_type() == "schemaField":
-             raise ValueError(f"Input {input} is not a Schema Field urn")
+         assert SchemaFieldUrn.from_string(input_urn)

          self._add_patch(
              DataJobInputOutput.ASPECT_NAME,
@@ -466,9 +424,7 @@ class DataJobPatchBuilder(MetadataPatchProposal):
              ValueError: If the output is not a Schema Field urn.
          """
          output_urn = str(output)
-         urn = Urn.create_from_string(output_urn)
-         if not urn.get_type() == "schemaField":
-             raise ValueError(f"Input {output} is not a Schema Field urn")
+         assert SchemaFieldUrn.from_string(output_urn)

          self._add_patch(
              DataJobInputOutput.ASPECT_NAME,
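
The explicit `get_type() == "schemaField"` check is replaced by parsing the value through `SchemaFieldUrn` itself, so mistyped urns now fail inside `from_string`. A rough sketch (example urns are illustrative, and the exact exception class is an assumption based on how the typed urn parsers report a mismatch):

    from datahub.metadata.urns import SchemaFieldUrn

    # A well-formed schemaField urn parses cleanly.
    SchemaFieldUrn.from_string(
        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db.tbl,PROD),col1)"
    )

    # A urn of another entity type now raises from inside from_string()
    # (previously this was a ValueError from the manual type check).
    SchemaFieldUrn.from_string("urn:li:tag:pii")  # raises an invalid-urn error
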

datahub/utilities/urn_encoder.py CHANGED
@@ -4,7 +4,8 @@ from typing import List
  # NOTE: Frontend relies on encoding these three characters. Specifically, we decode and encode schema fields for column level lineage.
  # If this changes, make appropriate changes to datahub-web-react/src/app/lineage/utils/columnLineageUtils.ts
  # We also rely on encoding these exact three characters when generating schemaField urns in our graphQL layer. Update SchemaFieldUtils if this changes.
- RESERVED_CHARS = {",", "(", ")"}
+ # Also see https://datahubproject.io/docs/what/urn/#restrictions
+ RESERVED_CHARS = {",", "(", ")", "␟"}
  RESERVED_CHARS_EXTENDED = RESERVED_CHARS.union({"%"})



datahub/utilities/urns/_urn_base.py CHANGED
@@ -200,7 +200,7 @@ class Urn:
      @classmethod
      @deprecated(reason="no longer needed")
      def validate(cls, urn_str: str) -> None:
-         Urn.create_from_string(urn_str)
+         Urn.from_string(urn_str)

      @staticmethod
      def url_encode(urn: str) -> str:

datahub/utilities/urns/structured_properties_urn.py CHANGED
@@ -4,4 +4,4 @@ __all__ = ["StructuredPropertyUrn", "make_structured_property_urn"]


  def make_structured_property_urn(structured_property_id: str) -> str:
-     return str(StructuredPropertyUrn.create_from_string(structured_property_id))
+     return str(StructuredPropertyUrn.from_string(structured_property_id))