acryl-datahub 0.15.0.1rc11__py3-none-any.whl → 0.15.0.1rc13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Warning: this version of acryl-datahub has been flagged as a potentially problematic release.

Files changed (40):
  1. {acryl_datahub-0.15.0.1rc11.dist-info → acryl_datahub-0.15.0.1rc13.dist-info}/METADATA +2320 -2324
  2. {acryl_datahub-0.15.0.1rc11.dist-info → acryl_datahub-0.15.0.1rc13.dist-info}/RECORD +40 -39
  3. datahub/__init__.py +1 -1
  4. datahub/api/circuit_breaker/assertion_circuit_breaker.py +5 -4
  5. datahub/configuration/common.py +2 -5
  6. datahub/emitter/mce_builder.py +17 -1
  7. datahub/emitter/mcp_builder.py +2 -7
  8. datahub/emitter/mcp_patch_builder.py +2 -2
  9. datahub/emitter/rest_emitter.py +2 -2
  10. datahub/ingestion/api/closeable.py +3 -3
  11. datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py +4 -7
  12. datahub/ingestion/api/report.py +4 -1
  13. datahub/ingestion/api/sink.py +4 -3
  14. datahub/ingestion/api/source_helpers.py +2 -6
  15. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +44 -1
  16. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +5 -20
  17. datahub/ingestion/source/datahub/datahub_kafka_reader.py +2 -1
  18. datahub/ingestion/source/gc/dataprocess_cleanup.py +23 -10
  19. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +159 -71
  20. datahub/ingestion/source/s3/source.py +1 -1
  21. datahub/ingestion/source/sql/hive.py +15 -0
  22. datahub/ingestion/source/sql/hive_metastore.py +7 -0
  23. datahub/ingestion/source/sql/mssql/source.py +1 -1
  24. datahub/ingestion/source/sql/sql_common.py +41 -102
  25. datahub/ingestion/source/sql/sql_generic_profiler.py +5 -6
  26. datahub/ingestion/source/sql/sql_report.py +2 -0
  27. datahub/ingestion/source/state/checkpoint.py +2 -1
  28. datahub/ingestion/source/tableau/tableau.py +1 -4
  29. datahub/ingestion/source/unity/proxy.py +8 -27
  30. datahub/metadata/_schema_classes.py +61 -1
  31. datahub/metadata/_urns/urn_defs.py +168 -168
  32. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +4 -0
  33. datahub/metadata/schema.avsc +64 -29
  34. datahub/metadata/schemas/DataJobKey.avsc +2 -1
  35. datahub/metadata/schemas/DataTransformLogic.avsc +63 -0
  36. datahub/utilities/time.py +8 -3
  37. datahub/utilities/urns/_urn_base.py +5 -7
  38. {acryl_datahub-0.15.0.1rc11.dist-info → acryl_datahub-0.15.0.1rc13.dist-info}/WHEEL +0 -0
  39. {acryl_datahub-0.15.0.1rc11.dist-info → acryl_datahub-0.15.0.1rc13.dist-info}/entry_points.txt +0 -0
  40. {acryl_datahub-0.15.0.1rc11.dist-info → acryl_datahub-0.15.0.1rc13.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/sql/sql_common.py

@@ -11,7 +11,6 @@ from typing import (
     Dict,
     Iterable,
     List,
-    MutableMapping,
     Optional,
     Set,
     Tuple,
@@ -36,7 +35,6 @@ from datahub.emitter.mce_builder import (
     make_tag_urn,
 )
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.emitter.sql_parsing_builder import SqlParsingBuilder
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.decorators import capability
 from datahub.ingestion.api.incremental_lineage_helper import auto_incremental_lineage
@@ -79,7 +77,6 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
     StatefulIngestionSourceBase,
 )
 from datahub.metadata.com.linkedin.pegasus2avro.common import StatusClass
-from datahub.metadata.com.linkedin.pegasus2avro.dataset import UpstreamLineage
 from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
 from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
 from datahub.metadata.com.linkedin.pegasus2avro.schema import (
@@ -106,17 +103,11 @@ from datahub.metadata.schema_classes import (
     GlobalTagsClass,
     SubTypesClass,
     TagAssociationClass,
-    UpstreamClass,
     ViewPropertiesClass,
 )
 from datahub.sql_parsing.schema_resolver import SchemaResolver
-from datahub.sql_parsing.sqlglot_lineage import (
-    SqlParsingResult,
-    sqlglot_lineage,
-    view_definition_lineage_helper,
-)
+from datahub.sql_parsing.sql_parsing_aggregator import SqlParsingAggregator
 from datahub.telemetry import telemetry
-from datahub.utilities.file_backed_collections import FileBackedDict
 from datahub.utilities.registries.domain_registry import DomainRegistry
 from datahub.utilities.sqlalchemy_type_converter import (
     get_native_data_type_for_sqlalchemy_type,
@@ -347,17 +338,19 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
         )

         self.views_failed_parsing: Set[str] = set()
-        self.schema_resolver: SchemaResolver = SchemaResolver(
+
+        self.discovered_datasets: Set[str] = set()
+        self.aggregator = SqlParsingAggregator(
             platform=self.platform,
             platform_instance=self.config.platform_instance,
             env=self.config.env,
+            graph=self.ctx.graph,
+            generate_lineage=self.include_lineage,
+            generate_usage_statistics=False,
+            generate_operations=False,
+            eager_graph_load=False,
         )
-        self.discovered_datasets: Set[str] = set()
-        self._view_definition_cache: MutableMapping[str, str]
-        if self.config.use_file_backed_cache:
-            self._view_definition_cache = FileBackedDict[str]()
-        else:
-            self._view_definition_cache = {}
+        self.report.sql_aggregator = self.aggregator.report

     @classmethod
     def test_connection(cls, config_dict: dict) -> TestConnectionReport:
@@ -572,36 +565,9 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
                 profile_requests, profiler, platform=self.platform
             )

-        if self.config.include_view_lineage:
-            yield from self.get_view_lineage()
-
-    def get_view_lineage(self) -> Iterable[MetadataWorkUnit]:
-        builder = SqlParsingBuilder(
-            generate_lineage=True,
-            generate_usage_statistics=False,
-            generate_operations=False,
-        )
-        for dataset_name in self._view_definition_cache.keys():
-            # TODO: Ensure that the lineage generated from the view definition
-            # matches the dataset_name.
-            view_definition = self._view_definition_cache[dataset_name]
-            result = self._run_sql_parser(
-                dataset_name,
-                view_definition,
-                self.schema_resolver,
-            )
-            if result and result.out_tables:
-                # This does not yield any workunits but we use
-                # yield here to execute this method
-                yield from builder.process_sql_parsing_result(
-                    result=result,
-                    query=view_definition,
-                    is_view_ddl=True,
-                    include_column_lineage=self.config.include_view_column_lineage,
-                )
-            else:
-                self.views_failed_parsing.add(dataset_name)
-        yield from builder.gen_workunits()
+        # Generate workunit for aggregated SQL parsing results
+        for mcp in self.aggregator.gen_metadata():
+            yield mcp.as_workunit()

     def get_identifier(
         self, *, schema: str, entity: str, inspector: Inspector, **kwargs: Any
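
Taken together, these sql_common.py hunks replace the removed SqlParsingBuilder/_view_definition_cache flow with a single SqlParsingAggregator owned by the source. A minimal sketch of that lifecycle, using only the calls visible in this diff (the concrete platform/env values and the standalone flush helper are illustrative):

```python
# Sketch of the aggregator lifecycle introduced above; values are illustrative.
from datahub.sql_parsing.sql_parsing_aggregator import SqlParsingAggregator

aggregator = SqlParsingAggregator(
    platform="postgres",             # the source passes self.platform
    platform_instance=None,
    env="PROD",
    graph=None,                      # the source passes self.ctx.graph
    generate_lineage=True,           # mirrors the new include_lineage property
    generate_usage_statistics=False,
    generate_operations=False,
    eager_graph_load=False,
)

# While tables and views are processed, the source feeds the aggregator
# (see the later hunks in this file):
#   aggregator.register_schema(dataset_urn, schema_metadata)
#   aggregator.add_view_definition(view_urn=..., view_definition=...,
#                                  default_db=..., default_schema=...)
#   aggregator.add_known_lineage_mapping(upstream_urn=..., downstream_urn=...,
#                                        lineage_type=...)

# At the end of the workunit stream, everything is flushed as MCPs:
def flush_aggregator(aggregator: SqlParsingAggregator):
    for mcp in aggregator.gen_metadata():
        yield mcp.as_workunit()
```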
@@ -760,16 +726,6 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
         )
         dataset_snapshot.aspects.append(dataset_properties)

-        if self.config.include_table_location_lineage and location_urn:
-            external_upstream_table = UpstreamClass(
-                dataset=location_urn,
-                type=DatasetLineageTypeClass.COPY,
-            )
-            yield MetadataChangeProposalWrapper(
-                entityUrn=dataset_snapshot.urn,
-                aspect=UpstreamLineage(upstreams=[external_upstream_table]),
-            ).as_workunit()
-
         extra_tags = self.get_extra_tags(inspector, schema, table)
         pk_constraints: dict = inspector.get_pk_constraint(table, schema)
         partitions: Optional[List[str]] = self.get_partitions(inspector, schema, table)
@@ -795,7 +751,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):

         dataset_snapshot.aspects.append(schema_metadata)
         if self._save_schema_to_resolver():
-            self.schema_resolver.add_schema_metadata(dataset_urn, schema_metadata)
+            self.aggregator.register_schema(dataset_urn, schema_metadata)
             self.discovered_datasets.add(dataset_name)
         db_name = self.get_db_name(inspector)

@@ -815,6 +771,13 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
             ),
         )

+        if self.config.include_table_location_lineage and location_urn:
+            self.aggregator.add_known_lineage_mapping(
+                upstream_urn=location_urn,
+                downstream_urn=dataset_snapshot.urn,
+                lineage_type=DatasetLineageTypeClass.COPY,
+            )
+
         if self.config.domain:
             assert self.domain_registry
             yield from get_domain_wu(
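
Table-location lineage (an external storage location backing a table) is no longer emitted directly as an UpstreamLineage aspect; it is handed to the aggregator as a known mapping. A hedged sketch of the equivalent call, with made-up URNs; the keyword arguments come from the hunk above:

```python
# Hedged sketch: register a COPY lineage edge from a storage location to a table.
from datahub.emitter.mce_builder import make_dataset_urn
from datahub.metadata.schema_classes import DatasetLineageTypeClass
from datahub.sql_parsing.sql_parsing_aggregator import SqlParsingAggregator


def register_location_lineage(aggregator: SqlParsingAggregator) -> None:
    # Illustrative URNs: an S3 location feeding a Hive table.
    location_urn = make_dataset_urn("s3", "my-bucket/warehouse/orders", env="PROD")
    table_urn = make_dataset_urn("hive", "warehouse.orders", env="PROD")
    aggregator.add_known_lineage_mapping(
        upstream_urn=location_urn,
        downstream_urn=table_urn,
        lineage_type=DatasetLineageTypeClass.COPY,
    )
```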
@@ -1089,6 +1052,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
             self.config.platform_instance,
             self.config.env,
         )
+
         try:
             columns = inspector.get_columns(view, schema)
         except KeyError:
@@ -1108,7 +1072,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
             canonical_schema=schema_fields,
         )
         if self._save_schema_to_resolver():
-            self.schema_resolver.add_schema_metadata(dataset_urn, schema_metadata)
+            self.aggregator.register_schema(dataset_urn, schema_metadata)
             self.discovered_datasets.add(dataset_name)

         description, properties, _ = self.get_table_properties(inspector, schema, view)
@@ -1117,7 +1081,18 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
         view_definition = self._get_view_definition(inspector, schema, view)
         properties["view_definition"] = view_definition
         if view_definition and self.config.include_view_lineage:
-            self._view_definition_cache[dataset_name] = view_definition
+            default_db = None
+            default_schema = None
+            try:
+                default_db, default_schema = self.get_db_schema(dataset_name)
+            except ValueError:
+                logger.warning(f"Invalid view identifier: {dataset_name}")
+            self.aggregator.add_view_definition(
+                view_urn=dataset_urn,
+                view_definition=view_definition,
+                default_db=default_db,
+                default_schema=default_schema,
+            )

         dataset_snapshot = DatasetSnapshot(
             urn=dataset_urn,
@@ -1169,48 +1144,9 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
             hasattr(self.config, "include_lineage") and self.config.include_lineage
         )

-    def _run_sql_parser(
-        self, view_identifier: str, query: str, schema_resolver: SchemaResolver
-    ) -> Optional[SqlParsingResult]:
-        try:
-            database, schema = self.get_db_schema(view_identifier)
-        except ValueError:
-            logger.warning(f"Invalid view identifier: {view_identifier}")
-            return None
-        raw_lineage = sqlglot_lineage(
-            query,
-            schema_resolver=schema_resolver,
-            default_db=database,
-            default_schema=schema,
-        )
-        view_urn = make_dataset_urn_with_platform_instance(
-            self.platform,
-            view_identifier,
-            self.config.platform_instance,
-            self.config.env,
-        )
-
-        if raw_lineage.debug_info.table_error:
-            logger.debug(
-                f"Failed to parse lineage for view {view_identifier}: "
-                f"{raw_lineage.debug_info.table_error}"
-            )
-            self.report.num_view_definitions_failed_parsing += 1
-            self.report.view_definitions_parsing_failures.append(
-                f"Table-level sql parsing error for view {view_identifier}: {raw_lineage.debug_info.table_error}"
-            )
-            return None
-
-        elif raw_lineage.debug_info.column_error:
-            self.report.num_view_definitions_failed_column_parsing += 1
-            self.report.view_definitions_parsing_failures.append(
-                f"Column-level sql parsing error for view {view_identifier}: {raw_lineage.debug_info.column_error}"
-            )
-        else:
-            self.report.num_view_definitions_parsed += 1
-            if raw_lineage.out_tables != [view_urn]:
-                self.report.num_view_definitions_view_urn_mismatch += 1
-        return view_definition_lineage_helper(raw_lineage, view_urn)
+    @property
+    def include_lineage(self):
+        return self.config.include_view_lineage

     def get_db_schema(self, dataset_identifier: str) -> Tuple[Optional[str], str]:
         database, schema, _view = dataset_identifier.split(".", 2)
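
The default_db/default_schema passed to add_view_definition come from get_db_schema, whose base implementation is shown in the context above. A quick worked example of that splitting:

```python
# Worked example of the base get_db_schema splitting shown above.
identifier = "sales_db.reporting.daily_orders_view"
database, schema, _view = identifier.split(".", 2)
assert (database, schema) == ("sales_db", "reporting")

# A two-part identifier such as "reporting.daily_orders_view" yields only two
# values, so the unpacking raises ValueError; the new add_view_definition call
# site catches it, logs a warning, and falls back to default_db=None,
# default_schema=None.
```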
@@ -1411,5 +1347,8 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
             schema=schema, table=table, partition=partition, custom_sql=custom_sql
         )

+    def get_schema_resolver(self) -> SchemaResolver:
+        return self.aggregator._schema_resolver
+
     def get_report(self):
         return self.report
datahub/ingestion/source/sql/sql_generic_profiler.py

@@ -7,7 +7,10 @@ from typing import Dict, Iterable, List, Optional, Union, cast
 from sqlalchemy import create_engine, inspect
 from sqlalchemy.engine.reflection import Inspector

-from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance
+from datahub.emitter.mce_builder import (
+    make_dataset_urn_with_platform_instance,
+    parse_ts_millis,
+)
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.ge_data_profiler import (
@@ -245,11 +248,7 @@ class GenericProfiler:
                 # If profiling state exists we have to carry over to the new state
                 self.state_handler.add_to_state(dataset_urn, last_profiled)

-        threshold_time: Optional[datetime] = (
-            datetime.fromtimestamp(last_profiled / 1000, timezone.utc)
-            if last_profiled
-            else None
-        )
+        threshold_time: Optional[datetime] = parse_ts_millis(last_profiled)
         if (
             not threshold_time
             and self.config.profiling.profile_if_updated_since_days is not None
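
Several files in this diff swap the inline `datetime.fromtimestamp(x / 1000, tz=timezone.utc) if x else None` expression for `parse_ts_millis` from `datahub.emitter.mce_builder` (which this release also touches, +17 -1 in the file list). Judging only from the expressions it replaces, an equivalent helper would look roughly like this; the real signature may differ:

```python
from datetime import datetime, timezone
from typing import Optional


def parse_ts_millis(ts: Optional[float]) -> Optional[datetime]:
    """Sketch of the helper's apparent behavior: epoch milliseconds -> tz-aware
    UTC datetime, passing a missing timestamp through as None (reconstructed
    from the inline code it replaces, not copied from mce_builder)."""
    if not ts:
        return None
    return datetime.fromtimestamp(ts / 1000, tz=timezone.utc)


# parse_ts_millis(1735689600000) == datetime(2025, 1, 1, tzinfo=timezone.utc)
```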
datahub/ingestion/source/sql/sql_report.py

@@ -5,6 +5,7 @@ from datahub.ingestion.glossary.classification_mixin import ClassificationReport
 from datahub.ingestion.source.state.stale_entity_removal_handler import (
     StaleEntityRemovalSourceReport,
 )
+from datahub.sql_parsing.sql_parsing_aggregator import SqlAggregatorReport
 from datahub.utilities.lossy_collections import LossyList
 from datahub.utilities.sqlalchemy_query_combiner import SQLAlchemyQueryCombinerReport
 from datahub.utilities.stats_collections import TopKDict, int_top_k_dict
@@ -52,6 +53,7 @@ class SQLSourceReport(
     num_view_definitions_failed_parsing: int = 0
     num_view_definitions_failed_column_parsing: int = 0
     view_definitions_parsing_failures: LossyList[str] = field(default_factory=LossyList)
+    sql_aggregator: Optional[SqlAggregatorReport] = None

     def report_entity_scanned(self, name: str, ent_type: str = "table") -> None:
         """
datahub/ingestion/source/state/checkpoint.py

@@ -12,6 +12,7 @@ from typing import Callable, Generic, Optional, Type, TypeVar
 import pydantic

 from datahub.configuration.common import ConfigModel
+from datahub.emitter.mce_builder import parse_ts_millis
 from datahub.metadata.schema_classes import (
     DatahubIngestionCheckpointClass,
     IngestionCheckpointStateClass,
@@ -144,7 +145,7 @@ class Checkpoint(Generic[StateType]):
             )
             logger.info(
                 f"Successfully constructed last checkpoint state for job {job_name} "
-                f"with timestamp {datetime.fromtimestamp(checkpoint_aspect.timestampMillis/1000, tz=timezone.utc)}"
+                f"with timestamp {parse_ts_millis(checkpoint_aspect.timestampMillis)}"
             )
             return checkpoint
         return None
datahub/ingestion/source/tableau/tableau.py

@@ -920,10 +920,7 @@ class TableauSiteSource:
         return f"/{self.config.env.lower()}{self.no_env_browse_prefix}"

     def _re_authenticate(self) -> None:
-        self.report.info(
-            message="Re-authenticating to Tableau",
-            context=f"site='{self.site_content_url}'",
-        )
+        logger.info(f"Re-authenticating to Tableau site '{self.site_content_url}'")
         # Sign-in again may not be enough because Tableau sometimes caches invalid sessions
         # so we need to recreate the Tableau Server object
         self.server = self.config.make_tableau_client(self.site_content_url)
datahub/ingestion/source/unity/proxy.py

@@ -4,7 +4,7 @@ Manage the communication with DataBricks Server and provide equivalent dataclass

 import dataclasses
 import logging
-from datetime import datetime, timezone
+from datetime import datetime
 from typing import Any, Dict, Iterable, List, Optional, Union, cast
 from unittest.mock import patch

@@ -27,6 +27,7 @@ from databricks.sdk.service.sql import (
 from databricks.sdk.service.workspace import ObjectType

 import datahub
+from datahub.emitter.mce_builder import parse_ts_millis
 from datahub.ingestion.source.unity.hive_metastore_proxy import HiveMetastoreProxy
 from datahub.ingestion.source.unity.proxy_profiling import (
     UnityCatalogProxyProfilingMixin,
@@ -211,16 +212,8 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin):
             id=obj.object_id,
             path=obj.path,
             language=obj.language,
-            created_at=(
-                datetime.fromtimestamp(obj.created_at / 1000, tz=timezone.utc)
-                if obj.created_at
-                else None
-            ),
-            modified_at=(
-                datetime.fromtimestamp(obj.modified_at / 1000, tz=timezone.utc)
-                if obj.modified_at
-                else None
-            ),
+            created_at=parse_ts_millis(obj.created_at),
+            modified_at=parse_ts_millis(obj.modified_at),
         )

     def query_history(
@@ -452,17 +445,9 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin):
             properties=obj.properties or {},
             owner=obj.owner,
             generation=obj.generation,
-            created_at=(
-                datetime.fromtimestamp(obj.created_at / 1000, tz=timezone.utc)
-                if obj.created_at
-                else None
-            ),
+            created_at=(parse_ts_millis(obj.created_at) if obj.created_at else None),
             created_by=obj.created_by,
-            updated_at=(
-                datetime.fromtimestamp(obj.updated_at / 1000, tz=timezone.utc)
-                if obj.updated_at
-                else None
-            ),
+            updated_at=(parse_ts_millis(obj.updated_at) if obj.updated_at else None),
             updated_by=obj.updated_by,
             table_id=obj.table_id,
             comment=obj.comment,
@@ -500,12 +485,8 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin):
             query_id=info.query_id,
             query_text=info.query_text,
             statement_type=info.statement_type,
-            start_time=datetime.fromtimestamp(
-                info.query_start_time_ms / 1000, tz=timezone.utc
-            ),
-            end_time=datetime.fromtimestamp(
-                info.query_end_time_ms / 1000, tz=timezone.utc
-            ),
+            start_time=parse_ts_millis(info.query_start_time_ms),
+            end_time=parse_ts_millis(info.query_end_time_ms),
             user_id=info.user_id,
             user_name=info.user_name,
             executed_as_user_id=info.executed_as_user_id,
datahub/metadata/_schema_classes.py

@@ -4053,6 +4053,60 @@ class DataPlatformInstanceClass(_Aspect):
         self._inner_dict['instance'] = value


+class DataTransformClass(DictWrapper):
+    """Information about a transformation. It may be a query,"""
+
+    RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.common.DataTransform")
+    def __init__(self,
+        queryStatement: Union[None, "QueryStatementClass"]=None,
+    ):
+        super().__init__()
+
+        self.queryStatement = queryStatement
+
+    def _restore_defaults(self) -> None:
+        self.queryStatement = self.RECORD_SCHEMA.fields_dict["queryStatement"].default
+
+
+    @property
+    def queryStatement(self) -> Union[None, "QueryStatementClass"]:
+        """The data transform may be defined by a query statement"""
+        return self._inner_dict.get('queryStatement')  # type: ignore
+
+    @queryStatement.setter
+    def queryStatement(self, value: Union[None, "QueryStatementClass"]) -> None:
+        self._inner_dict['queryStatement'] = value
+
+
+class DataTransformLogicClass(_Aspect):
+    """Information about a Query against one or more data assets (e.g. Tables or Views)."""
+
+
+    ASPECT_NAME = 'dataTransformLogic'
+    ASPECT_INFO = {}
+    RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.common.DataTransformLogic")
+
+    def __init__(self,
+        transforms: List["DataTransformClass"],
+    ):
+        super().__init__()
+
+        self.transforms = transforms
+
+    def _restore_defaults(self) -> None:
+        self.transforms = list()
+
+
+    @property
+    def transforms(self) -> List["DataTransformClass"]:
+        """List of transformations applied"""
+        return self._inner_dict.get('transforms')  # type: ignore
+
+    @transforms.setter
+    def transforms(self, value: List["DataTransformClass"]) -> None:
+        self._inner_dict['transforms'] = value
+
+
 class DeprecationClass(_Aspect):
     """Deprecation status of an entity"""

@@ -14624,7 +14678,7 @@ class DataJobKeyClass(_Aspect):


     ASPECT_NAME = 'dataJobKey'
-    ASPECT_INFO = {'keyForEntity': 'dataJob', 'entityCategory': '_unset_', 'entityAspects': ['datahubIngestionRunSummary', 'datahubIngestionCheckpoint', 'domains', 'deprecation', 'versionInfo', 'dataJobInfo', 'dataJobInputOutput', 'editableDataJobProperties', 'ownership', 'status', 'globalTags', 'browsePaths', 'glossaryTerms', 'institutionalMemory', 'dataPlatformInstance', 'browsePathsV2', 'structuredProperties', 'forms', 'subTypes', 'incidentsSummary', 'testResults']}
+    ASPECT_INFO = {'keyForEntity': 'dataJob', 'entityCategory': '_unset_', 'entityAspects': ['datahubIngestionRunSummary', 'datahubIngestionCheckpoint', 'domains', 'deprecation', 'versionInfo', 'dataJobInfo', 'dataJobInputOutput', 'editableDataJobProperties', 'ownership', 'status', 'globalTags', 'browsePaths', 'glossaryTerms', 'institutionalMemory', 'dataPlatformInstance', 'browsePathsV2', 'structuredProperties', 'forms', 'subTypes', 'incidentsSummary', 'testResults', 'dataTransformLogic']}
     RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.DataJobKey")

     def __init__(self,
@@ -24715,6 +24769,8 @@ __SCHEMA_TYPES = {
     'com.linkedin.pegasus2avro.common.CostCostDiscriminator': CostCostDiscriminatorClass,
     'com.linkedin.pegasus2avro.common.CostType': CostTypeClass,
     'com.linkedin.pegasus2avro.common.DataPlatformInstance': DataPlatformInstanceClass,
+    'com.linkedin.pegasus2avro.common.DataTransform': DataTransformClass,
+    'com.linkedin.pegasus2avro.common.DataTransformLogic': DataTransformLogicClass,
     'com.linkedin.pegasus2avro.common.Deprecation': DeprecationClass,
     'com.linkedin.pegasus2avro.common.Documentation': DocumentationClass,
     'com.linkedin.pegasus2avro.common.DocumentationAssociation': DocumentationAssociationClass,
@@ -25182,6 +25238,8 @@ __SCHEMA_TYPES = {
     'CostCostDiscriminator': CostCostDiscriminatorClass,
     'CostType': CostTypeClass,
     'DataPlatformInstance': DataPlatformInstanceClass,
+    'DataTransform': DataTransformClass,
+    'DataTransformLogic': DataTransformLogicClass,
     'Deprecation': DeprecationClass,
     'Documentation': DocumentationClass,
     'DocumentationAssociation': DocumentationAssociationClass,
@@ -25588,6 +25646,7 @@ ASPECT_CLASSES: List[Type[_Aspect]] = [
     CostClass,
     BrowsePathsClass,
     InstitutionalMemoryClass,
+    DataTransformLogicClass,
     SubTypesClass,
     FormsClass,
     DeprecationClass,
@@ -25802,6 +25861,7 @@ class AspectBag(TypedDict, total=False):
     cost: CostClass
     browsePaths: BrowsePathsClass
     institutionalMemory: InstitutionalMemoryClass
+    dataTransformLogic: DataTransformLogicClass
     subTypes: SubTypesClass
     forms: FormsClass
     deprecation: DeprecationClass
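
These final hunks register the new classes and add `dataTransformLogic` to the `dataJob` entity's aspect list. A hedged sketch of constructing and proposing the new aspect for a data job; the job names and SQL text are made up, and it assumes `QueryStatementClass` (referenced by the new `DataTransformClass` above) takes the statement text as `value`, as elsewhere in the SDK:

```python
# Hedged sketch: attach the new dataTransformLogic aspect to a data job.
from datahub.emitter.mce_builder import make_data_job_urn
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata.schema_classes import (
    DataTransformClass,
    DataTransformLogicClass,
    QueryStatementClass,
)

# Illustrative data job URN (orchestrator/flow/job names are made up).
job_urn = make_data_job_urn(
    orchestrator="airflow", flow_id="nightly_etl", job_id="build_orders"
)

aspect = DataTransformLogicClass(
    transforms=[
        DataTransformClass(
            queryStatement=QueryStatementClass(
                value="INSERT INTO orders_clean SELECT * FROM orders_raw"
            )
        )
    ]
)

mcp = MetadataChangeProposalWrapper(entityUrn=job_urn, aspect=aspect)
# mcp can then be emitted with any DataHub emitter (REST or Kafka).
```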