acryl-datahub 0.15.0.1rc11__py3-none-any.whl → 0.15.0.1rc13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0.1rc11.dist-info → acryl_datahub-0.15.0.1rc13.dist-info}/METADATA +2320 -2324
- {acryl_datahub-0.15.0.1rc11.dist-info → acryl_datahub-0.15.0.1rc13.dist-info}/RECORD +40 -39
- datahub/__init__.py +1 -1
- datahub/api/circuit_breaker/assertion_circuit_breaker.py +5 -4
- datahub/configuration/common.py +2 -5
- datahub/emitter/mce_builder.py +17 -1
- datahub/emitter/mcp_builder.py +2 -7
- datahub/emitter/mcp_patch_builder.py +2 -2
- datahub/emitter/rest_emitter.py +2 -2
- datahub/ingestion/api/closeable.py +3 -3
- datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py +4 -7
- datahub/ingestion/api/report.py +4 -1
- datahub/ingestion/api/sink.py +4 -3
- datahub/ingestion/api/source_helpers.py +2 -6
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +44 -1
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +5 -20
- datahub/ingestion/source/datahub/datahub_kafka_reader.py +2 -1
- datahub/ingestion/source/gc/dataprocess_cleanup.py +23 -10
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +159 -71
- datahub/ingestion/source/s3/source.py +1 -1
- datahub/ingestion/source/sql/hive.py +15 -0
- datahub/ingestion/source/sql/hive_metastore.py +7 -0
- datahub/ingestion/source/sql/mssql/source.py +1 -1
- datahub/ingestion/source/sql/sql_common.py +41 -102
- datahub/ingestion/source/sql/sql_generic_profiler.py +5 -6
- datahub/ingestion/source/sql/sql_report.py +2 -0
- datahub/ingestion/source/state/checkpoint.py +2 -1
- datahub/ingestion/source/tableau/tableau.py +1 -4
- datahub/ingestion/source/unity/proxy.py +8 -27
- datahub/metadata/_schema_classes.py +61 -1
- datahub/metadata/_urns/urn_defs.py +168 -168
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +4 -0
- datahub/metadata/schema.avsc +64 -29
- datahub/metadata/schemas/DataJobKey.avsc +2 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +63 -0
- datahub/utilities/time.py +8 -3
- datahub/utilities/urns/_urn_base.py +5 -7
- {acryl_datahub-0.15.0.1rc11.dist-info → acryl_datahub-0.15.0.1rc13.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.1rc11.dist-info → acryl_datahub-0.15.0.1rc13.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.1rc11.dist-info → acryl_datahub-0.15.0.1rc13.dist-info}/top_level.txt +0 -0

datahub/ingestion/source/sql/sql_common.py

@@ -11,7 +11,6 @@ from typing import (
     Dict,
     Iterable,
     List,
-    MutableMapping,
     Optional,
     Set,
     Tuple,
@@ -36,7 +35,6 @@ from datahub.emitter.mce_builder import (
     make_tag_urn,
 )
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.emitter.sql_parsing_builder import SqlParsingBuilder
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.decorators import capability
 from datahub.ingestion.api.incremental_lineage_helper import auto_incremental_lineage
@@ -79,7 +77,6 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
     StatefulIngestionSourceBase,
 )
 from datahub.metadata.com.linkedin.pegasus2avro.common import StatusClass
-from datahub.metadata.com.linkedin.pegasus2avro.dataset import UpstreamLineage
 from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
 from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
 from datahub.metadata.com.linkedin.pegasus2avro.schema import (
@@ -106,17 +103,11 @@ from datahub.metadata.schema_classes import (
     GlobalTagsClass,
     SubTypesClass,
     TagAssociationClass,
-    UpstreamClass,
     ViewPropertiesClass,
 )
 from datahub.sql_parsing.schema_resolver import SchemaResolver
-from datahub.sql_parsing.
-    SqlParsingResult,
-    sqlglot_lineage,
-    view_definition_lineage_helper,
-)
+from datahub.sql_parsing.sql_parsing_aggregator import SqlParsingAggregator
 from datahub.telemetry import telemetry
-from datahub.utilities.file_backed_collections import FileBackedDict
 from datahub.utilities.registries.domain_registry import DomainRegistry
 from datahub.utilities.sqlalchemy_type_converter import (
     get_native_data_type_for_sqlalchemy_type,
@@ -347,17 +338,19 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
         )

         self.views_failed_parsing: Set[str] = set()
-
+
+        self.discovered_datasets: Set[str] = set()
+        self.aggregator = SqlParsingAggregator(
             platform=self.platform,
             platform_instance=self.config.platform_instance,
             env=self.config.env,
+            graph=self.ctx.graph,
+            generate_lineage=self.include_lineage,
+            generate_usage_statistics=False,
+            generate_operations=False,
+            eager_graph_load=False,
         )
-        self.
-        self._view_definition_cache: MutableMapping[str, str]
-        if self.config.use_file_backed_cache:
-            self._view_definition_cache = FileBackedDict[str]()
-        else:
-            self._view_definition_cache = {}
+        self.report.sql_aggregator = self.aggregator.report

     @classmethod
     def test_connection(cls, config_dict: dict) -> TestConnectionReport:
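
The hunks above (and the ones that follow) replace the per-source SqlParsingBuilder and view-definition cache with a shared SqlParsingAggregator. A minimal sketch of the new flow, using only the calls visible in this diff; the platform name, URNs, and SQL below are made-up placeholders, and the real source wires this into SQLAlchemySource rather than using it standalone:

    from datahub.sql_parsing.sql_parsing_aggregator import SqlParsingAggregator

    # Construct the aggregator once, mirroring the arguments shown in the __init__ hunk.
    aggregator = SqlParsingAggregator(
        platform="postgres",          # placeholder platform
        platform_instance=None,
        env="PROD",
        graph=None,                   # no DataHub graph connection in this sketch
        generate_lineage=True,
        generate_usage_statistics=False,
        generate_operations=False,
        eager_graph_load=False,
    )

    # Schemas are registered as tables/views are scanned:
    # aggregator.register_schema(dataset_urn, schema_metadata)

    # View DDL is handed to the aggregator instead of a local cache.
    aggregator.add_view_definition(
        view_urn="urn:li:dataset:(urn:li:dataPlatform:postgres,db.public.my_view,PROD)",
        view_definition="CREATE VIEW public.my_view AS SELECT id FROM public.my_table",
        default_db="db",
        default_schema="public",
    )

    # Lineage and query metadata are emitted in one pass at the end of ingestion.
    for mcp in aggregator.gen_metadata():
        workunit = mcp.as_workunit()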
@@ -572,36 +565,9 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
             profile_requests, profiler, platform=self.platform
         )

-
-
-
-    def get_view_lineage(self) -> Iterable[MetadataWorkUnit]:
-        builder = SqlParsingBuilder(
-            generate_lineage=True,
-            generate_usage_statistics=False,
-            generate_operations=False,
-        )
-        for dataset_name in self._view_definition_cache.keys():
-            # TODO: Ensure that the lineage generated from the view definition
-            # matches the dataset_name.
-            view_definition = self._view_definition_cache[dataset_name]
-            result = self._run_sql_parser(
-                dataset_name,
-                view_definition,
-                self.schema_resolver,
-            )
-            if result and result.out_tables:
-                # This does not yield any workunits but we use
-                # yield here to execute this method
-                yield from builder.process_sql_parsing_result(
-                    result=result,
-                    query=view_definition,
-                    is_view_ddl=True,
-                    include_column_lineage=self.config.include_view_column_lineage,
-                )
-            else:
-                self.views_failed_parsing.add(dataset_name)
-        yield from builder.gen_workunits()
+        # Generate workunit for aggregated SQL parsing results
+        for mcp in self.aggregator.gen_metadata():
+            yield mcp.as_workunit()

     def get_identifier(
         self, *, schema: str, entity: str, inspector: Inspector, **kwargs: Any
@@ -760,16 +726,6 @@
         )
         dataset_snapshot.aspects.append(dataset_properties)

-        if self.config.include_table_location_lineage and location_urn:
-            external_upstream_table = UpstreamClass(
-                dataset=location_urn,
-                type=DatasetLineageTypeClass.COPY,
-            )
-            yield MetadataChangeProposalWrapper(
-                entityUrn=dataset_snapshot.urn,
-                aspect=UpstreamLineage(upstreams=[external_upstream_table]),
-            ).as_workunit()
-
         extra_tags = self.get_extra_tags(inspector, schema, table)
         pk_constraints: dict = inspector.get_pk_constraint(table, schema)
         partitions: Optional[List[str]] = self.get_partitions(inspector, schema, table)
@@ -795,7 +751,7 @@

         dataset_snapshot.aspects.append(schema_metadata)
         if self._save_schema_to_resolver():
-            self.
+            self.aggregator.register_schema(dataset_urn, schema_metadata)
             self.discovered_datasets.add(dataset_name)
         db_name = self.get_db_name(inspector)
@@ -815,6 +771,13 @@
             ),
         )

+        if self.config.include_table_location_lineage and location_urn:
+            self.aggregator.add_known_lineage_mapping(
+                upstream_urn=location_urn,
+                downstream_urn=dataset_snapshot.urn,
+                lineage_type=DatasetLineageTypeClass.COPY,
+            )
+
         if self.config.domain:
             assert self.domain_registry
             yield from get_domain_wu(
@@ -1089,6 +1052,7 @@
             self.config.platform_instance,
             self.config.env,
         )
+
         try:
             columns = inspector.get_columns(view, schema)
         except KeyError:
@@ -1108,7 +1072,7 @@
             canonical_schema=schema_fields,
         )
         if self._save_schema_to_resolver():
-            self.
+            self.aggregator.register_schema(dataset_urn, schema_metadata)
             self.discovered_datasets.add(dataset_name)

         description, properties, _ = self.get_table_properties(inspector, schema, view)
@@ -1117,7 +1081,18 @@
         view_definition = self._get_view_definition(inspector, schema, view)
         properties["view_definition"] = view_definition
         if view_definition and self.config.include_view_lineage:
-
+            default_db = None
+            default_schema = None
+            try:
+                default_db, default_schema = self.get_db_schema(dataset_name)
+            except ValueError:
+                logger.warning(f"Invalid view identifier: {dataset_name}")
+            self.aggregator.add_view_definition(
+                view_urn=dataset_urn,
+                view_definition=view_definition,
+                default_db=default_db,
+                default_schema=default_schema,
+            )

         dataset_snapshot = DatasetSnapshot(
             urn=dataset_urn,
@@ -1169,48 +1144,9 @@
             hasattr(self.config, "include_lineage") and self.config.include_lineage
         )

-
-
-
-        try:
-            database, schema = self.get_db_schema(view_identifier)
-        except ValueError:
-            logger.warning(f"Invalid view identifier: {view_identifier}")
-            return None
-        raw_lineage = sqlglot_lineage(
-            query,
-            schema_resolver=schema_resolver,
-            default_db=database,
-            default_schema=schema,
-        )
-        view_urn = make_dataset_urn_with_platform_instance(
-            self.platform,
-            view_identifier,
-            self.config.platform_instance,
-            self.config.env,
-        )
-
-        if raw_lineage.debug_info.table_error:
-            logger.debug(
-                f"Failed to parse lineage for view {view_identifier}: "
-                f"{raw_lineage.debug_info.table_error}"
-            )
-            self.report.num_view_definitions_failed_parsing += 1
-            self.report.view_definitions_parsing_failures.append(
-                f"Table-level sql parsing error for view {view_identifier}: {raw_lineage.debug_info.table_error}"
-            )
-            return None
-
-        elif raw_lineage.debug_info.column_error:
-            self.report.num_view_definitions_failed_column_parsing += 1
-            self.report.view_definitions_parsing_failures.append(
-                f"Column-level sql parsing error for view {view_identifier}: {raw_lineage.debug_info.column_error}"
-            )
-        else:
-            self.report.num_view_definitions_parsed += 1
-            if raw_lineage.out_tables != [view_urn]:
-                self.report.num_view_definitions_view_urn_mismatch += 1
-        return view_definition_lineage_helper(raw_lineage, view_urn)
+    @property
+    def include_lineage(self):
+        return self.config.include_view_lineage

     def get_db_schema(self, dataset_identifier: str) -> Tuple[Optional[str], str]:
         database, schema, _view = dataset_identifier.split(".", 2)
@@ -1411,5 +1347,8 @@
             schema=schema, table=table, partition=partition, custom_sql=custom_sql
         )

+    def get_schema_resolver(self) -> SchemaResolver:
+        return self.aggregator._schema_resolver
+
     def get_report(self):
         return self.report

datahub/ingestion/source/sql/sql_generic_profiler.py

@@ -7,7 +7,10 @@ from typing import Dict, Iterable, List, Optional, Union, cast
 from sqlalchemy import create_engine, inspect
 from sqlalchemy.engine.reflection import Inspector

-from datahub.emitter.mce_builder import
+from datahub.emitter.mce_builder import (
+    make_dataset_urn_with_platform_instance,
+    parse_ts_millis,
+)
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.ge_data_profiler import (
@@ -245,11 +248,7 @@ class GenericProfiler:
         # If profiling state exists we have to carry over to the new state
         self.state_handler.add_to_state(dataset_urn, last_profiled)

-        threshold_time: Optional[datetime] = (
-            datetime.fromtimestamp(last_profiled / 1000, timezone.utc)
-            if last_profiled
-            else None
-        )
+        threshold_time: Optional[datetime] = parse_ts_millis(last_profiled)
         if (
             not threshold_time
             and self.config.profiling.profile_if_updated_since_days is not None
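
Several hunks in this release swap open-coded `datetime.fromtimestamp(ts / 1000, tz=timezone.utc)` expressions for the new `mce_builder.parse_ts_millis` helper. Judging from the call sites in this diff, the helper is roughly equivalent to the sketch below; the exact signature in `mce_builder` may differ:

    from datetime import datetime, timezone
    from typing import Optional

    def parse_ts_millis(ts: Optional[float]) -> Optional[datetime]:
        # None-safe conversion of epoch milliseconds to an aware UTC datetime,
        # matching the expressions this release removes from the call sites.
        if ts is None:
            return None
        return datetime.fromtimestamp(ts / 1000, tz=timezone.utc)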

datahub/ingestion/source/sql/sql_report.py

@@ -5,6 +5,7 @@ from datahub.ingestion.glossary.classification_mixin import ClassificationReport
 from datahub.ingestion.source.state.stale_entity_removal_handler import (
     StaleEntityRemovalSourceReport,
 )
+from datahub.sql_parsing.sql_parsing_aggregator import SqlAggregatorReport
 from datahub.utilities.lossy_collections import LossyList
 from datahub.utilities.sqlalchemy_query_combiner import SQLAlchemyQueryCombinerReport
 from datahub.utilities.stats_collections import TopKDict, int_top_k_dict
@@ -52,6 +53,7 @@ class SQLSourceReport(
     num_view_definitions_failed_parsing: int = 0
     num_view_definitions_failed_column_parsing: int = 0
     view_definitions_parsing_failures: LossyList[str] = field(default_factory=LossyList)
+    sql_aggregator: Optional[SqlAggregatorReport] = None

     def report_entity_scanned(self, name: str, ent_type: str = "table") -> None:
         """

datahub/ingestion/source/state/checkpoint.py

@@ -12,6 +12,7 @@ from typing import Callable, Generic, Optional, Type, TypeVar
 import pydantic

 from datahub.configuration.common import ConfigModel
+from datahub.emitter.mce_builder import parse_ts_millis
 from datahub.metadata.schema_classes import (
     DatahubIngestionCheckpointClass,
     IngestionCheckpointStateClass,
@@ -144,7 +145,7 @@ class Checkpoint(Generic[StateType]):
             )
             logger.info(
                 f"Successfully constructed last checkpoint state for job {job_name} "
-                f"with timestamp {
+                f"with timestamp {parse_ts_millis(checkpoint_aspect.timestampMillis)}"
             )
             return checkpoint
         return None
|
|
|
920
920
|
return f"/{self.config.env.lower()}{self.no_env_browse_prefix}"
|
|
921
921
|
|
|
922
922
|
def _re_authenticate(self) -> None:
|
|
923
|
-
|
|
924
|
-
message="Re-authenticating to Tableau",
|
|
925
|
-
context=f"site='{self.site_content_url}'",
|
|
926
|
-
)
|
|
923
|
+
logger.info(f"Re-authenticating to Tableau site '{self.site_content_url}'")
|
|
927
924
|
# Sign-in again may not be enough because Tableau sometimes caches invalid sessions
|
|
928
925
|
# so we need to recreate the Tableau Server object
|
|
929
926
|
self.server = self.config.make_tableau_client(self.site_content_url)
|
|

datahub/ingestion/source/unity/proxy.py

@@ -4,7 +4,7 @@ Manage the communication with DataBricks Server and provide equivalent dataclass

 import dataclasses
 import logging
-from datetime import datetime
+from datetime import datetime
 from typing import Any, Dict, Iterable, List, Optional, Union, cast
 from unittest.mock import patch

@@ -27,6 +27,7 @@ from databricks.sdk.service.sql import (
 from databricks.sdk.service.workspace import ObjectType

 import datahub
+from datahub.emitter.mce_builder import parse_ts_millis
 from datahub.ingestion.source.unity.hive_metastore_proxy import HiveMetastoreProxy
 from datahub.ingestion.source.unity.proxy_profiling import (
     UnityCatalogProxyProfilingMixin,
@@ -211,16 +212,8 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin):
             id=obj.object_id,
             path=obj.path,
             language=obj.language,
-            created_at=(
-                datetime.fromtimestamp(obj.created_at / 1000, tz=timezone.utc)
-                if obj.created_at
-                else None
-            ),
-            modified_at=(
-                datetime.fromtimestamp(obj.modified_at / 1000, tz=timezone.utc)
-                if obj.modified_at
-                else None
-            ),
+            created_at=parse_ts_millis(obj.created_at),
+            modified_at=parse_ts_millis(obj.modified_at),
         )

     def query_history(
@@ -452,17 +445,9 @@
             properties=obj.properties or {},
             owner=obj.owner,
             generation=obj.generation,
-            created_at=(
-                datetime.fromtimestamp(obj.created_at / 1000, tz=timezone.utc)
-                if obj.created_at
-                else None
-            ),
+            created_at=(parse_ts_millis(obj.created_at) if obj.created_at else None),
             created_by=obj.created_by,
-            updated_at=(
-                datetime.fromtimestamp(obj.updated_at / 1000, tz=timezone.utc)
-                if obj.updated_at
-                else None
-            ),
+            updated_at=(parse_ts_millis(obj.updated_at) if obj.updated_at else None),
             updated_by=obj.updated_by,
             table_id=obj.table_id,
             comment=obj.comment,
@@ -500,12 +485,8 @@
             query_id=info.query_id,
             query_text=info.query_text,
             statement_type=info.statement_type,
-            start_time=datetime.fromtimestamp(
-                info.query_start_time_ms / 1000, tz=timezone.utc
-            ),
-            end_time=datetime.fromtimestamp(
-                info.query_end_time_ms / 1000, tz=timezone.utc
-            ),
+            start_time=parse_ts_millis(info.query_start_time_ms),
+            end_time=parse_ts_millis(info.query_end_time_ms),
             user_id=info.user_id,
             user_name=info.user_name,
             executed_as_user_id=info.executed_as_user_id,

datahub/metadata/_schema_classes.py

@@ -4053,6 +4053,60 @@ class DataPlatformInstanceClass(_Aspect):
         self._inner_dict['instance'] = value


+class DataTransformClass(DictWrapper):
+    """Information about a transformation. It may be a query,"""
+
+    RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.common.DataTransform")
+    def __init__(self,
+        queryStatement: Union[None, "QueryStatementClass"]=None,
+    ):
+        super().__init__()
+
+        self.queryStatement = queryStatement
+
+    def _restore_defaults(self) -> None:
+        self.queryStatement = self.RECORD_SCHEMA.fields_dict["queryStatement"].default
+
+
+    @property
+    def queryStatement(self) -> Union[None, "QueryStatementClass"]:
+        """The data transform may be defined by a query statement"""
+        return self._inner_dict.get('queryStatement')  # type: ignore
+
+    @queryStatement.setter
+    def queryStatement(self, value: Union[None, "QueryStatementClass"]) -> None:
+        self._inner_dict['queryStatement'] = value
+
+
+class DataTransformLogicClass(_Aspect):
+    """Information about a Query against one or more data assets (e.g. Tables or Views)."""
+
+
+    ASPECT_NAME = 'dataTransformLogic'
+    ASPECT_INFO = {}
+    RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.common.DataTransformLogic")
+
+    def __init__(self,
+        transforms: List["DataTransformClass"],
+    ):
+        super().__init__()
+
+        self.transforms = transforms
+
+    def _restore_defaults(self) -> None:
+        self.transforms = list()
+
+
+    @property
+    def transforms(self) -> List["DataTransformClass"]:
+        """List of transformations applied"""
+        return self._inner_dict.get('transforms')  # type: ignore
+
+    @transforms.setter
+    def transforms(self, value: List["DataTransformClass"]) -> None:
+        self._inner_dict['transforms'] = value
+
+
 class DeprecationClass(_Aspect):
     """Deprecation status of an entity"""

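
The new dataTransformLogic aspect is added to the dataJob entity (it appears in DataJobKey's entityAspects below). A hedged sketch of constructing and emitting it with the generated classes; the job URN and SQL are placeholders, and QueryStatementClass/QueryLanguageClass are assumed to be the existing generated query-statement classes rather than something introduced by this release:

    from datahub.emitter.mcp import MetadataChangeProposalWrapper
    from datahub.metadata.schema_classes import (
        DataTransformClass,
        DataTransformLogicClass,
        QueryLanguageClass,
        QueryStatementClass,
    )

    # Placeholder dataJob URN.
    datajob_urn = "urn:li:dataJob:(urn:li:dataFlow:(airflow,demo_dag,PROD),demo_task)"

    # A dataTransformLogic aspect holds a list of transforms, each optionally
    # described by a query statement.
    transform_logic = DataTransformLogicClass(
        transforms=[
            DataTransformClass(
                queryStatement=QueryStatementClass(
                    value="INSERT INTO sales_agg SELECT region, sum(amount) FROM sales GROUP BY region",
                    language=QueryLanguageClass.SQL,
                )
            )
        ]
    )

    mcp = MetadataChangeProposalWrapper(entityUrn=datajob_urn, aspect=transform_logic)
    # mcp can then be emitted with any DataHub emitter, e.g. a REST emitter's emit_mcp call.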
@@ -14624,7 +14678,7 @@ class DataJobKeyClass(_Aspect):


     ASPECT_NAME = 'dataJobKey'
-    ASPECT_INFO = {'keyForEntity': 'dataJob', 'entityCategory': '_unset_', 'entityAspects': ['datahubIngestionRunSummary', 'datahubIngestionCheckpoint', 'domains', 'deprecation', 'versionInfo', 'dataJobInfo', 'dataJobInputOutput', 'editableDataJobProperties', 'ownership', 'status', 'globalTags', 'browsePaths', 'glossaryTerms', 'institutionalMemory', 'dataPlatformInstance', 'browsePathsV2', 'structuredProperties', 'forms', 'subTypes', 'incidentsSummary', 'testResults']}
+    ASPECT_INFO = {'keyForEntity': 'dataJob', 'entityCategory': '_unset_', 'entityAspects': ['datahubIngestionRunSummary', 'datahubIngestionCheckpoint', 'domains', 'deprecation', 'versionInfo', 'dataJobInfo', 'dataJobInputOutput', 'editableDataJobProperties', 'ownership', 'status', 'globalTags', 'browsePaths', 'glossaryTerms', 'institutionalMemory', 'dataPlatformInstance', 'browsePathsV2', 'structuredProperties', 'forms', 'subTypes', 'incidentsSummary', 'testResults', 'dataTransformLogic']}
     RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.DataJobKey")

     def __init__(self,
@@ -24715,6 +24769,8 @@ __SCHEMA_TYPES = {
     'com.linkedin.pegasus2avro.common.CostCostDiscriminator': CostCostDiscriminatorClass,
     'com.linkedin.pegasus2avro.common.CostType': CostTypeClass,
     'com.linkedin.pegasus2avro.common.DataPlatformInstance': DataPlatformInstanceClass,
+    'com.linkedin.pegasus2avro.common.DataTransform': DataTransformClass,
+    'com.linkedin.pegasus2avro.common.DataTransformLogic': DataTransformLogicClass,
     'com.linkedin.pegasus2avro.common.Deprecation': DeprecationClass,
     'com.linkedin.pegasus2avro.common.Documentation': DocumentationClass,
     'com.linkedin.pegasus2avro.common.DocumentationAssociation': DocumentationAssociationClass,
@@ -25182,6 +25238,8 @@ __SCHEMA_TYPES = {
     'CostCostDiscriminator': CostCostDiscriminatorClass,
     'CostType': CostTypeClass,
     'DataPlatformInstance': DataPlatformInstanceClass,
+    'DataTransform': DataTransformClass,
+    'DataTransformLogic': DataTransformLogicClass,
     'Deprecation': DeprecationClass,
     'Documentation': DocumentationClass,
     'DocumentationAssociation': DocumentationAssociationClass,
@@ -25588,6 +25646,7 @@ ASPECT_CLASSES: List[Type[_Aspect]] = [
     CostClass,
     BrowsePathsClass,
     InstitutionalMemoryClass,
+    DataTransformLogicClass,
     SubTypesClass,
     FormsClass,
     DeprecationClass,
@@ -25802,6 +25861,7 @@ class AspectBag(TypedDict, total=False):
     cost: CostClass
     browsePaths: BrowsePathsClass
     institutionalMemory: InstitutionalMemoryClass
+    dataTransformLogic: DataTransformLogicClass
     subTypes: SubTypesClass
     forms: FormsClass
     deprecation: DeprecationClass