acryl-datahub 0.15.0rc24__py3-none-any.whl → 0.15.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0rc24.dist-info → acryl_datahub-0.15.0.1.dist-info}/METADATA +2408 -2412
- {acryl_datahub-0.15.0rc24.dist-info → acryl_datahub-0.15.0.1.dist-info}/RECORD +116 -106
- {acryl_datahub-0.15.0rc24.dist-info → acryl_datahub-0.15.0.1.dist-info}/WHEEL +1 -1
- {acryl_datahub-0.15.0rc24.dist-info → acryl_datahub-0.15.0.1.dist-info}/entry_points.txt +1 -1
- datahub/__init__.py +1 -1
- datahub/api/circuit_breaker/assertion_circuit_breaker.py +5 -4
- datahub/api/entities/structuredproperties/structuredproperties.py +20 -8
- datahub/configuration/common.py +2 -5
- datahub/configuration/source_common.py +13 -0
- datahub/emitter/mce_builder.py +20 -4
- datahub/emitter/mcp_builder.py +2 -7
- datahub/emitter/mcp_patch_builder.py +37 -13
- datahub/emitter/rest_emitter.py +25 -3
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +98 -0
- datahub/ingestion/api/closeable.py +3 -3
- datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py +4 -7
- datahub/ingestion/api/report.py +4 -1
- datahub/ingestion/api/sink.py +4 -3
- datahub/ingestion/api/source.py +4 -0
- datahub/ingestion/api/source_helpers.py +2 -6
- datahub/ingestion/glossary/classifier.py +2 -3
- datahub/ingestion/graph/client.py +6 -3
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +44 -1
- datahub/ingestion/source/aws/aws_common.py +231 -27
- datahub/ingestion/source/aws/glue.py +12 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +10 -18
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +3 -9
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +5 -20
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +11 -17
- datahub/ingestion/source/bigquery_v2/lineage.py +9 -22
- datahub/ingestion/source/datahub/config.py +22 -1
- datahub/ingestion/source/datahub/datahub_database_reader.py +3 -17
- datahub/ingestion/source/datahub/datahub_kafka_reader.py +2 -1
- datahub/ingestion/source/datahub/datahub_source.py +1 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +10 -3
- datahub/ingestion/source/gc/datahub_gc.py +21 -5
- datahub/ingestion/source/gc/dataprocess_cleanup.py +23 -10
- datahub/ingestion/source/gc/execution_request_cleanup.py +61 -16
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +178 -83
- datahub/ingestion/source/iceberg/iceberg.py +27 -1
- datahub/ingestion/source/iceberg/iceberg_common.py +4 -0
- datahub/ingestion/source/kafka_connect/__init__.py +0 -0
- datahub/ingestion/source/kafka_connect/common.py +202 -0
- datahub/ingestion/source/kafka_connect/kafka_connect.py +367 -0
- datahub/ingestion/source/kafka_connect/sink_connectors.py +341 -0
- datahub/ingestion/source/kafka_connect/source_connectors.py +570 -0
- datahub/ingestion/source/looker/looker_common.py +63 -2
- datahub/ingestion/source/looker/looker_dataclasses.py +7 -9
- datahub/ingestion/source/looker/looker_lib_wrapper.py +13 -1
- datahub/ingestion/source/looker/looker_source.py +31 -4
- datahub/ingestion/source/looker/looker_usage.py +23 -17
- datahub/ingestion/source/mlflow.py +30 -5
- datahub/ingestion/source/mode.py +40 -27
- datahub/ingestion/source/powerbi/config.py +1 -14
- datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +1 -1
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +1 -1
- datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule +16 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +16 -15
- datahub/ingestion/source/s3/source.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +13 -34
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +14 -64
- datahub/ingestion/source/snowflake/snowflake_queries.py +44 -14
- datahub/ingestion/source/snowflake/snowflake_query.py +5 -10
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +53 -7
- datahub/ingestion/source/snowflake/snowflake_shares.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +2 -5
- datahub/ingestion/source/snowflake/snowflake_utils.py +22 -18
- datahub/ingestion/source/snowflake/snowflake_v2.py +38 -34
- datahub/ingestion/source/sql/hive.py +621 -8
- datahub/ingestion/source/sql/hive_metastore.py +7 -0
- datahub/ingestion/source/sql/mssql/job_models.py +30 -1
- datahub/ingestion/source/sql/mssql/source.py +15 -1
- datahub/ingestion/source/sql/sql_common.py +41 -102
- datahub/ingestion/source/sql/sql_generic_profiler.py +5 -6
- datahub/ingestion/source/sql/sql_report.py +2 -0
- datahub/ingestion/source/state/checkpoint.py +2 -1
- datahub/ingestion/source/tableau/tableau.py +122 -45
- datahub/ingestion/source/tableau/tableau_common.py +18 -0
- datahub/ingestion/source/tableau/tableau_constant.py +3 -1
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +6 -2
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/proxy.py +8 -27
- datahub/ingestion/source/usage/usage_common.py +15 -1
- datahub/ingestion/source_report/ingestion_stage.py +3 -0
- datahub/metadata/_schema_classes.py +256 -3
- datahub/metadata/_urns/urn_defs.py +168 -168
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/ml/metadata/__init__.py +2 -0
- datahub/metadata/schema.avsc +252 -33
- datahub/metadata/schemas/DataJobKey.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceKey.avsc +5 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +63 -0
- datahub/metadata/schemas/MLModelGroupProperties.avsc +82 -0
- datahub/metadata/schemas/MLModelProperties.avsc +62 -2
- datahub/metadata/schemas/MLTrainingRunProperties.avsc +171 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +94 -2
- datahub/specific/aspect_helpers/__init__.py +0 -0
- datahub/specific/aspect_helpers/custom_properties.py +79 -0
- datahub/specific/aspect_helpers/ownership.py +67 -0
- datahub/specific/aspect_helpers/structured_properties.py +72 -0
- datahub/specific/aspect_helpers/tags.py +42 -0
- datahub/specific/aspect_helpers/terms.py +43 -0
- datahub/specific/chart.py +28 -184
- datahub/specific/dashboard.py +31 -196
- datahub/specific/datajob.py +34 -189
- datahub/specific/dataproduct.py +24 -86
- datahub/specific/dataset.py +48 -133
- datahub/specific/form.py +12 -32
- datahub/specific/structured_property.py +9 -9
- datahub/sql_parsing/sql_parsing_aggregator.py +10 -9
- datahub/sql_parsing/sqlglot_lineage.py +15 -5
- datahub/sql_parsing/tool_meta_extractor.py +119 -5
- datahub/utilities/time.py +8 -3
- datahub/utilities/urns/_urn_base.py +5 -7
- datahub/ingestion/source/kafka/kafka_connect.py +0 -1468
- datahub/specific/custom_properties.py +0 -37
- datahub/specific/ownership.py +0 -48
- datahub/specific/structured_properties.py +0 -53
- {acryl_datahub-0.15.0rc24.dist-info → acryl_datahub-0.15.0.1.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/sql/hive_metastore.py

@@ -123,6 +123,10 @@ class HiveMetastore(BasicSQLAlchemyConfig):
         description="Dataset Subtype name to be 'Table' or 'View' Valid options: ['True', 'False']",
     )

+    include_view_lineage: bool = Field(
+        default=False, description="", hidden_from_docs=True
+    )
+
     include_catalog_name_in_ids: bool = Field(
         default=False,
         description="Add the Presto catalog name (e.g. hive) to the generated dataset urns. `urn:li:dataset:(urn:li:dataPlatform:hive,hive.user.logging_events,PROD)` versus `urn:li:dataset:(urn:li:dataPlatform:hive,user.logging_events,PROD)`",
@@ -160,6 +164,9 @@ class HiveMetastore(BasicSQLAlchemyConfig):
 @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
 @capability(SourceCapability.DATA_PROFILING, "Not Supported", False)
 @capability(SourceCapability.CLASSIFICATION, "Not Supported", False)
+@capability(
+    SourceCapability.LINEAGE_COARSE, "View lineage is not supported", supported=False
+)
 class HiveMetastoreSource(SQLAlchemySource):
     """
     This plugin extracts the following:
datahub/ingestion/source/sql/mssql/job_models.py

@@ -1,11 +1,17 @@
 from dataclasses import dataclass, field
 from typing import Dict, List, Optional, Union

-from datahub.emitter.mce_builder import
+from datahub.emitter.mce_builder import (
+    make_data_flow_urn,
+    make_data_job_urn,
+    make_data_platform_urn,
+    make_dataplatform_instance_urn,
+)
 from datahub.metadata.schema_classes import (
     DataFlowInfoClass,
     DataJobInfoClass,
     DataJobInputOutputClass,
+    DataPlatformInstanceClass,
 )


@@ -204,6 +210,18 @@ class MSSQLDataJob:
             status=self.status,
         )

+    @property
+    def as_maybe_platform_instance_aspect(self) -> Optional[DataPlatformInstanceClass]:
+        if self.entity.flow.platform_instance:
+            return DataPlatformInstanceClass(
+                platform=make_data_platform_urn(self.entity.flow.orchestrator),
+                instance=make_dataplatform_instance_urn(
+                    platform=self.entity.flow.orchestrator,
+                    instance=self.entity.flow.platform_instance,
+                ),
+            )
+        return None
+

 @dataclass
 class MSSQLDataFlow:
@@ -238,3 +256,14 @@ class MSSQLDataFlow:
             customProperties=self.flow_properties,
             externalUrl=self.external_url,
         )
+
+    @property
+    def as_maybe_platform_instance_aspect(self) -> Optional[DataPlatformInstanceClass]:
+        if self.entity.platform_instance:
+            return DataPlatformInstanceClass(
+                platform=make_data_platform_urn(self.entity.orchestrator),
+                instance=make_dataplatform_instance_urn(
+                    self.entity.orchestrator, self.entity.platform_instance
+                ),
+            )
+        return None
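The new `as_maybe_platform_instance_aspect` properties only produce an aspect when a `platform_instance` is configured; otherwise they return `None` and the callers in `source.py` below simply skip emitting it. A minimal sketch of the aspect they build, assuming a hypothetical instance name `my_sql_server` (the URN formats in the comments follow the standard DataHub URN builders):

```python
# Illustrative only: "mssql" is the orchestrator/platform and "my_sql_server"
# is an assumed platform_instance value; neither comes from this diff.
from datahub.emitter.mce_builder import (
    make_data_platform_urn,
    make_dataplatform_instance_urn,
)
from datahub.metadata.schema_classes import DataPlatformInstanceClass

aspect = DataPlatformInstanceClass(
    platform=make_data_platform_urn("mssql"),
    instance=make_dataplatform_instance_urn("mssql", "my_sql_server"),
)

print(aspect.platform)  # urn:li:dataPlatform:mssql
print(aspect.instance)  # urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my_sql_server)
```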
datahub/ingestion/source/sql/mssql/source.py

@@ -639,6 +639,13 @@ class SQLServerSource(SQLAlchemySource):
            aspect=data_job.as_datajob_info_aspect,
        ).as_workunit()

+        data_platform_instance_aspect = data_job.as_maybe_platform_instance_aspect
+        if data_platform_instance_aspect:
+            yield MetadataChangeProposalWrapper(
+                entityUrn=data_job.urn,
+                aspect=data_platform_instance_aspect,
+            ).as_workunit()
+
        if include_lineage:
            yield MetadataChangeProposalWrapper(
                entityUrn=data_job.urn,
@@ -654,6 +661,13 @@ class SQLServerSource(SQLAlchemySource):
            entityUrn=data_flow.urn,
            aspect=data_flow.as_dataflow_info_aspect,
        ).as_workunit()
+
+        data_platform_instance_aspect = data_flow.as_maybe_platform_instance_aspect
+        if data_platform_instance_aspect:
+            yield MetadataChangeProposalWrapper(
+                entityUrn=data_flow.urn,
+                aspect=data_platform_instance_aspect,
+            ).as_workunit()
        # TODO: Add SubType when it appear

    def get_inspectors(self) -> Iterable[Inspector]:
@@ -710,7 +724,7 @@ class SQLServerSource(SQLAlchemySource):
        ):
            yield from auto_workunit(
                generate_procedure_lineage(
-                    schema_resolver=self.
+                    schema_resolver=self.get_schema_resolver(),
                    procedure=procedure,
                    procedure_job_urn=MSSQLDataJob(entity=procedure).urn,
                    is_temp_table=self.is_temp_table,
datahub/ingestion/source/sql/sql_common.py

@@ -11,7 +11,6 @@ from typing import (
     Dict,
     Iterable,
     List,
-    MutableMapping,
     Optional,
     Set,
     Tuple,
@@ -36,7 +35,6 @@ from datahub.emitter.mce_builder import (
     make_tag_urn,
 )
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.emitter.sql_parsing_builder import SqlParsingBuilder
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.decorators import capability
 from datahub.ingestion.api.incremental_lineage_helper import auto_incremental_lineage
@@ -79,7 +77,6 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
     StatefulIngestionSourceBase,
 )
 from datahub.metadata.com.linkedin.pegasus2avro.common import StatusClass
-from datahub.metadata.com.linkedin.pegasus2avro.dataset import UpstreamLineage
 from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
 from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
 from datahub.metadata.com.linkedin.pegasus2avro.schema import (
@@ -106,17 +103,11 @@ from datahub.metadata.schema_classes import (
     GlobalTagsClass,
     SubTypesClass,
     TagAssociationClass,
-    UpstreamClass,
     ViewPropertiesClass,
 )
 from datahub.sql_parsing.schema_resolver import SchemaResolver
-from datahub.sql_parsing.
-    SqlParsingResult,
-    sqlglot_lineage,
-    view_definition_lineage_helper,
-)
+from datahub.sql_parsing.sql_parsing_aggregator import SqlParsingAggregator
 from datahub.telemetry import telemetry
-from datahub.utilities.file_backed_collections import FileBackedDict
 from datahub.utilities.registries.domain_registry import DomainRegistry
 from datahub.utilities.sqlalchemy_type_converter import (
     get_native_data_type_for_sqlalchemy_type,
@@ -347,17 +338,19 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
         )

         self.views_failed_parsing: Set[str] = set()
-
+
+        self.discovered_datasets: Set[str] = set()
+        self.aggregator = SqlParsingAggregator(
             platform=self.platform,
             platform_instance=self.config.platform_instance,
             env=self.config.env,
+            graph=self.ctx.graph,
+            generate_lineage=self.include_lineage,
+            generate_usage_statistics=False,
+            generate_operations=False,
+            eager_graph_load=False,
         )
-        self.
-        self._view_definition_cache: MutableMapping[str, str]
-        if self.config.use_file_backed_cache:
-            self._view_definition_cache = FileBackedDict[str]()
-        else:
-            self._view_definition_cache = {}
+        self.report.sql_aggregator = self.aggregator.report

     @classmethod
     def test_connection(cls, config_dict: dict) -> TestConnectionReport:
@@ -572,36 +565,9 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
                 profile_requests, profiler, platform=self.platform
             )

-
-
-
-    def get_view_lineage(self) -> Iterable[MetadataWorkUnit]:
-        builder = SqlParsingBuilder(
-            generate_lineage=True,
-            generate_usage_statistics=False,
-            generate_operations=False,
-        )
-        for dataset_name in self._view_definition_cache.keys():
-            # TODO: Ensure that the lineage generated from the view definition
-            # matches the dataset_name.
-            view_definition = self._view_definition_cache[dataset_name]
-            result = self._run_sql_parser(
-                dataset_name,
-                view_definition,
-                self.schema_resolver,
-            )
-            if result and result.out_tables:
-                # This does not yield any workunits but we use
-                # yield here to execute this method
-                yield from builder.process_sql_parsing_result(
-                    result=result,
-                    query=view_definition,
-                    is_view_ddl=True,
-                    include_column_lineage=self.config.include_view_column_lineage,
-                )
-            else:
-                self.views_failed_parsing.add(dataset_name)
-        yield from builder.gen_workunits()
+        # Generate workunit for aggregated SQL parsing results
+        for mcp in self.aggregator.gen_metadata():
+            yield mcp.as_workunit()

     def get_identifier(
         self, *, schema: str, entity: str, inspector: Inspector, **kwargs: Any
@@ -760,16 +726,6 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
         )
         dataset_snapshot.aspects.append(dataset_properties)

-        if self.config.include_table_location_lineage and location_urn:
-            external_upstream_table = UpstreamClass(
-                dataset=location_urn,
-                type=DatasetLineageTypeClass.COPY,
-            )
-            yield MetadataChangeProposalWrapper(
-                entityUrn=dataset_snapshot.urn,
-                aspect=UpstreamLineage(upstreams=[external_upstream_table]),
-            ).as_workunit()
-
         extra_tags = self.get_extra_tags(inspector, schema, table)
         pk_constraints: dict = inspector.get_pk_constraint(table, schema)
         partitions: Optional[List[str]] = self.get_partitions(inspector, schema, table)
@@ -795,7 +751,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):

         dataset_snapshot.aspects.append(schema_metadata)
         if self._save_schema_to_resolver():
-            self.
+            self.aggregator.register_schema(dataset_urn, schema_metadata)
             self.discovered_datasets.add(dataset_name)
         db_name = self.get_db_name(inspector)

@@ -815,6 +771,13 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
             ),
         )

+        if self.config.include_table_location_lineage and location_urn:
+            self.aggregator.add_known_lineage_mapping(
+                upstream_urn=location_urn,
+                downstream_urn=dataset_snapshot.urn,
+                lineage_type=DatasetLineageTypeClass.COPY,
+            )
+
         if self.config.domain:
             assert self.domain_registry
             yield from get_domain_wu(
@@ -1089,6 +1052,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
             self.config.platform_instance,
             self.config.env,
         )
+
         try:
             columns = inspector.get_columns(view, schema)
         except KeyError:
@@ -1108,7 +1072,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
             canonical_schema=schema_fields,
         )
         if self._save_schema_to_resolver():
-            self.
+            self.aggregator.register_schema(dataset_urn, schema_metadata)
             self.discovered_datasets.add(dataset_name)

         description, properties, _ = self.get_table_properties(inspector, schema, view)
@@ -1117,7 +1081,18 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
             view_definition = self._get_view_definition(inspector, schema, view)
             properties["view_definition"] = view_definition
             if view_definition and self.config.include_view_lineage:
-
+                default_db = None
+                default_schema = None
+                try:
+                    default_db, default_schema = self.get_db_schema(dataset_name)
+                except ValueError:
+                    logger.warning(f"Invalid view identifier: {dataset_name}")
+                self.aggregator.add_view_definition(
+                    view_urn=dataset_urn,
+                    view_definition=view_definition,
+                    default_db=default_db,
+                    default_schema=default_schema,
+                )

         dataset_snapshot = DatasetSnapshot(
             urn=dataset_urn,
@@ -1169,48 +1144,9 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
             hasattr(self.config, "include_lineage") and self.config.include_lineage
         )

-
-
-
-        try:
-            database, schema = self.get_db_schema(view_identifier)
-        except ValueError:
-            logger.warning(f"Invalid view identifier: {view_identifier}")
-            return None
-        raw_lineage = sqlglot_lineage(
-            query,
-            schema_resolver=schema_resolver,
-            default_db=database,
-            default_schema=schema,
-        )
-        view_urn = make_dataset_urn_with_platform_instance(
-            self.platform,
-            view_identifier,
-            self.config.platform_instance,
-            self.config.env,
-        )
-
-        if raw_lineage.debug_info.table_error:
-            logger.debug(
-                f"Failed to parse lineage for view {view_identifier}: "
-                f"{raw_lineage.debug_info.table_error}"
-            )
-            self.report.num_view_definitions_failed_parsing += 1
-            self.report.view_definitions_parsing_failures.append(
-                f"Table-level sql parsing error for view {view_identifier}: {raw_lineage.debug_info.table_error}"
-            )
-            return None
-
-        elif raw_lineage.debug_info.column_error:
-            self.report.num_view_definitions_failed_column_parsing += 1
-            self.report.view_definitions_parsing_failures.append(
-                f"Column-level sql parsing error for view {view_identifier}: {raw_lineage.debug_info.column_error}"
-            )
-        else:
-            self.report.num_view_definitions_parsed += 1
-            if raw_lineage.out_tables != [view_urn]:
-                self.report.num_view_definitions_view_urn_mismatch += 1
-        return view_definition_lineage_helper(raw_lineage, view_urn)
+    @property
+    def include_lineage(self):
+        return self.config.include_view_lineage

     def get_db_schema(self, dataset_identifier: str) -> Tuple[Optional[str], str]:
         database, schema, _view = dataset_identifier.split(".", 2)
@@ -1411,5 +1347,8 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
             schema=schema, table=table, partition=partition, custom_sql=custom_sql
         )

+    def get_schema_resolver(self) -> SchemaResolver:
+        return self.aggregator._schema_resolver
+
     def get_report(self):
         return self.report
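Taken together, the `sql_common.py` hunks replace the `SqlParsingBuilder` / `_view_definition_cache` flow with a `SqlParsingAggregator` that is fed during scanning and drained at the end of the run, with lineage generation gated by the new `include_lineage` property (a proxy for `config.include_view_lineage`). A condensed sketch of that lifecycle, using only the aggregator calls that appear in these hunks; the urns, example schema, and SQL are illustrative placeholders, not values from the diff:

```python
# Sketch of the new SqlParsingAggregator flow in SQLAlchemySource (assumed
# placeholders throughout; only the calls shown in the hunks above are used).
from datahub.emitter.mce_builder import make_data_platform_urn, make_dataset_urn
from datahub.metadata.schema_classes import (
    DatasetLineageTypeClass,
    NumberTypeClass,
    OtherSchemaClass,
    SchemaFieldClass,
    SchemaFieldDataTypeClass,
    SchemaMetadataClass,
)
from datahub.sql_parsing.sql_parsing_aggregator import SqlParsingAggregator

table_urn = make_dataset_urn("hive", "db.schema.events", "PROD")
view_urn = make_dataset_urn("hive", "db.schema.events_view", "PROD")
location_urn = make_dataset_urn("s3", "my-bucket/events", "PROD")

aggregator = SqlParsingAggregator(
    platform="hive",
    platform_instance=None,
    env="PROD",
    graph=None,  # a DataHubGraph can be passed for graph-backed schema lookups
    generate_lineage=True,
    generate_usage_statistics=False,
    generate_operations=False,
    eager_graph_load=False,
)

# 1. Register schemas as tables and views are scanned.
schema_metadata = SchemaMetadataClass(
    schemaName="db.schema.events",
    platform=make_data_platform_urn("hive"),
    version=0,
    hash="",
    platformSchema=OtherSchemaClass(rawSchema=""),
    fields=[
        SchemaFieldClass(
            fieldPath="id",
            type=SchemaFieldDataTypeClass(type=NumberTypeClass()),
            nativeDataType="bigint",
        )
    ],
)
aggregator.register_schema(table_urn, schema_metadata)

# 2. Hand view definitions to the aggregator; parsing now happens inside
#    gen_metadata() instead of the removed get_view_lineage() method.
aggregator.add_view_definition(
    view_urn=view_urn,
    view_definition="CREATE VIEW events_view AS SELECT id FROM db.schema.events",
    default_db="db",
    default_schema="schema",
)

# 3. Record copy lineage from a storage location instead of emitting an
#    UpstreamLineage aspect directly.
aggregator.add_known_lineage_mapping(
    upstream_urn=location_urn,
    downstream_urn=table_urn,
    lineage_type=DatasetLineageTypeClass.COPY,
)

# 4. Drain the aggregator into workunits at the end of the run.
workunits = [mcp.as_workunit() for mcp in aggregator.gen_metadata()]
```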
datahub/ingestion/source/sql/sql_generic_profiler.py

@@ -7,7 +7,10 @@ from typing import Dict, Iterable, List, Optional, Union, cast
 from sqlalchemy import create_engine, inspect
 from sqlalchemy.engine.reflection import Inspector

-from datahub.emitter.mce_builder import
+from datahub.emitter.mce_builder import (
+    make_dataset_urn_with_platform_instance,
+    parse_ts_millis,
+)
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.ge_data_profiler import (
@@ -245,11 +248,7 @@ class GenericProfiler:
                 # If profiling state exists we have to carry over to the new state
                 self.state_handler.add_to_state(dataset_urn, last_profiled)

-        threshold_time: Optional[datetime] = (
-            datetime.fromtimestamp(last_profiled / 1000, timezone.utc)
-            if last_profiled
-            else None
-        )
+        threshold_time: Optional[datetime] = parse_ts_millis(last_profiled)
         if (
             not threshold_time
             and self.config.profiling.profile_if_updated_since_days is not None
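The removed inline expression shows the behavior `parse_ts_millis` is expected to encapsulate: convert an epoch-milliseconds value to an aware UTC `datetime`, passing `None` (or 0) through as `None`. A rough stand-in inferred from that removed code, not the helper's actual implementation:

```python
# Sketch of the behavior inferred from the removed inline expression above;
# this is not the real parse_ts_millis implementation from mce_builder.
from datetime import datetime, timezone
from typing import Optional


def parse_ts_millis_sketch(ts_millis: Optional[float]) -> Optional[datetime]:
    # Epoch milliseconds -> aware UTC datetime; falsy input passes through as None.
    if not ts_millis:
        return None
    return datetime.fromtimestamp(ts_millis / 1000, tz=timezone.utc)


assert parse_ts_millis_sketch(None) is None
assert parse_ts_millis_sketch(1735689600000) == datetime(2025, 1, 1, tzinfo=timezone.utc)
```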
datahub/ingestion/source/sql/sql_report.py

@@ -5,6 +5,7 @@ from datahub.ingestion.glossary.classification_mixin import ClassificationReport
 from datahub.ingestion.source.state.stale_entity_removal_handler import (
     StaleEntityRemovalSourceReport,
 )
+from datahub.sql_parsing.sql_parsing_aggregator import SqlAggregatorReport
 from datahub.utilities.lossy_collections import LossyList
 from datahub.utilities.sqlalchemy_query_combiner import SQLAlchemyQueryCombinerReport
 from datahub.utilities.stats_collections import TopKDict, int_top_k_dict
@@ -52,6 +53,7 @@ class SQLSourceReport(
     num_view_definitions_failed_parsing: int = 0
     num_view_definitions_failed_column_parsing: int = 0
     view_definitions_parsing_failures: LossyList[str] = field(default_factory=LossyList)
+    sql_aggregator: Optional[SqlAggregatorReport] = None

     def report_entity_scanned(self, name: str, ent_type: str = "table") -> None:
         """
datahub/ingestion/source/state/checkpoint.py

@@ -12,6 +12,7 @@ from typing import Callable, Generic, Optional, Type, TypeVar
 import pydantic

 from datahub.configuration.common import ConfigModel
+from datahub.emitter.mce_builder import parse_ts_millis
 from datahub.metadata.schema_classes import (
     DatahubIngestionCheckpointClass,
     IngestionCheckpointStateClass,
@@ -144,7 +145,7 @@ class Checkpoint(Generic[StateType]):
             )
             logger.info(
                 f"Successfully constructed last checkpoint state for job {job_name} "
-                f"with timestamp {
+                f"with timestamp {parse_ts_millis(checkpoint_aspect.timestampMillis)}"
             )
             return checkpoint
         return None