acryl-datahub 0.14.1.13rc5__py3-none-any.whl → 0.14.1.13rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub might be problematic.
Files changed (27)
  1. {acryl_datahub-0.14.1.13rc5.dist-info → acryl_datahub-0.14.1.13rc6.dist-info}/METADATA +2305 -2305
  2. {acryl_datahub-0.14.1.13rc5.dist-info → acryl_datahub-0.14.1.13rc6.dist-info}/RECORD +27 -26
  3. datahub/__init__.py +1 -1
  4. datahub/configuration/kafka_consumer_config.py +4 -1
  5. datahub/ingestion/source/bigquery_v2/bigquery_report.py +2 -2
  6. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +35 -12
  7. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +12 -11
  8. datahub/ingestion/source/dremio/dremio_reporting.py +2 -2
  9. datahub/ingestion/source/ge_data_profiler.py +1 -1
  10. datahub/ingestion/source/ge_profiling_config.py +6 -2
  11. datahub/ingestion/source/redshift/report.py +2 -2
  12. datahub/ingestion/source/snowflake/snowflake_report.py +2 -2
  13. datahub/ingestion/source/sql/oracle.py +50 -0
  14. datahub/ingestion/source/sql/sql_common.py +18 -52
  15. datahub/ingestion/source/sql/sql_generic_profiler.py +3 -32
  16. datahub/ingestion/source/sql/sql_report.py +75 -0
  17. datahub/ingestion/source/sql/teradata.py +2 -2
  18. datahub/ingestion/source/sql/vertica.py +2 -2
  19. datahub/ingestion/source/unity/report.py +2 -2
  20. datahub/metadata/schema.avsc +1 -1
  21. datahub/metadata/schemas/AssertionInfo.avsc +1 -1
  22. datahub/metadata/schemas/InputFields.avsc +1 -1
  23. datahub/metadata/schemas/MetadataChangeEvent.avsc +1 -1
  24. datahub/metadata/schemas/SchemaMetadata.avsc +1 -1
  25. {acryl_datahub-0.14.1.13rc5.dist-info → acryl_datahub-0.14.1.13rc6.dist-info}/WHEEL +0 -0
  26. {acryl_datahub-0.14.1.13rc5.dist-info → acryl_datahub-0.14.1.13rc6.dist-info}/entry_points.txt +0 -0
  27. {acryl_datahub-0.14.1.13rc5.dist-info → acryl_datahub-0.14.1.13rc6.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/sql/sql_generic_profiler.py
@@ -1,6 +1,6 @@
 import logging
 from abc import abstractmethod
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from datetime import datetime, timedelta, timezone
 from typing import Dict, Iterable, List, Optional, Union, cast
 
@@ -14,42 +14,13 @@ from datahub.ingestion.source.ge_data_profiler import (
     DatahubGEProfiler,
     GEProfilerRequest,
 )
-from datahub.ingestion.source.sql.sql_common import SQLSourceReport
 from datahub.ingestion.source.sql.sql_config import SQLCommonConfig
 from datahub.ingestion.source.sql.sql_generic import BaseTable, BaseView
+from datahub.ingestion.source.sql.sql_report import SQLSourceReport
 from datahub.ingestion.source.sql.sql_utils import check_table_with_profile_pattern
 from datahub.ingestion.source.state.profiling_state_handler import ProfilingHandler
 from datahub.metadata.com.linkedin.pegasus2avro.dataset import DatasetProfile
 from datahub.metadata.com.linkedin.pegasus2avro.timeseries import PartitionType
-from datahub.utilities.stats_collections import TopKDict, int_top_k_dict
-
-
-@dataclass
-class DetailedProfilerReportMixin:
-    profiling_skipped_not_updated: TopKDict[str, int] = field(
-        default_factory=int_top_k_dict
-    )
-    profiling_skipped_size_limit: TopKDict[str, int] = field(
-        default_factory=int_top_k_dict
-    )
-
-    profiling_skipped_row_limit: TopKDict[str, int] = field(
-        default_factory=int_top_k_dict
-    )
-
-    profiling_skipped_table_profile_pattern: TopKDict[str, int] = field(
-        default_factory=int_top_k_dict
-    )
-
-    profiling_skipped_other: TopKDict[str, int] = field(default_factory=int_top_k_dict)
-
-    num_tables_not_eligible_profiling: Dict[str, int] = field(
-        default_factory=int_top_k_dict
-    )
-
-
-class ProfilingSqlReport(DetailedProfilerReportMixin, SQLSourceReport):
-    pass
 
 
 @dataclass
@@ -65,7 +36,7 @@ class GenericProfiler:
     def __init__(
         self,
         config: SQLCommonConfig,
-        report: ProfilingSqlReport,
+        report: SQLSourceReport,
         platform: str,
         state_handler: Optional[ProfilingHandler] = None,
     ) -> None:
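
Net effect of these hunks: the report classes move out of this module. DetailedProfilerReportMixin and ProfilingSqlReport are deleted here, and SQLSourceReport is now imported from the new sql_report module instead of from sql_common. For downstream code, the import change looks like this (a sketch distilled from the hunks above, not a hunk itself):

# Before (rc5): report classes were split across two modules.
from datahub.ingestion.source.sql.sql_common import SQLSourceReport
from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport

# After (rc6): one consolidated class in a dedicated module; the profiling
# counters now live on SQLSourceReport via DetailedProfilerReportMixin.
from datahub.ingestion.source.sql.sql_report import SQLSourceReport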
datahub/ingestion/source/sql/sql_report.py (new file)
@@ -0,0 +1,75 @@
+from dataclasses import dataclass, field
+from typing import Dict, Optional
+
+from datahub.ingestion.glossary.classification_mixin import ClassificationReportMixin
+from datahub.ingestion.source.state.stale_entity_removal_handler import (
+    StaleEntityRemovalSourceReport,
+)
+from datahub.utilities.lossy_collections import LossyList
+from datahub.utilities.sqlalchemy_query_combiner import SQLAlchemyQueryCombinerReport
+from datahub.utilities.stats_collections import TopKDict, int_top_k_dict
+
+
+@dataclass
+class DetailedProfilerReportMixin:
+    profiling_skipped_not_updated: TopKDict[str, int] = field(
+        default_factory=int_top_k_dict
+    )
+    profiling_skipped_size_limit: TopKDict[str, int] = field(
+        default_factory=int_top_k_dict
+    )
+
+    profiling_skipped_row_limit: TopKDict[str, int] = field(
+        default_factory=int_top_k_dict
+    )
+
+    profiling_skipped_table_profile_pattern: TopKDict[str, int] = field(
+        default_factory=int_top_k_dict
+    )
+
+    profiling_skipped_other: TopKDict[str, int] = field(default_factory=int_top_k_dict)
+
+    num_tables_not_eligible_profiling: Dict[str, int] = field(
+        default_factory=int_top_k_dict
+    )
+
+
+@dataclass
+class SQLSourceReport(
+    StaleEntityRemovalSourceReport,
+    ClassificationReportMixin,
+    DetailedProfilerReportMixin,
+):
+    tables_scanned: int = 0
+    views_scanned: int = 0
+    entities_profiled: int = 0
+    filtered: LossyList[str] = field(default_factory=LossyList)
+
+    query_combiner: Optional[SQLAlchemyQueryCombinerReport] = None
+
+    num_view_definitions_parsed: int = 0
+    num_view_definitions_failed_parsing: int = 0
+    num_view_definitions_failed_column_parsing: int = 0
+    view_definitions_parsing_failures: LossyList[str] = field(default_factory=LossyList)
+
+    def report_entity_scanned(self, name: str, ent_type: str = "table") -> None:
+        """
+        Entity could be a view or a table
+        """
+        if ent_type == "table":
+            self.tables_scanned += 1
+        elif ent_type == "view":
+            self.views_scanned += 1
+        else:
+            raise KeyError(f"Unknown entity {ent_type}.")
+
+    def report_entity_profiled(self, name: str) -> None:
+        self.entities_profiled += 1
+
+    def report_dropped(self, ent_name: str) -> None:
+        self.filtered.append(ent_name)
+
+    def report_from_query_combiner(
+        self, query_combiner_report: SQLAlchemyQueryCombinerReport
+    ) -> None:
+        self.query_combiner = query_combiner_report
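
For orientation, here is a minimal usage sketch of the new class. The table and view names are made up for illustration, and it assumes SQLSourceReport and its parent report classes are default-constructible, which the all-defaulted dataclass fields above suggest:

from datahub.ingestion.source.sql.sql_report import SQLSourceReport

report = SQLSourceReport()

# Tables and views are tallied separately; any other ent_type raises KeyError.
report.report_entity_scanned("public.orders", ent_type="table")
report.report_entity_scanned("public.orders_summary", ent_type="view")

# Filtered-out entities land in 'filtered', a LossyList that keeps only a
# bounded sample of names rather than growing without limit.
report.report_dropped("staging.tmp_scratch")

assert report.tables_scanned == 1
assert report.views_scanned == 1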
datahub/ingestion/source/sql/teradata.py
@@ -44,7 +44,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.graph.client import DataHubGraph
 from datahub.ingestion.source.sql.sql_common import SqlWorkUnit, register_custom_type
 from datahub.ingestion.source.sql.sql_config import SQLCommonConfig
-from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport
+from datahub.ingestion.source.sql.sql_report import SQLSourceReport
 from datahub.ingestion.source.sql.two_tier_sql_source import (
     TwoTierSQLAlchemyConfig,
     TwoTierSQLAlchemySource,
@@ -330,7 +330,7 @@ def optimized_get_view_definition(
 
 
 @dataclass
-class TeradataReport(ProfilingSqlReport, IngestionStageReport, BaseTimeWindowReport):
+class TeradataReport(SQLSourceReport, IngestionStageReport, BaseTimeWindowReport):
     num_queries_parsed: int = 0
     num_view_ddl_parsed: int = 0
     num_table_parse_failures: int = 0
datahub/ingestion/source/sql/vertica.py
@@ -27,7 +27,6 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.common.data_reader import DataReader
 from datahub.ingestion.source.sql.sql_common import (
     SQLAlchemySource,
-    SQLSourceReport,
     SqlWorkUnit,
     get_schema_metadata,
 )
@@ -35,6 +34,7 @@ from datahub.ingestion.source.sql.sql_config import (
     BasicSQLAlchemyConfig,
     SQLCommonConfig,
 )
+from datahub.ingestion.source.sql.sql_report import SQLSourceReport
 from datahub.ingestion.source.sql.sql_utils import get_domain_wu
 from datahub.metadata.com.linkedin.pegasus2avro.common import StatusClass
 from datahub.metadata.com.linkedin.pegasus2avro.dataset import UpstreamLineage
@@ -536,7 +536,7 @@ class VerticaSource(SQLAlchemySource):
             )
 
             if not self.is_dataset_eligible_for_profiling(
-                dataset_name, sql_config, inspector, profile_candidates
+                dataset_name, schema, inspector, profile_candidates
             ):
                 if self.config.profiling.report_dropped_profiles:
                     self.report.report_dropped(f"profile of {dataset_name}")
datahub/ingestion/source/unity/report.py
@@ -2,7 +2,7 @@ from dataclasses import dataclass, field
 from typing import Optional, Tuple
 
 from datahub.ingestion.api.report import EntityFilterReport, Report
-from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport
+from datahub.ingestion.source.sql.sql_report import SQLSourceReport
 from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport
 from datahub.utilities.lossy_collections import LossyDict, LossyList
 from datahub.utilities.perf_timer import PerfTimer
@@ -19,7 +19,7 @@ class UnityCatalogUsagePerfReport(Report):
 
 
 @dataclass
-class UnityCatalogReport(IngestionStageReport, ProfilingSqlReport):
+class UnityCatalogReport(IngestionStageReport, SQLSourceReport):
     metastores: EntityFilterReport = EntityFilterReport.field(type="metastore")
     catalogs: EntityFilterReport = EntityFilterReport.field(type="catalog")
     schemas: EntityFilterReport = EntityFilterReport.field(type="schema")
datahub/metadata/schema.avsc
@@ -6005,7 +6005,7 @@
   "fields": [
     {
       "Searchable": {
-        "boostScore": 5.0,
+        "boostScore": 1.0,
         "fieldName": "fieldPaths",
         "fieldType": "TEXT",
         "queryByDefault": "true"
datahub/metadata/schemas/AssertionInfo.avsc
@@ -1542,7 +1542,7 @@
   "fields": [
     {
       "Searchable": {
-        "boostScore": 5.0,
+        "boostScore": 1.0,
         "fieldName": "fieldPaths",
         "fieldType": "TEXT",
         "queryByDefault": "true"
datahub/metadata/schemas/InputFields.avsc
@@ -42,7 +42,7 @@
   "fields": [
     {
       "Searchable": {
-        "boostScore": 5.0,
+        "boostScore": 1.0,
         "fieldName": "fieldPaths",
         "fieldType": "TEXT",
         "queryByDefault": "true"
datahub/metadata/schemas/MetadataChangeEvent.avsc
@@ -4013,7 +4013,7 @@
   "fields": [
     {
       "Searchable": {
-        "boostScore": 5.0,
+        "boostScore": 1.0,
         "fieldName": "fieldPaths",
         "fieldType": "TEXT",
         "queryByDefault": "true"
datahub/metadata/schemas/SchemaMetadata.avsc
@@ -309,7 +309,7 @@
   "fields": [
     {
       "Searchable": {
-        "boostScore": 5.0,
+        "boostScore": 1.0,
         "fieldName": "fieldPaths",
         "fieldType": "TEXT",
         "queryByDefault": "true"