acryl-datahub 1.0.0rc1__py3-none-any.whl → 1.0.0rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub might be problematic.

Files changed (49)
  1. {acryl_datahub-1.0.0rc1.dist-info → acryl_datahub-1.0.0rc3.dist-info}/METADATA +2469 -2469
  2. {acryl_datahub-1.0.0rc1.dist-info → acryl_datahub-1.0.0rc3.dist-info}/RECORD +49 -48
  3. datahub/_version.py +1 -1
  4. datahub/cli/docker_cli.py +2 -2
  5. datahub/configuration/common.py +1 -1
  6. datahub/ingestion/api/decorators.py +1 -1
  7. datahub/ingestion/graph/client.py +1 -1
  8. datahub/ingestion/run/pipeline.py +1 -1
  9. datahub/ingestion/source/identity/okta.py +13 -2
  10. datahub/ingestion/source/kafka_connect/common.py +1 -1
  11. datahub/ingestion/source/kafka_connect/kafka_connect.py +97 -4
  12. datahub/ingestion/source/kafka_connect/sink_connectors.py +2 -2
  13. datahub/ingestion/source/kafka_connect/source_connectors.py +2 -2
  14. datahub/ingestion/source/looker/looker_common.py +3 -3
  15. datahub/ingestion/source/looker/looker_source.py +1 -1
  16. datahub/ingestion/source/mode.py +3 -3
  17. datahub/ingestion/source/nifi.py +2 -2
  18. datahub/ingestion/source/openapi.py +1 -1
  19. datahub/ingestion/source/openapi_parser.py +1 -1
  20. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +1 -1
  21. datahub/ingestion/source/snowflake/snowflake_queries.py +3 -0
  22. datahub/ingestion/source/sql/athena.py +2 -2
  23. datahub/ingestion/source/sql/hive_metastore.py +1 -1
  24. datahub/ingestion/source/sql/mssql/source.py +1 -1
  25. datahub/ingestion/source/sql/sql_common.py +1 -1
  26. datahub/ingestion/source/sql/teradata.py +3 -3
  27. datahub/ingestion/source/tableau/tableau.py +19 -0
  28. datahub/metadata/schema.avsc +30 -3
  29. datahub/metadata/schemas/AssertionInfo.avsc +3 -1
  30. datahub/metadata/schemas/BusinessAttributeInfo.avsc +6 -2
  31. datahub/metadata/schemas/BusinessAttributes.avsc +6 -0
  32. datahub/metadata/schemas/ChartInfo.avsc +1 -0
  33. datahub/metadata/schemas/EditableSchemaMetadata.avsc +6 -2
  34. datahub/metadata/schemas/GlossaryTerms.avsc +3 -1
  35. datahub/metadata/schemas/InputFields.avsc +3 -1
  36. datahub/metadata/schemas/MetadataChangeEvent.avsc +7 -2
  37. datahub/metadata/schemas/SchemaMetadata.avsc +3 -1
  38. datahub/metadata/schemas/StructuredPropertyDefinition.avsc +14 -0
  39. datahub/sql_parsing/schema_resolver.py +1 -1
  40. datahub/sql_parsing/sqlglot_lineage.py +1 -1
  41. datahub/testing/check_sql_parser_result.py +5 -6
  42. datahub/testing/compare_metadata_json.py +7 -6
  43. datahub/testing/pytest_hooks.py +56 -0
  44. datahub/upgrade/upgrade.py +2 -2
  45. datahub/utilities/mapping.py +1 -1
  46. {acryl_datahub-1.0.0rc1.dist-info → acryl_datahub-1.0.0rc3.dist-info}/LICENSE +0 -0
  47. {acryl_datahub-1.0.0rc1.dist-info → acryl_datahub-1.0.0rc3.dist-info}/WHEEL +0 -0
  48. {acryl_datahub-1.0.0rc1.dist-info → acryl_datahub-1.0.0rc3.dist-info}/entry_points.txt +0 -0
  49. {acryl_datahub-1.0.0rc1.dist-info → acryl_datahub-1.0.0rc3.dist-info}/top_level.txt +0 -0
@@ -923,7 +923,7 @@ class LookerExplore:
  tags=cast(List, dict.get("tags")) if dict.get("tags") is not None else [],
  )

- @classmethod # noqa: C901
+ @classmethod
  def from_api( # noqa: C901
  cls,
  model: str,
@@ -931,7 +931,7 @@ class LookerExplore:
  client: LookerAPI,
  reporter: SourceReport,
  source_config: LookerDashboardSourceConfig,
- ) -> Optional["LookerExplore"]: # noqa: C901
+ ) -> Optional["LookerExplore"]:
  try:
  explore = client.lookml_model_explore(model, explore_name)
  views: Set[str] = set()
@@ -1183,7 +1183,7 @@ class LookerExplore:
  base_url = remove_port_from_url(base_url)
  return f"{base_url}/embed/explore/{self.model_name}/{self.name}"

- def _to_metadata_events( # noqa: C901
+ def _to_metadata_events(
  self,
  config: LookerCommonConfig,
  reporter: SourceReport,
@@ -383,7 +383,7 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):

  self.reachable_explores[(model, explore)].append(via)

- def _get_looker_dashboard_element( # noqa: C901
+ def _get_looker_dashboard_element(
  self, element: DashboardElement
  ) -> Optional[LookerDashboardElement]:
  # Dashboard elements can use raw usage_queries against explores
@@ -759,9 +759,9 @@ class ModeSource(StatefulIngestionSourceBase):
  return platform, database
  else:
  self.report.report_warning(
- title="Failed to create Data Platform Urn",
- message=f"Cannot create datasource urn for datasource id: "
- f"{data_source_id}",
+ title="Unable to construct upstream lineage",
+ message="We did not find a data source / connection with a matching ID, meaning that we do not know the platform/database to use in lineage.",
+ context=f"Data Source ID: {data_source_id}",
  )
  return None, None

@@ -488,7 +488,7 @@ class NifiSource(Source):
  def get_report(self) -> SourceReport:
  return self.report

- def update_flow(self, pg_flow_dto: Dict, recursion_level: int = 0) -> None: # noqa: C901
+ def update_flow(self, pg_flow_dto: Dict, recursion_level: int = 0) -> None:
  """
  Update self.nifi_flow with contents of the input process group `pg_flow_dto`
  """
@@ -894,7 +894,7 @@ class NifiSource(Source):
  if not delete_response.ok:
  logger.error("failed to delete provenance ", provenance_uri)

- def construct_workunits(self) -> Iterable[MetadataWorkUnit]: # noqa: C901
+ def construct_workunits(self) -> Iterable[MetadataWorkUnit]:
  rootpg = self.nifi_flow.root_process_group
  flow_name = rootpg.name # self.config.site_name
  flow_urn = self.make_flow_urn()
@@ -270,7 +270,7 @@ class APISource(Source, ABC):
  mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot)
  return ApiWorkUnit(id=dataset_name, mce=mce)

- def get_workunits_internal(self) -> Iterable[ApiWorkUnit]: # noqa: C901
+ def get_workunits_internal(self) -> Iterable[ApiWorkUnit]:
  config = self.config

  sw_dict = self.config.get_swagger()
@@ -111,7 +111,7 @@ def check_sw_version(sw_dict: dict) -> None:
  )


- def get_endpoints(sw_dict: dict) -> dict: # noqa: C901
+ def get_endpoints(sw_dict: dict) -> dict:
  """
  Get all the URLs, together with their description and the tags
  """
@@ -33,7 +33,7 @@ class CatalogItem(BaseModel):
  )

  @validator("display_name", always=True)
- def validate_diplay_name(cls, value, values): # noqa: N805
+ def validate_diplay_name(cls, value, values):
  if values["created_by"]:
  return values["created_by"].split("\\")[-1]
  return ""
@@ -731,6 +731,9 @@ fingerprinted_queries as (
  JOIN filtered_access_history a USING (query_id)
  )
  SELECT * FROM query_access_history
+ -- Our query aggregator expects the queries to be added in chronological order.
+ -- It's easier for us to push down the sorting to Snowflake/SQL instead of doing it in Python.
+ ORDER BY QUERY_START_TIME ASC
  """

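The new ORDER BY pushes the sort down into Snowflake because the downstream consumer builds its state incrementally. As a rough, hypothetical illustration only (this is not DataHub's actual aggregator; every name below is invented), an order-sensitive consumer might look like this:

```python
from dataclasses import dataclass, field
from datetime import datetime
from typing import Dict, List, Optional, Tuple


@dataclass
class ToyQueryAggregator:
    """Toy example of a consumer that assumes chronologically ordered input."""

    # Temp table name -> id of the query that most recently created it.
    temp_table_creators: Dict[str, str] = field(default_factory=dict)
    # (query_id, resolved upstream identifiers) pairs.
    lineage: List[Tuple[str, List[str]]] = field(default_factory=list)
    last_seen: Optional[datetime] = None

    def add_query(
        self,
        query_id: str,
        start_time: datetime,
        reads: List[str],
        creates: Optional[str] = None,
    ) -> None:
        # Enforce the ordering contract that the ORDER BY above guarantees.
        if self.last_seen is not None and start_time < self.last_seen:
            raise ValueError("queries must be added in chronological order")
        self.last_seen = start_time
        # Each read of a temp table is attributed to whichever query created it
        # most recently; feeding queries out of order would pick the wrong creator.
        upstreams = [self.temp_table_creators.get(table, table) for table in reads]
        self.lineage.append((query_id, upstreams))
        if creates is not None:
            self.temp_table_creators[creates] = query_id


agg = ToyQueryAggregator()
agg.add_query("q1", datetime(2025, 1, 1, 9, 0), reads=["orders"], creates="tmp_orders")
agg.add_query("q2", datetime(2025, 1, 1, 9, 5), reads=["tmp_orders"])
```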
@@ -55,7 +55,7 @@ try:
  except ImportError:
  _F = typing.TypeVar("_F", bound=typing.Callable[..., typing.Any])

- def override(f: _F, /) -> _F: # noqa: F811
+ def override(f: _F, /) -> _F:
  return f

@@ -104,7 +104,7 @@ class CustomAthenaRestDialect(AthenaRestDialect):
  return "\n".join([r for r in res])

  @typing.no_type_check
- def _get_column_type(self, type_: Union[str, Dict[str, Any]]) -> TypeEngine: # noqa: C901
+ def _get_column_type(self, type_: Union[str, Dict[str, Any]]) -> TypeEngine:
  """Derives the data type of the Athena column.

  This method is overwritten to extend the behavior of PyAthena.
@@ -67,7 +67,7 @@ TableKey = namedtuple("TableKey", ["schema", "table"])


  class HiveMetastoreConfigMode(StrEnum):
- hive: str = "hive" # noqa: F811
+ hive: str = "hive"
  presto: str = "presto"
  presto_on_hive: str = "presto-on-hive"
  trino: str = "trino"
@@ -401,7 +401,7 @@ class SQLServerSource(SQLAlchemySource):
  data_job.add_property(name=data_name, value=str(data_value))
  yield from self.construct_job_workunits(data_job)

- def loop_stored_procedures( # noqa: C901
+ def loop_stored_procedures(
  self,
  inspector: Inspector,
  schema: str,
@@ -635,7 +635,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):

  return None

- def loop_tables( # noqa: C901
+ def loop_tables(
  self,
  inspector: Inspector,
  schema: str,
@@ -649,7 +649,7 @@ ORDER by DataBaseName, TableName;
  )

  # Disabling the below because the cached view definition is not the view definition the column in tablesv actually holds the last statement executed against the object... not necessarily the view definition
- # setattr( # noqa: B010
+ # setattr(
  # TeradataDialect,
  # "get_view_definition",
  # lambda self, connection, view_name, schema=None, **kw: optimized_get_view_definition(
@@ -746,7 +746,7 @@ ORDER by DataBaseName, TableName;
  else:
  raise Exception("Unable to get database name from Sqlalchemy inspector")

- def cached_loop_tables( # noqa: C901
+ def cached_loop_tables(
  self,
  inspector: Inspector,
  schema: str,
@@ -782,7 +782,7 @@ ORDER by DataBaseName, TableName;
  break
  return description, properties, location

- def cached_loop_views( # noqa: C901
+ def cached_loop_views(
  self,
  inspector: Inspector,
  schema: str,
@@ -2190,6 +2190,10 @@ class TableauSiteSource:
  dataset_snapshot.aspects.append(browse_paths)
  else:
  logger.debug(f"Browse path not set for Custom SQL table {csql_id}")
+ logger.warning(
+ f"Skipping Custom SQL table {csql_id} due to filtered downstream"
+ )
+ continue

  dataset_properties = DatasetPropertiesClass(
  name=csql.get(c.NAME),
@@ -2628,6 +2632,15 @@ class TableauSiteSource:
  datasource_info = datasource

  browse_path = self._get_project_browse_path_name(datasource)
+ if (
+ not is_embedded_ds
+ and self._get_published_datasource_project_luid(datasource) is None
+ ):
+ logger.warning(
+ f"Skip ingesting published datasource {datasource.get(c.NAME)} because of filtered project"
+ )
+ return
+
  logger.debug(f"datasource {datasource.get(c.NAME)} browse-path {browse_path}")
  datasource_id = datasource[c.ID]
  datasource_urn = builder.make_dataset_urn_with_platform_instance(
@@ -2851,6 +2864,11 @@ class TableauSiteSource:
  query_filter=tables_filter,
  page_size=self.config.effective_database_table_page_size,
  ):
+ if tableau_database_table_id_to_urn_map.get(tableau_table[c.ID]) is None:
+ logger.warning(
+ f"Skipping table {tableau_table[c.ID]} due to filtered out published datasource"
+ )
+ continue
  database_table = self.database_tables[
  tableau_database_table_id_to_urn_map[tableau_table[c.ID]]
  ]
@@ -2905,6 +2923,7 @@ class TableauSiteSource:
  dataset_snapshot.aspects.append(browse_paths)
  else:
  logger.debug(f"Browse path not set for table {database_table.urn}")
+ return

  schema_metadata = self.get_schema_metadata_for_table(
  tableau_columns, database_table.parsed_columns
@@ -3504,7 +3504,9 @@
  "fieldName": "glossaryTerms",
  "fieldType": "URN",
  "filterNameOverride": "Glossary Term",
- "hasValuesFieldName": "hasGlossaryTerms"
+ "hasValuesFieldName": "hasGlossaryTerms",
+ "includeSystemModifiedAt": true,
+ "systemModifiedAtFieldName": "termsModifiedAt"
  },
  "java": {
  "class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"
@@ -10339,7 +10341,9 @@
  "/terms/*/urn": {
  "boostScore": 0.5,
  "fieldName": "editedFieldGlossaryTerms",
- "fieldType": "URN"
+ "fieldType": "URN",
+ "includeSystemModifiedAt": true,
+ "systemModifiedAtFieldName": "schemaFieldTermsModifiedAt"
  }
  },
  "type": [
@@ -10477,6 +10481,12 @@
  "namespace": "com.linkedin.pegasus2avro.businessattribute",
  "fields": [
  {
+ "Searchable": {
+ "fieldName": "schemaFieldBusinessAttribute",
+ "includeSystemModifiedAt": true,
+ "queryByDefault": false,
+ "systemModifiedAtFieldName": "schemaFieldBusinessAttributeModifiedAt"
+ },
  "java": {
  "class": "com.linkedin.pegasus2avro.common.urn.BusinessAttributeUrn"
  },
@@ -15077,6 +15087,7 @@
  },
  {
  "Searchable": {
+ "boostScore": 10.0,
  "enableAutocomplete": true,
  "fieldNameAliases": [
  "_entityName"
@@ -16546,7 +16557,9 @@
  "/terms/*/urn": {
  "boostScore": 0.5,
  "fieldName": "editedFieldGlossaryTerms",
- "fieldType": "URN"
+ "fieldType": "URN",
+ "includeSystemModifiedAt": true,
+ "systemModifiedAtFieldName": "schemaFieldTermsModifiedAt"
  }
  },
  "type": [
@@ -19423,6 +19436,13 @@
  "doc": "The display name of the property. This is the name that will be shown in the UI and can be used to look up the property id."
  },
  {
+ "UrnValidation": {
+ "entityTypes": [
+ "dataType"
+ ],
+ "exist": true,
+ "strict": true
+ },
  "java": {
  "class": "com.linkedin.pegasus2avro.common.urn.Urn"
  },
@@ -19511,6 +19531,13 @@
  "fieldName": "entityTypes"
  }
  },
+ "UrnValidation": {
+ "entityTypes": [
+ "entityType"
+ ],
+ "exist": true,
+ "strict": true
+ },
  "Urn": "Urn",
  "urn_is_array": true,
  "type": {
@@ -2010,7 +2010,9 @@
  "fieldName": "glossaryTerms",
  "fieldType": "URN",
  "filterNameOverride": "Glossary Term",
- "hasValuesFieldName": "hasGlossaryTerms"
+ "hasValuesFieldName": "hasGlossaryTerms",
+ "includeSystemModifiedAt": true,
+ "systemModifiedAtFieldName": "termsModifiedAt"
  },
  "java": {
  "class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"
@@ -221,7 +221,9 @@
  "/terms/*/urn": {
  "boostScore": 0.5,
  "fieldName": "editedFieldGlossaryTerms",
- "fieldType": "URN"
+ "fieldType": "URN",
+ "includeSystemModifiedAt": true,
+ "systemModifiedAtFieldName": "schemaFieldTermsModifiedAt"
  }
  },
  "type": [
@@ -254,7 +256,9 @@
  "fieldName": "glossaryTerms",
  "fieldType": "URN",
  "filterNameOverride": "Glossary Term",
- "hasValuesFieldName": "hasGlossaryTerms"
+ "hasValuesFieldName": "hasGlossaryTerms",
+ "includeSystemModifiedAt": true,
+ "systemModifiedAtFieldName": "termsModifiedAt"
  },
  "java": {
  "class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"
@@ -31,6 +31,12 @@
  "namespace": "com.linkedin.pegasus2avro.businessattribute",
  "fields": [
  {
+ "Searchable": {
+ "fieldName": "schemaFieldBusinessAttribute",
+ "includeSystemModifiedAt": true,
+ "queryByDefault": false,
+ "systemModifiedAtFieldName": "schemaFieldBusinessAttributeModifiedAt"
+ },
  "java": {
  "class": "com.linkedin.pegasus2avro.common.urn.BusinessAttributeUrn"
  },
@@ -39,6 +39,7 @@
  },
  {
  "Searchable": {
+ "boostScore": 10.0,
  "enableAutocomplete": true,
  "fieldNameAliases": [
  "_entityName"
@@ -303,7 +303,9 @@
  "/terms/*/urn": {
  "boostScore": 0.5,
  "fieldName": "editedFieldGlossaryTerms",
- "fieldType": "URN"
+ "fieldType": "URN",
+ "includeSystemModifiedAt": true,
+ "systemModifiedAtFieldName": "schemaFieldTermsModifiedAt"
  }
  },
  "type": [
@@ -336,7 +338,9 @@
  "fieldName": "glossaryTerms",
  "fieldType": "URN",
  "filterNameOverride": "Glossary Term",
- "hasValuesFieldName": "hasGlossaryTerms"
+ "hasValuesFieldName": "hasGlossaryTerms",
+ "includeSystemModifiedAt": true,
+ "systemModifiedAtFieldName": "termsModifiedAt"
  },
  "java": {
  "class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"
@@ -26,7 +26,9 @@
  "fieldName": "glossaryTerms",
  "fieldType": "URN",
  "filterNameOverride": "Glossary Term",
- "hasValuesFieldName": "hasGlossaryTerms"
+ "hasValuesFieldName": "hasGlossaryTerms",
+ "includeSystemModifiedAt": true,
+ "systemModifiedAtFieldName": "termsModifiedAt"
  },
  "java": {
  "class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"
@@ -553,7 +553,9 @@
  "fieldName": "glossaryTerms",
  "fieldType": "URN",
  "filterNameOverride": "Glossary Term",
- "hasValuesFieldName": "hasGlossaryTerms"
+ "hasValuesFieldName": "hasGlossaryTerms",
+ "includeSystemModifiedAt": true,
+ "systemModifiedAtFieldName": "termsModifiedAt"
  },
  "java": {
  "class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"
@@ -183,6 +183,7 @@
  },
  {
  "Searchable": {
+ "boostScore": 10.0,
  "enableAutocomplete": true,
  "fieldNameAliases": [
  "_entityName"
@@ -994,7 +995,9 @@
  "fieldName": "glossaryTerms",
  "fieldType": "URN",
  "filterNameOverride": "Glossary Term",
- "hasValuesFieldName": "hasGlossaryTerms"
+ "hasValuesFieldName": "hasGlossaryTerms",
+ "includeSystemModifiedAt": true,
+ "systemModifiedAtFieldName": "termsModifiedAt"
  },
  "java": {
  "class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"
@@ -4644,7 +4647,9 @@
  "/terms/*/urn": {
  "boostScore": 0.5,
  "fieldName": "editedFieldGlossaryTerms",
- "fieldType": "URN"
+ "fieldType": "URN",
+ "includeSystemModifiedAt": true,
+ "systemModifiedAtFieldName": "schemaFieldTermsModifiedAt"
  }
  },
  "type": [
@@ -777,7 +777,9 @@
  "fieldName": "glossaryTerms",
  "fieldType": "URN",
  "filterNameOverride": "Glossary Term",
- "hasValuesFieldName": "hasGlossaryTerms"
+ "hasValuesFieldName": "hasGlossaryTerms",
+ "includeSystemModifiedAt": true,
+ "systemModifiedAtFieldName": "termsModifiedAt"
  },
  "java": {
  "class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"
@@ -23,6 +23,13 @@
  "doc": "The display name of the property. This is the name that will be shown in the UI and can be used to look up the property id."
  },
  {
+ "UrnValidation": {
+ "entityTypes": [
+ "dataType"
+ ],
+ "exist": true,
+ "strict": true
+ },
  "java": {
  "class": "com.linkedin.pegasus2avro.common.urn.Urn"
  },
@@ -111,6 +118,13 @@
  "fieldName": "entityTypes"
  }
  },
+ "UrnValidation": {
+ "entityTypes": [
+ "entityType"
+ ],
+ "exist": true,
+ "strict": true
+ },
  "type": {
  "type": "array",
  "items": "string"
@@ -13,7 +13,7 @@ from datahub.ingestion.graph.client import DataHubGraph
  from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier
  from datahub.metadata.schema_classes import SchemaFieldClass, SchemaMetadataClass
  from datahub.metadata.urns import DataPlatformUrn
- from datahub.sql_parsing._models import _TableName as _TableName # noqa: I250
+ from datahub.sql_parsing._models import _TableName as _TableName
  from datahub.sql_parsing.sql_parsing_common import PLATFORMS_WITH_CASE_SENSITIVE_TABLES
  from datahub.utilities.file_backed_collections import ConnectionWrapper, FileBackedDict
  from datahub.utilities.urns.field_paths import get_simple_field_path_from_v2_field_path
@@ -473,7 +473,7 @@ def _create_table_ddl_cll(
  return column_lineage


- def _select_statement_cll( # noqa: C901
+ def _select_statement_cll(
  statement: _SupportedColumnLineageTypes,
  dialect: sqlglot.Dialect,
  root_scope: sqlglot.optimizer.Scope,
@@ -1,5 +1,4 @@
  import logging
- import os
  import pathlib
  from typing import Any, Dict, Optional

@@ -8,11 +7,10 @@ import deepdiff
  from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier
  from datahub.sql_parsing.schema_resolver import SchemaInfo, SchemaResolver
  from datahub.sql_parsing.sqlglot_lineage import SqlParsingResult, sqlglot_lineage
+ from datahub.testing.pytest_hooks import get_golden_settings

  logger = logging.getLogger(__name__)

- UPDATE_FILES = os.environ.get("UPDATE_SQLPARSER_FILES", "false").lower() == "true"
-

  def assert_sql_result_with_resolver(
  sql: str,
@@ -22,6 +20,8 @@ def assert_sql_result_with_resolver(
  allow_table_error: bool = False,
  **kwargs: Any,
  ) -> None:
+ settings = get_golden_settings()
+
  # HACK: Our BigQuery source overwrites this value and doesn't undo it.
  # As such, we need to handle that here.
  BigqueryTableIdentifier._BQ_SHARDED_TABLE_SUFFIX = "_yyyymmdd"
@@ -47,15 +47,14 @@ def assert_sql_result_with_resolver(
  )

  txt = res.json(indent=4)
- if UPDATE_FILES:
+ if settings.update_golden:
  expected_file.write_text(txt)
  return

  if not expected_file.exists():
  expected_file.write_text(txt)
  raise AssertionError(
- f"Expected file {expected_file} does not exist. "
- "Created it with the expected output. Please verify it."
+ f"Missing expected golden file; run with --update-golden-files to create it: {expected_file}"

  )
  expected = SqlParsingResult.parse_raw(expected_file.read_text())
@@ -16,6 +16,7 @@ from deepdiff import DeepDiff
  from datahub.ingestion.sink.file import write_metadata_file
  from datahub.ingestion.source.file import read_metadata_file
  from datahub.testing.mcp_diff import CannotCompareMCPs, MCPDiff, get_aspects_by_urn
+ from datahub.testing.pytest_hooks import get_golden_settings

  logger = logging.getLogger(__name__)

@@ -40,26 +41,26 @@ def load_json_file(filename: Union[str, os.PathLike]) -> MetadataJson:
  def assert_metadata_files_equal(
  output_path: Union[str, os.PathLike],
  golden_path: Union[str, os.PathLike],
- update_golden: bool,
- copy_output: bool,
  ignore_paths: Sequence[str] = (),
  ignore_paths_v2: Sequence[str] = (),
  ignore_order: bool = True,
  ) -> None:
+ settings = get_golden_settings()
+
  golden_exists = os.path.isfile(golden_path)

- if copy_output:
+ if settings.copy_output:
  shutil.copyfile(str(output_path), str(golden_path) + ".output")
  logger.info(f"Copied output file to {golden_path}.output")

- if not update_golden and not golden_exists:
+ if not settings.update_golden and not golden_exists:
  raise FileNotFoundError(
  "Golden file does not exist. Please run with the --update-golden-files option to create."
  )

  output = load_json_file(output_path)

- if update_golden and not golden_exists:
+ if settings.update_golden and not golden_exists:
  shutil.copyfile(str(output_path), str(golden_path))
  return
  else:
@@ -87,7 +88,7 @@ def assert_metadata_files_equal(
  ignore_paths = (*ignore_paths, *default_exclude_paths)

  diff = diff_metadata_json(output, golden, ignore_paths, ignore_order=ignore_order)
- if diff and update_golden:
+ if diff and settings.update_golden:
  if isinstance(diff, MCPDiff) and diff.is_delta_valid:
  logger.info(f"Applying delta to golden file {golden_path}")
  diff.apply_delta(golden)