acryl-datahub 1.0.0rc6__py3-none-any.whl → 1.0.0rc8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {acryl_datahub-1.0.0rc6.dist-info → acryl_datahub-1.0.0rc8.dist-info}/METADATA +2490 -2490
- {acryl_datahub-1.0.0rc6.dist-info → acryl_datahub-1.0.0rc8.dist-info}/RECORD +74 -74
- {acryl_datahub-1.0.0rc6.dist-info → acryl_datahub-1.0.0rc8.dist-info}/WHEEL +1 -1
- datahub/_version.py +1 -1
- datahub/cli/docker_cli.py +1 -1
- datahub/cli/iceberg_cli.py +1 -1
- datahub/cli/lite_cli.py +4 -2
- datahub/cli/specific/dataproduct_cli.py +1 -1
- datahub/configuration/git.py +1 -3
- datahub/configuration/kafka.py +1 -1
- datahub/ingestion/fs/s3_fs.py +2 -2
- datahub/ingestion/glossary/classification_mixin.py +1 -1
- datahub/ingestion/graph/client.py +16 -7
- datahub/ingestion/graph/entity_versioning.py +3 -3
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -6
- datahub/ingestion/source/abs/config.py +2 -4
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +1 -1
- datahub/ingestion/source/cassandra/cassandra_api.py +2 -1
- datahub/ingestion/source/csv_enricher.py +3 -3
- datahub/ingestion/source/dbt/dbt_common.py +1 -1
- datahub/ingestion/source/dremio/dremio_api.py +3 -3
- datahub/ingestion/source/dremio/dremio_aspects.py +2 -1
- datahub/ingestion/source/file.py +5 -2
- datahub/ingestion/source/gc/dataprocess_cleanup.py +1 -1
- datahub/ingestion/source/gc/execution_request_cleanup.py +2 -1
- datahub/ingestion/source/ge_data_profiler.py +11 -14
- datahub/ingestion/source/iceberg/iceberg.py +46 -12
- datahub/ingestion/source/iceberg/iceberg_common.py +31 -20
- datahub/ingestion/source/identity/okta.py +1 -3
- datahub/ingestion/source/kafka/kafka.py +1 -1
- datahub/ingestion/source/kafka_connect/source_connectors.py +4 -7
- datahub/ingestion/source/looker/looker_file_loader.py +2 -2
- datahub/ingestion/source/looker/looker_lib_wrapper.py +2 -1
- datahub/ingestion/source/looker/looker_template_language.py +4 -2
- datahub/ingestion/source/looker/lookml_source.py +3 -2
- datahub/ingestion/source/metabase.py +54 -32
- datahub/ingestion/source/metadata/lineage.py +2 -2
- datahub/ingestion/source/mode.py +1 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +6 -3
- datahub/ingestion/source/openapi_parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/parser.py +3 -2
- datahub/ingestion/source/powerbi/m_query/tree_function.py +2 -1
- datahub/ingestion/source/powerbi/powerbi.py +1 -3
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +1 -1
- datahub/ingestion/source/pulsar.py +2 -2
- datahub/ingestion/source/qlik_sense/websocket_connection.py +4 -2
- datahub/ingestion/source/redash.py +2 -1
- datahub/ingestion/source/s3/config.py +2 -4
- datahub/ingestion/source/s3/source.py +20 -41
- datahub/ingestion/source/salesforce.py +1 -1
- datahub/ingestion/source/schema_inference/object.py +1 -1
- datahub/ingestion/source/sigma/sigma.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_connection.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_v2.py +1 -1
- datahub/ingestion/source/sql/athena.py +2 -2
- datahub/ingestion/source/sql/druid.py +1 -5
- datahub/ingestion/source/sql/sql_common.py +2 -2
- datahub/ingestion/source/sql/sql_types.py +2 -2
- datahub/ingestion/source/sql/teradata.py +4 -2
- datahub/ingestion/source/sql/trino.py +2 -2
- datahub/ingestion/source/superset.py +65 -37
- datahub/ingestion/source/tableau/tableau.py +3 -6
- datahub/ingestion/source/tableau/tableau_common.py +2 -1
- datahub/lite/duckdb_lite.py +5 -10
- datahub/lite/lite_local.py +1 -1
- datahub/lite/lite_util.py +4 -3
- datahub/sdk/dataset.py +3 -3
- datahub/utilities/memory_footprint.py +3 -2
- {acryl_datahub-1.0.0rc6.dist-info → acryl_datahub-1.0.0rc8.dist-info}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc6.dist-info → acryl_datahub-1.0.0rc8.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0rc6.dist-info → acryl_datahub-1.0.0rc8.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/metabase.py
CHANGED

@@ -69,9 +69,19 @@ class MetabaseConfig(DatasetLineageProviderConfigBase, StatefulIngestionConfigBa
         default=None,
         description="optional URL to use in links (if `connect_uri` is only for ingestion)",
     )
-    username: Optional[str] = Field(…
+    username: Optional[str] = Field(
+        default=None,
+        description="Metabase username, used when an API key is not provided.",
+    )
     password: Optional[pydantic.SecretStr] = Field(
-        default=None,…
+        default=None,
+        description="Metabase password, used when an API key is not provided.",
+    )
+
+    # https://www.metabase.com/learn/metabase-basics/administration/administration-and-operation/metabase-api#example-get-request
+    api_key: Optional[pydantic.SecretStr] = Field(
+        default=None,
+        description="Metabase API key. If provided, the username and password will be ignored. Recommended method.",
     )
     # TODO: Check and remove this if no longer needed.
     # Config database_alias is removed from sql sources.
@@ -178,30 +188,40 @@ class MetabaseSource(StatefulIngestionSourceBase):
         self.source_config: MetabaseConfig = config
 
     def setup_session(self) -> None:
-        login_response = requests.post(
-            f"{self.config.connect_uri}/api/session",
-            None,
-            {
-                "username": self.config.username,
-                "password": (
-                    self.config.password.get_secret_value()
-                    if self.config.password
-                    else None
-                ),
-            },
-        )
+        self.session = requests.session()
+        if self.config.api_key:
+            self.session.headers.update(
+                {
+                    "x-api-key": self.config.api_key.get_secret_value(),
+                    "Content-Type": "application/json",
+                    "Accept": "*/*",
+                }
+            )
+        else:
+            # If no API key is provided, generate a session token using username and password.
+            login_response = requests.post(
+                f"{self.config.connect_uri}/api/session",
+                None,
+                {
+                    "username": self.config.username,
+                    "password": (
+                        self.config.password.get_secret_value()
+                        if self.config.password
+                        else None
+                    ),
+                },
+            )
 
-        login_response.raise_for_status()
-        self.access_token = login_response.json().get("id", "")
+            login_response.raise_for_status()
+            self.access_token = login_response.json().get("id", "")
 
-        self.session = requests.session()
-        self.session.headers.update(
-            {
-                "X-Metabase-Session": f"{self.access_token}",
-                "Content-Type": "application/json",
-                "Accept": "*/*",
-            }
-        )
+            self.session.headers.update(
+                {
+                    "X-Metabase-Session": f"{self.access_token}",
+                    "Content-Type": "application/json",
+                    "Accept": "*/*",
+                }
+            )
 
         # Test the connection
         try:
@@ -217,15 +237,17 @@ class MetabaseSource(StatefulIngestionSourceBase):
         )
 
     def close(self) -> None:
-        response = requests.delete(
-            f"{self.config.connect_uri}/api/session",
-            headers={"X-Metabase-Session": self.access_token},
-        )
-        if response.status_code not in (200, 204):
-            self.report.report_failure(
-                title="Unable to Log User Out",
-                message=f"Unable to logout for user {self.config.username}",
+        # API key authentication does not require session closure.
+        if not self.config.api_key:
+            response = requests.delete(
+                f"{self.config.connect_uri}/api/session",
+                headers={"X-Metabase-Session": self.access_token},
             )
+            if response.status_code not in (200, 204):
+                self.report.report_failure(
+                    title="Unable to Log User Out",
+                    message=f"Unable to logout for user {self.config.username}",
+                )
         super().close()
 
     def emit_dashboard_mces(self) -> Iterable[MetadataWorkUnit]:
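Taken together, the three metabase.py hunks add API-key authentication alongside the existing username/password session flow: a static `x-api-key` header when `api_key` is set, otherwise a session token obtained from `POST /api/session` and sent as `X-Metabase-Session` (and deleted again on close). A minimal sketch of the two modes, using only the endpoints and headers visible in the hunks (host and credentials are placeholders):

import requests

METABASE = "https://metabase.example.com"  # placeholder host

def make_session(api_key=None, username=None, password=None):
    session = requests.session()
    if api_key:
        # API-key mode: one static header; no login, and no logout on close.
        session.headers.update({"x-api-key": api_key})
    else:
        # Session-token mode: POST /api/session returns a token under "id".
        resp = requests.post(
            f"{METABASE}/api/session",
            json={"username": username, "password": password},
        )
        resp.raise_for_status()
        session.headers.update({"X-Metabase-Session": resp.json()["id"]})
    return session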
datahub/ingestion/source/metadata/lineage.py
CHANGED

@@ -104,8 +104,8 @@ class FineGrainedLineageConfig(ConfigModel):
 
 class EntityNodeConfig(ConfigModel):
     entity: EntityConfig
-    upstream: Optional[List["EntityNodeConfig"]]
-    fineGrainedLineages: Optional[List[FineGrainedLineageConfig]]
+    upstream: Optional[List["EntityNodeConfig"]] = None
+    fineGrainedLineages: Optional[List[FineGrainedLineageConfig]] = None
 
 
 # https://pydantic-docs.helpmanual.io/usage/postponed_annotations/ required for when you reference a model within itself
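The explicit `= None` defaults matter because a Pydantic field annotated `Optional[...]` without a default can be treated as required (this is the behavior Pydantic v2 standardizes on). A minimal sketch with a hypothetical model, not the DataHub class:

from typing import List, Optional
from pydantic import BaseModel

class Node(BaseModel):
    name: str
    upstream: Optional[List["Node"]] = None  # explicit default keeps the field optional

print(Node(name="a").upstream)  # None; without "= None" the field could become required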
datahub/ingestion/source/mode.py
CHANGED

@@ -1494,7 +1494,7 @@ class ModeSource(StatefulIngestionSourceBase):
                 sleep_time = error_response.headers.get("retry-after")
                 if sleep_time is not None:
                     time.sleep(float(sleep_time))
-                raise HTTPError429
+                raise HTTPError429 from None
 
         raise http_error
 
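`raise HTTPError429 from None` raises inside an `except` block, so without `from None` Python would chain the new exception onto the one being handled and print both tracebacks ("During handling of the above exception, another exception occurred"). `from None` suppresses that implicit chain. A small illustration (the exception class here is a stand-in):

class HTTPError429(Exception):
    pass  # stand-in for the Mode source's rate-limit error

try:
    raise ValueError("underlying HTTP error")
except ValueError:
    # Plain `raise HTTPError429` would attach the ValueError as __context__
    # and print both tracebacks; `from None` hides the chain.
    raise HTTPError429 from None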
datahub/ingestion/source/neo4j/neo4j_source.py
CHANGED

@@ -292,7 +292,7 @@ class Neo4jSource(StatefulIngestionSourceBase):
         return record["properties"]
 
     def get_relationships(self, record: dict) -> dict:
-        return record.get("relationships", …)
+        return record.get("relationships", {})
 
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
         return [
datahub/ingestion/source/nifi.py
CHANGED

@@ -1234,11 +1234,14 @@ class NifiSource(StatefulIngestionSourceBase):
         job_type: str,
         description: Optional[str],
         job_properties: Optional[Dict[str, str]] = None,
-        inlets: List[str] = [],
-        outlets: List[str] = [],
-        inputJobs: List[str] = [],
+        inlets: Optional[List[str]] = None,
+        outlets: Optional[List[str]] = None,
+        inputJobs: Optional[List[str]] = None,
         status: Optional[str] = None,
     ) -> Iterable[MetadataWorkUnit]:
+        inlets = inlets or []
+        outlets = outlets or []
+        inputJobs = inputJobs or []
         logger.debug(f"Begining construction of job workunit for {job_urn}")
         if job_properties:
             job_properties = {k: v for k, v in job_properties.items() if v is not None}
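The nifi.py hunk removes mutable default arguments: a default like `inlets: List[str] = []` is created once at function-definition time and shared across calls, so appends leak between invocations. The `Optional[...] = None` plus `x = x or []` idiom in the hunk gives each call a fresh list. A minimal sketch of the pitfall:

from typing import List, Optional

def buggy(items: List[str] = []) -> List[str]:
    items.append("x")
    return items

buggy()  # ["x"]
buggy()  # ["x", "x"] -- the same default list object is reused

def fixed(items: Optional[List[str]] = None) -> List[str]:
    items = items or []  # fresh list on every call
    items.append("x")
    return items

fixed()  # ["x"]
fixed()  # ["x"] -- no shared state

The same fix appears below in the Power BI parser, tree_function, data_resolver, Qlik websocket, Redash, and Teradata hunks.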
datahub/ingestion/source/openapi_parser.py
CHANGED

@@ -167,7 +167,7 @@ def check_for_api_example_data(base_res: dict, key: str) -> dict:
     Try to determine if example data is defined for the endpoint, and return it
     """
     data = {}
-    if "content" in base_res.keys():
+    if "content" in base_res:
         res_cont = base_res["content"]
         if "application/json" in res_cont.keys():
             ex_field = None
@@ -188,7 +188,7 @@ def check_for_api_example_data(base_res: dict, key: str) -> dict:
         )
     elif "text/csv" in res_cont.keys():
         data = res_cont["text/csv"]["schema"]
-    elif "examples" in base_res.keys():
+    elif "examples" in base_res:
         data = base_res["examples"]["application/json"]
 
     return data
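Both hunks are the standard `key in d.keys()` → `key in d` simplification: dict membership already tests keys, without materializing a keys view first. A one-line check:

base_res = {"content": {"application/json": {}}}
assert ("content" in base_res) == ("content" in base_res.keys())  # identical result

The same cleanup shows up later in the report_server, schema_inference, Snowflake connection, sql_common, and Trino hunks.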
datahub/ingestion/source/powerbi/m_query/parser.py
CHANGED

@@ -2,7 +2,7 @@ import functools
 import importlib.resources as pkg_resource
 import logging
 import os
-from typing import Dict, List
+from typing import Dict, List, Optional
 
 import lark
 from lark import Lark, Tree
@@ -65,8 +65,9 @@ def get_upstream_tables(
     platform_instance_resolver: AbstractDataPlatformInstanceResolver,
     ctx: PipelineContext,
     config: PowerBiDashboardSourceConfig,
-    parameters: Dict[str, str] = {},
+    parameters: Optional[Dict[str, str]] = None,
 ) -> List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage]:
+    parameters = parameters or {}
     if table.expression is None:
         logger.debug(f"There is no M-Query expression in table {table.full_name}")
         return []
datahub/ingestion/source/powerbi/m_query/tree_function.py
CHANGED

@@ -70,13 +70,14 @@ def get_first_rule(tree: Tree, rule: str) -> Optional[Tree]:
     return expression_tree
 
 
-def token_values(tree: Tree, parameters: Dict[str, str] = {}) -> List[str]:
+def token_values(tree: Tree, parameters: Optional[Dict[str, str]] = None) -> List[str]:
     """
     :param tree: Tree to traverse
     :param parameters: If parameters is not an empty dict, it will try to resolve identifier variable references
                        using the values in 'parameters'.
     :return: List of leaf token data
     """
+    parameters = parameters or {}
     values: List[str] = []
 
     def internal(node: Union[Tree, Token]) -> None:
datahub/ingestion/source/powerbi/powerbi.py
CHANGED

@@ -890,9 +890,7 @@ class Mapper:
                     set(user_rights) & set(self.__config.ownership.owner_criteria)
                 )
                 > 0
-            ):
-                user_mcps.extend(self.to_datahub_user(user))
-            elif self.__config.ownership.owner_criteria is None:
+            ) or self.__config.ownership.owner_criteria is None:
                 user_mcps.extend(self.to_datahub_user(user))
             else:
                 continue
datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py
CHANGED

@@ -380,8 +380,9 @@ class DataResolverBase(ABC):
     def itr_pages(
         self,
         endpoint: str,
-        parameter_override: Dict = {},
+        parameter_override: Optional[Dict] = None,
     ) -> Iterator[List[Dict]]:
+        parameter_override = parameter_override or {}
         params: dict = {
             "$skip": 0,
             "$top": self.TOP,
datahub/ingestion/source/powerbi_report_server/report_server.py
CHANGED

@@ -196,7 +196,7 @@ class PowerBiReportServerAPI:
         }
 
         reports: List[Any] = []
-        for report_type in report_types_mapping.keys():
+        for report_type in report_types_mapping:
             report_get_endpoint: str = API_ENDPOINTS[report_type]
             # Replace place holders
             report_get_endpoint_http = report_get_endpoint.format(
datahub/ingestion/source/pulsar.py
CHANGED

@@ -230,8 +230,8 @@ class PulsarSource(StatefulIngestionSourceBase):
             self.report.report_warning("HTTPError", message)
         except requests.exceptions.RequestException as e:
             raise Exception(
-                …
-            )
+                "An ambiguous exception occurred while handling the request"
+            ) from e
 
     @classmethod
     def create(cls, config_dict, ctx):
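Where the Mode hunk used `from None` to suppress chaining, the Pulsar hunk uses `from e` to keep the caught `RequestException` attached as the explicit cause, so the original error still appears in the traceback without being interpolated into the message. A short sketch:

import requests

def fetch(url: str) -> requests.Response:
    try:
        return requests.get(url, timeout=5)
    except requests.exceptions.RequestException as e:
        # The caught exception remains reachable as the new error's __cause__.
        raise Exception(
            "An ambiguous exception occurred while handling the request"
        ) from e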
datahub/ingestion/source/qlik_sense/websocket_connection.py
CHANGED

@@ -17,8 +17,9 @@ class WebsocketConnection:
         self.handle = [-1]
 
     def _build_websocket_request_dict(
-        self, method: str, params: Union[Dict, List] = {}
+        self, method: str, params: Optional[Union[Dict, List]] = None
     ) -> Dict:
+        params = params or {}
         return {
             "jsonrpc": "2.0",
             "id": self.request_id,
@@ -37,11 +38,12 @@ class WebsocketConnection:
         return {}
 
     def websocket_send_request(
-        self, method: str, params: Union[Dict, List] = {}
+        self, method: str, params: Optional[Union[Dict, List]] = None
     ) -> Dict:
         """
         Method to send request to websocket
         """
+        params = params or {}
         self.request_id += 1
         request = self._build_websocket_request_dict(method, params)
         response = self._send_request(request=request)
datahub/ingestion/source/redash.py
CHANGED

@@ -421,8 +421,9 @@ class RedashSource(StatefulIngestionSourceBase):
         return database_name
 
     def _get_datasource_urns(
-        self, data_source: Dict, sql_query_data: Dict = {}
+        self, data_source: Dict, sql_query_data: Optional[Dict] = None
     ) -> Optional[List[str]]:
+        sql_query_data = sql_query_data or {}
         platform = self._get_platform_based_on_datasource(data_source)
         database_name = self._get_database_name_based_on_datasource(data_source)
         data_source_syntax = data_source.get("syntax")
datahub/ingestion/source/s3/config.py
CHANGED

@@ -154,10 +154,8 @@ class DataLakeSourceConfig(
         return path_specs
 
     @pydantic.validator("platform", always=True)
-    def platform_valid(cls, platform: …
-        inferred_platform = values.get(
-            "platform", None
-        )  # we may have inferred it above
+    def platform_valid(cls, platform: Any, values: dict) -> str:
+        inferred_platform = values.get("platform")  # we may have inferred it above
         platform = platform or inferred_platform
         if not platform:
             raise ValueError("platform must not be empty")
datahub/ingestion/source/s3/source.py
CHANGED

@@ -834,7 +834,7 @@ class S3Source(StatefulIngestionSourceBase):
                 min=min,
             )
             folders.extend(folders_list)
-            if …
+            if path_spec.traversal_method != FolderTraversalMethod.ALL:
                 return folders
             if folders:
                 return folders
@@ -847,7 +847,7 @@ class S3Source(StatefulIngestionSourceBase):
         path_spec: PathSpec,
         bucket: "Bucket",
         prefix: str,
-    ) -> List[Folder]:
+    ) -> Iterable[Folder]:
         """
         Retrieves all the folders in a path by listing all the files in the prefix.
         If the prefix is a full path then only that folder will be extracted.
@@ -877,51 +877,30 @@ class S3Source(StatefulIngestionSourceBase):
         s3_objects = (
             obj
             for obj in bucket.objects.filter(Prefix=prefix).page_size(PAGE_SIZE)
-            if _is_allowed_path(…
+            if _is_allowed_path(
+                path_spec, self.create_s3_path(obj.bucket_name, obj.key)
+            )
         )
-
-        partitions: List[Folder] = []
         grouped_s3_objects_by_dirname = groupby_unsorted(
             s3_objects,
             key=lambda obj: obj.key.rsplit("/", 1)[0],
         )
-        for …
-            …
-                logger.warning(
-                    f"Unable to find any files in the folder {key}. Skipping..."
-                )
-                continue
-
-            id = path_spec.get_partition_from_path(
-                self.create_s3_path(max_file.bucket_name, max_file.key)
+        for _, group in grouped_s3_objects_by_dirname:
+            max_file = max(group, key=lambda x: x.last_modified)
+            max_file_s3_path = self.create_s3_path(max_file.bucket_name, max_file.key)
+
+            # If partition_id is None, it means the folder is not a partition
+            partition_id = path_spec.get_partition_from_path(max_file_s3_path)
+
+            yield Folder(
+                partition_id=partition_id,
+                is_partition=bool(partition_id),
+                creation_time=min(obj.last_modified for obj in group),
+                modification_time=max_file.last_modified,
+                sample_file=max_file_s3_path,
+                size=sum(obj.size for obj in group),
             )
 
-            # If id is None, it means the folder is not a partition
-            partitions.append(
-                Folder(
-                    partition_id=id,
-                    is_partition=bool(id),
-                    creation_time=creation_time if creation_time else None,  # type: ignore[arg-type]
-                    modification_time=modification_time,
-                    sample_file=self.create_s3_path(max_file.bucket_name, max_file.key),
-                    size=file_size,
-                )
-            )
-
-        return partitions
-
     def s3_browser(self, path_spec: PathSpec, sample_size: int) -> Iterable[BrowsePath]:
         if self.source_config.aws_config is None:
             raise ValueError("aws_config not set. Cannot browse s3")
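This hunk rewrites the folder scan from "accumulate a `List[Folder]` and return it" into a generator that yields one `Folder` per directory group, computing the newest file, total size, and time bounds per group along the way. A simplified, self-contained sketch of the same aggregation (plain dict grouping stands in for `groupby_unsorted`, and a tuple record stands in for the S3 object):

from typing import Dict, Iterable, List, NamedTuple

class Obj(NamedTuple):
    key: str
    last_modified: int
    size: int

def folders(objects: Iterable[Obj]) -> Iterable[dict]:
    groups: Dict[str, List[Obj]] = {}
    for obj in objects:  # group objects by their directory name
        groups.setdefault(obj.key.rsplit("/", 1)[0], []).append(obj)
    for dirname, group in groups.items():
        newest = max(group, key=lambda o: o.last_modified)
        yield {  # one record per folder, produced lazily
            "dir": dirname,
            "creation_time": min(o.last_modified for o in group),
            "modification_time": newest.last_modified,
            "size": sum(o.size for o in group),
        }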
@@ -1000,7 +979,7 @@ class S3Source(StatefulIngestionSourceBase):
                 min=True,
             )
             dirs_to_process.append(dirs_to_process_min[0])
-        folders = []
+        folders: List[Folder] = []
         for dir in dirs_to_process:
             logger.info(f"Getting files from folder: {dir}")
             prefix_to_process = urlparse(dir).path.lstrip("/")
datahub/ingestion/source/salesforce.py
CHANGED

@@ -615,7 +615,7 @@ class SalesforceSource(StatefulIngestionSourceBase):
             prefix = "\\" if text.startswith("#") else ""
             desc += f"\n\n{prefix}{text}"
 
-        text = field.get("InlineHelpText", None)
+        text = field.get("InlineHelpText")
         if text:
             prefix = "\\" if text.startswith("#") else ""
             desc += f"\n\n{prefix}{text}"
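This is the `d.get(key, None)` → `d.get(key)` simplification: `None` is already the default for `dict.get`, so the second argument was redundant. The Athena, sql_common, sql_types, and Trino hunks below make the same change.

field = {"Label": "Account"}
assert field.get("InlineHelpText") is None  # default is already None
assert field.get("InlineHelpText") == field.get("InlineHelpText", None)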
datahub/ingestion/source/schema_inference/object.py
CHANGED

@@ -149,7 +149,7 @@ def construct_schema(
 
     extended_schema: Dict[Tuple[str, ...], SchemaDescription] = {}
 
-    for field_path in schema.keys():
+    for field_path in schema:
         field_types = schema[field_path]["types"]
         field_type: Union[str, type] = "mixed"
 
datahub/ingestion/source/sigma/sigma.py
CHANGED

@@ -124,7 +124,7 @@ class SigmaSource(StatefulIngestionSourceBase, TestableSource):
         try:
             self.sigma_api = SigmaAPI(self.config, self.reporter)
         except Exception as e:
-            raise ConfigurationError(…
+            raise ConfigurationError("Unable to connect sigma API") from e
 
     @staticmethod
     def test_connection(config_dict: dict) -> TestConnectionReport:
datahub/ingestion/source/snowflake/snowflake_connection.py
CHANGED

@@ -125,7 +125,7 @@ class SnowflakeConnectionConfig(ConfigModel):
 
     @pydantic.validator("authentication_type", always=True)
     def authenticator_type_is_valid(cls, v, values):
-        if v not in _VALID_AUTH_TYPES.keys():
+        if v not in _VALID_AUTH_TYPES:
             raise ValueError(
                 f"unsupported authenticator type '{v}' was provided,"
                 f" use one of {list(_VALID_AUTH_TYPES.keys())}"
@@ -312,7 +312,7 @@ class SnowflakeConnectionConfig(ConfigModel):
             raise ValueError(
                 f"access_token not found in response {response}. "
                 "Please check your OAuth configuration."
-            )
+            ) from None
         connect_args = self.get_options()["connect_args"]
         return snowflake.connector.connect(
             user=self.username,
datahub/ingestion/source/sql/athena.py
CHANGED

@@ -396,7 +396,7 @@ class AthenaSource(SQLAlchemySource):
             metadata.table_type if metadata.table_type else ""
         )
 
-        location: Optional[str] = custom_properties.get("location", None)
+        location: Optional[str] = custom_properties.get("location")
         if location is not None:
             if location.startswith("s3://"):
                 location = make_s3_urn(location, self.config.env)
@@ -538,7 +538,7 @@ class AthenaSource(SQLAlchemySource):
                 column_name=column["name"],
                 column_type=column["type"],
                 inspector=inspector,
-                description=column.get("comment", None),
+                description=column.get("comment"),
                 nullable=column.get("nullable", True),
                 is_part_of_key=(
                     True
datahub/ingestion/source/sql/druid.py
CHANGED

@@ -50,11 +50,7 @@ class DruidConfig(BasicSQLAlchemyConfig):
     """
 
     def get_identifier(self, schema: str, table: str) -> str:
-        return (
-            f"{self.platform_instance}.{table}"
-            if self.platform_instance
-            else f"{table}"
-        )
+        return f"{table}"
 
 
 @platform_name("Druid")
datahub/ingestion/source/sql/sql_common.py
CHANGED

@@ -204,7 +204,7 @@ def get_column_type(
     """
 
     TypeClass: Optional[Type] = None
-    for sql_type in _field_type_mapping.keys():
+    for sql_type in _field_type_mapping:
         if isinstance(column_type, sql_type):
             TypeClass = _field_type_mapping[sql_type]
             break
@@ -973,7 +973,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
                         inspector=inspector,
                     )
                 ),
-                description=column.get("comment", None),
+                description=column.get("comment"),
                 nullable=column["nullable"],
                 recursive=False,
                 globalTags=gtc,
datahub/ingestion/source/sql/sql_types.py
CHANGED

@@ -317,10 +317,10 @@ def resolve_snowflake_modified_type(type_string: str) -> Any:
     match = re.match(r"([a-zA-Z_]+)\(\d+,\s\d+\)", type_string)
     if match:
         modified_type_base = match.group(1)  # Extract the base type
-        return SNOWFLAKE_TYPES_MAP.get(modified_type_base, None)
+        return SNOWFLAKE_TYPES_MAP.get(modified_type_base)
 
     # Fallback for types without precision/scale
-    return SNOWFLAKE_TYPES_MAP.get(type_string, None)
+    return SNOWFLAKE_TYPES_MAP.get(type_string)
 
 
 # see https://github.com/googleapis/python-bigquery-sqlalchemy/blob/main/sqlalchemy_bigquery/_types.py#L32
datahub/ingestion/source/sql/teradata.py
CHANGED

@@ -180,10 +180,11 @@ def optimized_get_columns(
     connection: Connection,
     table_name: str,
     schema: Optional[str] = None,
-    tables_cache: MutableMapping[str, List[TeradataTable]] = {},
+    tables_cache: Optional[MutableMapping[str, List[TeradataTable]]] = None,
     use_qvci: bool = False,
    **kw: Dict[str, Any],
 ) -> List[Dict]:
+    tables_cache = tables_cache or {}
     if schema is None:
         schema = self.default_schema_name
 
@@ -314,9 +315,10 @@ def optimized_get_view_definition(
     connection: Connection,
     view_name: str,
     schema: Optional[str] = None,
-    tables_cache: MutableMapping[str, List[TeradataTable]] = {},
+    tables_cache: Optional[MutableMapping[str, List[TeradataTable]]] = None,
     **kw: Dict[str, Any],
 ) -> Optional[str]:
+    tables_cache = tables_cache or {}
     if schema is None:
         schema = self.default_schema_name
 
datahub/ingestion/source/sql/trino.py
CHANGED

@@ -142,7 +142,7 @@ def get_table_comment(self, connection, table_name: str, schema: str = None, **k
         if col_value is not None:
             properties[col_name] = col_value
 
-        return {"text": properties.get("comment", None), "properties": properties}
+        return {"text": properties.get("comment"), "properties": properties}
     else:
         return self.get_table_comment_default(connection, table_name, schema)
 except Exception:
@@ -483,7 +483,7 @@ def _parse_struct_fields(parts):
 
 
 def _parse_basic_datatype(s):
-    for sql_type in _all_atomic_types.keys():
+    for sql_type in _all_atomic_types:
         if isinstance(s, sql_type):
             return {
                 "type": _all_atomic_types[sql_type],