acryl-datahub 1.0.0rc7__py3-none-any.whl → 1.0.0rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of acryl-datahub might be problematic.
Files changed (52)
  1. {acryl_datahub-1.0.0rc7.dist-info → acryl_datahub-1.0.0rc8.dist-info}/METADATA +2405 -2405
  2. {acryl_datahub-1.0.0rc7.dist-info → acryl_datahub-1.0.0rc8.dist-info}/RECORD +52 -52
  3. datahub/_version.py +1 -1
  4. datahub/configuration/git.py +1 -3
  5. datahub/ingestion/glossary/classification_mixin.py +1 -1
  6. datahub/ingestion/graph/client.py +1 -1
  7. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -6
  8. datahub/ingestion/source/abs/config.py +2 -4
  9. datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
  10. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +1 -1
  11. datahub/ingestion/source/csv_enricher.py +1 -1
  12. datahub/ingestion/source/dbt/dbt_common.py +1 -1
  13. datahub/ingestion/source/file.py +5 -2
  14. datahub/ingestion/source/gc/dataprocess_cleanup.py +1 -1
  15. datahub/ingestion/source/ge_data_profiler.py +11 -14
  16. datahub/ingestion/source/iceberg/iceberg.py +46 -12
  17. datahub/ingestion/source/iceberg/iceberg_common.py +31 -20
  18. datahub/ingestion/source/identity/okta.py +1 -3
  19. datahub/ingestion/source/kafka_connect/source_connectors.py +4 -7
  20. datahub/ingestion/source/looker/looker_lib_wrapper.py +2 -1
  21. datahub/ingestion/source/looker/looker_template_language.py +4 -2
  22. datahub/ingestion/source/looker/lookml_source.py +2 -1
  23. datahub/ingestion/source/metadata/lineage.py +2 -2
  24. datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
  25. datahub/ingestion/source/nifi.py +6 -3
  26. datahub/ingestion/source/openapi_parser.py +2 -2
  27. datahub/ingestion/source/powerbi/m_query/parser.py +3 -2
  28. datahub/ingestion/source/powerbi/m_query/tree_function.py +2 -1
  29. datahub/ingestion/source/powerbi/powerbi.py +1 -3
  30. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -1
  31. datahub/ingestion/source/powerbi_report_server/report_server.py +1 -1
  32. datahub/ingestion/source/qlik_sense/websocket_connection.py +4 -2
  33. datahub/ingestion/source/redash.py +2 -1
  34. datahub/ingestion/source/s3/config.py +2 -4
  35. datahub/ingestion/source/s3/source.py +20 -41
  36. datahub/ingestion/source/salesforce.py +1 -1
  37. datahub/ingestion/source/schema_inference/object.py +1 -1
  38. datahub/ingestion/source/snowflake/snowflake_connection.py +1 -1
  39. datahub/ingestion/source/snowflake/snowflake_v2.py +1 -1
  40. datahub/ingestion/source/sql/athena.py +2 -2
  41. datahub/ingestion/source/sql/sql_common.py +2 -2
  42. datahub/ingestion/source/sql/sql_types.py +2 -2
  43. datahub/ingestion/source/sql/teradata.py +4 -2
  44. datahub/ingestion/source/sql/trino.py +2 -2
  45. datahub/ingestion/source/superset.py +65 -37
  46. datahub/ingestion/source/tableau/tableau.py +1 -5
  47. datahub/lite/duckdb_lite.py +3 -9
  48. datahub/sdk/dataset.py +3 -3
  49. {acryl_datahub-1.0.0rc7.dist-info → acryl_datahub-1.0.0rc8.dist-info}/LICENSE +0 -0
  50. {acryl_datahub-1.0.0rc7.dist-info → acryl_datahub-1.0.0rc8.dist-info}/WHEEL +0 -0
  51. {acryl_datahub-1.0.0rc7.dist-info → acryl_datahub-1.0.0rc8.dist-info}/entry_points.txt +0 -0
  52. {acryl_datahub-1.0.0rc7.dist-info → acryl_datahub-1.0.0rc8.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/iceberg/iceberg.py

@@ -2,8 +2,9 @@ import json
 import logging
 import threading
 import uuid
-from typing import Any, Dict, Iterable, List, Optional
+from typing import Any, Dict, Iterable, List, Optional, Tuple
 
+from dateutil import parser as dateutil_parser
 from pyiceberg.catalog import Catalog
 from pyiceberg.exceptions import (
     NoSuchIcebergTableError,
@@ -81,6 +82,7 @@ from datahub.metadata.schema_classes import (
     OwnerClass,
     OwnershipClass,
     OwnershipTypeClass,
+    TimeStampClass,
 )
 from datahub.utilities.perf_timer import PerfTimer
 from datahub.utilities.threaded_iterator_executor import ThreadedIteratorExecutor
@@ -183,16 +185,9 @@ class IcebergSource(StatefulIngestionSourceBase):
     def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
         thread_local = threading.local()
 
-        def _process_dataset(dataset_path: Identifier) -> Iterable[MetadataWorkUnit]:
-            LOGGER.debug(f"Processing dataset for path {dataset_path}")
-            dataset_name = ".".join(dataset_path)
-            if not self.config.table_pattern.allowed(dataset_name):
-                # Dataset name is rejected by pattern, report as dropped.
-                self.report.report_dropped(dataset_name)
-                LOGGER.debug(
-                    f"Skipping table {dataset_name} due to not being allowed by the config pattern"
-                )
-                return
+        def _try_processing_dataset(
+            dataset_path: Tuple[str, ...], dataset_name: str
+        ) -> Iterable[MetadataWorkUnit]:
             try:
                 if not hasattr(thread_local, "local_catalog"):
                     LOGGER.debug(
@@ -248,10 +243,31 @@ class IcebergSource(StatefulIngestionSourceBase):
                 LOGGER.warning(
                     f"Iceberg Rest Catalog server error (500 status) encountered when processing table {dataset_path}, skipping it."
                 )
+            except ValueError as e:
+                if "Could not initialize FileIO" not in str(e):
+                    raise
+                self.report.warning(
+                    "Could not initialize FileIO",
+                    f"Could not initialize FileIO for {dataset_path} due to: {e}",
+                )
+
+        def _process_dataset(dataset_path: Identifier) -> Iterable[MetadataWorkUnit]:
+            try:
+                LOGGER.debug(f"Processing dataset for path {dataset_path}")
+                dataset_name = ".".join(dataset_path)
+                if not self.config.table_pattern.allowed(dataset_name):
+                    # Dataset name is rejected by pattern, report as dropped.
+                    self.report.report_dropped(dataset_name)
+                    LOGGER.debug(
+                        f"Skipping table {dataset_name} due to not being allowed by the config pattern"
+                    )
+                    return
+
+                yield from _try_processing_dataset(dataset_path, dataset_name)
             except Exception as e:
                 self.report.report_failure(
                     "general",
-                    f"Failed to create workunit for dataset {dataset_name}: {e}",
+                    f"Failed to create workunit for dataset {dataset_path}: {e}",
                 )
                 LOGGER.exception(
                     f"Exception while processing table {dataset_path}, skipping it.",
@@ -288,6 +304,7 @@ class IcebergSource(StatefulIngestionSourceBase):
         )
 
         # Dataset properties aspect.
+        additional_properties = {}
         custom_properties = table.metadata.properties.copy()
         custom_properties["location"] = table.metadata.location
         custom_properties["format-version"] = str(table.metadata.format_version)
@@ -299,10 +316,27 @@ class IcebergSource(StatefulIngestionSourceBase):
             custom_properties["manifest-list"] = (
                 table.current_snapshot().manifest_list
             )
+            additional_properties["lastModified"] = TimeStampClass(
+                int(table.current_snapshot().timestamp_ms)
+            )
+        if "created-at" in custom_properties:
+            try:
+                dt = dateutil_parser.isoparse(custom_properties["created-at"])
+                additional_properties["created"] = TimeStampClass(
+                    int(dt.timestamp() * 1000)
+                )
+            except Exception as ex:
+                LOGGER.warning(
+                    f"Exception while trying to parse creation date {custom_properties['created-at']}, ignoring: {ex}"
+                )
+
         dataset_properties = DatasetPropertiesClass(
             name=table.name()[-1],
             description=table.metadata.properties.get("comment", None),
             customProperties=custom_properties,
+            lastModified=additional_properties.get("lastModified"),
+            created=additional_properties.get("created"),
+            qualifiedName=dataset_name,
         )
         dataset_snapshot.aspects.append(dataset_properties)
         # Dataset ownership aspect.
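For reference, the two timestamps added above come from different shapes in Iceberg metadata: the current snapshot's timestamp_ms is already epoch milliseconds, while the optional created-at table property is an ISO-8601 string that has to be parsed. A standalone sketch of the created-at conversion, outside the DataHub classes:

# Sketch only: convert Iceberg's optional "created-at" property (ISO-8601
# string) into epoch milliseconds, ignoring values that do not parse.
from typing import Dict, Optional

from dateutil import parser as dateutil_parser


def created_at_millis(properties: Dict[str, str]) -> Optional[int]:
    raw = properties.get("created-at")
    if raw is None:
        return None
    try:
        return int(dateutil_parser.isoparse(raw).timestamp() * 1000)
    except (ValueError, OverflowError):
        return None  # mirrors the diff's "parse failed, ignore" behaviour


print(created_at_millis({"created-at": "2024-05-01T12:00:00+00:00"}))  # 1714564800000
print(created_at_millis({}))                                           # None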
datahub/ingestion/source/iceberg/iceberg_common.py

@@ -1,4 +1,5 @@
 import logging
+import threading
 from dataclasses import dataclass, field
 from typing import Any, Dict, Optional
 
@@ -156,18 +157,21 @@ class TopTableTimings:
     def __init__(self, size: int = 10):
         self._size = size
         self.top_entites = SortedList(key=lambda x: -x.get(self._VALUE_FIELD, 0))
+        self._lock = threading.Lock()
 
     def add(self, entity: Dict[str, Any]) -> None:
         if self._VALUE_FIELD not in entity:
             return
-        self.top_entites.add(entity)
-        if len(self.top_entites) > self._size:
-            self.top_entites.pop()
+        with self._lock:
+            self.top_entites.add(entity)
+            if len(self.top_entites) > self._size:
+                self.top_entites.pop()
 
     def __str__(self) -> str:
-        if len(self.top_entites) == 0:
-            return "no timings reported"
-        return str(list(self.top_entites))
+        with self._lock:
+            if len(self.top_entites) == 0:
+                return "no timings reported"
+            return str(list(self.top_entites))
 
 
 class TimingClass:
@@ -175,24 +179,31 @@ class TimingClass:
 
     def __init__(self):
         self.times = SortedList()
+        self._lock = threading.Lock()
 
     def add_timing(self, t: float) -> None:
-        self.times.add(t)
+        with self._lock:
+            self.times.add(t)
 
     def __str__(self) -> str:
-        if len(self.times) == 0:
-            return "no timings reported"
-        total = sum(self.times)
-        avg = total / len(self.times)
-        return str(
-            {
-                "average_time": format_timespan(avg, detailed=True, max_units=3),
-                "min_time": format_timespan(self.times[0], detailed=True, max_units=3),
-                "max_time": format_timespan(self.times[-1], detailed=True, max_units=3),
-                # total_time does not provide correct information in case we run in more than 1 thread
-                "total_time": format_timespan(total, detailed=True, max_units=3),
-            }
-        )
+        with self._lock:
+            if len(self.times) == 0:
+                return "no timings reported"
+            total = sum(self.times)
+            avg = total / len(self.times)
+            return str(
+                {
+                    "average_time": format_timespan(avg, detailed=True, max_units=3),
+                    "min_time": format_timespan(
+                        self.times[0], detailed=True, max_units=3
+                    ),
+                    "max_time": format_timespan(
+                        self.times[-1], detailed=True, max_units=3
+                    ),
+                    # total_time does not provide correct information in case we run in more than 1 thread
+                    "total_time": format_timespan(total, detailed=True, max_units=3),
                }
            )
 
 
 @dataclass
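These iceberg_common.py changes guard every read and write of the shared SortedList with a lock, since the timing collectors are now fed from multiple worker threads and sortedcontainers' SortedList is not safe for concurrent mutation. A reduced sketch of the same lock-per-collector pattern (class and method names are illustrative):

# Sketch only: every mutation and every read snapshot of the shared
# SortedList happens under one lock, so concurrent adds cannot race
# with each other or with summary formatting.
import threading

from sortedcontainers import SortedList


class ThreadSafeTimings:
    def __init__(self, size: int = 10) -> None:
        self._size = size
        self._times = SortedList()
        self._lock = threading.Lock()

    def add(self, value: float) -> None:
        with self._lock:
            self._times.add(value)
            if len(self._times) > self._size:
                self._times.pop()  # keep only the smallest `size` values

    def summary(self) -> str:
        with self._lock:
            if not self._times:
                return "no timings reported"
            return f"n={len(self._times)} min={self._times[0]} max={self._times[-1]}"


timings = ThreadSafeTimings()
workers = [threading.Thread(target=timings.add, args=(i / 10,)) for i in range(25)]
for w in workers:
    w.start()
for w in workers:
    w.join()
print(timings.summary())  # n=10 min=0.0 max=0.9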
datahub/ingestion/source/identity/okta.py

@@ -568,9 +568,7 @@ class OktaSource(StatefulIngestionSourceBase):
         if (
             self.config.include_deprovisioned_users is False
             and okta_user.status == UserStatus.DEPROVISIONED
-        ):
-            return False
-        elif (
+        ) or (
             self.config.include_suspended_users is False
             and okta_user.status == UserStatus.SUSPENDED
         ):
datahub/ingestion/source/kafka_connect/source_connectors.py

@@ -447,13 +447,10 @@ class DebeziumSourceConnector(BaseConnector):
     ) -> DebeziumParser:
         connector_class = connector_manifest.config.get(CONNECTOR_CLASS, "")
 
-        if connector_class == "io.debezium.connector.mysql.MySqlConnector":
-            parser = self.DebeziumParser(
-                source_platform="mysql",
-                server_name=self.get_server_name(connector_manifest),
-                database_name=None,
-            )
-        elif connector_class == "MySqlConnector":
+        if (
+            connector_class == "io.debezium.connector.mysql.MySqlConnector"
+            or connector_class == "MySqlConnector"
+        ):
             parser = self.DebeziumParser(
                 source_platform="mysql",
                 server_name=self.get_server_name(connector_manifest),
datahub/ingestion/source/looker/looker_lib_wrapper.py

@@ -205,8 +205,9 @@ class LookerAPI:
     def folder_ancestors(
         self,
         folder_id: str,
-        fields: Union[str, List[str]] = ["id", "name", "parent_id"],
+        fields: Optional[Union[str, List[str]]] = None,
     ) -> Sequence[Folder]:
+        fields = fields or ["id", "name", "parent_id"]
         self.client_stats.folder_calls += 1
         try:
             return self.client.folder_ancestors(
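This hunk and several that follow (LookML template handling, NiFi, the Power BI M-Query parser, Qlik Sense, Redash) make the same change: mutable defaults such as = [] or = {} become None, with an x = x or [...] guard in the body. Python evaluates default values once, at function definition time, so a mutable default is silently shared across calls; a minimal demonstration of the difference:

# Sketch only: the shared-state pitfall that motivates the Optional/None pattern.
from typing import List, Optional


def shared_default(item: str, seen: List[str] = []) -> List[str]:
    seen.append(item)  # mutates the single list created at definition time
    return seen


def safe_default(item: str, seen: Optional[List[str]] = None) -> List[str]:
    seen = seen or []  # same guard style used throughout this release
    seen.append(item)
    return seen


print(shared_default("a"))  # ['a']
print(shared_default("b"))  # ['a', 'b']  <- state leaked from the previous call
print(safe_default("a"))    # ['a']
print(safe_default("b"))    # ['b']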
datahub/ingestion/source/looker/looker_template_language.py

@@ -464,9 +464,10 @@ def process_lookml_template_language(
     source_config: LookMLSourceConfig,
     view_lkml_file_dict: dict,
     reporter: LookMLSourceReport,
-    manifest_constants: Dict[str, "LookerConstant"] = {},
+    manifest_constants: Optional[Dict[str, "LookerConstant"]] = None,
     resolve_constants: bool = False,
 ) -> None:
+    manifest_constants = manifest_constants or {}
     if "views" not in view_lkml_file_dict:
         return
 
@@ -507,9 +508,10 @@ def load_and_preprocess_file(
     path: Union[str, pathlib.Path],
     source_config: LookMLSourceConfig,
     reporter: LookMLSourceReport,
-    manifest_constants: Dict[str, "LookerConstant"] = {},
+    manifest_constants: Optional[Dict[str, "LookerConstant"]] = None,
     resolve_constants: bool = False,
 ) -> dict:
+    manifest_constants = manifest_constants or {}
     parsed = load_lkml(path)
 
     process_lookml_template_language(
datahub/ingestion/source/looker/lookml_source.py

@@ -1006,8 +1006,9 @@ class LookMLSource(StatefulIngestionSourceBase):
     def report_skipped_unreachable_views(
         self,
         viewfile_loader: LookerViewFileLoader,
-        processed_view_map: Dict[str, Set[str]] = {},
+        processed_view_map: Optional[Dict[str, Set[str]]] = None,
     ) -> None:
+        processed_view_map = processed_view_map or {}
         view_files: Dict[str, List[pathlib.Path]] = {}
         for project, folder_path in self.base_projects_folder.items():
             folder = pathlib.Path(folder_path)
datahub/ingestion/source/metadata/lineage.py

@@ -104,8 +104,8 @@ class FineGrainedLineageConfig(ConfigModel):
 
 class EntityNodeConfig(ConfigModel):
     entity: EntityConfig
-    upstream: Optional[List["EntityNodeConfig"]]
-    fineGrainedLineages: Optional[List[FineGrainedLineageConfig]]
+    upstream: Optional[List["EntityNodeConfig"]] = None
+    fineGrainedLineages: Optional[List[FineGrainedLineageConfig]] = None
 
 
 # https://pydantic-docs.helpmanual.io/usage/postponed_annotations/ required for when you reference a model within itself
datahub/ingestion/source/neo4j/neo4j_source.py

@@ -292,7 +292,7 @@ class Neo4jSource(StatefulIngestionSourceBase):
         return record["properties"]
 
     def get_relationships(self, record: dict) -> dict:
-        return record.get("relationships", None)
+        return record.get("relationships", {})
 
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
         return [
datahub/ingestion/source/nifi.py

@@ -1234,11 +1234,14 @@ class NifiSource(StatefulIngestionSourceBase):
         job_type: str,
         description: Optional[str],
         job_properties: Optional[Dict[str, str]] = None,
-        inlets: List[str] = [],
-        outlets: List[str] = [],
-        inputJobs: List[str] = [],
+        inlets: Optional[List[str]] = None,
+        outlets: Optional[List[str]] = None,
+        inputJobs: Optional[List[str]] = None,
         status: Optional[str] = None,
     ) -> Iterable[MetadataWorkUnit]:
+        inlets = inlets or []
+        outlets = outlets or []
+        inputJobs = inputJobs or []
         logger.debug(f"Begining construction of job workunit for {job_urn}")
         if job_properties:
             job_properties = {k: v for k, v in job_properties.items() if v is not None}
datahub/ingestion/source/openapi_parser.py

@@ -167,7 +167,7 @@ def check_for_api_example_data(base_res: dict, key: str) -> dict:
     Try to determine if example data is defined for the endpoint, and return it
     """
     data = {}
-    if "content" in base_res.keys():
+    if "content" in base_res:
         res_cont = base_res["content"]
         if "application/json" in res_cont.keys():
             ex_field = None
@@ -188,7 +188,7 @@ def check_for_api_example_data(base_res: dict, key: str) -> dict:
         )
     elif "text/csv" in res_cont.keys():
         data = res_cont["text/csv"]["schema"]
-    elif "examples" in base_res.keys():
+    elif "examples" in base_res:
         data = base_res["examples"]["application/json"]
 
     return data
datahub/ingestion/source/powerbi/m_query/parser.py

@@ -2,7 +2,7 @@ import functools
 import importlib.resources as pkg_resource
 import logging
 import os
-from typing import Dict, List
+from typing import Dict, List, Optional
 
 import lark
 from lark import Lark, Tree
@@ -65,8 +65,9 @@ def get_upstream_tables(
     platform_instance_resolver: AbstractDataPlatformInstanceResolver,
     ctx: PipelineContext,
     config: PowerBiDashboardSourceConfig,
-    parameters: Dict[str, str] = {},
+    parameters: Optional[Dict[str, str]] = None,
 ) -> List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage]:
+    parameters = parameters or {}
     if table.expression is None:
         logger.debug(f"There is no M-Query expression in table {table.full_name}")
         return []
datahub/ingestion/source/powerbi/m_query/tree_function.py

@@ -70,13 +70,14 @@ def get_first_rule(tree: Tree, rule: str) -> Optional[Tree]:
     return expression_tree
 
 
-def token_values(tree: Tree, parameters: Dict[str, str] = {}) -> List[str]:
+def token_values(tree: Tree, parameters: Optional[Dict[str, str]] = None) -> List[str]:
     """
     :param tree: Tree to traverse
    :param parameters: If parameters is not an empty dict, it will try to resolve identifier variable references
                       using the values in 'parameters'.
     :return: List of leaf token data
     """
+    parameters = parameters or {}
     values: List[str] = []
 
     def internal(node: Union[Tree, Token]) -> None:
datahub/ingestion/source/powerbi/powerbi.py

@@ -890,9 +890,7 @@ class Mapper:
                     set(user_rights) & set(self.__config.ownership.owner_criteria)
                 )
                 > 0
-            ):
-                user_mcps.extend(self.to_datahub_user(user))
-            elif self.__config.ownership.owner_criteria is None:
+            ) or self.__config.ownership.owner_criteria is None:
                 user_mcps.extend(self.to_datahub_user(user))
             else:
                 continue
datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py

@@ -380,8 +380,9 @@ class DataResolverBase(ABC):
     def itr_pages(
         self,
         endpoint: str,
-        parameter_override: Dict = {},
+        parameter_override: Optional[Dict] = None,
     ) -> Iterator[List[Dict]]:
+        parameter_override = parameter_override or {}
         params: dict = {
             "$skip": 0,
             "$top": self.TOP,
datahub/ingestion/source/powerbi_report_server/report_server.py

@@ -196,7 +196,7 @@ class PowerBiReportServerAPI:
         }
 
         reports: List[Any] = []
-        for report_type in report_types_mapping.keys():
+        for report_type in report_types_mapping:
             report_get_endpoint: str = API_ENDPOINTS[report_type]
             # Replace place holders
             report_get_endpoint_http = report_get_endpoint.format(
datahub/ingestion/source/qlik_sense/websocket_connection.py

@@ -17,8 +17,9 @@ class WebsocketConnection:
         self.handle = [-1]
 
     def _build_websocket_request_dict(
-        self, method: str, params: Union[Dict, List] = {}
+        self, method: str, params: Optional[Union[Dict, List]] = None
     ) -> Dict:
+        params = params or {}
         return {
             "jsonrpc": "2.0",
             "id": self.request_id,
@@ -37,11 +38,12 @@ class WebsocketConnection:
         return {}
 
     def websocket_send_request(
-        self, method: str, params: Union[Dict, List] = {}
+        self, method: str, params: Optional[Union[Dict, List]] = None
     ) -> Dict:
         """
         Method to send request to websocket
         """
+        params = params or {}
         self.request_id += 1
         request = self._build_websocket_request_dict(method, params)
         response = self._send_request(request=request)
datahub/ingestion/source/redash.py

@@ -421,8 +421,9 @@ class RedashSource(StatefulIngestionSourceBase):
         return database_name
 
     def _get_datasource_urns(
-        self, data_source: Dict, sql_query_data: Dict = {}
+        self, data_source: Dict, sql_query_data: Optional[Dict] = None
     ) -> Optional[List[str]]:
+        sql_query_data = sql_query_data or {}
         platform = self._get_platform_based_on_datasource(data_source)
         database_name = self._get_database_name_based_on_datasource(data_source)
         data_source_syntax = data_source.get("syntax")
datahub/ingestion/source/s3/config.py

@@ -154,10 +154,8 @@ class DataLakeSourceConfig(
         return path_specs
 
     @pydantic.validator("platform", always=True)
-    def platform_valid(cls, platform: str, values: dict) -> str:
-        inferred_platform = values.get(
-            "platform", None
-        )  # we may have inferred it above
+    def platform_valid(cls, platform: Any, values: dict) -> str:
+        inferred_platform = values.get("platform")  # we may have inferred it above
         platform = platform or inferred_platform
         if not platform:
             raise ValueError("platform must not be empty")
datahub/ingestion/source/s3/source.py

@@ -834,7 +834,7 @@ class S3Source(StatefulIngestionSourceBase):
                 min=min,
             )
            folders.extend(folders_list)
-            if not path_spec.traversal_method == FolderTraversalMethod.ALL:
+            if path_spec.traversal_method != FolderTraversalMethod.ALL:
                return folders
        if folders:
            return folders
@@ -847,7 +847,7 @@ class S3Source(StatefulIngestionSourceBase):
         path_spec: PathSpec,
         bucket: "Bucket",
         prefix: str,
-    ) -> List[Folder]:
+    ) -> Iterable[Folder]:
         """
         Retrieves all the folders in a path by listing all the files in the prefix.
         If the prefix is a full path then only that folder will be extracted.
@@ -877,51 +877,30 @@ class S3Source(StatefulIngestionSourceBase):
         s3_objects = (
             obj
             for obj in bucket.objects.filter(Prefix=prefix).page_size(PAGE_SIZE)
-            if _is_allowed_path(path_spec, f"s3://{obj.bucket_name}/{obj.key}")
+            if _is_allowed_path(
+                path_spec, self.create_s3_path(obj.bucket_name, obj.key)
+            )
         )
-
-        partitions: List[Folder] = []
         grouped_s3_objects_by_dirname = groupby_unsorted(
             s3_objects,
             key=lambda obj: obj.key.rsplit("/", 1)[0],
         )
-        for key, group in grouped_s3_objects_by_dirname:
-            file_size = 0
-            creation_time = None
-            modification_time = None
-
-            for item in group:
-                file_size += item.size
-                if creation_time is None or item.last_modified < creation_time:
-                    creation_time = item.last_modified
-                if modification_time is None or item.last_modified > modification_time:
-                    modification_time = item.last_modified
-                    max_file = item
-
-            if modification_time is None:
-                logger.warning(
-                    f"Unable to find any files in the folder {key}. Skipping..."
-                )
-                continue
-
-            id = path_spec.get_partition_from_path(
-                self.create_s3_path(max_file.bucket_name, max_file.key)
+        for _, group in grouped_s3_objects_by_dirname:
+            max_file = max(group, key=lambda x: x.last_modified)
+            max_file_s3_path = self.create_s3_path(max_file.bucket_name, max_file.key)
+
+            # If partition_id is None, it means the folder is not a partition
+            partition_id = path_spec.get_partition_from_path(max_file_s3_path)
+
+            yield Folder(
+                partition_id=partition_id,
+                is_partition=bool(partition_id),
+                creation_time=min(obj.last_modified for obj in group),
+                modification_time=max_file.last_modified,
+                sample_file=max_file_s3_path,
+                size=sum(obj.size for obj in group),
             )
 
-            # If id is None, it means the folder is not a partition
-            partitions.append(
-                Folder(
-                    partition_id=id,
-                    is_partition=bool(id),
-                    creation_time=creation_time if creation_time else None,  # type: ignore[arg-type]
-                    modification_time=modification_time,
-                    sample_file=self.create_s3_path(max_file.bucket_name, max_file.key),
-                    size=file_size,
-                )
-            )
-
-        return partitions
-
     def s3_browser(self, path_spec: PathSpec, sample_size: int) -> Iterable[BrowsePath]:
         if self.source_config.aws_config is None:
             raise ValueError("aws_config not set. Cannot browse s3")
@@ -1000,7 +979,7 @@ class S3Source(StatefulIngestionSourceBase):
                 min=True,
             )
             dirs_to_process.append(dirs_to_process_min[0])
-        folders = []
+        folders: List[Folder] = []
         for dir in dirs_to_process:
             logger.info(f"Getting files from folder: {dir}")
             prefix_to_process = urlparse(dir).path.lstrip("/")
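The rewritten folder-listing loop above replaces the hand-rolled accumulation of sizes and timestamps with direct min/max/sum over each directory group and yields folders lazily instead of building a list. A standalone sketch of the same per-directory summarization, using a stand-in object type instead of boto3's ObjectSummary and itertools.groupby instead of DataHub's groupby_unsorted (so the input is sorted first and each group is materialized before being read more than once):

# Sketch only: fold each directory's objects into one summary record using
# min/max/sum, yielding results lazily instead of building a list.
from dataclasses import dataclass
from datetime import datetime, timezone
from itertools import groupby
from typing import Iterable, Iterator


@dataclass
class S3Object:  # stand-in for boto3's ObjectSummary
    key: str
    size: int
    last_modified: datetime


@dataclass
class FolderSummary:
    path: str
    creation_time: datetime
    modification_time: datetime
    size: int
    sample_file: str


def dirname(obj: S3Object) -> str:
    return obj.key.rsplit("/", 1)[0]


def summarize_folders(objects: Iterable[S3Object]) -> Iterator[FolderSummary]:
    for path, group_iter in groupby(sorted(objects, key=dirname), key=dirname):
        group = list(group_iter)  # groupby groups are single-pass iterators
        newest = max(group, key=lambda o: o.last_modified)
        yield FolderSummary(
            path=path,
            creation_time=min(o.last_modified for o in group),
            modification_time=newest.last_modified,
            size=sum(o.size for o in group),
            sample_file=newest.key,
        )


ts = lambda day: datetime(2024, 1, day, tzinfo=timezone.utc)
objs = [
    S3Object("data/p=1/a.parquet", 10, ts(1)),
    S3Object("data/p=1/b.parquet", 20, ts(3)),
    S3Object("data/p=2/c.parquet", 5, ts(2)),
]
for folder in summarize_folders(objs):
    print(folder.path, folder.size, folder.sample_file)
# data/p=1 30 data/p=1/b.parquet
# data/p=2 5 data/p=2/c.parquet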
datahub/ingestion/source/salesforce.py

@@ -615,7 +615,7 @@ class SalesforceSource(StatefulIngestionSourceBase):
             prefix = "\\" if text.startswith("#") else ""
             desc += f"\n\n{prefix}{text}"
 
-        text = field.get("InlineHelpText", None)
+        text = field.get("InlineHelpText")
         if text:
             prefix = "\\" if text.startswith("#") else ""
             desc += f"\n\n{prefix}{text}"
datahub/ingestion/source/schema_inference/object.py

@@ -149,7 +149,7 @@ def construct_schema(
 
     extended_schema: Dict[Tuple[str, ...], SchemaDescription] = {}
 
-    for field_path in schema.keys():
+    for field_path in schema:
         field_types = schema[field_path]["types"]
         field_type: Union[str, type] = "mixed"
 
datahub/ingestion/source/snowflake/snowflake_connection.py

@@ -125,7 +125,7 @@ class SnowflakeConnectionConfig(ConfigModel):
 
     @pydantic.validator("authentication_type", always=True)
     def authenticator_type_is_valid(cls, v, values):
-        if v not in _VALID_AUTH_TYPES.keys():
+        if v not in _VALID_AUTH_TYPES:
             raise ValueError(
                 f"unsupported authenticator type '{v}' was provided,"
                 f" use one of {list(_VALID_AUTH_TYPES.keys())}"
datahub/ingestion/source/snowflake/snowflake_v2.py

@@ -439,7 +439,7 @@ class SnowflakeV2Source(
                     failure_reason=failure_message,
                 )
 
-            if c in _report.keys():
+            if c in _report:
                 continue
 
             # If some capabilities are missing, then mark them as not capable
datahub/ingestion/source/sql/athena.py

@@ -396,7 +396,7 @@ class AthenaSource(SQLAlchemySource):
             metadata.table_type if metadata.table_type else ""
         )
 
-        location: Optional[str] = custom_properties.get("location", None)
+        location: Optional[str] = custom_properties.get("location")
         if location is not None:
             if location.startswith("s3://"):
                 location = make_s3_urn(location, self.config.env)
@@ -538,7 +538,7 @@ class AthenaSource(SQLAlchemySource):
                 column_name=column["name"],
                 column_type=column["type"],
                 inspector=inspector,
-                description=column.get("comment", None),
+                description=column.get("comment"),
                 nullable=column.get("nullable", True),
                 is_part_of_key=(
                     True
datahub/ingestion/source/sql/sql_common.py

@@ -204,7 +204,7 @@ def get_column_type(
     """
 
     TypeClass: Optional[Type] = None
-    for sql_type in _field_type_mapping.keys():
+    for sql_type in _field_type_mapping:
         if isinstance(column_type, sql_type):
             TypeClass = _field_type_mapping[sql_type]
             break
@@ -973,7 +973,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
                     inspector=inspector,
                 )
             ),
-            description=column.get("comment", None),
+            description=column.get("comment"),
             nullable=column["nullable"],
             recursive=False,
             globalTags=gtc,
datahub/ingestion/source/sql/sql_types.py

@@ -317,10 +317,10 @@ def resolve_snowflake_modified_type(type_string: str) -> Any:
     match = re.match(r"([a-zA-Z_]+)\(\d+,\s\d+\)", type_string)
     if match:
         modified_type_base = match.group(1)  # Extract the base type
-        return SNOWFLAKE_TYPES_MAP.get(modified_type_base, None)
+        return SNOWFLAKE_TYPES_MAP.get(modified_type_base)
 
     # Fallback for types without precision/scale
-    return SNOWFLAKE_TYPES_MAP.get(type_string, None)
+    return SNOWFLAKE_TYPES_MAP.get(type_string)
 
 
 # see https://github.com/googleapis/python-bigquery-sqlalchemy/blob/main/sqlalchemy_bigquery/_types.py#L32