acryl-datahub 0.15.0rc5__py3-none-any.whl → 0.15.0rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0rc5.dist-info → acryl_datahub-0.15.0rc7.dist-info}/METADATA +2456 -2426
- {acryl_datahub-0.15.0rc5.dist-info → acryl_datahub-0.15.0rc7.dist-info}/RECORD +43 -41
- {acryl_datahub-0.15.0rc5.dist-info → acryl_datahub-0.15.0rc7.dist-info}/entry_points.txt +1 -0
- datahub/__init__.py +1 -1
- datahub/api/entities/structuredproperties/structuredproperties.py +1 -1
- datahub/cli/put_cli.py +1 -1
- datahub/cli/specific/dataproduct_cli.py +1 -1
- datahub/emitter/mcp_patch_builder.py +43 -0
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +9 -4
- datahub/ingestion/source/aws/sagemaker_processors/models.py +30 -1
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
- datahub/ingestion/source/common/subtypes.py +2 -0
- datahub/ingestion/source/csv_enricher.py +1 -1
- datahub/ingestion/source/dbt/dbt_common.py +7 -61
- datahub/ingestion/source/dremio/dremio_api.py +11 -0
- datahub/ingestion/source/dremio/dremio_aspects.py +19 -15
- datahub/ingestion/source/dremio/dremio_config.py +5 -0
- datahub/ingestion/source/dremio/dremio_entities.py +4 -0
- datahub/ingestion/source/dremio/dremio_source.py +7 -2
- datahub/ingestion/source/elastic_search.py +1 -1
- datahub/ingestion/source/gc/dataprocess_cleanup.py +6 -1
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +1 -1
- datahub/ingestion/source/ge_data_profiler.py +23 -1
- datahub/ingestion/source/neo4j/__init__.py +0 -0
- datahub/ingestion/source/neo4j/neo4j_source.py +331 -0
- datahub/ingestion/source/qlik_sense/data_classes.py +1 -0
- datahub/ingestion/source/redshift/redshift.py +1 -0
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +1 -0
- datahub/ingestion/source/sql/athena.py +46 -22
- datahub/ingestion/source/sql/sql_types.py +85 -8
- datahub/ingestion/source/unity/proxy_types.py +1 -0
- datahub/ingestion/transformer/add_dataset_tags.py +1 -1
- datahub/ingestion/transformer/generic_aspect_transformer.py +1 -1
- datahub/integrations/assertion/common.py +1 -1
- datahub/lite/duckdb_lite.py +12 -17
- datahub/specific/chart.py +0 -39
- datahub/specific/dashboard.py +0 -39
- datahub/specific/datajob.py +3 -47
- datahub/utilities/urn_encoder.py +2 -1
- datahub/utilities/urns/_urn_base.py +1 -1
- datahub/utilities/urns/structured_properties_urn.py +1 -1
- {acryl_datahub-0.15.0rc5.dist-info → acryl_datahub-0.15.0rc7.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0rc5.dist-info → acryl_datahub-0.15.0rc7.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/dremio/dremio_api.py
@@ -774,3 +774,14 @@ class DremioAPIOperations:
                 containers.extend(future.result())
 
         return containers
+
+    def get_context_for_vds(self, resource_id: str) -> str:
+        context_array = self.get(
+            url=f"/catalog/{resource_id}",
+        ).get("sqlContext")
+        if context_array:
+            return ".".join(
+                f'"{part}"' if "." in part else f"{part}" for part in context_array
+            )
+        else:
+            return ""

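For illustration, the new helper double-quotes any context element that itself contains a dot, so the joined string can be used directly as a default schema for SQL parsing. A minimal sketch of the same join expression, with a hypothetical sqlContext value (not taken from this diff):

    # Hypothetical sqlContext returned by Dremio's /catalog/{id} endpoint
    context_array = ["my_space", "my.folder"]
    default_schema = ".".join(
        f'"{part}"' if "." in part else f"{part}" for part in context_array
    )
    # -> 'my_space."my.folder"'
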
datahub/ingestion/source/dremio/dremio_aspects.py
@@ -142,6 +142,7 @@ class DremioAspects:
         platform: str,
         ui_url: str,
         env: str,
+        ingest_owner: bool,
         domain: Optional[str] = None,
         platform_instance: Optional[str] = None,
     ):
@@ -150,6 +151,7 @@ class DremioAspects:
         self.env = env
         self.domain = domain
         self.ui_url = ui_url
+        self.ingest_owner = ingest_owner
 
     def get_container_key(
         self, name: Optional[str], path: Optional[List[str]]
@@ -426,21 +428,23 @@ class DremioAspects:
         return f'{self.ui_url}/{container_type}/{dataset_url_path}"{dataset.resource_name}"'
 
     def _create_ownership(self, dataset: DremioDataset) -> Optional[OwnershipClass]:
-        if [truncated in this view]
[... 14 more removed lines of the previous implementation, truncated in this view ...]
+        if self.ingest_owner and dataset.owner:
+            owner_urn = (
+                make_user_urn(dataset.owner)
+                if dataset.owner_type == "USER"
+                else make_group_urn(dataset.owner)
+            )
+            ownership: OwnershipClass = OwnershipClass(
+                owners=[
+                    OwnerClass(
+                        owner=owner_urn,
+                        type=OwnershipTypeClass.TECHNICAL_OWNER,
+                    )
+                ]
+            )
+            return ownership
+
+        return None
 
     def _create_glossary_terms(self, entity: DremioDataset) -> GlossaryTermsClass:
         return GlossaryTermsClass(

datahub/ingestion/source/dremio/dremio_config.py
@@ -174,3 +174,8 @@ class DremioSourceConfig(
         default=False,
         description="Whether to include query-based lineage information.",
     )
+
+    ingest_owner: bool = Field(
+        default=True,
+        description="Ingest Owner from source. This will override Owner info entered from UI",
+    )

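The new flag can be set from a recipe; a minimal sketch of the source section expressed as a Python dict (only ingest_owner comes from this diff, the rest is illustrative):

    # Hypothetical Dremio source config; set ingest_owner to False to keep
    # owners entered in the DataHub UI instead of Dremio-side owners.
    dremio_source = {
        "type": "dremio",
        "config": {
            "ingest_owner": False,
        },
    }
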
datahub/ingestion/source/dremio/dremio_entities.py
@@ -200,6 +200,7 @@ class DremioDataset:
     columns: List[DremioDatasetColumn]
     sql_definition: Optional[str]
     dataset_type: DremioDatasetType
+    default_schema: Optional[str]
     owner: Optional[str]
     owner_type: Optional[str]
     created: str
@@ -235,6 +236,9 @@ class DremioDataset:
 
         if self.sql_definition:
             self.dataset_type = DremioDatasetType.VIEW
+            self.default_schema = api_operations.get_context_for_vds(
+                resource_id=self.resource_id
+            )
         else:
             self.dataset_type = DremioDatasetType.TABLE
 

datahub/ingestion/source/dremio/dremio_source.py
@@ -97,6 +97,7 @@ class DremioSource(StatefulIngestionSourceBase):
     - Ownership and Glossary Terms:
        - Metadata related to ownership of datasets, extracted from Dremio’s ownership model.
        - Glossary terms and business metadata associated with datasets, providing additional context to the data.
+       - Note: Ownership information will only be available for the Cloud and Enterprise editions, it will not be available for the Community edition.
 
     - Optional SQL Profiling (if enabled):
        - Table, row, and column statistics can be profiled and ingested via optional SQL queries.
@@ -123,6 +124,7 @@ class DremioSource(StatefulIngestionSourceBase):
         self.dremio_aspects = DremioAspects(
             platform=self.get_platform(),
             domain=self.config.domain,
+            ingest_owner=self.config.ingest_owner,
             platform_instance=self.config.platform_instance,
             env=self.config.env,
             ui_url=dremio_api.ui_url,
@@ -394,10 +396,12 @@ class DremioSource(StatefulIngestionSourceBase):
             ):
                 yield dremio_mcp
                 # Check if the emitted aspect is SchemaMetadataClass
-                if isinstance( [truncated in this view]
+                if isinstance(
+                    dremio_mcp.metadata, MetadataChangeProposalWrapper
+                ) and isinstance(dremio_mcp.metadata.aspect, SchemaMetadataClass):
                     self.sql_parsing_aggregator.register_schema(
                         urn=dataset_urn,
-                        schema=dremio_mcp.metadata,
+                        schema=dremio_mcp.metadata.aspect,
                     )
 
             if dataset_info.dataset_type == DremioDatasetType.VIEW:
@@ -415,6 +419,7 @@ class DremioSource(StatefulIngestionSourceBase):
                     view_urn=dataset_urn,
                     view_definition=dataset_info.sql_definition,
                     default_db=self.default_db,
+                    default_schema=dataset_info.default_schema,
                 )
 
             elif dataset_info.dataset_type == DremioDatasetType.TABLE:

datahub/ingestion/source/dbt/dbt_common.py
@@ -227,7 +227,7 @@ def collapse_name(name: str, collapse_urns: CollapseUrns) -> str:
 def collapse_urn(urn: str, collapse_urns: CollapseUrns) -> str:
     if len(collapse_urns.urns_suffix_regex) == 0:
         return urn
-    urn_obj = DatasetUrn. [truncated in this view]
+    urn_obj = DatasetUrn.from_string(urn)
     name = collapse_name(name=urn_obj.get_dataset_name(), collapse_urns=collapse_urns)
     data_platform_urn = urn_obj.get_data_platform_urn()
     return str(

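For reference, the urn-parsing calls used above can be exercised directly; a minimal sketch with an illustrative urn (the urn value is a placeholder, not taken from this diff):

    from datahub.utilities.urns.dataset_urn import DatasetUrn

    urn_obj = DatasetUrn.from_string(
        "urn:li:dataset:(urn:li:dataPlatform:dbt,db.schema.table_v1,PROD)"
    )
    print(urn_obj.get_dataset_name())       # db.schema.table_v1
    print(urn_obj.get_data_platform_urn())  # urn:li:dataPlatform:dbt
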
datahub/ingestion/source/gc/dataprocess_cleanup.py
@@ -277,7 +277,12 @@ class DataProcessCleanup:
         assert self.ctx.graph
 
         dpis = self.fetch_dpis(job.urn, self.config.batch_size)
-        dpis.sort( [truncated in this view]
+        dpis.sort(
+            key=lambda x: x["created"]["time"]
+            if x["created"] and x["created"]["time"]
+            else 0,
+            reverse=True,
+        )
 
         with ThreadPoolExecutor(max_workers=self.config.max_workers) as executor:
             if self.config.keep_last_n:

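The new key function guards against run instances that have no created timestamp: such entries evaluate to 0 and therefore sort last when reverse=True. A minimal sketch with made-up run instances:

    dpis = [
        {"urn": "a", "created": {"time": 1700000000000}},
        {"urn": "b", "created": None},  # missing audit stamp
        {"urn": "c", "created": {"time": 1800000000000}},
    ]
    dpis.sort(
        key=lambda x: x["created"]["time"] if x["created"] and x["created"]["time"] else 0,
        reverse=True,
    )
    # resulting order: c (newest), a, b (missing timestamp last)
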
datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py
@@ -104,7 +104,7 @@ class SoftDeletedEntitiesCleanup:
     def delete_entity(self, urn: str) -> None:
         assert self.ctx.graph
 
-        entity_urn = Urn. [truncated in this view]
+        entity_urn = Urn.from_string(urn)
         self.report.num_soft_deleted_entity_removed += 1
         self.report.num_soft_deleted_entity_removed_by_type[entity_urn.entity_type] = (
             self.report.num_soft_deleted_entity_removed_by_type.get(

datahub/ingestion/source/ge_data_profiler.py
@@ -57,7 +57,11 @@ from datahub.ingestion.source.profiling.common import (
     convert_to_cardinality,
 )
 from datahub.ingestion.source.sql.sql_report import SQLSourceReport
-from datahub. [truncated in this view]
+from datahub.ingestion.source.sql.sql_types import resolve_sql_type
+from datahub.metadata.com.linkedin.pegasus2avro.schema import (
+    EditableSchemaMetadata,
+    NumberType,
+)
 from datahub.metadata.schema_classes import (
     DatasetFieldProfileClass,
     DatasetProfileClass,
@@ -361,6 +365,8 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase):
     platform: str
     env: str
 
+    column_types: Dict[str, str] = dataclasses.field(default_factory=dict)
+
     def _get_columns_to_profile(self) -> List[str]:
         if not self.config.any_field_level_metrics_enabled():
             return []
@@ -374,6 +380,7 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase):
 
         for col_dict in self.dataset.columns:
             col = col_dict["name"]
+            self.column_types[col] = str(col_dict["type"])
             # We expect the allow/deny patterns to specify '<table_pattern>.<column_pattern>'
             if not self.config._allow_deny_patterns.allowed(
                 f"{self.dataset_name}.{col}"
@@ -430,6 +437,21 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase):
             self.dataset, column
         )
 
+        if column_spec.type_ == ProfilerDataType.UNKNOWN:
+            try:
+                datahub_field_type = resolve_sql_type(
+                    self.column_types[column], self.dataset.engine.dialect.name.lower()
+                )
+            except Exception as e:
+                logger.debug(
+                    f"Error resolving sql type {self.column_types[column]}: {e}"
+                )
+                datahub_field_type = None
+            if datahub_field_type is None:
+                return
+            if isinstance(datahub_field_type, NumberType):
+                column_spec.type_ = ProfilerDataType.NUMERIC
+
     @_run_with_query_combiner
     def _get_column_cardinality(
         self, column_spec: _SingleColumnSpec, column: str

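The resolve_sql_type fallback above maps a raw column-type string for a given dialect to a DataHub field-type instance; columns whose type cannot be resolved are skipped, and numeric ones are promoted to NUMERIC profiling. A minimal sketch of the same check in isolation (the type string and dialect below are illustrative, not taken from this diff):

    from datahub.ingestion.source.sql.sql_types import resolve_sql_type
    from datahub.metadata.com.linkedin.pegasus2avro.schema import NumberType

    resolved = resolve_sql_type("numeric", "postgres")  # illustrative inputs
    if resolved is not None and isinstance(resolved, NumberType):
        print("column will be profiled as NUMERIC")
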
datahub/ingestion/source/neo4j/__init__.py
File without changes

datahub/ingestion/source/neo4j/neo4j_source.py (new file)
@@ -0,0 +1,331 @@
+import logging
+import time
+from dataclasses import dataclass
+from typing import Any, Dict, Iterable, List, Optional, Type, Union
+
+import pandas as pd
+from neo4j import GraphDatabase
+from pydantic.fields import Field
+
+from datahub.configuration.source_common import EnvConfigMixin
+from datahub.emitter.mce_builder import make_data_platform_urn, make_dataset_urn
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.ingestion.api.common import PipelineContext
+from datahub.ingestion.api.decorators import (
+    SupportStatus,
+    config_class,
+    platform_name,
+    support_status,
+)
+from datahub.ingestion.api.source import Source, SourceReport
+from datahub.ingestion.api.workunit import MetadataWorkUnit
+from datahub.ingestion.source.common.subtypes import DatasetSubTypes
+from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaFieldDataType
+from datahub.metadata.schema_classes import (
+    AuditStampClass,
+    BooleanTypeClass,
+    DatasetPropertiesClass,
+    DateTypeClass,
+    NullTypeClass,
+    NumberTypeClass,
+    OtherSchemaClass,
+    SchemaFieldClass,
+    SchemaMetadataClass,
+    StringTypeClass,
+    SubTypesClass,
+    UnionTypeClass,
+)
+
+log = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+_type_mapping: Dict[Union[Type, str], Type] = {
+    "list": UnionTypeClass,
+    "boolean": BooleanTypeClass,
+    "integer": NumberTypeClass,
+    "local_date_time": DateTypeClass,
+    "float": NumberTypeClass,
+    "string": StringTypeClass,
+    "date": DateTypeClass,
+    "node": StringTypeClass,
+    "relationship": StringTypeClass,
+}
+
+
+class Neo4jConfig(EnvConfigMixin):
+    username: str = Field(description="Neo4j Username")
+    password: str = Field(description="Neo4j Password")
+    uri: str = Field(description="The URI for the Neo4j server")
+    env: str = Field(description="Neo4j env")
+
+
+@dataclass
+class Neo4jSourceReport(SourceReport):
+    obj_failures: int = 0
+    obj_created: int = 0
+
+
+@platform_name("Neo4j", id="neo4j")
+@config_class(Neo4jConfig)
+@support_status(SupportStatus.CERTIFIED)
+class Neo4jSource(Source):
+    NODE = "node"
+    RELATIONSHIP = "relationship"
+    PLATFORM = "neo4j"
+
+    def __init__(self, ctx: PipelineContext, config: Neo4jConfig):
+        self.ctx = ctx
+        self.config = config
+        self.report = Neo4jSourceReport()
+
+    @classmethod
+    def create(cls, config_dict, ctx):
+        config = Neo4jConfig.parse_obj(config_dict)
+        return cls(ctx, config)
+
+    def get_field_type(self, attribute_type: Union[type, str]) -> SchemaFieldDataType:
+        type_class: type = _type_mapping.get(attribute_type, NullTypeClass)
+        return SchemaFieldDataType(type=type_class())
+
+    def get_schema_field_class(
+        self, col_name: str, col_type: str, **kwargs: Any
+    ) -> SchemaFieldClass:
+        if kwargs["obj_type"] == self.NODE and col_type == self.RELATIONSHIP:
+            col_type = self.NODE
+        else:
+            col_type = col_type
+        return SchemaFieldClass(
+            fieldPath=col_name,
+            type=self.get_field_type(col_type),
+            nativeDataType=col_type,
+            description=col_type.upper()
+            if col_type in (self.NODE, self.RELATIONSHIP)
+            else col_type,
+            lastModified=AuditStampClass(
+                time=round(time.time() * 1000), actor="urn:li:corpuser:ingestion"
+            ),
+        )
+
+    def add_properties(
+        self,
+        dataset: str,
+        description: Optional[str] = None,
+        custom_properties: Optional[Dict[str, str]] = None,
+    ) -> MetadataChangeProposalWrapper:
+        dataset_properties = DatasetPropertiesClass(
+            description=description,
+            customProperties=custom_properties,
+        )
+        return MetadataChangeProposalWrapper(
+            entityUrn=make_dataset_urn(
+                platform=self.PLATFORM, name=dataset, env=self.config.env
+            ),
+            aspect=dataset_properties,
+        )
+
+    def generate_neo4j_object(
+        self, dataset: str, columns: list, obj_type: Optional[str] = None
+    ) -> MetadataChangeProposalWrapper:
+        try:
+            fields = [
+                self.get_schema_field_class(key, value.lower(), obj_type=obj_type)
+                for d in columns
+                for key, value in d.items()
+            ]
+            mcp = MetadataChangeProposalWrapper(
+                entityUrn=make_dataset_urn(
+                    platform=self.PLATFORM, name=dataset, env=self.config.env
+                ),
+                aspect=SchemaMetadataClass(
+                    schemaName=dataset,
+                    platform=make_data_platform_urn(self.PLATFORM),
+                    version=0,
+                    hash="",
+                    platformSchema=OtherSchemaClass(rawSchema=""),
+                    lastModified=AuditStampClass(
+                        time=round(time.time() * 1000),
+                        actor="urn:li:corpuser:ingestion",
+                    ),
+                    fields=fields,
+                ),
+            )
+            self.report.obj_created += 1
+        except Exception as e:
+            log.error(e)
+            self.report.obj_failures += 1
+        return mcp
+
+    def get_neo4j_metadata(self, query: str) -> pd.DataFrame:
+        driver = GraphDatabase.driver(
+            self.config.uri, auth=(self.config.username, self.config.password)
+        )
+        """
+        This process retrieves the metadata for Neo4j objects using an APOC query, which returns a dictionary
+        with two columns: key and value. The key represents the Neo4j object, while the value contains the
+        corresponding metadata.
+
+        When data is returned from Neo4j, much of the relationship metadata is stored with the relevant node's
+        metadata. Consequently, the objects are organized into two separate dataframes: one for nodes and one for
+        relationships.
+
+        In the node dataframe, several fields are extracted and added as new columns. Similarly, in the relationship
+        dataframe, certain fields are parsed out, while others require metadata from the nodes dataframe.
+
+        Once the data is parsed and these two dataframes are created, we combine a subset of their columns into a
+        single dataframe, which will be used to create the DataHub objects.
+
+        See the docs for examples of metadata: metadata-ingestion/docs/sources/neo4j/neo4j.md
+        """
+        try:
+            log.info(f"{query}")
+            with driver.session() as session:
+                result = session.run(query)
+                data = [record for record in result]
+                log.info("Closing Neo4j driver")
+                driver.close()
+
+                node_df = self.process_nodes(data)
+                rel_df = self.process_relationships(data, node_df)
+
+                union_cols = ["key", "obj_type", "property_data_types", "description"]
+                df = pd.concat([node_df[union_cols], rel_df[union_cols]])
+        except Exception as e:
+            self.report.failure(
+                message="Failed to get neo4j metadata",
+                exc=e,
+            )
+
+        return df
+
+    def process_nodes(self, data: list) -> pd.DataFrame:
+        nodes = [record for record in data if record["value"]["type"] == self.NODE]
+        node_df = pd.DataFrame(
+            nodes,
+            columns=["key", "value"],
+        )
+        node_df["obj_type"] = node_df["value"].apply(
+            lambda record: self.get_obj_type(record)
+        )
+        node_df["relationships"] = node_df["value"].apply(
+            lambda record: self.get_relationships(record)
+        )
+        node_df["properties"] = node_df["value"].apply(
+            lambda record: self.get_properties(record)
+        )
+        node_df["property_data_types"] = node_df["properties"].apply(
+            lambda record: self.get_property_data_types(record)
+        )
+        node_df["description"] = node_df.apply(
+            lambda record: self.get_node_description(record, node_df), axis=1
+        )
+        return node_df
+
+    def process_relationships(self, data: list, node_df: pd.DataFrame) -> pd.DataFrame:
+        rels = [
+            record for record in data if record["value"]["type"] == self.RELATIONSHIP
+        ]
+        rel_df = pd.DataFrame(rels, columns=["key", "value"])
+        rel_df["obj_type"] = rel_df["value"].apply(
+            lambda record: self.get_obj_type(record)
+        )
+        rel_df["properties"] = rel_df["value"].apply(
+            lambda record: self.get_properties(record)
+        )
+        rel_df["property_data_types"] = rel_df["properties"].apply(
+            lambda record: self.get_property_data_types(record)
+        )
+        rel_df["description"] = rel_df.apply(
+            lambda record: self.get_rel_descriptions(record, node_df), axis=1
+        )
+        return rel_df
+
+    def get_obj_type(self, record: dict) -> str:
+        return record["type"]
+
+    def get_rel_descriptions(self, record: dict, df: pd.DataFrame) -> str:
+        descriptions = []
+        for _, row in df.iterrows():
+            relationships = row.get("relationships", {})
+            for relationship, props in relationships.items():
+                if record["key"] == relationship:
+                    if props["direction"] == "in":
+                        for prop in props["labels"]:
+                            descriptions.append(
+                                f"({row['key']})-[{record['key']}]->({prop})"
+                            )
+        return "\n".join(descriptions)
+
+    def get_node_description(self, record: dict, df: pd.DataFrame) -> str:
+        descriptions = []
+        for _, row in df.iterrows():
+            if record["key"] == row["key"]:
+                for relationship, props in row["relationships"].items():
+                    direction = props["direction"]
+                    for node in set(props["labels"]):
+                        if direction == "in":
+                            descriptions.append(
+                                f"({row['key']})<-[{relationship}]-({node})"
+                            )
+                        elif direction == "out":
+                            descriptions.append(
+                                f"({row['key']})-[{relationship}]->({node})"
+                            )
+
+        return "\n".join(descriptions)
+
+    def get_property_data_types(self, record: dict) -> List[dict]:
+        return [{k: v["type"]} for k, v in record.items()]
+
+    def get_properties(self, record: dict) -> str:
+        return record["properties"]
+
+    def get_relationships(self, record: dict) -> dict:
+        return record.get("relationships", None)
+
+    def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
+        df = self.get_neo4j_metadata(
+            "CALL apoc.meta.schema() YIELD value UNWIND keys(value) AS key RETURN key, value[key] AS value;"
+        )
+        for index, row in df.iterrows():
+            try:
+                yield MetadataWorkUnit(
+                    id=row["key"],
+                    mcp=self.generate_neo4j_object(
+                        columns=row["property_data_types"],
+                        dataset=row["key"],
+                    ),
+                    is_primary_source=True,
+                )
+
+                yield MetadataWorkUnit(
+                    id=row["key"],
+                    mcp=MetadataChangeProposalWrapper(
+                        entityUrn=make_dataset_urn(
+                            platform=self.PLATFORM,
+                            name=row["key"],
+                            env=self.config.env,
+                        ),
+                        aspect=SubTypesClass(
+                            typeNames=[
+                                DatasetSubTypes.NEO4J_NODE
+                                if row["obj_type"] == self.NODE
+                                else DatasetSubTypes.NEO4J_RELATIONSHIP
+                            ]
+                        ),
+                    ),
+                )
+
+                yield MetadataWorkUnit(
+                    id=row["key"],
+                    mcp=self.add_properties(
+                        dataset=row["key"],
+                        custom_properties=None,
+                        description=row["description"],
+                    ),
+                )
+
+            except Exception as e:
+                raise e
+
+    def get_report(self):
+        return self.report

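A minimal sketch of running the new source programmatically (the source type name "neo4j" and the server/credential values below are assumptions for illustration, not taken from this diff):

    from datahub.ingestion.run.pipeline import Pipeline

    # Hypothetical end-to-end run against a local Neo4j and DataHub instance
    pipeline = Pipeline.create(
        {
            "source": {
                "type": "neo4j",
                "config": {
                    "uri": "bolt://localhost:7687",
                    "username": "neo4j",
                    "password": "password",
                    "env": "PROD",
                },
            },
            "sink": {
                "type": "datahub-rest",
                "config": {"server": "http://localhost:8080"},
            },
        }
    )
    pipeline.run()
    pipeline.raise_from_status()
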
datahub/ingestion/source/snowflake/snowflake_schema_gen.py
@@ -103,6 +103,7 @@ from datahub.utilities.threaded_iterator_executor import ThreadedIteratorExecutor
 logger = logging.getLogger(__name__)
 
 # https://docs.snowflake.com/en/sql-reference/intro-summary-data-types.html
+# TODO: Move to the standardized types in sql_types.py
 SNOWFLAKE_FIELD_TYPE_MAPPINGS = {
     "DATE": DateType,
     "BIGINT": NumberType,