acryl-datahub 1.0.0.1rc7__py3-none-any.whl → 1.0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-1.0.0.1rc7.dist-info → acryl_datahub-1.0.0.2.dist-info}/METADATA +2561 -2561
- {acryl_datahub-1.0.0.1rc7.dist-info → acryl_datahub-1.0.0.2.dist-info}/RECORD +75 -73
- datahub/_version.py +1 -1
- datahub/api/entities/datajob/dataflow.py +15 -0
- datahub/api/entities/datajob/datajob.py +17 -0
- datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
- datahub/api/entities/dataset/dataset.py +2 -2
- datahub/api/entities/structuredproperties/structuredproperties.py +1 -1
- datahub/cli/ingest_cli.py +4 -4
- datahub/cli/migrate.py +6 -6
- datahub/configuration/common.py +1 -1
- datahub/emitter/mcp_builder.py +4 -0
- datahub/ingestion/api/common.py +9 -0
- datahub/ingestion/api/source.py +4 -1
- datahub/ingestion/api/source_helpers.py +26 -1
- datahub/ingestion/graph/client.py +104 -0
- datahub/ingestion/run/pipeline.py +0 -6
- datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
- datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
- datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
- datahub/ingestion/source/fivetran/fivetran.py +1 -0
- datahub/ingestion/source/fivetran/fivetran_log_api.py +1 -1
- datahub/ingestion/source/hex/constants.py +5 -0
- datahub/ingestion/source/hex/hex.py +150 -22
- datahub/ingestion/source/hex/mapper.py +28 -2
- datahub/ingestion/source/hex/model.py +10 -2
- datahub/ingestion/source/hex/query_fetcher.py +300 -0
- datahub/ingestion/source/iceberg/iceberg.py +106 -18
- datahub/ingestion/source/kafka/kafka.py +1 -4
- datahub/ingestion/source/kafka_connect/sink_connectors.py +1 -1
- datahub/ingestion/source/kafka_connect/source_connectors.py +1 -1
- datahub/ingestion/source/looker/looker_source.py +2 -3
- datahub/ingestion/source/mlflow.py +6 -7
- datahub/ingestion/source/mode.py +2 -2
- datahub/ingestion/source/nifi.py +3 -3
- datahub/ingestion/source/openapi.py +3 -3
- datahub/ingestion/source/openapi_parser.py +8 -8
- datahub/ingestion/source/powerbi/config.py +1 -1
- datahub/ingestion/source/powerbi/powerbi.py +16 -3
- datahub/ingestion/source/redshift/profile.py +2 -2
- datahub/ingestion/source/sigma/sigma.py +6 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +1 -1
- datahub/ingestion/source/sql/stored_procedures/base.py +12 -1
- datahub/ingestion/source/sql/trino.py +4 -3
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/superset.py +108 -81
- datahub/ingestion/source/tableau/tableau.py +4 -4
- datahub/ingestion/source/tableau/tableau_common.py +2 -2
- datahub/ingestion/source/unity/source.py +1 -1
- datahub/ingestion/source/vertexai/vertexai.py +7 -7
- datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
- datahub/ingestion/transformer/add_dataset_ownership.py +1 -1
- datahub/ingestion/transformer/dataset_domain.py +1 -1
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/_schema_classes.py +47 -2
- datahub/metadata/_urns/urn_defs.py +56 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
- datahub/metadata/schema.avsc +121 -85
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +6 -0
- datahub/metadata/schemas/MetadataChangeLog.avsc +3 -0
- datahub/metadata/schemas/MetadataChangeProposal.avsc +3 -0
- datahub/metadata/schemas/QueryProperties.avsc +4 -2
- datahub/metadata/schemas/SystemMetadata.avsc +86 -0
- datahub/testing/mcp_diff.py +1 -1
- datahub/utilities/file_backed_collections.py +6 -6
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/ingest_utils.py +2 -2
- datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
- {acryl_datahub-1.0.0.1rc7.dist-info → acryl_datahub-1.0.0.2.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0.1rc7.dist-info → acryl_datahub-1.0.0.2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0.1rc7.dist-info → acryl_datahub-1.0.0.2.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.0.0.1rc7.dist-info → acryl_datahub-1.0.0.2.dist-info}/top_level.txt +0 -0
@@ -36,12 +36,14 @@
       "type": {
         "type": "enum",
         "symbolDocs": {
-          "SQL": "A SQL Query"
+          "SQL": "A SQL Query",
+          "UNKNOWN": "Unknown query language"
         },
         "name": "QueryLanguage",
         "namespace": "com.linkedin.pegasus2avro.query",
         "symbols": [
-          "SQL"
+          "SQL",
+          "UNKNOWN"
         ]
       },
       "name": "language",
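
The QueryLanguage enum gains an UNKNOWN symbol. For orientation, a minimal sketch of using it from the generated Python bindings, assuming the QueryLanguageClass / QueryStatementClass names in datahub.metadata.schema_classes follow the usual codegen pattern for this aspect:

# Sketch only: assumes the generated QueryLanguageClass / QueryStatementClass
# bindings mirror the Avro definition above; the query text is illustrative.
from datahub.metadata.schema_classes import QueryLanguageClass, QueryStatementClass

statement = QueryStatementClass(
    value="CALL refresh_orders()",         # raw query text
    language=QueryLanguageClass.UNKNOWN,   # new symbol added in this release
)
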
datahub/metadata/schemas/SystemMetadata.avsc
@@ -0,0 +1,86 @@
+{
+  "type": "record",
+  "Aspect": {
+    "name": "systemMetadata"
+  },
+  "name": "SystemMetadata",
+  "namespace": "com.linkedin.pegasus2avro.mxe",
+  "fields": [
+    {
+      "type": [
+        "long",
+        "null"
+      ],
+      "name": "lastObserved",
+      "default": 0,
+      "doc": "The timestamp the metadata was observed at"
+    },
+    {
+      "type": [
+        "string",
+        "null"
+      ],
+      "name": "runId",
+      "default": "no-run-id-provided",
+      "doc": "The original run id that produced the metadata. Populated in case of batch-ingestion."
+    },
+    {
+      "type": [
+        "string",
+        "null"
+      ],
+      "name": "lastRunId",
+      "default": "no-run-id-provided",
+      "doc": "The last run id that produced the metadata. Populated in case of batch-ingestion."
+    },
+    {
+      "type": [
+        "null",
+        "string"
+      ],
+      "name": "pipelineName",
+      "default": null,
+      "doc": "The ingestion pipeline id that produced the metadata. Populated in case of batch ingestion."
+    },
+    {
+      "type": [
+        "null",
+        "string"
+      ],
+      "name": "registryName",
+      "default": null,
+      "doc": "The model registry name that was used to process this event"
+    },
+    {
+      "type": [
+        "null",
+        "string"
+      ],
+      "name": "registryVersion",
+      "default": null,
+      "doc": "The model registry version that was used to process this event"
+    },
+    {
+      "type": [
+        "null",
+        {
+          "type": "map",
+          "values": "string"
+        }
+      ],
+      "name": "properties",
+      "default": null,
+      "doc": "Additional properties"
+    },
+    {
+      "type": [
+        "null",
+        "string"
+      ],
+      "name": "version",
+      "default": null,
+      "doc": "Aspect version\n Initial implementation will use the aspect version's number, however stored as\n a string in the case where a different aspect versioning scheme is later adopted."
+    }
+  ],
+  "doc": "Metadata associated with each metadata change that is processed by the system"
+}
datahub/testing/mcp_diff.py
CHANGED
@@ -189,7 +189,7 @@ class MCPDiff:
         """
         aspect_diffs = [v for d in self.aspect_changes.values() for v in d.values()]
         for aspect_diff in aspect_diffs:
-            for _, old, new in aspect_diff.aspects_changed
+            for _, old, new in aspect_diff.aspects_changed:
                 golden[old.delta_info.idx] = new.delta_info.original
 
         indices_to_remove = set()
datahub/utilities/file_backed_collections.py
@@ -250,7 +250,7 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
                 rowid INTEGER PRIMARY KEY AUTOINCREMENT,
                 key TEXT UNIQUE,
                 value BLOB
-                {"".join(f", {column_name} BLOB" for column_name in self.extra_columns
+                {"".join(f", {column_name} BLOB" for column_name in self.extra_columns)}
             )"""
         )
 
@@ -267,7 +267,7 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
         if self.indexes_created:
             return
         # The key column will automatically be indexed, but we need indexes for the extra columns.
-        for column_name in self.extra_columns
+        for column_name in self.extra_columns:
             self._conn.execute(
                 f"CREATE INDEX {self.tablename}_{column_name} ON {self.tablename} ({column_name})"
             )
@@ -305,12 +305,12 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
             f"""INSERT INTO {self.tablename} (
                 key,
                 value
-                {"".join(f", {column_name}" for column_name in self.extra_columns
+                {"".join(f", {column_name}" for column_name in self.extra_columns)}
             )
             VALUES ({", ".join(["?"] * (2 + len(self.extra_columns)))})
             ON CONFLICT (key) DO UPDATE SET
                 value = excluded.value
-                {"".join(f", {column_name} = excluded.{column_name}" for column_name in self.extra_columns
+                {"".join(f", {column_name} = excluded.{column_name}" for column_name in self.extra_columns)}
             """,
             items_to_write,
         )
@@ -321,7 +321,7 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
             f"""INSERT INTO {self.tablename} (
                 key,
                 value
-                {"".join(f", {column_name}" for column_name in self.extra_columns
+                {"".join(f", {column_name}" for column_name in self.extra_columns)}
             )
             VALUES ({", ".join(["?"] * (2 + len(self.extra_columns)))})""",
             item,
@@ -330,7 +330,7 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
         self._conn.execute(
             f"""UPDATE {self.tablename} SET
                 value = ?
-                {"".join(f", {column_name} = ?" for column_name in self.extra_columns
+                {"".join(f", {column_name} = ?" for column_name in self.extra_columns)}
             WHERE key = ?""",
             (*item[1:], item[0]),
         )
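
All of the fixes above repair the SQL fragments that FileBackedDict builds from its extra_columns mapping. A minimal usage sketch, assuming the constructor keywords (serializer, deserializer, extra_columns) are unchanged from previous releases:

# Sketch only: assumes FileBackedDict still accepts an extra_columns mapping of
# column name -> function deriving that column's value from the stored object.
import json

from datahub.utilities.file_backed_collections import FileBackedDict

counts = FileBackedDict[dict](
    serializer=json.dumps,    # how values are written to the backing SQLite table
    deserializer=json.loads,  # how values are read back
    extra_columns={"platform": lambda record: record.get("platform")},
)
counts["urn:li:dataset:example"] = {"platform": "hive", "rows": 42}
# The extra column is persisted alongside the serialized value, so lookups can
# filter on it in SQL without deserializing every stored object.
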
datahub/utilities/hive_schema_to_avro.py
@@ -155,7 +155,7 @@ class HiveColumnToAvroConverter:
 
     @staticmethod
     def _parse_basic_datatype_string(s: str) -> Dict[str, object]:
-        if s in HiveColumnToAvroConverter._PRIVIMITE_HIVE_TYPE_TO_AVRO_TYPE
+        if s in HiveColumnToAvroConverter._PRIVIMITE_HIVE_TYPE_TO_AVRO_TYPE:
             return {
                 "type": HiveColumnToAvroConverter._PRIVIMITE_HIVE_TYPE_TO_AVRO_TYPE[s],
                 "native_data_type": s,
@@ -218,7 +218,7 @@ class HiveColumnToAvroConverter:
         buf = ""
         level = 0
         for c in s:
-            if c in HiveColumnToAvroConverter._BRACKETS
+            if c in HiveColumnToAvroConverter._BRACKETS:
                 level += 1
                 buf += c
             elif c in HiveColumnToAvroConverter._BRACKETS.values():
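
Both hunks restore missing colons inside the Hive type parser. A small sketch of the helper this converter backs, assuming get_schema_fields_for_hive_column keeps its (column name, column type string) signature; the column name and type are made up:

# Sketch only: assumes the get_schema_fields_for_hive_column helper exported by
# datahub.utilities.hive_schema_to_avro keeps its existing argument order.
from datahub.utilities.hive_schema_to_avro import get_schema_fields_for_hive_column

fields = get_schema_fields_for_hive_column(
    "shipping_address",                    # Hive column name
    "struct<street:string,zip:int>",       # nested Hive type string to be parsed
)
for field in fields:
    print(field.fieldPath, field.nativeDataType)
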
datahub/utilities/ingest_utils.py
@@ -32,10 +32,10 @@ def deploy_source_vars(
     name: Optional[str],
     config: str,
     urn: Optional[str],
-    executor_id: str,
+    executor_id: Optional[str],
     cli_version: Optional[str],
     schedule: Optional[str],
-    time_zone: str,
+    time_zone: Optional[str],
     extra_pip: Optional[str],
     debug: bool = False,
 ) -> dict:
datahub/ingestion/transformer/system_metadata_transformer.py
@@ -1,45 +0,0 @@
-import functools
-from typing import Iterable
-
-from datahub.emitter.mce_builder import get_sys_time
-from datahub.ingestion.api.common import PipelineContext, RecordEnvelope
-from datahub.ingestion.api.transform import Transformer
-from datahub.ingestion.api.workunit import MetadataWorkUnit
-from datahub.ingestion.transformer.auto_helper_transformer import AutoHelperTransformer
-from datahub.metadata.schema_classes import SystemMetadataClass
-
-
-def auto_system_metadata(
-    ctx: PipelineContext,
-    stream: Iterable[MetadataWorkUnit],
-) -> Iterable[MetadataWorkUnit]:
-    if not ctx.pipeline_config:
-        raise ValueError("Pipeline config is required for system metadata")
-    set_system_metadata = ctx.pipeline_config.flags.set_system_metadata
-    set_pipeline_name = ctx.pipeline_config.flags.set_system_metadata_pipeline_name
-
-    for workunit in stream:
-        if set_system_metadata:
-            workunit.metadata.systemMetadata = SystemMetadataClass(
-                lastObserved=get_sys_time(), runId=ctx.run_id
-            )
-            if set_pipeline_name:
-                workunit.metadata.systemMetadata.pipelineName = ctx.pipeline_name
-
-        yield workunit
-
-
-class SystemMetadataTransformer(Transformer):
-    def __init__(self, ctx: PipelineContext):
-        self._inner_transformer = AutoHelperTransformer(
-            functools.partial(auto_system_metadata, ctx)
-        )
-
-    def transform(
-        self, record_envelopes: Iterable[RecordEnvelope]
-    ) -> Iterable[RecordEnvelope]:
-        yield from self._inner_transformer.transform(record_envelopes)
-
-    @classmethod
-    def create(cls, config_dict: dict, ctx: PipelineContext) -> Transformer:
-        raise NotImplementedError(f"{cls.__name__} cannot be created from config")