acryl-datahub 1.0.0rc13__py3-none-any.whl → 1.0.0rc14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {acryl_datahub-1.0.0rc13.dist-info → acryl_datahub-1.0.0rc14.dist-info}/METADATA +2524 -2524
- {acryl_datahub-1.0.0rc13.dist-info → acryl_datahub-1.0.0rc14.dist-info}/RECORD +43 -43
- datahub/_version.py +1 -1
- datahub/configuration/common.py +1 -1
- datahub/emitter/rest_emitter.py +165 -10
- datahub/ingestion/glossary/classification_mixin.py +1 -5
- datahub/ingestion/graph/client.py +6 -3
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
- datahub/ingestion/run/pipeline.py +2 -4
- datahub/ingestion/sink/datahub_rest.py +4 -0
- datahub/ingestion/source/common/subtypes.py +5 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +1 -3
- datahub/ingestion/source/dbt/dbt_common.py +2 -4
- datahub/ingestion/source/dbt/dbt_tests.py +4 -8
- datahub/ingestion/source/dremio/dremio_api.py +1 -5
- datahub/ingestion/source/dremio/dremio_aspects.py +1 -4
- datahub/ingestion/source/dynamodb/dynamodb.py +1 -0
- datahub/ingestion/source/kafka_connect/common.py +1 -6
- datahub/ingestion/source/mlflow.py +338 -31
- datahub/ingestion/source/redshift/lineage.py +2 -2
- datahub/ingestion/source/redshift/lineage_v2.py +19 -7
- datahub/ingestion/source/redshift/profile.py +1 -1
- datahub/ingestion/source/redshift/query.py +14 -6
- datahub/ingestion/source/redshift/redshift.py +9 -5
- datahub/ingestion/source/redshift/redshift_schema.py +27 -7
- datahub/ingestion/source/sql/athena.py +6 -12
- datahub/ingestion/source/sql/hive.py +2 -6
- datahub/ingestion/source/sql/hive_metastore.py +2 -1
- datahub/ingestion/source/sql/sql_common.py +3 -9
- datahub/ingestion/source/state/stale_entity_removal_handler.py +4 -8
- datahub/ingestion/source/superset.py +1 -3
- datahub/ingestion/source/tableau/tableau_common.py +1 -1
- datahub/lite/duckdb_lite.py +1 -3
- datahub/metadata/_schema_classes.py +31 -1
- datahub/metadata/schema.avsc +56 -4
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +129 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +131 -3
- datahub/sdk/dataset.py +2 -2
- datahub/sql_parsing/sqlglot_utils.py +1 -4
- {acryl_datahub-1.0.0rc13.dist-info → acryl_datahub-1.0.0rc14.dist-info}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc13.dist-info → acryl_datahub-1.0.0rc14.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0rc13.dist-info → acryl_datahub-1.0.0rc14.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0rc13.dist-info → acryl_datahub-1.0.0rc14.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/redshift/profile.py
CHANGED
@@ -48,7 +48,7 @@ class RedshiftProfiler(GenericProfiler):
                 if not self.config.schema_pattern.allowed(schema):
                     continue
                 for table in tables[db].get(schema, {}):
-                    if table.is_external_table:
+                    if table.is_external_table() or self.report.is_shared_database:
                         if not self.config.profiling.profile_external_tables:
                             # Case 1: If user did not tell us to profile external tables, simply log this.
                             self.report.profiling_skipped_other[schema] += 1
datahub/ingestion/source/redshift/query.py
CHANGED
@@ -83,7 +83,9 @@ class RedshiftCommonQuery:
     # NOTE: Tables from shared database are not available in pg_catalog.pg_class
     @staticmethod
     def list_tables(
-        skip_external_tables: bool = False, is_shared_database: bool = False
+        database: str,
+        skip_external_tables: bool = False,
+        is_shared_database: bool = False,
     ) -> str:
         # NOTE: it looks like description is available only in pg_description
         # So this remains preferrred way
@@ -123,7 +125,7 @@ class RedshiftCommonQuery:
           AND n.nspname != 'information_schema'
        """

-        external_tables_query = """
+        external_tables_query = f"""
        SELECT 'EXTERNAL_TABLE' as tabletype,
            NULL AS "schema_oid",
            schemaname AS "schema",
@@ -142,10 +144,11 @@ class RedshiftCommonQuery:
            serde_parameters,
            NULL as table_description
        FROM pg_catalog.svv_external_tables
+       WHERE redshift_database_name='{database}'
        ORDER BY "schema",
            "relname"
        """
-        shared_database_tables_query = """
+        shared_database_tables_query = f"""
        SELECT table_type as tabletype,
            NULL AS "schema_oid",
            schema_name AS "schema",
@@ -164,6 +167,7 @@ class RedshiftCommonQuery:
            NULL as serde_parameters,
            NULL as table_description
        FROM svv_redshift_tables
+       WHERE database_name='{database}'
        ORDER BY "schema",
            "relname"
        """
@@ -175,9 +179,11 @@ class RedshiftCommonQuery:
         return f"{tables_query} UNION {external_tables_query}"

     @staticmethod
-    def list_columns(schema_name: str, is_shared_database: bool = False) -> str:
+    def list_columns(
+        database_name: str, schema_name: str, is_shared_database: bool = False
+    ) -> str:
         if is_shared_database:
-            return """
+            return f"""
                 SELECT
                   schema_name as "schema",
                   table_name as "table_name",
@@ -198,9 +204,10 @@ class RedshiftCommonQuery:
                   null as "table_oid"
                 FROM SVV_REDSHIFT_COLUMNS
                 WHERE 1 and schema = '{schema_name}'
+                AND database_name = '{database_name}'
                 ORDER BY "schema", "table_name", "attnum"
             """
-        return """
+        return f"""
             SELECT
               n.nspname as "schema",
               c.relname as "table_name",
@@ -275,6 +282,7 @@ class RedshiftCommonQuery:
               null as "table_oid"
             FROM SVV_EXTERNAL_COLUMNS
             WHERE 1 and schema = '{schema_name}'
+            AND redshift_database_name = '{database_name}'
             ORDER BY "schema", "table_name", "attnum"
             """
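Because the listing queries are now f-strings, the new `database` argument is interpolated directly into the `WHERE` clauses, scoping results to a single database (relevant for datashares, where `svv_redshift_tables` spans databases). A minimal usage sketch with illustrative values:

    from datahub.ingestion.source.redshift.query import RedshiftCommonQuery

    # Build the listing SQL scoped to one database; "dev" is an illustrative name.
    sql = RedshiftCommonQuery.list_tables(
        database="dev",
        skip_external_tables=False,
        is_shared_database=False,
    )
    # The external-tables branch of the UNION now carries the new filter:
    #   FROM pg_catalog.svv_external_tables
    #   WHERE redshift_database_name='dev'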
datahub/ingestion/source/redshift/redshift.py
CHANGED
@@ -366,7 +366,7 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):

         self.db = self.data_dictionary.get_database_details(connection, database)
         self.report.is_shared_database = (
-            self.db is not None and self.db.is_shared_database
+            self.db is not None and self.db.is_shared_database()
         )
         with self.report.new_stage(METADATA_EXTRACTION):
             self.db_tables[database] = defaultdict()
@@ -508,6 +508,7 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
         schema_columns: Dict[str, Dict[str, List[RedshiftColumn]]] = {}
         schema_columns[schema.name] = self.data_dictionary.get_columns_for_schema(
             conn=connection,
+            database=database,
             schema=schema,
             is_shared_database=self.report.is_shared_database,
         )
@@ -829,9 +830,12 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
             domain_config=self.config.domain,
         )

-    def cache_tables_and_views(self, connection: redshift_connector.Connection) -> None:
+    def cache_tables_and_views(
+        self, connection: redshift_connector.Connection, database: str
+    ) -> None:
         tables, views = self.data_dictionary.get_tables_and_views(
             conn=connection,
+            database=database,
             skip_external_tables=self.config.skip_external_tables,
             is_shared_database=self.report.is_shared_database,
         )
@@ -982,7 +986,7 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
             self.datashares_helper.to_platform_resource(list(outbound_shares))
         )

-        if self.db and self.db.is_shared_database:
+        if self.db and self.db.is_shared_database():
             inbound_share = self.db.get_inbound_share()
             if inbound_share is None:
                 self.report.warning(
@@ -996,8 +1000,8 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
         ):
             lineage_extractor.aggregator.add(known_lineage)

-        # TODO: distinguish between definition level lineage and audit log based lineage
-        #
+        # TODO: distinguish between definition level lineage and audit log based lineage.
+        # Definition level lineage should never be skipped
         if not self._should_ingest_lineage():
             return
datahub/ingestion/source/redshift/redshift_schema.py
CHANGED
@@ -42,7 +42,6 @@ class RedshiftTable(BaseTable):
     serde_parameters: Optional[str] = None
     last_altered: Optional[datetime] = None

-    @property
     def is_external_table(self) -> bool:
         return self.type == "EXTERNAL_TABLE"

@@ -56,7 +55,6 @@ class RedshiftView(BaseTable):
     size_in_bytes: Optional[int] = None
     rows_count: Optional[int] = None

-    @property
     def is_external_table(self) -> bool:
         return self.type == "EXTERNAL_TABLE"

@@ -71,10 +69,28 @@ class RedshiftSchema:
     external_platform: Optional[str] = None
     external_database: Optional[str] = None

-    @property
     def is_external_schema(self) -> bool:
         return self.type == "external"

+    def get_upstream_schema_name(self) -> Optional[str]:
+        """Gets the schema name from the external schema option.
+
+        Returns:
+            Optional[str]: The schema name from the external schema option
+            if this is an external schema and has a valid option format, None otherwise.
+        """
+
+        if not self.is_external_schema() or not self.option:
+            return None
+
+        # For external schema on redshift, option is in form
+        # {"SCHEMA":"tickit"}
+        schema_match = re.search(r'"SCHEMA"\s*:\s*"([^"]*)"', self.option)
+        if not schema_match:
+            return None
+        else:
+            return schema_match.group(1)
+

 @dataclass
 class PartialInboundDatashare:
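The regex in `get_upstream_schema_name` tolerates whitespace around the colon and captures the quoted value. A quick standalone illustration of the pattern against the documented option format:

    import re

    # Example option payload for a Redshift external schema, per the comment above.
    option = '{"SCHEMA":"tickit"}'

    schema_match = re.search(r'"SCHEMA"\s*:\s*"([^"]*)"', option)
    assert schema_match is not None
    assert schema_match.group(1) == "tickit"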
@@ -117,7 +133,6 @@ class RedshiftDatabase:
     type: str
     options: Optional[str] = None

-    @property
     def is_shared_database(self) -> bool:
         return self.type == "shared"

@@ -128,7 +143,7 @@ class RedshiftDatabase:
     def get_inbound_share(
         self,
     ) -> Optional[Union[InboundDatashare, PartialInboundDatashare]]:
-        if not self.is_shared_database or not self.options:
+        if not self.is_shared_database() or not self.options:
             return None

         # Convert into single regex ??
@@ -323,6 +338,7 @@ class RedshiftDataDictionary:
     def get_tables_and_views(
         self,
         conn: redshift_connector.Connection,
+        database: str,
         skip_external_tables: bool = False,
         is_shared_database: bool = False,
     ) -> Tuple[Dict[str, List[RedshiftTable]], Dict[str, List[RedshiftView]]]:
@@ -336,6 +352,7 @@ class RedshiftDataDictionary:
         cur = RedshiftDataDictionary.get_query_result(
             conn,
             RedshiftCommonQuery.list_tables(
+                database=database,
                 skip_external_tables=skip_external_tables,
                 is_shared_database=is_shared_database,
             ),
@@ -484,14 +501,17 @@ class RedshiftDataDictionary:
     @staticmethod
     def get_columns_for_schema(
         conn: redshift_connector.Connection,
+        database: str,
         schema: RedshiftSchema,
         is_shared_database: bool = False,
     ) -> Dict[str, List[RedshiftColumn]]:
         cursor = RedshiftDataDictionary.get_query_result(
             conn,
             RedshiftCommonQuery.list_columns(
-                schema_name=schema.name, is_shared_database=is_shared_database
-            ),
+                database_name=database,
+                schema_name=schema.name,
+                is_shared_database=is_shared_database,
+            ),
         )

         table_columns: Dict[str, List[RedshiftColumn]] = {}
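Dropping `@property` means every call site must move to the call form, as the hunks above do for `is_shared_database()` and earlier for `is_external_table()`: a bare reference to a plain method is an always-truthy bound-method object. A minimal sketch of the pitfall, using a hypothetical class rather than the real dataclasses:

    class Table:
        type: str = "TABLE"

        def is_external_table(self) -> bool:  # plain method, no @property
            return self.type == "EXTERNAL_TABLE"

    t = Table()
    assert t.is_external_table() is False  # correct: call the method
    assert bool(t.is_external_table) is True  # stale property-style access: always truthy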
datahub/ingestion/source/sql/athena.py
CHANGED
@@ -540,19 +540,13 @@ class AthenaSource(SQLAlchemySource):
                     inspector=inspector,
                     description=column.get("comment"),
                     nullable=column.get("nullable", True),
-                    is_part_of_key=(
-                        True
-                        if (
-                            pk_constraints is not None
-                            and isinstance(pk_constraints, dict)
-                            and column["name"] in pk_constraints.get("constrained_columns", [])
-                        )
-                        else False
+                    is_part_of_key=bool(
+                        pk_constraints is not None
+                        and isinstance(pk_constraints, dict)
+                        and column["name"] in pk_constraints.get("constrained_columns", [])
                     ),
-                    is_partitioning_key=(
-                        True
-                        if (partition_keys is not None and column["name"] in partition_keys)
-                        else False
+                    is_partitioning_key=bool(
+                        partition_keys is not None and column["name"] in partition_keys
                     ),
                 )
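The `True if ... else False` ternaries collapse to `bool(...)`, which is equivalent and is the form linters such as ruff suggest (SIM210). A tiny demonstration of the equivalence:

    partition_keys = ["ds"]
    name = "ds"

    old_style = True if (partition_keys is not None and name in partition_keys) else False
    new_style = bool(partition_keys is not None and name in partition_keys)
    assert old_style is True
    assert new_style is True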
datahub/ingestion/source/sql/hive.py
CHANGED
@@ -821,12 +821,8 @@ class HiveSource(TwoTierSQLAlchemySource):

         try:
             view_definition = inspector.get_view_definition(view, schema)
-            if view_definition is None:
-                view_definition = ""
-            else:
-                # Some dialects return a TextClause instead of a raw string,
-                # so we need to convert them to a string.
-                view_definition = str(view_definition)
+            # Some dialects return a TextClause instead of a raw string, so we need to convert them to a string.
+            view_definition = str(view_definition) if view_definition else ""
         except NotImplementedError:
             view_definition = ""
datahub/ingestion/source/sql/hive_metastore.py
CHANGED
@@ -893,8 +893,9 @@ class HiveMetastoreSource(SQLAlchemySource):
         return get_schema_fields_for_hive_column(
             column["col_name"],
             column["col_type"],
+            # column is actually an sqlalchemy.engine.row.LegacyRow, not a Dict and we cannot make column.get("col_description", "")
             description=(
-                column["col_description"] if "col_description" in column else ""
+                column["col_description"] if "col_description" in column else ""  # noqa: SIM401
             ),
             default_nullable=True,
         )
datahub/ingestion/source/sql/sql_common.py
CHANGED
@@ -1031,16 +1031,10 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
     def _get_view_definition(self, inspector: Inspector, schema: str, view: str) -> str:
         try:
             view_definition = inspector.get_view_definition(view, schema)
-            if view_definition is None:
-                view_definition = ""
-            else:
-                # Some dialects return a TextClause instead of a raw string,
-                # so we need to convert them to a string.
-                view_definition = str(view_definition)
+            # Some dialects return a TextClause instead of a raw string, so we need to convert them to a string.
+            return str(view_definition) if view_definition else ""
         except NotImplementedError:
-            view_definition = ""
-
-        return view_definition
+            return ""

     def _process_view(
         self,
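Both `HiveSource` and the base `SQLAlchemySource` now collapse the `None` and `TextClause` handling into a single guarded `str(...)` conversion. The same normalization in isolation, with a hypothetical helper name:

    from typing import Any

    def normalize_view_definition(view_definition: Any) -> str:
        # inspector.get_view_definition may return None, a plain string, or a
        # dialect-specific object such as a SQLAlchemy TextClause; coerce to str.
        return str(view_definition) if view_definition else ""

    assert normalize_view_definition(None) == ""
    assert normalize_view_definition("SELECT 1") == "SELECT 1"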
datahub/ingestion/source/state/stale_entity_removal_handler.py
CHANGED
@@ -114,14 +114,10 @@ class StaleEntityRemovalHandler(
         self.stateful_ingestion_config: Optional[StatefulStaleMetadataRemovalConfig] = (
             config.stateful_ingestion
         )
-        self.checkpointing_enabled: bool = (
-            True
-            if (
-                self.state_provider.is_stateful_ingestion_configured()
-                and self.stateful_ingestion_config
-                and self.stateful_ingestion_config.remove_stale_metadata
-            )
-            else False
+        self.checkpointing_enabled: bool = bool(
+            self.state_provider.is_stateful_ingestion_configured()
+            and self.stateful_ingestion_config
+            and self.stateful_ingestion_config.remove_stale_metadata
         )
         self._job_id = self._init_job_id()
         self._urns_to_skip: Set[str] = set()
datahub/ingestion/source/superset.py
CHANGED
@@ -431,9 +431,7 @@ class SupersetSource(StatefulIngestionSourceBase):
                     dashboard_data.get("owners", []),
                 )
             ),
-            "IsCertified": str(
-                True if dashboard_data.get("certified_by") else False
-            ).lower(),
+            "IsCertified": str(bool(dashboard_data.get("certified_by"))).lower(),
         }

         if dashboard_data.get("certified_by"):
datahub/ingestion/source/tableau/tableau_common.py
CHANGED
@@ -902,7 +902,7 @@ def get_unique_custom_sql(custom_sql_list: List[dict]) -> List[dict]:
             "name": custom_sql.get("name"),
             # We assume that this is unsupported custom sql if "actual tables that this query references"
             # are missing from api result.
-            "isUnsupportedCustomSql": True if not custom_sql.get("tables") else False,
+            "isUnsupportedCustomSql": not custom_sql.get("tables"),
             "query": custom_sql.get("query"),
             "connectionType": custom_sql.get("connectionType"),
             "columns": custom_sql.get("columns"),
datahub/lite/duckdb_lite.py
CHANGED
@@ -760,9 +760,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
                 entity_id=[str(data_platform_urn), data_platform_instance],
             )
             self._create_edges_from_data_platform_instance(data_platform_instance_urn)
-        elif isinstance(aspect, ChartInfoClass) or isinstance(
-            aspect, DashboardInfoClass
-        ):
+        elif isinstance(aspect, (ChartInfoClass, DashboardInfoClass)):
             urn = Urn.from_string(entity_urn)
             self.add_edge(
                 entity_urn,
datahub/metadata/_schema_classes.py
CHANGED
@@ -9326,13 +9326,16 @@ class DataProcessInstanceInputClass(_Aspect):

     def __init__(self,
         inputs: List[str],
+        inputEdges: Union[None, List["EdgeClass"]]=None,
     ):
         super().__init__()

         self.inputs = inputs
+        self.inputEdges = inputEdges

     def _restore_defaults(self) -> None:
         self.inputs = list()
+        self.inputEdges = self.RECORD_SCHEMA.fields_dict["inputEdges"].default


     @property
@@ -9345,6 +9348,18 @@ class DataProcessInstanceInputClass(_Aspect):
         self._inner_dict['inputs'] = value


+    @property
+    def inputEdges(self) -> Union[None, List["EdgeClass"]]:
+        """Input assets consumed by the data process instance, with additional metadata.
+        Counts as lineage.
+        Will eventually deprecate the inputs field."""
+        return self._inner_dict.get('inputEdges')  # type: ignore
+
+    @inputEdges.setter
+    def inputEdges(self, value: Union[None, List["EdgeClass"]]) -> None:
+        self._inner_dict['inputEdges'] = value
+
+
 class DataProcessInstanceOutputClass(_Aspect):
     """Information about the outputs of a Data process"""

@@ -9355,18 +9370,21 @@ class DataProcessInstanceOutputClass(_Aspect):

     def __init__(self,
         outputs: List[str],
+        outputEdges: Union[None, List["EdgeClass"]]=None,
     ):
         super().__init__()

         self.outputs = outputs
+        self.outputEdges = outputEdges

     def _restore_defaults(self) -> None:
         self.outputs = list()
+        self.outputEdges = self.RECORD_SCHEMA.fields_dict["outputEdges"].default


     @property
     def outputs(self) -> List[str]:
-        """Output
+        """Output assets produced"""
         return self._inner_dict.get('outputs')  # type: ignore

     @outputs.setter
@@ -9374,6 +9392,18 @@ class DataProcessInstanceOutputClass(_Aspect):
         self._inner_dict['outputs'] = value


+    @property
+    def outputEdges(self) -> Union[None, List["EdgeClass"]]:
+        """Output assets produced by the data process instance during processing, with additional metadata.
+        Counts as lineage.
+        Will eventually deprecate the outputs field."""
+        return self._inner_dict.get('outputEdges')  # type: ignore
+
+    @outputEdges.setter
+    def outputEdges(self, value: Union[None, List["EdgeClass"]]) -> None:
+        self._inner_dict['outputEdges'] = value
+
+
 class DataProcessInstancePropertiesClass(_Aspect):
     """The inputs and outputs of this data process"""
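With the regenerated classes, `inputEdges` and `outputEdges` become optional constructor arguments alongside the legacy urn lists. A minimal usage sketch, assuming the generated `EdgeClass` that already exists in the same module:

    from datahub.metadata.schema_classes import (
        DataProcessInstanceInputClass,
        EdgeClass,
    )

    dataset_urn = "urn:li:dataset:(urn:li:dataPlatform:hive,db.table,PROD)"

    aspect = DataProcessInstanceInputClass(
        inputs=[dataset_urn],  # legacy field, still required
        inputEdges=[EdgeClass(destinationUrn=dataset_urn)],  # new lineage-bearing edges
    )
    assert aspect.inputEdges is not None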
datahub/metadata/schema.avsc
CHANGED
@@ -16749,8 +16749,6 @@
             "dataset",
             "mlModel"
           ],
-          "isLineage": true,
-          "isUpstream": false,
           "name": "Produces"
         }
       },
@@ -16770,7 +16768,35 @@
         "items": "string"
       },
       "name": "outputs",
-      "doc": "Output
+      "doc": "Output assets produced"
+    },
+    {
+      "Relationship": {
+        "/*/destinationUrn": {
+          "createdActor": "outputEdges/*/created/actor",
+          "createdOn": "outputEdges/*/created/time",
+          "entityTypes": [
+            "dataset",
+            "mlModel"
+          ],
+          "isLineage": true,
+          "isUpstream": false,
+          "name": "DataProcessInstanceProduces",
+          "properties": "outputEdges/*/properties",
+          "updatedActor": "outputEdges/*/lastModified/actor",
+          "updatedOn": "outputEdges/*/lastModified/time"
+        }
+      },
+      "type": [
+        "null",
+        {
+          "type": "array",
+          "items": "com.linkedin.pegasus2avro.common.Edge"
+        }
+      ],
+      "name": "outputEdges",
+      "default": null,
+      "doc": "Output assets produced by the data process instance during processing, with additional metadata.\nCounts as lineage.\nWill eventually deprecate the outputs field."
     }
   ],
   "doc": "Information about the outputs of a Data process"
@@ -16977,7 +17003,6 @@
             "dataset",
             "mlModel"
           ],
-          "isLineage": true,
           "name": "Consumes"
         }
       },
@@ -16998,6 +17023,33 @@
       },
       "name": "inputs",
       "doc": "Input assets consumed"
+    },
+    {
+      "Relationship": {
+        "/*/destinationUrn": {
+          "createdActor": "inputEdges/*/created/actor",
+          "createdOn": "inputEdges/*/created/time",
+          "entityTypes": [
+            "dataset",
+            "mlModel"
+          ],
+          "isLineage": true,
+          "name": "DataProcessInstanceConsumes",
+          "properties": "inputEdges/*/properties",
+          "updatedActor": "inputEdges/*/lastModified/actor",
+          "updatedOn": "inputEdges/*/lastModified/time"
+        }
+      },
+      "type": [
+        "null",
+        {
+          "type": "array",
+          "items": "com.linkedin.pegasus2avro.common.Edge"
+        }
+      ],
+      "name": "inputEdges",
+      "default": null,
+      "doc": "Input assets consumed by the data process instance, with additional metadata.\nCounts as lineage.\nWill eventually deprecate the inputs field."
     }
   ],
   "doc": "Information about the inputs datasets of a Data process"
datahub/metadata/schemas/DataProcessInstanceInput.avsc
CHANGED
@@ -13,7 +13,6 @@
             "dataset",
             "mlModel"
           ],
-          "isLineage": true,
           "name": "Consumes"
         }
       },
@@ -34,6 +33,135 @@
       "doc": "Input assets consumed",
       "Urn": "Urn",
       "urn_is_array": true
+    },
+    {
+      "Relationship": {
+        "/*/destinationUrn": {
+          "createdActor": "inputEdges/*/created/actor",
+          "createdOn": "inputEdges/*/created/time",
+          "entityTypes": [
+            "dataset",
+            "mlModel"
+          ],
+          "isLineage": true,
+          "name": "DataProcessInstanceConsumes",
+          "properties": "inputEdges/*/properties",
+          "updatedActor": "inputEdges/*/lastModified/actor",
+          "updatedOn": "inputEdges/*/lastModified/time"
+        }
+      },
+      "type": [
+        "null",
+        {
+          "type": "array",
+          "items": {
+            "type": "record",
+            "name": "Edge",
+            "namespace": "com.linkedin.pegasus2avro.common",
+            "fields": [
+              {
+                "java": {
+                  "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+                },
+                "type": [
+                  "null",
+                  "string"
+                ],
+                "name": "sourceUrn",
+                "default": null,
+                "doc": "Urn of the source of this relationship edge.\nIf not specified, assumed to be the entity that this aspect belongs to.",
+                "Urn": "Urn"
+              },
+              {
+                "java": {
+                  "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+                },
+                "type": "string",
+                "name": "destinationUrn",
+                "doc": "Urn of the destination of this relationship edge.",
+                "Urn": "Urn"
+              },
+              {
+                "type": [
+                  "null",
+                  {
+                    "type": "record",
+                    "name": "AuditStamp",
+                    "namespace": "com.linkedin.pegasus2avro.common",
+                    "fields": [
+                      {
+                        "type": "long",
+                        "name": "time",
+                        "doc": "When did the resource/association/sub-resource move into the specific lifecycle stage represented by this AuditEvent."
+                      },
+                      {
+                        "java": {
+                          "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+                        },
+                        "type": "string",
+                        "name": "actor",
+                        "doc": "The entity (e.g. a member URN) which will be credited for moving the resource/association/sub-resource into the specific lifecycle stage. It is also the one used to authorize the change.",
+                        "Urn": "Urn"
+                      },
+                      {
+                        "java": {
+                          "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+                        },
+                        "type": [
+                          "null",
+                          "string"
+                        ],
+                        "name": "impersonator",
+                        "default": null,
+                        "doc": "The entity (e.g. a service URN) which performs the change on behalf of the Actor and must be authorized to act as the Actor.",
+                        "Urn": "Urn"
+                      },
+                      {
+                        "type": [
+                          "null",
+                          "string"
+                        ],
+                        "name": "message",
+                        "default": null,
+                        "doc": "Additional context around how DataHub was informed of the particular change. For example: was the change created by an automated process, or manually."
+                      }
+                    ],
+                    "doc": "Data captured on a resource/association/sub-resource level giving insight into when that resource/association/sub-resource moved into a particular lifecycle stage, and who acted to move it into that specific lifecycle stage."
+                  }
+                ],
+                "name": "created",
+                "default": null,
+                "doc": "Audit stamp containing who created this relationship edge and when"
+              },
+              {
+                "type": [
+                  "null",
+                  "com.linkedin.pegasus2avro.common.AuditStamp"
+                ],
+                "name": "lastModified",
+                "default": null,
+                "doc": "Audit stamp containing who last modified this relationship edge and when"
+              },
+              {
+                "type": [
+                  "null",
+                  {
+                    "type": "map",
+                    "values": "string"
+                  }
+                ],
+                "name": "properties",
+                "default": null,
+                "doc": "A generic properties bag that allows us to store specific information on this graph edge."
+              }
+            ],
+            "doc": "A common structure to represent all edges to entities when used inside aspects as collections\nThis ensures that all edges have common structure around audit-stamps and will support PATCH, time-travel automatically."
+          }
+        }
+      ],
+      "name": "inputEdges",
+      "default": null,
+      "doc": "Input assets consumed by the data process instance, with additional metadata.\nCounts as lineage.\nWill eventually deprecate the inputs field."
     }
   ],
   "doc": "Information about the inputs datasets of a Data process"