acryl-datahub 1.0.0rc13__py3-none-any.whl → 1.0.0rc15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub might be problematic.

Files changed (45)
  1. {acryl_datahub-1.0.0rc13.dist-info → acryl_datahub-1.0.0rc15.dist-info}/METADATA +2540 -2540
  2. {acryl_datahub-1.0.0rc13.dist-info → acryl_datahub-1.0.0rc15.dist-info}/RECORD +45 -45
  3. datahub/_version.py +1 -1
  4. datahub/configuration/common.py +1 -1
  5. datahub/emitter/rest_emitter.py +165 -10
  6. datahub/ingestion/glossary/classification_mixin.py +1 -5
  7. datahub/ingestion/graph/client.py +6 -3
  8. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
  9. datahub/ingestion/run/pipeline.py +2 -4
  10. datahub/ingestion/sink/datahub_rest.py +4 -0
  11. datahub/ingestion/source/common/subtypes.py +5 -0
  12. datahub/ingestion/source/data_lake_common/path_spec.py +1 -3
  13. datahub/ingestion/source/dbt/dbt_common.py +2 -4
  14. datahub/ingestion/source/dbt/dbt_tests.py +4 -8
  15. datahub/ingestion/source/dremio/dremio_api.py +1 -5
  16. datahub/ingestion/source/dremio/dremio_aspects.py +1 -4
  17. datahub/ingestion/source/dynamodb/dynamodb.py +1 -0
  18. datahub/ingestion/source/ge_data_profiler.py +1 -1
  19. datahub/ingestion/source/kafka_connect/common.py +1 -6
  20. datahub/ingestion/source/mlflow.py +338 -31
  21. datahub/ingestion/source/redshift/lineage.py +2 -2
  22. datahub/ingestion/source/redshift/lineage_v2.py +19 -7
  23. datahub/ingestion/source/redshift/profile.py +1 -1
  24. datahub/ingestion/source/redshift/query.py +14 -6
  25. datahub/ingestion/source/redshift/redshift.py +9 -5
  26. datahub/ingestion/source/redshift/redshift_schema.py +27 -7
  27. datahub/ingestion/source/sql/athena.py +6 -12
  28. datahub/ingestion/source/sql/hive.py +2 -6
  29. datahub/ingestion/source/sql/hive_metastore.py +2 -1
  30. datahub/ingestion/source/sql/sql_common.py +3 -9
  31. datahub/ingestion/source/state/stale_entity_removal_handler.py +4 -8
  32. datahub/ingestion/source/superset.py +1 -3
  33. datahub/ingestion/source/tableau/tableau_common.py +1 -1
  34. datahub/ingestion/source/unity/ge_profiler.py +2 -1
  35. datahub/lite/duckdb_lite.py +1 -3
  36. datahub/metadata/_schema_classes.py +31 -1
  37. datahub/metadata/schema.avsc +56 -4
  38. datahub/metadata/schemas/DataProcessInstanceInput.avsc +129 -1
  39. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +131 -3
  40. datahub/sdk/dataset.py +2 -2
  41. datahub/sql_parsing/sqlglot_utils.py +1 -4
  42. {acryl_datahub-1.0.0rc13.dist-info → acryl_datahub-1.0.0rc15.dist-info}/LICENSE +0 -0
  43. {acryl_datahub-1.0.0rc13.dist-info → acryl_datahub-1.0.0rc15.dist-info}/WHEEL +0 -0
  44. {acryl_datahub-1.0.0rc13.dist-info → acryl_datahub-1.0.0rc15.dist-info}/entry_points.txt +0 -0
  45. {acryl_datahub-1.0.0rc13.dist-info → acryl_datahub-1.0.0rc15.dist-info}/top_level.txt +0 -0

datahub/ingestion/source/redshift/lineage_v2.py
@@ -403,8 +403,8 @@ class RedshiftSqlLineageV2(Closeable):
             for table in tables:
                 schema = db_schemas[self.database][schema_name]
                 if (
-                    table.is_external_table
-                    and schema.is_external_schema
+                    table.is_external_table()
+                    and schema.is_external_schema()
                     and schema.external_platform
                 ):
                     # external_db_params = schema.option
@@ -416,14 +416,26 @@ class RedshiftSqlLineageV2(Closeable):
                        platform_instance=self.config.platform_instance,
                        env=self.config.env,
                    )
-                   upstream_urn = mce_builder.make_dataset_urn_with_platform_instance(
-                       upstream_platform,
-                       f"{schema.external_database}.{table.name}",
-                       platform_instance=(
+                   if upstream_platform == self.platform:
+                       upstream_schema = schema.get_upstream_schema_name() or "public"
+                       upstream_dataset_name = (
+                           f"{schema.external_database}.{upstream_schema}.{table.name}"
+                       )
+                       upstream_platform_instance = self.config.platform_instance
+                   else:
+                       upstream_dataset_name = (
+                           f"{schema.external_database}.{table.name}"
+                       )
+                       upstream_platform_instance = (
                            self.config.platform_instance_map.get(upstream_platform)
                            if self.config.platform_instance_map
                            else None
-                       ),
+                       )
+
+                   upstream_urn = mce_builder.make_dataset_urn_with_platform_instance(
+                       upstream_platform,
+                       upstream_dataset_name,
+                       platform_instance=upstream_platform_instance,
                        env=self.config.env,
                    )

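Net effect of the second hunk: when the external schema's platform is Redshift itself (an external schema over a datashare), the upstream dataset name now includes the upstream schema parsed from the schema option (falling back to "public") instead of the old two-part database.table name. A minimal sketch of the resulting URNs, with made-up database, schema, and table names:

    import datahub.emitter.mce_builder as mce_builder

    # Hypothetical values for illustration only.
    external_database = "shared_db"
    upstream_schema = "tickit"  # what RedshiftSchema.get_upstream_schema_name() would return
    table_name = "sales"

    # Same-platform (datashare) upstream: three-part database.schema.table name.
    redshift_urn = mce_builder.make_dataset_urn_with_platform_instance(
        "redshift",
        f"{external_database}.{upstream_schema}.{table_name}",
        platform_instance=None,
        env="PROD",
    )

    # Cross-platform external table (e.g. Glue): two-part database.table name, as before.
    glue_urn = mce_builder.make_dataset_urn_with_platform_instance(
        "glue",
        f"{external_database}.{table_name}",
        platform_instance=None,
        env="PROD",
    )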

datahub/ingestion/source/redshift/profile.py
@@ -48,7 +48,7 @@ class RedshiftProfiler(GenericProfiler):
                 if not self.config.schema_pattern.allowed(schema):
                     continue
                 for table in tables[db].get(schema, {}):
-                    if table.is_external_table:
+                    if table.is_external_table() or self.report.is_shared_database:
                         if not self.config.profiling.profile_external_tables:
                             # Case 1: If user did not tell us to profile external tables, simply log this.
                             self.report.profiling_skipped_other[schema] += 1

datahub/ingestion/source/redshift/query.py
@@ -83,7 +83,9 @@ class RedshiftCommonQuery:
     # NOTE: Tables from shared database are not available in pg_catalog.pg_class
     @staticmethod
     def list_tables(
-        skip_external_tables: bool = False, is_shared_database: bool = False
+        database: str,
+        skip_external_tables: bool = False,
+        is_shared_database: bool = False,
     ) -> str:
         # NOTE: it looks like description is available only in pg_description
         # So this remains preferrred way
@@ -123,7 +125,7 @@ class RedshiftCommonQuery:
            AND n.nspname != 'information_schema'
        """
 
-        external_tables_query = """
+        external_tables_query = f"""
        SELECT 'EXTERNAL_TABLE' as tabletype,
            NULL AS "schema_oid",
            schemaname AS "schema",
@@ -142,10 +144,11 @@ class RedshiftCommonQuery:
            serde_parameters,
            NULL as table_description
        FROM pg_catalog.svv_external_tables
+       WHERE redshift_database_name='{database}'
        ORDER BY "schema",
            "relname"
        """
-        shared_database_tables_query = """
+        shared_database_tables_query = f"""
        SELECT table_type as tabletype,
            NULL AS "schema_oid",
            schema_name AS "schema",
@@ -164,6 +167,7 @@ class RedshiftCommonQuery:
            NULL as serde_parameters,
            NULL as table_description
        FROM svv_redshift_tables
+       WHERE database_name='{database}'
        ORDER BY "schema",
            "relname"
        """
@@ -175,9 +179,11 @@ class RedshiftCommonQuery:
            return f"{tables_query} UNION {external_tables_query}"
 
     @staticmethod
-    def list_columns(is_shared_database: bool = False) -> str:
+    def list_columns(
+        database_name: str, schema_name: str, is_shared_database: bool = False
+    ) -> str:
         if is_shared_database:
-            return """
+            return f"""
            SELECT
              schema_name as "schema",
              table_name as "table_name",
@@ -198,9 +204,10 @@ class RedshiftCommonQuery:
              null as "table_oid"
            FROM SVV_REDSHIFT_COLUMNS
            WHERE 1 and schema = '{schema_name}'
+           AND database_name = '{database_name}'
            ORDER BY "schema", "table_name", "attnum"
            """
-        return """
+        return f"""
            SELECT
              n.nspname as "schema",
              c.relname as "table_name",
@@ -275,6 +282,7 @@ class RedshiftCommonQuery:
              null as "table_oid"
            FROM SVV_EXTERNAL_COLUMNS
            WHERE 1 and schema = '{schema_name}'
+           AND redshift_database_name = '{database_name}'
            ORDER BY "schema", "table_name", "attnum"
            """

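With these changes list_tables and list_columns take the database name up front and build the SQL via f-strings, so callers no longer .format() the result. A hedged usage sketch (identifier values are made up):

    from datahub.ingestion.source.redshift.query import RedshiftCommonQuery

    # SQL against SVV_REDSHIFT_COLUMNS, now filtered by both schema and database.
    sql = RedshiftCommonQuery.list_columns(
        database_name="shared_db",  # hypothetical shared (datashare) database
        schema_name="tickit",
        is_shared_database=True,
    )
    assert "AND database_name = 'shared_db'" in sql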

datahub/ingestion/source/redshift/redshift.py
@@ -366,7 +366,7 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
 
         self.db = self.data_dictionary.get_database_details(connection, database)
         self.report.is_shared_database = (
-            self.db is not None and self.db.is_shared_database
+            self.db is not None and self.db.is_shared_database()
         )
         with self.report.new_stage(METADATA_EXTRACTION):
             self.db_tables[database] = defaultdict()
@@ -508,6 +508,7 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
         schema_columns: Dict[str, Dict[str, List[RedshiftColumn]]] = {}
         schema_columns[schema.name] = self.data_dictionary.get_columns_for_schema(
             conn=connection,
+            database=database,
             schema=schema,
             is_shared_database=self.report.is_shared_database,
         )
@@ -829,9 +830,12 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
             domain_config=self.config.domain,
         )
 
-    def cache_tables_and_views(self, connection, database):
+    def cache_tables_and_views(
+        self, connection: redshift_connector.Connection, database: str
+    ) -> None:
         tables, views = self.data_dictionary.get_tables_and_views(
             conn=connection,
+            database=database,
             skip_external_tables=self.config.skip_external_tables,
             is_shared_database=self.report.is_shared_database,
         )
@@ -982,7 +986,7 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
             self.datashares_helper.to_platform_resource(list(outbound_shares))
         )
 
-        if self.db and self.db.is_shared_database:
+        if self.db and self.db.is_shared_database():
            inbound_share = self.db.get_inbound_share()
            if inbound_share is None:
                self.report.warning(
@@ -996,8 +1000,8 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
        ):
            lineage_extractor.aggregator.add(known_lineage)
 
-        # TODO: distinguish between definition level lineage and audit log based lineage
-        # definition level lineage should never be skipped
+        # TODO: distinguish between definition level lineage and audit log based lineage.
+        # Definition level lineage should never be skipped
        if not self._should_ingest_lineage():
            return


datahub/ingestion/source/redshift/redshift_schema.py
@@ -42,7 +42,6 @@ class RedshiftTable(BaseTable):
     serde_parameters: Optional[str] = None
     last_altered: Optional[datetime] = None
 
-    @property
     def is_external_table(self) -> bool:
         return self.type == "EXTERNAL_TABLE"
 
@@ -56,7 +55,6 @@ class RedshiftView(BaseTable):
     size_in_bytes: Optional[int] = None
     rows_count: Optional[int] = None
 
-    @property
     def is_external_table(self) -> bool:
         return self.type == "EXTERNAL_TABLE"
 
@@ -71,10 +69,28 @@ class RedshiftSchema:
     external_platform: Optional[str] = None
     external_database: Optional[str] = None
 
-    @property
     def is_external_schema(self) -> bool:
         return self.type == "external"
 
+    def get_upstream_schema_name(self) -> Optional[str]:
+        """Gets the schema name from the external schema option.
+
+        Returns:
+            Optional[str]: The schema name from the external schema option
+            if this is an external schema and has a valid option format, None otherwise.
+        """
+
+        if not self.is_external_schema() or not self.option:
+            return None
+
+        # For external schema on redshift, option is in form
+        # {"SCHEMA":"tickit"}
+        schema_match = re.search(r'"SCHEMA"\s*:\s*"([^"]*)"', self.option)
+        if not schema_match:
+            return None
+        else:
+            return schema_match.group(1)
+
 
 @dataclass
 class PartialInboundDatashare:
@@ -117,7 +133,6 @@ class RedshiftDatabase:
     type: str
     options: Optional[str] = None
 
-    @property
     def is_shared_database(self) -> bool:
         return self.type == "shared"
 
@@ -128,7 +143,7 @@ class RedshiftDatabase:
     def get_inbound_share(
         self,
     ) -> Optional[Union[InboundDatashare, PartialInboundDatashare]]:
-        if not self.is_shared_database or not self.options:
+        if not self.is_shared_database() or not self.options:
            return None
 
        # Convert into single regex ??
@@ -323,6 +338,7 @@ class RedshiftDataDictionary:
    def get_tables_and_views(
        self,
        conn: redshift_connector.Connection,
+       database: str,
        skip_external_tables: bool = False,
        is_shared_database: bool = False,
    ) -> Tuple[Dict[str, List[RedshiftTable]], Dict[str, List[RedshiftView]]]:
@@ -336,6 +352,7 @@ class RedshiftDataDictionary:
        cur = RedshiftDataDictionary.get_query_result(
            conn,
            RedshiftCommonQuery.list_tables(
+               database=database,
                skip_external_tables=skip_external_tables,
                is_shared_database=is_shared_database,
            ),
@@ -484,14 +501,17 @@ class RedshiftDataDictionary:
    @staticmethod
    def get_columns_for_schema(
        conn: redshift_connector.Connection,
+       database: str,
        schema: RedshiftSchema,
        is_shared_database: bool = False,
    ) -> Dict[str, List[RedshiftColumn]]:
        cursor = RedshiftDataDictionary.get_query_result(
            conn,
            RedshiftCommonQuery.list_columns(
-               is_shared_database=is_shared_database
-           ).format(schema_name=schema.name),
+               database_name=database,
+               schema_name=schema.name,
+               is_shared_database=is_shared_database,
+           ),
        )
 
        table_columns: Dict[str, List[RedshiftColumn]] = {}
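The new RedshiftSchema.get_upstream_schema_name depends only on the option string Redshift stores for an external schema. A standalone sketch of the same parsing, with a made-up option value:

    import re
    from typing import Optional

    def parse_upstream_schema(option: Optional[str]) -> Optional[str]:
        # Mirrors the regex above; the option looks like '{"SCHEMA":"tickit"}'.
        if not option:
            return None
        match = re.search(r'"SCHEMA"\s*:\s*"([^"]*)"', option)
        return match.group(1) if match else None

    assert parse_upstream_schema('{"SCHEMA":"tickit"}') == "tickit"
    assert parse_upstream_schema("{}") is None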

datahub/ingestion/source/sql/athena.py
@@ -540,19 +540,13 @@ class AthenaSource(SQLAlchemySource):
                 inspector=inspector,
                 description=column.get("comment"),
                 nullable=column.get("nullable", True),
-                is_part_of_key=(
-                    True
-                    if (
-                        pk_constraints is not None
-                        and isinstance(pk_constraints, dict)
-                        and column["name"] in pk_constraints.get("constrained_columns", [])
-                    )
-                    else False
+                is_part_of_key=bool(
+                    pk_constraints is not None
+                    and isinstance(pk_constraints, dict)
+                    and column["name"] in pk_constraints.get("constrained_columns", [])
                 ),
-                is_partitioning_key=(
-                    True
-                    if (partition_keys is not None and column["name"] in partition_keys)
-                    else False
+                is_partitioning_key=bool(
+                    partition_keys is not None and column["name"] in partition_keys
                 ),
             )


datahub/ingestion/source/sql/hive.py
@@ -821,12 +821,8 @@ class HiveSource(TwoTierSQLAlchemySource):
 
         try:
             view_definition = inspector.get_view_definition(view, schema)
-            if view_definition is None:
-                view_definition = ""
-            else:
-                # Some dialects return a TextClause instead of a raw string,
-                # so we need to convert them to a string.
-                view_definition = str(view_definition)
+            # Some dialects return a TextClause instead of a raw string, so we need to convert them to a string.
+            view_definition = str(view_definition) if view_definition else ""
         except NotImplementedError:
             view_definition = ""


datahub/ingestion/source/sql/hive_metastore.py
@@ -893,8 +893,9 @@ class HiveMetastoreSource(SQLAlchemySource):
         return get_schema_fields_for_hive_column(
             column["col_name"],
             column["col_type"],
+            # column is actually an sqlalchemy.engine.row.LegacyRow, not a Dict and we cannot make column.get("col_description", "")
             description=(
-                column["col_description"] if "col_description" in column else ""
+                column["col_description"] if "col_description" in column else ""  # noqa: SIM401
             ),
             default_nullable=True,
         )

datahub/ingestion/source/sql/sql_common.py
@@ -1031,16 +1031,10 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
     def _get_view_definition(self, inspector: Inspector, schema: str, view: str) -> str:
         try:
             view_definition = inspector.get_view_definition(view, schema)
-            if view_definition is None:
-                view_definition = ""
-            else:
-                # Some dialects return a TextClause instead of a raw string,
-                # so we need to convert them to a string.
-                view_definition = str(view_definition)
+            # Some dialects return a TextClause instead of a raw string, so we need to convert them to a string.
+            return str(view_definition) if view_definition else ""
         except NotImplementedError:
-            view_definition = ""
-
-        return view_definition
+            return ""
 
     def _process_view(
         self,
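For reference on why both view-definition hunks call str(): some SQLAlchemy dialects return a TextClause rather than a raw string from get_view_definition, and str() on a TextClause yields the underlying SQL. A tiny illustrative sketch (not from the diff):

    from sqlalchemy import text

    view_definition = text("SELECT * FROM base_table")  # what some dialects return
    assert str(view_definition) == "SELECT * FROM base_table"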

datahub/ingestion/source/state/stale_entity_removal_handler.py
@@ -114,14 +114,10 @@ class StaleEntityRemovalHandler(
         self.stateful_ingestion_config: Optional[StatefulStaleMetadataRemovalConfig] = (
             config.stateful_ingestion
         )
-        self.checkpointing_enabled: bool = (
-            True
-            if (
-                self.state_provider.is_stateful_ingestion_configured()
-                and self.stateful_ingestion_config
-                and self.stateful_ingestion_config.remove_stale_metadata
-            )
-            else False
+        self.checkpointing_enabled: bool = bool(
+            self.state_provider.is_stateful_ingestion_configured()
+            and self.stateful_ingestion_config
+            and self.stateful_ingestion_config.remove_stale_metadata
         )
         self._job_id = self._init_job_id()
         self._urns_to_skip: Set[str] = set()

datahub/ingestion/source/superset.py
@@ -431,9 +431,7 @@ class SupersetSource(StatefulIngestionSourceBase):
                     dashboard_data.get("owners", []),
                 )
             ),
-            "IsCertified": str(
-                True if dashboard_data.get("certified_by") else False
-            ).lower(),
+            "IsCertified": str(bool(dashboard_data.get("certified_by"))).lower(),
         }
 
         if dashboard_data.get("certified_by"):

datahub/ingestion/source/tableau/tableau_common.py
@@ -902,7 +902,7 @@ def get_unique_custom_sql(custom_sql_list: List[dict]) -> List[dict]:
             "name": custom_sql.get("name"),
             # We assume that this is unsupported custom sql if "actual tables that this query references"
             # are missing from api result.
-            "isUnsupportedCustomSql": True if not custom_sql.get("tables") else False,
+            "isUnsupportedCustomSql": not custom_sql.get("tables"),
             "query": custom_sql.get("query"),
             "connectionType": custom_sql.get("connectionType"),
             "columns": custom_sql.get("columns"),

datahub/ingestion/source/unity/ge_profiler.py
@@ -1,3 +1,4 @@
+import concurrent.futures
 import logging
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from dataclasses import dataclass, field
@@ -91,7 +92,7 @@ class UnityCatalogGEProfiler(GenericProfiler):
                 profile_requests.append(profile_request)
                 if i > 0 and i % 100 == 0:
                     logger.info(f"Finished table-level profiling for {i} tables")
-        except TimeoutError:
+        except (TimeoutError, concurrent.futures.TimeoutError):
             logger.warning("Timed out waiting to complete table-level profiling.")
 
         if len(profile_requests) == 0:
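Context for the widened except clause: as_completed() raises concurrent.futures.TimeoutError, which on Python < 3.11 is a separate class from the builtin TimeoutError (the two were only unified in 3.11), so catching only the builtin could miss the timeout. A small self-contained sketch of the pattern (names are illustrative, not from the source):

    import concurrent.futures
    import time
    from concurrent.futures import ThreadPoolExecutor, as_completed

    def slow_profile() -> str:
        time.sleep(2)  # stand-in for a slow table-level profiling call
        return "done"

    with ThreadPoolExecutor(max_workers=1) as executor:
        futures = [executor.submit(slow_profile)]
        try:
            for future in as_completed(futures, timeout=0.1):
                future.result()
        except (TimeoutError, concurrent.futures.TimeoutError):
            # Raised as concurrent.futures.TimeoutError on Python < 3.11;
            # from 3.11 onward that name is an alias of the builtin TimeoutError.
            print("timed out waiting for profiling")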

datahub/lite/duckdb_lite.py
@@ -760,9 +760,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
                 entity_id=[str(data_platform_urn), data_platform_instance],
             )
             self._create_edges_from_data_platform_instance(data_platform_instance_urn)
-        elif isinstance(aspect, ChartInfoClass) or isinstance(
-            aspect, DashboardInfoClass
-        ):
+        elif isinstance(aspect, (ChartInfoClass, DashboardInfoClass)):
             urn = Urn.from_string(entity_urn)
             self.add_edge(
                 entity_urn,

datahub/metadata/_schema_classes.py
@@ -9326,13 +9326,16 @@ class DataProcessInstanceInputClass(_Aspect):
 
     def __init__(self,
         inputs: List[str],
+        inputEdges: Union[None, List["EdgeClass"]]=None,
     ):
         super().__init__()
 
         self.inputs = inputs
+        self.inputEdges = inputEdges
 
     def _restore_defaults(self) -> None:
         self.inputs = list()
+        self.inputEdges = self.RECORD_SCHEMA.fields_dict["inputEdges"].default
 
 
     @property
@@ -9345,6 +9348,18 @@ class DataProcessInstanceInputClass(_Aspect):
         self._inner_dict['inputs'] = value
 
 
+    @property
+    def inputEdges(self) -> Union[None, List["EdgeClass"]]:
+        """Input assets consumed by the data process instance, with additional metadata.
+        Counts as lineage.
+        Will eventually deprecate the inputs field."""
+        return self._inner_dict.get('inputEdges')  # type: ignore
+
+    @inputEdges.setter
+    def inputEdges(self, value: Union[None, List["EdgeClass"]]) -> None:
+        self._inner_dict['inputEdges'] = value
+
+
 class DataProcessInstanceOutputClass(_Aspect):
     """Information about the outputs of a Data process"""
 
@@ -9355,18 +9370,21 @@ class DataProcessInstanceOutputClass(_Aspect):
 
     def __init__(self,
         outputs: List[str],
+        outputEdges: Union[None, List["EdgeClass"]]=None,
     ):
         super().__init__()
 
         self.outputs = outputs
+        self.outputEdges = outputEdges
 
     def _restore_defaults(self) -> None:
         self.outputs = list()
+        self.outputEdges = self.RECORD_SCHEMA.fields_dict["outputEdges"].default
 
 
     @property
     def outputs(self) -> List[str]:
-        """Output datasets to be produced"""
+        """Output assets produced"""
         return self._inner_dict.get('outputs')  # type: ignore
 
     @outputs.setter
@@ -9374,6 +9392,18 @@ class DataProcessInstanceOutputClass(_Aspect):
         self._inner_dict['outputs'] = value
 
 
+    @property
+    def outputEdges(self) -> Union[None, List["EdgeClass"]]:
+        """Output assets produced by the data process instance during processing, with additional metadata.
+        Counts as lineage.
+        Will eventually deprecate the outputs field."""
+        return self._inner_dict.get('outputEdges')  # type: ignore
+
+    @outputEdges.setter
+    def outputEdges(self, value: Union[None, List["EdgeClass"]]) -> None:
+        self._inner_dict['outputEdges'] = value
+
+
 class DataProcessInstancePropertiesClass(_Aspect):
     """The inputs and outputs of this data process"""

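A hedged usage sketch of the new fields (URNs are made up; assuming EdgeClass only needs destinationUrn here), showing how the edge-based lineage can be populated alongside the legacy list fields:

    from datahub.metadata.schema_classes import (
        DataProcessInstanceInputClass,
        DataProcessInstanceOutputClass,
        EdgeClass,
    )

    input_urn = "urn:li:dataset:(urn:li:dataPlatform:s3,raw.events,PROD)"
    output_urn = "urn:li:dataset:(urn:li:dataPlatform:snowflake,analytics.events,PROD)"

    dpi_input = DataProcessInstanceInputClass(
        inputs=[input_urn],  # legacy field, still required
        inputEdges=[EdgeClass(destinationUrn=input_urn)],
    )
    dpi_output = DataProcessInstanceOutputClass(
        outputs=[output_urn],
        outputEdges=[EdgeClass(destinationUrn=output_urn)],
    )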

datahub/metadata/schema.avsc
@@ -16749,8 +16749,6 @@
               "dataset",
               "mlModel"
             ],
-            "isLineage": true,
-            "isUpstream": false,
             "name": "Produces"
           }
         },
@@ -16770,7 +16768,35 @@
           "items": "string"
         },
         "name": "outputs",
-        "doc": "Output datasets to be produced"
+        "doc": "Output assets produced"
+      },
+      {
+        "Relationship": {
+          "/*/destinationUrn": {
+            "createdActor": "outputEdges/*/created/actor",
+            "createdOn": "outputEdges/*/created/time",
+            "entityTypes": [
+              "dataset",
+              "mlModel"
+            ],
+            "isLineage": true,
+            "isUpstream": false,
+            "name": "DataProcessInstanceProduces",
+            "properties": "outputEdges/*/properties",
+            "updatedActor": "outputEdges/*/lastModified/actor",
+            "updatedOn": "outputEdges/*/lastModified/time"
+          }
+        },
+        "type": [
+          "null",
+          {
+            "type": "array",
+            "items": "com.linkedin.pegasus2avro.common.Edge"
+          }
+        ],
+        "name": "outputEdges",
+        "default": null,
+        "doc": "Output assets produced by the data process instance during processing, with additional metadata.\nCounts as lineage.\nWill eventually deprecate the outputs field."
       }
     ],
     "doc": "Information about the outputs of a Data process"
@@ -16977,7 +17003,6 @@
               "dataset",
               "mlModel"
             ],
-            "isLineage": true,
             "name": "Consumes"
           }
         },
@@ -16998,6 +17023,33 @@
         },
         "name": "inputs",
         "doc": "Input assets consumed"
+      },
+      {
+        "Relationship": {
+          "/*/destinationUrn": {
+            "createdActor": "inputEdges/*/created/actor",
+            "createdOn": "inputEdges/*/created/time",
+            "entityTypes": [
+              "dataset",
+              "mlModel"
+            ],
+            "isLineage": true,
+            "name": "DataProcessInstanceConsumes",
+            "properties": "inputEdges/*/properties",
+            "updatedActor": "inputEdges/*/lastModified/actor",
+            "updatedOn": "inputEdges/*/lastModified/time"
+          }
+        },
+        "type": [
+          "null",
+          {
+            "type": "array",
+            "items": "com.linkedin.pegasus2avro.common.Edge"
+          }
+        ],
+        "name": "inputEdges",
+        "default": null,
+        "doc": "Input assets consumed by the data process instance, with additional metadata.\nCounts as lineage.\nWill eventually deprecate the inputs field."
       }
     ],
     "doc": "Information about the inputs datasets of a Data process"