acryl-datahub-cloud 0.3.8.3rc1__py3-none-any.whl → 0.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

Files changed (79)
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/acryl_cs_issues/acryl_customer.py +1 -1
  3. acryl_datahub_cloud/action_request/__init__.py +0 -0
  4. acryl_datahub_cloud/action_request/action_request_owner_source.py +174 -0
  5. acryl_datahub_cloud/api/__init__.py +1 -1
  6. acryl_datahub_cloud/api/client.py +2 -2
  7. acryl_datahub_cloud/datahub_reporting/datahub_dataset.py +6 -6
  8. acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +69 -35
  9. acryl_datahub_cloud/datahub_reporting/extract_sql.py +4 -4
  10. acryl_datahub_cloud/datahub_usage_reporting/usage_feature_patch_builder.py +21 -21
  11. acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +14 -13
  12. acryl_datahub_cloud/metadata/_urns/urn_defs.py +1130 -484
  13. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/actionrequest/__init__.py +6 -0
  14. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/dataplatforminstance/__init__.py +2 -0
  15. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/dataset/__init__.py +2 -0
  16. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/executorglobalconfig/__init__.py +15 -0
  17. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/executorpool/__init__.py +4 -0
  18. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +4 -0
  19. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metric/__init__.py +29 -0
  20. acryl_datahub_cloud/metadata/schema.avsc +839 -49
  21. acryl_datahub_cloud/metadata/schema_classes.py +1286 -63
  22. acryl_datahub_cloud/metadata/schemas/ActionRequestInfo.avsc +422 -12
  23. acryl_datahub_cloud/metadata/schemas/ActionRequestStatus.avsc +12 -0
  24. acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +5 -3
  25. acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +5 -3
  26. acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +5 -3
  27. acryl_datahub_cloud/metadata/schemas/BusinessAttributeInfo.avsc +6 -2
  28. acryl_datahub_cloud/metadata/schemas/BusinessAttributes.avsc +6 -0
  29. acryl_datahub_cloud/metadata/schemas/ChartInfo.avsc +1 -0
  30. acryl_datahub_cloud/metadata/schemas/ChartKey.avsc +3 -3
  31. acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -1
  32. acryl_datahub_cloud/metadata/schemas/CorpGroupKey.avsc +1 -1
  33. acryl_datahub_cloud/metadata/schemas/DashboardKey.avsc +3 -3
  34. acryl_datahub_cloud/metadata/schemas/DataFlowKey.avsc +1 -1
  35. acryl_datahub_cloud/metadata/schemas/DataHubActionInfo.avsc +1 -1
  36. acryl_datahub_cloud/metadata/schemas/DataHubConnectionKey.avsc +2 -1
  37. acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceInfo.avsc +9 -4
  38. acryl_datahub_cloud/metadata/schemas/DataHubMetricCubeDefinition.avsc +185 -0
  39. acryl_datahub_cloud/metadata/schemas/DataHubMetricCubeEvent.avsc +184 -0
  40. acryl_datahub_cloud/metadata/schemas/DataHubMetricCubeKey.avsc +22 -0
  41. acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +4 -4
  42. acryl_datahub_cloud/metadata/schemas/DataPlatformInstanceKey.avsc +2 -1
  43. acryl_datahub_cloud/metadata/schemas/DataProcessInstanceInput.avsc +132 -2
  44. acryl_datahub_cloud/metadata/schemas/DataProcessInstanceOutput.avsc +131 -1
  45. acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +14 -13
  46. acryl_datahub_cloud/metadata/schemas/EditableSchemaMetadata.avsc +6 -2
  47. acryl_datahub_cloud/metadata/schemas/ExecutionRequestInput.avsc +6 -1
  48. acryl_datahub_cloud/metadata/schemas/ExecutionRequestSignal.avsc +1 -1
  49. acryl_datahub_cloud/metadata/schemas/FormInfo.avsc +5 -0
  50. acryl_datahub_cloud/metadata/schemas/GlossaryTerms.avsc +3 -1
  51. acryl_datahub_cloud/metadata/schemas/IcebergCatalogInfo.avsc +28 -0
  52. acryl_datahub_cloud/metadata/schemas/IcebergWarehouseInfo.avsc +96 -0
  53. acryl_datahub_cloud/metadata/schemas/IncidentActivityEvent.avsc +4 -1
  54. acryl_datahub_cloud/metadata/schemas/IncidentInfo.avsc +4 -1
  55. acryl_datahub_cloud/metadata/schemas/InputFields.avsc +3 -1
  56. acryl_datahub_cloud/metadata/schemas/MLFeatureKey.avsc +1 -1
  57. acryl_datahub_cloud/metadata/schemas/MLFeatureTableKey.avsc +1 -1
  58. acryl_datahub_cloud/metadata/schemas/MLModelGroupKey.avsc +1 -1
  59. acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +3 -3
  60. acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyKey.avsc +1 -1
  61. acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +399 -176
  62. acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +6 -4
  63. acryl_datahub_cloud/metadata/schemas/NotebookKey.avsc +1 -1
  64. acryl_datahub_cloud/metadata/schemas/Operation.avsc +4 -2
  65. acryl_datahub_cloud/metadata/schemas/RemoteExecutorGlobalConfigKey.avsc +21 -0
  66. acryl_datahub_cloud/metadata/schemas/RemoteExecutorPoolGlobalConfig.avsc +16 -0
  67. acryl_datahub_cloud/metadata/schemas/RemoteExecutorPoolInfo.avsc +85 -0
  68. acryl_datahub_cloud/metadata/schemas/RemoteExecutorPoolKey.avsc +1 -1
  69. acryl_datahub_cloud/metadata/schemas/RemoteExecutorStatus.avsc +5 -5
  70. acryl_datahub_cloud/metadata/schemas/SchemaFieldKey.avsc +2 -2
  71. acryl_datahub_cloud/metadata/schemas/SchemaMetadata.avsc +3 -1
  72. acryl_datahub_cloud/metadata/schemas/VersionProperties.avsc +18 -0
  73. acryl_datahub_cloud/metadata/schemas/VersionSetProperties.avsc +5 -0
  74. {acryl_datahub_cloud-0.3.8.3rc1.dist-info → acryl_datahub_cloud-0.3.9.dist-info}/METADATA +35 -35
  75. {acryl_datahub_cloud-0.3.8.3rc1.dist-info → acryl_datahub_cloud-0.3.9.dist-info}/RECORD +78 -68
  76. {acryl_datahub_cloud-0.3.8.3rc1.dist-info → acryl_datahub_cloud-0.3.9.dist-info}/entry_points.txt +1 -0
  77. acryl_datahub_cloud/api/entity_versioning.py +0 -167
  78. {acryl_datahub_cloud-0.3.8.3rc1.dist-info → acryl_datahub_cloud-0.3.9.dist-info}/WHEEL +0 -0
  79. {acryl_datahub_cloud-0.3.8.3rc1.dist-info → acryl_datahub_cloud-0.3.9.dist-info}/top_level.txt +0 -0
@@ -370,7 +370,7 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
370
370
  for doc in results:
371
371
  if "platform" not in doc["_source"] or not doc["_source"]["platform"]:
372
372
  logger.warning(
373
- f"Platform not found in query { doc['_source']['urn']}. Skipping..."
373
+ f"Platform not found in query {doc['_source']['urn']}. Skipping..."
374
374
  )
375
375
  continue
376
376
 
@@ -688,10 +688,10 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
688
688
 
689
689
  @staticmethod
690
690
  def polars_to_arrow_schema(
691
- polars_schema: Dict[str, Union[DataTypeClass, polars.DataType]]
691
+ polars_schema: Dict[str, Union[DataTypeClass, polars.DataType]],
692
692
  ) -> pa.Schema:
693
693
  def convert_dtype(
694
- polars_dtype: Union[DataTypeClass, polars.DataType]
694
+ polars_dtype: Union[DataTypeClass, polars.DataType],
695
695
  ) -> pa.DataType:
696
696
  type_mapping: Dict[Union[DataTypeClass, polars.DataType], pa.DataType] = {
697
697
  polars.Boolean(): pa.bool_(),
@@ -949,14 +949,14 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
949
949
  def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
950
950
  if self.config.dataset_usage_enabled:
951
951
  with self.report.dataset_usage_processing_time as timer:
952
- self.report.report_ingestion_stage_start("generate dataset usage")
952
+ self.report.new_stage("generate dataset usage")
953
953
  yield from self.generate_dataset_usage_mcps()
954
954
  time_taken = timer.elapsed_seconds()
955
955
  logger.info(f"Dataset Usage generation took {time_taken:.3f} seconds")
956
956
 
957
957
  if self.config.dashboard_usage_enabled:
958
958
  with self.report.dashboard_usage_processing_time as timer:
959
- self.report.report_ingestion_stage_start("generate dashboard usage")
959
+ self.report.new_stage("generate dashboard usage")
960
960
  yield from self.generate_dashboard_usage_mcps()
961
961
 
962
962
  time_taken = timer.elapsed_seconds()
@@ -964,7 +964,7 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
964
964
 
965
965
  if self.config.chart_usage_enabled:
966
966
  with self.report.chart_usage_processing_time as timer:
967
- self.report.report_ingestion_stage_start("generate chart usage")
967
+ self.report.new_stage("generate chart usage")
968
968
 
969
969
  yield from self.generate_chart_usage_mcps()
970
970
 
@@ -973,7 +973,7 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
973
973
 
974
974
  if self.config.query_usage_enabled:
975
975
  with self.report.query_usage_processing_time as timer:
976
- self.report.report_ingestion_stage_start("generate query usage")
976
+ self.report.new_stage("generate query usage")
977
977
 
978
978
  yield from self.generate_query_usage_mcps()
979
979
 
@@ -1260,7 +1260,8 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
1260
1260
  # We drop last_modified_at to not use it in merge because we are getting last_modified_at from the usage index
1261
1261
  df_with_search_scores = (
1262
1262
  entities_lf.filter(
1263
- polars.col("combinedSearchRankingMultiplier").is_not_null()
1263
+ polars.col("combinedSearchRankingMultiplier")
1264
+ .is_not_null()
1264
1265
  # We only want to downrank datasets that have a search score multiplier greater than 1. 1 is the minimum score of a dataset
1265
1266
  .and_(polars.col("combinedSearchRankingMultiplier").ne(1))
1266
1267
  ) # noqa: E712
@@ -1277,7 +1278,7 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
1277
1278
  usages_lf = df_with_search_scores.join(
1278
1279
  usages_lf, on="urn", how="full", suffix="_right"
1279
1280
  )
1280
- # Merge all common fields automatically
1281
+ ## Merge all common fields automatically
1281
1282
  for common_field in common_fields:
1282
1283
  right_col = f"{common_field}_right"
1283
1284
  usages_lf = usages_lf.with_columns(
@@ -1451,9 +1452,9 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
1451
1452
  if not self.config.streaming_mode:
1452
1453
  return polars.LazyFrame(data, schema)
1453
1454
  else:
1454
- assert (
1455
- self.temp_dir is not None
1456
- ), "In Streaming mode temp dir should be set. Normally this should not happen..."
1455
+ assert self.temp_dir is not None, (
1456
+ "In Streaming mode temp dir should be set. Normally this should not happen..."
1457
+ )
1457
1458
 
1458
1459
  with tempfile.NamedTemporaryFile(
1459
1460
  delete=False,
@@ -1623,7 +1624,7 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
1623
1624
  time_taken = timer.elapsed_seconds()
1624
1625
  processed_count += len(results["hits"]["hits"])
1625
1626
  logger.info(
1626
- f"Processed {len(results['hits']['hits'''])} data from {index} index in {time_taken:.3f} seconds. Total: {processed_count} processed."
1627
+ f"Processed {len(results['hits']['hits'])} data from {index} index in {time_taken:.3f} seconds. Total: {processed_count} processed."
1627
1628
  )
1628
1629
  if len(results["hits"]["hits"]) < batch_size:
1629
1630
  break