acryl-datahub 1.3.0.1rc9__py3-none-any.whl → 1.3.1.1__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of acryl-datahub might be problematic.

Files changed (263)
  1. {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/METADATA +2550 -2543
  2. {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/RECORD +263 -261
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/common/serialized_value.py +2 -2
  5. datahub/api/entities/corpgroup/corpgroup.py +11 -6
  6. datahub/api/entities/corpuser/corpuser.py +11 -11
  7. datahub/api/entities/dataproduct/dataproduct.py +47 -27
  8. datahub/api/entities/dataset/dataset.py +32 -21
  9. datahub/api/entities/external/lake_formation_external_entites.py +5 -6
  10. datahub/api/entities/external/unity_catalog_external_entites.py +5 -7
  11. datahub/api/entities/forms/forms.py +16 -14
  12. datahub/api/entities/structuredproperties/structuredproperties.py +23 -16
  13. datahub/cli/check_cli.py +2 -2
  14. datahub/cli/config_utils.py +3 -3
  15. datahub/cli/lite_cli.py +9 -7
  16. datahub/cli/migrate.py +4 -4
  17. datahub/cli/quickstart_versioning.py +3 -3
  18. datahub/cli/specific/group_cli.py +1 -1
  19. datahub/cli/specific/structuredproperties_cli.py +1 -1
  20. datahub/cli/specific/user_cli.py +1 -1
  21. datahub/configuration/common.py +14 -2
  22. datahub/configuration/connection_resolver.py +2 -2
  23. datahub/configuration/git.py +47 -30
  24. datahub/configuration/import_resolver.py +2 -2
  25. datahub/configuration/kafka.py +4 -3
  26. datahub/configuration/time_window_config.py +26 -26
  27. datahub/configuration/validate_field_deprecation.py +2 -2
  28. datahub/configuration/validate_field_removal.py +2 -2
  29. datahub/configuration/validate_field_rename.py +2 -2
  30. datahub/configuration/validate_multiline_string.py +2 -1
  31. datahub/emitter/kafka_emitter.py +3 -1
  32. datahub/emitter/rest_emitter.py +2 -4
  33. datahub/ingestion/api/decorators.py +1 -1
  34. datahub/ingestion/api/report.py +1 -1
  35. datahub/ingestion/api/sink.py +1 -1
  36. datahub/ingestion/api/source.py +1 -1
  37. datahub/ingestion/glossary/datahub_classifier.py +11 -8
  38. datahub/ingestion/graph/client.py +5 -1
  39. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
  40. datahub/ingestion/reporting/file_reporter.py +5 -4
  41. datahub/ingestion/run/pipeline.py +7 -6
  42. datahub/ingestion/run/pipeline_config.py +12 -14
  43. datahub/ingestion/run/sink_callback.py +1 -1
  44. datahub/ingestion/sink/datahub_rest.py +6 -4
  45. datahub/ingestion/source/abs/config.py +19 -19
  46. datahub/ingestion/source/abs/datalake_profiler_config.py +11 -13
  47. datahub/ingestion/source/abs/source.py +2 -2
  48. datahub/ingestion/source/aws/aws_common.py +1 -1
  49. datahub/ingestion/source/aws/glue.py +6 -4
  50. datahub/ingestion/source/aws/sagemaker.py +1 -1
  51. datahub/ingestion/source/azure/azure_common.py +8 -12
  52. datahub/ingestion/source/bigquery_v2/bigquery.py +1 -1
  53. datahub/ingestion/source/bigquery_v2/bigquery_config.py +43 -30
  54. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -1
  55. datahub/ingestion/source/cassandra/cassandra.py +1 -1
  56. datahub/ingestion/source/common/gcp_credentials_config.py +10 -10
  57. datahub/ingestion/source/data_lake_common/path_spec.py +85 -89
  58. datahub/ingestion/source/datahub/config.py +8 -8
  59. datahub/ingestion/source/datahub/datahub_source.py +1 -1
  60. datahub/ingestion/source/dbt/dbt_cloud.py +9 -3
  61. datahub/ingestion/source/dbt/dbt_common.py +39 -37
  62. datahub/ingestion/source/dbt/dbt_core.py +10 -12
  63. datahub/ingestion/source/debug/datahub_debug.py +1 -1
  64. datahub/ingestion/source/delta_lake/config.py +6 -4
  65. datahub/ingestion/source/dremio/dremio_api.py +212 -78
  66. datahub/ingestion/source/dremio/dremio_config.py +10 -6
  67. datahub/ingestion/source/dremio/dremio_entities.py +55 -39
  68. datahub/ingestion/source/dremio/dremio_profiling.py +14 -3
  69. datahub/ingestion/source/dremio/dremio_source.py +24 -26
  70. datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
  71. datahub/ingestion/source/elastic_search.py +110 -32
  72. datahub/ingestion/source/excel/source.py +1 -1
  73. datahub/ingestion/source/feast.py +1 -1
  74. datahub/ingestion/source/file.py +5 -4
  75. datahub/ingestion/source/fivetran/config.py +17 -16
  76. datahub/ingestion/source/fivetran/fivetran.py +2 -2
  77. datahub/ingestion/source/gc/datahub_gc.py +1 -1
  78. datahub/ingestion/source/gcs/gcs_source.py +8 -10
  79. datahub/ingestion/source/ge_profiling_config.py +8 -5
  80. datahub/ingestion/source/grafana/grafana_api.py +2 -2
  81. datahub/ingestion/source/grafana/grafana_config.py +4 -3
  82. datahub/ingestion/source/grafana/grafana_source.py +1 -1
  83. datahub/ingestion/source/grafana/models.py +23 -5
  84. datahub/ingestion/source/hex/api.py +7 -5
  85. datahub/ingestion/source/hex/hex.py +4 -3
  86. datahub/ingestion/source/iceberg/iceberg.py +1 -1
  87. datahub/ingestion/source/iceberg/iceberg_common.py +5 -3
  88. datahub/ingestion/source/identity/azure_ad.py +1 -1
  89. datahub/ingestion/source/identity/okta.py +10 -10
  90. datahub/ingestion/source/kafka/kafka.py +1 -1
  91. datahub/ingestion/source/ldap.py +1 -1
  92. datahub/ingestion/source/looker/looker_common.py +7 -5
  93. datahub/ingestion/source/looker/looker_config.py +21 -20
  94. datahub/ingestion/source/looker/lookml_config.py +47 -47
  95. datahub/ingestion/source/metabase.py +8 -8
  96. datahub/ingestion/source/metadata/business_glossary.py +2 -2
  97. datahub/ingestion/source/metadata/lineage.py +13 -8
  98. datahub/ingestion/source/mlflow.py +1 -1
  99. datahub/ingestion/source/mode.py +6 -4
  100. datahub/ingestion/source/mongodb.py +4 -3
  101. datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
  102. datahub/ingestion/source/nifi.py +17 -23
  103. datahub/ingestion/source/openapi.py +6 -8
  104. datahub/ingestion/source/powerbi/config.py +33 -32
  105. datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +2 -2
  106. datahub/ingestion/source/powerbi/powerbi.py +1 -1
  107. datahub/ingestion/source/powerbi_report_server/report_server.py +2 -2
  108. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +8 -6
  109. datahub/ingestion/source/preset.py +8 -8
  110. datahub/ingestion/source/pulsar.py +1 -1
  111. datahub/ingestion/source/qlik_sense/data_classes.py +15 -8
  112. datahub/ingestion/source/qlik_sense/qlik_api.py +7 -7
  113. datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -1
  114. datahub/ingestion/source/redshift/config.py +18 -20
  115. datahub/ingestion/source/redshift/redshift.py +2 -2
  116. datahub/ingestion/source/redshift/usage.py +23 -3
  117. datahub/ingestion/source/s3/config.py +83 -62
  118. datahub/ingestion/source/s3/datalake_profiler_config.py +11 -13
  119. datahub/ingestion/source/s3/source.py +8 -5
  120. datahub/ingestion/source/sac/sac.py +5 -4
  121. datahub/ingestion/source/salesforce.py +3 -2
  122. datahub/ingestion/source/schema/json_schema.py +2 -2
  123. datahub/ingestion/source/sigma/data_classes.py +3 -2
  124. datahub/ingestion/source/sigma/sigma.py +1 -1
  125. datahub/ingestion/source/sigma/sigma_api.py +7 -7
  126. datahub/ingestion/source/slack/slack.py +1 -1
  127. datahub/ingestion/source/snaplogic/snaplogic.py +1 -1
  128. datahub/ingestion/source/snowflake/snowflake_assertion.py +1 -1
  129. datahub/ingestion/source/snowflake/snowflake_config.py +35 -31
  130. datahub/ingestion/source/snowflake/snowflake_connection.py +35 -13
  131. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -3
  132. datahub/ingestion/source/snowflake/snowflake_queries.py +28 -4
  133. datahub/ingestion/source/sql/athena.py +1 -1
  134. datahub/ingestion/source/sql/clickhouse.py +4 -2
  135. datahub/ingestion/source/sql/cockroachdb.py +1 -1
  136. datahub/ingestion/source/sql/druid.py +1 -1
  137. datahub/ingestion/source/sql/hana.py +1 -1
  138. datahub/ingestion/source/sql/hive.py +7 -5
  139. datahub/ingestion/source/sql/hive_metastore.py +1 -1
  140. datahub/ingestion/source/sql/mssql/source.py +13 -6
  141. datahub/ingestion/source/sql/mysql.py +1 -1
  142. datahub/ingestion/source/sql/oracle.py +17 -10
  143. datahub/ingestion/source/sql/postgres.py +2 -2
  144. datahub/ingestion/source/sql/presto.py +1 -1
  145. datahub/ingestion/source/sql/sql_config.py +8 -9
  146. datahub/ingestion/source/sql/sql_generic.py +1 -1
  147. datahub/ingestion/source/sql/teradata.py +1 -1
  148. datahub/ingestion/source/sql/trino.py +1 -1
  149. datahub/ingestion/source/sql/vertica.py +5 -4
  150. datahub/ingestion/source/sql_queries.py +174 -22
  151. datahub/ingestion/source/state/checkpoint.py +2 -2
  152. datahub/ingestion/source/state/entity_removal_state.py +2 -1
  153. datahub/ingestion/source/state/stateful_ingestion_base.py +55 -45
  154. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +1 -1
  155. datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
  156. datahub/ingestion/source/superset.py +9 -9
  157. datahub/ingestion/source/tableau/tableau.py +14 -16
  158. datahub/ingestion/source/unity/azure_auth_config.py +15 -0
  159. datahub/ingestion/source/unity/config.py +51 -34
  160. datahub/ingestion/source/unity/connection.py +7 -1
  161. datahub/ingestion/source/unity/connection_test.py +1 -1
  162. datahub/ingestion/source/unity/proxy.py +216 -7
  163. datahub/ingestion/source/unity/proxy_types.py +91 -0
  164. datahub/ingestion/source/unity/source.py +29 -3
  165. datahub/ingestion/source/usage/clickhouse_usage.py +1 -1
  166. datahub/ingestion/source/usage/starburst_trino_usage.py +1 -1
  167. datahub/ingestion/source/usage/usage_common.py +5 -3
  168. datahub/ingestion/source_config/csv_enricher.py +7 -6
  169. datahub/ingestion/source_config/operation_config.py +7 -4
  170. datahub/ingestion/source_config/pulsar.py +11 -15
  171. datahub/ingestion/transformer/add_dataset_browse_path.py +1 -1
  172. datahub/ingestion/transformer/add_dataset_dataproduct.py +6 -5
  173. datahub/ingestion/transformer/add_dataset_ownership.py +3 -3
  174. datahub/ingestion/transformer/add_dataset_properties.py +2 -2
  175. datahub/ingestion/transformer/add_dataset_schema_tags.py +2 -2
  176. datahub/ingestion/transformer/add_dataset_schema_terms.py +2 -2
  177. datahub/ingestion/transformer/add_dataset_tags.py +3 -3
  178. datahub/ingestion/transformer/add_dataset_terms.py +3 -3
  179. datahub/ingestion/transformer/dataset_domain.py +3 -3
  180. datahub/ingestion/transformer/dataset_domain_based_on_tags.py +1 -1
  181. datahub/ingestion/transformer/extract_dataset_tags.py +1 -1
  182. datahub/ingestion/transformer/extract_ownership_from_tags.py +1 -1
  183. datahub/ingestion/transformer/mark_dataset_status.py +1 -1
  184. datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py +1 -1
  185. datahub/ingestion/transformer/pattern_cleanup_ownership.py +1 -1
  186. datahub/ingestion/transformer/remove_dataset_ownership.py +1 -1
  187. datahub/ingestion/transformer/replace_external_url.py +2 -2
  188. datahub/ingestion/transformer/set_browse_path.py +1 -1
  189. datahub/ingestion/transformer/tags_to_terms.py +1 -1
  190. datahub/lite/duckdb_lite.py +1 -1
  191. datahub/lite/lite_util.py +2 -2
  192. datahub/metadata/_internal_schema_classes.py +62 -2
  193. datahub/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +2 -0
  194. datahub/metadata/schema.avsc +271 -91
  195. datahub/metadata/schemas/ApplicationProperties.avsc +5 -2
  196. datahub/metadata/schemas/AssertionInfo.avsc +48 -5
  197. datahub/metadata/schemas/BusinessAttributeInfo.avsc +8 -4
  198. datahub/metadata/schemas/ChartInfo.avsc +12 -5
  199. datahub/metadata/schemas/ContainerProperties.avsc +12 -5
  200. datahub/metadata/schemas/CorpGroupEditableInfo.avsc +2 -1
  201. datahub/metadata/schemas/CorpGroupInfo.avsc +7 -3
  202. datahub/metadata/schemas/CorpUserInfo.avsc +5 -2
  203. datahub/metadata/schemas/CorpUserSettings.avsc +4 -2
  204. datahub/metadata/schemas/DashboardInfo.avsc +16 -4
  205. datahub/metadata/schemas/DataFlowInfo.avsc +11 -5
  206. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +4 -2
  207. datahub/metadata/schemas/DataJobInfo.avsc +9 -4
  208. datahub/metadata/schemas/DataPlatformInfo.avsc +3 -1
  209. datahub/metadata/schemas/DataPlatformInstanceProperties.avsc +5 -2
  210. datahub/metadata/schemas/DataProductProperties.avsc +5 -2
  211. datahub/metadata/schemas/DataTypeInfo.avsc +5 -0
  212. datahub/metadata/schemas/DatasetKey.avsc +2 -1
  213. datahub/metadata/schemas/DatasetProperties.avsc +12 -5
  214. datahub/metadata/schemas/DomainProperties.avsc +7 -3
  215. datahub/metadata/schemas/EditableContainerProperties.avsc +2 -1
  216. datahub/metadata/schemas/EditableDashboardProperties.avsc +2 -1
  217. datahub/metadata/schemas/EditableDataFlowProperties.avsc +2 -1
  218. datahub/metadata/schemas/EditableDataJobProperties.avsc +2 -1
  219. datahub/metadata/schemas/EditableDatasetProperties.avsc +2 -1
  220. datahub/metadata/schemas/EditableERModelRelationshipProperties.avsc +2 -1
  221. datahub/metadata/schemas/EditableMLFeatureProperties.avsc +2 -1
  222. datahub/metadata/schemas/EditableMLFeatureTableProperties.avsc +2 -1
  223. datahub/metadata/schemas/EditableMLModelGroupProperties.avsc +2 -1
  224. datahub/metadata/schemas/EditableMLModelProperties.avsc +2 -1
  225. datahub/metadata/schemas/EditableNotebookProperties.avsc +2 -1
  226. datahub/metadata/schemas/EditableSchemaMetadata.avsc +5 -3
  227. datahub/metadata/schemas/EntityTypeInfo.avsc +5 -0
  228. datahub/metadata/schemas/GlobalTags.avsc +3 -2
  229. datahub/metadata/schemas/GlossaryNodeInfo.avsc +3 -1
  230. datahub/metadata/schemas/GlossaryTermInfo.avsc +3 -1
  231. datahub/metadata/schemas/InputFields.avsc +3 -2
  232. datahub/metadata/schemas/MLFeatureKey.avsc +3 -1
  233. datahub/metadata/schemas/MLFeatureTableKey.avsc +3 -1
  234. datahub/metadata/schemas/MLModelDeploymentKey.avsc +3 -1
  235. datahub/metadata/schemas/MLModelGroupKey.avsc +3 -1
  236. datahub/metadata/schemas/MLModelKey.avsc +3 -1
  237. datahub/metadata/schemas/MLModelProperties.avsc +4 -2
  238. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +3 -1
  239. datahub/metadata/schemas/MetadataChangeEvent.avsc +124 -50
  240. datahub/metadata/schemas/NotebookInfo.avsc +5 -2
  241. datahub/metadata/schemas/Ownership.avsc +3 -2
  242. datahub/metadata/schemas/QuerySubjects.avsc +1 -1
  243. datahub/metadata/schemas/RoleProperties.avsc +3 -1
  244. datahub/metadata/schemas/SchemaFieldInfo.avsc +3 -1
  245. datahub/metadata/schemas/SchemaMetadata.avsc +3 -2
  246. datahub/metadata/schemas/StructuredPropertyDefinition.avsc +15 -4
  247. datahub/metadata/schemas/TagProperties.avsc +3 -1
  248. datahub/metadata/schemas/TestInfo.avsc +2 -1
  249. datahub/sdk/__init__.py +1 -0
  250. datahub/sdk/_all_entities.py +2 -0
  251. datahub/sdk/search_filters.py +68 -40
  252. datahub/sdk/tag.py +112 -0
  253. datahub/secret/datahub_secret_store.py +7 -4
  254. datahub/secret/file_secret_store.py +1 -1
  255. datahub/sql_parsing/schema_resolver.py +29 -0
  256. datahub/sql_parsing/sql_parsing_aggregator.py +15 -0
  257. datahub/sql_parsing/sqlglot_lineage.py +5 -2
  258. datahub/testing/check_sql_parser_result.py +2 -2
  259. datahub/utilities/ingest_utils.py +1 -1
  260. {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/WHEEL +0 -0
  261. {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/entry_points.txt +0 -0
  262. {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/licenses/LICENSE +0 -0
  263. {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/top_level.txt +0 -0

datahub/ingestion/source/dremio/dremio_source.py

@@ -55,7 +55,7 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
 from datahub.ingestion.source_report.ingestion_stage import (
     LINEAGE_EXTRACTION,
     METADATA_EXTRACTION,
-    IngestionHighStage,
+    PROFILING,
 )
 from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
     DatasetLineageTypeClass,
@@ -201,7 +201,7 @@ class DremioSource(StatefulIngestionSourceBase):
         return "dremio"

     def _build_source_map(self) -> Dict[str, DremioSourceMapEntry]:
-        dremio_sources = self.dremio_catalog.get_sources()
+        dremio_sources = list(self.dremio_catalog.get_sources())
         source_mappings_config = self.config.source_mappings or []

         source_map = build_dremio_source_map(dremio_sources, source_mappings_config)
@@ -242,9 +242,7 @@ class DremioSource(StatefulIngestionSourceBase):
         )

         # Process Datasets
-        datasets = self.dremio_catalog.get_datasets()
-
-        for dataset_info in datasets:
+        for dataset_info in self.dremio_catalog.get_datasets():
             try:
                 yield from self.process_dataset(dataset_info)
                 logger.info(
@@ -258,10 +256,8 @@ class DremioSource(StatefulIngestionSourceBase):
                     exc=exc,
                 )

-        # Process Glossary Terms
-        glossary_terms = self.dremio_catalog.get_glossary_terms()
-
-        for glossary_term in glossary_terms:
+        # Process Glossary Terms using streaming
+        for glossary_term in self.dremio_catalog.get_glossary_terms():
             try:
                 yield from self.process_glossary_term(glossary_term)
             except Exception as exc:
@@ -283,14 +279,16 @@ class DremioSource(StatefulIngestionSourceBase):
         # Profiling
         if self.config.is_profiling_enabled():
             with (
-                self.report.new_high_stage(IngestionHighStage.PROFILING),
+                self.report.new_stage(PROFILING),
                 ThreadPoolExecutor(
                     max_workers=self.config.profiling.max_workers
                 ) as executor,
             ):
+                # Collect datasets for profiling
+                datasets_for_profiling = list(self.dremio_catalog.get_datasets())
                 future_to_dataset = {
                     executor.submit(self.generate_profiles, dataset): dataset
-                    for dataset in datasets
+                    for dataset in datasets_for_profiling
                 }

                 for future in as_completed(future_to_dataset):
@@ -338,10 +336,10 @@ class DremioSource(StatefulIngestionSourceBase):
             return

         dataset_urn = make_dataset_urn_with_platform_instance(
-            platform=self.get_platform(),
-            name=dataset_name,
-            platform_instance=self.config.platform_instance,
+            platform=make_data_platform_urn(self.get_platform()),
+            name=f"dremio.{dataset_name}",
             env=self.config.env,
+            platform_instance=self.config.platform_instance,
         )

         for dremio_mcp in self.dremio_aspects.populate_dataset_mcp(
@@ -421,10 +419,10 @@ class DremioSource(StatefulIngestionSourceBase):
         schema_str = ".".join(dataset_info.path)
         dataset_name = f"{schema_str}.{dataset_info.resource_name}".lower()
         dataset_urn = make_dataset_urn_with_platform_instance(
-            platform=self.get_platform(),
-            name=dataset_name,
-            platform_instance=self.config.platform_instance,
+            platform=make_data_platform_urn(self.get_platform()),
+            name=f"dremio.{dataset_name}",
             env=self.config.env,
+            platform_instance=self.config.platform_instance,
         )
         yield from self.profiler.get_workunits(dataset_info, dataset_urn)

@@ -436,10 +434,10 @@ class DremioSource(StatefulIngestionSourceBase):
         """
         upstream_urns = [
             make_dataset_urn_with_platform_instance(
-                platform=self.get_platform(),
-                name=upstream_table.lower(),
-                platform_instance=self.config.platform_instance,
+                platform=make_data_platform_urn(self.get_platform()),
+                name=f"dremio.{upstream_table.lower()}",
                 env=self.config.env,
+                platform_instance=self.config.platform_instance,
             )
             for upstream_table in parents
         ]
@@ -498,19 +496,19 @@ class DremioSource(StatefulIngestionSourceBase):
         if query.query and query.affected_dataset:
             upstream_urns = [
                 make_dataset_urn_with_platform_instance(
-                    platform=self.get_platform(),
-                    name=ds.lower(),
-                    platform_instance=self.config.platform_instance,
+                    platform=make_data_platform_urn(self.get_platform()),
+                    name=f"dremio.{ds.lower()}",
                     env=self.config.env,
+                    platform_instance=self.config.platform_instance,
                 )
                 for ds in query.queried_datasets
            ]

             downstream_urn = make_dataset_urn_with_platform_instance(
-                platform=self.get_platform(),
-                name=query.affected_dataset.lower(),
-                platform_instance=self.config.platform_instance,
+                platform=make_data_platform_urn(self.get_platform()),
+                name=f"dremio.{query.affected_dataset.lower()}",
                 env=self.config.env,
+                platform_instance=self.config.platform_instance,
             )

             # Add query to SqlParsingAggregator
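
The recurring edit in the Dremio hunks above wraps the platform in an explicit data-platform URN and prefixes every dataset name with "dremio.". A minimal sketch of the resulting URN, built with DataHub's URN helpers and a hypothetical dataset path (not taken from the diff):

from datahub.emitter.mce_builder import (
    make_data_platform_urn,
    make_dataset_urn_with_platform_instance,
)

dataset_name = "space.folder.orders"  # hypothetical Dremio path, lower-cased as in the source

dataset_urn = make_dataset_urn_with_platform_instance(
    platform=make_data_platform_urn("dremio"),  # was: platform="dremio"
    name=f"dremio.{dataset_name}",              # was: name=dataset_name
    platform_instance=None,
    env="PROD",
)
print(dataset_urn)
# urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.space.folder.orders,PROD)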

datahub/ingestion/source/dynamodb/dynamodb.py

@@ -200,7 +200,7 @@ class DynamoDBSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "DynamoDBSource":
-        config = DynamoDBConfig.parse_obj(config_dict)
+        config = DynamoDBConfig.model_validate(config_dict)
         return cls(ctx, config, "dynamodb")

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
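
This one-line change is the first of many Pydantic v1 → v2 API migrations in this release: parse_obj → model_validate, .dict() → .model_dump(), @validator → @field_validator plus @classmethod, and @root_validator → @model_validator. A minimal, self-contained sketch of the pattern (illustrative only, not code from the package):

from pydantic import BaseModel, field_validator, model_validator

class ExampleConfig(BaseModel):
    host: str
    port: int = 9200

    # v1: @validator("host")  ->  v2: @field_validator("host", mode="after") + @classmethod
    @field_validator("host", mode="after")
    @classmethod
    def strip_trailing_slash(cls, v: str) -> str:
        return v.rstrip("/")

    # v1: @root_validator(skip_on_failure=True) over a values dict
    # v2: @model_validator(mode="after") over the constructed instance
    @model_validator(mode="after")
    def check_port(self) -> "ExampleConfig":
        if not (0 < self.port < 65536):
            raise ValueError("port out of range")
        return self

# v1: ExampleConfig.parse_obj(d) / cfg.dict()  ->  v2: model_validate / model_dump
cfg = ExampleConfig.model_validate({"host": "http://localhost/", "port": 9200})
print(cfg.model_dump())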

datahub/ingestion/source/elastic_search.py

@@ -8,7 +8,7 @@ from hashlib import md5
 from typing import Any, Dict, Generator, Iterable, List, Optional, Tuple, Type, Union

 from elasticsearch import Elasticsearch
-from pydantic import validator
+from pydantic import field_validator
 from pydantic.fields import Field

 from datahub.configuration.common import AllowDenyPattern, ConfigModel
@@ -330,7 +330,8 @@ class ElasticsearchSourceConfig(
             self.profiling.operation_config
         )

-    @validator("host")
+    @field_validator("host", mode="after")
+    @classmethod
     def host_colon_port_comma(cls, host_val: str) -> str:
         for entry in host_val.split(","):
             entry = remove_protocol(entry)
@@ -382,7 +383,7 @@ class ElasticsearchSource(StatefulIngestionSourceBase):
     def create(
         cls, config_dict: Dict[str, Any], ctx: PipelineContext
     ) -> "ElasticsearchSource":
-        config = ElasticsearchSourceConfig.parse_obj(config_dict)
+        config = ElasticsearchSourceConfig.model_validate(config_dict)
         return cls(config, ctx)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
@@ -407,12 +408,78 @@ class ElasticsearchSource(StatefulIngestionSourceBase):
         for mcp in self._get_data_stream_index_count_mcps():
             yield mcp.as_workunit()
         if self.source_config.ingest_index_templates:
-            templates = self.client.indices.get_template()
-            for template in templates:
+            # Fetch legacy index templates
+            legacy_templates = self.client.indices.get_template()
+            for template in legacy_templates:
                 if self.source_config.index_template_pattern.allowed(template):
                     for mcp in self._extract_mcps(template, is_index=False):
                         yield mcp.as_workunit()

+            # Fetch composable index templates (ES 7.8+ / OpenSearch)
+            try:
+                composable_templates = self.client.indices.get_index_template()
+                for template_info in composable_templates.get("index_templates", []):
+                    template = template_info.get("name")
+                    if template and self.source_config.index_template_pattern.allowed(
+                        template
+                    ):
+                        for mcp in self._extract_mcps(
+                            template, is_index=False, is_composable_template=True
+                        ):
+                            yield mcp.as_workunit()
+            except Exception as e:
+                logger.warning(f"Unable to fetch composable index templates: {e}")
+
+    def _get_template_metadata(
+        self, template_name: str, is_composable: bool
+    ) -> Dict[str, Any]:
+        """Fetch template metadata from Elasticsearch/OpenSearch."""
+        if is_composable:
+            # For composable templates (ES 7.8+ / OpenSearch)
+            raw_response = self.client.indices.get_index_template(name=template_name)
+            template_data = raw_response.get("index_templates", [{}])[0]
+            return template_data.get("index_template", {})
+        else:
+            # For legacy templates
+            raw_response = self.client.indices.get_template(name=template_name)
+            return raw_response[template_name]
+
+    def _extract_template_custom_properties(
+        self, raw_metadata: Dict[str, Any], is_composable: bool
+    ) -> Dict[str, str]:
+        """Extract custom properties from template metadata."""
+        custom_properties: Dict[str, str] = {}
+
+        # Extract aliases
+        if is_composable:
+            aliases_dict = raw_metadata.get("template", {}).get("aliases", {})
+        else:
+            aliases_dict = raw_metadata.get("aliases", {})
+        index_aliases: List[str] = list(aliases_dict.keys()) if aliases_dict else []
+        if index_aliases:
+            custom_properties["aliases"] = ",".join(index_aliases)
+
+        # Extract index_patterns
+        index_patterns: List[str] = raw_metadata.get("index_patterns", [])
+        if index_patterns:
+            custom_properties["index_patterns"] = ",".join(index_patterns)
+
+        # Extract settings
+        if is_composable:
+            index_settings: Dict[str, Any] = (
+                raw_metadata.get("template", {}).get("settings", {}).get("index", {})
+            )
+        else:
+            index_settings = raw_metadata.get("settings", {}).get("index", {})
+        num_shards: str = index_settings.get("number_of_shards", "")
+        if num_shards:
+            custom_properties["num_shards"] = num_shards
+        num_replicas: str = index_settings.get("number_of_replicas", "")
+        if num_replicas:
+            custom_properties["num_replicas"] = num_replicas
+
+        return custom_properties
+
     def _get_data_stream_index_count_mcps(
         self,
     ) -> Iterable[MetadataChangeProposalWrapper]:
@@ -434,9 +501,11 @@ class ElasticsearchSource(StatefulIngestionSourceBase):
         )

     def _extract_mcps(
-        self, index: str, is_index: bool = True
+        self, index: str, is_index: bool = True, is_composable_template: bool = False
     ) -> Iterable[MetadataChangeProposalWrapper]:
-        logger.debug(f"index='{index}', is_index={is_index}")
+        logger.debug(
+            f"index='{index}', is_index={is_index}, is_composable_template={is_composable_template}"
+        )

         if is_index:
             raw_index = self.client.indices.get(index=index)
@@ -451,15 +520,20 @@ class ElasticsearchSource(StatefulIngestionSourceBase):
                 # This is a duplicate, skip processing it further.
                 return
         else:
-            raw_index = self.client.indices.get_template(name=index)
-            raw_index_metadata = raw_index[index]
+            raw_index_metadata = self._get_template_metadata(
+                index, is_composable_template
+            )
         collapsed_index_name = collapse_name(
             name=index, collapse_urns=self.source_config.collapse_urns
         )

         # 1. Construct and emit the schemaMetadata aspect
         # 1.1 Generate the schema fields from ES mappings.
-        index_mappings = raw_index_metadata["mappings"]
+        # For composable templates, mappings are under 'template.mappings'
+        if is_composable_template:
+            index_mappings = raw_index_metadata.get("template", {}).get("mappings", {})
+        else:
+            index_mappings = raw_index_metadata.get("mappings", {})
         index_mappings_json_str: str = json.dumps(index_mappings)
         md5_hash = md5(index_mappings_json_str.encode()).hexdigest()
         schema_fields = list(
@@ -517,28 +591,32 @@ class ElasticsearchSource(StatefulIngestionSourceBase):
             ),
         )

-        # 4. Construct and emit properties if needed. Will attempt to get the following properties
-        custom_properties: Dict[str, str] = {}
-        # 4.1 aliases
-        index_aliases: List[str] = raw_index_metadata.get("aliases", {}).keys()
-        if index_aliases:
-            custom_properties["aliases"] = ",".join(index_aliases)
-        # 4.2 index_patterns
-        index_patterns: List[str] = raw_index_metadata.get("index_patterns", [])
-        if index_patterns:
-            custom_properties["index_patterns"] = ",".join(index_patterns)
-
-        # 4.3 number_of_shards
-        index_settings: Dict[str, Any] = raw_index_metadata.get("settings", {}).get(
-            "index", {}
-        )
-        num_shards: str = index_settings.get("number_of_shards", "")
-        if num_shards:
-            custom_properties["num_shards"] = num_shards
-        # 4.4 number_of_replicas
-        num_replicas: str = index_settings.get("number_of_replicas", "")
-        if num_replicas:
-            custom_properties["num_replicas"] = num_replicas
+        # 4. Construct and emit properties
+        if is_index:
+            custom_properties: Dict[str, str] = {}
+            # Extract properties for indices
+            index_aliases: List[str] = list(
+                raw_index_metadata.get("aliases", {}).keys()
+            )
+            if index_aliases:
+                custom_properties["aliases"] = ",".join(index_aliases)
+            index_patterns: List[str] = raw_index_metadata.get("index_patterns", [])
+            if index_patterns:
+                custom_properties["index_patterns"] = ",".join(index_patterns)
+            index_settings: Dict[str, Any] = raw_index_metadata.get("settings", {}).get(
+                "index", {}
+            )
+            num_shards: str = index_settings.get("number_of_shards", "")
+            if num_shards:
+                custom_properties["num_shards"] = num_shards
+            num_replicas: str = index_settings.get("number_of_replicas", "")
+            if num_replicas:
+                custom_properties["num_replicas"] = num_replicas
+        else:
+            # Extract properties for templates
+            custom_properties = self._extract_template_custom_properties(
+                raw_index_metadata, is_composable_template
+            )

         yield MetadataChangeProposalWrapper(
             entityUrn=dataset_urn,
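
The template handling above branches on is_composable_template because the legacy and composable template APIs return differently shaped payloads; a composable template's settings, mappings, and aliases sit one level deeper under "template". Abbreviated response shapes, sketched from the Elasticsearch REST API with a hypothetical template name and fields:

# Legacy templates: GET _template/<name>  ->  indices.get_template(name=...)
legacy_response = {
    "my-template": {
        "index_patterns": ["logs-*"],
        "settings": {"index": {"number_of_shards": "1", "number_of_replicas": "1"}},
        "mappings": {"properties": {"message": {"type": "text"}}},
        "aliases": {"logs": {}},
    }
}

# Composable templates (ES 7.8+ / OpenSearch): GET _index_template/<name>
#   ->  indices.get_index_template(name=...)
composable_response = {
    "index_templates": [
        {
            "name": "my-template",
            "index_template": {
                "index_patterns": ["logs-*"],
                "template": {
                    "settings": {"index": {"number_of_shards": "1"}},
                    "mappings": {"properties": {"message": {"type": "text"}}},
                    "aliases": {"logs": {}},
                },
            },
        }
    ]
}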

datahub/ingestion/source/excel/source.py

@@ -156,7 +156,7 @@ class ExcelSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "ExcelSource":
-        config = ExcelSourceConfig.parse_obj(config_dict)
+        config = ExcelSourceConfig.model_validate(config_dict)
         return cls(ctx, config)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:

datahub/ingestion/source/feast.py

@@ -462,7 +462,7 @@ class FeastRepositorySource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = FeastRepositorySourceConfig.parse_obj(config_dict)
+        config = FeastRepositorySourceConfig.model_validate(config_dict)
         return cls(config, ctx)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:

datahub/ingestion/source/file.py

@@ -9,7 +9,7 @@ from functools import partial
 from typing import Any, Iterable, Iterator, List, Optional, Tuple, Union

 import ijson
-from pydantic import validator
+from pydantic import field_validator
 from pydantic.fields import Field

 from datahub.configuration.common import ConfigEnum
@@ -103,7 +103,8 @@ class FileSourceConfig(StatefulIngestionConfigBase):

     stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None

-    @validator("file_extension", always=True)
+    @field_validator("file_extension", mode="after")
+    @classmethod
     def add_leading_dot_to_extension(cls, v: str) -> str:
         if v:
             if v.startswith("."):
@@ -205,7 +206,7 @@ class GenericFileSource(StatefulIngestionSourceBase, TestableSource):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = FileSourceConfig.parse_obj(config_dict)
+        config = FileSourceConfig.model_validate(config_dict)
         return cls(ctx, config)

     def get_filenames(self) -> Iterable[FileInfo]:
@@ -358,7 +359,7 @@ class GenericFileSource(StatefulIngestionSourceBase, TestableSource):

     @staticmethod
     def test_connection(config_dict: dict) -> TestConnectionReport:
-        config = FileSourceConfig.parse_obj(config_dict)
+        config = FileSourceConfig.model_validate(config_dict)
         exists = os.path.exists(config.path)
         if not exists:
             return TestConnectionReport(

datahub/ingestion/source/fivetran/config.py

@@ -1,10 +1,10 @@
 import dataclasses
 import logging
 import warnings
-from typing import Dict, Optional
+from typing import Any, Dict, Optional

 import pydantic
-from pydantic import Field, root_validator
+from pydantic import Field, field_validator, model_validator
 from typing_extensions import Literal

 from datahub.configuration.common import (
@@ -98,7 +98,8 @@ class DatabricksDestinationConfig(UnityCatalogConnectionConfig):
     catalog: str = Field(description="The fivetran connector log catalog.")
     log_schema: str = Field(description="The fivetran connector log schema.")

-    @pydantic.validator("warehouse_id")
+    @field_validator("warehouse_id", mode="after")
+    @classmethod
     def warehouse_id_should_not_be_empty(cls, warehouse_id: Optional[str]) -> str:
         if warehouse_id is None or (warehouse_id and warehouse_id.strip() == ""):
             raise ValueError("Fivetran requires warehouse_id to be set")
@@ -141,29 +142,28 @@ class FivetranLogConfig(ConfigModel):
         "destination_config", "snowflake_destination_config"
     )

-    @root_validator(skip_on_failure=True)
-    def validate_destination_platfrom_and_config(cls, values: Dict) -> Dict:
-        destination_platform = values["destination_platform"]
-        if destination_platform == "snowflake":
-            if "snowflake_destination_config" not in values:
+    @model_validator(mode="after")
+    def validate_destination_platform_and_config(self) -> "FivetranLogConfig":
+        if self.destination_platform == "snowflake":
+            if self.snowflake_destination_config is None:
                 raise ValueError(
                     "If destination platform is 'snowflake', user must provide snowflake destination configuration in the recipe."
                 )
-        elif destination_platform == "bigquery":
-            if "bigquery_destination_config" not in values:
+        elif self.destination_platform == "bigquery":
+            if self.bigquery_destination_config is None:
                 raise ValueError(
                     "If destination platform is 'bigquery', user must provide bigquery destination configuration in the recipe."
                 )
-        elif destination_platform == "databricks":
-            if "databricks_destination_config" not in values:
+        elif self.destination_platform == "databricks":
+            if self.databricks_destination_config is None:
                 raise ValueError(
                     "If destination platform is 'databricks', user must provide databricks destination configuration in the recipe."
                 )
         else:
             raise ValueError(
-                f"Destination platform '{destination_platform}' is not yet supported."
+                f"Destination platform '{self.destination_platform}' is not yet supported."
             )
-        return values
+        return self


 @dataclasses.dataclass
@@ -267,8 +267,9 @@ class FivetranSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin
         description="Fivetran REST API configuration, used to provide wider support for connections.",
     )

-    @pydantic.root_validator(pre=True)
-    def compat_sources_to_database(cls, values: Dict) -> Dict:
+    @model_validator(mode="before")
+    @classmethod
+    def compat_sources_to_database(cls, values: Any) -> Any:
         if "sources_to_database" in values:
             warnings.warn(
                 "The sources_to_database field is deprecated, please use sources_to_platform_instance instead.",

datahub/ingestion/source/fivetran/fivetran.py

@@ -234,12 +234,12 @@ class FivetranSource(StatefulIngestionSourceBase):
         return dict(
             **{
                 f"source.{k}": str(v)
-                for k, v in source_details.dict().items()
+                for k, v in source_details.model_dump().items()
                 if v is not None and not isinstance(v, bool)
             },
             **{
                 f"destination.{k}": str(v)
-                for k, v in destination_details.dict().items()
+                for k, v in destination_details.model_dump().items()
                 if v is not None and not isinstance(v, bool)
             },
         )

datahub/ingestion/source/gc/datahub_gc.py

@@ -127,7 +127,7 @@ class DataHubGcSource(Source):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = DataHubGcSourceConfig.parse_obj(config_dict)
+        config = DataHubGcSourceConfig.model_validate(config_dict)
         return cls(ctx, config)

     # auto_work_unit_report is overriden to disable a couple of automation like auto status aspect, etc. which is not needed her.

datahub/ingestion/source/gcs/gcs_source.py

@@ -1,7 +1,7 @@
 import logging
-from typing import Dict, Iterable, List, Optional
+from typing import Iterable, List, Optional

-from pydantic import Field, SecretStr, validator
+from pydantic import Field, SecretStr, model_validator

 from datahub.configuration.common import ConfigModel
 from datahub.configuration.source_common import DatasetSourceConfigMixin
@@ -64,18 +64,16 @@ class GCSSourceConfig(

     stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None

-    @validator("path_specs", always=True)
-    def check_path_specs_and_infer_platform(
-        cls, path_specs: List[PathSpec], values: Dict
-    ) -> List[PathSpec]:
-        if len(path_specs) == 0:
+    @model_validator(mode="after")
+    def check_path_specs_and_infer_platform(self) -> "GCSSourceConfig":
+        if len(self.path_specs) == 0:
             raise ValueError("path_specs must not be empty")

         # Check that all path specs have the gs:// prefix.
-        if any([not is_gcs_uri(path_spec.include) for path_spec in path_specs]):
+        if any([not is_gcs_uri(path_spec.include) for path_spec in self.path_specs]):
             raise ValueError("All path_spec.include should start with gs://")

-        return path_specs
+        return self


 class GCSSourceReport(DataLakeSourceReport):
@@ -105,7 +103,7 @@ class GCSSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = GCSSourceConfig.parse_obj(config_dict)
+        config = GCSSourceConfig.model_validate(config_dict)
         return cls(config, ctx)

     def create_equivalent_s3_config(self):

datahub/ingestion/source/ge_profiling_config.py

@@ -4,6 +4,7 @@ import os
 from typing import Annotated, Any, Dict, List, Optional

 import pydantic
+from pydantic import model_validator
 from pydantic.fields import Field

 from datahub.configuration.common import AllowDenyPattern, ConfigModel, SupportedSources
@@ -212,7 +213,8 @@ class GEProfilingConfig(GEProfilingBaseConfig):
         description="Whether to profile complex types like structs, arrays and maps. ",
     )

-    @pydantic.root_validator(pre=True)
+    @model_validator(mode="before")
+    @classmethod
     def deprecate_bigquery_temp_table_schema(cls, values):
         # TODO: Update docs to remove mention of this field.
         if "bigquery_temp_table_schema" in values:
@@ -222,16 +224,17 @@ class GEProfilingConfig(GEProfilingBaseConfig):
             del values["bigquery_temp_table_schema"]
         return values

-    @pydantic.root_validator(pre=True)
+    @model_validator(mode="before")
+    @classmethod
     def ensure_field_level_settings_are_normalized(
-        cls: "GEProfilingConfig", values: Dict[str, Any]
+        cls, values: Dict[str, Any]
     ) -> Dict[str, Any]:
         max_num_fields_to_profile_key = "max_number_of_fields_to_profile"
         max_num_fields_to_profile = values.get(max_num_fields_to_profile_key)

         # Disable all field-level metrics.
         if values.get("profile_table_level_only"):
-            for field_level_metric in cls.__fields__:
+            for field_level_metric in cls.model_fields:
                 if field_level_metric.startswith("include_field_"):
                     if values.get(field_level_metric):
                         raise ValueError(
@@ -267,7 +270,7 @@ class GEProfilingConfig(GEProfilingBaseConfig):
         )

     def config_for_telemetry(self) -> Dict[str, Any]:
-        config_dict = self.dict()
+        config_dict = self.model_dump()

         return {
             flag: config_dict[flag]

datahub/ingestion/source/grafana/grafana_api.py

@@ -69,7 +69,7 @@ class GrafanaAPIClient:
                 if not batch:
                     break

-                folders.extend(Folder.parse_obj(folder) for folder in batch)
+                folders.extend(Folder.model_validate(folder) for folder in batch)
                 page += 1
         except requests.exceptions.RequestException as e:
             self.report.report_failure(
@@ -88,7 +88,7 @@ class GrafanaAPIClient:
         try:
             response = self.session.get(f"{self.base_url}/api/dashboards/uid/{uid}")
             response.raise_for_status()
-            return Dashboard.parse_obj(response.json())
+            return Dashboard.model_validate(response.json())
         except requests.exceptions.RequestException as e:
             self.report.warning(
                 title="Dashboard Fetch Error",

datahub/ingestion/source/grafana/grafana_config.py

@@ -1,6 +1,6 @@
 from typing import Dict, Optional

-from pydantic import Field, SecretStr, validator
+from pydantic import Field, SecretStr, field_validator

 from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
 from datahub.configuration.source_common import (
@@ -99,6 +99,7 @@ class GrafanaSourceConfig(
         description="Map of Grafana datasource types/UIDs to platform connection configs for lineage extraction",
     )

-    @validator("url", allow_reuse=True)
-    def remove_trailing_slash(cls, v):
+    @field_validator("url", mode="after")
+    @classmethod
+    def remove_trailing_slash(cls, v: str) -> str:
         return config_clean.remove_trailing_slashes(v)

datahub/ingestion/source/grafana/grafana_source.py

@@ -171,7 +171,7 @@ class GrafanaSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "GrafanaSource":
-        config = GrafanaSourceConfig.parse_obj(config_dict)
+        config = GrafanaSourceConfig.model_validate(config_dict)
         return cls(config, ctx)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: