acryl-datahub 0.15.0.1rc17__py3-none-any.whl → 0.15.0.2__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (211)
  1. {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/METADATA +2440 -2438
  2. {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/RECORD +211 -207
  3. {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/WHEEL +1 -1
  4. datahub/__init__.py +1 -1
  5. datahub/api/entities/assertion/assertion_operator.py +3 -5
  6. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  7. datahub/api/entities/datacontract/assertion_operator.py +3 -5
  8. datahub/api/entities/dataproduct/dataproduct.py +4 -4
  9. datahub/api/entities/dataset/dataset.py +2 -1
  10. datahub/api/entities/structuredproperties/structuredproperties.py +18 -7
  11. datahub/cli/cli_utils.py +13 -2
  12. datahub/cli/delete_cli.py +3 -3
  13. datahub/cli/docker_cli.py +6 -6
  14. datahub/cli/ingest_cli.py +25 -15
  15. datahub/cli/lite_cli.py +2 -2
  16. datahub/cli/migrate.py +5 -5
  17. datahub/cli/specific/assertions_cli.py +3 -3
  18. datahub/cli/specific/structuredproperties_cli.py +84 -0
  19. datahub/cli/timeline_cli.py +1 -1
  20. datahub/configuration/common.py +1 -2
  21. datahub/configuration/config_loader.py +73 -50
  22. datahub/configuration/git.py +2 -2
  23. datahub/configuration/time_window_config.py +10 -5
  24. datahub/emitter/mce_builder.py +4 -8
  25. datahub/emitter/mcp_builder.py +27 -0
  26. datahub/emitter/mcp_patch_builder.py +1 -2
  27. datahub/emitter/rest_emitter.py +126 -85
  28. datahub/entrypoints.py +6 -0
  29. datahub/ingestion/api/incremental_lineage_helper.py +2 -8
  30. datahub/ingestion/api/report.py +1 -2
  31. datahub/ingestion/api/source.py +4 -2
  32. datahub/ingestion/api/source_helpers.py +1 -1
  33. datahub/ingestion/extractor/json_schema_util.py +3 -3
  34. datahub/ingestion/extractor/schema_util.py +3 -5
  35. datahub/ingestion/fs/s3_fs.py +3 -3
  36. datahub/ingestion/glossary/datahub_classifier.py +6 -4
  37. datahub/ingestion/graph/client.py +22 -19
  38. datahub/ingestion/graph/config.py +1 -1
  39. datahub/ingestion/run/pipeline.py +8 -7
  40. datahub/ingestion/run/pipeline_config.py +3 -3
  41. datahub/ingestion/source/abs/datalake_profiler_config.py +3 -3
  42. datahub/ingestion/source/abs/source.py +19 -8
  43. datahub/ingestion/source/aws/glue.py +77 -47
  44. datahub/ingestion/source/aws/s3_boto_utils.py +3 -3
  45. datahub/ingestion/source/aws/s3_util.py +24 -1
  46. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  47. datahub/ingestion/source/aws/sagemaker_processors/models.py +2 -2
  48. datahub/ingestion/source/bigquery_v2/bigquery.py +34 -34
  49. datahub/ingestion/source/bigquery_v2/bigquery_audit.py +3 -3
  50. datahub/ingestion/source/bigquery_v2/bigquery_config.py +14 -6
  51. datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py +8 -4
  52. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -3
  53. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +22 -16
  54. datahub/ingestion/source/bigquery_v2/lineage.py +16 -16
  55. datahub/ingestion/source/bigquery_v2/queries.py +1 -3
  56. datahub/ingestion/source/bigquery_v2/queries_extractor.py +3 -3
  57. datahub/ingestion/source/bigquery_v2/usage.py +60 -60
  58. datahub/ingestion/source/cassandra/cassandra.py +0 -1
  59. datahub/ingestion/source/cassandra/cassandra_profiling.py +24 -24
  60. datahub/ingestion/source/cassandra/cassandra_utils.py +4 -7
  61. datahub/ingestion/source/confluent_schema_registry.py +6 -6
  62. datahub/ingestion/source/csv_enricher.py +29 -29
  63. datahub/ingestion/source/datahub/config.py +10 -0
  64. datahub/ingestion/source/datahub/datahub_database_reader.py +4 -2
  65. datahub/ingestion/source/datahub/datahub_source.py +12 -2
  66. datahub/ingestion/source/dbt/dbt_cloud.py +13 -13
  67. datahub/ingestion/source/dbt/dbt_common.py +9 -7
  68. datahub/ingestion/source/delta_lake/source.py +0 -5
  69. datahub/ingestion/source/demo_data.py +1 -1
  70. datahub/ingestion/source/dremio/dremio_api.py +4 -4
  71. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +3 -3
  72. datahub/ingestion/source/dremio/dremio_reporting.py +0 -3
  73. datahub/ingestion/source/dremio/dremio_source.py +2 -2
  74. datahub/ingestion/source/elastic_search.py +4 -4
  75. datahub/ingestion/source/fivetran/fivetran.py +1 -6
  76. datahub/ingestion/source/gc/datahub_gc.py +11 -14
  77. datahub/ingestion/source/gc/execution_request_cleanup.py +31 -6
  78. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +48 -15
  79. datahub/ingestion/source/gcs/gcs_source.py +3 -2
  80. datahub/ingestion/source/ge_data_profiler.py +2 -5
  81. datahub/ingestion/source/ge_profiling_config.py +3 -3
  82. datahub/ingestion/source/iceberg/iceberg.py +13 -6
  83. datahub/ingestion/source/iceberg/iceberg_common.py +49 -9
  84. datahub/ingestion/source/iceberg/iceberg_profiler.py +3 -1
  85. datahub/ingestion/source/identity/azure_ad.py +3 -3
  86. datahub/ingestion/source/identity/okta.py +3 -3
  87. datahub/ingestion/source/kafka/kafka.py +11 -9
  88. datahub/ingestion/source/kafka_connect/kafka_connect.py +3 -9
  89. datahub/ingestion/source/kafka_connect/sink_connectors.py +3 -3
  90. datahub/ingestion/source/kafka_connect/source_connectors.py +3 -3
  91. datahub/ingestion/source/looker/looker_common.py +19 -19
  92. datahub/ingestion/source/looker/looker_config.py +11 -6
  93. datahub/ingestion/source/looker/looker_source.py +25 -25
  94. datahub/ingestion/source/looker/looker_template_language.py +3 -3
  95. datahub/ingestion/source/looker/looker_usage.py +5 -7
  96. datahub/ingestion/source/looker/lookml_concept_context.py +6 -6
  97. datahub/ingestion/source/looker/lookml_source.py +13 -15
  98. datahub/ingestion/source/looker/view_upstream.py +5 -5
  99. datahub/ingestion/source/metabase.py +1 -6
  100. datahub/ingestion/source/mlflow.py +4 -9
  101. datahub/ingestion/source/mode.py +5 -5
  102. datahub/ingestion/source/mongodb.py +6 -4
  103. datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
  104. datahub/ingestion/source/nifi.py +24 -31
  105. datahub/ingestion/source/openapi.py +9 -9
  106. datahub/ingestion/source/powerbi/config.py +12 -12
  107. datahub/ingestion/source/powerbi/m_query/parser.py +11 -11
  108. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +26 -24
  109. datahub/ingestion/source/powerbi/m_query/resolver.py +13 -13
  110. datahub/ingestion/source/powerbi/powerbi.py +6 -6
  111. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +9 -9
  112. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +7 -7
  113. datahub/ingestion/source/powerbi_report_server/report_server.py +1 -1
  114. datahub/ingestion/source/qlik_sense/qlik_api.py +1 -1
  115. datahub/ingestion/source/redash.py +0 -5
  116. datahub/ingestion/source/redshift/config.py +3 -3
  117. datahub/ingestion/source/redshift/redshift.py +45 -46
  118. datahub/ingestion/source/redshift/usage.py +33 -33
  119. datahub/ingestion/source/s3/datalake_profiler_config.py +3 -3
  120. datahub/ingestion/source/s3/source.py +11 -15
  121. datahub/ingestion/source/salesforce.py +26 -25
  122. datahub/ingestion/source/schema/json_schema.py +1 -1
  123. datahub/ingestion/source/sigma/sigma.py +3 -3
  124. datahub/ingestion/source/sigma/sigma_api.py +12 -10
  125. datahub/ingestion/source/snowflake/snowflake_config.py +30 -7
  126. datahub/ingestion/source/snowflake/snowflake_connection.py +6 -6
  127. datahub/ingestion/source/snowflake/snowflake_queries.py +2 -2
  128. datahub/ingestion/source/snowflake/snowflake_report.py +0 -3
  129. datahub/ingestion/source/snowflake/snowflake_schema.py +8 -5
  130. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +136 -42
  131. datahub/ingestion/source/snowflake/snowflake_tag.py +21 -11
  132. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +49 -50
  133. datahub/ingestion/source/snowflake/snowflake_utils.py +1 -2
  134. datahub/ingestion/source/snowflake/snowflake_v2.py +51 -47
  135. datahub/ingestion/source/sql/athena.py +1 -3
  136. datahub/ingestion/source/sql/clickhouse.py +8 -14
  137. datahub/ingestion/source/sql/oracle.py +1 -3
  138. datahub/ingestion/source/sql/sql_generic_profiler.py +1 -2
  139. datahub/ingestion/source/sql/sql_types.py +1 -2
  140. datahub/ingestion/source/sql/sql_utils.py +5 -0
  141. datahub/ingestion/source/sql/teradata.py +18 -5
  142. datahub/ingestion/source/state/profiling_state_handler.py +3 -3
  143. datahub/ingestion/source/state/redundant_run_skip_handler.py +5 -7
  144. datahub/ingestion/source/state/stale_entity_removal_handler.py +3 -3
  145. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +9 -9
  146. datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
  147. datahub/ingestion/source/superset.py +1 -6
  148. datahub/ingestion/source/tableau/tableau.py +343 -117
  149. datahub/ingestion/source/tableau/tableau_common.py +5 -2
  150. datahub/ingestion/source/unity/config.py +3 -1
  151. datahub/ingestion/source/unity/proxy.py +1 -1
  152. datahub/ingestion/source/unity/source.py +74 -74
  153. datahub/ingestion/source/unity/usage.py +3 -1
  154. datahub/ingestion/source/usage/clickhouse_usage.py +4 -4
  155. datahub/ingestion/source/usage/starburst_trino_usage.py +3 -3
  156. datahub/ingestion/source/usage/usage_common.py +1 -1
  157. datahub/ingestion/source_report/ingestion_stage.py +24 -20
  158. datahub/ingestion/transformer/add_dataset_dataproduct.py +4 -4
  159. datahub/ingestion/transformer/add_dataset_properties.py +3 -3
  160. datahub/ingestion/transformer/add_dataset_schema_tags.py +3 -3
  161. datahub/ingestion/transformer/add_dataset_schema_terms.py +3 -3
  162. datahub/ingestion/transformer/dataset_domain_based_on_tags.py +4 -4
  163. datahub/ingestion/transformer/extract_ownership_from_tags.py +3 -3
  164. datahub/ingestion/transformer/tags_to_terms.py +7 -7
  165. datahub/integrations/assertion/snowflake/compiler.py +10 -10
  166. datahub/lite/duckdb_lite.py +12 -10
  167. datahub/metadata/_schema_classes.py +317 -44
  168. datahub/metadata/_urns/urn_defs.py +69 -15
  169. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  170. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
  171. datahub/metadata/com/linkedin/pegasus2avro/versionset/__init__.py +17 -0
  172. datahub/metadata/schema.avsc +302 -89
  173. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  174. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  175. datahub/metadata/schemas/DataProcessInstanceInput.avsc +4 -2
  176. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -0
  177. datahub/metadata/schemas/DatasetKey.avsc +2 -1
  178. datahub/metadata/schemas/MLFeatureProperties.avsc +51 -0
  179. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +51 -0
  180. datahub/metadata/schemas/MLModelGroupProperties.avsc +96 -23
  181. datahub/metadata/schemas/MLModelKey.avsc +2 -1
  182. datahub/metadata/schemas/MLModelProperties.avsc +96 -48
  183. datahub/metadata/schemas/MLPrimaryKeyProperties.avsc +51 -0
  184. datahub/metadata/schemas/MetadataChangeEvent.avsc +98 -71
  185. datahub/metadata/schemas/VersionProperties.avsc +216 -0
  186. datahub/metadata/schemas/VersionSetKey.avsc +26 -0
  187. datahub/metadata/schemas/VersionSetProperties.avsc +49 -0
  188. datahub/secret/datahub_secrets_client.py +12 -21
  189. datahub/secret/secret_common.py +14 -8
  190. datahub/specific/aspect_helpers/custom_properties.py +1 -2
  191. datahub/sql_parsing/schema_resolver.py +5 -10
  192. datahub/sql_parsing/sql_parsing_aggregator.py +18 -16
  193. datahub/sql_parsing/sqlglot_lineage.py +3 -3
  194. datahub/sql_parsing/sqlglot_utils.py +1 -1
  195. datahub/telemetry/stats.py +1 -2
  196. datahub/testing/mcp_diff.py +1 -1
  197. datahub/utilities/file_backed_collections.py +11 -11
  198. datahub/utilities/hive_schema_to_avro.py +2 -2
  199. datahub/utilities/logging_manager.py +2 -2
  200. datahub/utilities/lossy_collections.py +3 -3
  201. datahub/utilities/mapping.py +3 -3
  202. datahub/utilities/memory_footprint.py +3 -2
  203. datahub/utilities/perf_timer.py +11 -6
  204. datahub/utilities/serialized_lru_cache.py +3 -1
  205. datahub/utilities/sqlalchemy_query_combiner.py +6 -6
  206. datahub/utilities/sqllineage_patch.py +1 -1
  207. datahub/utilities/stats_collections.py +3 -1
  208. datahub/utilities/urns/_urn_base.py +28 -5
  209. datahub/utilities/urns/urn_iter.py +2 -2
  210. {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/entry_points.txt +0 -0
  211. {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/top_level.txt +0 -0
@@ -218,9 +218,7 @@ def _get_all_table_comments_and_properties(self, connection, **kw):
218
218
  , comment
219
219
  , {properties_clause} AS properties
220
220
  FROM system.tables
221
- WHERE name NOT LIKE '.inner%'""".format(
222
- properties_clause=properties_clause
223
- )
221
+ WHERE name NOT LIKE '.inner%'""".format(properties_clause=properties_clause)
224
222
  )
225
223
 
226
224
  all_table_comments: Dict[Tuple[str, str], Dict[str, Any]] = {}
@@ -268,7 +266,7 @@ def _get_table_or_view_names(self, relkind, connection, schema=None, **kw):
268
266
  info_cache = kw.get("info_cache")
269
267
  all_relations = self._get_all_relation_info(connection, info_cache=info_cache)
270
268
  relation_names = []
271
- for key, relation in all_relations.items():
269
+ for _, relation in all_relations.items():
272
270
  if relation.database == schema and relation.relkind == relkind:
273
271
  relation_names.append(relation.relname)
274
272
  return relation_names
@@ -301,9 +299,7 @@ def _get_schema_column_info(self, connection, schema=None, **kw):
301
299
  , comment
302
300
  FROM system.columns
303
301
  WHERE {schema_clause}
304
- ORDER BY database, table, position""".format(
305
- schema_clause=schema_clause
306
- )
302
+ ORDER BY database, table, position""".format(schema_clause=schema_clause)
307
303
  )
308
304
  )
309
305
  )
@@ -474,7 +470,7 @@ class ClickHouseSource(TwoTierSQLAlchemySource):
474
470
  logger.debug(f"sql_alchemy_url={url}")
475
471
  engine = create_engine(url, **self.config.options)
476
472
  for db_row in engine.execute(text(all_tables_query)):
477
- all_tables_set.add(f'{db_row["database"]}.{db_row["table_name"]}')
473
+ all_tables_set.add(f"{db_row['database']}.{db_row['table_name']}")
478
474
 
479
475
  return all_tables_set
480
476
 
@@ -503,7 +499,7 @@ class ClickHouseSource(TwoTierSQLAlchemySource):
503
499
 
504
500
  try:
505
501
  for db_row in engine.execute(text(query)):
506
- dataset_name = f'{db_row["target_schema"]}.{db_row["target_table"]}'
502
+ dataset_name = f"{db_row['target_schema']}.{db_row['target_table']}"
507
503
  if not self.config.database_pattern.allowed(
508
504
  db_row["target_schema"]
509
505
  ) or not self.config.table_pattern.allowed(dataset_name):
@@ -512,7 +508,7 @@ class ClickHouseSource(TwoTierSQLAlchemySource):
512
508
 
513
509
  # Target
514
510
  target_path = (
515
- f'{self.config.platform_instance+"." if self.config.platform_instance else ""}'
511
+ f"{self.config.platform_instance + '.' if self.config.platform_instance else ''}"
516
512
  f"{dataset_name}"
517
513
  )
518
514
  target = LineageItem(
@@ -525,7 +521,7 @@ class ClickHouseSource(TwoTierSQLAlchemySource):
525
521
 
526
522
  # Source
527
523
  platform = LineageDatasetPlatform.CLICKHOUSE
528
- path = f'{db_row["source_schema"]}.{db_row["source_table"]}'
524
+ path = f"{db_row['source_schema']}.{db_row['source_table']}"
529
525
 
530
526
  sources = [
531
527
  LineageDataset(
@@ -552,9 +548,7 @@ class ClickHouseSource(TwoTierSQLAlchemySource):
552
548
  target.dataset.path
553
549
  ].upstreams = self._lineage_map[
554
550
  target.dataset.path
555
- ].upstreams.union(
556
- target.upstreams
557
- )
551
+ ].upstreams.union(target.upstreams)
558
552
 
559
553
  else:
560
554
  self._lineage_map[target.dataset.path] = target
@@ -234,9 +234,7 @@ class OracleInspectorObjectWrapper:
234
234
  WHERE col.table_name = id.table_name
235
235
  AND col.column_name = id.column_name
236
236
  AND col.owner = id.owner
237
- ) AS identity_options""".format(
238
- dblink=dblink
239
- )
237
+ ) AS identity_options""".format(dblink=dblink)
240
238
  else:
241
239
  identity_cols = "NULL as default_on_null, NULL as identity_options"
242
240
 
@@ -278,8 +278,7 @@ class GenericProfiler:
278
278
 
279
279
  if self.config.profiling.profile_table_size_limit is not None and (
280
280
  size_in_bytes is not None
281
- and size_in_bytes / (2**30)
282
- > self.config.profiling.profile_table_size_limit
281
+ and size_in_bytes / (2**30) > self.config.profiling.profile_table_size_limit
283
282
  ):
284
283
  self.report.profiling_skipped_size_limit[schema_name] += 1
285
284
  logger.debug(
@@ -93,7 +93,7 @@ POSTGRES_TYPES_MAP: Dict[str, Any] = {
93
93
  "regtype": None,
94
94
  "regrole": None,
95
95
  "regnamespace": None,
96
- "super": None,
96
+ "super": NullType,
97
97
  "uuid": StringType,
98
98
  "pg_lsn": None,
99
99
  "tsvector": None, # text search vector
@@ -384,7 +384,6 @@ TRINO_SQL_TYPES_MAP: Dict[str, Any] = {
384
384
  "varchar": StringType,
385
385
  "char": StringType,
386
386
  "varbinary": BytesType,
387
- "json": RecordType,
388
387
  "date": DateType,
389
388
  "time": TimeType,
390
389
  "timestamp": TimeType,
@@ -20,6 +20,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
20
20
  from datahub.metadata.com.linkedin.pegasus2avro.dataset import UpstreamLineage
21
21
  from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField
22
22
  from datahub.metadata.schema_classes import DataPlatformInstanceClass
23
+ from datahub.metadata.urns import StructuredPropertyUrn
23
24
  from datahub.utilities.registries.domain_registry import DomainRegistry
24
25
  from datahub.utilities.urns.dataset_urn import DatasetUrn
25
26
 
@@ -75,6 +76,7 @@ def gen_schema_container(
75
76
  created: Optional[int] = None,
76
77
  last_modified: Optional[int] = None,
77
78
  extra_properties: Optional[Dict[str, str]] = None,
79
+ structured_properties: Optional[Dict[StructuredPropertyUrn, str]] = None,
78
80
  ) -> Iterable[MetadataWorkUnit]:
79
81
  domain_urn: Optional[str] = None
80
82
  if domain_registry:
@@ -99,6 +101,7 @@ def gen_schema_container(
99
101
  owner_urn=owner_urn,
100
102
  qualified_name=qualified_name,
101
103
  extra_properties=extra_properties,
104
+ structured_properties=structured_properties,
102
105
  )
103
106
 
104
107
 
@@ -133,6 +136,7 @@ def gen_database_container(
133
136
  created: Optional[int] = None,
134
137
  last_modified: Optional[int] = None,
135
138
  extra_properties: Optional[Dict[str, str]] = None,
139
+ structured_properties: Optional[Dict[StructuredPropertyUrn, str]] = None,
136
140
  ) -> Iterable[MetadataWorkUnit]:
137
141
  domain_urn: Optional[str] = None
138
142
  if domain_registry:
@@ -154,6 +158,7 @@ def gen_database_container(
154
158
  owner_urn=owner_urn,
155
159
  qualified_name=qualified_name,
156
160
  extra_properties=extra_properties,
161
+ structured_properties=structured_properties,
157
162
  )
158
163
 
159
164
 
@@ -599,7 +599,12 @@ ORDER by DataBaseName, TableName;
599
599
  setattr( # noqa: B010
600
600
  TeradataDialect,
601
601
  "get_columns",
602
- lambda self, connection, table_name, schema=None, use_qvci=self.config.use_qvci, **kw: optimized_get_columns(
602
+ lambda self,
603
+ connection,
604
+ table_name,
605
+ schema=None,
606
+ use_qvci=self.config.use_qvci,
607
+ **kw: optimized_get_columns(
603
608
  self,
604
609
  connection,
605
610
  table_name,
@@ -613,7 +618,11 @@ ORDER by DataBaseName, TableName;
613
618
  setattr( # noqa: B010
614
619
  TeradataDialect,
615
620
  "get_pk_constraint",
616
- lambda self, connection, table_name, schema=None, **kw: optimized_get_pk_constraint(
621
+ lambda self,
622
+ connection,
623
+ table_name,
624
+ schema=None,
625
+ **kw: optimized_get_pk_constraint(
617
626
  self, connection, table_name, schema, **kw
618
627
  ),
619
628
  )
@@ -621,7 +630,11 @@ ORDER by DataBaseName, TableName;
621
630
  setattr( # noqa: B010
622
631
  TeradataDialect,
623
632
  "get_foreign_keys",
624
- lambda self, connection, table_name, schema=None, **kw: optimized_get_foreign_keys(
633
+ lambda self,
634
+ connection,
635
+ table_name,
636
+ schema=None,
637
+ **kw: optimized_get_foreign_keys(
625
638
  self, connection, table_name, schema, **kw
626
639
  ),
627
640
  )
@@ -878,7 +891,7 @@ ORDER by DataBaseName, TableName;
878
891
 
879
892
  urns = self.schema_resolver.get_urns()
880
893
  if self.config.include_table_lineage or self.config.include_usage_statistics:
881
- self.report.report_ingestion_stage_start("audit log extraction")
882
- yield from self.get_audit_log_mcps(urns=urns)
894
+ with self.report.new_stage("Audit log extraction"):
895
+ yield from self.get_audit_log_mcps(urns=urns)
883
896
 
884
897
  yield from self.builder.gen_workunits()
@@ -41,9 +41,9 @@ class ProfilingHandler(StatefulIngestionUsecaseHandlerBase[ProfilingCheckpointSt
41
41
  run_id: str,
42
42
  ):
43
43
  self.state_provider = source.state_provider
44
- self.stateful_ingestion_config: Optional[
45
- ProfilingStatefulIngestionConfig
46
- ] = config.stateful_ingestion
44
+ self.stateful_ingestion_config: Optional[ProfilingStatefulIngestionConfig] = (
45
+ config.stateful_ingestion
46
+ )
47
47
  self.pipeline_name = pipeline_name
48
48
  self.run_id = run_id
49
49
  self.checkpointing_enabled: bool = (
@@ -48,9 +48,9 @@ class RedundantRunSkipHandler(
48
48
  ):
49
49
  self.source = source
50
50
  self.state_provider = source.state_provider
51
- self.stateful_ingestion_config: Optional[
52
- StatefulIngestionConfig
53
- ] = config.stateful_ingestion
51
+ self.stateful_ingestion_config: Optional[StatefulIngestionConfig] = (
52
+ config.stateful_ingestion
53
+ )
54
54
  self.pipeline_name = pipeline_name
55
55
  self.run_id = run_id
56
56
  self._job_id = self._init_job_id()
@@ -145,8 +145,7 @@ class RedundantRunSkipHandler(
145
145
  )
146
146
 
147
147
  logger.debug(
148
- f"{self.job_id} : Last run start, end times:"
149
- f"({last_run_time_window})"
148
+ f"{self.job_id} : Last run start, end times:({last_run_time_window})"
150
149
  )
151
150
 
152
151
  # If current run's time window is subset of last run's time window, then skip.
@@ -212,8 +211,7 @@ class RedundantRunSkipHandler(
212
211
  )
213
212
 
214
213
  self.log(
215
- "Adjusted start, end times: "
216
- f"({suggested_start_time}, {suggested_end_time})"
214
+ f"Adjusted start, end times: ({suggested_start_time}, {suggested_end_time})"
217
215
  )
218
216
  return (suggested_start_time, suggested_end_time)
219
217
 
@@ -111,9 +111,9 @@ class StaleEntityRemovalHandler(
111
111
  self.state_type_class = state_type_class
112
112
  self.pipeline_name = pipeline_name
113
113
  self.run_id = run_id
114
- self.stateful_ingestion_config: Optional[
115
- StatefulStaleMetadataRemovalConfig
116
- ] = config.stateful_ingestion
114
+ self.stateful_ingestion_config: Optional[StatefulStaleMetadataRemovalConfig] = (
115
+ config.stateful_ingestion
116
+ )
117
117
  self.checkpointing_enabled: bool = (
118
118
  True
119
119
  if (
@@ -70,20 +70,20 @@ class DatahubIngestionCheckpointingProvider(IngestionCheckpointingProviderBase):
70
70
  self.orchestrator_name, pipeline_name, job_name
71
71
  )
72
72
 
73
- latest_checkpoint: Optional[
74
- DatahubIngestionCheckpointClass
75
- ] = self.graph.get_latest_timeseries_value(
76
- entity_urn=data_job_urn,
77
- aspect_type=DatahubIngestionCheckpointClass,
78
- filter_criteria_map={
79
- "pipelineName": pipeline_name,
80
- },
73
+ latest_checkpoint: Optional[DatahubIngestionCheckpointClass] = (
74
+ self.graph.get_latest_timeseries_value(
75
+ entity_urn=data_job_urn,
76
+ aspect_type=DatahubIngestionCheckpointClass,
77
+ filter_criteria_map={
78
+ "pipelineName": pipeline_name,
79
+ },
80
+ )
81
81
  )
82
82
  if latest_checkpoint:
83
83
  logger.debug(
84
84
  f"The last committed ingestion checkpoint for pipelineName:'{pipeline_name}',"
85
85
  f" job_name:'{job_name}' found with start_time:"
86
- f" {datetime.utcfromtimestamp(latest_checkpoint.timestampMillis/1000)}"
86
+ f" {datetime.utcfromtimestamp(latest_checkpoint.timestampMillis / 1000)}"
87
87
  )
88
88
  return latest_checkpoint
89
89
  else:
@@ -67,7 +67,7 @@ class FileIngestionCheckpointingProvider(IngestionCheckpointingProviderBase):
67
67
  logger.debug(
68
68
  f"The last committed ingestion checkpoint for pipelineName:'{pipeline_name}',"
69
69
  f" job_name:'{job_name}' found with start_time:"
70
- f" {datetime.utcfromtimestamp(latest_checkpoint.timestampMillis/1000)}"
70
+ f" {datetime.utcfromtimestamp(latest_checkpoint.timestampMillis / 1000)}"
71
71
  )
72
72
  return latest_checkpoint
73
73
  else:
@@ -33,7 +33,7 @@ from datahub.ingestion.api.decorators import (
33
33
  platform_name,
34
34
  support_status,
35
35
  )
36
- from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source
36
+ from datahub.ingestion.api.source import MetadataWorkUnitProcessor
37
37
  from datahub.ingestion.api.workunit import MetadataWorkUnit
38
38
  from datahub.ingestion.source.sql.sql_types import resolve_sql_type
39
39
  from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import (
@@ -265,11 +265,6 @@ class SupersetSource(StatefulIngestionSourceBase):
265
265
  # TODO(Gabe): how should we message about this error?
266
266
  return requests_session
267
267
 
268
- @classmethod
269
- def create(cls, config_dict: dict, ctx: PipelineContext) -> Source:
270
- config = SupersetConfig.parse_obj(config_dict)
271
- return cls(ctx, config)
272
-
273
268
  def paginate_entity_api_results(self, entity_type, page_size=100):
274
269
  current_page = 0
275
270
  total_items = page_size