acryl-datahub 0.15.0.2rc7__py3-none-any.whl → 0.15.0.3rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub might be problematic.

Files changed (161)
  1. {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3rc1.dist-info}/METADATA +2378 -2380
  2. {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3rc1.dist-info}/RECORD +161 -161
  3. datahub/__init__.py +1 -1
  4. datahub/api/entities/assertion/assertion_operator.py +3 -5
  5. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  6. datahub/api/entities/datacontract/assertion_operator.py +3 -5
  7. datahub/api/entities/dataproduct/dataproduct.py +4 -4
  8. datahub/api/entities/dataset/dataset.py +2 -1
  9. datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
  10. datahub/cli/cli_utils.py +1 -1
  11. datahub/cli/delete_cli.py +16 -2
  12. datahub/cli/docker_cli.py +6 -6
  13. datahub/cli/lite_cli.py +2 -2
  14. datahub/cli/migrate.py +3 -3
  15. datahub/cli/specific/assertions_cli.py +3 -3
  16. datahub/cli/timeline_cli.py +1 -1
  17. datahub/configuration/common.py +1 -2
  18. datahub/configuration/config_loader.py +73 -50
  19. datahub/configuration/git.py +2 -2
  20. datahub/configuration/time_window_config.py +10 -5
  21. datahub/emitter/mce_builder.py +4 -8
  22. datahub/emitter/mcp_patch_builder.py +1 -2
  23. datahub/ingestion/api/incremental_lineage_helper.py +2 -8
  24. datahub/ingestion/api/report.py +1 -2
  25. datahub/ingestion/api/source_helpers.py +1 -1
  26. datahub/ingestion/extractor/json_schema_util.py +3 -3
  27. datahub/ingestion/extractor/schema_util.py +3 -5
  28. datahub/ingestion/fs/s3_fs.py +3 -3
  29. datahub/ingestion/glossary/datahub_classifier.py +6 -4
  30. datahub/ingestion/graph/client.py +4 -6
  31. datahub/ingestion/run/pipeline.py +8 -7
  32. datahub/ingestion/run/pipeline_config.py +3 -3
  33. datahub/ingestion/source/abs/datalake_profiler_config.py +3 -3
  34. datahub/ingestion/source/abs/source.py +19 -8
  35. datahub/ingestion/source/aws/glue.py +11 -11
  36. datahub/ingestion/source/aws/s3_boto_utils.py +3 -3
  37. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  38. datahub/ingestion/source/aws/sagemaker_processors/models.py +2 -2
  39. datahub/ingestion/source/bigquery_v2/bigquery.py +3 -3
  40. datahub/ingestion/source/bigquery_v2/bigquery_audit.py +3 -3
  41. datahub/ingestion/source/bigquery_v2/bigquery_config.py +6 -6
  42. datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py +8 -4
  43. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +15 -9
  44. datahub/ingestion/source/bigquery_v2/lineage.py +9 -9
  45. datahub/ingestion/source/bigquery_v2/queries.py +1 -3
  46. datahub/ingestion/source/bigquery_v2/queries_extractor.py +3 -3
  47. datahub/ingestion/source/bigquery_v2/usage.py +3 -3
  48. datahub/ingestion/source/cassandra/cassandra.py +0 -1
  49. datahub/ingestion/source/cassandra/cassandra_utils.py +4 -4
  50. datahub/ingestion/source/confluent_schema_registry.py +6 -6
  51. datahub/ingestion/source/csv_enricher.py +29 -29
  52. datahub/ingestion/source/datahub/datahub_database_reader.py +4 -2
  53. datahub/ingestion/source/dbt/dbt_cloud.py +13 -13
  54. datahub/ingestion/source/dbt/dbt_common.py +9 -7
  55. datahub/ingestion/source/dremio/dremio_api.py +4 -4
  56. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +3 -3
  57. datahub/ingestion/source/elastic_search.py +4 -4
  58. datahub/ingestion/source/fivetran/config.py +4 -0
  59. datahub/ingestion/source/fivetran/fivetran.py +15 -5
  60. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +3 -3
  61. datahub/ingestion/source/gcs/gcs_source.py +5 -3
  62. datahub/ingestion/source/ge_data_profiler.py +4 -5
  63. datahub/ingestion/source/ge_profiling_config.py +3 -3
  64. datahub/ingestion/source/iceberg/iceberg.py +3 -3
  65. datahub/ingestion/source/identity/azure_ad.py +3 -3
  66. datahub/ingestion/source/identity/okta.py +3 -3
  67. datahub/ingestion/source/kafka/kafka.py +11 -9
  68. datahub/ingestion/source/kafka_connect/kafka_connect.py +2 -3
  69. datahub/ingestion/source/kafka_connect/sink_connectors.py +3 -3
  70. datahub/ingestion/source/kafka_connect/source_connectors.py +3 -3
  71. datahub/ingestion/source/looker/looker_common.py +19 -19
  72. datahub/ingestion/source/looker/looker_config.py +3 -3
  73. datahub/ingestion/source/looker/looker_source.py +25 -25
  74. datahub/ingestion/source/looker/looker_template_language.py +3 -3
  75. datahub/ingestion/source/looker/looker_usage.py +5 -7
  76. datahub/ingestion/source/looker/lookml_concept_context.py +6 -6
  77. datahub/ingestion/source/looker/lookml_source.py +13 -15
  78. datahub/ingestion/source/looker/view_upstream.py +5 -5
  79. datahub/ingestion/source/mlflow.py +4 -4
  80. datahub/ingestion/source/mongodb.py +6 -4
  81. datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
  82. datahub/ingestion/source/nifi.py +24 -26
  83. datahub/ingestion/source/openapi.py +9 -9
  84. datahub/ingestion/source/powerbi/config.py +12 -12
  85. datahub/ingestion/source/powerbi/m_query/parser.py +11 -11
  86. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +26 -24
  87. datahub/ingestion/source/powerbi/m_query/resolver.py +13 -13
  88. datahub/ingestion/source/powerbi/powerbi.py +6 -6
  89. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +9 -9
  90. datahub/ingestion/source/qlik_sense/qlik_api.py +1 -1
  91. datahub/ingestion/source/redshift/config.py +3 -3
  92. datahub/ingestion/source/redshift/query.py +77 -47
  93. datahub/ingestion/source/redshift/redshift.py +12 -12
  94. datahub/ingestion/source/redshift/usage.py +8 -8
  95. datahub/ingestion/source/s3/datalake_profiler_config.py +3 -3
  96. datahub/ingestion/source/s3/source.py +1 -1
  97. datahub/ingestion/source/salesforce.py +26 -25
  98. datahub/ingestion/source/schema/json_schema.py +1 -1
  99. datahub/ingestion/source/sigma/sigma.py +3 -3
  100. datahub/ingestion/source/sigma/sigma_api.py +12 -10
  101. datahub/ingestion/source/snowflake/snowflake_config.py +9 -7
  102. datahub/ingestion/source/snowflake/snowflake_connection.py +6 -6
  103. datahub/ingestion/source/snowflake/snowflake_queries.py +2 -2
  104. datahub/ingestion/source/snowflake/snowflake_schema.py +3 -3
  105. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +6 -6
  106. datahub/ingestion/source/snowflake/snowflake_tag.py +7 -7
  107. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +3 -3
  108. datahub/ingestion/source/snowflake/snowflake_utils.py +1 -2
  109. datahub/ingestion/source/snowflake/snowflake_v2.py +13 -4
  110. datahub/ingestion/source/sql/athena.py +1 -3
  111. datahub/ingestion/source/sql/clickhouse.py +8 -14
  112. datahub/ingestion/source/sql/oracle.py +1 -3
  113. datahub/ingestion/source/sql/sql_generic_profiler.py +1 -2
  114. datahub/ingestion/source/sql/teradata.py +16 -3
  115. datahub/ingestion/source/state/profiling_state_handler.py +3 -3
  116. datahub/ingestion/source/state/redundant_run_skip_handler.py +5 -7
  117. datahub/ingestion/source/state/stale_entity_removal_handler.py +3 -3
  118. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +9 -9
  119. datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
  120. datahub/ingestion/source/tableau/tableau.py +48 -49
  121. datahub/ingestion/source/unity/config.py +3 -1
  122. datahub/ingestion/source/unity/proxy.py +1 -1
  123. datahub/ingestion/source/unity/source.py +3 -3
  124. datahub/ingestion/source/unity/usage.py +3 -1
  125. datahub/ingestion/source/usage/clickhouse_usage.py +4 -4
  126. datahub/ingestion/source/usage/starburst_trino_usage.py +3 -3
  127. datahub/ingestion/source/usage/usage_common.py +1 -1
  128. datahub/ingestion/transformer/add_dataset_dataproduct.py +4 -4
  129. datahub/ingestion/transformer/add_dataset_properties.py +3 -3
  130. datahub/ingestion/transformer/add_dataset_schema_tags.py +3 -3
  131. datahub/ingestion/transformer/add_dataset_schema_terms.py +3 -3
  132. datahub/ingestion/transformer/dataset_domain_based_on_tags.py +4 -4
  133. datahub/ingestion/transformer/extract_ownership_from_tags.py +3 -3
  134. datahub/ingestion/transformer/tags_to_terms.py +7 -7
  135. datahub/integrations/assertion/snowflake/compiler.py +10 -10
  136. datahub/lite/duckdb_lite.py +12 -10
  137. datahub/metadata/_schema_classes.py +1 -1
  138. datahub/metadata/schema.avsc +6 -2
  139. datahub/metadata/schemas/DataProcessInstanceInput.avsc +4 -2
  140. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -0
  141. datahub/secret/secret_common.py +14 -8
  142. datahub/specific/aspect_helpers/custom_properties.py +1 -2
  143. datahub/sql_parsing/schema_resolver.py +5 -10
  144. datahub/sql_parsing/sql_parsing_aggregator.py +16 -16
  145. datahub/sql_parsing/sqlglot_lineage.py +5 -4
  146. datahub/sql_parsing/sqlglot_utils.py +3 -2
  147. datahub/telemetry/stats.py +1 -2
  148. datahub/testing/mcp_diff.py +1 -1
  149. datahub/utilities/file_backed_collections.py +10 -10
  150. datahub/utilities/hive_schema_to_avro.py +2 -2
  151. datahub/utilities/logging_manager.py +2 -2
  152. datahub/utilities/lossy_collections.py +3 -3
  153. datahub/utilities/mapping.py +3 -3
  154. datahub/utilities/serialized_lru_cache.py +3 -1
  155. datahub/utilities/sqlalchemy_query_combiner.py +6 -6
  156. datahub/utilities/sqllineage_patch.py +1 -1
  157. datahub/utilities/stats_collections.py +3 -1
  158. datahub/utilities/urns/urn_iter.py +2 -2
  159. {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3rc1.dist-info}/WHEEL +0 -0
  160. {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3rc1.dist-info}/entry_points.txt +0 -0
  161. {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3rc1.dist-info}/top_level.txt +0 -0
datahub/__init__.py CHANGED
@@ -3,7 +3,7 @@ import warnings
 
  # Published at https://pypi.org/project/acryl-datahub/.
  __package_name__ = "acryl-datahub"
- __version__ = "0.15.0.2rc7"
+ __version__ = "0.15.0.3rc1"
 
 
  def is_dev_mode() -> bool:
datahub/api/entities/assertion/assertion_operator.py CHANGED
@@ -20,15 +20,13 @@ class Operator(Protocol):
 
  operator: str
 
- def id(self) -> str:
- ...
+ def id(self) -> str: ...
 
- def generate_parameters(self) -> AssertionStdParametersClass:
- ...
+ def generate_parameters(self) -> AssertionStdParametersClass: ...
 
 
  def _generate_assertion_std_parameter(
- value: Union[str, int, float, list]
+ value: Union[str, int, float, list],
  ) -> AssertionStdParameterClass:
  if isinstance(value, str):
  return AssertionStdParameterClass(
datahub/api/entities/corpgroup/corpgroup.py CHANGED
@@ -114,7 +114,7 @@ class CorpGroup(BaseModel):
  )
  urns_created.add(m.urn)
  else:
- logger.warn(
+ logger.warning(
  f"Suppressing emission of member {m.urn} before we already emitted metadata for it"
  )
 
datahub/api/entities/datacontract/assertion_operator.py CHANGED
@@ -19,15 +19,13 @@ class Operator(Protocol):
 
  operator: str
 
- def id(self) -> str:
- ...
+ def id(self) -> str: ...
 
- def generate_parameters(self) -> AssertionStdParametersClass:
- ...
+ def generate_parameters(self) -> AssertionStdParametersClass: ...
 
 
  def _generate_assertion_std_parameter(
- value: Union[str, int, float]
+ value: Union[str, int, float],
  ) -> AssertionStdParameterClass:
  if isinstance(value, str):
  return AssertionStdParameterClass(
datahub/api/entities/dataproduct/dataproduct.py CHANGED
@@ -321,9 +321,9 @@ class DataProduct(ConfigModel):
 
  @classmethod
  def from_datahub(cls, graph: DataHubGraph, id: str) -> DataProduct:
- data_product_properties: Optional[
- DataProductPropertiesClass
- ] = graph.get_aspect(id, DataProductPropertiesClass)
+ data_product_properties: Optional[DataProductPropertiesClass] = (
+ graph.get_aspect(id, DataProductPropertiesClass)
+ )
  domains: Optional[DomainsClass] = graph.get_aspect(id, DomainsClass)
  assert domains, "Data Product must have an associated domain. Found none."
  owners: Optional[OwnershipClass] = graph.get_aspect(id, OwnershipClass)
@@ -438,7 +438,7 @@ class DataProduct(ConfigModel):
  for replace_index, replace_value in patches_replace.items():
  list_to_manipulate[replace_index] = replace_value
 
- for drop_index, drop_value in patches_drop.items():
+ for drop_value in patches_drop.values():
  list_to_manipulate.remove(drop_value)
 
  for add_value in patches_add:
datahub/api/entities/dataset/dataset.py CHANGED
@@ -266,7 +266,8 @@ class Dataset(BaseModel):
  if self.schema_metadata.fields:
  for field in self.schema_metadata.fields:
  field_urn = field.urn or make_schema_field_urn(
- self.urn, field.id # type: ignore[arg-type]
+ self.urn, # type: ignore[arg-type]
+ field.id, # type: ignore[arg-type]
  )
  assert field_urn.startswith("urn:li:schemaField:")
 
datahub/api/entities/structuredproperties/structuredproperties.py CHANGED
@@ -118,9 +118,9 @@ class StructuredProperties(ConfigModel):
  id = StructuredPropertyUrn.from_string(self.urn).id
  if self.qualified_name is not None:
  # ensure that qualified name and ID match
- assert (
- self.qualified_name == id
- ), "ID in the urn and the qualified_name must match"
+ assert self.qualified_name == id, (
+ "ID in the urn and the qualified_name must match"
+ )
  return id
 
  @validator("urn", pre=True, always=True)
@@ -184,9 +184,9 @@ class StructuredProperties(ConfigModel):
 
  @classmethod
  def from_datahub(cls, graph: DataHubGraph, urn: str) -> "StructuredProperties":
- structured_property: Optional[
- StructuredPropertyDefinitionClass
- ] = graph.get_aspect(urn, StructuredPropertyDefinitionClass)
+ structured_property: Optional[StructuredPropertyDefinitionClass] = (
+ graph.get_aspect(urn, StructuredPropertyDefinitionClass)
+ )
  if structured_property is None:
  raise Exception(
  "StructuredPropertyDefinition aspect is None. Unable to create structured property."
datahub/cli/cli_utils.py CHANGED
@@ -412,7 +412,7 @@ def generate_access_token(
  def ensure_has_system_metadata(
  event: Union[
  MetadataChangeProposal, MetadataChangeProposalWrapper, MetadataChangeEvent
- ]
+ ],
  ) -> None:
  if event.systemMetadata is None:
  event.systemMetadata = SystemMetadataClass()
datahub/cli/delete_cli.py CHANGED
@@ -265,6 +265,11 @@ def undo_by_filter(
  type=str,
  help="Urn of the entity to delete, for single entity deletion",
  )
+ @click.option(
+ "--urn-file",
+ required=False,
+ help="Path of file with urns (one per line) to be deleted",
+ )
  @click.option(
  "-a",
  "--aspect",
@@ -353,6 +358,7 @@ def undo_by_filter(
  @telemetry.with_telemetry()
  def by_filter(
  urn: Optional[str],
+ urn_file: Optional[str],
  aspect: Optional[str],
  force: bool,
  soft: bool,
@@ -373,6 +379,7 @@ def by_filter(
  # Validate the cli arguments.
  _validate_user_urn_and_filters(
  urn=urn,
+ urn_file=urn_file,
  entity_type=entity_type,
  platform=platform,
  env=env,
@@ -429,6 +436,12 @@ def by_filter(
  batch_size=batch_size,
  )
  )
+ elif urn_file:
+ with open(urn_file, "r") as r:
+ urns = []
+ for line in r.readlines():
+ urn = line.strip().strip('"')
+ urns.append(urn)
  else:
  urns = list(
  graph.get_urns_by_filter(
@@ -537,6 +550,7 @@ def _delete_urns_parallel(
 
  def _validate_user_urn_and_filters(
  urn: Optional[str],
+ urn_file: Optional[str],
  entity_type: Optional[str],
  platform: Optional[str],
  env: Optional[str],
@@ -549,9 +563,9 @@ def _validate_user_urn_and_filters(
  raise click.UsageError(
  "You cannot provide both an urn and a filter rule (entity-type / platform / env / query)."
  )
- elif not urn and not (entity_type or platform or env or query):
+ elif not urn and not urn_file and not (entity_type or platform or env or query):
  raise click.UsageError(
- "You must provide either an urn or at least one filter (entity-type / platform / env / query) in order to delete entities."
+ "You must provide either an urn or urn_file or at least one filter (entity-type / platform / env / query) in order to delete entities."
  )
  elif query:
  logger.warning(
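Note: the delete_cli.py hunks above add a `--urn-file` option that reads one urn per line and strips surrounding double quotes. A small illustrative sketch of producing such a file; the urns and the filename are placeholders, not taken from this diff:

```python
# Illustrative only: write a urn file in the format the new --urn-file option reads
# (one urn per line; surrounding double quotes are tolerated and stripped by the CLI).
urns = [
    "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)",
    '"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)"',
]
with open("urns_to_delete.txt", "w") as f:
    f.write("\n".join(urns) + "\n")
```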
datahub/cli/docker_cli.py CHANGED
@@ -296,9 +296,9 @@ def _restore(
  restore_indices: Optional[bool],
  primary_restore_file: Optional[str],
  ) -> int:
- assert (
- restore_primary or restore_indices
- ), "Either restore_primary or restore_indices must be set"
+ assert restore_primary or restore_indices, (
+ "Either restore_primary or restore_indices must be set"
+ )
  msg = "datahub> "
  if restore_primary:
  msg += f"Will restore primary database from {primary_restore_file}. "
@@ -314,9 +314,9 @@ def _restore(
  assert primary_restore_file
  resolved_restore_file = os.path.expanduser(primary_restore_file)
  logger.info(f"Restoring primary db from backup at {resolved_restore_file}")
- assert os.path.exists(
- resolved_restore_file
- ), f"File {resolved_restore_file} does not exist"
+ assert os.path.exists(resolved_restore_file), (
+ f"File {resolved_restore_file} does not exist"
+ )
  with open(resolved_restore_file) as fp:
  result = subprocess.run(
  [
datahub/cli/lite_cli.py CHANGED
@@ -176,7 +176,7 @@ def get(
  )
  )
  end_time = time.time()
- logger.debug(f"Time taken: {int((end_time - start_time)*1000.0)} millis")
+ logger.debug(f"Time taken: {int((end_time - start_time) * 1000.0)} millis")
 
 
  @lite.command()
@@ -228,7 +228,7 @@ def ls(path: Optional[str]) -> None:
  try:
  browseables = lite.ls(path)
  end_time = time.time()
- logger.debug(f"Time taken: {int((end_time - start_time)*1000.0)} millis")
+ logger.debug(f"Time taken: {int((end_time - start_time) * 1000.0)} millis")
  auto_complete: List[AutoComplete] = [
  b.auto_complete for b in browseables if b.auto_complete is not None
  ]
datahub/cli/migrate.py CHANGED
@@ -426,9 +426,9 @@ def batch_get_ids(
  entities_yielded += 1
  log.debug(f"yielding {x}")
  yield x
- assert (
- entities_yielded == num_entities
- ), "Did not delete all entities, try running this command again!"
+ assert entities_yielded == num_entities, (
+ "Did not delete all entities, try running this command again!"
+ )
  else:
  log.error(f"Failed to execute batch get with {str(response.content)}")
  response.raise_for_status()
datahub/cli/specific/assertions_cli.py CHANGED
@@ -136,9 +136,9 @@ def extras_list_to_dict(extras: List[str]) -> Dict[str, str]:
  extra_properties: Dict[str, str] = dict()
  for x in extras:
  parts = x.split("=")
- assert (
- len(parts) == 2
- ), f"Invalid value for extras {x}, should be in format key=value"
+ assert len(parts) == 2, (
+ f"Invalid value for extras {x}, should be in format key=value"
+ )
  extra_properties[parts[0]] = parts[1]
  return extra_properties
 
datahub/cli/timeline_cli.py CHANGED
@@ -50,7 +50,7 @@ def pretty_id(id: Optional[str]) -> str:
  if id.startswith("urn:li:dataset"):
  dataset_key = dataset_urn_to_key(id)
  if dataset_key:
- return f"{click.style('dataset', fg='cyan')}:{click.style(dataset_key.platform[len('urn:li:dataPlatform:'):], fg='white')}:{click.style(dataset_key.name, fg='white')}"
+ return f"{click.style('dataset', fg='cyan')}:{click.style(dataset_key.platform[len('urn:li:dataPlatform:') :], fg='white')}:{click.style(dataset_key.name, fg='white')}"
  # failed to prettify, return original
  return id
 
datahub/configuration/common.py CHANGED
@@ -200,8 +200,7 @@ class IgnorableError(MetaError):
 
  @runtime_checkable
  class ExceptionWithProps(Protocol):
- def get_telemetry_props(self) -> Dict[str, Any]:
- ...
+ def get_telemetry_props(self) -> Dict[str, Any]: ...
 
 
  def should_show_stack_trace(exc: Exception) -> bool:
datahub/configuration/config_loader.py CHANGED
@@ -19,64 +19,87 @@ from datahub.configuration.yaml import YamlConfigurationMechanism
  Environ = Mapping[str, str]
 
 
- def _resolve_element(element: str, environ: Environ) -> str:
- if re.search(r"(\$\{).+(\})", element):
- return expand(element, nounset=True, environ=environ)
- elif element.startswith("$"):
- try:
- return expand(element, nounset=True, environ=environ)
- except UnboundVariable:
- return element
- else:
- return element
-
-
- def _resolve_list(ele_list: list, environ: Environ) -> list:
- new_v: list = []
- for ele in ele_list:
- if isinstance(ele, str):
- new_v.append(_resolve_element(ele, environ=environ))
- elif isinstance(ele, list):
- new_v.append(_resolve_list(ele, environ=environ))
- elif isinstance(ele, dict):
- new_v.append(resolve_env_variables(ele, environ=environ))
- else:
- new_v.append(ele)
- return new_v
-
-
  def resolve_env_variables(config: dict, environ: Environ) -> dict:
- new_dict: Dict[Any, Any] = {}
- for k, v in config.items():
- if isinstance(v, dict):
- new_dict[k] = resolve_env_variables(v, environ=environ)
- elif isinstance(v, list):
- new_dict[k] = _resolve_list(v, environ=environ)
- elif isinstance(v, str):
- new_dict[k] = _resolve_element(v, environ=environ)
- else:
- new_dict[k] = v
- return new_dict
+ # TODO: This is kept around for backwards compatibility.
+ return EnvResolver(environ).resolve(config)
 
 
  def list_referenced_env_variables(config: dict) -> Set[str]:
- # This is a bit of a hack, but expandvars does a bunch of escaping
- # and other logic that we don't want to duplicate here.
+ # TODO: This is kept around for backwards compatibility.
+ return EnvResolver(environ=os.environ).list_referenced_variables(config)
+
+
+ class EnvResolver:
+ def __init__(self, environ: Environ, strict_env_syntax: bool = False):
+ self.environ = environ
+ self.strict_env_syntax = strict_env_syntax
 
- vars = set()
+ def resolve(self, config: dict) -> dict:
+ return self._resolve_dict(config)
 
- def mock_get_env(key: str, default: Optional[str] = None) -> str:
- vars.add(key)
- if default is not None:
- return default
- return "mocked_value"
+ @classmethod
+ def list_referenced_variables(
+ cls,
+ config: dict,
+ strict_env_syntax: bool = False,
+ ) -> Set[str]:
+ # This is a bit of a hack, but expandvars does a bunch of escaping
+ # and other logic that we don't want to duplicate here.
 
- mock = unittest.mock.MagicMock()
- mock.get.side_effect = mock_get_env
+ vars = set()
 
- resolve_env_variables(config, environ=mock)
+ def mock_get_env(key: str, default: Optional[str] = None) -> str:
+ vars.add(key)
+ if default is not None:
+ return default
+ return "mocked_value"
+
+ mock = unittest.mock.MagicMock()
+ mock.get.side_effect = mock_get_env
+
+ resolver = EnvResolver(environ=mock, strict_env_syntax=strict_env_syntax)
+ resolver._resolve_dict(config)
+
+ return vars
+
+ def _resolve_element(self, element: str) -> str:
+ if re.search(r"(\$\{).+(\})", element):
+ return expand(element, nounset=True, environ=self.environ)
+ elif not self.strict_env_syntax and element.startswith("$"):
+ try:
+ return expand(element, nounset=True, environ=self.environ)
+ except UnboundVariable:
+ # TODO: This fallback is kept around for backwards compatibility, but
+ # doesn't make a ton of sense from first principles.
+ return element
+ else:
+ return element
 
- return vars
+ def _resolve_list(self, ele_list: list) -> list:
+ new_v: list = []
+ for ele in ele_list:
+ if isinstance(ele, str):
+ new_v.append(self._resolve_element(ele))
+ elif isinstance(ele, list):
+ new_v.append(self._resolve_list(ele))
+ elif isinstance(ele, dict):
+ new_v.append(self._resolve_dict(ele))
+ else:
+ new_v.append(ele)
+ return new_v
+
+ def _resolve_dict(self, config: dict) -> dict:
+ new_dict: Dict[Any, Any] = {}
+ for k, v in config.items():
+ if isinstance(v, dict):
+ new_dict[k] = self._resolve_dict(v)
+ elif isinstance(v, list):
+ new_dict[k] = self._resolve_list(v)
+ elif isinstance(v, str):
+ new_dict[k] = self._resolve_element(v)
+ else:
+ new_dict[k] = v
+ return new_dict
 
 
  WRITE_TO_FILE_DIRECTIVE_PREFIX = "__DATAHUB_TO_FILE_"
@@ -159,7 +182,7 @@ def load_config_file(
 
  config = raw_config.copy()
  if resolve_env_vars:
- config = resolve_env_variables(config, environ=os.environ)
+ config = EnvResolver(environ=os.environ).resolve(config)
  if process_directives:
  config = _process_directives(config)
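Note: the config_loader.py hunk above replaces the module-level helpers with an `EnvResolver` class. A minimal usage sketch based only on the calls visible in this diff; the import path is assumed from the patched file's location, and the config dict and variable name are placeholders:

```python
# Sketch of the new EnvResolver API as it appears in this diff (not official docs).
from datahub.configuration.config_loader import EnvResolver  # assumed import path

config = {"sink": {"config": {"token": "${DATAHUB_TOKEN}"}}}

# Expand ${VAR} references against an explicit mapping (os.environ also works).
resolved = EnvResolver(environ={"DATAHUB_TOKEN": "dummy-token"}).resolve(config)
print(resolved["sink"]["config"]["token"])  # dummy-token

# List the variables a config references without resolving them;
# strict_env_syntax=True restricts matching to the ${VAR} form.
print(EnvResolver.list_referenced_variables(config, strict_env_syntax=True))
# {'DATAHUB_TOKEN'}
```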
datahub/configuration/git.py CHANGED
@@ -121,9 +121,9 @@ class GitInfo(GitReference):
 
  repo: str = values["repo"]
  if repo.startswith(_GITHUB_PREFIX):
- return f"git@github.com:{repo[len(_GITHUB_PREFIX):]}.git"
+ return f"git@github.com:{repo[len(_GITHUB_PREFIX) :]}.git"
  elif repo.startswith(_GITLAB_PREFIX):
- return f"git@gitlab.com:{repo[len(_GITLAB_PREFIX):]}.git"
+ return f"git@gitlab.com:{repo[len(_GITLAB_PREFIX) :]}.git"
  else:
  raise ValueError(
  "Unable to infer repo_ssh_locator from repo. Please set repo_ssh_locator manually."
datahub/configuration/time_window_config.py CHANGED
@@ -47,7 +47,10 @@ class BaseTimeWindowConfig(ConfigModel):
  default_factory=lambda: datetime.now(tz=timezone.utc),
  description="Latest date of lineage/usage to consider. Default: Current time in UTC",
  )
- start_time: datetime = Field(default=None, description="Earliest date of lineage/usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`). You can also specify relative time with respect to end_time such as '-7 days' Or '-7d'.") # type: ignore
+ start_time: datetime = Field(
+ default=None,
+ description="Earliest date of lineage/usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`). You can also specify relative time with respect to end_time such as '-7 days' Or '-7d'.",
+ ) # type: ignore
 
  @pydantic.validator("start_time", pre=True, always=True)
  def default_start_time(
@@ -63,12 +66,14 @@ class BaseTimeWindowConfig(ConfigModel):
  # This is where start_time str is resolved to datetime
  try:
  delta = parse_relative_timespan(v)
- assert delta < timedelta(
- 0
- ), "Relative start time should start with minus sign (-) e.g. '-2 days'."
+ assert delta < timedelta(0), (
+ "Relative start time should start with minus sign (-) e.g. '-2 days'."
+ )
  assert abs(delta) >= get_bucket_duration_delta(
  values["bucket_duration"]
- ), "Relative start time should be in terms of configured bucket duration. e.g '-2 days' or '-2 hours'."
+ ), (
+ "Relative start time should be in terms of configured bucket duration. e.g '-2 days' or '-2 hours'."
+ )
 
  # The end_time's default value is not yet populated, in which case
  # we can just manually generate it here.
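Note: the time_window_config.py hunk above reflows the `start_time` field, whose `default_start_time` validator accepts relative values such as '-2 days' resolved against `end_time`. A rough sketch of supplying such a value, assuming the standard pydantic `parse_obj` entrypoint on this config model (not shown in this diff):

```python
from datahub.configuration.time_window_config import BaseTimeWindowConfig

# Per the assertions in the validator above, the relative value must be negative
# and at least one bucket_duration long.
config = BaseTimeWindowConfig.parse_obj({"start_time": "-2 days"})
print(config.start_time, config.end_time)
```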
datahub/emitter/mce_builder.py CHANGED
@@ -88,13 +88,11 @@ def get_sys_time() -> int:
 
 
  @overload
- def make_ts_millis(ts: None) -> None:
- ...
+ def make_ts_millis(ts: None) -> None: ...
 
 
  @overload
- def make_ts_millis(ts: datetime) -> int:
- ...
+ def make_ts_millis(ts: datetime) -> int: ...
 
 
  def make_ts_millis(ts: Optional[datetime]) -> Optional[int]:
@@ -105,13 +103,11 @@ def make_ts_millis(ts: Optional[datetime]) -> Optional[int]:
 
 
  @overload
- def parse_ts_millis(ts: float) -> datetime:
- ...
+ def parse_ts_millis(ts: float) -> datetime: ...
 
 
  @overload
- def parse_ts_millis(ts: None) -> None:
- ...
+ def parse_ts_millis(ts: None) -> None: ...
 
 
  def parse_ts_millis(ts: Optional[float]) -> Optional[datetime]:
datahub/emitter/mcp_patch_builder.py CHANGED
@@ -33,8 +33,7 @@ from datahub.utilities.urns.urn import guess_entity_type
 
  @runtime_checkable
  class SupportsToObj(Protocol):
- def to_obj(self) -> Any:
- ...
+ def to_obj(self) -> Any: ...
 
 
  def _recursive_to_obj(obj: Any) -> Any:
datahub/ingestion/api/incremental_lineage_helper.py CHANGED
@@ -55,15 +55,9 @@ def convert_chart_info_to_patch(
  aspect.externalUrl
  ).set_type(aspect.type).set_title(aspect.title).set_access(
  aspect.access
- ).set_last_modified(
- aspect.lastModified
- ).set_last_refreshed(
+ ).set_last_modified(aspect.lastModified).set_last_refreshed(
  aspect.lastRefreshed
- ).set_description(
- aspect.description
- ).add_inputs(
- aspect.inputs
- )
+ ).set_description(aspect.description).add_inputs(aspect.inputs)
 
  values = patch_builder.build()
  if values:
datahub/ingestion/api/report.py CHANGED
@@ -21,8 +21,7 @@ LogLevel = Literal["ERROR", "WARNING", "INFO", "DEBUG"]
 
  @runtime_checkable
  class SupportsAsObj(Protocol):
- def as_obj(self) -> dict:
- ...
+ def as_obj(self) -> dict: ...
 
 
  @dataclass
datahub/ingestion/api/source_helpers.py CHANGED
@@ -48,7 +48,7 @@ logger = logging.getLogger(__name__)
 
 
  def auto_workunit(
- stream: Iterable[Union[MetadataChangeEventClass, MetadataChangeProposalWrapper]]
+ stream: Iterable[Union[MetadataChangeEventClass, MetadataChangeProposalWrapper]],
  ) -> Iterable[MetadataWorkUnit]:
  """Convert a stream of MCEs and MCPs to a stream of :class:`MetadataWorkUnit`s."""
 
datahub/ingestion/extractor/json_schema_util.py CHANGED
@@ -131,9 +131,9 @@ class FieldPath:
  for i, schema_type in enumerate(p.schema_types):
  if schema_type == schema_str:
  # return the corresponding type for the schema that's a match
- assert (
- len(p.type) > i
- ), f"p.type({len(p.type)})) and p.schema_types({len(p.schema_types)}) should have the same length"
+ assert len(p.type) > i, (
+ f"p.type({len(p.type)})) and p.schema_types({len(p.schema_types)}) should have the same length"
+ )
  return p.type[i]
  return None
 
datahub/ingestion/extractor/schema_util.py CHANGED
@@ -263,15 +263,13 @@ class AvroToMceSchemaConverter:
  @overload
  def _get_underlying_type_if_option_as_union(
  schema: SchemaOrField, default: SchemaOrField
- ) -> SchemaOrField:
- ...
+ ) -> SchemaOrField: ...
 
  @staticmethod
  @overload
  def _get_underlying_type_if_option_as_union(
  schema: SchemaOrField, default: Optional[SchemaOrField] = None
- ) -> Optional[SchemaOrField]:
- ...
+ ) -> Optional[SchemaOrField]: ...
 
  @staticmethod
  def _get_underlying_type_if_option_as_union(
@@ -386,7 +384,7 @@ class AvroToMceSchemaConverter:
 
  if "deprecated" in merged_props:
  description = (
- f"<span style=\"color:red\">DEPRECATED: {merged_props['deprecated']}</span>\n"
+ f'<span style="color:red">DEPRECATED: {merged_props["deprecated"]}</span>\n'
  + description
  if description
  else ""
datahub/ingestion/fs/s3_fs.py CHANGED
@@ -17,9 +17,9 @@ def parse_s3_path(path: str) -> "S3Path":
 
  def assert_ok_status(s3_response):
  is_ok = s3_response["ResponseMetadata"]["HTTPStatusCode"] == 200
- assert (
- is_ok
- ), f"Failed to fetch S3 object, error message: {s3_response['Error']['Message']}"
+ assert is_ok, (
+ f"Failed to fetch S3 object, error message: {s3_response['Error']['Message']}"
+ )
 
 
  @dataclass
datahub/ingestion/glossary/datahub_classifier.py CHANGED
@@ -148,9 +148,9 @@ class DataHubClassifierConfig(ConfigModel):
  weight,
  ) in custom_infotype_config.Prediction_Factors_and_Weights.dict().items():
  if weight > 0:
- assert (
- getattr(custom_infotype_config, factor) is not None
- ), f"Missing Configuration for Prediction Factor {factor} for Custom Info Type {custom_infotype}"
+ assert getattr(custom_infotype_config, factor) is not None, (
+ f"Missing Configuration for Prediction Factor {factor} for Custom Info Type {custom_infotype}"
+ )
 
  # Custom infotype supports only regex based prediction for column values
  if custom_infotype_config.Prediction_Factors_and_Weights.Values > 0:
@@ -158,7 +158,9 @@ class DataHubClassifierConfig(ConfigModel):
  assert (
  custom_infotype_config.Values.prediction_type
  == ValuePredictionType.REGEX
- ), f"Invalid Prediction Type for Values for Custom Info Type {custom_infotype}. Only `regex` is supported."
+ ), (
+ f"Invalid Prediction Type for Values for Custom Info Type {custom_infotype}. Only `regex` is supported."
+ )
 
  return info_types_config