acryl-datahub 0.15.0.2rc7__py3-none-any.whl → 0.15.0.3rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3rc1.dist-info}/METADATA +2378 -2380
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3rc1.dist-info}/RECORD +161 -161
- datahub/__init__.py +1 -1
- datahub/api/entities/assertion/assertion_operator.py +3 -5
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/assertion_operator.py +3 -5
- datahub/api/entities/dataproduct/dataproduct.py +4 -4
- datahub/api/entities/dataset/dataset.py +2 -1
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/cli/cli_utils.py +1 -1
- datahub/cli/delete_cli.py +16 -2
- datahub/cli/docker_cli.py +6 -6
- datahub/cli/lite_cli.py +2 -2
- datahub/cli/migrate.py +3 -3
- datahub/cli/specific/assertions_cli.py +3 -3
- datahub/cli/timeline_cli.py +1 -1
- datahub/configuration/common.py +1 -2
- datahub/configuration/config_loader.py +73 -50
- datahub/configuration/git.py +2 -2
- datahub/configuration/time_window_config.py +10 -5
- datahub/emitter/mce_builder.py +4 -8
- datahub/emitter/mcp_patch_builder.py +1 -2
- datahub/ingestion/api/incremental_lineage_helper.py +2 -8
- datahub/ingestion/api/report.py +1 -2
- datahub/ingestion/api/source_helpers.py +1 -1
- datahub/ingestion/extractor/json_schema_util.py +3 -3
- datahub/ingestion/extractor/schema_util.py +3 -5
- datahub/ingestion/fs/s3_fs.py +3 -3
- datahub/ingestion/glossary/datahub_classifier.py +6 -4
- datahub/ingestion/graph/client.py +4 -6
- datahub/ingestion/run/pipeline.py +8 -7
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/source/abs/datalake_profiler_config.py +3 -3
- datahub/ingestion/source/abs/source.py +19 -8
- datahub/ingestion/source/aws/glue.py +11 -11
- datahub/ingestion/source/aws/s3_boto_utils.py +3 -3
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +2 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +6 -6
- datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py +8 -4
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +15 -9
- datahub/ingestion/source/bigquery_v2/lineage.py +9 -9
- datahub/ingestion/source/bigquery_v2/queries.py +1 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +3 -3
- datahub/ingestion/source/bigquery_v2/usage.py +3 -3
- datahub/ingestion/source/cassandra/cassandra.py +0 -1
- datahub/ingestion/source/cassandra/cassandra_utils.py +4 -4
- datahub/ingestion/source/confluent_schema_registry.py +6 -6
- datahub/ingestion/source/csv_enricher.py +29 -29
- datahub/ingestion/source/datahub/datahub_database_reader.py +4 -2
- datahub/ingestion/source/dbt/dbt_cloud.py +13 -13
- datahub/ingestion/source/dbt/dbt_common.py +9 -7
- datahub/ingestion/source/dremio/dremio_api.py +4 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +3 -3
- datahub/ingestion/source/elastic_search.py +4 -4
- datahub/ingestion/source/fivetran/config.py +4 -0
- datahub/ingestion/source/fivetran/fivetran.py +15 -5
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +3 -3
- datahub/ingestion/source/gcs/gcs_source.py +5 -3
- datahub/ingestion/source/ge_data_profiler.py +4 -5
- datahub/ingestion/source/ge_profiling_config.py +3 -3
- datahub/ingestion/source/iceberg/iceberg.py +3 -3
- datahub/ingestion/source/identity/azure_ad.py +3 -3
- datahub/ingestion/source/identity/okta.py +3 -3
- datahub/ingestion/source/kafka/kafka.py +11 -9
- datahub/ingestion/source/kafka_connect/kafka_connect.py +2 -3
- datahub/ingestion/source/kafka_connect/sink_connectors.py +3 -3
- datahub/ingestion/source/kafka_connect/source_connectors.py +3 -3
- datahub/ingestion/source/looker/looker_common.py +19 -19
- datahub/ingestion/source/looker/looker_config.py +3 -3
- datahub/ingestion/source/looker/looker_source.py +25 -25
- datahub/ingestion/source/looker/looker_template_language.py +3 -3
- datahub/ingestion/source/looker/looker_usage.py +5 -7
- datahub/ingestion/source/looker/lookml_concept_context.py +6 -6
- datahub/ingestion/source/looker/lookml_source.py +13 -15
- datahub/ingestion/source/looker/view_upstream.py +5 -5
- datahub/ingestion/source/mlflow.py +4 -4
- datahub/ingestion/source/mongodb.py +6 -4
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +24 -26
- datahub/ingestion/source/openapi.py +9 -9
- datahub/ingestion/source/powerbi/config.py +12 -12
- datahub/ingestion/source/powerbi/m_query/parser.py +11 -11
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +26 -24
- datahub/ingestion/source/powerbi/m_query/resolver.py +13 -13
- datahub/ingestion/source/powerbi/powerbi.py +6 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +9 -9
- datahub/ingestion/source/qlik_sense/qlik_api.py +1 -1
- datahub/ingestion/source/redshift/config.py +3 -3
- datahub/ingestion/source/redshift/query.py +77 -47
- datahub/ingestion/source/redshift/redshift.py +12 -12
- datahub/ingestion/source/redshift/usage.py +8 -8
- datahub/ingestion/source/s3/datalake_profiler_config.py +3 -3
- datahub/ingestion/source/s3/source.py +1 -1
- datahub/ingestion/source/salesforce.py +26 -25
- datahub/ingestion/source/schema/json_schema.py +1 -1
- datahub/ingestion/source/sigma/sigma.py +3 -3
- datahub/ingestion/source/sigma/sigma_api.py +12 -10
- datahub/ingestion/source/snowflake/snowflake_config.py +9 -7
- datahub/ingestion/source/snowflake/snowflake_connection.py +6 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +6 -6
- datahub/ingestion/source/snowflake/snowflake_tag.py +7 -7
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_utils.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_v2.py +13 -4
- datahub/ingestion/source/sql/athena.py +1 -3
- datahub/ingestion/source/sql/clickhouse.py +8 -14
- datahub/ingestion/source/sql/oracle.py +1 -3
- datahub/ingestion/source/sql/sql_generic_profiler.py +1 -2
- datahub/ingestion/source/sql/teradata.py +16 -3
- datahub/ingestion/source/state/profiling_state_handler.py +3 -3
- datahub/ingestion/source/state/redundant_run_skip_handler.py +5 -7
- datahub/ingestion/source/state/stale_entity_removal_handler.py +3 -3
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +9 -9
- datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/tableau/tableau.py +48 -49
- datahub/ingestion/source/unity/config.py +3 -1
- datahub/ingestion/source/unity/proxy.py +1 -1
- datahub/ingestion/source/unity/source.py +3 -3
- datahub/ingestion/source/unity/usage.py +3 -1
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +3 -3
- datahub/ingestion/source/usage/usage_common.py +1 -1
- datahub/ingestion/transformer/add_dataset_dataproduct.py +4 -4
- datahub/ingestion/transformer/add_dataset_properties.py +3 -3
- datahub/ingestion/transformer/add_dataset_schema_tags.py +3 -3
- datahub/ingestion/transformer/add_dataset_schema_terms.py +3 -3
- datahub/ingestion/transformer/dataset_domain_based_on_tags.py +4 -4
- datahub/ingestion/transformer/extract_ownership_from_tags.py +3 -3
- datahub/ingestion/transformer/tags_to_terms.py +7 -7
- datahub/integrations/assertion/snowflake/compiler.py +10 -10
- datahub/lite/duckdb_lite.py +12 -10
- datahub/metadata/_schema_classes.py +1 -1
- datahub/metadata/schema.avsc +6 -2
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +4 -2
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -0
- datahub/secret/secret_common.py +14 -8
- datahub/specific/aspect_helpers/custom_properties.py +1 -2
- datahub/sql_parsing/schema_resolver.py +5 -10
- datahub/sql_parsing/sql_parsing_aggregator.py +16 -16
- datahub/sql_parsing/sqlglot_lineage.py +5 -4
- datahub/sql_parsing/sqlglot_utils.py +3 -2
- datahub/telemetry/stats.py +1 -2
- datahub/testing/mcp_diff.py +1 -1
- datahub/utilities/file_backed_collections.py +10 -10
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/logging_manager.py +2 -2
- datahub/utilities/lossy_collections.py +3 -3
- datahub/utilities/mapping.py +3 -3
- datahub/utilities/serialized_lru_cache.py +3 -1
- datahub/utilities/sqlalchemy_query_combiner.py +6 -6
- datahub/utilities/sqllineage_patch.py +1 -1
- datahub/utilities/stats_collections.py +3 -1
- datahub/utilities/urns/urn_iter.py +2 -2
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3rc1.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3rc1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3rc1.dist-info}/top_level.txt +0 -0
datahub/__init__.py
CHANGED
datahub/api/entities/assertion/assertion_operator.py
CHANGED
@@ -20,15 +20,13 @@ class Operator(Protocol):
 
     operator: str
 
-    def id(self) -> str:
-        ...
+    def id(self) -> str: ...
 
-    def generate_parameters(self) -> AssertionStdParametersClass:
-        ...
+    def generate_parameters(self) -> AssertionStdParametersClass: ...
 
 
 def _generate_assertion_std_parameter(
-    value: Union[str, int, float, list]
+    value: Union[str, int, float, list],
 ) -> AssertionStdParameterClass:
     if isinstance(value, str):
         return AssertionStdParameterClass(
datahub/api/entities/datacontract/assertion_operator.py
CHANGED
@@ -19,15 +19,13 @@ class Operator(Protocol):
 
     operator: str
 
-    def id(self) -> str:
-        ...
+    def id(self) -> str: ...
 
-    def generate_parameters(self) -> AssertionStdParametersClass:
-        ...
+    def generate_parameters(self) -> AssertionStdParametersClass: ...
 
 
 def _generate_assertion_std_parameter(
-    value: Union[str, int, float]
+    value: Union[str, int, float],
 ) -> AssertionStdParameterClass:
     if isinstance(value, str):
         return AssertionStdParameterClass(
datahub/api/entities/dataproduct/dataproduct.py
CHANGED
@@ -321,9 +321,9 @@ class DataProduct(ConfigModel):
 
     @classmethod
     def from_datahub(cls, graph: DataHubGraph, id: str) -> DataProduct:
-        data_product_properties: Optional[
-            DataProductPropertiesClass
-        ] = graph.get_aspect(id, DataProductPropertiesClass)
+        data_product_properties: Optional[DataProductPropertiesClass] = (
+            graph.get_aspect(id, DataProductPropertiesClass)
+        )
         domains: Optional[DomainsClass] = graph.get_aspect(id, DomainsClass)
         assert domains, "Data Product must have an associated domain. Found none."
         owners: Optional[OwnershipClass] = graph.get_aspect(id, OwnershipClass)
@@ -438,7 +438,7 @@ class DataProduct(ConfigModel):
         for replace_index, replace_value in patches_replace.items():
             list_to_manipulate[replace_index] = replace_value
 
-        for
+        for drop_value in patches_drop.values():
             list_to_manipulate.remove(drop_value)
 
         for add_value in patches_add:
datahub/api/entities/dataset/dataset.py
CHANGED
@@ -266,7 +266,8 @@ class Dataset(BaseModel):
         if self.schema_metadata.fields:
             for field in self.schema_metadata.fields:
                 field_urn = field.urn or make_schema_field_urn(
-                    self.urn,
+                    self.urn,  # type: ignore[arg-type]
+                    field.id,  # type: ignore[arg-type]
                 )
                 assert field_urn.startswith("urn:li:schemaField:")
 
datahub/api/entities/structuredproperties/structuredproperties.py
CHANGED
@@ -118,9 +118,9 @@ class StructuredProperties(ConfigModel):
         id = StructuredPropertyUrn.from_string(self.urn).id
         if self.qualified_name is not None:
             # ensure that qualified name and ID match
-            assert (
-                self.qualified_name == id
-            ), "ID in the urn and the qualified_name must match"
+            assert self.qualified_name == id, (
+                "ID in the urn and the qualified_name must match"
+            )
         return id
 
     @validator("urn", pre=True, always=True)
@@ -184,9 +184,9 @@ class StructuredProperties(ConfigModel):
 
     @classmethod
     def from_datahub(cls, graph: DataHubGraph, urn: str) -> "StructuredProperties":
-        structured_property: Optional[
-            StructuredPropertyDefinitionClass
-        ] = graph.get_aspect(urn, StructuredPropertyDefinitionClass)
+        structured_property: Optional[StructuredPropertyDefinitionClass] = (
+            graph.get_aspect(urn, StructuredPropertyDefinitionClass)
+        )
         if structured_property is None:
             raise Exception(
                 "StructuredPropertyDefinition aspect is None. Unable to create structured property."
datahub/cli/cli_utils.py
CHANGED
@@ -412,7 +412,7 @@ def generate_access_token(
 def ensure_has_system_metadata(
     event: Union[
         MetadataChangeProposal, MetadataChangeProposalWrapper, MetadataChangeEvent
-    ]
+    ],
 ) -> None:
     if event.systemMetadata is None:
         event.systemMetadata = SystemMetadataClass()
datahub/cli/delete_cli.py
CHANGED
@@ -265,6 +265,11 @@ def undo_by_filter(
     type=str,
     help="Urn of the entity to delete, for single entity deletion",
 )
+@click.option(
+    "--urn-file",
+    required=False,
+    help="Path of file with urns (one per line) to be deleted",
+)
 @click.option(
     "-a",
     "--aspect",
@@ -353,6 +358,7 @@ def undo_by_filter(
 @telemetry.with_telemetry()
 def by_filter(
     urn: Optional[str],
+    urn_file: Optional[str],
     aspect: Optional[str],
     force: bool,
     soft: bool,
@@ -373,6 +379,7 @@ def by_filter(
     # Validate the cli arguments.
     _validate_user_urn_and_filters(
         urn=urn,
+        urn_file=urn_file,
         entity_type=entity_type,
         platform=platform,
         env=env,
@@ -429,6 +436,12 @@ def by_filter(
                 batch_size=batch_size,
             )
         )
+    elif urn_file:
+        with open(urn_file, "r") as r:
+            urns = []
+            for line in r.readlines():
+                urn = line.strip().strip('"')
+                urns.append(urn)
     else:
         urns = list(
             graph.get_urns_by_filter(
@@ -537,6 +550,7 @@ def _delete_urns_parallel(
 
 def _validate_user_urn_and_filters(
     urn: Optional[str],
+    urn_file: Optional[str],
     entity_type: Optional[str],
     platform: Optional[str],
     env: Optional[str],
@@ -549,9 +563,9 @@ def _validate_user_urn_and_filters(
         raise click.UsageError(
             "You cannot provide both an urn and a filter rule (entity-type / platform / env / query)."
         )
-    elif not urn and not (entity_type or platform or env or query):
+    elif not urn and not urn_file and not (entity_type or platform or env or query):
         raise click.UsageError(
-            "You must provide either an urn or at least one filter (entity-type / platform / env / query) in order to delete entities."
+            "You must provide either an urn or urn_file or at least one filter (entity-type / platform / env / query) in order to delete entities."
         )
     elif query:
         logger.warning(
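The new --urn-file option reads one urn per line, stripping surrounding whitespace and double quotes, and feeds the resulting list into the same deletion flow as a filter-based selection. A hypothetical invocation, assuming the by_filter function shown above is the one behind the `datahub delete` command and using an illustrative file name:

    datahub delete --urn-file urns_to_delete.txt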
datahub/cli/docker_cli.py
CHANGED
@@ -296,9 +296,9 @@ def _restore(
     restore_indices: Optional[bool],
     primary_restore_file: Optional[str],
 ) -> int:
-    assert (
-        restore_primary or restore_indices
-    ), "Either restore_primary or restore_indices must be set"
+    assert restore_primary or restore_indices, (
+        "Either restore_primary or restore_indices must be set"
+    )
     msg = "datahub> "
     if restore_primary:
         msg += f"Will restore primary database from {primary_restore_file}. "
@@ -314,9 +314,9 @@ def _restore(
         assert primary_restore_file
         resolved_restore_file = os.path.expanduser(primary_restore_file)
         logger.info(f"Restoring primary db from backup at {resolved_restore_file}")
-        assert os.path.exists(
-            resolved_restore_file
-        ), f"File {resolved_restore_file} does not exist"
+        assert os.path.exists(resolved_restore_file), (
+            f"File {resolved_restore_file} does not exist"
+        )
         with open(resolved_restore_file) as fp:
             result = subprocess.run(
                 [
datahub/cli/lite_cli.py
CHANGED
@@ -176,7 +176,7 @@ def get(
         )
     )
     end_time = time.time()
-    logger.debug(f"Time taken: {int((end_time - start_time)*1000.0)} millis")
+    logger.debug(f"Time taken: {int((end_time - start_time) * 1000.0)} millis")
 
 
 @lite.command()
@@ -228,7 +228,7 @@ def ls(path: Optional[str]) -> None:
     try:
         browseables = lite.ls(path)
         end_time = time.time()
-        logger.debug(f"Time taken: {int((end_time - start_time)*1000.0)} millis")
+        logger.debug(f"Time taken: {int((end_time - start_time) * 1000.0)} millis")
         auto_complete: List[AutoComplete] = [
             b.auto_complete for b in browseables if b.auto_complete is not None
         ]
datahub/cli/migrate.py
CHANGED
@@ -426,9 +426,9 @@ def batch_get_ids(
             entities_yielded += 1
             log.debug(f"yielding {x}")
             yield x
-        assert (
-            entities_yielded == num_entities
-        ), "Did not delete all entities, try running this command again!"
+        assert entities_yielded == num_entities, (
+            "Did not delete all entities, try running this command again!"
+        )
     else:
         log.error(f"Failed to execute batch get with {str(response.content)}")
         response.raise_for_status()
@@ -136,9 +136,9 @@ def extras_list_to_dict(extras: List[str]) -> Dict[str, str]:
     extra_properties: Dict[str, str] = dict()
     for x in extras:
         parts = x.split("=")
-        assert (
-            len(parts) == 2
-        ), f"Invalid value for extras {x}, should be in format key=value"
+        assert len(parts) == 2, (
+            f"Invalid value for extras {x}, should be in format key=value"
+        )
         extra_properties[parts[0]] = parts[1]
     return extra_properties
 
datahub/cli/timeline_cli.py
CHANGED
@@ -50,7 +50,7 @@ def pretty_id(id: Optional[str]) -> str:
     if id.startswith("urn:li:dataset"):
         dataset_key = dataset_urn_to_key(id)
         if dataset_key:
-            return f"{click.style('dataset', fg='cyan')}:{click.style(dataset_key.platform[len('urn:li:dataPlatform:'):], fg='white')}:{click.style(dataset_key.name, fg='white')}"
+            return f"{click.style('dataset', fg='cyan')}:{click.style(dataset_key.platform[len('urn:li:dataPlatform:') :], fg='white')}:{click.style(dataset_key.name, fg='white')}"
     # failed to prettify, return original
     return id
 
datahub/configuration/common.py
CHANGED
@@ -200,8 +200,7 @@ class IgnorableError(MetaError):
 
 @runtime_checkable
 class ExceptionWithProps(Protocol):
-    def get_telemetry_props(self) -> Dict[str, Any]:
-        ...
+    def get_telemetry_props(self) -> Dict[str, Any]: ...
 
 
 def should_show_stack_trace(exc: Exception) -> bool:
datahub/configuration/config_loader.py
CHANGED
@@ -19,64 +19,87 @@ from datahub.configuration.yaml import YamlConfigurationMechanism
 Environ = Mapping[str, str]
 
 
-def _resolve_element(element: str, environ: Environ) -> str:
-    if re.search(r"(\$\{).+(\})", element):
-        return expand(element, nounset=True, environ=environ)
-    elif element.startswith("$"):
-        try:
-            return expand(element, nounset=True, environ=environ)
-        except UnboundVariable:
-            return element
-    else:
-        return element
-
-
-def _resolve_list(ele_list: list, environ: Environ) -> list:
-    new_v: list = []
-    for ele in ele_list:
-        if isinstance(ele, str):
-            new_v.append(_resolve_element(ele, environ=environ))
-        elif isinstance(ele, list):
-            new_v.append(_resolve_list(ele, environ=environ))
-        elif isinstance(ele, dict):
-            new_v.append(resolve_env_variables(ele, environ=environ))
-        else:
-            new_v.append(ele)
-    return new_v
-
-
 def resolve_env_variables(config: dict, environ: Environ) -> dict:
-    new_dict: Dict[Any, Any] = {}
-    for k, v in config.items():
-        if isinstance(v, dict):
-            new_dict[k] = resolve_env_variables(v, environ=environ)
-        elif isinstance(v, list):
-            new_dict[k] = _resolve_list(v, environ=environ)
-        elif isinstance(v, str):
-            new_dict[k] = _resolve_element(v, environ=environ)
-        else:
-            new_dict[k] = v
-    return new_dict
+    # TODO: This is kept around for backwards compatibility.
+    return EnvResolver(environ).resolve(config)
 
 
 def list_referenced_env_variables(config: dict) -> Set[str]:
-    # This is
-    mock.get.side_effect = mock_get_env
+    # TODO: This is kept around for backwards compatibility.
+    return EnvResolver(environ=os.environ).list_referenced_variables(config)
+
+
+class EnvResolver:
+    def __init__(self, environ: Environ, strict_env_syntax: bool = False):
+        self.environ = environ
+        self.strict_env_syntax = strict_env_syntax
+
+    def resolve(self, config: dict) -> dict:
+        return self._resolve_dict(config)
+
+    @classmethod
+    def list_referenced_variables(
+        cls,
+        config: dict,
+        strict_env_syntax: bool = False,
+    ) -> Set[str]:
+        # This is a bit of a hack, but expandvars does a bunch of escaping
+        # and other logic that we don't want to duplicate here.
+
+        vars = set()
+
+        def mock_get_env(key: str, default: Optional[str] = None) -> str:
+            vars.add(key)
+            if default is not None:
+                return default
+            return "mocked_value"
+
+        mock = unittest.mock.MagicMock()
+        mock.get.side_effect = mock_get_env
+
+        resolver = EnvResolver(environ=mock, strict_env_syntax=strict_env_syntax)
+        resolver._resolve_dict(config)
+
+        return vars
+
+    def _resolve_element(self, element: str) -> str:
+        if re.search(r"(\$\{).+(\})", element):
+            return expand(element, nounset=True, environ=self.environ)
+        elif not self.strict_env_syntax and element.startswith("$"):
+            try:
+                return expand(element, nounset=True, environ=self.environ)
+            except UnboundVariable:
+                # TODO: This fallback is kept around for backwards compatibility, but
+                # doesn't make a ton of sense from first principles.
+                return element
+        else:
+            return element
+
+    def _resolve_list(self, ele_list: list) -> list:
+        new_v: list = []
+        for ele in ele_list:
+            if isinstance(ele, str):
+                new_v.append(self._resolve_element(ele))
+            elif isinstance(ele, list):
+                new_v.append(self._resolve_list(ele))
+            elif isinstance(ele, dict):
+                new_v.append(self._resolve_dict(ele))
+            else:
+                new_v.append(ele)
+        return new_v
+
+    def _resolve_dict(self, config: dict) -> dict:
+        new_dict: Dict[Any, Any] = {}
+        for k, v in config.items():
+            if isinstance(v, dict):
+                new_dict[k] = self._resolve_dict(v)
+            elif isinstance(v, list):
+                new_dict[k] = self._resolve_list(v)
+            elif isinstance(v, str):
+                new_dict[k] = self._resolve_element(v)
+            else:
+                new_dict[k] = v
+        return new_dict
 
 
 WRITE_TO_FILE_DIRECTIVE_PREFIX = "__DATAHUB_TO_FILE_"
@@ -159,7 +182,7 @@ def load_config_file(
 
     config = raw_config.copy()
    if resolve_env_vars:
-        config =
+        config = EnvResolver(environ=os.environ).resolve(config)
     if process_directives:
         config = _process_directives(config)
 
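The env-variable handling previously spread across module-level helpers is now folded into the EnvResolver class above, with an opt-in strict_env_syntax flag that limits expansion to ${VAR}-style references. A small usage sketch, assuming EnvResolver is imported from datahub.configuration.config_loader (the module this hunk modifies); the recipe fragment and variable name are illustrative only:

    import os

    from datahub.configuration.config_loader import EnvResolver

    # Illustrative recipe fragment; DATAHUB_GMS_URL is an assumed variable name.
    config = {"sink": {"config": {"server": "${DATAHUB_GMS_URL}"}}}

    # Walks nested dicts and lists, expanding env references from the given mapping.
    resolved = EnvResolver(environ=os.environ).resolve(config)

    # Reports which variables a config refers to, without requiring them to be set.
    referenced = EnvResolver.list_referenced_variables(config)  # {"DATAHUB_GMS_URL"}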
datahub/configuration/git.py
CHANGED
@@ -121,9 +121,9 @@ class GitInfo(GitReference):
 
         repo: str = values["repo"]
         if repo.startswith(_GITHUB_PREFIX):
-            return f"git@github.com:{repo[len(_GITHUB_PREFIX):]}.git"
+            return f"git@github.com:{repo[len(_GITHUB_PREFIX) :]}.git"
         elif repo.startswith(_GITLAB_PREFIX):
-            return f"git@gitlab.com:{repo[len(_GITLAB_PREFIX):]}.git"
+            return f"git@gitlab.com:{repo[len(_GITLAB_PREFIX) :]}.git"
         else:
             raise ValueError(
                 "Unable to infer repo_ssh_locator from repo. Please set repo_ssh_locator manually."
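The change above is formatting only, but for orientation: the validator derives an SSH locator by stripping the hosting prefix from the configured repo. A minimal standalone sketch of that transformation, assuming _GITHUB_PREFIX is the plain https URL prefix (the real constant lives in datahub/configuration/git.py and may differ):

    # Hypothetical standalone sketch; not the library's actual helper.
    _GITHUB_PREFIX = "https://github.com/"  # assumption about the constant's value

    def to_ssh_locator(repo: str) -> str:
        # "https://github.com/acryldata/datahub" -> "git@github.com:acryldata/datahub.git"
        return f"git@github.com:{repo[len(_GITHUB_PREFIX) :]}.git"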
datahub/configuration/time_window_config.py
CHANGED
@@ -47,7 +47,10 @@ class BaseTimeWindowConfig(ConfigModel):
         default_factory=lambda: datetime.now(tz=timezone.utc),
         description="Latest date of lineage/usage to consider. Default: Current time in UTC",
     )
-    start_time: datetime = Field(
+    start_time: datetime = Field(
+        default=None,
+        description="Earliest date of lineage/usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`). You can also specify relative time with respect to end_time such as '-7 days' Or '-7d'.",
+    )  # type: ignore
 
     @pydantic.validator("start_time", pre=True, always=True)
     def default_start_time(
@@ -63,12 +66,14 @@ class BaseTimeWindowConfig(ConfigModel):
             # This is where start_time str is resolved to datetime
             try:
                 delta = parse_relative_timespan(v)
-                assert delta < timedelta(
-                    0
-                ), "Relative start time should start with minus sign (-) e.g. '-2 days'."
+                assert delta < timedelta(0), (
+                    "Relative start time should start with minus sign (-) e.g. '-2 days'."
+                )
                 assert abs(delta) >= get_bucket_duration_delta(
                     values["bucket_duration"]
-                ), "Relative start time should be in terms of configured bucket duration. e.g '-2 days' or '-2 hours'."
+                ), (
+                    "Relative start time should be in terms of configured bucket duration. e.g '-2 days' or '-2 hours'."
+                )
 
                 # The end_time's default value is not yet populated, in which case
                 # we can just manually generate it here.
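The field declaration and validator above accept either an absolute start_time or a relative offset from end_time. A hedged sketch of the intended behaviour, assuming BaseTimeWindowConfig can be built from a plain dict with pydantic's parse_obj and that the default bucket duration is no coarser than the offset used:

    from datahub.configuration.time_window_config import BaseTimeWindowConfig

    # "-2 days" is resolved relative to end_time by the validator shown above;
    # the class and field names come from the hunk, the values are illustrative.
    config = BaseTimeWindowConfig.parse_obj({"start_time": "-2 days"})
    assert config.start_time < config.end_time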
datahub/emitter/mce_builder.py
CHANGED
@@ -88,13 +88,11 @@ def get_sys_time() -> int:
 
 
 @overload
-def make_ts_millis(ts: None) -> None:
-    ...
+def make_ts_millis(ts: None) -> None: ...
 
 
 @overload
-def make_ts_millis(ts: datetime) -> int:
-    ...
+def make_ts_millis(ts: datetime) -> int: ...
 
 
 def make_ts_millis(ts: Optional[datetime]) -> Optional[int]:
@@ -105,13 +103,11 @@ def make_ts_millis(ts: Optional[datetime]) -> Optional[int]:
 
 
 @overload
-def parse_ts_millis(ts: float) -> datetime:
-    ...
+def parse_ts_millis(ts: float) -> datetime: ...
 
 
 @overload
-def parse_ts_millis(ts: None) -> None:
-    ...
+def parse_ts_millis(ts: None) -> None: ...
 
 
 def parse_ts_millis(ts: Optional[float]) -> Optional[datetime]:
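These overloads only collapse the `...` bodies onto one line; the helpers themselves are unchanged. A brief usage sketch of the timestamp round-trip, assuming both functions are importable from datahub.emitter.mce_builder (the module this hunk modifies):

    from datetime import datetime, timezone

    from datahub.emitter.mce_builder import make_ts_millis, parse_ts_millis

    millis = make_ts_millis(datetime.now(tz=timezone.utc))  # datetime -> int epoch millis
    restored = parse_ts_millis(millis)  # int epoch millis -> datetime
    assert make_ts_millis(None) is None and parse_ts_millis(None) is None  # None passes through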
datahub/ingestion/api/incremental_lineage_helper.py
CHANGED
@@ -55,15 +55,9 @@ def convert_chart_info_to_patch(
         aspect.externalUrl
     ).set_type(aspect.type).set_title(aspect.title).set_access(
         aspect.access
-    ).set_last_modified(
-        aspect.lastModified
-    ).set_last_refreshed(
+    ).set_last_modified(aspect.lastModified).set_last_refreshed(
         aspect.lastRefreshed
-    ).set_description(
-        aspect.description
-    ).add_inputs(
-        aspect.inputs
-    )
+    ).set_description(aspect.description).add_inputs(aspect.inputs)
 
     values = patch_builder.build()
     if values:
datahub/ingestion/api/report.py
CHANGED
datahub/ingestion/api/source_helpers.py
CHANGED
@@ -48,7 +48,7 @@ logger = logging.getLogger(__name__)
 
 
 def auto_workunit(
-    stream: Iterable[Union[MetadataChangeEventClass, MetadataChangeProposalWrapper]]
+    stream: Iterable[Union[MetadataChangeEventClass, MetadataChangeProposalWrapper]],
 ) -> Iterable[MetadataWorkUnit]:
     """Convert a stream of MCEs and MCPs to a stream of :class:`MetadataWorkUnit`s."""
 
datahub/ingestion/extractor/json_schema_util.py
CHANGED
@@ -131,9 +131,9 @@ class FieldPath:
         for i, schema_type in enumerate(p.schema_types):
             if schema_type == schema_str:
                 # return the corresponding type for the schema that's a match
-                assert (
-                    len(p.type) > i
-                ), f"p.type({len(p.type)})) and p.schema_types({len(p.schema_types)}) should have the same length"
+                assert len(p.type) > i, (
+                    f"p.type({len(p.type)})) and p.schema_types({len(p.schema_types)}) should have the same length"
+                )
                 return p.type[i]
         return None
 
datahub/ingestion/extractor/schema_util.py
CHANGED
@@ -263,15 +263,13 @@ class AvroToMceSchemaConverter:
     @overload
     def _get_underlying_type_if_option_as_union(
         schema: SchemaOrField, default: SchemaOrField
-    ) -> SchemaOrField:
-        ...
+    ) -> SchemaOrField: ...
 
     @staticmethod
     @overload
     def _get_underlying_type_if_option_as_union(
         schema: SchemaOrField, default: Optional[SchemaOrField] = None
-    ) -> Optional[SchemaOrField]:
-        ...
+    ) -> Optional[SchemaOrField]: ...
 
     @staticmethod
     def _get_underlying_type_if_option_as_union(
@@ -386,7 +384,7 @@ class AvroToMceSchemaConverter:
 
         if "deprecated" in merged_props:
             description = (
-                f
+                f'<span style="color:red">DEPRECATED: {merged_props["deprecated"]}</span>\n'
                 + description
                 if description
                 else ""
datahub/ingestion/fs/s3_fs.py
CHANGED
@@ -17,9 +17,9 @@ def parse_s3_path(path: str) -> "S3Path":
 
 def assert_ok_status(s3_response):
     is_ok = s3_response["ResponseMetadata"]["HTTPStatusCode"] == 200
-    assert (
-        is_ok
-    ), f"Failed to fetch S3 object, error message: {s3_response['Error']['Message']}"
+    assert is_ok, (
+        f"Failed to fetch S3 object, error message: {s3_response['Error']['Message']}"
+    )
 
 
 @dataclass
datahub/ingestion/glossary/datahub_classifier.py
CHANGED
@@ -148,9 +148,9 @@ class DataHubClassifierConfig(ConfigModel):
                 weight,
             ) in custom_infotype_config.Prediction_Factors_and_Weights.dict().items():
                 if weight > 0:
-                    assert (
-                        getattr(custom_infotype_config, factor) is not None
-                    ), f"Missing Configuration for Prediction Factor {factor} for Custom Info Type {custom_infotype}"
+                    assert getattr(custom_infotype_config, factor) is not None, (
+                        f"Missing Configuration for Prediction Factor {factor} for Custom Info Type {custom_infotype}"
+                    )
 
             # Custom infotype supports only regex based prediction for column values
             if custom_infotype_config.Prediction_Factors_and_Weights.Values > 0:
@@ -158,7 +158,9 @@ class DataHubClassifierConfig(ConfigModel):
                 assert (
                     custom_infotype_config.Values.prediction_type
                     == ValuePredictionType.REGEX
-                ), f"Invalid Prediction Type for Values for Custom Info Type {custom_infotype}. Only `regex` is supported."
+                ), (
+                    f"Invalid Prediction Type for Values for Custom Info Type {custom_infotype}. Only `regex` is supported."
+                )
 
         return info_types_config
 