acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +18 -3
- datahub/api/entities/datajob/datajob.py +24 -4
- datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +47 -72
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +37 -37
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/base.py +8 -6
- datahub/api/graphql/operation.py +14 -10
- datahub/cli/check_cli.py +91 -9
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +20 -12
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +133 -34
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +155 -231
- datahub/cli/exists_cli.py +2 -3
- datahub/cli/get_cli.py +2 -3
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +11 -5
- datahub/cli/ingest_cli.py +25 -26
- datahub/cli/migrate.py +12 -9
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +4 -6
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +39 -7
- datahub/cli/specific/datacontract_cli.py +57 -9
- datahub/cli/specific/dataproduct_cli.py +12 -24
- datahub/cli/specific/dataset_cli.py +31 -21
- datahub/cli/specific/forms_cli.py +2 -5
- datahub/cli/specific/group_cli.py +2 -3
- datahub/cli/specific/structuredproperties_cli.py +5 -7
- datahub/cli/specific/user_cli.py +174 -4
- datahub/cli/state_cli.py +2 -3
- datahub/cli/timeline_cli.py +2 -3
- datahub/configuration/common.py +46 -2
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +4 -3
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +12 -8
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/mcp_builder.py +12 -0
- datahub/emitter/request_helper.py +138 -15
- datahub/emitter/response_helper.py +111 -19
- datahub/emitter/rest_emitter.py +399 -163
- datahub/entrypoints.py +10 -5
- datahub/errors.py +12 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/common.py +9 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +174 -62
- datahub/ingestion/api/source_helpers.py +41 -3
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +31 -5
- datahub/ingestion/glossary/classification_mixin.py +9 -2
- datahub/ingestion/graph/client.py +492 -55
- datahub/ingestion/graph/config.py +18 -2
- datahub/ingestion/graph/filters.py +96 -32
- datahub/ingestion/graph/links.py +55 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +90 -23
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +31 -23
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/apply/datahub_apply.py +6 -5
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +7 -18
- datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
- datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
- datahub/ingestion/source/common/subtypes.py +73 -1
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
- datahub/ingestion/source/data_lake_common/object_store.py +732 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
- datahub/ingestion/source/datahub/config.py +19 -5
- datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
- datahub/ingestion/source/datahub/datahub_source.py +11 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
- datahub/ingestion/source/dbt/dbt_common.py +270 -26
- datahub/ingestion/source/dbt/dbt_core.py +88 -47
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +228 -215
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +12 -14
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +67 -8
- datahub/ingestion/source/fivetran/fivetran.py +228 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
- datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
- datahub/ingestion/source/gcs/gcs_source.py +53 -10
- datahub/ingestion/source/gcs/gcs_utils.py +36 -9
- datahub/ingestion/source/ge_data_profiler.py +146 -33
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +402 -0
- datahub/ingestion/source/hex/constants.py +8 -0
- datahub/ingestion/source/hex/hex.py +311 -0
- datahub/ingestion/source/hex/mapper.py +412 -0
- datahub/ingestion/source/hex/model.py +78 -0
- datahub/ingestion/source/hex/query_fetcher.py +307 -0
- datahub/ingestion/source/iceberg/iceberg.py +385 -164
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +28 -71
- datahub/ingestion/source/kafka/kafka_config.py +78 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
- datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +216 -86
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +539 -555
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +103 -118
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +11 -10
- datahub/ingestion/source/mlflow.py +254 -23
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +359 -181
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
- datahub/ingestion/source/nifi.py +5 -5
- datahub/ingestion/source/openapi.py +85 -38
- datahub/ingestion/source/openapi_parser.py +59 -40
- datahub/ingestion/source/powerbi/config.py +92 -27
- datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +66 -32
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/profile.py +2 -2
- datahub/ingestion/source/redshift/query.py +24 -20
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +13 -11
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +515 -244
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/config.py +75 -8
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +36 -7
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +403 -140
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
- datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
- datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
- datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
- datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +219 -26
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +29 -9
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +9 -4
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +28 -8
- datahub/ingestion/source/sql/hive_metastore.py +24 -25
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +18 -2
- datahub/ingestion/source/sql/mssql/source.py +376 -62
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +62 -11
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +20 -2
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +27 -2
- datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
- datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
- datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +43 -10
- datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/superset.py +810 -126
- datahub/ingestion/source/tableau/tableau.py +172 -69
- datahub/ingestion/source/tableau/tableau_common.py +11 -4
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +161 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
- datahub/ingestion/source/usage/usage_common.py +4 -68
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1367 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
- datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/dataset_domain.py +1 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/common.py +3 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
- datahub/metadata/_urns/urn_defs.py +1866 -1582
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18404 -16617
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/Deprecation.avsc +2 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +21 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QueryProperties.avsc +24 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +147 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +7 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_shared.py +393 -10
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +180 -4
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +154 -12
- datahub/sdk/lineage_client.py +943 -0
- datahub/sdk/main_client.py +83 -8
- datahub/sdk/mlmodel.py +383 -0
- datahub/sdk/mlmodelgroup.py +240 -0
- datahub/sdk/search_client.py +85 -8
- datahub/sdk/search_filters.py +393 -68
- datahub/secret/datahub_secret_store.py +5 -1
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +51 -59
- datahub/sql_parsing/_sqlglot_patch.py +1 -2
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/split_statements.py +30 -3
- datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
- datahub/sql_parsing/sqlglot_lineage.py +517 -44
- datahub/sql_parsing/sqlglot_utils.py +30 -18
- datahub/sql_parsing/tool_meta_extractor.py +25 -2
- datahub/telemetry/telemetry.py +30 -16
- datahub/testing/check_imports.py +1 -1
- datahub/testing/docker_utils.py +8 -2
- datahub/testing/mce_helpers.py +421 -0
- datahub/testing/mcp_diff.py +17 -21
- datahub/testing/sdk_v2_helpers.py +18 -0
- datahub/upgrade/upgrade.py +86 -30
- datahub/utilities/file_backed_collections.py +14 -15
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/ingest_utils.py +2 -2
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +30 -7
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +298 -10
- datahub/utilities/sqlalchemy_query_combiner.py +6 -4
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/utilities/urn_encoder.py +1 -1
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -458
- datahub/ingestion/source/vertexai.py +0 -697
- datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
from abc import abstractmethod
|
|
2
|
+
from typing import List, Tuple
|
|
3
|
+
|
|
4
|
+
from typing_extensions import Self
|
|
5
|
+
|
|
6
|
+
from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath
|
|
7
|
+
from datahub.metadata.schema_classes import (
|
|
8
|
+
FineGrainedLineageClass as FineGrainedLineage,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class HasFineGrainedLineagePatch(MetadataPatchProposal):
    """Patch operations for fine-grained lineage entries.

    Subclasses say where the fine-grained lineage lives by implementing
    ``_fine_grained_lineage_location``; every public method returns ``self``
    so calls can be chained.
    """

    @abstractmethod
    def _fine_grained_lineage_location(self) -> Tuple[str, PatchPath]:
        """Return the aspect name and the base patch path for fine-grained lineage."""
        raise NotImplementedError("Subclasses must implement this method.")

    @staticmethod
    def _get_fine_grained_key(
        fine_grained_lineage: FineGrainedLineage,
    ) -> Tuple[str, str, str]:
        """Build the ``(transform operation, downstream, query)`` key of an entry.

        A falsy transform operation or query is replaced by the literal
        ``"NONE"``.

        Raises:
            TypeError: If the entry does not have exactly one downstream.
        """
        targets = fine_grained_lineage.downstreams or []
        if len(targets) != 1:
            raise TypeError("Cannot patch with more or less than one downstream.")
        return (
            fine_grained_lineage.transformOperation or "NONE",
            targets[0],
            fine_grained_lineage.query or "NONE",
        )

    def add_fine_grained_lineage(
        self, fine_grained_lineage: FineGrainedLineage
    ) -> Self:
        """Record one "add" patch per upstream of ``fine_grained_lineage``.

        Each patch path is the base path followed by the entry key and the
        upstream; the patch value carries the entry's confidence score.
        Nothing is recorded when the entry has no upstreams.

        Raises:
            TypeError: If the entry does not have exactly one downstream.
        """
        aspect_name, base_path = self._fine_grained_lineage_location()
        entry_key = self._get_fine_grained_key(fine_grained_lineage)
        for upstream_urn in fine_grained_lineage.upstreams or []:
            # A fresh value dict per patch, so patches never share state.
            score_payload = {"confidenceScore": fine_grained_lineage.confidenceScore}
            self._add_patch(
                aspect_name,
                "add",
                path=(*base_path, *entry_key, upstream_urn),
                value=score_payload,
            )
        return self

    def remove_fine_grained_lineage(
        self, fine_grained_lineage: FineGrainedLineage
    ) -> Self:
        """Record one "remove" patch per upstream of ``fine_grained_lineage``.

        The patch paths are built exactly as in ``add_fine_grained_lineage``;
        the patch value is an empty dict.

        Raises:
            TypeError: If the entry does not have exactly one downstream.
        """
        aspect_name, base_path = self._fine_grained_lineage_location()
        entry_key = self._get_fine_grained_key(fine_grained_lineage)
        for upstream_urn in fine_grained_lineage.upstreams or []:
            self._add_patch(
                aspect_name,
                "remove",
                path=(*base_path, *entry_key, upstream_urn),
                value={},
            )
        return self

    def set_fine_grained_lineages(
        self, fine_grained_lineages: List[FineGrainedLineage]
    ) -> Self:
        """Record a single "add" patch at the base path holding the whole list."""
        aspect_name, base_path = self._fine_grained_lineage_location()
        self._add_patch(
            aspect_name, "add", path=base_path, value=fine_grained_lineages
        )
        return self
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from typing_extensions import Self
|
|
4
|
+
|
|
5
|
+
from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
|
|
6
|
+
from datahub.metadata.com.linkedin.pegasus2avro.common import Siblings
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class HasSiblingsPatch(MetadataPatchProposal):
    """Patch-builder helper with add/remove/set operations on the siblings aspect."""

    def add_sibling(self, sibling_urn: str, primary: bool = False) -> Self:
        """Queue a patch that adds one sibling URN.

        Args:
            sibling_urn: URN of the entity to register as a sibling.
            primary: When true, a second patch sets the aspect's `primary`
                field. When false, that field is not touched.

        Returns:
            The patch builder instance, so calls can be chained.
        """
        aspect = Siblings.ASPECT_NAME
        self._add_patch(
            aspect, "add", path=("siblings", sibling_urn), value=sibling_urn
        )

        # The primary flag is only written when explicitly requested.
        if not primary:
            return self

        self._add_patch(aspect, "add", path=("primary",), value=primary)
        return self

    def remove_sibling(self, sibling_urn: str) -> Self:
        """Queue a patch that removes one sibling URN.

        Args:
            sibling_urn: URN of the sibling entity to drop.

        Returns:
            The patch builder instance, so calls can be chained.
        """
        sibling_path = ("siblings", sibling_urn)
        self._add_patch(Siblings.ASPECT_NAME, "remove", path=sibling_path, value={})
        return self

    def set_siblings(self, sibling_urns: List[str], primary: bool = False) -> Self:
        """Queue patches that write the full sibling list and the primary flag.

        The list is written at the `siblings` path as a whole, which is
        intended to replace any siblings already recorded. Unlike
        `add_sibling`, the `primary` field is always written, even when false.

        Args:
            sibling_urns: The complete list of sibling URNs.
            primary: Whether this entity should be marked as primary.

        Returns:
            The patch builder instance, so calls can be chained.
        """
        aspect = Siblings.ASPECT_NAME
        pending = (
            (("siblings",), sibling_urns),
            (("primary",), primary),
        )
        for patch_path, patch_value in pending:
            self._add_patch(aspect, "add", path=patch_path, value=patch_value)
        return self
|
|
@@ -70,3 +70,30 @@ class HasStructuredPropertiesPatch(MetadataPatchProposal):
|
|
|
70
70
|
),
|
|
71
71
|
)
|
|
72
72
|
return self
|
|
73
|
+
|
|
74
|
+
def set_structured_property_manual(
    self, property: StructuredPropertyValueAssignmentClass
) -> Self:
    """Add or update a structured property from a prebuilt assignment object.

    Any existing assignment for the same property URN is removed first, then
    the supplied assignment is added under `properties/<propertyUrn>`.

    Args:
        property: The assignment to write; its `propertyUrn` selects the path.

    Returns:
        The patch builder instance, so calls can be chained.
    """
    property_urn = property.propertyUrn

    # Remove-then-add so the new assignment replaces the old one.
    self.remove_structured_property(property_urn)
    self._add_patch(
        StructuredPropertiesClass.ASPECT_NAME,
        "add",
        path=("properties", property_urn),
        value=property,
    )
    return self
|
|
87
|
+
|
|
88
|
+
def add_structured_property_manual(
    self, property: StructuredPropertyValueAssignmentClass
) -> Self:
    """Add a structured property from a prebuilt assignment object.

    Unlike `set_structured_property_manual`, no "remove" patch is queued
    beforehand; only the "add" under `properties/<propertyUrn>` is emitted.

    Args:
        property: The assignment to write; its `propertyUrn` selects the path.

    Returns:
        The patch builder instance, so calls can be chained.
    """
    target_path = ("properties", property.propertyUrn)
    self._add_patch(
        StructuredPropertiesClass.ASPECT_NAME, "add", path=target_path, value=property
    )
    return self
|
datahub/specific/chart.py
CHANGED
datahub/specific/datajob.py
CHANGED
|
@@ -1,15 +1,19 @@
|
|
|
1
|
-
from typing import List, Optional, Tuple, Union
|
|
1
|
+
from typing import List, Optional, Set, Tuple, Union
|
|
2
2
|
|
|
3
3
|
from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath
|
|
4
4
|
from datahub.metadata.schema_classes import (
|
|
5
5
|
DataJobInfoClass as DataJobInfo,
|
|
6
6
|
DataJobInputOutputClass as DataJobInputOutput,
|
|
7
7
|
EdgeClass as Edge,
|
|
8
|
+
FineGrainedLineageClass as FineGrainedLineage,
|
|
8
9
|
KafkaAuditHeaderClass,
|
|
9
10
|
SystemMetadataClass,
|
|
10
11
|
)
|
|
11
12
|
from datahub.metadata.urns import SchemaFieldUrn, Urn
|
|
12
13
|
from datahub.specific.aspect_helpers.custom_properties import HasCustomPropertiesPatch
|
|
14
|
+
from datahub.specific.aspect_helpers.fine_grained_lineage import (
|
|
15
|
+
HasFineGrainedLineagePatch,
|
|
16
|
+
)
|
|
13
17
|
from datahub.specific.aspect_helpers.ownership import HasOwnershipPatch
|
|
14
18
|
from datahub.specific.aspect_helpers.tags import HasTagsPatch
|
|
15
19
|
from datahub.specific.aspect_helpers.terms import HasTermsPatch
|
|
@@ -20,6 +24,7 @@ class DataJobPatchBuilder(
|
|
|
20
24
|
HasCustomPropertiesPatch,
|
|
21
25
|
HasTagsPatch,
|
|
22
26
|
HasTermsPatch,
|
|
27
|
+
HasFineGrainedLineagePatch,
|
|
23
28
|
MetadataPatchProposal,
|
|
24
29
|
):
|
|
25
30
|
def __init__(
|
|
@@ -40,10 +45,19 @@ class DataJobPatchBuilder(
|
|
|
40
45
|
urn, system_metadata=system_metadata, audit_header=audit_header
|
|
41
46
|
)
|
|
42
47
|
|
|
48
|
+
# Track fine-grained lineages for DataJob-specific handling
|
|
49
|
+
self._fine_grained_lineages_to_add: List[FineGrainedLineage] = []
|
|
50
|
+
self._fine_grained_lineage_keys_to_remove: Set[Tuple[str, str, str]] = set()
|
|
51
|
+
self._fine_grained_lineages_set: Optional[List[FineGrainedLineage]] = None
|
|
52
|
+
|
|
43
53
|
@classmethod
def _custom_properties_location(cls) -> Tuple[str, PatchPath]:
    """Return the aspect name and patch path used for custom properties."""
    aspect_name = DataJobInfo.ASPECT_NAME
    patch_path = ("customProperties",)
    return aspect_name, patch_path
|
|
46
56
|
|
|
57
|
+
@classmethod
def _fine_grained_lineage_location(cls) -> Tuple[str, PatchPath]:
    """Return the aspect name and patch path used for fine-grained lineages."""
    aspect_name = DataJobInputOutput.ASPECT_NAME
    patch_path = ("fineGrainedLineages",)
    return aspect_name, patch_path
|
|
60
|
+
|
|
47
61
|
def add_input_datajob(self, input: Union[Edge, Urn, str]) -> "DataJobPatchBuilder":
|
|
48
62
|
"""
|
|
49
63
|
Adds an input data job to the DataJobPatchBuilder.
|
datahub/specific/dataproduct.py
CHANGED
|
@@ -9,6 +9,9 @@ from datahub.metadata.schema_classes import (
|
|
|
9
9
|
)
|
|
10
10
|
from datahub.specific.aspect_helpers.custom_properties import HasCustomPropertiesPatch
|
|
11
11
|
from datahub.specific.aspect_helpers.ownership import HasOwnershipPatch
|
|
12
|
+
from datahub.specific.aspect_helpers.structured_properties import (
|
|
13
|
+
HasStructuredPropertiesPatch,
|
|
14
|
+
)
|
|
12
15
|
from datahub.specific.aspect_helpers.tags import HasTagsPatch
|
|
13
16
|
from datahub.specific.aspect_helpers.terms import HasTermsPatch
|
|
14
17
|
|
|
@@ -16,6 +19,7 @@ from datahub.specific.aspect_helpers.terms import HasTermsPatch
|
|
|
16
19
|
class DataProductPatchBuilder(
|
|
17
20
|
HasOwnershipPatch,
|
|
18
21
|
HasCustomPropertiesPatch,
|
|
22
|
+
HasStructuredPropertiesPatch,
|
|
19
23
|
HasTagsPatch,
|
|
20
24
|
HasTermsPatch,
|
|
21
25
|
MetadataPatchProposal,
|
datahub/specific/dataset.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import warnings
|
|
1
2
|
from typing import Generic, List, Optional, Tuple, TypeVar, Union
|
|
2
3
|
|
|
3
4
|
from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath
|
|
@@ -17,7 +18,11 @@ from datahub.metadata.schema_classes import (
|
|
|
17
18
|
)
|
|
18
19
|
from datahub.metadata.urns import DatasetUrn, TagUrn, Urn
|
|
19
20
|
from datahub.specific.aspect_helpers.custom_properties import HasCustomPropertiesPatch
|
|
21
|
+
from datahub.specific.aspect_helpers.fine_grained_lineage import (
|
|
22
|
+
HasFineGrainedLineagePatch,
|
|
23
|
+
)
|
|
20
24
|
from datahub.specific.aspect_helpers.ownership import HasOwnershipPatch
|
|
25
|
+
from datahub.specific.aspect_helpers.siblings import HasSiblingsPatch
|
|
21
26
|
from datahub.specific.aspect_helpers.structured_properties import (
|
|
22
27
|
HasStructuredPropertiesPatch,
|
|
23
28
|
)
|
|
@@ -99,6 +104,8 @@ class DatasetPatchBuilder(
|
|
|
99
104
|
HasStructuredPropertiesPatch,
|
|
100
105
|
HasTagsPatch,
|
|
101
106
|
HasTermsPatch,
|
|
107
|
+
HasFineGrainedLineagePatch,
|
|
108
|
+
HasSiblingsPatch,
|
|
102
109
|
MetadataPatchProposal,
|
|
103
110
|
):
|
|
104
111
|
def __init__(
|
|
@@ -115,6 +122,10 @@ class DatasetPatchBuilder(
|
|
|
115
122
|
def _custom_properties_location(cls) -> Tuple[str, PatchPath]:
|
|
116
123
|
return DatasetProperties.ASPECT_NAME, ("customProperties",)
|
|
117
124
|
|
|
125
|
+
@classmethod
def _fine_grained_lineage_location(cls) -> Tuple[str, PatchPath]:
    """Return the aspect name and patch path used for fine-grained lineages."""
    aspect_name = UpstreamLineage.ASPECT_NAME
    patch_path = ("fineGrainedLineages",)
    return aspect_name, patch_path
|
|
128
|
+
|
|
118
129
|
def add_upstream_lineage(self, upstream: Upstream) -> "DatasetPatchBuilder":
|
|
119
130
|
self._add_patch(
|
|
120
131
|
UpstreamLineage.ASPECT_NAME,
|
|
@@ -144,75 +155,44 @@ class DatasetPatchBuilder(
|
|
|
144
155
|
def add_fine_grained_upstream_lineage(
|
|
145
156
|
self, fine_grained_lineage: FineGrainedLineage
|
|
146
157
|
) -> "DatasetPatchBuilder":
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
"add",
|
|
156
|
-
path=self._build_fine_grained_path(
|
|
157
|
-
transform_op, downstream_urn, query_id, upstream_urn
|
|
158
|
-
),
|
|
159
|
-
value={"confidenceScore": fine_grained_lineage.confidenceScore},
|
|
160
|
-
)
|
|
161
|
-
return self
|
|
162
|
-
|
|
163
|
-
@staticmethod
|
|
164
|
-
def get_fine_grained_key(
|
|
165
|
-
fine_grained_lineage: FineGrainedLineage,
|
|
166
|
-
) -> Tuple[str, str, str]:
|
|
167
|
-
downstreams = fine_grained_lineage.downstreams or []
|
|
168
|
-
if len(downstreams) != 1:
|
|
169
|
-
raise TypeError("Cannot patch with more or less than one downstream.")
|
|
170
|
-
transform_op = fine_grained_lineage.transformOperation or "NONE"
|
|
171
|
-
downstream_urn = downstreams[0]
|
|
172
|
-
query_id = fine_grained_lineage.query or "NONE"
|
|
173
|
-
return transform_op, downstream_urn, query_id
|
|
174
|
-
|
|
175
|
-
@classmethod
|
|
176
|
-
def _build_fine_grained_path(
|
|
177
|
-
cls, transform_op: str, downstream_urn: str, query_id: str, upstream_urn: str
|
|
178
|
-
) -> PatchPath:
|
|
179
|
-
return (
|
|
180
|
-
"fineGrainedLineages",
|
|
181
|
-
transform_op,
|
|
182
|
-
downstream_urn,
|
|
183
|
-
query_id,
|
|
184
|
-
upstream_urn,
|
|
158
|
+
"""
|
|
159
|
+
Deprecated: Use `add_fine_grained_lineage` instead.
|
|
160
|
+
"""
|
|
161
|
+
warnings.warn(
|
|
162
|
+
"add_fine_grained_upstream_lineage() is deprecated."
|
|
163
|
+
" Use add_fine_grained_lineage() instead.",
|
|
164
|
+
DeprecationWarning,
|
|
165
|
+
stacklevel=2,
|
|
185
166
|
)
|
|
167
|
+
return self.add_fine_grained_lineage(fine_grained_lineage)
|
|
186
168
|
|
|
187
169
|
def remove_fine_grained_upstream_lineage(
|
|
188
170
|
self, fine_grained_lineage: FineGrainedLineage
|
|
189
171
|
) -> "DatasetPatchBuilder":
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
transform_op, downstream_urn, query_id, upstream_urn
|
|
201
|
-
),
|
|
202
|
-
value={},
|
|
203
|
-
)
|
|
204
|
-
return self
|
|
172
|
+
"""
|
|
173
|
+
Deprecated: Use `remove_fine_grained_lineage` instead.
|
|
174
|
+
"""
|
|
175
|
+
warnings.warn(
|
|
176
|
+
"remove_fine_grained_upstream_lineage() is deprecated."
|
|
177
|
+
" Use remove_fine_grained_lineage() instead.",
|
|
178
|
+
DeprecationWarning,
|
|
179
|
+
stacklevel=2,
|
|
180
|
+
)
|
|
181
|
+
return self.remove_fine_grained_lineage(fine_grained_lineage)
|
|
205
182
|
|
|
206
183
|
def set_fine_grained_upstream_lineages(
|
|
207
184
|
self, fine_grained_lineages: List[FineGrainedLineage]
|
|
208
185
|
) -> "DatasetPatchBuilder":
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
186
|
+
"""
|
|
187
|
+
Deprecated: Use `set_fine_grained_lineages` instead.
|
|
188
|
+
"""
|
|
189
|
+
warnings.warn(
|
|
190
|
+
"set_fine_grained_upstream_lineages() is deprecated."
|
|
191
|
+
" Use set_fine_grained_lineages() instead.",
|
|
192
|
+
DeprecationWarning,
|
|
193
|
+
stacklevel=2,
|
|
214
194
|
)
|
|
215
|
-
return self
|
|
195
|
+
return self.set_fine_grained_lineages(fine_grained_lineages)
|
|
216
196
|
|
|
217
197
|
def for_field(
|
|
218
198
|
self, field_path: str, editable: bool = True
|
|
@@ -292,3 +272,15 @@ class DatasetPatchBuilder(
|
|
|
292
272
|
value=timestamp,
|
|
293
273
|
)
|
|
294
274
|
return self
|
|
275
|
+
|
|
276
|
+
def set_external_url(
    self, external_url: Optional[str] = None
) -> "DatasetPatchBuilder":
    """Queue an "add" patch for `externalUrl` on the dataset properties aspect.

    Args:
        external_url: The URL to write. When None (the default), nothing is
            queued and the builder is returned unchanged.

    Returns:
        The patch builder instance, so calls can be chained.
    """
    if external_url is None:
        return self

    self._add_patch(
        DatasetProperties.ASPECT_NAME,
        "add",
        path=("externalUrl",),
        value=external_url,
    )
    return self
|
|
@@ -163,8 +163,7 @@ def _patch_lineage() -> None:
|
|
|
163
163
|
- source_columns = set(find_all_in_scope(select, exp.Column))
|
|
164
164
|
+ source_columns = list(find_all_in_scope(select, exp.Column))
|
|
165
165
|
|
|
166
|
-
|
|
167
|
-
+ # If the source is a UDTF find columns used in the UDTF to generate the table
|
|
166
|
+
# If the source is a UDTF find columns used in the UDTF to generate the table
|
|
168
167
|
+ source = scope.expression
|
|
169
168
|
if isinstance(source, exp.UDTF):
|
|
170
169
|
- source_columns |= set(source.find_all(exp.Column))
|
|
@@ -1,7 +1,9 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
import re
|
|
2
3
|
from enum import Enum
|
|
3
4
|
from typing import Iterator, List, Tuple
|
|
4
5
|
|
|
6
|
+
logger = logging.getLogger(__name__)
|
|
5
7
|
SELECT_KEYWORD = "SELECT"
|
|
6
8
|
CASE_KEYWORD = "CASE"
|
|
7
9
|
END_KEYWORD = "END"
|
|
@@ -50,6 +52,7 @@ class ParserState(Enum):
|
|
|
50
52
|
STRING = 2
|
|
51
53
|
COMMENT = 3
|
|
52
54
|
MULTILINE_COMMENT = 4
|
|
55
|
+
BRACKETED_IDENTIFIER = 5
|
|
53
56
|
|
|
54
57
|
|
|
55
58
|
class _StatementSplitter:
|
|
@@ -120,7 +123,9 @@ class _StatementSplitter:
|
|
|
120
123
|
# Reset current_statement-specific state.
|
|
121
124
|
self.does_select_mean_new_statement = False
|
|
122
125
|
if self.current_case_statements != 0:
|
|
123
|
-
|
|
126
|
+
logger.warning(
|
|
127
|
+
f"Unexpected END keyword. Current case statements: {self.current_case_statements}"
|
|
128
|
+
)
|
|
124
129
|
self.current_case_statements = 0
|
|
125
130
|
|
|
126
131
|
def process(self) -> Iterator[str]:
|
|
@@ -137,6 +142,10 @@ class _StatementSplitter:
|
|
|
137
142
|
self.state = ParserState.STRING
|
|
138
143
|
self.current_statement.append(c)
|
|
139
144
|
prev_real_char = c
|
|
145
|
+
elif c == "[":
|
|
146
|
+
self.state = ParserState.BRACKETED_IDENTIFIER
|
|
147
|
+
self.current_statement.append(c)
|
|
148
|
+
prev_real_char = c
|
|
140
149
|
elif c == "-" and next_char == "-":
|
|
141
150
|
self.state = ParserState.COMMENT
|
|
142
151
|
self.current_statement.append(c)
|
|
@@ -168,6 +177,14 @@ class _StatementSplitter:
|
|
|
168
177
|
elif c == "'":
|
|
169
178
|
self.state = ParserState.NORMAL
|
|
170
179
|
|
|
180
|
+
elif self.state == ParserState.BRACKETED_IDENTIFIER:
|
|
181
|
+
self.current_statement.append(c)
|
|
182
|
+
if c == "]" and next_char == "]":
|
|
183
|
+
self.current_statement.append(next_char)
|
|
184
|
+
self.i += 1
|
|
185
|
+
elif c == "]":
|
|
186
|
+
self.state = ParserState.NORMAL
|
|
187
|
+
|
|
171
188
|
elif self.state == ParserState.COMMENT:
|
|
172
189
|
self.current_statement.append(c)
|
|
173
190
|
if c == "\n":
|
|
@@ -233,8 +250,10 @@ class _StatementSplitter:
|
|
|
233
250
|
),
|
|
234
251
|
)
|
|
235
252
|
if (
|
|
236
|
-
is_force_new_statement_keyword
|
|
237
|
-
|
|
253
|
+
is_force_new_statement_keyword
|
|
254
|
+
and not self._has_preceding_cte(most_recent_real_char)
|
|
255
|
+
and not self._is_part_of_merge_query()
|
|
256
|
+
):
|
|
238
257
|
# Force termination of current statement
|
|
239
258
|
yield from self._yield_if_complete()
|
|
240
259
|
|
|
@@ -247,6 +266,14 @@ class _StatementSplitter:
|
|
|
247
266
|
else:
|
|
248
267
|
self.current_statement.append(c)
|
|
249
268
|
|
|
269
|
+
def _has_preceding_cte(self, most_recent_real_char: str) -> bool:
|
|
270
|
+
# usually we'd have a close paren that closes a CTE
|
|
271
|
+
return most_recent_real_char == ")"
|
|
272
|
+
|
|
273
|
+
def _is_part_of_merge_query(self) -> bool:
|
|
274
|
+
# In merge statement we'd have `when matched then` or `when not matched then"
|
|
275
|
+
return "".join(self.current_statement).strip().lower().endswith("then")
|
|
276
|
+
|
|
250
277
|
|
|
251
278
|
def split_statements(sql: str) -> Iterator[str]:
|
|
252
279
|
"""
|