acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +18 -3
- datahub/api/entities/datajob/datajob.py +24 -4
- datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +47 -72
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +37 -37
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/base.py +8 -6
- datahub/api/graphql/operation.py +14 -10
- datahub/cli/check_cli.py +91 -9
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +20 -12
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +133 -34
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +155 -231
- datahub/cli/exists_cli.py +2 -3
- datahub/cli/get_cli.py +2 -3
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +11 -5
- datahub/cli/ingest_cli.py +25 -26
- datahub/cli/migrate.py +12 -9
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +4 -6
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +39 -7
- datahub/cli/specific/datacontract_cli.py +57 -9
- datahub/cli/specific/dataproduct_cli.py +12 -24
- datahub/cli/specific/dataset_cli.py +31 -21
- datahub/cli/specific/forms_cli.py +2 -5
- datahub/cli/specific/group_cli.py +2 -3
- datahub/cli/specific/structuredproperties_cli.py +5 -7
- datahub/cli/specific/user_cli.py +174 -4
- datahub/cli/state_cli.py +2 -3
- datahub/cli/timeline_cli.py +2 -3
- datahub/configuration/common.py +46 -2
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +4 -3
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +12 -8
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/mcp_builder.py +12 -0
- datahub/emitter/request_helper.py +138 -15
- datahub/emitter/response_helper.py +111 -19
- datahub/emitter/rest_emitter.py +399 -163
- datahub/entrypoints.py +10 -5
- datahub/errors.py +12 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/common.py +9 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +174 -62
- datahub/ingestion/api/source_helpers.py +41 -3
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +31 -5
- datahub/ingestion/glossary/classification_mixin.py +9 -2
- datahub/ingestion/graph/client.py +492 -55
- datahub/ingestion/graph/config.py +18 -2
- datahub/ingestion/graph/filters.py +96 -32
- datahub/ingestion/graph/links.py +55 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +90 -23
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +31 -23
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/apply/datahub_apply.py +6 -5
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +7 -18
- datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
- datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
- datahub/ingestion/source/common/subtypes.py +73 -1
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
- datahub/ingestion/source/data_lake_common/object_store.py +732 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
- datahub/ingestion/source/datahub/config.py +19 -5
- datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
- datahub/ingestion/source/datahub/datahub_source.py +11 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
- datahub/ingestion/source/dbt/dbt_common.py +270 -26
- datahub/ingestion/source/dbt/dbt_core.py +88 -47
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +228 -215
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +12 -14
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +67 -8
- datahub/ingestion/source/fivetran/fivetran.py +228 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
- datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
- datahub/ingestion/source/gcs/gcs_source.py +53 -10
- datahub/ingestion/source/gcs/gcs_utils.py +36 -9
- datahub/ingestion/source/ge_data_profiler.py +146 -33
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +402 -0
- datahub/ingestion/source/hex/constants.py +8 -0
- datahub/ingestion/source/hex/hex.py +311 -0
- datahub/ingestion/source/hex/mapper.py +412 -0
- datahub/ingestion/source/hex/model.py +78 -0
- datahub/ingestion/source/hex/query_fetcher.py +307 -0
- datahub/ingestion/source/iceberg/iceberg.py +385 -164
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +28 -71
- datahub/ingestion/source/kafka/kafka_config.py +78 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
- datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +216 -86
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +539 -555
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +103 -118
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +11 -10
- datahub/ingestion/source/mlflow.py +254 -23
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +359 -181
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
- datahub/ingestion/source/nifi.py +5 -5
- datahub/ingestion/source/openapi.py +85 -38
- datahub/ingestion/source/openapi_parser.py +59 -40
- datahub/ingestion/source/powerbi/config.py +92 -27
- datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +66 -32
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/profile.py +2 -2
- datahub/ingestion/source/redshift/query.py +24 -20
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +13 -11
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +515 -244
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/config.py +75 -8
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +36 -7
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +403 -140
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
- datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
- datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
- datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
- datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +219 -26
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +29 -9
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +9 -4
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +28 -8
- datahub/ingestion/source/sql/hive_metastore.py +24 -25
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +18 -2
- datahub/ingestion/source/sql/mssql/source.py +376 -62
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +62 -11
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +20 -2
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +27 -2
- datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
- datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
- datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +43 -10
- datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/superset.py +810 -126
- datahub/ingestion/source/tableau/tableau.py +172 -69
- datahub/ingestion/source/tableau/tableau_common.py +11 -4
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +161 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
- datahub/ingestion/source/usage/usage_common.py +4 -68
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1367 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
- datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/dataset_domain.py +1 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/common.py +3 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
- datahub/metadata/_urns/urn_defs.py +1866 -1582
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18404 -16617
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/Deprecation.avsc +2 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +21 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QueryProperties.avsc +24 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +147 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +7 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_shared.py +393 -10
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +180 -4
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +154 -12
- datahub/sdk/lineage_client.py +943 -0
- datahub/sdk/main_client.py +83 -8
- datahub/sdk/mlmodel.py +383 -0
- datahub/sdk/mlmodelgroup.py +240 -0
- datahub/sdk/search_client.py +85 -8
- datahub/sdk/search_filters.py +393 -68
- datahub/secret/datahub_secret_store.py +5 -1
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +51 -59
- datahub/sql_parsing/_sqlglot_patch.py +1 -2
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/split_statements.py +30 -3
- datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
- datahub/sql_parsing/sqlglot_lineage.py +517 -44
- datahub/sql_parsing/sqlglot_utils.py +30 -18
- datahub/sql_parsing/tool_meta_extractor.py +25 -2
- datahub/telemetry/telemetry.py +30 -16
- datahub/testing/check_imports.py +1 -1
- datahub/testing/docker_utils.py +8 -2
- datahub/testing/mce_helpers.py +421 -0
- datahub/testing/mcp_diff.py +17 -21
- datahub/testing/sdk_v2_helpers.py +18 -0
- datahub/upgrade/upgrade.py +86 -30
- datahub/utilities/file_backed_collections.py +14 -15
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/ingest_utils.py +2 -2
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +30 -7
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +298 -10
- datahub/utilities/sqlalchemy_query_combiner.py +6 -4
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/utilities/urn_encoder.py +1 -1
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -458
- datahub/ingestion/source/vertexai.py +0 -697
- datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/powerbi/m_query/pattern_handler.py

@@ -3,7 +3,9 @@ from abc import ABC, abstractmethod
 from enum import Enum
 from typing import Dict, List, Optional, Tuple, Type, cast
 
+import sqlglot
 from lark import Tree
+from sqlglot import ParseError, expressions as exp
 
 from datahub.configuration.source_common import PlatformDetail
 from datahub.emitter import mce_builder as builder
@@ -29,8 +31,20 @@ from datahub.ingestion.source.powerbi.m_query.data_classes import (
     Lineage,
     ReferencedTable,
 )
+from datahub.ingestion.source.powerbi.m_query.odbc import (
+    extract_dsn,
+    extract_platform,
+    extract_server,
+    normalize_platform_name,
+)
 from datahub.ingestion.source.powerbi.rest_api_wrapper.data_classes import Table
-from datahub.
+from datahub.metadata.schema_classes import SchemaFieldDataTypeClass
+from datahub.sql_parsing.sqlglot_lineage import (
+    ColumnLineageInfo,
+    ColumnRef,
+    DownstreamColumnRef,
+    SqlParsingResult,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -149,6 +163,7 @@ class AbstractLineage(ABC):
                 tree_function.token_values(arg_list)
             ),
         )
+        logger.debug(f"DB Details: {arguments}")
 
         if len(arguments) < 2:
             logger.debug(f"Expected minimum 2 arguments, but got {len(arguments)}")
@@ -196,15 +211,34 @@ class AbstractLineage(ABC):
 
         return None
 
+    @staticmethod
+    def is_sql_query(query: Optional[str]) -> bool:
+        if not query:
+            return False
+        query = native_sql_parser.remove_special_characters(query)
+        try:
+            expression = sqlglot.parse_one(query)
+            return isinstance(expression, exp.Select)
+        except (ParseError, Exception):
+            logger.debug(f"Failed to parse query as SQL: {query}")
+            return False
+
     def parse_custom_sql(
-        self,
+        self,
+        query: str,
+        server: str,
+        database: Optional[str],
+        schema: Optional[str],
+        platform_pair: Optional[DataPlatformPair] = None,
     ) -> Lineage:
         dataplatform_tables: List[DataPlatformTable] = []
+        if not platform_pair:
+            platform_pair = self.get_platform_pair()
 
         platform_detail: PlatformDetail = (
             self.platform_instance_resolver.get_platform_instance(
                 PowerBIPlatformDetail(
-                    data_platform_pair=
+                    data_platform_pair=platform_pair,
                     data_platform_server=server,
                 )
             )
@@ -218,7 +252,7 @@ class AbstractLineage(ABC):
             native_sql_parser.parse_custom_sql(
                 ctx=self.ctx,
                 query=query,
-                platform=
+                platform=platform_pair.datahub_data_platform_name,
                 platform_instance=platform_detail.platform_instance,
                 env=platform_detail.env,
                 database=database,
@@ -245,7 +279,7 @@ class AbstractLineage(ABC):
         for urn in parsed_result.in_tables:
             dataplatform_tables.append(
                 DataPlatformTable(
-                    data_platform_pair=
+                    data_platform_pair=platform_pair,
                    urn=urn,
                 )
             )
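The new is_sql_query helper decides whether the text pulled out of a data-access argument is a runnable SELECT statement (worth handing to the SQL parser) or just a table reference. A standalone sketch of the same check, using only public sqlglot calls; the sample strings are illustrative:

import sqlglot
from sqlglot import expressions as exp


def looks_like_select(text: str) -> bool:
    # Only a string that parses to a top-level SELECT is treated as a SQL query.
    try:
        return isinstance(sqlglot.parse_one(text), exp.Select)
    except Exception:
        return False


assert looks_like_select("SELECT id, amount FROM sales.orders")
assert not looks_like_select("sales.orders")  # bare table reference, not a query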
@@ -262,6 +296,33 @@ class AbstractLineage(ABC):
             ),
         )
 
+    def create_table_column_lineage(self, urn: str) -> List[ColumnLineageInfo]:
+        column_lineage = []
+
+        if self.table.columns is not None:
+            for column in self.table.columns:
+                downstream = DownstreamColumnRef(
+                    table=self.table.name,
+                    column=column.name,
+                    column_type=SchemaFieldDataTypeClass(type=column.datahubDataType),
+                    native_column_type=column.dataType or "UNKNOWN",
+                )
+
+                upstreams = [
+                    ColumnRef(
+                        table=urn,
+                        column=column.name.lower(),
+                    )
+                ]
+
+                column_lineage_info = ColumnLineageInfo(
+                    downstream=downstream, upstreams=upstreams
+                )
+
+                column_lineage.append(column_lineage_info)
+
+        return column_lineage
+
 
 class AmazonRedshiftLineage(AbstractLineage):
     def get_platform_pair(self) -> DataPlatformPair:
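create_table_column_lineage maps every column of the Power BI table onto the same-named (lower-cased) column of the upstream dataset URN. A sketch of the objects it yields, built from the sqlglot_lineage dataclasses imported above; the URN and column names are made up for illustration, and the optional type fields are left at their defaults:

from datahub.sql_parsing.sqlglot_lineage import (
    ColumnLineageInfo,
    ColumnRef,
    DownstreamColumnRef,
)

upstream_urn = "urn:li:dataset:(urn:li:dataPlatform:mysql,sales.public.orders,PROD)"

# One ColumnLineageInfo per table column: downstream is the Power BI table
# column, upstream is the matching column on the source dataset URN.
column_lineage = [
    ColumnLineageInfo(
        downstream=DownstreamColumnRef(table="orders", column=name),
        upstreams=[ColumnRef(table=upstream_urn, column=name.lower())],
    )
    for name in ["Id", "Amount"]
]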
@@ -299,6 +360,8 @@ class AmazonRedshiftLineage(AbstractLineage):
             qualified_table_name=qualified_table_name,
         )
 
+        column_lineage = self.create_table_column_lineage(urn)
+
         return Lineage(
             upstreams=[
                 DataPlatformTable(
@@ -306,7 +369,7 @@ class AmazonRedshiftLineage(AbstractLineage):
                     urn=urn,
                 )
             ],
-            column_lineage=
+            column_lineage=column_lineage,
         )
 
 
@@ -364,6 +427,8 @@ class OracleLineage(AbstractLineage):
             qualified_table_name=qualified_table_name,
         )
 
+        column_lineage = self.create_table_column_lineage(urn)
+
         return Lineage(
             upstreams=[
                 DataPlatformTable(
@@ -371,7 +436,7 @@ class OracleLineage(AbstractLineage):
                     urn=urn,
                 )
             ],
-            column_lineage=
+            column_lineage=column_lineage,
         )
 
 
@@ -449,6 +514,8 @@ class DatabricksLineage(AbstractLineage):
             qualified_table_name=qualified_table_name,
         )
 
+        column_lineage = self.create_table_column_lineage(urn)
+
         return Lineage(
             upstreams=[
                 DataPlatformTable(
@@ -456,7 +523,7 @@ class DatabricksLineage(AbstractLineage):
                     urn=urn,
                 )
             ],
-            column_lineage=
+            column_lineage=column_lineage,
         )
 
         return Lineage.empty()
@@ -509,6 +576,9 @@ class TwoStepDataAccessPattern(AbstractLineage, ABC):
             server=server,
             qualified_table_name=qualified_table_name,
         )
+
+        column_lineage = self.create_table_column_lineage(urn)
+
         return Lineage(
             upstreams=[
                 DataPlatformTable(
@@ -516,10 +586,62 @@ class TwoStepDataAccessPattern(AbstractLineage, ABC):
                     urn=urn,
                 )
             ],
-            column_lineage=
+            column_lineage=column_lineage,
         )
 
 
+class MySQLLineage(AbstractLineage):
+    def create_lineage(
+        self, data_access_func_detail: DataAccessFunctionDetail
+    ) -> Lineage:
+        logger.debug(
+            f"Processing {self.get_platform_pair().powerbi_data_platform_name} data-access function detail {data_access_func_detail}"
+        )
+
+        server, db_name = self.get_db_detail_from_argument(
+            data_access_func_detail.arg_list
+        )
+        if server is None or db_name is None:
+            return Lineage.empty()  # Return an empty list
+
+        schema_name: str = cast(
+            IdentifierAccessor, data_access_func_detail.identifier_accessor
+        ).items["Schema"]
+
+        table_name: str = cast(
+            IdentifierAccessor, data_access_func_detail.identifier_accessor
+        ).items["Item"]
+
+        qualified_table_name: str = f"{schema_name}.{table_name}"
+
+        logger.debug(
+            f"Platform({self.get_platform_pair().datahub_data_platform_name}) qualified_table_name= {qualified_table_name}"
+        )
+
+        urn = make_urn(
+            config=self.config,
+            platform_instance_resolver=self.platform_instance_resolver,
+            data_platform_pair=self.get_platform_pair(),
+            server=server,
+            qualified_table_name=qualified_table_name,
+        )
+
+        column_lineage = self.create_table_column_lineage(urn)
+
+        return Lineage(
+            upstreams=[
+                DataPlatformTable(
+                    data_platform_pair=self.get_platform_pair(),
+                    urn=urn,
+                )
+            ],
+            column_lineage=column_lineage,
+        )
+
+    def get_platform_pair(self) -> DataPlatformPair:
+        return SupportedDataPlatform.MYSQL.value
+
+
 class PostgresLineage(TwoStepDataAccessPattern):
     def create_lineage(
         self, data_access_func_detail: DataAccessFunctionDetail
@@ -671,6 +793,8 @@ class ThreeStepDataAccessPattern(AbstractLineage, ABC):
             qualified_table_name=qualified_table_name,
         )
 
+        column_lineage = self.create_table_column_lineage(urn)
+
         return Lineage(
             upstreams=[
                 DataPlatformTable(
@@ -678,7 +802,7 @@ class ThreeStepDataAccessPattern(AbstractLineage, ABC):
                     urn=urn,
                 )
             ],
-            column_lineage=
+            column_lineage=column_lineage,
         )
 
 
@@ -726,6 +850,7 @@ class NativeQueryLineage(AbstractLineage):
 
         tables: List[str] = native_sql_parser.get_tables(query)
 
+        column_lineage = []
         for qualified_table_name in tables:
             if len(qualified_table_name.split(".")) != 3:
                 logger.debug(
@@ -748,12 +873,11 @@ class NativeQueryLineage(AbstractLineage):
                 )
             )
 
+            column_lineage = self.create_table_column_lineage(urn)
+
         logger.debug(f"Generated dataplatform_tables {dataplatform_tables}")
 
-        return Lineage(
-            upstreams=dataplatform_tables,
-            column_lineage=[],
-        )
+        return Lineage(upstreams=dataplatform_tables, column_lineage=column_lineage)
 
     def get_db_name(self, data_access_tokens: List[str]) -> Optional[str]:
         if (
@@ -844,6 +968,211 @@ class NativeQueryLineage(AbstractLineage):
         )
 
 
+class OdbcLineage(AbstractLineage):
+    def create_lineage(
+        self, data_access_func_detail: DataAccessFunctionDetail
+    ) -> Lineage:
+        logger.debug(
+            f"Processing {self.get_platform_pair().powerbi_data_platform_name} "
+            f"data-access function detail {data_access_func_detail}"
+        )
+
+        connect_string, query = self.get_db_detail_from_argument(
+            data_access_func_detail.arg_list
+        )
+
+        if not connect_string:
+            self.reporter.warning(
+                title="Can not extract ODBC connect string",
+                message="Can not extract ODBC connect string from data access function. Skipping Lineage creation.",
+                context=f"table-name={self.table.full_name}, data-access-func-detail={data_access_func_detail}",
+            )
+            return Lineage.empty()
+
+        logger.debug(f"ODBC connect string: {connect_string}")
+        data_platform, powerbi_platform = extract_platform(connect_string)
+        server_name = extract_server(connect_string)
+
+        dsn = extract_dsn(connect_string)
+        if not dsn:
+            self.reporter.warning(
+                title="Can not determine ODBC DSN",
+                message="Can not extract DSN from ODBC connect string. Skipping Lineage creation.",
+                context=f"table-name={self.table.full_name}, connect-string={connect_string}",
+            )
+            return Lineage.empty()
+        logger.debug(f"Extracted DSN: {dsn}")
+
+        if not data_platform:
+            server_name = dsn
+            if self.config.dsn_to_platform_name:
+                logger.debug(f"Attempting to map DSN {dsn} to platform")
+                name = self.config.dsn_to_platform_name.get(dsn)
+                if name:
+                    logger.debug(f"Found DSN {dsn} mapped to platform {name}")
+                    data_platform, powerbi_platform = normalize_platform_name(name)
+
+        if not data_platform or not powerbi_platform:
+            self.reporter.warning(
+                title="Can not determine ODBC platform",
+                message="Can not determine platform from ODBC connect string. Skipping Lineage creation.",
+                context=f"table-name={self.table.full_name}, connect-string={connect_string}",
+            )
+            return Lineage.empty()
+
+        platform_pair: DataPlatformPair = self.create_platform_pair(
+            data_platform, powerbi_platform
+        )
+
+        if not server_name and self.config.server_to_platform_instance:
+            self.reporter.warning(
+                title="Can not determine ODBC server name",
+                message="Can not determine server name with server_to_platform_instance mapping. Skipping Lineage creation.",
+                context=f"table-name={self.table.full_name}",
+            )
+            return Lineage.empty()
+        elif not server_name:
+            server_name = "unknown"
+
+        if self.is_sql_query(query):
+            return self.query_lineage(query, platform_pair, server_name, dsn)
+        else:
+            return self.expression_lineage(
+                data_access_func_detail, data_platform, platform_pair, server_name
+            )
+
+    def query_lineage(
+        self,
+        query: Optional[str],
+        platform_pair: DataPlatformPair,
+        server_name: str,
+        dsn: str,
+    ) -> Lineage:
+        database = None
+        schema = None
+
+        if not query:
+            # query should never be None as it is checked before calling this function.
+            # however, we need to check just in case.
+            self.reporter.warning(
+                title="ODBC Query is null",
+                message="No SQL to parse. Skipping Lineage creation.",
+                context=f"table-name={self.table.full_name}",
+            )
+            return Lineage.empty()
+
+        if self.config.dsn_to_database_schema:
+            value = self.config.dsn_to_database_schema.get(dsn)
+            if value:
+                parts = value.split(".")
+                if len(parts) == 1:
+                    database = parts[0]
+                elif len(parts) == 2:
+                    database = parts[0]
+                    schema = parts[1]
+
+        logger.debug(
+            f"ODBC query processing: dsn={dsn} mapped to database={database}, schema={schema}"
+        )
+        result = self.parse_custom_sql(
+            query=query,
+            server=server_name,
+            database=database,
+            schema=schema,
+            platform_pair=platform_pair,
+        )
+        logger.debug(f"ODBC query lineage generated {len(result.upstreams)} upstreams")
+        return result
+
+    def expression_lineage(
+        self,
+        data_access_func_detail: DataAccessFunctionDetail,
+        data_platform: str,
+        platform_pair: DataPlatformPair,
+        server_name: str,
+    ) -> Lineage:
+        database_name = None
+        schema_name = None
+        table_name = None
+        qualified_table_name = None
+
+        temp_accessor: Optional[IdentifierAccessor] = (
+            data_access_func_detail.identifier_accessor
+        )
+
+        while temp_accessor:
+            logger.debug(
+                f"identifier = {temp_accessor.identifier} items = {temp_accessor.items}"
+            )
+            if temp_accessor.items.get("Kind") == "Database":
+                database_name = temp_accessor.items["Name"]
+
+            if temp_accessor.items.get("Kind") == "Schema":
+                schema_name = temp_accessor.items["Name"]
+
+            if temp_accessor.items.get("Kind") == "Table":
+                table_name = temp_accessor.items["Name"]
+
+            if temp_accessor.next is not None:
+                temp_accessor = temp_accessor.next
+            else:
+                break
+
+        if (
+            database_name is not None
+            and schema_name is not None
+            and table_name is not None
+        ):
+            qualified_table_name = f"{database_name}.{schema_name}.{table_name}"
+        elif database_name is not None and table_name is not None:
+            qualified_table_name = f"{database_name}.{table_name}"
+
+        if not qualified_table_name:
+            self.reporter.warning(
+                title="Can not determine qualified table name",
+                message="Can not determine qualified table name for ODBC data source. Skipping Lineage creation.",
+                context=f"table-name={self.table.full_name}, data-platform={data_platform}",
+            )
+            logger.warning(
+                f"Can not determine qualified table name for ODBC data source {data_platform} "
+                f"table {self.table.full_name}."
+            )
+            return Lineage.empty()
+
+        logger.debug(
+            f"ODBC Platform {data_platform} found qualified table name {qualified_table_name}"
+        )
+
+        urn = make_urn(
+            config=self.config,
+            platform_instance_resolver=self.platform_instance_resolver,
+            data_platform_pair=platform_pair,
+            server=server_name,
+            qualified_table_name=qualified_table_name,
+        )
+
+        column_lineage = self.create_table_column_lineage(urn)
+
+        return Lineage(
+            upstreams=[
+                DataPlatformTable(
+                    data_platform_pair=platform_pair,
+                    urn=urn,
+                )
+            ],
+            column_lineage=column_lineage,
+        )
+
+    @staticmethod
+    def create_platform_pair(
+        data_platform: str, powerbi_platform: str
+    ) -> DataPlatformPair:
+        return DataPlatformPair(data_platform, powerbi_platform)
+
+    def get_platform_pair(self) -> DataPlatformPair:
+        return SupportedDataPlatform.ODBC.value
+
+
 class SupportedPattern(Enum):
     DATABRICKS_QUERY = (
         DatabricksLineage,
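When the platform or database cannot be read from the ODBC connect string itself, OdbcLineage falls back to the DSN mappings referenced above (config.dsn_to_platform_name and config.dsn_to_database_schema, alongside the existing server_to_platform_instance). A hypothetical fragment, written as a Python dict, showing how such mappings could be supplied to the Power BI source; the field names come from the config attributes used in the diff, but the surrounding recipe layout and value shapes are assumptions to verify against the released datahub/ingestion/source/powerbi/config.py:

powerbi_source_config = {
    # DSN -> DataHub platform name, used when the connect string has no driver hint.
    "dsn_to_platform_name": {
        "WarehouseDSN": "snowflake",
    },
    # DSN -> "database" or "database.schema", used to qualify tables in parsed SQL.
    "dsn_to_database_schema": {
        "WarehouseDSN": "ANALYTICS.PUBLIC",
    },
    # Existing mapping from server name to platform instance details (shape assumed).
    "server_to_platform_instance": {
        "warehouse.example.com": {"platform_instance": "prod_warehouse"},
    },
}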
@@ -885,11 +1214,26 @@ class SupportedPattern(Enum):
         FunctionName.AMAZON_REDSHIFT_DATA_ACCESS,
     )
 
+    MYSQL = (
+        MySQLLineage,
+        FunctionName.MYSQL_DATA_ACCESS,
+    )
+
     NATIVE_QUERY = (
         NativeQueryLineage,
         FunctionName.NATIVE_QUERY,
     )
 
+    ODBC = (
+        OdbcLineage,
+        FunctionName.ODBC_DATA_ACCESS,
+    )
+
+    ODBC_QUERY = (
+        OdbcLineage,
+        FunctionName.ODBC_QUERY,
+    )
+
     def handler(self) -> Type[AbstractLineage]:
         return self.value[0]
 
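Each SupportedPattern member pairs a lineage handler class with the M-Query function name it covers; the resolver (see the resolver.py hunks below) looks the handler up by function name and instantiates it. A self-contained sketch of that dispatch; the function-name strings and the lookup body are illustrative, not the shipped get_pattern_handler implementation:

from enum import Enum
from typing import Optional, Type


class FunctionName(Enum):
    MYSQL_DATA_ACCESS = "MySql.Database"  # placeholder M function names
    ODBC_DATA_ACCESS = "Odbc.DataSource"


class MySQLLineage: ...
class OdbcLineage: ...


class SupportedPattern(Enum):
    MYSQL = (MySQLLineage, FunctionName.MYSQL_DATA_ACCESS)
    ODBC = (OdbcLineage, FunctionName.ODBC_DATA_ACCESS)

    def handler(self) -> Type:
        return self.value[0]

    @staticmethod
    def get_pattern_handler(name: str) -> Optional["SupportedPattern"]:
        # Match the data-access function name seen in the M expression.
        return next((p for p in SupportedPattern if p.value[1].value == name), None)


pattern = SupportedPattern.get_pattern_handler("Odbc.DataSource")
assert pattern is not None and pattern.handler() is OdbcLineage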
datahub/ingestion/source/powerbi/m_query/resolver.py

@@ -361,6 +361,9 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):
         )
 
         if output_variable is None:
+            logger.debug(
+                f"Table: {self.table.full_name}: output-variable not found in tree"
+            )
             self.reporter.report_warning(
                 f"{self.table.full_name}-output-variable",
                 "output-variable not found in table expression",
@@ -374,6 +377,9 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):
 
         # Each item is data-access function
         for f_detail in table_links:
+            logger.debug(
+                f"Processing data-access-function {f_detail.data_access_function_name}"
+            )
             # Get & Check if we support data-access-function available in M-Query
             supported_resolver = SupportedPattern.get_pattern_handler(
                 f_detail.data_access_function_name
@@ -390,6 +396,10 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):
 
             # From supported_resolver enum get respective handler like AmazonRedshift or Snowflake or Oracle or NativeQuery and create instance of it
             # & also pass additional information that will be need to generate lineage
+            logger.debug(
+                f"Creating instance of {supported_resolver.handler().__name__} "
+                f"for data-access-function {f_detail.data_access_function_name}"
+            )
             pattern_handler: AbstractLineage = supported_resolver.handler()(
                 ctx=ctx,
                 table=self.table,