acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +18 -3
- datahub/api/entities/datajob/datajob.py +24 -4
- datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +47 -72
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +37 -37
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/base.py +8 -6
- datahub/api/graphql/operation.py +14 -10
- datahub/cli/check_cli.py +91 -9
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +20 -12
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +133 -34
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +155 -231
- datahub/cli/exists_cli.py +2 -3
- datahub/cli/get_cli.py +2 -3
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +11 -5
- datahub/cli/ingest_cli.py +25 -26
- datahub/cli/migrate.py +12 -9
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +4 -6
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +39 -7
- datahub/cli/specific/datacontract_cli.py +57 -9
- datahub/cli/specific/dataproduct_cli.py +12 -24
- datahub/cli/specific/dataset_cli.py +31 -21
- datahub/cli/specific/forms_cli.py +2 -5
- datahub/cli/specific/group_cli.py +2 -3
- datahub/cli/specific/structuredproperties_cli.py +5 -7
- datahub/cli/specific/user_cli.py +174 -4
- datahub/cli/state_cli.py +2 -3
- datahub/cli/timeline_cli.py +2 -3
- datahub/configuration/common.py +46 -2
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +4 -3
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +12 -8
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/mcp_builder.py +12 -0
- datahub/emitter/request_helper.py +138 -15
- datahub/emitter/response_helper.py +111 -19
- datahub/emitter/rest_emitter.py +399 -163
- datahub/entrypoints.py +10 -5
- datahub/errors.py +12 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/common.py +9 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +174 -62
- datahub/ingestion/api/source_helpers.py +41 -3
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +31 -5
- datahub/ingestion/glossary/classification_mixin.py +9 -2
- datahub/ingestion/graph/client.py +492 -55
- datahub/ingestion/graph/config.py +18 -2
- datahub/ingestion/graph/filters.py +96 -32
- datahub/ingestion/graph/links.py +55 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +90 -23
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +31 -23
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/apply/datahub_apply.py +6 -5
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +7 -18
- datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
- datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
- datahub/ingestion/source/common/subtypes.py +73 -1
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
- datahub/ingestion/source/data_lake_common/object_store.py +732 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
- datahub/ingestion/source/datahub/config.py +19 -5
- datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
- datahub/ingestion/source/datahub/datahub_source.py +11 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
- datahub/ingestion/source/dbt/dbt_common.py +270 -26
- datahub/ingestion/source/dbt/dbt_core.py +88 -47
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +228 -215
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +12 -14
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +67 -8
- datahub/ingestion/source/fivetran/fivetran.py +228 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
- datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
- datahub/ingestion/source/gcs/gcs_source.py +53 -10
- datahub/ingestion/source/gcs/gcs_utils.py +36 -9
- datahub/ingestion/source/ge_data_profiler.py +146 -33
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +402 -0
- datahub/ingestion/source/hex/constants.py +8 -0
- datahub/ingestion/source/hex/hex.py +311 -0
- datahub/ingestion/source/hex/mapper.py +412 -0
- datahub/ingestion/source/hex/model.py +78 -0
- datahub/ingestion/source/hex/query_fetcher.py +307 -0
- datahub/ingestion/source/iceberg/iceberg.py +385 -164
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +28 -71
- datahub/ingestion/source/kafka/kafka_config.py +78 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
- datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +216 -86
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +539 -555
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +103 -118
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +11 -10
- datahub/ingestion/source/mlflow.py +254 -23
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +359 -181
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
- datahub/ingestion/source/nifi.py +5 -5
- datahub/ingestion/source/openapi.py +85 -38
- datahub/ingestion/source/openapi_parser.py +59 -40
- datahub/ingestion/source/powerbi/config.py +92 -27
- datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +66 -32
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/profile.py +2 -2
- datahub/ingestion/source/redshift/query.py +24 -20
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +13 -11
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +515 -244
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/config.py +75 -8
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +36 -7
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +403 -140
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
- datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
- datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
- datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
- datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +219 -26
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +29 -9
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +9 -4
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +28 -8
- datahub/ingestion/source/sql/hive_metastore.py +24 -25
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +18 -2
- datahub/ingestion/source/sql/mssql/source.py +376 -62
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +62 -11
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +20 -2
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +27 -2
- datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
- datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
- datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +43 -10
- datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/superset.py +810 -126
- datahub/ingestion/source/tableau/tableau.py +172 -69
- datahub/ingestion/source/tableau/tableau_common.py +11 -4
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +161 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
- datahub/ingestion/source/usage/usage_common.py +4 -68
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1367 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
- datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/dataset_domain.py +1 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/common.py +3 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
- datahub/metadata/_urns/urn_defs.py +1866 -1582
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18404 -16617
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/Deprecation.avsc +2 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +21 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QueryProperties.avsc +24 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +147 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +7 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_shared.py +393 -10
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +180 -4
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +154 -12
- datahub/sdk/lineage_client.py +943 -0
- datahub/sdk/main_client.py +83 -8
- datahub/sdk/mlmodel.py +383 -0
- datahub/sdk/mlmodelgroup.py +240 -0
- datahub/sdk/search_client.py +85 -8
- datahub/sdk/search_filters.py +393 -68
- datahub/secret/datahub_secret_store.py +5 -1
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +51 -59
- datahub/sql_parsing/_sqlglot_patch.py +1 -2
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/split_statements.py +30 -3
- datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
- datahub/sql_parsing/sqlglot_lineage.py +517 -44
- datahub/sql_parsing/sqlglot_utils.py +30 -18
- datahub/sql_parsing/tool_meta_extractor.py +25 -2
- datahub/telemetry/telemetry.py +30 -16
- datahub/testing/check_imports.py +1 -1
- datahub/testing/docker_utils.py +8 -2
- datahub/testing/mce_helpers.py +421 -0
- datahub/testing/mcp_diff.py +17 -21
- datahub/testing/sdk_v2_helpers.py +18 -0
- datahub/upgrade/upgrade.py +86 -30
- datahub/utilities/file_backed_collections.py +14 -15
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/ingest_utils.py +2 -2
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +30 -7
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +298 -10
- datahub/utilities/sqlalchemy_query_combiner.py +6 -4
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/utilities/urn_encoder.py +1 -1
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -458
- datahub/ingestion/source/vertexai.py +0 -697
- datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -18,6 +18,7 @@ from sqlalchemy.sql import sqltypes
|
|
|
18
18
|
from sqlalchemy.types import BOOLEAN, DATE, DATETIME, INTEGER
|
|
19
19
|
|
|
20
20
|
import datahub.emitter.mce_builder as builder
|
|
21
|
+
from datahub.configuration.common import HiddenFromDocs, LaxStr
|
|
21
22
|
from datahub.configuration.source_common import DatasetLineageProviderConfigBase
|
|
22
23
|
from datahub.configuration.time_window_config import BaseTimeWindowConfig
|
|
23
24
|
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
|
|
@@ -32,6 +33,7 @@ from datahub.ingestion.api.decorators import (
|
|
|
32
33
|
support_status,
|
|
33
34
|
)
|
|
34
35
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
36
|
+
from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
|
|
35
37
|
from datahub.ingestion.source.sql.sql_common import (
|
|
36
38
|
SqlWorkUnit,
|
|
37
39
|
logger,
|
|
@@ -127,16 +129,20 @@ class ClickHouseConfig(
|
|
|
127
129
|
):
|
|
128
130
|
# defaults
|
|
129
131
|
host_port: str = Field(default="localhost:8123", description="ClickHouse host URL.")
|
|
130
|
-
scheme: str = Field(default="clickhouse"
|
|
132
|
+
scheme: HiddenFromDocs[str] = Field(default="clickhouse")
|
|
131
133
|
password: pydantic.SecretStr = Field(
|
|
132
134
|
default=pydantic.SecretStr(""), description="password"
|
|
133
135
|
)
|
|
134
|
-
secure: Optional[bool] = Field(
|
|
135
|
-
|
|
136
|
+
secure: Optional[bool] = Field(
|
|
137
|
+
default=None, description="[deprecated] Use uri_opts instead."
|
|
138
|
+
)
|
|
139
|
+
protocol: Optional[str] = Field(
|
|
140
|
+
default=None, description="[deprecated] Use uri_opts instead."
|
|
141
|
+
)
|
|
136
142
|
_deprecate_secure = pydantic_field_deprecated("secure")
|
|
137
143
|
_deprecate_protocol = pydantic_field_deprecated("protocol")
|
|
138
144
|
|
|
139
|
-
uri_opts: Dict[str,
|
|
145
|
+
uri_opts: Dict[str, LaxStr] = Field(
|
|
140
146
|
default={},
|
|
141
147
|
description="The part of the URI and it's used to provide additional configuration options or parameters for the database connection.",
|
|
142
148
|
)
|
|
@@ -145,7 +151,11 @@ class ClickHouseConfig(
|
|
|
145
151
|
)
|
|
146
152
|
include_materialized_views: Optional[bool] = Field(default=True, description="")
|
|
147
153
|
|
|
148
|
-
def get_sql_alchemy_url(
|
|
154
|
+
def get_sql_alchemy_url(
|
|
155
|
+
self,
|
|
156
|
+
uri_opts: Optional[Dict[str, Any]] = None,
|
|
157
|
+
current_db: Optional[str] = None,
|
|
158
|
+
) -> str:
|
|
149
159
|
url = make_url(
|
|
150
160
|
super().get_sql_alchemy_url(uri_opts=self.uri_opts, current_db=current_db)
|
|
151
161
|
)
|
|
@@ -180,9 +190,9 @@ class ClickHouseConfig(
|
|
|
180
190
|
"Initializing uri_opts from deprecated secure or protocol options"
|
|
181
191
|
)
|
|
182
192
|
values["uri_opts"] = {}
|
|
183
|
-
if secure:
|
|
184
|
-
values["uri_opts"]["secure"] = secure
|
|
185
|
-
if protocol:
|
|
193
|
+
if secure is not None:
|
|
194
|
+
values["uri_opts"]["secure"] = str(secure)
|
|
195
|
+
if protocol is not None:
|
|
186
196
|
values["uri_opts"]["protocol"] = protocol
|
|
187
197
|
logger.debug(f"uri_opts: {uri_opts}")
|
|
188
198
|
elif (secure or protocol) and uri_opts:
|
|
@@ -375,8 +385,18 @@ clickhouse_datetime_format = "%Y-%m-%d %H:%M:%S"
|
|
|
375
385
|
@platform_name("ClickHouse")
|
|
376
386
|
@config_class(ClickHouseConfig)
|
|
377
387
|
@support_status(SupportStatus.CERTIFIED)
|
|
378
|
-
@capability(
|
|
388
|
+
@capability(
|
|
389
|
+
SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
|
|
390
|
+
)
|
|
379
391
|
@capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
|
|
392
|
+
@capability(
|
|
393
|
+
SourceCapability.LINEAGE_COARSE,
|
|
394
|
+
"Enabled by default to get lineage for views via `include_view_lineage`",
|
|
395
|
+
subtype_modifier=[
|
|
396
|
+
SourceCapabilityModifier.VIEW,
|
|
397
|
+
SourceCapabilityModifier.TABLE,
|
|
398
|
+
],
|
|
399
|
+
)
|
|
380
400
|
class ClickHouseSource(TwoTierSQLAlchemySource):
|
|
381
401
|
"""
|
|
382
402
|
This plugin extracts the following:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from pydantic.fields import Field
|
|
2
2
|
|
|
3
|
-
from datahub.configuration.common import AllowDenyPattern
|
|
3
|
+
from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
|
|
4
4
|
from datahub.ingestion.api.common import PipelineContext
|
|
5
5
|
from datahub.ingestion.api.decorators import (
|
|
6
6
|
SourceCapability,
|
|
@@ -14,8 +14,10 @@ from datahub.ingestion.source.sql.postgres import PostgresConfig, PostgresSource
|
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class CockroachDBConfig(PostgresConfig):
|
|
17
|
-
scheme = Field(
|
|
18
|
-
|
|
17
|
+
scheme: HiddenFromDocs[str] = Field(
|
|
18
|
+
default="cockroachdb+psycopg2", description="database scheme"
|
|
19
|
+
)
|
|
20
|
+
schema_pattern: AllowDenyPattern = Field(
|
|
19
21
|
default=AllowDenyPattern(deny=["information_schema", "crdb_internal"])
|
|
20
22
|
)
|
|
21
23
|
|
|
@@ -26,7 +28,6 @@ class CockroachDBConfig(PostgresConfig):
|
|
|
26
28
|
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
|
|
27
29
|
@capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
|
|
28
30
|
@capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
|
|
29
|
-
@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
|
|
30
31
|
class CockroachDBSource(PostgresSource):
|
|
31
32
|
config: CockroachDBConfig
|
|
32
33
|
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
# This import verifies that the dependencies are available.
|
|
2
|
+
from typing import Any, Dict, Optional
|
|
3
|
+
|
|
2
4
|
import pydruid # noqa: F401
|
|
3
5
|
from pydantic.fields import Field
|
|
4
6
|
from pydruid.db.sqlalchemy import DruidDialect
|
|
5
7
|
from sqlalchemy.exc import ResourceClosedError
|
|
6
8
|
|
|
7
|
-
from datahub.configuration.common import AllowDenyPattern
|
|
9
|
+
from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
|
|
8
10
|
from datahub.ingestion.api.decorators import (
|
|
9
11
|
SourceCapability,
|
|
10
12
|
SupportStatus,
|
|
@@ -32,14 +34,17 @@ DruidDialect.get_table_names = get_table_names
|
|
|
32
34
|
|
|
33
35
|
class DruidConfig(BasicSQLAlchemyConfig):
|
|
34
36
|
# defaults
|
|
35
|
-
scheme: str = "druid"
|
|
37
|
+
scheme: HiddenFromDocs[str] = "druid"
|
|
36
38
|
schema_pattern: AllowDenyPattern = Field(
|
|
37
39
|
default=AllowDenyPattern(deny=["^(lookup|sysgit|view).*"]),
|
|
38
40
|
description="regex patterns for schemas to filter in ingestion.",
|
|
39
41
|
)
|
|
40
42
|
|
|
41
|
-
def get_sql_alchemy_url(
|
|
42
|
-
|
|
43
|
+
def get_sql_alchemy_url(
|
|
44
|
+
self, uri_opts: Optional[Dict[str, Any]] = None, database: Optional[str] = None
|
|
45
|
+
) -> str:
|
|
46
|
+
base_url = super().get_sql_alchemy_url(uri_opts=uri_opts, database=database)
|
|
47
|
+
return f"{base_url}/druid/v2/sql/"
|
|
43
48
|
|
|
44
49
|
"""
|
|
45
50
|
The pydruid library already formats the table name correctly, so we do not
|
|
@@ -27,7 +27,9 @@ class HanaConfig(BasicSQLAlchemyConfig):
|
|
|
27
27
|
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
|
|
28
28
|
@capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
|
|
29
29
|
@capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
|
|
30
|
-
@capability(
|
|
30
|
+
@capability(
|
|
31
|
+
SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
|
|
32
|
+
)
|
|
31
33
|
class HanaSource(SQLAlchemySource):
|
|
32
34
|
def __init__(self, config: HanaConfig, ctx: PipelineContext):
|
|
33
35
|
super().__init__(config, ctx, "hana")
|
|
@@ -6,7 +6,7 @@ from enum import Enum
|
|
|
6
6
|
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
|
|
7
7
|
from urllib.parse import urlparse
|
|
8
8
|
|
|
9
|
-
from pydantic
|
|
9
|
+
from pydantic import validator
|
|
10
10
|
from pydantic.fields import Field
|
|
11
11
|
|
|
12
12
|
# This import verifies that the dependencies are available.
|
|
@@ -14,6 +14,7 @@ from pyhive import hive # noqa: F401
|
|
|
14
14
|
from pyhive.sqlalchemy_hive import HiveDate, HiveDecimal, HiveDialect, HiveTimestamp
|
|
15
15
|
from sqlalchemy.engine.reflection import Inspector
|
|
16
16
|
|
|
17
|
+
from datahub.configuration.common import HiddenFromDocs
|
|
17
18
|
from datahub.emitter.mce_builder import (
|
|
18
19
|
make_data_platform_urn,
|
|
19
20
|
make_dataplatform_instance_urn,
|
|
@@ -139,7 +140,7 @@ class StoragePathParser:
|
|
|
139
140
|
path = f"{parsed.netloc}/{parsed.path.lstrip('/')}"
|
|
140
141
|
|
|
141
142
|
elif platform == StoragePlatform.AZURE:
|
|
142
|
-
if scheme in ("abfs", "abfss"):
|
|
143
|
+
if scheme in ("abfs", "abfss", "wasbs"):
|
|
143
144
|
# Format: abfss://container@account.dfs.core.windows.net/path
|
|
144
145
|
container = parsed.netloc.split("@")[0]
|
|
145
146
|
path = f"{container}/{parsed.path.lstrip('/')}"
|
|
@@ -153,7 +154,7 @@ class StoragePathParser:
|
|
|
153
154
|
|
|
154
155
|
elif platform == StoragePlatform.DBFS:
|
|
155
156
|
# For DBFS, use path as-is
|
|
156
|
-
path = parsed.path.lstrip("/")
|
|
157
|
+
path = "/" + parsed.path.lstrip("/")
|
|
157
158
|
|
|
158
159
|
elif platform == StoragePlatform.LOCAL:
|
|
159
160
|
# For local files, use full path
|
|
@@ -169,7 +170,6 @@ class StoragePathParser:
|
|
|
169
170
|
# Clean up the path
|
|
170
171
|
path = path.rstrip("/") # Remove trailing slashes
|
|
171
172
|
path = re.sub(r"/+", "/", path) # Normalize multiple slashes
|
|
172
|
-
path = f"/{path}"
|
|
173
173
|
|
|
174
174
|
return platform, path
|
|
175
175
|
|
|
@@ -637,8 +637,13 @@ def get_view_definition_patched(self, connection, view_name, schema=None, **kw):
|
|
|
637
637
|
self.identifier_preparer.quote_identifier(schema),
|
|
638
638
|
self.identifier_preparer.quote_identifier(view_name),
|
|
639
639
|
)
|
|
640
|
-
|
|
641
|
-
|
|
640
|
+
# Hive responds to the SHOW CREATE TABLE with the full view DDL,
|
|
641
|
+
# including the view definition. However, for multiline view definitions,
|
|
642
|
+
# it returns multiple rows (of one column each), each with a part of the definition.
|
|
643
|
+
# Any whitespace at the beginning/end of each view definition line is lost.
|
|
644
|
+
rows = connection.execute(f"SHOW CREATE TABLE {full_table}").fetchall()
|
|
645
|
+
parts = [row[0] for row in rows]
|
|
646
|
+
return "\n".join(parts)
|
|
642
647
|
|
|
643
648
|
|
|
644
649
|
HiveDialect.get_view_names = get_view_names_patched
|
|
@@ -647,10 +652,10 @@ HiveDialect.get_view_definition = get_view_definition_patched
|
|
|
647
652
|
|
|
648
653
|
class HiveConfig(TwoTierSQLAlchemyConfig):
|
|
649
654
|
# defaults
|
|
650
|
-
scheme: str = Field(default="hive"
|
|
655
|
+
scheme: HiddenFromDocs[str] = Field(default="hive")
|
|
651
656
|
|
|
652
657
|
# Overriding as table location lineage is richer implementation here than with include_table_location_lineage
|
|
653
|
-
include_table_location_lineage: bool = Field(default=False
|
|
658
|
+
include_table_location_lineage: HiddenFromDocs[bool] = Field(default=False)
|
|
654
659
|
|
|
655
660
|
emit_storage_lineage: bool = Field(
|
|
656
661
|
default=False,
|
|
@@ -862,3 +867,18 @@ class HiveSource(TwoTierSQLAlchemySource):
|
|
|
862
867
|
return partition_column.get("column_names")
|
|
863
868
|
|
|
864
869
|
return []
|
|
870
|
+
|
|
871
|
+
def get_table_properties(
|
|
872
|
+
self, inspector: Inspector, schema: str, table: str
|
|
873
|
+
) -> Tuple[Optional[str], Dict[str, str], Optional[str]]:
|
|
874
|
+
(description, properties, location) = super().get_table_properties(
|
|
875
|
+
inspector, schema, table
|
|
876
|
+
)
|
|
877
|
+
|
|
878
|
+
new_properties = {}
|
|
879
|
+
for key, value in properties.items():
|
|
880
|
+
if key and key[-1] == ":":
|
|
881
|
+
new_properties[key[:-1]] = value
|
|
882
|
+
else:
|
|
883
|
+
new_properties[key] = value
|
|
884
|
+
return (description, new_properties, location)
|
|
@@ -1,17 +1,15 @@
|
|
|
1
1
|
import base64
|
|
2
|
+
import dataclasses
|
|
2
3
|
import json
|
|
3
4
|
import logging
|
|
4
5
|
from collections import namedtuple
|
|
5
6
|
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
|
|
6
7
|
|
|
7
|
-
from pydantic
|
|
8
|
-
from pydantic.fields import Field
|
|
9
|
-
|
|
10
|
-
# This import verifies that the dependencies are available.
|
|
8
|
+
from pydantic import Field
|
|
11
9
|
from sqlalchemy import create_engine, text
|
|
12
10
|
from sqlalchemy.engine.reflection import Inspector
|
|
13
11
|
|
|
14
|
-
from datahub.configuration.common import AllowDenyPattern
|
|
12
|
+
from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
|
|
15
13
|
from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance
|
|
16
14
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
17
15
|
from datahub.ingestion.api.common import PipelineContext
|
|
@@ -27,6 +25,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
|
27
25
|
from datahub.ingestion.source.common.subtypes import (
|
|
28
26
|
DatasetContainerSubTypes,
|
|
29
27
|
DatasetSubTypes,
|
|
28
|
+
SourceCapabilityModifier,
|
|
30
29
|
)
|
|
31
30
|
from datahub.ingestion.source.sql.sql_common import (
|
|
32
31
|
SQLAlchemySource,
|
|
@@ -36,7 +35,6 @@ from datahub.ingestion.source.sql.sql_common import (
|
|
|
36
35
|
from datahub.ingestion.source.sql.sql_config import (
|
|
37
36
|
BasicSQLAlchemyConfig,
|
|
38
37
|
SQLCommonConfig,
|
|
39
|
-
make_sqlalchemy_uri,
|
|
40
38
|
)
|
|
41
39
|
from datahub.ingestion.source.sql.sql_utils import (
|
|
42
40
|
add_table_to_schema_container,
|
|
@@ -46,13 +44,13 @@ from datahub.ingestion.source.sql.sql_utils import (
|
|
|
46
44
|
gen_schema_key,
|
|
47
45
|
get_domain_wu,
|
|
48
46
|
)
|
|
47
|
+
from datahub.ingestion.source.sql.sqlalchemy_uri import make_sqlalchemy_uri
|
|
49
48
|
from datahub.ingestion.source.state.stateful_ingestion_base import JobId
|
|
50
49
|
from datahub.metadata.com.linkedin.pegasus2avro.common import StatusClass
|
|
51
50
|
from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
|
|
52
51
|
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
|
|
53
52
|
from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField
|
|
54
53
|
from datahub.metadata.schema_classes import (
|
|
55
|
-
ChangeTypeClass,
|
|
56
54
|
DatasetPropertiesClass,
|
|
57
55
|
SubTypesClass,
|
|
58
56
|
ViewPropertiesClass,
|
|
@@ -67,13 +65,13 @@ TableKey = namedtuple("TableKey", ["schema", "table"])
|
|
|
67
65
|
|
|
68
66
|
|
|
69
67
|
class HiveMetastoreConfigMode(StrEnum):
|
|
70
|
-
hive
|
|
71
|
-
presto
|
|
72
|
-
presto_on_hive
|
|
73
|
-
trino
|
|
68
|
+
hive = "hive"
|
|
69
|
+
presto = "presto"
|
|
70
|
+
presto_on_hive = "presto-on-hive"
|
|
71
|
+
trino = "trino"
|
|
74
72
|
|
|
75
73
|
|
|
76
|
-
@dataclass
|
|
74
|
+
@dataclasses.dataclass
|
|
77
75
|
class ViewDataset:
|
|
78
76
|
dataset_name: str
|
|
79
77
|
schema_name: str
|
|
@@ -99,7 +97,7 @@ class HiveMetastore(BasicSQLAlchemyConfig):
|
|
|
99
97
|
default="localhost:3306",
|
|
100
98
|
description="Host URL and port to connect to. Example: localhost:3306",
|
|
101
99
|
)
|
|
102
|
-
scheme: str = Field(default="mysql+pymysql"
|
|
100
|
+
scheme: HiddenFromDocs[str] = Field(default="mysql+pymysql")
|
|
103
101
|
|
|
104
102
|
database_pattern: AllowDenyPattern = Field(
|
|
105
103
|
default=AllowDenyPattern.allow_all(),
|
|
@@ -123,8 +121,8 @@ class HiveMetastore(BasicSQLAlchemyConfig):
|
|
|
123
121
|
description="Dataset Subtype name to be 'Table' or 'View' Valid options: ['True', 'False']",
|
|
124
122
|
)
|
|
125
123
|
|
|
126
|
-
include_view_lineage: bool = Field(
|
|
127
|
-
default=False,
|
|
124
|
+
include_view_lineage: HiddenFromDocs[bool] = Field(
|
|
125
|
+
default=False,
|
|
128
126
|
)
|
|
129
127
|
|
|
130
128
|
include_catalog_name_in_ids: bool = Field(
|
|
@@ -161,12 +159,22 @@ class HiveMetastore(BasicSQLAlchemyConfig):
|
|
|
161
159
|
@platform_name("Hive Metastore")
|
|
162
160
|
@config_class(HiveMetastore)
|
|
163
161
|
@support_status(SupportStatus.CERTIFIED)
|
|
164
|
-
@capability(
|
|
162
|
+
@capability(
|
|
163
|
+
SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
|
|
164
|
+
)
|
|
165
165
|
@capability(SourceCapability.DATA_PROFILING, "Not Supported", False)
|
|
166
166
|
@capability(SourceCapability.CLASSIFICATION, "Not Supported", False)
|
|
167
167
|
@capability(
|
|
168
168
|
SourceCapability.LINEAGE_COARSE, "View lineage is not supported", supported=False
|
|
169
169
|
)
|
|
170
|
+
@capability(
|
|
171
|
+
SourceCapability.CONTAINERS,
|
|
172
|
+
"Enabled by default",
|
|
173
|
+
subtype_modifier=[
|
|
174
|
+
SourceCapabilityModifier.CATALOG,
|
|
175
|
+
SourceCapabilityModifier.SCHEMA,
|
|
176
|
+
],
|
|
177
|
+
)
|
|
170
178
|
class HiveMetastoreSource(SQLAlchemySource):
|
|
171
179
|
"""
|
|
172
180
|
This plugin extracts the following:
|
|
@@ -599,10 +607,7 @@ class HiveMetastoreSource(SQLAlchemySource):
|
|
|
599
607
|
yield dpi_aspect
|
|
600
608
|
|
|
601
609
|
yield MetadataChangeProposalWrapper(
|
|
602
|
-
entityType="dataset",
|
|
603
|
-
changeType=ChangeTypeClass.UPSERT,
|
|
604
610
|
entityUrn=dataset_urn,
|
|
605
|
-
aspectName="subTypes",
|
|
606
611
|
aspect=SubTypesClass(typeNames=[self.table_subtype]),
|
|
607
612
|
).as_workunit()
|
|
608
613
|
|
|
@@ -808,10 +813,7 @@ class HiveMetastoreSource(SQLAlchemySource):
|
|
|
808
813
|
|
|
809
814
|
# Add views subtype
|
|
810
815
|
yield MetadataChangeProposalWrapper(
|
|
811
|
-
entityType="dataset",
|
|
812
|
-
changeType=ChangeTypeClass.UPSERT,
|
|
813
816
|
entityUrn=dataset_urn,
|
|
814
|
-
aspectName="subTypes",
|
|
815
817
|
aspect=SubTypesClass(typeNames=[self.view_subtype]),
|
|
816
818
|
).as_workunit()
|
|
817
819
|
|
|
@@ -822,10 +824,7 @@ class HiveMetastoreSource(SQLAlchemySource):
|
|
|
822
824
|
viewLogic=dataset.view_definition if dataset.view_definition else "",
|
|
823
825
|
)
|
|
824
826
|
yield MetadataChangeProposalWrapper(
|
|
825
|
-
entityType="dataset",
|
|
826
|
-
changeType=ChangeTypeClass.UPSERT,
|
|
827
827
|
entityUrn=dataset_urn,
|
|
828
|
-
aspectName="viewProperties",
|
|
829
828
|
aspect=view_properties_aspect,
|
|
830
829
|
).as_workunit()
|
|
831
830
|
|
|
@@ -15,7 +15,6 @@ from datahub.ingestion.source.sql.mysql import MySQLConfig, MySQLSource
|
|
|
15
15
|
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
|
|
16
16
|
@capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
|
|
17
17
|
@capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
|
|
18
|
-
@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
|
|
19
18
|
class MariaDBSource(MySQLSource):
|
|
20
19
|
def get_platform(self):
|
|
21
20
|
return "mariadb"
|
|
@@ -15,6 +15,7 @@ from datahub.ingestion.source.common.subtypes import (
|
|
|
15
15
|
FlowContainerSubTypes,
|
|
16
16
|
JobContainerSubTypes,
|
|
17
17
|
)
|
|
18
|
+
from datahub.ingestion.source.sql.stored_procedures.base import BaseProcedure
|
|
18
19
|
from datahub.metadata.schema_classes import (
|
|
19
20
|
ContainerClass,
|
|
20
21
|
DataFlowInfoClass,
|
|
@@ -133,7 +134,22 @@ class StoredProcedure:
|
|
|
133
134
|
|
|
134
135
|
@property
|
|
135
136
|
def escape_full_name(self) -> str:
|
|
136
|
-
return f"[{self.db}].[{self.schema}].[{self.formatted_name}]"
|
|
137
|
+
return f"[{self.db}].[{self.schema}].[{self.formatted_name}]".replace(
|
|
138
|
+
"'", r"''"
|
|
139
|
+
)
|
|
140
|
+
|
|
141
|
+
def to_base_procedure(self) -> BaseProcedure:
|
|
142
|
+
return BaseProcedure(
|
|
143
|
+
name=self.formatted_name,
|
|
144
|
+
procedure_definition=self.code,
|
|
145
|
+
created=None,
|
|
146
|
+
last_altered=None,
|
|
147
|
+
comment=None,
|
|
148
|
+
argument_signature=None,
|
|
149
|
+
return_type=None,
|
|
150
|
+
language="SQL",
|
|
151
|
+
extra_properties=None,
|
|
152
|
+
)
|
|
137
153
|
|
|
138
154
|
|
|
139
155
|
@dataclass
|
|
@@ -222,7 +238,7 @@ class MSSQLDataJob:
|
|
|
222
238
|
type = (
|
|
223
239
|
JobContainerSubTypes.MSSQL_JOBSTEP
|
|
224
240
|
if isinstance(self.entity, JobStep)
|
|
225
|
-
else JobContainerSubTypes.
|
|
241
|
+
else JobContainerSubTypes.STORED_PROCEDURE
|
|
226
242
|
)
|
|
227
243
|
return SubTypesClass(
|
|
228
244
|
typeNames=[type],
|