acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
- datahub/_version.py +1 -1
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +26 -23
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +3 -3
- datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
- datahub/api/graphql/operation.py +10 -6
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +18 -10
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +125 -27
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +153 -229
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +37 -6
- datahub/cli/specific/datacontract_cli.py +54 -7
- datahub/cli/specific/dataproduct_cli.py +2 -15
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +172 -3
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/configuration/common.py +40 -1
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +3 -2
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +8 -4
- datahub/emitter/rest_emitter.py +103 -30
- datahub/entrypoints.py +6 -3
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +165 -58
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +330 -25
- datahub/ingestion/graph/config.py +3 -2
- datahub/ingestion/graph/filters.py +30 -11
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +81 -11
- datahub/ingestion/run/pipeline_config.py +2 -2
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +13 -5
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +6 -8
- datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
- datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
- datahub/ingestion/source/common/subtypes.py +53 -0
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
- datahub/ingestion/source/datahub/config.py +12 -9
- datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
- datahub/ingestion/source/datahub/datahub_source.py +10 -0
- datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
- datahub/ingestion/source/dbt/dbt_common.py +224 -9
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +132 -98
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +8 -10
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +66 -7
- datahub/ingestion/source/fivetran/fivetran.py +227 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
- datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gcs/gcs_source.py +32 -4
- datahub/ingestion/source/ge_data_profiler.py +108 -31
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/api.py +28 -1
- datahub/ingestion/source/hex/hex.py +16 -5
- datahub/ingestion/source/hex/mapper.py +16 -2
- datahub/ingestion/source/hex/model.py +2 -0
- datahub/ingestion/source/hex/query_fetcher.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +123 -59
- datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
- datahub/ingestion/source/looker/looker_common.py +148 -79
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +503 -547
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +96 -117
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +9 -9
- datahub/ingestion/source/mlflow.py +12 -2
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +26 -5
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
- datahub/ingestion/source/nifi.py +2 -2
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/powerbi/config.py +47 -21
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
- datahub/ingestion/source/powerbi/powerbi.py +10 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/query.py +23 -19
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +6 -5
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +449 -248
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +10 -16
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +3 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
- datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
- datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
- datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
- datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +217 -25
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +24 -8
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +2 -2
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +4 -3
- datahub/ingestion/source/sql/hive_metastore.py +19 -20
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +3 -1
- datahub/ingestion/source/sql/mssql/source.py +336 -57
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +5 -5
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +2 -1
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +22 -0
- datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +11 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/superset.py +314 -67
- datahub/ingestion/source/tableau/tableau.py +135 -59
- datahub/ingestion/source/tableau/tableau_common.py +9 -2
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/unity/config.py +160 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
- datahub/ingestion/source/usage/usage_common.py +4 -3
- datahub/ingestion/source/vertexai/vertexai.py +1 -1
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +6806 -4871
- datahub/metadata/_urns/urn_defs.py +1767 -1539
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18395 -16979
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +4 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +61 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/sdk/__init__.py +2 -0
- datahub/sdk/_all_entities.py +7 -0
- datahub/sdk/_shared.py +249 -5
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +7 -0
- datahub/sdk/datajob.py +45 -13
- datahub/sdk/dataset.py +56 -2
- datahub/sdk/entity_client.py +111 -9
- datahub/sdk/lineage_client.py +663 -82
- datahub/sdk/main_client.py +50 -16
- datahub/sdk/mlmodel.py +120 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +7 -3
- datahub/sdk/search_filters.py +304 -36
- datahub/secret/datahub_secret_store.py +3 -0
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +39 -59
- datahub/sql_parsing/split_statements.py +13 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
- datahub/sql_parsing/sqlglot_lineage.py +196 -42
- datahub/sql_parsing/sqlglot_utils.py +12 -4
- datahub/sql_parsing/tool_meta_extractor.py +1 -3
- datahub/telemetry/telemetry.py +28 -14
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +73 -17
- datahub/utilities/file_backed_collections.py +8 -9
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +22 -6
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +10 -1
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -466
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
datahub/upgrade/upgrade.py
CHANGED
|
@@ -7,7 +7,7 @@ from typing import Any, Callable, Optional, Tuple, TypeVar
|
|
|
7
7
|
|
|
8
8
|
import click
|
|
9
9
|
import humanfriendly
|
|
10
|
-
from packaging.version import Version
|
|
10
|
+
from packaging.version import InvalidVersion, Version
|
|
11
11
|
from pydantic import BaseModel
|
|
12
12
|
|
|
13
13
|
from datahub._version import __version__
|
|
@@ -28,10 +28,24 @@ class VersionStats(BaseModel, arbitrary_types_allowed=True):
|
|
|
28
28
|
release_date: Optional[datetime] = None
|
|
29
29
|
|
|
30
30
|
|
|
31
|
+
def _safe_version_stats(version_string: str) -> Optional[VersionStats]:
|
|
32
|
+
"""
|
|
33
|
+
Safely create a VersionStats object from a version string.
|
|
34
|
+
Returns None if the version string is invalid.
|
|
35
|
+
"""
|
|
36
|
+
try:
|
|
37
|
+
return VersionStats(version=Version(version_string), release_date=None)
|
|
38
|
+
except InvalidVersion:
|
|
39
|
+
log.warning(f"Invalid version format received: {version_string!r}")
|
|
40
|
+
return None
|
|
41
|
+
|
|
42
|
+
|
|
31
43
|
class ServerVersionStats(BaseModel):
|
|
32
44
|
current: VersionStats
|
|
33
45
|
latest: Optional[VersionStats] = None
|
|
34
46
|
current_server_type: Optional[str] = None
|
|
47
|
+
current_server_default_cli_version: Optional[VersionStats] = None
|
|
48
|
+
is_cloud_server: Optional[bool] = None
|
|
35
49
|
|
|
36
50
|
|
|
37
51
|
class ClientVersionStats(BaseModel):
|
|
@@ -44,7 +58,7 @@ class DataHubVersionStats(BaseModel):
|
|
|
44
58
|
client: ClientVersionStats
|
|
45
59
|
|
|
46
60
|
|
|
47
|
-
async def get_client_version_stats():
|
|
61
|
+
async def get_client_version_stats() -> ClientVersionStats:
|
|
48
62
|
import aiohttp
|
|
49
63
|
|
|
50
64
|
current_version_string = __version__
|
|
@@ -52,6 +66,7 @@ async def get_client_version_stats():
|
|
|
52
66
|
client_version_stats: ClientVersionStats = ClientVersionStats(
|
|
53
67
|
current=VersionStats(version=current_version, release_date=None), latest=None
|
|
54
68
|
)
|
|
69
|
+
|
|
55
70
|
async with aiohttp.ClientSession() as session:
|
|
56
71
|
pypi_url = "https://pypi.org/pypi/acryl_datahub/json"
|
|
57
72
|
async with session.get(pypi_url) as resp:
|
|
@@ -131,7 +146,9 @@ async def get_server_config(gms_url: str, token: Optional[str]) -> RestServiceCo
|
|
|
131
146
|
|
|
132
147
|
async def get_server_version_stats(
|
|
133
148
|
server: Optional[DataHubGraph] = None,
|
|
134
|
-
) -> Tuple[
|
|
149
|
+
) -> Tuple[
|
|
150
|
+
Optional[str], Optional[Version], Optional[str], Optional[datetime], Optional[bool]
|
|
151
|
+
]:
|
|
135
152
|
import aiohttp
|
|
136
153
|
|
|
137
154
|
server_config: Optional[RestServiceConfig] = None
|
|
@@ -151,12 +168,15 @@ async def get_server_version_stats(
|
|
|
151
168
|
|
|
152
169
|
server_type = None
|
|
153
170
|
server_version: Optional[Version] = None
|
|
171
|
+
current_server_default_cli_version = None
|
|
154
172
|
current_server_release_date = None
|
|
173
|
+
is_cloud_server: Optional[bool] = None
|
|
155
174
|
if server_config:
|
|
156
175
|
server_version_string = server_config.service_version
|
|
157
176
|
commit_hash = server_config.commit_hash
|
|
158
177
|
server_type = server_config.server_type
|
|
159
|
-
|
|
178
|
+
current_server_default_cli_version = server_config.default_cli_version
|
|
179
|
+
is_cloud_server = server_config.is_datahub_cloud
|
|
160
180
|
if server_type == "quickstart" and commit_hash:
|
|
161
181
|
async with aiohttp.ClientSession(
|
|
162
182
|
headers={"Accept": "application/vnd.github.v3+json"}
|
|
@@ -171,7 +191,13 @@ async def get_server_version_stats(
|
|
|
171
191
|
if server_version_string and server_version_string.startswith("v"):
|
|
172
192
|
server_version = Version(server_version_string[1:])
|
|
173
193
|
|
|
174
|
-
return (
|
|
194
|
+
return (
|
|
195
|
+
server_type,
|
|
196
|
+
server_version,
|
|
197
|
+
current_server_default_cli_version,
|
|
198
|
+
current_server_release_date,
|
|
199
|
+
is_cloud_server,
|
|
200
|
+
)
|
|
175
201
|
|
|
176
202
|
|
|
177
203
|
def retrieve_version_stats(
|
|
@@ -214,7 +240,9 @@ async def _retrieve_version_stats(
|
|
|
214
240
|
(
|
|
215
241
|
current_server_type,
|
|
216
242
|
current_server_version,
|
|
243
|
+
current_server_default_cli_version,
|
|
217
244
|
current_server_release_date,
|
|
245
|
+
is_cloud_server,
|
|
218
246
|
) = results[2]
|
|
219
247
|
|
|
220
248
|
server_version_stats = None
|
|
@@ -223,12 +251,18 @@ async def _retrieve_version_stats(
|
|
|
223
251
|
current=VersionStats(
|
|
224
252
|
version=current_server_version, release_date=current_server_release_date
|
|
225
253
|
),
|
|
254
|
+
current_server_default_cli_version=(
|
|
255
|
+
_safe_version_stats(current_server_default_cli_version)
|
|
256
|
+
if current_server_default_cli_version
|
|
257
|
+
else None
|
|
258
|
+
),
|
|
226
259
|
latest=(
|
|
227
260
|
VersionStats(version=last_server_version, release_date=last_server_date)
|
|
228
261
|
if last_server_version
|
|
229
262
|
else None
|
|
230
263
|
),
|
|
231
264
|
current_server_type=current_server_type,
|
|
265
|
+
is_cloud_server=is_cloud_server,
|
|
232
266
|
)
|
|
233
267
|
|
|
234
268
|
if client_version_stats and server_version_stats:
|
|
@@ -255,21 +289,14 @@ def valid_client_version(version: Version) -> bool:
|
|
|
255
289
|
"""Only version strings like 0.4.5 and 0.6.7.8 are valid. 0.8.6.7rc1 is not"""
|
|
256
290
|
if version.is_prerelease or version.is_postrelease or version.is_devrelease:
|
|
257
291
|
return False
|
|
258
|
-
|
|
259
|
-
return True
|
|
260
|
-
|
|
261
|
-
return False
|
|
292
|
+
return True
|
|
262
293
|
|
|
263
294
|
|
|
264
295
|
def valid_server_version(version: Version) -> bool:
|
|
265
296
|
"""Only version strings like 0.8.x, 0.9.x or 0.10.x are valid. 0.1.x is not"""
|
|
266
297
|
if version.is_prerelease or version.is_postrelease or version.is_devrelease:
|
|
267
298
|
return False
|
|
268
|
-
|
|
269
|
-
if version.major == 0 and version.minor in [8, 9, 10]:
|
|
270
|
-
return True
|
|
271
|
-
|
|
272
|
-
return False
|
|
299
|
+
return True
|
|
273
300
|
|
|
274
301
|
|
|
275
302
|
def is_client_server_compatible(client: VersionStats, server: VersionStats) -> int:
|
|
@@ -291,6 +318,27 @@ def is_client_server_compatible(client: VersionStats, server: VersionStats) -> i
|
|
|
291
318
|
return server.version.micro - client.version.micro
|
|
292
319
|
|
|
293
320
|
|
|
321
|
+
def is_server_default_cli_ahead(version_stats: DataHubVersionStats) -> bool:
|
|
322
|
+
"""
|
|
323
|
+
Check if the server default CLI version is ahead of the current CLI version.
|
|
324
|
+
Returns True if server default CLI is newer and both versions are valid.
|
|
325
|
+
"""
|
|
326
|
+
if not version_stats.server.current_server_default_cli_version:
|
|
327
|
+
return False
|
|
328
|
+
|
|
329
|
+
current_cli = version_stats.client.current
|
|
330
|
+
server_default_cli = version_stats.server.current_server_default_cli_version
|
|
331
|
+
|
|
332
|
+
is_valid_client_version = valid_client_version(current_cli.version)
|
|
333
|
+
is_valid_server_version = valid_client_version(server_default_cli.version)
|
|
334
|
+
|
|
335
|
+
if not (is_valid_client_version and is_valid_server_version):
|
|
336
|
+
return False
|
|
337
|
+
|
|
338
|
+
compatibility_result = is_client_server_compatible(current_cli, server_default_cli)
|
|
339
|
+
return compatibility_result > 0
|
|
340
|
+
|
|
341
|
+
|
|
294
342
|
def _maybe_print_upgrade_message(
|
|
295
343
|
version_stats: Optional[DataHubVersionStats],
|
|
296
344
|
) -> None:
|
|
@@ -312,9 +360,15 @@ def _maybe_print_upgrade_message(
|
|
|
312
360
|
if version_stats.client.latest
|
|
313
361
|
else None
|
|
314
362
|
)
|
|
315
|
-
client_server_compat =
|
|
316
|
-
|
|
317
|
-
|
|
363
|
+
client_server_compat = 0
|
|
364
|
+
# Skip version compatibility checks for cloud servers (serverEnv="cloud")
|
|
365
|
+
# Cloud servers use different versioning schemes between server and CLI
|
|
366
|
+
is_cloud = version_stats.server.is_cloud_server
|
|
367
|
+
|
|
368
|
+
if not is_cloud:
|
|
369
|
+
client_server_compat = is_client_server_compatible(
|
|
370
|
+
version_stats.client.current, version_stats.server.current
|
|
371
|
+
)
|
|
318
372
|
|
|
319
373
|
if latest_release_date and current_release_date:
|
|
320
374
|
assert version_stats.client.latest
|
|
@@ -429,6 +483,8 @@ def check_upgrade_post(
|
|
|
429
483
|
|
|
430
484
|
|
|
431
485
|
def check_upgrade(func: Callable[..., T]) -> Callable[..., T]:
|
|
486
|
+
log.debug(f"Checking upgrade for {func.__module__}.{func.__name__}")
|
|
487
|
+
|
|
432
488
|
@wraps(func)
|
|
433
489
|
def async_wrapper(*args: Any, **kwargs: Any) -> Any:
|
|
434
490
|
with PerfTimer() as timer:
|
|
datahub/utilities/file_backed_collections.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import collections
|
|
2
2
|
import gzip
|
|
3
3
|
import logging
|
|
4
|
-
import os
|
|
5
4
|
import pathlib
|
|
6
5
|
import pickle
|
|
7
6
|
import shutil
|
|
@@ -28,18 +27,18 @@ from typing import (
|
|
|
28
27
|
Union,
|
|
29
28
|
)
|
|
30
29
|
|
|
30
|
+
from datahub.configuration.env_vars import get_override_sqlite_version_req
|
|
31
31
|
from datahub.ingestion.api.closeable import Closeable
|
|
32
32
|
from datahub.utilities.sentinels import Unset, unset
|
|
33
33
|
|
|
34
34
|
logger: logging.Logger = logging.getLogger(__name__)
|
|
35
35
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
)
|
|
36
|
+
|
|
37
|
+
def _get_sqlite_version_override() -> bool:
    """Report whether the SQLite version requirement is overridden.

    The value is fetched via ``get_override_sqlite_version_req()`` on every
    call. The override counts as enabled when that value is non-empty and is
    not ``"false"`` (compared case-insensitively).
    """
    raw_value = get_override_sqlite_version_req()
    if not raw_value:
        # Unset or empty: no override requested.
        return False
    return raw_value.lower() != "false"
|
|
41
|
+
|
|
43
42
|
|
|
44
43
|
_DEFAULT_FILE_NAME = "sqlite.db"
|
|
45
44
|
_DEFAULT_TABLE_NAME = "data"
|
|
@@ -231,7 +230,7 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
|
|
|
231
230
|
# We use the ON CONFLICT clause to implement UPSERTs with sqlite.
|
|
232
231
|
# This was added in 3.24.0 from 2018-06-04.
|
|
233
232
|
# See https://www.sqlite.org/lang_conflict.html
|
|
234
|
-
if
|
|
233
|
+
if _get_sqlite_version_override():
|
|
235
234
|
self._use_sqlite_on_conflict = False
|
|
236
235
|
else:
|
|
237
236
|
raise RuntimeError("SQLite version 3.24.0 or later is required")
|
datahub/utilities/is_pytest.py
CHANGED
|
@@ -15,13 +15,13 @@ import collections
|
|
|
15
15
|
import contextlib
|
|
16
16
|
import itertools
|
|
17
17
|
import logging
|
|
18
|
-
import os
|
|
19
18
|
import pathlib
|
|
20
19
|
import sys
|
|
21
20
|
from typing import Deque, Iterator, Optional
|
|
22
21
|
|
|
23
22
|
import click
|
|
24
23
|
|
|
24
|
+
from datahub.configuration.env_vars import get_no_color, get_suppress_logging_manager
|
|
25
25
|
from datahub.utilities.tee_io import TeeIO
|
|
26
26
|
|
|
27
27
|
BASE_LOGGING_FORMAT = (
|
|
@@ -38,7 +38,7 @@ IN_MEMORY_LOG_BUFFER_SIZE = 2000 # lines
|
|
|
38
38
|
IN_MEMORY_LOG_BUFFER_MAX_LINE_LENGTH = 2000 # characters
|
|
39
39
|
|
|
40
40
|
|
|
41
|
-
NO_COLOR =
|
|
41
|
+
NO_COLOR = get_no_color()
|
|
42
42
|
|
|
43
43
|
|
|
44
44
|
def extract_name_from_filename(filename: str, fallback_name: str) -> str:
|
|
@@ -179,6 +179,18 @@ class _LogBuffer:
|
|
|
179
179
|
return text
|
|
180
180
|
|
|
181
181
|
|
|
182
|
+
class _ResilientStreamHandler(logging.StreamHandler):
    """StreamHandler that gracefully handles closed streams.

    ``logging.StreamHandler.emit`` catches write failures itself and hands
    them to ``handleError``, which prints a "--- Logging error ---" traceback
    to ``sys.stderr``. Guarding ``emit`` alone therefore does not keep
    closed-stream failures quiet, so ``handleError`` is overridden as well.
    """

    def emit(self, record: logging.LogRecord) -> None:
        """Write *record* to the stream, ignoring closed-stream failures."""
        try:
            super().emit(record)
        except (ValueError, OSError):
            # Can still be reached when reporting the failure itself fails,
            # e.g. because sys.stderr is closed too (pytest teardown).
            pass

    def handleError(self, record: logging.LogRecord) -> None:
        """Drop closed-stream errors; report every other error as usual."""
        exc = sys.exc_info()[1]
        if isinstance(exc, OSError):
            return
        if isinstance(exc, ValueError) and getattr(self.stream, "closed", False):
            # Writing to a closed file raises ValueError. Other ValueErrors
            # (e.g. a malformed format string) still deserve a report.
            return
        super().handleError(record)
|
|
192
|
+
|
|
193
|
+
|
|
182
194
|
class _BufferLogHandler(logging.Handler):
|
|
183
195
|
def __init__(self, storage: _LogBuffer) -> None:
|
|
184
196
|
super().__init__()
|
|
@@ -201,7 +213,11 @@ class _BufferLogHandler(logging.Handler):
|
|
|
201
213
|
def _remove_all_handlers(logger: logging.Logger) -> None:
    """Detach every handler from *logger* and close each one.

    A snapshot of the handler list is iterated because ``removeHandler``
    mutates ``logger.handlers``. ``ValueError`` / ``OSError`` raised while
    closing a handler are ignored.
    """
    attached = list(logger.handlers)
    for attached_handler in attached:
        logger.removeHandler(attached_handler)
        # Handler stream may already be closed (e.g., during pytest teardown)
        with contextlib.suppress(ValueError, OSError):
            attached_handler.close()
|
|
205
221
|
|
|
206
222
|
|
|
207
223
|
_log_buffer = _LogBuffer(maxlen=IN_MEMORY_LOG_BUFFER_SIZE)
|
|
@@ -219,14 +235,14 @@ _default_formatter = logging.Formatter(BASE_LOGGING_FORMAT)
|
|
|
219
235
|
def configure_logging(debug: bool, log_file: Optional[str] = None) -> Iterator[None]:
|
|
220
236
|
_log_buffer.clear()
|
|
221
237
|
|
|
222
|
-
if
|
|
238
|
+
if get_suppress_logging_manager() == "1":
|
|
223
239
|
# If we're running in pytest, we don't want to configure logging.
|
|
224
240
|
yield
|
|
225
241
|
return
|
|
226
242
|
|
|
227
243
|
with contextlib.ExitStack() as stack:
|
|
228
244
|
# Create stdout handler.
|
|
229
|
-
stream_handler =
|
|
245
|
+
stream_handler = _ResilientStreamHandler()
|
|
230
246
|
stream_handler.addFilter(_DatahubLogFilter(debug=debug))
|
|
231
247
|
stream_handler.setFormatter(_stream_formatter)
|
|
232
248
|
|
|
@@ -237,7 +253,7 @@ def configure_logging(debug: bool, log_file: Optional[str] = None) -> Iterator[N
|
|
|
237
253
|
tee = TeeIO(sys.stdout, file)
|
|
238
254
|
stack.enter_context(contextlib.redirect_stdout(tee)) # type: ignore
|
|
239
255
|
|
|
240
|
-
file_handler =
|
|
256
|
+
file_handler = _ResilientStreamHandler(file)
|
|
241
257
|
file_handler.addFilter(_DatahubLogFilter(debug=True))
|
|
242
258
|
file_handler.setFormatter(_default_formatter)
|
|
243
259
|
else:
|
datahub/utilities/mapping.py
CHANGED
|
@@ -83,7 +83,7 @@ class Constants:
|
|
|
83
83
|
MATCH = "match"
|
|
84
84
|
USER_OWNER = "user"
|
|
85
85
|
GROUP_OWNER = "group"
|
|
86
|
-
OPERAND_DATATYPE_SUPPORTED = [int, bool, str, float]
|
|
86
|
+
OPERAND_DATATYPE_SUPPORTED = [int, bool, str, float, list]
|
|
87
87
|
TAG_PARTITION_KEY = "PARTITION_KEY"
|
|
88
88
|
TAG_DIST_KEY = "DIST_KEY"
|
|
89
89
|
TAG_SORT_KEY = "SORT_KEY"
|
|
@@ -455,7 +455,34 @@ class OperationProcessor:
|
|
|
455
455
|
# function to check if a match clause is satisfied to a value.
|
|
456
456
|
if not any(
|
|
457
457
|
isinstance(raw_props_value, t) for t in Constants.OPERAND_DATATYPE_SUPPORTED
|
|
458
|
-
)
|
|
458
|
+
):
|
|
459
|
+
return None
|
|
460
|
+
|
|
461
|
+
# Handle list values by checking if any item in the list matches
|
|
462
|
+
if isinstance(raw_props_value, list):
|
|
463
|
+
# For lists, we need to find at least one matching item
|
|
464
|
+
# Return a match with the concatenated values of all matching items
|
|
465
|
+
matching_items = []
|
|
466
|
+
for item in raw_props_value:
|
|
467
|
+
if isinstance(item, str):
|
|
468
|
+
match = re.match(match_clause, item)
|
|
469
|
+
if match:
|
|
470
|
+
matching_items.append(item)
|
|
471
|
+
elif isinstance(match_clause, type(item)):
|
|
472
|
+
match = re.match(str(match_clause), str(item))
|
|
473
|
+
if match:
|
|
474
|
+
matching_items.append(str(item))
|
|
475
|
+
|
|
476
|
+
if matching_items:
|
|
477
|
+
# Create a synthetic match object with all matching items joined
|
|
478
|
+
combined_value = ",".join(matching_items)
|
|
479
|
+
return re.match(
|
|
480
|
+
".*", combined_value
|
|
481
|
+
) # Always matches, returns combined value
|
|
482
|
+
return None
|
|
483
|
+
|
|
484
|
+
# Handle scalar values (existing logic)
|
|
485
|
+
elif not isinstance(raw_props_value, type(match_clause)):
|
|
459
486
|
return None
|
|
460
487
|
elif isinstance(raw_props_value, str):
|
|
461
488
|
return re.match(match_clause, raw_props_value)
|
datahub/utilities/sample_data.py
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
|
-
import os
|
|
2
1
|
import pathlib
|
|
3
2
|
import tempfile
|
|
4
3
|
|
|
5
4
|
import requests
|
|
6
5
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
6
|
+
from datahub.configuration.env_vars import get_docker_compose_base
|
|
7
|
+
|
|
8
|
+
DOCKER_COMPOSE_BASE = (
|
|
9
|
+
get_docker_compose_base()
|
|
10
|
+
or "https://raw.githubusercontent.com/datahub-project/datahub/master"
|
|
10
11
|
)
|
|
11
12
|
BOOTSTRAP_MCES_FILE = "metadata-ingestion/examples/mce_files/bootstrap_mce.json"
|
|
12
13
|
BOOTSTRAP_MCES_URL = f"{DOCKER_COMPOSE_BASE}/{BOOTSTRAP_MCES_FILE}"
|
|
@@ -183,6 +183,14 @@ class RestServiceConfig:
|
|
|
183
183
|
managed_ingestion = self.raw_config.get("managedIngestion") or {}
|
|
184
184
|
return managed_ingestion.get("enabled", False)
|
|
185
185
|
|
|
186
|
+
@property
def default_cli_version(self) -> Optional[str]:
    """Return the ``defaultCliVersion`` entry of the managed-ingestion config.

    Looks up ``managedIngestion`` in ``self.raw_config``; when that section is
    missing or falsy, or has no ``defaultCliVersion`` key, the result is
    ``None``.
    """
    section = self.raw_config.get("managedIngestion")
    if not section:
        return None
    return section.get("defaultCliVersion")
|
|
193
|
+
|
|
186
194
|
@property
|
|
187
195
|
def is_datahub_cloud(self) -> bool:
|
|
188
196
|
"""
|
|
@@ -234,7 +242,8 @@ class RestServiceConfig:
|
|
|
234
242
|
|
|
235
243
|
# Check if this is a config-based feature
|
|
236
244
|
if feature in config_based_features:
|
|
237
|
-
|
|
245
|
+
result = config_based_features[feature]()
|
|
246
|
+
return bool(result) if result is not None else False
|
|
238
247
|
|
|
239
248
|
# For environment-based features, determine requirements based on cloud vs. non-cloud
|
|
240
249
|
deployment_type = "cloud" if self.is_datahub_cloud else "core"
|
|
@@ -272,8 +272,11 @@ class SQLAlchemyQueryCombiner:
|
|
|
272
272
|
self.report.uncombined_queries_issued += 1
|
|
273
273
|
return _sa_execute_underlying_method(conn, query, *args, **kwargs)
|
|
274
274
|
|
|
275
|
-
with
|
|
276
|
-
|
|
275
|
+
with (
|
|
276
|
+
_sa_execute_method_patching_lock,
|
|
277
|
+
unittest.mock.patch(
|
|
278
|
+
"sqlalchemy.engine.Connection.execute", _sa_execute_fake
|
|
279
|
+
),
|
|
277
280
|
):
|
|
278
281
|
yield self
|
|
279
282
|
|
datahub/utilities/urns/urn.py
CHANGED
|
@@ -1,8 +1,47 @@
|
|
|
1
|
-
from
|
|
1
|
+
from typing import Optional
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
from datahub.metadata.urns import (
|
|
4
|
+
DataPlatformUrn,
|
|
5
|
+
Urn,
|
|
6
|
+
)
|
|
7
|
+
|
|
8
|
+
__all__ = ["Urn", "guess_entity_type", "guess_platform_name"]
|
|
4
9
|
|
|
5
10
|
|
|
6
11
|
def guess_entity_type(urn: str) -> str:
    """Return the entity-type segment of a ``urn:li:<type>:...`` string.

    The segment is the text between the ``urn:li:`` prefix and the next
    colon (or the end of the string). An ``AssertionError`` is raised when
    the prefix is missing.
    """
    prefix = "urn:li:"
    assert urn.startswith(prefix), "urns must start with urn:li:"
    entity_type, _, _ = urn[len(prefix) :].partition(":")
    return entity_type
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def guess_platform_name(urn: str) -> Optional[str]:
    """Best-effort extraction of the platform name from a URN string.

    The URN is parsed with ``Urn.from_string`` and the parsed object is probed
    for the first available of these attributes:

    1. ``platform`` -- parsed with ``DataPlatformUrn.from_string`` and reduced
       to its entity id string.
    2. ``orchestrator``, ``dashboard_tool``, ``ml_model_tool`` -- returned
       as-is.

    Returns ``None`` when the parsed URN exposes none of them. Errors raised
    by ``Urn.from_string`` propagate to the caller.
    """
    urn_obj = Urn.from_string(urn)

    try:
        platform = urn_obj.platform  # type: ignore[attr-defined]
        return DataPlatformUrn.from_string(platform).get_entity_id_as_string()
    except AttributeError:
        # No usable ``platform``; fall back to the tool-style attributes.
        pass

    # A sentinel distinguishes "attribute absent" from an attribute whose
    # value happens to be None, which is returned unchanged.
    missing = object()
    for attr_name in ("orchestrator", "dashboard_tool", "ml_model_tool"):
        value = getattr(urn_obj, attr_name, missing)
        if value is not missing:
            return value
    return None
|