acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +18 -3
- datahub/api/entities/datajob/datajob.py +24 -4
- datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +47 -72
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +37 -37
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/base.py +8 -6
- datahub/api/graphql/operation.py +14 -10
- datahub/cli/check_cli.py +91 -9
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +20 -12
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +133 -34
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +155 -231
- datahub/cli/exists_cli.py +2 -3
- datahub/cli/get_cli.py +2 -3
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +11 -5
- datahub/cli/ingest_cli.py +25 -26
- datahub/cli/migrate.py +12 -9
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +4 -6
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +39 -7
- datahub/cli/specific/datacontract_cli.py +57 -9
- datahub/cli/specific/dataproduct_cli.py +12 -24
- datahub/cli/specific/dataset_cli.py +31 -21
- datahub/cli/specific/forms_cli.py +2 -5
- datahub/cli/specific/group_cli.py +2 -3
- datahub/cli/specific/structuredproperties_cli.py +5 -7
- datahub/cli/specific/user_cli.py +174 -4
- datahub/cli/state_cli.py +2 -3
- datahub/cli/timeline_cli.py +2 -3
- datahub/configuration/common.py +46 -2
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +4 -3
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +12 -8
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/mcp_builder.py +12 -0
- datahub/emitter/request_helper.py +138 -15
- datahub/emitter/response_helper.py +111 -19
- datahub/emitter/rest_emitter.py +399 -163
- datahub/entrypoints.py +10 -5
- datahub/errors.py +12 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/common.py +9 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +174 -62
- datahub/ingestion/api/source_helpers.py +41 -3
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +31 -5
- datahub/ingestion/glossary/classification_mixin.py +9 -2
- datahub/ingestion/graph/client.py +492 -55
- datahub/ingestion/graph/config.py +18 -2
- datahub/ingestion/graph/filters.py +96 -32
- datahub/ingestion/graph/links.py +55 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +90 -23
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +31 -23
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/apply/datahub_apply.py +6 -5
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +7 -18
- datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
- datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
- datahub/ingestion/source/common/subtypes.py +73 -1
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
- datahub/ingestion/source/data_lake_common/object_store.py +732 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
- datahub/ingestion/source/datahub/config.py +19 -5
- datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
- datahub/ingestion/source/datahub/datahub_source.py +11 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
- datahub/ingestion/source/dbt/dbt_common.py +270 -26
- datahub/ingestion/source/dbt/dbt_core.py +88 -47
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +228 -215
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +12 -14
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +67 -8
- datahub/ingestion/source/fivetran/fivetran.py +228 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
- datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
- datahub/ingestion/source/gcs/gcs_source.py +53 -10
- datahub/ingestion/source/gcs/gcs_utils.py +36 -9
- datahub/ingestion/source/ge_data_profiler.py +146 -33
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +402 -0
- datahub/ingestion/source/hex/constants.py +8 -0
- datahub/ingestion/source/hex/hex.py +311 -0
- datahub/ingestion/source/hex/mapper.py +412 -0
- datahub/ingestion/source/hex/model.py +78 -0
- datahub/ingestion/source/hex/query_fetcher.py +307 -0
- datahub/ingestion/source/iceberg/iceberg.py +385 -164
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +28 -71
- datahub/ingestion/source/kafka/kafka_config.py +78 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
- datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +216 -86
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +539 -555
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +103 -118
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +11 -10
- datahub/ingestion/source/mlflow.py +254 -23
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +359 -181
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
- datahub/ingestion/source/nifi.py +5 -5
- datahub/ingestion/source/openapi.py +85 -38
- datahub/ingestion/source/openapi_parser.py +59 -40
- datahub/ingestion/source/powerbi/config.py +92 -27
- datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +66 -32
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/profile.py +2 -2
- datahub/ingestion/source/redshift/query.py +24 -20
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +13 -11
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +515 -244
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/config.py +75 -8
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +36 -7
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +403 -140
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
- datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
- datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
- datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
- datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +219 -26
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +29 -9
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +9 -4
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +28 -8
- datahub/ingestion/source/sql/hive_metastore.py +24 -25
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +18 -2
- datahub/ingestion/source/sql/mssql/source.py +376 -62
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +62 -11
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +20 -2
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +27 -2
- datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
- datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
- datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +43 -10
- datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/superset.py +810 -126
- datahub/ingestion/source/tableau/tableau.py +172 -69
- datahub/ingestion/source/tableau/tableau_common.py +11 -4
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +161 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
- datahub/ingestion/source/usage/usage_common.py +4 -68
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1367 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
- datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/dataset_domain.py +1 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/common.py +3 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
- datahub/metadata/_urns/urn_defs.py +1866 -1582
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18404 -16617
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/Deprecation.avsc +2 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +21 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QueryProperties.avsc +24 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +147 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +7 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_shared.py +393 -10
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +180 -4
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +154 -12
- datahub/sdk/lineage_client.py +943 -0
- datahub/sdk/main_client.py +83 -8
- datahub/sdk/mlmodel.py +383 -0
- datahub/sdk/mlmodelgroup.py +240 -0
- datahub/sdk/search_client.py +85 -8
- datahub/sdk/search_filters.py +393 -68
- datahub/secret/datahub_secret_store.py +5 -1
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +51 -59
- datahub/sql_parsing/_sqlglot_patch.py +1 -2
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/split_statements.py +30 -3
- datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
- datahub/sql_parsing/sqlglot_lineage.py +517 -44
- datahub/sql_parsing/sqlglot_utils.py +30 -18
- datahub/sql_parsing/tool_meta_extractor.py +25 -2
- datahub/telemetry/telemetry.py +30 -16
- datahub/testing/check_imports.py +1 -1
- datahub/testing/docker_utils.py +8 -2
- datahub/testing/mce_helpers.py +421 -0
- datahub/testing/mcp_diff.py +17 -21
- datahub/testing/sdk_v2_helpers.py +18 -0
- datahub/upgrade/upgrade.py +86 -30
- datahub/utilities/file_backed_collections.py +14 -15
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/ingest_utils.py +2 -2
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +30 -7
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +298 -10
- datahub/utilities/sqlalchemy_query_combiner.py +6 -4
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/utilities/urn_encoder.py +1 -1
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -458
- datahub/ingestion/source/vertexai.py +0 -697
- datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -1,23 +1,311 @@
|
|
|
1
|
-
|
|
1
|
+
import logging
|
|
2
|
+
import re
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import (
|
|
6
|
+
Any,
|
|
7
|
+
Dict,
|
|
8
|
+
Optional,
|
|
9
|
+
Tuple,
|
|
10
|
+
Union,
|
|
11
|
+
)
|
|
2
12
|
|
|
3
13
|
from datahub.telemetry.telemetry import suppress_telemetry
|
|
4
14
|
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
5
17
|
# Only to be written to for logging server related information
|
|
6
18
|
global_debug: Dict[str, Any] = {}
|
|
7
19
|
|
|
8
20
|
|
|
9
|
-
def
|
|
21
|
+
def get_gms_config() -> Dict:
|
|
22
|
+
return global_debug.get("gms_config", {})
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ServiceFeature(Enum):
|
|
26
|
+
"""
|
|
27
|
+
Enum representing supported features in the REST service.
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
OPEN_API_SDK = "openapi_sdk"
|
|
31
|
+
API_TRACING = "api_tracing"
|
|
32
|
+
NO_CODE = "no_code"
|
|
33
|
+
STATEFUL_INGESTION = "stateful_ingestion"
|
|
34
|
+
IMPACT_ANALYSIS = "impact_analysis"
|
|
35
|
+
PATCH_CAPABLE = "patch_capable"
|
|
36
|
+
CLI_TELEMETRY = "cli_telemetry"
|
|
37
|
+
DATAHUB_CLOUD = "datahub_cloud"
|
|
38
|
+
# Add more features as needed
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
_REQUIRED_VERSION_OPENAPI_TRACING = {
|
|
42
|
+
"cloud": (0, 3, 11, 0),
|
|
43
|
+
"core": (1, 0, 1, 0),
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass
|
|
48
|
+
class RestServiceConfig:
|
|
49
|
+
"""
|
|
50
|
+
A class to represent REST service configuration with semantic version parsing capabilities.
|
|
51
|
+
"""
|
|
52
|
+
|
|
53
|
+
raw_config: Dict[str, Any] = field(default_factory=dict)
|
|
54
|
+
_version_cache: Optional[Tuple[int, int, int, int]] = None
|
|
55
|
+
|
|
56
|
+
@property
|
|
57
|
+
def commit_hash(self) -> Optional[str]:
|
|
58
|
+
"""
|
|
59
|
+
Get the commit hash for the current version.
|
|
60
|
+
|
|
61
|
+
Returns:
|
|
62
|
+
The commit hash or None if not found
|
|
63
|
+
"""
|
|
64
|
+
versions = self.raw_config.get("versions") or {}
|
|
65
|
+
datahub_info = versions.get("acryldata/datahub") or {}
|
|
66
|
+
return datahub_info.get("commit")
|
|
67
|
+
|
|
68
|
+
@property
|
|
69
|
+
def server_type(self) -> str:
|
|
70
|
+
"""
|
|
71
|
+
Get the server type.
|
|
72
|
+
|
|
73
|
+
Returns:
|
|
74
|
+
The server type or "unknown" if not found
|
|
75
|
+
"""
|
|
76
|
+
datahub = self.raw_config.get("datahub") or {}
|
|
77
|
+
return datahub.get("serverType", "unknown")
|
|
78
|
+
|
|
79
|
+
@property
|
|
80
|
+
def service_version(self) -> Optional[str]:
|
|
81
|
+
"""
|
|
82
|
+
Get the raw service version string.
|
|
83
|
+
|
|
84
|
+
Returns:
|
|
85
|
+
The version string or None if not found
|
|
86
|
+
"""
|
|
87
|
+
versions = self.raw_config.get("versions") or {}
|
|
88
|
+
datahub_info = versions.get("acryldata/datahub") or {}
|
|
89
|
+
return datahub_info.get("version")
|
|
90
|
+
|
|
91
|
+
def _parse_version(
|
|
92
|
+
self, version_str: Optional[str] = None
|
|
93
|
+
) -> Tuple[int, int, int, int]:
|
|
94
|
+
"""
|
|
95
|
+
Parse a semantic version string into its components, ignoring rc and suffixes.
|
|
96
|
+
Supports standard three-part versions (1.0.0) and four-part versions (1.0.0.1).
|
|
97
|
+
|
|
98
|
+
Args:
|
|
99
|
+
version_str: Version string to parse. If None, uses the service version.
|
|
100
|
+
|
|
101
|
+
Returns:
|
|
102
|
+
Tuple of (major, minor, patch, build) version numbers where build is 0 for three-part versions
|
|
103
|
+
|
|
104
|
+
Raises:
|
|
105
|
+
ValueError: If the version string cannot be parsed
|
|
106
|
+
"""
|
|
107
|
+
if version_str is None:
|
|
108
|
+
version_str = self.service_version
|
|
109
|
+
|
|
110
|
+
if not version_str:
|
|
111
|
+
return (0, 0, 0, 0)
|
|
112
|
+
|
|
113
|
+
# Remove 'v' prefix if present
|
|
114
|
+
if version_str.startswith("v"):
|
|
115
|
+
version_str = version_str[1:]
|
|
116
|
+
|
|
117
|
+
# Extract the semantic version part (before any rc or suffix)
|
|
118
|
+
# This pattern will match both three-part (1.0.0) and four-part (1.0.0.1) versions
|
|
119
|
+
match = re.match(r"(\d+)\.(\d+)\.(\d+)(?:\.(\d+))?(?:rc\d+|-.*)?", version_str)
|
|
120
|
+
if not match:
|
|
121
|
+
raise ValueError(f"Invalid version format: {version_str}")
|
|
122
|
+
|
|
123
|
+
major = int(match.group(1))
|
|
124
|
+
minor = int(match.group(2))
|
|
125
|
+
patch = int(match.group(3))
|
|
126
|
+
build = (
|
|
127
|
+
int(match.group(4)) if match.group(4) else 0
|
|
128
|
+
) # Default to 0 if not present
|
|
129
|
+
|
|
130
|
+
return (major, minor, patch, build)
|
|
131
|
+
|
|
132
|
+
@property
def parsed_version(self) -> Optional[Tuple[int, int, int, int]]:
    """
    Parsed version of the service, computed once and then memoized.

    The first access calls ``self._parse_version()`` and stores the result in
    ``self._version_cache``; later accesses return the stored value.

    Returns:
        Tuple of (major, minor, patch, build) version numbers
    """
    cached = self._version_cache
    if cached is None:
        cached = self._parse_version()
        self._version_cache = cached
    return cached
|
|
144
|
+
|
|
145
|
+
def is_version_at_least(
    self, major: int, minor: int = 0, patch: int = 0, build: int = 0
) -> bool:
    """
    Tell whether the service version is greater than or equal to a given version.

    Args:
        major: Required major version
        minor: Required minor version
        patch: Required patch version
        build: Required build version (fourth component of four-part versions)

    Returns:
        True if ``self.parsed_version`` compares >= (major, minor, patch, build).
        A missing parsed version is treated as (0, 0, 0, 0).
    """
    running = self.parsed_version
    if not running:
        running = (0, 0, 0, 0)

    # Tuples compare element by element, which is exactly version ordering.
    return running >= (major, minor, patch, build)
|
|
164
|
+
|
|
165
|
+
@property
def is_no_code_enabled(self) -> bool:
    """
    Whether the server config reports noCode as enabled.

    Returns:
        True only when the "noCode" entry equals the string "true"
    """
    no_code_flag = self.raw_config.get("noCode")
    return no_code_flag == "true"
|
|
174
|
+
|
|
175
|
+
@property
def is_managed_ingestion_enabled(self) -> bool:
    """
    Whether the server config reports managed ingestion as enabled.

    Returns:
        The value of managedIngestion.enabled, or False when the section or
        the key is absent
    """
    # A missing or null "managedIngestion" section behaves like an empty one.
    return (self.raw_config.get("managedIngestion") or {}).get("enabled", False)
|
|
185
|
+
|
|
186
|
+
@property
def default_cli_version(self) -> Optional[str]:
    """
    Default CLI version advertised under the managedIngestion config section.

    Returns:
        The value of managedIngestion.defaultCliVersion, or None when the
        section or the key is absent
    """
    # A missing or null "managedIngestion" section behaves like an empty one.
    return (self.raw_config.get("managedIngestion") or {}).get("defaultCliVersion")
|
|
193
|
+
|
|
194
|
+
@property
def is_datahub_cloud(self) -> bool:
    """
    Whether the server identifies itself as a DataHub Cloud deployment.

    Returns:
        True if datahub.serverEnv is set to a non-empty value other than
        'core'; False when it is missing, empty, or 'core'
    """
    server_env = (self.raw_config.get("datahub") or {}).get("serverEnv")

    # A missing/empty serverEnv is treated as "not cloud".
    return bool(server_env) and server_env != "core"
|
|
210
|
+
|
|
211
|
+
def supports_feature(self, feature: ServiceFeature) -> bool:
    """
    Report whether the connected service supports a given feature.

    Two kinds of features are handled:

    * config-backed features, answered directly from flags in ``raw_config``
      (or from the ``is_no_code_enabled`` / ``is_datahub_cloud`` properties);
    * version-gated features, answered by comparing the service version with
      the minimum version registered for the current deployment type
      ("cloud" or "core").

    Args:
        feature: Feature enum value to check

    Returns:
        True if the feature is supported. Features that are unknown, or that
        have no requirement registered for the current deployment type, are
        reported as unsupported.
    """
    # Probes are wrapped in lambdas so that only the requested one is evaluated.
    flag_probes = {
        ServiceFeature.NO_CODE: lambda: self.is_no_code_enabled,
        ServiceFeature.STATEFUL_INGESTION: lambda: (
            self.raw_config.get("statefulIngestionCapable", False) is True
        ),
        ServiceFeature.IMPACT_ANALYSIS: lambda: (
            self.raw_config.get("supportsImpactAnalysis", False) is True
        ),
        ServiceFeature.PATCH_CAPABLE: lambda: (
            self.raw_config.get("patchCapable", False) is True
        ),
        ServiceFeature.CLI_TELEMETRY: lambda: (
            self.raw_config.get("telemetry") or {}
        ).get("enabledCli", None),
        ServiceFeature.DATAHUB_CLOUD: lambda: self.is_datahub_cloud,
    }

    if feature in flag_probes:
        # bool(None) is False, so an unset flag reads as "not supported".
        return bool(flag_probes[feature]())

    # Version-gated features may have different minimums on cloud vs. core.
    deployment_type = "cloud" if self.is_datahub_cloud else "core"

    version_gates = {
        ServiceFeature.OPEN_API_SDK: _REQUIRED_VERSION_OPENAPI_TRACING,
        ServiceFeature.API_TRACING: _REQUIRED_VERSION_OPENAPI_TRACING,
        # Additional features can be defined here
    }

    if feature not in version_gates:
        # Unknown feature, assume not supported
        return False

    minimum = version_gates[feature].get(deployment_type)
    if not minimum:
        # No requirement registered for this deployment type: not supported.
        return False

    req_major, req_minor, req_patch, req_build = minimum
    return self.is_version_at_least(req_major, req_minor, req_patch, req_build)
|
|
275
|
+
|
|
276
|
+
def __str__(self) -> str:
    """
    Render the configuration as text.

    Returns:
        The result of ``str()`` applied to the raw configuration dictionary
    """
    rendered = str(self.raw_config)
    return rendered
|
|
284
|
+
|
|
285
|
+
def __repr__(self) -> str:
    """
    Render the configuration for debugging output.

    Returns:
        The result of ``str()`` applied to the raw configuration dictionary
        (the same text that ``__str__`` produces)
    """
    rendered = str(self.raw_config)
    return rendered
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def set_gms_config(config: Union[Dict[str, Any], RestServiceConfig]) -> None:
    """
    Record the server (GMS) config and honor its CLI telemetry setting.

    The config is stored as-is under ``global_debug["gms_config"]``. If the
    server explicitly disables CLI telemetry (``telemetry.enabledCli`` is
    present and falsy), client-side telemetry is suppressed.

    Args:
        config: Either the raw config dictionary or an already-wrapped
            RestServiceConfig.
    """
    global_debug["gms_config"] = config

    config_obj = (
        config
        if isinstance(config, RestServiceConfig)
        else RestServiceConfig(raw_config=config)
    )

    # Read the raw flag instead of calling is_cli_telemetry_enabled(): that
    # helper returns a plain bool in which "not configured" has already been
    # collapsed to False, which would make the `is not None` guard below dead
    # and suppress telemetry for servers that never sent the setting.
    telemetry_section = config_obj.raw_config.get("telemetry") or {}
    cli_telemetry_enabled = telemetry_section.get("enabledCli")
    if cli_telemetry_enabled is not None and not cli_telemetry_enabled:
        # server requires telemetry to be disabled on client
        suppress_telemetry()
|
|
16
308
|
|
|
17
309
|
|
|
18
|
-
def
|
|
19
|
-
return
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def is_cli_telemetry_enabled() -> Optional[bool]:
|
|
23
|
-
return get_gms_config().get("telemetry", {}).get("enabledCli", None)
|
|
310
|
+
def is_cli_telemetry_enabled(config: RestServiceConfig) -> bool:
    """Return whether the given service config reports the CLI_TELEMETRY feature as supported."""
    telemetry_feature = ServiceFeature.CLI_TELEMETRY
    return config.supports_feature(telemetry_feature)
|
|
@@ -272,11 +272,13 @@ class SQLAlchemyQueryCombiner:
|
|
|
272
272
|
self.report.uncombined_queries_issued += 1
|
|
273
273
|
return _sa_execute_underlying_method(conn, query, *args, **kwargs)
|
|
274
274
|
|
|
275
|
-
with
|
|
276
|
-
|
|
275
|
+
with (
|
|
276
|
+
_sa_execute_method_patching_lock,
|
|
277
|
+
unittest.mock.patch(
|
|
277
278
|
"sqlalchemy.engine.Connection.execute", _sa_execute_fake
|
|
278
|
-
)
|
|
279
|
-
|
|
279
|
+
),
|
|
280
|
+
):
|
|
281
|
+
yield self
|
|
280
282
|
|
|
281
283
|
def run(self, method: Callable[[], None]) -> None:
|
|
282
284
|
"""
|
|
@@ -1,7 +1,15 @@
|
|
|
1
1
|
import concurrent.futures
|
|
2
2
|
import contextlib
|
|
3
3
|
import queue
|
|
4
|
-
from typing import
|
|
4
|
+
from typing import (
|
|
5
|
+
Any,
|
|
6
|
+
Callable,
|
|
7
|
+
Iterable,
|
|
8
|
+
Iterator,
|
|
9
|
+
Optional,
|
|
10
|
+
Tuple,
|
|
11
|
+
TypeVar,
|
|
12
|
+
)
|
|
5
13
|
|
|
6
14
|
T = TypeVar("T")
|
|
7
15
|
|
|
@@ -18,8 +26,13 @@ class ThreadedIteratorExecutor:
|
|
|
18
26
|
worker_func: Callable[..., Iterable[T]],
|
|
19
27
|
args_list: Iterable[Tuple[Any, ...]],
|
|
20
28
|
max_workers: int,
|
|
21
|
-
|
|
22
|
-
|
|
29
|
+
max_backpressure: Optional[int] = None,
|
|
30
|
+
) -> Iterator[T]:
|
|
31
|
+
if max_backpressure is None:
|
|
32
|
+
max_backpressure = 10 * max_workers
|
|
33
|
+
assert max_backpressure >= max_workers
|
|
34
|
+
|
|
35
|
+
out_q: queue.Queue[T] = queue.Queue(maxsize=max_backpressure)
|
|
23
36
|
|
|
24
37
|
def _worker_wrapper(
|
|
25
38
|
worker_func: Callable[..., Iterable[T]], *args: Any
|
datahub/utilities/urn_encoder.py
CHANGED
|
@@ -4,7 +4,7 @@ from typing import List
|
|
|
4
4
|
# NOTE: Frontend relies on encoding these three characters. Specifically, we decode and encode schema fields for column level lineage.
|
|
5
5
|
# If this changes, make appropriate changes to datahub-web-react/src/app/lineage/utils/columnLineageUtils.ts
|
|
6
6
|
# We also rely on encoding these exact three characters when generating schemaField urns in our graphQL layer. Update SchemaFieldUtils if this changes.
|
|
7
|
-
# Also see https://
|
|
7
|
+
# Also see https://docs.datahub.com/docs/what/urn/#restrictions
|
|
8
8
|
RESERVED_CHARS = {
    ",",
    "(",
    ")",
    "␟",
}
# Same set with the percent sign added.
RESERVED_CHARS_EXTENDED = RESERVED_CHARS | {"%"}
|
|
10
10
|
|
datahub/utilities/urns/urn.py
CHANGED
|
@@ -1,8 +1,47 @@
|
|
|
1
|
-
from
|
|
1
|
+
from typing import Optional
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
from datahub.metadata.urns import (
|
|
4
|
+
DataPlatformUrn,
|
|
5
|
+
Urn,
|
|
6
|
+
)
|
|
7
|
+
|
|
8
|
+
__all__ = ["Urn", "guess_entity_type", "guess_platform_name"]
|
|
4
9
|
|
|
5
10
|
|
|
6
11
|
def guess_entity_type(urn: str) -> str:
    """Return the entity-type segment of a URN, i.e. the third ':'-separated field.

    The URN is not fully parsed; it only has to start with ``urn:li:``.
    """
    assert urn.startswith("urn:li:"), "urns must start with urn:li:"
    # Only the first three fields matter, so stop splitting after them.
    fields = urn.split(":", 3)
    return fields[2]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def guess_platform_name(urn: str) -> Optional[str]:
    """Best-effort extraction of a platform/tool name from a URN string.

    The URN is parsed with ``Urn.from_string`` and the resulting object is
    probed for the first of these attributes that exists:

    1. ``platform`` - parsed as a DataPlatformUrn and reduced to its entity id;
    2. ``orchestrator``
    3. ``dashboard_tool``
    4. ``ml_model_tool``

    Args:
        urn: URN string to inspect. Errors raised by ``Urn.from_string`` are
            not handled here and propagate to the caller.

    Returns:
        The platform/tool name, or None if the URN object exposes none of the
        attributes above.
    """
    urn_obj = Urn.from_string(urn)

    try:
        platform = urn_obj.platform  # type: ignore[attr-defined]
        # An AttributeError raised while resolving the platform URN is treated
        # the same as a missing attribute: fall through to the other probes.
        return DataPlatformUrn.from_string(platform).get_entity_id_as_string()
    except AttributeError:
        pass

    # These attributes are returned as-is (no DataPlatformUrn parsing).
    for attr_name in ("orchestrator", "dashboard_tool", "ml_model_tool"):
        try:
            return getattr(urn_obj, attr_name)
        except AttributeError:
            continue

    return None
|