acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +18 -3
- datahub/api/entities/datajob/datajob.py +24 -4
- datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +47 -72
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +37 -37
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/base.py +8 -6
- datahub/api/graphql/operation.py +14 -10
- datahub/cli/check_cli.py +91 -9
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +20 -12
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +133 -34
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +155 -231
- datahub/cli/exists_cli.py +2 -3
- datahub/cli/get_cli.py +2 -3
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +11 -5
- datahub/cli/ingest_cli.py +25 -26
- datahub/cli/migrate.py +12 -9
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +4 -6
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +39 -7
- datahub/cli/specific/datacontract_cli.py +57 -9
- datahub/cli/specific/dataproduct_cli.py +12 -24
- datahub/cli/specific/dataset_cli.py +31 -21
- datahub/cli/specific/forms_cli.py +2 -5
- datahub/cli/specific/group_cli.py +2 -3
- datahub/cli/specific/structuredproperties_cli.py +5 -7
- datahub/cli/specific/user_cli.py +174 -4
- datahub/cli/state_cli.py +2 -3
- datahub/cli/timeline_cli.py +2 -3
- datahub/configuration/common.py +46 -2
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +4 -3
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +12 -8
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/mcp_builder.py +12 -0
- datahub/emitter/request_helper.py +138 -15
- datahub/emitter/response_helper.py +111 -19
- datahub/emitter/rest_emitter.py +399 -163
- datahub/entrypoints.py +10 -5
- datahub/errors.py +12 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/common.py +9 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +174 -62
- datahub/ingestion/api/source_helpers.py +41 -3
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +31 -5
- datahub/ingestion/glossary/classification_mixin.py +9 -2
- datahub/ingestion/graph/client.py +492 -55
- datahub/ingestion/graph/config.py +18 -2
- datahub/ingestion/graph/filters.py +96 -32
- datahub/ingestion/graph/links.py +55 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +90 -23
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +31 -23
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/apply/datahub_apply.py +6 -5
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +7 -18
- datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
- datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
- datahub/ingestion/source/common/subtypes.py +73 -1
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
- datahub/ingestion/source/data_lake_common/object_store.py +732 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
- datahub/ingestion/source/datahub/config.py +19 -5
- datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
- datahub/ingestion/source/datahub/datahub_source.py +11 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
- datahub/ingestion/source/dbt/dbt_common.py +270 -26
- datahub/ingestion/source/dbt/dbt_core.py +88 -47
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +228 -215
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +12 -14
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +67 -8
- datahub/ingestion/source/fivetran/fivetran.py +228 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
- datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
- datahub/ingestion/source/gcs/gcs_source.py +53 -10
- datahub/ingestion/source/gcs/gcs_utils.py +36 -9
- datahub/ingestion/source/ge_data_profiler.py +146 -33
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +402 -0
- datahub/ingestion/source/hex/constants.py +8 -0
- datahub/ingestion/source/hex/hex.py +311 -0
- datahub/ingestion/source/hex/mapper.py +412 -0
- datahub/ingestion/source/hex/model.py +78 -0
- datahub/ingestion/source/hex/query_fetcher.py +307 -0
- datahub/ingestion/source/iceberg/iceberg.py +385 -164
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +28 -71
- datahub/ingestion/source/kafka/kafka_config.py +78 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
- datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +216 -86
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +539 -555
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +103 -118
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +11 -10
- datahub/ingestion/source/mlflow.py +254 -23
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +359 -181
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
- datahub/ingestion/source/nifi.py +5 -5
- datahub/ingestion/source/openapi.py +85 -38
- datahub/ingestion/source/openapi_parser.py +59 -40
- datahub/ingestion/source/powerbi/config.py +92 -27
- datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +66 -32
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/profile.py +2 -2
- datahub/ingestion/source/redshift/query.py +24 -20
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +13 -11
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +515 -244
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/config.py +75 -8
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +36 -7
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +403 -140
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
- datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
- datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
- datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
- datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +219 -26
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +29 -9
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +9 -4
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +28 -8
- datahub/ingestion/source/sql/hive_metastore.py +24 -25
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +18 -2
- datahub/ingestion/source/sql/mssql/source.py +376 -62
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +62 -11
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +20 -2
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +27 -2
- datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
- datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
- datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +43 -10
- datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/superset.py +810 -126
- datahub/ingestion/source/tableau/tableau.py +172 -69
- datahub/ingestion/source/tableau/tableau_common.py +11 -4
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +161 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
- datahub/ingestion/source/usage/usage_common.py +4 -68
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1367 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
- datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/dataset_domain.py +1 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/common.py +3 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
- datahub/metadata/_urns/urn_defs.py +1866 -1582
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18404 -16617
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/Deprecation.avsc +2 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +21 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QueryProperties.avsc +24 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +147 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +7 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_shared.py +393 -10
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +180 -4
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +154 -12
- datahub/sdk/lineage_client.py +943 -0
- datahub/sdk/main_client.py +83 -8
- datahub/sdk/mlmodel.py +383 -0
- datahub/sdk/mlmodelgroup.py +240 -0
- datahub/sdk/search_client.py +85 -8
- datahub/sdk/search_filters.py +393 -68
- datahub/secret/datahub_secret_store.py +5 -1
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +51 -59
- datahub/sql_parsing/_sqlglot_patch.py +1 -2
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/split_statements.py +30 -3
- datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
- datahub/sql_parsing/sqlglot_lineage.py +517 -44
- datahub/sql_parsing/sqlglot_utils.py +30 -18
- datahub/sql_parsing/tool_meta_extractor.py +25 -2
- datahub/telemetry/telemetry.py +30 -16
- datahub/testing/check_imports.py +1 -1
- datahub/testing/docker_utils.py +8 -2
- datahub/testing/mce_helpers.py +421 -0
- datahub/testing/mcp_diff.py +17 -21
- datahub/testing/sdk_v2_helpers.py +18 -0
- datahub/upgrade/upgrade.py +86 -30
- datahub/utilities/file_backed_collections.py +14 -15
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/ingest_utils.py +2 -2
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +30 -7
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +298 -10
- datahub/utilities/sqlalchemy_query_combiner.py +6 -4
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/utilities/urn_encoder.py +1 -1
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -458
- datahub/ingestion/source/vertexai.py +0 -697
- datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED
|
|
2
2
|
|
|
3
3
|
import functools
|
|
4
|
-
import hashlib
|
|
5
4
|
import logging
|
|
6
5
|
import re
|
|
7
6
|
from typing import Dict, Iterable, Optional, Tuple, Union
|
|
@@ -10,6 +9,8 @@ import sqlglot
|
|
|
10
9
|
import sqlglot.errors
|
|
11
10
|
import sqlglot.optimizer.eliminate_ctes
|
|
12
11
|
|
|
12
|
+
from datahub.sql_parsing.fingerprint_utils import generate_hash
|
|
13
|
+
|
|
13
14
|
assert SQLGLOT_PATCHED
|
|
14
15
|
|
|
15
16
|
logger = logging.getLogger(__name__)
|
|
@@ -39,9 +40,6 @@ def _get_dialect_str(platform: str) -> str:
|
|
|
39
40
|
# let the fuzzy resolution logic handle it.
|
|
40
41
|
# MariaDB is a fork of MySQL, so we reuse the same dialect.
|
|
41
42
|
return "mysql, normalization_strategy = lowercase"
|
|
42
|
-
# Dremio is based upon drill. Not 100% compatibility
|
|
43
|
-
elif platform == "dremio":
|
|
44
|
-
return "drill"
|
|
45
43
|
else:
|
|
46
44
|
return platform
|
|
47
45
|
|
|
@@ -114,6 +112,8 @@ def _expression_to_string(
|
|
|
114
112
|
return expression.sql(dialect=get_dialect(platform))
|
|
115
113
|
|
|
116
114
|
|
|
115
|
+
PLACEHOLDER_BACKWARD_FINGERPRINT_NORMALIZATION = re.compile(r"(%s|\$\d|\?)")
|
|
116
|
+
|
|
117
117
|
_BASIC_NORMALIZATION_RULES = {
|
|
118
118
|
# Remove /* */ comments.
|
|
119
119
|
re.compile(r"/\*.*?\*/", re.DOTALL): "",
|
|
@@ -129,7 +129,9 @@ _BASIC_NORMALIZATION_RULES = {
|
|
|
129
129
|
re.compile(r"'[^']*'"): "?",
|
|
130
130
|
# Replace sequences of IN/VALUES with a single placeholder.
|
|
131
131
|
# The r" ?" makes it more robust to uneven spacing.
|
|
132
|
-
re.compile(
|
|
132
|
+
re.compile(
|
|
133
|
+
r"\b(IN|VALUES)\s*\( ?(?:%s|\$\d|\?)(?:, ?(?:%s|\$\d|\?))* ?\)", re.IGNORECASE
|
|
134
|
+
): r"\1 (?)",
|
|
133
135
|
# Normalize parenthesis spacing.
|
|
134
136
|
re.compile(r"\( "): "(",
|
|
135
137
|
re.compile(r" \)"): ")",
|
|
@@ -138,6 +140,9 @@ _BASIC_NORMALIZATION_RULES = {
|
|
|
138
140
|
# e.g. "col1,col2" -> "col1, col2"
|
|
139
141
|
re.compile(r"\b ,"): ",",
|
|
140
142
|
re.compile(r"\b,\b"): ", ",
|
|
143
|
+
# MAKE SURE THAT THIS IS AFTER THE ABOVE REPLACEMENT
|
|
144
|
+
# Replace all versions of placeholders with generic ? placeholder.
|
|
145
|
+
PLACEHOLDER_BACKWARD_FINGERPRINT_NORMALIZATION: "?",
|
|
141
146
|
}
|
|
142
147
|
_TABLE_NAME_NORMALIZATION_RULES = {
|
|
143
148
|
# Replace UUID-like strings with a placeholder (both - and _ variants).
|
|
@@ -251,18 +256,20 @@ def generalize_query(expression: sqlglot.exp.ExpOrStr, dialect: DialectOrStr) ->
|
|
|
251
256
|
return expression.transform(_strip_expression, copy=True).sql(dialect=dialect)
|
|
252
257
|
|
|
253
258
|
|
|
254
|
-
def generate_hash(text: str) -> str:
|
|
255
|
-
# Once we move to Python 3.9+, we can set `usedforsecurity=False`.
|
|
256
|
-
return hashlib.sha256(text.encode("utf-8")).hexdigest()
|
|
257
|
-
|
|
258
|
-
|
|
259
259
|
def get_query_fingerprint_debug(
|
|
260
|
-
expression: sqlglot.exp.ExpOrStr,
|
|
260
|
+
expression: sqlglot.exp.ExpOrStr,
|
|
261
|
+
platform: DialectOrStr,
|
|
262
|
+
fast: bool = False,
|
|
263
|
+
secondary_id: Optional[str] = None,
|
|
261
264
|
) -> Tuple[str, Optional[str]]:
|
|
262
265
|
try:
|
|
263
266
|
if not fast:
|
|
264
267
|
dialect = get_dialect(platform)
|
|
265
268
|
expression_sql = generalize_query(expression, dialect=dialect)
|
|
269
|
+
# Normalize placeholders for consistent fingerprinting -> this only needs to be backward compatible with earlier sqlglot generated generalized queries where the placeholders were always ?
|
|
270
|
+
expression_sql = PLACEHOLDER_BACKWARD_FINGERPRINT_NORMALIZATION.sub(
|
|
271
|
+
"?", expression_sql
|
|
272
|
+
)
|
|
266
273
|
else:
|
|
267
274
|
expression_sql = generalize_query_fast(expression, dialect=platform)
|
|
268
275
|
except (ValueError, sqlglot.errors.SqlglotError) as e:
|
|
@@ -272,16 +279,18 @@ def get_query_fingerprint_debug(
|
|
|
272
279
|
logger.debug("Failed to generalize query for fingerprinting: %s", e)
|
|
273
280
|
expression_sql = None
|
|
274
281
|
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
)
|
|
282
|
+
text = expression_sql or _expression_to_string(expression, platform=platform)
|
|
283
|
+
if secondary_id:
|
|
284
|
+
text = text + " -- " + secondary_id
|
|
285
|
+
fingerprint = generate_hash(text=text)
|
|
280
286
|
return fingerprint, expression_sql
|
|
281
287
|
|
|
282
288
|
|
|
283
289
|
def get_query_fingerprint(
|
|
284
|
-
expression: sqlglot.exp.ExpOrStr,
|
|
290
|
+
expression: sqlglot.exp.ExpOrStr,
|
|
291
|
+
platform: DialectOrStr,
|
|
292
|
+
fast: bool = False,
|
|
293
|
+
secondary_id: Optional[str] = None,
|
|
285
294
|
) -> str:
|
|
286
295
|
"""Get a fingerprint for a SQL query.
|
|
287
296
|
|
|
@@ -298,12 +307,15 @@ def get_query_fingerprint(
|
|
|
298
307
|
Args:
|
|
299
308
|
expression: The SQL query to fingerprint.
|
|
300
309
|
platform: The SQL dialect to use.
|
|
310
|
+
secondary_id: An optional additional id string to include in the final fingerprint.
|
|
301
311
|
|
|
302
312
|
Returns:
|
|
303
313
|
The fingerprint for the SQL query.
|
|
304
314
|
"""
|
|
305
315
|
|
|
306
|
-
return get_query_fingerprint_debug(
|
|
316
|
+
return get_query_fingerprint_debug(
|
|
317
|
+
expression=expression, platform=platform, fast=fast, secondary_id=secondary_id
|
|
318
|
+
)[0]
|
|
307
319
|
|
|
308
320
|
|
|
309
321
|
@functools.lru_cache(maxsize=FORMAT_QUERY_CACHE_SIZE)
|
|
@@ -13,7 +13,7 @@ from datahub.api.entities.platformresource.platform_resource import (
|
|
|
13
13
|
)
|
|
14
14
|
from datahub.ingestion.api.report import Report
|
|
15
15
|
from datahub.ingestion.graph.client import DataHubGraph
|
|
16
|
-
from datahub.metadata.urns import CorpGroupUrn, CorpUserUrn
|
|
16
|
+
from datahub.metadata.urns import CorpGroupUrn, CorpUserUrn, DataPlatformUrn, Urn
|
|
17
17
|
from datahub.utilities.search_utils import LogicalOperator
|
|
18
18
|
from datahub.utilities.stats_collections import int_top_k_dict
|
|
19
19
|
|
|
@@ -21,6 +21,10 @@ UrnStr = str
|
|
|
21
21
|
|
|
22
22
|
logger = logging.getLogger(__name__)
|
|
23
23
|
|
|
24
|
+
MODE_PLATFORM_URN = DataPlatformUrn.from_string("urn:li:dataPlatform:mode")
|
|
25
|
+
LOOKER_PLATFORM_URN = DataPlatformUrn.from_string("urn:li:dataPlatform:looker")
|
|
26
|
+
HEX_PLATFORM_URN = DataPlatformUrn.from_string("urn:li:dataPlatform:hex")
|
|
27
|
+
|
|
24
28
|
|
|
25
29
|
class QueryLog(Protocol):
|
|
26
30
|
"""Represents Query Log Entry
|
|
@@ -30,6 +34,7 @@ class QueryLog(Protocol):
|
|
|
30
34
|
query_text: str
|
|
31
35
|
user: Optional[Union[CorpUserUrn, CorpGroupUrn]]
|
|
32
36
|
extra_info: Optional[dict]
|
|
37
|
+
origin: Optional[Urn]
|
|
33
38
|
|
|
34
39
|
|
|
35
40
|
def _get_last_line(query: str) -> str:
|
|
@@ -67,6 +72,10 @@ class ToolMetaExtractor:
|
|
|
67
72
|
"looker",
|
|
68
73
|
self._extract_looker_query,
|
|
69
74
|
),
|
|
75
|
+
(
|
|
76
|
+
"hex",
|
|
77
|
+
self._extract_hex_query,
|
|
78
|
+
),
|
|
70
79
|
]
|
|
71
80
|
# maps user id (as string) to email address
|
|
72
81
|
self.looker_user_mapping = looker_user_mapping
|
|
@@ -153,7 +162,7 @@ class ToolMetaExtractor:
|
|
|
153
162
|
entry.extra_info = entry.extra_info or {}
|
|
154
163
|
entry.extra_info["user_via"] = original_user
|
|
155
164
|
|
|
156
|
-
|
|
165
|
+
entry.origin = MODE_PLATFORM_URN
|
|
157
166
|
|
|
158
167
|
return True
|
|
159
168
|
|
|
@@ -190,6 +199,20 @@ class ToolMetaExtractor:
|
|
|
190
199
|
entry.extra_info = entry.extra_info or {}
|
|
191
200
|
entry.extra_info["user_via"] = original_user
|
|
192
201
|
|
|
202
|
+
entry.origin = LOOKER_PLATFORM_URN
|
|
203
|
+
|
|
204
|
+
return True
|
|
205
|
+
|
|
206
|
+
def _extract_hex_query(self, entry: QueryLog) -> bool:
|
|
207
|
+
"""
|
|
208
|
+
Returns:
|
|
209
|
+
bool: whether QueryLog entry is that of hex.
|
|
210
|
+
"""
|
|
211
|
+
if "-- Hex query metadata:" not in entry.query_text:
|
|
212
|
+
return False
|
|
213
|
+
|
|
214
|
+
entry.origin = HEX_PLATFORM_URN
|
|
215
|
+
|
|
193
216
|
return True
|
|
194
217
|
|
|
195
218
|
def extract_bi_metadata(self, entry: QueryLog) -> bool:
|
datahub/telemetry/telemetry.py
CHANGED
|
@@ -16,6 +16,11 @@ from datahub._version import __version__, nice_version_name
|
|
|
16
16
|
from datahub.cli.config_utils import DATAHUB_ROOT_FOLDER
|
|
17
17
|
from datahub.cli.env_utils import get_boolean_env_variable
|
|
18
18
|
from datahub.configuration.common import ExceptionWithProps
|
|
19
|
+
from datahub.configuration.env_vars import (
|
|
20
|
+
get_sentry_dsn,
|
|
21
|
+
get_sentry_environment,
|
|
22
|
+
get_telemetry_timeout,
|
|
23
|
+
)
|
|
19
24
|
from datahub.metadata.schema_classes import _custom_package_path
|
|
20
25
|
from datahub.utilities.perf_timer import PerfTimer
|
|
21
26
|
|
|
@@ -97,14 +102,14 @@ if any(var in os.environ for var in CI_ENV_VARS):
|
|
|
97
102
|
if _custom_package_path:
|
|
98
103
|
ENV_ENABLED = False
|
|
99
104
|
|
|
100
|
-
TIMEOUT = int(
|
|
105
|
+
TIMEOUT = int(get_telemetry_timeout())
|
|
101
106
|
MIXPANEL_ENDPOINT = "track.datahubproject.io/mp"
|
|
102
107
|
MIXPANEL_TOKEN = "5ee83d940754d63cacbf7d34daa6f44a"
|
|
103
|
-
SENTRY_DSN: Optional[str] =
|
|
104
|
-
SENTRY_ENVIRONMENT: str =
|
|
108
|
+
SENTRY_DSN: Optional[str] = get_sentry_dsn()
|
|
109
|
+
SENTRY_ENVIRONMENT: str = get_sentry_environment()
|
|
105
110
|
|
|
106
111
|
|
|
107
|
-
def
|
|
112
|
+
def _default_global_properties() -> Dict[str, Any]:
|
|
108
113
|
return {
|
|
109
114
|
"datahub_version": nice_version_name(),
|
|
110
115
|
"python_version": platform.python_version(),
|
|
@@ -122,6 +127,7 @@ class Telemetry:
|
|
|
122
127
|
context_properties: Dict[str, Any] = {}
|
|
123
128
|
|
|
124
129
|
def __init__(self):
|
|
130
|
+
self.global_properties = _default_global_properties()
|
|
125
131
|
self.context_properties = {}
|
|
126
132
|
|
|
127
133
|
if SENTRY_DSN:
|
|
@@ -247,6 +253,10 @@ class Telemetry:
|
|
|
247
253
|
|
|
248
254
|
return False
|
|
249
255
|
|
|
256
|
+
def add_global_property(self, key: str, value: Any) -> None:
|
|
257
|
+
self.global_properties[key] = value
|
|
258
|
+
self._update_sentry_properties()
|
|
259
|
+
|
|
250
260
|
def set_context(
|
|
251
261
|
self,
|
|
252
262
|
server: Optional["DataHubGraph"] = None,
|
|
@@ -257,16 +267,20 @@ class Telemetry:
|
|
|
257
267
|
**(properties or {}),
|
|
258
268
|
}
|
|
259
269
|
|
|
260
|
-
|
|
261
|
-
from sentry_sdk import set_tag
|
|
270
|
+
self._update_sentry_properties()
|
|
262
271
|
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
272
|
+
def _update_sentry_properties(self) -> None:
|
|
273
|
+
properties = {
|
|
274
|
+
**self.global_properties,
|
|
275
|
+
**self.context_properties,
|
|
276
|
+
}
|
|
277
|
+
if self.sentry_enabled:
|
|
278
|
+
import sentry_sdk
|
|
267
279
|
|
|
268
|
-
|
|
269
|
-
|
|
280
|
+
# Note: once we're on sentry-sdk 2.1.0+, we can use sentry_sdk.set_tags(properties)
|
|
281
|
+
# See https://github.com/getsentry/sentry-python/commit/6c960d752c7c7aff3fd7469d2e9ad98f19663aa8
|
|
282
|
+
for key, value in properties.items():
|
|
283
|
+
sentry_sdk.set_tag(key, value)
|
|
270
284
|
|
|
271
285
|
def init_capture_exception(self) -> None:
|
|
272
286
|
if self.sentry_enabled:
|
|
@@ -300,7 +314,7 @@ class Telemetry:
|
|
|
300
314
|
try:
|
|
301
315
|
self.mp.people_set(
|
|
302
316
|
self.client_id,
|
|
303
|
-
|
|
317
|
+
self.global_properties,
|
|
304
318
|
)
|
|
305
319
|
except Exception as e:
|
|
306
320
|
logger.debug(f"Error initializing telemetry: {e}")
|
|
@@ -334,7 +348,7 @@ class Telemetry:
|
|
|
334
348
|
logger.debug(f"Sending telemetry for {event_name}")
|
|
335
349
|
|
|
336
350
|
properties = {
|
|
337
|
-
**
|
|
351
|
+
**self.global_properties,
|
|
338
352
|
**self.context_properties,
|
|
339
353
|
**properties,
|
|
340
354
|
}
|
|
@@ -352,10 +366,10 @@ class Telemetry:
|
|
|
352
366
|
}
|
|
353
367
|
else:
|
|
354
368
|
return {
|
|
355
|
-
"server_type": server.server_config.get("datahub", {}).get(
|
|
369
|
+
"server_type": server.server_config.raw_config.get("datahub", {}).get(
|
|
356
370
|
"serverType", "missing"
|
|
357
371
|
),
|
|
358
|
-
"server_version": server.server_config.get("versions", {})
|
|
372
|
+
"server_version": server.server_config.raw_config.get("versions", {})
|
|
359
373
|
.get("acryldata/datahub", {})
|
|
360
374
|
.get("version", "missing"),
|
|
361
375
|
"server_id": server.server_id or "missing",
|
datahub/testing/check_imports.py
CHANGED
|
@@ -9,7 +9,7 @@ def ensure_no_indirect_model_imports(dirs: List[pathlib.Path]) -> None:
|
|
|
9
9
|
# If our needs become more complex, we should move to a proper linter.
|
|
10
10
|
denied_imports = {
|
|
11
11
|
"src.": "datahub.*",
|
|
12
|
-
"datahub.metadata.
|
|
12
|
+
"datahub.metadata._internal_schema_classes": "datahub.metadata.schema_classes",
|
|
13
13
|
"datahub.metadata._urns": "datahub.metadata.urns",
|
|
14
14
|
}
|
|
15
15
|
ignored_files = {
|
datahub/testing/docker_utils.py
CHANGED
|
@@ -48,16 +48,22 @@ def wait_for_port(
|
|
|
48
48
|
subprocess.run(f"docker logs {container_name}", shell=True, check=True)
|
|
49
49
|
|
|
50
50
|
|
|
51
|
+
DOCKER_DEFAULT_UNLIMITED_PARALLELISM = -1
|
|
52
|
+
|
|
53
|
+
|
|
51
54
|
@pytest.fixture(scope="module")
|
|
52
55
|
def docker_compose_runner(
|
|
53
56
|
docker_compose_command, docker_compose_project_name, docker_setup, docker_cleanup
|
|
54
57
|
):
|
|
55
58
|
@contextlib.contextmanager
|
|
56
59
|
def run(
|
|
57
|
-
compose_file_path: Union[str, List[str]],
|
|
60
|
+
compose_file_path: Union[str, List[str]],
|
|
61
|
+
key: str,
|
|
62
|
+
cleanup: bool = True,
|
|
63
|
+
parallel: int = DOCKER_DEFAULT_UNLIMITED_PARALLELISM,
|
|
58
64
|
) -> Iterator[pytest_docker.plugin.Services]:
|
|
59
65
|
with pytest_docker.plugin.get_docker_services(
|
|
60
|
-
docker_compose_command=docker_compose_command,
|
|
66
|
+
docker_compose_command=f"{docker_compose_command} --parallel {parallel}",
|
|
61
67
|
# We can remove the type ignore once this is merged:
|
|
62
68
|
# https://github.com/avast/pytest-docker/pull/108
|
|
63
69
|
docker_compose_file=compose_file_path, # type: ignore
|