acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
- datahub/_version.py +1 -1
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +26 -23
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +3 -3
- datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
- datahub/api/graphql/operation.py +10 -6
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +18 -10
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +125 -27
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +153 -229
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +37 -6
- datahub/cli/specific/datacontract_cli.py +54 -7
- datahub/cli/specific/dataproduct_cli.py +2 -15
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +172 -3
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/configuration/common.py +40 -1
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +3 -2
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +8 -4
- datahub/emitter/rest_emitter.py +103 -30
- datahub/entrypoints.py +6 -3
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +165 -58
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +330 -25
- datahub/ingestion/graph/config.py +3 -2
- datahub/ingestion/graph/filters.py +30 -11
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +81 -11
- datahub/ingestion/run/pipeline_config.py +2 -2
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +13 -5
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +6 -8
- datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
- datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
- datahub/ingestion/source/common/subtypes.py +53 -0
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
- datahub/ingestion/source/datahub/config.py +12 -9
- datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
- datahub/ingestion/source/datahub/datahub_source.py +10 -0
- datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
- datahub/ingestion/source/dbt/dbt_common.py +224 -9
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +132 -98
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +8 -10
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +66 -7
- datahub/ingestion/source/fivetran/fivetran.py +227 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
- datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gcs/gcs_source.py +32 -4
- datahub/ingestion/source/ge_data_profiler.py +108 -31
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/api.py +28 -1
- datahub/ingestion/source/hex/hex.py +16 -5
- datahub/ingestion/source/hex/mapper.py +16 -2
- datahub/ingestion/source/hex/model.py +2 -0
- datahub/ingestion/source/hex/query_fetcher.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +123 -59
- datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
- datahub/ingestion/source/looker/looker_common.py +148 -79
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +503 -547
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +96 -117
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +9 -9
- datahub/ingestion/source/mlflow.py +12 -2
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +26 -5
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
- datahub/ingestion/source/nifi.py +2 -2
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/powerbi/config.py +47 -21
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
- datahub/ingestion/source/powerbi/powerbi.py +10 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/query.py +23 -19
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +6 -5
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +449 -248
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +10 -16
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +3 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
- datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
- datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
- datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
- datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +217 -25
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +24 -8
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +2 -2
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +4 -3
- datahub/ingestion/source/sql/hive_metastore.py +19 -20
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +3 -1
- datahub/ingestion/source/sql/mssql/source.py +336 -57
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +5 -5
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +2 -1
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +22 -0
- datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +11 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/superset.py +314 -67
- datahub/ingestion/source/tableau/tableau.py +135 -59
- datahub/ingestion/source/tableau/tableau_common.py +9 -2
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/unity/config.py +160 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
- datahub/ingestion/source/usage/usage_common.py +4 -3
- datahub/ingestion/source/vertexai/vertexai.py +1 -1
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +6806 -4871
- datahub/metadata/_urns/urn_defs.py +1767 -1539
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18395 -16979
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +4 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +61 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/sdk/__init__.py +2 -0
- datahub/sdk/_all_entities.py +7 -0
- datahub/sdk/_shared.py +249 -5
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +7 -0
- datahub/sdk/datajob.py +45 -13
- datahub/sdk/dataset.py +56 -2
- datahub/sdk/entity_client.py +111 -9
- datahub/sdk/lineage_client.py +663 -82
- datahub/sdk/main_client.py +50 -16
- datahub/sdk/mlmodel.py +120 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +7 -3
- datahub/sdk/search_filters.py +304 -36
- datahub/secret/datahub_secret_store.py +3 -0
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +39 -59
- datahub/sql_parsing/split_statements.py +13 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
- datahub/sql_parsing/sqlglot_lineage.py +196 -42
- datahub/sql_parsing/sqlglot_utils.py +12 -4
- datahub/sql_parsing/tool_meta_extractor.py +1 -3
- datahub/telemetry/telemetry.py +28 -14
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +73 -17
- datahub/utilities/file_backed_collections.py +8 -9
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +22 -6
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +10 -1
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -466
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
datahub/cli/specific/user_cli.py
CHANGED
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import pathlib
|
|
3
3
|
from pathlib import Path
|
|
4
|
+
from typing import Optional
|
|
4
5
|
|
|
5
6
|
import click
|
|
6
7
|
from click_default_group import DefaultGroup
|
|
7
8
|
|
|
8
9
|
from datahub.api.entities.corpuser.corpuser import CorpUser, CorpUserGenerationConfig
|
|
9
10
|
from datahub.cli.specific.file_loader import load_file
|
|
10
|
-
from datahub.
|
|
11
|
+
from datahub.configuration.common import OperationalError
|
|
12
|
+
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
|
11
13
|
from datahub.ingestion.graph.config import ClientMode
|
|
12
|
-
from datahub.telemetry import telemetry
|
|
13
14
|
from datahub.upgrade import upgrade
|
|
14
15
|
|
|
15
16
|
logger = logging.getLogger(__name__)
|
|
@@ -33,7 +34,6 @@ def user() -> None:
|
|
|
33
34
|
help="Use this flag to overwrite the information that is set via the UI",
|
|
34
35
|
)
|
|
35
36
|
@upgrade.check_upgrade
|
|
36
|
-
@telemetry.with_telemetry()
|
|
37
37
|
def upsert(file: Path, override_editable: bool) -> None:
|
|
38
38
|
"""Create or Update a User in DataHub"""
|
|
39
39
|
|
|
@@ -57,3 +57,172 @@ def upsert(file: Path, override_editable: bool) -> None:
|
|
|
57
57
|
f"Update failed for id {user_config.get('id')}. due to {e}",
|
|
58
58
|
fg="red",
|
|
59
59
|
)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def validate_user_id_options(
    user_id: Optional[str], email_as_id: bool, email: str
) -> str:
    """
    Pick the identifier that will be used to build the user's URN.

    Exactly one of the two ID sources must be selected: an explicit
    ``user_id`` or the ``email_as_id`` flag.

    Args:
        user_id: Explicit user ID, if one was given
        email_as_id: True when the email address should double as the ID
        email: User's email address

    Returns:
        ``email`` when ``email_as_id`` is set, otherwise ``user_id``

    Raises:
        ValueError: If neither or both of the ID sources were selected
    """
    has_explicit_id = bool(user_id)

    # The two failure modes are mutually exclusive, so their order is free.
    if has_explicit_id and email_as_id:
        raise ValueError("Cannot specify both --id and --email-as-id flag")
    if not (has_explicit_id or email_as_id):
        raise ValueError("Must specify either --id or --email-as-id flag")

    if not email_as_id:
        # Only here for type narrowing; the checks above guarantee a value.
        assert user_id is not None
        return user_id

    return email
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def create_native_user_in_datahub(
    graph: DataHubGraph,
    user_id: str,
    email: str,
    display_name: str,
    password: str,
    role: Optional[str] = None,
) -> str:
    """
    Create a native DataHub user after checking the ID is not already taken.

    Args:
        graph: DataHubGraph client used for the existence check and creation
        user_id: User identifier (becomes the tail of the corpuser URN)
        email: User's email address
        display_name: User's full display name
        password: User's password
        role: Optional role, forwarded unchanged to the graph client

    Returns:
        Whatever ``graph.create_native_user`` returns (the created user's URN)

    Raises:
        ValueError: If ``graph.exists`` reports the corpuser URN already exists

    Any exception raised by ``graph.create_native_user`` propagates unchanged.
    """
    target_urn = f"urn:li:corpuser:{user_id}"

    # Refuse to overwrite an existing user.
    already_present = graph.exists(target_urn)
    if already_present:
        raise ValueError(f"User with ID {user_id} already exists (urn: {target_urn})")

    return graph.create_native_user(
        user_id=user_id,
        email=email,
        display_name=display_name,
        password=password,
        role=role,
    )
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
@user.command(name="add")
@click.option("--id", "user_id", type=str, help="User identifier (used in URN)")
@click.option("--email", required=True, type=str, help="User's email address")
@click.option(
    "--email-as-id",
    is_flag=True,
    default=False,
    help="Use email address as user ID (alternative to --id)",
)
@click.option(
    "--display-name", required=True, type=str, help="User's full display name"
)
@click.option(
    "--password",
    is_flag=True,
    default=False,
    help="Prompt for password (hidden input)",
)
@click.option(
    "--role",
    required=False,
    # NOTE(review): with case_sensitive=False the lowercase entries look
    # redundant - confirm before pruning, since it may change the exact
    # value that gets forwarded to the graph client.
    type=click.Choice(
        ["Admin", "Editor", "Reader", "admin", "editor", "reader"], case_sensitive=False
    ),
    help="Optional role to assign (Admin, Editor, or Reader)",
)
@upgrade.check_upgrade
def add(
    user_id: Optional[str],
    email: str,
    email_as_id: bool,
    display_name: str,
    password: bool,
    role: Optional[str],
) -> None:
    """Create a native DataHub user with email/password authentication"""
    # Args (filled in by click):
    #   user_id: value of --id, or None when the option is omitted
    #   email: value of --email (required)
    #   email_as_id: True when --email-as-id is passed
    #   display_name: value of --display-name (required)
    #   password: True when --password is passed; the secret itself is
    #       always read from an interactive hidden prompt, never from argv
    #   role: value of --role, or None when the option is omitted
    #
    # Every failure path prints a red message and exits with status 1.

    try:
        final_user_id = validate_user_id_options(user_id, email_as_id, email)
    except ValueError as e:
        click.secho(f"Error: {e}", fg="red")
        raise SystemExit(1) from e

    if not password:
        click.secho(
            "Error: --password flag is required to prompt for password input",
            fg="red",
        )
        raise SystemExit(1)

    password_value = click.prompt(
        "Enter password", hide_input=True, confirmation_prompt=True
    )

    with get_default_graph(ClientMode.CLI) as graph:
        try:
            created_user_urn = create_native_user_in_datahub(
                graph, final_user_id, email, display_name, password_value, role
            )

            if role:
                click.secho(
                    f"Successfully created user {final_user_id} with role {role.capitalize()} (URN: {created_user_urn})",
                    fg="green",
                )
            else:
                click.secho(
                    f"Successfully created user {final_user_id} (URN: {created_user_urn})",
                    fg="green",
                )
        except ValueError as e:
            click.secho(f"Error: {e}", fg="red")
            raise SystemExit(1) from e
        except OperationalError as e:
            # Prefer the structured message; fall back to the first positional
            # arg, and finally to str(e) so an arg-less exception cannot raise
            # IndexError while we are already reporting an error.
            if hasattr(e, "message"):
                error_msg = e.message
            elif e.args:
                error_msg = str(e.args[0])
            else:
                error_msg = str(e)
            click.secho(f"Error: {error_msg}", fg="red")

            if hasattr(e, "info") and e.info:
                logger.debug("Error details: %s", e.info)
                if "status_code" in e.info:
                    click.secho(f"  HTTP Status: {e.info['status_code']}", fg="red")
                if "response_text" in e.info:
                    # Cap the echoed response body at 200 characters.
                    click.secho(
                        f"  Response: {e.info['response_text'][:200]}", fg="red"
                    )

            click.secho(
                "\nTip: Run with DATAHUB_DEBUG=1 environment variable for detailed logs",
                fg="yellow",
            )
            raise SystemExit(1) from e
        except Exception as e:
            # Top-level CLI boundary: report, log the traceback, exit non-zero.
            click.secho(f"Unexpected error: {e}", fg="red")
            logger.exception("Unexpected error during user creation")
            raise SystemExit(1) from e
|
datahub/cli/state_cli.py
CHANGED
|
@@ -6,7 +6,6 @@ from click_default_group import DefaultGroup
|
|
|
6
6
|
|
|
7
7
|
from datahub.ingestion.graph.client import get_default_graph
|
|
8
8
|
from datahub.ingestion.graph.config import ClientMode
|
|
9
|
-
from datahub.telemetry import telemetry
|
|
10
9
|
from datahub.upgrade import upgrade
|
|
11
10
|
|
|
12
11
|
logger = logging.getLogger(__name__)
|
|
@@ -22,7 +21,6 @@ def state() -> None:
|
|
|
22
21
|
@click.option("--pipeline-name", required=True, type=str)
|
|
23
22
|
@click.option("--platform", required=True, type=str)
|
|
24
23
|
@upgrade.check_upgrade
|
|
25
|
-
@telemetry.with_telemetry()
|
|
26
24
|
def inspect(pipeline_name: str, platform: str) -> None:
|
|
27
25
|
"""
|
|
28
26
|
Get the latest stateful ingestion state for a given pipeline.
|
datahub/cli/timeline_cli.py
CHANGED
|
@@ -10,7 +10,6 @@ from requests import Response
|
|
|
10
10
|
from datahub.emitter.mce_builder import dataset_urn_to_key, schema_field_urn_to_key
|
|
11
11
|
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
|
12
12
|
from datahub.ingestion.graph.config import ClientMode
|
|
13
|
-
from datahub.telemetry import telemetry
|
|
14
13
|
from datahub.upgrade import upgrade
|
|
15
14
|
from datahub.utilities.urns.urn import Urn
|
|
16
15
|
|
|
@@ -130,7 +129,6 @@ def get_timeline(
|
|
|
130
129
|
@click.option("--raw", type=bool, is_flag=True, help="Show the raw diff")
|
|
131
130
|
@click.pass_context
|
|
132
131
|
@upgrade.check_upgrade
|
|
133
|
-
@telemetry.with_telemetry()
|
|
134
132
|
def timeline(
|
|
135
133
|
ctx: Any,
|
|
136
134
|
urn: str,
|
datahub/configuration/common.py
CHANGED
|
@@ -1,20 +1,25 @@
|
|
|
1
|
+
import dataclasses
|
|
1
2
|
import re
|
|
2
3
|
import unittest.mock
|
|
3
4
|
from abc import ABC, abstractmethod
|
|
4
5
|
from enum import auto
|
|
5
6
|
from typing import (
|
|
6
7
|
IO,
|
|
8
|
+
TYPE_CHECKING,
|
|
9
|
+
Annotated,
|
|
7
10
|
Any,
|
|
8
11
|
ClassVar,
|
|
9
12
|
Dict,
|
|
10
13
|
List,
|
|
11
14
|
Optional,
|
|
12
15
|
Type,
|
|
16
|
+
TypeVar,
|
|
13
17
|
Union,
|
|
14
18
|
runtime_checkable,
|
|
15
19
|
)
|
|
16
20
|
|
|
17
21
|
import pydantic
|
|
22
|
+
import pydantic_core
|
|
18
23
|
from cached_property import cached_property
|
|
19
24
|
from pydantic import BaseModel, Extra, ValidationError
|
|
20
25
|
from pydantic.fields import Field
|
|
@@ -83,6 +88,29 @@ def redact_raw_config(obj: Any) -> Any:
|
|
|
83
88
|
return obj
|
|
84
89
|
|
|
85
90
|
|
|
91
|
+
# HiddenFromDocs has two definitions: static type checkers see a generic
# Annotated alias over AnyType, while at runtime the name is bound to pydantic's
# SkipJsonSchema marker.
# NOTE(review): presumably used to keep a field out of generated JSON schema /
# docs -- confirm against call sites.
if TYPE_CHECKING:
    AnyType = TypeVar("AnyType")
    HiddenFromDocs = Annotated[AnyType, ...]
else:
    HiddenFromDocs = pydantic.json_schema.SkipJsonSchema

# String type whose incoming value is first passed through str() by a
# "before" validator, so non-string inputs are stringified.
LaxStr = Annotated[str, pydantic.BeforeValidator(lambda v: str(v))]
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@dataclasses.dataclass(frozen=True)
class SupportedSources:
    """Metadata object that records a list of source names in a JSON schema.

    When pydantic builds the JSON schema, the ``sources`` list is written under
    ``schema_extra["supported_sources"]`` of the generated schema.
    """

    sources: List[str]

    def __get_pydantic_json_schema__(
        self,
        core_schema: pydantic_core.core_schema.CoreSchema,
        handler: pydantic.GetJsonSchemaHandler,
    ) -> pydantic.json_schema.JsonSchemaValue:
        """Generate the schema via ``handler`` and attach ``self.sources`` to it."""
        schema = handler(core_schema)
        # Reuse an existing "schema_extra" mapping if the handler produced one.
        extra = schema.setdefault("schema_extra", {})
        extra["supported_sources"] = self.sources
        return schema
|
|
112
|
+
|
|
113
|
+
|
|
86
114
|
class ConfigModel(BaseModel):
|
|
87
115
|
class Config:
|
|
88
116
|
@staticmethod
|
|
@@ -141,6 +169,17 @@ class PermissiveConfigModel(ConfigModel):
|
|
|
141
169
|
extra = Extra.allow
|
|
142
170
|
|
|
143
171
|
|
|
172
|
+
class ConnectionModel(BaseModel):
    """Represents the config associated with a connection"""

    class Config:
        # Extra (undeclared) fields are allowed; the spelling of that setting
        # depends on which pydantic major version is in use.
        extra = "allow" if PYDANTIC_VERSION_2 else Extra.allow
        underscore_attrs_are_private = True
|
|
181
|
+
|
|
182
|
+
|
|
144
183
|
class TransformerSemantics(ConfigEnum):
|
|
145
184
|
"""Describes semantics for aspect changes"""
|
|
146
185
|
|
|
@@ -334,4 +373,4 @@ class KeyValuePattern(ConfigModel):
|
|
|
334
373
|
|
|
335
374
|
|
|
336
375
|
class VersionedConfig(ConfigModel):
|
|
337
|
-
version:
|
|
376
|
+
version: LaxStr = "1"
|
|
@@ -1,13 +1,16 @@
|
|
|
1
|
-
from typing import Type
|
|
1
|
+
from typing import TYPE_CHECKING, Type
|
|
2
2
|
|
|
3
3
|
import pydantic
|
|
4
4
|
|
|
5
5
|
from datahub.ingestion.api.global_context import get_graph_context
|
|
6
6
|
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from pydantic.deprecated.class_validators import V1RootValidator
|
|
9
|
+
|
|
7
10
|
|
|
8
11
|
def auto_connection_resolver(
|
|
9
12
|
connection_field: str = "connection",
|
|
10
|
-
) ->
|
|
13
|
+
) -> "V1RootValidator":
|
|
11
14
|
def _resolve_connection(cls: Type, values: dict) -> dict:
|
|
12
15
|
if connection_field in values:
|
|
13
16
|
connection_urn = values.pop(connection_field)
|
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
# ABOUTME: Central registry for all environment variables used in metadata-ingestion.
|
|
2
|
+
# ABOUTME: All environment variable reads should go through this module for discoverability and maintainability.
|
|
3
|
+
|
|
4
|
+
import os
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
# ============================================================================
|
|
8
|
+
# Core DataHub Configuration
|
|
9
|
+
# ============================================================================
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def get_gms_url() -> Optional[str]:
    """Return the complete GMS URL from DATAHUB_GMS_URL, or None when unset.

    This value takes precedence over the separate host/port settings.
    """
    return os.environ.get("DATAHUB_GMS_URL")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def get_gms_host() -> Optional[str]:
    """Return the GMS host from DATAHUB_GMS_HOST, or None when unset.

    Deprecated fallback used when no complete URL is configured.
    """
    return os.environ.get("DATAHUB_GMS_HOST")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def get_gms_port() -> Optional[str]:
    """Return the GMS port from DATAHUB_GMS_PORT as a raw string, or None when unset."""
    return os.environ.get("DATAHUB_GMS_PORT")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def get_gms_protocol() -> str:
    """Return the GMS connection protocol from DATAHUB_GMS_PROTOCOL (default "http")."""
    return os.environ.get("DATAHUB_GMS_PROTOCOL", "http")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def get_gms_token() -> Optional[str]:
    """Return the GMS authentication token from DATAHUB_GMS_TOKEN, or None when unset."""
    return os.environ.get("DATAHUB_GMS_TOKEN")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def get_system_client_id() -> Optional[str]:
    """Return the system client ID from DATAHUB_SYSTEM_CLIENT_ID, or None when unset."""
    return os.environ.get("DATAHUB_SYSTEM_CLIENT_ID")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def get_system_client_secret() -> Optional[str]:
    """Return the system client secret from DATAHUB_SYSTEM_CLIENT_SECRET, or None when unset."""
    return os.environ.get("DATAHUB_SYSTEM_CLIENT_SECRET")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def get_skip_config() -> bool:
    """Return True when DATAHUB_SKIP_CONFIG is "true" (case-insensitive).

    Enabling it skips loading the config file so that environment variables are used.
    """
    flag = os.environ.get("DATAHUB_SKIP_CONFIG", "")
    return flag.lower() == "true"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def get_gms_base_path() -> str:
    """Return the GMS API base path from DATAHUB_GMS_BASE_PATH (default: empty string)."""
    return os.environ.get("DATAHUB_GMS_BASE_PATH", "")
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# ============================================================================
|
|
58
|
+
# REST Emitter Configuration
|
|
59
|
+
# ============================================================================
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def get_rest_emitter_default_retry_max_times() -> str:
    """Return the max retry count for failed requests as a string.

    Read from DATAHUB_REST_EMITTER_DEFAULT_RETRY_MAX_TIMES; defaults to "4".
    The value is returned unparsed.
    """
    return os.environ.get("DATAHUB_REST_EMITTER_DEFAULT_RETRY_MAX_TIMES", "4")
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def get_rest_emitter_batch_max_payload_bytes() -> int:
    """Return the maximum batch payload size in bytes.

    Read from DATAHUB_REST_EMITTER_BATCH_MAX_PAYLOAD_BYTES; defaults to 15 MiB.
    Raises ValueError if the variable is set to a non-integer string.
    """
    default_bytes = 15 * 1024 * 1024
    raw = os.environ.get("DATAHUB_REST_EMITTER_BATCH_MAX_PAYLOAD_BYTES")
    if raw is None:
        return default_bytes
    return int(raw)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def get_rest_emitter_batch_max_payload_length() -> int:
    """Return the maximum number of MCPs per batch.

    Read from DATAHUB_REST_EMITTER_BATCH_MAX_PAYLOAD_LENGTH; defaults to 200.
    Raises ValueError if the variable is set to a non-integer string.
    """
    configured = os.environ.get("DATAHUB_REST_EMITTER_BATCH_MAX_PAYLOAD_LENGTH", "200")
    return int(configured)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def get_emit_mode() -> Optional[str]:
    """Return the emission mode from DATAHUB_EMIT_MODE, or None when unset.

    Documented values: SYNC_PRIMARY, SYNC_WAIT, ASYNC, ASYNC_WAIT. The value is
    returned as-is, without validation.
    """
    return os.environ.get("DATAHUB_EMIT_MODE")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def get_rest_emitter_default_endpoint() -> Optional[str]:
    """Return the REST endpoint type from DATAHUB_REST_EMITTER_DEFAULT_ENDPOINT, or None.

    Documented values: RESTLI or OPENAPI. The value is returned as-is.
    """
    return os.environ.get("DATAHUB_REST_EMITTER_DEFAULT_ENDPOINT")
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def get_emitter_trace() -> bool:
    """Return True when DATAHUB_EMITTER_TRACE is "true" (case-insensitive).

    Controls detailed emitter tracing.
    """
    flag = os.environ.get("DATAHUB_EMITTER_TRACE", "")
    return flag.lower() == "true"
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# ============================================================================
|
|
95
|
+
# REST Sink Configuration
|
|
96
|
+
# ============================================================================
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def get_rest_sink_default_max_threads() -> int:
    """Return the max thread-pool size for async operations.

    Read from DATAHUB_REST_SINK_DEFAULT_MAX_THREADS; defaults to 15.
    Raises ValueError if the variable is set to a non-integer string.
    """
    configured = os.environ.get("DATAHUB_REST_SINK_DEFAULT_MAX_THREADS", "15")
    return int(configured)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def get_rest_sink_default_mode() -> Optional[str]:
    """Return the sink mode from DATAHUB_REST_SINK_DEFAULT_MODE, or None when unset.

    Documented values: SYNC, ASYNC, ASYNC_BATCH. The value is returned as-is.
    """
    return os.environ.get("DATAHUB_REST_SINK_DEFAULT_MODE")
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
# ============================================================================
|
|
110
|
+
# Telemetry & Monitoring
|
|
111
|
+
# ============================================================================
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def get_telemetry_timeout() -> str:
    """Return the telemetry timeout (seconds) as a string.

    Read from DATAHUB_TELEMETRY_TIMEOUT; defaults to "10". The value is returned unparsed.
    """
    return os.environ.get("DATAHUB_TELEMETRY_TIMEOUT", "10")
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def get_sentry_dsn() -> Optional[str]:
    """Return the Sentry error-tracking DSN from SENTRY_DSN, or None when unset."""
    return os.environ.get("SENTRY_DSN")
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def get_sentry_environment() -> str:
    """Return the Sentry environment name from SENTRY_ENVIRONMENT (default "dev")."""
    return os.environ.get("SENTRY_ENVIRONMENT", "dev")
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# ============================================================================
|
|
130
|
+
# Logging & Debug Configuration
|
|
131
|
+
# ============================================================================
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def get_suppress_logging_manager() -> Optional[str]:
    """Return the raw DATAHUB_SUPPRESS_LOGGING_MANAGER value, or None when unset.

    The variable is meant to suppress DataHub logging-manager initialization;
    interpretation of the value is left to the caller.
    """
    return os.environ.get("DATAHUB_SUPPRESS_LOGGING_MANAGER")
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def get_no_color() -> bool:
    """Return True when colored logging output should be disabled.

    Follows the NO_COLOR convention: color is disabled whenever the NO_COLOR
    environment variable is present and non-empty, regardless of its value.
    Checking only for the literal "true" would ignore the common ``NO_COLOR=1``.
    """
    # An unset variable (None) and an empty string are both falsy.
    return bool(os.getenv("NO_COLOR"))
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def get_test_mode() -> Optional[str]:
    """Return the raw DATAHUB_TEST_MODE value, or None when unset.

    A set value indicates the code is running in a test context.
    """
    return os.environ.get("DATAHUB_TEST_MODE")
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def get_debug() -> bool:
    """Return True when debug mode is enabled through DATAHUB_DEBUG.

    Accepts "true" (case-insensitive) and "1". The "1" form is honoured because
    the CLI's troubleshooting tip tells users to run with ``DATAHUB_DEBUG=1``;
    matching only "true" would silently ignore that documented usage.
    Any other value, or an unset variable, yields False.
    """
    return os.getenv("DATAHUB_DEBUG", "").lower() in ("true", "1")
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
# ============================================================================
|
|
155
|
+
# Data Processing Configuration
|
|
156
|
+
# ============================================================================
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def get_sql_agg_query_log() -> str:
    """Return the SQL aggregator query-logging level.

    Read from DATAHUB_SQL_AGG_QUERY_LOG; defaults to "DISABLED".
    """
    return os.environ.get("DATAHUB_SQL_AGG_QUERY_LOG", "DISABLED")
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def get_dataset_urn_to_lower() -> str:
    """Return the raw DATAHUB_DATASET_URN_TO_LOWER setting (default "false").

    The setting governs lowercasing of dataset URNs; the string is returned
    unparsed, so callers must interpret it themselves.
    """
    return os.environ.get("DATAHUB_DATASET_URN_TO_LOWER", "false")
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
# ============================================================================
|
|
170
|
+
# Integration-Specific Configuration
|
|
171
|
+
# ============================================================================
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def get_kafka_schema_registry_url() -> Optional[str]:
    """Return the Kafka schema registry URL from KAFKA_SCHEMAREGISTRY_URL, or None."""
    return os.environ.get("KAFKA_SCHEMAREGISTRY_URL")
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def get_spark_version() -> Optional[str]:
    """Return the Spark version from SPARK_VERSION (used by the S3 source), or None."""
    return os.environ.get("SPARK_VERSION")
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def get_bigquery_schema_parallelism() -> int:
    """Return the parallelism level for BigQuery schema extraction.

    Read from DATAHUB_BIGQUERY_SCHEMA_PARALLELISM; defaults to 20.
    Raises ValueError if the variable is set to a non-integer string.
    """
    configured = os.environ.get("DATAHUB_BIGQUERY_SCHEMA_PARALLELISM", "20")
    return int(configured)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def get_snowflake_schema_parallelism() -> int:
    """Return the parallelism level for Snowflake schema extraction.

    Read from DATAHUB_SNOWFLAKE_SCHEMA_PARALLELISM; defaults to 20.
    Raises ValueError if the variable is set to a non-integer string.
    """
    configured = os.environ.get("DATAHUB_SNOWFLAKE_SCHEMA_PARALLELISM", "20")
    return int(configured)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def get_powerbi_m_query_parse_timeout() -> int:
    """Return the timeout for PowerBI M query parsing.

    Read from DATAHUB_POWERBI_M_QUERY_PARSE_TIMEOUT; defaults to 60.
    Raises ValueError if the variable is set to a non-integer string.
    """
    configured = os.environ.get("DATAHUB_POWERBI_M_QUERY_PARSE_TIMEOUT", "60")
    return int(configured)
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def get_trace_powerbi_mquery_parser() -> bool:
    """Return True when DATAHUB_TRACE_POWERBI_MQUERY_PARSER is "true" (case-insensitive).

    Controls PowerBI M query parser tracing.
    """
    flag = os.environ.get("DATAHUB_TRACE_POWERBI_MQUERY_PARSER", "")
    return flag.lower() == "true"
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def get_lookml_git_test_ssh_key() -> Optional[str]:
    """Return the SSH key for LookML Git tests from DATAHUB_LOOKML_GIT_TEST_SSH_KEY, or None."""
    return os.environ.get("DATAHUB_LOOKML_GIT_TEST_SSH_KEY")
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
# ============================================================================
|
|
210
|
+
# AWS/Cloud Configuration
|
|
211
|
+
# ============================================================================
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def get_aws_lambda_function_name() -> Optional[str]:
    """Return AWS_LAMBDA_FUNCTION_NAME, or None when unset.

    A set value indicates the process is running in AWS Lambda.
    """
    return os.environ.get("AWS_LAMBDA_FUNCTION_NAME")
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def get_aws_execution_env() -> Optional[str]:
    """Return the AWS execution environment from AWS_EXECUTION_ENV, or None when unset."""
    return os.environ.get("AWS_EXECUTION_ENV")
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def get_aws_web_identity_token_file() -> Optional[str]:
    """Return the OIDC token file path from AWS_WEB_IDENTITY_TOKEN_FILE, or None."""
    return os.environ.get("AWS_WEB_IDENTITY_TOKEN_FILE")
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def get_aws_role_arn() -> Optional[str]:
    """Return the AWS role ARN used for OIDC from AWS_ROLE_ARN, or None when unset."""
    return os.environ.get("AWS_ROLE_ARN")
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def get_aws_app_runner_service_id() -> Optional[str]:
    """Return the AWS App Runner service ID from AWS_APP_RUNNER_SERVICE_ID, or None."""
    return os.environ.get("AWS_APP_RUNNER_SERVICE_ID")
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def get_ecs_container_metadata_uri_v4() -> Optional[str]:
    """Return the ECS metadata endpoint (v4) from ECS_CONTAINER_METADATA_URI_V4, or None."""
    return os.environ.get("ECS_CONTAINER_METADATA_URI_V4")
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def get_ecs_container_metadata_uri() -> Optional[str]:
    """Return the ECS metadata endpoint (v3) from ECS_CONTAINER_METADATA_URI, or None."""
    return os.environ.get("ECS_CONTAINER_METADATA_URI")
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def get_elastic_beanstalk_environment_name() -> Optional[str]:
    """Return the Elastic Beanstalk environment from ELASTIC_BEANSTALK_ENVIRONMENT_NAME, or None."""
    return os.environ.get("ELASTIC_BEANSTALK_ENVIRONMENT_NAME")
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
# ============================================================================
|
|
255
|
+
# Docker & Local Development
|
|
256
|
+
# ============================================================================
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def get_compose_project_name() -> str:
    """Return the Docker Compose project name.

    Read from DATAHUB_COMPOSE_PROJECT_NAME; defaults to "datahub".
    """
    return os.environ.get("DATAHUB_COMPOSE_PROJECT_NAME", "datahub")
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def get_docker_compose_base() -> Optional[str]:
    """Return the base path for Docker Compose files from DOCKER_COMPOSE_BASE, or None."""
    return os.environ.get("DOCKER_COMPOSE_BASE")
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def get_datahub_version() -> Optional[str]:
    """Return the DataHub version from DATAHUB_VERSION, or None when unset.

    The variable is set during docker init.
    """
    return os.environ.get("DATAHUB_VERSION")
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def get_mapped_mysql_port() -> Optional[str]:
    """Return the MySQL port mapping from DATAHUB_MAPPED_MYSQL_PORT, or None when unset.

    The variable is set during docker init; the value is returned as a raw string.
    """
    return os.environ.get("DATAHUB_MAPPED_MYSQL_PORT")
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def get_mapped_kafka_broker_port() -> Optional[str]:
    """Return the Kafka broker port mapping from DATAHUB_MAPPED_KAFKA_BROKER_PORT, or None.

    The variable is set during docker init; the value is returned as a raw string.
    """
    return os.environ.get("DATAHUB_MAPPED_KAFKA_BROKER_PORT")
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def get_mapped_elastic_port() -> Optional[str]:
    """Return the Elasticsearch port mapping from DATAHUB_MAPPED_ELASTIC_PORT, or None.

    The variable is set during docker init; the value is returned as a raw string.
    """
    return os.environ.get("DATAHUB_MAPPED_ELASTIC_PORT")
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def get_metadata_service_auth_enabled() -> str:
    """Return the raw METADATA_SERVICE_AUTH_ENABLED setting (default "false").

    The setting enables or disables auth in Docker; the string is returned
    unparsed, so callers must interpret it themselves.
    """
    return os.environ.get("METADATA_SERVICE_AUTH_ENABLED", "false")
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def get_ui_ingestion_default_cli_version() -> Optional[str]:
    """Return the CLI version for UI ingestion from UI_INGESTION_DEFAULT_CLI_VERSION, or None.

    The variable is set during init.
    """
    return os.environ.get("UI_INGESTION_DEFAULT_CLI_VERSION")
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
# ============================================================================
|
|
300
|
+
# Utility & Helper Configuration
|
|
301
|
+
# ============================================================================
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def get_datahub_component() -> str:
    """Return the component name used for user-agent tracking.

    Read from DATAHUB_COMPONENT; defaults to "datahub".
    """
    return os.environ.get("DATAHUB_COMPONENT", "datahub")
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def get_force_local_quickstart_mapping() -> str:
    """Return the forced local quickstart mapping file setting.

    Read from FORCE_LOCAL_QUICKSTART_MAPPING; defaults to an empty string.
    """
    return os.environ.get("FORCE_LOCAL_QUICKSTART_MAPPING", "")
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def get_dataproduct_external_url() -> Optional[str]:
    """Return the external URL for data products from DATAHUB_DATAPRODUCT_EXTERNAL_URL, or None."""
    return os.environ.get("DATAHUB_DATAPRODUCT_EXTERNAL_URL")
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def get_override_sqlite_version_req() -> str:
    """Return the SQLite version-requirement override.

    Read from OVERRIDE_SQLITE_VERSION_REQ; defaults to an empty string.
    """
    return os.environ.get("OVERRIDE_SQLITE_VERSION_REQ", "")
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
def get_update_entity_registry() -> str:
    """Return the raw UPDATE_ENTITY_REGISTRY setting (default "false").

    The setting governs updating the entity registry during tests; the string
    is returned unparsed.
    """
    return os.environ.get("UPDATE_ENTITY_REGISTRY", "false")
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def get_ci() -> Optional[str]:
    """Return the raw CI environment variable, or None when unset.

    A set value indicates the process is running in a CI environment.
    """
    return os.environ.get("CI")
|
|
@@ -1,15 +1,18 @@
|
|
|
1
|
-
from typing import TypeVar, Union
|
|
1
|
+
from typing import TYPE_CHECKING, Type, TypeVar, Union
|
|
2
2
|
|
|
3
3
|
import pydantic
|
|
4
4
|
|
|
5
5
|
from datahub.ingestion.api.registry import import_path
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from pydantic.deprecated.class_validators import V1Validator
|
|
8
9
|
|
|
10
|
+
_T = TypeVar("_T")
|
|
9
11
|
|
|
10
|
-
|
|
12
|
+
|
|
13
|
+
def _pydantic_resolver(cls: Type, v: Union[str, _T]) -> _T:
    """Resolve a string via ``import_path``; return any non-string value unchanged.

    ``cls`` is accepted for the validator signature but is not used.
    """
    if not isinstance(v, str):
        return v
    return import_path(v)
|
|
12
15
|
|
|
13
16
|
|
|
14
|
-
def pydantic_resolve_key(field: str) ->
|
|
17
|
+
def pydantic_resolve_key(field: str) -> "V1Validator":
    """Build a reusable pre-validator for ``field`` that applies ``_pydantic_resolver``."""
    decorate = pydantic.validator(field, pre=True, allow_reuse=True)
    return decorate(_pydantic_resolver)
|