acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
- datahub/_version.py +1 -1
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +26 -23
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +3 -3
- datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
- datahub/api/graphql/operation.py +10 -6
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +18 -10
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +125 -27
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +153 -229
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +37 -6
- datahub/cli/specific/datacontract_cli.py +54 -7
- datahub/cli/specific/dataproduct_cli.py +2 -15
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +172 -3
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/configuration/common.py +40 -1
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +3 -2
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +8 -4
- datahub/emitter/rest_emitter.py +103 -30
- datahub/entrypoints.py +6 -3
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +165 -58
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +330 -25
- datahub/ingestion/graph/config.py +3 -2
- datahub/ingestion/graph/filters.py +30 -11
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +81 -11
- datahub/ingestion/run/pipeline_config.py +2 -2
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +13 -5
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +6 -8
- datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
- datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
- datahub/ingestion/source/common/subtypes.py +53 -0
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
- datahub/ingestion/source/datahub/config.py +12 -9
- datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
- datahub/ingestion/source/datahub/datahub_source.py +10 -0
- datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
- datahub/ingestion/source/dbt/dbt_common.py +224 -9
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +132 -98
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +8 -10
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +66 -7
- datahub/ingestion/source/fivetran/fivetran.py +227 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
- datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gcs/gcs_source.py +32 -4
- datahub/ingestion/source/ge_data_profiler.py +108 -31
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/api.py +28 -1
- datahub/ingestion/source/hex/hex.py +16 -5
- datahub/ingestion/source/hex/mapper.py +16 -2
- datahub/ingestion/source/hex/model.py +2 -0
- datahub/ingestion/source/hex/query_fetcher.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +123 -59
- datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
- datahub/ingestion/source/looker/looker_common.py +148 -79
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +503 -547
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +96 -117
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +9 -9
- datahub/ingestion/source/mlflow.py +12 -2
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +26 -5
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
- datahub/ingestion/source/nifi.py +2 -2
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/powerbi/config.py +47 -21
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
- datahub/ingestion/source/powerbi/powerbi.py +10 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/query.py +23 -19
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +6 -5
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +449 -248
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +10 -16
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +3 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
- datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
- datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
- datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
- datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +217 -25
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +24 -8
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +2 -2
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +4 -3
- datahub/ingestion/source/sql/hive_metastore.py +19 -20
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +3 -1
- datahub/ingestion/source/sql/mssql/source.py +336 -57
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +5 -5
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +2 -1
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +22 -0
- datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +11 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/superset.py +314 -67
- datahub/ingestion/source/tableau/tableau.py +135 -59
- datahub/ingestion/source/tableau/tableau_common.py +9 -2
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/unity/config.py +160 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
- datahub/ingestion/source/usage/usage_common.py +4 -3
- datahub/ingestion/source/vertexai/vertexai.py +1 -1
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +6806 -4871
- datahub/metadata/_urns/urn_defs.py +1767 -1539
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18395 -16979
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +4 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +61 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/sdk/__init__.py +2 -0
- datahub/sdk/_all_entities.py +7 -0
- datahub/sdk/_shared.py +249 -5
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +7 -0
- datahub/sdk/datajob.py +45 -13
- datahub/sdk/dataset.py +56 -2
- datahub/sdk/entity_client.py +111 -9
- datahub/sdk/lineage_client.py +663 -82
- datahub/sdk/main_client.py +50 -16
- datahub/sdk/mlmodel.py +120 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +7 -3
- datahub/sdk/search_filters.py +304 -36
- datahub/secret/datahub_secret_store.py +3 -0
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +39 -59
- datahub/sql_parsing/split_statements.py +13 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
- datahub/sql_parsing/sqlglot_lineage.py +196 -42
- datahub/sql_parsing/sqlglot_utils.py +12 -4
- datahub/sql_parsing/tool_meta_extractor.py +1 -3
- datahub/telemetry/telemetry.py +28 -14
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +73 -17
- datahub/utilities/file_backed_collections.py +8 -9
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +22 -6
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +10 -1
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -466
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
import os
|
|
3
2
|
from datetime import datetime, timedelta, timezone
|
|
4
3
|
from enum import Enum
|
|
5
4
|
from http import HTTPStatus
|
|
6
5
|
from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Union
|
|
6
|
+
from urllib.parse import parse_qs, urlparse
|
|
7
7
|
|
|
8
8
|
import boto3
|
|
9
9
|
import requests
|
|
10
10
|
from boto3.session import Session
|
|
11
11
|
from botocore.config import DEFAULT_TIMEOUT, Config
|
|
12
|
+
from botocore.exceptions import ClientError, NoCredentialsError
|
|
12
13
|
from botocore.utils import fix_s3_host
|
|
13
14
|
from pydantic.fields import Field
|
|
14
15
|
|
|
@@ -17,6 +18,16 @@ from datahub.configuration.common import (
|
|
|
17
18
|
ConfigModel,
|
|
18
19
|
PermissiveConfigModel,
|
|
19
20
|
)
|
|
21
|
+
from datahub.configuration.env_vars import (
|
|
22
|
+
get_aws_app_runner_service_id,
|
|
23
|
+
get_aws_execution_env,
|
|
24
|
+
get_aws_lambda_function_name,
|
|
25
|
+
get_aws_role_arn,
|
|
26
|
+
get_aws_web_identity_token_file,
|
|
27
|
+
get_ecs_container_metadata_uri,
|
|
28
|
+
get_ecs_container_metadata_uri_v4,
|
|
29
|
+
get_elastic_beanstalk_environment_name,
|
|
30
|
+
)
|
|
20
31
|
from datahub.configuration.source_common import EnvConfigMixin
|
|
21
32
|
|
|
22
33
|
logger = logging.getLogger(__name__)
|
|
@@ -24,6 +35,7 @@ logger = logging.getLogger(__name__)
|
|
|
24
35
|
if TYPE_CHECKING:
|
|
25
36
|
from mypy_boto3_dynamodb import DynamoDBClient
|
|
26
37
|
from mypy_boto3_glue import GlueClient
|
|
38
|
+
from mypy_boto3_lakeformation import LakeFormationClient
|
|
27
39
|
from mypy_boto3_s3 import S3Client, S3ServiceResource
|
|
28
40
|
from mypy_boto3_sagemaker import SageMakerClient
|
|
29
41
|
from mypy_boto3_sts import STSClient
|
|
@@ -99,27 +111,25 @@ def detect_aws_environment() -> AwsEnvironment:
|
|
|
99
111
|
Order matters as some environments may have multiple indicators.
|
|
100
112
|
"""
|
|
101
113
|
# Check Lambda first as it's most specific
|
|
102
|
-
if
|
|
103
|
-
if
|
|
114
|
+
if get_aws_lambda_function_name():
|
|
115
|
+
if (get_aws_execution_env() or "").startswith("CloudFormation"):
|
|
104
116
|
return AwsEnvironment.CLOUD_FORMATION
|
|
105
117
|
return AwsEnvironment.LAMBDA
|
|
106
118
|
|
|
107
119
|
# Check EKS (IRSA)
|
|
108
|
-
if
|
|
120
|
+
if get_aws_web_identity_token_file() and get_aws_role_arn():
|
|
109
121
|
return AwsEnvironment.EKS
|
|
110
122
|
|
|
111
123
|
# Check App Runner
|
|
112
|
-
if
|
|
124
|
+
if get_aws_app_runner_service_id():
|
|
113
125
|
return AwsEnvironment.APP_RUNNER
|
|
114
126
|
|
|
115
127
|
# Check ECS
|
|
116
|
-
if
|
|
117
|
-
"ECS_CONTAINER_METADATA_URI"
|
|
118
|
-
):
|
|
128
|
+
if get_ecs_container_metadata_uri_v4() or get_ecs_container_metadata_uri():
|
|
119
129
|
return AwsEnvironment.ECS
|
|
120
130
|
|
|
121
131
|
# Check Elastic Beanstalk
|
|
122
|
-
if
|
|
132
|
+
if get_elastic_beanstalk_environment_name():
|
|
123
133
|
return AwsEnvironment.BEANSTALK
|
|
124
134
|
|
|
125
135
|
if is_running_on_ec2():
|
|
@@ -154,7 +164,7 @@ def get_instance_role_arn() -> Optional[str]:
|
|
|
154
164
|
def get_lambda_role_arn() -> Optional[str]:
|
|
155
165
|
"""Get the Lambda function's role ARN"""
|
|
156
166
|
try:
|
|
157
|
-
function_name =
|
|
167
|
+
function_name = get_aws_lambda_function_name()
|
|
158
168
|
if not function_name:
|
|
159
169
|
return None
|
|
160
170
|
|
|
@@ -180,7 +190,7 @@ def get_current_identity() -> Tuple[Optional[str], Optional[str]]:
|
|
|
180
190
|
return role_arn, AwsServicePrincipal.LAMBDA.value
|
|
181
191
|
|
|
182
192
|
elif env == AwsEnvironment.EKS:
|
|
183
|
-
role_arn =
|
|
193
|
+
role_arn = get_aws_role_arn()
|
|
184
194
|
return role_arn, AwsServicePrincipal.EKS.value
|
|
185
195
|
|
|
186
196
|
elif env == AwsEnvironment.APP_RUNNER:
|
|
@@ -193,8 +203,8 @@ def get_current_identity() -> Tuple[Optional[str], Optional[str]]:
|
|
|
193
203
|
|
|
194
204
|
elif env == AwsEnvironment.ECS:
|
|
195
205
|
try:
|
|
196
|
-
metadata_uri =
|
|
197
|
-
|
|
206
|
+
metadata_uri = (
|
|
207
|
+
get_ecs_container_metadata_uri_v4() or get_ecs_container_metadata_uri()
|
|
198
208
|
)
|
|
199
209
|
if metadata_uri:
|
|
200
210
|
response = requests.get(f"{metadata_uri}/task", timeout=1)
|
|
@@ -454,6 +464,168 @@ class AwsConnectionConfig(ConfigModel):
|
|
|
454
464
|
def get_sagemaker_client(self) -> "SageMakerClient":
|
|
455
465
|
return self.get_session().client("sagemaker", config=self._aws_config())
|
|
456
466
|
|
|
467
|
+
def get_lakeformation_client(self) -> "LakeFormationClient":
    """Create a Lake Formation client from this config's session and client settings."""
    session = self.get_session()
    client_config = self._aws_config()
    return session.client("lakeformation", config=client_config)
|
|
469
|
+
|
|
470
|
+
def get_rds_client(self):
    """Create an RDS client; it is used to generate IAM auth tokens."""
    session = self.get_session()
    client_config = self._aws_config()
    return session.client("rds", config=client_config)
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
def generate_rds_iam_token(
    endpoint: str,
    username: str,
    port: int,
    aws_config: AwsConnectionConfig,
) -> str:
    """
    Request an AWS RDS IAM authentication token for a database user.

    The token comes from the RDS client's generate_db_auth_token(), which
    yields a presigned URL shaped like:
    "hostname:port/?Action=connect&DBUser=username&X-Amz-Date=...&X-Amz-Expires=..."

    Pass the returned string unchanged to the database driver
    (pymysql/psycopg2) as the password.

    Args:
        endpoint: RDS endpoint hostname
        username: Database username for IAM authentication
        port: Database port (5432 for PostgreSQL, 3306 for MySQL)
        aws_config: AwsConnectionConfig that supplies the RDS client

    Returns:
        Authentication token (presigned URL format)

    Raises:
        ValueError: If AWS credentials are not found or the client reports
            an error while generating the token
    """
    try:
        rds = aws_config.get_rds_client()
        auth_token = rds.generate_db_auth_token(
            DBHostname=endpoint, Port=port, DBUsername=username
        )
    except NoCredentialsError as err:
        raise ValueError("AWS credentials not found") from err
    except ClientError as err:
        raise ValueError(f"Failed to generate RDS IAM token: {err}") from err
    else:
        # Only reached when the token was produced without error.
        logger.debug(f"Generated RDS IAM token for {username}@{endpoint}:{port}")
        return auth_token
|
|
513
|
+
|
|
514
|
+
|
|
515
|
+
class RDSIAMTokenManager:
    """
    Manages RDS IAM token lifecycle with automatic refresh.

    RDS IAM tokens include expiration information in the URL parameters
    (X-Amz-Date and X-Amz-Expires). This manager parses that expiry from each
    token it generates and requests a new token once the remaining lifetime
    is at or below the configured refresh threshold.
    """

    def __init__(
        self,
        endpoint: str,
        username: str,
        port: int,
        aws_config: "AwsConnectionConfig",
        refresh_threshold_minutes: int = 5,
    ):
        """
        Initialize the token manager. No token is generated until get_token() is called.

        Args:
            endpoint: RDS endpoint hostname
            username: Database username for IAM authentication
            port: Database port
            aws_config: AwsConnectionConfig for session management and credentials
            refresh_threshold_minutes: Refresh token when this many minutes remain before expiry
        """
        self.endpoint = endpoint
        self.username = username
        self.port = port
        self.aws_config = aws_config
        self.refresh_threshold = timedelta(minutes=refresh_threshold_minutes)

        # Both stay None until the first successful refresh.
        self._current_token: Optional[str] = None
        self._token_expires_at: Optional[datetime] = None

    def get_token(self) -> str:
        """
        Get current token, refreshing if necessary.

        Returns:
            Valid authentication token

        Raises:
            ValueError: If generating a new token fails or the new token's
                expiry cannot be parsed.
        """
        if self._needs_refresh():
            self._refresh_token()

        # Invariant: a refresh either stored a token or raised.
        assert self._current_token is not None
        return self._current_token

    def _needs_refresh(self) -> bool:
        """Return True when no token is cached or it is within the refresh threshold of expiry."""
        if self._current_token is None or self._token_expires_at is None:
            return True

        time_until_expiry = self._token_expires_at - datetime.now(timezone.utc)
        return time_until_expiry <= self.refresh_threshold

    def _parse_token_expiry(self, token: str) -> datetime:
        """
        Parse token expiry from X-Amz-Date and X-Amz-Expires URL parameters.

        Args:
            token: RDS IAM authentication token (presigned URL)

        Returns:
            Expiration datetime in UTC (issue time plus validity duration)

        Raises:
            ValueError: If token URL format is invalid or missing required parameters
        """
        try:
            parsed_url = urlparse(token)
            query_params = parse_qs(parsed_url.query)

            # Extract X-Amz-Date (ISO 8601 format: YYYYMMDDTHHMMSSZ)
            amz_date_list = query_params.get("X-Amz-Date")
            if not amz_date_list:
                raise ValueError("Missing X-Amz-Date parameter in RDS IAM token")
            amz_date_str = amz_date_list[0]

            # Extract X-Amz-Expires (duration in seconds)
            amz_expires_list = query_params.get("X-Amz-Expires")
            if not amz_expires_list:
                raise ValueError("Missing X-Amz-Expires parameter in RDS IAM token")
            amz_expires_seconds = int(amz_expires_list[0])

            # Parse X-Amz-Date to an aware UTC datetime
            token_issued_at = datetime.strptime(amz_date_str, "%Y%m%dT%H%M%SZ").replace(
                tzinfo=timezone.utc
            )

            # Calculate expiration
            return token_issued_at + timedelta(seconds=amz_expires_seconds)

        except (ValueError, KeyError, IndexError) as e:
            # The "missing parameter" errors raised above are also wrapped here,
            # so every failure surfaces with the same message prefix.
            raise ValueError(
                f"Failed to parse RDS IAM token expiry: {e}. Token format may be invalid."
            ) from e

    def _refresh_token(self) -> None:
        """
        Generate a new token and store it together with its parsed expiry.

        The expiry is parsed before anything is stored, so a parsing failure
        leaves the previously cached token and expiry untouched instead of
        pairing a new token with a stale expiry.

        Raises:
            ValueError: If token generation fails or the expiry cannot be parsed.
        """
        logger.info("Refreshing RDS IAM authentication token")
        new_token = generate_rds_iam_token(
            endpoint=self.endpoint,
            username=self.username,
            port=self.port,
            aws_config=self.aws_config,
        )
        new_expiry = self._parse_token_expiry(new_token)

        # Commit both fields only after every step that can raise has succeeded.
        self._current_token = new_token
        self._token_expires_at = new_expiry
        logger.debug(f"Token will expire at {self._token_expires_at}")
|
|
628
|
+
|
|
457
629
|
|
|
458
630
|
class AwsSourceConfig(EnvConfigMixin, AwsConnectionConfig):
|
|
459
631
|
"""
|