acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
- datahub/_version.py +1 -1
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +26 -23
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +3 -3
- datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
- datahub/api/graphql/operation.py +10 -6
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +18 -10
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +125 -27
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +153 -229
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +37 -6
- datahub/cli/specific/datacontract_cli.py +54 -7
- datahub/cli/specific/dataproduct_cli.py +2 -15
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +172 -3
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/configuration/common.py +40 -1
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +3 -2
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +8 -4
- datahub/emitter/rest_emitter.py +103 -30
- datahub/entrypoints.py +6 -3
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +165 -58
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +330 -25
- datahub/ingestion/graph/config.py +3 -2
- datahub/ingestion/graph/filters.py +30 -11
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +81 -11
- datahub/ingestion/run/pipeline_config.py +2 -2
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +13 -5
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +6 -8
- datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
- datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
- datahub/ingestion/source/common/subtypes.py +53 -0
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
- datahub/ingestion/source/datahub/config.py +12 -9
- datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
- datahub/ingestion/source/datahub/datahub_source.py +10 -0
- datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
- datahub/ingestion/source/dbt/dbt_common.py +224 -9
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +132 -98
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +8 -10
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +66 -7
- datahub/ingestion/source/fivetran/fivetran.py +227 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
- datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gcs/gcs_source.py +32 -4
- datahub/ingestion/source/ge_data_profiler.py +108 -31
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/api.py +28 -1
- datahub/ingestion/source/hex/hex.py +16 -5
- datahub/ingestion/source/hex/mapper.py +16 -2
- datahub/ingestion/source/hex/model.py +2 -0
- datahub/ingestion/source/hex/query_fetcher.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +123 -59
- datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
- datahub/ingestion/source/looker/looker_common.py +148 -79
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +503 -547
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +96 -117
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +9 -9
- datahub/ingestion/source/mlflow.py +12 -2
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +26 -5
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
- datahub/ingestion/source/nifi.py +2 -2
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/powerbi/config.py +47 -21
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
- datahub/ingestion/source/powerbi/powerbi.py +10 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/query.py +23 -19
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +6 -5
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +449 -248
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +10 -16
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +3 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
- datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
- datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
- datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
- datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +217 -25
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +24 -8
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +2 -2
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +4 -3
- datahub/ingestion/source/sql/hive_metastore.py +19 -20
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +3 -1
- datahub/ingestion/source/sql/mssql/source.py +336 -57
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +5 -5
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +2 -1
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +22 -0
- datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +11 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/superset.py +314 -67
- datahub/ingestion/source/tableau/tableau.py +135 -59
- datahub/ingestion/source/tableau/tableau_common.py +9 -2
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/unity/config.py +160 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
- datahub/ingestion/source/usage/usage_common.py +4 -3
- datahub/ingestion/source/vertexai/vertexai.py +1 -1
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +6806 -4871
- datahub/metadata/_urns/urn_defs.py +1767 -1539
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18395 -16979
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +4 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +61 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/sdk/__init__.py +2 -0
- datahub/sdk/_all_entities.py +7 -0
- datahub/sdk/_shared.py +249 -5
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +7 -0
- datahub/sdk/datajob.py +45 -13
- datahub/sdk/dataset.py +56 -2
- datahub/sdk/entity_client.py +111 -9
- datahub/sdk/lineage_client.py +663 -82
- datahub/sdk/main_client.py +50 -16
- datahub/sdk/mlmodel.py +120 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +7 -3
- datahub/sdk/search_filters.py +304 -36
- datahub/secret/datahub_secret_store.py +3 -0
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +39 -59
- datahub/sql_parsing/split_statements.py +13 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
- datahub/sql_parsing/sqlglot_lineage.py +196 -42
- datahub/sql_parsing/sqlglot_utils.py +12 -4
- datahub/sql_parsing/tool_meta_extractor.py +1 -3
- datahub/telemetry/telemetry.py +28 -14
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +73 -17
- datahub/utilities/file_backed_collections.py +8 -9
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +22 -6
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +10 -1
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -466
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -18,6 +18,7 @@ from sqlalchemy.sql import sqltypes
|
|
|
18
18
|
from sqlalchemy.types import BOOLEAN, DATE, DATETIME, INTEGER
|
|
19
19
|
|
|
20
20
|
import datahub.emitter.mce_builder as builder
|
|
21
|
+
from datahub.configuration.common import HiddenFromDocs, LaxStr
|
|
21
22
|
from datahub.configuration.source_common import DatasetLineageProviderConfigBase
|
|
22
23
|
from datahub.configuration.time_window_config import BaseTimeWindowConfig
|
|
23
24
|
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
|
|
@@ -32,6 +33,7 @@ from datahub.ingestion.api.decorators import (
|
|
|
32
33
|
support_status,
|
|
33
34
|
)
|
|
34
35
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
36
|
+
from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
|
|
35
37
|
from datahub.ingestion.source.sql.sql_common import (
|
|
36
38
|
SqlWorkUnit,
|
|
37
39
|
logger,
|
|
@@ -127,16 +129,20 @@ class ClickHouseConfig(
|
|
|
127
129
|
):
|
|
128
130
|
# defaults
|
|
129
131
|
host_port: str = Field(default="localhost:8123", description="ClickHouse host URL.")
|
|
130
|
-
scheme: str = Field(default="clickhouse"
|
|
132
|
+
scheme: HiddenFromDocs[str] = Field(default="clickhouse")
|
|
131
133
|
password: pydantic.SecretStr = Field(
|
|
132
134
|
default=pydantic.SecretStr(""), description="password"
|
|
133
135
|
)
|
|
134
|
-
secure: Optional[bool] = Field(
|
|
135
|
-
|
|
136
|
+
secure: Optional[bool] = Field(
|
|
137
|
+
default=None, description="[deprecated] Use uri_opts instead."
|
|
138
|
+
)
|
|
139
|
+
protocol: Optional[str] = Field(
|
|
140
|
+
default=None, description="[deprecated] Use uri_opts instead."
|
|
141
|
+
)
|
|
136
142
|
_deprecate_secure = pydantic_field_deprecated("secure")
|
|
137
143
|
_deprecate_protocol = pydantic_field_deprecated("protocol")
|
|
138
144
|
|
|
139
|
-
uri_opts: Dict[str,
|
|
145
|
+
uri_opts: Dict[str, LaxStr] = Field(
|
|
140
146
|
default={},
|
|
141
147
|
description="The part of the URI and it's used to provide additional configuration options or parameters for the database connection.",
|
|
142
148
|
)
|
|
@@ -184,9 +190,9 @@ class ClickHouseConfig(
|
|
|
184
190
|
"Initializing uri_opts from deprecated secure or protocol options"
|
|
185
191
|
)
|
|
186
192
|
values["uri_opts"] = {}
|
|
187
|
-
if secure:
|
|
188
|
-
values["uri_opts"]["secure"] = secure
|
|
189
|
-
if protocol:
|
|
193
|
+
if secure is not None:
|
|
194
|
+
values["uri_opts"]["secure"] = str(secure)
|
|
195
|
+
if protocol is not None:
|
|
190
196
|
values["uri_opts"]["protocol"] = protocol
|
|
191
197
|
logger.debug(f"uri_opts: {uri_opts}")
|
|
192
198
|
elif (secure or protocol) and uri_opts:
|
|
@@ -379,8 +385,18 @@ clickhouse_datetime_format = "%Y-%m-%d %H:%M:%S"
|
|
|
379
385
|
@platform_name("ClickHouse")
|
|
380
386
|
@config_class(ClickHouseConfig)
|
|
381
387
|
@support_status(SupportStatus.CERTIFIED)
|
|
382
|
-
@capability(
|
|
388
|
+
@capability(
|
|
389
|
+
SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
|
|
390
|
+
)
|
|
383
391
|
@capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
|
|
392
|
+
@capability(
|
|
393
|
+
SourceCapability.LINEAGE_COARSE,
|
|
394
|
+
"Enabled by default to get lineage for views via `include_view_lineage`",
|
|
395
|
+
subtype_modifier=[
|
|
396
|
+
SourceCapabilityModifier.VIEW,
|
|
397
|
+
SourceCapabilityModifier.TABLE,
|
|
398
|
+
],
|
|
399
|
+
)
|
|
384
400
|
class ClickHouseSource(TwoTierSQLAlchemySource):
|
|
385
401
|
"""
|
|
386
402
|
This plugin extracts the following:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from pydantic.fields import Field
|
|
2
2
|
|
|
3
|
-
from datahub.configuration.common import AllowDenyPattern
|
|
3
|
+
from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
|
|
4
4
|
from datahub.ingestion.api.common import PipelineContext
|
|
5
5
|
from datahub.ingestion.api.decorators import (
|
|
6
6
|
SourceCapability,
|
|
@@ -14,8 +14,10 @@ from datahub.ingestion.source.sql.postgres import PostgresConfig, PostgresSource
|
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class CockroachDBConfig(PostgresConfig):
|
|
17
|
-
scheme = Field(
|
|
18
|
-
|
|
17
|
+
scheme: HiddenFromDocs[str] = Field(
|
|
18
|
+
default="cockroachdb+psycopg2", description="database scheme"
|
|
19
|
+
)
|
|
20
|
+
schema_pattern: AllowDenyPattern = Field(
|
|
19
21
|
default=AllowDenyPattern(deny=["information_schema", "crdb_internal"])
|
|
20
22
|
)
|
|
21
23
|
|
|
@@ -26,7 +28,6 @@ class CockroachDBConfig(PostgresConfig):
|
|
|
26
28
|
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
|
|
27
29
|
@capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
|
|
28
30
|
@capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
|
|
29
|
-
@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
|
|
30
31
|
class CockroachDBSource(PostgresSource):
|
|
31
32
|
config: CockroachDBConfig
|
|
32
33
|
|
|
@@ -6,7 +6,7 @@ from pydantic.fields import Field
|
|
|
6
6
|
from pydruid.db.sqlalchemy import DruidDialect
|
|
7
7
|
from sqlalchemy.exc import ResourceClosedError
|
|
8
8
|
|
|
9
|
-
from datahub.configuration.common import AllowDenyPattern
|
|
9
|
+
from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
|
|
10
10
|
from datahub.ingestion.api.decorators import (
|
|
11
11
|
SourceCapability,
|
|
12
12
|
SupportStatus,
|
|
@@ -34,7 +34,7 @@ DruidDialect.get_table_names = get_table_names
|
|
|
34
34
|
|
|
35
35
|
class DruidConfig(BasicSQLAlchemyConfig):
|
|
36
36
|
# defaults
|
|
37
|
-
scheme: str = "druid"
|
|
37
|
+
scheme: HiddenFromDocs[str] = "druid"
|
|
38
38
|
schema_pattern: AllowDenyPattern = Field(
|
|
39
39
|
default=AllowDenyPattern(deny=["^(lookup|sysgit|view).*"]),
|
|
40
40
|
description="regex patterns for schemas to filter in ingestion.",
|
|
@@ -27,7 +27,9 @@ class HanaConfig(BasicSQLAlchemyConfig):
|
|
|
27
27
|
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
|
|
28
28
|
@capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
|
|
29
29
|
@capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
|
|
30
|
-
@capability(
|
|
30
|
+
@capability(
|
|
31
|
+
SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
|
|
32
|
+
)
|
|
31
33
|
class HanaSource(SQLAlchemySource):
|
|
32
34
|
def __init__(self, config: HanaConfig, ctx: PipelineContext):
|
|
33
35
|
super().__init__(config, ctx, "hana")
|
|
@@ -6,7 +6,7 @@ from enum import Enum
|
|
|
6
6
|
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
|
|
7
7
|
from urllib.parse import urlparse
|
|
8
8
|
|
|
9
|
-
from pydantic
|
|
9
|
+
from pydantic import validator
|
|
10
10
|
from pydantic.fields import Field
|
|
11
11
|
|
|
12
12
|
# This import verifies that the dependencies are available.
|
|
@@ -14,6 +14,7 @@ from pyhive import hive # noqa: F401
|
|
|
14
14
|
from pyhive.sqlalchemy_hive import HiveDate, HiveDecimal, HiveDialect, HiveTimestamp
|
|
15
15
|
from sqlalchemy.engine.reflection import Inspector
|
|
16
16
|
|
|
17
|
+
from datahub.configuration.common import HiddenFromDocs
|
|
17
18
|
from datahub.emitter.mce_builder import (
|
|
18
19
|
make_data_platform_urn,
|
|
19
20
|
make_dataplatform_instance_urn,
|
|
@@ -651,10 +652,10 @@ HiveDialect.get_view_definition = get_view_definition_patched
|
|
|
651
652
|
|
|
652
653
|
class HiveConfig(TwoTierSQLAlchemyConfig):
|
|
653
654
|
# defaults
|
|
654
|
-
scheme: str = Field(default="hive"
|
|
655
|
+
scheme: HiddenFromDocs[str] = Field(default="hive")
|
|
655
656
|
|
|
656
657
|
# Overriding as table location lineage is richer implementation here than with include_table_location_lineage
|
|
657
|
-
include_table_location_lineage: bool = Field(default=False
|
|
658
|
+
include_table_location_lineage: HiddenFromDocs[bool] = Field(default=False)
|
|
658
659
|
|
|
659
660
|
emit_storage_lineage: bool = Field(
|
|
660
661
|
default=False,
|
|
@@ -1,17 +1,15 @@
|
|
|
1
1
|
import base64
|
|
2
|
+
import dataclasses
|
|
2
3
|
import json
|
|
3
4
|
import logging
|
|
4
5
|
from collections import namedtuple
|
|
5
6
|
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
|
|
6
7
|
|
|
7
|
-
from pydantic
|
|
8
|
-
from pydantic.fields import Field
|
|
9
|
-
|
|
10
|
-
# This import verifies that the dependencies are available.
|
|
8
|
+
from pydantic import Field
|
|
11
9
|
from sqlalchemy import create_engine, text
|
|
12
10
|
from sqlalchemy.engine.reflection import Inspector
|
|
13
11
|
|
|
14
|
-
from datahub.configuration.common import AllowDenyPattern
|
|
12
|
+
from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
|
|
15
13
|
from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance
|
|
16
14
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
17
15
|
from datahub.ingestion.api.common import PipelineContext
|
|
@@ -27,6 +25,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
|
27
25
|
from datahub.ingestion.source.common.subtypes import (
|
|
28
26
|
DatasetContainerSubTypes,
|
|
29
27
|
DatasetSubTypes,
|
|
28
|
+
SourceCapabilityModifier,
|
|
30
29
|
)
|
|
31
30
|
from datahub.ingestion.source.sql.sql_common import (
|
|
32
31
|
SQLAlchemySource,
|
|
@@ -52,7 +51,6 @@ from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import Dataset
|
|
|
52
51
|
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
|
|
53
52
|
from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField
|
|
54
53
|
from datahub.metadata.schema_classes import (
|
|
55
|
-
ChangeTypeClass,
|
|
56
54
|
DatasetPropertiesClass,
|
|
57
55
|
SubTypesClass,
|
|
58
56
|
ViewPropertiesClass,
|
|
@@ -73,7 +71,7 @@ class HiveMetastoreConfigMode(StrEnum):
|
|
|
73
71
|
trino = "trino"
|
|
74
72
|
|
|
75
73
|
|
|
76
|
-
@dataclass
|
|
74
|
+
@dataclasses.dataclass
|
|
77
75
|
class ViewDataset:
|
|
78
76
|
dataset_name: str
|
|
79
77
|
schema_name: str
|
|
@@ -99,7 +97,7 @@ class HiveMetastore(BasicSQLAlchemyConfig):
|
|
|
99
97
|
default="localhost:3306",
|
|
100
98
|
description="Host URL and port to connect to. Example: localhost:3306",
|
|
101
99
|
)
|
|
102
|
-
scheme: str = Field(default="mysql+pymysql", hidden_from_docs=True)
|
|
100
|
+
scheme: HiddenFromDocs[str] = Field(default="mysql+pymysql")
|
|
103
101
|
|
|
104
102
|
database_pattern: AllowDenyPattern = Field(
|
|
105
103
|
default=AllowDenyPattern.allow_all(),
|
|
@@ -123,8 +121,8 @@ class HiveMetastore(BasicSQLAlchemyConfig):
|
|
|
123
121
|
description="Dataset Subtype name to be 'Table' or 'View' Valid options: ['True', 'False']",
|
|
124
122
|
)
|
|
125
123
|
|
|
126
|
-
include_view_lineage: bool = Field(
|
|
127
|
-
default=False,
|
|
124
|
+
include_view_lineage: HiddenFromDocs[bool] = Field(
|
|
125
|
+
default=False,
|
|
128
126
|
)
|
|
129
127
|
|
|
130
128
|
include_catalog_name_in_ids: bool = Field(
|
|
@@ -161,12 +159,22 @@ class HiveMetastore(BasicSQLAlchemyConfig):
|
|
|
161
159
|
@platform_name("Hive Metastore")
|
|
162
160
|
@config_class(HiveMetastore)
|
|
163
161
|
@support_status(SupportStatus.CERTIFIED)
|
|
164
|
-
@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
|
|
162
|
+
@capability(
|
|
163
|
+
SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
|
|
164
|
+
)
|
|
165
165
|
@capability(SourceCapability.DATA_PROFILING, "Not Supported", False)
|
|
166
166
|
@capability(SourceCapability.CLASSIFICATION, "Not Supported", False)
|
|
167
167
|
@capability(
|
|
168
168
|
SourceCapability.LINEAGE_COARSE, "View lineage is not supported", supported=False
|
|
169
169
|
)
|
|
170
|
+
@capability(
|
|
171
|
+
SourceCapability.CONTAINERS,
|
|
172
|
+
"Enabled by default",
|
|
173
|
+
subtype_modifier=[
|
|
174
|
+
SourceCapabilityModifier.CATALOG,
|
|
175
|
+
SourceCapabilityModifier.SCHEMA,
|
|
176
|
+
],
|
|
177
|
+
)
|
|
170
178
|
class HiveMetastoreSource(SQLAlchemySource):
|
|
171
179
|
"""
|
|
172
180
|
This plugin extracts the following:
|
|
@@ -599,10 +607,7 @@ class HiveMetastoreSource(SQLAlchemySource):
|
|
|
599
607
|
yield dpi_aspect
|
|
600
608
|
|
|
601
609
|
yield MetadataChangeProposalWrapper(
|
|
602
|
-
entityType="dataset",
|
|
603
|
-
changeType=ChangeTypeClass.UPSERT,
|
|
604
610
|
entityUrn=dataset_urn,
|
|
605
|
-
aspectName="subTypes",
|
|
606
611
|
aspect=SubTypesClass(typeNames=[self.table_subtype]),
|
|
607
612
|
).as_workunit()
|
|
608
613
|
|
|
@@ -808,10 +813,7 @@ class HiveMetastoreSource(SQLAlchemySource):
|
|
|
808
813
|
|
|
809
814
|
# Add views subtype
|
|
810
815
|
yield MetadataChangeProposalWrapper(
|
|
811
|
-
entityType="dataset",
|
|
812
|
-
changeType=ChangeTypeClass.UPSERT,
|
|
813
816
|
entityUrn=dataset_urn,
|
|
814
|
-
aspectName="subTypes",
|
|
815
817
|
aspect=SubTypesClass(typeNames=[self.view_subtype]),
|
|
816
818
|
).as_workunit()
|
|
817
819
|
|
|
@@ -822,10 +824,7 @@ class HiveMetastoreSource(SQLAlchemySource):
|
|
|
822
824
|
viewLogic=dataset.view_definition if dataset.view_definition else "",
|
|
823
825
|
)
|
|
824
826
|
yield MetadataChangeProposalWrapper(
|
|
825
|
-
entityType="dataset",
|
|
826
|
-
changeType=ChangeTypeClass.UPSERT,
|
|
827
827
|
entityUrn=dataset_urn,
|
|
828
|
-
aspectName="viewProperties",
|
|
829
828
|
aspect=view_properties_aspect,
|
|
830
829
|
).as_workunit()
|
|
831
830
|
|
|
@@ -15,7 +15,6 @@ from datahub.ingestion.source.sql.mysql import MySQLConfig, MySQLSource
|
|
|
15
15
|
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
|
|
16
16
|
@capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
|
|
17
17
|
@capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
|
|
18
|
-
@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
|
|
19
18
|
class MariaDBSource(MySQLSource):
|
|
20
19
|
def get_platform(self):
|
|
21
20
|
return "mariadb"
|
|
@@ -134,7 +134,9 @@ class StoredProcedure:
|
|
|
134
134
|
|
|
135
135
|
@property
|
|
136
136
|
def escape_full_name(self) -> str:
|
|
137
|
-
return f"[{self.db}].[{self.schema}].[{self.formatted_name}]"
|
|
137
|
+
return f"[{self.db}].[{self.schema}].[{self.formatted_name}]".replace(
|
|
138
|
+
"'", r"''"
|
|
139
|
+
)
|
|
138
140
|
|
|
139
141
|
def to_base_procedure(self) -> BaseProcedure:
|
|
140
142
|
return BaseProcedure(
|