acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in the public registry.
Potentially problematic release: this version of acryl-datahub has been flagged as potentially problematic.
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +18 -3
- datahub/api/entities/datajob/datajob.py +24 -4
- datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +47 -72
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +37 -37
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/base.py +8 -6
- datahub/api/graphql/operation.py +14 -10
- datahub/cli/check_cli.py +91 -9
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +20 -12
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +133 -34
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +155 -231
- datahub/cli/exists_cli.py +2 -3
- datahub/cli/get_cli.py +2 -3
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +11 -5
- datahub/cli/ingest_cli.py +25 -26
- datahub/cli/migrate.py +12 -9
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +4 -6
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +39 -7
- datahub/cli/specific/datacontract_cli.py +57 -9
- datahub/cli/specific/dataproduct_cli.py +12 -24
- datahub/cli/specific/dataset_cli.py +31 -21
- datahub/cli/specific/forms_cli.py +2 -5
- datahub/cli/specific/group_cli.py +2 -3
- datahub/cli/specific/structuredproperties_cli.py +5 -7
- datahub/cli/specific/user_cli.py +174 -4
- datahub/cli/state_cli.py +2 -3
- datahub/cli/timeline_cli.py +2 -3
- datahub/configuration/common.py +46 -2
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +4 -3
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +12 -8
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/mcp_builder.py +12 -0
- datahub/emitter/request_helper.py +138 -15
- datahub/emitter/response_helper.py +111 -19
- datahub/emitter/rest_emitter.py +399 -163
- datahub/entrypoints.py +10 -5
- datahub/errors.py +12 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/common.py +9 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +174 -62
- datahub/ingestion/api/source_helpers.py +41 -3
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +31 -5
- datahub/ingestion/glossary/classification_mixin.py +9 -2
- datahub/ingestion/graph/client.py +492 -55
- datahub/ingestion/graph/config.py +18 -2
- datahub/ingestion/graph/filters.py +96 -32
- datahub/ingestion/graph/links.py +55 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +90 -23
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +31 -23
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/apply/datahub_apply.py +6 -5
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +7 -18
- datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
- datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
- datahub/ingestion/source/common/subtypes.py +73 -1
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
- datahub/ingestion/source/data_lake_common/object_store.py +732 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
- datahub/ingestion/source/datahub/config.py +19 -5
- datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
- datahub/ingestion/source/datahub/datahub_source.py +11 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
- datahub/ingestion/source/dbt/dbt_common.py +270 -26
- datahub/ingestion/source/dbt/dbt_core.py +88 -47
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +228 -215
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +12 -14
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +67 -8
- datahub/ingestion/source/fivetran/fivetran.py +228 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
- datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
- datahub/ingestion/source/gcs/gcs_source.py +53 -10
- datahub/ingestion/source/gcs/gcs_utils.py +36 -9
- datahub/ingestion/source/ge_data_profiler.py +146 -33
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +402 -0
- datahub/ingestion/source/hex/constants.py +8 -0
- datahub/ingestion/source/hex/hex.py +311 -0
- datahub/ingestion/source/hex/mapper.py +412 -0
- datahub/ingestion/source/hex/model.py +78 -0
- datahub/ingestion/source/hex/query_fetcher.py +307 -0
- datahub/ingestion/source/iceberg/iceberg.py +385 -164
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +28 -71
- datahub/ingestion/source/kafka/kafka_config.py +78 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
- datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +216 -86
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +539 -555
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +103 -118
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +11 -10
- datahub/ingestion/source/mlflow.py +254 -23
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +359 -181
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
- datahub/ingestion/source/nifi.py +5 -5
- datahub/ingestion/source/openapi.py +85 -38
- datahub/ingestion/source/openapi_parser.py +59 -40
- datahub/ingestion/source/powerbi/config.py +92 -27
- datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +66 -32
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/profile.py +2 -2
- datahub/ingestion/source/redshift/query.py +24 -20
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +13 -11
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +515 -244
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/config.py +75 -8
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +36 -7
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +403 -140
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
- datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
- datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
- datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
- datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +219 -26
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +29 -9
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +9 -4
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +28 -8
- datahub/ingestion/source/sql/hive_metastore.py +24 -25
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +18 -2
- datahub/ingestion/source/sql/mssql/source.py +376 -62
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +62 -11
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +20 -2
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +27 -2
- datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
- datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
- datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +43 -10
- datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/superset.py +810 -126
- datahub/ingestion/source/tableau/tableau.py +172 -69
- datahub/ingestion/source/tableau/tableau_common.py +11 -4
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +161 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
- datahub/ingestion/source/usage/usage_common.py +4 -68
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1367 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
- datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/dataset_domain.py +1 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/common.py +3 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
- datahub/metadata/_urns/urn_defs.py +1866 -1582
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18404 -16617
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/Deprecation.avsc +2 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +21 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QueryProperties.avsc +24 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +147 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +7 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_shared.py +393 -10
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +180 -4
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +154 -12
- datahub/sdk/lineage_client.py +943 -0
- datahub/sdk/main_client.py +83 -8
- datahub/sdk/mlmodel.py +383 -0
- datahub/sdk/mlmodelgroup.py +240 -0
- datahub/sdk/search_client.py +85 -8
- datahub/sdk/search_filters.py +393 -68
- datahub/secret/datahub_secret_store.py +5 -1
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +51 -59
- datahub/sql_parsing/_sqlglot_patch.py +1 -2
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/split_statements.py +30 -3
- datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
- datahub/sql_parsing/sqlglot_lineage.py +517 -44
- datahub/sql_parsing/sqlglot_utils.py +30 -18
- datahub/sql_parsing/tool_meta_extractor.py +25 -2
- datahub/telemetry/telemetry.py +30 -16
- datahub/testing/check_imports.py +1 -1
- datahub/testing/docker_utils.py +8 -2
- datahub/testing/mce_helpers.py +421 -0
- datahub/testing/mcp_diff.py +17 -21
- datahub/testing/sdk_v2_helpers.py +18 -0
- datahub/upgrade/upgrade.py +86 -30
- datahub/utilities/file_backed_collections.py +14 -15
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/ingest_utils.py +2 -2
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +30 -7
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +298 -10
- datahub/utilities/sqlalchemy_query_combiner.py +6 -4
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/utilities/urn_encoder.py +1 -1
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -458
- datahub/ingestion/source/vertexai.py +0 -697
- datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
datahub/cli/iceberg_cli.py
CHANGED
@@ -13,8 +13,10 @@ import datahub.metadata.schema_classes
 from datahub.cli.cli_utils import post_entity
 from datahub.configuration.common import GraphError
 from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.metadata.schema_classes import SystemMetadataClass
 from datahub.telemetry import telemetry
+from datahub.upgrade import upgrade
 
 logger = logging.getLogger(__name__)
 
@@ -163,6 +165,7 @@ def validate_warehouse(data_root: str) -> None:
     help=f"Expiration duration for temporary credentials used for role. Defaults to {DEFAULT_CREDS_EXPIRY_DURATION_SECONDS} seconds if unspecified",
 )
 @telemetry.with_telemetry(capture_kwargs=["duration_seconds"])
+@upgrade.check_upgrade
 def create(
     warehouse: str,
     description: Optional[str],
@@ -178,7 +181,7 @@ def create(
     Create an iceberg warehouse.
     """
 
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
 
     urn = iceberg_data_platform_instance_urn(warehouse)
 
@@ -316,6 +319,7 @@ def create(
     help=f"Expiration duration for temporary credentials used for role. Defaults to {DEFAULT_CREDS_EXPIRY_DURATION_SECONDS} seconds if unspecified",
 )
 @telemetry.with_telemetry(capture_kwargs=["duration_seconds"])
+@upgrade.check_upgrade
 def update(
     warehouse: str,
     data_root: str,
@@ -331,7 +335,7 @@ def update(
     Update iceberg warehouses. Can only update credentials, and role. Cannot update region
     """
 
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
 
     urn = iceberg_data_platform_instance_urn(warehouse)
 
@@ -402,12 +406,13 @@ def update(
 
 @iceberg.command()
 @telemetry.with_telemetry()
+@upgrade.check_upgrade
 def list() -> None:
     """
     List iceberg warehouses
     """
 
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
 
     for warehouse in get_all_warehouses(client):
         click.echo(warehouse)
@@ -418,9 +423,10 @@ def list() -> None:
     "-w", "--warehouse", required=True, type=str, help="The name of the warehouse"
 )
 @telemetry.with_telemetry()
+@upgrade.check_upgrade
 def get(warehouse: str) -> None:
     """Fetches the details of the specified iceberg warehouse"""
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
     urn = iceberg_data_platform_instance_urn(warehouse)
 
     if client.exists(urn):
@@ -455,7 +461,7 @@ def delete(warehouse: str, dry_run: bool, force: bool) -> None:
 
     urn = iceberg_data_platform_instance_urn(warehouse)
 
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
 
     if not client.exists(urn):
         raise click.ClickException(f"urn {urn} not found")
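
Every command touched in this file follows the same pattern: the graph client is now requested in CLI mode, and @upgrade.check_upgrade is stacked beneath the telemetry decorator. A minimal sketch of a command written against that pattern; the command name, option, and body here are illustrative, not part of the package:

import click

from datahub.ingestion.graph.client import get_default_graph
from datahub.ingestion.graph.config import ClientMode
from datahub.telemetry import telemetry
from datahub.upgrade import upgrade


@click.command()
@click.option("--urn", required=True, type=str, help="Entity urn to check.")
@telemetry.with_telemetry()
@upgrade.check_upgrade
def exists(urn: str) -> None:
    """Illustrative command: report whether an entity exists."""
    # ClientMode.CLI marks the underlying client as CLI traffic, which is the
    # change applied to every get_default_graph() call in this diff.
    client = get_default_graph(ClientMode.CLI)
    click.echo("exists" if client.exists(urn) else "not found")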
datahub/cli/ingest_cli.py
CHANGED
@@ -14,16 +14,16 @@ from tabulate import tabulate
 
 from datahub._version import nice_version_name
 from datahub.cli import cli_utils
-from datahub.cli.config_utils import CONDENSED_DATAHUB_CONFIG_PATH
+from datahub.cli.config_utils import CONDENSED_DATAHUB_CONFIG_PATH, load_client_config
 from datahub.configuration.common import GraphError
 from datahub.configuration.config_loader import load_config_file
 from datahub.ingestion.graph.client import get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.ingestion.run.connection import ConnectionManager
 from datahub.ingestion.run.pipeline import Pipeline
 from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade
 from datahub.utilities.ingest_utils import deploy_source_vars
-from datahub.utilities.perf_timer import PerfTimer
 
 logger = logging.getLogger(__name__)
 
@@ -113,6 +113,7 @@ def ingest() -> None:
         "no_progress",
     ]
 )
+@upgrade.check_upgrade
 def run(
     config: str,
     dry_run: bool,
@@ -177,14 +178,7 @@ def run(
         no_progress=no_progress,
         raw_config=raw_pipeline_config,
     )
-
-    ret = run_pipeline_to_completion(pipeline)
-
-    # The main ingestion has completed. If it was successful, potentially show an upgrade nudge message.
-    if ret == 0:
-        upgrade.check_upgrade_post(
-            main_method_runtime=timer.elapsed_seconds(), graph=pipeline.ctx.graph
-        )
+    ret = run_pipeline_to_completion(pipeline)
 
     if ret:
         sys.exit(ret)
@@ -192,8 +186,6 @@ def run(
 
 
 @ingest.command()
-@upgrade.check_upgrade
-@telemetry.with_telemetry()
 @click.option(
     "-n",
     "--name",
@@ -216,9 +208,9 @@ def run(
 @click.option(
     "--executor-id",
     type=str,
-    default="default",
     help="Executor id to route execution requests to. Do not use this unless you have configured a custom executor.",
     required=False,
+    default=None,
 )
 @click.option(
     "--cli-version",
@@ -239,7 +231,7 @@ def run(
     type=str,
     help="Timezone for the schedule in 'America/New_York' format. Uses UTC by default.",
     required=False,
-    default=
+    default=None,
 )
 @click.option(
     "--debug", type=bool, help="Should we debug.", required=False, default=False
@@ -251,14 +243,15 @@ def run(
     required=False,
     default=None,
 )
+@upgrade.check_upgrade
 def deploy(
     name: Optional[str],
     config: str,
     urn: Optional[str],
-    executor_id: str,
+    executor_id: Optional[str],
     cli_version: Optional[str],
     schedule: Optional[str],
-    time_zone: str,
+    time_zone: Optional[str],
     extra_pip: Optional[str],
     debug: bool = False,
 ) -> None:
@@ -269,7 +262,7 @@ def deploy(
         urn:li:dataHubIngestionSource:<name>
     """
 
-    datahub_graph = get_default_graph()
+    datahub_graph = get_default_graph(ClientMode.CLI)
 
     variables = deploy_source_vars(
         name=name,
@@ -360,6 +353,7 @@ def mcps(path: str) -> None:
     """
 
     click.echo("Starting ingestion...")
+    datahub_config = load_client_config()
    recipe: dict = {
        "source": {
            "type": "file",
@@ -367,6 +361,7 @@ def mcps(path: str) -> None:
                "path": path,
            },
        },
+        "datahub_api": datahub_config,
    }
 
    pipeline = Pipeline.create(recipe, report_to=None)
@@ -383,9 +378,11 @@ def mcps(path: str) -> None:
     "--source", type=str, default=None, help="Filter by ingestion source name."
 )
 @upgrade.check_upgrade
-@telemetry.with_telemetry()
 def list_source_runs(page_offset: int, page_size: int, urn: str, source: str) -> None:
-    """
+    """
+    List ingestion source runs with their details, optionally filtered by URN or source.
+    Required the Manage Metadata Ingestion permission.
+    """
 
     query = """
         query listIngestionRuns($input: ListIngestionSourcesInput!) {
@@ -422,7 +419,7 @@ def list_source_runs(page_offset: int, page_size: int, urn: str, source: str) ->
         }
     }
 
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
     session = client._session
     gms_host = client.config.server
 
@@ -443,6 +440,11 @@ def list_source_runs(page_offset: int, page_size: int, urn: str, source: str) ->
     if not data:
         click.echo("No response received from the server.")
         return
+    if "errors" in data:
+        click.echo("Errors in response:")
+        for error in data["errors"]:
+            click.echo(f"- {error.get('message', 'Unknown error')}")
+        return
 
     # a lot of responses can be null if there's errors in the run
     ingestion_sources = (
@@ -504,11 +506,10 @@ def list_source_runs(page_offset: int, page_size: int, urn: str, source: str) ->
     help="If enabled, will list ingestion runs which have been soft deleted",
 )
 @upgrade.check_upgrade
-@telemetry.with_telemetry()
 def list_runs(page_offset: int, page_size: int, include_soft_deletes: bool) -> None:
     """List recent ingestion runs to datahub"""
 
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
     session = client._session
     gms_host = client.config.server
 
@@ -554,12 +555,11 @@ def list_runs(page_offset: int, page_size: int, include_soft_deletes: bool) -> N
 )
 @click.option("-a", "--show-aspect", required=False, is_flag=True)
 @upgrade.check_upgrade
-@telemetry.with_telemetry()
 def show(
     run_id: str, start: int, count: int, include_soft_deletes: bool, show_aspect: bool
 ) -> None:
     """Describe a provided ingestion run to datahub"""
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
     session = client._session
     gms_host = client.config.server
 
@@ -604,12 +604,11 @@ def show(
     help="Path to directory where rollback reports will be saved to",
 )
 @upgrade.check_upgrade
-@telemetry.with_telemetry()
 def rollback(
     run_id: str, force: bool, dry_run: bool, safe: bool, report_dir: str
 ) -> None:
     """Rollback a provided ingestion run to datahub"""
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
 
     if not force and not dry_run:
         click.confirm(
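
The reworked mcps command now loads the saved CLI connection with load_client_config() and passes it into the file-source recipe under "datahub_api", so the generated pipeline reports to the same server the CLI is configured against. A rough standalone sketch of that recipe construction; the helper function and example path are illustrative, not part of the package:

from datahub.cli.config_utils import load_client_config
from datahub.ingestion.run.pipeline import Pipeline


def run_mcp_file(path: str) -> None:
    """Illustrative helper mirroring the reworked `datahub ingest mcps` flow."""
    datahub_config = load_client_config()  # connection details saved by `datahub init`
    recipe: dict = {
        "source": {
            "type": "file",
            "config": {"path": path},
        },
        # New in this release: reuse the CLI connection rather than defaults.
        "datahub_api": datahub_config,
    }
    pipeline = Pipeline.create(recipe, report_to=None)
    pipeline.run()
    pipeline.pretty_print_summary()


# e.g. run_mcp_file("metadata_events.json")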
datahub/cli/migrate.py
CHANGED
@@ -25,6 +25,7 @@ from datahub.emitter.mcp_builder import (
 )
 from datahub.emitter.rest_emitter import DatahubRestEmitter
 from datahub.ingestion.graph.client import (
+    ClientMode,
     DataHubGraph,
     RelatedEntity,
     get_default_graph,
@@ -36,6 +37,7 @@ from datahub.metadata.schema_classes import (
     SystemMetadataClass,
 )
 from datahub.telemetry import telemetry
+from datahub.upgrade import upgrade
 from datahub.utilities.urns.urn import Urn
 
 log = logging.getLogger(__name__)
@@ -76,13 +78,13 @@ class MigrationReport:
     def __repr__(self) -> str:
         repr = f"{self._get_prefix()}Migration Report:\n--------------\n"
         repr += f"{self._get_prefix()}Migration Run Id: {self.run_id}\n"
-        repr += f"{self._get_prefix()}Num entities created = {len(set([x[0] for x in self.entities_created
-        repr += f"{self._get_prefix()}Num entities affected = {len(set([x[0] for x in self.entities_affected
-        repr += f"{self._get_prefix()}Num entities {'kept' if self.keep else 'migrated'} = {len(set([x[0] for x in self.entities_migrated
+        repr += f"{self._get_prefix()}Num entities created = {len(set([x[0] for x in self.entities_created]))}\n"
+        repr += f"{self._get_prefix()}Num entities affected = {len(set([x[0] for x in self.entities_affected]))}\n"
+        repr += f"{self._get_prefix()}Num entities {'kept' if self.keep else 'migrated'} = {len(set([x[0] for x in self.entities_migrated]))}\n"
         repr += f"{self._get_prefix()}Details:\n"
-        repr += f"{self._get_prefix()}New Entities Created: {set([x[0] for x in self.entities_created
-        repr += f"{self._get_prefix()}External Entities Affected: {set([x[0] for x in self.entities_affected
-        repr += f"{self._get_prefix()}Old Entities {'Kept' if self.keep else 'Migrated'} = {set([x[0] for x in self.entities_migrated
+        repr += f"{self._get_prefix()}New Entities Created: {set([x[0] for x in self.entities_created]) or 'None'}\n"
+        repr += f"{self._get_prefix()}External Entities Affected: {set([x[0] for x in self.entities_affected]) or 'None'}\n"
+        repr += f"{self._get_prefix()}Old Entities {'Kept' if self.keep else 'Migrated'} = {set([x[0] for x in self.entities_migrated]) or 'None'}\n"
         return repr
 
 
@@ -118,6 +120,7 @@ def _get_type_from_urn(urn: str) -> str:
     help="When enabled, will not delete (hard/soft) the previous entities.",
 )
 @telemetry.with_telemetry()
+@upgrade.check_upgrade
 def dataplatform2instance(
     instance: str,
     platform: str,
@@ -147,7 +150,7 @@ def dataplatform2instance_func(
     migration_report = MigrationReport(run_id, dry_run, keep)
     system_metadata = SystemMetadataClass(runId=run_id)
 
-    graph = get_default_graph()
+    graph = get_default_graph(ClientMode.CLI)
 
     urns_to_migrate: List[str] = []
 
@@ -386,7 +389,7 @@ def migrate_containers(
 
 
 def get_containers_for_migration(env: str) -> List[Any]:
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
    containers_to_migrate = list(
        client.get_urns_by_filter(entity_types=["container"], env=env)
    )
@@ -445,7 +448,7 @@ def process_container_relationships(
     relationships: Iterable[RelatedEntity] = migration_utils.get_incoming_relationships(
         urn=src_urn
     )
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
     for relationship in relationships:
         log.debug(f"Incoming Relationship: {relationship}")
         target_urn: str = relationship.urn
datahub/cli/migration_utils.py
CHANGED
@@ -12,6 +12,7 @@ from datahub.ingestion.graph.client import (
     RelatedEntity,
     get_default_graph,
 )
+from datahub.ingestion.graph.config import ClientMode
 from datahub.metadata.schema_classes import (
     ChartInfoClass,
     ContainerClass,
@@ -243,7 +244,7 @@ def clone_aspect(
     run_id: str = str(uuid.uuid4()),
     dry_run: bool = False,
 ) -> Iterable[MetadataChangeProposalWrapper]:
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
     aspect_map = cli_utils.get_aspects_for_entity(
         client._session,
         client.config.server,
@@ -274,7 +275,7 @@ def clone_aspect(
 
 
 def get_incoming_relationships(urn: str) -> Iterable[RelatedEntity]:
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
     yield from client.get_related_entities(
         entity_urn=urn,
         relationship_types=[
@@ -290,7 +291,7 @@ def get_incoming_relationships(urn: str) -> Iterable[RelatedEntity]:
 
 
 def get_outgoing_relationships(urn: str) -> Iterable[RelatedEntity]:
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
     yield from client.get_related_entities(
         entity_urn=urn,
         relationship_types=[
datahub/cli/put_cli.py
CHANGED
@@ -8,12 +8,12 @@ from datahub.cli.cli_utils import post_entity
 from datahub.configuration.config_loader import load_config_file
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.graph.client import get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.metadata.schema_classes import (
     DataPlatformInfoClass as DataPlatformInfo,
     PlatformTypeClass,
     SystemMetadataClass,
 )
-from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade
 from datahub.utilities.urns.data_platform_urn import DataPlatformUrn
 from datahub.utilities.urns.urn import guess_entity_type
@@ -44,7 +44,6 @@ def put() -> None:
     help="Run ID into which we should log the aspect.",
 )
 @upgrade.check_upgrade
-@telemetry.with_telemetry()
 def aspect(urn: str, aspect: str, aspect_data: str, run_id: Optional[str]) -> None:
     """Update a single aspect of an entity"""
 
@@ -53,7 +52,7 @@ def aspect(urn: str, aspect: str, aspect_data: str, run_id: Optional[str]) -> No
         aspect_data, allow_stdin=True, resolve_env_vars=False, process_directives=False
     )
 
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
 
     system_metadata: Union[None, SystemMetadataClass] = None
     if run_id:
@@ -74,8 +73,6 @@ def aspect(urn: str, aspect: str, aspect_data: str, run_id: Optional[str]) -> No
 
 @put.command()
 @click.pass_context
-@upgrade.check_upgrade
-@telemetry.with_telemetry()
 @click.option(
     "--name",
     type=str,
@@ -97,6 +94,7 @@ def aspect(urn: str, aspect: str, aspect_data: str, run_id: Optional[str]) -> No
 @click.option(
     "--run-id", type=str, help="Run ID into which we should log the platform."
 )
+@upgrade.check_upgrade
 def platform(
     ctx: click.Context, name: str, display_name: Optional[str], logo: str, run_id: str
 ) -> None:
@@ -118,7 +116,7 @@ def platform(
         displayName=display_name or platform_name,
         logoUrl=logo,
     )
-    datahub_graph = get_default_graph()
+    datahub_graph = get_default_graph(ClientMode.CLI)
     mcp = MetadataChangeProposalWrapper(
         entityUrn=str(platform_urn),
         aspect=data_platform_info,
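
For context, the put platform path above ends by wrapping a DataPlatformInfo aspect in an MCP and emitting it through the CLI-mode graph client. A self-contained sketch of that emission, with a hypothetical platform name, delimiter, and logo URL standing in for the command's options:

from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.graph.client import get_default_graph
from datahub.ingestion.graph.config import ClientMode
from datahub.metadata.schema_classes import (
    DataPlatformInfoClass as DataPlatformInfo,
    PlatformTypeClass,
)
from datahub.utilities.urns.data_platform_urn import DataPlatformUrn

platform_urn = DataPlatformUrn("my_internal_platform")  # hypothetical platform id
data_platform_info = DataPlatformInfo(
    name="my_internal_platform",
    type=PlatformTypeClass.OTHERS,
    datasetNameDelimiter=".",
    displayName="My Internal Platform",
    logoUrl="https://example.com/logo.png",
)

with get_default_graph(ClientMode.CLI) as graph:
    graph.emit(
        MetadataChangeProposalWrapper(
            entityUrn=str(platform_urn),
            aspect=data_platform_info,
        )
    )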
datahub/cli/quickstart_versioning.py
CHANGED
@@ -1,27 +1,50 @@
 import json
 import logging
-import os
 import os.path
 import re
 from typing import Dict, Optional
 
 import click
+import packaging
 import requests
 import yaml
 from packaging.version import parse
 from pydantic import BaseModel
 
+from datahub._version import nice_version_name
+from datahub.configuration.env_vars import get_force_local_quickstart_mapping
+
 logger = logging.getLogger(__name__)
 
-LOCAL_QUICKSTART_MAPPING_FILE =
+LOCAL_QUICKSTART_MAPPING_FILE = get_force_local_quickstart_mapping()
 DEFAULT_LOCAL_CONFIG_PATH = "~/.datahub/quickstart/quickstart_version_mapping.yaml"
 DEFAULT_REMOTE_CONFIG_PATH = "https://raw.githubusercontent.com/datahub-project/datahub/master/docker/quickstart/quickstart_version_mapping.yaml"
 
+MINIMUM_SUPPORTED_VERSION = "v1.1.0"
+
+
+def get_minimum_supported_version_message(version: str) -> str:
+    MINIMUM_SUPPORTED_VERSION_MESSAGE = f"""
+    DataHub CLI Version Compatibility Issue
+
+    You're trying to install DataHub server version {version} which is not supported by this CLI version.
+
+    This CLI (version {nice_version_name()}) only supports installing DataHub server versions {MINIMUM_SUPPORTED_VERSION} and above.
+
+    To install older server versions:
+    1. Uninstall current CLI: pip uninstall acryl-datahub
+    2. Install older CLI: pip install acryl-datahub==1.1
+    3. Run quickstart with your desired version: datahub docker quickstart --version <version>
+
+    For more information: https://docs.datahub.com/docs/quickstart#install-datahub-server
+    """
+    return MINIMUM_SUPPORTED_VERSION_MESSAGE
+
 
 class QuickstartExecutionPlan(BaseModel):
     composefile_git_ref: str
     docker_tag: str
-    mysql_tag: Optional[str]
+    mysql_tag: Optional[str] = None
 
 
 def _is_it_a_version(version: str) -> bool:
@@ -126,15 +149,23 @@ class QuickstartVersionMappingConfig(BaseModel):
                 mysql_tag=str(mysql_tag),
             ),
         )
+
+        if not is_minimum_supported_version(requested_version):
+            click.secho(
+                get_minimum_supported_version_message(version=requested_version),
+                fg="red",
+            )
+            raise click.ClickException("Minimum supported version not met")
+
         # new CLI version is downloading the composefile corresponding to the requested version
-        # if the version is older than
-        #
-        # the
+        # if the version is older than <MINIMUM_SUPPORTED_VERSION>, it doesn't contain the
+        # docker compose based resolved compose file. In those cases, we pick up the composefile from
+        # MINIMUM_SUPPORTED_VERSION which contains the compose file.
         if _is_it_a_version(result.composefile_git_ref):
-            if parse("
-                # The merge commit where
-                # https://github.com/datahub-project/datahub/pull/
-                result.composefile_git_ref = "
+            if parse("v1.2.0") > parse(result.composefile_git_ref):
+                # The merge commit where profiles based resolved compose file was added.
+                # https://github.com/datahub-project/datahub/pull/13566
+                result.composefile_git_ref = "21726bc3341490f4182b904626c793091ac95edd"
 
         return result
 
@@ -148,3 +179,15 @@ def save_quickstart_config(
     with open(path, "w") as f:
         yaml.dump(config.dict(), f)
     logger.info(f"Saved quickstart config to {path}.")
+
+
+def is_minimum_supported_version(version: str) -> bool:
+    if not _is_it_a_version(version):
+        return True
+
+    requested_version = packaging.version.parse(version)
+    minimum_supported_version = packaging.version.parse(MINIMUM_SUPPORTED_VERSION)
+    if requested_version < minimum_supported_version:
+        return False
+
+    return True
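
The new gate above compares the requested server version against MINIMUM_SUPPORTED_VERSION using packaging's version parser, and it only applies when the requested ref actually looks like a version (branch names and commit SHAs pass through untouched). A standalone sketch of the same comparison; the looks_like_version regex below is an assumed stand-in for the module's own _is_it_a_version helper:

import re

from packaging.version import parse

MINIMUM_SUPPORTED_VERSION = "v1.1.0"


def looks_like_version(ref: str) -> bool:
    # Assumed stand-in for _is_it_a_version: accept refs like v1.2.0 or 1.2.0rc1.
    return bool(re.fullmatch(r"v?\d+(\.\d+)+(rc\d+)?", ref))


def is_minimum_supported_version(version: str) -> bool:
    if not looks_like_version(version):
        return True  # branches and commit SHAs are not gated
    return parse(version) >= parse(MINIMUM_SUPPORTED_VERSION)


print(is_minimum_supported_version("v1.2.0"))    # True
print(is_minimum_supported_version("v1.0.0"))    # False
print(is_minimum_supported_version("my-branch")) # True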
datahub/cli/specific/assertions_cli.py
CHANGED
@@ -1,3 +1,8 @@
+"""
+DEPRECATED: This assertions CLI is no longer supported and will be removed in a future version.
+Please use alternative methods for managing assertions in DataHub.
+"""
+
 import logging
 import os
 from pathlib import Path
@@ -15,8 +20,8 @@ from datahub.api.entities.assertion.compiler_interface import (
 from datahub.emitter.mce_builder import make_assertion_urn
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.graph.client import get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.integrations.assertion.registry import ASSERTION_PLATFORMS
-from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade
 
 logger = logging.getLogger(__name__)
@@ -26,20 +31,39 @@ REPORT_FILE_NAME = "compile_report.json"
 
 @click.group(cls=DefaultGroup, default="upsert")
 def assertions() -> None:
-    """A group of commands to interact with the Assertion entity in DataHub.
+    """A group of commands to interact with the Assertion entity in DataHub.
+
+    ⚠️ DEPRECATED: This assertions CLI is no longer supported and will be removed
+    in a future version. Please use alternative methods for managing assertions in DataHub.
+    """
+    click.secho(
+        "⚠️ WARNING: The assertions CLI is deprecated and no longer supported. "
+        "It may be removed in a future version. Please use alternative methods for managing assertions in DataHub.",
+        fg="yellow",
+        bold=True,
+        err=True,
+    )
     pass
 
 
 @assertions.command()
 @click.option("-f", "--file", required=True, type=click.Path(exists=True))
 @upgrade.check_upgrade
-@telemetry.with_telemetry()
 def upsert(file: str) -> None:
-    """Upsert (create or update) a set of assertions in DataHub.
+    """Upsert (create or update) a set of assertions in DataHub.
+
+    ⚠️ DEPRECATED: This command is deprecated and no longer supported.
+    """
+    click.secho(
+        "⚠️ WARNING: The 'upsert' command is deprecated and no longer supported.",
+        fg="yellow",
+        bold=True,
+        err=True,
+    )
 
     assertions_spec: AssertionsConfigSpec = AssertionsConfigSpec.from_yaml(file)
 
-    with get_default_graph() as graph:
+    with get_default_graph(ClientMode.CLI) as graph:
         for assertion_spec in assertions_spec.assertions:
             try:
                 mcp = MetadataChangeProposalWrapper(
@@ -70,8 +94,6 @@ def upsert(file: str) -> None:
     default=[],
     help="Platform-specific extra key-value inputs in form key=value",
 )
-@upgrade.check_upgrade
-@telemetry.with_telemetry()
 def compile(
     file: str, platform: str, output_to: Optional[str], extras: List[str]
 ) -> None:
@@ -81,7 +103,15 @@ def compile(
     In future, we may introduce separate command to automatically apply these compiled changes
     in assertion platform. Currently, generated result artifacts are stored in target folder
     unless another folder is specified using option `--output-to <folder>`.
+
+    ⚠️ DEPRECATED: This command is deprecated and no longer supported.
     """
+    click.secho(
+        "⚠️ WARNING: The 'compile' command is deprecated and no longer supported.",
+        fg="yellow",
+        bold=True,
+        err=True,
+    )
 
     if platform not in ASSERTION_PLATFORMS:
         click.secho(
@@ -149,3 +179,5 @@ def extras_list_to_dict(extras: List[str]) -> Dict[str, str]:
 # Later:
 # 3. execute compiled assertions on assertion platform (Later, requires connection details to platform),
 # 4. cleanup assertions from assertion platform (generate artifacts. optionally execute)
+#
+# NOTE: This entire assertions CLI is deprecated and these TODOs will not be implemented.