acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +18 -3
- datahub/api/entities/datajob/datajob.py +24 -4
- datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +47 -72
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +37 -37
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/base.py +8 -6
- datahub/api/graphql/operation.py +14 -10
- datahub/cli/check_cli.py +91 -9
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +20 -12
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +133 -34
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +155 -231
- datahub/cli/exists_cli.py +2 -3
- datahub/cli/get_cli.py +2 -3
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +11 -5
- datahub/cli/ingest_cli.py +25 -26
- datahub/cli/migrate.py +12 -9
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +4 -6
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +39 -7
- datahub/cli/specific/datacontract_cli.py +57 -9
- datahub/cli/specific/dataproduct_cli.py +12 -24
- datahub/cli/specific/dataset_cli.py +31 -21
- datahub/cli/specific/forms_cli.py +2 -5
- datahub/cli/specific/group_cli.py +2 -3
- datahub/cli/specific/structuredproperties_cli.py +5 -7
- datahub/cli/specific/user_cli.py +174 -4
- datahub/cli/state_cli.py +2 -3
- datahub/cli/timeline_cli.py +2 -3
- datahub/configuration/common.py +46 -2
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +4 -3
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +12 -8
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/mcp_builder.py +12 -0
- datahub/emitter/request_helper.py +138 -15
- datahub/emitter/response_helper.py +111 -19
- datahub/emitter/rest_emitter.py +399 -163
- datahub/entrypoints.py +10 -5
- datahub/errors.py +12 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/common.py +9 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +174 -62
- datahub/ingestion/api/source_helpers.py +41 -3
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +31 -5
- datahub/ingestion/glossary/classification_mixin.py +9 -2
- datahub/ingestion/graph/client.py +492 -55
- datahub/ingestion/graph/config.py +18 -2
- datahub/ingestion/graph/filters.py +96 -32
- datahub/ingestion/graph/links.py +55 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +90 -23
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +31 -23
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/apply/datahub_apply.py +6 -5
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +7 -18
- datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
- datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
- datahub/ingestion/source/common/subtypes.py +73 -1
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
- datahub/ingestion/source/data_lake_common/object_store.py +732 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
- datahub/ingestion/source/datahub/config.py +19 -5
- datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
- datahub/ingestion/source/datahub/datahub_source.py +11 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
- datahub/ingestion/source/dbt/dbt_common.py +270 -26
- datahub/ingestion/source/dbt/dbt_core.py +88 -47
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +228 -215
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +12 -14
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +67 -8
- datahub/ingestion/source/fivetran/fivetran.py +228 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
- datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
- datahub/ingestion/source/gcs/gcs_source.py +53 -10
- datahub/ingestion/source/gcs/gcs_utils.py +36 -9
- datahub/ingestion/source/ge_data_profiler.py +146 -33
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +402 -0
- datahub/ingestion/source/hex/constants.py +8 -0
- datahub/ingestion/source/hex/hex.py +311 -0
- datahub/ingestion/source/hex/mapper.py +412 -0
- datahub/ingestion/source/hex/model.py +78 -0
- datahub/ingestion/source/hex/query_fetcher.py +307 -0
- datahub/ingestion/source/iceberg/iceberg.py +385 -164
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +28 -71
- datahub/ingestion/source/kafka/kafka_config.py +78 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
- datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +216 -86
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +539 -555
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +103 -118
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +11 -10
- datahub/ingestion/source/mlflow.py +254 -23
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +359 -181
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
- datahub/ingestion/source/nifi.py +5 -5
- datahub/ingestion/source/openapi.py +85 -38
- datahub/ingestion/source/openapi_parser.py +59 -40
- datahub/ingestion/source/powerbi/config.py +92 -27
- datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +66 -32
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/profile.py +2 -2
- datahub/ingestion/source/redshift/query.py +24 -20
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +13 -11
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +515 -244
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/config.py +75 -8
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +36 -7
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +403 -140
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
- datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
- datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
- datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
- datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +219 -26
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +29 -9
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +9 -4
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +28 -8
- datahub/ingestion/source/sql/hive_metastore.py +24 -25
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +18 -2
- datahub/ingestion/source/sql/mssql/source.py +376 -62
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +62 -11
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +20 -2
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +27 -2
- datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
- datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
- datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +43 -10
- datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/superset.py +810 -126
- datahub/ingestion/source/tableau/tableau.py +172 -69
- datahub/ingestion/source/tableau/tableau_common.py +11 -4
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +161 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
- datahub/ingestion/source/usage/usage_common.py +4 -68
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1367 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
- datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/dataset_domain.py +1 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/common.py +3 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
- datahub/metadata/_urns/urn_defs.py +1866 -1582
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18404 -16617
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/Deprecation.avsc +2 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +21 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QueryProperties.avsc +24 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +147 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +7 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_shared.py +393 -10
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +180 -4
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +154 -12
- datahub/sdk/lineage_client.py +943 -0
- datahub/sdk/main_client.py +83 -8
- datahub/sdk/mlmodel.py +383 -0
- datahub/sdk/mlmodelgroup.py +240 -0
- datahub/sdk/search_client.py +85 -8
- datahub/sdk/search_filters.py +393 -68
- datahub/secret/datahub_secret_store.py +5 -1
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +51 -59
- datahub/sql_parsing/_sqlglot_patch.py +1 -2
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/split_statements.py +30 -3
- datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
- datahub/sql_parsing/sqlglot_lineage.py +517 -44
- datahub/sql_parsing/sqlglot_utils.py +30 -18
- datahub/sql_parsing/tool_meta_extractor.py +25 -2
- datahub/telemetry/telemetry.py +30 -16
- datahub/testing/check_imports.py +1 -1
- datahub/testing/docker_utils.py +8 -2
- datahub/testing/mce_helpers.py +421 -0
- datahub/testing/mcp_diff.py +17 -21
- datahub/testing/sdk_v2_helpers.py +18 -0
- datahub/upgrade/upgrade.py +86 -30
- datahub/utilities/file_backed_collections.py +14 -15
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/ingest_utils.py +2 -2
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +30 -7
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +298 -10
- datahub/utilities/sqlalchemy_query_combiner.py +6 -4
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/utilities/urn_encoder.py +1 -1
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -458
- datahub/ingestion/source/vertexai.py +0 -697
- datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
import warnings
|
|
2
3
|
from typing import Optional
|
|
3
4
|
|
|
4
5
|
import click
|
|
@@ -6,7 +7,7 @@ from click_default_group import DefaultGroup
|
|
|
6
7
|
|
|
7
8
|
from datahub.api.entities.datacontract.datacontract import DataContract
|
|
8
9
|
from datahub.ingestion.graph.client import get_default_graph
|
|
9
|
-
from datahub.telemetry import telemetry
|
|
10
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
10
11
|
from datahub.upgrade import upgrade
|
|
11
12
|
|
|
12
13
|
logger = logging.getLogger(__name__)
|
|
@@ -14,21 +15,57 @@ logger = logging.getLogger(__name__)
|
|
|
14
15
|
|
|
15
16
|
@click.group(cls=DefaultGroup, default="upsert")
|
|
16
17
|
def datacontract() -> None:
|
|
17
|
-
"""
|
|
18
|
-
|
|
18
|
+
"""
|
|
19
|
+
A group of commands to interact with the DataContract entity in DataHub.
|
|
20
|
+
|
|
21
|
+
WARNING: This CLI is DEPRECATED and no longer supported.
|
|
22
|
+
Please migrate to alternative data contract solutions.
|
|
23
|
+
"""
|
|
24
|
+
# Issue deprecation warning
|
|
25
|
+
warnings.warn(
|
|
26
|
+
"The datacontract CLI is deprecated and no longer supported. "
|
|
27
|
+
"Please migrate to alternative data contract solutions.",
|
|
28
|
+
DeprecationWarning,
|
|
29
|
+
stacklevel=2,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
# Log deprecation message for runtime visibility
|
|
33
|
+
logger.warning(
|
|
34
|
+
"DEPRECATED: The datacontract CLI is no longer supported and will be removed in a future version. "
|
|
35
|
+
"Please migrate to alternative data contract solutions."
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
# Display deprecation message to user
|
|
39
|
+
click.secho(
|
|
40
|
+
"⚠️ WARNING: This datacontract CLI is DEPRECATED and no longer supported.",
|
|
41
|
+
fg="yellow",
|
|
42
|
+
bold=True,
|
|
43
|
+
)
|
|
44
|
+
click.secho("Please migrate to alternative data contract solutions.", fg="yellow")
|
|
19
45
|
|
|
20
46
|
|
|
21
47
|
@datacontract.command()
|
|
22
48
|
@click.option("-f", "--file", required=True, type=click.Path(exists=True))
|
|
23
49
|
@upgrade.check_upgrade
|
|
24
|
-
@telemetry.with_telemetry()
|
|
25
50
|
def upsert(file: str) -> None:
|
|
26
|
-
"""
|
|
51
|
+
"""
|
|
52
|
+
Upsert (create or update) a Data Contract in DataHub.
|
|
53
|
+
|
|
54
|
+
WARNING: This command is DEPRECATED and no longer supported.
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
click.secho(
|
|
58
|
+
"⚠️ WARNING: The 'upsert' command is deprecated and no longer supported.",
|
|
59
|
+
fg="yellow",
|
|
60
|
+
bold=True,
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
logger.warning("DEPRECATED: datacontract upsert command is no longer supported")
|
|
27
64
|
|
|
28
65
|
data_contract: DataContract = DataContract.from_yaml(file)
|
|
29
66
|
urn = data_contract.urn
|
|
30
67
|
|
|
31
|
-
with get_default_graph() as graph:
|
|
68
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
32
69
|
if not graph.exists(data_contract.entity):
|
|
33
70
|
raise ValueError(
|
|
34
71
|
f"Cannot define a data contract for non-existent entity {data_contract.entity}"
|
|
@@ -59,9 +96,20 @@ def upsert(file: str) -> None:
|
|
|
59
96
|
)
|
|
60
97
|
@click.option("--hard/--soft", required=False, is_flag=True, default=False)
|
|
61
98
|
@upgrade.check_upgrade
|
|
62
|
-
@telemetry.with_telemetry()
|
|
63
99
|
def delete(urn: Optional[str], file: Optional[str], hard: bool) -> None:
|
|
64
|
-
"""
|
|
100
|
+
"""
|
|
101
|
+
Delete a Data Contract in DataHub. Defaults to a soft-delete. Use --hard to completely erase metadata.
|
|
102
|
+
|
|
103
|
+
WARNING: This command is DEPRECATED and no longer supported.
|
|
104
|
+
"""
|
|
105
|
+
|
|
106
|
+
click.secho(
|
|
107
|
+
"⚠️ WARNING: The 'delete' command is deprecated and no longer supported.",
|
|
108
|
+
fg="yellow",
|
|
109
|
+
bold=True,
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
logger.warning("DEPRECATED: datacontract delete command is no longer supported")
|
|
65
113
|
|
|
66
114
|
if not urn:
|
|
67
115
|
if not file:
|
|
@@ -72,7 +120,7 @@ def delete(urn: Optional[str], file: Optional[str], hard: bool) -> None:
|
|
|
72
120
|
data_contract = DataContract.from_yaml(file)
|
|
73
121
|
urn = data_contract.urn
|
|
74
122
|
|
|
75
|
-
with get_default_graph() as graph:
|
|
123
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
76
124
|
if not graph.exists(urn):
|
|
77
125
|
raise ValueError(f"Data Contract {urn} does not exist")
|
|
78
126
|
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import difflib
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
|
-
import os
|
|
5
4
|
import pathlib
|
|
6
5
|
import sys
|
|
7
6
|
from pathlib import Path
|
|
@@ -14,15 +13,16 @@ from click_default_group import DefaultGroup
|
|
|
14
13
|
|
|
15
14
|
from datahub.api.entities.dataproduct.dataproduct import DataProduct
|
|
16
15
|
from datahub.cli.specific.file_loader import load_file
|
|
16
|
+
from datahub.configuration.env_vars import get_dataproduct_external_url
|
|
17
17
|
from datahub.emitter.mce_builder import (
|
|
18
18
|
make_group_urn,
|
|
19
19
|
make_user_urn,
|
|
20
20
|
validate_ownership_type,
|
|
21
21
|
)
|
|
22
22
|
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
|
23
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
23
24
|
from datahub.metadata.schema_classes import OwnerClass, OwnershipTypeClass
|
|
24
25
|
from datahub.specific.dataproduct import DataProductPatchBuilder
|
|
25
|
-
from datahub.telemetry import telemetry
|
|
26
26
|
from datahub.upgrade import upgrade
|
|
27
27
|
from datahub.utilities.urns.urn import Urn
|
|
28
28
|
|
|
@@ -81,12 +81,10 @@ def mutate(file: Path, validate_assets: bool, external_url: str, upsert: bool) -
|
|
|
81
81
|
|
|
82
82
|
config_dict = load_file(pathlib.Path(file))
|
|
83
83
|
id = config_dict.get("id") if isinstance(config_dict, dict) else None
|
|
84
|
-
with get_default_graph() as graph:
|
|
84
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
85
85
|
data_product: DataProduct = DataProduct.from_yaml(file, graph)
|
|
86
86
|
external_url_override = (
|
|
87
|
-
external_url
|
|
88
|
-
or os.getenv("DATAHUB_DATAPRODUCT_EXTERNAL_URL")
|
|
89
|
-
or data_product.external_url
|
|
87
|
+
external_url or get_dataproduct_external_url() or data_product.external_url
|
|
90
88
|
)
|
|
91
89
|
data_product.external_url = external_url_override
|
|
92
90
|
if upsert and not graph.exists(data_product.urn):
|
|
@@ -129,7 +127,6 @@ def mutate(file: Path, validate_assets: bool, external_url: str, upsert: bool) -
|
|
|
129
127
|
)
|
|
130
128
|
@click.option("--external-url", required=False, type=str)
|
|
131
129
|
@upgrade.check_upgrade
|
|
132
|
-
@telemetry.with_telemetry()
|
|
133
130
|
def update(file: Path, validate_assets: bool, external_url: str) -> None:
|
|
134
131
|
"""Create or Update a Data Product in DataHub. Use upsert if you want to apply partial updates."""
|
|
135
132
|
|
|
@@ -145,7 +142,6 @@ def update(file: Path, validate_assets: bool, external_url: str) -> None:
|
|
|
145
142
|
)
|
|
146
143
|
@click.option("--external-url", required=False, type=str)
|
|
147
144
|
@upgrade.check_upgrade
|
|
148
|
-
@telemetry.with_telemetry()
|
|
149
145
|
def upsert(file: Path, validate_assets: bool, external_url: str) -> None:
|
|
150
146
|
"""Upsert attributes to a Data Product in DataHub."""
|
|
151
147
|
|
|
@@ -158,11 +154,10 @@ def upsert(file: Path, validate_assets: bool, external_url: str) -> None:
|
|
|
158
154
|
@click.option("-f", "--file", required=True, type=click.Path(exists=True))
|
|
159
155
|
@click.option("--update", required=False, is_flag=True, default=False)
|
|
160
156
|
@upgrade.check_upgrade
|
|
161
|
-
@telemetry.with_telemetry()
|
|
162
157
|
def diff(file: Path, update: bool) -> None:
|
|
163
158
|
"""Diff a Data Product file with its twin in DataHub"""
|
|
164
159
|
|
|
165
|
-
with get_default_graph() as emitter:
|
|
160
|
+
with get_default_graph(ClientMode.CLI) as emitter:
|
|
166
161
|
id: Optional[str] = None
|
|
167
162
|
try:
|
|
168
163
|
data_product_local: DataProduct = DataProduct.from_yaml(file, emitter)
|
|
@@ -205,7 +200,6 @@ def diff(file: Path, update: bool) -> None:
|
|
|
205
200
|
)
|
|
206
201
|
@click.option("--hard/--soft", required=False, is_flag=True, default=False)
|
|
207
202
|
@upgrade.check_upgrade
|
|
208
|
-
@telemetry.with_telemetry()
|
|
209
203
|
def delete(urn: str, file: Path, hard: bool) -> None:
|
|
210
204
|
"""Delete a Data Product in DataHub. Defaults to a soft-delete. Use --hard to completely erase metadata."""
|
|
211
205
|
|
|
@@ -216,7 +210,7 @@ def delete(urn: str, file: Path, hard: bool) -> None:
|
|
|
216
210
|
raise click.Abort()
|
|
217
211
|
|
|
218
212
|
graph: DataHubGraph
|
|
219
|
-
with get_default_graph() as graph:
|
|
213
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
220
214
|
data_product_urn = (
|
|
221
215
|
urn if urn.startswith("urn:li:dataProduct") else f"urn:li:dataProduct:{urn}"
|
|
222
216
|
)
|
|
@@ -241,14 +235,13 @@ def delete(urn: str, file: Path, hard: bool) -> None:
|
|
|
241
235
|
@click.option("--urn", required=True, type=str)
|
|
242
236
|
@click.option("--to-file", required=False, type=str)
|
|
243
237
|
@upgrade.check_upgrade
|
|
244
|
-
@telemetry.with_telemetry()
|
|
245
238
|
def get(urn: str, to_file: str) -> None:
|
|
246
239
|
"""Get a Data Product from DataHub"""
|
|
247
240
|
|
|
248
241
|
if not urn.startswith("urn:li:dataProduct:"):
|
|
249
242
|
urn = f"urn:li:dataProduct:{urn}"
|
|
250
243
|
|
|
251
|
-
with get_default_graph() as graph:
|
|
244
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
252
245
|
if graph.exists(urn):
|
|
253
246
|
dataproduct: DataProduct = DataProduct.from_datahub(graph=graph, id=urn)
|
|
254
247
|
click.secho(
|
|
@@ -278,7 +271,6 @@ def get(urn: str, to_file: str) -> None:
|
|
|
278
271
|
help="A markdown file that contains documentation for this data product",
|
|
279
272
|
)
|
|
280
273
|
@upgrade.check_upgrade
|
|
281
|
-
@telemetry.with_telemetry()
|
|
282
274
|
def set_description(urn: str, description: str, md_file: Path) -> None:
|
|
283
275
|
"""Set description for a Data Product in DataHub"""
|
|
284
276
|
|
|
@@ -306,7 +298,7 @@ def set_description(urn: str, description: str, md_file: Path) -> None:
|
|
|
306
298
|
|
|
307
299
|
dataproduct_patcher: DataProductPatchBuilder = DataProduct.get_patch_builder(urn)
|
|
308
300
|
dataproduct_patcher.set_description(description)
|
|
309
|
-
with get_default_graph() as graph:
|
|
301
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
310
302
|
_abort_if_non_existent_urn(graph, urn, "set description")
|
|
311
303
|
for mcp in dataproduct_patcher.build():
|
|
312
304
|
graph.emit(mcp)
|
|
@@ -329,7 +321,6 @@ def set_description(urn: str, description: str, md_file: Path) -> None:
|
|
|
329
321
|
default=OwnershipTypeClass.TECHNICAL_OWNER,
|
|
330
322
|
)
|
|
331
323
|
@upgrade.check_upgrade
|
|
332
|
-
@telemetry.with_telemetry()
|
|
333
324
|
def add_owner(urn: str, owner: str, owner_type: str) -> None:
|
|
334
325
|
"""Add owner for a Data Product in DataHub"""
|
|
335
326
|
|
|
@@ -342,7 +333,7 @@ def add_owner(urn: str, owner: str, owner_type: str) -> None:
|
|
|
342
333
|
owner=_get_owner_urn(owner), type=owner_type, typeUrn=owner_type_urn
|
|
343
334
|
)
|
|
344
335
|
)
|
|
345
|
-
with get_default_graph() as graph:
|
|
336
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
346
337
|
_abort_if_non_existent_urn(graph, urn, "add owners")
|
|
347
338
|
for mcp in dataproduct_patcher.build():
|
|
348
339
|
graph.emit(mcp)
|
|
@@ -352,7 +343,6 @@ def add_owner(urn: str, owner: str, owner_type: str) -> None:
|
|
|
352
343
|
@click.option("--urn", required=True, type=str)
|
|
353
344
|
@click.argument("owner_urn", required=True, type=str)
|
|
354
345
|
@upgrade.check_upgrade
|
|
355
|
-
@telemetry.with_telemetry()
|
|
356
346
|
def remove_owner(urn: str, owner_urn: str) -> None:
|
|
357
347
|
"""Remove owner for a Data Product in DataHub"""
|
|
358
348
|
|
|
@@ -360,7 +350,7 @@ def remove_owner(urn: str, owner_urn: str) -> None:
|
|
|
360
350
|
urn = f"urn:li:dataProduct:{urn}"
|
|
361
351
|
dataproduct_patcher: DataProductPatchBuilder = DataProduct.get_patch_builder(urn)
|
|
362
352
|
dataproduct_patcher.remove_owner(owner=_get_owner_urn(owner_urn))
|
|
363
|
-
with get_default_graph() as graph:
|
|
353
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
364
354
|
_abort_if_non_existent_urn(graph, urn, "remove owners")
|
|
365
355
|
for mcp in dataproduct_patcher.build():
|
|
366
356
|
click.echo(json.dumps(mcp.to_obj()))
|
|
@@ -374,7 +364,6 @@ def remove_owner(urn: str, owner_urn: str) -> None:
|
|
|
374
364
|
"--validate-assets/--no-validate-assets", required=False, is_flag=True, default=True
|
|
375
365
|
)
|
|
376
366
|
@upgrade.check_upgrade
|
|
377
|
-
@telemetry.with_telemetry()
|
|
378
367
|
def add_asset(urn: str, asset: str, validate_assets: bool) -> None:
|
|
379
368
|
"""Add asset for a Data Product in DataHub"""
|
|
380
369
|
|
|
@@ -382,7 +371,7 @@ def add_asset(urn: str, asset: str, validate_assets: bool) -> None:
|
|
|
382
371
|
urn = f"urn:li:dataProduct:{urn}"
|
|
383
372
|
dataproduct_patcher: DataProductPatchBuilder = DataProduct.get_patch_builder(urn)
|
|
384
373
|
dataproduct_patcher.add_asset(asset)
|
|
385
|
-
with get_default_graph() as graph:
|
|
374
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
386
375
|
_abort_if_non_existent_urn(graph, urn, "add assets")
|
|
387
376
|
if validate_assets:
|
|
388
377
|
_abort_if_non_existent_urn(
|
|
@@ -401,7 +390,6 @@ def add_asset(urn: str, asset: str, validate_assets: bool) -> None:
|
|
|
401
390
|
"--validate-assets/--no-validate-assets", required=False, is_flag=True, default=True
|
|
402
391
|
)
|
|
403
392
|
@upgrade.check_upgrade
|
|
404
|
-
@telemetry.with_telemetry()
|
|
405
393
|
def remove_asset(urn: str, asset: str, validate_assets: bool) -> None:
|
|
406
394
|
"""Remove asset for a Data Product in DataHub"""
|
|
407
395
|
|
|
@@ -409,7 +397,7 @@ def remove_asset(urn: str, asset: str, validate_assets: bool) -> None:
|
|
|
409
397
|
urn = f"urn:li:dataProduct:{urn}"
|
|
410
398
|
dataproduct_patcher: DataProductPatchBuilder = DataProduct.get_patch_builder(urn)
|
|
411
399
|
dataproduct_patcher.remove_asset(asset)
|
|
412
|
-
with get_default_graph() as graph:
|
|
400
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
413
401
|
_abort_if_non_existent_urn(graph, urn, "remove assets")
|
|
414
402
|
if validate_assets:
|
|
415
403
|
_abort_if_non_existent_urn(
|
|
@@ -12,8 +12,8 @@ from click_default_group import DefaultGroup
|
|
|
12
12
|
from datahub.api.entities.dataset.dataset import Dataset, DatasetRetrievalConfig
|
|
13
13
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
14
14
|
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
|
15
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
15
16
|
from datahub.metadata.com.linkedin.pegasus2avro.common import Siblings
|
|
16
|
-
from datahub.telemetry import telemetry
|
|
17
17
|
from datahub.upgrade import upgrade
|
|
18
18
|
|
|
19
19
|
logger = logging.getLogger(__name__)
|
|
@@ -29,13 +29,14 @@ def dataset() -> None:
|
|
|
29
29
|
name="upsert",
|
|
30
30
|
)
|
|
31
31
|
@click.option("-f", "--file", required=True, type=click.Path(exists=True))
|
|
32
|
-
@
|
|
33
|
-
|
|
34
|
-
|
|
32
|
+
@click.option(
|
|
33
|
+
"-n", "--dry-run", type=bool, is_flag=True, default=False, help="Perform a dry run"
|
|
34
|
+
)
|
|
35
|
+
def upsert(file: Path, dry_run: bool) -> None:
|
|
35
36
|
"""Upsert attributes to a Dataset in DataHub."""
|
|
36
37
|
# Call the sync command with to_datahub=True to perform the upsert operation
|
|
37
38
|
ctx = click.get_current_context()
|
|
38
|
-
ctx.invoke(sync, file=str(file), to_datahub=True)
|
|
39
|
+
ctx.invoke(sync, file=str(file), dry_run=dry_run, to_datahub=True)
|
|
39
40
|
|
|
40
41
|
|
|
41
42
|
@dataset.command(
|
|
@@ -44,14 +45,13 @@ def upsert(file: Path) -> None:
|
|
|
44
45
|
@click.option("--urn", required=True, type=str)
|
|
45
46
|
@click.option("--to-file", required=False, type=str)
|
|
46
47
|
@upgrade.check_upgrade
|
|
47
|
-
@telemetry.with_telemetry()
|
|
48
48
|
def get(urn: str, to_file: str) -> None:
|
|
49
49
|
"""Get a Dataset from DataHub"""
|
|
50
50
|
|
|
51
51
|
if not urn.startswith("urn:li:dataset:"):
|
|
52
52
|
urn = f"urn:li:dataset:{urn}"
|
|
53
53
|
|
|
54
|
-
with get_default_graph() as graph:
|
|
54
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
55
55
|
if graph.exists(urn):
|
|
56
56
|
dataset: Dataset = Dataset.from_datahub(graph=graph, urn=urn)
|
|
57
57
|
click.secho(
|
|
@@ -73,13 +73,13 @@ def get(urn: str, to_file: str) -> None:
|
|
|
73
73
|
help="URN of secondary sibling(s)",
|
|
74
74
|
multiple=True,
|
|
75
75
|
)
|
|
76
|
-
@
|
|
76
|
+
@upgrade.check_upgrade
|
|
77
77
|
def add_sibling(urn: str, sibling_urns: Tuple[str]) -> None:
|
|
78
78
|
all_urns = set()
|
|
79
79
|
all_urns.add(urn)
|
|
80
80
|
for sibling_urn in sibling_urns:
|
|
81
81
|
all_urns.add(sibling_urn)
|
|
82
|
-
with get_default_graph() as graph:
|
|
82
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
83
83
|
for _urn in all_urns:
|
|
84
84
|
_emit_sibling(graph, urn, _urn, all_urns)
|
|
85
85
|
|
|
@@ -113,8 +113,6 @@ def _get_existing_siblings(graph: DataHubGraph, urn: str) -> Set[str]:
|
|
|
113
113
|
@click.option("--lintCheck", required=False, is_flag=True)
|
|
114
114
|
@click.option("--lintFix", required=False, is_flag=True)
|
|
115
115
|
@click.argument("file", type=click.Path(exists=True))
|
|
116
|
-
@upgrade.check_upgrade
|
|
117
|
-
@telemetry.with_telemetry()
|
|
118
116
|
def file(lintcheck: bool, lintfix: bool, file: str) -> None:
|
|
119
117
|
"""Operate on a Dataset file"""
|
|
120
118
|
|
|
@@ -167,13 +165,17 @@ def file(lintcheck: bool, lintfix: bool, file: str) -> None:
|
|
|
167
165
|
)
|
|
168
166
|
@click.option("-f", "--file", required=True, type=click.Path(exists=True))
|
|
169
167
|
@click.option("--to-datahub/--from-datahub", required=True, is_flag=True)
|
|
168
|
+
@click.option(
|
|
169
|
+
"-n", "--dry-run", type=bool, is_flag=True, default=False, help="Perform a dry run"
|
|
170
|
+
)
|
|
170
171
|
@upgrade.check_upgrade
|
|
171
|
-
|
|
172
|
-
def sync(file: str, to_datahub: bool) -> None:
|
|
172
|
+
def sync(file: str, to_datahub: bool, dry_run: bool) -> None:
|
|
173
173
|
"""Sync a Dataset file to/from DataHub"""
|
|
174
174
|
|
|
175
|
+
dry_run_prefix = "[dry-run]: " if dry_run else "" # prefix to use in messages
|
|
176
|
+
|
|
175
177
|
failures: List[str] = []
|
|
176
|
-
with get_default_graph() as graph:
|
|
178
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
177
179
|
datasets = Dataset.from_yaml(file)
|
|
178
180
|
for dataset in datasets:
|
|
179
181
|
assert (
|
|
@@ -189,7 +191,7 @@ def sync(file: str, to_datahub: bool) -> None:
|
|
|
189
191
|
click.secho(
|
|
190
192
|
"\n\t- ".join(
|
|
191
193
|
[
|
|
192
|
-
f"Skipping Dataset {dataset.urn} due to missing entity references: "
|
|
194
|
+
f"{dry_run_prefix}Skipping Dataset {dataset.urn} due to missing entity references: "
|
|
193
195
|
]
|
|
194
196
|
+ missing_entity_references
|
|
195
197
|
),
|
|
@@ -199,13 +201,18 @@ def sync(file: str, to_datahub: bool) -> None:
|
|
|
199
201
|
continue
|
|
200
202
|
try:
|
|
201
203
|
for mcp in dataset.generate_mcp():
|
|
202
|
-
|
|
203
|
-
|
|
204
|
+
if not dry_run:
|
|
205
|
+
graph.emit(mcp)
|
|
206
|
+
click.secho(
|
|
207
|
+
f"{dry_run_prefix}Update succeeded for urn {dataset.urn}.",
|
|
208
|
+
fg="green",
|
|
209
|
+
)
|
|
204
210
|
except Exception as e:
|
|
205
211
|
click.secho(
|
|
206
|
-
f"Update failed for id {id}. due to {e}",
|
|
212
|
+
f"{dry_run_prefix}Update failed for id {id}. due to {e}",
|
|
207
213
|
fg="red",
|
|
208
214
|
)
|
|
215
|
+
failures.append(dataset.urn)
|
|
209
216
|
else:
|
|
210
217
|
# Sync from DataHub
|
|
211
218
|
if graph.exists(dataset.urn):
|
|
@@ -215,13 +222,16 @@ def sync(file: str, to_datahub: bool) -> None:
|
|
|
215
222
|
existing_dataset: Dataset = Dataset.from_datahub(
|
|
216
223
|
graph=graph, urn=dataset.urn, config=dataset_get_config
|
|
217
224
|
)
|
|
218
|
-
|
|
225
|
+
if not dry_run:
|
|
226
|
+
existing_dataset.to_yaml(Path(file))
|
|
227
|
+
else:
|
|
228
|
+
click.secho(f"{dry_run_prefix}Will update file {file}")
|
|
219
229
|
else:
|
|
220
|
-
click.secho(f"Dataset {dataset.urn} does not exist")
|
|
230
|
+
click.secho(f"{dry_run_prefix}Dataset {dataset.urn} does not exist")
|
|
221
231
|
failures.append(dataset.urn)
|
|
222
232
|
if failures:
|
|
223
233
|
click.secho(
|
|
224
|
-
f"\
|
|
234
|
+
f"\n{dry_run_prefix}Failed to sync the following Datasets: {', '.join(failures)}",
|
|
225
235
|
fg="red",
|
|
226
236
|
)
|
|
227
237
|
raise click.Abort()
|
|
@@ -7,7 +7,7 @@ from click_default_group import DefaultGroup
|
|
|
7
7
|
|
|
8
8
|
from datahub.api.entities.forms.forms import Forms
|
|
9
9
|
from datahub.ingestion.graph.client import get_default_graph
|
|
10
|
-
from datahub.
|
|
10
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
11
11
|
from datahub.upgrade import upgrade
|
|
12
12
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
@@ -23,8 +23,6 @@ def forms() -> None:
|
|
|
23
23
|
name="upsert",
|
|
24
24
|
)
|
|
25
25
|
@click.option("-f", "--file", required=True, type=click.Path(exists=True))
|
|
26
|
-
@upgrade.check_upgrade
|
|
27
|
-
@telemetry.with_telemetry()
|
|
28
26
|
def upsert(file: Path) -> None:
|
|
29
27
|
"""Upsert forms in DataHub."""
|
|
30
28
|
|
|
@@ -37,10 +35,9 @@ def upsert(file: Path) -> None:
|
|
|
37
35
|
@click.option("--urn", required=True, type=str)
|
|
38
36
|
@click.option("--to-file", required=False, type=str)
|
|
39
37
|
@upgrade.check_upgrade
|
|
40
|
-
@telemetry.with_telemetry()
|
|
41
38
|
def get(urn: str, to_file: str) -> None:
|
|
42
39
|
"""Get form from DataHub"""
|
|
43
|
-
with get_default_graph() as graph:
|
|
40
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
44
41
|
if graph.exists(urn):
|
|
45
42
|
form: Forms = Forms.from_datahub(graph=graph, urn=urn)
|
|
46
43
|
click.secho(
|
|
@@ -10,7 +10,7 @@ from datahub.api.entities.corpgroup.corpgroup import (
|
|
|
10
10
|
)
|
|
11
11
|
from datahub.cli.specific.file_loader import load_file
|
|
12
12
|
from datahub.ingestion.graph.client import get_default_graph
|
|
13
|
-
from datahub.
|
|
13
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
14
14
|
from datahub.upgrade import upgrade
|
|
15
15
|
|
|
16
16
|
logger = logging.getLogger(__name__)
|
|
@@ -34,13 +34,12 @@ def group() -> None:
|
|
|
34
34
|
help="When set, writes to the editable section of the metadata graph, overwriting writes from the UI",
|
|
35
35
|
)
|
|
36
36
|
@upgrade.check_upgrade
|
|
37
|
-
@telemetry.with_telemetry()
|
|
38
37
|
def upsert(file: Path, override_editable: bool) -> None:
|
|
39
38
|
"""Create or Update a Group with embedded Users"""
|
|
40
39
|
|
|
41
40
|
config_dict = load_file(file)
|
|
42
41
|
group_configs = config_dict if isinstance(config_dict, list) else [config_dict]
|
|
43
|
-
with get_default_graph() as emitter:
|
|
42
|
+
with get_default_graph(ClientMode.CLI) as emitter:
|
|
44
43
|
for group_config in group_configs:
|
|
45
44
|
try:
|
|
46
45
|
datahub_group = CorpGroup.parse_obj(group_config)
|
|
@@ -11,7 +11,7 @@ from datahub.api.entities.structuredproperties.structuredproperties import (
|
|
|
11
11
|
StructuredProperties,
|
|
12
12
|
)
|
|
13
13
|
from datahub.ingestion.graph.client import get_default_graph
|
|
14
|
-
from datahub.
|
|
14
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
15
15
|
from datahub.upgrade import upgrade
|
|
16
16
|
from datahub.utilities.urns.urn import Urn
|
|
17
17
|
|
|
@@ -29,11 +29,10 @@ def properties() -> None:
|
|
|
29
29
|
)
|
|
30
30
|
@click.option("-f", "--file", required=True, type=click.Path(exists=True))
|
|
31
31
|
@upgrade.check_upgrade
|
|
32
|
-
@telemetry.with_telemetry()
|
|
33
32
|
def upsert(file: Path) -> None:
|
|
34
33
|
"""Upsert structured properties in DataHub."""
|
|
35
34
|
|
|
36
|
-
with get_default_graph() as graph:
|
|
35
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
37
36
|
StructuredProperties.create(str(file), graph)
|
|
38
37
|
|
|
39
38
|
|
|
@@ -43,12 +42,11 @@ def upsert(file: Path) -> None:
|
|
|
43
42
|
@click.option("--urn", required=True, type=str)
|
|
44
43
|
@click.option("--to-file", required=False, type=str)
|
|
45
44
|
@upgrade.check_upgrade
|
|
46
|
-
@telemetry.with_telemetry()
|
|
47
45
|
def get(urn: str, to_file: str) -> None:
|
|
48
46
|
"""Get structured properties from DataHub"""
|
|
49
47
|
urn = Urn.make_structured_property_urn(urn)
|
|
50
48
|
|
|
51
|
-
with get_default_graph() as graph:
|
|
49
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
52
50
|
if graph.exists(urn):
|
|
53
51
|
structuredproperties: StructuredProperties = (
|
|
54
52
|
StructuredProperties.from_datahub(graph=graph, urn=urn)
|
|
@@ -70,7 +68,7 @@ def get(urn: str, to_file: str) -> None:
|
|
|
70
68
|
)
|
|
71
69
|
@click.option("--details/--no-details", is_flag=True, default=True)
|
|
72
70
|
@click.option("--to-file", required=False, type=str)
|
|
73
|
-
@
|
|
71
|
+
@upgrade.check_upgrade
|
|
74
72
|
def list(details: bool, to_file: str) -> None:
|
|
75
73
|
"""List structured properties in DataHub"""
|
|
76
74
|
|
|
@@ -117,7 +115,7 @@ def list(details: bool, to_file: str) -> None:
|
|
|
117
115
|
with open(file, "w") as fp:
|
|
118
116
|
yaml.dump(serialized_objects, fp)
|
|
119
117
|
|
|
120
|
-
with get_default_graph() as graph:
|
|
118
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
121
119
|
if details:
|
|
122
120
|
logger.info(
|
|
123
121
|
"Listing structured properties with details. Use --no-details for urns only"
|