acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +18 -3
- datahub/api/entities/datajob/datajob.py +24 -4
- datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +47 -72
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +37 -37
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/base.py +8 -6
- datahub/api/graphql/operation.py +14 -10
- datahub/cli/check_cli.py +91 -9
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +20 -12
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +133 -34
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +155 -231
- datahub/cli/exists_cli.py +2 -3
- datahub/cli/get_cli.py +2 -3
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +11 -5
- datahub/cli/ingest_cli.py +25 -26
- datahub/cli/migrate.py +12 -9
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +4 -6
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +39 -7
- datahub/cli/specific/datacontract_cli.py +57 -9
- datahub/cli/specific/dataproduct_cli.py +12 -24
- datahub/cli/specific/dataset_cli.py +31 -21
- datahub/cli/specific/forms_cli.py +2 -5
- datahub/cli/specific/group_cli.py +2 -3
- datahub/cli/specific/structuredproperties_cli.py +5 -7
- datahub/cli/specific/user_cli.py +174 -4
- datahub/cli/state_cli.py +2 -3
- datahub/cli/timeline_cli.py +2 -3
- datahub/configuration/common.py +46 -2
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +4 -3
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +12 -8
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/mcp_builder.py +12 -0
- datahub/emitter/request_helper.py +138 -15
- datahub/emitter/response_helper.py +111 -19
- datahub/emitter/rest_emitter.py +399 -163
- datahub/entrypoints.py +10 -5
- datahub/errors.py +12 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/common.py +9 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +174 -62
- datahub/ingestion/api/source_helpers.py +41 -3
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +31 -5
- datahub/ingestion/glossary/classification_mixin.py +9 -2
- datahub/ingestion/graph/client.py +492 -55
- datahub/ingestion/graph/config.py +18 -2
- datahub/ingestion/graph/filters.py +96 -32
- datahub/ingestion/graph/links.py +55 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +90 -23
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +31 -23
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/apply/datahub_apply.py +6 -5
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +7 -18
- datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
- datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
- datahub/ingestion/source/common/subtypes.py +73 -1
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
- datahub/ingestion/source/data_lake_common/object_store.py +732 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
- datahub/ingestion/source/datahub/config.py +19 -5
- datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
- datahub/ingestion/source/datahub/datahub_source.py +11 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
- datahub/ingestion/source/dbt/dbt_common.py +270 -26
- datahub/ingestion/source/dbt/dbt_core.py +88 -47
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +228 -215
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +12 -14
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +67 -8
- datahub/ingestion/source/fivetran/fivetran.py +228 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
- datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
- datahub/ingestion/source/gcs/gcs_source.py +53 -10
- datahub/ingestion/source/gcs/gcs_utils.py +36 -9
- datahub/ingestion/source/ge_data_profiler.py +146 -33
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +402 -0
- datahub/ingestion/source/hex/constants.py +8 -0
- datahub/ingestion/source/hex/hex.py +311 -0
- datahub/ingestion/source/hex/mapper.py +412 -0
- datahub/ingestion/source/hex/model.py +78 -0
- datahub/ingestion/source/hex/query_fetcher.py +307 -0
- datahub/ingestion/source/iceberg/iceberg.py +385 -164
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +28 -71
- datahub/ingestion/source/kafka/kafka_config.py +78 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
- datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +216 -86
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +539 -555
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +103 -118
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +11 -10
- datahub/ingestion/source/mlflow.py +254 -23
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +359 -181
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
- datahub/ingestion/source/nifi.py +5 -5
- datahub/ingestion/source/openapi.py +85 -38
- datahub/ingestion/source/openapi_parser.py +59 -40
- datahub/ingestion/source/powerbi/config.py +92 -27
- datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +66 -32
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/profile.py +2 -2
- datahub/ingestion/source/redshift/query.py +24 -20
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +13 -11
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +515 -244
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/config.py +75 -8
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +36 -7
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +403 -140
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
- datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
- datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
- datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
- datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +219 -26
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +29 -9
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +9 -4
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +28 -8
- datahub/ingestion/source/sql/hive_metastore.py +24 -25
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +18 -2
- datahub/ingestion/source/sql/mssql/source.py +376 -62
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +62 -11
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +20 -2
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +27 -2
- datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
- datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
- datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +43 -10
- datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/superset.py +810 -126
- datahub/ingestion/source/tableau/tableau.py +172 -69
- datahub/ingestion/source/tableau/tableau_common.py +11 -4
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +161 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
- datahub/ingestion/source/usage/usage_common.py +4 -68
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1367 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
- datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/dataset_domain.py +1 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/common.py +3 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
- datahub/metadata/_urns/urn_defs.py +1866 -1582
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18404 -16617
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/Deprecation.avsc +2 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +21 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QueryProperties.avsc +24 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +147 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +7 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_shared.py +393 -10
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +180 -4
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +154 -12
- datahub/sdk/lineage_client.py +943 -0
- datahub/sdk/main_client.py +83 -8
- datahub/sdk/mlmodel.py +383 -0
- datahub/sdk/mlmodelgroup.py +240 -0
- datahub/sdk/search_client.py +85 -8
- datahub/sdk/search_filters.py +393 -68
- datahub/secret/datahub_secret_store.py +5 -1
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +51 -59
- datahub/sql_parsing/_sqlglot_patch.py +1 -2
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/split_statements.py +30 -3
- datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
- datahub/sql_parsing/sqlglot_lineage.py +517 -44
- datahub/sql_parsing/sqlglot_utils.py +30 -18
- datahub/sql_parsing/tool_meta_extractor.py +25 -2
- datahub/telemetry/telemetry.py +30 -16
- datahub/testing/check_imports.py +1 -1
- datahub/testing/docker_utils.py +8 -2
- datahub/testing/mce_helpers.py +421 -0
- datahub/testing/mcp_diff.py +17 -21
- datahub/testing/sdk_v2_helpers.py +18 -0
- datahub/upgrade/upgrade.py +86 -30
- datahub/utilities/file_backed_collections.py +14 -15
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/ingest_utils.py +2 -2
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +30 -7
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +298 -10
- datahub/utilities/sqlalchemy_query_combiner.py +6 -4
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/utilities/urn_encoder.py +1 -1
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -458
- datahub/ingestion/source/vertexai.py +0 -697
- datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -5,7 +5,7 @@ from pathlib import Path
|
|
|
5
5
|
from typing import List, Optional, Union
|
|
6
6
|
|
|
7
7
|
import yaml
|
|
8
|
-
from pydantic import validator
|
|
8
|
+
from pydantic import Field, validator
|
|
9
9
|
from ruamel.yaml import YAML
|
|
10
10
|
from typing_extensions import Literal
|
|
11
11
|
|
|
@@ -26,6 +26,7 @@ from datahub.emitter.mce_builder import (
|
|
|
26
26
|
)
|
|
27
27
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
28
28
|
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
|
29
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
29
30
|
from datahub.metadata.schema_classes import (
|
|
30
31
|
FormActorAssignmentClass,
|
|
31
32
|
FormInfoClass,
|
|
@@ -66,7 +67,7 @@ class Prompt(ConfigModel):
|
|
|
66
67
|
description: Optional[str] = None
|
|
67
68
|
type: str
|
|
68
69
|
structured_property_id: Optional[str] = None
|
|
69
|
-
structured_property_urn: Optional[str] = None
|
|
70
|
+
structured_property_urn: Optional[str] = Field(default=None, validate_default=True)
|
|
70
71
|
required: Optional[bool] = None
|
|
71
72
|
|
|
72
73
|
@validator("structured_property_urn", pre=True, always=True)
|
|
@@ -110,7 +111,7 @@ class Actors(ConfigModel):
|
|
|
110
111
|
|
|
111
112
|
class Forms(ConfigModel):
|
|
112
113
|
id: Optional[str] = None
|
|
113
|
-
urn: Optional[str] = None
|
|
114
|
+
urn: Optional[str] = Field(default=None, validate_default=True)
|
|
114
115
|
name: str
|
|
115
116
|
description: Optional[str] = None
|
|
116
117
|
prompts: List[Prompt] = []
|
|
@@ -133,47 +134,46 @@ class Forms(ConfigModel):
|
|
|
133
134
|
def create(file: str) -> None:
|
|
134
135
|
emitter: DataHubGraph
|
|
135
136
|
|
|
136
|
-
with get_default_graph() as emitter:
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
form = Forms.parse_obj(form_raw)
|
|
137
|
+
with get_default_graph(ClientMode.CLI) as emitter, open(file) as fp:
|
|
138
|
+
forms: List[dict] = yaml.safe_load(fp)
|
|
139
|
+
for form_raw in forms:
|
|
140
|
+
form = Forms.parse_obj(form_raw)
|
|
141
141
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
)
|
|
147
|
-
|
|
148
|
-
mcp = MetadataChangeProposalWrapper(
|
|
149
|
-
entityUrn=form.urn,
|
|
150
|
-
aspect=FormInfoClass(
|
|
151
|
-
name=form.name,
|
|
152
|
-
description=form.description,
|
|
153
|
-
prompts=form.validate_prompts(emitter),
|
|
154
|
-
type=form.type,
|
|
155
|
-
actors=form.create_form_actors(form.actors),
|
|
156
|
-
),
|
|
142
|
+
try:
|
|
143
|
+
if not FormType.has_value(form.type):
|
|
144
|
+
logger.error(
|
|
145
|
+
f"Form type {form.type} does not exist. Please try again with a valid type."
|
|
157
146
|
)
|
|
158
|
-
emitter.emit_mcp(mcp)
|
|
159
147
|
|
|
160
|
-
|
|
148
|
+
mcp = MetadataChangeProposalWrapper(
|
|
149
|
+
entityUrn=form.urn,
|
|
150
|
+
aspect=FormInfoClass(
|
|
151
|
+
name=form.name,
|
|
152
|
+
description=form.description,
|
|
153
|
+
prompts=form.validate_prompts(emitter),
|
|
154
|
+
type=form.type,
|
|
155
|
+
actors=form.create_form_actors(form.actors),
|
|
156
|
+
),
|
|
157
|
+
)
|
|
158
|
+
emitter.emit_mcp(mcp)
|
|
159
|
+
|
|
160
|
+
logger.info(f"Created form {form.urn}")
|
|
161
161
|
|
|
162
|
-
|
|
163
|
-
|
|
162
|
+
if form.owners or form.group_owners:
|
|
163
|
+
form.add_owners(emitter)
|
|
164
164
|
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
165
|
+
if form.entities:
|
|
166
|
+
if form.entities.urns:
|
|
167
|
+
# Associate specific entities with a form
|
|
168
|
+
form.upload_entities_for_form(emitter)
|
|
169
169
|
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
170
|
+
if form.entities.filters:
|
|
171
|
+
# Associate groups of entities with a form based on filters
|
|
172
|
+
form.create_form_filters(emitter)
|
|
173
173
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
174
|
+
except Exception as e:
|
|
175
|
+
logger.error(e)
|
|
176
|
+
return
|
|
177
177
|
|
|
178
178
|
def validate_prompts(self, emitter: DataHubGraph) -> List[FormPromptClass]:
|
|
179
179
|
prompts = []
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from enum import Enum
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
from typing import Iterable, List, Optional, Union
|
|
4
|
+
from typing import Iterable, List, Optional, Type, Union
|
|
5
5
|
|
|
6
6
|
import yaml
|
|
7
|
-
from pydantic import validator
|
|
7
|
+
from pydantic import Field, StrictStr, validator
|
|
8
8
|
from ruamel.yaml import YAML
|
|
9
9
|
|
|
10
10
|
from datahub.configuration.common import ConfigModel
|
|
@@ -38,17 +38,17 @@ class AllowedTypes(Enum):
|
|
|
38
38
|
|
|
39
39
|
|
|
40
40
|
class AllowedValue(ConfigModel):
|
|
41
|
-
value: Union[
|
|
41
|
+
value: Union[StrictStr, float]
|
|
42
42
|
description: Optional[str] = None
|
|
43
43
|
|
|
44
44
|
|
|
45
45
|
VALID_ENTITY_TYPE_URNS = [
|
|
46
|
-
Urn.make_entity_type_urn(entity_type) for entity_type in URN_TYPES
|
|
46
|
+
Urn.make_entity_type_urn(entity_type) for entity_type in URN_TYPES
|
|
47
47
|
]
|
|
48
48
|
_VALID_ENTITY_TYPES_STRING = f"Valid entity type urns are {', '.join(VALID_ENTITY_TYPE_URNS)}, etc... Ensure that the entity type is valid."
|
|
49
49
|
|
|
50
50
|
|
|
51
|
-
def _validate_entity_type_urn(v: str) -> str:
|
|
51
|
+
def _validate_entity_type_urn(cls: Type, v: str) -> str:
|
|
52
52
|
urn = Urn.make_entity_type_urn(v)
|
|
53
53
|
if urn not in VALID_ENTITY_TYPE_URNS:
|
|
54
54
|
raise ValueError(
|
|
@@ -68,7 +68,7 @@ class TypeQualifierAllowedTypes(ConfigModel):
|
|
|
68
68
|
|
|
69
69
|
class StructuredProperties(ConfigModel):
|
|
70
70
|
id: Optional[str] = None
|
|
71
|
-
urn: Optional[str] = None
|
|
71
|
+
urn: Optional[str] = Field(None, validate_default=True)
|
|
72
72
|
qualified_name: Optional[str] = None
|
|
73
73
|
type: str
|
|
74
74
|
value_entity_types: Optional[List[str]] = None
|
datahub/api/graphql/assertion.py
CHANGED
|
@@ -65,7 +65,7 @@ query dataset($urn: String!, $start: Int, $count: Int, $status: AssertionRunStat
|
|
|
65
65
|
|
|
66
66
|
:param urn: The DataHub dataset unique identifier.
|
|
67
67
|
:param status: The assertion status to filter for. Every status will be accepted if it is not set.
|
|
68
|
-
See valid status at https://
|
|
68
|
+
See valid status at https://docs.datahub.com/docs/graphql/enums#assertionrunstatus
|
|
69
69
|
:param start_time_millis: The start time in milliseconds from the assertions will be queried.
|
|
70
70
|
:param end_time_millis: The end time in milliseconds until the assertions will be queried.
|
|
71
71
|
:param filter: Additional key value filters which will be applied as AND query
|
datahub/api/graphql/base.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Dict, List, Optional
|
|
1
|
+
from typing import Dict, List, Optional, Union
|
|
2
2
|
|
|
3
3
|
from gql import Client
|
|
4
4
|
from gql.transport.requests import RequestsHTTPTransport
|
|
@@ -39,16 +39,18 @@ class BaseApi:
|
|
|
39
39
|
|
|
40
40
|
def gen_filter(
|
|
41
41
|
self, filters: Dict[str, Optional[str]]
|
|
42
|
-
) -> Optional[Dict[str, List[Dict[str, str]]]]:
|
|
43
|
-
filter_expression: Optional[
|
|
42
|
+
) -> Optional[Dict[str, List[Dict[str, Union[str, List[str]]]]]]:
|
|
43
|
+
filter_expression: Optional[
|
|
44
|
+
Dict[str, List[Dict[str, Union[str, List[str]]]]]
|
|
45
|
+
] = None
|
|
44
46
|
if not filters:
|
|
45
47
|
return None
|
|
46
48
|
|
|
47
|
-
|
|
49
|
+
filter_list: List[Dict[str, Union[str, List[str]]]] = []
|
|
48
50
|
for key, value in filters.items():
|
|
49
51
|
if value is None:
|
|
50
52
|
continue
|
|
51
|
-
|
|
53
|
+
filter_list.append({"field": key, "values": [value]})
|
|
52
54
|
|
|
53
|
-
filter_expression = {"and":
|
|
55
|
+
filter_expression = {"and": filter_list}
|
|
54
56
|
return filter_expression
|
datahub/api/graphql/operation.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from typing import Any, Dict, List, Optional
|
|
3
3
|
|
|
4
|
-
from gql import
|
|
4
|
+
from gql import GraphQLRequest
|
|
5
5
|
|
|
6
6
|
from datahub.api.graphql.base import BaseApi
|
|
7
7
|
|
|
@@ -55,10 +55,10 @@ mutation reportOperation($urn: String!, $sourceType: OperationSourceType!, $oper
|
|
|
55
55
|
Report operation metadata for a dataset.
|
|
56
56
|
:param source_type: The source type to filter on. If not set it will accept any source type.
|
|
57
57
|
Default value: DATA_PROCESS
|
|
58
|
-
See valid types here: https://
|
|
58
|
+
See valid types here: https://docs.datahub.com/docs/graphql/enums#operationsourcetype
|
|
59
59
|
:param operation_type: The operation type to filter on. If not set it will accept any source type.
|
|
60
60
|
Default value: "UPDATE"
|
|
61
|
-
See valid types here: https://
|
|
61
|
+
See valid types here: https://docs.datahub.com/docs/graphql/enums/#operationtype
|
|
62
62
|
:param partition: The partition to set the operation.
|
|
63
63
|
:param num_affected_rows: The number of rows affected by this operation.
|
|
64
64
|
:param custom_properties: Key/value pair of custom propertis
|
|
@@ -79,10 +79,12 @@ mutation reportOperation($urn: String!, $sourceType: OperationSourceType!, $oper
|
|
|
79
79
|
if custom_properties is not None:
|
|
80
80
|
variable_values["customProperties"] = custom_properties
|
|
81
81
|
|
|
82
|
-
|
|
83
|
-
|
|
82
|
+
request = GraphQLRequest(
|
|
83
|
+
Operation.REPORT_OPERATION_MUTATION, variable_values=variable_values
|
|
84
84
|
)
|
|
85
85
|
|
|
86
|
+
result = self.client.execute(request)
|
|
87
|
+
|
|
86
88
|
return result["reportOperation"]
|
|
87
89
|
|
|
88
90
|
def query_operations(
|
|
@@ -103,18 +105,18 @@ mutation reportOperation($urn: String!, $sourceType: OperationSourceType!, $oper
|
|
|
103
105
|
:param end_time_millis: The end time in milliseconds until the operations will be queried.
|
|
104
106
|
:param limit: The maximum number of items to return.
|
|
105
107
|
:param source_type: The source type to filter on. If not set it will accept any source type.
|
|
106
|
-
See valid types here: https://
|
|
108
|
+
See valid types here: https://docs.datahub.com/docs/graphql/enums#operationsourcetype
|
|
107
109
|
:param operation_type: The operation type to filter on. If not set it will accept any source type.
|
|
108
|
-
See valid types here: https://
|
|
110
|
+
See valid types here: https://docs.datahub.com/docs/graphql/enums#operationsourcetype
|
|
109
111
|
:param partition: The partition to check the operation.
|
|
110
112
|
"""
|
|
111
113
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
+
request = GraphQLRequest(
|
|
115
|
+
Operation.QUERY_OPERATIONS,
|
|
114
116
|
variable_values={
|
|
115
117
|
"urn": urn,
|
|
116
118
|
"startTimeMillis": start_time_millis,
|
|
117
|
-
"
|
|
119
|
+
"endTimeMillis": end_time_millis,
|
|
118
120
|
"limit": limit,
|
|
119
121
|
"filter": self.gen_filter(
|
|
120
122
|
{
|
|
@@ -125,6 +127,8 @@ mutation reportOperation($urn: String!, $sourceType: OperationSourceType!, $oper
|
|
|
125
127
|
),
|
|
126
128
|
},
|
|
127
129
|
)
|
|
130
|
+
|
|
131
|
+
result = self.client.execute(request)
|
|
128
132
|
if "dataset" in result and "operations" in result["dataset"]:
|
|
129
133
|
operations = []
|
|
130
134
|
if source_type is not None:
|
datahub/cli/check_cli.py
CHANGED
|
@@ -9,6 +9,7 @@ from datetime import datetime
|
|
|
9
9
|
from typing import Any, Dict, List, Optional, Union
|
|
10
10
|
|
|
11
11
|
import click
|
|
12
|
+
from tabulate import tabulate
|
|
12
13
|
|
|
13
14
|
from datahub._version import __package_name__
|
|
14
15
|
from datahub.cli.json_file import check_mce_file
|
|
@@ -16,11 +17,12 @@ from datahub.configuration import config_loader
|
|
|
16
17
|
from datahub.configuration.common import AllowDenyPattern
|
|
17
18
|
from datahub.emitter.mce_builder import DEFAULT_ENV
|
|
18
19
|
from datahub.ingestion.graph.client import get_default_graph
|
|
20
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
19
21
|
from datahub.ingestion.run.pipeline import Pipeline
|
|
20
22
|
from datahub.ingestion.sink.sink_registry import sink_registry
|
|
21
23
|
from datahub.ingestion.source.source_registry import source_registry
|
|
22
24
|
from datahub.ingestion.transformer.transform_registry import transform_registry
|
|
23
|
-
from datahub.
|
|
25
|
+
from datahub.upgrade import upgrade
|
|
24
26
|
from datahub.utilities.file_backed_collections import (
|
|
25
27
|
ConnectionWrapper,
|
|
26
28
|
FileBackedDict,
|
|
@@ -46,7 +48,6 @@ def check() -> None:
|
|
|
46
48
|
@click.option(
|
|
47
49
|
"--unpack-mces", default=False, is_flag=True, help="Converts MCEs into MCPs"
|
|
48
50
|
)
|
|
49
|
-
@telemetry.with_telemetry()
|
|
50
51
|
def metadata_file(json_file: str, rewrite: bool, unpack_mces: bool) -> None:
|
|
51
52
|
"""Check the schema of a metadata (MCE or MCP) JSON file."""
|
|
52
53
|
|
|
@@ -104,7 +105,6 @@ def metadata_file(json_file: str, rewrite: bool, unpack_mces: bool) -> None:
|
|
|
104
105
|
default=(),
|
|
105
106
|
help="[Advanced] Paths in the deepdiff object to ignore",
|
|
106
107
|
)
|
|
107
|
-
@telemetry.with_telemetry()
|
|
108
108
|
def metadata_diff(
|
|
109
109
|
actual_file: str, expected_file: str, verbose: bool, ignore_path: List[str]
|
|
110
110
|
) -> None:
|
|
@@ -141,7 +141,6 @@ def metadata_diff(
|
|
|
141
141
|
type=str,
|
|
142
142
|
default=None,
|
|
143
143
|
)
|
|
144
|
-
@telemetry.with_telemetry()
|
|
145
144
|
def plugins(source: Optional[str], verbose: bool) -> None:
|
|
146
145
|
"""List the enabled ingestion plugins."""
|
|
147
146
|
|
|
@@ -233,7 +232,7 @@ def sql_format(sql: str, platform: str) -> None:
|
|
|
233
232
|
default=True,
|
|
234
233
|
help="Run in offline mode and disable schema-aware parsing.",
|
|
235
234
|
)
|
|
236
|
-
@
|
|
235
|
+
@upgrade.check_upgrade
|
|
237
236
|
def sql_lineage(
|
|
238
237
|
sql: Optional[str],
|
|
239
238
|
sql_file: Optional[str],
|
|
@@ -259,7 +258,7 @@ def sql_lineage(
|
|
|
259
258
|
|
|
260
259
|
graph = None
|
|
261
260
|
if online:
|
|
262
|
-
graph = get_default_graph()
|
|
261
|
+
graph = get_default_graph(ClientMode.CLI)
|
|
263
262
|
|
|
264
263
|
lineage = create_lineage_sql_parsed_result(
|
|
265
264
|
sql,
|
|
@@ -296,7 +295,6 @@ def sql_lineage(
|
|
|
296
295
|
type=str,
|
|
297
296
|
help="the input to validate",
|
|
298
297
|
)
|
|
299
|
-
@telemetry.with_telemetry()
|
|
300
298
|
def test_allow_deny(config: str, input: str, pattern_key: str) -> None:
|
|
301
299
|
"""Test input string against AllowDeny pattern in a DataHub recipe.
|
|
302
300
|
|
|
@@ -345,7 +343,6 @@ def test_allow_deny(config: str, input: str, pattern_key: str) -> None:
|
|
|
345
343
|
type=str,
|
|
346
344
|
help="The input to validate",
|
|
347
345
|
)
|
|
348
|
-
@telemetry.with_telemetry()
|
|
349
346
|
def test_path_spec(config: str, input: str, path_spec_key: str) -> None:
|
|
350
347
|
"""Test input path string against PathSpec patterns in a DataHub recipe.
|
|
351
348
|
|
|
@@ -470,10 +467,95 @@ WHERE
|
|
|
470
467
|
|
|
471
468
|
|
|
472
469
|
@check.command()
|
|
470
|
+
@upgrade.check_upgrade
|
|
473
471
|
def server_config() -> None:
|
|
474
472
|
"""Print the server config."""
|
|
475
|
-
graph = get_default_graph()
|
|
473
|
+
graph = get_default_graph(ClientMode.CLI)
|
|
476
474
|
|
|
477
475
|
server_config = graph.get_server_config()
|
|
478
476
|
|
|
479
477
|
click.echo(pprint.pformat(server_config))
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
@check.command()
|
|
481
|
+
@click.option(
|
|
482
|
+
"--urn", required=False, help="The urn or urn pattern (supports % for wildcard)"
|
|
483
|
+
)
|
|
484
|
+
@click.option("--aspect", default=None, help="Filter to a specific aspect name.")
|
|
485
|
+
@click.option(
|
|
486
|
+
"--start", type=int, default=None, help="Row number of sql store to restore from."
|
|
487
|
+
)
|
|
488
|
+
@click.option("--batch-size", type=int, default=None, help="How many rows to restore.")
|
|
489
|
+
@click.option(
|
|
490
|
+
"--file",
|
|
491
|
+
required=False,
|
|
492
|
+
type=click.Path(exists=True, dir_okay=True, readable=True),
|
|
493
|
+
help="File absolute path containing URNs (one per line) to restore indices",
|
|
494
|
+
)
|
|
495
|
+
@upgrade.check_upgrade
|
|
496
|
+
def restore_indices(
|
|
497
|
+
urn: Optional[str],
|
|
498
|
+
aspect: Optional[str],
|
|
499
|
+
start: Optional[int],
|
|
500
|
+
batch_size: Optional[int],
|
|
501
|
+
file: Optional[str],
|
|
502
|
+
) -> None:
|
|
503
|
+
"""Resync metadata changes into the search and graph indices."""
|
|
504
|
+
if urn is None and file is None:
|
|
505
|
+
raise click.UsageError("Either --urn or --file must be provided")
|
|
506
|
+
graph = get_default_graph(ClientMode.CLI)
|
|
507
|
+
|
|
508
|
+
graph.restore_indices(
|
|
509
|
+
urn_pattern=urn,
|
|
510
|
+
aspect=aspect,
|
|
511
|
+
start=start,
|
|
512
|
+
batch_size=batch_size,
|
|
513
|
+
file=file,
|
|
514
|
+
)
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
@check.command()
|
|
518
|
+
@upgrade.check_upgrade
|
|
519
|
+
def get_kafka_consumer_offsets() -> None:
|
|
520
|
+
"""Get Kafka consumer offsets from the DataHub API."""
|
|
521
|
+
graph = get_default_graph(ClientMode.CLI)
|
|
522
|
+
result = graph.get_kafka_consumer_offsets()
|
|
523
|
+
|
|
524
|
+
table_data = []
|
|
525
|
+
headers = [
|
|
526
|
+
"Topic",
|
|
527
|
+
"Consumer Group",
|
|
528
|
+
"Schema",
|
|
529
|
+
"Partition",
|
|
530
|
+
"Offset",
|
|
531
|
+
"Lag",
|
|
532
|
+
"Avg Lag",
|
|
533
|
+
"Max Lag",
|
|
534
|
+
"Total Lag",
|
|
535
|
+
]
|
|
536
|
+
|
|
537
|
+
for topic, consumers in result.items():
|
|
538
|
+
for consumer_group, schemas in consumers.items():
|
|
539
|
+
for schema, data in schemas.items():
|
|
540
|
+
metrics = data.get("metrics", {})
|
|
541
|
+
partitions = data.get("partitions", {})
|
|
542
|
+
|
|
543
|
+
for partition, partition_data in partitions.items():
|
|
544
|
+
table_data.append(
|
|
545
|
+
[
|
|
546
|
+
topic,
|
|
547
|
+
consumer_group,
|
|
548
|
+
schema,
|
|
549
|
+
partition,
|
|
550
|
+
partition_data.get("offset", "N/A"),
|
|
551
|
+
partition_data.get("lag", "N/A"),
|
|
552
|
+
metrics.get("avgLag", "N/A"),
|
|
553
|
+
metrics.get("maxLag", "N/A"),
|
|
554
|
+
metrics.get("totalLag", "N/A"),
|
|
555
|
+
]
|
|
556
|
+
)
|
|
557
|
+
|
|
558
|
+
if table_data:
|
|
559
|
+
click.echo(tabulate(table_data, headers=headers, tablefmt="grid"))
|
|
560
|
+
else:
|
|
561
|
+
click.echo("No Kafka consumer offset data found.")
|
datahub/cli/cli_utils.py
CHANGED
|
@@ -3,6 +3,7 @@ import logging
|
|
|
3
3
|
import time
|
|
4
4
|
import typing
|
|
5
5
|
from datetime import datetime
|
|
6
|
+
from functools import wraps
|
|
6
7
|
from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar, Union
|
|
7
8
|
|
|
8
9
|
import click
|
|
@@ -424,3 +425,65 @@ def ensure_has_system_metadata(
|
|
|
424
425
|
props = metadata.properties
|
|
425
426
|
props["clientId"] = datahub_version.__package_name__
|
|
426
427
|
props["clientVersion"] = datahub_version.__version__
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
def enable_auto_decorators(main_group: click.Group) -> None:
|
|
431
|
+
"""
|
|
432
|
+
Enable automatic decorators for all click commands.
|
|
433
|
+
This wraps existing command callback functions to add upgrade and telemetry decorators.
|
|
434
|
+
"""
|
|
435
|
+
|
|
436
|
+
def has_decorator(func: Any, module_pattern: str, function_pattern: str) -> bool:
|
|
437
|
+
"""Check if function already has a specific decorator"""
|
|
438
|
+
if hasattr(func, "__wrapped__"):
|
|
439
|
+
current_func = func
|
|
440
|
+
while hasattr(current_func, "__wrapped__"):
|
|
441
|
+
# Check if this wrapper matches the module and function patterns
|
|
442
|
+
if (
|
|
443
|
+
hasattr(current_func, "__module__")
|
|
444
|
+
and module_pattern in current_func.__module__
|
|
445
|
+
and hasattr(current_func, "__name__")
|
|
446
|
+
and function_pattern in current_func.__name__
|
|
447
|
+
):
|
|
448
|
+
return True
|
|
449
|
+
current_func = current_func.__wrapped__
|
|
450
|
+
return False
|
|
451
|
+
|
|
452
|
+
def has_telemetry_decorator(func):
|
|
453
|
+
return has_decorator(func, "telemetry", "with_telemetry")
|
|
454
|
+
|
|
455
|
+
def wrap_command_callback(command_obj):
|
|
456
|
+
"""Wrap a command's callback function to add decorators"""
|
|
457
|
+
if hasattr(command_obj, "callback") and command_obj.callback:
|
|
458
|
+
original_callback = command_obj.callback
|
|
459
|
+
|
|
460
|
+
# Import here to avoid circular imports
|
|
461
|
+
from datahub.telemetry import telemetry
|
|
462
|
+
|
|
463
|
+
decorated_callback = original_callback
|
|
464
|
+
|
|
465
|
+
if not has_telemetry_decorator(decorated_callback):
|
|
466
|
+
log.debug(
|
|
467
|
+
f"Applying telemetry decorator to {original_callback.__module__}.{original_callback.__name__}"
|
|
468
|
+
)
|
|
469
|
+
decorated_callback = telemetry.with_telemetry()(decorated_callback)
|
|
470
|
+
|
|
471
|
+
# Preserve the original function's metadata
|
|
472
|
+
decorated_callback = wraps(original_callback)(decorated_callback)
|
|
473
|
+
|
|
474
|
+
command_obj.callback = decorated_callback
|
|
475
|
+
|
|
476
|
+
def wrap_group_commands(group_obj):
|
|
477
|
+
"""Recursively wrap all commands in a group"""
|
|
478
|
+
if hasattr(group_obj, "commands"):
|
|
479
|
+
for _, command_obj in group_obj.commands.items():
|
|
480
|
+
if isinstance(command_obj, click.Group):
|
|
481
|
+
# Recursively wrap sub-groups
|
|
482
|
+
wrap_group_commands(command_obj)
|
|
483
|
+
else:
|
|
484
|
+
# Wrap individual commands
|
|
485
|
+
wrap_command_callback(command_obj)
|
|
486
|
+
|
|
487
|
+
wrap_group_commands(main_group)
|
|
488
|
+
|
|
489
|
+
log.debug("Auto-decorators enabled successfully")
|
datahub/cli/config_utils.py
CHANGED
|
@@ -11,14 +11,23 @@ import click
|
|
|
11
11
|
import yaml
|
|
12
12
|
from pydantic import BaseModel, ValidationError
|
|
13
13
|
|
|
14
|
-
from datahub.
|
|
14
|
+
from datahub.configuration.env_vars import (
|
|
15
|
+
get_gms_host,
|
|
16
|
+
get_gms_port,
|
|
17
|
+
get_gms_protocol,
|
|
18
|
+
get_gms_token,
|
|
19
|
+
get_gms_url,
|
|
20
|
+
get_skip_config,
|
|
21
|
+
get_system_client_id,
|
|
22
|
+
get_system_client_secret,
|
|
23
|
+
)
|
|
15
24
|
from datahub.ingestion.graph.config import DatahubClientConfig
|
|
16
25
|
|
|
17
26
|
logger = logging.getLogger(__name__)
|
|
18
27
|
|
|
19
28
|
CONDENSED_DATAHUB_CONFIG_PATH = "~/.datahubenv"
|
|
20
|
-
DATAHUB_CONFIG_PATH = os.path.expanduser(CONDENSED_DATAHUB_CONFIG_PATH)
|
|
21
|
-
DATAHUB_ROOT_FOLDER = os.path.expanduser("~/.datahub")
|
|
29
|
+
DATAHUB_CONFIG_PATH: str = os.path.expanduser(CONDENSED_DATAHUB_CONFIG_PATH)
|
|
30
|
+
DATAHUB_ROOT_FOLDER: str = os.path.expanduser("~/.datahub")
|
|
22
31
|
ENV_SKIP_CONFIG = "DATAHUB_SKIP_CONFIG"
|
|
23
32
|
|
|
24
33
|
ENV_DATAHUB_SYSTEM_CLIENT_ID = "DATAHUB_SYSTEM_CLIENT_ID"
|
|
@@ -36,15 +45,15 @@ class MissingConfigError(Exception):
|
|
|
36
45
|
|
|
37
46
|
|
|
38
47
|
def get_system_auth() -> Optional[str]:
|
|
39
|
-
system_client_id =
|
|
40
|
-
system_client_secret =
|
|
48
|
+
system_client_id = get_system_client_id()
|
|
49
|
+
system_client_secret = get_system_client_secret()
|
|
41
50
|
if system_client_id is not None and system_client_secret is not None:
|
|
42
51
|
return f"Basic {system_client_id}:{system_client_secret}"
|
|
43
52
|
return None
|
|
44
53
|
|
|
45
54
|
|
|
46
55
|
def _should_skip_config() -> bool:
|
|
47
|
-
return
|
|
56
|
+
return get_skip_config()
|
|
48
57
|
|
|
49
58
|
|
|
50
59
|
def persist_raw_datahub_config(config: dict) -> None:
|
|
@@ -67,11 +76,11 @@ class DatahubConfig(BaseModel):
|
|
|
67
76
|
|
|
68
77
|
|
|
69
78
|
def _get_config_from_env() -> Tuple[Optional[str], Optional[str]]:
|
|
70
|
-
host =
|
|
71
|
-
port =
|
|
72
|
-
token =
|
|
73
|
-
protocol =
|
|
74
|
-
url =
|
|
79
|
+
host = get_gms_host()
|
|
80
|
+
port = get_gms_port()
|
|
81
|
+
token = get_gms_token()
|
|
82
|
+
protocol = get_gms_protocol()
|
|
83
|
+
url = get_gms_url()
|
|
75
84
|
if port is not None:
|
|
76
85
|
url = f"{protocol}://{host}:{port}"
|
|
77
86
|
return url, token
|
|
@@ -108,7 +117,6 @@ def load_client_config() -> DatahubClientConfig:
|
|
|
108
117
|
datahub_config: DatahubClientConfig = DatahubConfig.parse_obj(
|
|
109
118
|
client_config_dict
|
|
110
119
|
).gms
|
|
111
|
-
|
|
112
120
|
return datahub_config
|
|
113
121
|
except ValidationError as e:
|
|
114
122
|
click.echo(f"Error loading your {CONDENSED_DATAHUB_CONFIG_PATH}")
|
datahub/cli/container_cli.py
CHANGED
|
@@ -3,6 +3,7 @@ import logging
|
|
|
3
3
|
import click
|
|
4
4
|
|
|
5
5
|
from datahub.ingestion.source.apply.datahub_apply import apply_association_to_container
|
|
6
|
+
from datahub.upgrade import upgrade
|
|
6
7
|
|
|
7
8
|
logger = logging.getLogger(__name__)
|
|
8
9
|
|
|
@@ -16,6 +17,7 @@ def container() -> None:
|
|
|
16
17
|
@container.command()
|
|
17
18
|
@click.option("--container-urn", required=True, type=str)
|
|
18
19
|
@click.option("--tag-urn", required=True, type=str)
|
|
20
|
+
@upgrade.check_upgrade
|
|
19
21
|
def tag(container_urn: str, tag_urn: str) -> None:
|
|
20
22
|
"""Add patch to add a tag to all datasets in a container"""
|
|
21
23
|
apply_association_to_container(container_urn, tag_urn, "tag")
|
|
@@ -24,6 +26,7 @@ def tag(container_urn: str, tag_urn: str) -> None:
|
|
|
24
26
|
@container.command()
|
|
25
27
|
@click.option("--container-urn", required=True, type=str)
|
|
26
28
|
@click.option("--term-urn", required=True, type=str)
|
|
29
|
+
@upgrade.check_upgrade
|
|
27
30
|
def term(container_urn: str, term_urn: str) -> None:
|
|
28
31
|
"""Add patch to add a term to all datasets in a container"""
|
|
29
32
|
apply_association_to_container(container_urn, term_urn, "term")
|
|
@@ -32,6 +35,7 @@ def term(container_urn: str, term_urn: str) -> None:
|
|
|
32
35
|
@container.command()
|
|
33
36
|
@click.option("--container-urn", required=True, type=str)
|
|
34
37
|
@click.option("--owner-urn", required=True, type=str)
|
|
38
|
+
@upgrade.check_upgrade
|
|
35
39
|
def owner(container_urn: str, owner_urn: str) -> None:
|
|
36
40
|
"""Add patch to add a owner to all datasets in a container"""
|
|
37
41
|
apply_association_to_container(container_urn, owner_urn, "owner")
|
|
@@ -40,6 +44,7 @@ def owner(container_urn: str, owner_urn: str) -> None:
|
|
|
40
44
|
@container.command()
|
|
41
45
|
@click.option("--container-urn", required=True, type=str)
|
|
42
46
|
@click.option("--domain-urn", required=True, type=str)
|
|
47
|
+
@upgrade.check_upgrade
|
|
43
48
|
def domain(container_urn: str, domain_urn: str) -> None:
|
|
44
49
|
"""Add patch to add a domain to all datasets in a container"""
|
|
45
50
|
apply_association_to_container(container_urn, domain_urn, "domain")
|