acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +18 -3
- datahub/api/entities/datajob/datajob.py +24 -4
- datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +47 -72
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +37 -37
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/base.py +8 -6
- datahub/api/graphql/operation.py +14 -10
- datahub/cli/check_cli.py +91 -9
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +20 -12
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +133 -34
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +155 -231
- datahub/cli/exists_cli.py +2 -3
- datahub/cli/get_cli.py +2 -3
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +11 -5
- datahub/cli/ingest_cli.py +25 -26
- datahub/cli/migrate.py +12 -9
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +4 -6
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +39 -7
- datahub/cli/specific/datacontract_cli.py +57 -9
- datahub/cli/specific/dataproduct_cli.py +12 -24
- datahub/cli/specific/dataset_cli.py +31 -21
- datahub/cli/specific/forms_cli.py +2 -5
- datahub/cli/specific/group_cli.py +2 -3
- datahub/cli/specific/structuredproperties_cli.py +5 -7
- datahub/cli/specific/user_cli.py +174 -4
- datahub/cli/state_cli.py +2 -3
- datahub/cli/timeline_cli.py +2 -3
- datahub/configuration/common.py +46 -2
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +4 -3
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +12 -8
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/mcp_builder.py +12 -0
- datahub/emitter/request_helper.py +138 -15
- datahub/emitter/response_helper.py +111 -19
- datahub/emitter/rest_emitter.py +399 -163
- datahub/entrypoints.py +10 -5
- datahub/errors.py +12 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/common.py +9 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +174 -62
- datahub/ingestion/api/source_helpers.py +41 -3
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +31 -5
- datahub/ingestion/glossary/classification_mixin.py +9 -2
- datahub/ingestion/graph/client.py +492 -55
- datahub/ingestion/graph/config.py +18 -2
- datahub/ingestion/graph/filters.py +96 -32
- datahub/ingestion/graph/links.py +55 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +90 -23
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +31 -23
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/apply/datahub_apply.py +6 -5
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +7 -18
- datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
- datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
- datahub/ingestion/source/common/subtypes.py +73 -1
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
- datahub/ingestion/source/data_lake_common/object_store.py +732 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
- datahub/ingestion/source/datahub/config.py +19 -5
- datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
- datahub/ingestion/source/datahub/datahub_source.py +11 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
- datahub/ingestion/source/dbt/dbt_common.py +270 -26
- datahub/ingestion/source/dbt/dbt_core.py +88 -47
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +228 -215
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +12 -14
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +67 -8
- datahub/ingestion/source/fivetran/fivetran.py +228 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
- datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
- datahub/ingestion/source/gcs/gcs_source.py +53 -10
- datahub/ingestion/source/gcs/gcs_utils.py +36 -9
- datahub/ingestion/source/ge_data_profiler.py +146 -33
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +402 -0
- datahub/ingestion/source/hex/constants.py +8 -0
- datahub/ingestion/source/hex/hex.py +311 -0
- datahub/ingestion/source/hex/mapper.py +412 -0
- datahub/ingestion/source/hex/model.py +78 -0
- datahub/ingestion/source/hex/query_fetcher.py +307 -0
- datahub/ingestion/source/iceberg/iceberg.py +385 -164
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +28 -71
- datahub/ingestion/source/kafka/kafka_config.py +78 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
- datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +216 -86
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +539 -555
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +103 -118
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +11 -10
- datahub/ingestion/source/mlflow.py +254 -23
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +359 -181
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
- datahub/ingestion/source/nifi.py +5 -5
- datahub/ingestion/source/openapi.py +85 -38
- datahub/ingestion/source/openapi_parser.py +59 -40
- datahub/ingestion/source/powerbi/config.py +92 -27
- datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +66 -32
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/profile.py +2 -2
- datahub/ingestion/source/redshift/query.py +24 -20
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +13 -11
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +515 -244
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/config.py +75 -8
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +36 -7
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +403 -140
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
- datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
- datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
- datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
- datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +219 -26
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +29 -9
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +9 -4
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +28 -8
- datahub/ingestion/source/sql/hive_metastore.py +24 -25
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +18 -2
- datahub/ingestion/source/sql/mssql/source.py +376 -62
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +62 -11
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +20 -2
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +27 -2
- datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
- datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
- datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +43 -10
- datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/superset.py +810 -126
- datahub/ingestion/source/tableau/tableau.py +172 -69
- datahub/ingestion/source/tableau/tableau_common.py +11 -4
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +161 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
- datahub/ingestion/source/usage/usage_common.py +4 -68
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1367 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
- datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/dataset_domain.py +1 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/common.py +3 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
- datahub/metadata/_urns/urn_defs.py +1866 -1582
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18404 -16617
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/Deprecation.avsc +2 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +21 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QueryProperties.avsc +24 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +147 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +7 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_shared.py +393 -10
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +180 -4
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +154 -12
- datahub/sdk/lineage_client.py +943 -0
- datahub/sdk/main_client.py +83 -8
- datahub/sdk/mlmodel.py +383 -0
- datahub/sdk/mlmodelgroup.py +240 -0
- datahub/sdk/search_client.py +85 -8
- datahub/sdk/search_filters.py +393 -68
- datahub/secret/datahub_secret_store.py +5 -1
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +51 -59
- datahub/sql_parsing/_sqlglot_patch.py +1 -2
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/split_statements.py +30 -3
- datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
- datahub/sql_parsing/sqlglot_lineage.py +517 -44
- datahub/sql_parsing/sqlglot_utils.py +30 -18
- datahub/sql_parsing/tool_meta_extractor.py +25 -2
- datahub/telemetry/telemetry.py +30 -16
- datahub/testing/check_imports.py +1 -1
- datahub/testing/docker_utils.py +8 -2
- datahub/testing/mce_helpers.py +421 -0
- datahub/testing/mcp_diff.py +17 -21
- datahub/testing/sdk_v2_helpers.py +18 -0
- datahub/upgrade/upgrade.py +86 -30
- datahub/utilities/file_backed_collections.py +14 -15
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/ingest_utils.py +2 -2
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +30 -7
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +298 -10
- datahub/utilities/sqlalchemy_query_combiner.py +6 -4
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/utilities/urn_encoder.py +1 -1
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -458
- datahub/ingestion/source/vertexai.py +0 -697
- datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -36,8 +36,10 @@ csv-enricher = datahub.ingestion.source.csv_enricher:CSVEnricherSource
|
|
|
36
36
|
datahub = datahub.ingestion.source.datahub.datahub_source:DataHubSource
|
|
37
37
|
datahub-apply = datahub.ingestion.source.apply.datahub_apply:DataHubApplySource
|
|
38
38
|
datahub-business-glossary = datahub.ingestion.source.metadata.business_glossary:BusinessGlossaryFileSource
|
|
39
|
+
datahub-debug = datahub.ingestion.source.debug.datahub_debug:DataHubDebugSource
|
|
39
40
|
datahub-gc = datahub.ingestion.source.gc.datahub_gc:DataHubGcSource
|
|
40
41
|
datahub-lineage-file = datahub.ingestion.source.metadata.lineage:LineageFileSource
|
|
42
|
+
datahub-mock-data = datahub.ingestion.source.mock_data.datahub_mock_data:DataHubMockDataSource
|
|
41
43
|
dbt = datahub.ingestion.source.dbt.dbt_core:DBTCoreSource
|
|
42
44
|
dbt-cloud = datahub.ingestion.source.dbt.dbt_cloud:DBTCloudSource
|
|
43
45
|
delta-lake = datahub.ingestion.source.delta_lake:DeltaLakeSource
|
|
@@ -46,6 +48,7 @@ dremio = datahub.ingestion.source.dremio.dremio_source:DremioSource
|
|
|
46
48
|
druid = datahub.ingestion.source.sql.druid:DruidSource
|
|
47
49
|
dynamodb = datahub.ingestion.source.dynamodb.dynamodb:DynamoDBSource
|
|
48
50
|
elasticsearch = datahub.ingestion.source.elastic_search:ElasticsearchSource
|
|
51
|
+
excel = datahub.ingestion.source.excel.source:ExcelSource
|
|
49
52
|
feast = datahub.ingestion.source.feast:FeastRepositorySource
|
|
50
53
|
file = datahub.ingestion.source.file:GenericFileSource
|
|
51
54
|
fivetran = datahub.ingestion.source.fivetran.fivetran:FivetranSource
|
|
@@ -53,6 +56,7 @@ gcs = datahub.ingestion.source.gcs.gcs_source:GCSSource
|
|
|
53
56
|
glue = datahub.ingestion.source.aws.glue:GlueSource
|
|
54
57
|
grafana = datahub.ingestion.source.grafana.grafana_source:GrafanaSource
|
|
55
58
|
hana = datahub.ingestion.source.sql.hana:HanaSource
|
|
59
|
+
hex = datahub.ingestion.source.hex.hex:HexSource
|
|
56
60
|
hive = datahub.ingestion.source.sql.hive:HiveSource
|
|
57
61
|
hive-metastore = datahub.ingestion.source.sql.hive_metastore:HiveMetastoreSource
|
|
58
62
|
iceberg = datahub.ingestion.source.iceberg.iceberg:IcebergSource
|
|
@@ -90,6 +94,7 @@ sagemaker = datahub.ingestion.source.aws.sagemaker:SagemakerSource
|
|
|
90
94
|
salesforce = datahub.ingestion.source.salesforce:SalesforceSource
|
|
91
95
|
sigma = datahub.ingestion.source.sigma.sigma:SigmaSource
|
|
92
96
|
slack = datahub.ingestion.source.slack.slack:SlackSource
|
|
97
|
+
snaplogic = datahub.ingestion.source.snaplogic.snaplogic:SnaplogicSource
|
|
93
98
|
snowflake = datahub.ingestion.source.snowflake.snowflake_v2:SnowflakeV2Source
|
|
94
99
|
snowflake-queries = datahub.ingestion.source.snowflake.snowflake_queries:SnowflakeQueriesSource
|
|
95
100
|
snowflake-summary = datahub.ingestion.source.snowflake.snowflake_summary:SnowflakeSummarySource
|
|
@@ -101,7 +106,7 @@ tableau = datahub.ingestion.source.tableau.tableau:TableauSource
|
|
|
101
106
|
teradata = datahub.ingestion.source.sql.teradata:TeradataSource
|
|
102
107
|
trino = datahub.ingestion.source.sql.trino:TrinoSource
|
|
103
108
|
unity-catalog = datahub.ingestion.source.unity.source:UnityCatalogSource
|
|
104
|
-
vertexai = datahub.ingestion.source.vertexai:VertexAISource
|
|
109
|
+
vertexai = datahub.ingestion.source.vertexai.vertexai:VertexAISource
|
|
105
110
|
vertica = datahub.ingestion.source.sql.vertica:VerticaSource
|
|
106
111
|
|
|
107
112
|
[datahub.ingestion.transformer.plugins]
|
|
@@ -126,6 +131,7 @@ pattern_cleanup_dataset_usage_user = datahub.ingestion.transformer.pattern_clean
|
|
|
126
131
|
pattern_cleanup_ownership = datahub.ingestion.transformer.pattern_cleanup_ownership:PatternCleanUpOwnership
|
|
127
132
|
replace_external_url = datahub.ingestion.transformer.replace_external_url:ReplaceExternalUrlDataset
|
|
128
133
|
replace_external_url_container = datahub.ingestion.transformer.replace_external_url:ReplaceExternalUrlContainer
|
|
134
|
+
set_browse_path = datahub.ingestion.transformer.set_browse_path:SetBrowsePathTransformer
|
|
129
135
|
set_dataset_browse_path = datahub.ingestion.transformer.add_dataset_browse_path:AddDatasetBrowsePathTransformer
|
|
130
136
|
simple_add_dataset_dataproduct = datahub.ingestion.transformer.add_dataset_dataproduct:SimpleAddDatasetDataProduct
|
|
131
137
|
simple_add_dataset_domain = datahub.ingestion.transformer.dataset_domain:SimpleAddDatasetDomain
|
datahub/_version.py
CHANGED
|
@@ -55,9 +55,9 @@ class OperationCircuitBreaker(AbstractCircuitBreaker):
|
|
|
55
55
|
which is set as Airflow connection.
|
|
56
56
|
:param partition: The partition to check the operation.
|
|
57
57
|
:param source_type: The source type to filter on. If not set it will accept any source type.
|
|
58
|
-
See valid types here: https://datahubproject.io/docs/graphql/enums#operationsourcetype
|
|
58
|
+
See valid types here: https://docs.datahub.com/docs/graphql/enums#operationsourcetype
|
|
59
59
|
:param operation_type: The operation type to filter on. If not set it will accept any source type.
|
|
60
|
-
See valid types here: https://datahubproject.io/docs/graphql/enums/#operationtype
|
|
60
|
+
See valid types here: https://docs.datahub.com/docs/graphql/enums/#operationtype
|
|
61
61
|
"""
|
|
62
62
|
|
|
63
63
|
start_time_millis: int = int(
|
|
@@ -131,7 +131,7 @@ class SerializedResourceValue(BaseModel):
|
|
|
131
131
|
elif isinstance(object, BaseModel):
|
|
132
132
|
return SerializedResourceValue(
|
|
133
133
|
content_type=models.SerializedValueContentTypeClass.JSON,
|
|
134
|
-
blob=json.dumps(object.dict()).encode("utf-8"),
|
|
134
|
+
blob=json.dumps(object.dict(), sort_keys=True).encode("utf-8"),
|
|
135
135
|
schema_type=models.SerializedValueSchemaTypeClass.JSON,
|
|
136
136
|
schema_ref=object.__class__.__name__,
|
|
137
137
|
)
|
|
@@ -71,7 +71,7 @@ class CorpGroup(BaseModel):
|
|
|
71
71
|
_rename_admins_to_owners = pydantic_renamed_field("admins", "owners")
|
|
72
72
|
|
|
73
73
|
@pydantic.validator("owners", "members", each_item=True)
|
|
74
|
-
def make_urn_if_needed(v):
|
|
74
|
+
def make_urn_if_needed(cls, v):
|
|
75
75
|
if isinstance(v, str):
|
|
76
76
|
return builder.make_user_urn(v)
|
|
77
77
|
return v
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import collections
|
|
2
|
-
from typing import Iterable, List, Optional, Tuple
|
|
2
|
+
from typing import Dict, Iterable, List, Optional, Tuple, Union
|
|
3
3
|
|
|
4
4
|
from ruamel.yaml import YAML
|
|
5
5
|
from typing_extensions import Literal
|
|
@@ -25,6 +25,8 @@ from datahub.metadata.schema_classes import (
|
|
|
25
25
|
FreshnessContractClass,
|
|
26
26
|
SchemaContractClass,
|
|
27
27
|
StatusClass,
|
|
28
|
+
StructuredPropertiesClass,
|
|
29
|
+
StructuredPropertyValueAssignmentClass,
|
|
28
30
|
)
|
|
29
31
|
from datahub.utilities.urns.urn import guess_entity_type
|
|
30
32
|
|
|
@@ -47,8 +49,12 @@ class DataContract(v1_ConfigModel):
|
|
|
47
49
|
entity: str = v1_Field(
|
|
48
50
|
description="The entity urn that the Data Contract is associated with"
|
|
49
51
|
)
|
|
50
|
-
|
|
51
|
-
|
|
52
|
+
properties: Optional[Dict[str, Union[str, float, List[Union[str, float]]]]] = (
|
|
53
|
+
v1_Field(
|
|
54
|
+
default=None,
|
|
55
|
+
description="Structured properties associated with the data contract.",
|
|
56
|
+
)
|
|
57
|
+
)
|
|
52
58
|
|
|
53
59
|
schema_field: Optional[SchemaAssertion] = v1_Field(default=None, alias="schema")
|
|
54
60
|
|
|
@@ -172,6 +178,30 @@ class DataContract(v1_ConfigModel):
|
|
|
172
178
|
)
|
|
173
179
|
yield from dq_assertion_mcps
|
|
174
180
|
|
|
181
|
+
# Construct the structured properties aspect if properties are defined
|
|
182
|
+
structured_properties_aspect: Optional[StructuredPropertiesClass] = None
|
|
183
|
+
if self.properties:
|
|
184
|
+
property_assignments: List[StructuredPropertyValueAssignmentClass] = []
|
|
185
|
+
for key, value in self.properties.items():
|
|
186
|
+
# Use f-string formatting for the property URN, like in dataset.py
|
|
187
|
+
prop_urn = f"urn:li:structuredProperty:{key}"
|
|
188
|
+
# Ensure value is a list for StructuredPropertyValueAssignmentClass
|
|
189
|
+
values_list = value if isinstance(value, list) else [value]
|
|
190
|
+
property_assignments.append(
|
|
191
|
+
StructuredPropertyValueAssignmentClass(
|
|
192
|
+
propertyUrn=prop_urn,
|
|
193
|
+
values=[
|
|
194
|
+
str(v) for v in values_list
|
|
195
|
+
], # Ensure all values are strings
|
|
196
|
+
)
|
|
197
|
+
)
|
|
198
|
+
if (
|
|
199
|
+
property_assignments
|
|
200
|
+
): # Only create aspect if there are valid assignments
|
|
201
|
+
structured_properties_aspect = StructuredPropertiesClass(
|
|
202
|
+
properties=property_assignments
|
|
203
|
+
)
|
|
204
|
+
|
|
175
205
|
# Now that we've generated the assertions, we can generate
|
|
176
206
|
# the actual data contract.
|
|
177
207
|
yield from MetadataChangeProposalWrapper.construct_many(
|
|
@@ -202,6 +232,8 @@ class DataContract(v1_ConfigModel):
|
|
|
202
232
|
if True
|
|
203
233
|
else None
|
|
204
234
|
),
|
|
235
|
+
# Add structured properties aspect if defined
|
|
236
|
+
structured_properties_aspect,
|
|
205
237
|
],
|
|
206
238
|
)
|
|
207
239
|
|
|
@@ -9,6 +9,7 @@ from datahub.metadata.schema_classes import (
|
|
|
9
9
|
AuditStampClass,
|
|
10
10
|
DataFlowInfoClass,
|
|
11
11
|
DataFlowSnapshotClass,
|
|
12
|
+
DataPlatformInstanceClass,
|
|
12
13
|
GlobalTagsClass,
|
|
13
14
|
MetadataChangeEventClass,
|
|
14
15
|
OwnerClass,
|
|
@@ -29,7 +30,7 @@ class DataFlow:
|
|
|
29
30
|
"""The DataHub representation of data-flow.
|
|
30
31
|
|
|
31
32
|
Args:
|
|
32
|
-
urn (int): Unique identifier of the DataFlow in DataHub. For more detail refer https://datahubproject.io/docs/what/urn/.
|
|
33
|
+
urn (int): Unique identifier of the DataFlow in DataHub. For more detail refer https://docs.datahub.com/docs/what/urn/.
|
|
33
34
|
id (str): Identifier of DataFlow in orchestrator.
|
|
34
35
|
orchestrator (str): orchestrator. for example airflow.
|
|
35
36
|
cluster (Optional[str]): [deprecated] Please use env.
|
|
@@ -39,8 +40,8 @@ class DataFlow:
|
|
|
39
40
|
url (Optional[str]): URL pointing to DataFlow.
|
|
40
41
|
tags (Set[str]): tags that need to be apply on DataFlow.
|
|
41
42
|
owners (Set[str]): owners that need to be apply on DataFlow.
|
|
42
|
-
platform_instance (Optional[str]): The instance of the platform that all assets produced by this orchestrator belong to. For more detail refer https://datahubproject.io/docs/platform-instances/.
|
|
43
|
-
env (Optional[str]): The environment that all assets produced by this orchestrator belong to. For more detail and possible values refer https://datahubproject.io/docs/graphql/enums/#fabrictype.
|
|
43
|
+
platform_instance (Optional[str]): The instance of the platform that all assets produced by this orchestrator belong to. For more detail refer https://docs.datahub.com/docs/platform-instances/.
|
|
44
|
+
env (Optional[str]): The environment that all assets produced by this orchestrator belong to. For more detail and possible values refer https://docs.datahub.com/docs/graphql/enums/#fabrictype.
|
|
44
45
|
"""
|
|
45
46
|
|
|
46
47
|
urn: DataFlowUrn = field(init=False)
|
|
@@ -164,6 +165,20 @@ class DataFlow:
|
|
|
164
165
|
)
|
|
165
166
|
yield mcp
|
|
166
167
|
|
|
168
|
+
if self.platform_instance:
|
|
169
|
+
instance = builder.make_dataplatform_instance_urn(
|
|
170
|
+
platform=self.orchestrator,
|
|
171
|
+
instance=self.platform_instance,
|
|
172
|
+
)
|
|
173
|
+
mcp = MetadataChangeProposalWrapper(
|
|
174
|
+
entityUrn=str(self.urn),
|
|
175
|
+
aspect=DataPlatformInstanceClass(
|
|
176
|
+
platform=builder.make_data_platform_urn(self.orchestrator),
|
|
177
|
+
instance=instance,
|
|
178
|
+
),
|
|
179
|
+
)
|
|
180
|
+
yield mcp
|
|
181
|
+
|
|
167
182
|
for owner in self.generate_ownership_aspect():
|
|
168
183
|
mcp = MetadataChangeProposalWrapper(
|
|
169
184
|
entityUrn=str(self.urn),
|
|
@@ -10,6 +10,7 @@ from datahub.metadata.schema_classes import (
|
|
|
10
10
|
AzkabanJobTypeClass,
|
|
11
11
|
DataJobInfoClass,
|
|
12
12
|
DataJobInputOutputClass,
|
|
13
|
+
DataPlatformInstanceClass,
|
|
13
14
|
FineGrainedLineageClass,
|
|
14
15
|
GlobalTagsClass,
|
|
15
16
|
OwnerClass,
|
|
@@ -45,6 +46,7 @@ class DataJob:
|
|
|
45
46
|
outlets (List[str]): List of urns the DataProcessInstance produces
|
|
46
47
|
fine_grained_lineages: Column lineage for the inlets and outlets
|
|
47
48
|
upstream_urns: List[DataJobUrn] = field(default_factory=list)
|
|
49
|
+
platform_instance (Optional[str]): The instance of the platform that all assets produced by this orchestrator belong to.
|
|
48
50
|
"""
|
|
49
51
|
|
|
50
52
|
id: str
|
|
@@ -61,6 +63,7 @@ class DataJob:
|
|
|
61
63
|
outlets: List[DatasetUrn] = field(default_factory=list)
|
|
62
64
|
fine_grained_lineages: List[FineGrainedLineageClass] = field(default_factory=list)
|
|
63
65
|
upstream_urns: List[DataJobUrn] = field(default_factory=list)
|
|
66
|
+
platform_instance: Optional[str] = None
|
|
64
67
|
|
|
65
68
|
def __post_init__(self):
|
|
66
69
|
job_flow_urn = DataFlowUrn.create_from_ids(
|
|
@@ -105,7 +108,9 @@ class DataJob:
|
|
|
105
108
|
return [tags]
|
|
106
109
|
|
|
107
110
|
def generate_mcp(
|
|
108
|
-
self,
|
|
111
|
+
self,
|
|
112
|
+
generate_lineage: bool = True,
|
|
113
|
+
materialize_iolets: bool = True,
|
|
109
114
|
) -> Iterable[MetadataChangeProposalWrapper]:
|
|
110
115
|
env: Optional[str] = None
|
|
111
116
|
if self.flow_urn.cluster.upper() in builder.ALL_ENV_TYPES:
|
|
@@ -127,6 +132,20 @@ class DataJob:
|
|
|
127
132
|
)
|
|
128
133
|
yield mcp
|
|
129
134
|
|
|
135
|
+
if self.platform_instance:
|
|
136
|
+
instance = builder.make_dataplatform_instance_urn(
|
|
137
|
+
platform=self.flow_urn.orchestrator,
|
|
138
|
+
instance=self.platform_instance,
|
|
139
|
+
)
|
|
140
|
+
mcp = MetadataChangeProposalWrapper(
|
|
141
|
+
entityUrn=str(self.urn),
|
|
142
|
+
aspect=DataPlatformInstanceClass(
|
|
143
|
+
platform=builder.make_data_platform_urn(self.flow_urn.orchestrator),
|
|
144
|
+
instance=instance,
|
|
145
|
+
),
|
|
146
|
+
)
|
|
147
|
+
yield mcp
|
|
148
|
+
|
|
130
149
|
mcp = MetadataChangeProposalWrapper(
|
|
131
150
|
entityUrn=str(self.urn),
|
|
132
151
|
aspect=StatusClass(
|
|
@@ -135,9 +154,10 @@ class DataJob:
|
|
|
135
154
|
)
|
|
136
155
|
yield mcp
|
|
137
156
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
157
|
+
if generate_lineage:
|
|
158
|
+
yield from self.generate_data_input_output_mcp(
|
|
159
|
+
materialize_iolets=materialize_iolets
|
|
160
|
+
)
|
|
141
161
|
|
|
142
162
|
for owner in self.generate_ownership_aspect():
|
|
143
163
|
mcp = MetadataChangeProposalWrapper(
|
|
@@ -159,6 +159,7 @@ class DataProcessInstance:
|
|
|
159
159
|
env=self.template_urn.get_env(),
|
|
160
160
|
orchestrator=self.template_urn.get_orchestrator_name(),
|
|
161
161
|
id=self.template_urn.get_flow_id(),
|
|
162
|
+
platform_instance=self.data_platform_instance,
|
|
162
163
|
)
|
|
163
164
|
for mcp in template_object.generate_mcp():
|
|
164
165
|
self._emit_mcp(mcp, emitter, callback)
|
|
@@ -168,6 +169,7 @@ class DataProcessInstance:
|
|
|
168
169
|
id=self.template_urn.get_job_id(),
|
|
169
170
|
upstream_urns=input_datajob_urns,
|
|
170
171
|
flow_urn=self.template_urn.get_data_flow_urn(),
|
|
172
|
+
platform_instance=self.data_platform_instance,
|
|
171
173
|
)
|
|
172
174
|
for mcp in template_object.generate_mcp():
|
|
173
175
|
self._emit_mcp(mcp, emitter, callback)
|
|
@@ -382,6 +384,7 @@ class DataProcessInstance:
|
|
|
382
384
|
cluster=datajob.flow_urn.cluster,
|
|
383
385
|
template_urn=datajob.urn,
|
|
384
386
|
id=id,
|
|
387
|
+
data_platform_instance=datajob.platform_instance,
|
|
385
388
|
)
|
|
386
389
|
dpi._template_object = datajob
|
|
387
390
|
|
|
@@ -438,6 +441,7 @@ class DataProcessInstance:
|
|
|
438
441
|
orchestrator=dataflow.orchestrator,
|
|
439
442
|
cluster=cast(str, dataflow.env),
|
|
440
443
|
template_urn=dataflow.urn,
|
|
444
|
+
data_platform_instance=dataflow.platform_instance,
|
|
441
445
|
)
|
|
442
446
|
dpi._template_object = dataflow
|
|
443
447
|
return dpi
|
|
@@ -6,9 +6,10 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
|
|
|
6
6
|
|
|
7
7
|
import pydantic
|
|
8
8
|
from ruamel.yaml import YAML
|
|
9
|
+
from typing_extensions import assert_never
|
|
9
10
|
|
|
10
11
|
import datahub.emitter.mce_builder as builder
|
|
11
|
-
from datahub.configuration.common import ConfigModel
|
|
12
|
+
from datahub.configuration.common import ConfigModel, LaxStr
|
|
12
13
|
from datahub.emitter.generic_emitter import Emitter
|
|
13
14
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
14
15
|
from datahub.ingestion.graph.client import DataHubGraph
|
|
@@ -110,8 +111,9 @@ class DataProduct(ConfigModel):
|
|
|
110
111
|
description: Optional[str] = None
|
|
111
112
|
tags: Optional[List[str]] = None
|
|
112
113
|
terms: Optional[List[str]] = None
|
|
113
|
-
properties: Optional[Dict[str, str]] = None
|
|
114
|
+
properties: Optional[Dict[str, LaxStr]] = None
|
|
114
115
|
external_url: Optional[str] = None
|
|
116
|
+
output_ports: Optional[List[str]] = None
|
|
115
117
|
_original_yaml_dict: Optional[dict] = None
|
|
116
118
|
|
|
117
119
|
@pydantic.validator("assets", each_item=True)
|
|
@@ -123,6 +125,22 @@ class DataProduct(ConfigModel):
|
|
|
123
125
|
|
|
124
126
|
return v
|
|
125
127
|
|
|
128
|
+
@pydantic.validator("output_ports", each_item=True)
|
|
129
|
+
def output_ports_must_be_urns(cls, v: str) -> str:
|
|
130
|
+
try:
|
|
131
|
+
Urn.create_from_string(v)
|
|
132
|
+
except Exception as e:
|
|
133
|
+
raise ValueError(f"Output port {v} is not an urn: {e}") from e
|
|
134
|
+
|
|
135
|
+
return v
|
|
136
|
+
|
|
137
|
+
@pydantic.validator("output_ports", each_item=True)
|
|
138
|
+
def output_ports_must_be_from_asset_list(cls, v: str, values: dict) -> str:
|
|
139
|
+
assets = values.get("assets", [])
|
|
140
|
+
if v not in assets:
|
|
141
|
+
raise ValueError(f"Output port {v} is not in asset list")
|
|
142
|
+
return v
|
|
143
|
+
|
|
126
144
|
@property
|
|
127
145
|
def urn(self) -> str:
|
|
128
146
|
if self.id.startswith("urn:li:dataProduct:"):
|
|
@@ -180,6 +198,7 @@ class DataProduct(ConfigModel):
|
|
|
180
198
|
DataProductAssociationClass(
|
|
181
199
|
destinationUrn=asset,
|
|
182
200
|
created=self._mint_auditstamp("yaml"),
|
|
201
|
+
outputPort=asset in (self.output_ports or []),
|
|
183
202
|
)
|
|
184
203
|
for asset in self.assets
|
|
185
204
|
]
|
|
@@ -203,6 +222,7 @@ class DataProduct(ConfigModel):
|
|
|
203
222
|
DataProductAssociationClass(
|
|
204
223
|
destinationUrn=asset,
|
|
205
224
|
created=self._mint_auditstamp("yaml"),
|
|
225
|
+
outputPort=asset in (self.output_ports or []),
|
|
206
226
|
)
|
|
207
227
|
for asset in self.assets or []
|
|
208
228
|
],
|
|
@@ -368,6 +388,13 @@ class DataProduct(ConfigModel):
|
|
|
368
388
|
external_url=(
|
|
369
389
|
data_product_properties.externalUrl if data_product_properties else None
|
|
370
390
|
),
|
|
391
|
+
output_ports=[
|
|
392
|
+
e.destinationUrn
|
|
393
|
+
for e in (data_product_properties.assets or [])
|
|
394
|
+
if e.outputPort
|
|
395
|
+
]
|
|
396
|
+
if data_product_properties
|
|
397
|
+
else None,
|
|
371
398
|
)
|
|
372
399
|
|
|
373
400
|
def _patch_ownership(
|
|
@@ -414,7 +441,9 @@ class DataProduct(ConfigModel):
|
|
|
414
441
|
"type": new_owner_type_map[owner_urn],
|
|
415
442
|
}
|
|
416
443
|
else:
|
|
417
|
-
patches_drop[i] = o
|
|
444
|
+
patches_drop[i] = o.model_dump()
|
|
445
|
+
else:
|
|
446
|
+
assert_never(o)
|
|
418
447
|
|
|
419
448
|
# Figure out what if any are new owners to add
|
|
420
449
|
new_owners_to_add = {o for o in new_owner_type_map} - set(owners_matched)
|