acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +18 -3
- datahub/api/entities/datajob/datajob.py +24 -4
- datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +47 -72
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +37 -37
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/base.py +8 -6
- datahub/api/graphql/operation.py +14 -10
- datahub/cli/check_cli.py +91 -9
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +20 -12
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +133 -34
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +155 -231
- datahub/cli/exists_cli.py +2 -3
- datahub/cli/get_cli.py +2 -3
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +11 -5
- datahub/cli/ingest_cli.py +25 -26
- datahub/cli/migrate.py +12 -9
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +4 -6
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +39 -7
- datahub/cli/specific/datacontract_cli.py +57 -9
- datahub/cli/specific/dataproduct_cli.py +12 -24
- datahub/cli/specific/dataset_cli.py +31 -21
- datahub/cli/specific/forms_cli.py +2 -5
- datahub/cli/specific/group_cli.py +2 -3
- datahub/cli/specific/structuredproperties_cli.py +5 -7
- datahub/cli/specific/user_cli.py +174 -4
- datahub/cli/state_cli.py +2 -3
- datahub/cli/timeline_cli.py +2 -3
- datahub/configuration/common.py +46 -2
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +4 -3
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +12 -8
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/mcp_builder.py +12 -0
- datahub/emitter/request_helper.py +138 -15
- datahub/emitter/response_helper.py +111 -19
- datahub/emitter/rest_emitter.py +399 -163
- datahub/entrypoints.py +10 -5
- datahub/errors.py +12 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/common.py +9 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +174 -62
- datahub/ingestion/api/source_helpers.py +41 -3
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +31 -5
- datahub/ingestion/glossary/classification_mixin.py +9 -2
- datahub/ingestion/graph/client.py +492 -55
- datahub/ingestion/graph/config.py +18 -2
- datahub/ingestion/graph/filters.py +96 -32
- datahub/ingestion/graph/links.py +55 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +90 -23
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +31 -23
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/apply/datahub_apply.py +6 -5
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +7 -18
- datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
- datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
- datahub/ingestion/source/common/subtypes.py +73 -1
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
- datahub/ingestion/source/data_lake_common/object_store.py +732 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
- datahub/ingestion/source/datahub/config.py +19 -5
- datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
- datahub/ingestion/source/datahub/datahub_source.py +11 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
- datahub/ingestion/source/dbt/dbt_common.py +270 -26
- datahub/ingestion/source/dbt/dbt_core.py +88 -47
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +228 -215
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +12 -14
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +67 -8
- datahub/ingestion/source/fivetran/fivetran.py +228 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
- datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
- datahub/ingestion/source/gcs/gcs_source.py +53 -10
- datahub/ingestion/source/gcs/gcs_utils.py +36 -9
- datahub/ingestion/source/ge_data_profiler.py +146 -33
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +402 -0
- datahub/ingestion/source/hex/constants.py +8 -0
- datahub/ingestion/source/hex/hex.py +311 -0
- datahub/ingestion/source/hex/mapper.py +412 -0
- datahub/ingestion/source/hex/model.py +78 -0
- datahub/ingestion/source/hex/query_fetcher.py +307 -0
- datahub/ingestion/source/iceberg/iceberg.py +385 -164
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +28 -71
- datahub/ingestion/source/kafka/kafka_config.py +78 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
- datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +216 -86
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +539 -555
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +103 -118
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +11 -10
- datahub/ingestion/source/mlflow.py +254 -23
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +359 -181
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
- datahub/ingestion/source/nifi.py +5 -5
- datahub/ingestion/source/openapi.py +85 -38
- datahub/ingestion/source/openapi_parser.py +59 -40
- datahub/ingestion/source/powerbi/config.py +92 -27
- datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +66 -32
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/profile.py +2 -2
- datahub/ingestion/source/redshift/query.py +24 -20
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +13 -11
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +515 -244
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/config.py +75 -8
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +36 -7
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +403 -140
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
- datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
- datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
- datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
- datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +219 -26
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +29 -9
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +9 -4
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +28 -8
- datahub/ingestion/source/sql/hive_metastore.py +24 -25
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +18 -2
- datahub/ingestion/source/sql/mssql/source.py +376 -62
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +62 -11
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +20 -2
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +27 -2
- datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
- datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
- datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +43 -10
- datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/superset.py +810 -126
- datahub/ingestion/source/tableau/tableau.py +172 -69
- datahub/ingestion/source/tableau/tableau_common.py +11 -4
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +161 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
- datahub/ingestion/source/usage/usage_common.py +4 -68
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1367 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
- datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/dataset_domain.py +1 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/common.py +3 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
- datahub/metadata/_urns/urn_defs.py +1866 -1582
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18404 -16617
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/Deprecation.avsc +2 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +21 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QueryProperties.avsc +24 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +147 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +7 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_shared.py +393 -10
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +180 -4
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +154 -12
- datahub/sdk/lineage_client.py +943 -0
- datahub/sdk/main_client.py +83 -8
- datahub/sdk/mlmodel.py +383 -0
- datahub/sdk/mlmodelgroup.py +240 -0
- datahub/sdk/search_client.py +85 -8
- datahub/sdk/search_filters.py +393 -68
- datahub/secret/datahub_secret_store.py +5 -1
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +51 -59
- datahub/sql_parsing/_sqlglot_patch.py +1 -2
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/split_statements.py +30 -3
- datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
- datahub/sql_parsing/sqlglot_lineage.py +517 -44
- datahub/sql_parsing/sqlglot_utils.py +30 -18
- datahub/sql_parsing/tool_meta_extractor.py +25 -2
- datahub/telemetry/telemetry.py +30 -16
- datahub/testing/check_imports.py +1 -1
- datahub/testing/docker_utils.py +8 -2
- datahub/testing/mce_helpers.py +421 -0
- datahub/testing/mcp_diff.py +17 -21
- datahub/testing/sdk_v2_helpers.py +18 -0
- datahub/upgrade/upgrade.py +86 -30
- datahub/utilities/file_backed_collections.py +14 -15
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/ingest_utils.py +2 -2
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +30 -7
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +298 -10
- datahub/utilities/sqlalchemy_query_combiner.py +6 -4
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/utilities/urn_encoder.py +1 -1
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -458
- datahub/ingestion/source/vertexai.py +0 -697
- datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
datahub/sdk/main_client.py
CHANGED
|
@@ -1,16 +1,30 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Optional, overload
|
|
3
|
+
from typing import TYPE_CHECKING, Optional, overload
|
|
4
4
|
|
|
5
5
|
from datahub.errors import SdkUsageError
|
|
6
6
|
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
|
7
|
-
from datahub.ingestion.graph.config import DatahubClientConfig
|
|
7
|
+
from datahub.ingestion.graph.config import ClientMode, DatahubClientConfig
|
|
8
8
|
from datahub.sdk.entity_client import EntityClient
|
|
9
|
-
from datahub.sdk.resolver_client import ResolverClient
|
|
9
|
+
from datahub.sdk.lineage_client import LineageClient
|
|
10
10
|
from datahub.sdk.search_client import SearchClient
|
|
11
11
|
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from datahub.sdk.resolver_client import ResolverClient
|
|
14
|
+
|
|
12
15
|
|
|
13
16
|
class DataHubClient:
|
|
17
|
+
"""Main client for interacting with DataHub.
|
|
18
|
+
|
|
19
|
+
This class provides the primary interface for interacting with DataHub,
|
|
20
|
+
including entity management, search, and resolution capabilities.
|
|
21
|
+
|
|
22
|
+
The client can be initialized in three ways:
|
|
23
|
+
1. With a server URL and optional token
|
|
24
|
+
2. With a DatahubClientConfig object
|
|
25
|
+
3. With an existing (legacy) :py:class:`DataHubGraph` instance
|
|
26
|
+
"""
|
|
27
|
+
|
|
14
28
|
@overload
|
|
15
29
|
def __init__(self, *, server: str, token: Optional[str] = None): ...
|
|
16
30
|
@overload
|
|
@@ -25,6 +39,17 @@ class DataHubClient:
|
|
|
25
39
|
graph: Optional[DataHubGraph] = None,
|
|
26
40
|
config: Optional[DatahubClientConfig] = None,
|
|
27
41
|
):
|
|
42
|
+
"""Initialize a new DataHubClient instance.
|
|
43
|
+
|
|
44
|
+
Args:
|
|
45
|
+
server: The URL of the DataHub server (e.g. "http://localhost:8080").
|
|
46
|
+
token: Optional authentication token.
|
|
47
|
+
graph: An existing DataHubGraph instance to use.
|
|
48
|
+
config: A DatahubClientConfig object with connection details.
|
|
49
|
+
|
|
50
|
+
Raises:
|
|
51
|
+
SdkUsageError: If invalid combinations of arguments are provided.
|
|
52
|
+
"""
|
|
28
53
|
if server is not None:
|
|
29
54
|
if config is not None:
|
|
30
55
|
raise SdkUsageError("Cannot specify both server and config")
|
|
@@ -40,10 +65,16 @@ class DataHubClient:
|
|
|
40
65
|
|
|
41
66
|
self._graph = graph
|
|
42
67
|
|
|
43
|
-
|
|
68
|
+
def test_connection(self) -> None:
|
|
69
|
+
self._graph.test_connection()
|
|
44
70
|
|
|
45
71
|
@classmethod
|
|
46
|
-
def from_env(cls) -> "DataHubClient":
|
|
72
|
+
def from_env(
|
|
73
|
+
cls,
|
|
74
|
+
*,
|
|
75
|
+
client_mode: ClientMode = ClientMode.SDK,
|
|
76
|
+
datahub_component: Optional[str] = None,
|
|
77
|
+
) -> "DataHubClient":
|
|
47
78
|
"""Initialize a DataHubClient from the environment variables or ~/.datahubenv file.
|
|
48
79
|
|
|
49
80
|
This will first check DATAHUB_GMS_URL and DATAHUB_GMS_TOKEN. If not present,
|
|
@@ -53,6 +84,10 @@ class DataHubClient:
|
|
|
53
84
|
If you're looking to specify the server/token in code, use the
|
|
54
85
|
DataHubClient(server=..., token=...) constructor instead.
|
|
55
86
|
|
|
87
|
+
Args:
|
|
88
|
+
client_mode: [internal] The client mode to use. Defaults to "SDK".
|
|
89
|
+
datahub_component: [internal] The DataHub component name to include in the user agent.
|
|
90
|
+
|
|
56
91
|
Returns:
|
|
57
92
|
A DataHubClient instance.
|
|
58
93
|
"""
|
|
@@ -60,7 +95,10 @@ class DataHubClient:
|
|
|
60
95
|
# Inspired by the DockerClient.from_env() method.
|
|
61
96
|
# TODO: This one also reads from ~/.datahubenv, so the "from_env" name might be a bit confusing.
|
|
62
97
|
# That file is part of the "environment", but is not a traditional "env variable".
|
|
63
|
-
graph = get_default_graph(
|
|
98
|
+
graph = get_default_graph(
|
|
99
|
+
client_mode=client_mode,
|
|
100
|
+
datahub_component=datahub_component,
|
|
101
|
+
)
|
|
64
102
|
|
|
65
103
|
return cls(graph=graph)
|
|
66
104
|
|
|
@@ -69,11 +107,48 @@ class DataHubClient:
|
|
|
69
107
|
return EntityClient(self)
|
|
70
108
|
|
|
71
109
|
@property
def resolve(self) -> "ResolverClient":
    """Return a ResolverClient bound to this client.

    The class exported by ``acryl_datahub_cloud.sdk`` is used when that
    package can be imported; on ImportError the one from
    ``datahub.sdk.resolver_client`` is used instead.
    """
    try:
        from acryl_datahub_cloud.sdk import (  # type: ignore[import-not-found]
            ResolverClient as resolver_cls,
        )
    except ImportError:
        # If the client is not installed, use the one from the SDK.
        from datahub.sdk.resolver_client import (  # type: ignore[assignment]
            ResolverClient as resolver_cls,
        )
    return resolver_cls(self)
|
|
74
121
|
|
|
75
122
|
@property
def search(self) -> SearchClient:
    """Return a SearchClient bound to this client (built anew on each access)."""
    search_client = SearchClient(self)
    return search_client
|
|
78
125
|
|
|
79
|
-
|
|
126
|
+
@property
def lineage(self) -> LineageClient:
    """Return a LineageClient bound to this client (built anew on each access)."""
    lineage_client = LineageClient(self)
    return lineage_client
|
|
129
|
+
|
|
130
|
+
@property
def assertions(self):  # type: ignore[report-untyped-call]  # Not available due to circular import issues
    """Return an AssertionsClient bound to this client.

    The client class is imported lazily from ``acryl_datahub_cloud.sdk``.

    Raises:
        SdkUsageError: If the import fails with an ImportError whose message
            mentions ``acryl_datahub_cloud``.
        ImportError: Any other import failure is re-raised as-is.
    """
    try:
        from acryl_datahub_cloud.sdk import AssertionsClient
    except ImportError as e:
        # Only translate the error when it points at the cloud package itself.
        if "acryl_datahub_cloud" not in str(e):
            raise e
        raise SdkUsageError(
            "AssertionsClient is not installed, please install it with `pip install acryl-datahub-cloud`"
        ) from e
    return AssertionsClient(self)
|
|
142
|
+
|
|
143
|
+
@property
def subscriptions(self):  # type: ignore[report-untyped-call]  # Not available due to circular import issues
    """Return a SubscriptionClient bound to this client.

    The client class is imported lazily from ``acryl_datahub_cloud.sdk``.

    Raises:
        SdkUsageError: If the import fails with an ImportError whose message
            mentions ``acryl_datahub_cloud``.
        ImportError: Any other import failure is re-raised as-is.
    """
    try:
        from acryl_datahub_cloud.sdk import SubscriptionClient
    except ImportError as e:
        # Only translate the error when it points at the cloud package itself.
        if "acryl_datahub_cloud" not in str(e):
            raise e
        raise SdkUsageError(
            "SubscriptionClient is not installed, please install it with `pip install acryl-datahub-cloud`"
        ) from e
    return SubscriptionClient(self)
|
datahub/sdk/mlmodel.py
ADDED
|
@@ -0,0 +1,383 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from typing import Dict, List, Optional, Sequence, Type, Union
|
|
5
|
+
|
|
6
|
+
from typing_extensions import Self
|
|
7
|
+
|
|
8
|
+
from datahub.emitter.mce_builder import DEFAULT_ENV
|
|
9
|
+
from datahub.metadata.schema_classes import (
|
|
10
|
+
AspectBag,
|
|
11
|
+
MLHyperParamClass,
|
|
12
|
+
MLMetricClass,
|
|
13
|
+
MLModelPropertiesClass,
|
|
14
|
+
)
|
|
15
|
+
from datahub.metadata.urns import (
|
|
16
|
+
DataProcessInstanceUrn,
|
|
17
|
+
MlModelGroupUrn,
|
|
18
|
+
MlModelUrn,
|
|
19
|
+
Urn,
|
|
20
|
+
)
|
|
21
|
+
from datahub.sdk._shared import (
|
|
22
|
+
DomainInputType,
|
|
23
|
+
HasDomain,
|
|
24
|
+
HasInstitutionalMemory,
|
|
25
|
+
HasOwnership,
|
|
26
|
+
HasPlatformInstance,
|
|
27
|
+
HasStructuredProperties,
|
|
28
|
+
HasTags,
|
|
29
|
+
HasTerms,
|
|
30
|
+
HasVersion,
|
|
31
|
+
HyperParamsInputType,
|
|
32
|
+
LinksInputType,
|
|
33
|
+
MLTrainingJobInputType,
|
|
34
|
+
OwnersInputType,
|
|
35
|
+
StructuredPropertyInputType,
|
|
36
|
+
TagsInputType,
|
|
37
|
+
TermsInputType,
|
|
38
|
+
TrainingMetricsInputType,
|
|
39
|
+
convert_hyper_params,
|
|
40
|
+
convert_training_metrics,
|
|
41
|
+
make_time_stamp,
|
|
42
|
+
parse_time_stamp,
|
|
43
|
+
)
|
|
44
|
+
from datahub.sdk.entity import Entity, ExtraAspectsType
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class MLModel(
    HasPlatformInstance,
    HasOwnership,
    HasInstitutionalMemory,
    HasTags,
    HasTerms,
    HasDomain,
    HasVersion,
    HasStructuredProperties,
    Entity,
):
    """SDK entity for an ML model identified by an ``MlModelUrn``.

    Most accessors below read or write fields of a single
    ``MLModelPropertiesClass`` aspect, which ``_ensure_model_props`` creates on
    demand. Ownership, links, tags, terms, domain, version and structured
    properties are handled by the mixin base classes.
    """

    __slots__ = ()

    @classmethod
    def get_urn_type(cls) -> Type[MlModelUrn]:
        """Return the URN class used for this entity type."""
        return MlModelUrn

    def __init__(
        self,
        id: str,
        platform: str,
        version: Optional[str] = None,
        aliases: Optional[List[str]] = None,
        platform_instance: Optional[str] = None,
        env: str = DEFAULT_ENV,
        name: Optional[str] = None,
        description: Optional[str] = None,
        training_metrics: Optional[TrainingMetricsInputType] = None,
        hyper_params: Optional[HyperParamsInputType] = None,
        external_url: Optional[str] = None,
        custom_properties: Optional[Dict[str, str]] = None,
        created: Optional[datetime] = None,
        last_modified: Optional[datetime] = None,
        owners: Optional[OwnersInputType] = None,
        links: Optional[LinksInputType] = None,
        tags: Optional[TagsInputType] = None,
        terms: Optional[TermsInputType] = None,
        domain: Optional[DomainInputType] = None,
        model_group: Optional[Union[str, MlModelGroupUrn]] = None,
        training_jobs: Optional[MLTrainingJobInputType] = None,
        downstream_jobs: Optional[MLTrainingJobInputType] = None,
        structured_properties: Optional[StructuredPropertyInputType] = None,
        extra_aspects: ExtraAspectsType = None,
    ):
        """Create an ML model entity.

        Args:
            id: Used as the ``name`` component of the model URN.
            platform: Platform component of the model URN.
            env: Environment component of the model URN.
            platform_instance: Passed to ``_set_platform_instance`` together
                with the URN's platform.
            extra_aspects: Passed to ``_set_extra_aspects`` before any other
                aspect is touched.

        Every other argument is optional and, when not None, is forwarded to
        the matching ``set_*`` method.
        """
        urn = MlModelUrn(platform=platform, name=id, env=env)
        super().__init__(urn)
        self._set_extra_aspects(extra_aspects)
        self._set_platform_instance(urn.platform, platform_instance)
        self._ensure_model_props()

        # Initialize properties in logical groups
        self._init_basic_properties(
            version=version,
            name=name,
            aliases=aliases,
            description=description,
            external_url=external_url,
            custom_properties=custom_properties,
            created=created,
            last_modified=last_modified,
        )

        self._init_ml_specific_properties(
            training_metrics=training_metrics,
            hyper_params=hyper_params,
            model_group=model_group,
            training_jobs=training_jobs,
            downstream_jobs=downstream_jobs,
        )

        self._init_metadata_properties(
            owners=owners,
            links=links,
            tags=tags,
            terms=terms,
            domain=domain,
            structured_properties=structured_properties,
        )

    @classmethod
    def _new_from_graph(cls, urn: Urn, current_aspects: AspectBag) -> Self:
        """Build an entity from a URN and the aspects supplied for it."""
        assert isinstance(urn, MlModelUrn)
        entity = cls(
            id=urn.name,
            platform=urn.platform,
            env=urn.env,
        )
        return entity._init_from_graph(current_aspects)

    @property
    def urn(self) -> MlModelUrn:
        """The model's URN."""
        return self._urn  # type: ignore

    def _ensure_model_props(
        self,
    ) -> MLModelPropertiesClass:
        """Return the model-properties aspect via ``_setdefault_aspect``."""
        return self._setdefault_aspect(MLModelPropertiesClass())

    @property
    def name(self) -> Optional[str]:
        """The ``name`` field of the model properties."""
        return self._ensure_model_props().name

    def set_name(self, name: str) -> None:
        """Set the ``name`` field of the model properties."""
        self._ensure_model_props().name = name

    @property
    def description(self) -> Optional[str]:
        """The ``description`` field of the model properties."""
        return self._ensure_model_props().description

    def set_description(self, description: str) -> None:
        """Set the ``description`` field of the model properties."""
        self._ensure_model_props().description = description

    @property
    def external_url(self) -> Optional[str]:
        """The ``externalUrl`` field of the model properties."""
        return self._ensure_model_props().externalUrl

    def set_external_url(self, external_url: str) -> None:
        """Set the ``externalUrl`` field of the model properties."""
        self._ensure_model_props().externalUrl = external_url

    @property
    def custom_properties(self) -> Optional[Dict[str, str]]:
        """The ``customProperties`` mapping of the model properties."""
        return self._ensure_model_props().customProperties

    def set_custom_properties(self, custom_properties: Dict[str, str]) -> None:
        """Replace the ``customProperties`` mapping of the model properties."""
        self._ensure_model_props().customProperties = custom_properties

    @property
    def created(self) -> Optional[datetime]:
        """The ``created`` stamp, converted with ``parse_time_stamp``."""
        return parse_time_stamp(self._ensure_model_props().created)

    def set_created(self, created: datetime) -> None:
        """Store ``created`` after converting it with ``make_time_stamp``."""
        self._ensure_model_props().created = make_time_stamp(created)

    @property
    def last_modified(self) -> Optional[datetime]:
        """The ``lastModified`` stamp, converted with ``parse_time_stamp``."""
        return parse_time_stamp(self._ensure_model_props().lastModified)

    def set_last_modified(self, last_modified: datetime) -> None:
        """Store ``last_modified`` after converting it with ``make_time_stamp``."""
        self._ensure_model_props().lastModified = make_time_stamp(last_modified)

    @property
    def training_metrics(self) -> Optional[List[MLMetricClass]]:
        """The ``trainingMetrics`` list of the model properties."""
        return self._ensure_model_props().trainingMetrics

    def set_training_metrics(self, metrics: TrainingMetricsInputType) -> None:
        """Replace the training metrics with the converted ``metrics``."""
        self._ensure_model_props().trainingMetrics = convert_training_metrics(metrics)

    def add_training_metrics(self, metrics: TrainingMetricsInputType) -> None:
        """Append training metrics to those already recorded.

        The input goes through ``convert_training_metrics``, the same helper
        ``set_training_metrics`` uses, so adding and setting store identical
        data for identical input.
        """
        props = self._ensure_model_props()
        if props.trainingMetrics is None:
            props.trainingMetrics = []
        # `or []` tolerates a None result from the helper - TODO confirm whether
        # it can actually return None for non-None input.
        props.trainingMetrics.extend(convert_training_metrics(metrics) or [])

    @property
    def hyper_params(self) -> Optional[List[MLHyperParamClass]]:
        """The ``hyperParams`` list of the model properties."""
        return self._ensure_model_props().hyperParams

    def set_hyper_params(self, params: HyperParamsInputType) -> None:
        """Replace the hyperparameters with the converted ``params``."""
        self._ensure_model_props().hyperParams = convert_hyper_params(params)

    def add_hyper_params(self, params: HyperParamsInputType) -> None:
        """Append hyperparameters to those already recorded.

        The input goes through ``convert_hyper_params``, the same helper
        ``set_hyper_params`` uses, so adding and setting store identical data
        for identical input.
        """
        props = self._ensure_model_props()
        if props.hyperParams is None:
            props.hyperParams = []
        # `or []` tolerates a None result from the helper - TODO confirm whether
        # it can actually return None for non-None input.
        props.hyperParams.extend(convert_hyper_params(params) or [])

    @property
    def model_group(self) -> Optional[str]:
        """The first entry of ``groups``, or None when there are no groups."""
        groups = self._ensure_model_props().groups
        if not groups:
            return None
        return groups[0]

    def set_model_group(self, group: Union[str, MlModelGroupUrn]) -> None:
        """Replace ``groups`` with a single entry, the string form of ``group``."""
        self._ensure_model_props().groups = [str(group)]

    @property
    def training_jobs(self) -> Optional[List[str]]:
        """The ``trainingJobs`` list of the model properties."""
        return self._ensure_model_props().trainingJobs

    def set_training_jobs(self, training_jobs: MLTrainingJobInputType) -> None:
        """Replace the training jobs with the string form of each given job."""
        self._ensure_model_props().trainingJobs = [str(job) for job in training_jobs]

    def add_training_job(
        self, training_job: Union[str, DataProcessInstanceUrn]
    ) -> None:
        """Append one training job (as a string); duplicates are not filtered."""
        props = self._ensure_model_props()
        if props.trainingJobs is None:
            props.trainingJobs = []
        props.trainingJobs.append(str(training_job))

    def remove_training_job(
        self, training_job: Union[str, DataProcessInstanceUrn]
    ) -> None:
        """Drop every training job equal to the string form of ``training_job``."""
        props = self._ensure_model_props()
        if props.trainingJobs is not None:
            job_str = str(training_job)
            props.trainingJobs = [job for job in props.trainingJobs if job != job_str]

    @property
    def downstream_jobs(self) -> Optional[List[str]]:
        """The ``downstreamJobs`` list of the model properties."""
        return self._ensure_model_props().downstreamJobs

    def set_downstream_jobs(
        self, downstream_jobs: Sequence[Union[str, DataProcessInstanceUrn]]
    ) -> None:
        """Replace the downstream jobs with the string form of each given job."""
        self._ensure_model_props().downstreamJobs = [
            str(job) for job in downstream_jobs
        ]

    def add_downstream_job(
        self, downstream_job: Union[str, DataProcessInstanceUrn]
    ) -> None:
        """Append one downstream job (as a string); duplicates are not filtered."""
        props = self._ensure_model_props()
        if props.downstreamJobs is None:
            props.downstreamJobs = []
        props.downstreamJobs.append(str(downstream_job))

    def remove_downstream_job(
        self, downstream_job: Union[str, DataProcessInstanceUrn]
    ) -> None:
        """Drop every downstream job equal to the string form of ``downstream_job``."""
        props = self._ensure_model_props()
        if props.downstreamJobs is not None:
            job_str = str(downstream_job)
            props.downstreamJobs = [
                job for job in props.downstreamJobs if job != job_str
            ]

    @property
    def deployments(self) -> Optional[List[str]]:
        """The ``deployments`` list of the model properties."""
        return self._ensure_model_props().deployments

    def set_deployments(self, deployments: Sequence[str]) -> None:
        """Replace the deployments with a list copy of ``deployments``."""
        self._ensure_model_props().deployments = list(deployments)

    def add_deployment(self, deployment: str) -> None:
        """Append ``deployment`` unless it is already present."""
        props = self._ensure_model_props()
        if props.deployments is None:
            props.deployments = []
        if deployment not in props.deployments:
            props.deployments.append(deployment)

    def remove_deployment(self, deployment: str) -> None:
        """Drop every deployment equal to ``deployment``."""
        props = self._ensure_model_props()
        if props.deployments is not None:
            props.deployments = [d for d in props.deployments if d != deployment]

    def _init_basic_properties(
        self,
        version: Optional[str] = None,
        name: Optional[str] = None,
        aliases: Optional[List[str]] = None,
        description: Optional[str] = None,
        external_url: Optional[str] = None,
        custom_properties: Optional[Dict[str, str]] = None,
        created: Optional[datetime] = None,
        last_modified: Optional[datetime] = None,
    ) -> None:
        """Apply each descriptive argument that is not None via its setter."""
        if version is not None:
            self.set_version(version)
        if name is not None:
            self.set_name(name)
        if aliases is not None:
            self.set_version_aliases(aliases)
        if description is not None:
            self.set_description(description)
        if external_url is not None:
            self.set_external_url(external_url)
        if custom_properties is not None:
            self.set_custom_properties(custom_properties)
        if created is not None:
            self.set_created(created)
        if last_modified is not None:
            self.set_last_modified(last_modified)

    def _init_ml_specific_properties(
        self,
        training_metrics: Optional[TrainingMetricsInputType] = None,
        hyper_params: Optional[HyperParamsInputType] = None,
        model_group: Optional[Union[str, MlModelGroupUrn]] = None,
        training_jobs: Optional[MLTrainingJobInputType] = None,
        downstream_jobs: Optional[MLTrainingJobInputType] = None,
    ) -> None:
        """Apply each ML-specific argument that is not None via its setter."""
        if training_metrics is not None:
            self.set_training_metrics(training_metrics)
        if hyper_params is not None:
            self.set_hyper_params(hyper_params)
        if model_group is not None:
            self.set_model_group(model_group)
        if training_jobs is not None:
            self.set_training_jobs(training_jobs)
        if downstream_jobs is not None:
            self.set_downstream_jobs(downstream_jobs)

    def _init_metadata_properties(
        self,
        owners: Optional[OwnersInputType] = None,
        links: Optional[LinksInputType] = None,
        tags: Optional[TagsInputType] = None,
        terms: Optional[TermsInputType] = None,
        domain: Optional[DomainInputType] = None,
        structured_properties: Optional[StructuredPropertyInputType] = None,
    ) -> None:
        """Apply each governance argument that is not None via its setter."""
        if owners is not None:
            self.set_owners(owners)
        if links is not None:
            self.set_links(links)
        if tags is not None:
            self.set_tags(tags)
        if terms is not None:
            self.set_terms(terms)
        if domain is not None:
            self.set_domain(domain)
        if structured_properties is not None:
            # Each mapping entry is applied as one structured property.
            for key, value in structured_properties.items():
                self.set_structured_property(property_urn=key, values=value)
|