acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
- datahub/_version.py +1 -1
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +26 -23
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +3 -3
- datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
- datahub/api/graphql/operation.py +10 -6
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +18 -10
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +125 -27
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +153 -229
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +37 -6
- datahub/cli/specific/datacontract_cli.py +54 -7
- datahub/cli/specific/dataproduct_cli.py +2 -15
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +172 -3
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/configuration/common.py +40 -1
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +3 -2
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +8 -4
- datahub/emitter/rest_emitter.py +103 -30
- datahub/entrypoints.py +6 -3
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +165 -58
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +330 -25
- datahub/ingestion/graph/config.py +3 -2
- datahub/ingestion/graph/filters.py +30 -11
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +81 -11
- datahub/ingestion/run/pipeline_config.py +2 -2
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +13 -5
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +6 -8
- datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
- datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
- datahub/ingestion/source/common/subtypes.py +53 -0
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
- datahub/ingestion/source/datahub/config.py +12 -9
- datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
- datahub/ingestion/source/datahub/datahub_source.py +10 -0
- datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
- datahub/ingestion/source/dbt/dbt_common.py +224 -9
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +132 -98
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +8 -10
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +66 -7
- datahub/ingestion/source/fivetran/fivetran.py +227 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
- datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gcs/gcs_source.py +32 -4
- datahub/ingestion/source/ge_data_profiler.py +108 -31
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/api.py +28 -1
- datahub/ingestion/source/hex/hex.py +16 -5
- datahub/ingestion/source/hex/mapper.py +16 -2
- datahub/ingestion/source/hex/model.py +2 -0
- datahub/ingestion/source/hex/query_fetcher.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +123 -59
- datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
- datahub/ingestion/source/looker/looker_common.py +148 -79
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +503 -547
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +96 -117
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +9 -9
- datahub/ingestion/source/mlflow.py +12 -2
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +26 -5
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
- datahub/ingestion/source/nifi.py +2 -2
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/powerbi/config.py +47 -21
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
- datahub/ingestion/source/powerbi/powerbi.py +10 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/query.py +23 -19
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +6 -5
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +449 -248
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +10 -16
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +3 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
- datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
- datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
- datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
- datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +217 -25
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +24 -8
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +2 -2
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +4 -3
- datahub/ingestion/source/sql/hive_metastore.py +19 -20
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +3 -1
- datahub/ingestion/source/sql/mssql/source.py +336 -57
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +5 -5
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +2 -1
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +22 -0
- datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +11 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/superset.py +314 -67
- datahub/ingestion/source/tableau/tableau.py +135 -59
- datahub/ingestion/source/tableau/tableau_common.py +9 -2
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/unity/config.py +160 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
- datahub/ingestion/source/usage/usage_common.py +4 -3
- datahub/ingestion/source/vertexai/vertexai.py +1 -1
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +6806 -4871
- datahub/metadata/_urns/urn_defs.py +1767 -1539
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18395 -16979
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +4 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +61 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/sdk/__init__.py +2 -0
- datahub/sdk/_all_entities.py +7 -0
- datahub/sdk/_shared.py +249 -5
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +7 -0
- datahub/sdk/datajob.py +45 -13
- datahub/sdk/dataset.py +56 -2
- datahub/sdk/entity_client.py +111 -9
- datahub/sdk/lineage_client.py +663 -82
- datahub/sdk/main_client.py +50 -16
- datahub/sdk/mlmodel.py +120 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +7 -3
- datahub/sdk/search_filters.py +304 -36
- datahub/secret/datahub_secret_store.py +3 -0
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +39 -59
- datahub/sql_parsing/split_statements.py +13 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
- datahub/sql_parsing/sqlglot_lineage.py +196 -42
- datahub/sql_parsing/sqlglot_utils.py +12 -4
- datahub/sql_parsing/tool_meta_extractor.py +1 -3
- datahub/telemetry/telemetry.py +28 -14
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +73 -17
- datahub/utilities/file_backed_collections.py +8 -9
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +22 -6
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +10 -1
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -466
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -12,7 +12,7 @@ from pyiceberg.exceptions import (
|
|
|
12
12
|
NoSuchNamespaceError,
|
|
13
13
|
NoSuchPropertyException,
|
|
14
14
|
NoSuchTableError,
|
|
15
|
-
|
|
15
|
+
RESTError,
|
|
16
16
|
)
|
|
17
17
|
from pyiceberg.schema import Schema, SchemaVisitorPerPrimitiveType, visit
|
|
18
18
|
from pyiceberg.table import Table
|
|
@@ -118,7 +118,7 @@ logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(
|
|
|
118
118
|
|
|
119
119
|
|
|
120
120
|
@platform_name("Iceberg")
|
|
121
|
-
@support_status(SupportStatus.
|
|
121
|
+
@support_status(SupportStatus.INCUBATING)
|
|
122
122
|
@config_class(IcebergSourceConfig)
|
|
123
123
|
@capability(
|
|
124
124
|
SourceCapability.PLATFORM_INSTANCE,
|
|
@@ -134,7 +134,9 @@ logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(
|
|
|
134
134
|
SourceCapability.OWNERSHIP,
|
|
135
135
|
"Automatically ingests ownership information from table properties based on `user_ownership_property` and `group_ownership_property`",
|
|
136
136
|
)
|
|
137
|
-
@capability(
|
|
137
|
+
@capability(
|
|
138
|
+
SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
|
|
139
|
+
)
|
|
138
140
|
class IcebergSource(StatefulIngestionSourceBase):
|
|
139
141
|
"""
|
|
140
142
|
## Integration Details
|
|
@@ -152,6 +154,10 @@ class IcebergSource(StatefulIngestionSourceBase):
|
|
|
152
154
|
self.report: IcebergSourceReport = IcebergSourceReport()
|
|
153
155
|
self.config: IcebergSourceConfig = config
|
|
154
156
|
self.ctx: PipelineContext = ctx
|
|
157
|
+
self.stamping_processor = AutoSystemMetadata(
|
|
158
|
+
self.ctx
|
|
159
|
+
) # single instance used only when processing namespaces
|
|
160
|
+
self.namespaces: List[Tuple[Identifier, str]] = []
|
|
155
161
|
|
|
156
162
|
@classmethod
|
|
157
163
|
def create(cls, config_dict: Dict, ctx: PipelineContext) -> "IcebergSource":
|
|
@@ -194,9 +200,9 @@ class IcebergSource(StatefulIngestionSourceBase):
|
|
|
194
200
|
auto_lowercase_dataset_urns,
|
|
195
201
|
auto_materialize_referenced_tags_terms,
|
|
196
202
|
partial(
|
|
197
|
-
auto_fix_duplicate_schema_field_paths, platform=self.
|
|
203
|
+
auto_fix_duplicate_schema_field_paths, platform=self.infer_platform()
|
|
198
204
|
),
|
|
199
|
-
partial(auto_fix_empty_field_paths, platform=self.
|
|
205
|
+
partial(auto_fix_empty_field_paths, platform=self.infer_platform()),
|
|
200
206
|
partial(auto_workunit_reporter, self.get_report()),
|
|
201
207
|
auto_patch_last_modified,
|
|
202
208
|
EnsureAspectSizeProcessor(self.get_report()).ensure_aspect_size,
|
|
@@ -244,6 +250,13 @@ class IcebergSource(StatefulIngestionSourceBase):
|
|
|
244
250
|
context=str(namespace),
|
|
245
251
|
exc=e,
|
|
246
252
|
)
|
|
253
|
+
except RESTError as e:
|
|
254
|
+
self.report.warning(
|
|
255
|
+
title="Iceberg REST Server Error",
|
|
256
|
+
message="Iceberg REST Server returned error status when trying to list tables for a namespace, skipping it.",
|
|
257
|
+
context=str(namespace),
|
|
258
|
+
exc=e,
|
|
259
|
+
)
|
|
247
260
|
except Exception as e:
|
|
248
261
|
self.report.report_failure(
|
|
249
262
|
title="Error when processing a namespace",
|
|
@@ -320,10 +333,10 @@ class IcebergSource(StatefulIngestionSourceBase):
|
|
|
320
333
|
context=dataset_name,
|
|
321
334
|
exc=e,
|
|
322
335
|
)
|
|
323
|
-
except
|
|
336
|
+
except RESTError as e:
|
|
324
337
|
self.report.warning(
|
|
325
338
|
title="Iceberg REST Server Error",
|
|
326
|
-
message="Iceberg returned
|
|
339
|
+
message="Iceberg REST Server returned error status when trying to process a table, skipping it.",
|
|
327
340
|
context=dataset_name,
|
|
328
341
|
exc=e,
|
|
329
342
|
)
|
|
@@ -363,7 +376,7 @@ class IcebergSource(StatefulIngestionSourceBase):
|
|
|
363
376
|
)
|
|
364
377
|
|
|
365
378
|
try:
|
|
366
|
-
catalog = self.config.get_catalog()
|
|
379
|
+
self.catalog = self.config.get_catalog()
|
|
367
380
|
except Exception as e:
|
|
368
381
|
self.report.report_failure(
|
|
369
382
|
title="Failed to initialize catalog object",
|
|
@@ -373,33 +386,7 @@ class IcebergSource(StatefulIngestionSourceBase):
|
|
|
373
386
|
return
|
|
374
387
|
|
|
375
388
|
try:
|
|
376
|
-
|
|
377
|
-
namespace_ids = self._get_namespaces(catalog)
|
|
378
|
-
namespaces: List[Tuple[Identifier, str]] = []
|
|
379
|
-
for namespace in namespace_ids:
|
|
380
|
-
namespace_repr = ".".join(namespace)
|
|
381
|
-
LOGGER.debug(f"Processing namespace {namespace_repr}")
|
|
382
|
-
namespace_urn = make_container_urn(
|
|
383
|
-
NamespaceKey(
|
|
384
|
-
namespace=namespace_repr,
|
|
385
|
-
platform=self.platform,
|
|
386
|
-
instance=self.config.platform_instance,
|
|
387
|
-
env=self.config.env,
|
|
388
|
-
)
|
|
389
|
-
)
|
|
390
|
-
namespace_properties: Properties = catalog.load_namespace_properties(
|
|
391
|
-
namespace
|
|
392
|
-
)
|
|
393
|
-
namespaces.append((namespace, namespace_urn))
|
|
394
|
-
for aspect in self._create_iceberg_namespace_aspects(
|
|
395
|
-
namespace, namespace_properties
|
|
396
|
-
):
|
|
397
|
-
yield stamping_processor.stamp_wu(
|
|
398
|
-
MetadataChangeProposalWrapper(
|
|
399
|
-
entityUrn=namespace_urn, aspect=aspect
|
|
400
|
-
).as_workunit()
|
|
401
|
-
)
|
|
402
|
-
LOGGER.debug("Namespaces ingestion completed")
|
|
389
|
+
yield from self._process_namespaces()
|
|
403
390
|
except Exception as e:
|
|
404
391
|
self.report.report_failure(
|
|
405
392
|
title="Failed to list namespaces",
|
|
@@ -413,13 +400,70 @@ class IcebergSource(StatefulIngestionSourceBase):
|
|
|
413
400
|
args_list=[
|
|
414
401
|
(dataset_path, namespace_urn)
|
|
415
402
|
for dataset_path, namespace_urn in self._get_datasets(
|
|
416
|
-
catalog, namespaces
|
|
403
|
+
self.catalog, self.namespaces
|
|
417
404
|
)
|
|
418
405
|
],
|
|
419
406
|
max_workers=self.config.processing_threads,
|
|
420
407
|
):
|
|
421
408
|
yield wu
|
|
422
409
|
|
|
410
|
+
def _try_processing_namespace(
|
|
411
|
+
self, namespace: Identifier
|
|
412
|
+
) -> Iterable[MetadataWorkUnit]:
|
|
413
|
+
namespace_repr = ".".join(namespace)
|
|
414
|
+
try:
|
|
415
|
+
LOGGER.debug(f"Processing namespace {namespace_repr}")
|
|
416
|
+
namespace_urn = make_container_urn(
|
|
417
|
+
NamespaceKey(
|
|
418
|
+
namespace=namespace_repr,
|
|
419
|
+
platform=self.platform,
|
|
420
|
+
instance=self.config.platform_instance,
|
|
421
|
+
env=self.config.env,
|
|
422
|
+
)
|
|
423
|
+
)
|
|
424
|
+
|
|
425
|
+
namespace_properties: Properties = self.catalog.load_namespace_properties(
|
|
426
|
+
namespace
|
|
427
|
+
)
|
|
428
|
+
for aspect in self._create_iceberg_namespace_aspects(
|
|
429
|
+
namespace, namespace_properties
|
|
430
|
+
):
|
|
431
|
+
yield self.stamping_processor.stamp_wu(
|
|
432
|
+
MetadataChangeProposalWrapper(
|
|
433
|
+
entityUrn=namespace_urn, aspect=aspect
|
|
434
|
+
).as_workunit()
|
|
435
|
+
)
|
|
436
|
+
self.namespaces.append((namespace, namespace_urn))
|
|
437
|
+
except NoSuchNamespaceError as e:
|
|
438
|
+
self.report.report_warning(
|
|
439
|
+
title="Failed to retrieve namespace properties",
|
|
440
|
+
message="Couldn't find the namespace, was it deleted during the ingestion?",
|
|
441
|
+
context=namespace_repr,
|
|
442
|
+
exc=e,
|
|
443
|
+
)
|
|
444
|
+
return
|
|
445
|
+
except RESTError as e:
|
|
446
|
+
self.report.warning(
|
|
447
|
+
title="Iceberg REST Server Error",
|
|
448
|
+
message="Iceberg REST Server returned error status when trying to retrieve namespace properties, skipping it.",
|
|
449
|
+
context=str(namespace),
|
|
450
|
+
exc=e,
|
|
451
|
+
)
|
|
452
|
+
except Exception as e:
|
|
453
|
+
self.report.report_failure(
|
|
454
|
+
title="Failed to process namespace",
|
|
455
|
+
message="Unhandled exception happened during processing of the namespace",
|
|
456
|
+
context=namespace_repr,
|
|
457
|
+
exc=e,
|
|
458
|
+
)
|
|
459
|
+
|
|
460
|
+
def _process_namespaces(self) -> Iterable[MetadataWorkUnit]:
|
|
461
|
+
namespace_ids = self._get_namespaces(self.catalog)
|
|
462
|
+
for namespace in namespace_ids:
|
|
463
|
+
yield from self._try_processing_namespace(namespace)
|
|
464
|
+
|
|
465
|
+
LOGGER.debug("Namespaces ingestion completed")
|
|
466
|
+
|
|
423
467
|
def _create_iceberg_table_aspects(
|
|
424
468
|
self, dataset_name: str, table: Table, namespace_urn: str
|
|
425
469
|
) -> Iterable[_Aspect]:
|
|
@@ -522,11 +566,11 @@ class IcebergSource(StatefulIngestionSourceBase):
|
|
|
522
566
|
custom_properties["format-version"] = str(table.metadata.format_version)
|
|
523
567
|
custom_properties["partition-spec"] = str(self._get_partition_aspect(table))
|
|
524
568
|
last_modified: Optional[int] = table.metadata.last_updated_ms
|
|
525
|
-
if table.current_snapshot():
|
|
526
|
-
custom_properties["snapshot-id"] = str(
|
|
527
|
-
custom_properties["manifest-list"] =
|
|
569
|
+
if current_snapshot := table.current_snapshot():
|
|
570
|
+
custom_properties["snapshot-id"] = str(current_snapshot.snapshot_id)
|
|
571
|
+
custom_properties["manifest-list"] = current_snapshot.manifest_list
|
|
528
572
|
if not last_modified:
|
|
529
|
-
last_modified = int(
|
|
573
|
+
last_modified = int(current_snapshot.timestamp_ms)
|
|
530
574
|
if "created-at" in custom_properties:
|
|
531
575
|
try:
|
|
532
576
|
dt = dateutil_parser.isoparse(custom_properties["created-at"])
|
|
@@ -792,9 +836,6 @@ class ToAvroSchemaIcebergVisitor(SchemaVisitorPerPrimitiveType[Dict[str, Any]]):
|
|
|
792
836
|
"native_data_type": str(timestamp_type),
|
|
793
837
|
}
|
|
794
838
|
|
|
795
|
-
# visit_timestamptz() is required when using pyiceberg >= 0.5.0, which is essentially a duplicate
|
|
796
|
-
# of visit_timestampz(). The function has been renamed from visit_timestampz().
|
|
797
|
-
# Once Datahub can upgrade its pyiceberg dependency to >=0.5.0, the visit_timestampz() function can be safely removed.
|
|
798
839
|
def visit_timestamptz(self, timestamptz_type: TimestamptzType) -> Dict[str, Any]:
|
|
799
840
|
# Avro supports 2 types of timestamp:
|
|
800
841
|
# - Timestamp: independent of a particular timezone or calendar (TZ information is lost)
|
|
@@ -811,22 +852,6 @@ class ToAvroSchemaIcebergVisitor(SchemaVisitorPerPrimitiveType[Dict[str, Any]]):
|
|
|
811
852
|
"native_data_type": str(timestamptz_type),
|
|
812
853
|
}
|
|
813
854
|
|
|
814
|
-
def visit_timestampz(self, timestamptz_type: TimestamptzType) -> Dict[str, Any]:
|
|
815
|
-
# Avro supports 2 types of timestamp:
|
|
816
|
-
# - Timestamp: independent of a particular timezone or calendar (TZ information is lost)
|
|
817
|
-
# - Local Timestamp: represents a timestamp in a local timezone, regardless of what specific time zone is considered local
|
|
818
|
-
# utcAdjustment: bool = True
|
|
819
|
-
return {
|
|
820
|
-
"type": "long",
|
|
821
|
-
"logicalType": "timestamp-micros",
|
|
822
|
-
# Commented out since Avro's Python implementation (1.11.0) does not support local-timestamp-micros, even though it exists in the spec.
|
|
823
|
-
# See bug report: https://issues.apache.org/jira/browse/AVRO-3476 and PR https://github.com/apache/avro/pull/1634
|
|
824
|
-
# "logicalType": "timestamp-micros"
|
|
825
|
-
# if timestamp_type.adjust_to_utc
|
|
826
|
-
# else "local-timestamp-micros",
|
|
827
|
-
"native_data_type": str(timestamptz_type),
|
|
828
|
-
}
|
|
829
|
-
|
|
830
855
|
def visit_string(self, string_type: StringType) -> Dict[str, Any]:
|
|
831
856
|
return {
|
|
832
857
|
"type": "string",
|
|
@@ -845,3 +870,42 @@ class ToAvroSchemaIcebergVisitor(SchemaVisitorPerPrimitiveType[Dict[str, Any]]):
|
|
|
845
870
|
"type": "bytes",
|
|
846
871
|
"native_data_type": str(binary_type),
|
|
847
872
|
}
|
|
873
|
+
|
|
874
|
+
def visit_timestamp_ns(self, timestamp_ns_type: Any) -> Dict[str, Any]:
|
|
875
|
+
# Handle nanosecond precision timestamps
|
|
876
|
+
# Avro supports 2 types of timestamp:
|
|
877
|
+
# - Timestamp: independent of a particular timezone or calendar (TZ information is lost)
|
|
878
|
+
# - Local Timestamp: represents a timestamp in a local timezone, regardless of what specific time zone is considered local
|
|
879
|
+
return {
|
|
880
|
+
"type": "long",
|
|
881
|
+
"logicalType": "timestamp-micros",
|
|
882
|
+
# Commented out since Avro's Python implementation (1.11.0) does not support local-timestamp-micros, even though it exists in the spec.
|
|
883
|
+
# See bug report: https://issues.apache.org/jira/browse/AVRO-3476 and PR https://github.com/apache/avro/pull/1634
|
|
884
|
+
# "logicalType": "timestamp-micros"
|
|
885
|
+
# if timestamp_ns_type.adjust_to_utc
|
|
886
|
+
# else "local-timestamp-micros",
|
|
887
|
+
"native_data_type": str(timestamp_ns_type),
|
|
888
|
+
}
|
|
889
|
+
|
|
890
|
+
def visit_timestamptz_ns(self, timestamptz_ns_type: Any) -> Dict[str, Any]:
|
|
891
|
+
# Handle nanosecond precision timestamps with timezone
|
|
892
|
+
# Avro supports 2 types of timestamp:
|
|
893
|
+
# - Timestamp: independent of a particular timezone or calendar (TZ information is lost)
|
|
894
|
+
# - Local Timestamp: represents a timestamp in a local timezone, regardless of what specific time zone is considered local
|
|
895
|
+
return {
|
|
896
|
+
"type": "long",
|
|
897
|
+
"logicalType": "timestamp-micros",
|
|
898
|
+
# Commented out since Avro's Python implementation (1.11.0) does not support local-timestamp-micros, even though it exists in the spec.
|
|
899
|
+
# See bug report: https://issues.apache.org/jira/browse/AVRO-3476 and PR https://github.com/apache/avro/pull/1634
|
|
900
|
+
# "logicalType": "timestamp-micros"
|
|
901
|
+
# if timestamptz_ns_type.adjust_to_utc
|
|
902
|
+
# else "local-timestamp-micros",
|
|
903
|
+
"native_data_type": str(timestamptz_ns_type),
|
|
904
|
+
}
|
|
905
|
+
|
|
906
|
+
def visit_unknown(self, unknown_type: Any) -> Dict[str, Any]:
|
|
907
|
+
# Handle unknown types
|
|
908
|
+
return {
|
|
909
|
+
"type": "string",
|
|
910
|
+
"native_data_type": str(unknown_type),
|
|
911
|
+
}
|
|
@@ -12,6 +12,7 @@ from pyiceberg.types import (
|
|
|
12
12
|
IcebergType,
|
|
13
13
|
IntegerType,
|
|
14
14
|
LongType,
|
|
15
|
+
PrimitiveType,
|
|
15
16
|
TimestampType,
|
|
16
17
|
TimestamptzType,
|
|
17
18
|
TimeType,
|
|
@@ -22,6 +23,7 @@ from pyiceberg.utils.datetime import (
|
|
|
22
23
|
to_human_timestamp,
|
|
23
24
|
to_human_timestamptz,
|
|
24
25
|
)
|
|
26
|
+
from typing_extensions import TypeGuard
|
|
25
27
|
|
|
26
28
|
from datahub.emitter.mce_builder import get_sys_time
|
|
27
29
|
from datahub.ingestion.source.iceberg.iceberg_common import (
|
|
@@ -65,7 +67,7 @@ class IcebergProfiler:
|
|
|
65
67
|
aggregated_values: Dict[int, Any],
|
|
66
68
|
manifest_values: Dict[int, bytes],
|
|
67
69
|
) -> None:
|
|
68
|
-
for field_id, value_encoded in manifest_values.items():
|
|
70
|
+
for field_id, value_encoded in manifest_values.items():
|
|
69
71
|
try:
|
|
70
72
|
field = schema.find_field(field_id)
|
|
71
73
|
except ValueError:
|
|
@@ -240,7 +242,7 @@ class IcebergProfiler:
|
|
|
240
242
|
return None
|
|
241
243
|
|
|
242
244
|
@staticmethod
|
|
243
|
-
def _is_numeric_type(type: IcebergType) ->
|
|
245
|
+
def _is_numeric_type(type: IcebergType) -> TypeGuard[PrimitiveType]:
|
|
244
246
|
return isinstance(
|
|
245
247
|
type,
|
|
246
248
|
(
|
|
@@ -167,7 +167,7 @@ class AzureADSourceReport(StaleEntityRemovalSourceReport):
|
|
|
167
167
|
@config_class(AzureADConfig)
|
|
168
168
|
@support_status(SupportStatus.CERTIFIED)
|
|
169
169
|
@capability(
|
|
170
|
-
SourceCapability.DELETION_DETECTION, "
|
|
170
|
+
SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
|
|
171
171
|
)
|
|
172
172
|
class AzureADSource(StatefulIngestionSourceBase):
|
|
173
173
|
"""
|
|
@@ -41,7 +41,6 @@ from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import (
|
|
|
41
41
|
)
|
|
42
42
|
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
|
|
43
43
|
from datahub.metadata.schema_classes import (
|
|
44
|
-
ChangeTypeClass,
|
|
45
44
|
CorpGroupInfoClass,
|
|
46
45
|
CorpUserInfoClass,
|
|
47
46
|
GroupMembershipClass,
|
|
@@ -202,7 +201,7 @@ class OktaSourceReport(StaleEntityRemovalSourceReport):
|
|
|
202
201
|
@support_status(SupportStatus.CERTIFIED)
|
|
203
202
|
@capability(SourceCapability.DESCRIPTIONS, "Optionally enabled via configuration")
|
|
204
203
|
@capability(
|
|
205
|
-
SourceCapability.DELETION_DETECTION, "
|
|
204
|
+
SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
|
|
206
205
|
)
|
|
207
206
|
class OktaSource(StatefulIngestionSourceBase):
|
|
208
207
|
"""
|
|
@@ -332,18 +331,12 @@ class OktaSource(StatefulIngestionSourceBase):
|
|
|
332
331
|
yield MetadataWorkUnit(id=wu_id, mce=mce)
|
|
333
332
|
|
|
334
333
|
yield MetadataChangeProposalWrapper(
|
|
335
|
-
entityType="corpGroup",
|
|
336
334
|
entityUrn=datahub_corp_group_snapshot.urn,
|
|
337
|
-
changeType=ChangeTypeClass.UPSERT,
|
|
338
|
-
aspectName="origin",
|
|
339
335
|
aspect=OriginClass(OriginTypeClass.EXTERNAL, "OKTA"),
|
|
340
336
|
).as_workunit()
|
|
341
337
|
|
|
342
338
|
yield MetadataChangeProposalWrapper(
|
|
343
|
-
entityType="corpGroup",
|
|
344
339
|
entityUrn=datahub_corp_group_snapshot.urn,
|
|
345
|
-
changeType=ChangeTypeClass.UPSERT,
|
|
346
|
-
aspectName="status",
|
|
347
340
|
aspect=StatusClass(removed=False),
|
|
348
341
|
).as_workunit()
|
|
349
342
|
|
|
@@ -418,18 +411,12 @@ class OktaSource(StatefulIngestionSourceBase):
|
|
|
418
411
|
yield MetadataWorkUnit(id=wu_id, mce=mce)
|
|
419
412
|
|
|
420
413
|
yield MetadataChangeProposalWrapper(
|
|
421
|
-
entityType="corpuser",
|
|
422
414
|
entityUrn=datahub_corp_user_snapshot.urn,
|
|
423
|
-
changeType=ChangeTypeClass.UPSERT,
|
|
424
|
-
aspectName="origin",
|
|
425
415
|
aspect=OriginClass(OriginTypeClass.EXTERNAL, "OKTA"),
|
|
426
416
|
).as_workunit()
|
|
427
417
|
|
|
428
418
|
yield MetadataChangeProposalWrapper(
|
|
429
|
-
entityType="corpuser",
|
|
430
419
|
entityUrn=datahub_corp_user_snapshot.urn,
|
|
431
|
-
changeType=ChangeTypeClass.UPSERT,
|
|
432
|
-
aspectName="status",
|
|
433
420
|
aspect=StatusClass(removed=False),
|
|
434
421
|
).as_workunit()
|
|
435
422
|
|
|
@@ -189,6 +189,22 @@ class KafkaConnectionTest:
|
|
|
189
189
|
SourceCapability.SCHEMA_METADATA,
|
|
190
190
|
"Schemas associated with each topic are extracted from the schema registry. Avro and Protobuf (certified), JSON (incubating). Schema references are supported.",
|
|
191
191
|
)
|
|
192
|
+
@capability(
|
|
193
|
+
SourceCapability.DATA_PROFILING,
|
|
194
|
+
"Not supported",
|
|
195
|
+
supported=False,
|
|
196
|
+
)
|
|
197
|
+
@capability(
|
|
198
|
+
SourceCapability.LINEAGE_COARSE,
|
|
199
|
+
"Not supported. If you use Kafka Connect, the kafka-connect source can generate lineage.",
|
|
200
|
+
supported=False,
|
|
201
|
+
)
|
|
202
|
+
@capability(
|
|
203
|
+
SourceCapability.LINEAGE_FINE,
|
|
204
|
+
"Not supported",
|
|
205
|
+
supported=False,
|
|
206
|
+
)
|
|
207
|
+
@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
|
|
192
208
|
class KafkaSource(StatefulIngestionSourceBase, TestableSource):
|
|
193
209
|
"""
|
|
194
210
|
This plugin extracts the following:
|
|
@@ -4,7 +4,7 @@ from typing import Dict, Iterable, List, Optional
|
|
|
4
4
|
|
|
5
5
|
from pydantic.fields import Field
|
|
6
6
|
|
|
7
|
-
from datahub.configuration.common import AllowDenyPattern, ConfigModel
|
|
7
|
+
from datahub.configuration.common import AllowDenyPattern, ConfigModel, LaxStr
|
|
8
8
|
from datahub.configuration.source_common import (
|
|
9
9
|
DatasetLineageProviderConfigBase,
|
|
10
10
|
PlatformInstanceConfigMixin,
|
|
@@ -29,7 +29,7 @@ CONNECTOR_CLASS = "connector.class"
|
|
|
29
29
|
class ProvidedConfig(ConfigModel):
|
|
30
30
|
provider: str
|
|
31
31
|
path_key: str
|
|
32
|
-
value:
|
|
32
|
+
value: LaxStr
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
class GenericConnectorConfig(ConfigModel):
|