acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +18 -3
- datahub/api/entities/datajob/datajob.py +24 -4
- datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +47 -72
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +37 -37
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/base.py +8 -6
- datahub/api/graphql/operation.py +14 -10
- datahub/cli/check_cli.py +91 -9
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +20 -12
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +133 -34
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +155 -231
- datahub/cli/exists_cli.py +2 -3
- datahub/cli/get_cli.py +2 -3
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +11 -5
- datahub/cli/ingest_cli.py +25 -26
- datahub/cli/migrate.py +12 -9
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +4 -6
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +39 -7
- datahub/cli/specific/datacontract_cli.py +57 -9
- datahub/cli/specific/dataproduct_cli.py +12 -24
- datahub/cli/specific/dataset_cli.py +31 -21
- datahub/cli/specific/forms_cli.py +2 -5
- datahub/cli/specific/group_cli.py +2 -3
- datahub/cli/specific/structuredproperties_cli.py +5 -7
- datahub/cli/specific/user_cli.py +174 -4
- datahub/cli/state_cli.py +2 -3
- datahub/cli/timeline_cli.py +2 -3
- datahub/configuration/common.py +46 -2
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +4 -3
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +12 -8
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/mcp_builder.py +12 -0
- datahub/emitter/request_helper.py +138 -15
- datahub/emitter/response_helper.py +111 -19
- datahub/emitter/rest_emitter.py +399 -163
- datahub/entrypoints.py +10 -5
- datahub/errors.py +12 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/common.py +9 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +174 -62
- datahub/ingestion/api/source_helpers.py +41 -3
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +31 -5
- datahub/ingestion/glossary/classification_mixin.py +9 -2
- datahub/ingestion/graph/client.py +492 -55
- datahub/ingestion/graph/config.py +18 -2
- datahub/ingestion/graph/filters.py +96 -32
- datahub/ingestion/graph/links.py +55 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +90 -23
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +31 -23
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/apply/datahub_apply.py +6 -5
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +7 -18
- datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
- datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
- datahub/ingestion/source/common/subtypes.py +73 -1
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
- datahub/ingestion/source/data_lake_common/object_store.py +732 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
- datahub/ingestion/source/datahub/config.py +19 -5
- datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
- datahub/ingestion/source/datahub/datahub_source.py +11 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
- datahub/ingestion/source/dbt/dbt_common.py +270 -26
- datahub/ingestion/source/dbt/dbt_core.py +88 -47
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +228 -215
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +12 -14
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +67 -8
- datahub/ingestion/source/fivetran/fivetran.py +228 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
- datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
- datahub/ingestion/source/gcs/gcs_source.py +53 -10
- datahub/ingestion/source/gcs/gcs_utils.py +36 -9
- datahub/ingestion/source/ge_data_profiler.py +146 -33
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +402 -0
- datahub/ingestion/source/hex/constants.py +8 -0
- datahub/ingestion/source/hex/hex.py +311 -0
- datahub/ingestion/source/hex/mapper.py +412 -0
- datahub/ingestion/source/hex/model.py +78 -0
- datahub/ingestion/source/hex/query_fetcher.py +307 -0
- datahub/ingestion/source/iceberg/iceberg.py +385 -164
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +28 -71
- datahub/ingestion/source/kafka/kafka_config.py +78 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
- datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +216 -86
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +539 -555
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +103 -118
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +11 -10
- datahub/ingestion/source/mlflow.py +254 -23
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +359 -181
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
- datahub/ingestion/source/nifi.py +5 -5
- datahub/ingestion/source/openapi.py +85 -38
- datahub/ingestion/source/openapi_parser.py +59 -40
- datahub/ingestion/source/powerbi/config.py +92 -27
- datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +66 -32
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/profile.py +2 -2
- datahub/ingestion/source/redshift/query.py +24 -20
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +13 -11
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +515 -244
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/config.py +75 -8
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +36 -7
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +403 -140
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
- datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
- datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
- datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
- datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +219 -26
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +29 -9
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +9 -4
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +28 -8
- datahub/ingestion/source/sql/hive_metastore.py +24 -25
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +18 -2
- datahub/ingestion/source/sql/mssql/source.py +376 -62
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +62 -11
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +20 -2
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +27 -2
- datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
- datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
- datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +43 -10
- datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/superset.py +810 -126
- datahub/ingestion/source/tableau/tableau.py +172 -69
- datahub/ingestion/source/tableau/tableau_common.py +11 -4
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +161 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
- datahub/ingestion/source/usage/usage_common.py +4 -68
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1367 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
- datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/dataset_domain.py +1 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/common.py +3 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
- datahub/metadata/_urns/urn_defs.py +1866 -1582
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18404 -16617
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/Deprecation.avsc +2 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +21 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QueryProperties.avsc +24 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +147 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +7 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_shared.py +393 -10
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +180 -4
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +154 -12
- datahub/sdk/lineage_client.py +943 -0
- datahub/sdk/main_client.py +83 -8
- datahub/sdk/mlmodel.py +383 -0
- datahub/sdk/mlmodelgroup.py +240 -0
- datahub/sdk/search_client.py +85 -8
- datahub/sdk/search_filters.py +393 -68
- datahub/secret/datahub_secret_store.py +5 -1
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +51 -59
- datahub/sql_parsing/_sqlglot_patch.py +1 -2
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/split_statements.py +30 -3
- datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
- datahub/sql_parsing/sqlglot_lineage.py +517 -44
- datahub/sql_parsing/sqlglot_utils.py +30 -18
- datahub/sql_parsing/tool_meta_extractor.py +25 -2
- datahub/telemetry/telemetry.py +30 -16
- datahub/testing/check_imports.py +1 -1
- datahub/testing/docker_utils.py +8 -2
- datahub/testing/mce_helpers.py +421 -0
- datahub/testing/mcp_diff.py +17 -21
- datahub/testing/sdk_v2_helpers.py +18 -0
- datahub/upgrade/upgrade.py +86 -30
- datahub/utilities/file_backed_collections.py +14 -15
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/ingest_utils.py +2 -2
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +30 -7
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +298 -10
- datahub/utilities/sqlalchemy_query_combiner.py +6 -4
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/utilities/urn_encoder.py +1 -1
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -458
- datahub/ingestion/source/vertexai.py +0 -697
- datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -2,13 +2,14 @@ import logging
|
|
|
2
2
|
import time
|
|
3
3
|
import warnings
|
|
4
4
|
from abc import ABC
|
|
5
|
-
from typing import Dict, Iterable, Optional, Tuple
|
|
5
|
+
from typing import Dict, Iterable, List, Optional, Tuple
|
|
6
6
|
|
|
7
7
|
from pydantic import validator
|
|
8
8
|
from pydantic.fields import Field
|
|
9
9
|
|
|
10
10
|
from datahub.configuration.common import ConfigModel
|
|
11
11
|
from datahub.emitter.mce_builder import make_tag_urn
|
|
12
|
+
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
12
13
|
from datahub.ingestion.api.common import PipelineContext
|
|
13
14
|
from datahub.ingestion.api.decorators import (
|
|
14
15
|
SourceCapability,
|
|
@@ -20,6 +21,7 @@ from datahub.ingestion.api.decorators import (
|
|
|
20
21
|
)
|
|
21
22
|
from datahub.ingestion.api.source import Source, SourceReport
|
|
22
23
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
24
|
+
from datahub.ingestion.source.common.subtypes import DatasetSubTypes
|
|
23
25
|
from datahub.ingestion.source.openapi_parser import (
|
|
24
26
|
clean_url,
|
|
25
27
|
compose_url_attr,
|
|
@@ -32,14 +34,13 @@ from datahub.ingestion.source.openapi_parser import (
|
|
|
32
34
|
set_metadata,
|
|
33
35
|
try_guessing,
|
|
34
36
|
)
|
|
35
|
-
from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
|
|
36
|
-
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
|
|
37
37
|
from datahub.metadata.schema_classes import (
|
|
38
38
|
AuditStampClass,
|
|
39
39
|
DatasetPropertiesClass,
|
|
40
40
|
GlobalTagsClass,
|
|
41
41
|
InstitutionalMemoryClass,
|
|
42
42
|
InstitutionalMemoryMetadataClass,
|
|
43
|
+
SubTypesClass,
|
|
43
44
|
TagAssociationClass,
|
|
44
45
|
)
|
|
45
46
|
|
|
@@ -81,6 +82,9 @@ class OpenApiConfig(ConfigModel):
|
|
|
81
82
|
get_token: dict = Field(
|
|
82
83
|
default={}, description="Retrieving a token from the endpoint."
|
|
83
84
|
)
|
|
85
|
+
verify_ssl: bool = Field(
|
|
86
|
+
default=True, description="Enable SSL certificate verification"
|
|
87
|
+
)
|
|
84
88
|
|
|
85
89
|
@validator("bearer_token", always=True)
|
|
86
90
|
def ensure_only_one_token(
|
|
@@ -101,7 +105,7 @@ class OpenApiConfig(ConfigModel):
|
|
|
101
105
|
# details there once, and then use that session for all requests.
|
|
102
106
|
self.token = f"Bearer {self.bearer_token}"
|
|
103
107
|
else:
|
|
104
|
-
assert "url_complement" in self.get_token
|
|
108
|
+
assert "url_complement" in self.get_token, (
|
|
105
109
|
"When 'request_type' is set to 'get', an url_complement is needed for the request."
|
|
106
110
|
)
|
|
107
111
|
if self.get_token["request_type"] == "get":
|
|
@@ -128,12 +132,14 @@ class OpenApiConfig(ConfigModel):
|
|
|
128
132
|
tok_url=url4req,
|
|
129
133
|
method=self.get_token["request_type"],
|
|
130
134
|
proxies=self.proxies,
|
|
135
|
+
verify_ssl=self.verify_ssl,
|
|
131
136
|
)
|
|
132
137
|
sw_dict = get_swag_json(
|
|
133
138
|
self.url,
|
|
134
139
|
token=self.token,
|
|
135
140
|
swagger_file=self.swagger_file,
|
|
136
141
|
proxies=self.proxies,
|
|
142
|
+
verify_ssl=self.verify_ssl,
|
|
137
143
|
) # load the swagger file
|
|
138
144
|
|
|
139
145
|
else: # using basic auth for accessing endpoints
|
|
@@ -143,6 +149,7 @@ class OpenApiConfig(ConfigModel):
|
|
|
143
149
|
password=self.password,
|
|
144
150
|
swagger_file=self.swagger_file,
|
|
145
151
|
proxies=self.proxies,
|
|
152
|
+
verify_ssl=self.verify_ssl,
|
|
146
153
|
)
|
|
147
154
|
return sw_dict
|
|
148
155
|
|
|
@@ -222,8 +229,9 @@ class APISource(Source, ABC):
|
|
|
222
229
|
|
|
223
230
|
def init_dataset(
|
|
224
231
|
self, endpoint_k: str, endpoint_dets: dict
|
|
225
|
-
) -> Tuple[
|
|
232
|
+
) -> Tuple[str, str, List[MetadataWorkUnit]]:
|
|
226
233
|
config = self.config
|
|
234
|
+
workunits = []
|
|
227
235
|
|
|
228
236
|
dataset_name = endpoint_k[1:].replace("/", ".")
|
|
229
237
|
|
|
@@ -233,22 +241,27 @@ class APISource(Source, ABC):
|
|
|
233
241
|
else:
|
|
234
242
|
dataset_name = "root"
|
|
235
243
|
|
|
236
|
-
|
|
237
|
-
urn=f"urn:li:dataset:(urn:li:dataPlatform:{self.platform},{config.name}.{dataset_name},PROD)",
|
|
238
|
-
aspects=[],
|
|
239
|
-
)
|
|
244
|
+
dataset_urn = f"urn:li:dataset:(urn:li:dataPlatform:{self.platform},{config.name}.{dataset_name},PROD)"
|
|
240
245
|
|
|
241
|
-
#
|
|
242
|
-
|
|
246
|
+
# Create dataset properties aspect
|
|
247
|
+
properties = DatasetPropertiesClass(
|
|
243
248
|
description=endpoint_dets["description"], customProperties={}
|
|
244
249
|
)
|
|
245
|
-
|
|
250
|
+
wu = MetadataWorkUnit(
|
|
251
|
+
id=dataset_name,
|
|
252
|
+
mcp=MetadataChangeProposalWrapper(entityUrn=dataset_urn, aspect=properties),
|
|
253
|
+
)
|
|
254
|
+
workunits.append(wu)
|
|
246
255
|
|
|
247
|
-
#
|
|
256
|
+
# Create tags aspect
|
|
248
257
|
tags_str = [make_tag_urn(t) for t in endpoint_dets["tags"]]
|
|
249
258
|
tags_tac = [TagAssociationClass(t) for t in tags_str]
|
|
250
259
|
gtc = GlobalTagsClass(tags_tac)
|
|
251
|
-
|
|
260
|
+
wu = MetadataWorkUnit(
|
|
261
|
+
id=f"{dataset_name}-tags",
|
|
262
|
+
mcp=MetadataChangeProposalWrapper(entityUrn=dataset_urn, aspect=gtc),
|
|
263
|
+
)
|
|
264
|
+
workunits.append(wu)
|
|
252
265
|
|
|
253
266
|
# the link will appear in the "documentation"
|
|
254
267
|
link_url = clean_url(config.url + self.url_basepath + endpoint_k)
|
|
@@ -260,17 +273,25 @@ class APISource(Source, ABC):
|
|
|
260
273
|
url=link_url, description=link_description, createStamp=creation
|
|
261
274
|
)
|
|
262
275
|
inst_memory = InstitutionalMemoryClass([link_metadata])
|
|
263
|
-
|
|
276
|
+
wu = MetadataWorkUnit(
|
|
277
|
+
id=f"{dataset_name}-docs",
|
|
278
|
+
mcp=MetadataChangeProposalWrapper(
|
|
279
|
+
entityUrn=dataset_urn, aspect=inst_memory
|
|
280
|
+
),
|
|
281
|
+
)
|
|
282
|
+
workunits.append(wu)
|
|
264
283
|
|
|
265
|
-
|
|
284
|
+
# Create subtype aspect
|
|
285
|
+
sub_types = SubTypesClass(typeNames=[DatasetSubTypes.API_ENDPOINT])
|
|
286
|
+
wu = MetadataWorkUnit(
|
|
287
|
+
id=f"{dataset_name}-subtype",
|
|
288
|
+
mcp=MetadataChangeProposalWrapper(entityUrn=dataset_urn, aspect=sub_types),
|
|
289
|
+
)
|
|
290
|
+
workunits.append(wu)
|
|
266
291
|
|
|
267
|
-
|
|
268
|
-
self, dataset_snapshot: DatasetSnapshot, dataset_name: str
|
|
269
|
-
) -> ApiWorkUnit:
|
|
270
|
-
mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot)
|
|
271
|
-
return ApiWorkUnit(id=dataset_name, mce=mce)
|
|
292
|
+
return dataset_name, dataset_urn, workunits
|
|
272
293
|
|
|
273
|
-
def get_workunits_internal(self) -> Iterable[
|
|
294
|
+
def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
|
|
274
295
|
config = self.config
|
|
275
296
|
|
|
276
297
|
sw_dict = self.config.get_swagger()
|
|
@@ -294,17 +315,25 @@ class APISource(Source, ABC):
|
|
|
294
315
|
if endpoint_k in config.ignore_endpoints:
|
|
295
316
|
continue
|
|
296
317
|
|
|
297
|
-
|
|
318
|
+
# Initialize dataset and get common aspects
|
|
319
|
+
dataset_name, dataset_urn, workunits = self.init_dataset(
|
|
298
320
|
endpoint_k, endpoint_dets
|
|
299
321
|
)
|
|
322
|
+
for wu in workunits:
|
|
323
|
+
yield wu
|
|
300
324
|
|
|
301
|
-
#
|
|
302
|
-
if "data" in endpoint_dets
|
|
325
|
+
# Handle schema metadata if available
|
|
326
|
+
if "data" in endpoint_dets:
|
|
303
327
|
# we are lucky! data is defined in the swagger for this endpoint
|
|
304
328
|
schema_metadata = set_metadata(dataset_name, endpoint_dets["data"])
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
329
|
+
wu = MetadataWorkUnit(
|
|
330
|
+
id=f"{dataset_name}-schema",
|
|
331
|
+
mcp=MetadataChangeProposalWrapper(
|
|
332
|
+
entityUrn=dataset_urn, aspect=schema_metadata
|
|
333
|
+
),
|
|
334
|
+
)
|
|
335
|
+
yield wu
|
|
336
|
+
elif endpoint_dets["method"] != "GET":
|
|
308
337
|
self.report.report_warning(
|
|
309
338
|
title="Failed to Extract Endpoint Metadata",
|
|
310
339
|
message=f"No example provided for {endpoint_dets['method']}",
|
|
@@ -320,6 +349,7 @@ class APISource(Source, ABC):
|
|
|
320
349
|
tot_url,
|
|
321
350
|
token=config.token,
|
|
322
351
|
proxies=config.proxies,
|
|
352
|
+
verify_ssl=config.verify_ssl,
|
|
323
353
|
)
|
|
324
354
|
else:
|
|
325
355
|
response = request_call(
|
|
@@ -327,6 +357,7 @@ class APISource(Source, ABC):
|
|
|
327
357
|
username=config.username,
|
|
328
358
|
password=config.password,
|
|
329
359
|
proxies=config.proxies,
|
|
360
|
+
verify_ssl=config.verify_ssl,
|
|
330
361
|
)
|
|
331
362
|
if response.status_code == 200:
|
|
332
363
|
fields2add, root_dataset_samples[dataset_name] = extract_fields(
|
|
@@ -338,13 +369,17 @@ class APISource(Source, ABC):
|
|
|
338
369
|
context=f"Endpoint Type: {endpoint_k}, Name: {dataset_name}",
|
|
339
370
|
)
|
|
340
371
|
schema_metadata = set_metadata(dataset_name, fields2add)
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
372
|
+
wu = MetadataWorkUnit(
|
|
373
|
+
id=f"{dataset_name}-schema",
|
|
374
|
+
mcp=MetadataChangeProposalWrapper(
|
|
375
|
+
entityUrn=dataset_urn, aspect=schema_metadata
|
|
376
|
+
),
|
|
377
|
+
)
|
|
378
|
+
yield wu
|
|
344
379
|
else:
|
|
345
380
|
self.report_bad_responses(response.status_code, type=endpoint_k)
|
|
346
381
|
else:
|
|
347
|
-
if endpoint_k not in config.forced_examples
|
|
382
|
+
if endpoint_k not in config.forced_examples:
|
|
348
383
|
# start guessing...
|
|
349
384
|
url_guess = try_guessing(endpoint_k, root_dataset_samples)
|
|
350
385
|
tot_url = clean_url(config.url + self.url_basepath + url_guess)
|
|
@@ -353,6 +388,7 @@ class APISource(Source, ABC):
|
|
|
353
388
|
tot_url,
|
|
354
389
|
token=config.token,
|
|
355
390
|
proxies=config.proxies,
|
|
391
|
+
verify_ssl=config.verify_ssl,
|
|
356
392
|
)
|
|
357
393
|
else:
|
|
358
394
|
response = request_call(
|
|
@@ -360,6 +396,7 @@ class APISource(Source, ABC):
|
|
|
360
396
|
username=config.username,
|
|
361
397
|
password=config.password,
|
|
362
398
|
proxies=config.proxies,
|
|
399
|
+
verify_ssl=config.verify_ssl,
|
|
363
400
|
)
|
|
364
401
|
if response.status_code == 200:
|
|
365
402
|
fields2add, _ = extract_fields(response, dataset_name)
|
|
@@ -369,9 +406,13 @@ class APISource(Source, ABC):
|
|
|
369
406
|
context=f"Endpoint Type: {endpoint_k}, Name: {dataset_name}",
|
|
370
407
|
)
|
|
371
408
|
schema_metadata = set_metadata(dataset_name, fields2add)
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
409
|
+
wu = MetadataWorkUnit(
|
|
410
|
+
id=f"{dataset_name}-schema",
|
|
411
|
+
mcp=MetadataChangeProposalWrapper(
|
|
412
|
+
entityUrn=dataset_urn, aspect=schema_metadata
|
|
413
|
+
),
|
|
414
|
+
)
|
|
415
|
+
yield wu
|
|
375
416
|
else:
|
|
376
417
|
self.report_bad_responses(response.status_code, type=endpoint_k)
|
|
377
418
|
else:
|
|
@@ -384,6 +425,7 @@ class APISource(Source, ABC):
|
|
|
384
425
|
tot_url,
|
|
385
426
|
token=config.token,
|
|
386
427
|
proxies=config.proxies,
|
|
428
|
+
verify_ssl=config.verify_ssl,
|
|
387
429
|
)
|
|
388
430
|
else:
|
|
389
431
|
response = request_call(
|
|
@@ -391,6 +433,7 @@ class APISource(Source, ABC):
|
|
|
391
433
|
username=config.username,
|
|
392
434
|
password=config.password,
|
|
393
435
|
proxies=config.proxies,
|
|
436
|
+
verify_ssl=config.verify_ssl,
|
|
394
437
|
)
|
|
395
438
|
if response.status_code == 200:
|
|
396
439
|
fields2add, _ = extract_fields(response, dataset_name)
|
|
@@ -400,9 +443,13 @@ class APISource(Source, ABC):
|
|
|
400
443
|
context=f"Endpoint Type: {endpoint_k}, Name: {dataset_name}",
|
|
401
444
|
)
|
|
402
445
|
schema_metadata = set_metadata(dataset_name, fields2add)
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
446
|
+
wu = MetadataWorkUnit(
|
|
447
|
+
id=f"{dataset_name}-schema",
|
|
448
|
+
mcp=MetadataChangeProposalWrapper(
|
|
449
|
+
entityUrn=dataset_urn, aspect=schema_metadata
|
|
450
|
+
),
|
|
451
|
+
)
|
|
452
|
+
yield wu
|
|
406
453
|
else:
|
|
407
454
|
self.report_bad_responses(response.status_code, type=endpoint_k)
|
|
408
455
|
|
|
@@ -59,17 +59,21 @@ def request_call(
|
|
|
59
59
|
username: Optional[str] = None,
|
|
60
60
|
password: Optional[str] = None,
|
|
61
61
|
proxies: Optional[dict] = None,
|
|
62
|
+
verify_ssl: bool = True,
|
|
62
63
|
) -> requests.Response:
|
|
63
64
|
headers = {"accept": "application/json"}
|
|
64
65
|
if username is not None and password is not None:
|
|
65
66
|
return requests.get(
|
|
66
|
-
url,
|
|
67
|
+
url,
|
|
68
|
+
headers=headers,
|
|
69
|
+
auth=HTTPBasicAuth(username, password),
|
|
70
|
+
verify=verify_ssl,
|
|
67
71
|
)
|
|
68
72
|
elif token is not None:
|
|
69
73
|
headers["Authorization"] = f"{token}"
|
|
70
|
-
return requests.get(url, proxies=proxies, headers=headers)
|
|
74
|
+
return requests.get(url, proxies=proxies, headers=headers, verify=verify_ssl)
|
|
71
75
|
else:
|
|
72
|
-
return requests.get(url, headers=headers)
|
|
76
|
+
return requests.get(url, headers=headers, verify=verify_ssl)
|
|
73
77
|
|
|
74
78
|
|
|
75
79
|
def get_swag_json(
|
|
@@ -79,10 +83,16 @@ def get_swag_json(
|
|
|
79
83
|
password: Optional[str] = None,
|
|
80
84
|
swagger_file: str = "",
|
|
81
85
|
proxies: Optional[dict] = None,
|
|
86
|
+
verify_ssl: bool = True,
|
|
82
87
|
) -> Dict:
|
|
83
88
|
tot_url = url + swagger_file
|
|
84
89
|
response = request_call(
|
|
85
|
-
url=tot_url,
|
|
90
|
+
url=tot_url,
|
|
91
|
+
token=token,
|
|
92
|
+
username=username,
|
|
93
|
+
password=password,
|
|
94
|
+
proxies=proxies,
|
|
95
|
+
verify_ssl=verify_ssl,
|
|
86
96
|
)
|
|
87
97
|
|
|
88
98
|
if response.status_code != 200:
|
|
@@ -127,37 +137,45 @@ def get_endpoints(sw_dict: dict) -> dict:
|
|
|
127
137
|
check_sw_version(sw_dict)
|
|
128
138
|
|
|
129
139
|
for p_k, p_o in sw_dict["paths"].items():
|
|
130
|
-
method
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
url_details[p_k]
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
140
|
+
for method, method_spec in p_o.items():
|
|
141
|
+
# skip non-method keys like "parameters"
|
|
142
|
+
if method.lower() not in [
|
|
143
|
+
"get",
|
|
144
|
+
"post",
|
|
145
|
+
"put",
|
|
146
|
+
"delete",
|
|
147
|
+
"patch",
|
|
148
|
+
"options",
|
|
149
|
+
"head",
|
|
150
|
+
]:
|
|
151
|
+
continue
|
|
152
|
+
|
|
153
|
+
responses = method_spec.get("responses", {})
|
|
154
|
+
base_res = responses.get("200") or responses.get(200)
|
|
155
|
+
if not base_res:
|
|
156
|
+
# if there is no 200 response, we skip this method
|
|
157
|
+
continue
|
|
158
|
+
|
|
159
|
+
# if the description is not present, we will use the summary
|
|
160
|
+
# if both are not present, we will use an empty string
|
|
161
|
+
desc = method_spec.get("description") or method_spec.get("summary", "")
|
|
162
|
+
|
|
163
|
+
# if the tags are not present, we will use an empty list
|
|
164
|
+
tags = method_spec.get("tags", [])
|
|
165
|
+
|
|
166
|
+
url_details[p_k] = {
|
|
167
|
+
"description": desc,
|
|
168
|
+
"tags": tags,
|
|
169
|
+
"method": method.upper(),
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
example_data = check_for_api_example_data(base_res, p_k)
|
|
173
|
+
if example_data:
|
|
174
|
+
url_details[p_k]["data"] = example_data
|
|
175
|
+
|
|
176
|
+
# checking whether there are defined parameters to execute the call...
|
|
177
|
+
if "parameters" in p_o[method]:
|
|
178
|
+
url_details[p_k]["parameters"] = p_o[method]["parameters"]
|
|
161
179
|
|
|
162
180
|
return dict(sorted(url_details.items()))
|
|
163
181
|
|
|
@@ -169,7 +187,7 @@ def check_for_api_example_data(base_res: dict, key: str) -> dict:
|
|
|
169
187
|
data = {}
|
|
170
188
|
if "content" in base_res:
|
|
171
189
|
res_cont = base_res["content"]
|
|
172
|
-
if "application/json" in res_cont
|
|
190
|
+
if "application/json" in res_cont:
|
|
173
191
|
ex_field = None
|
|
174
192
|
if "example" in res_cont["application/json"]:
|
|
175
193
|
ex_field = "example"
|
|
@@ -186,7 +204,7 @@ def check_for_api_example_data(base_res: dict, key: str) -> dict:
|
|
|
186
204
|
logger.warning(
|
|
187
205
|
f"Field in swagger file does not give consistent data --- {key}"
|
|
188
206
|
)
|
|
189
|
-
elif "text/csv" in res_cont
|
|
207
|
+
elif "text/csv" in res_cont:
|
|
190
208
|
data = res_cont["text/csv"]["schema"]
|
|
191
209
|
elif "examples" in base_res:
|
|
192
210
|
data = base_res["examples"]["application/json"]
|
|
@@ -239,7 +257,7 @@ def guessing_url_name(url: str, examples: dict) -> str:
|
|
|
239
257
|
|
|
240
258
|
# substituting the parameter's name w the value
|
|
241
259
|
for name, clean_name in zip(needed_n, cleaned_needed_n):
|
|
242
|
-
if clean_name in examples[ex2use]
|
|
260
|
+
if clean_name in examples[ex2use]:
|
|
243
261
|
guessed_url = re.sub(name, str(examples[ex2use][clean_name]), guessed_url)
|
|
244
262
|
|
|
245
263
|
return guessed_url
|
|
@@ -358,6 +376,7 @@ def get_tok(
|
|
|
358
376
|
tok_url: str = "",
|
|
359
377
|
method: str = "post",
|
|
360
378
|
proxies: Optional[dict] = None,
|
|
379
|
+
verify_ssl: bool = True,
|
|
361
380
|
) -> str:
|
|
362
381
|
"""
|
|
363
382
|
Trying to post username/password to get auth.
|
|
@@ -368,7 +387,7 @@ def get_tok(
|
|
|
368
387
|
# this will make a POST call with username and password
|
|
369
388
|
data = {"username": username, "password": password, "maxDuration": True}
|
|
370
389
|
# url2post = url + "api/authenticate/"
|
|
371
|
-
response = requests.post(url4req, proxies=proxies, json=data)
|
|
390
|
+
response = requests.post(url4req, proxies=proxies, json=data, verify=verify_ssl)
|
|
372
391
|
if response.status_code == 200:
|
|
373
392
|
cont = json.loads(response.content)
|
|
374
393
|
if "token" in cont: # other authentication scheme
|
|
@@ -377,7 +396,7 @@ def get_tok(
|
|
|
377
396
|
token = f"Bearer {cont['tokens']['access']}"
|
|
378
397
|
elif method == "get":
|
|
379
398
|
# this will make a GET call with username and password
|
|
380
|
-
response = requests.get(url4req)
|
|
399
|
+
response = requests.get(url4req, verify=verify_ssl)
|
|
381
400
|
if response.status_code == 200:
|
|
382
401
|
cont = json.loads(response.content)
|
|
383
402
|
token = cont["token"]
|