acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic.
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
- datahub/_version.py +1 -1
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +26 -23
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +3 -3
- datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
- datahub/api/graphql/operation.py +10 -6
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +18 -10
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +125 -27
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +153 -229
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +37 -6
- datahub/cli/specific/datacontract_cli.py +54 -7
- datahub/cli/specific/dataproduct_cli.py +2 -15
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +172 -3
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/configuration/common.py +40 -1
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +3 -2
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +8 -4
- datahub/emitter/rest_emitter.py +103 -30
- datahub/entrypoints.py +6 -3
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +165 -58
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +330 -25
- datahub/ingestion/graph/config.py +3 -2
- datahub/ingestion/graph/filters.py +30 -11
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +81 -11
- datahub/ingestion/run/pipeline_config.py +2 -2
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +13 -5
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +6 -8
- datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
- datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
- datahub/ingestion/source/common/subtypes.py +53 -0
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
- datahub/ingestion/source/datahub/config.py +12 -9
- datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
- datahub/ingestion/source/datahub/datahub_source.py +10 -0
- datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
- datahub/ingestion/source/dbt/dbt_common.py +224 -9
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +132 -98
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +8 -10
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +66 -7
- datahub/ingestion/source/fivetran/fivetran.py +227 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
- datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gcs/gcs_source.py +32 -4
- datahub/ingestion/source/ge_data_profiler.py +108 -31
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/api.py +28 -1
- datahub/ingestion/source/hex/hex.py +16 -5
- datahub/ingestion/source/hex/mapper.py +16 -2
- datahub/ingestion/source/hex/model.py +2 -0
- datahub/ingestion/source/hex/query_fetcher.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +123 -59
- datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
- datahub/ingestion/source/looker/looker_common.py +148 -79
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +503 -547
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +96 -117
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +9 -9
- datahub/ingestion/source/mlflow.py +12 -2
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +26 -5
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
- datahub/ingestion/source/nifi.py +2 -2
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/powerbi/config.py +47 -21
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
- datahub/ingestion/source/powerbi/powerbi.py +10 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/query.py +23 -19
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +6 -5
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +449 -248
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +10 -16
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +3 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
- datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
- datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
- datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
- datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +217 -25
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +24 -8
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +2 -2
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +4 -3
- datahub/ingestion/source/sql/hive_metastore.py +19 -20
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +3 -1
- datahub/ingestion/source/sql/mssql/source.py +336 -57
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +5 -5
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +2 -1
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +22 -0
- datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +11 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/superset.py +314 -67
- datahub/ingestion/source/tableau/tableau.py +135 -59
- datahub/ingestion/source/tableau/tableau_common.py +9 -2
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/unity/config.py +160 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
- datahub/ingestion/source/usage/usage_common.py +4 -3
- datahub/ingestion/source/vertexai/vertexai.py +1 -1
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +6806 -4871
- datahub/metadata/_urns/urn_defs.py +1767 -1539
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18395 -16979
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +4 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +61 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/sdk/__init__.py +2 -0
- datahub/sdk/_all_entities.py +7 -0
- datahub/sdk/_shared.py +249 -5
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +7 -0
- datahub/sdk/datajob.py +45 -13
- datahub/sdk/dataset.py +56 -2
- datahub/sdk/entity_client.py +111 -9
- datahub/sdk/lineage_client.py +663 -82
- datahub/sdk/main_client.py +50 -16
- datahub/sdk/mlmodel.py +120 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +7 -3
- datahub/sdk/search_filters.py +304 -36
- datahub/secret/datahub_secret_store.py +3 -0
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +39 -59
- datahub/sql_parsing/split_statements.py +13 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
- datahub/sql_parsing/sqlglot_lineage.py +196 -42
- datahub/sql_parsing/sqlglot_utils.py +12 -4
- datahub/sql_parsing/tool_meta_extractor.py +1 -3
- datahub/telemetry/telemetry.py +28 -14
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +73 -17
- datahub/utilities/file_backed_collections.py +8 -9
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +22 -6
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +10 -1
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -466
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
datahub/cli/docker_cli.py
CHANGED

@@ -20,6 +20,7 @@ import requests
 from expandvars import expandvars
 from requests_file import FileAdapter
 
+from datahub._version import __version__, is_dev_mode, nice_version_name
 from datahub.cli.config_utils import DATAHUB_ROOT_FOLDER
 from datahub.cli.docker_check import (
     DATAHUB_COMPOSE_LEGACY_VOLUME_FILTERS,
@@ -28,45 +29,87 @@ from datahub.cli.docker_check import (
     DockerComposeVersionError,
     QuickstartStatus,
     check_docker_quickstart,
+    check_upgrade_supported,
     get_docker_client,
     run_quickstart_preflight_checks,
 )
-from datahub.cli.quickstart_versioning import QuickstartVersionMappingConfig
+from datahub.cli.quickstart_versioning import (
+    QuickstartVersionMappingConfig,
+)
+from datahub.configuration.env_vars import get_docker_compose_base
 from datahub.ingestion.run.pipeline import Pipeline
 from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade
 from datahub.utilities.perf_timer import PerfTimer
 
 logger = logging.getLogger(__name__)
-_ClickPositiveInt = click.IntRange(min=1)
 
-NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose.quickstart.yml"
-)
-ELASTIC_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose-without-neo4j.quickstart.yml"
-)
-NEO4J_AND_ELASTIC_M1_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose-m1.quickstart.yml"
-)
-ELASTIC_M1_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml"
-)
-CONSUMERS_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose.consumers.quickstart.yml"
-)
-ELASTIC_CONSUMERS_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml"
-)
-KAFKA_SETUP_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose.kafka-setup.quickstart.yml"
-)
+_ClickPositiveInt = click.IntRange(min=1)
 
+QUICKSTART_COMPOSE_FILE = "docker/quickstart/docker-compose.quickstart-profile.yml"
 
 _QUICKSTART_MAX_WAIT_TIME = datetime.timedelta(minutes=10)
 _QUICKSTART_UP_TIMEOUT = datetime.timedelta(seconds=100)
 _QUICKSTART_STATUS_CHECK_INTERVAL = datetime.timedelta(seconds=2)
 
+MIGRATION_REQUIRED_INSTRUCTIONS = f"""
+Your existing DataHub server was installed with an \
+older CLI and is incompatible with the current CLI (version {nice_version_name}).
+
+Required steps to upgrade:
+1. Backup your data (recommended): datahub docker quickstart --backup
+   Guide: https://docs.datahub.com/docs/quickstart#back-up-datahub
+
+2. Remove old installation: datahub docker nuke
+
+3. Start fresh installation: datahub docker quickstart
+
+4. Restore data:
+   datahub docker quickstart --restore
+
+⚠️ Without backup, all existing data will be lost.
+
+For fresh start (if data is not needed):
+1. Remove installation:
+   datahub docker nuke
+
+2. Start fresh:
+   datahub docker quickstart
+"""
+
+REPAIR_REQUIRED_INSTRUCTIONS = f"""
+Unhealthy DataHub Installation Detected
+
+Your DataHub installation has issues that cannot be fixed with the current CLI.
+
+Your options:
+
+OPTION 1 - Preserve data (if needed):
+1. Downgrade CLI to version 1.1:
+   pip install acryl-datahub==1.1
+2. Fix the installation:
+   datahub docker quickstart
+3. Create backup:
+   datahub docker quickstart --backup
+4. Upgrade CLI back:
+   pip install acryl-datahub=={nice_version_name()}
+5. Migrate:
+   datahub docker nuke && datahub docker quickstart
+6. Restore data:
+   datahub docker quickstart --restore
+
+OPTION 2 - Fresh start (if data not needed):
+1. Remove installation:
+   datahub docker nuke
+2. Start fresh:
+   datahub docker quickstart
+
+⚠️ The current CLI cannot repair installations created by older versions.
+
+Additional information on backup and restore: https://docs.datahub.com/docs/quickstart#back-up-datahub
+Troubleshooting guide: https://docs.datahub.com/docs/troubleshooting/quickstart
+"""
+
 
 class Architectures(Enum):
     x86 = "x86"
@@ -89,6 +132,14 @@ def _docker_subprocess_env() -> Dict[str, str]:
     return env
 
 
+def show_migration_instructions():
+    click.secho(MIGRATION_REQUIRED_INSTRUCTIONS, fg="red")
+
+
+def show_repair_instructions():
+    click.secho(REPAIR_REQUIRED_INSTRUCTIONS, fg="red")
+
+
 @click.group()
 def docker() -> None:
     """Helper commands for setting up and interacting with a local
@@ -97,19 +148,22 @@ def docker() -> None:
 
 
 @docker.command()
-@upgrade.check_upgrade
-@telemetry.with_telemetry()
 def check() -> None:
     """Check that the Docker containers are healthy"""
     status = check_docker_quickstart()
+
     if status.is_ok():
         click.secho("✔ No issues detected", fg="green")
+        if status.running_unsupported_version:
+            show_migration_instructions()
     else:
+        if status.running_unsupported_version:
+            show_repair_instructions()
         raise status.to_exception("The following issues were detected:")
 
 
-def is_m1() -> bool:
-    """Check whether we are running on an M1 machine"""
+def is_apple_silicon() -> bool:
    """Check whether we are running on an Apple Silicon machine"""
     try:
         return (
             platform.uname().machine == "arm64" and platform.uname().system == "Darwin"
@@ -119,52 +173,11 @@ def is_m1() -> bool:
     return False
 
 
-def is_arch_m1(arch: Architectures) -> bool:
-    return arch in [Architectures.arm64, Architectures.m1, Architectures.m2]
-
-
-def should_use_neo4j_for_graph_service(graph_service_override: Optional[str]) -> bool:
-    if graph_service_override is not None:
-        if graph_service_override == "elasticsearch":
-            click.echo("Starting with elasticsearch due to graph-service-impl param\n")
-            return False
-        if graph_service_override == "neo4j":
-            click.echo("Starting with neo4j due to graph-service-impl param\n")
-            return True
-        else:
-            click.secho(
-                graph_service_override
-                + " is not a valid graph service option. Choose either `neo4j` or "
-                "`elasticsearch`\n",
-                fg="red",
-            )
-            raise ValueError(f"invalid graph service option: {graph_service_override}")
-    with get_docker_client() as client:
-        if len(client.volumes.list(filters={"name": "datahub_neo4jdata"})) > 0:
-            click.echo(
-                "Datahub Neo4j volume found, starting with neo4j as graph service.\n"
-                "If you want to run using elastic, run `datahub docker nuke` and re-ingest your data.\n"
-            )
-            return True
-
-        logger.debug(
-            "No Datahub Neo4j volume found, starting with elasticsearch as graph service.\n"
-            "To use neo4j as a graph backend, run \n"
-            "`datahub docker quickstart --graph-service-impl neo4j`"
-            "\nfrom the root of the datahub repo\n"
-        )
-        return False
-
-
 def _set_environment_variables(
     version: Optional[str],
-    mysql_version: Optional[str],
     mysql_port: Optional[int],
-    zk_port: Optional[int],
     kafka_broker_port: Optional[int],
-    schema_registry_port: Optional[int],
     elastic_port: Optional[int],
-    kafka_setup: Optional[bool],
 ) -> None:
     if version is not None:
         if not version.startswith("v") and "." in version:
@@ -173,24 +186,25 @@ def _set_environment_variables(
             )
             version = f"v{version}"
         os.environ["DATAHUB_VERSION"] = version
-    if mysql_version is not None:
-        os.environ["DATAHUB_MYSQL_VERSION"] = mysql_version
     if mysql_port is not None:
         os.environ["DATAHUB_MAPPED_MYSQL_PORT"] = str(mysql_port)
 
-    if zk_port is not None:
-        os.environ["DATAHUB_MAPPED_ZK_PORT"] = str(zk_port)
-
     if kafka_broker_port is not None:
         os.environ["DATAHUB_MAPPED_KAFKA_BROKER_PORT"] = str(kafka_broker_port)
 
-    if schema_registry_port is not None:
-        os.environ["DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT"] = str(schema_registry_port)
-
     if elastic_port is not None:
         os.environ["DATAHUB_MAPPED_ELASTIC_PORT"] = str(elastic_port)
-
-
+
+    os.environ["METADATA_SERVICE_AUTH_ENABLED"] = "false"
+
+    cliVersion = nice_version_name()
+    if is_dev_mode():  # This should only happen during development/CI.
+        cliVersion = __version__.replace(".dev0", "")
+        logger.info(
+            f"Development build: Using {cliVersion} instead of '{__version__}' version of CLI for UI ingestion"
+        )
+
+    os.environ["UI_INGESTION_DEFAULT_CLI_VERSION"] = cliVersion
 
 
 def _get_default_quickstart_compose_file() -> Optional[str]:
@@ -250,6 +264,8 @@ def _attempt_stop(quickstart_compose_file: List[pathlib.Path]) -> None:
     compose = _docker_compose_v2()
     base_command: List[str] = [
         *compose,
+        "--profile",
+        "quickstart",
         *itertools.chain.from_iterable(
             ("-f", f"{path}") for path in compose_files_for_stopping
         ),
@@ -346,12 +362,15 @@ EBEAN_DATASOURCE_HOST=mysql:${DATAHUB_MAPPED_MYSQL_PORT:-3306}
 EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:${DATAHUB_MAPPED_MYSQL_PORT:-3306}/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
 EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
 ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
-
+GRAPH_SERVICE_IMPL=elasticsearch
 KAFKA_BOOTSTRAP_SERVER=broker:29092
-KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
+KAFKA_SCHEMAREGISTRY_URL=http://datahub-gms:8080${DATAHUB_GMS_BASE_PATH}/schema-registry/api/
+SCHEMA_REGISTRY_TYPE=INTERNAL
 
-ELASTICSEARCH_HOST=elasticsearch
+ELASTICSEARCH_HOST=search
 ELASTICSEARCH_PORT=${DATAHUB_MAPPED_ELASTIC_PORT:-9200}
+ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
+ELASTICSEARCH_PROTOCOL=http
 
 #NEO4J_HOST=http://<your-neo-host>:7474
 #NEO4J_URI=bolt://<your-neo-host>
@@ -385,6 +404,7 @@ DATAHUB_MAE_CONSUMER_PORT=9091
     logger.debug(f"Env file contents: {env_fp_reader.read()}")
 
     # continue to issue the restore indices command
+    # TODO Use --version if passed
     command = (
        "docker pull acryldata/datahub-upgrade:${DATAHUB_VERSION:-head}"
         + f" && docker run --network datahub_network --env-file {env_fp.name} "
@@ -412,12 +432,16 @@ DATAHUB_MAE_CONSUMER_PORT=9091
     return result.returncode
 
 
+# TODO: Do we really need this? If someone wants to use a different arg, they can still pass the standard docker env var DOCKER_DEFAULT_PLATFORM
+# We dont really need to select a different image unlike earlier (mysql vs mariadb) since we do publish both archs for all images (or are available for external images).
 def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
-    running_on_m1 = is_m1()
-    if running_on_m1:
-        click.secho("Detected M1 machine", fg="yellow")
+    running_on_apple_silicon = is_apple_silicon()
+    if running_on_apple_silicon:
+        click.secho("Detected Apple Silicon", fg="yellow")
 
-    quickstart_arch = Architectures.x86 if not running_on_m1 else Architectures.arm64
+    quickstart_arch = (
+        Architectures.x86 if not running_on_apple_silicon else Architectures.arm64
+    )
     if arch:
         matched_arch = [a for a in Architectures if arch.lower() == a.value]
         if not matched_arch:
@@ -437,13 +461,6 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
     default="default",
     help="Datahub version to be deployed. If not set, deploy using the defaults from the quickstart compose. Use 'stable' to start the latest stable version.",
 )
-@click.option(
-    "--build-locally",
-    type=bool,
-    is_flag=True,
-    default=False,
-    help="Attempt to build the containers locally before starting",
-)
 @click.option(
     "--pull-images/--no-pull-images",
     type=bool,
@@ -466,13 +483,6 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
     default=False,
     help="If true, the docker-compose logs will be printed to console if something fails",
 )
-@click.option(
-    "--graph-service-impl",
-    type=str,
-    is_flag=False,
-    default=None,
-    help="If set, forces docker-compose to use that graph service implementation",
-)
 @click.option(
     "--mysql-port",
     type=_ClickPositiveInt,
@@ -480,13 +490,6 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
     default=None,
     help="If there is an existing mysql instance running on port 3306, set this to a free port to avoid port conflicts on startup",
 )
-@click.option(
-    "--zk-port",
-    type=_ClickPositiveInt,
-    is_flag=False,
-    default=None,
-    help="If there is an existing zookeeper instance running on port 2181, set this to a free port to avoid port conflicts on startup",
-)
 @click.option(
     "--kafka-broker-port",
     type=_ClickPositiveInt,
@@ -494,13 +497,6 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
     default=None,
     help="If there is an existing Kafka broker running on port 9092, set this to a free port to avoid port conflicts on startup",
 )
-@click.option(
-    "--schema-registry-port",
-    type=_ClickPositiveInt,
-    is_flag=False,
-    default=None,
-    help="If there is an existing process running on port 8081, set this to a free port to avoid port conflicts with Kafka schema registry on startup",
-)
 @click.option(
     "--elastic-port",
     type=_ClickPositiveInt,
@@ -558,51 +554,29 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
     default=False,
     help="Disables the restoration of indices of a running quickstart instance when used in conjunction with --restore.",
 )
-@click.option(
-    "--standalone_consumers",
-    required=False,
-    is_flag=True,
-    default=False,
-    help="Launches MAE & MCE consumers as stand alone docker containers",
-)
-@click.option(
-    "--kafka-setup",
-    required=False,
-    is_flag=True,
-    default=False,
-    help="Launches Kafka setup job as part of the compose deployment",
-)
 @click.option(
     "--arch",
     required=False,
     help="Specify the architecture for the quickstart images to use. Options are x86, arm64, m1 etc.",
 )
-@upgrade.check_upgrade
 @telemetry.with_telemetry(
     capture_kwargs=[
         "version",
-        "build_locally",
         "pull_images",
         "stop",
         "backup",
         "restore",
         "restore_indices",
-        "standalone_consumers",
-        "kafka_setup",
         "arch",
     ]
 )
 def quickstart(
     version: Optional[str],
-    build_locally: bool,
     pull_images: bool,
     quickstart_compose_file: List[pathlib.Path],
     dump_logs_on_failure: bool,
-    graph_service_impl: Optional[str],
     mysql_port: Optional[int],
-    zk_port: Optional[int],
     kafka_broker_port: Optional[int],
-    schema_registry_port: Optional[int],
     elastic_port: Optional[int],
     stop: bool,
     backup: bool,
@@ -611,8 +585,6 @@ def quickstart(
     restore_file: str,
     restore_indices: bool,
     no_restore_indices: bool,
-    standalone_consumers: bool,
-    kafka_setup: bool,
     arch: Optional[str],
 ) -> None:
     """Start an instance of DataHub locally using docker-compose.
@@ -641,8 +613,8 @@ def quickstart(
         )
         return
 
-    quickstart_arch = detect_quickstart_arch(arch)
     quickstart_versioning = QuickstartVersionMappingConfig.fetch_quickstart_config()
+
     quickstart_execution_plan = quickstart_versioning.get_quickstart_execution_plan(
         version
     )
@@ -668,28 +640,26 @@ def quickstart(
     download_compose_files(
         quickstart_compose_file_name,
         quickstart_compose_file,
-        graph_service_impl,
-        kafka_setup,
-        quickstart_arch,
-        standalone_consumers,
        quickstart_execution_plan.composefile_git_ref,
     )
 
+    # check if running datahub can be upgraded to the latest version.
+    if not _check_upgrade_and_show_instructions(quickstart_compose_file):
+        sys.exit(1)
+
     # set version
     _set_environment_variables(
         version=quickstart_execution_plan.docker_tag,
-        mysql_version=quickstart_execution_plan.mysql_tag,
         mysql_port=mysql_port,
-        zk_port=zk_port,
         kafka_broker_port=kafka_broker_port,
-        schema_registry_port=schema_registry_port,
         elastic_port=elastic_port,
-        kafka_setup=kafka_setup,
     )
 
     compose = _docker_compose_v2()
     base_command: List[str] = [
         *compose,
+        "--profile",
+        "quickstart",
         *itertools.chain.from_iterable(
             ("-f", f"{path}") for path in quickstart_compose_file
         ),
@@ -697,6 +667,8 @@ def quickstart(
         DOCKER_COMPOSE_PROJECT_NAME,
     ]
 
+    click.echo(f"base_command: {base_command}")
+
     # Pull and possibly build the latest containers.
     try:
         if pull_images:
@@ -737,15 +709,6 @@ def quickstart(
                fg="red",
             )
 
-    if build_locally:
-        logger.info("Building docker images locally...")
-        subprocess.run(
-            base_command + ["build", "--pull", "-q"],
-            check=True,
-            env=_docker_subprocess_env(),
-        )
-        logger.info("Finished building docker images!")
-
     # Start it up! (with retries)
     click.echo("\nStarting up DataHub...")
     start_time = datetime.datetime.now()
@@ -830,42 +793,24 @@ def quickstart(
 
 
 def get_docker_compose_base_url(version_tag: str) -> str:
-
-
+    docker_compose_base = get_docker_compose_base()
+    if docker_compose_base:
+        return docker_compose_base
 
     return f"https://raw.githubusercontent.com/datahub-project/datahub/{version_tag}"
 
 
-def get_github_file_url(should_use_neo4j: bool, is_m1: bool, release_version_tag: str) -> str:
+def get_github_file_url(release_version_tag: str) -> str:
     base_url = get_docker_compose_base_url(release_version_tag)
-    if should_use_neo4j:
-        github_file = (
-            f"{base_url}/{NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_FILE}"
-            if not is_m1
-            else f"{base_url}/{NEO4J_AND_ELASTIC_M1_QUICKSTART_COMPOSE_FILE}"
-        )
-    else:
-        github_file = (
-            f"{base_url}/{ELASTIC_QUICKSTART_COMPOSE_FILE}"
-            if not is_m1
-            else f"{base_url}/{ELASTIC_M1_QUICKSTART_COMPOSE_FILE}"
-        )
+    github_file = f"{base_url}/{QUICKSTART_COMPOSE_FILE}"
     return github_file
 
 
 def download_compose_files(
-    quickstart_compose_file_name,
-    quickstart_compose_file_list,
-    graph_service_impl,
-    kafka_setup,
-    quickstart_arch,
-    standalone_consumers,
-    compose_git_ref,
+    quickstart_compose_file_name, quickstart_compose_file_list, compose_git_ref
 ):
     # download appropriate quickstart file
-    should_use_neo4j = should_use_neo4j_for_graph_service(graph_service_impl)
-    is_m1 = is_arch_m1(quickstart_arch)
-    github_file = get_github_file_url(should_use_neo4j, is_m1, compose_git_ref)
+    github_file = get_github_file_url(compose_git_ref)
     # also allow local files
     request_session = requests.Session()
     request_session.mount("file://", FileAdapter())
@@ -879,57 +824,14 @@ def download_compose_files(
         logger.info(f"Fetching docker-compose file {github_file} from GitHub")
         # Download the quickstart docker-compose file from GitHub.
         quickstart_download_response = request_session.get(github_file)
+        if quickstart_download_response.status_code == 404:
+            raise click.ClickException(
+                f"Could not find quickstart compose file for version {compose_git_ref}. "
+                "Please try a different version or check the version exists at https://github.com/datahub-project/datahub/releases"
+            )
         quickstart_download_response.raise_for_status()
         tmp_file.write(quickstart_download_response.content)
         logger.debug(f"Copied to {path}")
-    if standalone_consumers:
-        base_url = get_docker_compose_base_url(compose_git_ref)
-        consumer_github_file = (
-            f"{base_url}/{CONSUMERS_QUICKSTART_COMPOSE_FILE}"
-            if should_use_neo4j
-            else f"{base_url}/{ELASTIC_CONSUMERS_QUICKSTART_COMPOSE_FILE}"
-        )
-
-        default_consumer_compose_file = (
-            Path(DATAHUB_ROOT_FOLDER) / "quickstart/docker-compose.consumers.yml"
-        )
-        with (
-            open(default_consumer_compose_file, "wb")
-            if default_consumer_compose_file
-            else tempfile.NamedTemporaryFile(suffix=".yml", delete=False)
-        ) as tmp_file:
-            path = pathlib.Path(tmp_file.name)
-            quickstart_compose_file_list.append(path)
-            click.echo(
-                f"Fetching consumer docker-compose file {consumer_github_file} from GitHub"
-            )
-            # Download the quickstart docker-compose file from GitHub.
-            quickstart_download_response = request_session.get(consumer_github_file)
-            quickstart_download_response.raise_for_status()
-            tmp_file.write(quickstart_download_response.content)
-            logger.debug(f"Copied to {path}")
-    if kafka_setup:
-        base_url = get_docker_compose_base_url(compose_git_ref)
-        kafka_setup_github_file = f"{base_url}/{KAFKA_SETUP_QUICKSTART_COMPOSE_FILE}"
-
-        default_kafka_compose_file = (
-            Path(DATAHUB_ROOT_FOLDER) / "quickstart/docker-compose.kafka-setup.yml"
-        )
-        with (
-            open(default_kafka_compose_file, "wb")
-            if default_kafka_compose_file
-            else tempfile.NamedTemporaryFile(suffix=".yml", delete=False)
-        ) as tmp_file:
-            path = pathlib.Path(tmp_file.name)
-            quickstart_compose_file_list.append(path)
-            click.echo(
-                f"Fetching consumer docker-compose file {kafka_setup_github_file} from GitHub"
-            )
-            # Download the quickstart docker-compose file from GitHub.
-            quickstart_download_response = request_session.get(kafka_setup_github_file)
-            quickstart_download_response.raise_for_status()
-            tmp_file.write(quickstart_download_response.content)
-            logger.debug(f"Copied to {path}")
 
 
 def valid_restore_options(
@@ -963,7 +865,7 @@ def valid_restore_options(
     default=None,
     help="The token to be used when ingesting, used when datahub is deployed with METADATA_SERVICE_AUTH_ENABLED=true",
 )
-@telemetry.with_telemetry()
+@upgrade.check_upgrade
 def ingest_sample_data(token: Optional[str]) -> None:
     """Ingest sample data into a running DataHub instance."""
 
@@ -1031,3 +933,25 @@ def nuke(keep_data: bool) -> None:
     click.echo(f"Removing networks in the {DOCKER_COMPOSE_PROJECT_NAME} project")
     for network in client.networks.list(filters=DATAHUB_COMPOSE_PROJECT_FILTER):
         network.remove()
+
+
+def _check_upgrade_and_show_instructions(
+    quickstart_compose_file: List[pathlib.Path],
+) -> bool:
+    """Check if running datahub can be upgraded to the latest version and show appropriate instructions.
+
+    Args:
+        quickstart_compose_file: List of compose file paths
+
+    Returns:
+        bool: True if upgrade is supported, False otherwise
+    """
+    quickstart_status = check_docker_quickstart()
+
+    if not check_upgrade_supported(quickstart_compose_file, quickstart_status):
+        if quickstart_status.is_ok():
+            show_migration_instructions()
+        else:
+            show_repair_instructions()
+        return False
+    return True
datahub/cli/exists_cli.py
CHANGED

@@ -7,7 +7,6 @@ from click_default_group import DefaultGroup
 
 from datahub.ingestion.graph.client import get_default_graph
 from datahub.ingestion.graph.config import ClientMode
-from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade
 
 logger = logging.getLogger(__name__)
@@ -23,7 +22,6 @@ def exists() -> None:
 @click.option("--urn", required=False, type=str)
 @click.pass_context
 @upgrade.check_upgrade
-@telemetry.with_telemetry()
 def urn(ctx: Any, urn: Optional[str]) -> None:
     """
     Get metadata for an entity with an optional list of aspects to project.
datahub/cli/get_cli.py
CHANGED

@@ -8,7 +8,6 @@ from click_default_group import DefaultGroup
 from datahub.cli.cli_utils import get_aspects_for_entity
 from datahub.ingestion.graph.client import get_default_graph
 from datahub.ingestion.graph.config import ClientMode
-from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade
 
 logger = logging.getLogger(__name__)
@@ -32,7 +31,6 @@ def get() -> None:
 )
 @click.pass_context
 @upgrade.check_upgrade
-@telemetry.with_telemetry()
 def urn(ctx: Any, urn: Optional[str], aspect: List[str], details: bool) -> None:
     """
     Get metadata for an entity with an optional list of aspects to project.
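
The exists_cli.py and get_cli.py diffs are the same mechanical cleanup: the per-command @telemetry.with_telemetry() decorator and its import are dropped, while @upgrade.check_upgrade stays on the command. A minimal sketch of the resulting command shape, with the body elided (illustrative only, not the full file):

from typing import Any, Optional

import click

from datahub.upgrade import upgrade  # the telemetry import is gone


@click.command()
@click.option("--urn", required=False, type=str)
@click.pass_context
@upgrade.check_upgrade  # @telemetry.with_telemetry() is no longer stacked here
def urn(ctx: Any, urn: Optional[str]) -> None:
    """Get metadata for an entity with an optional list of aspects to project."""
    ...  # command body is unchanged by this release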
|