openmetadata-ingestion 1.3.0.1__py3-none-any.whl → 1.3.1.0__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of openmetadata-ingestion might be problematic. Click here for more details.
- metadata/cli/db_dump.py +1 -0
- metadata/data_insight/processor/reports/cost_analysis_report_data_processor.py +39 -47
- metadata/data_insight/processor/reports/data_processor.py +1 -0
- metadata/data_insight/producer/cost_analysis_producer.py +78 -14
- metadata/data_insight/producer/entity_producer.py +1 -1
- metadata/data_insight/producer/producer_interface.py +1 -1
- metadata/data_insight/producer/web_analytics_producer.py +1 -1
- metadata/data_insight/source/metadata.py +10 -1
- metadata/data_quality/validations/table/base/tableColumnToMatchSet.py +2 -1
- metadata/data_quality/validations/table/pandas/tableColumnToMatchSet.py +2 -1
- metadata/data_quality/validations/table/sqlalchemy/tableColumnToMatchSet.py +7 -2
- metadata/examples/workflows/bigtable.yaml +32 -0
- metadata/generated/antlr/EntityLinkLexer.py +353 -319
- metadata/generated/schema/analytics/__init__.py +1 -1
- metadata/generated/schema/analytics/basic.py +1 -1
- metadata/generated/schema/analytics/reportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/__init__.py +1 -1
- metadata/generated/schema/analytics/reportDataType/aggregatedCostAnalysisReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/entityReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/rawCostAnalysisReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/webAnalyticEntityViewReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/webAnalyticUserActivityReportData.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEvent.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventData.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventType/__init__.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventType/customEvent.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventType/pageViewEvent.py +1 -1
- metadata/generated/schema/api/__init__.py +1 -1
- metadata/generated/schema/api/addGlossaryToAssetsRequest.py +1 -1
- metadata/generated/schema/api/analytics/__init__.py +1 -1
- metadata/generated/schema/api/analytics/createWebAnalyticEvent.py +1 -1
- metadata/generated/schema/api/automations/__init__.py +1 -1
- metadata/generated/schema/api/automations/createWorkflow.py +1 -1
- metadata/generated/schema/api/bulkAssets.py +1 -1
- metadata/generated/schema/api/classification/__init__.py +1 -1
- metadata/generated/schema/api/classification/createClassification.py +1 -1
- metadata/generated/schema/api/classification/createTag.py +1 -1
- metadata/generated/schema/api/classification/loadTags.py +1 -1
- metadata/generated/schema/api/createBot.py +1 -1
- metadata/generated/schema/api/createEventPublisherJob.py +2 -2
- metadata/generated/schema/api/createType.py +1 -1
- metadata/generated/schema/api/data/__init__.py +1 -1
- metadata/generated/schema/api/data/createChart.py +1 -1
- metadata/generated/schema/api/data/createContainer.py +1 -1
- metadata/generated/schema/api/data/createCustomProperty.py +6 -10
- metadata/generated/schema/api/data/createDashboard.py +1 -1
- metadata/generated/schema/api/data/createDashboardDataModel.py +1 -1
- metadata/generated/schema/api/data/createDatabase.py +1 -1
- metadata/generated/schema/api/data/createDatabaseSchema.py +1 -1
- metadata/generated/schema/api/data/createGlossary.py +1 -1
- metadata/generated/schema/api/data/createGlossaryTerm.py +1 -1
- metadata/generated/schema/api/data/createMlModel.py +1 -1
- metadata/generated/schema/api/data/createPipeline.py +1 -1
- metadata/generated/schema/api/data/createQuery.py +1 -1
- metadata/generated/schema/api/data/createSearchIndex.py +1 -1
- metadata/generated/schema/api/data/createStoredProcedure.py +1 -1
- metadata/generated/schema/api/data/createTable.py +1 -1
- metadata/generated/schema/api/data/createTableProfile.py +1 -1
- metadata/generated/schema/api/data/createTopic.py +1 -1
- metadata/generated/schema/api/data/loadGlossary.py +1 -1
- metadata/generated/schema/api/data/restoreEntity.py +1 -1
- metadata/generated/schema/api/dataInsight/__init__.py +1 -1
- metadata/generated/schema/api/dataInsight/createDataInsightChart.py +1 -1
- metadata/generated/schema/api/dataInsight/kpi/__init__.py +1 -1
- metadata/generated/schema/api/dataInsight/kpi/createKpiRequest.py +1 -1
- metadata/generated/schema/api/docStore/__init__.py +1 -1
- metadata/generated/schema/api/docStore/createDocument.py +1 -1
- metadata/generated/schema/api/domains/__init__.py +1 -1
- metadata/generated/schema/api/domains/createDataProduct.py +1 -1
- metadata/generated/schema/api/domains/createDomain.py +1 -1
- metadata/generated/schema/api/feed/__init__.py +1 -1
- metadata/generated/schema/api/feed/closeTask.py +1 -1
- metadata/generated/schema/api/feed/createPost.py +1 -1
- metadata/generated/schema/api/feed/createSuggestion.py +1 -1
- metadata/generated/schema/api/feed/createThread.py +1 -1
- metadata/generated/schema/api/feed/resolveTask.py +1 -1
- metadata/generated/schema/api/feed/threadCount.py +1 -1
- metadata/generated/schema/api/lineage/__init__.py +1 -1
- metadata/generated/schema/api/lineage/addLineage.py +1 -1
- metadata/generated/schema/api/openMetadataServerVersion.py +1 -1
- metadata/generated/schema/api/policies/__init__.py +1 -1
- metadata/generated/schema/api/policies/createPolicy.py +1 -1
- metadata/generated/schema/api/services/__init__.py +1 -1
- metadata/generated/schema/api/services/createDashboardService.py +1 -1
- metadata/generated/schema/api/services/createDatabaseService.py +1 -1
- metadata/generated/schema/api/services/createMessagingService.py +1 -1
- metadata/generated/schema/api/services/createMetadataService.py +1 -1
- metadata/generated/schema/api/services/createMlModelService.py +1 -1
- metadata/generated/schema/api/services/createPipelineService.py +1 -1
- metadata/generated/schema/api/services/createSearchService.py +1 -1
- metadata/generated/schema/api/services/createStorageService.py +1 -1
- metadata/generated/schema/api/services/ingestionPipelines/__init__.py +1 -1
- metadata/generated/schema/api/services/ingestionPipelines/createIngestionPipeline.py +1 -1
- metadata/generated/schema/api/setOwner.py +1 -1
- metadata/generated/schema/api/teams/__init__.py +1 -1
- metadata/generated/schema/api/teams/createPersona.py +1 -1
- metadata/generated/schema/api/teams/createRole.py +1 -1
- metadata/generated/schema/api/teams/createTeam.py +1 -1
- metadata/generated/schema/api/teams/createUser.py +1 -1
- metadata/generated/schema/api/tests/__init__.py +1 -1
- metadata/generated/schema/api/tests/createCustomMetric.py +1 -1
- metadata/generated/schema/api/tests/createLogicalTestCases.py +1 -1
- metadata/generated/schema/api/tests/createTestCase.py +1 -1
- metadata/generated/schema/api/tests/createTestCaseResolutionStatus.py +1 -1
- metadata/generated/schema/api/tests/createTestDefinition.py +2 -1
- metadata/generated/schema/api/tests/createTestSuite.py +1 -1
- metadata/generated/schema/api/voteRequest.py +1 -1
- metadata/generated/schema/auth/__init__.py +1 -1
- metadata/generated/schema/auth/basicAuth.py +1 -1
- metadata/generated/schema/auth/basicLoginRequest.py +1 -1
- metadata/generated/schema/auth/changePasswordRequest.py +1 -1
- metadata/generated/schema/auth/createPersonalToken.py +1 -1
- metadata/generated/schema/auth/emailRequest.py +1 -1
- metadata/generated/schema/auth/emailVerificationToken.py +1 -1
- metadata/generated/schema/auth/generateToken.py +1 -1
- metadata/generated/schema/auth/jwtAuth.py +1 -1
- metadata/generated/schema/auth/loginRequest.py +1 -1
- metadata/generated/schema/auth/logoutRequest.py +1 -1
- metadata/generated/schema/auth/passwordResetRequest.py +1 -1
- metadata/generated/schema/auth/passwordResetToken.py +1 -1
- metadata/generated/schema/auth/personalAccessToken.py +1 -1
- metadata/generated/schema/auth/refreshToken.py +1 -1
- metadata/generated/schema/auth/registrationRequest.py +1 -1
- metadata/generated/schema/auth/revokePersonalToken.py +1 -1
- metadata/generated/schema/auth/revokeToken.py +1 -1
- metadata/generated/schema/auth/serviceTokenEnum.py +1 -1
- metadata/generated/schema/auth/ssoAuth.py +1 -1
- metadata/generated/schema/auth/tokenRefreshRequest.py +1 -1
- metadata/generated/schema/configuration/__init__.py +1 -1
- metadata/generated/schema/configuration/appsPrivateConfiguration.py +1 -1
- metadata/generated/schema/configuration/authConfig.py +1 -1
- metadata/generated/schema/configuration/authenticationConfiguration.py +1 -1
- metadata/generated/schema/configuration/authorizerConfiguration.py +1 -1
- metadata/generated/schema/configuration/changeEventConfiguration.py +1 -1
- metadata/generated/schema/configuration/dataQualityConfiguration.py +16 -0
- metadata/generated/schema/configuration/elasticSearchConfiguration.py +1 -1
- metadata/generated/schema/configuration/eventHandlerConfiguration.py +1 -1
- metadata/generated/schema/configuration/fernetConfiguration.py +1 -1
- metadata/generated/schema/configuration/jwtTokenConfiguration.py +1 -1
- metadata/generated/schema/configuration/kafkaEventConfiguration.py +1 -1
- metadata/generated/schema/configuration/ldapConfiguration.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/__init__.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/customTrustManagerConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/hostNameConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/jvmDefaultConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/trustAllConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/truststoreConfig.py +1 -1
- metadata/generated/schema/configuration/loginConfiguration.py +1 -1
- metadata/generated/schema/configuration/logoConfiguration.py +1 -1
- metadata/generated/schema/configuration/pipelineServiceClientConfiguration.py +1 -1
- metadata/generated/schema/configuration/slackAppConfiguration.py +1 -1
- metadata/generated/schema/configuration/taskNotificationConfiguration.py +1 -1
- metadata/generated/schema/configuration/testResultNotificationConfiguration.py +1 -1
- metadata/generated/schema/dataInsight/__init__.py +1 -1
- metadata/generated/schema/dataInsight/dataInsightChart.py +1 -1
- metadata/generated/schema/dataInsight/dataInsightChartResult.py +1 -1
- metadata/generated/schema/dataInsight/kpi/__init__.py +1 -1
- metadata/generated/schema/dataInsight/kpi/basic.py +1 -1
- metadata/generated/schema/dataInsight/kpi/kpi.py +1 -1
- metadata/generated/schema/dataInsight/type/__init__.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsCount.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsSize.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsCount.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsSize.py +1 -1
- metadata/generated/schema/dataInsight/type/dailyActiveUsers.py +1 -1
- metadata/generated/schema/dataInsight/type/mostActiveUsers.py +1 -1
- metadata/generated/schema/dataInsight/type/mostViewedEntities.py +1 -1
- metadata/generated/schema/dataInsight/type/pageViewsByEntities.py +1 -1
- metadata/generated/schema/dataInsight/type/percentageOfEntitiesWithDescriptionByType.py +1 -1
- metadata/generated/schema/dataInsight/type/percentageOfEntitiesWithOwnerByType.py +1 -1
- metadata/generated/schema/dataInsight/type/percentageOfServicesWithDescription.py +1 -1
- metadata/generated/schema/dataInsight/type/percentageOfServicesWithOwner.py +1 -1
- metadata/generated/schema/dataInsight/type/totalEntitiesByTier.py +1 -1
- metadata/generated/schema/dataInsight/type/totalEntitiesByType.py +1 -1
- metadata/generated/schema/dataInsight/type/unusedAssets.py +1 -1
- metadata/generated/schema/email/__init__.py +1 -1
- metadata/generated/schema/email/emailRequest.py +1 -1
- metadata/generated/schema/email/smtpSettings.py +1 -1
- metadata/generated/schema/entity/__init__.py +1 -1
- metadata/generated/schema/entity/applications/__init__.py +1 -1
- metadata/generated/schema/entity/applications/app.py +1 -1
- metadata/generated/schema/entity/applications/appRunRecord.py +1 -1
- metadata/generated/schema/entity/applications/configuration/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/applicationConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/autoTaggerAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/metaPilotAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/dataInsightsAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/dataInsightsReportAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/searchIndexingAppConfig.py +2 -2
- metadata/generated/schema/entity/applications/configuration/private/external/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/private/external/metaPilotAppPrivateConfig.py +1 -1
- metadata/generated/schema/entity/applications/createAppRequest.py +1 -1
- metadata/generated/schema/entity/applications/jobStatus.py +1 -1
- metadata/generated/schema/entity/applications/liveExecutionContext.py +1 -1
- metadata/generated/schema/entity/applications/marketplace/__init__.py +1 -1
- metadata/generated/schema/entity/applications/marketplace/appMarketPlaceDefinition.py +1 -1
- metadata/generated/schema/entity/applications/marketplace/createAppMarketPlaceDefinitionReq.py +1 -1
- metadata/generated/schema/entity/applications/scheduledExecutionContext.py +1 -1
- metadata/generated/schema/entity/automations/__init__.py +1 -1
- metadata/generated/schema/entity/automations/testServiceConnection.py +1 -1
- metadata/generated/schema/entity/automations/workflow.py +1 -1
- metadata/generated/schema/entity/bot.py +1 -1
- metadata/generated/schema/entity/classification/__init__.py +1 -1
- metadata/generated/schema/entity/classification/classification.py +1 -1
- metadata/generated/schema/entity/classification/tag.py +1 -1
- metadata/generated/schema/entity/data/__init__.py +1 -1
- metadata/generated/schema/entity/data/chart.py +1 -1
- metadata/generated/schema/entity/data/container.py +1 -1
- metadata/generated/schema/entity/data/dashboard.py +1 -1
- metadata/generated/schema/entity/data/dashboardDataModel.py +1 -1
- metadata/generated/schema/entity/data/database.py +1 -1
- metadata/generated/schema/entity/data/databaseSchema.py +1 -1
- metadata/generated/schema/entity/data/glossary.py +1 -1
- metadata/generated/schema/entity/data/glossaryTerm.py +1 -1
- metadata/generated/schema/entity/data/metrics.py +1 -1
- metadata/generated/schema/entity/data/mlmodel.py +2 -2
- metadata/generated/schema/entity/data/pipeline.py +1 -1
- metadata/generated/schema/entity/data/query.py +1 -1
- metadata/generated/schema/entity/data/report.py +1 -1
- metadata/generated/schema/entity/data/searchIndex.py +1 -1
- metadata/generated/schema/entity/data/storedProcedure.py +1 -1
- metadata/generated/schema/entity/data/table.py +1 -1
- metadata/generated/schema/entity/data/topic.py +1 -1
- metadata/generated/schema/entity/docStore/__init__.py +1 -1
- metadata/generated/schema/entity/docStore/document.py +1 -1
- metadata/generated/schema/entity/domains/__init__.py +1 -1
- metadata/generated/schema/entity/domains/dataProduct.py +1 -1
- metadata/generated/schema/entity/domains/domain.py +1 -1
- metadata/generated/schema/entity/events/__init__.py +1 -1
- metadata/generated/schema/entity/events/webhook.py +1 -1
- metadata/generated/schema/entity/feed/__init__.py +1 -1
- metadata/generated/schema/entity/feed/suggestion.py +1 -1
- metadata/generated/schema/entity/feed/thread.py +1 -1
- metadata/generated/schema/entity/policies/__init__.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/__init__.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/resourceDescriptor.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/resourcePermission.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/rule.py +1 -1
- metadata/generated/schema/entity/policies/filters.py +1 -1
- metadata/generated/schema/entity/policies/policy.py +1 -1
- metadata/generated/schema/entity/services/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/common/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/common/sslCertPaths.py +1 -1
- metadata/generated/schema/entity/services/connections/common/sslCertValues.py +1 -1
- metadata/generated/schema/entity/services/connections/common/sslConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/connectionBasicType.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/customDashboardConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/domoDashboardConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/lightdashConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/lookerConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/metabaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/modeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/mstrConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/powerBIConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/qlikSenseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/quickSightConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/redashConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/supersetConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/tableauConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/athenaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/azureSQLConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/bigQueryConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/bigTableConnection.py +41 -0
- metadata/generated/schema/entity/services/connections/database/clickhouseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/basicAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/iamAuthConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/jwtAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/database/couchbaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/customDatabaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/databricksConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/azureConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/gcsConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/s3Config.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalakeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/db2Connection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/deltaLakeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/domoDatabaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/dorisConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/druidConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/dynamoDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/glueConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/greenplumConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/hiveConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/dynamoDbCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/glueCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/hiveCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/icebergCatalog.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/icebergFileSystem.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/restCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/icebergConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/impalaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mariaDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mongoDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mssqlConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mysqlConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/oracleConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/pinotDBConnection.py +8 -6
- metadata/generated/schema/entity/services/connections/database/postgresConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/prestoConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/redshiftConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/salesforceConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sapHanaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sasConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/singleStoreConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/snowflakeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sqliteConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/trinoConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/unityCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/verticaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/customMessagingConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/kafkaConnection.py +14 -2
- metadata/generated/schema/entity/services/connections/messaging/kinesisConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/pulsarConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/redpandaConnection.py +14 -2
- metadata/generated/schema/entity/services/connections/messaging/saslMechanismType.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/alationConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/amundsenConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/atlasConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/metadataESConnection.py +2 -2
- metadata/generated/schema/entity/services/connections/metadata/openMetadataConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/customMlModelConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/mlflowConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/sageMakerConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/sklearnConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/airbyteConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/airflowConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/backendConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/customPipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/dagsterConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/databricksPipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/domoPipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/fivetranConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/gluePipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/nifiConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/sparkConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/splineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/search/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/search/customSearchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearch/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearch/apiAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearch/basicAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/search/openSearchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/serviceConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/adlsConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/customStorageConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/gcsConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/s3Connection.py +1 -1
- metadata/generated/schema/entity/services/connections/testConnectionDefinition.py +1 -1
- metadata/generated/schema/entity/services/connections/testConnectionResult.py +1 -1
- metadata/generated/schema/entity/services/dashboardService.py +1 -1
- metadata/generated/schema/entity/services/databaseService.py +4 -1
- metadata/generated/schema/entity/services/ingestionPipelines/__init__.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/ingestionPipeline.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/pipelineServiceClientResponse.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/status.py +1 -1
- metadata/generated/schema/entity/services/messagingService.py +1 -1
- metadata/generated/schema/entity/services/metadataService.py +1 -1
- metadata/generated/schema/entity/services/mlmodelService.py +1 -1
- metadata/generated/schema/entity/services/pipelineService.py +1 -1
- metadata/generated/schema/entity/services/searchService.py +1 -1
- metadata/generated/schema/entity/services/serviceType.py +1 -1
- metadata/generated/schema/entity/services/storageService.py +1 -1
- metadata/generated/schema/entity/teams/__init__.py +1 -1
- metadata/generated/schema/entity/teams/persona.py +1 -1
- metadata/generated/schema/entity/teams/role.py +1 -1
- metadata/generated/schema/entity/teams/team.py +1 -1
- metadata/generated/schema/entity/teams/teamHierarchy.py +1 -1
- metadata/generated/schema/entity/teams/user.py +1 -1
- metadata/generated/schema/entity/type.py +3 -18
- metadata/generated/schema/entity/utils/__init__.py +1 -1
- metadata/generated/schema/entity/utils/entitiesCount.py +1 -1
- metadata/generated/schema/entity/utils/servicesCount.py +1 -1
- metadata/generated/schema/entity/utils/supersetApiConnection.py +1 -1
- metadata/generated/schema/events/__init__.py +1 -1
- metadata/generated/schema/events/alertMetrics.py +1 -1
- metadata/generated/schema/events/api/__init__.py +1 -1
- metadata/generated/schema/events/api/createEventSubscription.py +1 -1
- metadata/generated/schema/events/emailAlertConfig.py +1 -1
- metadata/generated/schema/events/eventFilterRule.py +1 -1
- metadata/generated/schema/events/eventSubscription.py +3 -2
- metadata/generated/schema/events/eventSubscriptionOffset.py +1 -1
- metadata/generated/schema/events/failedEvent.py +1 -1
- metadata/generated/schema/events/filterResourceDescriptor.py +1 -1
- metadata/generated/schema/events/subscriptionResourceDescriptor.py +1 -1
- metadata/generated/schema/metadataIngestion/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/application.py +1 -1
- metadata/generated/schema/metadataIngestion/applicationPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dashboardServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dataInsightPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceProfilerPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceQueryLineagePipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceQueryUsagePipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtAzureConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtBucketDetails.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtCloudConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtGCSConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtHttpConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtLocalConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtS3Config.py +1 -1
- metadata/generated/schema/metadataIngestion/messagingServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/metadataToElasticSearchPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/mlmodelServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/pipelineServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/searchServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/containerMetadataConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/manifestMetadataConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageBucketDetails.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataADLSConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataGCSConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataHttpConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataLocalConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataS3Config.py +1 -1
- metadata/generated/schema/metadataIngestion/storageServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/testSuitePipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/workflow.py +1 -1
- metadata/generated/schema/monitoring/__init__.py +1 -1
- metadata/generated/schema/monitoring/eventMonitorProvider.py +1 -1
- metadata/generated/schema/security/__init__.py +1 -1
- metadata/generated/schema/security/client/__init__.py +1 -1
- metadata/generated/schema/security/client/auth0SSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/azureSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/customOidcSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/googleSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/oktaSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/openMetadataJWTClientConfig.py +1 -1
- metadata/generated/schema/security/client/samlSSOClientConfig.py +1 -1
- metadata/generated/schema/security/credentials/__init__.py +1 -1
- metadata/generated/schema/security/credentials/accessTokenAuth.py +1 -1
- metadata/generated/schema/security/credentials/apiAccessTokenAuth.py +1 -1
- metadata/generated/schema/security/credentials/awsCredentials.py +1 -1
- metadata/generated/schema/security/credentials/azureCredentials.py +16 -9
- metadata/generated/schema/security/credentials/basicAuth.py +1 -1
- metadata/generated/schema/security/credentials/bitbucketCredentials.py +1 -1
- metadata/generated/schema/security/credentials/gcpCredentials.py +1 -1
- metadata/generated/schema/security/credentials/gcpValues.py +1 -1
- metadata/generated/schema/security/credentials/gitCredentials.py +1 -1
- metadata/generated/schema/security/credentials/githubCredentials.py +1 -1
- metadata/generated/schema/security/secrets/__init__.py +1 -1
- metadata/generated/schema/security/secrets/secretsManagerClientLoader.py +1 -1
- metadata/generated/schema/security/secrets/secretsManagerConfiguration.py +1 -1
- metadata/generated/schema/security/secrets/secretsManagerProvider.py +3 -1
- metadata/generated/schema/security/securityConfiguration.py +1 -1
- metadata/generated/schema/security/ssl/__init__.py +1 -1
- metadata/generated/schema/security/ssl/validateSSLClientConfig.py +1 -1
- metadata/generated/schema/security/ssl/verifySSLConfig.py +1 -1
- metadata/generated/schema/settings/__init__.py +1 -1
- metadata/generated/schema/settings/settings.py +1 -1
- metadata/generated/schema/system/__init__.py +1 -1
- metadata/generated/schema/system/entityError.py +1 -1
- metadata/generated/schema/system/eventPublisherJob.py +1 -1
- metadata/generated/schema/system/indexingError.py +1 -1
- metadata/generated/schema/system/ui/__init__.py +1 -1
- metadata/generated/schema/system/ui/knowledgePanel.py +1 -1
- metadata/generated/schema/system/ui/page.py +1 -1
- metadata/generated/schema/tests/__init__.py +1 -1
- metadata/generated/schema/tests/assigned.py +1 -1
- metadata/generated/schema/tests/basic.py +1 -1
- metadata/generated/schema/tests/customMetric.py +1 -1
- metadata/generated/schema/tests/resolved.py +1 -1
- metadata/generated/schema/tests/testCase.py +1 -1
- metadata/generated/schema/tests/testCaseResolutionStatus.py +1 -1
- metadata/generated/schema/tests/testDefinition.py +2 -1
- metadata/generated/schema/tests/testSuite.py +1 -1
- metadata/generated/schema/type/__init__.py +1 -1
- metadata/generated/schema/type/auditLog.py +1 -1
- metadata/generated/schema/type/basic.py +6 -2
- metadata/generated/schema/type/bulkOperationResult.py +1 -1
- metadata/generated/schema/type/changeEvent.py +1 -1
- metadata/generated/schema/type/changeEventType.py +1 -1
- metadata/generated/schema/type/collectionDescriptor.py +1 -1
- metadata/generated/schema/type/csvDocumentation.py +1 -1
- metadata/generated/schema/type/csvErrorType.py +1 -1
- metadata/generated/schema/type/csvFile.py +1 -1
- metadata/generated/schema/type/csvImportResult.py +1 -1
- metadata/generated/schema/type/customProperties/__init__.py +3 -0
- metadata/generated/schema/type/customProperties/enumConfig.py +17 -0
- metadata/generated/schema/type/customProperty.py +52 -0
- metadata/generated/schema/type/dailyCount.py +1 -1
- metadata/generated/schema/type/databaseConnectionConfig.py +1 -1
- metadata/generated/schema/type/entityHistory.py +1 -1
- metadata/generated/schema/type/entityLineage.py +1 -1
- metadata/generated/schema/type/entityReference.py +1 -1
- metadata/generated/schema/type/entityReferenceList.py +1 -1
- metadata/generated/schema/type/entityRelationship.py +1 -1
- metadata/generated/schema/type/entityUsage.py +1 -1
- metadata/generated/schema/type/filterPattern.py +1 -1
- metadata/generated/schema/type/function.py +1 -1
- metadata/generated/schema/type/include.py +1 -1
- metadata/generated/schema/type/jdbcConnection.py +1 -1
- metadata/generated/schema/type/lifeCycle.py +1 -1
- metadata/generated/schema/type/paging.py +1 -1
- metadata/generated/schema/type/profile.py +1 -1
- metadata/generated/schema/type/queryParserData.py +1 -1
- metadata/generated/schema/type/reaction.py +1 -1
- metadata/generated/schema/type/schedule.py +1 -1
- metadata/generated/schema/type/schema.py +1 -1
- metadata/generated/schema/type/tableQuery.py +1 -1
- metadata/generated/schema/type/tableUsageCount.py +1 -1
- metadata/generated/schema/type/tagLabel.py +1 -1
- metadata/generated/schema/type/usageDetails.py +1 -1
- metadata/generated/schema/type/usageRequest.py +1 -1
- metadata/generated/schema/type/votes.py +1 -1
- metadata/great_expectations/action.py +4 -15
- metadata/ingestion/api/steps.py +14 -1
- metadata/ingestion/api/topology_runner.py +4 -1
- metadata/ingestion/models/custom_properties.py +0 -1
- metadata/ingestion/ometa/client.py +6 -0
- metadata/ingestion/ometa/mixins/custom_property_mixin.py +11 -11
- metadata/ingestion/ometa/ometa_api.py +1 -1
- metadata/ingestion/sink/metadata_rest.py +2 -2
- metadata/ingestion/source/dashboard/looker/metadata.py +3 -4
- metadata/ingestion/source/dashboard/metabase/client.py +4 -0
- metadata/ingestion/source/dashboard/metabase/metadata.py +5 -4
- metadata/ingestion/source/dashboard/metabase/models.py +2 -2
- metadata/ingestion/source/dashboard/tableau/metadata.py +18 -0
- metadata/ingestion/source/database/bigquery/helper.py +68 -1
- metadata/ingestion/source/database/bigquery/metadata.py +12 -3
- metadata/ingestion/source/database/bigquery/queries.py +22 -0
- metadata/ingestion/source/database/bigtable/client.py +62 -0
- metadata/ingestion/source/database/bigtable/connection.py +116 -0
- metadata/ingestion/source/database/bigtable/metadata.py +224 -0
- metadata/ingestion/source/database/bigtable/models.py +60 -0
- metadata/ingestion/source/database/common_db_source.py +2 -2
- metadata/ingestion/source/database/common_nosql_source.py +19 -2
- metadata/ingestion/source/database/databricks/metadata.py +132 -46
- metadata/ingestion/source/database/databricks/queries.py +3 -4
- metadata/ingestion/source/database/dbt/metadata.py +16 -28
- metadata/ingestion/source/database/oracle/queries.py +2 -2
- metadata/ingestion/source/messaging/common_broker_source.py +9 -7
- metadata/ingestion/source/messaging/kafka/connection.py +45 -4
- metadata/ingestion/source/mlmodel/sagemaker/metadata.py +20 -8
- metadata/profiler/orm/functions/conn_test.py +1 -0
- metadata/profiler/orm/functions/sum.py +1 -0
- metadata/profiler/orm/registry.py +1 -0
- metadata/profiler/processor/core.py +2 -2
- metadata/utils/datalake/datalake_utils.py +7 -1
- metadata/utils/execution_time_tracker.py +199 -0
- metadata/utils/filters.py +4 -0
- metadata/utils/helpers.py +0 -51
- metadata/utils/secrets/aws_based_secrets_manager.py +67 -4
- metadata/utils/secrets/aws_secrets_manager.py +7 -2
- metadata/utils/secrets/aws_ssm_secrets_manager.py +7 -2
- metadata/utils/secrets/azure_kv_secrets_manager.py +148 -0
- metadata/utils/secrets/external_secrets_manager.py +25 -3
- metadata/utils/secrets/secrets_manager_factory.py +13 -30
- metadata/workflow/base.py +4 -0
- metadata/workflow/output_handler.py +22 -0
- {openmetadata_ingestion-1.3.0.1.dist-info → openmetadata_ingestion-1.3.1.0.dist-info}/METADATA +297 -288
- {openmetadata_ingestion-1.3.0.1.dist-info → openmetadata_ingestion-1.3.1.0.dist-info}/RECORD +573 -562
- metadata/utils/secrets/client/loader.py +0 -77
- /metadata/{utils/secrets/client → ingestion/source/database/bigtable}/__init__.py +0 -0
- /metadata/utils/secrets/{noop_secrets_manager.py → db_secrets_manager.py} +0 -0
- {openmetadata_ingestion-1.3.0.1.dist-info → openmetadata_ingestion-1.3.1.0.dist-info}/LICENSE +0 -0
- {openmetadata_ingestion-1.3.0.1.dist-info → openmetadata_ingestion-1.3.1.0.dist-info}/WHEEL +0 -0
- {openmetadata_ingestion-1.3.0.1.dist-info → openmetadata_ingestion-1.3.1.0.dist-info}/entry_points.txt +0 -0
- {openmetadata_ingestion-1.3.0.1.dist-info → openmetadata_ingestion-1.3.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
# Copyright 2024 Collate
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
7
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
9
|
+
# See the License for the specific language governing permissions and
|
|
10
|
+
# limitations under the License.
|
|
11
|
+
"""
|
|
12
|
+
Bigtable source methods.
|
|
13
|
+
"""
|
|
14
|
+
import traceback
|
|
15
|
+
from typing import Dict, Iterable, List, Optional, Union
|
|
16
|
+
|
|
17
|
+
from google.cloud.bigtable import row_filters
|
|
18
|
+
from google.cloud.bigtable.instance import Instance
|
|
19
|
+
from google.cloud.bigtable.table import Table
|
|
20
|
+
|
|
21
|
+
from metadata.generated.schema.entity.data.table import (
|
|
22
|
+
ConstraintType,
|
|
23
|
+
TableConstraint,
|
|
24
|
+
TableType,
|
|
25
|
+
)
|
|
26
|
+
from metadata.generated.schema.entity.services.connections.database.bigTableConnection import (
|
|
27
|
+
BigTableConnection,
|
|
28
|
+
)
|
|
29
|
+
from metadata.generated.schema.metadataIngestion.workflow import (
|
|
30
|
+
Source as WorkflowSource,
|
|
31
|
+
)
|
|
32
|
+
from metadata.ingestion.api.steps import InvalidSourceException
|
|
33
|
+
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
|
34
|
+
from metadata.ingestion.source.database.bigtable.client import MultiProjectClient
|
|
35
|
+
from metadata.ingestion.source.database.bigtable.models import Row
|
|
36
|
+
from metadata.ingestion.source.database.common_nosql_source import (
|
|
37
|
+
SAMPLE_SIZE as GLOBAL_SAMPLE_SIZE,
|
|
38
|
+
)
|
|
39
|
+
from metadata.ingestion.source.database.common_nosql_source import CommonNoSQLSource
|
|
40
|
+
from metadata.ingestion.source.database.multi_db_source import MultiDBSource
|
|
41
|
+
from metadata.utils.logger import ingestion_logger
|
|
42
|
+
|
|
43
|
+
logger = ingestion_logger()
|
|
44
|
+
|
|
45
|
+
# BigTable groups its columns into column families. We make an assumption that if the table has a big number of
|
|
46
|
+
# columns, we at least get a sample of the first 100 column families.
|
|
47
|
+
MAX_COLUMN_FAMILIES = 100
|
|
48
|
+
SAMPLES_PER_COLUMN_FAMILY = 100
|
|
49
|
+
|
|
50
|
+
ProjectId = str
|
|
51
|
+
InstanceId = str
|
|
52
|
+
TableId = str
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class BigtableSource(CommonNoSQLSource, MultiDBSource):
    """
    Implements the necessary methods to extract database metadata from Google BigTable Source.
    BigTable is a NoSQL database service for handling large amounts of data. The mapping is as follows:
      project    -> instance -> table -> column_family.column
      (database)    (schema)
    For more info about BigTable: https://cloud.google.com/bigtable/?hl=en
    All data types are registered as bytes.
    """

    def __init__(self, config: WorkflowSource, metadata: OpenMetadata):
        super().__init__(config, metadata)
        # NOTE(review): `connection_obj` is expected to be set by the parent initializer - confirm.
        self.client: MultiProjectClient = self.connection_obj

        # The instances and tables are cached to avoid making redundant requests to the API.
        self.instances: Dict[ProjectId, Dict[InstanceId, Instance]] = {}
        self.tables: Dict[ProjectId, Dict[InstanceId, Dict[TableId, Table]]] = {}

    @classmethod
    def create(cls, config_dict, metadata: OpenMetadata):
        """
        Build the source from a raw workflow config dictionary.

        Raises:
            InvalidSourceException: if the configured service connection is not a BigTableConnection.
        """
        config: WorkflowSource = WorkflowSource.parse_obj(config_dict)
        connection: BigTableConnection = config.serviceConnection.__root__.config
        if not isinstance(connection, BigTableConnection):
            raise InvalidSourceException(
                f"Expected BigTableConnection, but got {connection}"
            )
        return cls(config, metadata)

    def get_configured_database(self) -> Optional[str]:
        """
        This connector uses "virtual databases" in the form of GCP projects.
        The concept of a default project for the GCP client is not useful here because the project ID
        is always an explicit part of the connection. Therefore, this method returns None and the databases
        are resolved using `self.get_database_names`.
        """
        return None

    def get_database_names(self) -> Iterable[str]:
        """Return the database names, i.e. the project IDs known to the client."""
        return self.get_database_names_raw()

    def get_database_names_raw(self) -> Iterable[str]:
        """Yield every project ID exposed by the multi-project client."""
        yield from self.client.project_ids()

    def get_schema_name_list(self) -> List[str]:
        """
        List the instance IDs of the current project and cache the instance objects.

        The failure is logged and then re-raised to the caller.
        """
        project_id = self.context.database
        try:
            # `list_instances` returns a pair and only the first element (the instances) is used.
            # NOTE(review): the second element is presumably the list of unreachable
            # locations reported by the client library - confirm against its documentation.
            instances, _ = self.client.list_instances(project_id=project_id)
            self.instances[project_id] = {
                instance.instance_id: instance for instance in instances
            }
            return list(self.instances[project_id])
        except Exception as err:
            logger.debug(traceback.format_exc())
            logger.error(
                f"Failed to list BigTable instances in project {project_id}: {err}"
            )
            raise

    def get_table_name_list(self, schema_name: str) -> List[str]:
        """
        List the table IDs of the instance `schema_name` in the current project and cache the tables.

        Returns an empty list (after logging) when the instance is unknown or the listing fails.
        """
        project_id = self.context.database
        try:
            instance = self._get_instance(project_id, schema_name)
            if instance is None:
                raise RuntimeError(f"Instance {project_id}/{schema_name} not found.")
            for table in instance.list_tables():
                self._set_nested(
                    self.tables,
                    [project_id, instance.instance_id, table.table_id],
                    table,
                )
            # An instance without tables never creates its cache entry, so read it
            # defensively instead of indexing: an empty instance is not an error.
            return list(self.tables.get(project_id, {}).get(schema_name, {}))
        except Exception as err:
            logger.debug(traceback.format_exc())
            # add context to the error message
            logger.error(
                f"Failed to list BigTable table names in {project_id}.{schema_name}: {err}"
            )
            return []

    def get_table_constraints(
        self, db_name: str, schema_name: str, table_name: str
    ) -> List[TableConstraint]:
        """Every table reports a single primary key constraint on the `row_key` column."""
        return [
            TableConstraint(
                constraintType=ConstraintType.PRIMARY_KEY, columns=["row_key"]
            )
        ]

    def get_table_columns_dict(
        self, schema_name: str, table_name: str
    ) -> Union[List[Dict], Dict]:
        """
        Sample records of a cached table so that its columns can be derived from them.

        Returns an empty list (after logging a warning) when the table is not cached or reading fails.
        """
        project_id = self.context.database
        try:
            table = self._get_table(project_id, schema_name, table_name)
            if table is None:
                raise RuntimeError(
                    f"Table {project_id}/{schema_name}/{table_name} not found."
                )
            column_families = table.list_column_families()
            # all BigTable tables have a "row_key" column. Even if there are no records in the table.
            records = [{"row_key": b"row_key"}]
            # In order to get a "good" sample of data, we try to distribute the sampling
            # across multiple column families.
            for column_family in list(column_families)[:MAX_COLUMN_FAMILIES]:
                records.extend(
                    self._get_records_for_column_family(
                        table, column_family, SAMPLES_PER_COLUMN_FAMILY
                    )
                )
                # Stop early once the global sample size has been reached.
                if len(records) >= GLOBAL_SAMPLE_SIZE:
                    break
            return records
        except Exception as err:
            logger.debug(traceback.format_exc())
            logger.warning(
                f"Failed to read BigTable rows for [{project_id}.{schema_name}.{table_name}]: {err}"
            )
            return []

    def get_source_url(
        self,
        database_name: Optional[str] = None,
        schema_name: Optional[str] = None,
        table_name: Optional[str] = None,
        table_type: Optional[TableType] = None,
    ) -> Optional[str]:
        """
        Method to get the source url for a BigTable table.

        Returns None unless both `schema_name` and `table_name` are given.
        """
        try:
            if schema_name and table_name:
                return (
                    "https://console.cloud.google.com/bigtable/instances/"
                    f"{schema_name}/tables/{table_name}/overview?project={database_name}"
                )
        except Exception as exc:
            logger.debug(traceback.format_exc())
            logger.error(f"Unable to get source url: {exc}")
        return None

    @staticmethod
    def _set_nested(dct: dict, keys: List[str], value: object) -> None:
        """Store `value` under the path `keys`, creating intermediate dictionaries as needed."""
        for key in keys[:-1]:
            dct = dct.setdefault(key, {})
        dct[keys[-1]] = value

    @staticmethod
    def _get_records_for_column_family(
        table: Table, column_family: str, limit: int
    ) -> List[Dict]:
        """Read up to `limit` rows restricted to `column_family` and convert each one to a record."""
        filter_ = row_filters.ColumnRangeFilter(column_family_id=column_family)
        rows = table.read_rows(limit=limit, filter_=filter_)
        return [Row.from_partial_row(row).to_record() for row in rows]

    def _get_table(
        self, project_id: str, schema_name: str, table_name: str
    ) -> Optional[Table]:
        """Return the cached table, or None when any level of the cache path is missing."""
        try:
            return self.tables[project_id][schema_name][table_name]
        except KeyError:
            return None

    def _get_instance(self, project_id: str, schema_name: str) -> Optional[Instance]:
        """Return the cached instance, or None when the project or instance is not cached."""
        try:
            return self.instances[project_id][schema_name]
        except KeyError:
            return None
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# Copyright 2024 Collate
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
7
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
9
|
+
# See the License for the specific language governing permissions and
|
|
10
|
+
# limitations under the License.
|
|
11
|
+
"""
|
|
12
|
+
Bigtable source models.
|
|
13
|
+
"""
|
|
14
|
+
from typing import Dict, List
|
|
15
|
+
|
|
16
|
+
from google.cloud.bigtable.row import PartialRowData
|
|
17
|
+
from pydantic import BaseModel
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Value(BaseModel):
    """One stored version of a Bigtable cell: a timestamp plus the raw bytes."""

    # NOTE(review): populated from the client cell's `timestamp` attribute in
    # `Row.from_partial_row` - confirm that attribute is int-compatible.
    timestamp: int
    # Raw cell content.
    value: bytes
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class Cell(BaseModel):
    """All stored versions of a single Bigtable cell."""

    # One entry per version, kept in the order they were supplied.
    values: List[Value]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class Row(BaseModel):
    """A Bigtable row."""

    # column family -> column qualifier (raw bytes) -> cell
    cells: Dict[str, Dict[bytes, Cell]]
    row_key: bytes

    @classmethod
    def from_partial_row(cls, row: PartialRowData):
        """Build a `Row` from the client library's partial row data."""
        cells = {
            column_family: {
                column: Cell(
                    values=[Value(timestamp=c.timestamp, value=c.value) for c in cell]
                )
                for column, cell in cf_cells.items()
            }
            for column_family, cf_cells in row.cells.items()
        }
        return cls(cells=cells, row_key=row.row_key)

    def to_record(self) -> Dict[str, bytes]:
        """
        Flatten the row into a `{"<column_family>.<column>": value}` mapping plus a `row_key` entry.
        """
        record = {}
        for column_family, cells in self.cells.items():
            for column, cell in cells.items():
                # A cell without any stored value has nothing to sample.
                if not cell.values:
                    continue
                # Column qualifiers are raw bytes and are not guaranteed to be valid UTF-8.
                # Escape undecodable bytes instead of raising, so a single binary
                # qualifier does not make the whole record fail.
                column_name = column.decode(errors="backslashreplace")
                # Since each cell can have multiple values and the API returns them in descending order
                # from latest to oldest, we only take the latest value. This probably does not matter since
                # all we care about is data types and all data stored in BigTable is of type `bytes`.
                record[f"{column_family}.{column_name}"] = cell.values[0].value
        record["row_key"] = self.row_key

        return record
|
|
@@ -62,8 +62,8 @@ from metadata.ingestion.source.database.stored_procedures_mixin import QueryByPr
|
|
|
62
62
|
from metadata.ingestion.source.models import TableView
|
|
63
63
|
from metadata.utils import fqn
|
|
64
64
|
from metadata.utils.db_utils import get_view_lineage
|
|
65
|
+
from metadata.utils.execution_time_tracker import calculate_execution_time_generator
|
|
65
66
|
from metadata.utils.filters import filter_by_table
|
|
66
|
-
from metadata.utils.helpers import calculate_execution_time_generator
|
|
67
67
|
from metadata.utils.logger import ingestion_logger
|
|
68
68
|
|
|
69
69
|
logger = ingestion_logger()
|
|
@@ -405,7 +405,7 @@ class CommonDbSourceService(
|
|
|
405
405
|
"""Not Implemented"""
|
|
406
406
|
yield from []
|
|
407
407
|
|
|
408
|
-
@calculate_execution_time_generator
|
|
408
|
+
@calculate_execution_time_generator(store=False)
|
|
409
409
|
def yield_table(
|
|
410
410
|
self, table_name_and_type: Tuple[str, str]
|
|
411
411
|
) -> Iterable[Either[CreateTableRequest]]:
|
|
@@ -28,7 +28,11 @@ from metadata.generated.schema.api.data.createTable import CreateTableRequest
|
|
|
28
28
|
from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
|
|
29
29
|
from metadata.generated.schema.entity.data.database import Database
|
|
30
30
|
from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
|
|
31
|
-
from metadata.generated.schema.entity.data.table import
|
|
31
|
+
from metadata.generated.schema.entity.data.table import (
|
|
32
|
+
Table,
|
|
33
|
+
TableConstraint,
|
|
34
|
+
TableType,
|
|
35
|
+
)
|
|
32
36
|
from metadata.generated.schema.entity.services.ingestionPipelines.status import (
|
|
33
37
|
StackTraceError,
|
|
34
38
|
)
|
|
@@ -203,6 +207,15 @@ class CommonNoSQLSource(DatabaseServiceSource, ABC):
|
|
|
203
207
|
need to be overridden by sources
|
|
204
208
|
"""
|
|
205
209
|
|
|
210
|
+
def get_table_constraints(
    self,
    db_name: str,
    schema_name: str,
    table_name: str,
) -> Optional[List[TableConstraint]]:
    """
    Default implementation: report no table constraints.

    The arguments are accepted but not used here.
    """
    # pylint: disable=unused-argument
    return None
|
|
218
|
+
|
|
206
219
|
def yield_table(
|
|
207
220
|
self, table_name_and_type: Tuple[str, str]
|
|
208
221
|
) -> Iterable[Either[CreateTableRequest]]:
|
|
@@ -223,7 +236,11 @@ class CommonNoSQLSource(DatabaseServiceSource, ABC):
|
|
|
223
236
|
name=table_name,
|
|
224
237
|
tableType=table_type,
|
|
225
238
|
columns=columns,
|
|
226
|
-
tableConstraints=
|
|
239
|
+
tableConstraints=self.get_table_constraints(
|
|
240
|
+
schema_name=schema_name,
|
|
241
|
+
table_name=table_name,
|
|
242
|
+
db_name=self.context.database,
|
|
243
|
+
),
|
|
227
244
|
databaseSchema=fqn.build(
|
|
228
245
|
metadata=self.metadata,
|
|
229
246
|
entity_type=DatabaseSchema,
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
import re
|
|
14
14
|
import traceback
|
|
15
15
|
from copy import deepcopy
|
|
16
|
-
from typing import Iterable, Optional, Tuple
|
|
16
|
+
from typing import Iterable, Optional, Tuple, Union
|
|
17
17
|
|
|
18
18
|
from pyhive.sqlalchemy_hive import _type_map
|
|
19
19
|
from sqlalchemy import types, util
|
|
@@ -66,6 +66,7 @@ logger = ingestion_logger()
|
|
|
66
66
|
|
|
67
67
|
DATABRICKS_TAG = "DATABRICK TAG"
|
|
68
68
|
DATABRICKS_TAG_CLASSIFICATION = "DATABRICK TAG CLASSIFICATION"
|
|
69
|
+
DEFAULT_TAG_VALUE = "NONE"
|
|
69
70
|
|
|
70
71
|
|
|
71
72
|
class STRUCT(String):
|
|
@@ -268,6 +269,10 @@ class DatabricksSource(CommonDbSourceService, MultiDBSource):
|
|
|
268
269
|
super().__init__(config, metadata)
|
|
269
270
|
self.is_older_version = False
|
|
270
271
|
self._init_version()
|
|
272
|
+
self.catalog_tags = {}
|
|
273
|
+
self.schema_tags = {}
|
|
274
|
+
self.table_tags = {}
|
|
275
|
+
self.column_tags = {}
|
|
271
276
|
|
|
272
277
|
def _init_version(self):
|
|
273
278
|
try:
|
|
@@ -314,10 +319,103 @@ class DatabricksSource(CommonDbSourceService, MultiDBSource):
|
|
|
314
319
|
else:
|
|
315
320
|
yield DEFAULT_DATABASE
|
|
316
321
|
|
|
322
|
+
def _clear_tag_cache(self) -> None:
|
|
323
|
+
"""
|
|
324
|
+
Method to clean any existing tags available in memory
|
|
325
|
+
"""
|
|
326
|
+
self.catalog_tags.clear()
|
|
327
|
+
self.table_tags.clear()
|
|
328
|
+
self.schema_tags.clear()
|
|
329
|
+
self.column_tags.clear()
|
|
330
|
+
|
|
331
|
+
def _add_to_tag_cache(
|
|
332
|
+
self, tag_dict: dict, key: Union[str, Tuple], value: Tuple[str, str]
|
|
333
|
+
):
|
|
334
|
+
if tag_dict.get(key):
|
|
335
|
+
tag_dict.get(key).append(value)
|
|
336
|
+
else:
|
|
337
|
+
tag_dict[key] = [value]
|
|
338
|
+
|
|
339
|
+
def populate_tags_cache(self, database_name: str) -> None:
    """
    Reset the tag caches and, unless tag ingestion is disabled, refill them
    with the catalog, schema, table and column tags of `database_name`.

    Each level is fetched independently: a failure on one level is logged
    at debug level and does not prevent the remaining levels from loading.
    """
    self._clear_tag_cache()
    if self.source_config.includeTags is False:
        return

    # Catalog level: keyed by catalog name.
    try:
        catalog_rows = self.connection.execute(
            DATABRICKS_GET_CATALOGS_TAGS.format(database_name=database_name)
        )

        for row in catalog_rows:
            catalog_key = row.catalog_name
            # tag value is an optional field, if tag value is not available use default tag value
            label = (row.tag_name, row.tag_value or DEFAULT_TAG_VALUE)
            self._add_to_tag_cache(self.catalog_tags, catalog_key, label)
    except Exception as exc:
        logger.debug(f"Failed to fetch catalog tags due to - {exc}")

    # Schema level: keyed by (catalog, schema).
    try:
        schema_rows = self.connection.execute(
            DATABRICKS_GET_SCHEMA_TAGS.format(database_name=database_name)
        )
        for row in schema_rows:
            schema_key = (row.catalog_name, row.schema_name)
            # tag value is an optional field, if tag value is not available use default tag value
            label = (row.tag_name, row.tag_value or DEFAULT_TAG_VALUE)
            self._add_to_tag_cache(self.schema_tags, schema_key, label)
    except Exception as exc:
        logger.debug(f"Failed to fetch schema tags due to - {exc}")

    # Table level: keyed by (catalog, schema, table).
    try:
        table_rows = self.connection.execute(
            DATABRICKS_GET_TABLE_TAGS.format(database_name=database_name)
        )
        for row in table_rows:
            table_key = (row.catalog_name, row.schema_name, row.table_name)
            # tag value is an optional field, if tag value is not available use default tag value
            label = (row.tag_name, row.tag_value or DEFAULT_TAG_VALUE)
            self._add_to_tag_cache(self.table_tags, table_key, label)
    except Exception as exc:
        logger.debug(f"Failed to fetch table tags due to - {exc}")

    # Column level: one nested dictionary per table, keyed by column name.
    try:
        column_rows = self.connection.execute(
            DATABRICKS_GET_COLUMN_TAGS.format(database_name=database_name)
        )
        for row in column_rows:
            owner_table = (row.catalog_name, row.schema_name, row.table_name)
            per_table = self.column_tags.get(owner_table)
            if per_table:
                self._add_to_tag_cache(
                    per_table,
                    row.column_name,
                    # tag value is an optional field, if tag value is not available use default tag value
                    (row.tag_name, row.tag_value or DEFAULT_TAG_VALUE),
                )
            else:
                # First tag seen for this table: create its per-column dictionary.
                self.column_tags[owner_table] = {
                    row.column_name: [
                        (row.tag_name, row.tag_value or DEFAULT_TAG_VALUE)
                    ]
                }
    except Exception as exc:
        logger.debug(f"Failed to fetch column tags due to - {exc}")
|
|
413
|
+
|
|
317
414
|
def get_database_names(self) -> Iterable[str]:
|
|
318
415
|
configured_catalog = self.service_connection.catalog
|
|
319
416
|
if configured_catalog:
|
|
320
417
|
self.set_inspector(database_name=configured_catalog)
|
|
418
|
+
self.populate_tags_cache(database_name=configured_catalog)
|
|
321
419
|
yield configured_catalog
|
|
322
420
|
else:
|
|
323
421
|
for new_catalog in self.get_database_names_raw():
|
|
@@ -337,6 +435,7 @@ class DatabricksSource(CommonDbSourceService, MultiDBSource):
|
|
|
337
435
|
continue
|
|
338
436
|
try:
|
|
339
437
|
self.set_inspector(database_name=new_catalog)
|
|
438
|
+
self.populate_tags_cache(database_name=new_catalog)
|
|
340
439
|
yield new_catalog
|
|
341
440
|
except Exception as exc:
|
|
342
441
|
logger.error(traceback.format_exc())
|
|
@@ -361,10 +460,8 @@ class DatabricksSource(CommonDbSourceService, MultiDBSource):
|
|
|
361
460
|
Method to yield database tags
|
|
362
461
|
"""
|
|
363
462
|
try:
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
)
|
|
367
|
-
for tag in tags:
|
|
463
|
+
catalog_tags = self.catalog_tags.get(database_name, [])
|
|
464
|
+
for tag_name, tag_value in catalog_tags:
|
|
368
465
|
yield from get_ometa_tag_and_classification(
|
|
369
466
|
tag_fqn=fqn.build(
|
|
370
467
|
self.metadata,
|
|
@@ -372,8 +469,8 @@ class DatabricksSource(CommonDbSourceService, MultiDBSource):
|
|
|
372
469
|
service_name=self.context.database_service,
|
|
373
470
|
database_name=database_name,
|
|
374
471
|
),
|
|
375
|
-
tags=[
|
|
376
|
-
classification_name=
|
|
472
|
+
tags=[tag_value],
|
|
473
|
+
classification_name=tag_name,
|
|
377
474
|
tag_description=DATABRICKS_TAG,
|
|
378
475
|
classification_description=DATABRICKS_TAG_CLASSIFICATION,
|
|
379
476
|
)
|
|
@@ -394,12 +491,8 @@ class DatabricksSource(CommonDbSourceService, MultiDBSource):
|
|
|
394
491
|
Method to yield schema tags
|
|
395
492
|
"""
|
|
396
493
|
try:
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
database_name=self.context.database, schema_name=schema_name
|
|
400
|
-
)
|
|
401
|
-
)
|
|
402
|
-
for tag in tags:
|
|
494
|
+
schema_tags = self.schema_tags.get((self.context.database, schema_name), [])
|
|
495
|
+
for tag_name, tag_value in schema_tags:
|
|
403
496
|
yield from get_ometa_tag_and_classification(
|
|
404
497
|
tag_fqn=fqn.build(
|
|
405
498
|
self.metadata,
|
|
@@ -408,8 +501,8 @@ class DatabricksSource(CommonDbSourceService, MultiDBSource):
|
|
|
408
501
|
database_name=self.context.database,
|
|
409
502
|
schema_name=schema_name,
|
|
410
503
|
),
|
|
411
|
-
tags=[
|
|
412
|
-
classification_name=
|
|
504
|
+
tags=[tag_value],
|
|
505
|
+
classification_name=tag_name,
|
|
413
506
|
tag_description=DATABRICKS_TAG,
|
|
414
507
|
classification_description=DATABRICKS_TAG_CLASSIFICATION,
|
|
415
508
|
)
|
|
@@ -428,14 +521,10 @@ class DatabricksSource(CommonDbSourceService, MultiDBSource):
|
|
|
428
521
|
) -> Iterable[Either[OMetaTagAndClassification]]:
|
|
429
522
|
table_name, _ = table_name_and_type
|
|
430
523
|
try:
|
|
431
|
-
table_tags = self.
|
|
432
|
-
|
|
433
|
-
database_name=self.context.database,
|
|
434
|
-
schema_name=self.context.database_schema,
|
|
435
|
-
table_name=table_name,
|
|
436
|
-
)
|
|
524
|
+
table_tags = self.table_tags.get(
|
|
525
|
+
(self.context.database, self.context.database_schema, table_name), []
|
|
437
526
|
)
|
|
438
|
-
for
|
|
527
|
+
for tag_name, tag_value in table_tags:
|
|
439
528
|
yield from get_ometa_tag_and_classification(
|
|
440
529
|
tag_fqn=fqn.build(
|
|
441
530
|
self.metadata,
|
|
@@ -445,35 +534,32 @@ class DatabricksSource(CommonDbSourceService, MultiDBSource):
|
|
|
445
534
|
schema_name=self.context.database_schema,
|
|
446
535
|
table_name=table_name,
|
|
447
536
|
),
|
|
448
|
-
tags=[
|
|
449
|
-
classification_name=
|
|
537
|
+
tags=[tag_value],
|
|
538
|
+
classification_name=tag_name,
|
|
450
539
|
tag_description=DATABRICKS_TAG,
|
|
451
540
|
classification_description=DATABRICKS_TAG_CLASSIFICATION,
|
|
452
541
|
)
|
|
453
542
|
|
|
454
|
-
column_tags = self.
|
|
455
|
-
|
|
456
|
-
database_name=self.context.database,
|
|
457
|
-
schema_name=self.context.database_schema,
|
|
458
|
-
table_name=table_name,
|
|
459
|
-
)
|
|
543
|
+
column_tags = self.column_tags.get(
|
|
544
|
+
(self.context.database, self.context.database_schema, table_name), {}
|
|
460
545
|
)
|
|
461
|
-
for
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
546
|
+
for column_name, tags in column_tags.items():
|
|
547
|
+
for tag_name, tag_value in tags or []:
|
|
548
|
+
yield from get_ometa_tag_and_classification(
|
|
549
|
+
tag_fqn=fqn.build(
|
|
550
|
+
self.metadata,
|
|
551
|
+
Column,
|
|
552
|
+
service_name=self.context.database_service,
|
|
553
|
+
database_name=self.context.database,
|
|
554
|
+
schema_name=self.context.database_schema,
|
|
555
|
+
table_name=table_name,
|
|
556
|
+
column_name=column_name,
|
|
557
|
+
),
|
|
558
|
+
tags=[tag_value],
|
|
559
|
+
classification_name=tag_name,
|
|
560
|
+
tag_description=DATABRICKS_TAG,
|
|
561
|
+
classification_description=DATABRICKS_TAG_CLASSIFICATION,
|
|
562
|
+
)
|
|
477
563
|
|
|
478
564
|
except Exception as exc:
|
|
479
565
|
yield Either(
|
|
@@ -37,8 +37,7 @@ DATABRICKS_GET_SCHEMA_TAGS = textwrap.dedent(
|
|
|
37
37
|
"""
|
|
38
38
|
SELECT
|
|
39
39
|
*
|
|
40
|
-
FROM {database_name}.information_schema.schema_tags
|
|
41
|
-
WHERE schema_name='{schema_name}'"""
|
|
40
|
+
FROM {database_name}.information_schema.schema_tags"""
|
|
42
41
|
)
|
|
43
42
|
|
|
44
43
|
DATABRICKS_GET_TABLE_TAGS = textwrap.dedent(
|
|
@@ -46,7 +45,7 @@ DATABRICKS_GET_TABLE_TAGS = textwrap.dedent(
|
|
|
46
45
|
SELECT
|
|
47
46
|
*
|
|
48
47
|
FROM {database_name}.information_schema.table_tags
|
|
49
|
-
|
|
48
|
+
"""
|
|
50
49
|
)
|
|
51
50
|
|
|
52
51
|
DATABRICKS_GET_COLUMN_TAGS = textwrap.dedent(
|
|
@@ -54,5 +53,5 @@ DATABRICKS_GET_COLUMN_TAGS = textwrap.dedent(
|
|
|
54
53
|
SELECT
|
|
55
54
|
*
|
|
56
55
|
FROM {database_name}.information_schema.column_tags
|
|
57
|
-
|
|
56
|
+
"""
|
|
58
57
|
)
|