openmetadata-ingestion 1.3.0.1__py3-none-any.whl → 1.3.1.1__py3-none-any.whl
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Potentially problematic release.
This version of openmetadata-ingestion might be problematic. Click here for more details.
- metadata/cli/db_dump.py +1 -0
- metadata/data_insight/processor/reports/cost_analysis_report_data_processor.py +39 -47
- metadata/data_insight/processor/reports/data_processor.py +1 -0
- metadata/data_insight/producer/cost_analysis_producer.py +78 -14
- metadata/data_insight/producer/entity_producer.py +1 -1
- metadata/data_insight/producer/producer_interface.py +1 -1
- metadata/data_insight/producer/web_analytics_producer.py +1 -1
- metadata/data_insight/source/metadata.py +10 -1
- metadata/data_quality/validations/table/base/tableColumnToMatchSet.py +2 -1
- metadata/data_quality/validations/table/pandas/tableColumnToMatchSet.py +2 -1
- metadata/data_quality/validations/table/sqlalchemy/tableColumnToMatchSet.py +7 -2
- metadata/examples/workflows/bigtable.yaml +32 -0
- metadata/generated/antlr/EntityLinkLexer.py +353 -319
- metadata/generated/schema/analytics/__init__.py +1 -1
- metadata/generated/schema/analytics/basic.py +1 -1
- metadata/generated/schema/analytics/reportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/__init__.py +1 -1
- metadata/generated/schema/analytics/reportDataType/aggregatedCostAnalysisReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/entityReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/rawCostAnalysisReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/webAnalyticEntityViewReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/webAnalyticUserActivityReportData.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEvent.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventData.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventType/__init__.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventType/customEvent.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventType/pageViewEvent.py +1 -1
- metadata/generated/schema/api/__init__.py +1 -1
- metadata/generated/schema/api/addGlossaryToAssetsRequest.py +1 -1
- metadata/generated/schema/api/analytics/__init__.py +1 -1
- metadata/generated/schema/api/analytics/createWebAnalyticEvent.py +1 -1
- metadata/generated/schema/api/automations/__init__.py +1 -1
- metadata/generated/schema/api/automations/createWorkflow.py +1 -1
- metadata/generated/schema/api/bulkAssets.py +1 -1
- metadata/generated/schema/api/classification/__init__.py +1 -1
- metadata/generated/schema/api/classification/createClassification.py +1 -1
- metadata/generated/schema/api/classification/createTag.py +1 -1
- metadata/generated/schema/api/classification/loadTags.py +1 -1
- metadata/generated/schema/api/createBot.py +1 -1
- metadata/generated/schema/api/createEventPublisherJob.py +2 -2
- metadata/generated/schema/api/createType.py +1 -1
- metadata/generated/schema/api/data/__init__.py +1 -1
- metadata/generated/schema/api/data/createChart.py +1 -1
- metadata/generated/schema/api/data/createContainer.py +1 -1
- metadata/generated/schema/api/data/createCustomProperty.py +6 -10
- metadata/generated/schema/api/data/createDashboard.py +1 -1
- metadata/generated/schema/api/data/createDashboardDataModel.py +1 -1
- metadata/generated/schema/api/data/createDatabase.py +1 -1
- metadata/generated/schema/api/data/createDatabaseSchema.py +1 -1
- metadata/generated/schema/api/data/createGlossary.py +1 -1
- metadata/generated/schema/api/data/createGlossaryTerm.py +1 -1
- metadata/generated/schema/api/data/createMlModel.py +1 -1
- metadata/generated/schema/api/data/createPipeline.py +1 -1
- metadata/generated/schema/api/data/createQuery.py +1 -1
- metadata/generated/schema/api/data/createSearchIndex.py +1 -1
- metadata/generated/schema/api/data/createStoredProcedure.py +1 -1
- metadata/generated/schema/api/data/createTable.py +1 -1
- metadata/generated/schema/api/data/createTableProfile.py +1 -1
- metadata/generated/schema/api/data/createTopic.py +1 -1
- metadata/generated/schema/api/data/loadGlossary.py +1 -1
- metadata/generated/schema/api/data/restoreEntity.py +1 -1
- metadata/generated/schema/api/dataInsight/__init__.py +1 -1
- metadata/generated/schema/api/dataInsight/createDataInsightChart.py +1 -1
- metadata/generated/schema/api/dataInsight/kpi/__init__.py +1 -1
- metadata/generated/schema/api/dataInsight/kpi/createKpiRequest.py +1 -1
- metadata/generated/schema/api/docStore/__init__.py +1 -1
- metadata/generated/schema/api/docStore/createDocument.py +1 -1
- metadata/generated/schema/api/domains/__init__.py +1 -1
- metadata/generated/schema/api/domains/createDataProduct.py +1 -1
- metadata/generated/schema/api/domains/createDomain.py +1 -1
- metadata/generated/schema/api/feed/__init__.py +1 -1
- metadata/generated/schema/api/feed/closeTask.py +1 -1
- metadata/generated/schema/api/feed/createPost.py +1 -1
- metadata/generated/schema/api/feed/createSuggestion.py +1 -1
- metadata/generated/schema/api/feed/createThread.py +1 -1
- metadata/generated/schema/api/feed/resolveTask.py +1 -1
- metadata/generated/schema/api/feed/threadCount.py +1 -1
- metadata/generated/schema/api/lineage/__init__.py +1 -1
- metadata/generated/schema/api/lineage/addLineage.py +1 -1
- metadata/generated/schema/api/openMetadataServerVersion.py +1 -1
- metadata/generated/schema/api/policies/__init__.py +1 -1
- metadata/generated/schema/api/policies/createPolicy.py +1 -1
- metadata/generated/schema/api/services/__init__.py +1 -1
- metadata/generated/schema/api/services/createDashboardService.py +1 -1
- metadata/generated/schema/api/services/createDatabaseService.py +1 -1
- metadata/generated/schema/api/services/createMessagingService.py +1 -1
- metadata/generated/schema/api/services/createMetadataService.py +1 -1
- metadata/generated/schema/api/services/createMlModelService.py +1 -1
- metadata/generated/schema/api/services/createPipelineService.py +1 -1
- metadata/generated/schema/api/services/createSearchService.py +1 -1
- metadata/generated/schema/api/services/createStorageService.py +1 -1
- metadata/generated/schema/api/services/ingestionPipelines/__init__.py +1 -1
- metadata/generated/schema/api/services/ingestionPipelines/createIngestionPipeline.py +1 -1
- metadata/generated/schema/api/setOwner.py +1 -1
- metadata/generated/schema/api/teams/__init__.py +1 -1
- metadata/generated/schema/api/teams/createPersona.py +1 -1
- metadata/generated/schema/api/teams/createRole.py +1 -1
- metadata/generated/schema/api/teams/createTeam.py +1 -1
- metadata/generated/schema/api/teams/createUser.py +1 -1
- metadata/generated/schema/api/tests/__init__.py +1 -1
- metadata/generated/schema/api/tests/createCustomMetric.py +1 -1
- metadata/generated/schema/api/tests/createLogicalTestCases.py +1 -1
- metadata/generated/schema/api/tests/createTestCase.py +1 -1
- metadata/generated/schema/api/tests/createTestCaseResolutionStatus.py +1 -1
- metadata/generated/schema/api/tests/createTestDefinition.py +2 -1
- metadata/generated/schema/api/tests/createTestSuite.py +1 -1
- metadata/generated/schema/api/voteRequest.py +1 -1
- metadata/generated/schema/auth/__init__.py +1 -1
- metadata/generated/schema/auth/basicAuth.py +1 -1
- metadata/generated/schema/auth/basicLoginRequest.py +1 -1
- metadata/generated/schema/auth/changePasswordRequest.py +1 -1
- metadata/generated/schema/auth/createPersonalToken.py +1 -1
- metadata/generated/schema/auth/emailRequest.py +1 -1
- metadata/generated/schema/auth/emailVerificationToken.py +1 -1
- metadata/generated/schema/auth/generateToken.py +1 -1
- metadata/generated/schema/auth/jwtAuth.py +1 -1
- metadata/generated/schema/auth/loginRequest.py +1 -1
- metadata/generated/schema/auth/logoutRequest.py +1 -1
- metadata/generated/schema/auth/passwordResetRequest.py +1 -1
- metadata/generated/schema/auth/passwordResetToken.py +1 -1
- metadata/generated/schema/auth/personalAccessToken.py +1 -1
- metadata/generated/schema/auth/refreshToken.py +1 -1
- metadata/generated/schema/auth/registrationRequest.py +1 -1
- metadata/generated/schema/auth/revokePersonalToken.py +1 -1
- metadata/generated/schema/auth/revokeToken.py +1 -1
- metadata/generated/schema/auth/serviceTokenEnum.py +1 -1
- metadata/generated/schema/auth/ssoAuth.py +1 -1
- metadata/generated/schema/auth/tokenRefreshRequest.py +1 -1
- metadata/generated/schema/configuration/__init__.py +1 -1
- metadata/generated/schema/configuration/appsPrivateConfiguration.py +1 -1
- metadata/generated/schema/configuration/authConfig.py +1 -1
- metadata/generated/schema/configuration/authenticationConfiguration.py +1 -1
- metadata/generated/schema/configuration/authorizerConfiguration.py +1 -1
- metadata/generated/schema/configuration/changeEventConfiguration.py +1 -1
- metadata/generated/schema/configuration/dataQualityConfiguration.py +16 -0
- metadata/generated/schema/configuration/elasticSearchConfiguration.py +1 -1
- metadata/generated/schema/configuration/eventHandlerConfiguration.py +1 -1
- metadata/generated/schema/configuration/fernetConfiguration.py +1 -1
- metadata/generated/schema/configuration/jwtTokenConfiguration.py +1 -1
- metadata/generated/schema/configuration/kafkaEventConfiguration.py +1 -1
- metadata/generated/schema/configuration/ldapConfiguration.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/__init__.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/customTrustManagerConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/hostNameConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/jvmDefaultConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/trustAllConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/truststoreConfig.py +1 -1
- metadata/generated/schema/configuration/loginConfiguration.py +1 -1
- metadata/generated/schema/configuration/logoConfiguration.py +1 -1
- metadata/generated/schema/configuration/pipelineServiceClientConfiguration.py +1 -1
- metadata/generated/schema/configuration/slackAppConfiguration.py +1 -1
- metadata/generated/schema/configuration/taskNotificationConfiguration.py +1 -1
- metadata/generated/schema/configuration/testResultNotificationConfiguration.py +1 -1
- metadata/generated/schema/dataInsight/__init__.py +1 -1
- metadata/generated/schema/dataInsight/dataInsightChart.py +1 -1
- metadata/generated/schema/dataInsight/dataInsightChartResult.py +1 -1
- metadata/generated/schema/dataInsight/kpi/__init__.py +1 -1
- metadata/generated/schema/dataInsight/kpi/basic.py +1 -1
- metadata/generated/schema/dataInsight/kpi/kpi.py +1 -1
- metadata/generated/schema/dataInsight/type/__init__.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsCount.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsSize.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsCount.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsSize.py +1 -1
- metadata/generated/schema/dataInsight/type/dailyActiveUsers.py +1 -1
- metadata/generated/schema/dataInsight/type/mostActiveUsers.py +1 -1
- metadata/generated/schema/dataInsight/type/mostViewedEntities.py +1 -1
- metadata/generated/schema/dataInsight/type/pageViewsByEntities.py +1 -1
- metadata/generated/schema/dataInsight/type/percentageOfEntitiesWithDescriptionByType.py +1 -1
- metadata/generated/schema/dataInsight/type/percentageOfEntitiesWithOwnerByType.py +1 -1
- metadata/generated/schema/dataInsight/type/percentageOfServicesWithDescription.py +1 -1
- metadata/generated/schema/dataInsight/type/percentageOfServicesWithOwner.py +1 -1
- metadata/generated/schema/dataInsight/type/totalEntitiesByTier.py +1 -1
- metadata/generated/schema/dataInsight/type/totalEntitiesByType.py +1 -1
- metadata/generated/schema/dataInsight/type/unusedAssets.py +1 -1
- metadata/generated/schema/email/__init__.py +1 -1
- metadata/generated/schema/email/emailRequest.py +1 -1
- metadata/generated/schema/email/smtpSettings.py +1 -1
- metadata/generated/schema/entity/__init__.py +1 -1
- metadata/generated/schema/entity/applications/__init__.py +1 -1
- metadata/generated/schema/entity/applications/app.py +1 -1
- metadata/generated/schema/entity/applications/appRunRecord.py +1 -1
- metadata/generated/schema/entity/applications/configuration/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/applicationConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/autoTaggerAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/metaPilotAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/dataInsightsAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/dataInsightsReportAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/searchIndexingAppConfig.py +2 -2
- metadata/generated/schema/entity/applications/configuration/private/external/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/private/external/metaPilotAppPrivateConfig.py +1 -1
- metadata/generated/schema/entity/applications/createAppRequest.py +1 -1
- metadata/generated/schema/entity/applications/jobStatus.py +1 -1
- metadata/generated/schema/entity/applications/liveExecutionContext.py +1 -1
- metadata/generated/schema/entity/applications/marketplace/__init__.py +1 -1
- metadata/generated/schema/entity/applications/marketplace/appMarketPlaceDefinition.py +1 -1
- metadata/generated/schema/entity/applications/marketplace/createAppMarketPlaceDefinitionReq.py +1 -1
- metadata/generated/schema/entity/applications/scheduledExecutionContext.py +1 -1
- metadata/generated/schema/entity/automations/__init__.py +1 -1
- metadata/generated/schema/entity/automations/testServiceConnection.py +1 -1
- metadata/generated/schema/entity/automations/workflow.py +1 -1
- metadata/generated/schema/entity/bot.py +1 -1
- metadata/generated/schema/entity/classification/__init__.py +1 -1
- metadata/generated/schema/entity/classification/classification.py +1 -1
- metadata/generated/schema/entity/classification/tag.py +1 -1
- metadata/generated/schema/entity/data/__init__.py +1 -1
- metadata/generated/schema/entity/data/chart.py +1 -1
- metadata/generated/schema/entity/data/container.py +1 -1
- metadata/generated/schema/entity/data/dashboard.py +1 -1
- metadata/generated/schema/entity/data/dashboardDataModel.py +1 -1
- metadata/generated/schema/entity/data/database.py +1 -1
- metadata/generated/schema/entity/data/databaseSchema.py +1 -1
- metadata/generated/schema/entity/data/glossary.py +1 -1
- metadata/generated/schema/entity/data/glossaryTerm.py +1 -1
- metadata/generated/schema/entity/data/metrics.py +1 -1
- metadata/generated/schema/entity/data/mlmodel.py +2 -2
- metadata/generated/schema/entity/data/pipeline.py +1 -1
- metadata/generated/schema/entity/data/query.py +1 -1
- metadata/generated/schema/entity/data/report.py +1 -1
- metadata/generated/schema/entity/data/searchIndex.py +1 -1
- metadata/generated/schema/entity/data/storedProcedure.py +1 -1
- metadata/generated/schema/entity/data/table.py +1 -1
- metadata/generated/schema/entity/data/topic.py +1 -1
- metadata/generated/schema/entity/docStore/__init__.py +1 -1
- metadata/generated/schema/entity/docStore/document.py +1 -1
- metadata/generated/schema/entity/domains/__init__.py +1 -1
- metadata/generated/schema/entity/domains/dataProduct.py +1 -1
- metadata/generated/schema/entity/domains/domain.py +1 -1
- metadata/generated/schema/entity/events/__init__.py +1 -1
- metadata/generated/schema/entity/events/webhook.py +1 -1
- metadata/generated/schema/entity/feed/__init__.py +1 -1
- metadata/generated/schema/entity/feed/suggestion.py +1 -1
- metadata/generated/schema/entity/feed/thread.py +1 -1
- metadata/generated/schema/entity/policies/__init__.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/__init__.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/resourceDescriptor.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/resourcePermission.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/rule.py +1 -1
- metadata/generated/schema/entity/policies/filters.py +1 -1
- metadata/generated/schema/entity/policies/policy.py +1 -1
- metadata/generated/schema/entity/services/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/common/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/common/sslCertPaths.py +1 -1
- metadata/generated/schema/entity/services/connections/common/sslCertValues.py +1 -1
- metadata/generated/schema/entity/services/connections/common/sslConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/connectionBasicType.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/customDashboardConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/domoDashboardConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/lightdashConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/lookerConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/metabaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/modeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/mstrConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/powerBIConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/qlikSenseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/quickSightConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/redashConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/supersetConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/tableauConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/athenaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/azureSQLConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/bigQueryConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/bigTableConnection.py +41 -0
- metadata/generated/schema/entity/services/connections/database/clickhouseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/basicAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/iamAuthConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/jwtAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/database/couchbaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/customDatabaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/databricksConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/azureConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/gcsConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/s3Config.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalakeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/db2Connection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/deltaLakeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/domoDatabaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/dorisConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/druidConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/dynamoDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/glueConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/greenplumConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/hiveConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/dynamoDbCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/glueCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/hiveCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/icebergCatalog.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/icebergFileSystem.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/restCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/icebergConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/impalaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mariaDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mongoDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mssqlConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mysqlConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/oracleConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/pinotDBConnection.py +8 -6
- metadata/generated/schema/entity/services/connections/database/postgresConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/prestoConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/redshiftConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/salesforceConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sapHanaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sasConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/singleStoreConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/snowflakeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sqliteConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/trinoConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/unityCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/verticaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/customMessagingConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/kafkaConnection.py +14 -2
- metadata/generated/schema/entity/services/connections/messaging/kinesisConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/pulsarConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/redpandaConnection.py +14 -2
- metadata/generated/schema/entity/services/connections/messaging/saslMechanismType.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/alationConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/amundsenConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/atlasConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/metadataESConnection.py +2 -2
- metadata/generated/schema/entity/services/connections/metadata/openMetadataConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/customMlModelConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/mlflowConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/sageMakerConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/sklearnConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/airbyteConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/airflowConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/backendConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/customPipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/dagsterConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/databricksPipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/domoPipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/fivetranConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/gluePipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/nifiConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/sparkConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/splineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/search/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/search/customSearchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearch/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearch/apiAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearch/basicAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/search/openSearchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/serviceConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/adlsConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/customStorageConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/gcsConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/s3Connection.py +1 -1
- metadata/generated/schema/entity/services/connections/testConnectionDefinition.py +1 -1
- metadata/generated/schema/entity/services/connections/testConnectionResult.py +1 -1
- metadata/generated/schema/entity/services/dashboardService.py +1 -1
- metadata/generated/schema/entity/services/databaseService.py +4 -1
- metadata/generated/schema/entity/services/ingestionPipelines/__init__.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/ingestionPipeline.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/pipelineServiceClientResponse.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/status.py +1 -1
- metadata/generated/schema/entity/services/messagingService.py +1 -1
- metadata/generated/schema/entity/services/metadataService.py +1 -1
- metadata/generated/schema/entity/services/mlmodelService.py +1 -1
- metadata/generated/schema/entity/services/pipelineService.py +1 -1
- metadata/generated/schema/entity/services/searchService.py +1 -1
- metadata/generated/schema/entity/services/serviceType.py +1 -1
- metadata/generated/schema/entity/services/storageService.py +1 -1
- metadata/generated/schema/entity/teams/__init__.py +1 -1
- metadata/generated/schema/entity/teams/persona.py +1 -1
- metadata/generated/schema/entity/teams/role.py +1 -1
- metadata/generated/schema/entity/teams/team.py +1 -1
- metadata/generated/schema/entity/teams/teamHierarchy.py +1 -1
- metadata/generated/schema/entity/teams/user.py +1 -1
- metadata/generated/schema/entity/type.py +3 -18
- metadata/generated/schema/entity/utils/__init__.py +1 -1
- metadata/generated/schema/entity/utils/entitiesCount.py +1 -1
- metadata/generated/schema/entity/utils/servicesCount.py +1 -1
- metadata/generated/schema/entity/utils/supersetApiConnection.py +1 -1
- metadata/generated/schema/events/__init__.py +1 -1
- metadata/generated/schema/events/alertMetrics.py +1 -1
- metadata/generated/schema/events/api/__init__.py +1 -1
- metadata/generated/schema/events/api/createEventSubscription.py +1 -1
- metadata/generated/schema/events/emailAlertConfig.py +1 -1
- metadata/generated/schema/events/eventFilterRule.py +1 -1
- metadata/generated/schema/events/eventSubscription.py +5 -3
- metadata/generated/schema/events/eventSubscriptionOffset.py +1 -1
- metadata/generated/schema/events/failedEvent.py +1 -1
- metadata/generated/schema/events/filterResourceDescriptor.py +1 -1
- metadata/generated/schema/events/subscriptionResourceDescriptor.py +1 -1
- metadata/generated/schema/metadataIngestion/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/application.py +1 -1
- metadata/generated/schema/metadataIngestion/applicationPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dashboardServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dataInsightPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceProfilerPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceQueryLineagePipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceQueryUsagePipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtAzureConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtBucketDetails.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtCloudConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtGCSConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtHttpConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtLocalConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtS3Config.py +1 -1
- metadata/generated/schema/metadataIngestion/messagingServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/metadataToElasticSearchPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/mlmodelServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/pipelineServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/searchServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/containerMetadataConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/manifestMetadataConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageBucketDetails.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataADLSConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataGCSConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataHttpConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataLocalConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataS3Config.py +1 -1
- metadata/generated/schema/metadataIngestion/storageServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/testSuitePipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/workflow.py +1 -1
- metadata/generated/schema/monitoring/__init__.py +1 -1
- metadata/generated/schema/monitoring/eventMonitorProvider.py +1 -1
- metadata/generated/schema/security/__init__.py +1 -1
- metadata/generated/schema/security/client/__init__.py +1 -1
- metadata/generated/schema/security/client/auth0SSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/azureSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/customOidcSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/googleSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/oktaSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/openMetadataJWTClientConfig.py +1 -1
- metadata/generated/schema/security/client/samlSSOClientConfig.py +1 -1
- metadata/generated/schema/security/credentials/__init__.py +1 -1
- metadata/generated/schema/security/credentials/accessTokenAuth.py +1 -1
- metadata/generated/schema/security/credentials/apiAccessTokenAuth.py +1 -1
- metadata/generated/schema/security/credentials/awsCredentials.py +1 -1
- metadata/generated/schema/security/credentials/azureCredentials.py +16 -9
- metadata/generated/schema/security/credentials/basicAuth.py +1 -1
- metadata/generated/schema/security/credentials/bitbucketCredentials.py +1 -1
- metadata/generated/schema/security/credentials/gcpCredentials.py +1 -1
- metadata/generated/schema/security/credentials/gcpValues.py +1 -1
- metadata/generated/schema/security/credentials/gitCredentials.py +1 -1
- metadata/generated/schema/security/credentials/githubCredentials.py +1 -1
- metadata/generated/schema/security/secrets/__init__.py +1 -1
- metadata/generated/schema/security/secrets/secretsManagerClientLoader.py +1 -1
- metadata/generated/schema/security/secrets/secretsManagerConfiguration.py +1 -1
- metadata/generated/schema/security/secrets/secretsManagerProvider.py +3 -1
- metadata/generated/schema/security/securityConfiguration.py +1 -1
- metadata/generated/schema/security/ssl/__init__.py +1 -1
- metadata/generated/schema/security/ssl/validateSSLClientConfig.py +1 -1
- metadata/generated/schema/security/ssl/verifySSLConfig.py +1 -1
- metadata/generated/schema/settings/__init__.py +1 -1
- metadata/generated/schema/settings/settings.py +1 -1
- metadata/generated/schema/system/__init__.py +1 -1
- metadata/generated/schema/system/entityError.py +1 -1
- metadata/generated/schema/system/eventPublisherJob.py +1 -1
- metadata/generated/schema/system/indexingError.py +1 -1
- metadata/generated/schema/system/ui/__init__.py +1 -1
- metadata/generated/schema/system/ui/knowledgePanel.py +1 -1
- metadata/generated/schema/system/ui/page.py +1 -1
- metadata/generated/schema/tests/__init__.py +1 -1
- metadata/generated/schema/tests/assigned.py +1 -1
- metadata/generated/schema/tests/basic.py +1 -1
- metadata/generated/schema/tests/customMetric.py +1 -1
- metadata/generated/schema/tests/resolved.py +1 -1
- metadata/generated/schema/tests/testCase.py +1 -1
- metadata/generated/schema/tests/testCaseResolutionStatus.py +1 -1
- metadata/generated/schema/tests/testDefinition.py +2 -1
- metadata/generated/schema/tests/testSuite.py +1 -1
- metadata/generated/schema/type/__init__.py +1 -1
- metadata/generated/schema/type/auditLog.py +1 -1
- metadata/generated/schema/type/basic.py +6 -2
- metadata/generated/schema/type/bulkOperationResult.py +1 -1
- metadata/generated/schema/type/changeEvent.py +1 -1
- metadata/generated/schema/type/changeEventType.py +1 -1
- metadata/generated/schema/type/collectionDescriptor.py +1 -1
- metadata/generated/schema/type/csvDocumentation.py +1 -1
- metadata/generated/schema/type/csvErrorType.py +1 -1
- metadata/generated/schema/type/csvFile.py +1 -1
- metadata/generated/schema/type/csvImportResult.py +1 -1
- metadata/generated/schema/type/customProperties/__init__.py +3 -0
- metadata/generated/schema/type/customProperties/enumConfig.py +17 -0
- metadata/generated/schema/type/customProperty.py +52 -0
- metadata/generated/schema/type/dailyCount.py +1 -1
- metadata/generated/schema/type/databaseConnectionConfig.py +1 -1
- metadata/generated/schema/type/entityHistory.py +1 -1
- metadata/generated/schema/type/entityLineage.py +1 -1
- metadata/generated/schema/type/entityReference.py +1 -1
- metadata/generated/schema/type/entityReferenceList.py +1 -1
- metadata/generated/schema/type/entityRelationship.py +1 -1
- metadata/generated/schema/type/entityUsage.py +1 -1
- metadata/generated/schema/type/filterPattern.py +1 -1
- metadata/generated/schema/type/function.py +1 -1
- metadata/generated/schema/type/include.py +1 -1
- metadata/generated/schema/type/jdbcConnection.py +1 -1
- metadata/generated/schema/type/lifeCycle.py +1 -1
- metadata/generated/schema/type/paging.py +1 -1
- metadata/generated/schema/type/profile.py +1 -1
- metadata/generated/schema/type/queryParserData.py +1 -1
- metadata/generated/schema/type/reaction.py +1 -1
- metadata/generated/schema/type/schedule.py +1 -1
- metadata/generated/schema/type/schema.py +1 -1
- metadata/generated/schema/type/tableQuery.py +1 -1
- metadata/generated/schema/type/tableUsageCount.py +1 -1
- metadata/generated/schema/type/tagLabel.py +1 -1
- metadata/generated/schema/type/usageDetails.py +1 -1
- metadata/generated/schema/type/usageRequest.py +1 -1
- metadata/generated/schema/type/votes.py +1 -1
- metadata/great_expectations/action.py +4 -15
- metadata/ingestion/api/steps.py +14 -1
- metadata/ingestion/api/topology_runner.py +4 -1
- metadata/ingestion/models/custom_properties.py +0 -1
- metadata/ingestion/models/patch_request.py +61 -9
- metadata/ingestion/ometa/client.py +6 -0
- metadata/ingestion/ometa/mixins/custom_property_mixin.py +11 -11
- metadata/ingestion/ometa/mixins/patch_mixin.py +2 -0
- metadata/ingestion/ometa/ometa_api.py +1 -1
- metadata/ingestion/sink/metadata_rest.py +4 -2
- metadata/ingestion/source/dashboard/looker/metadata.py +3 -4
- metadata/ingestion/source/dashboard/metabase/client.py +4 -0
- metadata/ingestion/source/dashboard/metabase/metadata.py +5 -4
- metadata/ingestion/source/dashboard/metabase/models.py +2 -2
- metadata/ingestion/source/dashboard/tableau/metadata.py +18 -0
- metadata/ingestion/source/database/bigquery/helper.py +68 -1
- metadata/ingestion/source/database/bigquery/metadata.py +12 -3
- metadata/ingestion/source/database/bigquery/queries.py +22 -0
- metadata/ingestion/source/database/bigtable/client.py +62 -0
- metadata/ingestion/source/database/bigtable/connection.py +116 -0
- metadata/ingestion/source/database/bigtable/metadata.py +224 -0
- metadata/ingestion/source/database/bigtable/models.py +60 -0
- metadata/ingestion/source/database/common_db_source.py +2 -2
- metadata/ingestion/source/database/common_nosql_source.py +19 -2
- metadata/ingestion/source/database/databricks/metadata.py +132 -46
- metadata/ingestion/source/database/databricks/queries.py +3 -4
- metadata/ingestion/source/database/dbt/metadata.py +16 -28
- metadata/ingestion/source/database/oracle/queries.py +2 -2
- metadata/ingestion/source/messaging/common_broker_source.py +9 -7
- metadata/ingestion/source/messaging/kafka/connection.py +45 -4
- metadata/ingestion/source/mlmodel/sagemaker/metadata.py +20 -8
- metadata/profiler/orm/functions/conn_test.py +1 -0
- metadata/profiler/orm/functions/sum.py +1 -0
- metadata/profiler/orm/registry.py +1 -0
- metadata/profiler/processor/core.py +2 -2
- metadata/utils/datalake/datalake_utils.py +7 -1
- metadata/utils/execution_time_tracker.py +199 -0
- metadata/utils/filters.py +4 -0
- metadata/utils/helpers.py +0 -51
- metadata/utils/secrets/aws_based_secrets_manager.py +67 -4
- metadata/utils/secrets/aws_secrets_manager.py +7 -2
- metadata/utils/secrets/aws_ssm_secrets_manager.py +7 -2
- metadata/utils/secrets/azure_kv_secrets_manager.py +148 -0
- metadata/utils/secrets/external_secrets_manager.py +25 -3
- metadata/utils/secrets/secrets_manager_factory.py +13 -30
- metadata/workflow/base.py +4 -0
- metadata/workflow/output_handler.py +22 -0
- {openmetadata_ingestion-1.3.0.1.dist-info → openmetadata_ingestion-1.3.1.1.dist-info}/METADATA +298 -289
- {openmetadata_ingestion-1.3.0.1.dist-info → openmetadata_ingestion-1.3.1.1.dist-info}/RECORD +575 -564
- {openmetadata_ingestion-1.3.0.1.dist-info → openmetadata_ingestion-1.3.1.1.dist-info}/WHEEL +1 -1
- metadata/utils/secrets/client/loader.py +0 -77
- /metadata/{utils/secrets/client → ingestion/source/database/bigtable}/__init__.py +0 -0
- /metadata/utils/secrets/{noop_secrets_manager.py → db_secrets_manager.py} +0 -0
- {openmetadata_ingestion-1.3.0.1.dist-info → openmetadata_ingestion-1.3.1.1.dist-info}/LICENSE +0 -0
- {openmetadata_ingestion-1.3.0.1.dist-info → openmetadata_ingestion-1.3.1.1.dist-info}/entry_points.txt +0 -0
- {openmetadata_ingestion-1.3.0.1.dist-info → openmetadata_ingestion-1.3.1.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# Copyright 2024 Collate
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
7
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
9
|
+
# See the License for the specific language governing permissions and
|
|
10
|
+
# limitations under the License.
|
|
11
|
+
"""BigTable connection"""
|
|
12
|
+
from typing import List, Optional
|
|
13
|
+
|
|
14
|
+
from google.cloud.bigtable import Client
|
|
15
|
+
|
|
16
|
+
from metadata.generated.schema.entity.automations.workflow import (
|
|
17
|
+
Workflow as AutomationWorkflow,
|
|
18
|
+
)
|
|
19
|
+
from metadata.generated.schema.entity.services.connections.database.bigTableConnection import (
|
|
20
|
+
BigTableConnection,
|
|
21
|
+
)
|
|
22
|
+
from metadata.generated.schema.security.credentials.gcpValues import (
|
|
23
|
+
GcpCredentialsValues,
|
|
24
|
+
SingleProjectId,
|
|
25
|
+
)
|
|
26
|
+
from metadata.ingestion.connections.test_connections import (
|
|
27
|
+
SourceConnectionException,
|
|
28
|
+
test_connection_steps,
|
|
29
|
+
)
|
|
30
|
+
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
|
31
|
+
from metadata.ingestion.source.database.bigtable.client import MultiProjectClient
|
|
32
|
+
from metadata.utils.credentials import set_google_credentials
|
|
33
|
+
from metadata.utils.logger import ingestion_logger
|
|
34
|
+
|
|
35
|
+
logger = ingestion_logger()
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def get_connection(connection: BigTableConnection):
    """
    Create the multi-project BigTable client for the given service connection.

    The Google credentials are registered first. When the credentials are given as
    explicit GCP values, the configured project ID(s) are forwarded to the client
    as a list; otherwise `project_ids` is left as None.
    """
    set_google_credentials(connection.credentials)

    gcp_config = connection.credentials.gcpConfig
    project_ids = None
    if isinstance(gcp_config, GcpCredentialsValues):
        configured_projects = gcp_config.projectId
        if isinstance(configured_projects, SingleProjectId):
            # a single project is wrapped so the client always receives a list
            project_ids = [configured_projects.__root__]
        else:
            project_ids = configured_projects.__root__

    # admin=True is required to list instances and tables
    return MultiProjectClient(client_class=Client, project_ids=project_ids, admin=True)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def get_nested_index(lst: list, index: List[int], default=None):
    """
    Walk a nested sequence following `index` and return the element found.

    Args:
        lst: the (possibly nested) sequence to walk.
        index: one position per nesting level, applied left to right.
        default: value returned when the path cannot be followed.

    Returns:
        The element at the nested position, or `default` when any level is out of
        range or is not subscriptable (for instance a scalar or None).
        An empty `index` returns `lst` itself.
    """
    try:
        for i in index:
            lst = lst[i]
    except (IndexError, TypeError):
        # IndexError: a level is too short. TypeError: a level cannot be indexed at all.
        return default
    return lst
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class Tester:
    """
    A wrapper class that holds state. We need it because the different testing stages
    are not independent of each other. For example, we need to list instances before we can list
    tables, and we need a table before we can read a row.
    """

    def __init__(self, client: MultiProjectClient):
        self.client = client
        # filled in by the stages below, in order
        self.project_id = None
        self.instance = None
        self.table = None

    def list_instances(self):
        """Pick the first configured project and remember its first instance (None if there is none)."""
        self.project_id = list(self.client.clients.keys())[0]
        instances = list(self.client.list_instances(project_id=self.project_id))
        # the instances are the first element of the listing result
        self.instance = get_nested_index(instances, [0, 0])

    def list_tables(self):
        """
        List the tables of the remembered instance and keep the first one (None if there is none).

        Raises:
            SourceConnectionException: when no instance was found by `list_instances`.
        """
        if not self.instance:
            raise SourceConnectionException(
                f"No instances found in project {self.project_id}"
            )
        tables = list(self.instance.list_tables())
        # An instance without tables must not fail here with a bare IndexError:
        # the table stays None and `get_row` reports the missing table explicitly.
        self.table = get_nested_index(tables, [0])

    def get_row(self):
        """
        Request one row from the remembered table.

        Raises:
            SourceConnectionException: when no table was found by `list_tables`.
        """
        if not self.table:
            # self.instance may be None when the earlier stages failed, so the
            # message is built without dereferencing it directly.
            instance_id = getattr(self.instance, "instance_id", None)
            raise SourceConnectionException(
                f"No tables found in project {self.project_id} and instance {instance_id}"
            )
        self.table.read_rows(limit=1)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def test_connection(
    metadata: OpenMetadata,
    client: MultiProjectClient,
    service_connection: BigTableConnection,
    automation_workflow: Optional[AutomationWorkflow] = None,
) -> None:
    """
    Run the BigTable connection checks, either from a metadata workflow
    or from an Automation Workflow.

    The three steps share one `Tester`, so what an earlier step finds
    (instance, table) is available to the following ones.
    """
    stage_runner = Tester(client)

    test_connection_steps(
        metadata=metadata,
        test_fn={
            "GetInstances": stage_runner.list_instances,
            "GetTables": stage_runner.list_tables,
            "GetRows": stage_runner.get_row,
        },
        service_type=service_connection.type.value,
        automation_workflow=automation_workflow,
    )
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
# Copyright 2024 Collate
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
7
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
9
|
+
# See the License for the specific language governing permissions and
|
|
10
|
+
# limitations under the License.
|
|
11
|
+
"""
|
|
12
|
+
Bigtable source methods.
|
|
13
|
+
"""
|
|
14
|
+
import traceback
|
|
15
|
+
from typing import Dict, Iterable, List, Optional, Union
|
|
16
|
+
|
|
17
|
+
from google.cloud.bigtable import row_filters
|
|
18
|
+
from google.cloud.bigtable.instance import Instance
|
|
19
|
+
from google.cloud.bigtable.table import Table
|
|
20
|
+
|
|
21
|
+
from metadata.generated.schema.entity.data.table import (
|
|
22
|
+
ConstraintType,
|
|
23
|
+
TableConstraint,
|
|
24
|
+
TableType,
|
|
25
|
+
)
|
|
26
|
+
from metadata.generated.schema.entity.services.connections.database.bigTableConnection import (
|
|
27
|
+
BigTableConnection,
|
|
28
|
+
)
|
|
29
|
+
from metadata.generated.schema.metadataIngestion.workflow import (
|
|
30
|
+
Source as WorkflowSource,
|
|
31
|
+
)
|
|
32
|
+
from metadata.ingestion.api.steps import InvalidSourceException
|
|
33
|
+
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
|
34
|
+
from metadata.ingestion.source.database.bigtable.client import MultiProjectClient
|
|
35
|
+
from metadata.ingestion.source.database.bigtable.models import Row
|
|
36
|
+
from metadata.ingestion.source.database.common_nosql_source import (
|
|
37
|
+
SAMPLE_SIZE as GLOBAL_SAMPLE_SIZE,
|
|
38
|
+
)
|
|
39
|
+
from metadata.ingestion.source.database.common_nosql_source import CommonNoSQLSource
|
|
40
|
+
from metadata.ingestion.source.database.multi_db_source import MultiDBSource
|
|
41
|
+
from metadata.utils.logger import ingestion_logger
|
|
42
|
+
|
|
43
|
+
logger = ingestion_logger()
|
|
44
|
+
|
|
45
|
+
# BigTable groups its columns into column families. We make an assumption that if the table has a big number of
# columns, we at least get a sample of the first 100 column families.
MAX_COLUMN_FAMILIES = 100
# Row limit passed to each per-column-family read when sampling.
SAMPLES_PER_COLUMN_FAMILY = 100

# Plain-string aliases that make the nested cache annotations readable.
ProjectId = str
InstanceId = str
TableId = str
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class BigtableSource(CommonNoSQLSource, MultiDBSource):
    """
    Implements the necessary methods to extract database metadata from Google BigTable Source.
    BigTable is a NoSQL database service for handling large amounts of data. The mapping is as follows:
      project -> instance -> table -> column_family.column
      (database) (schema)
    For more info about BigTable: https://cloud.google.com/bigtable/?hl=en
    All data types are registered as bytes.
    """

    def __init__(self, config: WorkflowSource, metadata: OpenMetadata):
        super().__init__(config, metadata)
        self.client: MultiProjectClient = self.connection_obj

        # the instances and tables are cached to avoid making redundant requests to the API.
        self.instances: Dict[ProjectId, Dict[InstanceId, Instance]] = {}
        self.tables: Dict[ProjectId, Dict[InstanceId, Dict[TableId, Table]]] = {}

    @classmethod
    def create(cls, config_dict, metadata: OpenMetadata):
        """
        Build the source from a raw workflow configuration.

        Raises:
            InvalidSourceException: when the configured connection is not a BigTableConnection.
        """
        config: WorkflowSource = WorkflowSource.parse_obj(config_dict)
        connection: BigTableConnection = config.serviceConnection.__root__.config
        if not isinstance(connection, BigTableConnection):
            raise InvalidSourceException(
                f"Expected BigTableConnection, but got {connection}"
            )
        return cls(config, metadata)

    def get_configured_database(self) -> Optional[str]:
        """
        This connector uses "virtual databases" in the form of GCP projects.
        The concept of a default project for the GCP client is not useful here because the project ID
        is always an explicit part of the connection. Therefore, this method returns None and the databases
        are resolved using `self.get_database_names`.
        """
        return None

    def get_database_names(self) -> Iterable[str]:
        """Return the database names, which for this connector are the raw project IDs."""
        return self.get_database_names_raw()

    def get_database_names_raw(self) -> Iterable[str]:
        """Yield every project ID known to the client."""
        yield from self.client.project_ids()

    def get_schema_name_list(self) -> List[str]:
        """
        List the instance IDs of the project currently in context and cache the instances.

        Any failure is logged and re-raised.
        """
        project_id = self.context.database
        try:
            # the first element is a list of instances
            # the second element is another collection
            # NOTE(review): presumably the failed locations reported by the underlying
            # google-cloud-bigtable client — confirm against MultiProjectClient.list_instances
            instances, _ = self.client.list_instances(project_id=project_id)
            self.instances[project_id] = {
                instance.instance_id: instance for instance in instances
            }
            return list(self.instances[project_id].keys())
        except Exception as err:
            logger.debug(traceback.format_exc())
            logger.error(
                f"Failed to list BigTable instances in project {project_id}: {err}"
            )
            raise

    def get_table_name_list(self, schema_name: str) -> List[str]:
        """
        List the table IDs of an instance of the project currently in context and cache the tables.

        The instance must have been cached by `get_schema_name_list`. Failures are logged
        and reported as an empty list.
        """
        project_id = self.context.database
        try:
            instance = self._get_instance(project_id, schema_name)
            if instance is None:
                raise RuntimeError(f"Instance {project_id}/{schema_name} not found.")
            # Rebuild the instance's cache entry from the fresh listing. This way an
            # instance without tables yields an empty list instead of a KeyError that
            # would be logged as a failure, and tables that no longer exist are dropped.
            instance_tables = {
                table.table_id: table for table in instance.list_tables()
            }
            self.tables.setdefault(project_id, {})[
                instance.instance_id
            ] = instance_tables
            return list(instance_tables)
        except Exception as err:
            logger.debug(traceback.format_exc())
            # add context to the error message
            logger.error(
                f"Failed to list BigTable table names in {project_id}.{schema_name}: {err}"
            )
            return []

    def get_table_constraints(
        self, db_name: str, schema_name: str, table_name: str
    ) -> List[TableConstraint]:
        """Report the `row_key` column as the primary key of every table."""
        return [
            TableConstraint(
                constraintType=ConstraintType.PRIMARY_KEY, columns=["row_key"]
            )
        ]

    def get_table_columns_dict(
        self, schema_name: str, table_name: str
    ) -> Union[List[Dict], Dict]:
        """
        Sample records from a cached table.

        The table must have been cached by `get_table_name_list`. Failures are logged
        as warnings and reported as an empty list.
        """
        project_id = self.context.database
        try:
            table = self._get_table(project_id, schema_name, table_name)
            if table is None:
                raise RuntimeError(
                    f"Table {project_id}/{schema_name}/{table_name} not found."
                )
            column_families = table.list_column_families()
            # all BigTable tables have a "row_key" column. Even if there are no records in the table.
            records = [{"row_key": b"row_key"}]
            # In order to get a "good" sample of data, we try to distribute the sampling
            # across multiple column families.
            for column_family in list(column_families.keys())[:MAX_COLUMN_FAMILIES]:
                records.extend(
                    self._get_records_for_column_family(
                        table, column_family, SAMPLES_PER_COLUMN_FAMILY
                    )
                )
                # stop as soon as the shared NoSQL sample size is reached
                if len(records) >= GLOBAL_SAMPLE_SIZE:
                    break
            return records
        except Exception as err:
            logger.debug(traceback.format_exc())
            logger.warning(
                f"Failed to read BigTable rows for [{project_id}.{schema_name}.{table_name}]: {err}"
            )
            return []

    def get_source_url(
        self,
        database_name: Optional[str] = None,
        schema_name: Optional[str] = None,
        table_name: Optional[str] = None,
        table_type: Optional[TableType] = None,
    ) -> Optional[str]:
        """
        Method to get the source url for a BigTable table.

        Returns None unless both the schema (instance) and table names are given.
        """
        try:
            if schema_name and table_name:
                return (
                    "https://console.cloud.google.com/bigtable/instances/"
                    f"{schema_name}/tables/{table_name}/overview?project={database_name}"
                )
        except Exception as exc:
            logger.debug(traceback.format_exc())
            logger.error(f"Unable to get source url: {exc}")
        return None

    @staticmethod
    def _set_nested(dct: dict, keys: List[str], value: object) -> None:
        """Store `value` under the nested `keys` path, creating intermediate dicts as needed."""
        for key in keys[:-1]:
            dct = dct.setdefault(key, {})
        dct[keys[-1]] = value

    @staticmethod
    def _get_records_for_column_family(
        table: Table, column_family: str, limit: int
    ) -> List[Dict]:
        """Read up to `limit` rows restricted to one column family and convert them to records."""
        filter_ = row_filters.ColumnRangeFilter(column_family_id=column_family)
        rows = table.read_rows(limit=limit, filter_=filter_)
        return [Row.from_partial_row(row).to_record() for row in rows]

    def _get_table(
        self, project_id: str, schema_name: str, table_name: str
    ) -> Optional[Table]:
        """Return the cached table, or None when it has not been cached."""
        try:
            return self.tables[project_id][schema_name][table_name]
        except KeyError:
            return None

    def _get_instance(self, project_id: str, schema_name: str) -> Optional[Instance]:
        """Return the cached instance, or None when it has not been cached."""
        try:
            return self.instances[project_id][schema_name]
        except KeyError:
            return None
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# Copyright 2024 Collate
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
7
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
9
|
+
# See the License for the specific language governing permissions and
|
|
10
|
+
# limitations under the License.
|
|
11
|
+
"""
|
|
12
|
+
Bigtable source models.
|
|
13
|
+
"""
|
|
14
|
+
from typing import Dict, List
|
|
15
|
+
|
|
16
|
+
from google.cloud.bigtable.row import PartialRowData
|
|
17
|
+
from pydantic import BaseModel
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Value(BaseModel):
    """One stored value of a Bigtable cell, together with its timestamp."""

    timestamp: int
    value: bytes
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class Cell(BaseModel):
    """A Bigtable cell, holding the list of values stored for it."""

    values: List[Value]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class Row(BaseModel):
    """A Bigtable row: its key plus its cells grouped by column family and column qualifier."""

    cells: Dict[str, Dict[bytes, Cell]]
    row_key: bytes

    @classmethod
    def from_partial_row(cls, row: PartialRowData):
        """Build a `Row` from the row object returned by the Bigtable client."""
        cells = {}
        for column_family, cf_cells in row.cells.items():
            family_cells = cells.setdefault(column_family, {})
            for column, cell in cf_cells.items():
                values = []
                for c in cell:
                    # NOTE(review): google-cloud-bigtable cells appear to expose the integer
                    # timestamp as `timestamp_micros`, while `timestamp` is a datetime that an
                    # `int` field would reject — confirm. Fall back to the original attribute
                    # when `timestamp_micros` is not available.
                    micros = getattr(c, "timestamp_micros", None)
                    values.append(
                        Value(
                            timestamp=c.timestamp if micros is None else micros,
                            value=c.value,
                        )
                    )
                family_cells[column] = Cell(values=values)
        return cls(cells=cells, row_key=row.row_key)

    def to_record(self) -> Dict[str, bytes]:
        """
        Flatten the row into a `{"<column_family>.<column>": value}` record.

        The record always carries the row key under "row_key". Cells without any
        value are skipped.
        """
        record = {}
        for column_family, cells in self.cells.items():
            for column, cell in cells.items():
                if not cell.values:
                    # nothing to sample for this column
                    continue
                # Column qualifiers are raw bytes and need not be valid UTF-8. Escaping the
                # undecodable bytes keeps one odd column from aborting the whole sample.
                column_name = column.decode("utf-8", errors="backslashreplace")
                # Since each cell can have multiple values and the API returns them in descending order
                # from latest to oldest, we only take the latest value. This probably does not matter since
                # all we care about is data types and all data stored in BigTable is of type `bytes`.
                record[f"{column_family}.{column_name}"] = cell.values[0].value
        record["row_key"] = self.row_key

        return record
|
|
@@ -62,8 +62,8 @@ from metadata.ingestion.source.database.stored_procedures_mixin import QueryByPr
|
|
|
62
62
|
from metadata.ingestion.source.models import TableView
|
|
63
63
|
from metadata.utils import fqn
|
|
64
64
|
from metadata.utils.db_utils import get_view_lineage
|
|
65
|
+
from metadata.utils.execution_time_tracker import calculate_execution_time_generator
|
|
65
66
|
from metadata.utils.filters import filter_by_table
|
|
66
|
-
from metadata.utils.helpers import calculate_execution_time_generator
|
|
67
67
|
from metadata.utils.logger import ingestion_logger
|
|
68
68
|
|
|
69
69
|
logger = ingestion_logger()
|
|
@@ -405,7 +405,7 @@ class CommonDbSourceService(
|
|
|
405
405
|
"""Not Implemented"""
|
|
406
406
|
yield from []
|
|
407
407
|
|
|
408
|
-
@calculate_execution_time_generator
|
|
408
|
+
@calculate_execution_time_generator(store=False)
|
|
409
409
|
def yield_table(
|
|
410
410
|
self, table_name_and_type: Tuple[str, str]
|
|
411
411
|
) -> Iterable[Either[CreateTableRequest]]:
|
|
@@ -28,7 +28,11 @@ from metadata.generated.schema.api.data.createTable import CreateTableRequest
|
|
|
28
28
|
from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
|
|
29
29
|
from metadata.generated.schema.entity.data.database import Database
|
|
30
30
|
from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
|
|
31
|
-
from metadata.generated.schema.entity.data.table import
|
|
31
|
+
from metadata.generated.schema.entity.data.table import (
|
|
32
|
+
Table,
|
|
33
|
+
TableConstraint,
|
|
34
|
+
TableType,
|
|
35
|
+
)
|
|
32
36
|
from metadata.generated.schema.entity.services.ingestionPipelines.status import (
|
|
33
37
|
StackTraceError,
|
|
34
38
|
)
|
|
@@ -203,6 +207,15 @@ class CommonNoSQLSource(DatabaseServiceSource, ABC):
|
|
|
203
207
|
need to be overridden by sources
|
|
204
208
|
"""
|
|
205
209
|
|
|
210
|
+
def get_table_constraints(
    self,
    db_name: str,
    schema_name: str,
    table_name: str,
) -> Optional[List[TableConstraint]]:
    """
    Return the constraints of the given table.

    This default implementation reports no constraints; sources that know
    their constraints override it.
    """
    # pylint: disable=unused-argument
    return None
|
|
218
|
+
|
|
206
219
|
def yield_table(
|
|
207
220
|
self, table_name_and_type: Tuple[str, str]
|
|
208
221
|
) -> Iterable[Either[CreateTableRequest]]:
|
|
@@ -223,7 +236,11 @@ class CommonNoSQLSource(DatabaseServiceSource, ABC):
|
|
|
223
236
|
name=table_name,
|
|
224
237
|
tableType=table_type,
|
|
225
238
|
columns=columns,
|
|
226
|
-
tableConstraints=
|
|
239
|
+
tableConstraints=self.get_table_constraints(
|
|
240
|
+
schema_name=schema_name,
|
|
241
|
+
table_name=table_name,
|
|
242
|
+
db_name=self.context.database,
|
|
243
|
+
),
|
|
227
244
|
databaseSchema=fqn.build(
|
|
228
245
|
metadata=self.metadata,
|
|
229
246
|
entity_type=DatabaseSchema,
|