openmetadata-ingestion 1.3.0.1__py3-none-any.whl → 1.3.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of openmetadata-ingestion might be problematic. Click here for more details.
- metadata/cli/db_dump.py +1 -0
- metadata/data_insight/processor/reports/cost_analysis_report_data_processor.py +39 -47
- metadata/data_insight/processor/reports/data_processor.py +1 -0
- metadata/data_insight/producer/cost_analysis_producer.py +78 -14
- metadata/data_insight/producer/entity_producer.py +1 -1
- metadata/data_insight/producer/producer_interface.py +1 -1
- metadata/data_insight/producer/web_analytics_producer.py +1 -1
- metadata/data_insight/source/metadata.py +10 -1
- metadata/data_quality/validations/table/base/tableColumnToMatchSet.py +2 -1
- metadata/data_quality/validations/table/pandas/tableColumnToMatchSet.py +2 -1
- metadata/data_quality/validations/table/sqlalchemy/tableColumnToMatchSet.py +7 -2
- metadata/examples/workflows/bigtable.yaml +32 -0
- metadata/generated/antlr/EntityLinkLexer.py +353 -319
- metadata/generated/schema/analytics/__init__.py +1 -1
- metadata/generated/schema/analytics/basic.py +1 -1
- metadata/generated/schema/analytics/reportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/__init__.py +1 -1
- metadata/generated/schema/analytics/reportDataType/aggregatedCostAnalysisReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/entityReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/rawCostAnalysisReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/webAnalyticEntityViewReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/webAnalyticUserActivityReportData.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEvent.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventData.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventType/__init__.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventType/customEvent.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventType/pageViewEvent.py +1 -1
- metadata/generated/schema/api/__init__.py +1 -1
- metadata/generated/schema/api/addGlossaryToAssetsRequest.py +1 -1
- metadata/generated/schema/api/analytics/__init__.py +1 -1
- metadata/generated/schema/api/analytics/createWebAnalyticEvent.py +1 -1
- metadata/generated/schema/api/automations/__init__.py +1 -1
- metadata/generated/schema/api/automations/createWorkflow.py +1 -1
- metadata/generated/schema/api/bulkAssets.py +1 -1
- metadata/generated/schema/api/classification/__init__.py +1 -1
- metadata/generated/schema/api/classification/createClassification.py +1 -1
- metadata/generated/schema/api/classification/createTag.py +1 -1
- metadata/generated/schema/api/classification/loadTags.py +1 -1
- metadata/generated/schema/api/createBot.py +1 -1
- metadata/generated/schema/api/createEventPublisherJob.py +2 -2
- metadata/generated/schema/api/createType.py +1 -1
- metadata/generated/schema/api/data/__init__.py +1 -1
- metadata/generated/schema/api/data/createChart.py +1 -1
- metadata/generated/schema/api/data/createContainer.py +1 -1
- metadata/generated/schema/api/data/createCustomProperty.py +6 -10
- metadata/generated/schema/api/data/createDashboard.py +1 -1
- metadata/generated/schema/api/data/createDashboardDataModel.py +1 -1
- metadata/generated/schema/api/data/createDatabase.py +1 -1
- metadata/generated/schema/api/data/createDatabaseSchema.py +1 -1
- metadata/generated/schema/api/data/createGlossary.py +1 -1
- metadata/generated/schema/api/data/createGlossaryTerm.py +1 -1
- metadata/generated/schema/api/data/createMlModel.py +1 -1
- metadata/generated/schema/api/data/createPipeline.py +1 -1
- metadata/generated/schema/api/data/createQuery.py +1 -1
- metadata/generated/schema/api/data/createSearchIndex.py +1 -1
- metadata/generated/schema/api/data/createStoredProcedure.py +1 -1
- metadata/generated/schema/api/data/createTable.py +1 -1
- metadata/generated/schema/api/data/createTableProfile.py +1 -1
- metadata/generated/schema/api/data/createTopic.py +1 -1
- metadata/generated/schema/api/data/loadGlossary.py +1 -1
- metadata/generated/schema/api/data/restoreEntity.py +1 -1
- metadata/generated/schema/api/dataInsight/__init__.py +1 -1
- metadata/generated/schema/api/dataInsight/createDataInsightChart.py +1 -1
- metadata/generated/schema/api/dataInsight/kpi/__init__.py +1 -1
- metadata/generated/schema/api/dataInsight/kpi/createKpiRequest.py +1 -1
- metadata/generated/schema/api/docStore/__init__.py +1 -1
- metadata/generated/schema/api/docStore/createDocument.py +1 -1
- metadata/generated/schema/api/domains/__init__.py +1 -1
- metadata/generated/schema/api/domains/createDataProduct.py +1 -1
- metadata/generated/schema/api/domains/createDomain.py +1 -1
- metadata/generated/schema/api/feed/__init__.py +1 -1
- metadata/generated/schema/api/feed/closeTask.py +1 -1
- metadata/generated/schema/api/feed/createPost.py +1 -1
- metadata/generated/schema/api/feed/createSuggestion.py +1 -1
- metadata/generated/schema/api/feed/createThread.py +1 -1
- metadata/generated/schema/api/feed/resolveTask.py +1 -1
- metadata/generated/schema/api/feed/threadCount.py +1 -1
- metadata/generated/schema/api/lineage/__init__.py +1 -1
- metadata/generated/schema/api/lineage/addLineage.py +1 -1
- metadata/generated/schema/api/openMetadataServerVersion.py +1 -1
- metadata/generated/schema/api/policies/__init__.py +1 -1
- metadata/generated/schema/api/policies/createPolicy.py +1 -1
- metadata/generated/schema/api/services/__init__.py +1 -1
- metadata/generated/schema/api/services/createDashboardService.py +1 -1
- metadata/generated/schema/api/services/createDatabaseService.py +1 -1
- metadata/generated/schema/api/services/createMessagingService.py +1 -1
- metadata/generated/schema/api/services/createMetadataService.py +1 -1
- metadata/generated/schema/api/services/createMlModelService.py +1 -1
- metadata/generated/schema/api/services/createPipelineService.py +1 -1
- metadata/generated/schema/api/services/createSearchService.py +1 -1
- metadata/generated/schema/api/services/createStorageService.py +1 -1
- metadata/generated/schema/api/services/ingestionPipelines/__init__.py +1 -1
- metadata/generated/schema/api/services/ingestionPipelines/createIngestionPipeline.py +1 -1
- metadata/generated/schema/api/setOwner.py +1 -1
- metadata/generated/schema/api/teams/__init__.py +1 -1
- metadata/generated/schema/api/teams/createPersona.py +1 -1
- metadata/generated/schema/api/teams/createRole.py +1 -1
- metadata/generated/schema/api/teams/createTeam.py +1 -1
- metadata/generated/schema/api/teams/createUser.py +1 -1
- metadata/generated/schema/api/tests/__init__.py +1 -1
- metadata/generated/schema/api/tests/createCustomMetric.py +1 -1
- metadata/generated/schema/api/tests/createLogicalTestCases.py +1 -1
- metadata/generated/schema/api/tests/createTestCase.py +1 -1
- metadata/generated/schema/api/tests/createTestCaseResolutionStatus.py +1 -1
- metadata/generated/schema/api/tests/createTestDefinition.py +2 -1
- metadata/generated/schema/api/tests/createTestSuite.py +1 -1
- metadata/generated/schema/api/voteRequest.py +1 -1
- metadata/generated/schema/auth/__init__.py +1 -1
- metadata/generated/schema/auth/basicAuth.py +1 -1
- metadata/generated/schema/auth/basicLoginRequest.py +1 -1
- metadata/generated/schema/auth/changePasswordRequest.py +1 -1
- metadata/generated/schema/auth/createPersonalToken.py +1 -1
- metadata/generated/schema/auth/emailRequest.py +1 -1
- metadata/generated/schema/auth/emailVerificationToken.py +1 -1
- metadata/generated/schema/auth/generateToken.py +1 -1
- metadata/generated/schema/auth/jwtAuth.py +1 -1
- metadata/generated/schema/auth/loginRequest.py +1 -1
- metadata/generated/schema/auth/logoutRequest.py +1 -1
- metadata/generated/schema/auth/passwordResetRequest.py +1 -1
- metadata/generated/schema/auth/passwordResetToken.py +1 -1
- metadata/generated/schema/auth/personalAccessToken.py +1 -1
- metadata/generated/schema/auth/refreshToken.py +1 -1
- metadata/generated/schema/auth/registrationRequest.py +1 -1
- metadata/generated/schema/auth/revokePersonalToken.py +1 -1
- metadata/generated/schema/auth/revokeToken.py +1 -1
- metadata/generated/schema/auth/serviceTokenEnum.py +1 -1
- metadata/generated/schema/auth/ssoAuth.py +1 -1
- metadata/generated/schema/auth/tokenRefreshRequest.py +1 -1
- metadata/generated/schema/configuration/__init__.py +1 -1
- metadata/generated/schema/configuration/appsPrivateConfiguration.py +1 -1
- metadata/generated/schema/configuration/authConfig.py +1 -1
- metadata/generated/schema/configuration/authenticationConfiguration.py +1 -1
- metadata/generated/schema/configuration/authorizerConfiguration.py +1 -1
- metadata/generated/schema/configuration/changeEventConfiguration.py +1 -1
- metadata/generated/schema/configuration/dataQualityConfiguration.py +16 -0
- metadata/generated/schema/configuration/elasticSearchConfiguration.py +1 -1
- metadata/generated/schema/configuration/eventHandlerConfiguration.py +1 -1
- metadata/generated/schema/configuration/fernetConfiguration.py +1 -1
- metadata/generated/schema/configuration/jwtTokenConfiguration.py +1 -1
- metadata/generated/schema/configuration/kafkaEventConfiguration.py +1 -1
- metadata/generated/schema/configuration/ldapConfiguration.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/__init__.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/customTrustManagerConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/hostNameConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/jvmDefaultConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/trustAllConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/truststoreConfig.py +1 -1
- metadata/generated/schema/configuration/loginConfiguration.py +1 -1
- metadata/generated/schema/configuration/logoConfiguration.py +1 -1
- metadata/generated/schema/configuration/pipelineServiceClientConfiguration.py +1 -1
- metadata/generated/schema/configuration/slackAppConfiguration.py +1 -1
- metadata/generated/schema/configuration/taskNotificationConfiguration.py +1 -1
- metadata/generated/schema/configuration/testResultNotificationConfiguration.py +1 -1
- metadata/generated/schema/dataInsight/__init__.py +1 -1
- metadata/generated/schema/dataInsight/dataInsightChart.py +1 -1
- metadata/generated/schema/dataInsight/dataInsightChartResult.py +1 -1
- metadata/generated/schema/dataInsight/kpi/__init__.py +1 -1
- metadata/generated/schema/dataInsight/kpi/basic.py +1 -1
- metadata/generated/schema/dataInsight/kpi/kpi.py +1 -1
- metadata/generated/schema/dataInsight/type/__init__.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsCount.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsSize.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsCount.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsSize.py +1 -1
- metadata/generated/schema/dataInsight/type/dailyActiveUsers.py +1 -1
- metadata/generated/schema/dataInsight/type/mostActiveUsers.py +1 -1
- metadata/generated/schema/dataInsight/type/mostViewedEntities.py +1 -1
- metadata/generated/schema/dataInsight/type/pageViewsByEntities.py +1 -1
- metadata/generated/schema/dataInsight/type/percentageOfEntitiesWithDescriptionByType.py +1 -1
- metadata/generated/schema/dataInsight/type/percentageOfEntitiesWithOwnerByType.py +1 -1
- metadata/generated/schema/dataInsight/type/percentageOfServicesWithDescription.py +1 -1
- metadata/generated/schema/dataInsight/type/percentageOfServicesWithOwner.py +1 -1
- metadata/generated/schema/dataInsight/type/totalEntitiesByTier.py +1 -1
- metadata/generated/schema/dataInsight/type/totalEntitiesByType.py +1 -1
- metadata/generated/schema/dataInsight/type/unusedAssets.py +1 -1
- metadata/generated/schema/email/__init__.py +1 -1
- metadata/generated/schema/email/emailRequest.py +1 -1
- metadata/generated/schema/email/smtpSettings.py +1 -1
- metadata/generated/schema/entity/__init__.py +1 -1
- metadata/generated/schema/entity/applications/__init__.py +1 -1
- metadata/generated/schema/entity/applications/app.py +1 -1
- metadata/generated/schema/entity/applications/appRunRecord.py +1 -1
- metadata/generated/schema/entity/applications/configuration/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/applicationConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/autoTaggerAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/metaPilotAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/dataInsightsAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/dataInsightsReportAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/searchIndexingAppConfig.py +2 -2
- metadata/generated/schema/entity/applications/configuration/private/external/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/private/external/metaPilotAppPrivateConfig.py +1 -1
- metadata/generated/schema/entity/applications/createAppRequest.py +1 -1
- metadata/generated/schema/entity/applications/jobStatus.py +1 -1
- metadata/generated/schema/entity/applications/liveExecutionContext.py +1 -1
- metadata/generated/schema/entity/applications/marketplace/__init__.py +1 -1
- metadata/generated/schema/entity/applications/marketplace/appMarketPlaceDefinition.py +1 -1
- metadata/generated/schema/entity/applications/marketplace/createAppMarketPlaceDefinitionReq.py +1 -1
- metadata/generated/schema/entity/applications/scheduledExecutionContext.py +1 -1
- metadata/generated/schema/entity/automations/__init__.py +1 -1
- metadata/generated/schema/entity/automations/testServiceConnection.py +1 -1
- metadata/generated/schema/entity/automations/workflow.py +1 -1
- metadata/generated/schema/entity/bot.py +1 -1
- metadata/generated/schema/entity/classification/__init__.py +1 -1
- metadata/generated/schema/entity/classification/classification.py +1 -1
- metadata/generated/schema/entity/classification/tag.py +1 -1
- metadata/generated/schema/entity/data/__init__.py +1 -1
- metadata/generated/schema/entity/data/chart.py +1 -1
- metadata/generated/schema/entity/data/container.py +1 -1
- metadata/generated/schema/entity/data/dashboard.py +1 -1
- metadata/generated/schema/entity/data/dashboardDataModel.py +1 -1
- metadata/generated/schema/entity/data/database.py +1 -1
- metadata/generated/schema/entity/data/databaseSchema.py +1 -1
- metadata/generated/schema/entity/data/glossary.py +1 -1
- metadata/generated/schema/entity/data/glossaryTerm.py +1 -1
- metadata/generated/schema/entity/data/metrics.py +1 -1
- metadata/generated/schema/entity/data/mlmodel.py +2 -2
- metadata/generated/schema/entity/data/pipeline.py +1 -1
- metadata/generated/schema/entity/data/query.py +1 -1
- metadata/generated/schema/entity/data/report.py +1 -1
- metadata/generated/schema/entity/data/searchIndex.py +1 -1
- metadata/generated/schema/entity/data/storedProcedure.py +1 -1
- metadata/generated/schema/entity/data/table.py +1 -1
- metadata/generated/schema/entity/data/topic.py +1 -1
- metadata/generated/schema/entity/docStore/__init__.py +1 -1
- metadata/generated/schema/entity/docStore/document.py +1 -1
- metadata/generated/schema/entity/domains/__init__.py +1 -1
- metadata/generated/schema/entity/domains/dataProduct.py +1 -1
- metadata/generated/schema/entity/domains/domain.py +1 -1
- metadata/generated/schema/entity/events/__init__.py +1 -1
- metadata/generated/schema/entity/events/webhook.py +1 -1
- metadata/generated/schema/entity/feed/__init__.py +1 -1
- metadata/generated/schema/entity/feed/suggestion.py +1 -1
- metadata/generated/schema/entity/feed/thread.py +1 -1
- metadata/generated/schema/entity/policies/__init__.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/__init__.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/resourceDescriptor.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/resourcePermission.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/rule.py +1 -1
- metadata/generated/schema/entity/policies/filters.py +1 -1
- metadata/generated/schema/entity/policies/policy.py +1 -1
- metadata/generated/schema/entity/services/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/common/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/common/sslCertPaths.py +1 -1
- metadata/generated/schema/entity/services/connections/common/sslCertValues.py +1 -1
- metadata/generated/schema/entity/services/connections/common/sslConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/connectionBasicType.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/customDashboardConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/domoDashboardConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/lightdashConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/lookerConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/metabaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/modeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/mstrConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/powerBIConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/qlikSenseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/quickSightConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/redashConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/supersetConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/tableauConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/athenaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/azureSQLConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/bigQueryConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/bigTableConnection.py +41 -0
- metadata/generated/schema/entity/services/connections/database/clickhouseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/basicAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/iamAuthConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/jwtAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/database/couchbaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/customDatabaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/databricksConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/azureConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/gcsConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/s3Config.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalakeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/db2Connection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/deltaLakeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/domoDatabaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/dorisConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/druidConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/dynamoDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/glueConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/greenplumConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/hiveConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/dynamoDbCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/glueCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/hiveCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/icebergCatalog.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/icebergFileSystem.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/restCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/icebergConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/impalaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mariaDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mongoDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mssqlConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mysqlConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/oracleConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/pinotDBConnection.py +8 -6
- metadata/generated/schema/entity/services/connections/database/postgresConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/prestoConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/redshiftConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/salesforceConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sapHanaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sasConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/singleStoreConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/snowflakeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sqliteConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/trinoConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/unityCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/verticaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/customMessagingConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/kafkaConnection.py +14 -2
- metadata/generated/schema/entity/services/connections/messaging/kinesisConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/pulsarConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/redpandaConnection.py +14 -2
- metadata/generated/schema/entity/services/connections/messaging/saslMechanismType.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/alationConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/amundsenConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/atlasConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/metadataESConnection.py +2 -2
- metadata/generated/schema/entity/services/connections/metadata/openMetadataConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/customMlModelConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/mlflowConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/sageMakerConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/sklearnConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/airbyteConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/airflowConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/backendConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/customPipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/dagsterConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/databricksPipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/domoPipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/fivetranConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/gluePipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/nifiConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/sparkConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/splineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/search/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/search/customSearchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearch/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearch/apiAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearch/basicAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/search/openSearchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/serviceConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/adlsConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/customStorageConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/gcsConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/s3Connection.py +1 -1
- metadata/generated/schema/entity/services/connections/testConnectionDefinition.py +1 -1
- metadata/generated/schema/entity/services/connections/testConnectionResult.py +1 -1
- metadata/generated/schema/entity/services/dashboardService.py +1 -1
- metadata/generated/schema/entity/services/databaseService.py +4 -1
- metadata/generated/schema/entity/services/ingestionPipelines/__init__.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/ingestionPipeline.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/pipelineServiceClientResponse.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/status.py +1 -1
- metadata/generated/schema/entity/services/messagingService.py +1 -1
- metadata/generated/schema/entity/services/metadataService.py +1 -1
- metadata/generated/schema/entity/services/mlmodelService.py +1 -1
- metadata/generated/schema/entity/services/pipelineService.py +1 -1
- metadata/generated/schema/entity/services/searchService.py +1 -1
- metadata/generated/schema/entity/services/serviceType.py +1 -1
- metadata/generated/schema/entity/services/storageService.py +1 -1
- metadata/generated/schema/entity/teams/__init__.py +1 -1
- metadata/generated/schema/entity/teams/persona.py +1 -1
- metadata/generated/schema/entity/teams/role.py +1 -1
- metadata/generated/schema/entity/teams/team.py +1 -1
- metadata/generated/schema/entity/teams/teamHierarchy.py +1 -1
- metadata/generated/schema/entity/teams/user.py +1 -1
- metadata/generated/schema/entity/type.py +3 -18
- metadata/generated/schema/entity/utils/__init__.py +1 -1
- metadata/generated/schema/entity/utils/entitiesCount.py +1 -1
- metadata/generated/schema/entity/utils/servicesCount.py +1 -1
- metadata/generated/schema/entity/utils/supersetApiConnection.py +1 -1
- metadata/generated/schema/events/__init__.py +1 -1
- metadata/generated/schema/events/alertMetrics.py +1 -1
- metadata/generated/schema/events/api/__init__.py +1 -1
- metadata/generated/schema/events/api/createEventSubscription.py +1 -1
- metadata/generated/schema/events/emailAlertConfig.py +1 -1
- metadata/generated/schema/events/eventFilterRule.py +1 -1
- metadata/generated/schema/events/eventSubscription.py +5 -3
- metadata/generated/schema/events/eventSubscriptionOffset.py +1 -1
- metadata/generated/schema/events/failedEvent.py +1 -1
- metadata/generated/schema/events/filterResourceDescriptor.py +1 -1
- metadata/generated/schema/events/subscriptionResourceDescriptor.py +1 -1
- metadata/generated/schema/metadataIngestion/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/application.py +1 -1
- metadata/generated/schema/metadataIngestion/applicationPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dashboardServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dataInsightPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceProfilerPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceQueryLineagePipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceQueryUsagePipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtAzureConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtBucketDetails.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtCloudConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtGCSConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtHttpConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtLocalConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtS3Config.py +1 -1
- metadata/generated/schema/metadataIngestion/messagingServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/metadataToElasticSearchPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/mlmodelServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/pipelineServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/searchServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/containerMetadataConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/manifestMetadataConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageBucketDetails.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataADLSConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataGCSConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataHttpConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataLocalConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataS3Config.py +1 -1
- metadata/generated/schema/metadataIngestion/storageServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/testSuitePipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/workflow.py +1 -1
- metadata/generated/schema/monitoring/__init__.py +1 -1
- metadata/generated/schema/monitoring/eventMonitorProvider.py +1 -1
- metadata/generated/schema/security/__init__.py +1 -1
- metadata/generated/schema/security/client/__init__.py +1 -1
- metadata/generated/schema/security/client/auth0SSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/azureSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/customOidcSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/googleSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/oktaSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/openMetadataJWTClientConfig.py +1 -1
- metadata/generated/schema/security/client/samlSSOClientConfig.py +1 -1
- metadata/generated/schema/security/credentials/__init__.py +1 -1
- metadata/generated/schema/security/credentials/accessTokenAuth.py +1 -1
- metadata/generated/schema/security/credentials/apiAccessTokenAuth.py +1 -1
- metadata/generated/schema/security/credentials/awsCredentials.py +1 -1
- metadata/generated/schema/security/credentials/azureCredentials.py +16 -9
- metadata/generated/schema/security/credentials/basicAuth.py +1 -1
- metadata/generated/schema/security/credentials/bitbucketCredentials.py +1 -1
- metadata/generated/schema/security/credentials/gcpCredentials.py +1 -1
- metadata/generated/schema/security/credentials/gcpValues.py +1 -1
- metadata/generated/schema/security/credentials/gitCredentials.py +1 -1
- metadata/generated/schema/security/credentials/githubCredentials.py +1 -1
- metadata/generated/schema/security/secrets/__init__.py +1 -1
- metadata/generated/schema/security/secrets/secretsManagerClientLoader.py +1 -1
- metadata/generated/schema/security/secrets/secretsManagerConfiguration.py +1 -1
- metadata/generated/schema/security/secrets/secretsManagerProvider.py +3 -1
- metadata/generated/schema/security/securityConfiguration.py +1 -1
- metadata/generated/schema/security/ssl/__init__.py +1 -1
- metadata/generated/schema/security/ssl/validateSSLClientConfig.py +1 -1
- metadata/generated/schema/security/ssl/verifySSLConfig.py +1 -1
- metadata/generated/schema/settings/__init__.py +1 -1
- metadata/generated/schema/settings/settings.py +1 -1
- metadata/generated/schema/system/__init__.py +1 -1
- metadata/generated/schema/system/entityError.py +1 -1
- metadata/generated/schema/system/eventPublisherJob.py +1 -1
- metadata/generated/schema/system/indexingError.py +1 -1
- metadata/generated/schema/system/ui/__init__.py +1 -1
- metadata/generated/schema/system/ui/knowledgePanel.py +1 -1
- metadata/generated/schema/system/ui/page.py +1 -1
- metadata/generated/schema/tests/__init__.py +1 -1
- metadata/generated/schema/tests/assigned.py +1 -1
- metadata/generated/schema/tests/basic.py +1 -1
- metadata/generated/schema/tests/customMetric.py +1 -1
- metadata/generated/schema/tests/resolved.py +1 -1
- metadata/generated/schema/tests/testCase.py +1 -1
- metadata/generated/schema/tests/testCaseResolutionStatus.py +1 -1
- metadata/generated/schema/tests/testDefinition.py +2 -1
- metadata/generated/schema/tests/testSuite.py +1 -1
- metadata/generated/schema/type/__init__.py +1 -1
- metadata/generated/schema/type/auditLog.py +1 -1
- metadata/generated/schema/type/basic.py +6 -2
- metadata/generated/schema/type/bulkOperationResult.py +1 -1
- metadata/generated/schema/type/changeEvent.py +1 -1
- metadata/generated/schema/type/changeEventType.py +1 -1
- metadata/generated/schema/type/collectionDescriptor.py +1 -1
- metadata/generated/schema/type/csvDocumentation.py +1 -1
- metadata/generated/schema/type/csvErrorType.py +1 -1
- metadata/generated/schema/type/csvFile.py +1 -1
- metadata/generated/schema/type/csvImportResult.py +1 -1
- metadata/generated/schema/type/customProperties/__init__.py +3 -0
- metadata/generated/schema/type/customProperties/enumConfig.py +17 -0
- metadata/generated/schema/type/customProperty.py +52 -0
- metadata/generated/schema/type/dailyCount.py +1 -1
- metadata/generated/schema/type/databaseConnectionConfig.py +1 -1
- metadata/generated/schema/type/entityHistory.py +1 -1
- metadata/generated/schema/type/entityLineage.py +1 -1
- metadata/generated/schema/type/entityReference.py +1 -1
- metadata/generated/schema/type/entityReferenceList.py +1 -1
- metadata/generated/schema/type/entityRelationship.py +1 -1
- metadata/generated/schema/type/entityUsage.py +1 -1
- metadata/generated/schema/type/filterPattern.py +1 -1
- metadata/generated/schema/type/function.py +1 -1
- metadata/generated/schema/type/include.py +1 -1
- metadata/generated/schema/type/jdbcConnection.py +1 -1
- metadata/generated/schema/type/lifeCycle.py +1 -1
- metadata/generated/schema/type/paging.py +1 -1
- metadata/generated/schema/type/profile.py +1 -1
- metadata/generated/schema/type/queryParserData.py +1 -1
- metadata/generated/schema/type/reaction.py +1 -1
- metadata/generated/schema/type/schedule.py +1 -1
- metadata/generated/schema/type/schema.py +1 -1
- metadata/generated/schema/type/tableQuery.py +1 -1
- metadata/generated/schema/type/tableUsageCount.py +1 -1
- metadata/generated/schema/type/tagLabel.py +1 -1
- metadata/generated/schema/type/usageDetails.py +1 -1
- metadata/generated/schema/type/usageRequest.py +1 -1
- metadata/generated/schema/type/votes.py +1 -1
- metadata/great_expectations/action.py +4 -15
- metadata/ingestion/api/steps.py +14 -1
- metadata/ingestion/api/topology_runner.py +4 -1
- metadata/ingestion/models/custom_properties.py +0 -1
- metadata/ingestion/models/patch_request.py +61 -9
- metadata/ingestion/ometa/client.py +6 -0
- metadata/ingestion/ometa/mixins/custom_property_mixin.py +11 -11
- metadata/ingestion/ometa/mixins/patch_mixin.py +2 -0
- metadata/ingestion/ometa/ometa_api.py +1 -1
- metadata/ingestion/sink/metadata_rest.py +4 -2
- metadata/ingestion/source/dashboard/looker/metadata.py +3 -4
- metadata/ingestion/source/dashboard/metabase/client.py +4 -0
- metadata/ingestion/source/dashboard/metabase/metadata.py +5 -4
- metadata/ingestion/source/dashboard/metabase/models.py +2 -2
- metadata/ingestion/source/dashboard/tableau/metadata.py +18 -0
- metadata/ingestion/source/database/bigquery/helper.py +68 -1
- metadata/ingestion/source/database/bigquery/metadata.py +12 -3
- metadata/ingestion/source/database/bigquery/queries.py +22 -0
- metadata/ingestion/source/database/bigtable/client.py +62 -0
- metadata/ingestion/source/database/bigtable/connection.py +116 -0
- metadata/ingestion/source/database/bigtable/metadata.py +224 -0
- metadata/ingestion/source/database/bigtable/models.py +60 -0
- metadata/ingestion/source/database/common_db_source.py +2 -2
- metadata/ingestion/source/database/common_nosql_source.py +19 -2
- metadata/ingestion/source/database/databricks/metadata.py +132 -46
- metadata/ingestion/source/database/databricks/queries.py +3 -4
- metadata/ingestion/source/database/dbt/metadata.py +16 -28
- metadata/ingestion/source/database/oracle/queries.py +2 -2
- metadata/ingestion/source/messaging/common_broker_source.py +9 -7
- metadata/ingestion/source/messaging/kafka/connection.py +45 -4
- metadata/ingestion/source/mlmodel/sagemaker/metadata.py +20 -8
- metadata/profiler/orm/functions/conn_test.py +1 -0
- metadata/profiler/orm/functions/sum.py +1 -0
- metadata/profiler/orm/registry.py +1 -0
- metadata/profiler/processor/core.py +2 -2
- metadata/utils/datalake/datalake_utils.py +7 -1
- metadata/utils/execution_time_tracker.py +199 -0
- metadata/utils/filters.py +4 -0
- metadata/utils/helpers.py +0 -51
- metadata/utils/secrets/aws_based_secrets_manager.py +67 -4
- metadata/utils/secrets/aws_secrets_manager.py +7 -2
- metadata/utils/secrets/aws_ssm_secrets_manager.py +7 -2
- metadata/utils/secrets/azure_kv_secrets_manager.py +148 -0
- metadata/utils/secrets/external_secrets_manager.py +25 -3
- metadata/utils/secrets/secrets_manager_factory.py +13 -30
- metadata/workflow/base.py +4 -0
- metadata/workflow/output_handler.py +22 -0
- {openmetadata_ingestion-1.3.0.1.dist-info → openmetadata_ingestion-1.3.1.1.dist-info}/METADATA +298 -289
- {openmetadata_ingestion-1.3.0.1.dist-info → openmetadata_ingestion-1.3.1.1.dist-info}/RECORD +575 -564
- {openmetadata_ingestion-1.3.0.1.dist-info → openmetadata_ingestion-1.3.1.1.dist-info}/WHEEL +1 -1
- metadata/utils/secrets/client/loader.py +0 -77
- /metadata/{utils/secrets/client → ingestion/source/database/bigtable}/__init__.py +0 -0
- /metadata/utils/secrets/{noop_secrets_manager.py → db_secrets_manager.py} +0 -0
- {openmetadata_ingestion-1.3.0.1.dist-info → openmetadata_ingestion-1.3.1.1.dist-info}/LICENSE +0 -0
- {openmetadata_ingestion-1.3.0.1.dist-info → openmetadata_ingestion-1.3.1.1.dist-info}/entry_points.txt +0 -0
- {openmetadata_ingestion-1.3.0.1.dist-info → openmetadata_ingestion-1.3.1.1.dist-info}/top_level.txt +0 -0
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
import re
|
|
14
14
|
import traceback
|
|
15
15
|
from copy import deepcopy
|
|
16
|
-
from typing import Iterable, Optional, Tuple
|
|
16
|
+
from typing import Iterable, Optional, Tuple, Union
|
|
17
17
|
|
|
18
18
|
from pyhive.sqlalchemy_hive import _type_map
|
|
19
19
|
from sqlalchemy import types, util
|
|
@@ -66,6 +66,7 @@ logger = ingestion_logger()
|
|
|
66
66
|
|
|
67
67
|
DATABRICKS_TAG = "DATABRICK TAG"
|
|
68
68
|
DATABRICKS_TAG_CLASSIFICATION = "DATABRICK TAG CLASSIFICATION"
|
|
69
|
+
DEFAULT_TAG_VALUE = "NONE"
|
|
69
70
|
|
|
70
71
|
|
|
71
72
|
class STRUCT(String):
|
|
@@ -268,6 +269,10 @@ class DatabricksSource(CommonDbSourceService, MultiDBSource):
|
|
|
268
269
|
super().__init__(config, metadata)
|
|
269
270
|
self.is_older_version = False
|
|
270
271
|
self._init_version()
|
|
272
|
+
self.catalog_tags = {}
|
|
273
|
+
self.schema_tags = {}
|
|
274
|
+
self.table_tags = {}
|
|
275
|
+
self.column_tags = {}
|
|
271
276
|
|
|
272
277
|
def _init_version(self):
|
|
273
278
|
try:
|
|
@@ -314,10 +319,103 @@ class DatabricksSource(CommonDbSourceService, MultiDBSource):
|
|
|
314
319
|
else:
|
|
315
320
|
yield DEFAULT_DATABASE
|
|
316
321
|
|
|
322
|
+
def _clear_tag_cache(self) -> None:
|
|
323
|
+
"""
|
|
324
|
+
Method to clean any existing tags available in memory
|
|
325
|
+
"""
|
|
326
|
+
self.catalog_tags.clear()
|
|
327
|
+
self.table_tags.clear()
|
|
328
|
+
self.schema_tags.clear()
|
|
329
|
+
self.column_tags.clear()
|
|
330
|
+
|
|
331
|
+
def _add_to_tag_cache(
|
|
332
|
+
self, tag_dict: dict, key: Union[str, Tuple], value: Tuple[str, str]
|
|
333
|
+
):
|
|
334
|
+
if tag_dict.get(key):
|
|
335
|
+
tag_dict.get(key).append(value)
|
|
336
|
+
else:
|
|
337
|
+
tag_dict[key] = [value]
|
|
338
|
+
|
|
339
|
+
def populate_tags_cache(self, database_name: str) -> None:
|
|
340
|
+
"""
|
|
341
|
+
Method to fetch all the tags and populate the relevant caches
|
|
342
|
+
"""
|
|
343
|
+
self._clear_tag_cache()
|
|
344
|
+
if self.source_config.includeTags is False:
|
|
345
|
+
return
|
|
346
|
+
try:
|
|
347
|
+
tags = self.connection.execute(
|
|
348
|
+
DATABRICKS_GET_CATALOGS_TAGS.format(database_name=database_name)
|
|
349
|
+
)
|
|
350
|
+
|
|
351
|
+
for tag in tags:
|
|
352
|
+
self._add_to_tag_cache(
|
|
353
|
+
self.catalog_tags,
|
|
354
|
+
tag.catalog_name,
|
|
355
|
+
# tag value is an optional field, if tag value is not available use default tag value
|
|
356
|
+
(tag.tag_name, tag.tag_value or DEFAULT_TAG_VALUE),
|
|
357
|
+
)
|
|
358
|
+
except Exception as exc:
|
|
359
|
+
logger.debug(f"Failed to fetch catalog tags due to - {exc}")
|
|
360
|
+
|
|
361
|
+
try:
|
|
362
|
+
tags = self.connection.execute(
|
|
363
|
+
DATABRICKS_GET_SCHEMA_TAGS.format(database_name=database_name)
|
|
364
|
+
)
|
|
365
|
+
for tag in tags:
|
|
366
|
+
self._add_to_tag_cache(
|
|
367
|
+
self.schema_tags,
|
|
368
|
+
(tag.catalog_name, tag.schema_name),
|
|
369
|
+
# tag value is an optional field, if tag value is not available use default tag value
|
|
370
|
+
(tag.tag_name, tag.tag_value or DEFAULT_TAG_VALUE),
|
|
371
|
+
)
|
|
372
|
+
except Exception as exc:
|
|
373
|
+
logger.debug(f"Failed to fetch schema tags due to - {exc}")
|
|
374
|
+
|
|
375
|
+
try:
|
|
376
|
+
tags = self.connection.execute(
|
|
377
|
+
DATABRICKS_GET_TABLE_TAGS.format(database_name=database_name)
|
|
378
|
+
)
|
|
379
|
+
for tag in tags:
|
|
380
|
+
self._add_to_tag_cache(
|
|
381
|
+
self.table_tags,
|
|
382
|
+
(tag.catalog_name, tag.schema_name, tag.table_name),
|
|
383
|
+
# tag value is an optional field, if tag value is not available use default tag value
|
|
384
|
+
(tag.tag_name, tag.tag_value or DEFAULT_TAG_VALUE),
|
|
385
|
+
)
|
|
386
|
+
except Exception as exc:
|
|
387
|
+
logger.debug(f"Failed to fetch table tags due to - {exc}")
|
|
388
|
+
|
|
389
|
+
try:
|
|
390
|
+
tags = self.connection.execute(
|
|
391
|
+
DATABRICKS_GET_COLUMN_TAGS.format(database_name=database_name)
|
|
392
|
+
)
|
|
393
|
+
for tag in tags:
|
|
394
|
+
tag_table_id = (tag.catalog_name, tag.schema_name, tag.table_name)
|
|
395
|
+
if self.column_tags.get(tag_table_id):
|
|
396
|
+
self._add_to_tag_cache(
|
|
397
|
+
self.column_tags.get(tag_table_id),
|
|
398
|
+
tag.column_name,
|
|
399
|
+
# tag value is an optional field, if tag value is not available use default tag value
|
|
400
|
+
(tag.tag_name, tag.tag_value or DEFAULT_TAG_VALUE),
|
|
401
|
+
)
|
|
402
|
+
else:
|
|
403
|
+
self.column_tags[tag_table_id] = {
|
|
404
|
+
tag.column_name: [
|
|
405
|
+
(
|
|
406
|
+
tag.tag_name,
|
|
407
|
+
tag.tag_value or DEFAULT_TAG_VALUE,
|
|
408
|
+
)
|
|
409
|
+
]
|
|
410
|
+
}
|
|
411
|
+
except Exception as exc:
|
|
412
|
+
logger.debug(f"Failed to fetch column tags due to - {exc}")
|
|
413
|
+
|
|
317
414
|
def get_database_names(self) -> Iterable[str]:
|
|
318
415
|
configured_catalog = self.service_connection.catalog
|
|
319
416
|
if configured_catalog:
|
|
320
417
|
self.set_inspector(database_name=configured_catalog)
|
|
418
|
+
self.populate_tags_cache(database_name=configured_catalog)
|
|
321
419
|
yield configured_catalog
|
|
322
420
|
else:
|
|
323
421
|
for new_catalog in self.get_database_names_raw():
|
|
@@ -337,6 +435,7 @@ class DatabricksSource(CommonDbSourceService, MultiDBSource):
|
|
|
337
435
|
continue
|
|
338
436
|
try:
|
|
339
437
|
self.set_inspector(database_name=new_catalog)
|
|
438
|
+
self.populate_tags_cache(database_name=new_catalog)
|
|
340
439
|
yield new_catalog
|
|
341
440
|
except Exception as exc:
|
|
342
441
|
logger.error(traceback.format_exc())
|
|
@@ -361,10 +460,8 @@ class DatabricksSource(CommonDbSourceService, MultiDBSource):
|
|
|
361
460
|
Method to yield database tags
|
|
362
461
|
"""
|
|
363
462
|
try:
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
)
|
|
367
|
-
for tag in tags:
|
|
463
|
+
catalog_tags = self.catalog_tags.get(database_name, [])
|
|
464
|
+
for tag_name, tag_value in catalog_tags:
|
|
368
465
|
yield from get_ometa_tag_and_classification(
|
|
369
466
|
tag_fqn=fqn.build(
|
|
370
467
|
self.metadata,
|
|
@@ -372,8 +469,8 @@ class DatabricksSource(CommonDbSourceService, MultiDBSource):
|
|
|
372
469
|
service_name=self.context.database_service,
|
|
373
470
|
database_name=database_name,
|
|
374
471
|
),
|
|
375
|
-
tags=[
|
|
376
|
-
classification_name=
|
|
472
|
+
tags=[tag_value],
|
|
473
|
+
classification_name=tag_name,
|
|
377
474
|
tag_description=DATABRICKS_TAG,
|
|
378
475
|
classification_description=DATABRICKS_TAG_CLASSIFICATION,
|
|
379
476
|
)
|
|
@@ -394,12 +491,8 @@ class DatabricksSource(CommonDbSourceService, MultiDBSource):
|
|
|
394
491
|
Method to yield schema tags
|
|
395
492
|
"""
|
|
396
493
|
try:
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
database_name=self.context.database, schema_name=schema_name
|
|
400
|
-
)
|
|
401
|
-
)
|
|
402
|
-
for tag in tags:
|
|
494
|
+
schema_tags = self.schema_tags.get((self.context.database, schema_name), [])
|
|
495
|
+
for tag_name, tag_value in schema_tags:
|
|
403
496
|
yield from get_ometa_tag_and_classification(
|
|
404
497
|
tag_fqn=fqn.build(
|
|
405
498
|
self.metadata,
|
|
@@ -408,8 +501,8 @@ class DatabricksSource(CommonDbSourceService, MultiDBSource):
|
|
|
408
501
|
database_name=self.context.database,
|
|
409
502
|
schema_name=schema_name,
|
|
410
503
|
),
|
|
411
|
-
tags=[
|
|
412
|
-
classification_name=
|
|
504
|
+
tags=[tag_value],
|
|
505
|
+
classification_name=tag_name,
|
|
413
506
|
tag_description=DATABRICKS_TAG,
|
|
414
507
|
classification_description=DATABRICKS_TAG_CLASSIFICATION,
|
|
415
508
|
)
|
|
@@ -428,14 +521,10 @@ class DatabricksSource(CommonDbSourceService, MultiDBSource):
|
|
|
428
521
|
) -> Iterable[Either[OMetaTagAndClassification]]:
|
|
429
522
|
table_name, _ = table_name_and_type
|
|
430
523
|
try:
|
|
431
|
-
table_tags = self.
|
|
432
|
-
|
|
433
|
-
database_name=self.context.database,
|
|
434
|
-
schema_name=self.context.database_schema,
|
|
435
|
-
table_name=table_name,
|
|
436
|
-
)
|
|
524
|
+
table_tags = self.table_tags.get(
|
|
525
|
+
(self.context.database, self.context.database_schema, table_name), []
|
|
437
526
|
)
|
|
438
|
-
for
|
|
527
|
+
for tag_name, tag_value in table_tags:
|
|
439
528
|
yield from get_ometa_tag_and_classification(
|
|
440
529
|
tag_fqn=fqn.build(
|
|
441
530
|
self.metadata,
|
|
@@ -445,35 +534,32 @@ class DatabricksSource(CommonDbSourceService, MultiDBSource):
|
|
|
445
534
|
schema_name=self.context.database_schema,
|
|
446
535
|
table_name=table_name,
|
|
447
536
|
),
|
|
448
|
-
tags=[
|
|
449
|
-
classification_name=
|
|
537
|
+
tags=[tag_value],
|
|
538
|
+
classification_name=tag_name,
|
|
450
539
|
tag_description=DATABRICKS_TAG,
|
|
451
540
|
classification_description=DATABRICKS_TAG_CLASSIFICATION,
|
|
452
541
|
)
|
|
453
542
|
|
|
454
|
-
column_tags = self.
|
|
455
|
-
|
|
456
|
-
database_name=self.context.database,
|
|
457
|
-
schema_name=self.context.database_schema,
|
|
458
|
-
table_name=table_name,
|
|
459
|
-
)
|
|
543
|
+
column_tags = self.column_tags.get(
|
|
544
|
+
(self.context.database, self.context.database_schema, table_name), {}
|
|
460
545
|
)
|
|
461
|
-
for
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
546
|
+
for column_name, tags in column_tags.items():
|
|
547
|
+
for tag_name, tag_value in tags or []:
|
|
548
|
+
yield from get_ometa_tag_and_classification(
|
|
549
|
+
tag_fqn=fqn.build(
|
|
550
|
+
self.metadata,
|
|
551
|
+
Column,
|
|
552
|
+
service_name=self.context.database_service,
|
|
553
|
+
database_name=self.context.database,
|
|
554
|
+
schema_name=self.context.database_schema,
|
|
555
|
+
table_name=table_name,
|
|
556
|
+
column_name=column_name,
|
|
557
|
+
),
|
|
558
|
+
tags=[tag_value],
|
|
559
|
+
classification_name=tag_name,
|
|
560
|
+
tag_description=DATABRICKS_TAG,
|
|
561
|
+
classification_description=DATABRICKS_TAG_CLASSIFICATION,
|
|
562
|
+
)
|
|
477
563
|
|
|
478
564
|
except Exception as exc:
|
|
479
565
|
yield Either(
|
|
@@ -37,8 +37,7 @@ DATABRICKS_GET_SCHEMA_TAGS = textwrap.dedent(
|
|
|
37
37
|
"""
|
|
38
38
|
SELECT
|
|
39
39
|
*
|
|
40
|
-
FROM {database_name}.information_schema.schema_tags
|
|
41
|
-
WHERE schema_name='{schema_name}'"""
|
|
40
|
+
FROM {database_name}.information_schema.schema_tags"""
|
|
42
41
|
)
|
|
43
42
|
|
|
44
43
|
DATABRICKS_GET_TABLE_TAGS = textwrap.dedent(
|
|
@@ -46,7 +45,7 @@ DATABRICKS_GET_TABLE_TAGS = textwrap.dedent(
|
|
|
46
45
|
SELECT
|
|
47
46
|
*
|
|
48
47
|
FROM {database_name}.information_schema.table_tags
|
|
49
|
-
|
|
48
|
+
"""
|
|
50
49
|
)
|
|
51
50
|
|
|
52
51
|
DATABRICKS_GET_COLUMN_TAGS = textwrap.dedent(
|
|
@@ -54,5 +53,5 @@ DATABRICKS_GET_COLUMN_TAGS = textwrap.dedent(
|
|
|
54
53
|
SELECT
|
|
55
54
|
*
|
|
56
55
|
FROM {database_name}.information_schema.column_tags
|
|
57
|
-
|
|
56
|
+
"""
|
|
58
57
|
)
|
|
@@ -31,8 +31,6 @@ from metadata.generated.schema.entity.services.databaseService import DatabaseSe
|
|
|
31
31
|
from metadata.generated.schema.entity.services.ingestionPipelines.status import (
|
|
32
32
|
StackTraceError,
|
|
33
33
|
)
|
|
34
|
-
from metadata.generated.schema.entity.teams.team import Team
|
|
35
|
-
from metadata.generated.schema.entity.teams.user import User
|
|
36
34
|
from metadata.generated.schema.metadataIngestion.workflow import (
|
|
37
35
|
Source as WorkflowSource,
|
|
38
36
|
)
|
|
@@ -140,39 +138,29 @@ class DbtSource(DbtServiceSource):
|
|
|
140
138
|
|
|
141
139
|
def get_dbt_owner(
|
|
142
140
|
self, manifest_node: dict, catalog_node: Optional[dict]
|
|
143
|
-
) -> Optional[
|
|
141
|
+
) -> Optional[EntityReference]:
|
|
144
142
|
"""
|
|
145
143
|
Returns dbt owner
|
|
146
144
|
"""
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
)
|
|
158
|
-
if user_owner_fqn:
|
|
159
|
-
owner = self.metadata.get_entity_reference(
|
|
160
|
-
entity=User, fqn=user_owner_fqn
|
|
161
|
-
)
|
|
162
|
-
else:
|
|
163
|
-
team_owner_fqn = fqn.build(
|
|
164
|
-
self.metadata, entity_type=Team, team_name=owner_name
|
|
165
|
-
)
|
|
166
|
-
if team_owner_fqn:
|
|
167
|
-
owner = self.metadata.get_entity_reference(
|
|
168
|
-
entity=Team, fqn=team_owner_fqn
|
|
169
|
-
)
|
|
170
|
-
else:
|
|
145
|
+
try:
|
|
146
|
+
owner = None
|
|
147
|
+
dbt_owner = None
|
|
148
|
+
if catalog_node:
|
|
149
|
+
dbt_owner = catalog_node.metadata.owner
|
|
150
|
+
if manifest_node:
|
|
151
|
+
dbt_owner = manifest_node.meta.get(DbtCommonEnum.OWNER.value)
|
|
152
|
+
if dbt_owner:
|
|
153
|
+
owner = self.metadata.get_reference_by_name(name=dbt_owner)
|
|
154
|
+
if not owner:
|
|
171
155
|
logger.warning(
|
|
172
156
|
"Unable to ingest owner from DBT since no user or"
|
|
173
157
|
f" team was found with name {dbt_owner}"
|
|
174
158
|
)
|
|
175
|
-
|
|
159
|
+
return owner
|
|
160
|
+
except Exception as exc:
|
|
161
|
+
logger.debug(traceback.format_exc())
|
|
162
|
+
logger.warning(f"Unable to ingest owner from DBT due to: {exc}")
|
|
163
|
+
return None
|
|
176
164
|
|
|
177
165
|
def check_columns(self, catalog_node):
|
|
178
166
|
for catalog_key, catalog_column in catalog_node.get("columns").items():
|
|
@@ -172,14 +172,14 @@ SELECT
|
|
|
172
172
|
SQL_FULLTEXT AS query_text,
|
|
173
173
|
TO_TIMESTAMP(FIRST_LOAD_TIME, 'yy-MM-dd/HH24:MI:SS') AS start_time,
|
|
174
174
|
ELAPSED_TIME / 1000 AS duration,
|
|
175
|
-
TO_TIMESTAMP(FIRST_LOAD_TIME, 'yy-MM-dd/HH24:MI:SS') + NUMTODSINTERVAL(ELAPSED_TIME /
|
|
175
|
+
TO_TIMESTAMP(FIRST_LOAD_TIME, 'yy-MM-dd/HH24:MI:SS') + NUMTODSINTERVAL(ELAPSED_TIME / 1000000, 'SECOND') AS end_time
|
|
176
176
|
FROM gv$sql
|
|
177
177
|
WHERE OBJECT_STATUS = 'VALID'
|
|
178
178
|
{filters}
|
|
179
179
|
AND SQL_FULLTEXT NOT LIKE '/* {{"app": "OpenMetadata", %%}} */%%'
|
|
180
180
|
AND SQL_FULLTEXT NOT LIKE '/* {{"app": "dbt", %%}} */%%'
|
|
181
181
|
AND TO_TIMESTAMP(FIRST_LOAD_TIME, 'yy-MM-dd/HH24:MI:SS') >= TO_TIMESTAMP('{start_time}', 'yy-MM-dd HH24:MI:SS')
|
|
182
|
-
AND TO_TIMESTAMP(FIRST_LOAD_TIME, 'yy-MM-dd/HH24:MI:SS') + NUMTODSINTERVAL(ELAPSED_TIME /
|
|
182
|
+
AND TO_TIMESTAMP(FIRST_LOAD_TIME, 'yy-MM-dd/HH24:MI:SS') + NUMTODSINTERVAL(ELAPSED_TIME / 1000000, 'SECOND')
|
|
183
183
|
< TO_TIMESTAMP('{end_time}', 'yy-MM-dd HH24:MI:SS')
|
|
184
184
|
ORDER BY FIRST_LOAD_TIME DESC
|
|
185
185
|
OFFSET 0 ROWS FETCH NEXT {result_limit} ROWS ONLY
|
|
@@ -226,14 +226,16 @@ class CommonBrokerSource(MessagingServiceSource, ABC):
|
|
|
226
226
|
if topic_entity and self.generate_sample_data:
|
|
227
227
|
topic_name = topic_details.topic_name
|
|
228
228
|
sample_data = []
|
|
229
|
+
messages = None
|
|
229
230
|
try:
|
|
230
|
-
self.consumer_client
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
231
|
+
if self.consumer_client:
|
|
232
|
+
self.consumer_client.subscribe(
|
|
233
|
+
[topic_name], on_assign=on_partitions_assignment_to_consumer
|
|
234
|
+
)
|
|
235
|
+
logger.info(
|
|
236
|
+
f"Broker consumer polling for sample messages in topic {topic_name}"
|
|
237
|
+
)
|
|
238
|
+
messages = self.consumer_client.consume(num_messages=10, timeout=10)
|
|
237
239
|
except Exception as exc:
|
|
238
240
|
yield Either(
|
|
239
241
|
left=StackTraceError(
|
|
@@ -15,7 +15,7 @@ Source connection handler
|
|
|
15
15
|
from dataclasses import dataclass
|
|
16
16
|
from typing import Optional, Union
|
|
17
17
|
|
|
18
|
-
from confluent_kafka.admin import AdminClient
|
|
18
|
+
from confluent_kafka.admin import AdminClient, KafkaException
|
|
19
19
|
from confluent_kafka.avro import AvroConsumer
|
|
20
20
|
from confluent_kafka.schema_registry.schema_registry_client import SchemaRegistryClient
|
|
21
21
|
|
|
@@ -35,6 +35,21 @@ from metadata.utils.logger import ingestion_logger
|
|
|
35
35
|
logger = ingestion_logger()
|
|
36
36
|
|
|
37
37
|
|
|
38
|
+
class InvalidKafkaCreds(Exception):
|
|
39
|
+
"""
|
|
40
|
+
Class to indicate invalid kafka credentials exception
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class SchemaRegistryException(Exception):
|
|
45
|
+
"""
|
|
46
|
+
Class to indicate invalid schema registry not initialized
|
|
47
|
+
"""
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
TIMEOUT_SECONDS = 10
|
|
51
|
+
|
|
52
|
+
|
|
38
53
|
@dataclass
|
|
39
54
|
class KafkaClient:
|
|
40
55
|
def __init__(self, admin_client, schema_registry_client, consumer_client) -> None:
|
|
@@ -60,6 +75,14 @@ def get_connection(
|
|
|
60
75
|
if connection.saslMechanism:
|
|
61
76
|
connection.consumerConfig["sasl.mechanism"] = connection.saslMechanism.value
|
|
62
77
|
|
|
78
|
+
if (
|
|
79
|
+
connection.consumerConfig.get("security.protocol") is None
|
|
80
|
+
and connection.securityProtocol
|
|
81
|
+
):
|
|
82
|
+
connection.consumerConfig[
|
|
83
|
+
"security.protocol"
|
|
84
|
+
] = connection.securityProtocol.value
|
|
85
|
+
|
|
63
86
|
if connection.basicAuthUserInfo:
|
|
64
87
|
connection.schemaRegistryConfig = connection.schemaRegistryConfig or {}
|
|
65
88
|
connection.schemaRegistryConfig[
|
|
@@ -109,9 +132,27 @@ def test_connection(
|
|
|
109
132
|
"""
|
|
110
133
|
|
|
111
134
|
def custom_executor():
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
135
|
+
try:
|
|
136
|
+
client.admin_client.list_topics(timeout=TIMEOUT_SECONDS).topics
|
|
137
|
+
except KafkaException as err:
|
|
138
|
+
raise InvalidKafkaCreds(
|
|
139
|
+
f"Failed to fetch topics due to: {err}. "
|
|
140
|
+
"Please validate credentials and check if you are using correct security protocol"
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
def schema_registry_test():
|
|
144
|
+
if client.schema_registry_client:
|
|
145
|
+
client.schema_registry_client.get_subjects()
|
|
146
|
+
else:
|
|
147
|
+
raise SchemaRegistryException(
|
|
148
|
+
"Schema Registry not initialized, please provide schema registry "
|
|
149
|
+
"credentials in case you want topic schema and sample data to be ingested"
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
test_fn = {
|
|
153
|
+
"GetTopics": custom_executor,
|
|
154
|
+
"CheckSchemaRegistry": schema_registry_test,
|
|
155
|
+
}
|
|
115
156
|
|
|
116
157
|
test_connection_steps(
|
|
117
158
|
metadata=metadata,
|
|
@@ -24,6 +24,9 @@ from metadata.generated.schema.entity.data.mlmodel import (
|
|
|
24
24
|
from metadata.generated.schema.entity.services.connections.mlmodel.sageMakerConnection import (
|
|
25
25
|
SageMakerConnection,
|
|
26
26
|
)
|
|
27
|
+
from metadata.generated.schema.entity.services.ingestionPipelines.status import (
|
|
28
|
+
StackTraceError,
|
|
29
|
+
)
|
|
27
30
|
from metadata.generated.schema.metadataIngestion.workflow import (
|
|
28
31
|
Source as WorkflowSource,
|
|
29
32
|
)
|
|
@@ -128,14 +131,23 @@ class SagemakerSource(MlModelServiceSource):
|
|
|
128
131
|
"""
|
|
129
132
|
Prepare the Request model
|
|
130
133
|
"""
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
134
|
+
try:
|
|
135
|
+
mlmodel_request = CreateMlModelRequest(
|
|
136
|
+
name=model.name,
|
|
137
|
+
algorithm=self._get_algorithm(), # Setting this to a constant
|
|
138
|
+
mlStore=self._get_ml_store(model.name),
|
|
139
|
+
service=self.context.mlmodel_service,
|
|
140
|
+
)
|
|
141
|
+
yield Either(right=mlmodel_request)
|
|
142
|
+
self.register_record(mlmodel_request=mlmodel_request)
|
|
143
|
+
except Exception as exc: # pylint: disable=broad-except
|
|
144
|
+
yield Either(
|
|
145
|
+
left=StackTraceError(
|
|
146
|
+
name=model.name,
|
|
147
|
+
error=f"Error creating mlmodel: {exc}",
|
|
148
|
+
stackTrace=traceback.format_exc(),
|
|
149
|
+
)
|
|
150
|
+
)
|
|
139
151
|
|
|
140
152
|
def _get_ml_store( # pylint: disable=arguments-differ
|
|
141
153
|
self,
|
|
@@ -40,6 +40,7 @@ def _(element, compiler, **kw):
|
|
|
40
40
|
|
|
41
41
|
|
|
42
42
|
@compiles(SumFn, Dialects.BigQuery)
|
|
43
|
+
@compiles(SumFn, Dialects.Postgres)
|
|
43
44
|
def _(element, compiler, **kw):
|
|
44
45
|
"""Handle case where column type is INTEGER but SUM returns a NUMBER"""
|
|
45
46
|
proc = compiler.process(element.clauses, **kw)
|
|
@@ -53,7 +53,7 @@ from metadata.profiler.metrics.static.row_count import RowCount
|
|
|
53
53
|
from metadata.profiler.orm.registry import NOT_COMPUTE
|
|
54
54
|
from metadata.profiler.processor.sample_data_handler import upload_sample_data
|
|
55
55
|
from metadata.utils.constants import SAMPLE_DATA_DEFAULT_COUNT
|
|
56
|
-
from metadata.utils.
|
|
56
|
+
from metadata.utils.execution_time_tracker import calculate_execution_time
|
|
57
57
|
from metadata.utils.logger import profiler_logger
|
|
58
58
|
|
|
59
59
|
logger = profiler_logger()
|
|
@@ -550,7 +550,7 @@ class Profiler(Generic[TMetric]):
|
|
|
550
550
|
|
|
551
551
|
return table_profile
|
|
552
552
|
|
|
553
|
-
@calculate_execution_time
|
|
553
|
+
@calculate_execution_time(store=False)
|
|
554
554
|
def generate_sample_data(self) -> Optional[TableData]:
|
|
555
555
|
"""Fetch and ingest sample data
|
|
556
556
|
|
|
@@ -169,6 +169,7 @@ class GenericDataFrameColumnParser:
|
|
|
169
169
|
["datetime64", "timedelta[ns]", "datetime64[ns]"], DataType.DATETIME
|
|
170
170
|
),
|
|
171
171
|
"str": DataType.STRING,
|
|
172
|
+
"bytes": DataType.BYTES,
|
|
172
173
|
}
|
|
173
174
|
|
|
174
175
|
def __init__(self, data_frame: "DataFrame"):
|
|
@@ -247,8 +248,13 @@ class GenericDataFrameColumnParser:
|
|
|
247
248
|
data_type = "string"
|
|
248
249
|
|
|
249
250
|
data_type = cls._data_formats.get(
|
|
250
|
-
data_type or data_frame[column_name].dtypes.name,
|
|
251
|
+
data_type or data_frame[column_name].dtypes.name,
|
|
251
252
|
)
|
|
253
|
+
if not data_type:
|
|
254
|
+
logger.debug(
|
|
255
|
+
f"unknown data type {data_frame[column_name].dtypes.name}. resolving to string."
|
|
256
|
+
)
|
|
257
|
+
data_type = data_type or DataType.STRING
|
|
252
258
|
except Exception as err:
|
|
253
259
|
logger.warning(
|
|
254
260
|
f"Failed to distinguish data type for column {column_name}, Falling back to {data_type}, exc: {err}"
|