openmetadata-ingestion 1.7.0.0rc2__py3-none-any.whl → 1.7.1.0__py3-none-any.whl
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Potentially problematic release.
This version of openmetadata-ingestion might be problematic. Click here for more details.
- metadata/applications/example.py +74 -0
- metadata/cli/classify.py +2 -4
- metadata/cli/common.py +26 -0
- metadata/cli/dataquality.py +2 -4
- metadata/cli/ingest.py +2 -4
- metadata/cli/profile.py +2 -4
- metadata/cli/usage.py +2 -4
- metadata/data_quality/processor/test_case_runner.py +6 -8
- metadata/data_quality/runner/base_test_suite_source.py +10 -4
- metadata/data_quality/source/test_suite.py +11 -1
- metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeUnique.py +14 -2
- metadata/data_quality/validations/column/sqlalchemy/columnValuesToMatchRegex.py +1 -1
- metadata/data_quality/validations/models.py +3 -0
- metadata/data_quality/validations/runtime_param_setter/base_diff_params_setter.py +120 -0
- metadata/data_quality/validations/runtime_param_setter/table_diff_params_setter.py +61 -47
- metadata/data_quality/validations/table/sqlalchemy/tableDiff.py +24 -0
- metadata/examples/workflows/tableau.yaml +0 -3
- metadata/generated/schema/analytics/__init__.py +1 -1
- metadata/generated/schema/analytics/basic.py +1 -1
- metadata/generated/schema/analytics/reportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/__init__.py +1 -1
- metadata/generated/schema/analytics/reportDataType/aggregatedCostAnalysisReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/entityReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/rawCostAnalysisReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/webAnalyticEntityViewReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/webAnalyticUserActivityReportData.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEvent.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventData.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventType/__init__.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventType/customEvent.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventType/pageViewEvent.py +1 -1
- metadata/generated/schema/api/__init__.py +1 -1
- metadata/generated/schema/api/addGlossaryToAssetsRequest.py +1 -1
- metadata/generated/schema/api/addTagToAssetsRequest.py +1 -1
- metadata/generated/schema/api/analytics/__init__.py +1 -1
- metadata/generated/schema/api/analytics/createWebAnalyticEvent.py +1 -1
- metadata/generated/schema/api/automations/__init__.py +1 -1
- metadata/generated/schema/api/automations/createWorkflow.py +1 -1
- metadata/generated/schema/api/bulkAssets.py +1 -1
- metadata/generated/schema/api/classification/__init__.py +1 -1
- metadata/generated/schema/api/classification/createClassification.py +1 -1
- metadata/generated/schema/api/classification/createTag.py +1 -1
- metadata/generated/schema/api/classification/loadTags.py +1 -1
- metadata/generated/schema/api/createBot.py +1 -1
- metadata/generated/schema/api/createEventPublisherJob.py +1 -1
- metadata/generated/schema/api/createType.py +1 -1
- metadata/generated/schema/api/data/__init__.py +1 -1
- metadata/generated/schema/api/data/createAPICollection.py +1 -1
- metadata/generated/schema/api/data/createAPIEndpoint.py +1 -1
- metadata/generated/schema/api/data/createChart.py +1 -1
- metadata/generated/schema/api/data/createContainer.py +1 -1
- metadata/generated/schema/api/data/createCustomProperty.py +1 -1
- metadata/generated/schema/api/data/createDashboard.py +1 -1
- metadata/generated/schema/api/data/createDashboardDataModel.py +1 -1
- metadata/generated/schema/api/data/createDatabase.py +1 -1
- metadata/generated/schema/api/data/createDatabaseSchema.py +1 -1
- metadata/generated/schema/api/data/createGlossary.py +1 -1
- metadata/generated/schema/api/data/createGlossaryTerm.py +1 -1
- metadata/generated/schema/api/data/createMetric.py +1 -1
- metadata/generated/schema/api/data/createMlModel.py +1 -1
- metadata/generated/schema/api/data/createPipeline.py +1 -1
- metadata/generated/schema/api/data/createQuery.py +1 -1
- metadata/generated/schema/api/data/createQueryCostRecord.py +1 -1
- metadata/generated/schema/api/data/createSearchIndex.py +1 -1
- metadata/generated/schema/api/data/createStoredProcedure.py +1 -1
- metadata/generated/schema/api/data/createTable.py +1 -1
- metadata/generated/schema/api/data/createTableProfile.py +1 -1
- metadata/generated/schema/api/data/createTopic.py +1 -1
- metadata/generated/schema/api/data/loadGlossary.py +1 -1
- metadata/generated/schema/api/data/restoreEntity.py +1 -1
- metadata/generated/schema/api/dataInsight/__init__.py +1 -1
- metadata/generated/schema/api/dataInsight/createDataInsightChart.py +1 -1
- metadata/generated/schema/api/dataInsight/custom/__init__.py +1 -1
- metadata/generated/schema/api/dataInsight/custom/createDataInsightCustomChart.py +1 -1
- metadata/generated/schema/api/dataInsight/kpi/__init__.py +1 -1
- metadata/generated/schema/api/dataInsight/kpi/createKpiRequest.py +1 -1
- metadata/generated/schema/api/docStore/__init__.py +1 -1
- metadata/generated/schema/api/docStore/createDocument.py +1 -1
- metadata/generated/schema/api/domains/__init__.py +1 -1
- metadata/generated/schema/api/domains/createDataProduct.py +1 -1
- metadata/generated/schema/api/domains/createDomain.py +1 -1
- metadata/generated/schema/api/feed/__init__.py +1 -1
- metadata/generated/schema/api/feed/closeTask.py +1 -1
- metadata/generated/schema/api/feed/createPost.py +1 -1
- metadata/generated/schema/api/feed/createSuggestion.py +1 -1
- metadata/generated/schema/api/feed/createThread.py +1 -1
- metadata/generated/schema/api/feed/resolveTask.py +1 -1
- metadata/generated/schema/api/feed/threadCount.py +1 -1
- metadata/generated/schema/api/governance/__init__.py +1 -1
- metadata/generated/schema/api/governance/createWorkflowDefinition.py +1 -1
- metadata/generated/schema/api/governance/createWorkflowInstanceState.py +1 -1
- metadata/generated/schema/api/lineage/__init__.py +1 -1
- metadata/generated/schema/api/lineage/addLineage.py +1 -1
- metadata/generated/schema/api/lineage/esLineageData.py +1 -1
- metadata/generated/schema/api/lineage/lineageDirection.py +1 -1
- metadata/generated/schema/api/lineage/nodeInformation.py +1 -1
- metadata/generated/schema/api/lineage/searchLineageRequest.py +1 -1
- metadata/generated/schema/api/lineage/searchLineageResult.py +1 -1
- metadata/generated/schema/api/openMetadataServerVersion.py +1 -1
- metadata/generated/schema/api/policies/__init__.py +1 -1
- metadata/generated/schema/api/policies/createPolicy.py +1 -1
- metadata/generated/schema/api/search/__init__.py +1 -1
- metadata/generated/schema/api/search/previewSearchRequest.py +1 -1
- metadata/generated/schema/api/services/__init__.py +1 -1
- metadata/generated/schema/api/services/createApiService.py +1 -1
- metadata/generated/schema/api/services/createDashboardService.py +1 -1
- metadata/generated/schema/api/services/createDatabaseService.py +1 -1
- metadata/generated/schema/api/services/createMessagingService.py +1 -1
- metadata/generated/schema/api/services/createMetadataService.py +1 -1
- metadata/generated/schema/api/services/createMlModelService.py +1 -1
- metadata/generated/schema/api/services/createPipelineService.py +1 -1
- metadata/generated/schema/api/services/createSearchService.py +1 -1
- metadata/generated/schema/api/services/createStorageService.py +1 -1
- metadata/generated/schema/api/services/ingestionPipelines/__init__.py +1 -1
- metadata/generated/schema/api/services/ingestionPipelines/createIngestionPipeline.py +8 -1
- metadata/generated/schema/api/setOwner.py +1 -1
- metadata/generated/schema/api/teams/__init__.py +1 -1
- metadata/generated/schema/api/teams/createPersona.py +1 -1
- metadata/generated/schema/api/teams/createRole.py +1 -1
- metadata/generated/schema/api/teams/createTeam.py +1 -1
- metadata/generated/schema/api/teams/createUser.py +1 -1
- metadata/generated/schema/api/tests/__init__.py +1 -1
- metadata/generated/schema/api/tests/createCustomMetric.py +1 -1
- metadata/generated/schema/api/tests/createLogicalTestCases.py +1 -1
- metadata/generated/schema/api/tests/createTestCase.py +1 -1
- metadata/generated/schema/api/tests/createTestCaseResolutionStatus.py +1 -1
- metadata/generated/schema/api/tests/createTestCaseResult.py +1 -1
- metadata/generated/schema/api/tests/createTestDefinition.py +1 -1
- metadata/generated/schema/api/tests/createTestSuite.py +1 -1
- metadata/generated/schema/api/voteRequest.py +1 -1
- metadata/generated/schema/auth/__init__.py +1 -1
- metadata/generated/schema/auth/basicAuth.py +1 -1
- metadata/generated/schema/auth/basicLoginRequest.py +1 -1
- metadata/generated/schema/auth/changePasswordRequest.py +1 -1
- metadata/generated/schema/auth/createPersonalToken.py +1 -1
- metadata/generated/schema/auth/emailRequest.py +1 -1
- metadata/generated/schema/auth/emailVerificationToken.py +1 -1
- metadata/generated/schema/auth/generateToken.py +1 -1
- metadata/generated/schema/auth/jwtAuth.py +1 -1
- metadata/generated/schema/auth/loginRequest.py +1 -1
- metadata/generated/schema/auth/logoutRequest.py +1 -1
- metadata/generated/schema/auth/passwordResetRequest.py +1 -1
- metadata/generated/schema/auth/passwordResetToken.py +1 -1
- metadata/generated/schema/auth/personalAccessToken.py +1 -1
- metadata/generated/schema/auth/refreshToken.py +1 -1
- metadata/generated/schema/auth/registrationRequest.py +1 -1
- metadata/generated/schema/auth/revokePersonalToken.py +1 -1
- metadata/generated/schema/auth/revokeToken.py +1 -1
- metadata/generated/schema/auth/serviceTokenEnum.py +1 -1
- metadata/generated/schema/auth/ssoAuth.py +1 -1
- metadata/generated/schema/auth/tokenRefreshRequest.py +1 -1
- metadata/generated/schema/configuration/__init__.py +1 -1
- metadata/generated/schema/configuration/appsPrivateConfiguration.py +1 -1
- metadata/generated/schema/configuration/assetCertificationSettings.py +1 -1
- metadata/generated/schema/configuration/authConfig.py +1 -1
- metadata/generated/schema/configuration/authenticationConfiguration.py +1 -1
- metadata/generated/schema/configuration/authorizerConfiguration.py +1 -1
- metadata/generated/schema/configuration/changeEventConfiguration.py +1 -1
- metadata/generated/schema/configuration/dataQualityConfiguration.py +1 -1
- metadata/generated/schema/configuration/elasticSearchConfiguration.py +8 -1
- metadata/generated/schema/configuration/eventHandlerConfiguration.py +1 -1
- metadata/generated/schema/configuration/fernetConfiguration.py +1 -1
- metadata/generated/schema/configuration/jwtTokenConfiguration.py +1 -1
- metadata/generated/schema/configuration/kafkaEventConfiguration.py +1 -1
- metadata/generated/schema/configuration/ldapConfiguration.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/__init__.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/customTrustManagerConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/hostNameConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/jvmDefaultConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/trustAllConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/truststoreConfig.py +1 -1
- metadata/generated/schema/configuration/limitsConfiguration.py +1 -1
- metadata/generated/schema/configuration/lineageSettings.py +1 -1
- metadata/generated/schema/configuration/loginConfiguration.py +1 -1
- metadata/generated/schema/configuration/logoConfiguration.py +1 -1
- metadata/generated/schema/configuration/openMetadataBaseUrlConfiguration.py +3 -2
- metadata/generated/schema/configuration/opertionalConfiguration.py +24 -0
- metadata/generated/schema/configuration/opsConfig.py +25 -0
- metadata/generated/schema/configuration/pipelineServiceClientConfiguration.py +1 -1
- metadata/generated/schema/configuration/profilerConfiguration.py +1 -1
- metadata/generated/schema/configuration/searchSettings.py +1 -1
- metadata/generated/schema/configuration/slackAppConfiguration.py +1 -1
- metadata/generated/schema/configuration/taskNotificationConfiguration.py +1 -1
- metadata/generated/schema/configuration/testResultNotificationConfiguration.py +1 -1
- metadata/generated/schema/configuration/themeConfiguration.py +1 -1
- metadata/generated/schema/configuration/uiThemePreference.py +1 -1
- metadata/generated/schema/configuration/workflowSettings.py +3 -3
- metadata/generated/schema/dataInsight/__init__.py +1 -1
- metadata/generated/schema/dataInsight/custom/__init__.py +1 -1
- metadata/generated/schema/dataInsight/custom/dataInsightCustomChart.py +1 -1
- metadata/generated/schema/dataInsight/custom/dataInsightCustomChartResult.py +1 -1
- metadata/generated/schema/dataInsight/custom/dataInsightCustomChartResultList.py +1 -1
- metadata/generated/schema/dataInsight/custom/formulaHolder.py +1 -1
- metadata/generated/schema/dataInsight/custom/lineChart.py +1 -1
- metadata/generated/schema/dataInsight/custom/summaryCard.py +1 -1
- metadata/generated/schema/dataInsight/dataInsightChart.py +1 -1
- metadata/generated/schema/dataInsight/dataInsightChartResult.py +1 -1
- metadata/generated/schema/dataInsight/kpi/__init__.py +1 -1
- metadata/generated/schema/dataInsight/kpi/basic.py +1 -1
- metadata/generated/schema/dataInsight/kpi/kpi.py +1 -1
- metadata/generated/schema/dataInsight/type/__init__.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsCount.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsSize.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsCount.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsSize.py +1 -1
- metadata/generated/schema/dataInsight/type/dailyActiveUsers.py +1 -1
- metadata/generated/schema/dataInsight/type/mostActiveUsers.py +1 -1
- metadata/generated/schema/dataInsight/type/mostViewedEntities.py +1 -1
- metadata/generated/schema/dataInsight/type/pageViewsByEntities.py +1 -1
- metadata/generated/schema/dataInsight/type/unusedAssets.py +1 -1
- metadata/generated/schema/email/__init__.py +1 -1
- metadata/generated/schema/email/emailRequest.py +1 -1
- metadata/generated/schema/email/emailTemplate.py +1 -1
- metadata/generated/schema/email/emailTemplatePlaceholder.py +1 -1
- metadata/generated/schema/email/smtpSettings.py +6 -4
- metadata/generated/schema/email/templateValidationReponse.py +1 -1
- metadata/generated/schema/entity/__init__.py +1 -1
- metadata/generated/schema/entity/applications/__init__.py +1 -1
- metadata/generated/schema/entity/applications/app.py +4 -3
- metadata/generated/schema/entity/applications/appExtension.py +1 -1
- metadata/generated/schema/entity/applications/appRunRecord.py +1 -1
- metadata/generated/schema/entity/applications/configuration/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/applicationConfig.py +10 -4
- metadata/generated/schema/entity/applications/configuration/external/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/addCustomProperties.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/addDataProductAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/addDescriptionAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/addDomainAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/addOwnerAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/addTagsAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/addTestCaseAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/addTierAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/lineagePropagationAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/mlTaggingAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/removeCustomPropertiesAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/removeDataProductAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/removeDescriptionAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/removeDomainAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/removeOwnerAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/removeTagsAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/removeTestCaseAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/removeTierAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automatorAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/collateAIAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/slackAppTokenConfiguration.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/autoPilotAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/collateAIQualityAgentAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/collateAITierAgentAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/dataInsightsAppConfig.py +3 -1
- metadata/generated/schema/entity/applications/configuration/internal/dataInsightsReportAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/dataRetentionConfiguration.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/helloPipelinesConfiguration.py +18 -0
- metadata/generated/schema/entity/applications/configuration/internal/searchIndexingAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/private/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/private/external/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/private/external/collateAIAppPrivateConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/private/internal/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/private/internal/collateAITierAgentAppPrivateConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/private/limits.py +1 -1
- metadata/generated/schema/entity/applications/createAppRequest.py +1 -1
- metadata/generated/schema/entity/applications/jobStatus.py +1 -1
- metadata/generated/schema/entity/applications/liveExecutionContext.py +1 -1
- metadata/generated/schema/entity/applications/marketplace/__init__.py +1 -1
- metadata/generated/schema/entity/applications/marketplace/appMarketPlaceDefinition.py +5 -2
- metadata/generated/schema/entity/applications/marketplace/createAppMarketPlaceDefinitionReq.py +12 -2
- metadata/generated/schema/entity/applications/scheduledExecutionContext.py +1 -1
- metadata/generated/schema/entity/automations/__init__.py +1 -1
- metadata/generated/schema/entity/automations/testServiceConnection.py +1 -1
- metadata/generated/schema/entity/automations/workflow.py +1 -1
- metadata/generated/schema/entity/bot.py +1 -1
- metadata/generated/schema/entity/classification/__init__.py +1 -1
- metadata/generated/schema/entity/classification/classification.py +1 -1
- metadata/generated/schema/entity/classification/tag.py +1 -1
- metadata/generated/schema/entity/data/__init__.py +1 -1
- metadata/generated/schema/entity/data/apiCollection.py +1 -1
- metadata/generated/schema/entity/data/apiEndpoint.py +1 -1
- metadata/generated/schema/entity/data/chart.py +1 -1
- metadata/generated/schema/entity/data/container.py +1 -1
- metadata/generated/schema/entity/data/dashboard.py +1 -1
- metadata/generated/schema/entity/data/dashboardDataModel.py +2 -1
- metadata/generated/schema/entity/data/database.py +5 -1
- metadata/generated/schema/entity/data/databaseSchema.py +5 -1
- metadata/generated/schema/entity/data/glossary.py +1 -1
- metadata/generated/schema/entity/data/glossaryTerm.py +1 -1
- metadata/generated/schema/entity/data/metric.py +1 -1
- metadata/generated/schema/entity/data/mlmodel.py +1 -1
- metadata/generated/schema/entity/data/pipeline.py +1 -1
- metadata/generated/schema/entity/data/query.py +1 -1
- metadata/generated/schema/entity/data/queryCostRecord.py +1 -1
- metadata/generated/schema/entity/data/queryCostSearchResult.py +1 -1
- metadata/generated/schema/entity/data/report.py +1 -1
- metadata/generated/schema/entity/data/searchIndex.py +1 -1
- metadata/generated/schema/entity/data/storedProcedure.py +1 -1
- metadata/generated/schema/entity/data/table.py +1 -1
- metadata/generated/schema/entity/data/topic.py +1 -1
- metadata/generated/schema/entity/docStore/__init__.py +1 -1
- metadata/generated/schema/entity/docStore/document.py +1 -1
- metadata/generated/schema/entity/domains/__init__.py +1 -1
- metadata/generated/schema/entity/domains/dataProduct.py +1 -1
- metadata/generated/schema/entity/domains/domain.py +1 -1
- metadata/generated/schema/entity/events/__init__.py +1 -1
- metadata/generated/schema/entity/events/webhook.py +1 -1
- metadata/generated/schema/entity/feed/__init__.py +1 -1
- metadata/generated/schema/entity/feed/assets.py +1 -1
- metadata/generated/schema/entity/feed/customProperty.py +1 -1
- metadata/generated/schema/entity/feed/description.py +1 -1
- metadata/generated/schema/entity/feed/domain.py +1 -1
- metadata/generated/schema/entity/feed/entityInfo.py +1 -1
- metadata/generated/schema/entity/feed/owner.py +1 -1
- metadata/generated/schema/entity/feed/suggestion.py +1 -1
- metadata/generated/schema/entity/feed/tag.py +1 -1
- metadata/generated/schema/entity/feed/testCaseResult.py +1 -1
- metadata/generated/schema/entity/feed/thread.py +1 -1
- metadata/generated/schema/entity/policies/__init__.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/__init__.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/resourceDescriptor.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/resourcePermission.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/rule.py +1 -1
- metadata/generated/schema/entity/policies/filters.py +1 -1
- metadata/generated/schema/entity/policies/policy.py +1 -1
- metadata/generated/schema/entity/services/__init__.py +1 -1
- metadata/generated/schema/entity/services/apiService.py +5 -1
- metadata/generated/schema/entity/services/connections/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/api/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/api/restConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/common/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/common/sslCertPaths.py +1 -1
- metadata/generated/schema/entity/services/connections/common/sslCertValues.py +1 -1
- metadata/generated/schema/entity/services/connections/common/sslConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/connectionBasicType.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/customDashboardConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/domoDashboardConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/lightdashConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/lookerConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/metabaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/microStrategyConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/modeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/powerBIConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/powerBIReportServerConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/powerbi/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/powerbi/azureConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/powerbi/bucketDetails.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/powerbi/gcsConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/powerbi/s3Config.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/qlikCloudConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/qlikSenseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/quickSightConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/redashConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/sigmaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/supersetConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/tableauConnection.py +2 -11
- metadata/generated/schema/entity/services/connections/database/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/athenaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/azureSQLConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/bigQueryConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/bigTableConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/cassandra/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/cassandra/cloudConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/cassandraConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/clickhouseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/cockroachConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/azureConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/basicAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/iamAuthConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/jwtAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/noConfigAuthenticationTypes.py +1 -1
- metadata/generated/schema/entity/services/connections/database/couchbaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/customDatabaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/databricksConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/azureConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/gcsConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/s3Config.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalakeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/db2Connection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/deltaLakeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/deltalake/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/deltalake/metastoreConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/deltalake/storageConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/domoDatabaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/dorisConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/druidConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/dynamoDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/exasolConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/glueConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/greenplumConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/hiveConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/dynamoDbCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/glueCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/hiveCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/icebergCatalog.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/icebergFileSystem.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/restCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/icebergConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/impalaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mariaDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mongoDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mssqlConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mysqlConnection.py +9 -1
- metadata/generated/schema/entity/services/connections/database/oracleConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/pinotDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/postgresConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/prestoConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/redshiftConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/salesforceConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sapErpConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sapHana/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sapHana/sapHanaHDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sapHana/sapHanaSQLConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sapHanaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sasConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/singleStoreConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/snowflakeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sqliteConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/synapseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/teradataConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/trinoConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/unityCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/verticaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/customMessagingConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/kafkaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/kinesisConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/pulsarConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/redpandaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/saslMechanismType.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/alationConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/alationSinkConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/amundsenConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/atlasConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/metadataESConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/openMetadataConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/customMlModelConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/mlflowConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/sageMakerConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/sklearnConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/vertexaiConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/airbyteConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/airflowConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/backendConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/customPipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/dagsterConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/databricksPipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/datafactoryConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/dbtCloudConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/domoPipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/fivetranConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/flinkConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/gluePipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/kafkaConnectConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/matillion/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/matillion/matillionETL.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/matillionConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/nifi/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/nifi/basicAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/nifi/clientCertificateAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/nifiConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/openLineageConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/sparkConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/splineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/stitchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/wherescapeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/search/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/search/customSearchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearch/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearch/apiAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearch/basicAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/search/openSearchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/serviceConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/adlsConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/customStorageConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/gcsConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/s3Connection.py +1 -1
- metadata/generated/schema/entity/services/connections/testConnectionDefinition.py +1 -1
- metadata/generated/schema/entity/services/connections/testConnectionResult.py +1 -1
- metadata/generated/schema/entity/services/dashboardService.py +5 -1
- metadata/generated/schema/entity/services/databaseService.py +5 -1
- metadata/generated/schema/entity/services/ingestionPipelines/__init__.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/ingestionPipeline.py +12 -1
- metadata/generated/schema/entity/services/ingestionPipelines/pipelineServiceClientResponse.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/reverseIngestionResponse.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/status.py +1 -1
- metadata/generated/schema/entity/services/messagingService.py +5 -1
- metadata/generated/schema/entity/services/metadataService.py +5 -1
- metadata/generated/schema/entity/services/mlmodelService.py +5 -1
- metadata/generated/schema/entity/services/pipelineService.py +5 -1
- metadata/generated/schema/entity/services/searchService.py +5 -1
- metadata/generated/schema/entity/services/serviceType.py +1 -1
- metadata/generated/schema/entity/services/storageService.py +5 -1
- metadata/generated/schema/entity/teams/__init__.py +1 -1
- metadata/generated/schema/entity/teams/persona.py +1 -1
- metadata/generated/schema/entity/teams/role.py +1 -1
- metadata/generated/schema/entity/teams/team.py +1 -1
- metadata/generated/schema/entity/teams/teamHierarchy.py +1 -1
- metadata/generated/schema/entity/teams/user.py +1 -1
- metadata/generated/schema/entity/type.py +1 -1
- metadata/generated/schema/entity/utils/__init__.py +1 -1
- metadata/generated/schema/entity/utils/entitiesCount.py +1 -1
- metadata/generated/schema/entity/utils/servicesCount.py +1 -1
- metadata/generated/schema/entity/utils/supersetApiConnection.py +1 -1
- metadata/generated/schema/events/__init__.py +1 -1
- metadata/generated/schema/events/alertMetrics.py +1 -1
- metadata/generated/schema/events/api/__init__.py +1 -1
- metadata/generated/schema/events/api/createEventSubscription.py +1 -1
- metadata/generated/schema/events/api/eventSubscriptionDiagnosticInfo.py +1 -1
- metadata/generated/schema/events/api/eventsRecord.py +1 -1
- metadata/generated/schema/events/api/testEventSubscriptionDestination.py +1 -1
- metadata/generated/schema/events/api/typedEvent.py +1 -1
- metadata/generated/schema/events/emailAlertConfig.py +1 -1
- metadata/generated/schema/events/eventFilterRule.py +1 -1
- metadata/generated/schema/events/eventSubscription.py +1 -1
- metadata/generated/schema/events/eventSubscriptionOffset.py +1 -1
- metadata/generated/schema/events/failedEvent.py +1 -1
- metadata/generated/schema/events/failedEventResponse.py +1 -1
- metadata/generated/schema/events/filterResourceDescriptor.py +1 -1
- metadata/generated/schema/events/statusContext.py +1 -1
- metadata/generated/schema/events/subscriptionResourceDescriptor.py +1 -1
- metadata/generated/schema/events/subscriptionStatus.py +1 -1
- metadata/generated/schema/events/testDestinationStatus.py +1 -1
- metadata/generated/schema/governance/workflows/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/edge.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodeSubType.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodeType.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/checkEntityAttributesTask.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/createAndRunIngestionPipelineTask.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/runAppTask.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/setEntityCertificationTask.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/setGlossaryTermStatusTask.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/endEvent/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/endEvent/endEvent.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/gateway/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/gateway/parallelGateway.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/startEvent/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/startEvent/startEvent.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/userTask/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/userTask/userApprovalTask.py +1 -1
- metadata/generated/schema/governance/workflows/elements/triggers/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/triggers/eventBasedEntityTrigger.py +1 -1
- metadata/generated/schema/governance/workflows/elements/triggers/noOpTrigger.py +1 -1
- metadata/generated/schema/governance/workflows/elements/triggers/periodicBatchEntityTrigger.py +1 -1
- metadata/generated/schema/governance/workflows/workflowDefinition.py +1 -1
- metadata/generated/schema/governance/workflows/workflowInstance.py +1 -1
- metadata/generated/schema/governance/workflows/workflowInstanceState.py +1 -1
- metadata/generated/schema/jobs/__init__.py +1 -1
- metadata/generated/schema/jobs/backgroundJob.py +1 -1
- metadata/generated/schema/jobs/enumCleanupArgs.py +1 -1
- metadata/generated/schema/metadataIngestion/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/apiServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/application.py +1 -1
- metadata/generated/schema/metadataIngestion/applicationPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dashboardServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dataInsightPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceAutoClassificationPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceProfilerPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceQueryLineagePipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceQueryUsagePipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtAzureConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtBucketDetails.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtCloudConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtGCSConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtHttpConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtLocalConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtS3Config.py +1 -1
- metadata/generated/schema/metadataIngestion/messagingServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/metadataToElasticSearchPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/mlmodelServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/pipelineServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/reverseIngestionPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/reverseingestionconfig/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/reverseingestionconfig/descriptionConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/reverseingestionconfig/ownerConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/reverseingestionconfig/tagsConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/searchServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/containerMetadataConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/manifestMetadataConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageBucketDetails.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataADLSConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataGCSConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataHttpConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataLocalConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataS3Config.py +1 -1
- metadata/generated/schema/metadataIngestion/storageServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/testSuitePipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/workflow.py +8 -1
- metadata/generated/schema/monitoring/__init__.py +1 -1
- metadata/generated/schema/monitoring/eventMonitorProvider.py +1 -1
- metadata/generated/schema/search/__init__.py +1 -1
- metadata/generated/schema/search/aggregationRequest.py +1 -1
- metadata/generated/schema/search/searchRequest.py +1 -1
- metadata/generated/schema/security/__init__.py +1 -1
- metadata/generated/schema/security/client/__init__.py +1 -1
- metadata/generated/schema/security/client/auth0SSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/azureSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/customOidcSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/googleSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/oidcClientConfig.py +4 -1
- metadata/generated/schema/security/client/oktaSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/openMetadataJWTClientConfig.py +1 -1
- metadata/generated/schema/security/client/samlSSOClientConfig.py +1 -1
- metadata/generated/schema/security/credentials/__init__.py +1 -1
- metadata/generated/schema/security/credentials/accessTokenAuth.py +1 -1
- metadata/generated/schema/security/credentials/apiAccessTokenAuth.py +1 -1
- metadata/generated/schema/security/credentials/awsCredentials.py +1 -1
- metadata/generated/schema/security/credentials/azureCredentials.py +1 -1
- metadata/generated/schema/security/credentials/basicAuth.py +1 -1
- metadata/generated/schema/security/credentials/bitbucketCredentials.py +1 -1
- metadata/generated/schema/security/credentials/gcpCredentials.py +1 -1
- metadata/generated/schema/security/credentials/gcpExternalAccount.py +1 -1
- metadata/generated/schema/security/credentials/gcpValues.py +1 -1
- metadata/generated/schema/security/credentials/gitCredentials.py +1 -1
- metadata/generated/schema/security/credentials/githubCredentials.py +1 -1
- metadata/generated/schema/security/credentials/gitlabCredentials.py +1 -1
- metadata/generated/schema/security/sasl/__init__.py +1 -1
- metadata/generated/schema/security/sasl/saslClientConfig.py +1 -1
- metadata/generated/schema/security/secrets/__init__.py +1 -1
- metadata/generated/schema/security/secrets/secretsManagerClientLoader.py +1 -1
- metadata/generated/schema/security/secrets/secretsManagerConfiguration.py +1 -1
- metadata/generated/schema/security/secrets/secretsManagerProvider.py +1 -1
- metadata/generated/schema/security/securityConfiguration.py +1 -1
- metadata/generated/schema/security/ssl/__init__.py +1 -1
- metadata/generated/schema/security/ssl/validateSSLClientConfig.py +1 -1
- metadata/generated/schema/security/ssl/verifySSLConfig.py +1 -1
- metadata/generated/schema/settings/__init__.py +1 -1
- metadata/generated/schema/settings/settings.py +1 -1
- metadata/generated/schema/system/__init__.py +1 -1
- metadata/generated/schema/system/entityError.py +1 -1
- metadata/generated/schema/system/eventPublisherJob.py +17 -5
- metadata/generated/schema/system/indexingError.py +2 -2
- metadata/generated/schema/system/limitsResponse.py +1 -1
- metadata/generated/schema/system/ui/__init__.py +1 -1
- metadata/generated/schema/system/ui/knowledgePanel.py +1 -1
- metadata/generated/schema/system/ui/navigationItem.py +1 -1
- metadata/generated/schema/system/ui/page.py +1 -1
- metadata/generated/schema/system/ui/tab.py +1 -1
- metadata/generated/schema/system/ui/uiCustomization.py +1 -1
- metadata/generated/schema/system/validationResponse.py +1 -1
- metadata/generated/schema/tests/__init__.py +1 -1
- metadata/generated/schema/tests/assigned.py +1 -1
- metadata/generated/schema/tests/basic.py +20 -20
- metadata/generated/schema/tests/customMetric.py +1 -1
- metadata/generated/schema/tests/dataQualityReport.py +1 -1
- metadata/generated/schema/tests/resolved.py +1 -1
- metadata/generated/schema/tests/testCase.py +1 -1
- metadata/generated/schema/tests/testCaseResolutionStatus.py +1 -1
- metadata/generated/schema/tests/testDefinition.py +1 -1
- metadata/generated/schema/tests/testSuite.py +1 -1
- metadata/generated/schema/type/__init__.py +1 -1
- metadata/generated/schema/type/apiSchema.py +1 -1
- metadata/generated/schema/type/assetCertification.py +1 -1
- metadata/generated/schema/type/auditLog.py +1 -1
- metadata/generated/schema/type/basic.py +1 -1
- metadata/generated/schema/type/bulkOperationResult.py +1 -1
- metadata/generated/schema/type/changeEvent.py +1 -1
- metadata/generated/schema/type/changeEventType.py +1 -1
- metadata/generated/schema/type/changeSummaryMap.py +1 -1
- metadata/generated/schema/type/collectionDescriptor.py +1 -1
- metadata/generated/schema/type/csvDocumentation.py +1 -1
- metadata/generated/schema/type/csvErrorType.py +1 -1
- metadata/generated/schema/type/csvFile.py +1 -1
- metadata/generated/schema/type/csvImportResult.py +1 -1
- metadata/generated/schema/type/customProperties/__init__.py +1 -1
- metadata/generated/schema/type/customProperties/complexTypes.py +1 -1
- metadata/generated/schema/type/customProperties/enumConfig.py +1 -1
- metadata/generated/schema/type/customProperties/tableConfig.py +1 -1
- metadata/generated/schema/type/customProperty.py +1 -1
- metadata/generated/schema/type/dailyCount.py +1 -1
- metadata/generated/schema/type/databaseConnectionConfig.py +1 -1
- metadata/generated/schema/type/entityHierarchy.py +1 -1
- metadata/generated/schema/type/entityHistory.py +1 -1
- metadata/generated/schema/type/entityLineage.py +1 -1
- metadata/generated/schema/type/entityReference.py +1 -1
- metadata/generated/schema/type/entityReferenceList.py +1 -1
- metadata/generated/schema/type/entityRelationship.py +1 -1
- metadata/generated/schema/type/entityUsage.py +1 -1
- metadata/generated/schema/type/filterPattern.py +1 -1
- metadata/generated/schema/type/function.py +1 -1
- metadata/generated/schema/type/include.py +1 -1
- metadata/generated/schema/type/jdbcConnection.py +1 -1
- metadata/generated/schema/type/lifeCycle.py +1 -1
- metadata/generated/schema/type/paging.py +1 -1
- metadata/generated/schema/type/profile.py +1 -1
- metadata/generated/schema/type/queryParserData.py +1 -1
- metadata/generated/schema/type/reaction.py +1 -1
- metadata/generated/schema/type/schedule.py +1 -1
- metadata/generated/schema/type/schema.py +1 -1
- metadata/generated/schema/type/tableQuery.py +1 -1
- metadata/generated/schema/type/tableUsageCount.py +1 -1
- metadata/generated/schema/type/tagLabel.py +2 -1
- metadata/generated/schema/type/usageDetails.py +1 -1
- metadata/generated/schema/type/usageRequest.py +1 -1
- metadata/generated/schema/type/votes.py +1 -1
- metadata/ingestion/api/topology_runner.py +30 -7
- metadata/ingestion/lineage/parser.py +2 -1
- metadata/ingestion/models/custom_pydantic.py +30 -2
- metadata/ingestion/models/patch_request.py +71 -3
- metadata/ingestion/ometa/mixins/es_mixin.py +11 -5
- metadata/ingestion/source/api/rest/connection.py +14 -12
- metadata/ingestion/source/api/rest/metadata.py +15 -2
- metadata/ingestion/source/dashboard/dashboard_service.py +1 -0
- metadata/ingestion/source/dashboard/powerbi/metadata.py +122 -38
- metadata/ingestion/source/dashboard/powerbi/models.py +22 -0
- metadata/ingestion/source/dashboard/tableau/client.py +152 -171
- metadata/ingestion/source/dashboard/tableau/connection.py +23 -48
- metadata/ingestion/source/dashboard/tableau/metadata.py +73 -99
- metadata/ingestion/source/dashboard/tableau/models.py +8 -18
- metadata/ingestion/source/dashboard/tableau/queries.py +2 -2
- metadata/ingestion/source/database/athena/metadata.py +26 -0
- metadata/ingestion/source/database/bigquery/connection.py +8 -3
- metadata/ingestion/source/database/bigquery/helper.py +8 -6
- metadata/ingestion/source/database/bigquery/metadata.py +23 -14
- metadata/ingestion/source/database/clickhouse/metadata.py +0 -33
- metadata/ingestion/source/database/common_db_source.py +13 -26
- metadata/ingestion/source/database/dbt/metadata.py +30 -17
- metadata/ingestion/source/database/hive/metadata.py +8 -1
- metadata/ingestion/source/database/life_cycle_query_mixin.py +9 -0
- metadata/ingestion/source/database/mysql/connection.py +11 -3
- metadata/ingestion/source/database/mysql/lineage.py +4 -4
- metadata/ingestion/source/database/mysql/queries.py +29 -0
- metadata/ingestion/source/database/mysql/query_parser.py +31 -0
- metadata/ingestion/source/database/oracle/metadata.py +0 -39
- metadata/ingestion/source/database/oracle/queries.py +2 -2
- metadata/ingestion/source/database/oracle/utils.py +0 -14
- metadata/ingestion/source/database/postgres/metadata.py +3 -1
- metadata/ingestion/source/database/postgres/queries.py +7 -0
- metadata/ingestion/source/database/postgres/utils.py +28 -19
- metadata/ingestion/source/database/snowflake/data_diff/__init__.py +0 -0
- metadata/ingestion/source/database/snowflake/data_diff/data_diff.py +37 -0
- metadata/ingestion/source/database/snowflake/metadata.py +77 -31
- metadata/ingestion/source/database/snowflake/queries.py +15 -7
- metadata/ingestion/source/database/snowflake/service_spec.py +4 -0
- metadata/ingestion/source/database/snowflake/utils.py +37 -17
- metadata/ingestion/source/database/unitycatalog/metadata.py +0 -15
- metadata/ingestion/source/database/vertica/queries.py +5 -20
- metadata/ingestion/source/messaging/kinesis/metadata.py +3 -0
- metadata/ingestion/source/pipeline/airbyte/constants.py +29 -0
- metadata/ingestion/source/pipeline/airbyte/metadata.py +67 -26
- metadata/ingestion/source/pipeline/airbyte/utils.py +99 -0
- metadata/ingestion/source/pipeline/openlineage/models.py +3 -2
- metadata/ingestion/source/pipeline/pipeline_service.py +2 -3
- metadata/ingestion/source/storage/s3/metadata.py +7 -8
- metadata/pii/algorithms/classifiers.py +180 -0
- metadata/pii/algorithms/column_patterns.py +61 -0
- metadata/pii/algorithms/feature_extraction.py +154 -0
- metadata/pii/algorithms/preprocessing.py +62 -0
- metadata/pii/algorithms/presidio_patches.py +45 -0
- metadata/pii/algorithms/presidio_utils.py +119 -0
- metadata/pii/algorithms/tags.py +111 -0
- metadata/pii/algorithms/utils.py +38 -0
- metadata/pii/base_processor.py +125 -0
- metadata/pii/constants.py +8 -0
- metadata/pii/processor.py +42 -138
- metadata/profiler/interface/sqlalchemy/profiler_interface.py +66 -36
- metadata/profiler/processor/runner.py +29 -6
- metadata/profiler/source/database/bigquery/profiler_source.py +2 -20
- metadata/profiler/source/database/mssql/profiler_source.py +86 -0
- metadata/profiler/source/fetcher/profiler_source_factory.py +13 -0
- metadata/readers/dataframe/json.py +5 -1
- metadata/readers/dataframe/parquet.py +10 -2
- metadata/readers/dataframe/reader_factory.py +8 -0
- metadata/sampler/processor.py +8 -1
- metadata/sampler/sampler_interface.py +3 -0
- metadata/sampler/sqlalchemy/bigquery/sampler.py +5 -0
- metadata/sampler/sqlalchemy/sampler.py +32 -40
- metadata/utils/bigquery_utils.py +35 -0
- metadata/utils/datalake/datalake_utils.py +9 -3
- metadata/utils/fqn.py +4 -4
- metadata/utils/service_spec/default.py +4 -0
- metadata/utils/service_spec/service_spec.py +1 -0
- metadata/utils/sqa_utils.py +15 -0
- metadata/utils/sqlalchemy_utils.py +5 -2
- metadata/workflow/base.py +8 -1
- metadata/workflow/profiler.py +9 -9
- metadata/workflow/workflow_status_mixin.py +1 -7
- {openmetadata_ingestion-1.7.0.0rc2.dist-info → openmetadata_ingestion-1.7.1.0.dist-info}/METADATA +432 -424
- {openmetadata_ingestion-1.7.0.0rc2.dist-info → openmetadata_ingestion-1.7.1.0.dist-info}/RECORD +796 -776
- {openmetadata_ingestion-1.7.0.0rc2.dist-info → openmetadata_ingestion-1.7.1.0.dist-info}/LICENSE +0 -0
- {openmetadata_ingestion-1.7.0.0rc2.dist-info → openmetadata_ingestion-1.7.1.0.dist-info}/WHEEL +0 -0
- {openmetadata_ingestion-1.7.0.0rc2.dist-info → openmetadata_ingestion-1.7.1.0.dist-info}/entry_points.txt +0 -0
- {openmetadata_ingestion-1.7.0.0rc2.dist-info → openmetadata_ingestion-1.7.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# Copyright 2025 Collate
|
|
2
|
+
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
6
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
7
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
9
|
+
# See the License for the specific language governing permissions and
|
|
10
|
+
# limitations under the License.
|
|
11
|
+
|
|
12
|
+
"""
|
|
13
|
+
Utils for Airbyte
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from typing import Optional

from metadata.ingestion.source.pipeline.openlineage.models import TableDetails
from metadata.utils.logger import ingestion_logger

from .constants import AirbyteDestination, AirbyteSource
|
|
20
|
+
|
|
21
|
+
logger = ingestion_logger()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def get_source_table_details(
    stream: dict, source_connection: dict
) -> Optional[TableDetails]:
    """
    Get the source table details

    :param stream: stream definition; its "name" entry is used as the table name
        and its "namespace" entry (if any) as the default schema
    :param source_connection: source definition; "sourceName" and
        "connectionConfiguration" are read from it
    :return: TableDetails for the stream, or None when the source type is not
        one of the supported sources (a warning is logged in that case)
    """
    source_name = source_connection.get("sourceName")
    connection_config = source_connection.get("connectionConfiguration", {})
    source_database = connection_config.get("database")
    source_schema = stream.get("namespace")

    # Check if source is supported
    if source_name not in (
        AirbyteSource.POSTGRES.value,
        AirbyteSource.MSSQL.value,
        AirbyteSource.MYSQL.value,
        AirbyteSource.MONGODB.value,
    ):
        logger.warning(
            f"Lineage of airbyte pipeline with source [{source_name}] is not supported yet"
        )
        return None

    # Handle specific database configurations
    if source_name == AirbyteSource.MYSQL.value:
        # For MySQL the configured database is reported as the schema and the
        # database is left unset.
        source_schema = source_database
        source_database = None
    elif source_name == AirbyteSource.MONGODB.value:
        # For MongoDB the schema is read from the nested "database_config"
        # section and the database is left unset.
        source_schema = connection_config.get("database_config", {}).get("database")
        source_database = None

    return TableDetails(
        name=stream["name"],
        schema=source_schema,
        database=source_database,
    )
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def get_destination_table_details(
    stream: dict, destination_connection: dict
) -> Optional[TableDetails]:
    """
    Get the destination table details

    :param stream: stream definition; its "name" entry is used as the table name
    :param destination_connection: destination definition; "destinationName" and
        "connectionConfiguration" are read from it
    :return: TableDetails for the stream, or None when the destination type is
        not one of the supported destinations (a warning is logged in that case)
    """
    destination_name = destination_connection.get("destinationName")
    connection_config = destination_connection.get("connectionConfiguration", {})
    destination_database = connection_config.get("database")
    destination_schema = connection_config.get("schema")

    # Check if destination is supported
    if destination_name not in (
        AirbyteDestination.POSTGRES.value,
        AirbyteDestination.MSSQL.value,
        AirbyteDestination.MYSQL.value,
    ):
        logger.warning(
            f"Lineage of airbyte pipeline with destination [{destination_name}] is not supported yet"
        )
        return None

    # Handle specific database configurations
    if destination_name == AirbyteDestination.MYSQL.value:
        # For MySQL the configured database is reported as the schema and the
        # database is left unset.
        destination_schema = destination_database
        destination_database = None

    return TableDetails(
        name=stream["name"],
        schema=destination_schema,
        database=destination_database,
    )
|
|
@@ -14,7 +14,7 @@ Openlineage Source Model module
|
|
|
14
14
|
|
|
15
15
|
from dataclasses import dataclass
|
|
16
16
|
from enum import Enum
|
|
17
|
-
from typing import Any, Dict, List
|
|
17
|
+
from typing import Any, Dict, List, Optional
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
@dataclass
|
|
@@ -76,8 +76,9 @@ class TableDetails:
|
|
|
76
76
|
Minimal table information.
|
|
77
77
|
"""
|
|
78
78
|
|
|
79
|
-
schema: str
|
|
80
79
|
name: str
|
|
80
|
+
schema: str
|
|
81
|
+
database: Optional[str] = None
|
|
81
82
|
|
|
82
83
|
|
|
83
84
|
class EventType(str, Enum):
|
|
@@ -319,8 +319,7 @@ class PipelineServiceSource(TopologyRunnerMixin, Source, ABC):
|
|
|
319
319
|
def _get_table_fqn_from_om(self, table_details: TableDetails) -> Optional[str]:
|
|
320
320
|
"""
|
|
321
321
|
Based on partial schema and table names look for matching table object in open metadata.
|
|
322
|
-
:param
|
|
323
|
-
:param table: table name
|
|
322
|
+
:param table_details: TableDetails object containing table name, schema, database information
|
|
324
323
|
:return: fully qualified name of a Table in Open Metadata
|
|
325
324
|
"""
|
|
326
325
|
result = None
|
|
@@ -330,7 +329,7 @@ class PipelineServiceSource(TopologyRunnerMixin, Source, ABC):
|
|
|
330
329
|
metadata=self.metadata,
|
|
331
330
|
entity_type=Table,
|
|
332
331
|
service_name=db_service,
|
|
333
|
-
database_name=
|
|
332
|
+
database_name=table_details.database,
|
|
334
333
|
schema_name=table_details.schema,
|
|
335
334
|
table_name=table_details.name,
|
|
336
335
|
)
|
|
@@ -64,6 +64,7 @@ from metadata.readers.file.config_source_factory import get_reader
|
|
|
64
64
|
from metadata.utils import fqn
|
|
65
65
|
from metadata.utils.filters import filter_by_container
|
|
66
66
|
from metadata.utils.logger import ingestion_logger
|
|
67
|
+
from metadata.utils.s3_utils import list_s3_objects
|
|
67
68
|
from metadata.utils.tag_utils import get_ometa_tag_and_classification, get_tag_label
|
|
68
69
|
|
|
69
70
|
logger = ingestion_logger()
|
|
@@ -345,14 +346,13 @@ class S3Source(StorageServiceSource):
|
|
|
345
346
|
try:
|
|
346
347
|
prefix = self._get_sample_file_prefix(metadata_entry=metadata_entry)
|
|
347
348
|
if prefix:
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
)
|
|
349
|
+
kwargs = {"Bucket": bucket_response.name, "Prefix": prefix}
|
|
350
|
+
response = list_s3_objects(self.s3_client, **kwargs)
|
|
351
351
|
# total depth is depth of prefix + depth of the metadata entry
|
|
352
352
|
total_depth = metadata_entry.depth + len(prefix[:-1].split("/"))
|
|
353
353
|
candidate_keys = {
|
|
354
354
|
"/".join(entry.get("Key").split("/")[:total_depth]) + "/"
|
|
355
|
-
for entry in response
|
|
355
|
+
for entry in response
|
|
356
356
|
if entry
|
|
357
357
|
and entry.get("Key")
|
|
358
358
|
and len(entry.get("Key").split("/")) > total_depth
|
|
@@ -464,12 +464,11 @@ class S3Source(StorageServiceSource):
|
|
|
464
464
|
parent: Optional[EntityReference] = None,
|
|
465
465
|
):
|
|
466
466
|
bucket_name = bucket_response.name
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
)
|
|
467
|
+
kwargs = {"Bucket": bucket_name, "Prefix": metadata_entry.dataPath}
|
|
468
|
+
response = list_s3_objects(self.s3_client, **kwargs)
|
|
470
469
|
candidate_keys = [
|
|
471
470
|
entry["Key"]
|
|
472
|
-
for entry in response
|
|
471
|
+
for entry in response
|
|
473
472
|
if entry and entry.get("Key") and not entry.get("Key").endswith("/")
|
|
474
473
|
]
|
|
475
474
|
for key in candidate_keys:
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
# Copyright 2025 Collate
|
|
2
|
+
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
6
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
7
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
9
|
+
# See the License for the specific language governing permissions and
|
|
10
|
+
# limitations under the License.
|
|
11
|
+
"""
|
|
12
|
+
Classifier for PII detection and sensitivity tagging.
|
|
13
|
+
"""
|
|
14
|
+
from abc import ABC, abstractmethod
|
|
15
|
+
from collections import defaultdict
|
|
16
|
+
from typing import (
|
|
17
|
+
Any,
|
|
18
|
+
DefaultDict,
|
|
19
|
+
Dict,
|
|
20
|
+
Generic,
|
|
21
|
+
Hashable,
|
|
22
|
+
Mapping,
|
|
23
|
+
Optional,
|
|
24
|
+
Sequence,
|
|
25
|
+
Set,
|
|
26
|
+
TypeVar,
|
|
27
|
+
final,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
from presidio_analyzer import AnalyzerEngine
|
|
31
|
+
|
|
32
|
+
from metadata.generated.schema.entity.data.table import DataType
|
|
33
|
+
from metadata.pii.algorithms.column_patterns import get_pii_column_name_patterns
|
|
34
|
+
from metadata.pii.algorithms.feature_extraction import (
|
|
35
|
+
extract_pii_from_column_names,
|
|
36
|
+
extract_pii_tags,
|
|
37
|
+
is_non_pii_datatype,
|
|
38
|
+
split_column_name,
|
|
39
|
+
)
|
|
40
|
+
from metadata.pii.algorithms.preprocessing import preprocess_values
|
|
41
|
+
from metadata.pii.algorithms.presidio_patches import url_patcher
|
|
42
|
+
from metadata.pii.algorithms.presidio_utils import (
|
|
43
|
+
build_analyzer_engine,
|
|
44
|
+
set_presidio_logger_level,
|
|
45
|
+
)
|
|
46
|
+
from metadata.pii.algorithms.tags import PIISensitivityTag, PIITag
|
|
47
|
+
|
|
48
|
+
T = TypeVar("T", bound=Hashable)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class ColumnClassifier(ABC, Generic[T]):
    """
    Base class for column classifiers.

    Defines the interface for classifiers that predict the class of a
    column from a sample of its values and, optionally, its metadata
    (column name and data type). The type parameter ``T`` is the hashable
    label type produced by the classifier.
    """

    @abstractmethod
    def predict_scores(
        self,
        sample_data: Sequence[Any],
        column_name: Optional[str] = None,
        column_data_type: Optional[DataType] = None,
    ) -> Mapping[T, float]:
        """
        Predict the scores for the given column and sample data of the column.

        Args:
            sample_data: sampled values of the column.
            column_name: optional name of the column.
            column_data_type: optional data type of the column.

        Returns:
            A mapping of class labels to their respective scores: higher
            scores indicate a higher likelihood of the class for the
            given inputs.
        """
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# Implementations
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@final
class HeuristicPIIClassifier(ColumnClassifier[PIITag]):
    """
    Heuristic PII column classifier.

    Scores PII tags from the column content with Presidio and boosts the
    tags that are also suggested by the column name.
    """

    def __init__(
        self,
        *,
        column_name_contribution: float = 0.5,
        score_cutoff: float = 0.1,
        relative_cardinality_cutoff: float = 0.01,
    ):
        """
        Args:
            column_name_contribution: amount added to a tag's content score
                when the column name also matches that tag.
            score_cutoff: minimum final score for a tag to be reported.
            relative_cardinality_cutoff: minimum ratio of distinct values to
                total values required to analyze the sample at all.
        """
        set_presidio_logger_level()

        # Tunable thresholds
        self._relative_cardinality_cutoff = relative_cardinality_cutoff
        self._score_cutoff = score_cutoff
        self._column_name_contribution = column_name_contribution

        # Detection resources
        self._presidio_analyzer: AnalyzerEngine = build_analyzer_engine()
        self._column_name_patterns = get_pii_column_name_patterns()

    def predict_scores(
        self,
        sample_data: Sequence[Any],
        column_name: Optional[str] = None,
        column_data_type: Optional[DataType] = None,
    ) -> Mapping[PIITag, float]:
        """
        Score the PII tags of a column.

        Returns an empty mapping when the data type is excluded from PII
        detection, when no usable value remains after preprocessing, or when
        the share of distinct values is below the cardinality cutoff.
        """
        # Data types that are excluded from PII detection are never analyzed
        if column_data_type is not None and is_non_pii_datatype(column_data_type):
            return {}

        texts = preprocess_values(sample_data)
        if not texts:
            return {}

        # Relative cardinality test: too few distinct values, skip the column
        distinct_ratio = len(set(texts)) / len(texts)
        if distinct_ratio < self._relative_cardinality_cutoff:
            return {}

        # Column name keywords are handed to the analyzer as context
        name_context = split_column_name(column_name) if column_name else None
        content_scores = extract_pii_tags(
            self._presidio_analyzer,
            texts,
            context=name_context,
            recognizer_result_patcher=url_patcher,
        )

        if column_name is None:
            name_tags: Set[PIITag] = set()
        else:
            name_tags = extract_pii_from_column_names(
                column_name, patterns=self._column_name_patterns
            )

        # Boost the tags confirmed by the column name, then apply the cutoff
        boosted_scores: Dict[PIITag, float] = {
            pii_tag: (
                content_score + self._column_name_contribution
                if pii_tag in name_tags
                else content_score
            )
            for pii_tag, content_score in content_scores.items()
        }
        return {
            pii_tag: boosted
            for pii_tag, boosted in boosted_scores.items()
            if boosted >= self._score_cutoff
        }
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
class PIISensitiveClassifier(ColumnClassifier[PIISensitivityTag]):
    """
    Classifier for PII sensitivity tags built on top of a PII column
    classifier. When no classifier is given, a HeuristicPIIClassifier
    is created and used.
    """

    def __init__(self, classifier: Optional[ColumnClassifier[PIITag]] = None):
        self.classifier: ColumnClassifier[PIITag] = (
            classifier or HeuristicPIIClassifier()
        )

    def predict_scores(
        self,
        sample_data: Sequence[Any],
        column_name: Optional[str] = None,
        column_data_type: Optional[DataType] = None,
    ) -> Mapping[PIISensitivityTag, float]:
        """
        Score each sensitivity level as the average score of the PII tags
        of the wrapped classifier that map to that level.
        """
        tag_scores = self.classifier.predict_scores(
            sample_data, column_name, column_data_type
        )

        # sensitivity -> [running total, number of contributing PII tags]
        accumulators: Dict[PIISensitivityTag, Any] = {}
        for pii_tag, tag_score in tag_scores.items():
            # Convert PIITag to PIISensitivityTag
            level = pii_tag.sensitivity()
            running = accumulators.setdefault(level, [0.0, 0])
            running[0] += tag_score
            running[1] += 1

        # Normalize the scores
        averaged: DefaultDict[PIISensitivityTag, float] = defaultdict(float)
        for level, (total, contributors) in accumulators.items():
            averaged[level] = total / contributors

        return averaged
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# Copyright 2025 Collate
|
|
2
|
+
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
6
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
7
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
9
|
+
# See the License for the specific language governing permissions and
|
|
10
|
+
# limitations under the License.
|
|
11
|
+
"""
|
|
12
|
+
Definition of custom patterns for the PII detection.
|
|
13
|
+
Only patterns for column names are implemented here; for content,
|
|
14
|
+
we rely on the Presidio library.
|
|
15
|
+
"""
|
|
16
|
+
import re
|
|
17
|
+
from collections import defaultdict
|
|
18
|
+
from functools import lru_cache
|
|
19
|
+
from typing import DefaultDict, List, Mapping, Union
|
|
20
|
+
|
|
21
|
+
from metadata.pii.algorithms.tags import PIITag
|
|
22
|
+
|
|
23
|
+
# Regex patterns for PII detection in column names, not for content
|
|
24
|
+
# Each PII tag maps to one regex source or a list of regex sources.
# The sources are compiled with re.IGNORECASE by get_pii_column_name_patterns.
# NOTE(review): several entries start with ``\b`` rather than ``^.*``; if the
# caller uses ``re.Pattern.match`` these only hit at the start of the column
# name, and ``_`` counts as a word character for ``\b`` - confirm this is the
# intended behavior.
_pii_column_name_regexes: Mapping[PIITag, Union[str, List[str]]] = {
    PIITag.US_SSN: "^.*(ssn|social).*$",
    PIITag.CREDIT_CARD: "^.*(credit).*(card).*$",
    PIITag.US_BANK_NUMBER: [
        r"\b(account|acct|acc)[_-]?(number|num|no)\b",  # account_number, account_num
        r"\bbank[_-]?(account|number|num|no)?\b",  # bank_account, bank_number
    ],
    # The first two IBAN patterns are the same as the US_BANK_NUMBER ones
    PIITag.IBAN_CODE: [
        r"\b(account|acct|acc)[_-]?(number|num|no)\b",  # account_number, account_num
        r"\bbank[_-]?(account|number|num|no)?\b",  # bank_account, bank_number
        r"\biban(?:[_]?(number|code))?\b",  # iban, iban_number, iban_code
        r"\bbank[_]?iban\b",  # bank_iban
        r"\binternational[_]?(account|bank[_]?number)\b",  # international_account, international_bank_number
    ],
    # Anchored at the start: the name has to begin with email, e-mail or mail
    PIITag.EMAIL_ADDRESS: "^(email|e-mail|mail)(.*address)?$",
    PIITag.PERSON: "^.*(user|client|person|first|last|maiden|nick).*(name).*$",
    PIITag.DATE_TIME: "^.*(date|time|dob|birthday|dod).*$",
    PIITag.NRP: "^.*(gender|nationality).*$",
    PIITag.LOCATION: "^.*(address|city|state|county|country|zipcode|zip|postal|zone|borough).*$",
    PIITag.PHONE_NUMBER: "^.*(phone).*$",
}
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@lru_cache
def get_pii_column_name_patterns() -> Mapping[PIITag, List[re.Pattern[str]]]:
    """
    Compile the column name regexes of every PII tag.

    All patterns are compiled case-insensitively. The result is cached,
    so every caller receives the same mapping object.
    """
    compiled: DefaultDict[PIITag, List[re.Pattern[str]]] = defaultdict(list)

    for pii_tag, spec in _pii_column_name_regexes.items():
        # A tag may declare a single regex or a list of them
        sources = [spec] if isinstance(spec, str) else spec
        for source in sources:
            compiled[pii_tag].append(re.compile(source, re.IGNORECASE))

    return compiled
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
# Copyright 2025 Collate
|
|
2
|
+
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
6
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
7
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
9
|
+
# See the License for the specific language governing permissions and
|
|
10
|
+
# limitations under the License.
|
|
11
|
+
"""
|
|
12
|
+
Extraction of PII features (from text, column names, and data types) to be used
|
|
13
|
+
for the PII classification model.
|
|
14
|
+
"""
|
|
15
|
+
import logging
|
|
16
|
+
import re
|
|
17
|
+
from collections import defaultdict
|
|
18
|
+
from typing import DefaultDict, Dict, Iterable, List, Mapping, Optional, Sequence, Set
|
|
19
|
+
|
|
20
|
+
from presidio_analyzer import AnalyzerEngine
|
|
21
|
+
|
|
22
|
+
from metadata.generated.schema.entity.data.table import DataType
|
|
23
|
+
from metadata.pii.algorithms.presidio_patches import PresidioRecognizerResultPatcher
|
|
24
|
+
from metadata.pii.algorithms.tags import PIITag
|
|
25
|
+
from metadata.pii.scanners.ner_scanner import SUPPORTED_LANG
|
|
26
|
+
from metadata.utils.logger import pii_logger
|
|
27
|
+
|
|
28
|
+
logger = pii_logger()
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def extract_pii_tags(
    analyzer: AnalyzerEngine,
    texts: Sequence[str],
    context: Optional[List[str]] = None,
    recognizer_result_patcher: Optional[PresidioRecognizerResultPatcher] = None,
) -> Dict[PIITag, float]:
    """
    Extract PII entities from a batch of texts.

    The results are averaged over the batch. In general, the larger the batch,
    the better the results, as some single texts might be noisy or contain
    false positives.

    Args:
        analyzer (AnalyzerEngine): The analyzer engine to use for PII detection.
        texts (Sequence[str]): A sequence of texts to analyze.
        context (Optional[List[str]]): Optional context to provide to the analyzer.
            This can be used to improve the accuracy of the PII detection.
            For example, keywords extracted from column names.
        recognizer_result_patcher (Optional[PresidioRecognizerResultPatcher]): A function
            that takes the recognizer results of a text, together with the text,
            and returns the (possibly modified) results.
    Returns:
        Mapping[PIITag, float]: A mapping of PII entity types to their scores,
        summed over all texts and divided by the number of texts.
    Raises:
        ValueError: if the analyzer does not support SUPPORTED_LANG.
    """
    entity_scores: DefaultDict[PIITag, float] = defaultdict(float)

    if SUPPORTED_LANG not in analyzer.supported_languages:
        raise ValueError(
            f"The analyzer does not support {SUPPORTED_LANG}, which is required for this function."
        )

    for text in texts:
        results = analyzer.analyze(
            text, language=SUPPORTED_LANG, context=context, entities=PIITag.values()
        )
        if recognizer_result_patcher is not None:
            results = recognizer_result_patcher(results, text)

        for result in results:
            try:
                # This should be safe because the analyzer only considers the entities that we passed
                pii_entity = PIITag[result.entity_type]
            except KeyError:
                # Report through the module's PII logger rather than the root logger
                logger.error(f"Unrecognized PII entity type: {result.entity_type}.")
            else:
                entity_scores[pii_entity] += result.score

    # normalize the scores if the batch is not empty
    batch_size = len(texts)
    if batch_size:
        for entity in entity_scores:
            entity_scores[entity] /= batch_size

    return entity_scores
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def split_column_name(column_name: str) -> List[str]:
    """
    Break a column name into lower-cased components.

    The name is split on every single occurrence of "_", "-", " ", "." or
    "/"; the parts are meant to be passed to the analyzer as context.
    """
    # One character class covering all the supported delimiters
    return re.split(r"[_\- ./]", column_name.lower())
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def extract_pii_from_column_names(
    column_name: str, patterns: Mapping[PIITag, Iterable[re.Pattern[str]]]
) -> Set[PIITag]:
    """
    Collect the PII tags whose column name patterns match the given name.

    A tag is reported as soon as one of its patterns matches from the
    beginning of the column name (``re.Pattern.match`` semantics); the
    remaining patterns of that tag are not evaluated.

    Example: with a pattern such as "^(email|e-mail|mail)(.*address)?$"
    registered for PIITag.EMAIL_ADDRESS, the column name "email_address"
    yields a set containing PIITag.EMAIL_ADDRESS.
    """
    return {
        pii_type
        for pii_type, candidates in patterns.items()
        if any(candidate.match(column_name) is not None for candidate in candidates)
    }
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def is_non_pii_datatype(dtype: DataType) -> bool:
    """
    Tell whether columns of the given data type are excluded from PII
    detection because they are unlikely to contain PII.
    """
    return dtype in {
        # Types that are not expected to hold PII
        DataType.BOOLEAN,
        DataType.BIT,
        DataType.NULL,
        DataType.ERROR,
        DataType.FIXED,
        DataType.AGGREGATEFUNCTION,
        DataType.HLLSKETCH,
        DataType.QUANTILE_STATE,
        DataType.AGG_STATE,
        DataType.BITMAP,
        DataType.PG_LSN,
        DataType.PG_SNAPSHOT,
        DataType.TXID_SNAPSHOT,
        DataType.TSQUERY,
        DataType.TSVECTOR,
        DataType.UNKNOWN,
        DataType.LOWCARDINALITY,
        DataType.MEASURE_HIDDEN,
        DataType.MEASURE_VISIBLE,
        # Geo data types
        DataType.GEOGRAPHY,
        DataType.GEOMETRY,
        DataType.SPATIAL,
        DataType.POINT,
        DataType.POLYGON,
    }
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# Copyright 2025 Collate
|
|
2
|
+
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
6
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
7
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
9
|
+
# See the License for the specific language governing permissions and
|
|
10
|
+
# limitations under the License.
|
|
11
|
+
"""
|
|
12
|
+
Preprocessing functions for the classification tasks.
|
|
13
|
+
"""
|
|
14
|
+
import datetime
|
|
15
|
+
import json
|
|
16
|
+
from typing import Any, List, Mapping, Optional, Sequence
|
|
17
|
+
|
|
18
|
+
from metadata.utils.logger import pii_logger
|
|
19
|
+
|
|
20
|
+
logger = pii_logger()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# pylint: disable=too-many-return-statements
|
|
24
|
+
def convert_to_str(value: Any) -> Optional[str]:
    """
    Turn a sampled value into text for the classification tasks.

    This conversion is tailored to our use case, it is not a generic one:
    strings are returned untouched, numbers and dates use ``str()``, bytes
    are decoded as UTF-8 dropping invalid bytes, and other sequences or
    mappings are dumped as JSON. ``None`` and any other kind of value
    yield ``None`` so the caller can skip them.
    """
    # We want to skip None values, not convert them to "None"
    if value is None:
        return None

    if isinstance(value, str):
        return value

    if isinstance(value, bytes):
        return value.decode("utf-8", errors="ignore")

    # Values we want to convert to string out of the box
    if isinstance(value, (int, float, datetime.datetime, datetime.date)):
        return str(value)

    # Anything that is not a container is not supported
    if not isinstance(value, (Sequence, Mapping)):
        return None

    try:
        return json.dumps(value, default=str)
    except (TypeError, ValueError, OverflowError) as e:
        # If the value cannot be serialized to JSON, return None
        logger.warning(f"Failed to convert value to JSON: {e}")
        return None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def preprocess_values(values: Sequence[Any]) -> List[str]:
    """
    Convert the sampled values to strings, dropping the values that cannot
    be converted and the ones that are empty or whitespace only. The kept
    strings are returned as converted, without any further processing.
    """
    converted_values = (convert_to_str(value) for value in values)
    return [
        text
        for text in converted_values
        # skip None values and empty strings
        if text is not None and text.strip()
    ]
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Copyright 2025 Collate
|
|
2
|
+
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
6
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
7
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
9
|
+
# See the License for the specific language governing permissions and
|
|
10
|
+
# limitations under the License.
|
|
11
|
+
"""
|
|
12
|
+
Patch the Presidio recognizer results to adapt them to specific use cases.
|
|
13
|
+
"""
|
|
14
|
+
from typing import List, Protocol, Sequence
|
|
15
|
+
|
|
16
|
+
from presidio_analyzer import RecognizerResult
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class PresidioRecognizerResultPatcher(Protocol):
    """
    A protocol for a function that takes the recognizer results of a text
    and returns a modified sequence of results.
    Sometimes we need to patch the recognizer results to make them compatible
    with our use case. For instance, Presidio yields URL false positives
    with email addresses.
    """

    def __call__(
        self, recognizer_results: Sequence[RecognizerResult], text: str
    ) -> Sequence[RecognizerResult]:
        # recognizer_results: results produced for `text`
        # text: the analyzed text the results refer to
        ...
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def url_patcher(
    recognizer_results: Sequence[RecognizerResult], text: str
) -> Sequence[RecognizerResult]:
    """
    Drop the URL results that are false positives caused by an email address.

    A URL result is discarded when the character right before its start
    position in ``text`` is "@"; every other result is kept, in order.
    """

    def _follows_at_sign(candidate: RecognizerResult) -> bool:
        # probably the domain part of an email address
        return candidate.entity_type == "URL" and text[: candidate.start].endswith(
            "@"
        )

    return [
        candidate
        for candidate in recognizer_results
        if not _follows_at_sign(candidate)
    ]
|